diff options
author | David Morley <morleyd@google.com> | 2023-10-06 01:18:40 +0000 |
---|---|---|
committer | Paolo Abeni <pabeni@redhat.com> | 2023-10-10 10:02:59 +0200 |
commit | 95b9a87c6a6b708cccda1f9b7baf9920b80cdabf (patch) | |
tree | d06f62ced178016f8a07a882290350d1b01ca1a9 | |
parent | 8cea95b0bd7930367f11e2abceda6e096dd18943 (diff) |
tcp: record last received ipv6 flowlabel
In order to better estimate whether a data packet has been
retransmitted or is the result of a TLP, we save the last received
ipv6 flowlabel.
To make space for this field we resize the "ato" field in
inet_connection_sock as the current value of TCP_DELACK_MAX can be
fully contained in 8 bits and add a compile_time_assert ensuring this
field is the required size.
v2: addressed kernel bot feedback about dccp_delack_timer()
v3: addressed build error introduced by commit bbf80d713fe7 ("tcp:
derive delack_max from rto_min")
Signed-off-by: David Morley <morleyd@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Tested-by: David Morley <morleyd@google.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
-rw-r--r-- | include/net/inet_connection_sock.h | 5 | ||||
-rw-r--r-- | include/net/tcp.h | 2 | ||||
-rw-r--r-- | net/dccp/timer.c | 4 | ||||
-rw-r--r-- | net/ipv4/tcp.c | 4 | ||||
-rw-r--r-- | net/ipv4/tcp_input.c | 15 | ||||
-rw-r--r-- | net/ipv4/tcp_timer.c | 2 |
6 files changed, 26 insertions, 6 deletions
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 5d2fcc137b88..d6d9d1c1985a 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -114,7 +114,10 @@ struct inet_connection_sock { __u8 quick; /* Scheduled number of quick acks */ __u8 pingpong; /* The session is interactive */ __u8 retry; /* Number of attempts */ - __u32 ato; /* Predicted tick of soft clock */ + #define ATO_BITS 8 + __u32 ato:ATO_BITS, /* Predicted tick of soft clock */ + lrcv_flowlabel:20, /* last received ipv6 flowlabel */ + unused:4; unsigned long timeout; /* Currently scheduled timeout */ __u32 lrcvtime; /* timestamp of last received data packet */ __u16 last_seg_size; /* Size of last incoming segment */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 9eb0a2855311..7fdedf5c71f0 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -131,6 +131,8 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); #define TCP_FIN_TIMEOUT_MAX (120 * HZ) /* max TCP_LINGER2 value (two minutes) */ #define TCP_DELACK_MAX ((unsigned)(HZ/5)) /* maximal time to delay before sending an ACK */ +static_assert((1 << ATO_BITS) > TCP_DELACK_MAX); + #if HZ >= 100 #define TCP_DELACK_MIN ((unsigned)(HZ/25)) /* minimal time to delay before sending an ACK */ #define TCP_ATO_MIN ((unsigned)(HZ/25)) diff --git a/net/dccp/timer.c b/net/dccp/timer.c index b3255e87cc7e..a4cfb47b60e5 100644 --- a/net/dccp/timer.c +++ b/net/dccp/timer.c @@ -196,8 +196,8 @@ static void dccp_delack_timer(struct timer_list *t) if (inet_csk_ack_scheduled(sk)) { if (!inet_csk_in_pingpong_mode(sk)) { /* Delayed ACK missed: inflate ATO. */ - icsk->icsk_ack.ato = min(icsk->icsk_ack.ato << 1, - icsk->icsk_rto); + icsk->icsk_ack.ato = min_t(u32, icsk->icsk_ack.ato << 1, + icsk->icsk_rto); } else { /* Delayed ACK missed: leave pingpong mode and * deflate ATO. diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 9a8b134d8ada..faabb5a4a378 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3756,8 +3756,8 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_options |= TCPI_OPT_SYN_DATA; info->tcpi_rto = jiffies_to_usecs(icsk->icsk_rto); - info->tcpi_ato = jiffies_to_usecs(min(icsk->icsk_ack.ato, - tcp_delack_max(sk))); + info->tcpi_ato = jiffies_to_usecs(min_t(u32, icsk->icsk_ack.ato, + tcp_delack_max(sk))); info->tcpi_snd_mss = tp->mss_cache; info->tcpi_rcv_mss = icsk->icsk_ack.rcv_mss; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 4b8f2e74d71d..2ae4b211c125 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -778,6 +778,16 @@ new_measure: tp->rcvq_space.time = tp->tcp_mstamp; } +static void tcp_save_lrcv_flowlabel(struct sock *sk, const struct sk_buff *skb) +{ +#if IS_ENABLED(CONFIG_IPV6) + struct inet_connection_sock *icsk = inet_csk(sk); + + if (skb->protocol == htons(ETH_P_IPV6)) + icsk->icsk_ack.lrcv_flowlabel = ntohl(ip6_flowlabel(ipv6_hdr(skb))); +#endif +} + /* There is something which you must keep in mind when you analyze the * behavior of the tp->ato delayed ack timeout interval. When a * connection starts up, we want to ack as quickly as possible. The @@ -826,6 +836,7 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb) } } icsk->icsk_ack.lrcvtime = now; + tcp_save_lrcv_flowlabel(sk, skb); tcp_ecn_check_ce(sk, skb); @@ -4519,6 +4530,9 @@ static void tcp_rcv_spurious_retrans(struct sock *sk, const struct sk_buff *skb) if (TCP_SKB_CB(skb)->seq == tcp_sk(sk)->duplicate_sack[0].start_seq && sk_rethink_txhash(sk)) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDUPLICATEDATAREHASH); + + /* Save last flowlabel after a spurious retrans. */ + tcp_save_lrcv_flowlabel(sk, skb); } static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb) @@ -4835,6 +4849,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) u32 seq, end_seq; bool fragstolen; + tcp_save_lrcv_flowlabel(sk, skb); tcp_ecn_check_ce(sk, skb); if (unlikely(tcp_try_rmem_schedule(sk, skb, skb->truesize))) { diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 3f61c6a70a1f..0862b73dd3b5 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -322,7 +322,7 @@ void tcp_delack_timer_handler(struct sock *sk) if (inet_csk_ack_scheduled(sk)) { if (!inet_csk_in_pingpong_mode(sk)) { /* Delayed ACK missed: inflate ATO. */ - icsk->icsk_ack.ato = min(icsk->icsk_ack.ato << 1, icsk->icsk_rto); + icsk->icsk_ack.ato = min_t(u32, icsk->icsk_ack.ato << 1, icsk->icsk_rto); } else { /* Delayed ACK missed: leave pingpong mode and * deflate ATO. |