diff options
Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r-- | net/ipv4/tcp_input.c | 51 |
1 files changed, 22 insertions, 29 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 47e08c1b5bc3..188980c58f87 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -426,26 +426,7 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb) } } -/* 3. Tuning rcvbuf, when connection enters established state. */ -static void tcp_fixup_rcvbuf(struct sock *sk) -{ - u32 mss = tcp_sk(sk)->advmss; - int rcvmem; - - rcvmem = 2 * SKB_TRUESIZE(mss + MAX_TCP_HEADER) * - tcp_default_init_rwnd(mss); - - /* Dynamic Right Sizing (DRS) has 2 to 3 RTT latency - * Allow enough cushion so that sender is not limited by our window - */ - if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf) - rcvmem <<= 2; - - if (sk->sk_rcvbuf < rcvmem) - sk->sk_rcvbuf = min(rcvmem, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]); -} - -/* 4. Try to fixup all. It is made immediately after connection enters +/* 3. Try to fixup all. It is made immediately after connection enters * established state. */ void tcp_init_buffer_space(struct sock *sk) @@ -454,12 +435,10 @@ void tcp_init_buffer_space(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); int maxwin; - if (!(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) - tcp_fixup_rcvbuf(sk); if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK)) tcp_sndbuf_expand(sk); - tp->rcvq_space.space = tp->rcv_wnd; + tp->rcvq_space.space = min_t(u32, tp->rcv_wnd, TCP_INIT_CWND * tp->advmss); tcp_mstamp_refresh(tp); tp->rcvq_space.time = tp->tcp_mstamp; tp->rcvq_space.seq = tp->copied_seq; @@ -485,7 +464,7 @@ void tcp_init_buffer_space(struct sock *sk) tp->snd_cwnd_stamp = tcp_jiffies32; } -/* 5. Recalculate window clamp after socket hit its memory bounds. */ +/* 4. Recalculate window clamp after socket hit its memory bounds. */ static void tcp_clamp_window(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); @@ -1305,7 +1284,7 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *prev, */ tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked, start_seq, end_seq, dup_sack, pcount, - skb->skb_mstamp); + tcp_skb_timestamp_us(skb)); tcp_rate_skb_delivered(sk, skb, state->rate); if (skb == tp->lost_skb_hint) @@ -1580,7 +1559,7 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk, TCP_SKB_CB(skb)->end_seq, dup_sack, tcp_skb_pcount(skb), - skb->skb_mstamp); + tcp_skb_timestamp_us(skb)); tcp_rate_skb_delivered(sk, skb, state->rate); if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) list_del_init(&skb->tcp_tsorted_anchor); @@ -3103,7 +3082,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack, tp->retrans_out -= acked_pcount; flag |= FLAG_RETRANS_DATA_ACKED; } else if (!(sacked & TCPCB_SACKED_ACKED)) { - last_ackt = skb->skb_mstamp; + last_ackt = tcp_skb_timestamp_us(skb); WARN_ON_ONCE(last_ackt == 0); if (!first_ackt) first_ackt = last_ackt; @@ -3121,7 +3100,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack, tp->delivered += acked_pcount; if (!tcp_skb_spurious_retrans(tp, skb)) tcp_rack_advance(tp, sacked, scb->end_seq, - skb->skb_mstamp); + tcp_skb_timestamp_us(skb)); } if (sacked & TCPCB_LOST) tp->lost_out -= acked_pcount; @@ -3215,7 +3194,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack, tp->lost_cnt_hint -= min(tp->lost_cnt_hint, delta); } } else if (skb && rtt_update && sack_rtt_us >= 0 && - sack_rtt_us > tcp_stamp_us_delta(tp->tcp_mstamp, skb->skb_mstamp)) { + sack_rtt_us > tcp_stamp_us_delta(tp->tcp_mstamp, + tcp_skb_timestamp_us(skb))) { /* Do not re-arm RTO if the sack RTT is measured from data sent * after when the head was last (re)transmitted. Otherwise the * timeout may continue to extend in loss recovery. @@ -4199,6 +4179,17 @@ static void tcp_dsack_extend(struct sock *sk, u32 seq, u32 end_seq) tcp_sack_extend(tp->duplicate_sack, seq, end_seq); } +static void tcp_rcv_spurious_retrans(struct sock *sk, const struct sk_buff *skb) +{ + /* When the ACK path fails or drops most ACKs, the sender would + * timeout and spuriously retransmit the same segment repeatedly. + * The receiver remembers and reflects via DSACKs. Leverage the + * DSACK state and change the txhash to re-route speculatively. + */ + if (TCP_SKB_CB(skb)->seq == tcp_sk(sk)->duplicate_sack[0].start_seq) + sk_rethink_txhash(sk); +} + static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); @@ -4211,6 +4202,7 @@ static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb) if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) { u32 end_seq = TCP_SKB_CB(skb)->end_seq; + tcp_rcv_spurious_retrans(sk, skb); if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) end_seq = tp->rcv_nxt; tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, end_seq); @@ -4755,6 +4747,7 @@ queue_and_out: } if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) { + tcp_rcv_spurious_retrans(sk, skb); /* A retransmit, 2nd most common case. Force an immediate ack. */ NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST); tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq); |