diff options
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/tcp.c | 1 | ||||
-rw-r--r-- | net/ipv4/tcp_input.c | 7 | ||||
-rw-r--r-- | net/ipv4/tcp_minisocks.c | 4 | ||||
-rw-r--r-- | net/ipv4/tcp_recovery.c | 48 |
4 files changed, 58 insertions, 2 deletions
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index a7a0f316eb86..c4cb19ed4628 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -447,6 +447,7 @@ void tcp_init_sock(struct sock *sk) tcp_assign_congestion_control(sk); tp->tsoffset = 0; + tp->rack.reo_wnd_steps = 1; sk->sk_state = TCP_CLOSE; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 8393b405ea98..0ada8bfc2ebd 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -856,6 +856,7 @@ void tcp_disable_fack(struct tcp_sock *tp) static void tcp_dsack_seen(struct tcp_sock *tp) { tp->rx_opt.sack_ok |= TCP_DSACK_SEEN; + tp->rack.dsack_seen = 1; } static void tcp_update_reordering(struct sock *sk, const int metric, @@ -2408,6 +2409,8 @@ static bool tcp_try_undo_recovery(struct sock *sk) mib_idx = LINUX_MIB_TCPFULLUNDO; NET_INC_STATS(sock_net(sk), mib_idx); + } else if (tp->rack.reo_wnd_persist) { + tp->rack.reo_wnd_persist--; } if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) { /* Hold old state until something *above* high_seq @@ -2427,6 +2430,8 @@ static bool tcp_try_undo_dsack(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); if (tp->undo_marker && !tp->undo_retrans) { + tp->rack.reo_wnd_persist = min(TCP_RACK_RECOVERY_THRESH, + tp->rack.reo_wnd_persist + 1); DBGUNDO(sk, "D-SACK"); tcp_undo_cwnd_reduction(sk, false); NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKUNDO); @@ -3644,6 +3649,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una, &acked, &sack_state); + tcp_rack_update_reo_wnd(sk, &rs); + if (tp->tlp_high_seq) tcp_process_tlp_ack(sk, ack, flag); /* If needed, reset TLP/RTO timer; RACK may later override this. */ diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 3c65c1a3f944..4bb86580decd 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -551,6 +551,10 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, newtp->syn_data_acked = 0; newtp->rack.mstamp = 0; newtp->rack.advanced = 0; + newtp->rack.reo_wnd_steps = 1; + newtp->rack.last_delivered = 0; + newtp->rack.reo_wnd_persist = 0; + newtp->rack.dsack_seen = 0; __TCP_INC_STATS(sock_net(sk), TCP_MIB_PASSIVEOPENS); } diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c index ac3e9c6d3a3d..d3ea89020c69 100644 --- a/net/ipv4/tcp_recovery.c +++ b/net/ipv4/tcp_recovery.c @@ -44,6 +44,7 @@ static bool tcp_rack_sent_after(u64 t1, u64 t2, u32 seq1, u32 seq2) static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout) { struct tcp_sock *tp = tcp_sk(sk); + u32 min_rtt = tcp_min_rtt(tp); struct sk_buff *skb, *n; u32 reo_wnd; @@ -54,8 +55,10 @@ static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout) * to queuing or delayed ACKs. */ reo_wnd = 1000; - if ((tp->rack.reord || !tp->lost_out) && tcp_min_rtt(tp) != ~0U) - reo_wnd = max(tcp_min_rtt(tp) >> 2, reo_wnd); + if ((tp->rack.reord || !tp->lost_out) && min_rtt != ~0U) { + reo_wnd = max((min_rtt >> 2) * tp->rack.reo_wnd_steps, reo_wnd); + reo_wnd = min(reo_wnd, tp->srtt_us >> 3); + } list_for_each_entry_safe(skb, n, &tp->tsorted_sent_queue, tcp_tsorted_anchor) { @@ -160,3 +163,44 @@ void tcp_rack_reo_timeout(struct sock *sk) if (inet_csk(sk)->icsk_pending != ICSK_TIME_RETRANS) tcp_rearm_rto(sk); } + +/* Updates the RACK's reo_wnd based on DSACK and no. of recoveries. + * + * If DSACK is received, increment reo_wnd by min_rtt/4 (upper bounded + * by srtt), since there is possibility that spurious retransmission was + * due to reordering delay longer than reo_wnd. + * + * Persist the current reo_wnd value for TCP_RACK_RECOVERY_THRESH (16) + * no. of successful recoveries (accounts for full DSACK-based loss + * recovery undo). After that, reset it to default (min_rtt/4). + * + * At max, reo_wnd is incremented only once per rtt. So that the new + * DSACK on which we are reacting, is due to the spurious retx (approx) + * after the reo_wnd has been updated last time. + * + * reo_wnd is tracked in terms of steps (of min_rtt/4), rather than + * absolute value to account for change in rtt. + */ +void tcp_rack_update_reo_wnd(struct sock *sk, struct rate_sample *rs) +{ + struct tcp_sock *tp = tcp_sk(sk); + + if (sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_STATIC_REO_WND || + !rs->prior_delivered) + return; + + /* Disregard DSACK if a rtt has not passed since we adjusted reo_wnd */ + if (before(rs->prior_delivered, tp->rack.last_delivered)) + tp->rack.dsack_seen = 0; + + /* Adjust the reo_wnd if update is pending */ + if (tp->rack.dsack_seen) { + tp->rack.reo_wnd_steps = min_t(u32, 0xFF, + tp->rack.reo_wnd_steps + 1); + tp->rack.dsack_seen = 0; + tp->rack.last_delivered = tp->delivered; + tp->rack.reo_wnd_persist = TCP_RACK_RECOVERY_THRESH; + } else if (!tp->rack.reo_wnd_persist) { + tp->rack.reo_wnd_steps = 1; + } +} |