summaryrefslogtreecommitdiff
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/tcp.c1
-rw-r--r--net/ipv4/tcp_input.c7
-rw-r--r--net/ipv4/tcp_minisocks.c4
-rw-r--r--net/ipv4/tcp_recovery.c48
4 files changed, 58 insertions, 2 deletions
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index a7a0f316eb86..c4cb19ed4628 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -447,6 +447,7 @@ void tcp_init_sock(struct sock *sk)
tcp_assign_congestion_control(sk);
tp->tsoffset = 0;
+ tp->rack.reo_wnd_steps = 1;
sk->sk_state = TCP_CLOSE;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 8393b405ea98..0ada8bfc2ebd 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -856,6 +856,7 @@ void tcp_disable_fack(struct tcp_sock *tp)
static void tcp_dsack_seen(struct tcp_sock *tp)
{
tp->rx_opt.sack_ok |= TCP_DSACK_SEEN;
+ tp->rack.dsack_seen = 1;
}
static void tcp_update_reordering(struct sock *sk, const int metric,
@@ -2408,6 +2409,8 @@ static bool tcp_try_undo_recovery(struct sock *sk)
mib_idx = LINUX_MIB_TCPFULLUNDO;
NET_INC_STATS(sock_net(sk), mib_idx);
+ } else if (tp->rack.reo_wnd_persist) {
+ tp->rack.reo_wnd_persist--;
}
if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) {
/* Hold old state until something *above* high_seq
@@ -2427,6 +2430,8 @@ static bool tcp_try_undo_dsack(struct sock *sk)
struct tcp_sock *tp = tcp_sk(sk);
if (tp->undo_marker && !tp->undo_retrans) {
+ tp->rack.reo_wnd_persist = min(TCP_RACK_RECOVERY_THRESH,
+ tp->rack.reo_wnd_persist + 1);
DBGUNDO(sk, "D-SACK");
tcp_undo_cwnd_reduction(sk, false);
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKUNDO);
@@ -3644,6 +3649,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una, &acked,
&sack_state);
+ tcp_rack_update_reo_wnd(sk, &rs);
+
if (tp->tlp_high_seq)
tcp_process_tlp_ack(sk, ack, flag);
/* If needed, reset TLP/RTO timer; RACK may later override this. */
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 3c65c1a3f944..4bb86580decd 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -551,6 +551,10 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
newtp->syn_data_acked = 0;
newtp->rack.mstamp = 0;
newtp->rack.advanced = 0;
+ newtp->rack.reo_wnd_steps = 1;
+ newtp->rack.last_delivered = 0;
+ newtp->rack.reo_wnd_persist = 0;
+ newtp->rack.dsack_seen = 0;
__TCP_INC_STATS(sock_net(sk), TCP_MIB_PASSIVEOPENS);
}
diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c
index ac3e9c6d3a3d..d3ea89020c69 100644
--- a/net/ipv4/tcp_recovery.c
+++ b/net/ipv4/tcp_recovery.c
@@ -44,6 +44,7 @@ static bool tcp_rack_sent_after(u64 t1, u64 t2, u32 seq1, u32 seq2)
static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout)
{
struct tcp_sock *tp = tcp_sk(sk);
+ u32 min_rtt = tcp_min_rtt(tp);
struct sk_buff *skb, *n;
u32 reo_wnd;
@@ -54,8 +55,10 @@ static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout)
* to queuing or delayed ACKs.
*/
reo_wnd = 1000;
- if ((tp->rack.reord || !tp->lost_out) && tcp_min_rtt(tp) != ~0U)
- reo_wnd = max(tcp_min_rtt(tp) >> 2, reo_wnd);
+ if ((tp->rack.reord || !tp->lost_out) && min_rtt != ~0U) {
+ reo_wnd = max((min_rtt >> 2) * tp->rack.reo_wnd_steps, reo_wnd);
+ reo_wnd = min(reo_wnd, tp->srtt_us >> 3);
+ }
list_for_each_entry_safe(skb, n, &tp->tsorted_sent_queue,
tcp_tsorted_anchor) {
@@ -160,3 +163,44 @@ void tcp_rack_reo_timeout(struct sock *sk)
if (inet_csk(sk)->icsk_pending != ICSK_TIME_RETRANS)
tcp_rearm_rto(sk);
}
+
+/* Updates the RACK's reo_wnd based on DSACK and no. of recoveries.
+ *
+ * If DSACK is received, increment reo_wnd by min_rtt/4 (upper bounded
+ * by srtt), since there is possibility that spurious retransmission was
+ * due to reordering delay longer than reo_wnd.
+ *
+ * Persist the current reo_wnd value for TCP_RACK_RECOVERY_THRESH (16)
+ * no. of successful recoveries (accounts for full DSACK-based loss
+ * recovery undo). After that, reset it to default (min_rtt/4).
+ *
+ * At max, reo_wnd is incremented only once per rtt. So that the new
+ * DSACK on which we are reacting, is due to the spurious retx (approx)
+ * after the reo_wnd has been updated last time.
+ *
+ * reo_wnd is tracked in terms of steps (of min_rtt/4), rather than
+ * absolute value to account for change in rtt.
+ */
+void tcp_rack_update_reo_wnd(struct sock *sk, struct rate_sample *rs)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ if (sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_STATIC_REO_WND ||
+ !rs->prior_delivered)
+ return;
+
+ /* Disregard DSACK if a rtt has not passed since we adjusted reo_wnd */
+ if (before(rs->prior_delivered, tp->rack.last_delivered))
+ tp->rack.dsack_seen = 0;
+
+ /* Adjust the reo_wnd if update is pending */
+ if (tp->rack.dsack_seen) {
+ tp->rack.reo_wnd_steps = min_t(u32, 0xFF,
+ tp->rack.reo_wnd_steps + 1);
+ tp->rack.dsack_seen = 0;
+ tp->rack.last_delivered = tp->delivered;
+ tp->rack.reo_wnd_persist = TCP_RACK_RECOVERY_THRESH;
+ } else if (!tp->rack.reo_wnd_persist) {
+ tp->rack.reo_wnd_steps = 1;
+ }
+}