tcp: optimize tcp internal pacing

When TCP implements its own pacing (when no fq packet scheduler is used), it arms a high resolution timer after a packet is sent.

But in many cases (like TCP_RR kind of workloads), this high resolution timer expires before the application attempts to write the following packet. This overhead also happens when the flow is ACK clocked and cwnd limited instead of being limited by the pacing rate.

This leads to extra overhead (a high number of IRQs).

Now that tcp_wstamp_ns is reserved for the pacing timer only (after commit "tcp: do not change tcp_wstamp_ns in tcp_mstamp_refresh"), we can set up the timer only when a packet is about to be sent, and only if tcp_wstamp_ns is in the future.

This leads to a ~10% performance increase in TCP_RR workloads.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
author: Eric Dumazet <edumazet@google.com> 2018-10-15 09:37:56 -0700
committer: David S. Miller <davem@davemloft.net> 2018-10-15 22:56:42 -0700
commit: 864e5c090749448e879e86bec06ee396aa2c19c5 (patch)
tree: 25eae7e82f14b3b683fc0288a1cd83d3b87a4af2 /net/ipv4/tcp_output.c
parent: 7baf33bdac37da65ddce3adf4daa8c7805802174 (diff)
1 files changed, 16 insertions, 15 deletions
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 5474c9854f25..d212e4cbc689 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -975,16 +975,6 @@ enum hrtimer_restart tcp_pace_kick(struct hrtimer *timer)
 	return HRTIMER_NORESTART;
 }
 
-static void tcp_internal_pacing(struct sock *sk)
-{
-	if (!tcp_needs_internal_pacing(sk))
-		return;
-	hrtimer_start(&tcp_sk(sk)->pacing_timer,
-		      ns_to_ktime(tcp_sk(sk)->tcp_wstamp_ns),
-		      HRTIMER_MODE_ABS_PINNED_SOFT);
-	sock_hold(sk);
-}
-
 static void tcp_update_skb_after_send(struct sock *sk, struct sk_buff *skb,
 				      u64 prior_wstamp)
 {
@@ -1005,8 +995,6 @@ static void tcp_update_skb_after_send(struct sock *sk, struct sk_buff *skb,
 			/* take into account OS jitter */
 			len_ns -= min_t(u64, len_ns / 2, credit);
 			tp->tcp_wstamp_ns += len_ns;
-
-			tcp_internal_pacing(sk);
 		}
 	}
 	list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue);
@@ -2186,10 +2174,23 @@ static int tcp_mtu_probe(struct sock *sk)
 	return -1;
 }
 
-static bool tcp_pacing_check(const struct sock *sk)
+static bool tcp_pacing_check(struct sock *sk)
 {
-	return tcp_needs_internal_pacing(sk) &&
-	       hrtimer_is_queued(&tcp_sk(sk)->pacing_timer);
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	if (!tcp_needs_internal_pacing(sk))
+		return false;
+
+	if (tp->tcp_wstamp_ns <= tp->tcp_clock_cache)
+		return false;
+
+	if (!hrtimer_is_queued(&tp->pacing_timer)) {
+		hrtimer_start(&tp->pacing_timer,
+			      ns_to_ktime(tp->tcp_wstamp_ns),
+			      HRTIMER_MODE_ABS_PINNED_SOFT);
+		sock_hold(sk);
+	}
+	return true;
 }
 
 /* TCP Small Queues :
author	Eric Dumazet <edumazet@google.com>	2018-10-15 09:37:56 -0700
committer	David S. Miller <davem@davemloft.net>	2018-10-15 22:56:42 -0700
commit	864e5c090749448e879e86bec06ee396aa2c19c5 (patch)
tree	25eae7e82f14b3b683fc0288a1cd83d3b87a4af2 /net/ipv4/tcp_output.c
parent	7baf33bdac37da65ddce3adf4daa8c7805802174 (diff)