From baebdf48c360080710f80699eea3affbb13d6c65 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Sat, 12 Feb 2022 00:38:38 +0100 Subject: net: dev: Makes sure netif_rx() can be invoked in any context. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Dave suggested a while ago (eleven years by now) "Let's make netif_rx() work in all contexts and get rid of netif_rx_ni()". Eric agreed and pointed out that modern devices should use netif_receive_skb() to avoid the overhead. In the meantime someone added another variant, netif_rx_any_context(), which behaves as suggested. netif_rx() must be invoked with disabled bottom halves to ensure that pending softirqs, which were raised within the function, are handled. netif_rx_ni() can be invoked only from process context (bottom halves must be enabled) because the function handles pending softirqs without checking if bottom halves were disabled or not. netif_rx_any_context() invokes on the former functions by checking in_interrupts(). netif_rx() could be taught to handle both cases (disabled and enabled bottom halves) by simply disabling bottom halves while invoking netif_rx_internal(). The local_bh_enable() invocation will then invoke pending softirqs only if the BH-disable counter drops to zero. Eric is concerned about the overhead of BH-disable+enable especially in regard to the loopback driver. As critical as this driver is, it will receive a shortcut to avoid the additional overhead which is not needed. Add a local_bh_disable() section in netif_rx() to ensure softirqs are handled if needed. Provide __netif_rx() which does not disable BH and has a lockdep assert to ensure that interrupts are disabled. Use this shortcut in the loopback driver and in drivers/net/*.c. Make netif_rx_ni() and netif_rx_any_context() invoke netif_rx() so they can be removed once they are no more users left. Link: https://lkml.kernel.org/r/20100415.020246.218622820.davem@davemloft.net Signed-off-by: Sebastian Andrzej Siewior Reviewed-by: Eric Dumazet Reviewed-by: Toke Høiland-Jørgensen Signed-off-by: David S. Miller --- drivers/net/loopback.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/net/loopback.c') diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c index ed0edf5884ef..d05f86fe78c9 100644 --- a/drivers/net/loopback.c +++ b/drivers/net/loopback.c @@ -78,7 +78,7 @@ static netdev_tx_t loopback_xmit(struct sk_buff *skb, skb_orphan(skb); - /* Before queueing this packet to netif_rx(), + /* Before queueing this packet to __netif_rx(), * make sure dst is refcounted. */ skb_dst_force(skb); @@ -86,7 +86,7 @@ static netdev_tx_t loopback_xmit(struct sk_buff *skb, skb->protocol = eth_type_trans(skb, dev); len = skb->len; - if (likely(netif_rx(skb) == NET_RX_SUCCESS)) + if (likely(__netif_rx(skb) == NET_RX_SUCCESS)) dev_lstats_add(dev, len); return NETDEV_TX_OK; -- cgit v1.2.3-58-ga151 From de799101519aad23c6096041ba2744d7b5517e6a Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 2 Mar 2022 11:55:31 -0800 Subject: net: Add skb_clear_tstamp() to keep the mono delivery_time Right now, skb->tstamp is reset to 0 whenever the skb is forwarded. If skb->tstamp has the mono delivery_time, clearing it can hurt the performance when it finally transmits out to fq@phy-dev. The earlier patch added a skb->mono_delivery_time bit to flag the skb->tstamp carrying the mono delivery_time. This patch adds skb_clear_tstamp() helper which keeps the mono delivery_time and clears everything else. The delivery_time clearing will be postponed until the stack knows the skb will be delivered locally. It will be done in a latter patch. Signed-off-by: Martin KaFai Lau Signed-off-by: David S. Miller --- drivers/net/loopback.c | 2 +- include/linux/skbuff.h | 10 +++++++++- net/bridge/br_forward.c | 2 +- net/core/filter.c | 6 +++--- net/core/skbuff.c | 2 +- net/ipv4/ip_forward.c | 2 +- net/ipv6/ip6_output.c | 2 +- net/netfilter/ipvs/ip_vs_xmit.c | 6 +++--- net/netfilter/nf_dup_netdev.c | 2 +- net/netfilter/nf_flow_table_ip.c | 4 ++-- net/netfilter/nft_fwd_netdev.c | 2 +- net/openvswitch/vport.c | 2 +- net/xfrm/xfrm_interface.c | 2 +- 13 files changed, 26 insertions(+), 18 deletions(-) (limited to 'drivers/net/loopback.c') diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c index d05f86fe78c9..720394c0639b 100644 --- a/drivers/net/loopback.c +++ b/drivers/net/loopback.c @@ -74,7 +74,7 @@ static netdev_tx_t loopback_xmit(struct sk_buff *skb, skb_tx_timestamp(skb); /* do not fool net_timestamp_check() with various clock bases */ - skb->tstamp = 0; + skb_clear_tstamp(skb); skb_orphan(skb); diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 803ffa63dea6..27a28920e7b3 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3996,6 +3996,14 @@ static inline void skb_set_delivery_time(struct sk_buff *skb, ktime_t kt, skb->mono_delivery_time = 0; } +static inline void skb_clear_tstamp(struct sk_buff *skb) +{ + if (skb->mono_delivery_time) + return; + + skb->tstamp = 0; +} + static inline u8 skb_metadata_len(const struct sk_buff *skb) { return skb_shinfo(skb)->meta_len; @@ -4852,7 +4860,7 @@ static inline void skb_set_redirected(struct sk_buff *skb, bool from_ingress) #ifdef CONFIG_NET_REDIRECT skb->from_ingress = from_ingress; if (skb->from_ingress) - skb->tstamp = 0; + skb_clear_tstamp(skb); #endif } diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index ec646656dbf1..02bb620d3b8d 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -62,7 +62,7 @@ EXPORT_SYMBOL_GPL(br_dev_queue_push_xmit); int br_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { - skb->tstamp = 0; + skb_clear_tstamp(skb); return NF_HOOK(NFPROTO_BRIDGE, NF_BR_POST_ROUTING, net, sk, skb, NULL, skb->dev, br_dev_queue_push_xmit); diff --git a/net/core/filter.c b/net/core/filter.c index 65869fd510e8..cfcf9b4d1ec2 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2107,7 +2107,7 @@ static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb) } skb->dev = dev; - skb->tstamp = 0; + skb_clear_tstamp(skb); dev_xmit_recursion_inc(); ret = dev_queue_xmit(skb); @@ -2176,7 +2176,7 @@ static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb, } skb->dev = dev; - skb->tstamp = 0; + skb_clear_tstamp(skb); if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) { skb = skb_expand_head(skb, hh_len); @@ -2274,7 +2274,7 @@ static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb, } skb->dev = dev; - skb->tstamp = 0; + skb_clear_tstamp(skb); if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) { skb = skb_expand_head(skb, hh_len); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index b32c5d782fe1..9abb0028309f 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -5381,7 +5381,7 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet) ipvs_reset(skb); skb->mark = 0; - skb->tstamp = 0; + skb_clear_tstamp(skb); } EXPORT_SYMBOL_GPL(skb_scrub_packet); diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index 00ec819f949b..92ba3350274b 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -79,7 +79,7 @@ static int ip_forward_finish(struct net *net, struct sock *sk, struct sk_buff *s if (unlikely(opt->optlen)) ip_forward_options(skb); - skb->tstamp = 0; + skb_clear_tstamp(skb); return dst_output(net, sk, skb); } diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index dad4e3d0492e..50db9b20d746 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -440,7 +440,7 @@ static inline int ip6_forward_finish(struct net *net, struct sock *sk, } #endif - skb->tstamp = 0; + skb_clear_tstamp(skb); return dst_output(net, sk, skb); } diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index d2e5a8f644b8..029171379884 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -610,7 +610,7 @@ static inline int ip_vs_tunnel_xmit_prepare(struct sk_buff *skb, nf_reset_ct(skb); skb_forward_csum(skb); if (skb->dev) - skb->tstamp = 0; + skb_clear_tstamp(skb); } return ret; } @@ -652,7 +652,7 @@ static inline int ip_vs_nat_send_or_cont(int pf, struct sk_buff *skb, if (!local) { skb_forward_csum(skb); if (skb->dev) - skb->tstamp = 0; + skb_clear_tstamp(skb); NF_HOOK(pf, NF_INET_LOCAL_OUT, cp->ipvs->net, NULL, skb, NULL, skb_dst(skb)->dev, dst_output); } else @@ -674,7 +674,7 @@ static inline int ip_vs_send_or_cont(int pf, struct sk_buff *skb, ip_vs_drop_early_demux_sk(skb); skb_forward_csum(skb); if (skb->dev) - skb->tstamp = 0; + skb_clear_tstamp(skb); NF_HOOK(pf, NF_INET_LOCAL_OUT, cp->ipvs->net, NULL, skb, NULL, skb_dst(skb)->dev, dst_output); } else diff --git a/net/netfilter/nf_dup_netdev.c b/net/netfilter/nf_dup_netdev.c index a579e59ee5c5..7873bd1389c3 100644 --- a/net/netfilter/nf_dup_netdev.c +++ b/net/netfilter/nf_dup_netdev.c @@ -19,7 +19,7 @@ static void nf_do_netdev_egress(struct sk_buff *skb, struct net_device *dev) skb_push(skb, skb->mac_len); skb->dev = dev; - skb->tstamp = 0; + skb_clear_tstamp(skb); dev_queue_xmit(skb); } diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c index 889cf88d3dba..f1d387129f02 100644 --- a/net/netfilter/nf_flow_table_ip.c +++ b/net/netfilter/nf_flow_table_ip.c @@ -376,7 +376,7 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, nf_flow_nat_ip(flow, skb, thoff, dir, iph); ip_decrease_ttl(iph); - skb->tstamp = 0; + skb_clear_tstamp(skb); if (flow_table->flags & NF_FLOWTABLE_COUNTER) nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len); @@ -611,7 +611,7 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, nf_flow_nat_ipv6(flow, skb, dir, ip6h); ip6h->hop_limit--; - skb->tstamp = 0; + skb_clear_tstamp(skb); if (flow_table->flags & NF_FLOWTABLE_COUNTER) nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len); diff --git a/net/netfilter/nft_fwd_netdev.c b/net/netfilter/nft_fwd_netdev.c index 619e394a91de..08e7a289738e 100644 --- a/net/netfilter/nft_fwd_netdev.c +++ b/net/netfilter/nft_fwd_netdev.c @@ -145,7 +145,7 @@ static void nft_fwd_neigh_eval(const struct nft_expr *expr, return; skb->dev = dev; - skb->tstamp = 0; + skb_clear_tstamp(skb); neigh_xmit(neigh_table, dev, addr, skb); out: regs->verdict.code = verdict; diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index cf2ce5812489..82a74f998966 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -507,7 +507,7 @@ void ovs_vport_send(struct vport *vport, struct sk_buff *skb, u8 mac_proto) } skb->dev = vport->dev; - skb->tstamp = 0; + skb_clear_tstamp(skb); vport->ops->send(skb); return; diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c index 57448fc519fc..4991e99ced9a 100644 --- a/net/xfrm/xfrm_interface.c +++ b/net/xfrm/xfrm_interface.c @@ -190,7 +190,7 @@ static void xfrmi_dev_uninit(struct net_device *dev) static void xfrmi_scrub_packet(struct sk_buff *skb, bool xnet) { - skb->tstamp = 0; + skb_clear_tstamp(skb); skb->pkt_type = PACKET_HOST; skb->skb_iif = 0; skb->ignore_df = 0; -- cgit v1.2.3-58-ga151