From ece4b296904167336d0aaab26bd7122018835202 Mon Sep 17 00:00:00 2001 From: Brad Cowie Date: Wed, 22 May 2024 17:07:11 +1200 Subject: net: netfilter: Make ct zone opts configurable for bpf ct helpers Add ct zone id and direction to bpf_ct_opts so that arbitrary ct zones can be used for xdp/tc bpf ct helper functions bpf_{xdp,skb}_ct_alloc and bpf_{xdp,skb}_ct_lookup. Signed-off-by: Brad Cowie Link: https://lore.kernel.org/r/20240522050712.732558-1-brad@faucet.nz Signed-off-by: Martin KaFai Lau --- net/netfilter/nf_conntrack_bpf.c | 68 ++++++++++++++++++++++++++++++++-------- 1 file changed, 55 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_bpf.c b/net/netfilter/nf_conntrack_bpf.c index d2492d050fe6..4a136fc3a9c0 100644 --- a/net/netfilter/nf_conntrack_bpf.c +++ b/net/netfilter/nf_conntrack_bpf.c @@ -32,7 +32,9 @@ * -EINVAL - Passed NULL for bpf_tuple pointer * -EINVAL - opts->reserved is not 0 * -EINVAL - netns_id is less than -1 - * -EINVAL - opts__sz isn't NF_BPF_CT_OPTS_SZ (12) + * -EINVAL - opts__sz isn't NF_BPF_CT_OPTS_SZ (16) or 12 + * -EINVAL - opts->ct_zone_id set when + opts__sz isn't NF_BPF_CT_OPTS_SZ (16) * -EPROTO - l4proto isn't one of IPPROTO_TCP or IPPROTO_UDP * -ENONET - No network namespace found for netns_id * -ENOENT - Conntrack lookup could not find entry for tuple @@ -42,6 +44,8 @@ * Values: * IPPROTO_TCP, IPPROTO_UDP * @dir: - connection tracking tuple direction. + * @ct_zone_id - connection tracking zone id. + * @ct_zone_dir - connection tracking zone direction. * @reserved - Reserved member, will be reused for more options in future * Values: * 0 @@ -51,11 +55,13 @@ struct bpf_ct_opts { s32 error; u8 l4proto; u8 dir; - u8 reserved[2]; + u16 ct_zone_id; + u8 ct_zone_dir; + u8 reserved[3]; }; enum { - NF_BPF_CT_OPTS_SZ = 12, + NF_BPF_CT_OPTS_SZ = 16, }; static int bpf_nf_ct_tuple_parse(struct bpf_sock_tuple *bpf_tuple, @@ -104,12 +110,21 @@ __bpf_nf_ct_alloc_entry(struct net *net, struct bpf_sock_tuple *bpf_tuple, u32 timeout) { struct nf_conntrack_tuple otuple, rtuple; + struct nf_conntrack_zone ct_zone; struct nf_conn *ct; int err; - if (!opts || !bpf_tuple || opts->reserved[0] || opts->reserved[1] || - opts_len != NF_BPF_CT_OPTS_SZ) + if (!opts || !bpf_tuple) return ERR_PTR(-EINVAL); + if (!(opts_len == NF_BPF_CT_OPTS_SZ || opts_len == 12)) + return ERR_PTR(-EINVAL); + if (opts_len == NF_BPF_CT_OPTS_SZ) { + if (opts->reserved[0] || opts->reserved[1] || opts->reserved[2]) + return ERR_PTR(-EINVAL); + } else { + if (opts->ct_zone_id) + return ERR_PTR(-EINVAL); + } if (unlikely(opts->netns_id < BPF_F_CURRENT_NETNS)) return ERR_PTR(-EINVAL); @@ -130,7 +145,16 @@ __bpf_nf_ct_alloc_entry(struct net *net, struct bpf_sock_tuple *bpf_tuple, return ERR_PTR(-ENONET); } - ct = nf_conntrack_alloc(net, &nf_ct_zone_dflt, &otuple, &rtuple, + if (opts_len == NF_BPF_CT_OPTS_SZ) { + if (opts->ct_zone_dir == 0) + opts->ct_zone_dir = NF_CT_DEFAULT_ZONE_DIR; + nf_ct_zone_init(&ct_zone, + opts->ct_zone_id, opts->ct_zone_dir, 0); + } else { + ct_zone = nf_ct_zone_dflt; + } + + ct = nf_conntrack_alloc(net, &ct_zone, &otuple, &rtuple, GFP_ATOMIC); if (IS_ERR(ct)) goto out; @@ -152,12 +176,21 @@ static struct nf_conn *__bpf_nf_ct_lookup(struct net *net, { struct nf_conntrack_tuple_hash *hash; struct nf_conntrack_tuple tuple; + struct nf_conntrack_zone ct_zone; struct nf_conn *ct; int err; - if (!opts || !bpf_tuple || opts->reserved[0] || opts->reserved[1] || - opts_len != NF_BPF_CT_OPTS_SZ) + if (!opts || !bpf_tuple) return ERR_PTR(-EINVAL); + if 
(!(opts_len == NF_BPF_CT_OPTS_SZ || opts_len == 12)) + return ERR_PTR(-EINVAL); + if (opts_len == NF_BPF_CT_OPTS_SZ) { + if (opts->reserved[0] || opts->reserved[1] || opts->reserved[2]) + return ERR_PTR(-EINVAL); + } else { + if (opts->ct_zone_id) + return ERR_PTR(-EINVAL); + } if (unlikely(opts->l4proto != IPPROTO_TCP && opts->l4proto != IPPROTO_UDP)) return ERR_PTR(-EPROTO); if (unlikely(opts->netns_id < BPF_F_CURRENT_NETNS)) @@ -174,7 +207,16 @@ static struct nf_conn *__bpf_nf_ct_lookup(struct net *net, return ERR_PTR(-ENONET); } - hash = nf_conntrack_find_get(net, &nf_ct_zone_dflt, &tuple); + if (opts_len == NF_BPF_CT_OPTS_SZ) { + if (opts->ct_zone_dir == 0) + opts->ct_zone_dir = NF_CT_DEFAULT_ZONE_DIR; + nf_ct_zone_init(&ct_zone, + opts->ct_zone_id, opts->ct_zone_dir, 0); + } else { + ct_zone = nf_ct_zone_dflt; + } + + hash = nf_conntrack_find_get(net, &ct_zone, &tuple); if (opts->netns_id >= 0) put_net(net); if (!hash) @@ -245,7 +287,7 @@ __bpf_kfunc_start_defs(); * @opts - Additional options for allocation (documented above) * Cannot be NULL * @opts__sz - Length of the bpf_ct_opts structure - * Must be NF_BPF_CT_OPTS_SZ (12) + * Must be NF_BPF_CT_OPTS_SZ (16) or 12 */ __bpf_kfunc struct nf_conn___init * bpf_xdp_ct_alloc(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple, @@ -279,7 +321,7 @@ bpf_xdp_ct_alloc(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple, * @opts - Additional options for lookup (documented above) * Cannot be NULL * @opts__sz - Length of the bpf_ct_opts structure - * Must be NF_BPF_CT_OPTS_SZ (12) + * Must be NF_BPF_CT_OPTS_SZ (16) or 12 */ __bpf_kfunc struct nf_conn * bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple, @@ -312,7 +354,7 @@ bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple, * @opts - Additional options for allocation (documented above) * Cannot be NULL * @opts__sz - Length of the bpf_ct_opts structure - * Must be NF_BPF_CT_OPTS_SZ (12) + * Must be NF_BPF_CT_OPTS_SZ (16) or 12 */ __bpf_kfunc struct nf_conn___init * bpf_skb_ct_alloc(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple, @@ -347,7 +389,7 @@ bpf_skb_ct_alloc(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple, * @opts - Additional options for lookup (documented above) * Cannot be NULL * @opts__sz - Length of the bpf_ct_opts structure - * Must be NF_BPF_CT_OPTS_SZ (12) + * Must be NF_BPF_CT_OPTS_SZ (16) or 12 */ __bpf_kfunc struct nf_conn * bpf_skb_ct_lookup(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple, -- cgit v1.2.3-58-ga151 From 96f887a612e4cda89efc3f54bc10c1997e3ab0e9 Mon Sep 17 00:00:00 2001 From: Hagar Hemdan Date: Sat, 18 May 2024 13:04:39 +0000 Subject: net: esp: cleanup esp_output_tail_tcp() in case of unsupported ESPINTCP xmit() functions should consume skb or return error codes in error paths. When the configuration "CONFIG_INET_ESPINTCP" is not set, the implementation of the function "esp_output_tail_tcp" violates this rule. The function frees the skb and returns the error code. This change removes the kfree_skb from both functions, for both esp4 and esp6. WARN_ON is added because esp_output_tail_tcp() should never be called if CONFIG_INET_ESPINTCP is not set. This bug was discovered and resolved using Coverity Static Analysis Security Testing (SAST) by Synopsys, Inc. 
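To make the ownership rule concrete, here is a minimal caller-side sketch
(hypothetical names, not the actual xfrm/esp call chain): a callee that
returns an error is expected to leave the skb alone, because the caller
frees it on failure. A stub that both frees the skb and returns an error
therefore sets up a double free:

	/* hypothetical caller sketch, assuming the usual xmit convention */
	err = output_tail_tcp(x, skb);	/* callee must not free skb on error */
	if (err)
		kfree_skb(skb);		/* caller owns and frees the skb here */
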
Fixes: e27cca96cd68 ("xfrm: add espintcp (RFC 8229)") Signed-off-by: Hagar Hemdan Signed-off-by: Steffen Klassert --- net/ipv4/esp4.c | 3 +-- net/ipv6/esp6.c | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 3968d3f98e08..619a4df7be1e 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -239,8 +239,7 @@ static int esp_output_tail_tcp(struct xfrm_state *x, struct sk_buff *skb) #else static int esp_output_tail_tcp(struct xfrm_state *x, struct sk_buff *skb) { - kfree_skb(skb); - + WARN_ON(1); return -EOPNOTSUPP; } #endif diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 34a9a5b9ed00..3920e8aa1031 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -256,8 +256,7 @@ static int esp_output_tail_tcp(struct xfrm_state *x, struct sk_buff *skb) #else static int esp_output_tail_tcp(struct xfrm_state *x, struct sk_buff *skb) { - kfree_skb(skb); - + WARN_ON(1); return -EOPNOTSUPP; } #endif -- cgit v1.2.3-58-ga151 From 23daf1b4c91db9b26f8425cc7039cf96d22ccbfe Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 15 May 2024 14:16:00 +0200 Subject: wifi: nl80211: disallow setting special AP channel widths Setting the AP channel width is meant for use with the normal 20/40/... MHz channel width progression, and switching around in S1G or narrow channels isn't supported. Disallow that. Reported-by: syzbot+bc0f5b92cc7091f45fb6@syzkaller.appspotmail.com Link: https://msgid.link/20240515141600.d4a9590bfe32.I19a32d60097e81b527eafe6b0924f6c5fbb2dc45@changeid Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 3c0bca4238d3..93c313149f57 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3415,6 +3415,33 @@ static int __nl80211_set_channel(struct cfg80211_registered_device *rdev, if (chandef.chan != cur_chan) return -EBUSY; + /* only allow this for regular channel widths */ + switch (wdev->links[link_id].ap.chandef.width) { + case NL80211_CHAN_WIDTH_20_NOHT: + case NL80211_CHAN_WIDTH_20: + case NL80211_CHAN_WIDTH_40: + case NL80211_CHAN_WIDTH_80: + case NL80211_CHAN_WIDTH_80P80: + case NL80211_CHAN_WIDTH_160: + case NL80211_CHAN_WIDTH_320: + break; + default: + return -EINVAL; + } + + switch (chandef.width) { + case NL80211_CHAN_WIDTH_20_NOHT: + case NL80211_CHAN_WIDTH_20: + case NL80211_CHAN_WIDTH_40: + case NL80211_CHAN_WIDTH_80: + case NL80211_CHAN_WIDTH_80P80: + case NL80211_CHAN_WIDTH_160: + case NL80211_CHAN_WIDTH_320: + break; + default: + return -EINVAL; + } + result = rdev_set_ap_chanwidth(rdev, dev, link_id, &chandef); if (result) -- cgit v1.2.3-58-ga151 From 2449db1f2186e6cd8410631d0ee9e951289fe5ae Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 6 May 2024 21:00:02 +0200 Subject: wifi: cfg80211: sort trace events again They were meant to be split into ops and APIs, but some ops were added in the wrong place. Fix that. 
Reviewed-by: Miriam Rachel Korenblit Link: https://msgid.link/20240506210002.0b3a86a5d8d7.I5591c03223bdb95597e181de63a2eded424de34c@changeid Signed-off-by: Johannes Berg --- net/wireless/trace.h | 138 +++++++++++++++++++++++++-------------------------- 1 file changed, 69 insertions(+), 69 deletions(-) (limited to 'net') diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 9bf987519811..b76e3b21051a 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -2889,6 +2889,75 @@ DEFINE_EVENT(wiphy_wdev_link_evt, rdev_del_intf_link, TP_ARGS(wiphy, wdev, link_id) ); +TRACE_EVENT(rdev_del_link_station, + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, + struct link_station_del_parameters *params), + TP_ARGS(wiphy, netdev, params), + TP_STRUCT__entry( + WIPHY_ENTRY + NETDEV_ENTRY + __array(u8, mld_mac, 6) + __field(u32, link_id) + ), + TP_fast_assign( + WIPHY_ASSIGN; + NETDEV_ASSIGN; + memset(__entry->mld_mac, 0, 6); + if (params->mld_mac) + memcpy(__entry->mld_mac, params->mld_mac, 6); + __entry->link_id = params->link_id; + ), + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", station mac: %pM" + ", link id: %u", + WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->mld_mac, + __entry->link_id) +); + +TRACE_EVENT(rdev_set_hw_timestamp, + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, + struct cfg80211_set_hw_timestamp *hwts), + + TP_ARGS(wiphy, netdev, hwts), + + TP_STRUCT__entry( + WIPHY_ENTRY + NETDEV_ENTRY + MAC_ENTRY(macaddr) + __field(bool, enable) + ), + + TP_fast_assign( + WIPHY_ASSIGN; + NETDEV_ASSIGN; + MAC_ASSIGN(macaddr, hwts->macaddr); + __entry->enable = hwts->enable; + ), + + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", mac %pM, enable: %u", + WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->macaddr, + __entry->enable) +); + +TRACE_EVENT(rdev_set_ttlm, + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, + struct cfg80211_ttlm_params *params), + TP_ARGS(wiphy, netdev, params), + TP_STRUCT__entry( + WIPHY_ENTRY + NETDEV_ENTRY + __array(u8, dlink, sizeof(u16) * 8) + __array(u8, ulink, sizeof(u16) * 8) + ), + TP_fast_assign( + WIPHY_ASSIGN; + NETDEV_ASSIGN; + memcpy(__entry->dlink, params->dlink, sizeof(params->dlink)); + memcpy(__entry->ulink, params->ulink, sizeof(params->ulink)); + ), + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT, + WIPHY_PR_ARG, NETDEV_PR_ARG) +); + /************************************************************* * cfg80211 exported functions traces * *************************************************************/ @@ -3923,55 +3992,6 @@ DEFINE_EVENT(link_station_add_mod, rdev_mod_link_station, TP_ARGS(wiphy, netdev, params) ); -TRACE_EVENT(rdev_del_link_station, - TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, - struct link_station_del_parameters *params), - TP_ARGS(wiphy, netdev, params), - TP_STRUCT__entry( - WIPHY_ENTRY - NETDEV_ENTRY - __array(u8, mld_mac, 6) - __field(u32, link_id) - ), - TP_fast_assign( - WIPHY_ASSIGN; - NETDEV_ASSIGN; - memset(__entry->mld_mac, 0, 6); - if (params->mld_mac) - memcpy(__entry->mld_mac, params->mld_mac, 6); - __entry->link_id = params->link_id; - ), - TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", station mac: %pM" - ", link id: %u", - WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->mld_mac, - __entry->link_id) -); - -TRACE_EVENT(rdev_set_hw_timestamp, - TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, - struct cfg80211_set_hw_timestamp *hwts), - - TP_ARGS(wiphy, netdev, hwts), - - TP_STRUCT__entry( - WIPHY_ENTRY - NETDEV_ENTRY - MAC_ENTRY(macaddr) - __field(bool, enable) - ), - - TP_fast_assign( - WIPHY_ASSIGN; - 
NETDEV_ASSIGN; - MAC_ASSIGN(macaddr, hwts->macaddr); - __entry->enable = hwts->enable; - ), - - TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", mac %pM, enable: %u", - WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->macaddr, - __entry->enable) -); - TRACE_EVENT(cfg80211_links_removed, TP_PROTO(struct net_device *netdev, u16 link_mask), TP_ARGS(netdev, link_mask), @@ -3987,26 +4007,6 @@ TRACE_EVENT(cfg80211_links_removed, __entry->link_mask) ); -TRACE_EVENT(rdev_set_ttlm, - TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, - struct cfg80211_ttlm_params *params), - TP_ARGS(wiphy, netdev, params), - TP_STRUCT__entry( - WIPHY_ENTRY - NETDEV_ENTRY - __array(u8, dlink, sizeof(u16) * 8) - __array(u8, ulink, sizeof(u16) * 8) - ), - TP_fast_assign( - WIPHY_ASSIGN; - NETDEV_ASSIGN; - memcpy(__entry->dlink, params->dlink, sizeof(params->dlink)); - memcpy(__entry->ulink, params->ulink, sizeof(params->ulink)); - ), - TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT, - WIPHY_PR_ARG, NETDEV_PR_ARG) -); - #endif /* !__RDEV_OPS_TRACE || TRACE_HEADER_MULTI_READ */ #undef TRACE_INCLUDE_PATH -- cgit v1.2.3-58-ga151 From eb745c7c854e200d7fd6ba0944691a1aaf882d4d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 6 May 2024 21:00:03 +0200 Subject: wifi: cfg80211: add tracing for wiphy work Add trace events to trace when wiphy works are queued (or delayed ones scheduled), and other APIs are called. Also add an event when the worker starts, before acquiring the mutex, to be able to see potential delays due to locking. Reviewed-by: Miriam Rachel Korenblit Reviewed-by: Benjamin Berg Link: https://msgid.link/20240506210002.bf1840a1d22d.I4abba048c1c4017345640219cf1384a0b2288dd3@changeid Signed-off-by: Johannes Berg --- net/wireless/core.c | 12 +++++++++ net/wireless/trace.h | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 86 insertions(+) (limited to 'net') diff --git a/net/wireless/core.c b/net/wireless/core.c index 3fb1b637352a..61f7cd8a8e9c 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -421,6 +421,8 @@ static void cfg80211_wiphy_work(struct work_struct *work) rdev = container_of(work, struct cfg80211_registered_device, wiphy_work); + trace_wiphy_work_worker_start(&rdev->wiphy); + wiphy_lock(&rdev->wiphy); if (rdev->suspended) goto out; @@ -434,6 +436,7 @@ static void cfg80211_wiphy_work(struct work_struct *work) schedule_work(work); spin_unlock_irq(&rdev->wiphy_work_lock); + trace_wiphy_work_run(&rdev->wiphy, wk); wk->func(&rdev->wiphy, wk); } else { spin_unlock_irq(&rdev->wiphy_work_lock); @@ -1066,6 +1069,7 @@ void cfg80211_process_wiphy_works(struct cfg80211_registered_device *rdev, list_del_init(&wk->entry); spin_unlock_irqrestore(&rdev->wiphy_work_lock, flags); + trace_wiphy_work_run(&rdev->wiphy, wk); wk->func(&rdev->wiphy, wk); spin_lock_irqsave(&rdev->wiphy_work_lock, flags); @@ -1610,6 +1614,8 @@ void wiphy_work_queue(struct wiphy *wiphy, struct wiphy_work *work) struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); unsigned long flags; + trace_wiphy_work_queue(wiphy, work); + spin_lock_irqsave(&rdev->wiphy_work_lock, flags); if (list_empty(&work->entry)) list_add_tail(&work->entry, &rdev->wiphy_work_list); @@ -1626,6 +1632,8 @@ void wiphy_work_cancel(struct wiphy *wiphy, struct wiphy_work *work) lockdep_assert_held(&wiphy->mtx); + trace_wiphy_work_cancel(wiphy, work); + spin_lock_irqsave(&rdev->wiphy_work_lock, flags); if (!list_empty(&work->entry)) list_del_init(&work->entry); @@ -1639,6 +1647,8 @@ void wiphy_work_flush(struct wiphy *wiphy, struct wiphy_work 
*work) unsigned long flags; bool run; + trace_wiphy_work_flush(wiphy, work); + spin_lock_irqsave(&rdev->wiphy_work_lock, flags); run = !work || !list_empty(&work->entry); spin_unlock_irqrestore(&rdev->wiphy_work_lock, flags); @@ -1660,6 +1670,8 @@ void wiphy_delayed_work_queue(struct wiphy *wiphy, struct wiphy_delayed_work *dwork, unsigned long delay) { + trace_wiphy_delayed_work_queue(wiphy, &dwork->work, delay); + if (!delay) { del_timer(&dwork->timer); wiphy_work_queue(wiphy, &dwork->work); diff --git a/net/wireless/trace.h b/net/wireless/trace.h index b76e3b21051a..14cfa0aba93a 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -242,6 +242,80 @@ } \ } while (0) +/************************************************************* + * wiphy work traces * + *************************************************************/ + +DECLARE_EVENT_CLASS(wiphy_work_event, + TP_PROTO(struct wiphy *wiphy, struct wiphy_work *work), + TP_ARGS(wiphy, work), + TP_STRUCT__entry( + WIPHY_ENTRY + __field(void *, instance) + __field(void *, func) + ), + TP_fast_assign( + WIPHY_ASSIGN; + __entry->instance = work; + __entry->func = work ? work->func : NULL; + ), + TP_printk(WIPHY_PR_FMT " instance=%p func=%pS", + WIPHY_PR_ARG, __entry->instance, __entry->func) +); + +DEFINE_EVENT(wiphy_work_event, wiphy_work_queue, + TP_PROTO(struct wiphy *wiphy, struct wiphy_work *work), + TP_ARGS(wiphy, work) +); + +DEFINE_EVENT(wiphy_work_event, wiphy_work_run, + TP_PROTO(struct wiphy *wiphy, struct wiphy_work *work), + TP_ARGS(wiphy, work) +); + +DEFINE_EVENT(wiphy_work_event, wiphy_work_cancel, + TP_PROTO(struct wiphy *wiphy, struct wiphy_work *work), + TP_ARGS(wiphy, work) +); + +DEFINE_EVENT(wiphy_work_event, wiphy_work_flush, + TP_PROTO(struct wiphy *wiphy, struct wiphy_work *work), + TP_ARGS(wiphy, work) +); + +TRACE_EVENT(wiphy_delayed_work_queue, + TP_PROTO(struct wiphy *wiphy, struct wiphy_work *work, + unsigned long delay), + TP_ARGS(wiphy, work, delay), + TP_STRUCT__entry( + WIPHY_ENTRY + __field(void *, instance) + __field(void *, func) + __field(unsigned long, delay) + ), + TP_fast_assign( + WIPHY_ASSIGN; + __entry->instance = work; + __entry->func = work->func; + __entry->delay = delay; + ), + TP_printk(WIPHY_PR_FMT " instance=%p func=%pS delay=%ld", + WIPHY_PR_ARG, __entry->instance, __entry->func, + __entry->delay) +); + +TRACE_EVENT(wiphy_work_worker_start, + TP_PROTO(struct wiphy *wiphy), + TP_ARGS(wiphy), + TP_STRUCT__entry( + WIPHY_ENTRY + ), + TP_fast_assign( + WIPHY_ASSIGN; + ), + TP_printk(WIPHY_PR_FMT, WIPHY_PR_ARG) +); + /************************************************************* * rdev->ops traces * *************************************************************/ -- cgit v1.2.3-58-ga151 From 7aa5c8b4f9de463abc0407da38c9ffccdbc757d1 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 6 May 2024 21:14:22 +0200 Subject: wifi: mac80211: remove outdated comments These comments are no longer correct, it's a wiphy work now so it will go away immediately when canceled. 
Reviewed-by: Miriam Rachel Korenblit Link: https://msgid.link/20240506211422.68bc10efbd8a.If80f43f4c8b9db1f5266f70d93a805f8c7463fe2@changeid Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 4 ---- net/mac80211/util.c | 4 ---- 2 files changed, 8 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index b08e5d7687e3..88af2b8037cb 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -3482,10 +3482,6 @@ static void ieee80211_end_cac(struct wiphy *wiphy, lockdep_assert_wiphy(local->hw.wiphy); list_for_each_entry(sdata, &local->interfaces, list) { - /* it might be waiting for the local->mtx, but then - * by the time it gets it, sdata->wdev.cac_started - * will no longer be true - */ wiphy_delayed_work_cancel(wiphy, &sdata->deflink.dfs_cac_timer_work); diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 0b893e958959..6139c930b572 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -3456,10 +3456,6 @@ void ieee80211_dfs_cac_cancel(struct ieee80211_local *local) lockdep_assert_wiphy(local->hw.wiphy); list_for_each_entry(sdata, &local->interfaces, list) { - /* it might be waiting for the local->mtx, but then - * by the time it gets it, sdata->wdev.cac_started - * will no longer be true - */ wiphy_delayed_work_cancel(local->hw.wiphy, &sdata->deflink.dfs_cac_timer_work); -- cgit v1.2.3-58-ga151 From 5c24e83f6895a01a8d79a79d3ec567bd0ec13b56 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 6 May 2024 21:19:34 +0200 Subject: wifi: mac80211: remove extra link STA functions There's no need to have a lockdep assert and then call another function, just move everything into one place. Reviewed-by: Miriam Rachel Korenblit Link: https://msgid.link/20240506211934.9759564a25f4.I88d43aa459d15c1d6230152e76b7757c2cdd6085@changeid Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 49 +++++++++++++------------------------------------ 1 file changed, 13 insertions(+), 36 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 88af2b8037cb..3673c7f6b21a 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -4974,13 +4974,17 @@ static void ieee80211_del_intf_link(struct wiphy *wiphy, ieee80211_vif_set_links(sdata, wdev->valid_links, 0); } -static int sta_add_link_station(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata, - struct link_station_parameters *params) +static int +ieee80211_add_link_station(struct wiphy *wiphy, struct net_device *dev, + struct link_station_parameters *params) { + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = wiphy_priv(wiphy); struct sta_info *sta; int ret; + lockdep_assert_wiphy(local->hw.wiphy); + sta = sta_info_get_bss(sdata, params->mld_mac); if (!sta) return -ENOENT; @@ -5006,23 +5010,15 @@ static int sta_add_link_station(struct ieee80211_local *local, } static int -ieee80211_add_link_station(struct wiphy *wiphy, struct net_device *dev, +ieee80211_mod_link_station(struct wiphy *wiphy, struct net_device *dev, struct link_station_parameters *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = wiphy_priv(wiphy); - - lockdep_assert_wiphy(sdata->local->hw.wiphy); - - return sta_add_link_station(local, sdata, params); -} - -static int sta_mod_link_station(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata, - struct link_station_parameters *params) -{ struct sta_info *sta; + lockdep_assert_wiphy(local->hw.wiphy); + sta = 
sta_info_get_bss(sdata, params->mld_mac); if (!sta) return -ENOENT; @@ -5034,22 +5030,14 @@ static int sta_mod_link_station(struct ieee80211_local *local, } static int -ieee80211_mod_link_station(struct wiphy *wiphy, struct net_device *dev, - struct link_station_parameters *params) +ieee80211_del_link_station(struct wiphy *wiphy, struct net_device *dev, + struct link_station_del_parameters *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - struct ieee80211_local *local = wiphy_priv(wiphy); + struct sta_info *sta; lockdep_assert_wiphy(sdata->local->hw.wiphy); - return sta_mod_link_station(local, sdata, params); -} - -static int sta_del_link_station(struct ieee80211_sub_if_data *sdata, - struct link_station_del_parameters *params) -{ - struct sta_info *sta; - sta = sta_info_get_bss(sdata, params->mld_mac); if (!sta) return -ENOENT; @@ -5066,17 +5054,6 @@ static int sta_del_link_station(struct ieee80211_sub_if_data *sdata, return 0; } -static int -ieee80211_del_link_station(struct wiphy *wiphy, struct net_device *dev, - struct link_station_del_parameters *params) -{ - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - - lockdep_assert_wiphy(sdata->local->hw.wiphy); - - return sta_del_link_station(sdata, params); -} - static int ieee80211_set_hw_timestamp(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_set_hw_timestamp *hwts) -- cgit v1.2.3-58-ga151 From e3bae9b22873338d1a7989a2568c5aca2c44bde7 Mon Sep 17 00:00:00 2001 From: Mukesh Sisodiya Date: Mon, 6 May 2024 21:37:53 +0200 Subject: wifi: mac80211: update 6 GHz AP power type before association 6 GHz AP power type details are required to set proper tx power used to send frames. Update AP power type received in beacon while preparing for connection instead of after association so the frames before association can use the correct tx power. 
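For reference, the power type is derived from the Regulatory Info subfield
of the HE 6 GHz Operation element's control field; a condensed view of the
mapping (the same one this patch factors out into ieee80211_ap_power_type())
looks like this:

	switch (u8_get_bits(control, IEEE80211_HE_6GHZ_OPER_CTRL_REG_INFO)) {
	case IEEE80211_6GHZ_CTRL_REG_LPI_AP:
	case IEEE80211_6GHZ_CTRL_REG_INDOOR_LPI_AP:
		return IEEE80211_REG_LPI_AP;	/* low power indoor */
	case IEEE80211_6GHZ_CTRL_REG_SP_AP:
	case IEEE80211_6GHZ_CTRL_REG_INDOOR_SP_AP:
		return IEEE80211_REG_SP_AP;	/* standard power */
	case IEEE80211_6GHZ_CTRL_REG_VLP_AP:
		return IEEE80211_REG_VLP_AP;	/* very low power */
	default:
		return IEEE80211_REG_UNSET_AP;	/* unknown/unset */
	}
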
Reviewed-by: Miriam Rachel Korenblit Reviewed-by: Johannes Berg Signed-off-by: Mukesh Sisodiya Link: https://msgid.link/20240506214536.310434f55f76.I6aca291ee06265e3f63e0f9024ba19a850b53a33@changeid Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 55 ++++++++++++++++++++++++++--------------------------- 1 file changed, 27 insertions(+), 28 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index a5f2d3cfe60d..e410a43dc681 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -4445,40 +4445,12 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link, if (elems->he_operation && link->u.mgd.conn.mode >= IEEE80211_CONN_MODE_HE && elems->he_cap) { - const struct ieee80211_he_6ghz_oper *he_6ghz_oper; - ieee80211_he_cap_ie_to_sta_he_cap(sdata, sband, elems->he_cap, elems->he_cap_len, elems->he_6ghz_capa, link_sta); - he_6ghz_oper = ieee80211_he_6ghz_oper(elems->he_operation); - - if (is_6ghz && he_6ghz_oper) { - switch (u8_get_bits(he_6ghz_oper->control, - IEEE80211_HE_6GHZ_OPER_CTRL_REG_INFO)) { - case IEEE80211_6GHZ_CTRL_REG_LPI_AP: - case IEEE80211_6GHZ_CTRL_REG_INDOOR_LPI_AP: - bss_conf->power_type = IEEE80211_REG_LPI_AP; - break; - case IEEE80211_6GHZ_CTRL_REG_SP_AP: - case IEEE80211_6GHZ_CTRL_REG_INDOOR_SP_AP: - bss_conf->power_type = IEEE80211_REG_SP_AP; - break; - case IEEE80211_6GHZ_CTRL_REG_VLP_AP: - bss_conf->power_type = IEEE80211_REG_VLP_AP; - break; - default: - bss_conf->power_type = IEEE80211_REG_UNSET_AP; - break; - } - } else if (is_6ghz) { - link_info(link, - "HE 6 GHz operation missing (on %d MHz), expect issues\n", - bss_conf->chanreq.oper.chan->center_freq); - } - bss_conf->he_support = link_sta->pub->he_cap.has_he; if (elems->rsnx && elems->rsnx_len && (elems->rsnx[0] & WLAN_RSNX_CAPA_PROTECTED_TWT) && @@ -5020,6 +4992,23 @@ ieee80211_determine_our_sta_mode_assoc(struct ieee80211_sub_if_data *sdata, conn->bw_limit, tmp.bw_limit); } +static enum ieee80211_ap_reg_power +ieee80211_ap_power_type(u8 control) +{ + switch (u8_get_bits(control, IEEE80211_HE_6GHZ_OPER_CTRL_REG_INFO)) { + case IEEE80211_6GHZ_CTRL_REG_LPI_AP: + case IEEE80211_6GHZ_CTRL_REG_INDOOR_LPI_AP: + return IEEE80211_REG_LPI_AP; + case IEEE80211_6GHZ_CTRL_REG_SP_AP: + case IEEE80211_6GHZ_CTRL_REG_INDOOR_SP_AP: + return IEEE80211_REG_SP_AP; + case IEEE80211_6GHZ_CTRL_REG_VLP_AP: + return IEEE80211_REG_VLP_AP; + default: + return IEEE80211_REG_UNSET_AP; + } +} + static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, struct ieee80211_link_data *link, int link_id, @@ -5052,6 +5041,7 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, } if (link && is_6ghz && conn->mode >= IEEE80211_CONN_MODE_HE) { + const struct ieee80211_he_6ghz_oper *he_6ghz_oper; struct ieee80211_bss_conf *bss_conf; u8 j = 0; @@ -5072,6 +5062,15 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, elems->tx_pwr_env_len[i]); j++; } + + he_6ghz_oper = ieee80211_he_6ghz_oper(elems->he_operation); + if (he_6ghz_oper) + bss_conf->power_type = + ieee80211_ap_power_type(he_6ghz_oper->control); + else + link_info(link, + "HE 6 GHz operation missing (on %d MHz), expect issues\n", + cbss->channel->center_freq); } rcu_read_unlock(); /* the element data was RCU protected so no longer valid anyway */ -- cgit v1.2.3-58-ga151 From 39dc8b8ea387ce7f4fe2d2d6d550ed52aa9aa040 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 6 May 2024 21:37:56 +0200 Subject: wifi: mac80211: pass parsed TPE data to drivers Instead of passing the full 
TPE elements, in all their glory and mixed up data formats for HE backward compatibility, parse them fully into the right values, and pass that to the drivers. Also introduce proper validation already in mac80211, so that drivers don't need to do it, and parse the EHT portions. The code now passes the values in the right order according to the channel used by an interface, which could also be a subset of the data advertised by the AP, if we couldn't connect with the full bandwidth (for whatever reason.) Also add kunit tests for the more complicated bits of it. Reviewed-by: Miriam Rachel Korenblit Acked-by: Kalle Valo Link: https://msgid.link/20240506214536.2aa839969b60.I265b28209e0b29772b2f125f7f83de44a4da877b@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath11k/mac.c | 143 ++++------------- include/linux/ieee80211.h | 102 ++++++++++-- include/net/mac80211.h | 41 ++++- net/mac80211/ieee80211_i.h | 13 +- net/mac80211/mlme.c | 230 +++++++++++++++++++++++---- net/mac80211/parse.c | 92 +++++++++-- net/mac80211/tests/Makefile | 2 +- net/mac80211/tests/tpe.c | 284 ++++++++++++++++++++++++++++++++++ net/mac80211/util.c | 25 +++ 9 files changed, 761 insertions(+), 171 deletions(-) create mode 100644 net/mac80211/tests/tpe.c (limited to 'net') diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c index 73876760afe9..d4225bf30e51 100644 --- a/drivers/net/wireless/ath/ath11k/mac.c +++ b/drivers/net/wireless/ath/ath11k/mac.c @@ -7507,32 +7507,6 @@ static int ath11k_mac_stop_vdev_early(struct ieee80211_hw *hw, return 0; } -static u8 ath11k_mac_get_tpe_count(u8 txpwr_intrprt, u8 txpwr_cnt) -{ - switch (txpwr_intrprt) { - /* Refer "Table 9-276-Meaning of Maximum Transmit Power Count subfield - * if the Maximum Transmit Power Interpretation subfield is 0 or 2" of - * "IEEE Std 802.11ax 2021". - */ - case IEEE80211_TPE_LOCAL_EIRP: - case IEEE80211_TPE_REG_CLIENT_EIRP: - txpwr_cnt = txpwr_cnt <= 3 ? txpwr_cnt : 3; - txpwr_cnt = txpwr_cnt + 1; - break; - /* Refer "Table 9-277-Meaning of Maximum Transmit Power Count subfield - * if Maximum Transmit Power Interpretation subfield is 1 or 3" of - * "IEEE Std 802.11ax 2021". - */ - case IEEE80211_TPE_LOCAL_EIRP_PSD: - case IEEE80211_TPE_REG_CLIENT_EIRP_PSD: - txpwr_cnt = txpwr_cnt <= 4 ? txpwr_cnt : 4; - txpwr_cnt = txpwr_cnt ? 
(BIT(txpwr_cnt - 1)) : 1; - break; - } - - return txpwr_cnt; -} - static u8 ath11k_mac_get_num_pwr_levels(struct cfg80211_chan_def *chan_def) { if (chan_def->chan->flags & IEEE80211_CHAN_PSD) { @@ -7859,33 +7833,23 @@ static void ath11k_mac_parse_tx_pwr_env(struct ath11k *ar, struct ath11k_base *ab = ar->ab; struct ath11k_vif *arvif = ath11k_vif_to_arvif(vif); struct ieee80211_bss_conf *bss_conf = &vif->bss_conf; - struct ieee80211_tx_pwr_env *single_tpe; + struct ieee80211_parsed_tpe_eirp *non_psd = NULL; + struct ieee80211_parsed_tpe_psd *psd = NULL; enum wmi_reg_6ghz_client_type client_type; struct cur_regulatory_info *reg_info; + u8 local_tpe_count, reg_tpe_count; + bool use_local_tpe; int i; - u8 pwr_count, pwr_interpret, pwr_category; - u8 psd_index = 0, non_psd_index = 0, local_tpe_count = 0, reg_tpe_count = 0; - bool use_local_tpe, non_psd_set = false, psd_set = false; reg_info = &ab->reg_info_store[ar->pdev_idx]; client_type = reg_info->client_type; - for (i = 0; i < bss_conf->tx_pwr_env_num; i++) { - single_tpe = &bss_conf->tx_pwr_env[i]; - pwr_category = u8_get_bits(single_tpe->tx_power_info, - IEEE80211_TX_PWR_ENV_INFO_CATEGORY); - pwr_interpret = u8_get_bits(single_tpe->tx_power_info, - IEEE80211_TX_PWR_ENV_INFO_INTERPRET); - - if (pwr_category == client_type) { - if (pwr_interpret == IEEE80211_TPE_LOCAL_EIRP || - pwr_interpret == IEEE80211_TPE_LOCAL_EIRP_PSD) - local_tpe_count++; - else if (pwr_interpret == IEEE80211_TPE_REG_CLIENT_EIRP || - pwr_interpret == IEEE80211_TPE_REG_CLIENT_EIRP_PSD) - reg_tpe_count++; - } - } + local_tpe_count = + bss_conf->tpe.max_local[client_type].valid + + bss_conf->tpe.psd_local[client_type].valid; + reg_tpe_count = + bss_conf->tpe.max_reg_client[client_type].valid + + bss_conf->tpe.psd_reg_client[client_type].valid; if (!reg_tpe_count && !local_tpe_count) { ath11k_warn(ab, @@ -7898,83 +7862,44 @@ static void ath11k_mac_parse_tx_pwr_env(struct ath11k *ar, use_local_tpe = false; } - for (i = 0; i < bss_conf->tx_pwr_env_num; i++) { - single_tpe = &bss_conf->tx_pwr_env[i]; - pwr_category = u8_get_bits(single_tpe->tx_power_info, - IEEE80211_TX_PWR_ENV_INFO_CATEGORY); - pwr_interpret = u8_get_bits(single_tpe->tx_power_info, - IEEE80211_TX_PWR_ENV_INFO_INTERPRET); - - if (pwr_category != client_type) - continue; - - /* get local transmit power envelope */ - if (use_local_tpe) { - if (pwr_interpret == IEEE80211_TPE_LOCAL_EIRP) { - non_psd_index = i; - non_psd_set = true; - } else if (pwr_interpret == IEEE80211_TPE_LOCAL_EIRP_PSD) { - psd_index = i; - psd_set = true; - } - /* get regulatory transmit power envelope */ - } else { - if (pwr_interpret == IEEE80211_TPE_REG_CLIENT_EIRP) { - non_psd_index = i; - non_psd_set = true; - } else if (pwr_interpret == IEEE80211_TPE_REG_CLIENT_EIRP_PSD) { - psd_index = i; - psd_set = true; - } - } + if (use_local_tpe) { + psd = &bss_conf->tpe.psd_local[client_type]; + if (!psd->valid) + psd = NULL; + non_psd = &bss_conf->tpe.max_local[client_type]; + if (!non_psd->valid) + non_psd = NULL; + } else { + psd = &bss_conf->tpe.psd_reg_client[client_type]; + if (!psd->valid) + psd = NULL; + non_psd = &bss_conf->tpe.max_reg_client[client_type]; + if (!non_psd->valid) + non_psd = NULL; } - if (non_psd_set && !psd_set) { - single_tpe = &bss_conf->tx_pwr_env[non_psd_index]; - pwr_count = u8_get_bits(single_tpe->tx_power_info, - IEEE80211_TX_PWR_ENV_INFO_COUNT); - pwr_interpret = u8_get_bits(single_tpe->tx_power_info, - IEEE80211_TX_PWR_ENV_INFO_INTERPRET); + if (non_psd && !psd) { arvif->reg_tpc_info.is_psd_power = false; 
arvif->reg_tpc_info.eirp_power = 0; - arvif->reg_tpc_info.num_pwr_levels = - ath11k_mac_get_tpe_count(pwr_interpret, pwr_count); + arvif->reg_tpc_info.num_pwr_levels = non_psd->count; for (i = 0; i < arvif->reg_tpc_info.num_pwr_levels; i++) { ath11k_dbg(ab, ATH11K_DBG_MAC, "non PSD power[%d] : %d\n", - i, single_tpe->tx_power[i]); - arvif->reg_tpc_info.tpe[i] = single_tpe->tx_power[i] / 2; + i, non_psd->power[i]); + arvif->reg_tpc_info.tpe[i] = non_psd->power[i] / 2; } } - if (psd_set) { - single_tpe = &bss_conf->tx_pwr_env[psd_index]; - pwr_count = u8_get_bits(single_tpe->tx_power_info, - IEEE80211_TX_PWR_ENV_INFO_COUNT); - pwr_interpret = u8_get_bits(single_tpe->tx_power_info, - IEEE80211_TX_PWR_ENV_INFO_INTERPRET); - arvif->reg_tpc_info.is_psd_power = true; + if (psd) { + arvif->reg_tpc_info.num_pwr_levels = psd->count; - if (pwr_count == 0) { + for (i = 0; i < arvif->reg_tpc_info.num_pwr_levels; i++) { ath11k_dbg(ab, ATH11K_DBG_MAC, - "TPE PSD power : %d\n", single_tpe->tx_power[0]); - arvif->reg_tpc_info.num_pwr_levels = - ath11k_mac_get_num_pwr_levels(&ctx->def); - - for (i = 0; i < arvif->reg_tpc_info.num_pwr_levels; i++) - arvif->reg_tpc_info.tpe[i] = single_tpe->tx_power[0] / 2; - } else { - arvif->reg_tpc_info.num_pwr_levels = - ath11k_mac_get_tpe_count(pwr_interpret, pwr_count); - - for (i = 0; i < arvif->reg_tpc_info.num_pwr_levels; i++) { - ath11k_dbg(ab, ATH11K_DBG_MAC, - "TPE PSD power[%d] : %d\n", - i, single_tpe->tx_power[i]); - arvif->reg_tpc_info.tpe[i] = single_tpe->tx_power[i] / 2; - } + "TPE PSD power[%d] : %d\n", + i, psd->power[i]); + arvif->reg_tpc_info.tpe[i] = psd->power[i] / 2; } } } diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 1c3a683a3ee2..769008a51809 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -2825,17 +2825,6 @@ struct ieee80211_he_6ghz_oper { u8 minrate; } __packed; -/* - * In "9.4.2.161 Transmit Power Envelope element" of "IEEE Std 802.11ax-2021", - * it show four types in "Table 9-275a-Maximum Transmit Power Interpretation - * subfield encoding", and two category for each type in "Table E-12-Regulatory - * Info subfield encoding in the United States". - * So it it totally max 8 Transmit Power Envelope element. - */ -#define IEEE80211_TPE_MAX_IE_COUNT 8 - -#define IEEE80211_TPE_MAX_POWER_COUNT 8 - /* transmit power interpretation type of transmit power envelope element */ enum ieee80211_tx_power_intrpt_type { IEEE80211_TPE_LOCAL_EIRP, @@ -2844,24 +2833,107 @@ enum ieee80211_tx_power_intrpt_type { IEEE80211_TPE_REG_CLIENT_EIRP_PSD, }; +/* category type of transmit power envelope element */ +enum ieee80211_tx_power_category_6ghz { + IEEE80211_TPE_CAT_6GHZ_DEFAULT = 0, + IEEE80211_TPE_CAT_6GHZ_SUBORDINATE = 1, +}; + +/* + * For IEEE80211_TPE_LOCAL_EIRP / IEEE80211_TPE_REG_CLIENT_EIRP, + * setting to 63.5 dBm means no constraint. + */ +#define IEEE80211_TPE_MAX_TX_PWR_NO_CONSTRAINT 127 + +/* + * For IEEE80211_TPE_LOCAL_EIRP_PSD / IEEE80211_TPE_REG_CLIENT_EIRP_PSD, + * setting to 127 indicates no PSD limit for the 20 MHz channel. 
+ */ +#define IEEE80211_TPE_PSD_NO_LIMIT 127 + /** * struct ieee80211_tx_pwr_env - Transmit Power Envelope - * @tx_power_info: Transmit Power Information field - * @tx_power: Maximum Transmit Power field + * @info: Transmit Power Information field + * @variable: Maximum Transmit Power field * * This structure represents the payload of the "Transmit Power * Envelope element" as described in IEEE Std 802.11ax-2021 section * 9.4.2.161 */ struct ieee80211_tx_pwr_env { - u8 tx_power_info; - s8 tx_power[IEEE80211_TPE_MAX_POWER_COUNT]; + u8 info; + u8 variable[]; } __packed; #define IEEE80211_TX_PWR_ENV_INFO_COUNT 0x7 #define IEEE80211_TX_PWR_ENV_INFO_INTERPRET 0x38 #define IEEE80211_TX_PWR_ENV_INFO_CATEGORY 0xC0 +#define IEEE80211_TX_PWR_ENV_EXT_COUNT 0xF + +static inline bool ieee80211_valid_tpe_element(const u8 *data, u8 len) +{ + const struct ieee80211_tx_pwr_env *env = (const void *)data; + u8 count, interpret, category; + u8 needed = sizeof(*env); + u8 N; /* also called N in the spec */ + + if (len < needed) + return false; + + count = u8_get_bits(env->info, IEEE80211_TX_PWR_ENV_INFO_COUNT); + interpret = u8_get_bits(env->info, IEEE80211_TX_PWR_ENV_INFO_INTERPRET); + category = u8_get_bits(env->info, IEEE80211_TX_PWR_ENV_INFO_CATEGORY); + + switch (category) { + case IEEE80211_TPE_CAT_6GHZ_DEFAULT: + case IEEE80211_TPE_CAT_6GHZ_SUBORDINATE: + break; + default: + return false; + } + + switch (interpret) { + case IEEE80211_TPE_LOCAL_EIRP: + case IEEE80211_TPE_REG_CLIENT_EIRP: + if (count > 3) + return false; + + /* count == 0 encodes 1 value for 20 MHz, etc. */ + needed += count + 1; + + if (len < needed) + return false; + + /* there can be extension fields not accounted for in 'count' */ + + return true; + case IEEE80211_TPE_LOCAL_EIRP_PSD: + case IEEE80211_TPE_REG_CLIENT_EIRP_PSD: + if (count > 4) + return false; + + N = count ? 
1 << (count - 1) : 1; + needed += N; + + if (len < needed) + return false; + + if (len > needed) { + u8 K = u8_get_bits(env->variable[N], + IEEE80211_TX_PWR_ENV_EXT_COUNT); + + needed += 1 + K; + if (len < needed) + return false; + } + + return true; + } + + return false; +} + /* * ieee80211_he_oper_size - calculate 802.11ax HE Operations IE size * @he_oper_ie: byte data of the He Operations IE, stating from the byte diff --git a/include/net/mac80211.h b/include/net/mac80211.h index cafc664ee531..a4efbfb8d796 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -550,6 +550,39 @@ struct ieee80211_fils_discovery { u32 max_interval; }; +#define IEEE80211_TPE_EIRP_ENTRIES_320MHZ 5 +struct ieee80211_parsed_tpe_eirp { + bool valid; + s8 power[IEEE80211_TPE_EIRP_ENTRIES_320MHZ]; + u8 count; +}; + +#define IEEE80211_TPE_PSD_ENTRIES_320MHZ 16 +struct ieee80211_parsed_tpe_psd { + bool valid; + s8 power[IEEE80211_TPE_PSD_ENTRIES_320MHZ]; + u8 count, n; +}; + +/** + * struct ieee80211_parsed_tpe - parsed transmit power envelope information + * @max_local: maximum local EIRP, one value for 20, 40, 80, 160, 320 MHz each + * (indexed by TX power category) + * @max_reg_client: maximum regulatory client EIRP, one value for 20, 40, 80, + * 160, 320 MHz each + * (indexed by TX power category) + * @psd_local: maximum local power spectral density, one value for each 20 MHz + * subchannel per bss_conf's chanreq.oper + * (indexed by TX power category) + * @psd_reg_client: maximum regulatory power spectral density, one value for + * each 20 MHz subchannel per bss_conf's chanreq.oper + * (indexed by TX power category) + */ +struct ieee80211_parsed_tpe { + struct ieee80211_parsed_tpe_eirp max_local[2], max_reg_client[2]; + struct ieee80211_parsed_tpe_psd psd_local[2], psd_reg_client[2]; +}; + /** * struct ieee80211_bss_conf - holds the BSS's changing parameters * @@ -662,8 +695,7 @@ struct ieee80211_fils_discovery { * @beacon_tx_rate: The configured beacon transmit rate that needs to be passed * to driver when rate control is offloaded to firmware. * @power_type: power type of BSS for 6 GHz - * @tx_pwr_env: transmit power envelope array of BSS. - * @tx_pwr_env_num: number of @tx_pwr_env. + * @tpe: transmit power envelope information * @pwr_reduction: power constraint of BSS. * @eht_support: does this BSS support EHT * @csa_active: marks whether a channel switch is going on. 
@@ -766,8 +798,9 @@ struct ieee80211_bss_conf { u32 unsol_bcast_probe_resp_interval; struct cfg80211_bitrate_mask beacon_tx_rate; enum ieee80211_ap_reg_power power_type; - struct ieee80211_tx_pwr_env tx_pwr_env[IEEE80211_TPE_MAX_IE_COUNT]; - u8 tx_pwr_env_num; + + struct ieee80211_parsed_tpe tpe; + u8 pwr_reduction; bool eht_support; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index eb62b7d4b4f7..7d541a2355f6 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -1708,7 +1709,6 @@ struct ieee802_11_elems { const struct ieee80211_he_spr *he_spr; const struct ieee80211_mu_edca_param_set *mu_edca_param_set; const struct ieee80211_he_6ghz_capa *he_6ghz_capa; - const struct ieee80211_tx_pwr_env *tx_pwr_env[IEEE80211_TPE_MAX_IE_COUNT]; const u8 *uora_element; const u8 *mesh_id; const u8 *peering; @@ -1746,6 +1746,9 @@ struct ieee802_11_elems { const struct ieee80211_bandwidth_indication *bandwidth_indication; const struct ieee80211_ttlm_elem *ttlm[IEEE80211_TTLM_MAX_CNT]; + /* not the order in the psd values is per element, not per chandef */ + struct ieee80211_parsed_tpe tpe; + /* length of them, respectively */ u8 ext_capab_len; u8 ssid_len; @@ -1764,8 +1767,6 @@ struct ieee802_11_elems { u8 perr_len; u8 country_elem_len; u8 bssid_index_len; - u8 tx_pwr_env_len[IEEE80211_TPE_MAX_IE_COUNT]; - u8 tx_pwr_env_num; u8 eht_cap_len; /* mult-link element can be de-fragmented and thus u8 is not sufficient */ @@ -2243,6 +2244,7 @@ int ieee80211_frame_duration(enum nl80211_band band, size_t len, void ieee80211_regulatory_limit_wmm_params(struct ieee80211_sub_if_data *sdata, struct ieee80211_tx_queue_params *qparam, int ac); +void ieee80211_clear_tpe(struct ieee80211_parsed_tpe *tpe); void ieee80211_set_wmm_default(struct ieee80211_link_data *link, bool bss_notify, bool enable_qos); void ieee80211_xmit(struct ieee80211_sub_if_data *sdata, @@ -2681,6 +2683,11 @@ void ieee80211_remove_wbrf(struct ieee80211_local *local, struct cfg80211_chan_d #define VISIBLE_IF_MAC80211_KUNIT ieee80211_rx_result ieee80211_drop_unencrypted_mgmt(struct ieee80211_rx_data *rx); +int ieee80211_calc_chandef_subchan_offset(const struct cfg80211_chan_def *ap, + u8 n_partial_subchans); +void ieee80211_rearrange_tpe_psd(struct ieee80211_parsed_tpe_psd *psd, + const struct cfg80211_chan_def *ap, + const struct cfg80211_chan_def *used); #else #define EXPORT_SYMBOL_IF_MAC80211_KUNIT(sym) #define VISIBLE_IF_MAC80211_KUNIT static diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index e410a43dc681..ed9851faac05 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -606,11 +606,195 @@ static bool ieee80211_chandef_usable(struct ieee80211_sub_if_data *sdata, return true; } +static int ieee80211_chandef_num_subchans(const struct cfg80211_chan_def *c) +{ + if (c->width == NL80211_CHAN_WIDTH_80P80) + return 4 + 4; + + return nl80211_chan_width_to_mhz(c->width) / 20; +} + +static int ieee80211_chandef_num_widths(const struct cfg80211_chan_def *c) +{ + switch (c->width) { + case NL80211_CHAN_WIDTH_20: + case NL80211_CHAN_WIDTH_20_NOHT: + return 1; + case NL80211_CHAN_WIDTH_40: + return 2; + case NL80211_CHAN_WIDTH_80P80: + case NL80211_CHAN_WIDTH_80: + return 3; + case NL80211_CHAN_WIDTH_160: + return 4; + case NL80211_CHAN_WIDTH_320: + return 5; + default: + WARN_ON(1); + return 0; + } +} + +VISIBLE_IF_MAC80211_KUNIT int +ieee80211_calc_chandef_subchan_offset(const struct cfg80211_chan_def *ap, + u8 
n_partial_subchans) +{ + int n = ieee80211_chandef_num_subchans(ap); + struct cfg80211_chan_def tmp = *ap; + int offset = 0; + + /* + * Given a chandef (in this context, it's the AP's) and a number + * of subchannels that we want to look at ('n_partial_subchans'), + * calculate the offset in number of subchannels between the full + * and the subset with the desired width. + */ + + /* same number of subchannels means no offset, obviously */ + if (n == n_partial_subchans) + return 0; + + /* don't WARN - misconfigured APs could cause this if their N > width */ + if (n < n_partial_subchans) + return 0; + + while (ieee80211_chandef_num_subchans(&tmp) > n_partial_subchans) { + u32 prev = tmp.center_freq1; + + ieee80211_chandef_downgrade(&tmp, NULL); + + /* + * if center_freq moved up, half the original channels + * are gone now but were below, so increase offset + */ + if (prev < tmp.center_freq1) + offset += ieee80211_chandef_num_subchans(&tmp); + } + + /* + * 80+80 with secondary 80 below primary - four subchannels for it + * (we cannot downgrade *to* 80+80, so no need to consider 'tmp') + */ + if (ap->width == NL80211_CHAN_WIDTH_80P80 && + ap->center_freq2 < ap->center_freq1) + offset += 4; + + return offset; +} +EXPORT_SYMBOL_IF_MAC80211_KUNIT(ieee80211_calc_chandef_subchan_offset); + +VISIBLE_IF_MAC80211_KUNIT void +ieee80211_rearrange_tpe_psd(struct ieee80211_parsed_tpe_psd *psd, + const struct cfg80211_chan_def *ap, + const struct cfg80211_chan_def *used) +{ + u8 needed = ieee80211_chandef_num_subchans(used); + u8 have = ieee80211_chandef_num_subchans(ap); + u8 tmp[IEEE80211_TPE_PSD_ENTRIES_320MHZ]; + u8 offset; + + if (!psd->valid) + return; + + /* if N is zero, all defaults were used, no point in rearranging */ + if (!psd->n) + goto out; + + BUILD_BUG_ON(sizeof(tmp) != sizeof(psd->power)); + + /* + * This assumes that 'N' is consistent with the HE channel, as + * it should be (otherwise the AP is broken). + * + * In psd->power we have values in the order 0..N, 0..K, where + * N+K should cover the entire channel per 'ap', but even if it + * doesn't then we've pre-filled 'unlimited' as defaults. + * + * But this is all the wrong order, we want to have them in the + * order of the 'used' channel. + * + * So for example, we could have a 320 MHz EHT AP, which has the + * HE channel as 80 MHz (e.g. due to puncturing, which doesn't + * seem to be considered for the TPE), as follows: + * + * EHT 320: | | | | | | | | | | | | | | | | | + * HE 80: | | | | | + * used 160: | | | | | | | | | + * + * N entries: |--|--|--|--| + * K entries: |--|--|--|--|--|--|--|--| |--|--|--|--| + * power idx: 4 5 6 7 8 9 10 11 0 1 2 3 12 13 14 15 + * full chan: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 + * used chan: 0 1 2 3 4 5 6 7 + * + * The idx in the power array ('power idx') is like this since it + * comes directly from the element's N and K entries in their + * element order, and those are this way for HE compatibility. + * + * Rearrange them as desired here, first by putting them into the + * 'full chan' order, and then selecting the necessary subset for + * the 'used chan'. 
+ */ + + /* first reorder according to AP channel */ + offset = ieee80211_calc_chandef_subchan_offset(ap, psd->n); + for (int i = 0; i < have; i++) { + if (i < offset) + tmp[i] = psd->power[i + psd->n]; + else if (i < offset + psd->n) + tmp[i] = psd->power[i - offset]; + else + tmp[i] = psd->power[i]; + } + + /* + * and then select the subset for the used channel + * (set everything to defaults first in case a driver is confused) + */ + memset(psd->power, IEEE80211_TPE_PSD_NO_LIMIT, sizeof(psd->power)); + offset = ieee80211_calc_chandef_subchan_offset(ap, needed); + for (int i = 0; i < needed; i++) + psd->power[i] = tmp[offset + i]; + +out: + /* limit, but don't lie if there are defaults in the data */ + if (needed < psd->count) + psd->count = needed; +} +EXPORT_SYMBOL_IF_MAC80211_KUNIT(ieee80211_rearrange_tpe_psd); + +static void ieee80211_rearrange_tpe(struct ieee80211_parsed_tpe *tpe, + const struct cfg80211_chan_def *ap, + const struct cfg80211_chan_def *used) +{ + /* ignore this completely for narrow/invalid channels */ + if (!ieee80211_chandef_num_subchans(ap) || + !ieee80211_chandef_num_subchans(used)) { + ieee80211_clear_tpe(tpe); + return; + } + + for (int i = 0; i < 2; i++) { + int needed_pwr_count; + + ieee80211_rearrange_tpe_psd(&tpe->psd_local[i], ap, used); + ieee80211_rearrange_tpe_psd(&tpe->psd_reg_client[i], ap, used); + + /* limit this to the widths we actually need */ + needed_pwr_count = ieee80211_chandef_num_widths(used); + if (needed_pwr_count < tpe->max_local[i].count) + tpe->max_local[i].count = needed_pwr_count; + if (needed_pwr_count < tpe->max_reg_client[i].count) + tpe->max_reg_client[i].count = needed_pwr_count; + } +} + static struct ieee802_11_elems * ieee80211_determine_chan_mode(struct ieee80211_sub_if_data *sdata, struct ieee80211_conn_settings *conn, struct cfg80211_bss *cbss, int link_id, - struct ieee80211_chan_req *chanreq) + struct ieee80211_chan_req *chanreq, + struct cfg80211_chan_def *ap_chandef) { const struct cfg80211_bss_ies *ies = rcu_dereference(cbss->ies); struct ieee80211_bss *bss = (void *)cbss->priv; @@ -623,7 +807,6 @@ ieee80211_determine_chan_mode(struct ieee80211_sub_if_data *sdata, }; struct ieee802_11_elems *elems; struct ieee80211_supported_band *sband; - struct cfg80211_chan_def ap_chandef; enum ieee80211_conn_mode ap_mode; int ret; @@ -634,7 +817,7 @@ again: return ERR_PTR(-ENOMEM); ap_mode = ieee80211_determine_ap_chan(sdata, channel, bss->vht_cap_info, - elems, false, conn, &ap_chandef); + elems, false, conn, ap_chandef); /* this should be impossible since parsing depends on our mode */ if (WARN_ON(ap_mode > conn->mode)) { @@ -701,12 +884,12 @@ again: break; } - chanreq->oper = ap_chandef; + chanreq->oper = *ap_chandef; /* wider-bandwidth OFDMA is only done in EHT */ if (conn->mode >= IEEE80211_CONN_MODE_EHT && !(sdata->vif.driver_flags & IEEE80211_VIF_IGNORE_OFDMA_WIDER_BW)) - chanreq->ap = ap_chandef; + chanreq->ap = *ap_chandef; else chanreq->ap.chan = NULL; @@ -738,7 +921,7 @@ again: IEEE80211_CONN_BW_LIMIT_160); } - if (chanreq->oper.width != ap_chandef.width || ap_mode != conn->mode) + if (chanreq->oper.width != ap_chandef->width || ap_mode != conn->mode) sdata_info(sdata, "regulatory prevented using AP config, downgraded\n"); @@ -3275,9 +3458,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, sdata->vif.bss_conf.power_type = IEEE80211_REG_UNSET_AP; sdata->vif.bss_conf.pwr_reduction = 0; - sdata->vif.bss_conf.tx_pwr_env_num = 0; - memset(sdata->vif.bss_conf.tx_pwr_env, 0, - 
sizeof(sdata->vif.bss_conf.tx_pwr_env)); + ieee80211_clear_tpe(&sdata->vif.bss_conf.tpe); sdata->vif.cfg.eml_cap = 0; sdata->vif.cfg.eml_med_sync_delay = 0; @@ -5018,15 +5199,15 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; bool is_6ghz = cbss->channel->band == NL80211_BAND_6GHZ; struct ieee80211_chan_req chanreq = {}; + struct cfg80211_chan_def ap_chandef; struct ieee802_11_elems *elems; int ret; - u32 i; lockdep_assert_wiphy(local->hw.wiphy); rcu_read_lock(); elems = ieee80211_determine_chan_mode(sdata, conn, cbss, link_id, - &chanreq); + &chanreq, &ap_chandef); if (IS_ERR(elems)) { rcu_read_unlock(); @@ -5042,35 +5223,22 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, if (link && is_6ghz && conn->mode >= IEEE80211_CONN_MODE_HE) { const struct ieee80211_he_6ghz_oper *he_6ghz_oper; - struct ieee80211_bss_conf *bss_conf; - u8 j = 0; - - bss_conf = link->conf; if (elems->pwr_constr_elem) - bss_conf->pwr_reduction = *elems->pwr_constr_elem; - - BUILD_BUG_ON(ARRAY_SIZE(bss_conf->tx_pwr_env) != - ARRAY_SIZE(elems->tx_pwr_env)); - - for (i = 0; i < elems->tx_pwr_env_num; i++) { - if (elems->tx_pwr_env_len[i] > sizeof(bss_conf->tx_pwr_env[j])) - continue; - - bss_conf->tx_pwr_env_num++; - memcpy(&bss_conf->tx_pwr_env[j], elems->tx_pwr_env[i], - elems->tx_pwr_env_len[i]); - j++; - } + link->conf->pwr_reduction = *elems->pwr_constr_elem; he_6ghz_oper = ieee80211_he_6ghz_oper(elems->he_operation); if (he_6ghz_oper) - bss_conf->power_type = + link->conf->power_type = ieee80211_ap_power_type(he_6ghz_oper->control); else link_info(link, "HE 6 GHz operation missing (on %d MHz), expect issues\n", cbss->channel->center_freq); + + link->conf->tpe = elems->tpe; + ieee80211_rearrange_tpe(&link->conf->tpe, &ap_chandef, + &chanreq.oper); } rcu_read_unlock(); /* the element data was RCU protected so no longer valid anyway */ @@ -7558,6 +7726,8 @@ void ieee80211_mgd_setup_link(struct ieee80211_link_data *link) wiphy_delayed_work_init(&link->u.mgd.chswitch_work, ieee80211_chswitch_work); + ieee80211_clear_tpe(&link->conf->tpe); + if (sdata->u.mgd.assoc_data) ether_addr_copy(link->conf->addr, sdata->u.mgd.assoc_data->link[link_id].addr); diff --git a/net/mac80211/parse.c b/net/mac80211/parse.c index 55e5497f8978..6efeb977f8e5 100644 --- a/net/mac80211/parse.c +++ b/net/mac80211/parse.c @@ -187,6 +187,84 @@ ieee80211_parse_extension_element(u32 *crc, *crc = crc32_be(*crc, (void *)elem, elem->datalen + 2); } +static void ieee80211_parse_tpe(struct ieee80211_parsed_tpe *tpe, + const u8 *data, u8 len) +{ + const struct ieee80211_tx_pwr_env *env = (const void *)data; + u8 count, interpret, category; + u8 *out, N, *cnt_out = NULL, *N_out = NULL; + + if (!ieee80211_valid_tpe_element(data, len)) + return; + + count = u8_get_bits(env->info, IEEE80211_TX_PWR_ENV_INFO_COUNT); + interpret = u8_get_bits(env->info, IEEE80211_TX_PWR_ENV_INFO_INTERPRET); + category = u8_get_bits(env->info, IEEE80211_TX_PWR_ENV_INFO_CATEGORY); + + switch (interpret) { + case IEEE80211_TPE_LOCAL_EIRP: + out = tpe->max_local[category].power; + cnt_out = &tpe->max_local[category].count; + tpe->max_local[category].valid = true; + break; + case IEEE80211_TPE_REG_CLIENT_EIRP: + out = tpe->max_reg_client[category].power; + cnt_out = &tpe->max_reg_client[category].count; + tpe->max_reg_client[category].valid = true; + break; + case IEEE80211_TPE_LOCAL_EIRP_PSD: + out = tpe->psd_local[category].power; + cnt_out = &tpe->psd_local[category].count; + N_out = 
&tpe->psd_local[category].n; + tpe->psd_local[category].valid = true; + break; + case IEEE80211_TPE_REG_CLIENT_EIRP_PSD: + out = tpe->psd_reg_client[category].power; + cnt_out = &tpe->psd_reg_client[category].count; + N_out = &tpe->psd_reg_client[category].n; + tpe->psd_reg_client[category].valid = true; + break; + } + + switch (interpret) { + case IEEE80211_TPE_LOCAL_EIRP: + case IEEE80211_TPE_REG_CLIENT_EIRP: + /* count was validated <= 3, plus 320 MHz */ + BUILD_BUG_ON(IEEE80211_TPE_EIRP_ENTRIES_320MHZ < 5); + memcpy(out, env->variable, count + 1); + *cnt_out = count + 1; + /* separately take 320 MHz if present */ + if (count == 3 && len > sizeof(*env) + count + 1) { + out[4] = env->variable[count + 2]; + *cnt_out = 5; + } + break; + case IEEE80211_TPE_LOCAL_EIRP_PSD: + case IEEE80211_TPE_REG_CLIENT_EIRP_PSD: + if (!count) { + memset(out, env->variable[0], + IEEE80211_TPE_PSD_ENTRIES_320MHZ); + *cnt_out = IEEE80211_TPE_PSD_ENTRIES_320MHZ; + break; + } + + N = 1 << (count - 1); + memcpy(out, env->variable, N); + *cnt_out = N; + *N_out = N; + + if (len > sizeof(*env) + N) { + int K = u8_get_bits(env->variable[N], + IEEE80211_TX_PWR_ENV_EXT_COUNT); + + K = min(K, IEEE80211_TPE_PSD_ENTRIES_320MHZ - N); + memcpy(out + N, env->variable + N + 1, K); + (*cnt_out) += K; + } + break; + } +} + static u32 _ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params, struct ieee80211_elems_parse *elems_parse, @@ -593,16 +671,9 @@ _ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params, elems->rsnx_len = elen; break; case WLAN_EID_TX_POWER_ENVELOPE: - if (elen < 1 || - elen > sizeof(struct ieee80211_tx_pwr_env)) - break; - - if (elems->tx_pwr_env_num >= ARRAY_SIZE(elems->tx_pwr_env)) + if (params->mode < IEEE80211_CONN_MODE_HE) break; - - elems->tx_pwr_env[elems->tx_pwr_env_num] = (void *)pos; - elems->tx_pwr_env_len[elems->tx_pwr_env_num] = elen; - elems->tx_pwr_env_num++; + ieee80211_parse_tpe(&elems->tpe, pos, elen); break; case WLAN_EID_EXTENSION: ieee80211_parse_extension_element(calc_crc ? 
@@ -889,6 +960,9 @@ ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params) elems->ie_start = params->start; elems->total_len = params->len; + /* set all TPE entries to unlimited (but invalid) */ + ieee80211_clear_tpe(&elems->tpe); + nontransmitted_profile = elems_parse->scratch_pos; nontransmitted_profile_len = ieee802_11_find_bssid_profile(params->start, params->len, diff --git a/net/mac80211/tests/Makefile b/net/mac80211/tests/Makefile index 4fdaf3feaca3..511dfa226699 100644 --- a/net/mac80211/tests/Makefile +++ b/net/mac80211/tests/Makefile @@ -1,3 +1,3 @@ -mac80211-tests-y += module.o elems.o mfp.o +mac80211-tests-y += module.o elems.o mfp.o tpe.o obj-$(CONFIG_MAC80211_KUNIT_TEST) += mac80211-tests.o diff --git a/net/mac80211/tests/tpe.c b/net/mac80211/tests/tpe.c new file mode 100644 index 000000000000..dd63303a2985 --- /dev/null +++ b/net/mac80211/tests/tpe.c @@ -0,0 +1,284 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * KUnit tests for TPE element handling + * + * Copyright (C) 2024 Intel Corporation + */ +#include +#include "../ieee80211_i.h" + +MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING); + +static struct ieee80211_channel chan6g_1 = { + .band = NL80211_BAND_6GHZ, + .center_freq = 5955, +}; + +static struct ieee80211_channel chan6g_33 = { + .band = NL80211_BAND_6GHZ, + .center_freq = 6115, +}; + +static struct ieee80211_channel chan6g_61 = { + .band = NL80211_BAND_6GHZ, + .center_freq = 6255, +}; + +static const struct subchan_test_case { + const char *desc; + struct cfg80211_chan_def c; + u8 n; + int expect; +} subchan_offset_cases[] = { + { + .desc = "identical 20 MHz", + .c.width = NL80211_CHAN_WIDTH_20, + .c.chan = &chan6g_1, + .c.center_freq1 = 5955, + .n = 1, + .expect = 0, + }, + { + .desc = "identical 40 MHz", + .c.width = NL80211_CHAN_WIDTH_40, + .c.chan = &chan6g_1, + .c.center_freq1 = 5965, + .n = 2, + .expect = 0, + }, + { + .desc = "identical 80+80 MHz", + /* not really is valid? doesn't matter for the test */ + .c.width = NL80211_CHAN_WIDTH_80P80, + .c.chan = &chan6g_1, + .c.center_freq1 = 5985, + .c.center_freq2 = 6225, + .n = 16, + .expect = 0, + }, + { + .desc = "identical 320 MHz", + .c.width = NL80211_CHAN_WIDTH_320, + .c.chan = &chan6g_1, + .c.center_freq1 = 6105, + .n = 16, + .expect = 0, + }, + { + .desc = "lower 160 MHz of 320 MHz", + .c.width = NL80211_CHAN_WIDTH_320, + .c.chan = &chan6g_1, + .c.center_freq1 = 6105, + .n = 8, + .expect = 0, + }, + { + .desc = "upper 160 MHz of 320 MHz", + .c.width = NL80211_CHAN_WIDTH_320, + .c.chan = &chan6g_61, + .c.center_freq1 = 6105, + .n = 8, + .expect = 8, + }, + { + .desc = "upper 160 MHz of 320 MHz, go to 40", + .c.width = NL80211_CHAN_WIDTH_320, + .c.chan = &chan6g_61, + .c.center_freq1 = 6105, + .n = 2, + .expect = 8 + 4 + 2, + }, + { + .desc = "secondary 80 above primary in 80+80 MHz", + /* not really is valid? doesn't matter for the test */ + .c.width = NL80211_CHAN_WIDTH_80P80, + .c.chan = &chan6g_1, + .c.center_freq1 = 5985, + .c.center_freq2 = 6225, + .n = 4, + .expect = 0, + }, + { + .desc = "secondary 80 below primary in 80+80 MHz", + /* not really is valid? doesn't matter for the test */ + .c.width = NL80211_CHAN_WIDTH_80P80, + .c.chan = &chan6g_61, + .c.center_freq1 = 6225, + .c.center_freq2 = 5985, + .n = 4, + .expect = 4, + }, + { + .desc = "secondary 80 below primary in 80+80 MHz, go to 20", + /* not really is valid? 
doesn't matter for the test */ + .c.width = NL80211_CHAN_WIDTH_80P80, + .c.chan = &chan6g_61, + .c.center_freq1 = 6225, + .c.center_freq2 = 5985, + .n = 1, + .expect = 7, + }, +}; + +KUNIT_ARRAY_PARAM_DESC(subchan_offset, subchan_offset_cases, desc); + +static void subchan_offset(struct kunit *test) +{ + const struct subchan_test_case *params = test->param_value; + int offset; + + KUNIT_ASSERT_EQ(test, cfg80211_chandef_valid(¶ms->c), true); + + offset = ieee80211_calc_chandef_subchan_offset(¶ms->c, params->n); + + KUNIT_EXPECT_EQ(test, params->expect, offset); +} + +static const struct psd_reorder_test_case { + const char *desc; + struct cfg80211_chan_def ap, used; + struct ieee80211_parsed_tpe_psd psd, out; +} psd_reorder_cases[] = { + { + .desc = "no changes, 320 MHz", + + .ap.width = NL80211_CHAN_WIDTH_320, + .ap.chan = &chan6g_1, + .ap.center_freq1 = 6105, + + .used.width = NL80211_CHAN_WIDTH_320, + .used.chan = &chan6g_1, + .used.center_freq1 = 6105, + + .psd.valid = true, + .psd.count = 16, + .psd.n = 8, + .psd.power = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, + + .out.valid = true, + .out.count = 16, + .out.n = 8, + .out.power = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, + }, + { + .desc = "no changes, 320 MHz, 160 MHz used, n=0", + + .ap.width = NL80211_CHAN_WIDTH_320, + .ap.chan = &chan6g_1, + .ap.center_freq1 = 6105, + + .used.width = NL80211_CHAN_WIDTH_160, + .used.chan = &chan6g_1, + .used.center_freq1 = 6025, + + .psd.valid = true, + .psd.count = 16, + .psd.n = 0, + .psd.power = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, }, + + .out.valid = true, + .out.count = 8, + .out.n = 0, + .out.power = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, }, + }, + { + .desc = "320 MHz, HE is 80, used 160, all lower", + + .ap.width = NL80211_CHAN_WIDTH_320, + .ap.chan = &chan6g_1, + .ap.center_freq1 = 6105, + + .used.width = NL80211_CHAN_WIDTH_160, + .used.chan = &chan6g_1, + .used.center_freq1 = 6025, + + .psd.valid = true, + .psd.count = 16, + .psd.n = 4, + .psd.power = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, + + .out.valid = true, + .out.count = 8, + .out.n = 4, + .out.power = { 0, 1, 2, 3, 4, 5, 6, 7, 127, 127, 127, 127, 127, 127, 127, 127}, + }, + { + .desc = "320 MHz, HE is 80, used 160, all upper", + /* + * EHT: | | | | | | | | | | | | | | | | | + * HE: | | | | | + * used: | | | | | | | | | + */ + + .ap.width = NL80211_CHAN_WIDTH_320, + .ap.chan = &chan6g_61, + .ap.center_freq1 = 6105, + + .used.width = NL80211_CHAN_WIDTH_160, + .used.chan = &chan6g_61, + .used.center_freq1 = 6185, + + .psd.valid = true, + .psd.count = 16, + .psd.n = 4, + .psd.power = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, + + .out.valid = true, + .out.count = 8, + .out.n = 4, + .out.power = { 12, 13, 14, 15, 0, 1, 2, 3, 127, 127, 127, 127, 127, 127, 127, 127}, + }, + { + .desc = "320 MHz, HE is 80, used 160, split", + /* + * EHT: | | | | | | | | | | | | | | | | | + * HE: | | | | | + * used: | | | | | | | | | + */ + + .ap.width = NL80211_CHAN_WIDTH_320, + .ap.chan = &chan6g_33, + .ap.center_freq1 = 6105, + + .used.width = NL80211_CHAN_WIDTH_160, + .used.chan = &chan6g_33, + .used.center_freq1 = 6185, + + .psd.valid = true, + .psd.count = 16, + .psd.n = 4, + .psd.power = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, + + .out.valid = true, + .out.count = 8, + .out.n = 4, + .out.power = { 0, 1, 2, 3, 12, 13, 14, 15, 127, 127, 127, 127, 127, 127, 127, 127}, + }, +}; + +KUNIT_ARRAY_PARAM_DESC(psd_reorder, psd_reorder_cases, desc); + 
+static void psd_reorder(struct kunit *test) +{ + const struct psd_reorder_test_case *params = test->param_value; + struct ieee80211_parsed_tpe_psd tmp = params->psd; + + KUNIT_ASSERT_EQ(test, cfg80211_chandef_valid(¶ms->ap), true); + KUNIT_ASSERT_EQ(test, cfg80211_chandef_valid(¶ms->used), true); + + ieee80211_rearrange_tpe_psd(&tmp, ¶ms->ap, ¶ms->used); + KUNIT_EXPECT_MEMEQ(test, &tmp, ¶ms->out, sizeof(tmp)); +} + +static struct kunit_case tpe_test_cases[] = { + KUNIT_CASE_PARAM(subchan_offset, subchan_offset_gen_params), + KUNIT_CASE_PARAM(psd_reorder, psd_reorder_gen_params), + {} +}; + +static struct kunit_suite tpe = { + .name = "mac80211-tpe", + .test_cases = tpe_test_cases, +}; + +kunit_test_suite(tpe); diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 6139c930b572..183de2075fb9 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -4334,3 +4334,28 @@ ieee80211_min_bw_limit_from_chandef(struct cfg80211_chan_def *chandef) return IEEE80211_CONN_BW_LIMIT_20; } } + +void ieee80211_clear_tpe(struct ieee80211_parsed_tpe *tpe) +{ + for (int i = 0; i < 2; i++) { + tpe->max_local[i].valid = false; + memset(tpe->max_local[i].power, + IEEE80211_TPE_MAX_TX_PWR_NO_CONSTRAINT, + sizeof(tpe->max_local[i].power)); + + tpe->max_reg_client[i].valid = false; + memset(tpe->max_reg_client[i].power, + IEEE80211_TPE_MAX_TX_PWR_NO_CONSTRAINT, + sizeof(tpe->max_reg_client[i].power)); + + tpe->psd_local[i].valid = false; + memset(tpe->psd_local[i].power, + IEEE80211_TPE_PSD_NO_LIMIT, + sizeof(tpe->psd_local[i].power)); + + tpe->psd_reg_client[i].valid = false; + memset(tpe->psd_reg_client[i].power, + IEEE80211_TPE_PSD_NO_LIMIT, + sizeof(tpe->psd_reg_client[i].power)); + } +} -- cgit v1.2.3-58-ga151 From 5a009b42e0418d30b3ffaff2f46c534cd79b3f23 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 6 May 2024 21:37:57 +0200 Subject: wifi: mac80211: track changes in AP's TPE If the TPE (transmit power envelope) is changed, detect and report that to the driver. Reviewed-by: Miriam Rachel Korenblit Link: https://msgid.link/20240506214536.103dda923f45.I990877e409ab8eade9ed7c172272e0cae57256cf@changeid Signed-off-by: Johannes Berg --- include/net/mac80211.h | 2 ++ net/mac80211/mlme.c | 16 ++++++++++++++-- 2 files changed, 16 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index a4efbfb8d796..a59eacfe0480 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -362,6 +362,7 @@ struct ieee80211_vif_chanctx_switch { * status changed. * @BSS_CHANGED_MLD_VALID_LINKS: MLD valid links status changed. 
* @BSS_CHANGED_MLD_TTLM: negotiated TID to link mapping was changed + * @BSS_CHANGED_TPE: transmit power envelope changed */ enum ieee80211_bss_change { BSS_CHANGED_ASSOC = 1<<0, @@ -398,6 +399,7 @@ enum ieee80211_bss_change { BSS_CHANGED_UNSOL_BCAST_PROBE_RESP = 1<<31, BSS_CHANGED_MLD_VALID_LINKS = BIT_ULL(33), BSS_CHANGED_MLD_TTLM = BIT_ULL(34), + BSS_CHANGED_TPE = BIT_ULL(35), /* when adding here, make sure to change ieee80211_reconfig */ }; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index ed9851faac05..08c0999746fb 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -973,6 +973,7 @@ static int ieee80211_config_bw(struct ieee80211_link_data *link, struct ieee80211_channel *channel = link->conf->chanreq.oper.chan; struct ieee80211_sub_if_data *sdata = link->sdata; struct ieee80211_chan_req chanreq = {}; + struct cfg80211_chan_def ap_chandef; enum ieee80211_conn_mode ap_mode; u32 vht_cap_info = 0; u16 ht_opmode; @@ -988,7 +989,7 @@ static int ieee80211_config_bw(struct ieee80211_link_data *link, ap_mode = ieee80211_determine_ap_chan(sdata, channel, vht_cap_info, elems, true, &link->u.mgd.conn, - &chanreq.ap); + &ap_chandef); if (ap_mode != link->u.mgd.conn.mode) { link_info(link, @@ -998,7 +999,8 @@ static int ieee80211_config_bw(struct ieee80211_link_data *link, return -EINVAL; } - chanreq.oper = chanreq.ap; + chanreq.ap = ap_chandef; + chanreq.oper = ap_chandef; if (link->u.mgd.conn.mode < IEEE80211_CONN_MODE_EHT || sdata->vif.driver_flags & IEEE80211_VIF_IGNORE_OFDMA_WIDER_BW) chanreq.ap.chan = NULL; @@ -1026,6 +1028,16 @@ static int ieee80211_config_bw(struct ieee80211_link_data *link, ieee80211_min_bw_limit_from_chandef(&chanreq.oper)) ieee80211_chandef_downgrade(&chanreq.oper, NULL); + if (ap_chandef.chan->band == NL80211_BAND_6GHZ && + link->u.mgd.conn.mode >= IEEE80211_CONN_MODE_HE) { + ieee80211_rearrange_tpe(&elems->tpe, &ap_chandef, + &chanreq.oper); + if (memcmp(&link->conf->tpe, &elems->tpe, sizeof(elems->tpe))) { + link->conf->tpe = elems->tpe; + *changed |= BSS_CHANGED_TPE; + } + } + if (ieee80211_chanreq_identical(&chanreq, &link->conf->chanreq)) return 0; -- cgit v1.2.3-58-ga151 From ce9e660ef32e87441bf59b04f67a24113e82546a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 6 May 2024 21:13:12 +0200 Subject: wifi: mac80211: move radar detect work to sdata At some point we thought perhaps this could be per link, but really that didn't happen, and it's confusing. Radar detection still uses the deflink to allocate the channel, but the work need not be there. Move it back. 
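For orientation, the hunks below spread this move across six files; condensed, the new lifecycle of the CAC timer looks as follows. This is a sketch only, not buildable on its own: the field and function names are taken verbatim from the hunks below, everything around them is elided.

	/* sketch only - condensed from the hunks below */
	struct ieee80211_sub_if_data {
		/* ... other members unchanged ... */
		struct wiphy_delayed_work dfs_cac_timer_work; /* moved here from struct ieee80211_link_data */
	};

	/* ieee80211_setup_sdata(): initialized once per interface */
	wiphy_delayed_work_init(&sdata->dfs_cac_timer_work,
				ieee80211_dfs_cac_timer_work);

	/* ieee80211_start_radar_detection(): queued for the CAC duration */
	wiphy_delayed_work_queue(wiphy, &sdata->dfs_cac_timer_work,
				 msecs_to_jiffies(cac_time_ms));

	/* ieee80211_do_stop() / ieee80211_end_cac() / ieee80211_dfs_cac_cancel():
	 * cancelled through the sdata, no per-link lookup needed
	 */
	wiphy_delayed_work_cancel(wiphy, &sdata->dfs_cac_timer_work);

	/* the worker now recovers the sdata, and still releases the channel
	 * via the deflink, as the commit message notes
	 */
	struct ieee80211_sub_if_data *sdata =
		container_of(work, struct ieee80211_sub_if_data,
			     dfs_cac_timer_work.work);
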
Reviewed-by: Miriam Rachel Korenblit Link: https://msgid.link/20240506211311.43bd82c6da04.Ib39bec3aa198d137385f081e7e1910dcbde3aa1b@changeid Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 6 +++--- net/mac80211/ieee80211_i.h | 3 ++- net/mac80211/iface.c | 4 +++- net/mac80211/link.c | 2 -- net/mac80211/mlme.c | 9 ++++----- net/mac80211/util.c | 2 +- 6 files changed, 13 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 3673c7f6b21a..0ed7e8bbf5c0 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1666,7 +1666,7 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev, if (sdata->wdev.cac_started) { chandef = link_conf->chanreq.oper; - wiphy_delayed_work_cancel(wiphy, &link->dfs_cac_timer_work); + wiphy_delayed_work_cancel(wiphy, &sdata->dfs_cac_timer_work); cfg80211_cac_event(sdata->dev, &chandef, NL80211_RADAR_CAC_ABORTED, GFP_KERNEL); @@ -3466,7 +3466,7 @@ static int ieee80211_start_radar_detection(struct wiphy *wiphy, if (err) goto out_unlock; - wiphy_delayed_work_queue(wiphy, &sdata->deflink.dfs_cac_timer_work, + wiphy_delayed_work_queue(wiphy, &sdata->dfs_cac_timer_work, msecs_to_jiffies(cac_time_ms)); out_unlock: @@ -3483,7 +3483,7 @@ static void ieee80211_end_cac(struct wiphy *wiphy, list_for_each_entry(sdata, &local->interfaces, list) { wiphy_delayed_work_cancel(wiphy, - &sdata->deflink.dfs_cac_timer_work); + &sdata->dfs_cac_timer_work); if (sdata->wdev.cac_started) { ieee80211_link_release_channel(&sdata->deflink); diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 7d541a2355f6..4f4ef6710e8d 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1060,7 +1060,6 @@ struct ieee80211_link_data { int ap_power_level; /* in dBm */ bool radar_required; - struct wiphy_delayed_work dfs_cac_timer_work; union { struct ieee80211_link_data_managed mgd; @@ -1159,6 +1158,8 @@ struct ieee80211_sub_if_data { struct ieee80211_link_data deflink; struct ieee80211_link_data __rcu *link[IEEE80211_MLD_MAX_NUM_LINKS]; + struct wiphy_delayed_work dfs_cac_timer_work; + /* for ieee80211_set_active_links_async() */ struct wiphy_work activate_links_work; u16 desired_active_links; diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index dc42902e2693..1ef4b6dc3730 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -554,7 +554,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do wiphy_work_cancel(local->hw.wiphy, &sdata->deflink.color_change_finalize_work); wiphy_delayed_work_cancel(local->hw.wiphy, - &sdata->deflink.dfs_cac_timer_work); + &sdata->dfs_cac_timer_work); if (sdata->wdev.cac_started) { chandef = sdata->vif.bss_conf.chanreq.oper; @@ -1746,6 +1746,8 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, wiphy_work_init(&sdata->work, ieee80211_iface_work); wiphy_work_init(&sdata->activate_links_work, ieee80211_activate_links_work); + wiphy_delayed_work_init(&sdata->dfs_cac_timer_work, + ieee80211_dfs_cac_timer_work); switch (type) { case NL80211_IFTYPE_P2P_GO: diff --git a/net/mac80211/link.c b/net/mac80211/link.c index af0321408a97..f57282488222 100644 --- a/net/mac80211/link.c +++ b/net/mac80211/link.c @@ -45,8 +45,6 @@ void ieee80211_link_init(struct ieee80211_sub_if_data *sdata, ieee80211_color_collision_detection_work); INIT_LIST_HEAD(&link->assigned_chanctx_list); INIT_LIST_HEAD(&link->reserved_chanctx_list); - wiphy_delayed_work_init(&link->dfs_cac_timer_work, - ieee80211_dfs_cac_timer_work); 
if (!deflink) { switch (sdata->vif.type) { diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 08c0999746fb..a2b8bfc1fe0e 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2797,16 +2797,15 @@ void ieee80211_dynamic_ps_timer(struct timer_list *t) void ieee80211_dfs_cac_timer_work(struct wiphy *wiphy, struct wiphy_work *work) { - struct ieee80211_link_data *link = - container_of(work, struct ieee80211_link_data, + struct ieee80211_sub_if_data *sdata = + container_of(work, struct ieee80211_sub_if_data, dfs_cac_timer_work.work); - struct cfg80211_chan_def chandef = link->conf->chanreq.oper; - struct ieee80211_sub_if_data *sdata = link->sdata; + struct cfg80211_chan_def chandef = sdata->vif.bss_conf.chanreq.oper; lockdep_assert_wiphy(sdata->local->hw.wiphy); if (sdata->wdev.cac_started) { - ieee80211_link_release_channel(link); + ieee80211_link_release_channel(&sdata->deflink); cfg80211_cac_event(sdata->dev, &chandef, NL80211_RADAR_CAC_FINISHED, GFP_KERNEL); diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 183de2075fb9..43625ca87d5e 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -3457,7 +3457,7 @@ void ieee80211_dfs_cac_cancel(struct ieee80211_local *local) list_for_each_entry(sdata, &local->interfaces, list) { wiphy_delayed_work_cancel(local->hw.wiphy, - &sdata->deflink.dfs_cac_timer_work); + &sdata->dfs_cac_timer_work); if (sdata->wdev.cac_started) { chandef = sdata->vif.bss_conf.chanreq.oper; -- cgit v1.2.3-58-ga151 From 2d33ecf5d0148671c74e68e18755b9411a7ba923 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 6 May 2024 21:11:59 +0200 Subject: wifi: cfg80211: restrict operation during radar detection Just like it's not currently possible to start radar detection while already operating, it shouldn't be possible to start operating while radar detection is running. Fix that. Also, improve the check whether operating (carrier might not be up if e.g. attempting to join IBSS). Reviewed-by: Miriam Rachel Korenblit Link: https://msgid.link/20240506211158.ae8dca3d0d6c.I7c70a66a5fbdbc63a78fee8a34f31d1995491bc3@changeid Signed-off-by: Johannes Berg --- net/wireless/ibss.c | 5 ++++- net/wireless/mesh.c | 5 ++++- net/wireless/nl80211.c | 21 +++++++++++++++------ 3 files changed, 23 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c index 9f02ee5f08be..34e5acff3935 100644 --- a/net/wireless/ibss.c +++ b/net/wireless/ibss.c @@ -3,7 +3,7 @@ * Some IBSS support code for cfg80211. 
* * Copyright 2009 Johannes Berg - * Copyright (C) 2020-2023 Intel Corporation + * Copyright (C) 2020-2024 Intel Corporation */ #include @@ -94,6 +94,9 @@ int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev, lockdep_assert_held(&rdev->wiphy.mtx); + if (wdev->cac_started) + return -EBUSY; + if (wdev->u.ibss.ssid_len) return -EALREADY; diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c index 83306979fbe2..aaca65b66af4 100644 --- a/net/wireless/mesh.c +++ b/net/wireless/mesh.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* * Portions - * Copyright (C) 2022-2023 Intel Corporation + * Copyright (C) 2022-2024 Intel Corporation */ #include #include @@ -127,6 +127,9 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev, if (!rdev->ops->join_mesh) return -EOPNOTSUPP; + if (wdev->cac_started) + return -EBUSY; + if (!setup->chandef.chan) { /* if no channel explicitly given, use preset channel */ setup->chandef = wdev->u.mesh.preset_chandef; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 93c313149f57..6ba988a6f5a2 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -5964,6 +5964,9 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) if (!rdev->ops->start_ap) return -EOPNOTSUPP; + if (wdev->cac_started) + return -EBUSY; + if (wdev->links[link_id].ap.beacon_interval) return -EALREADY; @@ -9956,6 +9959,17 @@ static int nl80211_start_radar_detection(struct sk_buff *skb, flush_delayed_work(&rdev->dfs_update_channels_wk); + switch (wdev->iftype) { + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_P2P_GO: + case NL80211_IFTYPE_MESH_POINT: + case NL80211_IFTYPE_ADHOC: + break; + default: + /* caution - see cfg80211_beaconing_iface_active() below */ + return -EINVAL; + } + wiphy_lock(wiphy); dfs_region = reg_get_dfs_region(wiphy); @@ -9986,12 +10000,7 @@ static int nl80211_start_radar_detection(struct sk_buff *skb, goto unlock; } - if (netif_carrier_ok(dev)) { - err = -EBUSY; - goto unlock; - } - - if (wdev->cac_started) { + if (cfg80211_beaconing_iface_active(wdev) || wdev->cac_started) { err = -EBUSY; goto unlock; } -- cgit v1.2.3-58-ga151 From 7ef8f6821d16623a3f6ea279542f9f47ece9ff1e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 15 Apr 2024 11:27:11 +0300 Subject: wifi: mac80211: mlme: handle cross-link CSA If we see a channel switch announcement on one link for another, handle that case and start the CSA. The driver can react to this in whatever way it needs. The stack will have the ability to track it via the RNR/MLE in the reporting link's beacon if it sees it for inactive links and adjust everything accordingly. Note that currently the timings for the CSA aren't set, the values are only used by the Intel drivers, and they don't need this for newer devices that support MLO, so I've left it out for now. 
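One consequence worth spelling out: for a link the driver has deactivated, the switch can no longer be driven by received beacons, so the patch computes a software deadline (see the csa_time assignment in the mlme.c hunk further down) and, on deactivation, queues the switch work for that time. A rough worked example with assumed numbers; the count and beacon interval below are illustrative, only the formula is from the hunk.

	/* illustrative values only; the formula is taken from the mlme.c hunk below */
	u16 beacon_int = 100;	/* TU, assumed typical value */
	int csa_count = 10;	/* beacons remaining until the switch, assumed */

	/* (count - 1) beacon intervals from now; 1 TU = 1024 us */
	unsigned long csa_time = jiffies +
		TU_TO_JIFFIES((max_t(int, csa_count, 1) - 1) * beacon_int);
	/* 9 * 100 TU = 900 TU = 921600 us, i.e. roughly 0.92 s in the future */
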
Signed-off-by: Miri Korenblit Link: https://msgid.link/20240415112355.4d34b6a31be7.Ie8453979f5805873a8411c99346bcc3810cd6476@changeid Signed-off-by: Johannes Berg --- net/mac80211/driver-ops.h | 12 ++ net/mac80211/ieee80211_i.h | 1 + net/mac80211/link.c | 12 ++ net/mac80211/mlme.c | 470 ++++++++++++++++++++++++++++++++++++--------- 4 files changed, 403 insertions(+), 92 deletions(-) (limited to 'net') diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 5d078c0a2323..d4e73d3630e0 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -1150,6 +1150,9 @@ drv_pre_channel_switch(struct ieee80211_sub_if_data *sdata, if (!check_sdata_in_driver(sdata)) return -EIO; + if (!ieee80211_vif_link_active(&sdata->vif, ch_switch->link_id)) + return 0; + trace_drv_pre_channel_switch(local, sdata, ch_switch); if (local->ops->pre_channel_switch) ret = local->ops->pre_channel_switch(&local->hw, &sdata->vif, @@ -1171,6 +1174,9 @@ drv_post_channel_switch(struct ieee80211_link_data *link) if (!check_sdata_in_driver(sdata)) return -EIO; + if (!ieee80211_vif_link_active(&sdata->vif, link->link_id)) + return 0; + trace_drv_post_channel_switch(local, sdata); if (local->ops->post_channel_switch) ret = local->ops->post_channel_switch(&local->hw, &sdata->vif, @@ -1191,6 +1197,9 @@ drv_abort_channel_switch(struct ieee80211_link_data *link) if (!check_sdata_in_driver(sdata)) return; + if (!ieee80211_vif_link_active(&sdata->vif, link->link_id)) + return; + trace_drv_abort_channel_switch(local, sdata); if (local->ops->abort_channel_switch) @@ -1210,6 +1219,9 @@ drv_channel_switch_rx_beacon(struct ieee80211_sub_if_data *sdata, if (!check_sdata_in_driver(sdata)) return; + if (!ieee80211_vif_link_active(&sdata->vif, ch_switch->link_id)) + return; + trace_drv_channel_switch_rx_beacon(local, sdata, ch_switch); if (local->ops->channel_switch_rx_beacon) local->ops->channel_switch_rx_beacon(&local->hw, &sdata->vif, diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 4f4ef6710e8d..4ce34e95902e 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -978,6 +978,7 @@ struct ieee80211_link_data_managed { bool csa_waiting_bcn; bool csa_ignored_same_chan; bool csa_blocked_tx; + unsigned long csa_time; struct wiphy_delayed_work chswitch_work; struct wiphy_work request_smps_work; diff --git a/net/mac80211/link.c b/net/mac80211/link.c index f57282488222..508759cc9979 100644 --- a/net/mac80211/link.c +++ b/net/mac80211/link.c @@ -357,6 +357,18 @@ static int _ieee80211_set_active_links(struct ieee80211_sub_if_data *sdata, ieee80211_teardown_tdls_peers(link); __ieee80211_link_release_channel(link, true); + + /* + * If CSA is (still) active while the link is deactivated, + * just schedule the channel switch work for the time we + * had previously calculated, and we'll take the process + * from there. + */ + if (link->conf->csa_active) + wiphy_delayed_work_queue(local->hw.wiphy, + &link->u.mgd.chswitch_work, + link->u.mgd.csa_time - + jiffies); } list_for_each_entry(sta, &local->sta_list, list) { diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index a2b8bfc1fe0e..aedd1ce1fb8d 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2079,6 +2079,18 @@ static void ieee80211_chswitch_work(struct wiphy *wiphy, if (!link->conf->csa_active) return; + /* + * If the link isn't active (now), we cannot wait for beacons, won't + * have a reserved chanctx, etc. Just switch over the chandef and + * update cfg80211 directly. 
+ */ + if (!ieee80211_vif_link_active(&sdata->vif, link->link_id)) { + link->conf->chanreq = link->csa_chanreq; + cfg80211_ch_switch_notify(sdata->dev, &link->csa_chanreq.oper, + link->link_id); + return; + } + /* * using reservation isn't immediate as it may be deferred until later * with multi-vif. once reservation is complete it will re-schedule the @@ -2097,9 +2109,9 @@ static void ieee80211_chswitch_work(struct wiphy *wiphy, ret = ieee80211_link_use_reserved_context(link); if (ret) { - sdata_info(sdata, - "failed to use reserved channel context, disconnecting (err=%d)\n", - ret); + link_info(link, + "failed to use reserved channel context, disconnecting (err=%d)\n", + ret); wiphy_work_queue(sdata->local->hw.wiphy, &ifmgd->csa_connection_drop_work); } @@ -2108,8 +2120,8 @@ static void ieee80211_chswitch_work(struct wiphy *wiphy, if (!ieee80211_chanreq_identical(&link->conf->chanreq, &link->csa_chanreq)) { - sdata_info(sdata, - "failed to finalize channel switch, disconnecting\n"); + link_info(link, + "failed to finalize channel switch, disconnecting\n"); wiphy_work_queue(sdata->local->hw.wiphy, &ifmgd->csa_connection_drop_work); return; @@ -2144,14 +2156,14 @@ static void ieee80211_chswitch_post_beacon(struct ieee80211_link_data *link) ret = drv_post_channel_switch(link); if (ret) { - sdata_info(sdata, - "driver post channel switch failed, disconnecting\n"); + link_info(link, + "driver post channel switch failed, disconnecting\n"); wiphy_work_queue(sdata->local->hw.wiphy, &ifmgd->csa_connection_drop_work); return; } - cfg80211_ch_switch_notify(sdata->dev, &link->reserved.oper, + cfg80211_ch_switch_notify(sdata->dev, &link->conf->chanreq.oper, link->link_id); } @@ -2166,7 +2178,8 @@ void ieee80211_chswitch_done(struct ieee80211_vif *vif, bool success, if (!success) { sdata_info(sdata, - "driver channel switch failed, disconnecting\n"); + "driver channel switch failed (link %d), disconnecting\n", + link_id); wiphy_work_queue(sdata->local->hw.wiphy, &sdata->u.mgd.csa_connection_drop_work); } else { @@ -2211,69 +2224,221 @@ ieee80211_sta_abort_chanswitch(struct ieee80211_link_data *link) drv_abort_channel_switch(link); } +struct sta_csa_rnr_iter_data { + struct ieee80211_link_data *link; + struct ieee80211_channel *chan; + u8 mld_id; +}; + +static enum cfg80211_rnr_iter_ret +ieee80211_sta_csa_rnr_iter(void *_data, u8 type, + const struct ieee80211_neighbor_ap_info *info, + const u8 *tbtt_info, u8 tbtt_info_len) +{ + struct sta_csa_rnr_iter_data *data = _data; + struct ieee80211_link_data *link = data->link; + struct ieee80211_sub_if_data *sdata = link->sdata; + struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; + const struct ieee80211_tbtt_info_ge_11 *ti; + enum nl80211_band band; + unsigned int center_freq; + int link_id; + + if (type != IEEE80211_TBTT_INFO_TYPE_TBTT) + return RNR_ITER_CONTINUE; + + if (tbtt_info_len < sizeof(*ti)) + return RNR_ITER_CONTINUE; + + ti = (const void *)tbtt_info; + + if (ti->mld_params.mld_id != data->mld_id) + return RNR_ITER_CONTINUE; + + link_id = le16_get_bits(ti->mld_params.params, + IEEE80211_RNR_MLD_PARAMS_LINK_ID); + if (link_id != data->link->link_id) + return RNR_ITER_CONTINUE; + + /* we found the entry for our link! */ + + /* this AP is confused, it had this right before ... 
just disconnect */ + if (!ieee80211_operating_class_to_band(info->op_class, &band)) { + link_info(link, + "AP now has invalid operating class in RNR, disconnect\n"); + wiphy_work_queue(sdata->local->hw.wiphy, + &ifmgd->csa_connection_drop_work); + return RNR_ITER_BREAK; + } + + center_freq = ieee80211_channel_to_frequency(info->channel, band); + data->chan = ieee80211_get_channel(sdata->local->hw.wiphy, center_freq); + + return RNR_ITER_BREAK; +} + +static void +ieee80211_sta_other_link_csa_disappeared(struct ieee80211_link_data *link, + struct ieee802_11_elems *elems) +{ + struct ieee80211_sub_if_data *sdata = link->sdata; + struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; + struct sta_csa_rnr_iter_data data = { + .link = link, + }; + + /* + * If we get here, we see a beacon from another link without + * CSA still being reported for it, so now we have to check + * if the CSA was aborted or completed. This may not even be + * perfectly possible if the CSA was only done for changing + * the puncturing, but in that case if the link in inactive + * we don't really care, and if it's an active link (or when + * it's activated later) we'll get a beacon and adjust. + */ + + if (WARN_ON(!elems->ml_basic)) + return; + + data.mld_id = ieee80211_mle_get_mld_id((const void *)elems->ml_basic); + + /* + * So in order to do this, iterate the RNR element(s) and see + * what channel is reported now. + */ + cfg80211_iter_rnr(elems->ie_start, elems->total_len, + ieee80211_sta_csa_rnr_iter, &data); + + if (!data.chan) { + link_info(link, + "couldn't find (valid) channel in RNR for CSA, disconnect\n"); + wiphy_work_queue(sdata->local->hw.wiphy, + &ifmgd->csa_connection_drop_work); + return; + } + + /* + * If it doesn't match the CSA, then assume it aborted. This + * may erroneously detect that it was _not_ aborted when it + * was in fact aborted, but only changed the bandwidth or the + * puncturing configuration, but we don't have enough data to + * detect that. 
+ */ + if (data.chan != link->csa_chanreq.oper.chan) + ieee80211_sta_abort_chanswitch(link); +} + +enum ieee80211_csa_source { + IEEE80211_CSA_SOURCE_BEACON, + IEEE80211_CSA_SOURCE_OTHER_LINK, + IEEE80211_CSA_SOURCE_ACTION, +}; + static void ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link, u64 timestamp, u32 device_timestamp, - struct ieee802_11_elems *elems, - bool beacon) + struct ieee802_11_elems *full_elems, + struct ieee802_11_elems *csa_elems, + enum ieee80211_csa_source source) { struct ieee80211_sub_if_data *sdata = link->sdata; struct ieee80211_local *local = sdata->local; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - struct cfg80211_bss *cbss = link->conf->bss; + struct ieee80211_chanctx *chanctx = NULL; struct ieee80211_chanctx_conf *conf; - struct ieee80211_chanctx *chanctx; - enum nl80211_band current_band; - struct ieee80211_csa_ie csa_ie; + struct ieee80211_csa_ie csa_ie = {}; struct ieee80211_channel_switch ch_switch = { .link_id = link->link_id, + .timestamp = timestamp, + .device_timestamp = device_timestamp, }; - struct ieee80211_bss *bss; - unsigned long timeout; + unsigned long now; int res; lockdep_assert_wiphy(local->hw.wiphy); - if (!cbss) - return; + if (csa_elems) { + struct cfg80211_bss *cbss = link->conf->bss; + enum nl80211_band current_band; + struct ieee80211_bss *bss; - current_band = cbss->channel->band; - bss = (void *)cbss->priv; - res = ieee80211_parse_ch_switch_ie(sdata, elems, current_band, - bss->vht_cap_info, - &link->u.mgd.conn, - link->u.mgd.bssid, &csa_ie); + if (WARN_ON(!cbss)) + return; - if (!res) { - ch_switch.timestamp = timestamp; - ch_switch.device_timestamp = device_timestamp; - ch_switch.block_tx = csa_ie.mode; - ch_switch.chandef = csa_ie.chanreq.oper; - ch_switch.count = csa_ie.count; - ch_switch.delay = csa_ie.max_switch_time; + current_band = cbss->channel->band; + bss = (void *)cbss->priv; + + res = ieee80211_parse_ch_switch_ie(sdata, csa_elems, + current_band, + bss->vht_cap_info, + &link->u.mgd.conn, + link->u.mgd.bssid, &csa_ie); + if (res == 0) { + ch_switch.block_tx = csa_ie.mode; + ch_switch.chandef = csa_ie.chanreq.oper; + ch_switch.count = csa_ie.count; + ch_switch.delay = csa_ie.max_switch_time; + } + } else { + /* + * If there was no per-STA profile for this link, we + * get called with csa_elems == NULL. This of course means + * there are no CSA elements, so set res=1 indicating + * no more CSA. + */ + res = 1; } if (res < 0) goto drop_connection; if (link->conf->csa_active) { - /* already processing - disregard action frames */ - if (!beacon) + switch (source) { + case IEEE80211_CSA_SOURCE_ACTION: + /* already processing - disregard action frames */ return; + case IEEE80211_CSA_SOURCE_BEACON: + if (link->u.mgd.csa_waiting_bcn) { + ieee80211_chswitch_post_beacon(link); + /* + * If the CSA is still present after the switch + * we need to consider it as a new CSA (possibly + * to self). This happens by not returning here + * so we'll get to the check below. 
+ */ + } else if (res) { + ieee80211_sta_abort_chanswitch(link); + return; + } else { + drv_channel_switch_rx_beacon(sdata, &ch_switch); + return; + } + break; + case IEEE80211_CSA_SOURCE_OTHER_LINK: + /* active link: we want to see the beacon to continue */ + if (ieee80211_vif_link_active(&sdata->vif, + link->link_id)) + return; - if (link->u.mgd.csa_waiting_bcn) { - ieee80211_chswitch_post_beacon(link); - /* - * If the CSA IE is still present in the beacon after - * the switch, we need to consider it as a new CSA - * (possibly to self) - this happens by not returning - * here so we'll get to the check below. - */ - } else if (res) { - ieee80211_sta_abort_chanswitch(link); - return; - } else { - drv_channel_switch_rx_beacon(sdata, &ch_switch); + /* switch work ran, so just complete the process */ + if (link->u.mgd.csa_waiting_bcn) { + ieee80211_chswitch_post_beacon(link); + /* + * If the CSA is still present after the switch + * we need to consider it as a new CSA (possibly + * to self). This happens by not returning here + * so we'll get to the check below. + */ + break; + } + + /* link still has CSA but we already know, do nothing */ + if (!res) + return; + + /* check in the RNR if the CSA aborted */ + ieee80211_sta_other_link_csa_disappeared(link, + full_elems); return; } } @@ -2284,40 +2449,38 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link, if (link->conf->chanreq.oper.chan->band != csa_ie.chanreq.oper.chan->band) { - sdata_info(sdata, - "AP %pM switches to different band (%d MHz, width:%d, CF1/2: %d/%d MHz), disconnecting\n", - link->u.mgd.bssid, - csa_ie.chanreq.oper.chan->center_freq, - csa_ie.chanreq.oper.width, - csa_ie.chanreq.oper.center_freq1, - csa_ie.chanreq.oper.center_freq2); + link_info(link, + "AP %pM switches to different band (%d MHz, width:%d, CF1/2: %d/%d MHz), disconnecting\n", + link->u.mgd.bssid, + csa_ie.chanreq.oper.chan->center_freq, + csa_ie.chanreq.oper.width, + csa_ie.chanreq.oper.center_freq1, + csa_ie.chanreq.oper.center_freq2); goto drop_connection; } if (!cfg80211_chandef_usable(local->hw.wiphy, &csa_ie.chanreq.oper, IEEE80211_CHAN_DISABLED)) { - sdata_info(sdata, - "AP %pM switches to unsupported channel " - "(%d.%03d MHz, width:%d, CF1/2: %d.%03d/%d MHz), " - "disconnecting\n", - link->u.mgd.bssid, - csa_ie.chanreq.oper.chan->center_freq, - csa_ie.chanreq.oper.chan->freq_offset, - csa_ie.chanreq.oper.width, - csa_ie.chanreq.oper.center_freq1, - csa_ie.chanreq.oper.freq1_offset, - csa_ie.chanreq.oper.center_freq2); + link_info(link, + "AP %pM switches to unsupported channel (%d.%03d MHz, width:%d, CF1/2: %d.%03d/%d MHz), disconnecting\n", + link->u.mgd.bssid, + csa_ie.chanreq.oper.chan->center_freq, + csa_ie.chanreq.oper.chan->freq_offset, + csa_ie.chanreq.oper.width, + csa_ie.chanreq.oper.center_freq1, + csa_ie.chanreq.oper.freq1_offset, + csa_ie.chanreq.oper.center_freq2); goto drop_connection; } if (cfg80211_chandef_identical(&csa_ie.chanreq.oper, &link->conf->chanreq.oper) && - (!csa_ie.mode || !beacon)) { + (!csa_ie.mode || source != IEEE80211_CSA_SOURCE_BEACON)) { if (link->u.mgd.csa_ignored_same_chan) return; - sdata_info(sdata, - "AP %pM tries to chanswitch to same channel, ignore\n", - link->u.mgd.bssid); + link_info(link, + "AP %pM tries to chanswitch to same channel, ignore\n", + link->u.mgd.bssid); link->u.mgd.csa_ignored_same_chan = true; return; } @@ -2333,33 +2496,36 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link, conf = rcu_dereference_protected(link->conf->chanctx_conf, 
lockdep_is_held(&local->hw.wiphy->mtx)); - if (!conf) { - sdata_info(sdata, - "no channel context assigned to vif?, disconnecting\n"); + if (ieee80211_vif_link_active(&sdata->vif, link->link_id) && !conf) { + link_info(link, + "no channel context assigned to vif?, disconnecting\n"); goto drop_connection; } - chanctx = container_of(conf, struct ieee80211_chanctx, conf); + if (conf) + chanctx = container_of(conf, struct ieee80211_chanctx, conf); if (!ieee80211_hw_check(&local->hw, CHANCTX_STA_CSA)) { - sdata_info(sdata, - "driver doesn't support chan-switch with channel contexts\n"); + link_info(link, + "driver doesn't support chan-switch with channel contexts\n"); goto drop_connection; } if (drv_pre_channel_switch(sdata, &ch_switch)) { - sdata_info(sdata, - "preparing for channel switch failed, disconnecting\n"); + link_info(link, + "preparing for channel switch failed, disconnecting\n"); goto drop_connection; } - res = ieee80211_link_reserve_chanctx(link, &csa_ie.chanreq, - chanctx->mode, false); - if (res) { - sdata_info(sdata, - "failed to reserve channel context for channel switch, disconnecting (err=%d)\n", - res); - goto drop_connection; + if (chanctx) { + res = ieee80211_link_reserve_chanctx(link, &csa_ie.chanreq, + chanctx->mode, false); + if (res) { + link_info(link, + "failed to reserve channel context for channel switch, disconnecting (err=%d)\n", + res); + goto drop_connection; + } } link->conf->csa_active = true; @@ -2379,18 +2545,28 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link, link->link_id, csa_ie.count, csa_ie.mode); - if (local->ops->channel_switch) { - /* use driver's channel switch callback */ + /* we may have to handle timeout for deactivated link in software */ + now = jiffies; + link->u.mgd.csa_time = now + + TU_TO_JIFFIES((max_t(int, csa_ie.count, 1) - 1) * + link->conf->beacon_int); + + if (ieee80211_vif_link_active(&sdata->vif, link->link_id) && + local->ops->channel_switch) { + /* + * Use driver's channel switch callback, the driver will + * later call ieee80211_chswitch_done(). It may deactivate + * the link as well, we handle that elsewhere and queue + * the chswitch_work for the calculated time then. 
+ */ drv_channel_switch(local, sdata, &ch_switch); return; } /* channel switch handled in software */ - timeout = TU_TO_JIFFIES((max_t(int, csa_ie.count, 1) - 1) * - cbss->beacon_interval); wiphy_delayed_work_queue(local->hw.wiphy, &link->u.mgd.chswitch_work, - timeout); + link->u.mgd.csa_time - now); return; drop_connection: /* @@ -6328,6 +6504,110 @@ static void ieee80211_process_adv_ttlm(struct ieee80211_sub_if_data *sdata, } } +static void +ieee80211_mgd_check_cross_link_csa(struct ieee80211_sub_if_data *sdata, + int reporting_link_id, + struct ieee802_11_elems *elems) +{ + const struct element *sta_profiles[IEEE80211_MLD_MAX_NUM_LINKS] = {}; + ssize_t sta_profiles_len[IEEE80211_MLD_MAX_NUM_LINKS] = {}; + const struct element *sub; + const u8 *subelems; + size_t subelems_len; + u8 common_size; + int link_id; + + if (!ieee80211_mle_size_ok((u8 *)elems->ml_basic, elems->ml_basic_len)) + return; + + common_size = ieee80211_mle_common_size((u8 *)elems->ml_basic); + subelems = (u8 *)elems->ml_basic + common_size; + subelems_len = elems->ml_basic_len - common_size; + + for_each_element_id(sub, IEEE80211_MLE_SUBELEM_PER_STA_PROFILE, + subelems, subelems_len) { + struct ieee80211_mle_per_sta_profile *prof = (void *)sub->data; + struct ieee80211_link_data *link; + ssize_t len; + + if (!ieee80211_mle_basic_sta_prof_size_ok(sub->data, + sub->datalen)) + continue; + + link_id = le16_get_bits(prof->control, + IEEE80211_MLE_STA_CONTROL_LINK_ID); + /* need a valid link ID, but also not our own, both AP bugs */ + if (link_id == reporting_link_id || + link_id >= IEEE80211_MLD_MAX_NUM_LINKS) + continue; + + link = sdata_dereference(sdata->link[link_id], sdata); + if (!link) + continue; + + len = cfg80211_defragment_element(sub, subelems, subelems_len, + NULL, 0, + IEEE80211_MLE_SUBELEM_FRAGMENT); + if (WARN_ON(len < 0)) + continue; + + sta_profiles[link_id] = sub; + sta_profiles_len[link_id] = len; + } + + for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS; link_id++) { + struct ieee80211_mle_per_sta_profile *prof; + struct ieee802_11_elems *prof_elems; + struct ieee80211_link_data *link; + ssize_t len; + + if (link_id == reporting_link_id) + continue; + + link = sdata_dereference(sdata->link[link_id], sdata); + if (!link) + continue; + + if (!sta_profiles[link_id]) { + prof_elems = NULL; + goto handle; + } + + /* we can defragment in-place, won't use the buffer again */ + len = cfg80211_defragment_element(sta_profiles[link_id], + subelems, subelems_len, + (void *)sta_profiles[link_id], + sta_profiles_len[link_id], + IEEE80211_MLE_SUBELEM_FRAGMENT); + if (WARN_ON(len != sta_profiles_len[link_id])) + continue; + + prof = (void *)sta_profiles[link_id]; + prof_elems = ieee802_11_parse_elems(prof->variable + + (prof->sta_info_len - 1), + len - + (prof->sta_info_len - 1), + false, NULL); + + /* memory allocation failed - let's hope that's transient */ + if (!prof_elems) + continue; + +handle: + /* + * FIXME: the timings here are obviously incorrect, + * but only older Intel drivers seem to care, and + * those don't have MLO. If you really need this, + * the problem is having to calculate it with the + * TSF offset etc. The device_timestamp is still + * correct, of course. 
+ */ + ieee80211_sta_process_chanswitch(link, 0, 0, elems, prof_elems, + IEEE80211_CSA_SOURCE_OTHER_LINK); + kfree(prof_elems); + } +} + static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link, struct ieee80211_hdr *hdr, size_t len, struct ieee80211_rx_status *rx_status) @@ -6552,7 +6832,11 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link, ieee80211_sta_process_chanswitch(link, rx_status->mactime, rx_status->device_timestamp, - elems, true); + elems, elems, + IEEE80211_CSA_SOURCE_BEACON); + + /* note that after this elems->ml_basic can no longer be used fully */ + ieee80211_mgd_check_cross_link_csa(sdata, rx_status->link_id, elems); if (!link->u.mgd.disable_wmm_tracking && ieee80211_sta_wmm_params(local, link, elems->wmm_param, @@ -7148,7 +7432,8 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, ieee80211_sta_process_chanswitch(link, rx_status->mactime, rx_status->device_timestamp, - elems, false); + elems, elems, + IEEE80211_CSA_SOURCE_ACTION); kfree(elems); } else if (mgmt->u.action.category == WLAN_CATEGORY_PUBLIC) { struct ieee802_11_elems *elems; @@ -7176,7 +7461,8 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, ieee80211_sta_process_chanswitch(link, rx_status->mactime, rx_status->device_timestamp, - elems, false); + elems, elems, + IEEE80211_CSA_SOURCE_ACTION); } kfree(elems); -- cgit v1.2.3-58-ga151 From 344d18cec23142b3921e068a90f58e8fc5f7637a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 6 May 2024 21:54:49 +0200 Subject: wifi: mac80211: collect some CSA data into sub-structs Collect the CSA data in ieee80211_link_data_managed and ieee80211_link_data into a csa sub-struct to clean up a bit and make adding new things more obvious. Reviewed-by: Miriam Rachel Korenblit Link: https://msgid.link/20240506215543.29f954b1f576.I9a683a9647c33d4dd3011aade6677982428c1082@changeid Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 16 +++++------ net/mac80211/chan.c | 4 +-- net/mac80211/ibss.c | 6 ++-- net/mac80211/ieee80211_i.h | 18 +++++++----- net/mac80211/iface.c | 4 +-- net/mac80211/link.c | 8 +++--- net/mac80211/mlme.c | 70 +++++++++++++++++++++++----------------------- 7 files changed, 65 insertions(+), 61 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 0ed7e8bbf5c0..3319c9360507 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -3633,10 +3633,10 @@ void ieee80211_csa_finish(struct ieee80211_vif *vif, unsigned int link_id) continue; wiphy_work_queue(iter->local->hw.wiphy, - &iter->deflink.csa_finalize_work); + &iter->deflink.csa.finalize_work); } } - wiphy_work_queue(local->hw.wiphy, &link_data->csa_finalize_work); + wiphy_work_queue(local->hw.wiphy, &link_data->csa.finalize_work); rcu_read_unlock(); } @@ -3723,7 +3723,7 @@ static int __ieee80211_csa_finalize(struct ieee80211_link_data *link_data) } if (!cfg80211_chandef_identical(&link_conf->chanreq.oper, - &link_data->csa_chanreq.oper)) + &link_data->csa.chanreq.oper)) return -EINVAL; link_conf->csa_active = false; @@ -3744,7 +3744,7 @@ static int __ieee80211_csa_finalize(struct ieee80211_link_data *link_data) if (err) return err; - cfg80211_ch_switch_notify(sdata->dev, &link_data->csa_chanreq.oper, + cfg80211_ch_switch_notify(sdata->dev, &link_data->csa.chanreq.oper, link_data->link_id); return 0; @@ -3765,7 +3765,7 @@ static void ieee80211_csa_finalize(struct ieee80211_link_data *link_data) void ieee80211_csa_finalize_work(struct wiphy *wiphy, struct wiphy_work *work) { struct 
ieee80211_link_data *link = - container_of(work, struct ieee80211_link_data, csa_finalize_work); + container_of(work, struct ieee80211_link_data, csa.finalize_work); struct ieee80211_sub_if_data *sdata = link->sdata; struct ieee80211_local *local = sdata->local; @@ -4012,7 +4012,7 @@ __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, goto out; } - link_data->csa_chanreq = chanreq; + link_data->csa.chanreq = chanreq; link_conf->csa_active = true; if (params->block_tx && @@ -4023,12 +4023,12 @@ __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, } cfg80211_ch_switch_started_notify(sdata->dev, - &link_data->csa_chanreq.oper, 0, + &link_data->csa.chanreq.oper, 0, params->count, params->block_tx); if (changed) { ieee80211_link_info_change_notify(sdata, link_data, changed); - drv_channel_switch_beacon(sdata, &link_data->csa_chanreq.oper); + drv_channel_switch_beacon(sdata, &link_data->csa.chanreq.oper); } else { /* if the beacon didn't change, we can finalize immediately */ ieee80211_csa_finalize(link_data); diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index 380695fdc32f..ec16d7676088 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -1162,11 +1162,11 @@ ieee80211_link_chanctx_reservation_complete(struct ieee80211_link_data *link) case NL80211_IFTYPE_MESH_POINT: case NL80211_IFTYPE_OCB: wiphy_work_queue(sdata->local->hw.wiphy, - &link->csa_finalize_work); + &link->csa.finalize_work); break; case NL80211_IFTYPE_STATION: wiphy_delayed_work_queue(sdata->local->hw.wiphy, - &link->u.mgd.chswitch_work, 0); + &link->u.mgd.csa.switch_work, 0); break; case NL80211_IFTYPE_UNSPECIFIED: case NL80211_IFTYPE_AP_VLAN: diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 7ace5cdc6c26..bf338f3d4dd3 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -9,7 +9,7 @@ * Copyright 2009, Johannes Berg * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright(c) 2016 Intel Deutschland GmbH - * Copyright(c) 2018-2023 Intel Corporation + * Copyright(c) 2018-2024 Intel Corporation */ #include @@ -533,12 +533,12 @@ int ieee80211_ibss_finish_csa(struct ieee80211_sub_if_data *sdata, u64 *changed) IEEE80211_PRIVACY(ifibss->privacy)); /* XXX: should not really modify cfg80211 data */ if (cbss) { - cbss->channel = sdata->deflink.csa_chanreq.oper.chan; + cbss->channel = sdata->deflink.csa.chanreq.oper.chan; cfg80211_put_bss(sdata->local->hw.wiphy, cbss); } } - ifibss->chandef = sdata->deflink.csa_chanreq.oper; + ifibss->chandef = sdata->deflink.csa.chanreq.oper; /* generate the beacon */ return ieee80211_ibss_csa_beacon(sdata, NULL, changed); diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 4ce34e95902e..cba1c2c3d9f1 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -975,11 +975,13 @@ struct ieee80211_link_data_managed { bool disable_wmm_tracking; bool operating_11g_mode; - bool csa_waiting_bcn; - bool csa_ignored_same_chan; - bool csa_blocked_tx; - unsigned long csa_time; - struct wiphy_delayed_work chswitch_work; + struct { + struct wiphy_delayed_work switch_work; + unsigned long time; + bool waiting_bcn; + bool ignored_same_chan; + bool blocked_tx; + } csa; struct wiphy_work request_smps_work; /* used to reconfigure hardware SM PS */ @@ -1038,11 +1040,13 @@ struct ieee80211_link_data { struct ieee80211_key __rcu *default_mgmt_key; struct ieee80211_key __rcu *default_beacon_key; - struct wiphy_work csa_finalize_work; bool operating_11g_mode; - struct ieee80211_chan_req csa_chanreq; + 
struct { + struct wiphy_work finalize_work; + struct ieee80211_chan_req chanreq; + } csa; struct wiphy_work color_change_finalize_work; struct delayed_work color_collision_detect_work; diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 1ef4b6dc3730..d1a49ee4a194 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -543,14 +543,14 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do sdata->vif.bss_conf.csa_active = false; if (sdata->vif.type == NL80211_IFTYPE_STATION) - sdata->deflink.u.mgd.csa_waiting_bcn = false; + sdata->deflink.u.mgd.csa.waiting_bcn = false; if (sdata->csa_blocked_queues) { ieee80211_wake_vif_queues(local, sdata, IEEE80211_QUEUE_STOP_REASON_CSA); sdata->csa_blocked_queues = false; } - wiphy_work_cancel(local->hw.wiphy, &sdata->deflink.csa_finalize_work); + wiphy_work_cancel(local->hw.wiphy, &sdata->deflink.csa.finalize_work); wiphy_work_cancel(local->hw.wiphy, &sdata->deflink.color_change_finalize_work); wiphy_delayed_work_cancel(local->hw.wiphy, diff --git a/net/mac80211/link.c b/net/mac80211/link.c index 508759cc9979..2e6e92defbca 100644 --- a/net/mac80211/link.c +++ b/net/mac80211/link.c @@ -37,7 +37,7 @@ void ieee80211_link_init(struct ieee80211_sub_if_data *sdata, link_conf->link_id = link_id; link_conf->vif = &sdata->vif; - wiphy_work_init(&link->csa_finalize_work, + wiphy_work_init(&link->csa.finalize_work, ieee80211_csa_finalize_work); wiphy_work_init(&link->color_change_finalize_work, ieee80211_color_change_finalize_work); @@ -72,7 +72,7 @@ void ieee80211_link_stop(struct ieee80211_link_data *link) cancel_delayed_work_sync(&link->color_collision_detect_work); wiphy_work_cancel(link->sdata->local->hw.wiphy, - &link->csa_finalize_work); + &link->csa.finalize_work); ieee80211_link_release_channel(link); } @@ -366,8 +366,8 @@ static int _ieee80211_set_active_links(struct ieee80211_sub_if_data *sdata, */ if (link->conf->csa_active) wiphy_delayed_work_queue(local->hw.wiphy, - &link->u.mgd.chswitch_work, - link->u.mgd.csa_time - + &link->u.mgd.csa.switch_work, + link->u.mgd.csa.time - jiffies); } diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index aedd1ce1fb8d..75d53257dd97 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2057,12 +2057,12 @@ void ieee80211_send_4addr_nullfunc(struct ieee80211_local *local, } /* spectrum management related things */ -static void ieee80211_chswitch_work(struct wiphy *wiphy, - struct wiphy_work *work) +static void ieee80211_csa_switch_work(struct wiphy *wiphy, + struct wiphy_work *work) { struct ieee80211_link_data *link = container_of(work, struct ieee80211_link_data, - u.mgd.chswitch_work.work); + u.mgd.csa.switch_work.work); struct ieee80211_sub_if_data *sdata = link->sdata; struct ieee80211_local *local = sdata->local; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; @@ -2085,8 +2085,8 @@ static void ieee80211_chswitch_work(struct wiphy *wiphy, * update cfg80211 directly. 
*/ if (!ieee80211_vif_link_active(&sdata->vif, link->link_id)) { - link->conf->chanreq = link->csa_chanreq; - cfg80211_ch_switch_notify(sdata->dev, &link->csa_chanreq.oper, + link->conf->chanreq = link->csa.chanreq; + cfg80211_ch_switch_notify(sdata->dev, &link->csa.chanreq.oper, link->link_id); return; } @@ -2119,7 +2119,7 @@ static void ieee80211_chswitch_work(struct wiphy *wiphy, } if (!ieee80211_chanreq_identical(&link->conf->chanreq, - &link->csa_chanreq)) { + &link->csa.chanreq)) { link_info(link, "failed to finalize channel switch, disconnecting\n"); wiphy_work_queue(sdata->local->hw.wiphy, @@ -2127,7 +2127,7 @@ static void ieee80211_chswitch_work(struct wiphy *wiphy, return; } - link->u.mgd.csa_waiting_bcn = true; + link->u.mgd.csa.waiting_bcn = true; ieee80211_sta_reset_beacon_monitor(sdata); ieee80211_sta_reset_conn_monitor(sdata); @@ -2151,8 +2151,8 @@ static void ieee80211_chswitch_post_beacon(struct ieee80211_link_data *link) } link->conf->csa_active = false; - link->u.mgd.csa_blocked_tx = false; - link->u.mgd.csa_waiting_bcn = false; + link->u.mgd.csa.blocked_tx = false; + link->u.mgd.csa.waiting_bcn = false; ret = drv_post_channel_switch(link); if (ret) { @@ -2192,7 +2192,7 @@ void ieee80211_chswitch_done(struct ieee80211_vif *vif, bool success, } wiphy_delayed_work_queue(sdata->local->hw.wiphy, - &link->u.mgd.chswitch_work, 0); + &link->u.mgd.csa.switch_work, 0); } rcu_read_unlock(); @@ -2219,7 +2219,7 @@ ieee80211_sta_abort_chanswitch(struct ieee80211_link_data *link) } link->conf->csa_active = false; - link->u.mgd.csa_blocked_tx = false; + link->u.mgd.csa.blocked_tx = false; drv_abort_channel_switch(link); } @@ -2324,7 +2324,7 @@ ieee80211_sta_other_link_csa_disappeared(struct ieee80211_link_data *link, * puncturing configuration, but we don't have enough data to * detect that. 
*/ - if (data.chan != link->csa_chanreq.oper.chan) + if (data.chan != link->csa.chanreq.oper.chan) ieee80211_sta_abort_chanswitch(link); } @@ -2398,7 +2398,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link, /* already processing - disregard action frames */ return; case IEEE80211_CSA_SOURCE_BEACON: - if (link->u.mgd.csa_waiting_bcn) { + if (link->u.mgd.csa.waiting_bcn) { ieee80211_chswitch_post_beacon(link); /* * If the CSA is still present after the switch @@ -2421,7 +2421,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link, return; /* switch work ran, so just complete the process */ - if (link->u.mgd.csa_waiting_bcn) { + if (link->u.mgd.csa.waiting_bcn) { ieee80211_chswitch_post_beacon(link); /* * If the CSA is still present after the switch @@ -2476,12 +2476,12 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link, if (cfg80211_chandef_identical(&csa_ie.chanreq.oper, &link->conf->chanreq.oper) && (!csa_ie.mode || source != IEEE80211_CSA_SOURCE_BEACON)) { - if (link->u.mgd.csa_ignored_same_chan) + if (link->u.mgd.csa.ignored_same_chan) return; link_info(link, "AP %pM tries to chanswitch to same channel, ignore\n", link->u.mgd.bssid); - link->u.mgd.csa_ignored_same_chan = true; + link->u.mgd.csa.ignored_same_chan = true; return; } @@ -2529,10 +2529,10 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link, } link->conf->csa_active = true; - link->csa_chanreq = csa_ie.chanreq; - link->u.mgd.csa_ignored_same_chan = false; + link->csa.chanreq = csa_ie.chanreq; + link->u.mgd.csa.ignored_same_chan = false; link->u.mgd.beacon_crc_valid = false; - link->u.mgd.csa_blocked_tx = csa_ie.mode; + link->u.mgd.csa.blocked_tx = csa_ie.mode; if (csa_ie.mode && !ieee80211_hw_check(&local->hw, HANDLES_QUIET_CSA)) { @@ -2547,7 +2547,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link, /* we may have to handle timeout for deactivated link in software */ now = jiffies; - link->u.mgd.csa_time = now + + link->u.mgd.csa.time = now + TU_TO_JIFFIES((max_t(int, csa_ie.count, 1) - 1) * link->conf->beacon_int); @@ -2557,7 +2557,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link, * Use driver's channel switch callback, the driver will * later call ieee80211_chswitch_done(). It may deactivate * the link as well, we handle that elsewhere and queue - * the chswitch_work for the calculated time then. + * the csa.switch_work for the calculated time then. */ drv_channel_switch(local, sdata, &ch_switch); return; @@ -2565,8 +2565,8 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link, /* channel switch handled in software */ wiphy_delayed_work_queue(local->hw.wiphy, - &link->u.mgd.chswitch_work, - link->u.mgd.csa_time - now); + &link->u.mgd.csa.switch_work, + link->u.mgd.csa.time - now); return; drop_connection: /* @@ -2577,7 +2577,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link, * reset when the disconnection worker runs. 
*/ link->conf->csa_active = true; - link->u.mgd.csa_blocked_tx = csa_ie.mode; + link->u.mgd.csa.blocked_tx = csa_ie.mode; sdata->csa_blocked_queues = csa_ie.mode && !ieee80211_hw_check(&local->hw, HANDLES_QUIET_CSA); @@ -3630,9 +3630,9 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, } sdata->vif.bss_conf.csa_active = false; - sdata->deflink.u.mgd.csa_blocked_tx = false; - sdata->deflink.u.mgd.csa_waiting_bcn = false; - sdata->deflink.u.mgd.csa_ignored_same_chan = false; + sdata->deflink.u.mgd.csa.blocked_tx = false; + sdata->deflink.u.mgd.csa.waiting_bcn = false; + sdata->deflink.u.mgd.csa.ignored_same_chan = false; if (sdata->csa_blocked_queues) { ieee80211_wake_vif_queues(local, sdata, IEEE80211_QUEUE_STOP_REASON_CSA); @@ -3960,7 +3960,7 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata) if (WARN_ON_ONCE(!link)) continue; - if (link->u.mgd.csa_blocked_tx) + if (link->u.mgd.csa.blocked_tx) continue; tx = true; @@ -3997,8 +3997,8 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata) tx, frame_buf); /* the other links will be destroyed */ sdata->vif.bss_conf.csa_active = false; - sdata->deflink.u.mgd.csa_waiting_bcn = false; - sdata->deflink.u.mgd.csa_blocked_tx = false; + sdata->deflink.u.mgd.csa.waiting_bcn = false; + sdata->deflink.u.mgd.csa.blocked_tx = false; if (sdata->csa_blocked_queues) { ieee80211_wake_vif_queues(local, sdata, IEEE80211_QUEUE_STOP_REASON_CSA); @@ -7785,7 +7785,7 @@ static void ieee80211_sta_bcn_mon_timer(struct timer_list *t) return; if (sdata->vif.bss_conf.csa_active && - !sdata->deflink.u.mgd.csa_waiting_bcn) + !sdata->deflink.u.mgd.csa.waiting_bcn) return; if (sdata->vif.driver_flags & IEEE80211_VIF_BEACON_FILTER) @@ -7809,7 +7809,7 @@ static void ieee80211_sta_conn_mon_timer(struct timer_list *t) return; if (sdata->vif.bss_conf.csa_active && - !sdata->deflink.u.mgd.csa_waiting_bcn) + !sdata->deflink.u.mgd.csa.waiting_bcn) return; sta = sta_info_get(sdata, sdata->vif.cfg.ap_addr); @@ -8020,8 +8020,8 @@ void ieee80211_mgd_setup_link(struct ieee80211_link_data *link) else link->u.mgd.req_smps = IEEE80211_SMPS_OFF; - wiphy_delayed_work_init(&link->u.mgd.chswitch_work, - ieee80211_chswitch_work); + wiphy_delayed_work_init(&link->u.mgd.csa.switch_work, + ieee80211_csa_switch_work); ieee80211_clear_tpe(&link->conf->tpe); @@ -9152,7 +9152,7 @@ void ieee80211_mgd_stop_link(struct ieee80211_link_data *link) wiphy_work_cancel(link->sdata->local->hw.wiphy, &link->u.mgd.recalc_smps); wiphy_delayed_work_cancel(link->sdata->local->hw.wiphy, - &link->u.mgd.chswitch_work); + &link->u.mgd.csa.switch_work); } void ieee80211_mgd_stop(struct ieee80211_sub_if_data *sdata) -- cgit v1.2.3-58-ga151 From f81747a9ad2e773ee72d881717e859a9f7a01c76 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 6 May 2024 21:54:50 +0200 Subject: wifi: mac80211: handle wider bandwidth OFDMA during CSA During channel switch, track the AP configuration in the chanreq, so that wider bandwidth OFDMA is taken into account correctly, since multiple channel contexts may be needed due to sharing not being possible due to wider bandwidth OFDMA. 
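The core of the change is a small gate applied when the CSA chandef is recorded. A minimal sketch, condensed from the mlme.c hunk below (nothing here is new code, it just isolates the condition):

	/* sketch, condensed from the mlme.c hunk below */
	link->csa.chanreq = csa_ie.chanreq;

	/*
	 * Track the AP's (possibly wider) chandef only for EHT connections,
	 * and only if the driver did not opt out; otherwise drop it so it
	 * doesn't force a separate channel context unnecessarily.
	 */
	if (link->u.mgd.conn.mode < IEEE80211_CONN_MODE_EHT ||
	    sdata->vif.driver_flags & IEEE80211_VIF_IGNORE_OFDMA_WIDER_BW)
		link->csa.chanreq.ap.chan = NULL;
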
Reviewed-by: Miriam Rachel Korenblit Link: https://msgid.link/20240506215543.b2c5a72dac1b.I69f65cb2e75d4a49a174b1aede68bf8ff0a3cab3@changeid Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 8 ++++++-- net/mac80211/spectmgmt.c | 5 ++++- 2 files changed, 10 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 75d53257dd97..1d0b414619a4 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2517,8 +2517,13 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link, goto drop_connection; } + link->csa.chanreq = csa_ie.chanreq; + if (link->u.mgd.conn.mode < IEEE80211_CONN_MODE_EHT || + sdata->vif.driver_flags & IEEE80211_VIF_IGNORE_OFDMA_WIDER_BW) + link->csa.chanreq.ap.chan = NULL; + if (chanctx) { - res = ieee80211_link_reserve_chanctx(link, &csa_ie.chanreq, + res = ieee80211_link_reserve_chanctx(link, &link->csa.chanreq, chanctx->mode, false); if (res) { link_info(link, @@ -2529,7 +2534,6 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link, } link->conf->csa_active = true; - link->csa.chanreq = csa_ie.chanreq; link->u.mgd.csa.ignored_same_chan = false; link->u.mgd.beacon_crc_valid = false; link->u.mgd.csa.blocked_tx = csa_ie.mode; diff --git a/net/mac80211/spectmgmt.c b/net/mac80211/spectmgmt.c index b2de4c6fb808..df96d3db1c0e 100644 --- a/net/mac80211/spectmgmt.c +++ b/net/mac80211/spectmgmt.c @@ -9,7 +9,7 @@ * Copyright 2007, Michael Wu * Copyright 2007-2008, Intel Corporation * Copyright 2008, Johannes Berg - * Copyright (C) 2018, 2020, 2022-2023 Intel Corporation + * Copyright (C) 2018, 2020, 2022-2024 Intel Corporation */ #include @@ -366,6 +366,9 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata, validate_chandef_by_ht_vht_oper(sdata, conn, vht_cap_info, &new_chandef); + /* capture the AP chandef before (potential) downgrading */ + csa_ie->chanreq.ap = new_chandef; + /* if data is there validate the bandwidth & use it */ if (new_chandef.chan) { if (conn->bw_limit < IEEE80211_CONN_BW_LIMIT_320 && -- cgit v1.2.3-58-ga151 From 4540568136fefe0fcc366ded125cc56b0d9e1724 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 6 May 2024 21:54:51 +0200 Subject: wifi: mac80211: handle TPE element during CSA Handle the transmit power envelope (TPE) element during channel switch, applying it when the channel switch is done. 
Reviewed-by: Miriam Rachel Korenblit Link: https://msgid.link/20240506215543.486c33157d18.Idf971ad801b6961c177bdf42cc323fd1a4ca8165@changeid Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 3 +++ net/mac80211/mlme.c | 18 ++++++++++++++++++ net/mac80211/parse.c | 8 ++++++++ 3 files changed, 29 insertions(+) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index cba1c2c3d9f1..76965d64a0fa 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -977,6 +977,8 @@ struct ieee80211_link_data_managed { struct { struct wiphy_delayed_work switch_work; + struct cfg80211_chan_def ap_chandef; + struct ieee80211_parsed_tpe tpe; unsigned long time; bool waiting_bcn; bool ignored_same_chan; @@ -1754,6 +1756,7 @@ struct ieee802_11_elems { /* not the order in the psd values is per element, not per chandef */ struct ieee80211_parsed_tpe tpe; + struct ieee80211_parsed_tpe csa_tpe; /* length of them, respectively */ u8 ext_capab_len; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 1d0b414619a4..18793f2fb91c 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2129,6 +2129,20 @@ static void ieee80211_csa_switch_work(struct wiphy *wiphy, link->u.mgd.csa.waiting_bcn = true; + /* apply new TPE restrictions immediately on the new channel */ + if (link->u.mgd.csa.ap_chandef.chan->band == NL80211_BAND_6GHZ && + link->u.mgd.conn.mode >= IEEE80211_CONN_MODE_HE) { + ieee80211_rearrange_tpe(&link->u.mgd.csa.tpe, + &link->u.mgd.csa.ap_chandef, + &link->conf->chanreq.oper); + if (memcmp(&link->conf->tpe, &link->u.mgd.csa.tpe, + sizeof(link->u.mgd.csa.tpe))) { + link->conf->tpe = link->u.mgd.csa.tpe; + ieee80211_link_info_change_notify(sdata, link, + BSS_CHANGED_TPE); + } + } + ieee80211_sta_reset_beacon_monitor(sdata); ieee80211_sta_reset_conn_monitor(sdata); } @@ -2379,6 +2393,8 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link, ch_switch.count = csa_ie.count; ch_switch.delay = csa_ie.max_switch_time; } + + link->u.mgd.csa.tpe = csa_elems->csa_tpe; } else { /* * If there was no per-STA profile for this link, we @@ -2517,6 +2533,8 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link, goto drop_connection; } + link->u.mgd.csa.ap_chandef = csa_ie.chanreq.ap; + link->csa.chanreq = csa_ie.chanreq; if (link->u.mgd.conn.mode < IEEE80211_CONN_MODE_EHT || sdata->vif.driver_flags & IEEE80211_VIF_IGNORE_OFDMA_WIDER_BW) diff --git a/net/mac80211/parse.c b/net/mac80211/parse.c index 6efeb977f8e5..5c5c21ecb2b7 100644 --- a/net/mac80211/parse.c +++ b/net/mac80211/parse.c @@ -607,6 +607,13 @@ _ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params, elem_parse_failed = IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; } + + subelem = cfg80211_find_ext_elem(WLAN_EID_TX_POWER_ENVELOPE, + pos, elen); + if (subelem) + ieee80211_parse_tpe(&elems->csa_tpe, + subelem->data + 1, + subelem->datalen - 1); break; case WLAN_EID_COUNTRY: elems->country_elem = pos; @@ -962,6 +969,7 @@ ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params) /* set all TPE entries to unlimited (but invalid) */ ieee80211_clear_tpe(&elems->tpe); + ieee80211_clear_tpe(&elems->csa_tpe); nontransmitted_profile = elems_parse->scratch_pos; nontransmitted_profile_len = -- cgit v1.2.3-58-ga151 From 9f472520f6f7b8ed3cf82d800b75c91c4c3d4ef1 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 6 May 2024 21:54:52 +0200 Subject: wifi: mac80211: refactor chanreq.ap setting There are now three places setting up chanreq.ap which always 
depends on the mode (EHT being used or not) and override flag. Refactor that code into a common function with a comment, to make that clearer. Reviewed-by: Miriam Rachel Korenblit Link: https://msgid.link/20240506215543.5cd6a209e58a.I3be318959d9e2df5dccd2d0938c3d2fcc6688030@changeid Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 43 +++++++++++++++++++++++++++++-------------- 1 file changed, 29 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 18793f2fb91c..a974a75df266 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -789,6 +789,29 @@ static void ieee80211_rearrange_tpe(struct ieee80211_parsed_tpe *tpe, } } +/* + * The AP part of the channel request is used to distinguish settings + * to the device used for wider bandwidth OFDMA. This is used in the + * channel context code to assign two channel contexts even if they're + * both for the same channel, if the AP bandwidths are incompatible. + * If not EHT (or driver override) then ap.chan == NULL indicates that + * there's no wider BW OFDMA used. + */ +static void ieee80211_set_chanreq_ap(struct ieee80211_sub_if_data *sdata, + struct ieee80211_chan_req *chanreq, + struct ieee80211_conn_settings *conn, + struct cfg80211_chan_def *ap_chandef) +{ + chanreq->ap.chan = NULL; + + if (conn->mode < IEEE80211_CONN_MODE_EHT) + return; + if (sdata->vif.driver_flags & IEEE80211_VIF_IGNORE_OFDMA_WIDER_BW) + return; + + chanreq->ap = *ap_chandef; +} + static struct ieee802_11_elems * ieee80211_determine_chan_mode(struct ieee80211_sub_if_data *sdata, struct ieee80211_conn_settings *conn, @@ -886,12 +909,7 @@ again: chanreq->oper = *ap_chandef; - /* wider-bandwidth OFDMA is only done in EHT */ - if (conn->mode >= IEEE80211_CONN_MODE_EHT && - !(sdata->vif.driver_flags & IEEE80211_VIF_IGNORE_OFDMA_WIDER_BW)) - chanreq->ap = *ap_chandef; - else - chanreq->ap.chan = NULL; + ieee80211_set_chanreq_ap(sdata, chanreq, conn, ap_chandef); while (!ieee80211_chandef_usable(sdata, &chanreq->oper, IEEE80211_CHAN_DISABLED)) { @@ -999,11 +1017,9 @@ static int ieee80211_config_bw(struct ieee80211_link_data *link, return -EINVAL; } - chanreq.ap = ap_chandef; chanreq.oper = ap_chandef; - if (link->u.mgd.conn.mode < IEEE80211_CONN_MODE_EHT || - sdata->vif.driver_flags & IEEE80211_VIF_IGNORE_OFDMA_WIDER_BW) - chanreq.ap.chan = NULL; + ieee80211_set_chanreq_ap(sdata, &chanreq, &link->u.mgd.conn, + &ap_chandef); /* * if HT operation mode changed store the new one - @@ -2535,10 +2551,9 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link, link->u.mgd.csa.ap_chandef = csa_ie.chanreq.ap; - link->csa.chanreq = csa_ie.chanreq; - if (link->u.mgd.conn.mode < IEEE80211_CONN_MODE_EHT || - sdata->vif.driver_flags & IEEE80211_VIF_IGNORE_OFDMA_WIDER_BW) - link->csa.chanreq.ap.chan = NULL; + link->csa.chanreq.oper = csa_ie.chanreq.oper; + ieee80211_set_chanreq_ap(sdata, &link->csa.chanreq, &link->u.mgd.conn, + &csa_ie.chanreq.ap); if (chanctx) { res = ieee80211_link_reserve_chanctx(link, &link->csa.chanreq, -- cgit v1.2.3-58-ga151 From f9a0757a4b2f5df4376963c25a3d7d7aeba78444 Mon Sep 17 00:00:00 2001 From: Pradeep Kumar Chitrapu Date: Wed, 15 May 2024 11:13:18 -0700 Subject: wifi: mac80211: Add EHT UL MU-MIMO flag in ieee80211_bss_conf Add flag for Full Bandwidth UL MU-MIMO for EHT. This is utilized to pass EHT MU-MIMO configurations from user space to driver in AP mode. 
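As an illustration of how the new field is meant to be consumed, a driver could forward it to its firmware configuration from its (link_)info_changed path; everything below except the link_conf fields is invented for the sketch:

    /* hypothetical driver-side consumer; struct and flag names are made up */
    struct drv_fw_phy_cfg {
        u32 flags;
    };

    #define DRV_FW_EHT_MU_BFER             BIT(0)
    #define DRV_FW_EHT_FULL_BW_UL_MUMIMO   BIT(1)

    static void drv_apply_eht_mu_cfg(const struct ieee80211_bss_conf *link_conf,
                                     struct drv_fw_phy_cfg *cfg)
    {
        if (link_conf->eht_mu_beamformer)
            cfg->flags |= DRV_FW_EHT_MU_BFER;

        /* new in this patch: BSS supports full-bandwidth EHT UL MU-MIMO */
        if (link_conf->eht_80mhz_full_bw_ul_mumimo)
            cfg->flags |= DRV_FW_EHT_FULL_BW_UL_MUMIMO;
    }
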
Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.0.1-00029-QCAHKSWPL_SILICONZ-1 Signed-off-by: Pradeep Kumar Chitrapu Link: https://msgid.link/20240515181327.12855-2-quic_pradeepc@quicinc.com Signed-off-by: Johannes Berg --- include/net/mac80211.h | 4 ++++ net/mac80211/cfg.c | 5 +++++ 2 files changed, 9 insertions(+) (limited to 'net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index a59eacfe0480..ecfa65ade226 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -735,6 +735,9 @@ struct ieee80211_parsed_tpe { * beamformee * @eht_mu_beamformer: in AP-mode, does this BSS enable operation as an EHT MU * beamformer + * @eht_80mhz_full_bw_ul_mumimo: in AP-mode, does this BSS support the + * reception of an EHT TB PPDU on an RU that spans the entire PPDU + * bandwidth */ struct ieee80211_bss_conf { struct ieee80211_vif *vif; @@ -828,6 +831,7 @@ struct ieee80211_bss_conf { bool eht_su_beamformer; bool eht_su_beamformee; bool eht_mu_beamformer; + bool eht_80mhz_full_bw_ul_mumimo; }; /** diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 3319c9360507..62119e957cd8 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1379,6 +1379,11 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, (IEEE80211_EHT_PHY_CAP7_MU_BEAMFORMER_80MHZ | IEEE80211_EHT_PHY_CAP7_MU_BEAMFORMER_160MHZ | IEEE80211_EHT_PHY_CAP7_MU_BEAMFORMER_320MHZ); + link_conf->eht_80mhz_full_bw_ul_mumimo = + params->eht_cap->fixed.phy_cap_info[7] & + (IEEE80211_EHT_PHY_CAP7_NON_OFDMA_UL_MU_MIMO_80MHZ | + IEEE80211_EHT_PHY_CAP7_NON_OFDMA_UL_MU_MIMO_160MHZ | + IEEE80211_EHT_PHY_CAP7_NON_OFDMA_UL_MU_MIMO_320MHZ); } else { link_conf->eht_su_beamformer = false; link_conf->eht_su_beamformee = false; -- cgit v1.2.3-58-ga151 From 2fe0a605d083b884490ee4de02be071b5b4291b1 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 6 May 2024 21:18:52 +0200 Subject: wifi: mac80211: fix TTLM teardown work The worker calculates the wrong sdata pointer, so if it ever runs, it'll crash. Fix that. Fixes: a17a58ad2ff2 ("wifi: mac80211: add support for tearing down negotiated TTLM") Reviewed-by: Miriam Rachel Korenblit Link: https://msgid.link/20240506211853.e6471800c76d.I8b7c2d6984c89a11cd33d1a610e9645fa965f6e1@changeid Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index a974a75df266..a29c4a4deb0b 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -7333,7 +7333,7 @@ static void ieee80211_teardown_ttlm_work(struct wiphy *wiphy, u16 new_dormant_links; struct ieee80211_sub_if_data *sdata = container_of(work, struct ieee80211_sub_if_data, - u.mgd.neg_ttlm_timeout_work.work); + u.mgd.teardown_ttlm_work); if (!sdata->vif.neg_ttlm.valid) return; -- cgit v1.2.3-58-ga151 From 53b739fd46462dc40fd18390d76f2ee05c18ea3a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 6 May 2024 21:10:33 +0200 Subject: wifi: mac80211: cancel multi-link reconf work on disconnect This work shouldn't run after we're disconnecting. Cancel it earlier (and then don't cancel it in stop later.) 
Fixes: 8eb8dd2ffbbb ("wifi: mac80211: Support link removal using Reconfiguration ML element") Reviewed-by: Ilan Peer Reviewed-by: Miriam Rachel Korenblit Link: https://msgid.link/20240506211034.ac754794279f.Ib9fbb1dab50c6b67f6de9be09a6c452ce89bbd50@changeid Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index a29c4a4deb0b..ab6b379d522b 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -3694,6 +3694,11 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, wiphy_delayed_work_cancel(sdata->local->hw.wiphy, &ifmgd->neg_ttlm_timeout_work); + + sdata->u.mgd.removed_links = 0; + wiphy_delayed_work_cancel(sdata->local->hw.wiphy, + &sdata->u.mgd.ml_reconf_work); + ieee80211_vif_set_links(sdata, 0, 0); ifmgd->mcast_seq_last = IEEE80211_SN_MODULO; @@ -9211,8 +9216,6 @@ void ieee80211_mgd_stop(struct ieee80211_sub_if_data *sdata) &ifmgd->teardown_ttlm_work); wiphy_delayed_work_cancel(sdata->local->hw.wiphy, &ifmgd->tdls_peer_del_work); - wiphy_delayed_work_cancel(sdata->local->hw.wiphy, - &ifmgd->ml_reconf_work); wiphy_delayed_work_cancel(sdata->local->hw.wiphy, &ifmgd->ttlm_work); wiphy_delayed_work_cancel(sdata->local->hw.wiphy, &ifmgd->neg_ttlm_timeout_work); -- cgit v1.2.3-58-ga151 From 3567bd6dcd1467d2ad0f597be94114c6f9c62680 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 6 May 2024 21:10:34 +0200 Subject: wifi: mac80211: cancel TTLM teardown work earlier It shouldn't be possible to run this after disconnecting, so cancel the work earlier. Fixes: a17a58ad2ff2 ("wifi: mac80211: add support for tearing down negotiated TTLM") Reviewed-by: Miriam Rachel Korenblit Link: https://msgid.link/20240506211034.096a10ccebec.I5584a21c27eb9b3e87b9e26380b627114b32ccba@changeid Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index ab6b379d522b..e0b44f47747b 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -3699,6 +3699,9 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, wiphy_delayed_work_cancel(sdata->local->hw.wiphy, &sdata->u.mgd.ml_reconf_work); + wiphy_work_cancel(sdata->local->hw.wiphy, + &ifmgd->teardown_ttlm_work); + ieee80211_vif_set_links(sdata, 0, 0); ifmgd->mcast_seq_last = IEEE80211_SN_MODULO; @@ -9212,8 +9215,6 @@ void ieee80211_mgd_stop(struct ieee80211_sub_if_data *sdata) &ifmgd->beacon_connection_loss_work); wiphy_work_cancel(sdata->local->hw.wiphy, &ifmgd->csa_connection_drop_work); - wiphy_work_cancel(sdata->local->hw.wiphy, - &ifmgd->teardown_ttlm_work); wiphy_delayed_work_cancel(sdata->local->hw.wiphy, &ifmgd->tdls_peer_del_work); wiphy_delayed_work_cancel(sdata->local->hw.wiphy, &ifmgd->ttlm_work); -- cgit v1.2.3-58-ga151 From 0d22026f3287ef5af80d983db150ff260e7117e8 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 6 May 2024 21:10:35 +0200 Subject: wifi: mac80211: don't stop TTLM works again There's no need to stop works that have already been stopped during disconnect, so don't. 
Reviewed-by: Miriam Rachel Korenblit Link: https://msgid.link/20240506211034.f8434be19f56.I021afadc538508da3bc8f95c89f424ca62b94bef@changeid Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index e0b44f47747b..09451a6f74b8 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -9217,9 +9217,6 @@ void ieee80211_mgd_stop(struct ieee80211_sub_if_data *sdata) &ifmgd->csa_connection_drop_work); wiphy_delayed_work_cancel(sdata->local->hw.wiphy, &ifmgd->tdls_peer_del_work); - wiphy_delayed_work_cancel(sdata->local->hw.wiphy, &ifmgd->ttlm_work); - wiphy_delayed_work_cancel(sdata->local->hw.wiphy, - &ifmgd->neg_ttlm_timeout_work); if (ifmgd->assoc_data) ieee80211_destroy_assoc_data(sdata, ASSOC_TIMEOUT); -- cgit v1.2.3-58-ga151 From 609c12a2af046c3674af2d5c7978b025718de5e8 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 6 May 2024 21:18:59 +0200 Subject: wifi: mac80211: reset negotiated TTLM on disconnect The negotiated TTLM data must be reset on disconnect, otherwise it may end up getting reused on another connection. Fix that. Fixes: 8f500fbc6c65 ("wifi: mac80211: process and save negotiated TID to Link mapping request") Reviewed-by: Miriam Rachel Korenblit Link: https://msgid.link/20240506211858.04142e8fe01c.Ia144457e086ebd8ddcfa31bdf5ff210b4b351c22@changeid Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 09451a6f74b8..ef3280fafbe9 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -3692,6 +3692,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, sizeof(sdata->u.mgd.ttlm_info)); wiphy_delayed_work_cancel(sdata->local->hw.wiphy, &ifmgd->ttlm_work); + memset(&sdata->vif.neg_ttlm, 0, sizeof(sdata->vif.neg_ttlm)); wiphy_delayed_work_cancel(sdata->local->hw.wiphy, &ifmgd->neg_ttlm_timeout_work); -- cgit v1.2.3-58-ga151 From a92fd2d9327ba877f29753eec15b93072ac300b0 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 10 May 2024 11:26:02 +0200 Subject: wifi: mac80211: send DelBA with correct BSSID In MLO, the deflink BSSID is clearly invalid. Since we fill the addresses as MLD addresses and translate later, use the AP address here instead. This fixes an issue that happens with HW restart, where the DelBA frame is transmitted, but not processed correctly due to the wrong BSSID (or even just discarded entirely). As a result, the BA sessions are kept alive; however, as other state is reset during HW restart, this then fails (reorder, etc.) and data doesn't go through until new BA sessions are established. 
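For reference, the station-mode address setup in ieee80211_send_delba() then looks roughly like the simplified sketch below; with MLO all three addresses are MLD-level and are translated to per-link addresses later on transmit, which is why the per-link deflink BSSID is the wrong thing to use:

    memcpy(mgmt->da, da, ETH_ALEN);                        /* peer (AP MLD) address */
    memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN);           /* own MLD address */
    memcpy(mgmt->bssid, sdata->vif.cfg.ap_addr, ETH_ALEN); /* AP MLD address, not deflink BSSID */
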
Reviewed-by: Miriam Rachel Korenblit Link: https://msgid.link/20240510112601.f4e1effdea29.I98e81f22166b68d4b6211191bcaaf8531b324a77@changeid Signed-off-by: Johannes Berg --- net/mac80211/ht.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index d7e8cf8e48b7..79caeb485fd5 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -475,7 +475,7 @@ void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata, sdata->vif.type == NL80211_IFTYPE_MESH_POINT) memcpy(mgmt->bssid, sdata->vif.addr, ETH_ALEN); else if (sdata->vif.type == NL80211_IFTYPE_STATION) - memcpy(mgmt->bssid, sdata->deflink.u.mgd.bssid, ETH_ALEN); + memcpy(mgmt->bssid, sdata->vif.cfg.ap_addr, ETH_ALEN); else if (sdata->vif.type == NL80211_IFTYPE_ADHOC) memcpy(mgmt->bssid, sdata->u.ibss.bssid, ETH_ALEN); -- cgit v1.2.3-58-ga151 From 4d25ca2d6801cfcf26f7f39c561611ba5be99bf8 Mon Sep 17 00:00:00 2001 From: Abhishek Chauhan Date: Thu, 9 May 2024 14:18:32 -0700 Subject: net: Rename mono_delivery_time to tstamp_type for scalabilty mono_delivery_time was added to check if skb->tstamp has delivery time in mono clock base (i.e. EDT) otherwise skb->tstamp has timestamp in ingress and delivery_time at egress. Renaming the bitfield from mono_delivery_time to tstamp_type is for extensibilty for other timestamps such as userspace timestamp (i.e. SO_TXTIME) set via sock opts. As we are renaming the mono_delivery_time to tstamp_type, it makes sense to start assigning tstamp_type based on enum defined in this commit. Earlier we used bool arg flag to check if the tstamp is mono in function skb_set_delivery_time, Now the signature of the functions accepts tstamp_type to distinguish between mono and real time. Also skb_set_delivery_type_by_clockid is a new function which accepts clockid to determine the tstamp_type. In future tstamp_type:1 can be extended to support userspace timestamp by increasing the bitfield. Signed-off-by: Abhishek Chauhan Reviewed-by: Willem de Bruijn Reviewed-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20240509211834.3235191-2-quic_abchauha@quicinc.com Signed-off-by: Martin KaFai Lau --- include/linux/skbuff.h | 52 ++++++++++++++++++++++-------- include/net/inet_frag.h | 4 +-- net/bridge/netfilter/nf_conntrack_bridge.c | 6 ++-- net/core/dev.c | 2 +- net/core/filter.c | 10 +++--- net/ieee802154/6lowpan/reassembly.c | 2 +- net/ipv4/inet_fragment.c | 2 +- net/ipv4/ip_fragment.c | 2 +- net/ipv4/ip_output.c | 9 +++--- net/ipv4/tcp_output.c | 14 ++++---- net/ipv6/ip6_output.c | 6 ++-- net/ipv6/netfilter.c | 6 ++-- net/ipv6/netfilter/nf_conntrack_reasm.c | 2 +- net/ipv6/reassembly.c | 2 +- net/ipv6/tcp_ipv6.c | 2 +- net/sched/act_bpf.c | 4 +-- net/sched/cls_bpf.c | 4 +-- 17 files changed, 78 insertions(+), 51 deletions(-) (limited to 'net') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c0b97c93a6de..3a721cc3b644 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -706,6 +706,11 @@ typedef unsigned int sk_buff_data_t; typedef unsigned char *sk_buff_data_t; #endif +enum skb_tstamp_type { + SKB_CLOCK_REALTIME, + SKB_CLOCK_MONOTONIC, +}; + /** * DOC: Basic sk_buff geometry * @@ -823,10 +828,8 @@ typedef unsigned char *sk_buff_data_t; * @dst_pending_confirm: need to confirm neighbour * @decrypted: Decrypted SKB * @slow_gro: state present at GRO time, slower prepare step required - * @mono_delivery_time: When set, skb->tstamp has the - * delivery_time in mono clock base (i.e. EDT). 
Otherwise, the - * skb->tstamp has the (rcv) timestamp at ingress and - * delivery_time at egress. + * @tstamp_type: When set, skb->tstamp has the + * delivery_time clock base of skb->tstamp. * @napi_id: id of the NAPI struct this skb came from * @sender_cpu: (aka @napi_id) source CPU in XPS * @alloc_cpu: CPU which did the skb allocation. @@ -954,7 +957,7 @@ struct sk_buff { /* private: */ __u8 __mono_tc_offset[0]; /* public: */ - __u8 mono_delivery_time:1; /* See SKB_MONO_DELIVERY_TIME_MASK */ + __u8 tstamp_type:1; /* See skb_tstamp_type */ #ifdef CONFIG_NET_XGRESS __u8 tc_at_ingress:1; /* See TC_AT_INGRESS_MASK */ __u8 tc_skip_classify:1; @@ -4183,7 +4186,7 @@ static inline void skb_get_new_timestampns(const struct sk_buff *skb, static inline void __net_timestamp(struct sk_buff *skb) { skb->tstamp = ktime_get_real(); - skb->mono_delivery_time = 0; + skb->tstamp_type = SKB_CLOCK_REALTIME; } static inline ktime_t net_timedelta(ktime_t t) @@ -4192,10 +4195,33 @@ static inline ktime_t net_timedelta(ktime_t t) } static inline void skb_set_delivery_time(struct sk_buff *skb, ktime_t kt, - bool mono) + u8 tstamp_type) { skb->tstamp = kt; - skb->mono_delivery_time = kt && mono; + + if (kt) + skb->tstamp_type = tstamp_type; + else + skb->tstamp_type = SKB_CLOCK_REALTIME; +} + +static inline void skb_set_delivery_type_by_clockid(struct sk_buff *skb, + ktime_t kt, clockid_t clockid) +{ + u8 tstamp_type = SKB_CLOCK_REALTIME; + + switch (clockid) { + case CLOCK_REALTIME: + break; + case CLOCK_MONOTONIC: + tstamp_type = SKB_CLOCK_MONOTONIC; + break; + default: + WARN_ON_ONCE(1); + kt = 0; + } + + skb_set_delivery_time(skb, kt, tstamp_type); } DECLARE_STATIC_KEY_FALSE(netstamp_needed_key); @@ -4205,8 +4231,8 @@ DECLARE_STATIC_KEY_FALSE(netstamp_needed_key); */ static inline void skb_clear_delivery_time(struct sk_buff *skb) { - if (skb->mono_delivery_time) { - skb->mono_delivery_time = 0; + if (skb->tstamp_type) { + skb->tstamp_type = SKB_CLOCK_REALTIME; if (static_branch_unlikely(&netstamp_needed_key)) skb->tstamp = ktime_get_real(); else @@ -4216,7 +4242,7 @@ static inline void skb_clear_delivery_time(struct sk_buff *skb) static inline void skb_clear_tstamp(struct sk_buff *skb) { - if (skb->mono_delivery_time) + if (skb->tstamp_type) return; skb->tstamp = 0; @@ -4224,7 +4250,7 @@ static inline void skb_clear_tstamp(struct sk_buff *skb) static inline ktime_t skb_tstamp(const struct sk_buff *skb) { - if (skb->mono_delivery_time) + if (skb->tstamp_type) return 0; return skb->tstamp; @@ -4232,7 +4258,7 @@ static inline ktime_t skb_tstamp(const struct sk_buff *skb) static inline ktime_t skb_tstamp_cond(const struct sk_buff *skb, bool cond) { - if (!skb->mono_delivery_time && skb->tstamp) + if (skb->tstamp_type != SKB_CLOCK_MONOTONIC && skb->tstamp) return skb->tstamp; if (static_branch_unlikely(&netstamp_needed_key) || cond) diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 153960663ce4..5af6eb14c5db 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -76,7 +76,7 @@ struct frag_v6_compare_key { * @stamp: timestamp of the last received fragment * @len: total length of the original datagram * @meat: length of received fragments so far - * @mono_delivery_time: stamp has a mono delivery time (EDT) + * @tstamp_type: stamp has a mono delivery time (EDT) * @flags: fragment queue flags * @max_size: maximum received fragment size * @fqdir: pointer to struct fqdir @@ -97,7 +97,7 @@ struct inet_frag_queue { ktime_t stamp; int len; int meat; - u8 mono_delivery_time; + u8 
tstamp_type; __u8 flags; u16 max_size; struct fqdir *fqdir; diff --git a/net/bridge/netfilter/nf_conntrack_bridge.c b/net/bridge/netfilter/nf_conntrack_bridge.c index c3c51b9a6826..816bb0fde718 100644 --- a/net/bridge/netfilter/nf_conntrack_bridge.c +++ b/net/bridge/netfilter/nf_conntrack_bridge.c @@ -32,7 +32,7 @@ static int nf_br_ip_fragment(struct net *net, struct sock *sk, struct sk_buff *)) { int frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size; - bool mono_delivery_time = skb->mono_delivery_time; + u8 tstamp_type = skb->tstamp_type; unsigned int hlen, ll_rs, mtu; ktime_t tstamp = skb->tstamp; struct ip_frag_state state; @@ -82,7 +82,7 @@ static int nf_br_ip_fragment(struct net *net, struct sock *sk, if (iter.frag) ip_fraglist_prepare(skb, &iter); - skb_set_delivery_time(skb, tstamp, mono_delivery_time); + skb_set_delivery_time(skb, tstamp, tstamp_type); err = output(net, sk, data, skb); if (err || !iter.frag) break; @@ -113,7 +113,7 @@ slow_path: goto blackhole; } - skb_set_delivery_time(skb2, tstamp, mono_delivery_time); + skb_set_delivery_time(skb2, tstamp, tstamp_type); err = output(net, sk, data, skb2); if (err) goto blackhole; diff --git a/net/core/dev.c b/net/core/dev.c index e1bb6d7856d9..85fe8138f3e4 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2160,7 +2160,7 @@ EXPORT_SYMBOL(net_disable_timestamp); static inline void net_timestamp_set(struct sk_buff *skb) { skb->tstamp = 0; - skb->mono_delivery_time = 0; + skb->tstamp_type = SKB_CLOCK_REALTIME; if (static_branch_unlikely(&netstamp_needed_key)) skb->tstamp = ktime_get_real(); } diff --git a/net/core/filter.c b/net/core/filter.c index 2510464692af..a3781a796da4 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -7730,13 +7730,13 @@ BPF_CALL_3(bpf_skb_set_tstamp, struct sk_buff *, skb, if (!tstamp) return -EINVAL; skb->tstamp = tstamp; - skb->mono_delivery_time = 1; + skb->tstamp_type = SKB_CLOCK_MONOTONIC; break; case BPF_SKB_TSTAMP_UNSPEC: if (tstamp) return -EINVAL; skb->tstamp = 0; - skb->mono_delivery_time = 0; + skb->tstamp_type = SKB_CLOCK_REALTIME; break; default: return -EINVAL; @@ -9443,7 +9443,7 @@ static struct bpf_insn *bpf_convert_tstamp_read(const struct bpf_prog *prog, TC_AT_INGRESS_MASK | SKB_MONO_DELIVERY_TIME_MASK); *insn++ = BPF_JMP32_IMM(BPF_JNE, tmp_reg, TC_AT_INGRESS_MASK | SKB_MONO_DELIVERY_TIME_MASK, 2); - /* skb->tc_at_ingress && skb->mono_delivery_time, + /* skb->tc_at_ingress && skb->tstamp_type, * read 0 as the (rcv) timestamp. */ *insn++ = BPF_MOV64_IMM(value_reg, 0); @@ -9468,7 +9468,7 @@ static struct bpf_insn *bpf_convert_tstamp_write(const struct bpf_prog *prog, * the bpf prog is aware the tstamp could have delivery time. * Thus, write skb->tstamp as is if tstamp_type_access is true. * Otherwise, writing at ingress will have to clear the - * mono_delivery_time bit also. + * skb->tstamp_type bit also. 
*/ if (!prog->tstamp_type_access) { __u8 tmp_reg = BPF_REG_AX; @@ -9478,7 +9478,7 @@ static struct bpf_insn *bpf_convert_tstamp_write(const struct bpf_prog *prog, *insn++ = BPF_JMP32_IMM(BPF_JSET, tmp_reg, TC_AT_INGRESS_MASK, 1); /* goto */ *insn++ = BPF_JMP_A(2); - /* : mono_delivery_time */ + /* : skb->tstamp_type */ *insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg, ~SKB_MONO_DELIVERY_TIME_MASK); *insn++ = BPF_STX_MEM(BPF_B, skb_reg, tmp_reg, SKB_BF_MONO_TC_OFFSET); } diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c index 56ef873828f4..867d637d86f0 100644 --- a/net/ieee802154/6lowpan/reassembly.c +++ b/net/ieee802154/6lowpan/reassembly.c @@ -130,7 +130,7 @@ static int lowpan_frag_queue(struct lowpan_frag_queue *fq, goto err; fq->q.stamp = skb->tstamp; - fq->q.mono_delivery_time = skb->mono_delivery_time; + fq->q.tstamp_type = skb->tstamp_type; if (frag_type == LOWPAN_DISPATCH_FRAG1) fq->q.flags |= INET_FRAG_FIRST_IN; diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index faaec92a46ac..d179a2c84222 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -619,7 +619,7 @@ void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head, skb_mark_not_on_list(head); head->prev = NULL; head->tstamp = q->stamp; - head->mono_delivery_time = q->mono_delivery_time; + head->tstamp_type = q->tstamp_type; if (sk) refcount_add(sum_truesize - head_truesize, &sk->sk_wmem_alloc); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 08e2c92e25ab..a92664a5ef2e 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -355,7 +355,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) qp->iif = dev->ifindex; qp->q.stamp = skb->tstamp; - qp->q.mono_delivery_time = skb->mono_delivery_time; + qp->q.tstamp_type = skb->tstamp_type; qp->q.meat += skb->len; qp->ecn |= ecn; add_frag_mem_limit(qp->q.fqdir, skb->truesize); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 9500031a1f55..fe86cadfa85b 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -764,7 +764,7 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, { struct iphdr *iph; struct sk_buff *skb2; - bool mono_delivery_time = skb->mono_delivery_time; + u8 tstamp_type = skb->tstamp_type; struct rtable *rt = skb_rtable(skb); unsigned int mtu, hlen, ll_rs; struct ip_fraglist_iter iter; @@ -856,7 +856,7 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, } } - skb_set_delivery_time(skb, tstamp, mono_delivery_time); + skb_set_delivery_time(skb, tstamp, tstamp_type); err = output(net, sk, skb); if (!err) @@ -912,7 +912,7 @@ slow_path: /* * Put this fragment into the sending queue. 
*/ - skb_set_delivery_time(skb2, tstamp, mono_delivery_time); + skb_set_delivery_time(skb2, tstamp, tstamp_type); err = output(net, sk, skb2); if (err) goto fail; @@ -1649,7 +1649,8 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, arg->csumoffset) = csum_fold(csum_add(nskb->csum, arg->csum)); nskb->ip_summed = CHECKSUM_NONE; - nskb->mono_delivery_time = !!transmit_time; + if (transmit_time) + nskb->tstamp_type = SKB_CLOCK_MONOTONIC; if (txhash) skb_set_hash(nskb, txhash, PKT_HASH_TYPE_L4); ip_push_pending_frames(sk, &fl4); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 95caf8aaa8be..d44371cfa6ec 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1301,7 +1301,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, tp = tcp_sk(sk); prior_wstamp = tp->tcp_wstamp_ns; tp->tcp_wstamp_ns = max(tp->tcp_wstamp_ns, tp->tcp_clock_cache); - skb_set_delivery_time(skb, tp->tcp_wstamp_ns, true); + skb_set_delivery_time(skb, tp->tcp_wstamp_ns, SKB_CLOCK_MONOTONIC); if (clone_it) { oskb = skb; @@ -1655,7 +1655,7 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue, skb_split(skb, buff, len); - skb_set_delivery_time(buff, skb->tstamp, true); + skb_set_delivery_time(buff, skb->tstamp, SKB_CLOCK_MONOTONIC); tcp_fragment_tstamp(skb, buff); old_factor = tcp_skb_pcount(skb); @@ -2764,7 +2764,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE) { /* "skb_mstamp_ns" is used as a start point for the retransmit timer */ tp->tcp_wstamp_ns = tp->tcp_clock_cache; - skb_set_delivery_time(skb, tp->tcp_wstamp_ns, true); + skb_set_delivery_time(skb, tp->tcp_wstamp_ns, SKB_CLOCK_MONOTONIC); list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue); tcp_init_tso_segs(skb, mss_now); goto repair; /* Skip network transmission */ @@ -3752,11 +3752,11 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, #ifdef CONFIG_SYN_COOKIES if (unlikely(synack_type == TCP_SYNACK_COOKIE && ireq->tstamp_ok)) skb_set_delivery_time(skb, cookie_init_timestamp(req, now), - true); + SKB_CLOCK_MONOTONIC); else #endif { - skb_set_delivery_time(skb, now, true); + skb_set_delivery_time(skb, now, SKB_CLOCK_MONOTONIC); if (!tcp_rsk(req)->snt_synack) /* Timestamp first SYNACK */ tcp_rsk(req)->snt_synack = tcp_skb_timestamp_us(skb); } @@ -3843,7 +3843,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, bpf_skops_write_hdr_opt((struct sock *)sk, skb, req, syn_skb, synack_type, &opts); - skb_set_delivery_time(skb, now, true); + skb_set_delivery_time(skb, now, SKB_CLOCK_MONOTONIC); tcp_add_tx_delay(skb, tp); return skb; @@ -4027,7 +4027,7 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) err = tcp_transmit_skb(sk, syn_data, 1, sk->sk_allocation); - skb_set_delivery_time(syn, syn_data->skb_mstamp_ns, true); + skb_set_delivery_time(syn, syn_data->skb_mstamp_ns, SKB_CLOCK_MONOTONIC); /* Now full SYN+DATA was cloned and sent (or not), * remove the SYN from the original skb (syn_data) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 27d8725445e3..1ab0f23d37bf 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -859,7 +859,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, struct rt6_info *rt = dst_rt6_info(skb_dst(skb)); struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ? 
inet6_sk(skb->sk) : NULL; - bool mono_delivery_time = skb->mono_delivery_time; + u8 tstamp_type = skb->tstamp_type; struct ip6_frag_state state; unsigned int mtu, hlen, nexthdr_offset; ktime_t tstamp = skb->tstamp; @@ -955,7 +955,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, if (iter.frag) ip6_fraglist_prepare(skb, &iter); - skb_set_delivery_time(skb, tstamp, mono_delivery_time); + skb_set_delivery_time(skb, tstamp, tstamp_type); err = output(net, sk, skb); if (!err) IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), @@ -1016,7 +1016,7 @@ slow_path: /* * Put this fragment into the sending queue. */ - skb_set_delivery_time(frag, tstamp, mono_delivery_time); + skb_set_delivery_time(frag, tstamp, tstamp_type); err = output(net, sk, frag); if (err) goto fail; diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 53d255838e6a..e0c2347b4dc6 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -126,7 +126,7 @@ int br_ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, struct sk_buff *)) { int frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size; - bool mono_delivery_time = skb->mono_delivery_time; + u8 tstamp_type = skb->tstamp_type; ktime_t tstamp = skb->tstamp; struct ip6_frag_state state; u8 *prevhdr, nexthdr = 0; @@ -192,7 +192,7 @@ int br_ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, if (iter.frag) ip6_fraglist_prepare(skb, &iter); - skb_set_delivery_time(skb, tstamp, mono_delivery_time); + skb_set_delivery_time(skb, tstamp, tstamp_type); err = output(net, sk, data, skb); if (err || !iter.frag) break; @@ -225,7 +225,7 @@ slow_path: goto blackhole; } - skb_set_delivery_time(skb2, tstamp, mono_delivery_time); + skb_set_delivery_time(skb2, tstamp, tstamp_type); err = output(net, sk, data, skb2); if (err) goto blackhole; diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 5e1b50c6a44d..6f0844c9315d 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -263,7 +263,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb, fq->iif = dev->ifindex; fq->q.stamp = skb->tstamp; - fq->q.mono_delivery_time = skb->mono_delivery_time; + fq->q.tstamp_type = skb->tstamp_type; fq->q.meat += skb->len; fq->ecn |= ecn; if (payload_len > fq->q.max_size) diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 327caca64257..a48be617a8ab 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -198,7 +198,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, fq->iif = dev->ifindex; fq->q.stamp = skb->tstamp; - fq->q.mono_delivery_time = skb->mono_delivery_time; + fq->q.tstamp_type = skb->tstamp_type; fq->q.meat += skb->len; fq->ecn |= ecn; add_frag_mem_limit(fq->q.fqdir, skb->truesize); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 4c3605485b68..8333005c5c2e 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -975,7 +975,7 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 mark = inet_twsk(sk)->tw_mark; else mark = READ_ONCE(sk->sk_mark); - skb_set_delivery_time(buff, tcp_transmit_time(sk), true); + skb_set_delivery_time(buff, tcp_transmit_time(sk), SKB_CLOCK_MONOTONIC); } if (txhash) { /* autoflowlabel/skb_get_hash_flowi6 rely on buff->hash */ diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index 0e3cf11ae5fc..396b576390d0 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -54,8 +54,8 @@ TC_INDIRECT_SCOPE int 
tcf_bpf_act(struct sk_buff *skb, bpf_compute_data_pointers(skb); filter_res = bpf_prog_run(filter, skb); } - if (unlikely(!skb->tstamp && skb->mono_delivery_time)) - skb->mono_delivery_time = 0; + if (unlikely(!skb->tstamp && skb->tstamp_type)) + skb->tstamp_type = SKB_CLOCK_REALTIME; if (skb_sk_is_prefetched(skb) && filter_res != TC_ACT_OK) skb_orphan(skb); diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 5e83e890f6a4..1941ebec23ff 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -104,8 +104,8 @@ TC_INDIRECT_SCOPE int cls_bpf_classify(struct sk_buff *skb, bpf_compute_data_pointers(skb); filter_res = bpf_prog_run(prog->filter, skb); } - if (unlikely(!skb->tstamp && skb->mono_delivery_time)) - skb->mono_delivery_time = 0; + if (unlikely(!skb->tstamp && skb->tstamp_type)) + skb->tstamp_type = SKB_CLOCK_REALTIME; if (prog->exts_integrated) { res->class = 0; -- cgit v1.2.3-58-ga151 From 1693c5db6ab8262e6f5263f9d211855959aa5acd Mon Sep 17 00:00:00 2001 From: Abhishek Chauhan Date: Thu, 9 May 2024 14:18:33 -0700 Subject: net: Add additional bit to support clockid_t timestamp type tstamp_type is now set based on actual clockid_t compressed into 2 bits. To make the design scalable for future needs this commit bring in the change to extend the tstamp_type:1 to tstamp_type:2 to support other clockid_t timestamp. We now support CLOCK_TAI as part of tstamp_type as part of this commit with existing support CLOCK_MONOTONIC and CLOCK_REALTIME. Signed-off-by: Abhishek Chauhan Reviewed-by: Willem de Bruijn Reviewed-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20240509211834.3235191-3-quic_abchauha@quicinc.com Signed-off-by: Martin KaFai Lau --- include/linux/skbuff.h | 18 +++++++++++------ include/uapi/linux/bpf.h | 15 +++++++++----- net/core/filter.c | 46 ++++++++++++++++++++++++------------------ net/ipv4/ip_output.c | 5 ++++- net/ipv4/raw.c | 2 +- net/ipv4/tcp_ipv4.c | 2 ++ net/ipv6/ip6_output.c | 5 ++++- net/ipv6/raw.c | 2 +- net/ipv6/tcp_ipv6.c | 10 +++++++-- net/packet/af_packet.c | 7 +++---- tools/include/uapi/linux/bpf.h | 15 +++++++++----- 11 files changed, 81 insertions(+), 46 deletions(-) (limited to 'net') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 3a721cc3b644..1e5c97daaa37 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -709,6 +709,8 @@ typedef unsigned char *sk_buff_data_t; enum skb_tstamp_type { SKB_CLOCK_REALTIME, SKB_CLOCK_MONOTONIC, + SKB_CLOCK_TAI, + __SKB_CLOCK_MAX = SKB_CLOCK_TAI, }; /** @@ -957,7 +959,7 @@ struct sk_buff { /* private: */ __u8 __mono_tc_offset[0]; /* public: */ - __u8 tstamp_type:1; /* See skb_tstamp_type */ + __u8 tstamp_type:2; /* See skb_tstamp_type */ #ifdef CONFIG_NET_XGRESS __u8 tc_at_ingress:1; /* See TC_AT_INGRESS_MASK */ __u8 tc_skip_classify:1; @@ -1087,15 +1089,16 @@ struct sk_buff { #endif #define PKT_TYPE_OFFSET offsetof(struct sk_buff, __pkt_type_offset) -/* if you move tc_at_ingress or mono_delivery_time +/* if you move tc_at_ingress or tstamp_type * around, you also must adapt these constants. 
*/ #ifdef __BIG_ENDIAN_BITFIELD -#define SKB_MONO_DELIVERY_TIME_MASK (1 << 7) -#define TC_AT_INGRESS_MASK (1 << 6) +#define SKB_TSTAMP_TYPE_MASK (3 << 6) +#define SKB_TSTAMP_TYPE_RSHIFT (6) +#define TC_AT_INGRESS_MASK (1 << 5) #else -#define SKB_MONO_DELIVERY_TIME_MASK (1 << 0) -#define TC_AT_INGRESS_MASK (1 << 1) +#define SKB_TSTAMP_TYPE_MASK (3) +#define TC_AT_INGRESS_MASK (1 << 2) #endif #define SKB_BF_MONO_TC_OFFSET offsetof(struct sk_buff, __mono_tc_offset) @@ -4216,6 +4219,9 @@ static inline void skb_set_delivery_type_by_clockid(struct sk_buff *skb, case CLOCK_MONOTONIC: tstamp_type = SKB_CLOCK_MONOTONIC; break; + case CLOCK_TAI: + tstamp_type = SKB_CLOCK_TAI; + break; default: WARN_ON_ONCE(1); kt = 0; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 90706a47f6ff..25ea393cf084 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -6207,12 +6207,17 @@ union { \ __u64 :64; \ } __attribute__((aligned(8))) +/* The enum used in skb->tstamp_type. It specifies the clock type + * of the time stored in the skb->tstamp. + */ enum { - BPF_SKB_TSTAMP_UNSPEC, - BPF_SKB_TSTAMP_DELIVERY_MONO, /* tstamp has mono delivery time */ - /* For any BPF_SKB_TSTAMP_* that the bpf prog cannot handle, - * the bpf prog should handle it like BPF_SKB_TSTAMP_UNSPEC - * and try to deduce it by ingress, egress or skb->sk->sk_clockid. + BPF_SKB_TSTAMP_UNSPEC = 0, /* DEPRECATED */ + BPF_SKB_TSTAMP_DELIVERY_MONO = 1, /* DEPRECATED */ + BPF_SKB_CLOCK_REALTIME = 0, + BPF_SKB_CLOCK_MONOTONIC = 1, + BPF_SKB_CLOCK_TAI = 2, + /* For any future BPF_SKB_CLOCK_* that the bpf prog cannot handle, + * the bpf prog can try to deduce it by ingress/egress/skb->sk->sk_clockid. */ }; diff --git a/net/core/filter.c b/net/core/filter.c index a3781a796da4..c6edfe9f41bc 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -7726,17 +7726,21 @@ BPF_CALL_3(bpf_skb_set_tstamp, struct sk_buff *, skb, return -EOPNOTSUPP; switch (tstamp_type) { - case BPF_SKB_TSTAMP_DELIVERY_MONO: + case BPF_SKB_CLOCK_REALTIME: + skb->tstamp = tstamp; + skb->tstamp_type = SKB_CLOCK_REALTIME; + break; + case BPF_SKB_CLOCK_MONOTONIC: if (!tstamp) return -EINVAL; skb->tstamp = tstamp; skb->tstamp_type = SKB_CLOCK_MONOTONIC; break; - case BPF_SKB_TSTAMP_UNSPEC: - if (tstamp) + case BPF_SKB_CLOCK_TAI: + if (!tstamp) return -EINVAL; - skb->tstamp = 0; - skb->tstamp_type = SKB_CLOCK_REALTIME; + skb->tstamp = tstamp; + skb->tstamp_type = SKB_CLOCK_TAI; break; default: return -EINVAL; @@ -9387,16 +9391,17 @@ static struct bpf_insn *bpf_convert_tstamp_type_read(const struct bpf_insn *si, { __u8 value_reg = si->dst_reg; __u8 skb_reg = si->src_reg; - /* AX is needed because src_reg and dst_reg could be the same */ - __u8 tmp_reg = BPF_REG_AX; - - *insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg, - SKB_BF_MONO_TC_OFFSET); - *insn++ = BPF_JMP32_IMM(BPF_JSET, tmp_reg, - SKB_MONO_DELIVERY_TIME_MASK, 2); - *insn++ = BPF_MOV32_IMM(value_reg, BPF_SKB_TSTAMP_UNSPEC); - *insn++ = BPF_JMP_A(1); - *insn++ = BPF_MOV32_IMM(value_reg, BPF_SKB_TSTAMP_DELIVERY_MONO); + BUILD_BUG_ON(__SKB_CLOCK_MAX != (int)BPF_SKB_CLOCK_TAI); + BUILD_BUG_ON(SKB_CLOCK_REALTIME != (int)BPF_SKB_CLOCK_REALTIME); + BUILD_BUG_ON(SKB_CLOCK_MONOTONIC != (int)BPF_SKB_CLOCK_MONOTONIC); + BUILD_BUG_ON(SKB_CLOCK_TAI != (int)BPF_SKB_CLOCK_TAI); + *insn++ = BPF_LDX_MEM(BPF_B, value_reg, skb_reg, SKB_BF_MONO_TC_OFFSET); + *insn++ = BPF_ALU32_IMM(BPF_AND, value_reg, SKB_TSTAMP_TYPE_MASK); +#ifdef __BIG_ENDIAN_BITFIELD + *insn++ = BPF_ALU32_IMM(BPF_RSH, value_reg, 
SKB_TSTAMP_TYPE_RSHIFT); +#else + BUILD_BUG_ON(!(SKB_TSTAMP_TYPE_MASK & 0x1)); +#endif return insn; } @@ -9439,10 +9444,11 @@ static struct bpf_insn *bpf_convert_tstamp_read(const struct bpf_prog *prog, __u8 tmp_reg = BPF_REG_AX; *insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg, SKB_BF_MONO_TC_OFFSET); - *insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg, - TC_AT_INGRESS_MASK | SKB_MONO_DELIVERY_TIME_MASK); - *insn++ = BPF_JMP32_IMM(BPF_JNE, tmp_reg, - TC_AT_INGRESS_MASK | SKB_MONO_DELIVERY_TIME_MASK, 2); + /* check if ingress mask bits is set */ + *insn++ = BPF_JMP32_IMM(BPF_JSET, tmp_reg, TC_AT_INGRESS_MASK, 1); + *insn++ = BPF_JMP_A(4); + *insn++ = BPF_JMP32_IMM(BPF_JSET, tmp_reg, SKB_TSTAMP_TYPE_MASK, 1); + *insn++ = BPF_JMP_A(2); /* skb->tc_at_ingress && skb->tstamp_type, * read 0 as the (rcv) timestamp. */ @@ -9479,7 +9485,7 @@ static struct bpf_insn *bpf_convert_tstamp_write(const struct bpf_prog *prog, /* goto */ *insn++ = BPF_JMP_A(2); /* : skb->tstamp_type */ - *insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg, ~SKB_MONO_DELIVERY_TIME_MASK); + *insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg, ~SKB_TSTAMP_TYPE_MASK); *insn++ = BPF_STX_MEM(BPF_B, skb_reg, tmp_reg, SKB_BF_MONO_TC_OFFSET); } #endif diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index fe86cadfa85b..b90d0f78ac80 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1457,7 +1457,10 @@ struct sk_buff *__ip_make_skb(struct sock *sk, skb->priority = (cork->tos != -1) ? cork->priority: READ_ONCE(sk->sk_priority); skb->mark = cork->mark; - skb->tstamp = cork->transmit_time; + if (sk_is_tcp(sk)) + skb_set_delivery_time(skb, cork->transmit_time, SKB_CLOCK_MONOTONIC); + else + skb_set_delivery_type_by_clockid(skb, cork->transmit_time, sk->sk_clockid); /* * Steal rt from cork.dst to avoid a pair of atomic_inc/atomic_dec * on dst refcount diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 4cb43401e0e0..1a0953650356 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -360,7 +360,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, skb->protocol = htons(ETH_P_IP); skb->priority = READ_ONCE(sk->sk_priority); skb->mark = sockc->mark; - skb->tstamp = sockc->transmit_time; + skb_set_delivery_type_by_clockid(skb, sockc->transmit_time, sk->sk_clockid); skb_dst_set(skb, &rt->dst); *rtp = NULL; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 30ef0c8f5e92..8f70b8d1d1e5 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -3625,6 +3625,8 @@ void __init tcp_v4_init(void) */ inet_sk(sk)->pmtudisc = IP_PMTUDISC_DO; + sk->sk_clockid = CLOCK_MONOTONIC; + per_cpu(ipv4_tcp_sk, cpu) = sk; } if (register_pernet_subsys(&tcp_sk_ops)) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 1ab0f23d37bf..e7a19df3125e 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1924,7 +1924,10 @@ struct sk_buff *__ip6_make_skb(struct sock *sk, skb->priority = READ_ONCE(sk->sk_priority); skb->mark = cork->base.mark; - skb->tstamp = cork->base.transmit_time; + if (sk_is_tcp(sk)) + skb_set_delivery_time(skb, cork->base.transmit_time, SKB_CLOCK_MONOTONIC); + else + skb_set_delivery_type_by_clockid(skb, cork->base.transmit_time, sk->sk_clockid); ip6_cork_steal_dst(skb, cork); IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 2eedf255600b..f838366e8256 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -621,7 +621,7 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, skb->protocol = htons(ETH_P_IPV6); skb->priority = 
READ_ONCE(sk->sk_priority); skb->mark = sockc->mark; - skb->tstamp = sockc->transmit_time; + skb_set_delivery_type_by_clockid(skb, sockc->transmit_time, sk->sk_clockid); skb_put(skb, length); skb_reset_network_header(skb); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 8333005c5c2e..750aa681779c 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -2387,8 +2387,14 @@ static struct inet_protosw tcpv6_protosw = { static int __net_init tcpv6_net_init(struct net *net) { - return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6, - SOCK_RAW, IPPROTO_TCP, net); + int res; + + res = inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6, + SOCK_RAW, IPPROTO_TCP, net); + if (!res) + net->ipv6.tcp_sk->sk_clockid = CLOCK_MONOTONIC; + + return res; } static void __net_exit tcpv6_net_exit(struct net *net) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index ea3ebc160e25..fce390887591 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2056,8 +2056,7 @@ retry: skb->dev = dev; skb->priority = READ_ONCE(sk->sk_priority); skb->mark = READ_ONCE(sk->sk_mark); - skb->tstamp = sockc.transmit_time; - + skb_set_delivery_type_by_clockid(skb, sockc.transmit_time, sk->sk_clockid); skb_setup_tx_timestamp(skb, sockc.tsflags); if (unlikely(extra_len == 4)) @@ -2584,7 +2583,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, skb->dev = dev; skb->priority = READ_ONCE(po->sk.sk_priority); skb->mark = READ_ONCE(po->sk.sk_mark); - skb->tstamp = sockc->transmit_time; + skb_set_delivery_type_by_clockid(skb, sockc->transmit_time, po->sk.sk_clockid); skb_setup_tx_timestamp(skb, sockc->tsflags); skb_zcopy_set_nouarg(skb, ph.raw); @@ -3062,7 +3061,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) skb->dev = dev; skb->priority = READ_ONCE(sk->sk_priority); skb->mark = sockc.mark; - skb->tstamp = sockc.transmit_time; + skb_set_delivery_type_by_clockid(skb, sockc.transmit_time, sk->sk_clockid); if (unlikely(extra_len == 4)) skb->no_fcs = 1; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 90706a47f6ff..25ea393cf084 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -6207,12 +6207,17 @@ union { \ __u64 :64; \ } __attribute__((aligned(8))) +/* The enum used in skb->tstamp_type. It specifies the clock type + * of the time stored in the skb->tstamp. + */ enum { - BPF_SKB_TSTAMP_UNSPEC, - BPF_SKB_TSTAMP_DELIVERY_MONO, /* tstamp has mono delivery time */ - /* For any BPF_SKB_TSTAMP_* that the bpf prog cannot handle, - * the bpf prog should handle it like BPF_SKB_TSTAMP_UNSPEC - * and try to deduce it by ingress, egress or skb->sk->sk_clockid. + BPF_SKB_TSTAMP_UNSPEC = 0, /* DEPRECATED */ + BPF_SKB_TSTAMP_DELIVERY_MONO = 1, /* DEPRECATED */ + BPF_SKB_CLOCK_REALTIME = 0, + BPF_SKB_CLOCK_MONOTONIC = 1, + BPF_SKB_CLOCK_TAI = 2, + /* For any future BPF_SKB_CLOCK_* that the bpf prog cannot handle, + * the bpf prog can try to deduce it by ingress/egress/skb->sk->sk_clockid. */ }; -- cgit v1.2.3-58-ga151 From d9cbd8343b010016fcaabc361c37720dcafddcbe Mon Sep 17 00:00:00 2001 From: yunshui Date: Thu, 23 May 2024 11:35:20 +0800 Subject: bpf, net: Use DEV_STAT_INC() syzbot/KCSAN reported that races happen when multiple CPUs updating dev->stats.tx_error concurrently. Adopt SMP safe DEV_STATS_INC() to update the dev->stats fields. 
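The difference between the two forms, roughly (see DEV_STATS_INC() in include/linux/netdevice.h for the real definition):

    dev->stats.tx_errors++;          /* plain load/add/store: concurrent CPUs can lose increments */
    DEV_STATS_INC(dev, tx_errors);   /* roughly atomic_long_inc(&dev->stats.__tx_errors): no lost updates */
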
Reported-by: syzbot Signed-off-by: yunshui Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20240523033520.4029314-1-jiangyunshui@kylinos.cn --- net/core/filter.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/core/filter.c b/net/core/filter.c index c6edfe9f41bc..7c46ecba3b01 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2274,12 +2274,12 @@ static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev, err = bpf_out_neigh_v6(net, skb, dev, nh); if (unlikely(net_xmit_eval(err))) - dev->stats.tx_errors++; + DEV_STATS_INC(dev, tx_errors); else ret = NET_XMIT_SUCCESS; goto out_xmit; out_drop: - dev->stats.tx_errors++; + DEV_STATS_INC(dev, tx_errors); kfree_skb(skb); out_xmit: return ret; @@ -2380,12 +2380,12 @@ static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev, err = bpf_out_neigh_v4(net, skb, dev, nh); if (unlikely(net_xmit_eval(err))) - dev->stats.tx_errors++; + DEV_STATS_INC(dev, tx_errors); else ret = NET_XMIT_SUCCESS; goto out_xmit; out_drop: - dev->stats.tx_errors++; + DEV_STATS_INC(dev, tx_errors); kfree_skb(skb); out_xmit: return ret; -- cgit v1.2.3-58-ga151 From c65b6521115ed478471bf8c4a3687059fcea01aa Mon Sep 17 00:00:00 2001 From: Gou Hao Date: Sun, 26 May 2024 22:57:17 +0800 Subject: net/core: remove redundant sk_callback_lock initialization sk_callback_lock has already been initialized in sk_init_common(). Signed-off-by: Gou Hao Reviewed-by: Breno Leitao Link: https://lore.kernel.org/r/20240526145718.9542-1-gouhao@uniontech.com Signed-off-by: Paolo Abeni --- net/core/sock.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index 8629f9aecf91..67b10954e0cf 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -3460,7 +3460,6 @@ void sock_init_data_uid(struct socket *sock, struct sock *sk, kuid_t uid) } sk->sk_uid = uid; - rwlock_init(&sk->sk_callback_lock); if (sk->sk_kern_sock) lockdep_set_class_and_name( &sk->sk_callback_lock, -- cgit v1.2.3-58-ga151 From de31e96cf423848a1d541142446801430c12da3c Mon Sep 17 00:00:00 2001 From: Gou Hao Date: Sun, 26 May 2024 22:57:18 +0800 Subject: net/core: move the lockdep-init of sk_callback_lock to sk_init_common() In commit cdfbabfb2f0c ("net: Work around lockdep limitation in sockets that use sockets"), it introduces 'af_kern_callback_keys' to lockdep-init of sk_callback_lock according to 'sk_kern_sock', it modifies sock_init_data() only, and sk_clone_lock() calls sk_init_common() to initialize sk_callback_lock too, so the lockdep-init of sk_callback_lock should be moved to sk_init_common(). 
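A rough picture of the two initialization paths (not actual code) shows why the class selection has to live in the shared helper:

    /*
     * socket creation:   sock_init_data_uid() -> sk_init_common()
     * accept() / clone:  sk_clone_lock()      -> sk_init_common()
     *
     * Only sk_init_common() is on both paths, so picking the kernel vs.
     * user lockdep class anywhere else leaves cloned sockets with the
     * wrong (default) class.
     */
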
Signed-off-by: Gou Hao Link: https://lore.kernel.org/r/20240526145718.9542-2-gouhao@uniontech.com Signed-off-by: Paolo Abeni --- net/core/sock.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index 67b10954e0cf..521e6373d4f7 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2262,7 +2262,12 @@ static void sk_init_common(struct sock *sk) lockdep_set_class_and_name(&sk->sk_error_queue.lock, af_elock_keys + sk->sk_family, af_family_elock_key_strings[sk->sk_family]); - lockdep_set_class_and_name(&sk->sk_callback_lock, + if (sk->sk_kern_sock) + lockdep_set_class_and_name(&sk->sk_callback_lock, + af_kern_callback_keys + sk->sk_family, + af_family_kern_clock_key_strings[sk->sk_family]); + else + lockdep_set_class_and_name(&sk->sk_callback_lock, af_callback_keys + sk->sk_family, af_family_clock_key_strings[sk->sk_family]); } @@ -3460,17 +3465,6 @@ void sock_init_data_uid(struct socket *sock, struct sock *sk, kuid_t uid) } sk->sk_uid = uid; - if (sk->sk_kern_sock) - lockdep_set_class_and_name( - &sk->sk_callback_lock, - af_kern_callback_keys + sk->sk_family, - af_family_kern_clock_key_strings[sk->sk_family]); - else - lockdep_set_class_and_name( - &sk->sk_callback_lock, - af_callback_keys + sk->sk_family, - af_family_clock_key_strings[sk->sk_family]); - sk->sk_state_change = sock_def_wakeup; sk->sk_data_ready = sock_def_readable; sk->sk_write_space = sock_def_write_space; -- cgit v1.2.3-58-ga151 From 874aa96d78c791565781bb38570971f614d800ff Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Mon, 27 May 2024 19:04:19 +0200 Subject: net/neighbour: constify ctl_table arguments of utility function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sysctl core is preparing to only expose instances of struct ctl_table as "const". This will also affect the ctl_table argument of sysctl handlers. As the function prototype of all sysctl handlers throughout the tree needs to stay consistent that change will be done in one commit. To reduce the size of that final commit, switch utility functions which are not bound by "typedef proc_handler" to "const struct ctl_table". No functional change. Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20240527-sysctl-const-handler-net-v1-1-16523767d0b2@weissschuh.net Signed-off-by: Jakub Kicinski --- net/core/neighbour.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 45fd88405b6b..277751375b0a 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -3578,7 +3578,7 @@ static void neigh_copy_dflt_parms(struct net *net, struct neigh_parms *p, rcu_read_unlock(); } -static void neigh_proc_update(struct ctl_table *ctl, int write) +static void neigh_proc_update(const struct ctl_table *ctl, int write) { struct net_device *dev = ctl->extra1; struct neigh_parms *p = ctl->extra2; -- cgit v1.2.3-58-ga151 From 551814313f113206800b025d89d4af99b2db13a2 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Mon, 27 May 2024 19:04:20 +0200 Subject: net/ipv4/sysctl: constify ctl_table arguments of utility functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sysctl core is preparing to only expose instances of struct ctl_table as "const". This will also affect the ctl_table argument of sysctl handlers. 
As the function prototype of all sysctl handlers throughout the tree needs to stay consistent that change will be done in one commit. To reduce the size of that final commit, switch utility functions which are not bound by "typedef proc_handler" to "const struct ctl_table". No functional change. Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20240527-sysctl-const-handler-net-v1-2-16523767d0b2@weissschuh.net Signed-off-by: Jakub Kicinski --- net/ipv4/sysctl_net_ipv4.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 162a0a3b6ba5..d7892f34a15b 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -130,7 +130,8 @@ static int ipv4_privileged_ports(struct ctl_table *table, int write, return ret; } -static void inet_get_ping_group_range_table(struct ctl_table *table, kgid_t *low, kgid_t *high) +static void inet_get_ping_group_range_table(const struct ctl_table *table, + kgid_t *low, kgid_t *high) { kgid_t *data = table->data; struct net *net = @@ -145,7 +146,8 @@ static void inet_get_ping_group_range_table(struct ctl_table *table, kgid_t *low } /* Update system visible IP port range */ -static void set_ping_group_range(struct ctl_table *table, kgid_t low, kgid_t high) +static void set_ping_group_range(const struct ctl_table *table, + kgid_t low, kgid_t high) { kgid_t *data = table->data; struct net *net = -- cgit v1.2.3-58-ga151 From c55eb03765f413d43ee96e2c4eeac27fde8e63d8 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Mon, 27 May 2024 19:04:21 +0200 Subject: net/ipv6/addrconf: constify ctl_table arguments of utility functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sysctl core is preparing to only expose instances of struct ctl_table as "const". This will also affect the ctl_table argument of sysctl handlers. As the function prototype of all sysctl handlers throughout the tree needs to stay consistent that change will be done in one commit. To reduce the size of that final commit, switch utility functions which are not bound by "typedef proc_handler" to "const struct ctl_table". No functional change. 
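To make the constify step concrete, here is a minimal, self-contained userspace sketch (illustrative only; the struct and function names are invented and this is not kernel code): helpers that only read the table can already take a pointer-to-const, so the compiler rejects accidental writes, while functions whose prototype is pinned by a shared typedef keep the non-const argument until the tree-wide change lands.

#include <stdio.h>

struct ctl_entry {
	const char *procname;
	int value;
};

/* utility helper that only reads the entry: safe to constify today */
static int entry_get(const struct ctl_entry *e)
{
	return e->value;	/* writing to e->value here would no longer compile */
}

/* "handler" whose prototype must stay consistent across all callers */
static int entry_handler(struct ctl_entry *e, int write, int newval)
{
	if (write)
		e->value = newval;
	return entry_get(e);	/* non-const pointer converts to const implicitly */
}

int main(void)
{
	struct ctl_entry e = { .procname = "example", .value = 42 };

	printf("%s = %d\n", e.procname, entry_handler(&e, 1, 7));
	return 0;
}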
Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20240527-sysctl-const-handler-net-v1-3-16523767d0b2@weissschuh.net Signed-off-by: Jakub Kicinski --- net/ipv6/addrconf.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 5c424a0e7232..1e69756d53d9 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -863,7 +863,7 @@ static void addrconf_forward_change(struct net *net, __s32 newf) } } -static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int newf) +static int addrconf_fixup_forwarding(const struct ctl_table *table, int *p, int newf) { struct net *net; int old; @@ -931,7 +931,7 @@ static void addrconf_linkdown_change(struct net *net, __s32 newf) } } -static int addrconf_fixup_linkdown(struct ctl_table *table, int *p, int newf) +static int addrconf_fixup_linkdown(const struct ctl_table *table, int *p, int newf) { struct net *net; int old; @@ -6378,7 +6378,7 @@ static void addrconf_disable_change(struct net *net, __s32 newf) } } -static int addrconf_disable_ipv6(struct ctl_table *table, int *p, int newf) +static int addrconf_disable_ipv6(const struct ctl_table *table, int *p, int newf) { struct net *net = (struct net *)table->extra2; int old; @@ -6669,7 +6669,7 @@ void addrconf_disable_policy_idev(struct inet6_dev *idev, int val) } static -int addrconf_disable_policy(struct ctl_table *ctl, int *valp, int val) +int addrconf_disable_policy(const struct ctl_table *ctl, int *valp, int val) { struct net *net = (struct net *)ctl->extra2; struct inet6_dev *idev; -- cgit v1.2.3-58-ga151 From 7a20cd1e71d87907dbba8887cd434a2195699c85 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Mon, 27 May 2024 19:04:22 +0200 Subject: net/ipv6/ndisc: constify ctl_table arguments of utility function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sysctl core is preparing to only expose instances of struct ctl_table as "const". This will also affect the ctl_table argument of sysctl handlers. As the function prototype of all sysctl handlers throughout the tree needs to stay consistent that change will be done in one commit. To reduce the size of that final commit, switch utility functions which are not bound by "typedef proc_handler" to "const struct ctl_table". No functional change. Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20240527-sysctl-const-handler-net-v1-4-16523767d0b2@weissschuh.net Signed-off-by: Jakub Kicinski --- net/ipv6/ndisc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index d914b23256ce..254b192c5705 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1936,7 +1936,7 @@ static struct notifier_block ndisc_netdev_notifier = { }; #ifdef CONFIG_SYSCTL -static void ndisc_warn_deprecated_sysctl(struct ctl_table *ctl, +static void ndisc_warn_deprecated_sysctl(const struct ctl_table *ctl, const char *func, const char *dev_name) { static char warncomm[TASK_COMM_LEN]; -- cgit v1.2.3-58-ga151 From 0a9f788fdde4af08547455fcdc21e21b822218f2 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Mon, 27 May 2024 19:04:23 +0200 Subject: ipvs: constify ctl_table arguments of utility functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sysctl core is preparing to only expose instances of struct ctl_table as "const". This will also affect the ctl_table argument of sysctl handlers. 
As the function prototype of all sysctl handlers throughout the tree needs to stay consistent that change will be done in one commit. To reduce the size of that final commit, switch utility functions which are not bound by "typedef proc_handler" to "const struct ctl_table". No functional change. Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20240527-sysctl-const-handler-net-v1-5-16523767d0b2@weissschuh.net Signed-off-by: Jakub Kicinski --- net/netfilter/ipvs/ip_vs_ctl.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index b6d0dcf3a5c3..78a1cc72dc38 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1924,7 +1924,8 @@ proc_do_sync_ports(struct ctl_table *table, int write, return rc; } -static int ipvs_proc_est_cpumask_set(struct ctl_table *table, void *buffer) +static int ipvs_proc_est_cpumask_set(const struct ctl_table *table, + void *buffer) { struct netns_ipvs *ipvs = table->extra2; cpumask_var_t *valp = table->data; @@ -1962,8 +1963,8 @@ out: return ret; } -static int ipvs_proc_est_cpumask_get(struct ctl_table *table, void *buffer, - size_t size) +static int ipvs_proc_est_cpumask_get(const struct ctl_table *table, + void *buffer, size_t size) { struct netns_ipvs *ipvs = table->extra2; cpumask_var_t *valp = table->data; -- cgit v1.2.3-58-ga151 From a0ca76e5b7d550fcd74753d5fdaaf23f1a9bfdb4 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 23 May 2024 12:11:40 +0200 Subject: wifi: mac80211: check ieee80211_bss_info_change_notify() against MLD It's not valid to call ieee80211_bss_info_change_notify() with an sdata that's an MLD, remove the FIXME comment (it's not true) and add a warning. Reviewed-by: Miriam Rachel Korenblit Link: https://msgid.link/20240523121140.97a589b13d24.I61988788d81fb3cf97a490dfd3167f67a141d1fd@changeid Signed-off-by: Johannes Berg --- net/mac80211/main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 4eaea0a9975b..40fbf397ce74 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -337,6 +337,8 @@ void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata, might_sleep(); + WARN_ON_ONCE(ieee80211_vif_is_mld(&sdata->vif)); + if (!changed || sdata->vif.type == NL80211_IFTYPE_AP_VLAN) return; @@ -369,7 +371,6 @@ void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata, if (changed & ~BSS_CHANGED_VIF_CFG_FLAGS) { u64 ch = changed & ~BSS_CHANGED_VIF_CFG_FLAGS; - /* FIXME: should be for each link */ trace_drv_link_info_changed(local, sdata, &sdata->vif.bss_conf, changed); if (local->ops->link_info_changed) -- cgit v1.2.3-58-ga151 From 6322e0e75a01d30b914fa87470ad00d8db844f72 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 23 May 2024 12:03:52 +0200 Subject: wifi: mac80211: handle HW restart during ROC If we have a HW restart in the middle of a ROC period, then there are two cases: - if it's a software ROC, we really don't need to do anything, since the ROC work will still be queued and will run later, albeit with the interruption due to the restart; - if it's a hardware ROC, then it may have begun or not, if it did begin already we can only remove it and tell userspace about that. In both cases, this fixes the warning that would appear in ieee80211_start_next_roc() in this case. 
In the case of some drivers such as iwlwifi, the part of restarting is never going to happen since the driver will cancel the ROC, but flushing the work to ensure nothing is pending here will also result in no longer being able to trigger the warning in this case. Reviewed-by: Miriam Rachel Korenblit Link: https://msgid.link/20240523120352.f1924b5411ea.Ifc02a45a5ce23868dc7e428bad8d0e6996dd10f4@changeid Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 1 + net/mac80211/offchannel.c | 35 ++++++++++++++++++++++++++++++++++- net/mac80211/util.c | 3 +-- 3 files changed, 36 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 76965d64a0fa..3fd7b1adbfab 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1978,6 +1978,7 @@ void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local); void ieee80211_offchannel_return(struct ieee80211_local *local); void ieee80211_roc_setup(struct ieee80211_local *local); void ieee80211_start_next_roc(struct ieee80211_local *local); +void ieee80211_reconfig_roc(struct ieee80211_local *local); void ieee80211_roc_purge(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata); int ieee80211_remain_on_channel(struct wiphy *wiphy, struct wireless_dev *wdev, diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index 65e1e9e971fd..28d03196ef75 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -8,7 +8,7 @@ * Copyright 2006-2007 Jiri Benc * Copyright 2007, Michael Wu * Copyright 2009 Johannes Berg - * Copyright (C) 2019, 2022-2023 Intel Corporation + * Copyright (C) 2019, 2022-2024 Intel Corporation */ #include #include @@ -413,6 +413,39 @@ void ieee80211_start_next_roc(struct ieee80211_local *local) } } +void ieee80211_reconfig_roc(struct ieee80211_local *local) +{ + struct ieee80211_roc_work *roc, *tmp; + + /* + * In the software implementation can just continue with the + * interruption due to reconfig, roc_work is still queued if + * needed. + */ + if (!local->ops->remain_on_channel) + return; + + /* flush work so nothing from the driver is still pending */ + wiphy_work_flush(local->hw.wiphy, &local->hw_roc_start); + wiphy_work_flush(local->hw.wiphy, &local->hw_roc_done); + + list_for_each_entry_safe(roc, tmp, &local->roc_list, list) { + if (!roc->started) + break; + + if (!roc->hw_begun) { + /* it didn't start in HW yet, so we can restart it */ + roc->started = false; + continue; + } + + /* otherwise destroy it and tell userspace */ + ieee80211_roc_notify_destroy(roc); + } + + ieee80211_start_next_roc(local); +} + static void __ieee80211_roc_work(struct ieee80211_local *local) { struct ieee80211_roc_work *roc; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 43625ca87d5e..927f752a0209 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -2175,8 +2175,7 @@ int ieee80211_reconfig(struct ieee80211_local *local) local->in_reconfig = false; barrier(); - /* Restart deferred ROCs */ - ieee80211_start_next_roc(local); + ieee80211_reconfig_roc(local); /* Requeue all works */ list_for_each_entry(sdata, &local->interfaces, list) -- cgit v1.2.3-58-ga151 From 8526f8c877baf3f9e678b31fd7d1066b776775cc Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 23 May 2024 12:02:13 +0200 Subject: wifi: nl80211: clean up coalescing rule handling There's no need to allocate a tiny struct and then an array again, just allocate the two together and use __counted_by(). Also unify the freeing. 
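For illustration, a minimal userspace sketch of the single-allocation pattern this patch switches to (the kernel version additionally annotates the array with __counted_by() and sizes the allocation with struct_size(); this standalone analogue only shows the layout and the single free):

#include <stdio.h>
#include <stdlib.h>

struct rules {
	int n_rules;
	int rules[];	/* flexible array member, allocated together with the header */
};

int main(void)
{
	int n = 4;
	struct rules *r = calloc(1, sizeof(*r) + n * sizeof(r->rules[0]));

	if (!r)
		return 1;
	r->n_rules = n;
	for (int i = 0; i < n; i++)
		r->rules[i] = i;
	printf("allocated %d rules in one block\n", r->n_rules);
	free(r);	/* one free() releases the header and the rules together */
	return 0;
}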
Reviewed-by: Miriam Rachel Korenblit Link: https://msgid.link/20240523120213.48a40cfb96f9.Ia02bf8f8fefbf533c64c5fa26175848d4a3a7899@changeid Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 2 +- net/wireless/core.c | 3 ++- net/wireless/nl80211.c | 47 ++++++++++++++++------------------------------- net/wireless/nl80211.h | 4 ++-- 4 files changed, 21 insertions(+), 35 deletions(-) (limited to 'net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index d79180bec7a1..5da9bb0ac6a4 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -3566,8 +3566,8 @@ struct cfg80211_coalesce_rules { * @n_rules: number of rules */ struct cfg80211_coalesce { - struct cfg80211_coalesce_rules *rules; int n_rules; + struct cfg80211_coalesce_rules rules[] __counted_by(n_rules); }; /** diff --git a/net/wireless/core.c b/net/wireless/core.c index 61f7cd8a8e9c..7c35349b9596 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -1145,7 +1145,8 @@ void wiphy_unregister(struct wiphy *wiphy) flush_work(&rdev->background_cac_abort_wk); cfg80211_rdev_free_wowlan(rdev); - cfg80211_rdev_free_coalesce(rdev); + cfg80211_free_coalesce(rdev->coalesce); + rdev->coalesce = NULL; } EXPORT_SYMBOL(wiphy_unregister); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 6ba988a6f5a2..8ff5f79d446a 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -13897,9 +13897,8 @@ nla_put_failure: return -ENOBUFS; } -void cfg80211_rdev_free_coalesce(struct cfg80211_registered_device *rdev) +void cfg80211_free_coalesce(struct cfg80211_coalesce *coalesce) { - struct cfg80211_coalesce *coalesce = rdev->coalesce; int i, j; struct cfg80211_coalesce_rules *rule; @@ -13908,13 +13907,13 @@ void cfg80211_rdev_free_coalesce(struct cfg80211_registered_device *rdev) for (i = 0; i < coalesce->n_rules; i++) { rule = &coalesce->rules[i]; + if (!rule) + continue; for (j = 0; j < rule->n_patterns; j++) kfree(rule->patterns[j].mask); kfree(rule->patterns); } - kfree(coalesce->rules); kfree(coalesce); - rdev->coalesce = NULL; } static int nl80211_parse_coalesce_rule(struct cfg80211_registered_device *rdev, @@ -14012,17 +14011,16 @@ static int nl80211_set_coalesce(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; const struct wiphy_coalesce_support *coalesce = rdev->wiphy.coalesce; - struct cfg80211_coalesce new_coalesce = {}; - struct cfg80211_coalesce *n_coalesce; - int err, rem_rule, n_rules = 0, i, j; + struct cfg80211_coalesce *new_coalesce; + int err, rem_rule, n_rules = 0, i; struct nlattr *rule; - struct cfg80211_coalesce_rules *tmp_rule; if (!rdev->wiphy.coalesce || !rdev->ops->set_coalesce) return -EOPNOTSUPP; if (!info->attrs[NL80211_ATTR_COALESCE_RULE]) { - cfg80211_rdev_free_coalesce(rdev); + cfg80211_free_coalesce(rdev->coalesce); + rdev->coalesce = NULL; rdev_set_coalesce(rdev, NULL); return 0; } @@ -14033,47 +14031,34 @@ static int nl80211_set_coalesce(struct sk_buff *skb, struct genl_info *info) if (n_rules > coalesce->n_rules) return -EINVAL; - new_coalesce.rules = kcalloc(n_rules, sizeof(new_coalesce.rules[0]), - GFP_KERNEL); - if (!new_coalesce.rules) + new_coalesce = kzalloc(struct_size(new_coalesce, rules, n_rules), + GFP_KERNEL); + if (!new_coalesce) return -ENOMEM; - new_coalesce.n_rules = n_rules; + new_coalesce->n_rules = n_rules; i = 0; nla_for_each_nested(rule, info->attrs[NL80211_ATTR_COALESCE_RULE], rem_rule) { err = nl80211_parse_coalesce_rule(rdev, rule, - &new_coalesce.rules[i]); + 
&new_coalesce->rules[i]); if (err) goto error; i++; } - err = rdev_set_coalesce(rdev, &new_coalesce); + err = rdev_set_coalesce(rdev, new_coalesce); if (err) goto error; - n_coalesce = kmemdup(&new_coalesce, sizeof(new_coalesce), GFP_KERNEL); - if (!n_coalesce) { - err = -ENOMEM; - goto error; - } - cfg80211_rdev_free_coalesce(rdev); - rdev->coalesce = n_coalesce; + cfg80211_free_coalesce(rdev->coalesce); + rdev->coalesce = new_coalesce; return 0; error: - for (i = 0; i < new_coalesce.n_rules; i++) { - tmp_rule = &new_coalesce.rules[i]; - if (!tmp_rule) - continue; - for (j = 0; j < tmp_rule->n_patterns; j++) - kfree(tmp_rule->patterns[j].mask); - kfree(tmp_rule->patterns); - } - kfree(new_coalesce.rules); + cfg80211_free_coalesce(new_coalesce); return err; } diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index 6376f3a87f8a..ffaab9a92e5b 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* * Portions of this file - * Copyright (C) 2018, 2020-2022 Intel Corporation + * Copyright (C) 2018, 2020-2024 Intel Corporation */ #ifndef __NET_WIRELESS_NL80211_H #define __NET_WIRELESS_NL80211_H @@ -119,7 +119,7 @@ nl80211_radar_notify(struct cfg80211_registered_device *rdev, void nl80211_send_ap_stopped(struct wireless_dev *wdev, unsigned int link_id); -void cfg80211_rdev_free_coalesce(struct cfg80211_registered_device *rdev); +void cfg80211_free_coalesce(struct cfg80211_coalesce *coalesce); /* peer measurement */ int nl80211_pmsr_start(struct sk_buff *skb, struct genl_info *info); -- cgit v1.2.3-58-ga151 From 5e514f1cba090e1c8fff03e92a175eccfe46305f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 28 May 2024 12:52:50 +0000 Subject: tcp: add tcp_done_with_error() helper tcp_reset() ends with a sequence that is carefuly ordered. We need to fix [e]poll bugs in the following patches, it makes sense to use a common helper. 
Suggested-by: Neal Cardwell Signed-off-by: Eric Dumazet Acked-by: Neal Cardwell Link: https://lore.kernel.org/r/20240528125253.1966136-2-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/tcp.h | 1 + net/ipv4/tcp.c | 2 +- net/ipv4/tcp_input.c | 32 +++++++++++++++++++++----------- 3 files changed, 23 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/include/net/tcp.h b/include/net/tcp.h index 060e95b331a2..32815a40dea1 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -677,6 +677,7 @@ void tcp_skb_collapse_tstamp(struct sk_buff *skb, /* tcp_input.c */ void tcp_rearm_rto(struct sock *sk); void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req); +void tcp_done_with_error(struct sock *sk, int err); void tcp_reset(struct sock *sk, struct sk_buff *skb); void tcp_fin(struct sock *sk); void tcp_check_space(struct sock *sk); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 681b54e1f3a6..2a8f8d8676ff 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -598,7 +598,7 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait) */ mask |= EPOLLOUT | EPOLLWRNORM; } - /* This barrier is coupled with smp_wmb() in tcp_reset() */ + /* This barrier is coupled with smp_wmb() in tcp_done_with_error() */ smp_rmb(); if (READ_ONCE(sk->sk_err) || !skb_queue_empty_lockless(&sk->sk_error_queue)) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 9c04a9c8be9d..5aadf64e554d 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4436,9 +4436,26 @@ static enum skb_drop_reason tcp_sequence(const struct tcp_sock *tp, return SKB_NOT_DROPPED_YET; } + +void tcp_done_with_error(struct sock *sk, int err) +{ + /* This barrier is coupled with smp_rmb() in tcp_poll() */ + WRITE_ONCE(sk->sk_err, err); + smp_wmb(); + + tcp_write_queue_purge(sk); + tcp_done(sk); + + if (!sock_flag(sk, SOCK_DEAD)) + sk_error_report(sk); +} +EXPORT_SYMBOL(tcp_done_with_error); + /* When we get a reset we do this. */ void tcp_reset(struct sock *sk, struct sk_buff *skb) { + int err; + trace_tcp_receive_reset(sk); /* mptcp can't tell us to ignore reset pkts, @@ -4450,24 +4467,17 @@ void tcp_reset(struct sock *sk, struct sk_buff *skb) /* We want the right error as BSD sees it (and indeed as we do). */ switch (sk->sk_state) { case TCP_SYN_SENT: - WRITE_ONCE(sk->sk_err, ECONNREFUSED); + err = ECONNREFUSED; break; case TCP_CLOSE_WAIT: - WRITE_ONCE(sk->sk_err, EPIPE); + err = EPIPE; break; case TCP_CLOSE: return; default: - WRITE_ONCE(sk->sk_err, ECONNRESET); + err = ECONNRESET; } - /* This barrier is coupled with smp_rmb() in tcp_poll() */ - smp_wmb(); - - tcp_write_queue_purge(sk); - tcp_done(sk); - - if (!sock_flag(sk, SOCK_DEAD)) - sk_error_report(sk); + tcp_done_with_error(sk, err); } /* -- cgit v1.2.3-58-ga151 From 853c3bd7b7917670224c9fe5245bd045cac411dd Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 28 May 2024 12:52:51 +0000 Subject: tcp: fix race in tcp_write_err() I noticed flakes in a packetdrill test, expecting an epoll_wait() to return EPOLLERR | EPOLLHUP on a failed connect() attempt, after multiple SYN retransmits. It sometimes return EPOLLERR only. The issue is that tcp_write_err(): 1) writes an error in sk->sk_err, 2) calls sk_error_report(), 3) then calls tcp_done(). tcp_done() is writing SHUTDOWN_MASK into sk->sk_shutdown, among other things. Problem is that the awaken user thread (from 2) sk_error_report()) might call tcp_poll() before tcp_done() has written sk->sk_shutdown. tcp_poll() only sees a non zero sk->sk_err and returns EPOLLERR. 
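Sketched as a timeline (an illustrative comment, not a patch hunk), the racy interleaving looks roughly like this:

/*
 *   CPU A (tcp_write_err)                  CPU B (task woken in epoll_wait)
 *   ------------------------------------   --------------------------------
 *   WRITE_ONCE(sk->sk_err, ETIMEDOUT);
 *   sk_error_report(sk);   --- wakes --->  tcp_poll():
 *                                            sk->sk_err != 0   -> EPOLLERR
 *                                            sk->sk_shutdown still 0,
 *                                            so no EPOLLHUP
 *   tcp_done(sk);      sets SHUTDOWN_MASK, but the waiter has already returned
 */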
This patch fixes the issue by making sure to call sk_error_report() after tcp_done(). tcp_write_err() also lacks an smp_wmb(). We can reuse tcp_done_with_error() to factor out the details, as Neal suggested. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Dumazet Acked-by: Neal Cardwell Link: https://lore.kernel.org/r/20240528125253.1966136-3-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/tcp_timer.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 83fe7f62f7f1..3e8604ae7d06 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -74,11 +74,7 @@ u32 tcp_clamp_probe0_to_user_timeout(const struct sock *sk, u32 when) static void tcp_write_err(struct sock *sk) { - WRITE_ONCE(sk->sk_err, READ_ONCE(sk->sk_err_soft) ? : ETIMEDOUT); - sk_error_report(sk); - - tcp_write_queue_purge(sk); - tcp_done(sk); + tcp_done_with_error(sk, READ_ONCE(sk->sk_err_soft) ? : ETIMEDOUT); __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONTIMEOUT); } -- cgit v1.2.3-58-ga151 From 5ce4645c23cf5f048eb8e9ce49e514bababdee85 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 28 May 2024 12:52:52 +0000 Subject: tcp: fix races in tcp_abort() tcp_abort() has the same issue than the one fixed in the prior patch in tcp_write_err(). In order to get consistent results from tcp_poll(), we must call sk_error_report() after tcp_done(). We can use tcp_done_with_error() to centralize this logic. Fixes: c1e64e298b8c ("net: diag: Support destroying TCP sockets.") Signed-off-by: Eric Dumazet Acked-by: Neal Cardwell Link: https://lore.kernel.org/r/20240528125253.1966136-4-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/tcp.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 2a8f8d8676ff..5fa68e7f6ddb 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -4576,14 +4576,10 @@ int tcp_abort(struct sock *sk, int err) bh_lock_sock(sk); if (!sock_flag(sk, SOCK_DEAD)) { - WRITE_ONCE(sk->sk_err, err); - /* This barrier is coupled with smp_rmb() in tcp_poll() */ - smp_wmb(); - sk_error_report(sk); if (tcp_need_reset(sk->sk_state)) tcp_send_active_reset(sk, GFP_ATOMIC, SK_RST_REASON_NOT_SPECIFIED); - tcp_done(sk); + tcp_done_with_error(sk, err); } bh_unlock_sock(sk); -- cgit v1.2.3-58-ga151 From fde6f897f2a184546bf5516ac736523ef24dc6a7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 28 May 2024 12:52:53 +0000 Subject: tcp: fix races in tcp_v[46]_err() These functions have races when they: 1) Write sk->sk_err 2) call sk_error_report(sk) 3) call tcp_done(sk) As described in prior patches in this series: An smp_wmb() is missing. We should call tcp_done() before sk_error_report(sk) to have consistent tcp_poll() results on SMP hosts. Use tcp_done_with_error() where we centralized the correct sequence. 
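For reference, a small userspace check in the spirit of the packetdrill expectation mentioned earlier in this series (illustrative only, not the actual test; the port number is arbitrary and other bits such as EPOLLOUT may also be reported): after a non-blocking connect() that subsequently fails, the waiter should now consistently observe EPOLLERR and EPOLLHUP together.

#include <stdio.h>
#include <unistd.h>
#include <sys/epoll.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>

int main(void)
{
	int fd = socket(AF_INET, SOCK_STREAM | SOCK_NONBLOCK, 0);
	int ep = epoll_create1(0);
	struct sockaddr_in sa = { .sin_family = AF_INET, .sin_port = htons(1) };
	struct epoll_event ev = { .events = EPOLLOUT }, out;

	inet_pton(AF_INET, "127.0.0.1", &sa.sin_addr);
	/* typically returns -1/EINPROGRESS; the connection is then refused */
	connect(fd, (struct sockaddr *)&sa, sizeof(sa));

	epoll_ctl(ep, EPOLL_CTL_ADD, fd, &ev);
	if (epoll_wait(ep, &out, 1, 5000) == 1)
		printf("ERR=%d HUP=%d\n",
		       !!(out.events & EPOLLERR), !!(out.events & EPOLLHUP));

	close(fd);
	close(ep);
	return 0;
}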
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Dumazet Acked-by: Neal Cardwell Link: https://lore.kernel.org/r/20240528125253.1966136-5-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/tcp_ipv4.c | 11 +++-------- net/ipv6/tcp_ipv6.c | 10 +++------- 2 files changed, 6 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 8f70b8d1d1e5..041c7eda9abe 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -611,15 +611,10 @@ int tcp_v4_err(struct sk_buff *skb, u32 info) ip_icmp_error(sk, skb, err, th->dest, info, (u8 *)th); - if (!sock_owned_by_user(sk)) { - WRITE_ONCE(sk->sk_err, err); - - sk_error_report(sk); - - tcp_done(sk); - } else { + if (!sock_owned_by_user(sk)) + tcp_done_with_error(sk, err); + else WRITE_ONCE(sk->sk_err_soft, err); - } goto out; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 750aa681779c..1ac7502e1bf5 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -490,14 +490,10 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th); - if (!sock_owned_by_user(sk)) { - WRITE_ONCE(sk->sk_err, err); - sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */ - - tcp_done(sk); - } else { + if (!sock_owned_by_user(sk)) + tcp_done_with_error(sk, err); + else WRITE_ONCE(sk->sk_err_soft, err); - } goto out; case TCP_LISTEN: break; -- cgit v1.2.3-58-ga151 From bbb31b7ae14594aa2a7e74923ee38f312404ad66 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 28 May 2024 16:05:09 +0100 Subject: net: dsa: remove mac_prepare()/mac_finish() shims No DSA driver makes use of the mac_prepare()/mac_finish() shimmed operations anymore, so we can remove these. 
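For context, a hypothetical driver-side fragment (the foo_* names are invented; this is a sketch, not a complete or buildable driver): instead of the removed shims, a converted driver exposes its phylink operations directly via a phylink_mac_ops table referenced from the switch structure, mirroring the dsa_port_phylink_mac_ops table in the diff below.

static const struct phylink_mac_ops foo_phylink_mac_ops = {
	.mac_select_pcs	= foo_mac_select_pcs,
	.mac_config	= foo_mac_config,
	.mac_link_down	= foo_mac_link_down,
	.mac_link_up	= foo_mac_link_up,
};

/* in the driver's setup path, before registering the switch */
ds->phylink_mac_ops = &foo_phylink_mac_ops;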
Signed-off-by: Russell King (Oracle) Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/E1sByNx-00ELW1-Vp@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- include/net/dsa.h | 6 ------ net/dsa/dsa.c | 2 -- net/dsa/port.c | 32 -------------------------------- 3 files changed, 40 deletions(-) (limited to 'net') diff --git a/include/net/dsa.h b/include/net/dsa.h index b60e7e410aba..f9ae3ca66b6f 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -882,15 +882,9 @@ struct dsa_switch_ops { struct phylink_pcs *(*phylink_mac_select_pcs)(struct dsa_switch *ds, int port, phy_interface_t iface); - int (*phylink_mac_prepare)(struct dsa_switch *ds, int port, - unsigned int mode, - phy_interface_t interface); void (*phylink_mac_config)(struct dsa_switch *ds, int port, unsigned int mode, const struct phylink_link_state *state); - int (*phylink_mac_finish)(struct dsa_switch *ds, int port, - unsigned int mode, - phy_interface_t interface); void (*phylink_mac_link_down)(struct dsa_switch *ds, int port, unsigned int mode, phy_interface_t interface); diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 12521a7d4048..668c729946ea 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -1507,9 +1507,7 @@ static int dsa_switch_probe(struct dsa_switch *ds) if (ds->phylink_mac_ops) { if (ds->ops->phylink_mac_select_pcs || - ds->ops->phylink_mac_prepare || ds->ops->phylink_mac_config || - ds->ops->phylink_mac_finish || ds->ops->phylink_mac_link_down || ds->ops->phylink_mac_link_up) return -EINVAL; diff --git a/net/dsa/port.c b/net/dsa/port.c index 9a249d4ac3a5..e23db9507546 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -1549,21 +1549,6 @@ dsa_port_phylink_mac_select_pcs(struct phylink_config *config, return pcs; } -static int dsa_port_phylink_mac_prepare(struct phylink_config *config, - unsigned int mode, - phy_interface_t interface) -{ - struct dsa_port *dp = dsa_phylink_to_port(config); - struct dsa_switch *ds = dp->ds; - int err = 0; - - if (ds->ops->phylink_mac_prepare) - err = ds->ops->phylink_mac_prepare(ds, dp->index, mode, - interface); - - return err; -} - static void dsa_port_phylink_mac_config(struct phylink_config *config, unsigned int mode, const struct phylink_link_state *state) @@ -1577,21 +1562,6 @@ static void dsa_port_phylink_mac_config(struct phylink_config *config, ds->ops->phylink_mac_config(ds, dp->index, mode, state); } -static int dsa_port_phylink_mac_finish(struct phylink_config *config, - unsigned int mode, - phy_interface_t interface) -{ - struct dsa_port *dp = dsa_phylink_to_port(config); - struct dsa_switch *ds = dp->ds; - int err = 0; - - if (ds->ops->phylink_mac_finish) - err = ds->ops->phylink_mac_finish(ds, dp->index, mode, - interface); - - return err; -} - static void dsa_port_phylink_mac_link_down(struct phylink_config *config, unsigned int mode, phy_interface_t interface) @@ -1624,9 +1594,7 @@ static void dsa_port_phylink_mac_link_up(struct phylink_config *config, static const struct phylink_mac_ops dsa_port_phylink_mac_ops = { .mac_select_pcs = dsa_port_phylink_mac_select_pcs, - .mac_prepare = dsa_port_phylink_mac_prepare, .mac_config = dsa_port_phylink_mac_config, - .mac_finish = dsa_port_phylink_mac_finish, .mac_link_down = dsa_port_phylink_mac_link_down, .mac_link_up = dsa_port_phylink_mac_link_up, }; -- cgit v1.2.3-58-ga151 From 73287fe228721b05690e671adbcccc6cf5435be6 Mon Sep 17 00:00:00 2001 From: Kui-Feng Lee Date: Wed, 29 May 2024 23:59:39 -0700 Subject: bpf: pass bpf_struct_ops_link to callbacks in bpf_struct_ops. 
Pass an additional pointer of bpf_struct_ops_link to callback function reg, unreg, and update provided by subsystems defined in bpf_struct_ops. A bpf_struct_ops_map can be registered for multiple links. Passing a pointer of bpf_struct_ops_link helps subsystems to distinguish them. This pointer will be used in the later patches to let the subsystem initiate a detachment on a link that was registered to it previously. Signed-off-by: Kui-Feng Lee Link: https://lore.kernel.org/r/20240530065946.979330-2-thinker.li@gmail.com Signed-off-by: Martin KaFai Lau --- include/linux/bpf.h | 6 +++--- kernel/bpf/bpf_struct_ops.c | 10 +++++----- net/bpf/bpf_dummy_struct_ops.c | 4 ++-- net/ipv4/bpf_tcp_ca.c | 6 +++--- tools/testing/selftests/bpf/bpf_test_no_cfi/bpf_test_no_cfi.c | 4 ++-- tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c | 6 +++--- 6 files changed, 18 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 5e694a308081..19f8836382fc 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1730,9 +1730,9 @@ struct bpf_struct_ops { int (*init_member)(const struct btf_type *t, const struct btf_member *member, void *kdata, const void *udata); - int (*reg)(void *kdata); - void (*unreg)(void *kdata); - int (*update)(void *kdata, void *old_kdata); + int (*reg)(void *kdata, struct bpf_link *link); + void (*unreg)(void *kdata, struct bpf_link *link); + int (*update)(void *kdata, void *old_kdata, struct bpf_link *link); int (*validate)(void *kdata); void *cfi_stubs; struct module *owner; diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c index 86c7884abaf8..1542dded7489 100644 --- a/kernel/bpf/bpf_struct_ops.c +++ b/kernel/bpf/bpf_struct_ops.c @@ -757,7 +757,7 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, goto unlock; } - err = st_ops->reg(kdata); + err = st_ops->reg(kdata, NULL); if (likely(!err)) { /* This refcnt increment on the map here after * 'st_ops->reg()' is secure since the state of the @@ -805,7 +805,7 @@ static long bpf_struct_ops_map_delete_elem(struct bpf_map *map, void *key) BPF_STRUCT_OPS_STATE_TOBEFREE); switch (prev_state) { case BPF_STRUCT_OPS_STATE_INUSE: - st_map->st_ops_desc->st_ops->unreg(&st_map->kvalue.data); + st_map->st_ops_desc->st_ops->unreg(&st_map->kvalue.data, NULL); bpf_map_put(map); return 0; case BPF_STRUCT_OPS_STATE_TOBEFREE: @@ -1060,7 +1060,7 @@ static void bpf_struct_ops_map_link_dealloc(struct bpf_link *link) /* st_link->map can be NULL if * bpf_struct_ops_link_create() fails to register. 
*/ - st_map->st_ops_desc->st_ops->unreg(&st_map->kvalue.data); + st_map->st_ops_desc->st_ops->unreg(&st_map->kvalue.data, link); bpf_map_put(&st_map->map); } kfree(st_link); @@ -1125,7 +1125,7 @@ static int bpf_struct_ops_map_link_update(struct bpf_link *link, struct bpf_map goto err_out; } - err = st_map->st_ops_desc->st_ops->update(st_map->kvalue.data, old_st_map->kvalue.data); + err = st_map->st_ops_desc->st_ops->update(st_map->kvalue.data, old_st_map->kvalue.data, link); if (err) goto err_out; @@ -1176,7 +1176,7 @@ int bpf_struct_ops_link_create(union bpf_attr *attr) if (err) goto err_out; - err = st_map->st_ops_desc->st_ops->reg(st_map->kvalue.data); + err = st_map->st_ops_desc->st_ops->reg(st_map->kvalue.data, &link->link); if (err) { bpf_link_cleanup(&link_primer); link = NULL; diff --git a/net/bpf/bpf_dummy_struct_ops.c b/net/bpf/bpf_dummy_struct_ops.c index 891cdf61c65a..3ea52b05adfb 100644 --- a/net/bpf/bpf_dummy_struct_ops.c +++ b/net/bpf/bpf_dummy_struct_ops.c @@ -272,12 +272,12 @@ static int bpf_dummy_init_member(const struct btf_type *t, return -EOPNOTSUPP; } -static int bpf_dummy_reg(void *kdata) +static int bpf_dummy_reg(void *kdata, struct bpf_link *link) { return -EOPNOTSUPP; } -static void bpf_dummy_unreg(void *kdata) +static void bpf_dummy_unreg(void *kdata, struct bpf_link *link) { } diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c index 18227757ec0c..3f88d0961e5b 100644 --- a/net/ipv4/bpf_tcp_ca.c +++ b/net/ipv4/bpf_tcp_ca.c @@ -260,17 +260,17 @@ static int bpf_tcp_ca_check_member(const struct btf_type *t, return 0; } -static int bpf_tcp_ca_reg(void *kdata) +static int bpf_tcp_ca_reg(void *kdata, struct bpf_link *link) { return tcp_register_congestion_control(kdata); } -static void bpf_tcp_ca_unreg(void *kdata) +static void bpf_tcp_ca_unreg(void *kdata, struct bpf_link *link) { tcp_unregister_congestion_control(kdata); } -static int bpf_tcp_ca_update(void *kdata, void *old_kdata) +static int bpf_tcp_ca_update(void *kdata, void *old_kdata, struct bpf_link *link) { return tcp_update_congestion_control(kdata, old_kdata); } diff --git a/tools/testing/selftests/bpf/bpf_test_no_cfi/bpf_test_no_cfi.c b/tools/testing/selftests/bpf/bpf_test_no_cfi/bpf_test_no_cfi.c index b1dd889d5d7d..948eb3962732 100644 --- a/tools/testing/selftests/bpf/bpf_test_no_cfi/bpf_test_no_cfi.c +++ b/tools/testing/selftests/bpf/bpf_test_no_cfi/bpf_test_no_cfi.c @@ -22,12 +22,12 @@ static int dummy_init_member(const struct btf_type *t, return 0; } -static int dummy_reg(void *kdata) +static int dummy_reg(void *kdata, struct bpf_link *link) { return 0; } -static void dummy_unreg(void *kdata) +static void dummy_unreg(void *kdata, struct bpf_link *link) { } diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c index 2a18bd320e92..0a09732cde4b 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c @@ -820,7 +820,7 @@ static const struct bpf_verifier_ops bpf_testmod_verifier_ops = { .is_valid_access = bpf_testmod_ops_is_valid_access, }; -static int bpf_dummy_reg(void *kdata) +static int bpf_dummy_reg(void *kdata, struct bpf_link *link) { struct bpf_testmod_ops *ops = kdata; @@ -835,7 +835,7 @@ static int bpf_dummy_reg(void *kdata) return 0; } -static void bpf_dummy_unreg(void *kdata) +static void bpf_dummy_unreg(void *kdata, struct bpf_link *link) { } @@ -871,7 +871,7 @@ struct bpf_struct_ops bpf_bpf_testmod_ops = { .owner = THIS_MODULE, }; -static int 
bpf_dummy_reg2(void *kdata) +static int bpf_dummy_reg2(void *kdata, struct bpf_link *link) { struct bpf_testmod_ops2 *ops = kdata; -- cgit v1.2.3-58-ga151 From a79d8fe2ff8e78e549dc86cc853a61b029404871 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 29 May 2024 12:09:08 +0800 Subject: ipv6: sr: restruct ifdefines There are too many ifdef in IPv6 segment routing code that may cause logic problems. like commit 160e9d275218 ("ipv6: sr: fix invalid unregister error path"). To avoid this, the init functions are redefined for both cases. The code could be more clear after all fidefs are removed. Suggested-by: Simon Horman Suggested-by: David Ahern Signed-off-by: Hangbin Liu Reviewed-by: Sabrina Dubroca Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240529040908.3472952-1-liuhangbin@gmail.com Signed-off-by: Jakub Kicinski --- include/net/seg6.h | 7 +++++++ include/net/seg6_hmac.h | 7 +++++++ net/ipv6/seg6.c | 33 +++++---------------------------- 3 files changed, 19 insertions(+), 28 deletions(-) (limited to 'net') diff --git a/include/net/seg6.h b/include/net/seg6.h index af668f17b398..82b3fbbcbb93 100644 --- a/include/net/seg6.h +++ b/include/net/seg6.h @@ -52,10 +52,17 @@ static inline struct seg6_pernet_data *seg6_pernet(struct net *net) extern int seg6_init(void); extern void seg6_exit(void); +#ifdef CONFIG_IPV6_SEG6_LWTUNNEL extern int seg6_iptunnel_init(void); extern void seg6_iptunnel_exit(void); extern int seg6_local_init(void); extern void seg6_local_exit(void); +#else +static inline int seg6_iptunnel_init(void) { return 0; } +static inline void seg6_iptunnel_exit(void) {} +static inline int seg6_local_init(void) { return 0; } +static inline void seg6_local_exit(void) {} +#endif extern bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len, bool reduced); extern struct ipv6_sr_hdr *seg6_get_srh(struct sk_buff *skb, int flags); diff --git a/include/net/seg6_hmac.h b/include/net/seg6_hmac.h index 2b5d2ee5613e..24f733b3e3fe 100644 --- a/include/net/seg6_hmac.h +++ b/include/net/seg6_hmac.h @@ -49,9 +49,16 @@ extern int seg6_hmac_info_del(struct net *net, u32 key); extern int seg6_push_hmac(struct net *net, struct in6_addr *saddr, struct ipv6_sr_hdr *srh); extern bool seg6_hmac_validate_skb(struct sk_buff *skb); +#ifdef CONFIG_IPV6_SEG6_HMAC extern int seg6_hmac_init(void); extern void seg6_hmac_exit(void); extern int seg6_hmac_net_init(struct net *net); extern void seg6_hmac_net_exit(struct net *net); +#else +static inline int seg6_hmac_init(void) { return 0; } +static inline void seg6_hmac_exit(void) {} +static inline int seg6_hmac_net_init(struct net *net) { return 0; } +static inline void seg6_hmac_net_exit(struct net *net) {} +#endif #endif diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c index a31521e270f7..180da19c148c 100644 --- a/net/ipv6/seg6.c +++ b/net/ipv6/seg6.c @@ -21,9 +21,7 @@ #include #include #include -#ifdef CONFIG_IPV6_SEG6_HMAC #include -#endif bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len, bool reduced) { @@ -437,13 +435,11 @@ static int __net_init seg6_net_init(struct net *net) net->ipv6.seg6_data = sdata; -#ifdef CONFIG_IPV6_SEG6_HMAC if (seg6_hmac_net_init(net)) { kfree(rcu_dereference_raw(sdata->tun_src)); kfree(sdata); return -ENOMEM; } -#endif return 0; } @@ -452,9 +448,7 @@ static void __net_exit seg6_net_exit(struct net *net) { struct seg6_pernet_data *sdata = seg6_pernet(net); -#ifdef CONFIG_IPV6_SEG6_HMAC seg6_hmac_net_exit(net); -#endif kfree(rcu_dereference_raw(sdata->tun_src)); kfree(sdata); @@ -520,41 +514,28 @@ int __init 
seg6_init(void) if (err) goto out_unregister_pernet; -#ifdef CONFIG_IPV6_SEG6_LWTUNNEL err = seg6_iptunnel_init(); if (err) goto out_unregister_genl; err = seg6_local_init(); - if (err) { - seg6_iptunnel_exit(); - goto out_unregister_genl; - } -#endif + if (err) + goto out_unregister_iptun; -#ifdef CONFIG_IPV6_SEG6_HMAC err = seg6_hmac_init(); if (err) - goto out_unregister_iptun; -#endif + goto out_unregister_seg6; pr_info("Segment Routing with IPv6\n"); out: return err; -#ifdef CONFIG_IPV6_SEG6_HMAC -out_unregister_iptun: -#ifdef CONFIG_IPV6_SEG6_LWTUNNEL +out_unregister_seg6: seg6_local_exit(); +out_unregister_iptun: seg6_iptunnel_exit(); -#endif -#endif -#ifdef CONFIG_IPV6_SEG6_LWTUNNEL out_unregister_genl: -#endif -#if IS_ENABLED(CONFIG_IPV6_SEG6_LWTUNNEL) || IS_ENABLED(CONFIG_IPV6_SEG6_HMAC) genl_unregister_family(&seg6_genl_family); -#endif out_unregister_pernet: unregister_pernet_subsys(&ip6_segments_ops); goto out; @@ -562,13 +543,9 @@ out_unregister_pernet: void seg6_exit(void) { -#ifdef CONFIG_IPV6_SEG6_HMAC seg6_hmac_exit(); -#endif -#ifdef CONFIG_IPV6_SEG6_LWTUNNEL seg6_local_exit(); seg6_iptunnel_exit(); -#endif genl_unregister_family(&seg6_genl_family); unregister_pernet_subsys(&ip6_segments_ops); } -- cgit v1.2.3-58-ga151 From 73451e9aaa24e8e44cb91e5fd6b59bf53e069c1f Mon Sep 17 00:00:00 2001 From: Abhishek Chauhan Date: Wed, 29 May 2024 11:31:30 -0700 Subject: net: validate SO_TXTIME clockid coming from userspace Currently there are no strict checks while setting SO_TXTIME from userspace. With the recent development in skb->tstamp_type clockid with unsupported clocks results in warn_on_once, which causes unnecessary aborts in some systems which enables panic on warns. Add validation in setsockopt to support only CLOCK_REALTIME, CLOCK_MONOTONIC and CLOCK_TAI to be set from userspace. Link: https://lore.kernel.org/netdev/bc037db4-58bb-4861-ac31-a361a93841d3@linux.dev/ Link: https://lore.kernel.org/lkml/6bdba7b6-fd22-4ea5-a356-12268674def1@quicinc.com/ Fixes: 1693c5db6ab8 ("net: Add additional bit to support clockid_t timestamp type") Reported-by: syzbot+d7b227731ec589e7f4f0@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=d7b227731ec589e7f4f0 Reported-by: syzbot+30a35a2e9c5067cc43fa@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=30a35a2e9c5067cc43fa Signed-off-by: Abhishek Chauhan Acked-by: Martin KaFai Lau Reviewed-by: Willem de Bruijn Link: https://lore.kernel.org/r/20240529183130.1717083-1-quic_abchauha@quicinc.com Signed-off-by: Jakub Kicinski --- net/core/sock.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index 521e6373d4f7..69baddcfbd8c 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1083,6 +1083,17 @@ bool sockopt_capable(int cap) } EXPORT_SYMBOL(sockopt_capable); +static int sockopt_validate_clockid(__kernel_clockid_t value) +{ + switch (value) { + case CLOCK_REALTIME: + case CLOCK_MONOTONIC: + case CLOCK_TAI: + return 0; + } + return -EINVAL; +} + /* * This is meant for all protocols to use and covers goings on * at the socket level. Everything here is generic. 
@@ -1497,6 +1508,11 @@ set_sndbuf: ret = -EPERM; break; } + + ret = sockopt_validate_clockid(sk_txtime.clockid); + if (ret) + break; + sock_valbool_flag(sk, SOCK_TXTIME, true); sk->sk_clockid = sk_txtime.clockid; sk->sk_txtime_deadline_mode = -- cgit v1.2.3-58-ga151 From 19249c0724f2048ab68179eac69004947b07d431 Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Fri, 31 May 2024 01:27:21 +0200 Subject: net: make net.core.{r,w}mem_{default,max} namespaced The following sysctl are global and can't be read from a netns: net.core.rmem_default net.core.rmem_max net.core.wmem_default net.core.wmem_max Make the following sysctl parameters available readonly from within a network namespace, allowing a container to read them. Signed-off-by: Matteo Croce Reviewed-by: Eric Dumazet Reviewed-by: Shakeel Butt Link: https://lore.kernel.org/r/20240530232722.45255-2-technoboy85@gmail.com Signed-off-by: Jakub Kicinski --- net/core/sysctl_net_core.c | 75 ++++++++++++++++++++++++++-------------------- 1 file changed, 42 insertions(+), 33 deletions(-) (limited to 'net') diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index c9fb9ad87485..2079000691e2 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -382,38 +382,6 @@ proc_dolongvec_minmax_bpf_restricted(struct ctl_table *table, int write, #endif static struct ctl_table net_core_table[] = { - { - .procname = "wmem_max", - .data = &sysctl_wmem_max, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &min_sndbuf, - }, - { - .procname = "rmem_max", - .data = &sysctl_rmem_max, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &min_rcvbuf, - }, - { - .procname = "wmem_default", - .data = &sysctl_wmem_default, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &min_sndbuf, - }, - { - .procname = "rmem_default", - .data = &sysctl_rmem_default, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &min_rcvbuf, - }, { .procname = "mem_pcpu_rsv", .data = &net_hotdata.sysctl_mem_pcpu_rsv, @@ -697,6 +665,41 @@ static struct ctl_table netns_core_table[] = { .extra2 = SYSCTL_ONE, .proc_handler = proc_dou8vec_minmax, }, + /* sysctl_core_net_init() will set the values after this + * to readonly in network namespaces + */ + { + .procname = "wmem_max", + .data = &sysctl_wmem_max, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &min_sndbuf, + }, + { + .procname = "rmem_max", + .data = &sysctl_rmem_max, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &min_rcvbuf, + }, + { + .procname = "wmem_default", + .data = &sysctl_wmem_default, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &min_sndbuf, + }, + { + .procname = "rmem_default", + .data = &sysctl_rmem_default, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &min_rcvbuf, + }, }; static int __init fb_tunnels_only_for_init_net_sysctl_setup(char *str) @@ -724,8 +727,14 @@ static __net_init int sysctl_core_net_init(struct net *net) if (tbl == NULL) goto err_dup; - for (i = 0; i < table_size; ++i) + for (i = 0; i < table_size; ++i) { + if (tbl[i].data == &sysctl_wmem_max) + break; + tbl[i].data += (char *)net - (char *)&init_net; + } + for (; i < table_size; ++i) + tbl[i].mode &= ~0222; } net->core.sysctl_hdr = register_net_sysctl_sz(net, "net/core", 
tbl, table_size); -- cgit v1.2.3-58-ga151 From b5c089880723b2c18531c40e445235bd646a51d1 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 29 May 2024 07:46:48 -0700 Subject: af_unix: Remove dead code in unix_stream_read_generic(). When splice() support was added in commit 2b514574f7e8 ("net: af_unix: implement splice for stream af_unix sockets"), we had to release unix_sk(sk)->readlock (current iolock) before calling splice_to_pipe(). Due to the unlock, commit 73ed5d25dce0 ("af-unix: fix use-after-free with concurrent readers while splicing") added a safeguard in unix_stream_read_generic(); we had to bump the skb refcount before calling ->recv_actor() and then check if the skb was consumed by a concurrent reader. However, the pipe side locking was refactored, and since commit 25869262ef7a ("skb_splice_bits(): get rid of callback"), we can call splice_to_pipe() without releasing unix_sk(sk)->iolock. Now, the skb is always alive after the ->recv_actor() callback, so let's remove the unnecessary drop_skb logic. This is mostly the revert of commit 73ed5d25dce0 ("af-unix: fix use-after-free with concurrent readers while splicing"). Signed-off-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20240529144648.68591-1-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- net/unix/af_unix.c | 21 ++------------------- 1 file changed, 2 insertions(+), 19 deletions(-) (limited to 'net') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 25b49efc0926..861793b489f6 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -654,8 +654,8 @@ static void unix_release_sock(struct sock *sk, int embrion) while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) { if (state == TCP_LISTEN) unix_release_sock(skb->sk, 1); + /* passed fds are erased in the kfree_skb hook */ - UNIXCB(skb).consumed = skb->len; kfree_skb(skb); } @@ -2704,9 +2704,8 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state, skip = max(sk_peek_offset(sk, flags), 0); do { - int chunk; - bool drop_skb; struct sk_buff *skb, *last; + int chunk; redo: unix_state_lock(sk); @@ -2802,11 +2801,7 @@ unlock: } chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size); - skb_get(skb); chunk = state->recv_actor(skb, skip, chunk, state); - drop_skb = !unix_skb_len(skb); - /* skb is only safe to use if !drop_skb */ - consume_skb(skb); if (chunk < 0) { if (copied == 0) copied = -EFAULT; @@ -2815,18 +2810,6 @@ unlock: copied += chunk; size -= chunk; - if (drop_skb) { - /* the skb was touched by a concurrent reader; - * we should not expect anything from this skb - * anymore and assume it invalid - we can be - * sure it was dropped from the socket queue - * - * let's report a short read - */ - err = 0; - break; - } - /* Mark read part of skb as used */ if (!(flags & MSG_PEEK)) { UNIXCB(skb).consumed += chunk; -- cgit v1.2.3-58-ga151 From 3ac14b9dfbd345e891d48d89f6c2fa519848f0f4 Mon Sep 17 00:00:00 2001 From: Guangguan Wang Date: Mon, 3 Jun 2024 11:00:18 +0800 Subject: net/smc: set rmb's SG_MAX_SINGLE_ALLOC limitation only when CONFIG_ARCH_NO_SG_CHAIN is defined SG_MAX_SINGLE_ALLOC is used to limit maximum number of entries that will be allocated in one piece of scatterlist. When the entries of scatterlist exceeds SG_MAX_SINGLE_ALLOC, sg chain will be used. From commit 7c703e54cc71 ("arch: switch the default on ARCH_HAS_SG_CHAIN"), we can know that the macro CONFIG_ARCH_NO_SG_CHAIN is used to identify whether sg chain is supported. 
So, SMC-R's rmb buffer should be limited by SG_MAX_SINGLE_ALLOC only when the macro CONFIG_ARCH_NO_SG_CHAIN is defined. Fixes: a3fe3d01bd0d ("net/smc: introduce sg-logic for RMBs") Signed-off-by: Guangguan Wang Co-developed-by: Wen Gu Signed-off-by: Wen Gu Signed-off-by: David S. Miller --- net/smc/smc_core.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index fafdb97adfad..acca3b1a068f 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -2015,7 +2015,6 @@ out: */ static u8 smc_compress_bufsize(int size, bool is_smcd, bool is_rmb) { - const unsigned int max_scat = SG_MAX_SINGLE_ALLOC * PAGE_SIZE; u8 compressed; if (size <= SMC_BUF_MIN_SIZE) @@ -2025,9 +2024,11 @@ static u8 smc_compress_bufsize(int size, bool is_smcd, bool is_rmb) compressed = min_t(u8, ilog2(size) + 1, is_smcd ? SMCD_DMBE_SIZES : SMCR_RMBE_SIZES); +#ifdef CONFIG_ARCH_NO_SG_CHAIN if (!is_smcd && is_rmb) /* RMBs are backed by & limited to max size of scatterlists */ - compressed = min_t(u8, compressed, ilog2(max_scat >> 14)); + compressed = min_t(u8, compressed, ilog2((SG_MAX_SINGLE_ALLOC * PAGE_SIZE) >> 14)); +#endif return compressed; } -- cgit v1.2.3-58-ga151 From 2f4b101c542e102c680d136fc6dca081e3fbda4a Mon Sep 17 00:00:00 2001 From: Guangguan Wang Date: Mon, 3 Jun 2024 11:00:19 +0800 Subject: net/smc: change SMCR_RMBE_SIZES from 5 to 15 SMCR_RMBE_SIZES is the upper boundary of SMC-R's snd_buf and rcv_buf. The maximum bytes of snd_buf and rcv_buf can be calculated by 2^SMCR_ RMBE_SIZES * 16KB. SMCR_RMBE_SIZES = 5 means the upper boundary is 512KB. TCP's snd_buf and rcv_buf max size is configured by net.ipv4.tcp_w/rmem[2] whose default value is 4MB or 6MB, is much larger than SMC-R's upper boundary. In some scenarios, such as Recommendation System, the communication pattern is mainly large size send/recv, where the size of snd_buf and rcv_buf greatly affects performance. Due to the upper boundary disadvantage, SMC-R performs poor than TCP in those scenarios. So it is time to enlarge the upper boundary size of SMC-R's snd_buf and rcv_buf, so that the SMC-R's snd_buf and rcv_buf can be configured to larger size for performance gain in such scenarios. The SMC-R rcv_buf's size will be transferred to peer by the field rmbe_size in clc accept and confirm message. The length of the field rmbe_size is four bits, which means the maximum value of SMCR_RMBE_SIZES is 15. In case of frequently adjusting the value of SMCR_RMBE_SIZES in different scenarios, set the value of SMCR_RMBE_SIZES to the maximum value 15, which means the upper boundary of SMC-R's snd_buf and rcv_buf is 512MB. As the real memory usage is determined by the value of net.smc.w/rmem, not by the upper boundary, set the value of SMCR_RMBE_SIZES to the maximum value has no side affects. Signed-off-by: Guangguan Wang Co-developed-by: Wen Gu Signed-off-by: Wen Gu Signed-off-by: David S. Miller --- net/smc/smc_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index acca3b1a068f..3b95828d9976 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -2006,7 +2006,7 @@ out: } #define SMCD_DMBE_SIZES 6 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */ -#define SMCR_RMBE_SIZES 5 /* 0 -> 16KB, 1 -> 32KB, .. 5 -> 512KB */ +#define SMCR_RMBE_SIZES 15 /* 0 -> 16KB, 1 -> 32KB, .. 15 -> 512MB */ /* convert the RMB size into the compressed notation (minimum 16K, see * SMCD/R_DMBE_SIZES. 
-- cgit v1.2.3-58-ga151 From 4fdb6b6063f07d959a1c52a2ee580afc4da34e2d Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 29 May 2024 09:25:27 -0700 Subject: net: count drops due to missing qdisc as dev->tx_drops Catching and debugging missing qdiscs is pretty tricky. When qdisc is deleted we replace it with a noop qdisc, which silently drops all the packets. Since the noop qdisc has a single static instance we can't count drops at the qdisc level. Count them as dev->tx_drops. ip netns add red ip link add type veth peer netns red ip link set dev veth0 up ip -netns red link set dev veth0 up ip a a dev veth0 10.0.0.1/24 ip -netns red a a dev veth0 10.0.0.2/24 ping -c 2 10.0.0.2 # 2 packets transmitted, 2 received, 0% packet loss, time 1031ms ip -s link show dev veth0 # TX: bytes packets errors dropped carrier collsns # 1314 17 0 0 0 0 tc qdisc replace dev veth0 root handle 1234: mq tc qdisc replace dev veth0 parent 1234:1 pfifo tc qdisc del dev veth0 parent 1234:1 ping -c 2 10.0.0.2 # 2 packets transmitted, 0 received, 100% packet loss, time 1034ms ip -s link show dev veth0 # TX: bytes packets errors dropped carrier collsns # 1314 17 0 3 0 0 Signed-off-by: Jakub Kicinski Link: https://lore.kernel.org/r/20240529162527.3688979-1-kuba@kernel.org Signed-off-by: Paolo Abeni --- net/sched/sch_generic.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 2a637a17061b..1417f1991452 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -633,6 +633,7 @@ EXPORT_SYMBOL_GPL(netif_carrier_event); static int noop_enqueue(struct sk_buff *skb, struct Qdisc *qdisc, struct sk_buff **to_free) { + dev_core_stats_tx_dropped_inc(skb->dev); __qdisc_drop(skb, to_free); return NET_XMIT_CN; } -- cgit v1.2.3-58-ga151 From 668b6a2ef832a878494cc1b12a881c8ec0494b25 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Thu, 30 May 2024 19:08:34 +0200 Subject: flow_dissector: add support for tunnel control flags Dissect [no]csum, [no]dontfrag, [no]oam, [no]crit flags from skb metadata. This is a prerequisite for matching these control flags using TC flower. 
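As background for how the dissected bits get used, here is a minimal self-contained sketch of value/mask flag matching, which is what the follow-up flower patch enables for these tunnel control flags (the FLAG_* constants are illustrative placeholders, not the real IP_TUNNEL_* bit positions):

#include <stdio.h>
#include <stdint.h>

#define FLAG_CSUM	(1u << 0)	/* placeholder bit assignments */
#define FLAG_DONTFRAG	(1u << 1)
#define FLAG_OAM	(1u << 2)
#define FLAG_CRIT	(1u << 3)

/* a rule matches when every bit covered by the mask agrees with the key */
static int flags_match(uint32_t pkt, uint32_t key, uint32_t mask)
{
	return (pkt & mask) == (key & mask);
}

int main(void)
{
	uint32_t pkt  = FLAG_CSUM | FLAG_DONTFRAG;
	uint32_t key  = FLAG_CSUM;		/* csum must be set */
	uint32_t mask = FLAG_CSUM | FLAG_OAM;	/* ...and oam must be clear; others don't care */

	printf("match: %s\n", flags_match(pkt, key, mask) ? "yes" : "no");
	return 0;
}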
Suggested-by: Ilya Maximets Signed-off-by: Davide Caratti Reviewed-by: Simon Horman Signed-off-by: Paolo Abeni --- include/net/flow_dissector.h | 9 +++++++++ include/net/ip_tunnels.h | 12 ++++++++++++ net/core/flow_dissector.c | 16 +++++++++++++++- 3 files changed, 36 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 9ab376d1a677..99626475c3f4 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -329,6 +329,14 @@ struct flow_dissector_key_cfm { #define FLOW_DIS_CFM_MDL_MASK GENMASK(7, 5) #define FLOW_DIS_CFM_MDL_MAX 7 +/** + * struct flow_dissector_key_enc_flags: tunnel metadata control flags + * @flags: tunnel control flags + */ +struct flow_dissector_key_enc_flags { + u32 flags; +}; + enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_CONTROL, /* struct flow_dissector_key_control */ FLOW_DISSECTOR_KEY_BASIC, /* struct flow_dissector_key_basic */ @@ -363,6 +371,7 @@ enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_L2TPV3, /* struct flow_dissector_key_l2tpv3 */ FLOW_DISSECTOR_KEY_CFM, /* struct flow_dissector_key_cfm */ FLOW_DISSECTOR_KEY_IPSEC, /* struct flow_dissector_key_ipsec */ + FLOW_DISSECTOR_KEY_ENC_FLAGS, /* struct flow_dissector_key_enc_flags */ FLOW_DISSECTOR_KEY_MAX, }; diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 9a6a08ec7713..5a530d4fb02c 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -247,6 +247,18 @@ static inline bool ip_tunnel_is_options_present(const unsigned long *flags) return ip_tunnel_flags_intersect(flags, present); } +static inline void ip_tunnel_set_encflags_present(unsigned long *flags) +{ + IP_TUNNEL_DECLARE_FLAGS(present) = { }; + + __set_bit(IP_TUNNEL_CSUM_BIT, present); + __set_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, present); + __set_bit(IP_TUNNEL_OAM_BIT, present); + __set_bit(IP_TUNNEL_CRIT_OPT_BIT, present); + + ip_tunnel_flags_or(flags, flags, present); +} + static inline bool ip_tunnel_flags_is_be16_compat(const unsigned long *flags) { IP_TUNNEL_DECLARE_FLAGS(supp) = { }; diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index f82e9a7d3b37..59fe46077b3c 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -382,7 +382,9 @@ skb_flow_dissect_tunnel_info(const struct sk_buff *skb, !dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_IP) && !dissector_uses_key(flow_dissector, - FLOW_DISSECTOR_KEY_ENC_OPTS)) + FLOW_DISSECTOR_KEY_ENC_OPTS) && + !dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_ENC_FLAGS)) return; info = skb_tunnel_info(skb); @@ -475,6 +477,18 @@ skb_flow_dissect_tunnel_info(const struct sk_buff *skb, IP_TUNNEL_GENEVE_OPT_BIT); enc_opt->dst_opt_type = val < __IP_TUNNEL_FLAG_NUM ? 
val : 0; } + + if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_FLAGS)) { + struct flow_dissector_key_enc_flags *enc_flags; + IP_TUNNEL_DECLARE_FLAGS(flags) = {}; + + enc_flags = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_ENC_FLAGS, + target_container); + ip_tunnel_set_encflags_present(flags); + ip_tunnel_flags_and(flags, flags, info->key.tun_flags); + enc_flags->flags = bitmap_read(flags, IP_TUNNEL_CSUM_BIT, 32); + } } EXPORT_SYMBOL(skb_flow_dissect_tunnel_info); -- cgit v1.2.3-58-ga151 From 1d17568e74dedbcb54d36af0662a15128297d681 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Thu, 30 May 2024 19:08:35 +0200 Subject: net/sched: cls_flower: add support for matching tunnel control flags extend cls_flower to match TUNNEL_FLAGS_PRESENT bits in tunnel metadata. Suggested-by: Ilya Maximets Acked-by: Jamal Hadi Salim Signed-off-by: Davide Caratti Reviewed-by: Simon Horman Signed-off-by: Paolo Abeni --- include/uapi/linux/pkt_cls.h | 3 +++ net/sched/cls_flower.c | 56 +++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 58 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 229fc925ec3a..b6d38f5fd7c0 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -554,6 +554,9 @@ enum { TCA_FLOWER_KEY_SPI, /* be32 */ TCA_FLOWER_KEY_SPI_MASK, /* be32 */ + TCA_FLOWER_KEY_ENC_FLAGS, /* u32 */ + TCA_FLOWER_KEY_ENC_FLAGS_MASK, /* u32 */ + __TCA_FLOWER_MAX, }; diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index fd9a6f20b60b..eef570c577ac 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -41,6 +41,12 @@ #define TCA_FLOWER_KEY_CT_FLAGS_MASK \ (TCA_FLOWER_KEY_CT_FLAGS_MAX - 1) +#define TUNNEL_FLAGS_PRESENT (\ + _BITUL(IP_TUNNEL_CSUM_BIT) | \ + _BITUL(IP_TUNNEL_DONT_FRAGMENT_BIT) | \ + _BITUL(IP_TUNNEL_OAM_BIT) | \ + _BITUL(IP_TUNNEL_CRIT_OPT_BIT)) + struct fl_flow_key { struct flow_dissector_key_meta meta; struct flow_dissector_key_control control; @@ -75,6 +81,7 @@ struct fl_flow_key { struct flow_dissector_key_l2tpv3 l2tpv3; struct flow_dissector_key_ipsec ipsec; struct flow_dissector_key_cfm cfm; + struct flow_dissector_key_enc_flags enc_flags; } __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. 
*/ struct fl_flow_mask_range { @@ -732,6 +739,10 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = { [TCA_FLOWER_KEY_SPI_MASK] = { .type = NLA_U32 }, [TCA_FLOWER_L2_MISS] = NLA_POLICY_MAX(NLA_U8, 1), [TCA_FLOWER_KEY_CFM] = { .type = NLA_NESTED }, + [TCA_FLOWER_KEY_ENC_FLAGS] = NLA_POLICY_MASK(NLA_U32, + TUNNEL_FLAGS_PRESENT), + [TCA_FLOWER_KEY_ENC_FLAGS_MASK] = NLA_POLICY_MASK(NLA_U32, + TUNNEL_FLAGS_PRESENT), }; static const struct nla_policy @@ -1825,6 +1836,21 @@ static int fl_set_key_cfm(struct nlattr **tb, return 0; } +static int fl_set_key_enc_flags(struct nlattr **tb, u32 *flags_key, + u32 *flags_mask, struct netlink_ext_ack *extack) +{ + /* mask is mandatory for flags */ + if (NL_REQ_ATTR_CHECK(extack, NULL, tb, TCA_FLOWER_KEY_ENC_FLAGS_MASK)) { + NL_SET_ERR_MSG(extack, "missing enc_flags mask"); + return -EINVAL; + } + + *flags_key = nla_get_u32(tb[TCA_FLOWER_KEY_ENC_FLAGS]); + *flags_mask = nla_get_u32(tb[TCA_FLOWER_KEY_ENC_FLAGS_MASK]); + + return 0; +} + static int fl_set_key(struct net *net, struct nlattr **tb, struct fl_flow_key *key, struct fl_flow_key *mask, struct netlink_ext_ack *extack) @@ -2059,9 +2085,16 @@ static int fl_set_key(struct net *net, struct nlattr **tb, if (ret) return ret; - if (tb[TCA_FLOWER_KEY_FLAGS]) + if (tb[TCA_FLOWER_KEY_FLAGS]) { ret = fl_set_key_flags(tb, &key->control.flags, &mask->control.flags, extack); + if (ret) + return ret; + } + + if (tb[TCA_FLOWER_KEY_ENC_FLAGS]) + ret = fl_set_key_enc_flags(tb, &key->enc_flags.flags, + &mask->enc_flags.flags, extack); return ret; } @@ -2175,6 +2208,8 @@ static void fl_init_dissector(struct flow_dissector *dissector, FLOW_DISSECTOR_KEY_IPSEC, ipsec); FL_KEY_SET_IF_MASKED(mask, keys, cnt, FLOW_DISSECTOR_KEY_CFM, cfm); + FL_KEY_SET_IF_MASKED(mask, keys, cnt, + FLOW_DISSECTOR_KEY_ENC_FLAGS, enc_flags); skb_flow_dissector_init(dissector, keys, cnt); } @@ -3291,6 +3326,22 @@ err_cfm_opts: return err; } +static int fl_dump_key_enc_flags(struct sk_buff *skb, + struct flow_dissector_key_enc_flags *key, + struct flow_dissector_key_enc_flags *mask) +{ + if (!memchr_inv(mask, 0, sizeof(*mask))) + return 0; + + if (nla_put_u32(skb, TCA_FLOWER_KEY_ENC_FLAGS, key->flags)) + return -EMSGSIZE; + + if (nla_put_u32(skb, TCA_FLOWER_KEY_ENC_FLAGS_MASK, mask->flags)) + return -EMSGSIZE; + + return 0; +} + static int fl_dump_key_options(struct sk_buff *skb, int enc_opt_type, struct flow_dissector_key_enc_opts *enc_opts) { @@ -3592,6 +3643,9 @@ static int fl_dump_key(struct sk_buff *skb, struct net *net, if (fl_dump_key_cfm(skb, &key->cfm, &mask->cfm)) goto nla_put_failure; + if (fl_dump_key_enc_flags(skb, &key->enc_flags, &mask->enc_flags)) + goto nla_put_failure; + return 0; nla_put_failure: -- cgit v1.2.3-58-ga151 From 071115301838c6c265065dd5d6bf43a9a987a550 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 30 May 2024 16:36:14 -0700 Subject: tcp: wrap mptcp and decrypted checks into tcp_skb_can_collapse_rx() tcp_skb_can_collapse() checks for conditions which don't make sense on input. Because of this we ended up sprinkling a few pairs of mptcp_skb_can_collapse() and skb_cmp_decrypted() calls on the input path. Group them in a new helper. This should make it less likely that someone will check mptcp and not decrypted or vice versa when adding new code. This implicitly adds a decrypted check early in tcp_collapse(). AFAIU this will very slightly increase our ability to collapse packets under memory pressure, not a real bug. 
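For readability, a before/after sketch of the receive-path pattern this helper groups (drawn from the hunks that follow, with variable names simplified):

	/* before: two separate checks, easy to miss one at a new call site */
	if (!mptcp_skb_can_collapse(to, from) || skb_cmp_decrypted(to, from))
		goto no_coalesce;

	/* after: one grouped rx-side helper */
	if (!tcp_skb_can_collapse_rx(to, from))
		goto no_coalesce;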
Signed-off-by: Jakub Kicinski Reviewed-by: Eric Dumazet Reviewed-by: Matthieu Baerts (NGI0) Reviewed-by: Willem de Bruijn Signed-off-by: Paolo Abeni --- include/net/tcp.h | 7 +++++++ net/ipv4/tcp_input.c | 11 +++-------- net/ipv4/tcp_ipv4.c | 3 +-- 3 files changed, 11 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/include/net/tcp.h b/include/net/tcp.h index 32815a40dea1..32741856da01 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1071,6 +1071,13 @@ static inline bool tcp_skb_can_collapse(const struct sk_buff *to, skb_pure_zcopy_same(to, from)); } +static inline bool tcp_skb_can_collapse_rx(const struct sk_buff *to, + const struct sk_buff *from) +{ + return likely(mptcp_skb_can_collapse(to, from) && + !skb_cmp_decrypted(to, from)); +} + /* Events passed to congestion control interface */ enum tcp_ca_event { CA_EVENT_TX_START, /* first transmit when no packets in flight */ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 5aadf64e554d..212b6fd0caf7 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4813,10 +4813,7 @@ static bool tcp_try_coalesce(struct sock *sk, if (TCP_SKB_CB(from)->seq != TCP_SKB_CB(to)->end_seq) return false; - if (!mptcp_skb_can_collapse(to, from)) - return false; - - if (skb_cmp_decrypted(from, to)) + if (!tcp_skb_can_collapse_rx(to, from)) return false; if (!skb_try_coalesce(to, from, fragstolen, &delta)) @@ -5372,7 +5369,7 @@ restart: break; } - if (n && n != tail && mptcp_skb_can_collapse(skb, n) && + if (n && n != tail && tcp_skb_can_collapse_rx(skb, n) && TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(n)->seq) { end_of_skbs = false; break; @@ -5423,11 +5420,9 @@ restart: skb = tcp_collapse_one(sk, skb, list, root); if (!skb || skb == tail || - !mptcp_skb_can_collapse(nskb, skb) || + !tcp_skb_can_collapse_rx(nskb, skb) || (TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN))) goto end; - if (skb_cmp_decrypted(skb, nskb)) - goto end; } } } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 59d5b064f233..04044605cadf 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2044,8 +2044,7 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb, TCP_SKB_CB(skb)->tcp_flags) & TCPHDR_ACK) || ((TCP_SKB_CB(tail)->tcp_flags ^ TCP_SKB_CB(skb)->tcp_flags) & (TCPHDR_ECE | TCPHDR_CWR)) || - !mptcp_skb_can_collapse(tail, skb) || - skb_cmp_decrypted(tail, skb) || + !tcp_skb_can_collapse_rx(tail, skb) || thtail->doff != th->doff || memcmp(thtail + 1, th + 1, hdrlen - sizeof(*th))) goto no_coalesce; -- cgit v1.2.3-58-ga151 From 1be68a87ab333af37b02ad928a724a722a5a8203 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 30 May 2024 16:36:15 -0700 Subject: tcp: add a helper for setting EOR on tail skb TLS (and hopefully soon PSP will) use EOR to prevent skbs with different decrypted state from getting merged, without adding new tests to the skb handling. In both cases once the connection switches to an "encrypted" state, all subsequent skbs will be encrypted, so a single "EOR fence" is sufficient to prevent mixing. Add a helper for setting the EOR bit, to make this arrangement more explicit. 
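A before/after sketch of the fence itself (the open-coded form is exactly what the tls_device.c hunks below remove):

	/* before: open-coded at each caller */
	struct sk_buff *skb = tcp_write_queue_tail(sk);

	if (skb)
		TCP_SKB_CB(skb)->eor = 1;

	/* after: helper that names the intent */
	tcp_write_collapse_fence(sk);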
Signed-off-by: Jakub Kicinski Reviewed-by: Eric Dumazet Reviewed-by: Willem de Bruijn Signed-off-by: Paolo Abeni --- include/net/tcp.h | 9 +++++++++ net/tls/tls_device.c | 11 ++--------- 2 files changed, 11 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/include/net/tcp.h b/include/net/tcp.h index 32741856da01..08c3b99501cf 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1066,6 +1066,7 @@ static inline bool tcp_skb_can_collapse_to(const struct sk_buff *skb) static inline bool tcp_skb_can_collapse(const struct sk_buff *to, const struct sk_buff *from) { + /* skb_cmp_decrypted() not needed, use tcp_write_collapse_fence() */ return likely(tcp_skb_can_collapse_to(to) && mptcp_skb_can_collapse(to, from) && skb_pure_zcopy_same(to, from)); @@ -2102,6 +2103,14 @@ static inline void tcp_rtx_queue_unlink_and_free(struct sk_buff *skb, struct soc tcp_wmem_free_skb(sk, skb); } +static inline void tcp_write_collapse_fence(struct sock *sk) +{ + struct sk_buff *skb = tcp_write_queue_tail(sk); + + if (skb) + TCP_SKB_CB(skb)->eor = 1; +} + static inline void tcp_push_pending_frames(struct sock *sk) { if (tcp_send_head(sk)) { diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index ab6e694f7bc2..dc063c2c7950 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -231,14 +231,10 @@ static void tls_device_resync_tx(struct sock *sk, struct tls_context *tls_ctx, u32 seq) { struct net_device *netdev; - struct sk_buff *skb; int err = 0; u8 *rcd_sn; - skb = tcp_write_queue_tail(sk); - if (skb) - TCP_SKB_CB(skb)->eor = 1; - + tcp_write_collapse_fence(sk); rcd_sn = tls_ctx->tx.rec_seq; trace_tls_device_tx_resync_send(sk, seq, rcd_sn); @@ -1067,7 +1063,6 @@ int tls_set_device_offload(struct sock *sk) struct tls_prot_info *prot; struct net_device *netdev; struct tls_context *ctx; - struct sk_buff *skb; char *iv, *rec_seq; int rc; @@ -1138,9 +1133,7 @@ int tls_set_device_offload(struct sock *sk) * SKBs where only part of the payload needs to be encrypted. * So mark the last skb in the write queue as end of record. */ - skb = tcp_write_queue_tail(sk); - if (skb) - TCP_SKB_CB(skb)->eor = 1; + tcp_write_collapse_fence(sk); /* Avoid offloading if the device is down * We don't want to offload new flows after -- cgit v1.2.3-58-ga151 From 99b8add01f98a8ecf498de1467e8bf13dbf02daa Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 30 May 2024 16:36:16 -0700 Subject: net: skb: add compatibility warnings to skb_shift() According to current semantics we should never try to shift data between skbs which differ on decrypted or pp_recycle status. 
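A sketch of the caller-side contract the new warnings encode; the caller shown here is hypothetical and only illustrates the expectation, it is not added by this patch:

	/* skbs handed to skb_shift() must already agree on decrypted state
	 * and page_pool recycling, otherwise the new warnings trigger
	 */
	if (tgt->pp_recycle == skb->pp_recycle && !skb_cmp_decrypted(tgt, skb))
		shifted = skb_shift(tgt, skb, shiftlen);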
Signed-off-by: Jakub Kicinski Reviewed-by: Eric Dumazet Reviewed-by: Willem de Bruijn Signed-off-by: Paolo Abeni --- net/core/skbuff.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 466999a7515e..c8ac79851cd6 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4139,6 +4139,9 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen) if (skb_zcopy(tgt) || skb_zcopy(skb)) return 0; + DEBUG_NET_WARN_ON_ONCE(tgt->pp_recycle != skb->pp_recycle); + DEBUG_NET_WARN_ON_ONCE(skb_cmp_decrypted(tgt, skb)); + todo = shiftlen; from = 0; to = skb_shinfo(tgt)->nr_frags; -- cgit v1.2.3-58-ga151 From 8c3fdff2171c834df5fa5ff353b94ada2e5376ca Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Fri, 31 May 2024 04:15:49 -0700 Subject: openvswitch: Move stats allocation to core With commit 34d21de99cea9 ("net: Move {l,t,d}stats allocation to core and convert veth & vrf"), stats allocation could be done on net core instead of this driver. With this new approach, the driver doesn't have to bother with error handling (allocation failure checking, making sure free happens in the right spot, etc). This is core responsibility now. Move openvswitch driver to leverage the core allocation. Signed-off-by: Breno Leitao Link: https://lore.kernel.org/r/20240531111552.3209198-1-leitao@debian.org Signed-off-by: Paolo Abeni --- net/openvswitch/vport-internal_dev.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'net') diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c index 74c88a6baa43..7daba6ac6912 100644 --- a/net/openvswitch/vport-internal_dev.c +++ b/net/openvswitch/vport-internal_dev.c @@ -140,11 +140,7 @@ static struct vport *internal_dev_create(const struct vport_parms *parms) err = -ENOMEM; goto error_free_vport; } - vport->dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); - if (!vport->dev->tstats) { - err = -ENOMEM; - goto error_free_netdev; - } + dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS; dev_net_set(vport->dev, ovs_dp_get_net(vport->dp)); dev->ifindex = parms->desired_ifindex; @@ -169,8 +165,6 @@ static struct vport *internal_dev_create(const struct vport_parms *parms) error_unlock: rtnl_unlock(); - free_percpu(dev->tstats); -error_free_netdev: free_netdev(dev); error_free_vport: ovs_vport_free(vport); @@ -186,7 +180,6 @@ static void internal_dev_destroy(struct vport *vport) /* unregister_netdevice() waits for an RCU grace period. */ unregister_netdevice(vport->dev); - free_percpu(vport->dev->tstats); rtnl_unlock(); } -- cgit v1.2.3-58-ga151 From 2b438c5774cc491a4aa8bb1ec4f49c1a0760a173 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Fri, 31 May 2024 04:15:50 -0700 Subject: openvswitch: Remove generic .ndo_get_stats64 Commit 3e2f544dd8a33 ("net: get stats64 if device if driver is configured") moved the callback to dev_get_tstats64() to net core, so, unless the driver is doing some custom stats collection, it does not need to set .ndo_get_stats64. Since this driver is now relying in NETDEV_PCPU_STAT_TSTATS, then, it doesn't need to set the dev_get_tstats64() generic .ndo_get_stats64 function pointer. 
Signed-off-by: Breno Leitao Reviewed-by: Subbaraya Sundeep Link: https://lore.kernel.org/r/20240531111552.3209198-2-leitao@debian.org Signed-off-by: Paolo Abeni --- net/openvswitch/vport-internal_dev.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c index 7daba6ac6912..4b33133cbdff 100644 --- a/net/openvswitch/vport-internal_dev.c +++ b/net/openvswitch/vport-internal_dev.c @@ -85,7 +85,6 @@ static const struct net_device_ops internal_dev_netdev_ops = { .ndo_stop = internal_dev_stop, .ndo_start_xmit = internal_dev_xmit, .ndo_set_mac_address = eth_mac_addr, - .ndo_get_stats64 = dev_get_tstats64, }; static struct rtnl_link_ops internal_dev_link_ops __read_mostly = { -- cgit v1.2.3-58-ga151 From cdbdb3c62af5bc13e6e8a9310470c71c33340d26 Mon Sep 17 00:00:00 2001 From: Chen Hanxiao Date: Fri, 31 May 2024 16:54:02 +0800 Subject: net: bridge: fix an inconsistent indentation Smatch complains: net/bridge/br_netlink_tunnel.c: 318 br_process_vlan_tunnel_info() warn: inconsistent indenting Fix it with proper indentation. Signed-off-by: Chen Hanxiao Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/bridge/br_netlink_tunnel.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bridge/br_netlink_tunnel.c b/net/bridge/br_netlink_tunnel.c index 17abf092f7ca..71a12da30004 100644 --- a/net/bridge/br_netlink_tunnel.c +++ b/net/bridge/br_netlink_tunnel.c @@ -315,8 +315,8 @@ int br_process_vlan_tunnel_info(const struct net_bridge *br, if (curr_change) *changed = curr_change; - __vlan_tunnel_handle_range(p, &v_start, &v_end, v, - curr_change); + __vlan_tunnel_handle_range(p, &v_start, &v_end, v, + curr_change); } if (v_start && v_end) br_vlan_notify(br, p, v_start->vid, v_end->vid, -- cgit v1.2.3-58-ga151 From 61e2bbafb00e4b9a5de45e6448a7b6b818658576 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Fri, 31 May 2024 23:46:34 +0800 Subject: net: remove NULL-pointer net parameter in ip_metrics_convert When I was doing some experiments, I found that using the first parameter, namely struct net, in ip_metrics_convert() always triggers a NULL pointer crash. Then I dug into this part, realizing that we can remove this parameter due to its uselessness. Signed-off-by: Jason Xing Reviewed-by: Simon Horman Signed-off-by: David S.
Miller --- include/net/ip.h | 3 +-- include/net/tcp.h | 2 +- net/ipv4/fib_semantics.c | 5 ++--- net/ipv4/metrics.c | 8 ++++---- net/ipv4/tcp_cong.c | 11 +++++------ net/ipv6/route.c | 2 +- 6 files changed, 14 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/include/net/ip.h b/include/net/ip.h index 6d735e00d3f3..c5606cadb1a5 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -506,8 +506,7 @@ static inline unsigned int ip_skb_dst_mtu(struct sock *sk, return mtu - lwtunnel_headroom(skb_dst(skb)->lwtstate, mtu); } -struct dst_metrics *ip_fib_metrics_init(struct net *net, struct nlattr *fc_mx, - int fc_mx_len, +struct dst_metrics *ip_fib_metrics_init(struct nlattr *fc_mx, int fc_mx_len, struct netlink_ext_ack *extack); static inline void ip_fib_metrics_put(struct dst_metrics *fib_metrics) { diff --git a/include/net/tcp.h b/include/net/tcp.h index 08c3b99501cf..a70fc39090fe 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1224,7 +1224,7 @@ extern struct tcp_congestion_ops tcp_reno; struct tcp_congestion_ops *tcp_ca_find(const char *name); struct tcp_congestion_ops *tcp_ca_find_key(u32 key); -u32 tcp_ca_get_key_by_name(struct net *net, const char *name, bool *ecn_ca); +u32 tcp_ca_get_key_by_name(const char *name, bool *ecn_ca); #ifdef CONFIG_INET char *tcp_ca_get_name_by_key(u32 key, char *buffer); #else diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index f669da98d11d..7b6b042208bd 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1030,7 +1030,7 @@ bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi) bool ecn_ca = false; nla_strscpy(tmp, nla, sizeof(tmp)); - val = tcp_ca_get_key_by_name(fi->fib_net, tmp, &ecn_ca); + val = tcp_ca_get_key_by_name(tmp, &ecn_ca); } else { if (nla_len(nla) != sizeof(u32)) return false; @@ -1459,8 +1459,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg, fi = kzalloc(struct_size(fi, fib_nh, nhs), GFP_KERNEL); if (!fi) goto failure; - fi->fib_metrics = ip_fib_metrics_init(fi->fib_net, cfg->fc_mx, - cfg->fc_mx_len, extack); + fi->fib_metrics = ip_fib_metrics_init(cfg->fc_mx, cfg->fc_mx_len, extack); if (IS_ERR(fi->fib_metrics)) { err = PTR_ERR(fi->fib_metrics); kfree(fi); diff --git a/net/ipv4/metrics.c b/net/ipv4/metrics.c index 0e3ee1532848..8ddac1f595ed 100644 --- a/net/ipv4/metrics.c +++ b/net/ipv4/metrics.c @@ -7,7 +7,7 @@ #include #include -static int ip_metrics_convert(struct net *net, struct nlattr *fc_mx, +static int ip_metrics_convert(struct nlattr *fc_mx, int fc_mx_len, u32 *metrics, struct netlink_ext_ack *extack) { @@ -31,7 +31,7 @@ static int ip_metrics_convert(struct net *net, struct nlattr *fc_mx, char tmp[TCP_CA_NAME_MAX]; nla_strscpy(tmp, nla, sizeof(tmp)); - val = tcp_ca_get_key_by_name(net, tmp, &ecn_ca); + val = tcp_ca_get_key_by_name(tmp, &ecn_ca); if (val == TCP_CA_UNSPEC) { NL_SET_ERR_MSG(extack, "Unknown tcp congestion algorithm"); return -EINVAL; @@ -63,7 +63,7 @@ static int ip_metrics_convert(struct net *net, struct nlattr *fc_mx, return 0; } -struct dst_metrics *ip_fib_metrics_init(struct net *net, struct nlattr *fc_mx, +struct dst_metrics *ip_fib_metrics_init(struct nlattr *fc_mx, int fc_mx_len, struct netlink_ext_ack *extack) { @@ -77,7 +77,7 @@ struct dst_metrics *ip_fib_metrics_init(struct net *net, struct nlattr *fc_mx, if (unlikely(!fib_metrics)) return ERR_PTR(-ENOMEM); - err = ip_metrics_convert(net, fc_mx, fc_mx_len, fib_metrics->metrics, + err = ip_metrics_convert(fc_mx, fc_mx_len, fib_metrics->metrics, extack); if (!err) { 
refcount_set(&fib_metrics->refcnt, 1); diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 28ffcfbeef14..48617d99abb0 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -46,8 +46,7 @@ void tcp_set_ca_state(struct sock *sk, const u8 ca_state) } /* Must be called with rcu lock held */ -static struct tcp_congestion_ops *tcp_ca_find_autoload(struct net *net, - const char *name) +static struct tcp_congestion_ops *tcp_ca_find_autoload(const char *name) { struct tcp_congestion_ops *ca = tcp_ca_find(name); @@ -178,7 +177,7 @@ int tcp_update_congestion_control(struct tcp_congestion_ops *ca, struct tcp_cong return ret; } -u32 tcp_ca_get_key_by_name(struct net *net, const char *name, bool *ecn_ca) +u32 tcp_ca_get_key_by_name(const char *name, bool *ecn_ca) { const struct tcp_congestion_ops *ca; u32 key = TCP_CA_UNSPEC; @@ -186,7 +185,7 @@ u32 tcp_ca_get_key_by_name(struct net *net, const char *name, bool *ecn_ca) might_sleep(); rcu_read_lock(); - ca = tcp_ca_find_autoload(net, name); + ca = tcp_ca_find_autoload(name); if (ca) { key = ca->key; *ecn_ca = ca->flags & TCP_CONG_NEEDS_ECN; @@ -283,7 +282,7 @@ int tcp_set_default_congestion_control(struct net *net, const char *name) int ret; rcu_read_lock(); - ca = tcp_ca_find_autoload(net, name); + ca = tcp_ca_find_autoload(name); if (!ca) { ret = -ENOENT; } else if (!bpf_try_module_get(ca, ca->owner)) { @@ -421,7 +420,7 @@ int tcp_set_congestion_control(struct sock *sk, const char *name, bool load, if (!load) ca = tcp_ca_find(name); else - ca = tcp_ca_find_autoload(sock_net(sk), name); + ca = tcp_ca_find_autoload(name); /* No change asking for existing value */ if (ca == icsk->icsk_ca_ops) { diff --git a/net/ipv6/route.c b/net/ipv6/route.c index a504b88ec06b..12f647d0fec0 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3761,7 +3761,7 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg, if (!rt) goto out; - rt->fib6_metrics = ip_fib_metrics_init(net, cfg->fc_mx, cfg->fc_mx_len, + rt->fib6_metrics = ip_fib_metrics_init(cfg->fc_mx, cfg->fc_mx_len, extack); if (IS_ERR(rt->fib6_metrics)) { err = PTR_ERR(rt->fib6_metrics); -- cgit v1.2.3-58-ga151 From 6f49c3fb563c0a95a838216eaf7d9b02ece44bf5 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Sat, 1 Jun 2024 00:29:17 +0100 Subject: net: caif: remove unused structs 'cfpktq' has been unused since commit 73d6ac633c6c ("caif: code cleanup"). 'caif_packet_funcs' is declared but never defined. Remove both of them. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Jacob Keller Signed-off-by: David S. 
Miller --- include/net/caif/caif_layer.h | 2 -- net/caif/cfpkt_skbuff.c | 7 ------- 2 files changed, 9 deletions(-) (limited to 'net') diff --git a/include/net/caif/caif_layer.h b/include/net/caif/caif_layer.h index 51f7bb42a936..0f45d875905f 100644 --- a/include/net/caif/caif_layer.h +++ b/include/net/caif/caif_layer.h @@ -11,9 +11,7 @@ struct cflayer; struct cfpkt; -struct cfpktq; struct caif_payload_info; -struct caif_packet_funcs; #define CAIF_LAYER_NAME_SZ 16 diff --git a/net/caif/cfpkt_skbuff.c b/net/caif/cfpkt_skbuff.c index 7796414d47e5..2ae8cfa3df88 100644 --- a/net/caif/cfpkt_skbuff.c +++ b/net/caif/cfpkt_skbuff.c @@ -21,13 +21,6 @@ do { \ pr_warn(errmsg); \ } while (0) -struct cfpktq { - struct sk_buff_head head; - atomic_t count; - /* Lock protects count updates */ - spinlock_t lock; -}; - /* * net/caif/ is generic and does not * understand SKB, so we do this typecast -- cgit v1.2.3-58-ga151 From a23b0034e93453cecf435637dfdb14560eeda6c8 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Sat, 1 Jun 2024 00:30:06 +0100 Subject: net: ethtool: remove unused struct 'cable_test_tdr_req_info' 'cable_test_tdr_req_info' is unused since the original commit f2bc8ad31a7f ("net: ethtool: Allow PHY cable test TDR data to configured"). Remove it. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- net/ethtool/cabletest.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'net') diff --git a/net/ethtool/cabletest.c b/net/ethtool/cabletest.c index 06a151165c31..f6f136ec7ddf 100644 --- a/net/ethtool/cabletest.c +++ b/net/ethtool/cabletest.c @@ -207,10 +207,6 @@ err: } EXPORT_SYMBOL_GPL(ethnl_cable_test_fault_length); -struct cable_test_tdr_req_info { - struct ethnl_req_info base; -}; - static const struct nla_policy cable_test_tdr_act_cfg_policy[] = { [ETHTOOL_A_CABLE_TEST_TDR_CFG_FIRST] = { .type = NLA_U32 }, [ETHTOOL_A_CABLE_TEST_TDR_CFG_LAST] = { .type = NLA_U32 }, -- cgit v1.2.3-58-ga151 From 82dc29b9737edf2d13561ebcf6212c0b88c41129 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 2 Jun 2024 16:18:52 +0200 Subject: devlink: Constify the 'table_ops' parameter of devl_dpipe_table_register() "struct devlink_dpipe_table_ops" only contains some function pointers. Update "struct devlink_dpipe_table" and the 'table_ops' parameter of devl_dpipe_table_register() so that structures in drivers can be constified. Constifying these structures will move some data to a read-only section, so increase overall security. Signed-off-by: Christophe JAILLET Reviewed-by: Wojciech Drewek Reviewed-by: Ido Schimmel Signed-off-by: David S. 
Miller --- include/net/devlink.h | 4 ++-- net/devlink/dpipe.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/include/net/devlink.h b/include/net/devlink.h index 35eb0f884386..db5eff6cb60f 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -352,7 +352,7 @@ struct devlink_dpipe_table { bool resource_valid; u64 resource_id; u64 resource_units; - struct devlink_dpipe_table_ops *table_ops; + const struct devlink_dpipe_table_ops *table_ops; struct rcu_head rcu; }; @@ -1751,7 +1751,7 @@ void devl_sb_unregister(struct devlink *devlink, unsigned int sb_index); void devlink_sb_unregister(struct devlink *devlink, unsigned int sb_index); int devl_dpipe_table_register(struct devlink *devlink, const char *table_name, - struct devlink_dpipe_table_ops *table_ops, + const struct devlink_dpipe_table_ops *table_ops, void *priv, bool counter_control_extern); void devl_dpipe_table_unregister(struct devlink *devlink, const char *table_name); diff --git a/net/devlink/dpipe.c b/net/devlink/dpipe.c index a72a9292efc5..55009b377447 100644 --- a/net/devlink/dpipe.c +++ b/net/devlink/dpipe.c @@ -839,7 +839,7 @@ EXPORT_SYMBOL_GPL(devlink_dpipe_table_counter_enabled); */ int devl_dpipe_table_register(struct devlink *devlink, const char *table_name, - struct devlink_dpipe_table_ops *table_ops, + const struct devlink_dpipe_table_ops *table_ops, void *priv, bool counter_control_extern) { struct devlink_dpipe_table *table; -- cgit v1.2.3-58-ga151 From 69e0b33a7fce4d96649b9fa32e56b696921aa48e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 3 Jun 2024 15:51:06 +0000 Subject: tcp: annotate data-races around tw->tw_ts_recent and tw->tw_ts_recent_stamp These fields can be read and written locklessly, add annotations around these minor races. Signed-off-by: Eric Dumazet Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 12 +++++++----- net/ipv4/tcp_minisocks.c | 22 ++++++++++++++-------- net/ipv6/tcp_ipv6.c | 6 +++--- 3 files changed, 24 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 04044605cadf..3ef4b274c24b 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -114,6 +114,7 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) const struct inet_timewait_sock *tw = inet_twsk(sktw); const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw); struct tcp_sock *tp = tcp_sk(sk); + int ts_recent_stamp; if (reuse == 2) { /* Still does not detect *everything* that goes through @@ -152,9 +153,10 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) If TW bucket has been already destroyed we fall back to VJ's scheme and use initial timestamp retrieved from peer table. */ - if (tcptw->tw_ts_recent_stamp && + ts_recent_stamp = READ_ONCE(tcptw->tw_ts_recent_stamp); + if (ts_recent_stamp && (!twp || (reuse && time_after32(ktime_get_seconds(), - tcptw->tw_ts_recent_stamp)))) { + ts_recent_stamp)))) { /* inet_twsk_hashdance() sets sk_refcnt after putting twsk * and releasing the bucket lock. 
*/ @@ -178,8 +180,8 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) if (!seq) seq = 1; WRITE_ONCE(tp->write_seq, seq); - tp->rx_opt.ts_recent = tcptw->tw_ts_recent; - tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp; + tp->rx_opt.ts_recent = READ_ONCE(tcptw->tw_ts_recent); + tp->rx_opt.ts_recent_stamp = ts_recent_stamp; } return 1; @@ -1064,7 +1066,7 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, tcp_tw_tsval(tcptw), - tcptw->tw_ts_recent, + READ_ONCE(tcptw->tw_ts_recent), tw->tw_bound_dev_if, &key, tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0, tw->tw_tos, diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 538c06f95918..4c894e540730 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -101,16 +101,18 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, struct tcp_options_received tmp_opt; struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); bool paws_reject = false; + int ts_recent_stamp; tmp_opt.saw_tstamp = 0; - if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) { + ts_recent_stamp = READ_ONCE(tcptw->tw_ts_recent_stamp); + if (th->doff > (sizeof(*th) >> 2) && ts_recent_stamp) { tcp_parse_options(twsk_net(tw), skb, &tmp_opt, 0, NULL); if (tmp_opt.saw_tstamp) { if (tmp_opt.rcv_tsecr) tmp_opt.rcv_tsecr -= tcptw->tw_ts_offset; - tmp_opt.ts_recent = tcptw->tw_ts_recent; - tmp_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp; + tmp_opt.ts_recent = READ_ONCE(tcptw->tw_ts_recent); + tmp_opt.ts_recent_stamp = ts_recent_stamp; paws_reject = tcp_paws_reject(&tmp_opt, th->rst); } } @@ -152,8 +154,10 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, twsk_rcv_nxt_update(tcptw, TCP_SKB_CB(skb)->end_seq); if (tmp_opt.saw_tstamp) { - tcptw->tw_ts_recent_stamp = ktime_get_seconds(); - tcptw->tw_ts_recent = tmp_opt.rcv_tsval; + WRITE_ONCE(tcptw->tw_ts_recent_stamp, + ktime_get_seconds()); + WRITE_ONCE(tcptw->tw_ts_recent, + tmp_opt.rcv_tsval); } inet_twsk_reschedule(tw, TCP_TIMEWAIT_LEN); @@ -197,8 +201,10 @@ kill: } if (tmp_opt.saw_tstamp) { - tcptw->tw_ts_recent = tmp_opt.rcv_tsval; - tcptw->tw_ts_recent_stamp = ktime_get_seconds(); + WRITE_ONCE(tcptw->tw_ts_recent, + tmp_opt.rcv_tsval); + WRITE_ONCE(tcptw->tw_ts_recent_stamp, + ktime_get_seconds()); } inet_twsk_put(tw); @@ -225,7 +231,7 @@ kill: if (th->syn && !th->rst && !th->ack && !paws_reject && (after(TCP_SKB_CB(skb)->seq, tcptw->tw_rcv_nxt) || (tmp_opt.saw_tstamp && - (s32)(tcptw->tw_ts_recent - tmp_opt.rcv_tsval) < 0))) { + (s32)(READ_ONCE(tcptw->tw_ts_recent) - tmp_opt.rcv_tsval) < 0))) { u32 isn = tcptw->tw_snd_nxt + 65535 + 2; if (isn == 0) isn++; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 5fe4365de89d..f4777c03cdd7 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1196,9 +1196,9 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, tcp_tw_tsval(tcptw), - tcptw->tw_ts_recent, tw->tw_bound_dev_if, &key, - tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority, - tw->tw_txhash); + READ_ONCE(tcptw->tw_ts_recent), tw->tw_bound_dev_if, + &key, tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), + tw->tw_priority, tw->tw_txhash); #ifdef CONFIG_TCP_AO out: -- cgit v1.2.3-58-ga151 From 512bd0f9f926a05c724a9fd72bc4e14213845e01 Mon Sep 17 00:00:00 
2001 From: Kevin Yang Date: Mon, 3 Jun 2024 21:30:53 +0000 Subject: tcp: derive delack_max with tcp_rto_min helper Rto_min now has multiple sources, ordered by precedence high to low: ip route option rto_min, icsk->icsk_rto_min. When deriving delack_max from rto_min, we should not only use the ip route option, but should use the tcp_rto_min helper to get the correct rto_min. Signed-off-by: Kevin Yang Reviewed-by: Neal Cardwell Reviewed-by: Yuchung Cheng Reviewed-by: Eric Dumazet Reviewed-by: Tony Lu Reviewed-by: Jakub Kicinski Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index f97e098f18a5..090fb0c24599 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -4163,16 +4163,9 @@ EXPORT_SYMBOL(tcp_connect); u32 tcp_delack_max(const struct sock *sk) { - const struct dst_entry *dst = __sk_dst_get(sk); - u32 delack_max = inet_csk(sk)->icsk_delack_max; - - if (dst && dst_metric_locked(dst, RTAX_RTO_MIN)) { - u32 rto_min = dst_metric_rtt(dst, RTAX_RTO_MIN); - u32 delack_from_rto_min = max_t(int, 1, rto_min - 1); + u32 delack_from_rto_min = max(tcp_rto_min(sk), 2) - 1; - delack_max = min_t(u32, delack_max, delack_from_rto_min); - } - return delack_max; + return min(inet_csk(sk)->icsk_delack_max, delack_from_rto_min); } /* Send out a delayed ack, the caller does the policy checking -- cgit v1.2.3-58-ga151 From f086edef71be7174a16c1ed67ac65a085cda28b1 Mon Sep 17 00:00:00 2001 From: Kevin Yang Date: Mon, 3 Jun 2024 21:30:54 +0000 Subject: tcp: add sysctl_tcp_rto_min_us Add a sysctl knob to allow the user to specify a default rto_min at socket init time, other than using the hard coded 200ms default rto_min. Note that the rto_min route option has the highest precedence for configuring this setting, followed by the TCP_BPF_RTO_MIN socket option, followed by the tcp_rto_min_us sysctl. Signed-off-by: Kevin Yang Reviewed-by: Neal Cardwell Reviewed-by: Yuchung Cheng Reviewed-by: Eric Dumazet Reviewed-by: Tony Lu Reviewed-by: Jakub Kicinski Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.rst | 13 +++++++++++++ include/net/netns/ipv4.h | 1 + net/ipv4/sysctl_net_ipv4.c | 8 ++++++++ net/ipv4/tcp.c | 4 +++- net/ipv4/tcp_ipv4.c | 1 + 5 files changed, 26 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index bd50df6a5a42..6e99eccdb837 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -1196,6 +1196,19 @@ tcp_pingpong_thresh - INTEGER Default: 1 +tcp_rto_min_us - INTEGER + Minimal TCP retransmission timeout (in microseconds). Note that the + rto_min route option has the highest precedence for configuring this + setting, followed by the TCP_BPF_RTO_MIN socket option, followed by + this tcp_rto_min_us sysctl. + + The recommended practice is to use a value less or equal to 200000 + microseconds.
+ + Possible Values: 1 - INT_MAX + + Default: 200000 + UDP variables ============= diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index c356c458b340..a91bb971f901 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -170,6 +170,7 @@ struct netns_ipv4 { u8 sysctl_tcp_sack; u8 sysctl_tcp_window_scaling; u8 sysctl_tcp_timestamps; + int sysctl_tcp_rto_min_us; u8 sysctl_tcp_recovery; u8 sysctl_tcp_thin_linear_timeouts; u8 sysctl_tcp_slow_start_after_idle; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index d7892f34a15b..bb64c0ef092d 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -1503,6 +1503,14 @@ static struct ctl_table ipv4_net_table[] = { .proc_handler = proc_dou8vec_minmax, .extra1 = SYSCTL_ONE, }, + { + .procname = "tcp_rto_min_us", + .data = &init_net.ipv4.sysctl_tcp_rto_min_us, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ONE, + }, }; static __net_init int ipv4_sysctl_init_net(struct net *net) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 5fa68e7f6ddb..fa43aaacd92b 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -420,6 +420,7 @@ void tcp_init_sock(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); + int rto_min_us; tp->out_of_order_queue = RB_ROOT; sk->tcp_rtx_queue = RB_ROOT; @@ -428,7 +429,8 @@ void tcp_init_sock(struct sock *sk) INIT_LIST_HEAD(&tp->tsorted_sent_queue); icsk->icsk_rto = TCP_TIMEOUT_INIT; - icsk->icsk_rto_min = TCP_RTO_MIN; + rto_min_us = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rto_min_us); + icsk->icsk_rto_min = usecs_to_jiffies(rto_min_us); icsk->icsk_delack_max = TCP_DELACK_MAX; tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT); minmax_reset(&tp->rtt_min, tcp_jiffies32, ~0U); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 3ef4b274c24b..3613e08ca794 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -3502,6 +3502,7 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_shrink_window = 0; net->ipv4.sysctl_tcp_pingpong_thresh = 1; + net->ipv4.sysctl_tcp_rto_min_us = jiffies_to_usecs(TCP_RTO_MIN); return 0; } -- cgit v1.2.3-58-ga151 From b4cb4a1391dcdc640c4ade003aaf0ee19cc8d509 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 4 Jun 2024 11:16:03 +0000 Subject: net: use unrcu_pointer() helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Toke mentioned unrcu_pointer() existence, allowing to remove some of the ugly casts we have when using xchg() for rcu protected pointers. Also make inet_rcv_compat const. 
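The conversion is mechanical; a before/after sketch of the pattern, taken from the sk_dst_set() hunk below:

	/* before: __force cast needed to strip the __rcu annotation for xchg() */
	old_dst = xchg((__force struct dst_entry **)&sk->sk_dst_cache, dst);

	/* after: unrcu_pointer()/RCU_INITIALIZER() handle the annotation without casts */
	old_dst = unrcu_pointer(xchg(&sk->sk_dst_cache, RCU_INITIALIZER(dst)));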
Signed-off-by: Eric Dumazet Cc: Toke Høiland-Jørgensen Reviewed-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/r/20240604111603.45871-1-edumazet@google.com Signed-off-by: Paolo Abeni --- include/net/sock.h | 2 +- net/core/gen_estimator.c | 2 +- net/core/sock_diag.c | 8 +++----- net/ipv4/cipso_ipv4.c | 2 +- net/ipv4/tcp.c | 2 +- net/ipv4/tcp_fastopen.c | 7 ++++--- net/ipv4/udp.c | 2 +- net/ipv6/af_inet6.c | 2 +- net/ipv6/ip6_fib.c | 2 +- net/ipv6/ipv6_sockglue.c | 3 +-- net/ipv6/route.c | 6 +++--- net/sched/act_api.c | 2 +- 12 files changed, 19 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/include/net/sock.h b/include/net/sock.h index 953c8dc4e259..b30ea0c342a6 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2095,7 +2095,7 @@ sk_dst_set(struct sock *sk, struct dst_entry *dst) sk_tx_queue_clear(sk); WRITE_ONCE(sk->sk_dst_pending_confirm, 0); - old_dst = xchg((__force struct dst_entry **)&sk->sk_dst_cache, dst); + old_dst = unrcu_pointer(xchg(&sk->sk_dst_cache, RCU_INITIALIZER(dst))); dst_release(old_dst); } diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c index fae9c4694186..412816076b8b 100644 --- a/net/core/gen_estimator.c +++ b/net/core/gen_estimator.c @@ -206,7 +206,7 @@ void gen_kill_estimator(struct net_rate_estimator __rcu **rate_est) { struct net_rate_estimator *est; - est = xchg((__force struct net_rate_estimator **)rate_est, NULL); + est = unrcu_pointer(xchg(rate_est, NULL)); if (est) { timer_shutdown_sync(&est->timer); kfree_rcu(est, rcu); diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index 654122838025..a08eed9b9142 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -18,7 +18,7 @@ static const struct sock_diag_handler __rcu *sock_diag_handlers[AF_MAX]; -static struct sock_diag_inet_compat __rcu *inet_rcv_compat; +static const struct sock_diag_inet_compat __rcu *inet_rcv_compat; static struct workqueue_struct *broadcast_wq; @@ -187,8 +187,7 @@ void sock_diag_broadcast_destroy(struct sock *sk) void sock_diag_register_inet_compat(const struct sock_diag_inet_compat *ptr) { - xchg((__force const struct sock_diag_inet_compat **)&inet_rcv_compat, - ptr); + xchg(&inet_rcv_compat, RCU_INITIALIZER(ptr)); } EXPORT_SYMBOL_GPL(sock_diag_register_inet_compat); @@ -196,8 +195,7 @@ void sock_diag_unregister_inet_compat(const struct sock_diag_inet_compat *ptr) { const struct sock_diag_inet_compat *old; - old = xchg((__force const struct sock_diag_inet_compat **)&inet_rcv_compat, - NULL); + old = unrcu_pointer(xchg(&inet_rcv_compat, NULL)); WARN_ON_ONCE(old != ptr); } EXPORT_SYMBOL_GPL(sock_diag_unregister_inet_compat); diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index dd6d46015058..3a95c0f13ce3 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -1953,7 +1953,7 @@ int cipso_v4_req_setattr(struct request_sock *req, buf = NULL; req_inet = inet_rsk(req); - opt = xchg((__force struct ip_options_rcu **)&req_inet->ireq_opt, opt); + opt = unrcu_pointer(xchg(&req_inet->ireq_opt, RCU_INITIALIZER(opt))); if (opt) kfree_rcu(opt, rcu); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index fa43aaacd92b..f727bc8d82a6 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3081,7 +3081,7 @@ int tcp_disconnect(struct sock *sk, int flags) icsk->icsk_ack.rcv_mss = TCP_MIN_MSS; memset(&tp->rx_opt, 0, sizeof(tp->rx_opt)); __sk_dst_reset(sk); - dst_release(xchg((__force struct dst_entry **)&sk->sk_rx_dst, NULL)); + dst_release(unrcu_pointer(xchg(&sk->sk_rx_dst, NULL))); tcp_saved_syn_free(tp); tp->compressed_ack = 
0; tp->segs_in = 0; diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 8ed54e7334a9..0f523cbfe329 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -49,7 +49,7 @@ void tcp_fastopen_ctx_destroy(struct net *net) { struct tcp_fastopen_context *ctxt; - ctxt = xchg((__force struct tcp_fastopen_context **)&net->ipv4.tcp_fastopen_ctx, NULL); + ctxt = unrcu_pointer(xchg(&net->ipv4.tcp_fastopen_ctx, NULL)); if (ctxt) call_rcu(&ctxt->rcu, tcp_fastopen_ctx_free); @@ -80,9 +80,10 @@ int tcp_fastopen_reset_cipher(struct net *net, struct sock *sk, if (sk) { q = &inet_csk(sk)->icsk_accept_queue.fastopenq; - octx = xchg((__force struct tcp_fastopen_context **)&q->ctx, ctx); + octx = unrcu_pointer(xchg(&q->ctx, RCU_INITIALIZER(ctx))); } else { - octx = xchg((__force struct tcp_fastopen_context **)&net->ipv4.tcp_fastopen_ctx, ctx); + octx = unrcu_pointer(xchg(&net->ipv4.tcp_fastopen_ctx, + RCU_INITIALIZER(ctx))); } if (octx) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 189c9113fe9a..c9ca6d285347 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2230,7 +2230,7 @@ bool udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst) struct dst_entry *old; if (dst_hold_safe(dst)) { - old = xchg((__force struct dst_entry **)&sk->sk_rx_dst, dst); + old = unrcu_pointer(xchg(&sk->sk_rx_dst, RCU_INITIALIZER(dst))); dst_release(old); return old != dst; } diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 8041dc181bd4..e03fb9a1dbeb 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -509,7 +509,7 @@ void inet6_cleanup_sock(struct sock *sk) /* Free tx options */ - opt = xchg((__force struct ipv6_txoptions **)&np->opt, NULL); + opt = unrcu_pointer(xchg(&np->opt, NULL)); if (opt) { atomic_sub(opt->tot_len, &sk->sk_omem_alloc); txopt_put(opt); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 31d77885bcae..15f9abe50656 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -984,7 +984,7 @@ static void __fib6_drop_pcpu_from(struct fib6_nh *fib6_nh, if (pcpu_rt && rcu_access_pointer(pcpu_rt->from) == match) { struct fib6_info *from; - from = xchg((__force struct fib6_info **)&pcpu_rt->from, NULL); + from = unrcu_pointer(xchg(&pcpu_rt->from, NULL)); fib6_info_release(from); } } diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index d4c28ec1bc51..cd342d5015c6 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -111,8 +111,7 @@ struct ipv6_txoptions *ipv6_update_options(struct sock *sk, icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie); } } - opt = xchg((__force struct ipv6_txoptions **)&inet6_sk(sk)->opt, - opt); + opt = unrcu_pointer(xchg(&inet6_sk(sk)->opt, RCU_INITIALIZER(opt))); sk_dst_reset(sk); return opt; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 12f647d0fec0..ae6fbdf8995d 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -368,7 +368,7 @@ static void ip6_dst_destroy(struct dst_entry *dst) in6_dev_put(idev); } - from = xchg((__force struct fib6_info **)&rt->from, NULL); + from = unrcu_pointer(xchg(&rt->from, NULL)); fib6_info_release(from); } @@ -1437,7 +1437,7 @@ static struct rt6_info *rt6_make_pcpu_route(struct net *net, if (res->f6i->fib6_destroying) { struct fib6_info *from; - from = xchg((__force struct fib6_info **)&pcpu_rt->from, NULL); + from = unrcu_pointer(xchg(&pcpu_rt->from, NULL)); fib6_info_release(from); } @@ -1466,7 +1466,7 @@ static void rt6_remove_exception(struct rt6_exception_bucket *bucket, /* purge completely the exception to allow releasing the held resources: * some 
[sk] cache may keep the dst around for unlimited time */ - from = xchg((__force struct fib6_info **)&rt6_ex->rt6i->from, NULL); + from = unrcu_pointer(xchg(&rt6_ex->rt6i->from, NULL)); fib6_info_release(from); dst_dev_put(&rt6_ex->rt6i->dst); diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 9ee622fb1160..7458b3154426 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -62,7 +62,7 @@ static void tcf_set_action_cookie(struct tc_cookie __rcu **old_cookie, { struct tc_cookie *old; - old = xchg((__force struct tc_cookie **)old_cookie, new_cookie); + old = unrcu_pointer(xchg(old_cookie, RCU_INITIALIZER(new_cookie))); if (old) call_rcu(&old->rcu, tcf_free_cookie_rcu); } -- cgit v1.2.3-58-ga151 From 98aa546af5e4f180439c82da403cc77926d3953b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 4 Jun 2024 16:51:50 +0000 Subject: inet: remove (struct uncached_list)->quarantine This list is used to tranfert dst that are handled by rt_flush_dev() and rt6_uncached_list_flush_dev() out of the per-cpu lists. But quarantine list is not used later. If we simply use list_del_init(&rt->dst.rt_uncached), this also removes the dst from per-cpu list. This patch also makes the future calls to rt_del_uncached_list() and rt6_uncached_list_del() faster, because no spinlock acquisition is needed anymore. Signed-off-by: Eric Dumazet Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240604165150.726382-1-edumazet@google.com Signed-off-by: Paolo Abeni --- net/ipv4/route.c | 4 +--- net/ipv6/route.c | 5 +---- 2 files changed, 2 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index b3073d1c8f8f..cb0bdf34ed50 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1481,7 +1481,6 @@ static bool rt_cache_route(struct fib_nh_common *nhc, struct rtable *rt) struct uncached_list { spinlock_t lock; struct list_head head; - struct list_head quarantine; }; static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt_uncached_list); @@ -1532,7 +1531,7 @@ void rt_flush_dev(struct net_device *dev) rt->dst.dev = blackhole_netdev; netdev_ref_replace(dev, blackhole_netdev, &rt->dst.dev_tracker, GFP_ATOMIC); - list_move(&rt->dst.rt_uncached, &ul->quarantine); + list_del_init(&rt->dst.rt_uncached); } spin_unlock_bh(&ul->lock); } @@ -3661,7 +3660,6 @@ int __init ip_rt_init(void) struct uncached_list *ul = &per_cpu(rt_uncached_list, cpu); INIT_LIST_HEAD(&ul->head); - INIT_LIST_HEAD(&ul->quarantine); spin_lock_init(&ul->lock); } #ifdef CONFIG_IP_ROUTE_CLASSID diff --git a/net/ipv6/route.c b/net/ipv6/route.c index ae6fbdf8995d..ec5b59b221ef 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -131,7 +131,6 @@ static struct fib6_info *rt6_get_route_info(struct net *net, struct uncached_list { spinlock_t lock; struct list_head head; - struct list_head quarantine; }; static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list); @@ -189,8 +188,7 @@ static void rt6_uncached_list_flush_dev(struct net_device *dev) handled = true; } if (handled) - list_move(&rt->dst.rt_uncached, - &ul->quarantine); + list_del_init(&rt->dst.rt_uncached); } spin_unlock_bh(&ul->lock); } @@ -6755,7 +6753,6 @@ int __init ip6_route_init(void) struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu); INIT_LIST_HEAD(&ul->head); - INIT_LIST_HEAD(&ul->quarantine); spin_lock_init(&ul->lock); } -- cgit v1.2.3-58-ga151 From 9b6a30febddf4857cb4c367f5294bb6118340e30 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Wed, 5 Jun 2024 10:29:32 +0800 Subject: net: allow rps/rfs related configs 
to be switched After John Sperbeck reported a compile error when CONFIG_RFS_ACCEL is off, I found that I cannot easily enable/disable the config because of the lack of a prompt when using 'make menuconfig'. Therefore, I decided to change the rps/rfs related configs altogether. Signed-off-by: Jason Xing Link: https://lore.kernel.org/r/20240605022932.33703-1-kerneljasonxing@gmail.com Signed-off-by: Paolo Abeni --- net/Kconfig | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/Kconfig b/net/Kconfig index f0a8692496ff..9fe65fa26e48 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -290,15 +290,21 @@ config MAX_SKB_FRAGS If unsure, say 17. config RPS - bool + bool "Receive packet steering" depends on SMP && SYSFS default y + help + Software receive side packet steering (RPS) distributes the + load of received packet processing across multiple CPUs. config RFS_ACCEL - bool + bool "Hardware acceleration of RFS" depends on RPS select CPU_RMAP default y + help + Allowing drivers for multiqueue hardware with flow filter tables to + accelerate RFS. config SOCK_RX_QUEUE_MAPPING bool @@ -351,7 +357,7 @@ config BPF_STREAM_PARSER BPF_MAP_TYPE_SOCKMAP. config NET_FLOW_LIMIT - bool + bool "Net flow limit" depends on RPS default y help -- cgit v1.2.3-58-ga151 From 5f0d0649c83f72399c19b18591ea1413ca94c015 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Wed, 5 Jun 2024 09:15:40 +0200 Subject: mptcp: use mptcp_win_from_space helper The MPTCP dedicated win_from_space helper mptcp_win_from_space() is defined in protocol.h, use it in mptcp_rcv_space_adjust() instead of using the TCP one. Here scaling_ratio is the same as msk->scaling_ratio. Signed-off-by: Geliang Tang Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: Paolo Abeni --- net/mptcp/protocol.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 7d44196ec5b6..546c80c6702a 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2046,7 +2046,7 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied) if (rcvbuf > sk->sk_rcvbuf) { u32 window_clamp; - window_clamp = __tcp_win_from_space(scaling_ratio, rcvbuf); + window_clamp = mptcp_win_from_space(sk, rcvbuf); WRITE_ONCE(sk->sk_rcvbuf, rcvbuf); /* Make subflows follow along. If we do not do this, we -- cgit v1.2.3-58-ga151 From 5cdedad62eaba22c45b9c45c8199bacd461afd87 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Wed, 5 Jun 2024 09:15:41 +0200 Subject: mptcp: add mptcp_space_from_win helper As a wrapper of __tcp_space_from_win(), this patch adds a MPTCP dedicated space_from_win helper mptcp_space_from_win() in protocol.h to pair with mptcp_win_from_space(). Use it instead of __tcp_space_from_win() in both mptcp_rcv_space_adjust() and mptcp_set_rcvlowat().
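A before/after sketch of a converted call site, taken from the mptcp_set_rcvlowat() hunk below:

	/* before: open-coded TCP helper with an explicit scaling_ratio */
	space = __tcp_space_from_win(mptcp_sk(sk)->scaling_ratio, val);

	/* after: MPTCP wrapper, symmetric with mptcp_win_from_space() */
	space = mptcp_space_from_win(sk, val);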
Signed-off-by: Geliang Tang Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: Paolo Abeni --- net/mptcp/protocol.c | 2 +- net/mptcp/protocol.h | 5 +++++ net/mptcp/sockopt.c | 2 +- 3 files changed, 7 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 546c80c6702a..7ce11bee3b79 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2040,7 +2040,7 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied) do_div(grow, msk->rcvq_space.space); rcvwin += (grow << 1); - rcvbuf = min_t(u64, __tcp_space_from_win(scaling_ratio, rcvwin), + rcvbuf = min_t(u64, mptcp_space_from_win(sk, rcvwin), READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2])); if (rcvbuf > sk->sk_rcvbuf) { diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 7aa47e2dd52b..b11a4e50d52b 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -386,6 +386,11 @@ static inline int mptcp_win_from_space(const struct sock *sk, int space) return __tcp_win_from_space(mptcp_sk(sk)->scaling_ratio, space); } +static inline int mptcp_space_from_win(const struct sock *sk, int win) +{ + return __tcp_space_from_win(mptcp_sk(sk)->scaling_ratio, win); +} + static inline int __mptcp_space(const struct sock *sk) { return mptcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf) - __mptcp_rmem(sk)); diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index f9a4fb17b5b7..2026a9a36f80 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -1579,7 +1579,7 @@ int mptcp_set_rcvlowat(struct sock *sk, int val) if (sk->sk_userlocks & SOCK_RCVBUF_LOCK) return 0; - space = __tcp_space_from_win(mptcp_sk(sk)->scaling_ratio, val); + space = mptcp_space_from_win(sk, val); if (space <= sk->sk_rcvbuf) return 0; -- cgit v1.2.3-58-ga151 From 92f74c1e05b044b51398d6d4a85e659e4384f2cb Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Wed, 5 Jun 2024 09:15:42 +0200 Subject: mptcp: refer to 'MPTCP' socket in comments We used to call it 'master' socket at the early stages of MPTCP development, but the correct wording is 'MPTCP' socket opposed to 'TCP subflows': convert the last 3 comments to use a more appropriate term. Signed-off-by: Davide Caratti Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: Paolo Abeni --- net/mptcp/protocol.c | 4 ++-- net/mptcp/subflow.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 7ce11bee3b79..ead0bf63cf95 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2202,7 +2202,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, if (skb_queue_empty(&msk->receive_queue) && __mptcp_move_skbs(msk)) continue; - /* only the master socket status is relevant here. The exit + /* only the MPTCP socket status is relevant here. 
The exit * conditions mirror closely tcp_recvmsg() */ if (copied >= target) @@ -3521,7 +3521,7 @@ void mptcp_subflow_process_delegated(struct sock *ssk, long status) static int mptcp_hash(struct sock *sk) { /* should never be called, - * we hash the TCP subflows not the master socket + * we hash the TCP subflows not the MPTCP socket */ WARN_ON_ONCE(1); return 0; diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 612c38570a64..39e2cbdf3801 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -1719,7 +1719,7 @@ int mptcp_subflow_create_socket(struct sock *sk, unsigned short family, mptcp_sockopt_sync_locked(mptcp_sk(sk), sf->sk); release_sock(sf->sk); - /* the newly created socket really belongs to the owning MPTCP master + /* the newly created socket really belongs to the owning MPTCP * socket, even if for additional subflows the allocation is performed * by a kernel workqueue. Adjust inode references, so that the * procfs/diag interfaces really show this one belonging to the correct -- cgit v1.2.3-58-ga151 From adbe695a9765fb704d2ac0d3e284f28bcc8b5bf3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 5 Jun 2024 07:15:52 +0000 Subject: tcp: move inet_reqsk_alloc() close to inet_reqsk_clone() inet_reqsk_alloc() does not belong to tcp_input.c, move it to inet_connection_sock.c instead. Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Signed-off-by: Paolo Abeni --- net/ipv4/inet_connection_sock.c | 25 +++++++++++++++++++++++++ net/ipv4/tcp_input.c | 25 ------------------------- 2 files changed, 25 insertions(+), 25 deletions(-) (limited to 'net') diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index d81f74ce0f02..a9d2e6308910 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -911,6 +911,31 @@ int inet_rtx_syn_ack(const struct sock *parent, struct request_sock *req) } EXPORT_SYMBOL(inet_rtx_syn_ack); +struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops, + struct sock *sk_listener, + bool attach_listener) +{ + struct request_sock *req = reqsk_alloc(ops, sk_listener, + attach_listener); + + if (req) { + struct inet_request_sock *ireq = inet_rsk(req); + + ireq->ireq_opt = NULL; +#if IS_ENABLED(CONFIG_IPV6) + ireq->pktopts = NULL; +#endif + atomic64_set(&ireq->ir_cookie, 0); + ireq->ireq_state = TCP_NEW_SYN_RECV; + write_pnet(&ireq->ireq_net, sock_net(sk_listener)); + ireq->ireq_family = sk_listener->sk_family; + req->timeout = TCP_TIMEOUT_INIT; + } + + return req; +} +EXPORT_SYMBOL(inet_reqsk_alloc); + static struct request_sock *inet_reqsk_clone(struct request_sock *req, struct sock *sk) { diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 212b6fd0caf7..eb187450e4d7 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -6986,31 +6986,6 @@ static void tcp_openreq_init(struct request_sock *req, #endif } -struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops, - struct sock *sk_listener, - bool attach_listener) -{ - struct request_sock *req = reqsk_alloc(ops, sk_listener, - attach_listener); - - if (req) { - struct inet_request_sock *ireq = inet_rsk(req); - - ireq->ireq_opt = NULL; -#if IS_ENABLED(CONFIG_IPV6) - ireq->pktopts = NULL; -#endif - atomic64_set(&ireq->ir_cookie, 0); - ireq->ireq_state = TCP_NEW_SYN_RECV; - write_pnet(&ireq->ireq_net, sock_net(sk_listener)); - ireq->ireq_family = sk_listener->sk_family; - req->timeout = TCP_TIMEOUT_INIT; - } - - return req; -} -EXPORT_SYMBOL(inet_reqsk_alloc); - /* * Return true if a syncookie 
should be sent */ -- cgit v1.2.3-58-ga151 From 6971d21672827a701c5ea180891b7ea6cf06f6a7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 5 Jun 2024 07:15:53 +0000 Subject: tcp: move reqsk_alloc() to inet_connection_sock.c reqsk_alloc() has a single caller, no need to expose it in include/net/request_sock.h. Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Signed-off-by: Paolo Abeni --- include/net/request_sock.h | 33 --------------------------------- net/ipv4/inet_connection_sock.c | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 33 deletions(-) (limited to 'net') diff --git a/include/net/request_sock.h b/include/net/request_sock.h index a8f82216c628..b07b1cd14e9f 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -128,39 +128,6 @@ static inline struct sock *skb_steal_sock(struct sk_buff *skb, return sk; } -static inline struct request_sock * -reqsk_alloc_noprof(const struct request_sock_ops *ops, struct sock *sk_listener, - bool attach_listener) -{ - struct request_sock *req; - - req = kmem_cache_alloc_noprof(ops->slab, GFP_ATOMIC | __GFP_NOWARN); - if (!req) - return NULL; - req->rsk_listener = NULL; - if (attach_listener) { - if (unlikely(!refcount_inc_not_zero(&sk_listener->sk_refcnt))) { - kmem_cache_free(ops->slab, req); - return NULL; - } - req->rsk_listener = sk_listener; - } - req->rsk_ops = ops; - req_to_sk(req)->sk_prot = sk_listener->sk_prot; - sk_node_init(&req_to_sk(req)->sk_node); - sk_tx_queue_clear(req_to_sk(req)); - req->saved_syn = NULL; - req->syncookie = 0; - req->timeout = 0; - req->num_timeout = 0; - req->num_retrans = 0; - req->sk = NULL; - refcount_set(&req->rsk_refcnt, 0); - - return req; -} -#define reqsk_alloc(...) alloc_hooks(reqsk_alloc_noprof(__VA_ARGS__)) - static inline void __reqsk_free(struct request_sock *req) { req->rsk_ops->destructor(req); diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index a9d2e6308910..7ced569778ab 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -911,6 +911,39 @@ int inet_rtx_syn_ack(const struct sock *parent, struct request_sock *req) } EXPORT_SYMBOL(inet_rtx_syn_ack); +static struct request_sock * +reqsk_alloc_noprof(const struct request_sock_ops *ops, struct sock *sk_listener, + bool attach_listener) +{ + struct request_sock *req; + + req = kmem_cache_alloc_noprof(ops->slab, GFP_ATOMIC | __GFP_NOWARN); + if (!req) + return NULL; + req->rsk_listener = NULL; + if (attach_listener) { + if (unlikely(!refcount_inc_not_zero(&sk_listener->sk_refcnt))) { + kmem_cache_free(ops->slab, req); + return NULL; + } + req->rsk_listener = sk_listener; + } + req->rsk_ops = ops; + req_to_sk(req)->sk_prot = sk_listener->sk_prot; + sk_node_init(&req_to_sk(req)->sk_node); + sk_tx_queue_clear(req_to_sk(req)); + req->saved_syn = NULL; + req->syncookie = 0; + req->timeout = 0; + req->num_timeout = 0; + req->num_retrans = 0; + req->sk = NULL; + refcount_set(&req->rsk_refcnt, 0); + + return req; +} +#define reqsk_alloc(...) alloc_hooks(reqsk_alloc_noprof(__VA_ARGS__)) + struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener, bool attach_listener) -- cgit v1.2.3-58-ga151 From 3e61103b2f7887af0be402a79b9c70425ceba3e3 Mon Sep 17 00:00:00 2001 From: David Wei Date: Wed, 5 Jun 2024 09:19:24 -0700 Subject: page_pool: remove WARN_ON() with OR Having an OR in WARN_ON() makes me sad because it's impossible to tell which condition is true when triggered. 
Split a WARN_ON() with an OR in page_pool_disable_direct_recycling(). Signed-off-by: David Wei Reviewed-by: Mina Almasry Reviewed-by: Somnath Kotur Acked-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- net/core/page_pool.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/page_pool.c b/net/core/page_pool.c index f4444b4e39e6..3927a0a7fa9a 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -1027,8 +1027,8 @@ static void page_pool_disable_direct_recycling(struct page_pool *pool) /* To avoid races with recycling and additional barriers make sure * pool and NAPI are unlinked when NAPI is disabled. */ - WARN_ON(!test_bit(NAPI_STATE_SCHED, &pool->p.napi->state) || - READ_ONCE(pool->p.napi->list_owner) != -1); + WARN_ON(!test_bit(NAPI_STATE_SCHED, &pool->p.napi->state)); + WARN_ON(READ_ONCE(pool->p.napi->list_owner) != -1); WRITE_ONCE(pool->p.napi, NULL); } -- cgit v1.2.3-58-ga151 From b334b924c9b709bc969644fb5c406f5c9d01dceb Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Thu, 6 Jun 2024 17:11:37 +0200 Subject: net: tcp/dccp: prepare for tw_timer un-pinning The TCP timewait timer is proving to be problematic for setups where scheduler CPU isolation is achieved at runtime via cpusets (as opposed to statically via isolcpus=domains). What happens there is a CPU goes through tcp_time_wait(), arming the time_wait timer, then gets isolated. TCP_TIMEWAIT_LEN later, the timer fires, causing interference for the now-isolated CPU. This is conceptually similar to the issue described in commit e02b93124855 ("workqueue: Unbind kworkers before sending them to exit()") Move inet_twsk_schedule() to within inet_twsk_hashdance(), with the ehash lock held. Expand the lock's critical section from inet_twsk_kill() to inet_twsk_deschedule_put(), serializing the scheduling vs descheduling of the timer. IOW, this prevents the following race: tcp_time_wait() inet_twsk_hashdance() inet_twsk_deschedule_put() del_timer_sync() inet_twsk_schedule() Thanks to Paolo Abeni for suggesting to leverage the ehash lock. This also restores a comment from commit ec94c2696f0b ("tcp/dccp: avoid one atomic operation for timewait hashdance") as inet_twsk_hashdance() had a "Step 1" and "Step 3" comment, but the "Step 2" had gone missing. inet_twsk_deschedule_put() now acquires the ehash spinlock to synchronize with inet_twsk_hashdance_schedule(). To ease possible regression search, actual un-pin is done in next patch. Link: https://lore.kernel.org/all/ZPhpfMjSiHVjQkTk@localhost.localdomain/ Reviewed-by: Eric Dumazet Signed-off-by: Valentin Schneider Co-developed-by: Florian Westphal Signed-off-by: Florian Westphal Signed-off-by: David S. 
Miller --- include/net/inet_timewait_sock.h | 6 +++-- net/dccp/minisocks.c | 3 +-- net/ipv4/inet_timewait_sock.c | 52 ++++++++++++++++++++++++++++++++++------ net/ipv4/tcp_ipv4.c | 2 +- net/ipv4/tcp_minisocks.c | 3 +-- 5 files changed, 52 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index 2a536eea9424..5b43d220243d 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -93,8 +93,10 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, struct inet_timewait_death_row *dr, const int state); -void inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, - struct inet_hashinfo *hashinfo); +void inet_twsk_hashdance_schedule(struct inet_timewait_sock *tw, + struct sock *sk, + struct inet_hashinfo *hashinfo, + int timeo); void __inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo, bool rearm); diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index 251a57cf5822..deb52d7d31b4 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -59,11 +59,10 @@ void dccp_time_wait(struct sock *sk, int state, int timeo) * we complete the initialization. */ local_bh_disable(); - inet_twsk_schedule(tw, timeo); /* Linkage updates. * Note that access to tw after this point is illegal. */ - inet_twsk_hashdance(tw, sk, &dccp_hashinfo); + inet_twsk_hashdance_schedule(tw, sk, &dccp_hashinfo, timeo); local_bh_enable(); } else { /* Sorry, if we're out of memory, just CLOSE this diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index e28075f0006e..628d33a41ce5 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -96,9 +96,13 @@ static void inet_twsk_add_node_rcu(struct inet_timewait_sock *tw, * Enter the time wait state. This is called with locally disabled BH. * Essentially we whip up a timewait bucket, copy the relevant info into it * from the SK, and mess with hash chains and list linkage. + * + * The caller must not access @tw anymore after this function returns. */ -void inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, - struct inet_hashinfo *hashinfo) +void inet_twsk_hashdance_schedule(struct inet_timewait_sock *tw, + struct sock *sk, + struct inet_hashinfo *hashinfo, + int timeo) { const struct inet_sock *inet = inet_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); @@ -129,26 +133,33 @@ void inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, spin_lock(lock); + /* Step 2: Hash TW into tcp ehash chain */ inet_twsk_add_node_rcu(tw, &ehead->chain); /* Step 3: Remove SK from hash chain */ if (__sk_nulls_del_node_init_rcu(sk)) sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); - spin_unlock(lock); + /* Ensure above writes are committed into memory before updating the + * refcount. + * Provides ordering vs later refcount_inc(). + */ + smp_wmb(); /* tw_refcnt is set to 3 because we have : * - one reference for bhash chain. * - one reference for ehash chain. * - one reference for timer. - * We can use atomic_set() because prior spin_lock()/spin_unlock() - * committed into memory all tw fields. * Also note that after this point, we lost our implicit reference * so we are not allowed to use tw anymore. 
*/ refcount_set(&tw->tw_refcnt, 3); + + inet_twsk_schedule(tw, timeo); + + spin_unlock(lock); } -EXPORT_SYMBOL_GPL(inet_twsk_hashdance); +EXPORT_SYMBOL_GPL(inet_twsk_hashdance_schedule); static void tw_timer_handler(struct timer_list *t) { @@ -217,7 +228,34 @@ EXPORT_SYMBOL_GPL(inet_twsk_alloc); */ void inet_twsk_deschedule_put(struct inet_timewait_sock *tw) { - if (del_timer_sync(&tw->tw_timer)) + struct inet_hashinfo *hashinfo = tw->tw_dr->hashinfo; + spinlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash); + + /* inet_twsk_purge() walks over all sockets, including tw ones, + * and removes them via inet_twsk_deschedule_put() after a + * refcount_inc_not_zero(). + * + * inet_twsk_hashdance_schedule() must (re)init the refcount before + * arming the timer, i.e. inet_twsk_purge can obtain a reference to + * a twsk that did not yet schedule the timer. + * + * The ehash lock synchronizes these two: + * After acquiring the lock, the timer is always scheduled (else + * timer_shutdown returns false), because hashdance_schedule releases + * the ehash lock only after completing the timer initialization. + * + * Without grabbing the ehash lock, we get: + * 1) cpu x sets twsk refcount to 3 + * 2) cpu y bumps refcount to 4 + * 3) cpu y calls inet_twsk_deschedule_put() and shuts timer down + * 4) cpu x tries to start timer, but mod_timer is a noop post-shutdown + * -> timer refcount is never decremented. + */ + spin_lock(lock); + /* Makes sure hashdance_schedule() has completed */ + spin_unlock(lock); + + if (timer_shutdown_sync(&tw->tw_timer)) inet_twsk_kill(tw); inet_twsk_put(tw); } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 3613e08ca794..e61c7c974745 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -157,7 +157,7 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) if (ts_recent_stamp && (!twp || (reuse && time_after32(ktime_get_seconds(), ts_recent_stamp)))) { - /* inet_twsk_hashdance() sets sk_refcnt after putting twsk + /* inet_twsk_hashdance_schedule() sets sk_refcnt after putting twsk * and releasing the bucket lock. */ if (unlikely(!refcount_inc_not_zero(&sktw->sk_refcnt))) diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 4c894e540730..fc9a850ed9bd 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -350,11 +350,10 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) * we complete the initialization. */ local_bh_disable(); - inet_twsk_schedule(tw, timeo); /* Linkage updates. * Note that access to tw after this point is illegal. */ - inet_twsk_hashdance(tw, sk, net->ipv4.tcp_death_row.hashinfo); + inet_twsk_hashdance_schedule(tw, sk, net->ipv4.tcp_death_row.hashinfo, timeo); local_bh_enable(); } else { /* Sorry, if we're out of memory, just CLOSE this -- cgit v1.2.3-58-ga151 From c75ad7c759ccead1be462d4cec42bd4ad3578989 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 6 Jun 2024 17:11:38 +0200 Subject: net: tcp: un-pin the tw_timer After previous patch, even if timer fires immediately on another CPU, context that schedules the timer now holds the ehash spinlock, so timer cannot reap tw socket until ehash lock is released. BH disable is moved into hashdance_schedule. Signed-off-by: Florian Westphal Reviewed-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/dccp/minisocks.c | 6 ------ net/ipv4/inet_timewait_sock.c | 6 ++++-- net/ipv4/tcp_minisocks.c | 6 ------ 3 files changed, 4 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index deb52d7d31b4..fecc8190064f 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -54,16 +54,10 @@ void dccp_time_wait(struct sock *sk, int state, int timeo) if (state == DCCP_TIME_WAIT) timeo = DCCP_TIMEWAIT_LEN; - /* tw_timer is pinned, so we need to make sure BH are disabled - * in following section, otherwise timer handler could run before - * we complete the initialization. - */ - local_bh_disable(); /* Linkage updates. * Note that access to tw after this point is illegal. */ inet_twsk_hashdance_schedule(tw, sk, &dccp_hashinfo, timeo); - local_bh_enable(); } else { /* Sorry, if we're out of memory, just CLOSE this * socket up. We've got bigger problems than diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 628d33a41ce5..b2d97c816c99 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -93,7 +93,7 @@ static void inet_twsk_add_node_rcu(struct inet_timewait_sock *tw, } /* - * Enter the time wait state. This is called with locally disabled BH. + * Enter the time wait state. * Essentially we whip up a timewait bucket, copy the relevant info into it * from the SK, and mess with hash chains and list linkage. * @@ -118,6 +118,7 @@ void inet_twsk_hashdance_schedule(struct inet_timewait_sock *tw, hashinfo->bhash_size)]; bhead2 = inet_bhashfn_portaddr(hashinfo, sk, twsk_net(tw), inet->inet_num); + local_bh_disable(); spin_lock(&bhead->lock); spin_lock(&bhead2->lock); @@ -158,6 +159,7 @@ void inet_twsk_hashdance_schedule(struct inet_timewait_sock *tw, inet_twsk_schedule(tw, timeo); spin_unlock(lock); + local_bh_enable(); } EXPORT_SYMBOL_GPL(inet_twsk_hashdance_schedule); @@ -203,7 +205,7 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, tw->tw_prot = sk->sk_prot_creator; atomic64_set(&tw->tw_cookie, atomic64_read(&sk->sk_cookie)); twsk_net_set(tw, sock_net(sk)); - timer_setup(&tw->tw_timer, tw_timer_handler, TIMER_PINNED); + timer_setup(&tw->tw_timer, tw_timer_handler, 0); /* * Because we use RCU lookups, we should not set tw_refcnt * to a non null value before everything is setup for this diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index fc9a850ed9bd..bc67f6b9efae 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -345,16 +345,10 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) if (state == TCP_TIME_WAIT) timeo = TCP_TIMEWAIT_LEN; - /* tw_timer is pinned, so we need to make sure BH are disabled - * in following section, otherwise timer handler could run before - * we complete the initialization. - */ - local_bh_disable(); /* Linkage updates. * Note that access to tw after this point is illegal. */ inet_twsk_hashdance_schedule(tw, sk, net->ipv4.tcp_death_row.hashinfo, timeo); - local_bh_enable(); } else { /* Sorry, if we're out of memory, just CLOSE this * socket up. We've got bigger problems than -- cgit v1.2.3-58-ga151 From f81d0dd2fde35fd1acc30b3f4de6aaf57d514551 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 6 Jun 2024 17:11:39 +0200 Subject: tcp: move inet_twsk_schedule helper out of header Its no longer used outside inet_timewait_sock.c, so move it there. Reviewed-by: Eric Dumazet Signed-off-by: Florian Westphal Signed-off-by: David S. 
Miller --- include/net/inet_timewait_sock.h | 5 ----- net/ipv4/inet_timewait_sock.c | 5 +++++ 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index 5b43d220243d..f88b68269012 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -101,11 +101,6 @@ void inet_twsk_hashdance_schedule(struct inet_timewait_sock *tw, void __inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo, bool rearm); -static inline void inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo) -{ - __inet_twsk_schedule(tw, timeo, false); -} - static inline void inet_twsk_reschedule(struct inet_timewait_sock *tw, int timeo) { __inet_twsk_schedule(tw, timeo, true); diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index b2d97c816c99..337390ba85b4 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -92,6 +92,11 @@ static void inet_twsk_add_node_rcu(struct inet_timewait_sock *tw, hlist_nulls_add_head_rcu(&tw->tw_node, list); } +static void inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo) +{ + __inet_twsk_schedule(tw, timeo, false); +} + /* * Enter the time wait state. * Essentially we whip up a timewait bucket, copy the relevant info into it -- cgit v1.2.3-58-ga151 From 5380d64f8d766576ac5c0f627418b2d0e1d2641f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 6 Jun 2024 12:29:05 -0700 Subject: rtnetlink: move rtnl_lock handling out of af_netlink Now that we have an intermediate layer of code for handling rtnl-level netlink dump quirks, we can move the rtnl_lock taking there. For dump handlers with RTNL_FLAG_DUMP_SPLIT_NLM_DONE we can avoid taking rtnl_lock just to generate NLM_DONE, once again. Signed-off-by: Jakub Kicinski Reviewed-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 9 +++++++-- net/netlink/af_netlink.c | 2 -- 2 files changed, 7 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 4668d6718040..eabfc8290f5e 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -6486,6 +6486,7 @@ static int rtnl_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, static int rtnl_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { + const bool needs_lock = !(cb->flags & RTNL_FLAG_DUMP_UNLOCKED); rtnl_dumpit_func dumpit = cb->data; int err; @@ -6495,7 +6496,11 @@ static int rtnl_dumpit(struct sk_buff *skb, struct netlink_callback *cb) if (!dumpit) return 0; + if (needs_lock) + rtnl_lock(); err = dumpit(skb, cb); + if (needs_lock) + rtnl_unlock(); /* Old dump handlers used to send NLM_DONE as in a separate recvmsg(). * Some applications which parse netlink manually depend on this. 
@@ -6515,7 +6520,8 @@ static int rtnetlink_dump_start(struct sock *ssk, struct sk_buff *skb, const struct nlmsghdr *nlh, struct netlink_dump_control *control) { - if (control->flags & RTNL_FLAG_DUMP_SPLIT_NLM_DONE) { + if (control->flags & RTNL_FLAG_DUMP_SPLIT_NLM_DONE || + !(control->flags & RTNL_FLAG_DUMP_UNLOCKED)) { WARN_ON(control->data); control->data = control->dump; control->dump = rtnl_dumpit; @@ -6703,7 +6709,6 @@ static int __net_init rtnetlink_net_init(struct net *net) struct netlink_kernel_cfg cfg = { .groups = RTNLGRP_MAX, .input = rtnetlink_rcv, - .cb_mutex = &rtnl_mutex, .flags = NL_CFG_F_NONROOT_RECV, .bind = rtnetlink_bind, }; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index fa9c090cf629..8bbbe75e75db 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2330,8 +2330,6 @@ static int netlink_dump(struct sock *sk, bool lock_taken) cb->extack = &extack; - if (cb->flags & RTNL_FLAG_DUMP_UNLOCKED) - extra_mutex = NULL; if (extra_mutex) mutex_lock(extra_mutex); nlk->dump_done_errno = cb->dump(skb, cb); -- cgit v1.2.3-58-ga151 From 5fbf57a937f418fe204f9dbb7735e91984f4ee6a Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 6 Jun 2024 12:29:06 -0700 Subject: net: netlink: remove the cb_mutex "injection" from netlink core Back in 2007, in commit af65bdfce98d ("[NETLINK]: Switch cb_lock spinlock to mutex and allow to override it") netlink core was extended to allow subsystems to replace the dump mutex lock with its own lock. The mechanism was used by rtnetlink to take rtnl_lock but it isn't sufficiently flexible for other users. Over the 17 years since it was added no other user appeared. Since rtnetlink needs conditional locking now, and doesn't use it either, axe this feature complete. Signed-off-by: Jakub Kicinski Reviewed-by: Kuniyuki Iwashima Signed-off-by: David S. 
Miller --- include/linux/netlink.h | 1 - net/netlink/af_netlink.c | 18 +++--------------- 2 files changed, 3 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 5df7340d4dab..b332c2048c75 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -47,7 +47,6 @@ struct netlink_kernel_cfg { unsigned int groups; unsigned int flags; void (*input)(struct sk_buff *skb); - struct mutex *cb_mutex; int (*bind)(struct net *net, int group); void (*unbind)(struct net *net, int group); void (*release) (struct sock *sk, unsigned long *groups); diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 8bbbe75e75db..0b7a89db3ab7 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -636,8 +636,7 @@ static struct proto netlink_proto = { }; static int __netlink_create(struct net *net, struct socket *sock, - struct mutex *dump_cb_mutex, int protocol, - int kern) + int protocol, int kern) { struct sock *sk; struct netlink_sock *nlk; @@ -655,7 +654,6 @@ static int __netlink_create(struct net *net, struct socket *sock, lockdep_set_class_and_name(&nlk->nl_cb_mutex, nlk_cb_mutex_keys + protocol, nlk_cb_mutex_key_strings[protocol]); - nlk->dump_cb_mutex = dump_cb_mutex; init_waitqueue_head(&nlk->wait); sk->sk_destruct = netlink_sock_destruct; @@ -667,7 +665,6 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol, int kern) { struct module *module = NULL; - struct mutex *cb_mutex; struct netlink_sock *nlk; int (*bind)(struct net *net, int group); void (*unbind)(struct net *net, int group); @@ -696,7 +693,6 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol, module = nl_table[protocol].module; else err = -EPROTONOSUPPORT; - cb_mutex = nl_table[protocol].cb_mutex; bind = nl_table[protocol].bind; unbind = nl_table[protocol].unbind; release = nl_table[protocol].release; @@ -705,7 +701,7 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol, if (err < 0) goto out; - err = __netlink_create(net, sock, cb_mutex, protocol, kern); + err = __netlink_create(net, sock, protocol, kern); if (err < 0) goto out_module; @@ -2016,7 +2012,6 @@ __netlink_kernel_create(struct net *net, int unit, struct module *module, struct sock *sk; struct netlink_sock *nlk; struct listeners *listeners = NULL; - struct mutex *cb_mutex = cfg ? 
cfg->cb_mutex : NULL; unsigned int groups; BUG_ON(!nl_table); @@ -2027,7 +2022,7 @@ __netlink_kernel_create(struct net *net, int unit, struct module *module, if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock)) return NULL; - if (__netlink_create(net, sock, cb_mutex, unit, 1) < 0) + if (__netlink_create(net, sock, unit, 1) < 0) goto out_sock_release_nosk; sk = sock->sk; @@ -2055,7 +2050,6 @@ __netlink_kernel_create(struct net *net, int unit, struct module *module, if (!nl_table[unit].registered) { nl_table[unit].groups = groups; rcu_assign_pointer(nl_table[unit].listeners, listeners); - nl_table[unit].cb_mutex = cb_mutex; nl_table[unit].module = module; if (cfg) { nl_table[unit].bind = cfg->bind; @@ -2326,15 +2320,9 @@ static int netlink_dump(struct sock *sk, bool lock_taken) netlink_skb_set_owner_r(skb, sk); if (nlk->dump_done_errno > 0) { - struct mutex *extra_mutex = nlk->dump_cb_mutex; - cb->extack = &extack; - if (extra_mutex) - mutex_lock(extra_mutex); nlk->dump_done_errno = cb->dump(skb, cb); - if (extra_mutex) - mutex_unlock(extra_mutex); /* EMSGSIZE plus something already in the skb means * that there's more to dump but current skb has filled up. -- cgit v1.2.3-58-ga151 From 77f7541248fcfcc79afff68ff9acf1f09ab8e1e4 Mon Sep 17 00:00:00 2001 From: Marek Behún Date: Wed, 5 Jun 2024 15:33:28 +0200 Subject: net: dsa: deduplicate code adding / deleting the port address to fdb MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sequence if (dsa_switch_supports_uc_filtering(ds)) dsa_port_standalone_host_fdb_add(dp, addr, 0); if (!ether_addr_equal(addr, conduit->dev_addr)) dev_uc_add(conduit, addr); is executed both in dsa_user_open() and dsa_user_set_mac_addr(). Its reverse is executed both in dsa_user_close() and dsa_user_set_mac_addr(). Refactor these sequences into new functions dsa_user_host_uc_install() and dsa_user_host_uc_uninstall(). Signed-off-by: Marek Behún Signed-off-by: David S. 
Miller --- net/dsa/user.c | 91 ++++++++++++++++++++++++++++++---------------------------- 1 file changed, 47 insertions(+), 44 deletions(-) (limited to 'net') diff --git a/net/dsa/user.c b/net/dsa/user.c index 867c5fe9a4da..efbb34c711cb 100644 --- a/net/dsa/user.c +++ b/net/dsa/user.c @@ -355,60 +355,82 @@ static int dsa_user_get_iflink(const struct net_device *dev) return READ_ONCE(dsa_user_to_conduit(dev)->ifindex); } -static int dsa_user_open(struct net_device *dev) +static int dsa_user_host_uc_install(struct net_device *dev, const u8 *addr) { struct net_device *conduit = dsa_user_to_conduit(dev); struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_switch *ds = dp->ds; int err; - err = dev_open(conduit, NULL); - if (err < 0) { - netdev_err(dev, "failed to open conduit %s\n", conduit->name); - goto out; - } - if (dsa_switch_supports_uc_filtering(ds)) { - err = dsa_port_standalone_host_fdb_add(dp, dev->dev_addr, 0); + err = dsa_port_standalone_host_fdb_add(dp, addr, 0); if (err) goto out; } - if (!ether_addr_equal(dev->dev_addr, conduit->dev_addr)) { - err = dev_uc_add(conduit, dev->dev_addr); + if (!ether_addr_equal(addr, conduit->dev_addr)) { + err = dev_uc_add(conduit, addr); if (err < 0) goto del_host_addr; } - err = dsa_port_enable_rt(dp, dev->phydev); - if (err) - goto del_unicast; - return 0; -del_unicast: - if (!ether_addr_equal(dev->dev_addr, conduit->dev_addr)) - dev_uc_del(conduit, dev->dev_addr); del_host_addr: if (dsa_switch_supports_uc_filtering(ds)) - dsa_port_standalone_host_fdb_del(dp, dev->dev_addr, 0); + dsa_port_standalone_host_fdb_del(dp, addr, 0); out: return err; } -static int dsa_user_close(struct net_device *dev) +static void dsa_user_host_uc_uninstall(struct net_device *dev) { struct net_device *conduit = dsa_user_to_conduit(dev); struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_switch *ds = dp->ds; - dsa_port_disable_rt(dp); - if (!ether_addr_equal(dev->dev_addr, conduit->dev_addr)) dev_uc_del(conduit, dev->dev_addr); if (dsa_switch_supports_uc_filtering(ds)) dsa_port_standalone_host_fdb_del(dp, dev->dev_addr, 0); +} + +static int dsa_user_open(struct net_device *dev) +{ + struct net_device *conduit = dsa_user_to_conduit(dev); + struct dsa_port *dp = dsa_user_to_port(dev); + int err; + + err = dev_open(conduit, NULL); + if (err < 0) { + netdev_err(dev, "failed to open conduit %s\n", conduit->name); + goto out; + } + + err = dsa_user_host_uc_install(dev, dev->dev_addr); + if (err) + goto out; + + err = dsa_port_enable_rt(dp, dev->phydev); + if (err) + goto out_del_host_uc; + + return 0; + +out_del_host_uc: + dsa_user_host_uc_uninstall(dev); +out: + return err; +} + +static int dsa_user_close(struct net_device *dev) +{ + struct dsa_port *dp = dsa_user_to_port(dev); + + dsa_port_disable_rt(dp); + + dsa_user_host_uc_uninstall(dev); return 0; } @@ -448,7 +470,6 @@ static void dsa_user_set_rx_mode(struct net_device *dev) static int dsa_user_set_mac_address(struct net_device *dev, void *a) { - struct net_device *conduit = dsa_user_to_conduit(dev); struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_switch *ds = dp->ds; struct sockaddr *addr = a; @@ -470,34 +491,16 @@ static int dsa_user_set_mac_address(struct net_device *dev, void *a) if (!(dev->flags & IFF_UP)) goto out_change_dev_addr; - if (dsa_switch_supports_uc_filtering(ds)) { - err = dsa_port_standalone_host_fdb_add(dp, addr->sa_data, 0); - if (err) - return err; - } - - if (!ether_addr_equal(addr->sa_data, conduit->dev_addr)) { - err = dev_uc_add(conduit, addr->sa_data); - if (err < 0) - goto 
del_unicast; - } - - if (!ether_addr_equal(dev->dev_addr, conduit->dev_addr)) - dev_uc_del(conduit, dev->dev_addr); + err = dsa_user_host_uc_install(dev, addr->sa_data); + if (err) + return err; - if (dsa_switch_supports_uc_filtering(ds)) - dsa_port_standalone_host_fdb_del(dp, dev->dev_addr, 0); + dsa_user_host_uc_uninstall(dev); out_change_dev_addr: eth_hw_addr_set(dev, addr->sa_data); return 0; - -del_unicast: - if (dsa_switch_supports_uc_filtering(ds)) - dsa_port_standalone_host_fdb_del(dp, addr->sa_data, 0); - - return err; } struct dsa_user_dump_ctx { -- cgit v1.2.3-58-ga151 From eef8e906aea270f4b8912a0b51403b80aec54d30 Mon Sep 17 00:00:00 2001 From: Marek Behún Date: Wed, 5 Jun 2024 15:33:29 +0200 Subject: net: dsa: update the unicast MAC address when changing conduit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When changing DSA user interface conduit while the user interface is up, DSA exhibits different behavior in comparison to when the interface is down. This different behavior concerns the primary unicast MAC address stored in the port standalone FDB and in the conduit device UC database. If we put a switch port down while changing the conduit with ip link set sw0p0 down ip link set sw0p0 type dsa conduit conduit1 ip link set sw0p0 up we delete the address in dsa_user_close() and install the (possibly different) address in dsa_user_open(). But when changing the conduit on the fly, the old address is not deleted and the new one is not installed. Since we explicitly want to support live-changing the conduit, uninstall the old address before calling dsa_port_assign_conduit() and install the (possibly different) new address after the call. Because conduit change might also trigger address change (the user interface is supposed to inherit the conduit interface MAC address if no address is defined in hardware (dp->mac is a zero address)), move the eth_hw_addr_inherit() call from dsa_user_change_conduit() to dsa_port_change_conduit(), just before installing the new address. Although this is in theory a flaw in DSA core, it needs not be backported, since there is currently no DSA driver that can be affected by this. The only DSA driver that supports changing conduit is felix, and, as explained by Vladimir Oltean [1]: There are 2 reasons why with felix the bug does not manifest itself. First is because both the 'ocelot' and the alternate 'ocelot-8021q' tagging protocols have the 'promisc_on_conduit = true' flag. So the unicast address doesn't have to be in the conduit's RX filter - neither the old or the new conduit. Second, dsa_user_host_uc_install() theoretically leaves behind host FDB entries installed towards the wrong (old) CPU port. But in felix_fdb_add(), we treat any FDB entry requested towards any CPU port as if it was a multicast FDB entry programmed towards _all_ CPU ports. For that reason, it is installed towards the port mask of the PGID_CPU port group ID: if (dsa_port_is_cpu(dp)) port = PGID_CPU; Therefore no Fixes tag for this change. [1] https://lore.kernel.org/netdev/20240507201827.47suw4fwcjrbungy@skbuf/ Signed-off-by: Marek Behún Reviewed-by: Vladimir Oltean Tested-by: Vladimir Oltean Signed-off-by: David S. 
Miller --- net/dsa/port.c | 40 ++++++++++++++++++++++++++++++++++++++++ net/dsa/user.c | 10 ++-------- net/dsa/user.h | 2 ++ 3 files changed, 44 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/dsa/port.c b/net/dsa/port.c index e23db9507546..25258b33e59e 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -1467,10 +1467,34 @@ int dsa_port_change_conduit(struct dsa_port *dp, struct net_device *conduit, */ dsa_user_unsync_ha(dev); + /* If live-changing, we also need to uninstall the user device address + * from the port FDB and the conduit interface. + */ + if (dev->flags & IFF_UP) + dsa_user_host_uc_uninstall(dev); + err = dsa_port_assign_conduit(dp, conduit, extack, true); if (err) goto rewind_old_addrs; + /* If the port doesn't have its own MAC address and relies on the DSA + * conduit's one, inherit it again from the new DSA conduit. + */ + if (is_zero_ether_addr(dp->mac)) + eth_hw_addr_inherit(dev, conduit); + + /* If live-changing, we need to install the user device address to the + * port FDB and the conduit interface. + */ + if (dev->flags & IFF_UP) { + err = dsa_user_host_uc_install(dev, dev->dev_addr); + if (err) { + NL_SET_ERR_MSG_MOD(extack, + "Failed to install host UC address"); + goto rewind_addr_inherit; + } + } + dsa_user_sync_ha(dev); if (vlan_filtering) { @@ -1500,10 +1524,26 @@ rewind_new_vlan: rewind_new_addrs: dsa_user_unsync_ha(dev); + if (dev->flags & IFF_UP) + dsa_user_host_uc_uninstall(dev); + +rewind_addr_inherit: + if (is_zero_ether_addr(dp->mac)) + eth_hw_addr_inherit(dev, old_conduit); + dsa_port_assign_conduit(dp, old_conduit, NULL, false); /* Restore the objects on the old CPU port */ rewind_old_addrs: + if (dev->flags & IFF_UP) { + tmp = dsa_user_host_uc_install(dev, dev->dev_addr); + if (tmp) { + dev_err(ds->dev, + "port %d failed to restore host UC address: %pe\n", + dp->index, ERR_PTR(tmp)); + } + } + dsa_user_sync_ha(dev); if (vlan_filtering) { diff --git a/net/dsa/user.c b/net/dsa/user.c index efbb34c711cb..e8f56a40b614 100644 --- a/net/dsa/user.c +++ b/net/dsa/user.c @@ -355,7 +355,7 @@ static int dsa_user_get_iflink(const struct net_device *dev) return READ_ONCE(dsa_user_to_conduit(dev)->ifindex); } -static int dsa_user_host_uc_install(struct net_device *dev, const u8 *addr) +int dsa_user_host_uc_install(struct net_device *dev, const u8 *addr) { struct net_device *conduit = dsa_user_to_conduit(dev); struct dsa_port *dp = dsa_user_to_port(dev); @@ -383,7 +383,7 @@ out: return err; } -static void dsa_user_host_uc_uninstall(struct net_device *dev) +void dsa_user_host_uc_uninstall(struct net_device *dev) { struct net_device *conduit = dsa_user_to_conduit(dev); struct dsa_port *dp = dsa_user_to_port(dev); @@ -2882,12 +2882,6 @@ int dsa_user_change_conduit(struct net_device *dev, struct net_device *conduit, ERR_PTR(err)); } - /* If the port doesn't have its own MAC address and relies on the DSA - * conduit's one, inherit it again from the new DSA conduit. 
- */ - if (is_zero_ether_addr(dp->mac)) - eth_hw_addr_inherit(dev, conduit); - return 0; out_revert_conduit_link: diff --git a/net/dsa/user.h b/net/dsa/user.h index 996069130bea..016884bead3c 100644 --- a/net/dsa/user.h +++ b/net/dsa/user.h @@ -42,6 +42,8 @@ int dsa_user_suspend(struct net_device *user_dev); int dsa_user_resume(struct net_device *user_dev); int dsa_user_register_notifier(void); void dsa_user_unregister_notifier(void); +int dsa_user_host_uc_install(struct net_device *dev, const u8 *addr); +void dsa_user_host_uc_uninstall(struct net_device *dev); void dsa_user_sync_ha(struct net_device *dev); void dsa_user_unsync_ha(struct net_device *dev); void dsa_user_setup_tagger(struct net_device *user); -- cgit v1.2.3-58-ga151 From 983e44f0ee003bf6ca28519fbcabaa7adb77827b Mon Sep 17 00:00:00 2001 From: Chris Packham Date: Fri, 7 Jun 2024 14:08:43 +1200 Subject: net: dsa: Fix typo in NET_DSA_TAG_RTL4_A Kconfig Fix a minor typo in the help text for the NET_DSA_TAG_RTL4_A config option. Signed-off-by: Chris Packham Reviewed-by: Andrew Lunn Reviewed-by: Luiz Angelo Daros de Luca Link: https://lore.kernel.org/r/20240607020843.1380735-1-chris.packham@alliedtelesis.co.nz Signed-off-by: Jakub Kicinski --- net/dsa/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig index 8e698bea99a3..8d5bf869eb14 100644 --- a/net/dsa/Kconfig +++ b/net/dsa/Kconfig @@ -129,7 +129,7 @@ config NET_DSA_TAG_RTL4_A tristate "Tag driver for Realtek 4 byte protocol A tags" help Say Y or M if you want to enable support for tagging frames for the - Realtek switches with 4 byte protocol A tags, sich as found in + Realtek switches with 4 byte protocol A tags, such as found in the Realtek RTL8366RB. config NET_DSA_TAG_RTL8_4 -- cgit v1.2.3-58-ga151 From 45403b12c29c5d9510ace1ad767ea4b13a4caf38 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Fri, 7 Jun 2024 01:44:19 -0700 Subject: ip_tunnel: Move stats allocation to core With commit 34d21de99cea9 ("net: Move {l,t,d}stats allocation to core and convert veth & vrf"), stats allocation could be done on net core instead of this driver. With this new approach, the driver doesn't have to bother with error handling (allocation failure checking, making sure free happens in the right spot, etc). This is core responsibility now. Move ip_tunnel driver to leverage the core allocation. All the ip_tunnel_init() users call ip_tunnel_init() as part of their .ndo_init callback. The .ndo_init callback is called before the stats allocation in netdev_register(), thus, the allocation will happen before the netdev is visible. 
Signed-off-by: Breno Leitao Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240607084420.3932875-1-leitao@debian.org Signed-off-by: Jakub Kicinski --- net/ipv4/ip_tunnel.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index bccef2fcf620..5cffad42fe8c 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -1099,7 +1099,6 @@ static void ip_tunnel_dev_free(struct net_device *dev) gro_cells_destroy(&tunnel->gro_cells); dst_cache_destroy(&tunnel->dst_cache); - free_percpu(dev->tstats); } void ip_tunnel_dellink(struct net_device *dev, struct list_head *head) @@ -1313,20 +1312,15 @@ int ip_tunnel_init(struct net_device *dev) dev->needs_free_netdev = true; dev->priv_destructor = ip_tunnel_dev_free; - dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); - if (!dev->tstats) - return -ENOMEM; + dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS; err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL); - if (err) { - free_percpu(dev->tstats); + if (err) return err; - } err = gro_cells_init(&tunnel->gro_cells, dev); if (err) { dst_cache_destroy(&tunnel->dst_cache); - free_percpu(dev->tstats); return err; } -- cgit v1.2.3-58-ga151 From 94b601bc4f8528262b2b83194b7fd3fc2a6da75a Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Fri, 7 Jun 2024 18:25:25 +0800 Subject: net: core: Implement dstats-type stats collections We currently have dev_get_tstats64() for collecting per-cpu stats of type pcpu_sw_netstats ("tstats"). However, tstats doesn't allow for accounting tx/rx drops. We do have a stats variant that does have stats for dropped packets: struct pcpu_dstats, but there are no core helpers for using those stats. The VRF driver uses dstats, by providing its own collation/fetch functions to do so. This change adds a common implementation for dstats-type collection, used when pcpu_stat_type == NETDEV_PCPU_STAT_DSTAT. This is based on the VRF driver's existing stats collator (plus the unused tx_drops stat from there). We will switch the VRF driver to use this in the next change. Signed-off-by: Jeremy Kerr Reviewed-by: Simon Horman Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240607-dstats-v3-2-cc781fe116f7@codeconstruct.com.au Signed-off-by: Jakub Kicinski --- net/core/dev.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index e62698c7a0e6..c361a7b69da8 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -10703,6 +10703,54 @@ void netdev_run_todo(void) wake_up(&netdev_unregistering_wq); } +/* Collate per-cpu network dstats statistics + * + * Read per-cpu network statistics from dev->dstats and populate the related + * fields in @s. 
+ */ +static void dev_fetch_dstats(struct rtnl_link_stats64 *s, + const struct pcpu_dstats __percpu *dstats) +{ + int cpu; + + for_each_possible_cpu(cpu) { + u64 rx_packets, rx_bytes, rx_drops; + u64 tx_packets, tx_bytes, tx_drops; + const struct pcpu_dstats *stats; + unsigned int start; + + stats = per_cpu_ptr(dstats, cpu); + do { + start = u64_stats_fetch_begin(&stats->syncp); + rx_packets = u64_stats_read(&stats->rx_packets); + rx_bytes = u64_stats_read(&stats->rx_bytes); + rx_drops = u64_stats_read(&stats->rx_drops); + tx_packets = u64_stats_read(&stats->tx_packets); + tx_bytes = u64_stats_read(&stats->tx_bytes); + tx_drops = u64_stats_read(&stats->tx_drops); + } while (u64_stats_fetch_retry(&stats->syncp, start)); + + s->rx_packets += rx_packets; + s->rx_bytes += rx_bytes; + s->rx_dropped += rx_drops; + s->tx_packets += tx_packets; + s->tx_bytes += tx_bytes; + s->tx_dropped += tx_drops; + } +} + +/* ndo_get_stats64 implementation for dtstats-based accounting. + * + * Populate @s from dev->stats and dev->dstats. This is used internally by the + * core for NETDEV_PCPU_STAT_DSTAT-type stats collection. + */ +static void dev_get_dstats64(const struct net_device *dev, + struct rtnl_link_stats64 *s) +{ + netdev_stats_to_stats64(s, &dev->stats); + dev_fetch_dstats(s, dev->dstats); +} + /* Convert net_device_stats to rtnl_link_stats64. rtnl_link_stats64 has * all the same fields in the same order as net_device_stats, with only * the type differing, but rtnl_link_stats64 may have additional fields @@ -10779,6 +10827,8 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev, netdev_stats_to_stats64(storage, ops->ndo_get_stats(dev)); } else if (dev->pcpu_stat_type == NETDEV_PCPU_STAT_TSTATS) { dev_get_tstats64(dev, storage); + } else if (dev->pcpu_stat_type == NETDEV_PCPU_STAT_DSTATS) { + dev_get_dstats64(dev, storage); } else { netdev_stats_to_stats64(storage, &dev->stats); } -- cgit v1.2.3-58-ga151 From 3966a668bfeef49e265e4975a2cdc55fb931036d Mon Sep 17 00:00:00 2001 From: Dmitry Safonov <0x7f454c46@gmail.com> Date: Fri, 7 Jun 2024 00:25:55 +0100 Subject: net/tcp: Use static_branch_tcp_{md5,ao} to drop ifdefs It's possible to clean-up some ifdefs by hiding that tcp_{md5,ao}_needed static branch is defined and compiled only under related configs, since commit 4c8530dc7d7d ("net/tcp: Only produce AO/MD5 logs if there are any keys"). Reviewed-by: Eric Dumazet Signed-off-by: Dmitry Safonov <0x7f454c46@gmail.com> Signed-off-by: David S. 
Miller --- include/net/tcp.h | 14 ++++---------- net/ipv4/tcp_ipv4.c | 8 ++------ 2 files changed, 6 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/include/net/tcp.h b/include/net/tcp.h index a70fc39090fe..e5427b05129b 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -2386,21 +2386,15 @@ static inline void tcp_get_current_key(const struct sock *sk, static inline bool tcp_key_is_md5(const struct tcp_key *key) { -#ifdef CONFIG_TCP_MD5SIG - if (static_branch_unlikely(&tcp_md5_needed.key) && - key->type == TCP_KEY_MD5) - return true; -#endif + if (static_branch_tcp_md5()) + return key->type == TCP_KEY_MD5; return false; } static inline bool tcp_key_is_ao(const struct tcp_key *key) { -#ifdef CONFIG_TCP_AO - if (static_branch_unlikely(&tcp_ao_needed.key) && - key->type == TCP_KEY_AO) - return true; -#endif + if (static_branch_tcp_ao()) + return key->type == TCP_KEY_AO; return false; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index e61c7c974745..de0c8f43448a 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1054,12 +1054,10 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) #else if (0) { #endif -#ifdef CONFIG_TCP_MD5SIG - } else if (static_branch_unlikely(&tcp_md5_needed.key)) { + } else if (static_branch_tcp_md5()) { key.md5_key = tcp_twsk_md5_key(tcptw); if (key.md5_key) key.type = TCP_KEY_MD5; -#endif } tcp_v4_send_ack(sk, skb, @@ -1128,8 +1126,7 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, #else if (0) { #endif -#ifdef CONFIG_TCP_MD5SIG - } else if (static_branch_unlikely(&tcp_md5_needed.key)) { + } else if (static_branch_tcp_md5()) { const union tcp_md5_addr *addr; int l3index; @@ -1138,7 +1135,6 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, key.md5_key = tcp_md5_do_lookup(sk, l3index, addr, AF_INET); if (key.md5_key) key.type = TCP_KEY_MD5; -#endif } tcp_v4_send_ack(sk, skb, seq, -- cgit v1.2.3-58-ga151 From 72863087f635367323693b9ab83c3107e0353c5f Mon Sep 17 00:00:00 2001 From: Dmitry Safonov <0x7f454c46@gmail.com> Date: Fri, 7 Jun 2024 00:25:56 +0100 Subject: net/tcp: Add a helper tcp_ao_hdr_maclen() It's going to be used more in TCP-AO tracepoints. Reviewed-by: Eric Dumazet Signed-off-by: Dmitry Safonov <0x7f454c46@gmail.com> Signed-off-by: David S. 
Miller --- include/net/tcp_ao.h | 5 +++++ net/ipv4/tcp_ao.c | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/include/net/tcp_ao.h b/include/net/tcp_ao.h index 5d8e9ed2c005..198e02004ad2 100644 --- a/include/net/tcp_ao.h +++ b/include/net/tcp_ao.h @@ -19,6 +19,11 @@ struct tcp_ao_hdr { u8 rnext_keyid; }; +static inline u8 tcp_ao_hdr_maclen(const struct tcp_ao_hdr *aoh) +{ + return aoh->length - sizeof(struct tcp_ao_hdr); +} + struct tcp_ao_counters { atomic64_t pkt_good; atomic64_t pkt_bad; diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c index 37c42b63ff99..50ae43c92829 100644 --- a/net/ipv4/tcp_ao.c +++ b/net/ipv4/tcp_ao.c @@ -884,8 +884,8 @@ tcp_ao_verify_hash(const struct sock *sk, const struct sk_buff *skb, const struct tcp_ao_hdr *aoh, struct tcp_ao_key *key, u8 *traffic_key, u8 *phash, u32 sne, int l3index) { - u8 maclen = aoh->length - sizeof(struct tcp_ao_hdr); const struct tcphdr *th = tcp_hdr(skb); + u8 maclen = tcp_ao_hdr_maclen(aoh); void *hash_buf = NULL; if (maclen != tcp_ao_maclen(key)) { -- cgit v1.2.3-58-ga151 From 811efc06e5f30a57030451b2d1998aa81273baf8 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov <0x7f454c46@gmail.com> Date: Fri, 7 Jun 2024 00:25:57 +0100 Subject: net/tcp: Move tcp_inbound_hash() from headers Two reasons: 1. It's grown up enough 2. In order to not do header spaghetti by including , which is necessary for TCP tracepoints. While at it, unexport and make static tcp_inbound_ao_hash(). Reviewed-by: Eric Dumazet Signed-off-by: Dmitry Safonov <0x7f454c46@gmail.com> Signed-off-by: David S. Miller --- include/net/tcp.h | 78 +++---------------------------------------------------- net/ipv4/tcp.c | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 76 insertions(+), 76 deletions(-) (limited to 'net') diff --git a/include/net/tcp.h b/include/net/tcp.h index e5427b05129b..2aac11e7e1cc 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1863,12 +1863,6 @@ tcp_md5_do_lookup_any_l3index(const struct sock *sk, return __tcp_md5_do_lookup(sk, 0, addr, family, true); } -enum skb_drop_reason -tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb, - const void *saddr, const void *daddr, - int family, int l3index, const __u8 *hash_location); - - #define tcp_twsk_md5_key(twsk) ((twsk)->tw_md5_key) #else static inline struct tcp_md5sig_key * @@ -1885,13 +1879,6 @@ tcp_md5_do_lookup_any_l3index(const struct sock *sk, return NULL; } -static inline enum skb_drop_reason -tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb, - const void *saddr, const void *daddr, - int family, int l3index, const __u8 *hash_location) -{ - return SKB_NOT_DROPPED_YET; -} #define tcp_twsk_md5_key(twsk) NULL #endif @@ -2806,66 +2793,9 @@ static inline bool tcp_ao_required(struct sock *sk, const void *saddr, return false; } -/* Called with rcu_read_lock() */ -static inline enum skb_drop_reason -tcp_inbound_hash(struct sock *sk, const struct request_sock *req, - const struct sk_buff *skb, - const void *saddr, const void *daddr, - int family, int dif, int sdif) -{ - const struct tcphdr *th = tcp_hdr(skb); - const struct tcp_ao_hdr *aoh; - const __u8 *md5_location; - int l3index; - - /* Invalid option or two times meet any of auth options */ - if (tcp_parse_auth_options(th, &md5_location, &aoh)) { - tcp_hash_fail("TCP segment has incorrect auth options set", - family, skb, ""); - return SKB_DROP_REASON_TCP_AUTH_HDR; - } - - if (req) { - if (tcp_rsk_used_ao(req) != !!aoh) { - NET_INC_STATS(sock_net(sk), 
LINUX_MIB_TCPAOBAD); - tcp_hash_fail("TCP connection can't start/end using TCP-AO", - family, skb, "%s", - !aoh ? "missing AO" : "AO signed"); - return SKB_DROP_REASON_TCP_AOFAILURE; - } - } - - /* sdif set, means packet ingressed via a device - * in an L3 domain and dif is set to the l3mdev - */ - l3index = sdif ? dif : 0; - - /* Fast path: unsigned segments */ - if (likely(!md5_location && !aoh)) { - /* Drop if there's TCP-MD5 or TCP-AO key with any rcvid/sndid - * for the remote peer. On TCP-AO established connection - * the last key is impossible to remove, so there's - * always at least one current_key. - */ - if (tcp_ao_required(sk, saddr, family, l3index, true)) { - tcp_hash_fail("AO hash is required, but not found", - family, skb, "L3 index %d", l3index); - return SKB_DROP_REASON_TCP_AONOTFOUND; - } - if (unlikely(tcp_md5_do_lookup(sk, l3index, saddr, family))) { - NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND); - tcp_hash_fail("MD5 Hash not found", - family, skb, "L3 index %d", l3index); - return SKB_DROP_REASON_TCP_MD5NOTFOUND; - } - return SKB_NOT_DROPPED_YET; - } - - if (aoh) - return tcp_inbound_ao_hash(sk, skb, family, req, l3index, aoh); - - return tcp_inbound_md5_hash(sk, skb, saddr, daddr, family, - l3index, md5_location); -} +enum skb_drop_reason tcp_inbound_hash(struct sock *sk, + const struct request_sock *req, const struct sk_buff *skb, + const void *saddr, const void *daddr, + int family, int dif, int sdif); #endif /* _TCP_H */ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 6553221694ec..17a4a8e4855d 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -4463,7 +4463,7 @@ int tcp_md5_hash_key(struct tcp_sigpool *hp, EXPORT_SYMBOL(tcp_md5_hash_key); /* Called with rcu_read_lock() */ -enum skb_drop_reason +static enum skb_drop_reason tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb, const void *saddr, const void *daddr, int family, int l3index, const __u8 *hash_location) @@ -4517,10 +4517,80 @@ tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb, } return SKB_NOT_DROPPED_YET; } -EXPORT_SYMBOL(tcp_inbound_md5_hash); +#else +static inline enum skb_drop_reason +tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb, + const void *saddr, const void *daddr, + int family, int l3index, const __u8 *hash_location) +{ + return SKB_NOT_DROPPED_YET; +} #endif +/* Called with rcu_read_lock() */ +enum skb_drop_reason +tcp_inbound_hash(struct sock *sk, const struct request_sock *req, + const struct sk_buff *skb, + const void *saddr, const void *daddr, + int family, int dif, int sdif) +{ + const struct tcphdr *th = tcp_hdr(skb); + const struct tcp_ao_hdr *aoh; + const __u8 *md5_location; + int l3index; + + /* Invalid option or two times meet any of auth options */ + if (tcp_parse_auth_options(th, &md5_location, &aoh)) { + tcp_hash_fail("TCP segment has incorrect auth options set", + family, skb, ""); + return SKB_DROP_REASON_TCP_AUTH_HDR; + } + + if (req) { + if (tcp_rsk_used_ao(req) != !!aoh) { + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAOBAD); + tcp_hash_fail("TCP connection can't start/end using TCP-AO", + family, skb, "%s", + !aoh ? "missing AO" : "AO signed"); + return SKB_DROP_REASON_TCP_AOFAILURE; + } + } + + /* sdif set, means packet ingressed via a device + * in an L3 domain and dif is set to the l3mdev + */ + l3index = sdif ? dif : 0; + + /* Fast path: unsigned segments */ + if (likely(!md5_location && !aoh)) { + /* Drop if there's TCP-MD5 or TCP-AO key with any rcvid/sndid + * for the remote peer. 
On TCP-AO established connection + * the last key is impossible to remove, so there's + * always at least one current_key. + */ + if (tcp_ao_required(sk, saddr, family, l3index, true)) { + tcp_hash_fail("AO hash is required, but not found", + family, skb, "L3 index %d", l3index); + return SKB_DROP_REASON_TCP_AONOTFOUND; + } + if (unlikely(tcp_md5_do_lookup(sk, l3index, saddr, family))) { + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND); + tcp_hash_fail("MD5 Hash not found", + family, skb, "L3 index %d", l3index); + return SKB_DROP_REASON_TCP_MD5NOTFOUND; + } + return SKB_NOT_DROPPED_YET; + } + + if (aoh) + return tcp_inbound_ao_hash(sk, skb, family, req, l3index, aoh); + + return tcp_inbound_md5_hash(sk, skb, saddr, daddr, family, + l3index, md5_location); +} +EXPORT_SYMBOL_GPL(tcp_inbound_hash); + void tcp_done(struct sock *sk) { struct request_sock *req; -- cgit v1.2.3-58-ga151 From 96be3dcd013df6aa79acf32e739c0a35b89a4f50 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov <0x7f454c46@gmail.com> Date: Fri, 7 Jun 2024 00:25:58 +0100 Subject: net/tcp: Add tcp-md5 and tcp-ao tracepoints Instead of forcing userspace to parse dmesg (that's what currently is happening, at least in codebase of my current company), provide a better way, that can be enabled/disabled in runtime. Currently, there are already tcp events, add hashing related ones there, too. Rasdaemon currently exercises net_dev_xmit_timeout, devlink_health_report, but it'll be trivial to teach it to deal with failed hashes. Otherwise, BGP may trace/log them itself. Especially exciting for possible investigations is key rotation (RNext_key requests). Suggested-by: Jakub Kicinski Signed-off-by: Dmitry Safonov <0x7f454c46@gmail.com> Signed-off-by: David S. Miller --- include/trace/events/tcp.h | 317 +++++++++++++++++++++++++++++++++++++++++++++ net/ipv4/tcp.c | 17 +++ net/ipv4/tcp_ao.c | 13 ++ net/ipv4/tcp_input.c | 8 +- net/ipv4/tcp_output.c | 2 + 5 files changed, 355 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h index 49b5ee091cf6..1c8bd8e186b8 100644 --- a/include/trace/events/tcp.h +++ b/include/trace/events/tcp.h @@ -411,6 +411,323 @@ TRACE_EVENT(tcp_cong_state_set, __entry->cong_state) ); +DECLARE_EVENT_CLASS(tcp_hash_event, + + TP_PROTO(const struct sock *sk, const struct sk_buff *skb), + + TP_ARGS(sk, skb), + + TP_STRUCT__entry( + __field(__u64, net_cookie) + __field(const void *, skbaddr) + __field(const void *, skaddr) + __field(int, state) + + /* sockaddr_in6 is always bigger than sockaddr_in */ + __array(__u8, saddr, sizeof(struct sockaddr_in6)) + __array(__u8, daddr, sizeof(struct sockaddr_in6)) + __field(int, l3index) + + __field(__u16, sport) + __field(__u16, dport) + __field(__u16, family) + + __field(bool, fin) + __field(bool, syn) + __field(bool, rst) + __field(bool, psh) + __field(bool, ack) + ), + + TP_fast_assign( + const struct tcphdr *th = (const struct tcphdr *)skb->data; + + __entry->net_cookie = sock_net(sk)->net_cookie; + __entry->skbaddr = skb; + __entry->skaddr = sk; + __entry->state = sk->sk_state; + + memset(__entry->saddr, 0, sizeof(struct sockaddr_in6)); + memset(__entry->daddr, 0, sizeof(struct sockaddr_in6)); + TP_STORE_ADDR_PORTS_SKB(skb, th, __entry->saddr, __entry->daddr); + __entry->l3index = inet_sdif(skb) ? 
inet_iif(skb) : 0; + + /* For filtering use */ + __entry->sport = ntohs(th->source); + __entry->dport = ntohs(th->dest); + __entry->family = sk->sk_family; + + __entry->fin = th->fin; + __entry->syn = th->syn; + __entry->rst = th->rst; + __entry->psh = th->psh; + __entry->ack = th->ack; + ), + + TP_printk("net=%llu state=%s family=%s src=%pISpc dest=%pISpc L3index=%d [%c%c%c%c%c]", + __entry->net_cookie, + show_tcp_state_name(__entry->state), + show_family_name(__entry->family), + __entry->saddr, __entry->daddr, + __entry->l3index, + __entry->fin ? 'F' : ' ', + __entry->syn ? 'S' : ' ', + __entry->rst ? 'R' : ' ', + __entry->psh ? 'P' : ' ', + __entry->ack ? '.' : ' ') +); + +DEFINE_EVENT(tcp_hash_event, tcp_hash_bad_header, + + TP_PROTO(const struct sock *sk, const struct sk_buff *skb), + TP_ARGS(sk, skb) +); + +DEFINE_EVENT(tcp_hash_event, tcp_hash_md5_required, + + TP_PROTO(const struct sock *sk, const struct sk_buff *skb), + TP_ARGS(sk, skb) +); + +DEFINE_EVENT(tcp_hash_event, tcp_hash_md5_unexpected, + + TP_PROTO(const struct sock *sk, const struct sk_buff *skb), + TP_ARGS(sk, skb) +); + +DEFINE_EVENT(tcp_hash_event, tcp_hash_md5_mismatch, + + TP_PROTO(const struct sock *sk, const struct sk_buff *skb), + TP_ARGS(sk, skb) +); + +DEFINE_EVENT(tcp_hash_event, tcp_hash_ao_required, + + TP_PROTO(const struct sock *sk, const struct sk_buff *skb), + TP_ARGS(sk, skb) +); + +DECLARE_EVENT_CLASS(tcp_ao_event, + + TP_PROTO(const struct sock *sk, const struct sk_buff *skb, + const __u8 keyid, const __u8 rnext, const __u8 maclen), + + TP_ARGS(sk, skb, keyid, rnext, maclen), + + TP_STRUCT__entry( + __field(__u64, net_cookie) + __field(const void *, skbaddr) + __field(const void *, skaddr) + __field(int, state) + + /* sockaddr_in6 is always bigger than sockaddr_in */ + __array(__u8, saddr, sizeof(struct sockaddr_in6)) + __array(__u8, daddr, sizeof(struct sockaddr_in6)) + __field(int, l3index) + + __field(__u16, sport) + __field(__u16, dport) + __field(__u16, family) + + __field(bool, fin) + __field(bool, syn) + __field(bool, rst) + __field(bool, psh) + __field(bool, ack) + + __field(__u8, keyid) + __field(__u8, rnext) + __field(__u8, maclen) + ), + + TP_fast_assign( + const struct tcphdr *th = (const struct tcphdr *)skb->data; + + __entry->net_cookie = sock_net(sk)->net_cookie; + __entry->skbaddr = skb; + __entry->skaddr = sk; + __entry->state = sk->sk_state; + + memset(__entry->saddr, 0, sizeof(struct sockaddr_in6)); + memset(__entry->daddr, 0, sizeof(struct sockaddr_in6)); + TP_STORE_ADDR_PORTS_SKB(skb, th, __entry->saddr, __entry->daddr); + __entry->l3index = inet_sdif(skb) ? inet_iif(skb) : 0; + + /* For filtering use */ + __entry->sport = ntohs(th->source); + __entry->dport = ntohs(th->dest); + __entry->family = sk->sk_family; + + __entry->fin = th->fin; + __entry->syn = th->syn; + __entry->rst = th->rst; + __entry->psh = th->psh; + __entry->ack = th->ack; + + __entry->keyid = keyid; + __entry->rnext = rnext; + __entry->maclen = maclen; + ), + + TP_printk("net=%llu state=%s family=%s src=%pISpc dest=%pISpc L3index=%d [%c%c%c%c%c] keyid=%u rnext=%u maclen=%u", + __entry->net_cookie, + show_tcp_state_name(__entry->state), + show_family_name(__entry->family), + __entry->saddr, __entry->daddr, + __entry->l3index, + __entry->fin ? 'F' : ' ', + __entry->syn ? 'S' : ' ', + __entry->rst ? 'R' : ' ', + __entry->psh ? 'P' : ' ', + __entry->ack ? '.' 
: ' ', + __entry->keyid, __entry->rnext, __entry->maclen) +); + +DEFINE_EVENT(tcp_ao_event, tcp_ao_handshake_failure, + TP_PROTO(const struct sock *sk, const struct sk_buff *skb, + const __u8 keyid, const __u8 rnext, const __u8 maclen), + TP_ARGS(sk, skb, keyid, rnext, maclen) +); + +DEFINE_EVENT(tcp_ao_event, tcp_ao_wrong_maclen, + TP_PROTO(const struct sock *sk, const struct sk_buff *skb, + const __u8 keyid, const __u8 rnext, const __u8 maclen), + TP_ARGS(sk, skb, keyid, rnext, maclen) +); + +DEFINE_EVENT(tcp_ao_event, tcp_ao_mismatch, + TP_PROTO(const struct sock *sk, const struct sk_buff *skb, + const __u8 keyid, const __u8 rnext, const __u8 maclen), + TP_ARGS(sk, skb, keyid, rnext, maclen) +); + +DEFINE_EVENT(tcp_ao_event, tcp_ao_key_not_found, + TP_PROTO(const struct sock *sk, const struct sk_buff *skb, + const __u8 keyid, const __u8 rnext, const __u8 maclen), + TP_ARGS(sk, skb, keyid, rnext, maclen) +); + +DEFINE_EVENT(tcp_ao_event, tcp_ao_rnext_request, + TP_PROTO(const struct sock *sk, const struct sk_buff *skb, + const __u8 keyid, const __u8 rnext, const __u8 maclen), + TP_ARGS(sk, skb, keyid, rnext, maclen) +); + +DECLARE_EVENT_CLASS(tcp_ao_event_sk, + + TP_PROTO(const struct sock *sk, const __u8 keyid, const __u8 rnext), + + TP_ARGS(sk, keyid, rnext), + + TP_STRUCT__entry( + __field(__u64, net_cookie) + __field(const void *, skaddr) + __field(int, state) + + /* sockaddr_in6 is always bigger than sockaddr_in */ + __array(__u8, saddr, sizeof(struct sockaddr_in6)) + __array(__u8, daddr, sizeof(struct sockaddr_in6)) + + __field(__u16, sport) + __field(__u16, dport) + __field(__u16, family) + + __field(__u8, keyid) + __field(__u8, rnext) + ), + + TP_fast_assign( + const struct inet_sock *inet = inet_sk(sk); + + __entry->net_cookie = sock_net(sk)->net_cookie; + __entry->skaddr = sk; + __entry->state = sk->sk_state; + + memset(__entry->saddr, 0, sizeof(struct sockaddr_in6)); + memset(__entry->daddr, 0, sizeof(struct sockaddr_in6)); + TP_STORE_ADDR_PORTS(__entry, inet, sk); + + /* For filtering use */ + __entry->sport = ntohs(inet->inet_sport); + __entry->dport = ntohs(inet->inet_dport); + __entry->family = sk->sk_family; + + __entry->keyid = keyid; + __entry->rnext = rnext; + ), + + TP_printk("net=%llu state=%s family=%s src=%pISpc dest=%pISpc keyid=%u rnext=%u", + __entry->net_cookie, + show_tcp_state_name(__entry->state), + show_family_name(__entry->family), + __entry->saddr, __entry->daddr, + __entry->keyid, __entry->rnext) +); + +DEFINE_EVENT(tcp_ao_event_sk, tcp_ao_synack_no_key, + TP_PROTO(const struct sock *sk, const __u8 keyid, const __u8 rnext), + TP_ARGS(sk, keyid, rnext) +); + +DECLARE_EVENT_CLASS(tcp_ao_event_sne, + + TP_PROTO(const struct sock *sk, __u32 new_sne), + + TP_ARGS(sk, new_sne), + + TP_STRUCT__entry( + __field(__u64, net_cookie) + __field(const void *, skaddr) + __field(int, state) + + /* sockaddr_in6 is always bigger than sockaddr_in */ + __array(__u8, saddr, sizeof(struct sockaddr_in6)) + __array(__u8, daddr, sizeof(struct sockaddr_in6)) + + __field(__u16, sport) + __field(__u16, dport) + __field(__u16, family) + + __field(__u32, new_sne) + ), + + TP_fast_assign( + const struct inet_sock *inet = inet_sk(sk); + + __entry->net_cookie = sock_net(sk)->net_cookie; + __entry->skaddr = sk; + __entry->state = sk->sk_state; + + memset(__entry->saddr, 0, sizeof(struct sockaddr_in6)); + memset(__entry->daddr, 0, sizeof(struct sockaddr_in6)); + TP_STORE_ADDR_PORTS(__entry, inet, sk); + + /* For filtering use */ + __entry->sport = ntohs(inet->inet_sport); + __entry->dport 
= ntohs(inet->inet_dport); + __entry->family = sk->sk_family; + + __entry->new_sne = new_sne; + ), + + TP_printk("net=%llu state=%s family=%s src=%pISpc dest=%pISpc sne=%u", + __entry->net_cookie, + show_tcp_state_name(__entry->state), + show_family_name(__entry->family), + __entry->saddr, __entry->daddr, + __entry->new_sne) +); + +DEFINE_EVENT(tcp_ao_event_sne, tcp_ao_snd_sne_update, + TP_PROTO(const struct sock *sk, __u32 new_sne), + TP_ARGS(sk, new_sne) +); + +DEFINE_EVENT(tcp_ao_event_sne, tcp_ao_rcv_sne_update, + TP_PROTO(const struct sock *sk, __u32 new_sne), + TP_ARGS(sk, new_sne) +); + #endif /* _TRACE_TCP_H */ /* This part must be outside protection */ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 17a4a8e4855d..73152ce1367e 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -282,6 +282,7 @@ #include #include #include +#include #include /* Track pending CMSGs. */ @@ -4484,6 +4485,7 @@ tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb, if (!key && hash_location) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED); tcp_hash_fail("Unexpected MD5 Hash found", family, skb, ""); + trace_tcp_hash_md5_unexpected(sk, skb); return SKB_DROP_REASON_TCP_MD5UNEXPECTED; } @@ -4513,6 +4515,7 @@ tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb, l3index); } } + trace_tcp_hash_md5_mismatch(sk, skb); return SKB_DROP_REASON_TCP_MD5FAILURE; } return SKB_NOT_DROPPED_YET; @@ -4544,15 +4547,27 @@ tcp_inbound_hash(struct sock *sk, const struct request_sock *req, if (tcp_parse_auth_options(th, &md5_location, &aoh)) { tcp_hash_fail("TCP segment has incorrect auth options set", family, skb, ""); + trace_tcp_hash_bad_header(sk, skb); return SKB_DROP_REASON_TCP_AUTH_HDR; } if (req) { if (tcp_rsk_used_ao(req) != !!aoh) { + u8 keyid, rnext, maclen; + + if (aoh) { + keyid = aoh->keyid; + rnext = aoh->rnext_keyid; + maclen = tcp_ao_hdr_maclen(aoh); + } else { + keyid = rnext = maclen = 0; + } + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAOBAD); tcp_hash_fail("TCP connection can't start/end using TCP-AO", family, skb, "%s", !aoh ? 
"missing AO" : "AO signed"); + trace_tcp_ao_handshake_failure(sk, skb, keyid, rnext, maclen); return SKB_DROP_REASON_TCP_AOFAILURE; } } @@ -4572,12 +4587,14 @@ tcp_inbound_hash(struct sock *sk, const struct request_sock *req, if (tcp_ao_required(sk, saddr, family, l3index, true)) { tcp_hash_fail("AO hash is required, but not found", family, skb, "L3 index %d", l3index); + trace_tcp_hash_ao_required(sk, skb); return SKB_DROP_REASON_TCP_AONOTFOUND; } if (unlikely(tcp_md5_do_lookup(sk, l3index, saddr, family))) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND); tcp_hash_fail("MD5 Hash not found", family, skb, "L3 index %d", l3index); + trace_tcp_hash_md5_required(sk, skb); return SKB_DROP_REASON_TCP_MD5NOTFOUND; } return SKB_NOT_DROPPED_YET; diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c index 50ae43c92829..1e5087c6cd7d 100644 --- a/net/ipv4/tcp_ao.c +++ b/net/ipv4/tcp_ao.c @@ -16,6 +16,7 @@ #include #include #include +#include DEFINE_STATIC_KEY_DEFERRED_FALSE(tcp_ao_needed, HZ); @@ -895,6 +896,8 @@ tcp_ao_verify_hash(const struct sock *sk, const struct sk_buff *skb, tcp_hash_fail("AO hash wrong length", family, skb, "%u != %d L3index: %d", maclen, tcp_ao_maclen(key), l3index); + trace_tcp_ao_wrong_maclen(sk, skb, aoh->keyid, + aoh->rnext_keyid, maclen); return SKB_DROP_REASON_TCP_AOFAILURE; } @@ -911,6 +914,8 @@ tcp_ao_verify_hash(const struct sock *sk, const struct sk_buff *skb, atomic64_inc(&key->pkt_bad); tcp_hash_fail("AO hash mismatch", family, skb, "L3index: %d", l3index); + trace_tcp_ao_mismatch(sk, skb, aoh->keyid, + aoh->rnext_keyid, maclen); kfree(hash_buf); return SKB_DROP_REASON_TCP_AOFAILURE; } @@ -927,6 +932,7 @@ tcp_inbound_ao_hash(struct sock *sk, const struct sk_buff *skb, int l3index, const struct tcp_ao_hdr *aoh) { const struct tcphdr *th = tcp_hdr(skb); + u8 maclen = tcp_ao_hdr_maclen(aoh); u8 *phash = (u8 *)(aoh + 1); /* hash goes just after the header */ struct tcp_ao_info *info; enum skb_drop_reason ret; @@ -941,6 +947,8 @@ tcp_inbound_ao_hash(struct sock *sk, const struct sk_buff *skb, NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAOKEYNOTFOUND); tcp_hash_fail("AO key not found", family, skb, "keyid: %u L3index: %d", aoh->keyid, l3index); + trace_tcp_ao_key_not_found(sk, skb, aoh->keyid, + aoh->rnext_keyid, maclen); return SKB_DROP_REASON_TCP_AOUNEXPECTED; } @@ -981,6 +989,9 @@ tcp_inbound_ao_hash(struct sock *sk, const struct sk_buff *skb, current_key = READ_ONCE(info->current_key); /* Key rotation: the peer asks us to use new key (RNext) */ if (unlikely(aoh->rnext_keyid != current_key->sndid)) { + trace_tcp_ao_rnext_request(sk, skb, current_key->sndid, + aoh->rnext_keyid, + tcp_ao_hdr_maclen(aoh)); /* If the key is not found we do nothing. 
*/ key = tcp_ao_established_key(info, aoh->rnext_keyid, -1); if (key) @@ -1048,6 +1059,8 @@ key_not_found: atomic64_inc(&info->counters.key_not_found); tcp_hash_fail("Requested by the peer AO key id not found", family, skb, "L3index: %d", l3index); + trace_tcp_ao_key_not_found(sk, skb, aoh->keyid, + aoh->rnext_keyid, maclen); return SKB_DROP_REASON_TCP_AOKEYNOTFOUND; } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index eb187450e4d7..d0a1e34d69f6 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3578,8 +3578,10 @@ static void tcp_snd_sne_update(struct tcp_sock *tp, u32 ack) ao = rcu_dereference_protected(tp->ao_info, lockdep_sock_is_held((struct sock *)tp)); - if (ao && ack < tp->snd_una) + if (ao && ack < tp->snd_una) { ao->snd_sne++; + trace_tcp_ao_snd_sne_update((struct sock *)tp, ao->snd_sne); + } #endif } @@ -3604,8 +3606,10 @@ static void tcp_rcv_sne_update(struct tcp_sock *tp, u32 seq) ao = rcu_dereference_protected(tp->ao_info, lockdep_sock_is_held((struct sock *)tp)); - if (ao && seq < tp->rcv_nxt) + if (ao && seq < tp->rcv_nxt) { ao->rcv_sne++; + trace_tcp_ao_rcv_sne_update((struct sock *)tp, ao->rcv_sne); + } #endif } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 090fb0c24599..16c48df8df4c 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3768,6 +3768,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, #ifdef CONFIG_TCP_AO struct tcp_ao_key *ao_key = NULL; u8 keyid = tcp_rsk(req)->ao_keyid; + u8 rnext = tcp_rsk(req)->ao_rcv_next; ao_key = tcp_sk(sk)->af_specific->ao_lookup(sk, req_to_sk(req), keyid, -1); @@ -3777,6 +3778,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, * ao_keyid (RFC5925 RNextKeyID), so let's keep it simple here. */ if (unlikely(!ao_key)) { + trace_tcp_ao_synack_no_key(sk, keyid, rnext); rcu_read_unlock(); kfree_skb(skb); net_warn_ratelimited("TCP-AO: the keyid %u from SYN packet is not present - not sending SYNACK\n", -- cgit v1.2.3-58-ga151 From 78b1b27db91c7a94297a8b6a665fe7e86dfc5750 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov <0x7f454c46@gmail.com> Date: Fri, 7 Jun 2024 00:25:59 +0100 Subject: net/tcp: Remove tcp_hash_fail() Now there are tracepoints, that cover all functionality of tcp_hash_fail(), but also wire up missing places They are also faster, can be disabled and provide filtering. This potentially may create a regression if a userspace depends on dmesg logs. Fingers crossed, let's see if anyone complains in reality. Reviewed-by: Eric Dumazet Signed-off-by: Dmitry Safonov <0x7f454c46@gmail.com> Signed-off-by: David S. Miller --- include/net/tcp_ao.h | 37 ------------------------------------- net/ipv4/tcp.c | 25 ------------------------- net/ipv4/tcp_ao.c | 9 --------- 3 files changed, 71 deletions(-) (limited to 'net') diff --git a/include/net/tcp_ao.h b/include/net/tcp_ao.h index 198e02004ad2..1d46460d0fef 100644 --- a/include/net/tcp_ao.h +++ b/include/net/tcp_ao.h @@ -149,43 +149,6 @@ extern struct static_key_false_deferred tcp_ao_needed; #define static_branch_tcp_ao() false #endif -static inline bool tcp_hash_should_produce_warnings(void) -{ - return static_branch_tcp_md5() || static_branch_tcp_ao(); -} - -#define tcp_hash_fail(msg, family, skb, fmt, ...) 
\ -do { \ - const struct tcphdr *th = tcp_hdr(skb); \ - char hdr_flags[6]; \ - char *f = hdr_flags; \ - \ - if (!tcp_hash_should_produce_warnings()) \ - break; \ - if (th->fin) \ - *f++ = 'F'; \ - if (th->syn) \ - *f++ = 'S'; \ - if (th->rst) \ - *f++ = 'R'; \ - if (th->psh) \ - *f++ = 'P'; \ - if (th->ack) \ - *f++ = '.'; \ - *f = 0; \ - if ((family) == AF_INET) { \ - net_info_ratelimited("%s for %pI4.%d->%pI4.%d [%s] " fmt "\n", \ - msg, &ip_hdr(skb)->saddr, ntohs(th->source), \ - &ip_hdr(skb)->daddr, ntohs(th->dest), \ - hdr_flags, ##__VA_ARGS__); \ - } else { \ - net_info_ratelimited("%s for [%pI6c].%d->[%pI6c].%d [%s]" fmt "\n", \ - msg, &ipv6_hdr(skb)->saddr, ntohs(th->source), \ - &ipv6_hdr(skb)->daddr, ntohs(th->dest), \ - hdr_flags, ##__VA_ARGS__); \ - } \ -} while (0) - #ifdef CONFIG_TCP_AO /* TCP-AO structures and functions */ struct tcp4_ao_context { diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 73152ce1367e..e03a342c9162 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -4484,7 +4484,6 @@ tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb, if (!key && hash_location) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED); - tcp_hash_fail("Unexpected MD5 Hash found", family, skb, ""); trace_tcp_hash_md5_unexpected(sk, skb); return SKB_DROP_REASON_TCP_MD5UNEXPECTED; } @@ -4500,21 +4499,6 @@ tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb, NULL, skb); if (genhash || memcmp(hash_location, newhash, 16) != 0) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE); - if (family == AF_INET) { - tcp_hash_fail("MD5 Hash failed", AF_INET, skb, "%s L3 index %d", - genhash ? "tcp_v4_calc_md5_hash failed" - : "", l3index); - } else { - if (genhash) { - tcp_hash_fail("MD5 Hash failed", - AF_INET6, skb, "L3 index %d", - l3index); - } else { - tcp_hash_fail("MD5 Hash mismatch", - AF_INET6, skb, "L3 index %d", - l3index); - } - } trace_tcp_hash_md5_mismatch(sk, skb); return SKB_DROP_REASON_TCP_MD5FAILURE; } @@ -4545,8 +4529,6 @@ tcp_inbound_hash(struct sock *sk, const struct request_sock *req, /* Invalid option or two times meet any of auth options */ if (tcp_parse_auth_options(th, &md5_location, &aoh)) { - tcp_hash_fail("TCP segment has incorrect auth options set", - family, skb, ""); trace_tcp_hash_bad_header(sk, skb); return SKB_DROP_REASON_TCP_AUTH_HDR; } @@ -4564,9 +4546,6 @@ tcp_inbound_hash(struct sock *sk, const struct request_sock *req, } NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAOBAD); - tcp_hash_fail("TCP connection can't start/end using TCP-AO", - family, skb, "%s", - !aoh ? "missing AO" : "AO signed"); trace_tcp_ao_handshake_failure(sk, skb, keyid, rnext, maclen); return SKB_DROP_REASON_TCP_AOFAILURE; } @@ -4585,15 +4564,11 @@ tcp_inbound_hash(struct sock *sk, const struct request_sock *req, * always at least one current_key. 
*/ if (tcp_ao_required(sk, saddr, family, l3index, true)) { - tcp_hash_fail("AO hash is required, but not found", - family, skb, "L3 index %d", l3index); trace_tcp_hash_ao_required(sk, skb); return SKB_DROP_REASON_TCP_AONOTFOUND; } if (unlikely(tcp_md5_do_lookup(sk, l3index, saddr, family))) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND); - tcp_hash_fail("MD5 Hash not found", - family, skb, "L3 index %d", l3index); trace_tcp_hash_md5_required(sk, skb); return SKB_DROP_REASON_TCP_MD5NOTFOUND; } diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c index 1e5087c6cd7d..0de863aa5f66 100644 --- a/net/ipv4/tcp_ao.c +++ b/net/ipv4/tcp_ao.c @@ -893,9 +893,6 @@ tcp_ao_verify_hash(const struct sock *sk, const struct sk_buff *skb, NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAOBAD); atomic64_inc(&info->counters.pkt_bad); atomic64_inc(&key->pkt_bad); - tcp_hash_fail("AO hash wrong length", family, skb, - "%u != %d L3index: %d", maclen, - tcp_ao_maclen(key), l3index); trace_tcp_ao_wrong_maclen(sk, skb, aoh->keyid, aoh->rnext_keyid, maclen); return SKB_DROP_REASON_TCP_AOFAILURE; @@ -912,8 +909,6 @@ tcp_ao_verify_hash(const struct sock *sk, const struct sk_buff *skb, NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAOBAD); atomic64_inc(&info->counters.pkt_bad); atomic64_inc(&key->pkt_bad); - tcp_hash_fail("AO hash mismatch", family, skb, - "L3index: %d", l3index); trace_tcp_ao_mismatch(sk, skb, aoh->keyid, aoh->rnext_keyid, maclen); kfree(hash_buf); @@ -945,8 +940,6 @@ tcp_inbound_ao_hash(struct sock *sk, const struct sk_buff *skb, info = rcu_dereference(tcp_sk(sk)->ao_info); if (!info) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAOKEYNOTFOUND); - tcp_hash_fail("AO key not found", family, skb, - "keyid: %u L3index: %d", aoh->keyid, l3index); trace_tcp_ao_key_not_found(sk, skb, aoh->keyid, aoh->rnext_keyid, maclen); return SKB_DROP_REASON_TCP_AOUNEXPECTED; @@ -1057,8 +1050,6 @@ verify_hash: key_not_found: NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAOKEYNOTFOUND); atomic64_inc(&info->counters.key_not_found); - tcp_hash_fail("Requested by the peer AO key id not found", - family, skb, "L3index: %d", l3index); trace_tcp_ao_key_not_found(sk, skb, aoh->keyid, aoh->rnext_keyid, maclen); return SKB_DROP_REASON_TCP_AOKEYNOTFOUND; -- cgit v1.2.3-58-ga151 From 5f12dd57a071daf896c00cb4572f2baebbddbb46 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 12 Jun 2024 10:05:33 +0200 Subject: wifi: mac80211: correct EHT EIRP TPE parsing For the EHT EIRP transmit power envelope, the 320 MHz is in the last octet, but if we've copied 4 octets (count == 3), the next one is at index 4 not 5 (count + 2). Fix this, and just hardcode the offset since count is always 3 here. 
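As a purely illustrative, standalone sketch of the corrected index arithmetic (a hypothetical helper, not the mac80211 parser itself; it assumes variable[] starts at the first EIRP octet):

#include <stdint.h>
#include <stddef.h>
#include <string.h>

/* Hypothetical sketch: with count == 3 the four EIRP octets occupy
 * variable[0..3], so the optional 320 MHz octet immediately follows
 * at variable[4], not variable[5].
 */
static size_t parse_eirp_tpe(const uint8_t *variable, size_t n_octets,
			     uint8_t out[5])
{
	const size_t count = 3;			/* 20/40/80/160 MHz values */

	if (n_octets < count + 1)
		return 0;
	memcpy(out, variable, count + 1);	/* fills out[0..3] */
	if (n_octets > count + 1) {
		out[4] = variable[4];		/* 320 MHz value */
		return 5;
	}
	return count + 1;
}
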
Fixes: 39dc8b8ea387 ("wifi: mac80211: pass parsed TPE data to drivers") Reviewed-by: Miriam Rachel Korenblit Link: https://msgid.link/20240612100533.f96c1e0fb758.I2f301c4341abb44dafd29128e7e32c66dc0e296d@changeid Signed-off-by: Johannes Berg --- net/mac80211/parse.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/parse.c b/net/mac80211/parse.c index 28aae14db8a9..279c5143b335 100644 --- a/net/mac80211/parse.c +++ b/net/mac80211/parse.c @@ -235,7 +235,7 @@ static void ieee80211_parse_tpe(struct ieee80211_parsed_tpe *tpe, *cnt_out = count + 1; /* separately take 320 MHz if present */ if (count == 3 && len > sizeof(*env) + count + 1) { - out[4] = env->variable[count + 2]; + out[4] = env->variable[4]; *cnt_out = 5; } break; -- cgit v1.2.3-58-ga151 From 7f12e26a194d0043441f870708093d9c2c3bad7d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 7 Jun 2024 20:17:17 +0200 Subject: wifi: cfg80211: make hash table duplicates more survivable Jiazi Li reported that they occasionally see hash table duplicates as evidenced by the WARN_ON() in rb_insert_bss() in this code. It isn't clear how that happens, nor have I been able to reproduce it, but if it does happen, the kernel crashes later, when it tries to unhash the entry that's now not hashed. Try to make this situation more survivable by removing the BSS from the list(s) as well, that way it's fully leaked here (as had been the intent in the hash insert error path), and no longer reachable through the list(s) so it shouldn't be unhashed again later. Link: https://lore.kernel.org/r/20231026013528.GA24122@Jiazi.Li Signed-off-by: Johannes Berg Link: https://msgid.link/20240607181726.36835-2-johannes@sipsolutions.net Signed-off-by: Johannes Berg --- net/wireless/scan.c | 46 ++++++++++++++++++++++++++++++++++------------ 1 file changed, 34 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 2f2a3163968a..9b31274a1376 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -1604,7 +1604,7 @@ struct cfg80211_bss *__cfg80211_get_bss(struct wiphy *wiphy, } EXPORT_SYMBOL(__cfg80211_get_bss); -static void rb_insert_bss(struct cfg80211_registered_device *rdev, +static bool rb_insert_bss(struct cfg80211_registered_device *rdev, struct cfg80211_internal_bss *bss) { struct rb_node **p = &rdev->bss_tree.rb_node; @@ -1620,7 +1620,7 @@ static void rb_insert_bss(struct cfg80211_registered_device *rdev, if (WARN_ON(!cmp)) { /* will sort of leak this BSS */ - return; + return false; } if (cmp < 0) @@ -1631,6 +1631,7 @@ static void rb_insert_bss(struct cfg80211_registered_device *rdev, rb_link_node(&bss->rbn, parent, p); rb_insert_color(&bss->rbn, &rdev->bss_tree); + return true; } static struct cfg80211_internal_bss * @@ -1657,6 +1658,34 @@ rb_find_bss(struct cfg80211_registered_device *rdev, return NULL; } +static void cfg80211_insert_bss(struct cfg80211_registered_device *rdev, + struct cfg80211_internal_bss *bss) +{ + lockdep_assert_held(&rdev->bss_lock); + + if (!rb_insert_bss(rdev, bss)) + return; + list_add_tail(&bss->list, &rdev->bss_list); + rdev->bss_entries++; +} + +static void cfg80211_rehash_bss(struct cfg80211_registered_device *rdev, + struct cfg80211_internal_bss *bss) +{ + lockdep_assert_held(&rdev->bss_lock); + + rb_erase(&bss->rbn, &rdev->bss_tree); + if (!rb_insert_bss(rdev, bss)) { + list_del(&bss->list); + if (!list_empty(&bss->hidden_list)) + list_del_init(&bss->hidden_list); + if (!list_empty(&bss->pub.nontrans_list)) + 
list_del_init(&bss->pub.nontrans_list); + rdev->bss_entries--; + } + rdev->bss_generation++; +} + static bool cfg80211_combine_bsses(struct cfg80211_registered_device *rdev, struct cfg80211_internal_bss *new) { @@ -1969,9 +1998,7 @@ __cfg80211_bss_update(struct cfg80211_registered_device *rdev, bss_ref_get(rdev, bss_from_pub(tmp->pub.transmitted_bss)); } - list_add_tail(&new->list, &rdev->bss_list); - rdev->bss_entries++; - rb_insert_bss(rdev, new); + cfg80211_insert_bss(rdev, new); found = new; } @@ -3349,19 +3376,14 @@ void cfg80211_update_assoc_bss_entry(struct wireless_dev *wdev, if (!WARN_ON(!__cfg80211_unlink_bss(rdev, new))) rdev->bss_generation++; } - - rb_erase(&cbss->rbn, &rdev->bss_tree); - rb_insert_bss(rdev, cbss); - rdev->bss_generation++; + cfg80211_rehash_bss(rdev, cbss); list_for_each_entry_safe(nontrans_bss, tmp, &cbss->pub.nontrans_list, nontrans_list) { bss = bss_from_pub(nontrans_bss); bss->pub.channel = chan; - rb_erase(&bss->rbn, &rdev->bss_tree); - rb_insert_bss(rdev, bss); - rdev->bss_generation++; + cfg80211_rehash_bss(rdev, bss); } done: -- cgit v1.2.3-58-ga151 From 7b7890f3861634d1ec762a2697072dff42f65319 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Fri, 7 Jun 2024 03:20:43 -0700 Subject: wifi: mac80211: Move stats allocation to core With commit 34d21de99cea9 ("net: Move {l,t,d}stats allocation to core and convert veth & vrf"), stats allocation could be done on net core instead of this driver. With this new approach, the driver doesn't have to bother with error handling (allocation failure checking, making sure free happens in the right spot, etc). This is core responsibility now. Move mac80211 driver to leverage the core allocation. Signed-off-by: Breno Leitao Reviewed-by: Simon Horman Link: https://msgid.link/20240607102045.235071-1-leitao@debian.org Signed-off-by: Johannes Berg --- net/mac80211/iface.c | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) (limited to 'net') diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index d1a49ee4a194..ac4d4df65298 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1458,11 +1458,6 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) return res; } -static void ieee80211_if_free(struct net_device *dev) -{ - free_percpu(dev->tstats); -} - static void ieee80211_if_setup(struct net_device *dev) { ether_setup(dev); @@ -1470,7 +1465,6 @@ static void ieee80211_if_setup(struct net_device *dev) dev->priv_flags |= IFF_NO_QUEUE; dev->netdev_ops = &ieee80211_dataif_ops; dev->needs_free_netdev = true; - dev->priv_destructor = ieee80211_if_free; } static void ieee80211_iface_process_skb(struct ieee80211_local *local, @@ -2103,11 +2097,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, dev_net_set(ndev, wiphy_net(local->hw.wiphy)); - ndev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); - if (!ndev->tstats) { - free_netdev(ndev); - return -ENOMEM; - } + ndev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS; ndev->needed_headroom = local->tx_headroom + 4*6 /* four MAC addresses */ @@ -2120,7 +2110,6 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, ret = dev_alloc_name(ndev, ndev->name); if (ret < 0) { - ieee80211_if_free(ndev); free_netdev(ndev); return ret; } -- cgit v1.2.3-58-ga151 From c018411d355518a0b2a304d7543564cdd1b808b6 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Fri, 7 Jun 2024 03:20:44 -0700 Subject: wifi: mac80211: Remove generic .ndo_get_stats64 Commit 3e2f544dd8a33 ("net: get stats64 if device if driver is 
configured") moved the callback to dev_get_tstats64() to net core, so, unless the driver is doing some custom stats collection, it does not need to set .ndo_get_stats64. Since this driver is now relying in NETDEV_PCPU_STAT_TSTATS, then, it doesn't need to set the dev_get_tstats64() generic .ndo_get_stats64 function pointer. In this driver specifically, .ndo_get_stats64 basically points to dev_fetch_sw_netstats(). Now it will point to dev_get_tstats64(), which calls netdev_stats_to_stats64() and dev_fetch_sw_netstats(). netdev_stats_to_stats64() seems irrelevant for this driver. Signed-off-by: Breno Leitao Reviewed-by: Simon Horman Link: https://msgid.link/20240607102045.235071-2-leitao@debian.org Signed-off-by: Johannes Berg --- net/mac80211/iface.c | 9 --------- 1 file changed, 9 deletions(-) (limited to 'net') diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index ac4d4df65298..6b7580c61e0c 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -815,12 +815,6 @@ static void ieee80211_uninit(struct net_device *dev) ieee80211_teardown_sdata(IEEE80211_DEV_TO_SUB_IF(dev)); } -static void -ieee80211_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) -{ - dev_fetch_sw_netstats(stats, dev->tstats); -} - static int ieee80211_netdev_setup_tc(struct net_device *dev, enum tc_setup_type type, void *type_data) { @@ -837,7 +831,6 @@ static const struct net_device_ops ieee80211_dataif_ops = { .ndo_start_xmit = ieee80211_subif_start_xmit, .ndo_set_rx_mode = ieee80211_set_multicast_list, .ndo_set_mac_address = ieee80211_change_mac, - .ndo_get_stats64 = ieee80211_get_stats64, .ndo_setup_tc = ieee80211_netdev_setup_tc, }; @@ -877,7 +870,6 @@ static const struct net_device_ops ieee80211_monitorif_ops = { .ndo_set_rx_mode = ieee80211_set_multicast_list, .ndo_set_mac_address = ieee80211_change_mac, .ndo_select_queue = ieee80211_monitor_select_queue, - .ndo_get_stats64 = ieee80211_get_stats64, }; static int ieee80211_netdev_fill_forward_path(struct net_device_path_ctx *ctx, @@ -945,7 +937,6 @@ static const struct net_device_ops ieee80211_dataif_8023_ops = { .ndo_start_xmit = ieee80211_subif_start_xmit_8023, .ndo_set_rx_mode = ieee80211_set_multicast_list, .ndo_set_mac_address = ieee80211_change_mac, - .ndo_get_stats64 = ieee80211_get_stats64, .ndo_fill_forward_path = ieee80211_netdev_fill_forward_path, .ndo_setup_tc = ieee80211_netdev_setup_tc, }; -- cgit v1.2.3-58-ga151 From 9ee0d44f055276fe2802b2f65058e920853f4f99 Mon Sep 17 00:00:00 2001 From: Baochen Qiang Date: Thu, 6 Jun 2024 10:06:52 +0800 Subject: wifi: cfg80211: fix typo in cfg80211_calculate_bitrate_he() rates_996 is mistakenly written as rates_969, fix it. Fixes: c4cbaf7973a7 ("cfg80211: Add support for HE") Signed-off-by: Baochen Qiang Link: https://msgid.link/20240606020653.33205-2-quic_bqiang@quicinc.com Signed-off-by: Johannes Berg --- net/wireless/util.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/wireless/util.c b/net/wireless/util.c index 082c6f9c5416..d262d37c1519 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -1504,7 +1504,7 @@ static u32 cfg80211_calculate_bitrate_he(struct rate_info *rate) 5120, /* 0.833333... 
*/ }; u32 rates_160M[3] = { 960777777, 907400000, 816666666 }; - u32 rates_969[3] = { 480388888, 453700000, 408333333 }; + u32 rates_996[3] = { 480388888, 453700000, 408333333 }; u32 rates_484[3] = { 229411111, 216666666, 195000000 }; u32 rates_242[3] = { 114711111, 108333333, 97500000 }; u32 rates_106[3] = { 40000000, 37777777, 34000000 }; @@ -1529,7 +1529,7 @@ static u32 cfg80211_calculate_bitrate_he(struct rate_info *rate) else if (rate->bw == RATE_INFO_BW_80 || (rate->bw == RATE_INFO_BW_HE_RU && rate->he_ru_alloc == NL80211_RATE_INFO_HE_RU_ALLOC_996)) - result = rates_969[rate->he_gi]; + result = rates_996[rate->he_gi]; else if (rate->bw == RATE_INFO_BW_40 || (rate->bw == RATE_INFO_BW_HE_RU && rate->he_ru_alloc == NL80211_RATE_INFO_HE_RU_ALLOC_484)) -- cgit v1.2.3-58-ga151 From bcbd771cd5d68c0c52567556097d75f9fc4e7cd6 Mon Sep 17 00:00:00 2001 From: Baochen Qiang Date: Thu, 6 Jun 2024 10:06:53 +0800 Subject: wifi: cfg80211: handle 2x996 RU allocation in cfg80211_calculate_bitrate_he() Currently NL80211_RATE_INFO_HE_RU_ALLOC_2x996 is not handled in cfg80211_calculate_bitrate_he(), leading to below warning: kernel: invalid HE MCS: bw:6, ru:6 kernel: WARNING: CPU: 0 PID: 2312 at net/wireless/util.c:1501 cfg80211_calculate_bitrate_he+0x22b/0x270 [cfg80211] Fix it by handling 2x996 RU allocation in the same way as 160 MHz bandwidth. Fixes: c4cbaf7973a7 ("cfg80211: Add support for HE") Signed-off-by: Baochen Qiang Link: https://msgid.link/20240606020653.33205-3-quic_bqiang@quicinc.com Signed-off-by: Johannes Berg --- net/wireless/util.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/wireless/util.c b/net/wireless/util.c index d262d37c1519..af6ec719567f 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -1524,7 +1524,9 @@ static u32 cfg80211_calculate_bitrate_he(struct rate_info *rate) if (WARN_ON_ONCE(rate->nss < 1 || rate->nss > 8)) return 0; - if (rate->bw == RATE_INFO_BW_160) + if (rate->bw == RATE_INFO_BW_160 || + (rate->bw == RATE_INFO_BW_HE_RU && + rate->he_ru_alloc == NL80211_RATE_INFO_HE_RU_ALLOC_2x996)) result = rates_160M[rate->he_gi]; else if (rate->bw == RATE_INFO_BW_80 || (rate->bw == RATE_INFO_BW_HE_RU && -- cgit v1.2.3-58-ga151 From f3269b7912f75b6e34926334377ede45656a8950 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 23 May 2024 12:09:42 +0200 Subject: wifi: nl80211: expose can-monitor channel property It may be possible to monitor on disabled channels per the can-monitor flag, but evidently I forgot to expose that out to userspace. Fix that. 
Fixes: a110a3b79177 ("wifi: cfg80211: optionally support monitor on disabled channels") Reviewed-by: Miriam Rachel Korenblit Reviewed-by: Ilan Peer Signed-off-by: Johannes Berg Link: https://msgid.link/20240523120945.9a2c19a51e53.I50fa1b1a18b70f63a5095131ac23dc2e71f3d426@changeid Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 8ff5f79d446a..fd6e33a1eb2a 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1204,6 +1204,9 @@ static int nl80211_msg_put_channel(struct sk_buff *msg, struct wiphy *wiphy, if ((chan->flags & IEEE80211_CHAN_NO_6GHZ_AFC_CLIENT) && nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_6GHZ_AFC_CLIENT)) goto nla_put_failure; + if ((chan->flags & IEEE80211_CHAN_CAN_MONITOR) && + nla_put_flag(msg, NL80211_FREQUENCY_ATTR_CAN_MONITOR)) + goto nla_put_failure; } if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_MAX_TX_POWER, -- cgit v1.2.3-58-ga151 From ca41bfe367d54a7fa966c11852d50d4191187725 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 23 May 2024 12:09:46 +0200 Subject: wifi: mac80211: refactor channel checks To later introduce an override for VLP APs being allowed despite NO-IR flags, which is somewhat similar in construction to being allowed to monitor on disabled channels, refactor the code that checks channel flags to have not a 'monitor' argument but a set of 'permitting' flags that permit the operation without checking for 'prohibited' flags. Reviewed-by: Miriam Rachel Korenblit Reviewed-by: Ilan Peer Signed-off-by: Johannes Berg Link: https://msgid.link/20240523120945.3da28ded4a50.I90cffc633d0510293d511f60097dc75e719b55f0@changeid Signed-off-by: Johannes Berg --- net/wireless/chan.c | 19 +++++++++++-------- net/wireless/core.h | 3 ++- net/wireless/nl80211.c | 2 +- 3 files changed, 14 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/wireless/chan.c b/net/wireless/chan.c index 3414b2c3abcc..360480604515 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -1145,7 +1145,8 @@ EXPORT_SYMBOL(cfg80211_chandef_dfs_cac_time); static bool cfg80211_secondary_chans_ok(struct wiphy *wiphy, u32 center_freq, u32 bandwidth, - u32 prohibited_flags, bool monitor) + u32 prohibited_flags, + u32 permitting_flags) { struct ieee80211_channel *c; u32 freq, start_freq, end_freq; @@ -1157,7 +1158,7 @@ static bool cfg80211_secondary_chans_ok(struct wiphy *wiphy, c = ieee80211_get_channel_khz(wiphy, freq); if (!c) return false; - if (monitor && c->flags & IEEE80211_CHAN_CAN_MONITOR) + if (c->flags & permitting_flags) continue; if (c->flags & prohibited_flags) return false; @@ -1221,7 +1222,8 @@ static bool cfg80211_edmg_usable(struct wiphy *wiphy, u8 edmg_channels, bool _cfg80211_chandef_usable(struct wiphy *wiphy, const struct cfg80211_chan_def *chandef, - u32 prohibited_flags, bool monitor) + u32 prohibited_flags, + u32 permitting_flags) { struct ieee80211_sta_ht_cap *ht_cap; struct ieee80211_sta_vht_cap *vht_cap; @@ -1383,22 +1385,23 @@ bool _cfg80211_chandef_usable(struct wiphy *wiphy, if (!cfg80211_secondary_chans_ok(wiphy, ieee80211_chandef_to_khz(chandef), - width, prohibited_flags, monitor)) + width, prohibited_flags, + permitting_flags)) return false; if (!chandef->center_freq2) return true; return cfg80211_secondary_chans_ok(wiphy, MHZ_TO_KHZ(chandef->center_freq2), - width, prohibited_flags, monitor); + width, prohibited_flags, + permitting_flags); } bool cfg80211_chandef_usable(struct wiphy *wiphy, const 
struct cfg80211_chan_def *chandef, u32 prohibited_flags) { - return _cfg80211_chandef_usable(wiphy, chandef, prohibited_flags, - false); + return _cfg80211_chandef_usable(wiphy, chandef, prohibited_flags, 0); } EXPORT_SYMBOL(cfg80211_chandef_usable); @@ -1541,7 +1544,7 @@ static bool _cfg80211_reg_can_beacon(struct wiphy *wiphy, prohibited_flags = IEEE80211_CHAN_DISABLED; } - res = cfg80211_chandef_usable(wiphy, chandef, prohibited_flags); + res = _cfg80211_chandef_usable(wiphy, chandef, prohibited_flags, 0); trace_cfg80211_return_bool(res); return res; diff --git a/net/wireless/core.h b/net/wireless/core.h index 118f2f619828..470a18dc4cbf 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -494,7 +494,8 @@ bool cfg80211_wdev_on_sub_chan(struct wireless_dev *wdev, bool primary_only); bool _cfg80211_chandef_usable(struct wiphy *wiphy, const struct cfg80211_chan_def *chandef, - u32 prohibited_flags, bool monitor); + u32 prohibited_flags, + u32 permitting_flags); static inline unsigned int elapsed_jiffies_msecs(unsigned long start) { diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index fd6e33a1eb2a..4628280abf1d 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3347,7 +3347,7 @@ static int _nl80211_parse_chandef(struct cfg80211_registered_device *rdev, if (!_cfg80211_chandef_usable(&rdev->wiphy, chandef, IEEE80211_CHAN_DISABLED, - monitor)) { + monitor ? IEEE80211_CHAN_CAN_MONITOR : 0)) { NL_SET_ERR_MSG(extack, "(extension) channel is disabled"); return -EINVAL; } -- cgit v1.2.3-58-ga151 From 459662e83db6eb33f648213bba2c509f4d6dead9 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 23 May 2024 12:09:47 +0200 Subject: wifi: cfg80211: refactor 6 GHz AP power type parsing Add cfg80211_get_6ghz_power_type() to parse the 6 GHz power type from a given set of elements, which is now only inside cfg80211_6ghz_power_type_valid(). 
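A minimal sketch of how a cfg80211-internal caller might use the new helper to classify an AP from its IEs (the caller and its name are hypothetical, not part of this change):

/* hypothetical example caller, not from the patch */
static bool bss_is_vlp_ap(const u8 *ies, size_t ies_len)
{
	return cfg80211_get_6ghz_power_type(ies, ies_len) ==
	       IEEE80211_REG_VLP_AP;
}
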
Reviewed-by: Miriam Rachel Korenblit Signed-off-by: Johannes Berg Link: https://msgid.link/20240523120945.84cdffd94085.I76f434ee12552e8be91273f3b2d776179eaa62f1@changeid Signed-off-by: Johannes Berg --- net/wireless/core.h | 4 ++++ net/wireless/scan.c | 69 ++++++++++++++++++++++++++++++++--------------------- 2 files changed, 46 insertions(+), 27 deletions(-) (limited to 'net') diff --git a/net/wireless/core.h b/net/wireless/core.h index 470a18dc4cbf..41c8c0e3ba2e 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -533,6 +533,10 @@ struct cfg80211_internal_bss * cfg80211_bss_update(struct cfg80211_registered_device *rdev, struct cfg80211_internal_bss *tmp, bool signal_valid, unsigned long ts); + +enum ieee80211_ap_reg_power +cfg80211_get_6ghz_power_type(const u8 *elems, size_t elems_len); + #ifdef CONFIG_CFG80211_DEVELOPER_WARNINGS #define CFG80211_DEV_WARN_ON(cond) WARN_ON(cond) #else diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 9b31274a1376..88ce365764f5 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -2163,38 +2163,53 @@ struct cfg80211_inform_single_bss_data { u64 cannot_use_reasons; }; -static bool cfg80211_6ghz_power_type_valid(const u8 *ie, size_t ielen, - const u32 flags) +enum ieee80211_ap_reg_power +cfg80211_get_6ghz_power_type(const u8 *elems, size_t elems_len) { - const struct element *tmp; + const struct ieee80211_he_6ghz_oper *he_6ghz_oper; struct ieee80211_he_operation *he_oper; + const struct element *tmp; - tmp = cfg80211_find_ext_elem(WLAN_EID_EXT_HE_OPERATION, ie, ielen); - if (tmp && tmp->datalen >= sizeof(*he_oper) + 1 && - tmp->datalen >= ieee80211_he_oper_size(tmp->data + 1)) { - const struct ieee80211_he_6ghz_oper *he_6ghz_oper; - - he_oper = (void *)&tmp->data[1]; - he_6ghz_oper = ieee80211_he_6ghz_oper(he_oper); - - if (!he_6ghz_oper) - return false; + tmp = cfg80211_find_ext_elem(WLAN_EID_EXT_HE_OPERATION, + elems, elems_len); + if (!tmp || tmp->datalen < sizeof(*he_oper) + 1 || + tmp->datalen < ieee80211_he_oper_size(tmp->data + 1)) + return IEEE80211_REG_UNSET_AP; + + he_oper = (void *)&tmp->data[1]; + he_6ghz_oper = ieee80211_he_6ghz_oper(he_oper); + + if (!he_6ghz_oper) + return IEEE80211_REG_UNSET_AP; + + switch (u8_get_bits(he_6ghz_oper->control, + IEEE80211_HE_6GHZ_OPER_CTRL_REG_INFO)) { + case IEEE80211_6GHZ_CTRL_REG_LPI_AP: + case IEEE80211_6GHZ_CTRL_REG_INDOOR_LPI_AP: + return IEEE80211_REG_LPI_AP; + case IEEE80211_6GHZ_CTRL_REG_SP_AP: + case IEEE80211_6GHZ_CTRL_REG_INDOOR_SP_AP: + return IEEE80211_REG_SP_AP; + case IEEE80211_6GHZ_CTRL_REG_VLP_AP: + return IEEE80211_REG_VLP_AP; + default: + return IEEE80211_REG_UNSET_AP; + } +} - switch (u8_get_bits(he_6ghz_oper->control, - IEEE80211_HE_6GHZ_OPER_CTRL_REG_INFO)) { - case IEEE80211_6GHZ_CTRL_REG_LPI_AP: - case IEEE80211_6GHZ_CTRL_REG_INDOOR_LPI_AP: - return true; - case IEEE80211_6GHZ_CTRL_REG_SP_AP: - case IEEE80211_6GHZ_CTRL_REG_INDOOR_SP_AP: - return !(flags & IEEE80211_CHAN_NO_6GHZ_AFC_CLIENT); - case IEEE80211_6GHZ_CTRL_REG_VLP_AP: - return !(flags & IEEE80211_CHAN_NO_6GHZ_VLP_CLIENT); - default: - return false; - } +static bool cfg80211_6ghz_power_type_valid(const u8 *elems, size_t elems_len, + const u32 flags) +{ + switch (cfg80211_get_6ghz_power_type(elems, elems_len)) { + case IEEE80211_REG_LPI_AP: + return true; + case IEEE80211_REG_SP_AP: + return !(flags & IEEE80211_CHAN_NO_6GHZ_AFC_CLIENT); + case IEEE80211_REG_VLP_AP: + return !(flags & IEEE80211_CHAN_NO_6GHZ_VLP_CLIENT); + default: + return false; } - return false; } /* Returned bss is reference 
counted and must be cleaned up appropriately. */ -- cgit v1.2.3-58-ga151 From 9fd171a71b9d4cd8855891c0ba7e2a152139b41a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 23 May 2024 12:09:48 +0200 Subject: wifi: cfg80211: refactor regulatory beaconing checking There are two functions exported now, with different settings, refactor to just export a single function that take a struct with different settings. This will make it easier to add more parameters. Reviewed-by: Miriam Rachel Korenblit Signed-off-by: Johannes Berg Link: https://msgid.link/20240523120945.d44c34dadfc2.I59b4403108e0dbf7fc6ae8f7522e1af520cffb1c@changeid Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 54 ++++++++++++++++++++++++++++++++++++++++++++------ net/wireless/chan.c | 30 +++++++++++----------------- 2 files changed, 60 insertions(+), 24 deletions(-) (limited to 'net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index fb7d76513e1c..45ffeb110d36 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -8800,6 +8800,31 @@ static inline void cfg80211_report_obss_beacon(struct wiphy *wiphy, sig_dbm); } +/** + * struct cfg80211_beaconing_check_config - beacon check configuration + * @iftype: the interface type to check for + * @relax: allow IR-relaxation conditions to apply (e.g. another + * interface connected already on the same channel) + * NOTE: If this is set, wiphy mutex must be held. + */ +struct cfg80211_beaconing_check_config { + enum nl80211_iftype iftype; + bool relax; +}; + +/** + * cfg80211_reg_check_beaconing - check if beaconing is allowed + * @wiphy: the wiphy + * @chandef: the channel definition + * @cfg: additional parameters for the checking + * + * Return: %true if there is no secondary channel or the secondary channel(s) + * can be used for beaconing (i.e. is not a radar channel etc.) + */ +bool cfg80211_reg_check_beaconing(struct wiphy *wiphy, + struct cfg80211_chan_def *chandef, + struct cfg80211_beaconing_check_config *cfg); + /** * cfg80211_reg_can_beacon - check if beaconing is allowed * @wiphy: the wiphy @@ -8809,9 +8834,17 @@ static inline void cfg80211_report_obss_beacon(struct wiphy *wiphy, * Return: %true if there is no secondary channel or the secondary channel(s) * can be used for beaconing (i.e. is not a radar channel etc.) */ -bool cfg80211_reg_can_beacon(struct wiphy *wiphy, - struct cfg80211_chan_def *chandef, - enum nl80211_iftype iftype); +static inline bool +cfg80211_reg_can_beacon(struct wiphy *wiphy, + struct cfg80211_chan_def *chandef, + enum nl80211_iftype iftype) +{ + struct cfg80211_beaconing_check_config config = { + .iftype = iftype, + }; + + return cfg80211_reg_check_beaconing(wiphy, chandef, &config); +} /** * cfg80211_reg_can_beacon_relax - check if beaconing is allowed with relaxation @@ -8826,9 +8859,18 @@ bool cfg80211_reg_can_beacon(struct wiphy *wiphy, * * Context: Requires the wiphy mutex to be held. 
*/ -bool cfg80211_reg_can_beacon_relax(struct wiphy *wiphy, - struct cfg80211_chan_def *chandef, - enum nl80211_iftype iftype); +static inline bool +cfg80211_reg_can_beacon_relax(struct wiphy *wiphy, + struct cfg80211_chan_def *chandef, + enum nl80211_iftype iftype) +{ + struct cfg80211_beaconing_check_config config = { + .iftype = iftype, + .relax = true, + }; + + return cfg80211_reg_check_beaconing(wiphy, chandef, &config); +} /** * cfg80211_ch_switch_notify - update wdev channel and notify userspace diff --git a/net/wireless/chan.c b/net/wireless/chan.c index 360480604515..8b1796130b28 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -1550,22 +1550,12 @@ static bool _cfg80211_reg_can_beacon(struct wiphy *wiphy, return res; } -bool cfg80211_reg_can_beacon(struct wiphy *wiphy, - struct cfg80211_chan_def *chandef, - enum nl80211_iftype iftype) -{ - return _cfg80211_reg_can_beacon(wiphy, chandef, iftype, true); -} -EXPORT_SYMBOL(cfg80211_reg_can_beacon); - -bool cfg80211_reg_can_beacon_relax(struct wiphy *wiphy, - struct cfg80211_chan_def *chandef, - enum nl80211_iftype iftype) +bool cfg80211_reg_check_beaconing(struct wiphy *wiphy, + struct cfg80211_chan_def *chandef, + struct cfg80211_beaconing_check_config *cfg) { struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); - bool check_no_ir; - - lockdep_assert_held(&rdev->wiphy.mtx); + bool check_no_ir = true; /* * Under certain conditions suggested by some regulatory bodies a @@ -1573,12 +1563,16 @@ bool cfg80211_reg_can_beacon_relax(struct wiphy *wiphy, * only if such relaxations are not enabled and the conditions are not * met. */ - check_no_ir = !cfg80211_ir_permissive_chan(wiphy, iftype, - chandef->chan); + if (cfg->relax) { + lockdep_assert_held(&rdev->wiphy.mtx); + check_no_ir = !cfg80211_ir_permissive_chan(wiphy, cfg->iftype, + chandef->chan); + } - return _cfg80211_reg_can_beacon(wiphy, chandef, iftype, check_no_ir); + return _cfg80211_reg_can_beacon(wiphy, chandef, cfg->iftype, + check_no_ir); } -EXPORT_SYMBOL(cfg80211_reg_can_beacon_relax); +EXPORT_SYMBOL(cfg80211_reg_check_beaconing); int cfg80211_set_monitor_channel(struct cfg80211_registered_device *rdev, struct cfg80211_chan_def *chandef) -- cgit v1.2.3-58-ga151 From c1d8bd8d777d55f6708ca6e47c54dbe9f66f9bbb Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 23 May 2024 12:09:49 +0200 Subject: wifi: cfg80211: add regulatory flag to allow VLP AP operation Add a regulatory flag to allow VLP AP operation even on channels otherwise marked NO_IR, which may be possible in some regulatory domains/countries. Note that this requires checking also when the beacon is changed, since that may change the regulatory power type. 
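For illustration, a hypothetical custom regdomain could keep NO-IR on a 6 GHz range while opting in to VLP AP operation by also setting the new rule flag (the vlp_regdom name, "XX" alpha2 and the frequency/EIRP numbers below are made up for the example):

/* hypothetical driver-provided regdomain fragment, for illustration only */
static const struct ieee80211_regdomain vlp_regdom = {
	.n_reg_rules = 1,
	.alpha2 = "XX",
	.reg_rules = {
		/* 5945..6425 MHz, up to 320 MHz, 14 dBm EIRP, NO-IR but VLP AP allowed */
		REG_RULE(5945, 6425, 320, 0, 14,
			 NL80211_RRF_NO_IR |
			 NL80211_RRF_ALLOW_6GHZ_VLP_AP),
	},
};

map_regdom_flags() then translates NL80211_RRF_ALLOW_6GHZ_VLP_AP into IEEE80211_CHAN_ALLOW_6GHZ_VLP_AP on the affected channels, which _cfg80211_reg_can_beacon() accepts as a permitting flag when the advertised power type is IEEE80211_REG_VLP_AP.
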
Reviewed-by: Miriam Rachel Korenblit Signed-off-by: Johannes Berg Link: https://msgid.link/20240523120945.63792ce19790.Ie2a02750d283b78fbf3c686b10565fb0388889e2@changeid Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 6 ++++++ include/uapi/linux/nl80211.h | 6 ++++++ net/wireless/chan.c | 37 ++++++++++++++++++++++++++----------- net/wireless/nl80211.c | 27 +++++++++++++++++++++++++-- net/wireless/reg.c | 2 ++ net/wireless/trace.h | 15 +++++++++------ 6 files changed, 74 insertions(+), 19 deletions(-) (limited to 'net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 45ffeb110d36..6f992aff74ae 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -125,6 +125,8 @@ struct wiphy; * @IEEE80211_CHAN_CAN_MONITOR: This channel can be used for monitor * mode even in the presence of other (regulatory) restrictions, * even if it is otherwise disabled. + * @IEEE80211_CHAN_ALLOW_6GHZ_VLP_AP: Allow using this channel for AP operation + * with very low power (VLP), even if otherwise set to NO_IR. */ enum ieee80211_channel_flags { IEEE80211_CHAN_DISABLED = BIT(0), @@ -152,6 +154,7 @@ enum ieee80211_channel_flags { IEEE80211_CHAN_NO_6GHZ_VLP_CLIENT = BIT(22), IEEE80211_CHAN_NO_6GHZ_AFC_CLIENT = BIT(23), IEEE80211_CHAN_CAN_MONITOR = BIT(24), + IEEE80211_CHAN_ALLOW_6GHZ_VLP_AP = BIT(25), }; #define IEEE80211_CHAN_NO_HT40 \ @@ -8806,9 +8809,12 @@ static inline void cfg80211_report_obss_beacon(struct wiphy *wiphy, * @relax: allow IR-relaxation conditions to apply (e.g. another * interface connected already on the same channel) * NOTE: If this is set, wiphy mutex must be held. + * @reg_power: &enum ieee80211_ap_reg_power value indicating the + * advertised/used 6 GHz regulatory power setting */ struct cfg80211_beaconing_check_config { enum nl80211_iftype iftype; + enum ieee80211_ap_reg_power reg_power; bool relax; }; diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index f917bc6c9b6f..6ae3997061b6 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -4277,6 +4277,8 @@ enum nl80211_wmm_rule { * @NL80211_FREQUENCY_ATTR_CAN_MONITOR: This channel can be used in monitor * mode despite other (regulatory) restrictions, even if the channel is * otherwise completely disabled. + * @NL80211_FREQUENCY_ATTR_ALLOW_6GHZ_VLP_AP: This channel can be used for a + * very low power (VLP) AP, despite being NO_IR. * @NL80211_FREQUENCY_ATTR_MAX: highest frequency attribute number * currently defined * @__NL80211_FREQUENCY_ATTR_AFTER_LAST: internal use @@ -4320,6 +4322,7 @@ enum nl80211_frequency_attr { NL80211_FREQUENCY_ATTR_NO_6GHZ_VLP_CLIENT, NL80211_FREQUENCY_ATTR_NO_6GHZ_AFC_CLIENT, NL80211_FREQUENCY_ATTR_CAN_MONITOR, + NL80211_FREQUENCY_ATTR_ALLOW_6GHZ_VLP_AP, /* keep last */ __NL80211_FREQUENCY_ATTR_AFTER_LAST, @@ -4529,6 +4532,8 @@ enum nl80211_sched_scan_match_attr { * Should be used together with %NL80211_RRF_DFS only. * @NL80211_RRF_NO_6GHZ_VLP_CLIENT: Client connection to VLP AP not allowed * @NL80211_RRF_NO_6GHZ_AFC_CLIENT: Client connection to AFC AP not allowed + * @NL80211_RRF_ALLOW_6GHZ_VLP_AP: Very low power (VLP) AP can be permitted + * despite NO_IR configuration. 
*/ enum nl80211_reg_rule_flags { NL80211_RRF_NO_OFDM = 1<<0, @@ -4553,6 +4558,7 @@ enum nl80211_reg_rule_flags { NL80211_RRF_DFS_CONCURRENT = 1<<21, NL80211_RRF_NO_6GHZ_VLP_CLIENT = 1<<22, NL80211_RRF_NO_6GHZ_AFC_CLIENT = 1<<23, + NL80211_RRF_ALLOW_6GHZ_VLP_AP = 1<<24, }; #define NL80211_RRF_PASSIVE_SCAN NL80211_RRF_NO_IR diff --git a/net/wireless/chan.c b/net/wireless/chan.c index 8b1796130b28..bf2fdcd42019 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -1523,28 +1523,38 @@ static bool cfg80211_ir_permissive_chan(struct wiphy *wiphy, static bool _cfg80211_reg_can_beacon(struct wiphy *wiphy, struct cfg80211_chan_def *chandef, enum nl80211_iftype iftype, - bool check_no_ir) + u32 prohibited_flags, + u32 permitting_flags) { - bool res; - u32 prohibited_flags = IEEE80211_CHAN_DISABLED; + bool res, check_radar; int dfs_required; - trace_cfg80211_reg_can_beacon(wiphy, chandef, iftype, check_no_ir); + trace_cfg80211_reg_can_beacon(wiphy, chandef, iftype, + prohibited_flags, + permitting_flags); - if (check_no_ir) - prohibited_flags |= IEEE80211_CHAN_NO_IR; + if (!_cfg80211_chandef_usable(wiphy, chandef, + IEEE80211_CHAN_DISABLED, 0)) + return false; dfs_required = cfg80211_chandef_dfs_required(wiphy, chandef, iftype); - if (dfs_required != 0) - prohibited_flags |= IEEE80211_CHAN_RADAR; + check_radar = dfs_required != 0; if (dfs_required > 0 && cfg80211_chandef_dfs_available(wiphy, chandef)) { /* We can skip IEEE80211_CHAN_NO_IR if chandef dfs available */ - prohibited_flags = IEEE80211_CHAN_DISABLED; + prohibited_flags &= ~IEEE80211_CHAN_NO_IR; + check_radar = false; } - res = _cfg80211_chandef_usable(wiphy, chandef, prohibited_flags, 0); + if (check_radar && + !_cfg80211_chandef_usable(wiphy, chandef, + IEEE80211_CHAN_RADAR, 0)) + return false; + + res = _cfg80211_chandef_usable(wiphy, chandef, + prohibited_flags, + permitting_flags); trace_cfg80211_return_bool(res); return res; @@ -1555,6 +1565,7 @@ bool cfg80211_reg_check_beaconing(struct wiphy *wiphy, struct cfg80211_beaconing_check_config *cfg) { struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); + u32 permitting_flags = 0; bool check_no_ir = true; /* @@ -1569,8 +1580,12 @@ bool cfg80211_reg_check_beaconing(struct wiphy *wiphy, chandef->chan); } + if (cfg->reg_power == IEEE80211_REG_VLP_AP) + permitting_flags |= IEEE80211_CHAN_ALLOW_6GHZ_VLP_AP; + return _cfg80211_reg_can_beacon(wiphy, chandef, cfg->iftype, - check_no_ir); + check_no_ir ? 
IEEE80211_CHAN_NO_IR : 0, + permitting_flags); } EXPORT_SYMBOL(cfg80211_reg_check_beaconing); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 4628280abf1d..a94e73c133f7 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1207,6 +1207,9 @@ static int nl80211_msg_put_channel(struct sk_buff *msg, struct wiphy *wiphy, if ((chan->flags & IEEE80211_CHAN_CAN_MONITOR) && nla_put_flag(msg, NL80211_FREQUENCY_ATTR_CAN_MONITOR)) goto nla_put_failure; + if ((chan->flags & IEEE80211_CHAN_ALLOW_6GHZ_VLP_AP) && + nla_put_flag(msg, NL80211_FREQUENCY_ATTR_ALLOW_6GHZ_VLP_AP)) + goto nla_put_failure; } if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_MAX_TX_POWER, @@ -5954,6 +5957,7 @@ static int nl80211_validate_ap_phy_operation(struct cfg80211_ap_settings *params static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct cfg80211_beaconing_check_config beacon_check = {}; unsigned int link_id = nl80211_link_id(info->attrs); struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; @@ -6103,8 +6107,13 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) goto out; } - if (!cfg80211_reg_can_beacon_relax(&rdev->wiphy, ¶ms->chandef, - wdev->iftype)) { + beacon_check.iftype = wdev->iftype; + beacon_check.relax = true; + beacon_check.reg_power = + cfg80211_get_6ghz_power_type(params->beacon.tail, + params->beacon.tail_len); + if (!cfg80211_reg_check_beaconing(&rdev->wiphy, ¶ms->chandef, + &beacon_check)) { err = -EINVAL; goto out; } @@ -6261,6 +6270,7 @@ out: static int nl80211_set_beacon(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct cfg80211_beaconing_check_config beacon_check = {}; unsigned int link_id = nl80211_link_id(info->attrs); struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; @@ -6287,6 +6297,19 @@ static int nl80211_set_beacon(struct sk_buff *skb, struct genl_info *info) if (err) goto out; + /* recheck beaconing is permitted with possibly changed power type */ + beacon_check.iftype = wdev->iftype; + beacon_check.relax = true; + beacon_check.reg_power = + cfg80211_get_6ghz_power_type(params->beacon.tail, + params->beacon.tail_len); + if (!cfg80211_reg_check_beaconing(&rdev->wiphy, + &wdev->links[link_id].ap.chandef, + &beacon_check)) { + err = -EINVAL; + goto out; + } + attr = info->attrs[NL80211_ATTR_FILS_DISCOVERY]; if (attr) { err = nl80211_parse_fils_discovery(rdev, attr, diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 3cef0021a3db..4a27f3823e25 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1600,6 +1600,8 @@ static u32 map_regdom_flags(u32 rd_flags) channel_flags |= IEEE80211_CHAN_NO_6GHZ_AFC_CLIENT; if (rd_flags & NL80211_RRF_PSD) channel_flags |= IEEE80211_CHAN_PSD; + if (rd_flags & NL80211_RRF_ALLOW_6GHZ_VLP_AP) + channel_flags |= IEEE80211_CHAN_ALLOW_6GHZ_VLP_AP; return channel_flags; } diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 6ef9294747e3..5c26f065bd68 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -3389,23 +3389,26 @@ TRACE_EVENT(cfg80211_cqm_rssi_notify, TRACE_EVENT(cfg80211_reg_can_beacon, TP_PROTO(struct wiphy *wiphy, struct cfg80211_chan_def *chandef, - enum nl80211_iftype iftype, bool check_no_ir), - TP_ARGS(wiphy, chandef, iftype, check_no_ir), + enum nl80211_iftype iftype, u32 prohibited_flags, + u32 permitting_flags), + TP_ARGS(wiphy, chandef, 
iftype, prohibited_flags, permitting_flags), TP_STRUCT__entry( WIPHY_ENTRY CHAN_DEF_ENTRY __field(enum nl80211_iftype, iftype) - __field(bool, check_no_ir) + __field(u32, prohibited_flags) + __field(u32, permitting_flags) ), TP_fast_assign( WIPHY_ASSIGN; CHAN_DEF_ASSIGN(chandef); __entry->iftype = iftype; - __entry->check_no_ir = check_no_ir; + __entry->prohibited_flags = prohibited_flags; + __entry->permitting_flags = permitting_flags; ), - TP_printk(WIPHY_PR_FMT ", " CHAN_DEF_PR_FMT ", iftype=%d check_no_ir=%s", + TP_printk(WIPHY_PR_FMT ", " CHAN_DEF_PR_FMT ", iftype=%d prohibited_flags=0x%x permitting_flags=0x%x", WIPHY_PR_ARG, CHAN_DEF_PR_ARG, __entry->iftype, - BOOL_TO_STR(__entry->check_no_ir)) + __entry->prohibited_flags, __entry->permitting_flags) ); TRACE_EVENT(cfg80211_chandef_dfs_required, -- cgit v1.2.3-58-ga151 From 91b193d546683558a8799ffb2e2f935d3800633e Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Sun, 2 Jun 2024 10:22:00 +0300 Subject: wifi: cfg80211: reject non-conformant 6 GHz center frequencies On 6 GHz (and also 5 GHz to some degree), only a specific set of center frequencies should be used depending on the channel bandwidth. Verify this is the case on 6 GHz. For 5 GHz, we are more accepting as there are APs that got it wrong historically. Signed-off-by: Benjamin Berg Signed-off-by: Miri Korenblit Link: https://msgid.link/20240602102200.876b10a2beda.I0d3d0daea4014e99654437ff6691378dbe452652@changeid Signed-off-by: Johannes Berg --- net/wireless/chan.c | 38 ++++++++++++++++++++++++++++++++++++++ net/wireless/tests/chan.c | 22 +++++++++++----------- 2 files changed, 49 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/wireless/chan.c b/net/wireless/chan.c index bf2fdcd42019..e579d7e1425f 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -263,6 +263,37 @@ static int cfg80211_chandef_get_width(const struct cfg80211_chan_def *c) return nl80211_chan_width_to_mhz(c->width); } +static bool cfg80211_valid_center_freq(u32 center, + enum nl80211_chan_width width) +{ + int bw; + int step; + + /* We only do strict verification on 6 GHz */ + if (center < 5955 || center > 7115) + return true; + + bw = nl80211_chan_width_to_mhz(width); + if (bw < 0) + return false; + + /* Validate that the channels bw is entirely within the 6 GHz band */ + if (center - bw / 2 < 5945 || center + bw / 2 > 7125) + return false; + + /* With 320 MHz the permitted channels overlap */ + if (bw == 320) + step = 160; + else + step = bw; + + /* + * Valid channels are packed from lowest frequency towards higher ones. + * So test that the lower frequency alignes with one of these steps. 
+ */ + return (center - bw / 2 - 5945) % step == 0; +} + bool cfg80211_chandef_valid(const struct cfg80211_chan_def *chandef) { u32 control_freq, oper_freq; @@ -374,6 +405,13 @@ bool cfg80211_chandef_valid(const struct cfg80211_chan_def *chandef) return false; } + if (!cfg80211_valid_center_freq(chandef->center_freq1, chandef->width)) + return false; + + if (chandef->width == NL80211_CHAN_WIDTH_80P80 && + !cfg80211_valid_center_freq(chandef->center_freq2, chandef->width)) + return false; + /* channel 14 is only for IEEE 802.11b */ if (chandef->center_freq1 == 2484 && chandef->width != NL80211_CHAN_WIDTH_20_NOHT) diff --git a/net/wireless/tests/chan.c b/net/wireless/tests/chan.c index d02258ac2dab..74bbee25085f 100644 --- a/net/wireless/tests/chan.c +++ b/net/wireless/tests/chan.c @@ -113,16 +113,16 @@ static const struct chandef_compat_case { }, }, { - .desc = "different primary 160 MHz", + .desc = "different primary 320 MHz", .c1 = { .width = NL80211_CHAN_WIDTH_320, .chan = &chan_6ghz_105, - .center_freq1 = 6475 + 150, + .center_freq1 = 6475 + 110, }, .c2 = { .width = NL80211_CHAN_WIDTH_320, .chan = &chan_6ghz_105, - .center_freq1 = 6475 - 10, + .center_freq1 = 6475 - 50, }, }, { @@ -131,12 +131,12 @@ static const struct chandef_compat_case { .c1 = { .width = NL80211_CHAN_WIDTH_160, .chan = &chan_6ghz_105, - .center_freq1 = 6475 + 70, + .center_freq1 = 6475 + 30, }, .c2 = { .width = NL80211_CHAN_WIDTH_320, .chan = &chan_6ghz_105, - .center_freq1 = 6475 - 10, + .center_freq1 = 6475 - 50, }, .compat = true, }, @@ -145,12 +145,12 @@ static const struct chandef_compat_case { .c1 = { .width = NL80211_CHAN_WIDTH_160, .chan = &chan_6ghz_105, - .center_freq1 = 6475 + 70, + .center_freq1 = 6475 + 30, }, .c2 = { .width = NL80211_CHAN_WIDTH_320, .chan = &chan_6ghz_105, - .center_freq1 = 6475 - 10, + .center_freq1 = 6475 - 50, .punctured = 0xf, }, .compat = true, @@ -160,13 +160,13 @@ static const struct chandef_compat_case { .c1 = { .width = NL80211_CHAN_WIDTH_160, .chan = &chan_6ghz_105, - .center_freq1 = 6475 + 70, + .center_freq1 = 6475 + 30, .punctured = 0xc0, }, .c2 = { .width = NL80211_CHAN_WIDTH_320, .chan = &chan_6ghz_105, - .center_freq1 = 6475 - 10, + .center_freq1 = 6475 - 50, .punctured = 0xc000, }, .compat = true, @@ -176,13 +176,13 @@ static const struct chandef_compat_case { .c1 = { .width = NL80211_CHAN_WIDTH_160, .chan = &chan_6ghz_105, - .center_freq1 = 6475 + 70, + .center_freq1 = 6475 + 30, .punctured = 0x80, }, .c2 = { .width = NL80211_CHAN_WIDTH_320, .chan = &chan_6ghz_105, - .center_freq1 = 6475 - 10, + .center_freq1 = 6475 - 50, .punctured = 0xc000, }, }, -- cgit v1.2.3-58-ga151 From f737b70434f332042fcbe37f85ee3ee424959403 Mon Sep 17 00:00:00 2001 From: Avraham Stern Date: Wed, 5 Jun 2024 13:57:16 +0300 Subject: wifi: nl80211: remove the FTMs per burst limit for NDP ranging In NDP ranging, the number of NDP exchanges is not negotiated and thus is not limited by the protocol. Remove the limit on FTMs per burst for trigger based and non trigger based ranging. 
Signed-off-by: Avraham Stern Signed-off-by: Miri Korenblit Link: https://msgid.link/20240605135233.916e228537d9.I5fe4c1cefa1c1328726e7615dd5a0d861c694381@changeid Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 3 +-- net/wireless/pmsr.c | 10 +++++++++- 2 files changed, 10 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index a94e73c133f7..fcac7dedcd61 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -315,8 +315,7 @@ nl80211_pmsr_ftm_req_attr_policy[NL80211_PMSR_FTM_REQ_ATTR_MAX + 1] = { [NL80211_PMSR_FTM_REQ_ATTR_BURST_PERIOD] = { .type = NLA_U16 }, [NL80211_PMSR_FTM_REQ_ATTR_BURST_DURATION] = NLA_POLICY_MAX(NLA_U8, 15), - [NL80211_PMSR_FTM_REQ_ATTR_FTMS_PER_BURST] = - NLA_POLICY_MAX(NLA_U8, 31), + [NL80211_PMSR_FTM_REQ_ATTR_FTMS_PER_BURST] = { .type = NLA_U8 }, [NL80211_PMSR_FTM_REQ_ATTR_NUM_FTMR_RETRIES] = { .type = NLA_U8 }, [NL80211_PMSR_FTM_REQ_ATTR_REQUEST_LCI] = { .type = NLA_FLAG }, [NL80211_PMSR_FTM_REQ_ATTR_REQUEST_CIVICLOC] = { .type = NLA_FLAG }, diff --git a/net/wireless/pmsr.c b/net/wireless/pmsr.c index c569c37da317..0396fa19bdf1 100644 --- a/net/wireless/pmsr.c +++ b/net/wireless/pmsr.c @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * Copyright (C) 2018 - 2021, 2023 Intel Corporation + * Copyright (C) 2018 - 2021, 2023 - 2024 Intel Corporation */ #include #include "core.h" @@ -148,6 +148,14 @@ static int pmsr_parse_ftm(struct cfg80211_registered_device *rdev, return -EINVAL; } + if (out->ftm.ftms_per_burst > 31 && !out->ftm.non_trigger_based && + !out->ftm.trigger_based) { + NL_SET_ERR_MSG_ATTR(info->extack, + tb[NL80211_PMSR_FTM_REQ_ATTR_FTMS_PER_BURST], + "FTM: FTMs per burst must be set lower than 31"); + return -ERANGE; + } + if ((out->ftm.trigger_based || out->ftm.non_trigger_based) && out->ftm.preamble != NL80211_PREAMBLE_HE) { NL_SET_ERR_MSG_ATTR(info->extack, -- cgit v1.2.3-58-ga151 From 03ecd745dde181f537bf84374caafb121463136b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 5 Jun 2024 13:57:18 +0300 Subject: wifi: mac80211: fix erroneous errors for STA changes When e.g. wpa_supplicant sets only the MLD "sta" authorized state, the code actually applies that change, but then returns an error to userspace anyway because there were no changes to the link station, and no link ID was given. However, it's not incorrect to not have a link ID when wanting to change only the MLD peer ("sta") state, so the code shouldn't require it. To fix this, separate the "new_link" argument out into a new three-state enum, because if modify is called on a link STA only, it should return an error if no link is given or if it doesn't exist. For modify on the MLD "sta", not having a link ID is OK, but if there is one it should be validated. This seems to not have mattered much as wpa_supplicant just prints a message and continues, and the authorized state was already set before this error return. However, in the later code powersave recalculation etc. will be skipped, so that it may result in never allowing powersave on MLO connections. 
Signed-off-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://msgid.link/20240605135233.48e2b8af07e3.Ib9793c383fcba118c05100e024f4a11a1c3d0e85@changeid Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 55 +++++++++++++++++++++++++++++++++++------------------- 1 file changed, 36 insertions(+), 19 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index afb361a043d9..65e7cf635d76 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1814,8 +1814,15 @@ static void sta_apply_mesh_params(struct ieee80211_local *local, #endif } +enum sta_link_apply_mode { + STA_LINK_MODE_NEW, + STA_LINK_MODE_STA_MODIFY, + STA_LINK_MODE_LINK_MODIFY, +}; + static int sta_link_apply_parameters(struct ieee80211_local *local, - struct sta_info *sta, bool new_link, + struct sta_info *sta, + enum sta_link_apply_mode mode, struct link_station_parameters *params) { int ret = 0; @@ -1827,18 +1834,29 @@ static int sta_link_apply_parameters(struct ieee80211_local *local, struct link_sta_info *link_sta = rcu_dereference_protected(sta->link[link_id], lockdep_is_held(&local->hw.wiphy->mtx)); - - /* - * If there are no changes, then accept a link that exist, - * unless it's a new link. - */ - if (params->link_id >= 0 && !new_link && - !params->link_mac && !params->txpwr_set && - !params->supported_rates_len && - !params->ht_capa && !params->vht_capa && - !params->he_capa && !params->eht_capa && - !params->opmode_notif_used) - return 0; + bool changes = params->link_mac || + params->txpwr_set || + params->supported_rates_len || + params->ht_capa || + params->vht_capa || + params->he_capa || + params->eht_capa || + params->opmode_notif_used; + + switch (mode) { + case STA_LINK_MODE_NEW: + if (!params->link_mac) + return -EINVAL; + break; + case STA_LINK_MODE_LINK_MODIFY: + break; + case STA_LINK_MODE_STA_MODIFY: + if (params->link_id >= 0) + break; + if (!changes) + return 0; + break; + } if (!link || !link_sta) return -EINVAL; @@ -1848,15 +1866,13 @@ static int sta_link_apply_parameters(struct ieee80211_local *local, return -EINVAL; if (params->link_mac) { - if (new_link) { + if (mode == STA_LINK_MODE_NEW) { memcpy(link_sta->addr, params->link_mac, ETH_ALEN); memcpy(link_sta->pub->addr, params->link_mac, ETH_ALEN); } else if (!ether_addr_equal(link_sta->addr, params->link_mac)) { return -EINVAL; } - } else if (new_link) { - return -EINVAL; } if (params->txpwr_set) { @@ -2028,7 +2044,7 @@ static int sta_apply_parameters(struct ieee80211_local *local, if (params->listen_interval >= 0) sta->listen_interval = params->listen_interval; - ret = sta_link_apply_parameters(local, sta, false, + ret = sta_link_apply_parameters(local, sta, STA_LINK_MODE_STA_MODIFY, ¶ms->link_sta_params); if (ret) return ret; @@ -5005,7 +5021,7 @@ ieee80211_add_link_station(struct wiphy *wiphy, struct net_device *dev, if (ret) return ret; - ret = sta_link_apply_parameters(local, sta, true, params); + ret = sta_link_apply_parameters(local, sta, STA_LINK_MODE_NEW, params); if (ret) { ieee80211_sta_free_link(sta, params->link_id); return ret; @@ -5032,7 +5048,8 @@ ieee80211_mod_link_station(struct wiphy *wiphy, struct net_device *dev, if (!(sta->sta.valid_links & BIT(params->link_id))) return -EINVAL; - return sta_link_apply_parameters(local, sta, false, params); + return sta_link_apply_parameters(local, sta, STA_LINK_MODE_LINK_MODIFY, + params); } static int -- cgit v1.2.3-58-ga151 From 642508a42f74d7467aae7c56dff3016db64a25bd Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 5 Jun 2024 13:57:19 
+0300 Subject: wifi: mac80211: clean up 'ret' in sta_link_apply_parameters() There's no need to have the always-zero ret variable in the function scope, move it into the inner scope only. Signed-off-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://msgid.link/20240605135233.eb7a24632d98.I72d7fe1da89d4b89bcfd0f5fb9057e3e69355cfe@changeid Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 65e7cf635d76..3236477424b8 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1825,7 +1825,6 @@ static int sta_link_apply_parameters(struct ieee80211_local *local, enum sta_link_apply_mode mode, struct link_station_parameters *params) { - int ret = 0; struct ieee80211_supported_band *sband; struct ieee80211_sub_if_data *sdata = sta->sdata; u32 link_id = params->link_id < 0 ? 0 : params->link_id; @@ -1876,6 +1875,8 @@ static int sta_link_apply_parameters(struct ieee80211_local *local, } if (params->txpwr_set) { + int ret; + link_sta->pub->txpwr.type = params->txpwr.type; if (params->txpwr.type == NL80211_TX_POWER_LIMITED) link_sta->pub->txpwr.power = params->txpwr.power; @@ -1928,7 +1929,7 @@ static int sta_link_apply_parameters(struct ieee80211_local *local, ieee80211_sta_init_nss(link_sta); - return ret; + return 0; } static int sta_apply_parameters(struct ieee80211_local *local, -- cgit v1.2.3-58-ga151 From 671b6b1b1ea5c260549c8f7401828147c40fa9cf Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Wed, 5 Jun 2024 13:57:20 +0300 Subject: wifi: cfg80211: honor WIPHY_FLAG_SPLIT_SCAN_6GHZ in cfg80211_conn_scan If a user uses iw to connect to a network and we don't have any information about the existing networks, cfg80211 will trigger a scan internally even if the user didn't ask for a scan. This scan is implemented by cfg80211_conn_scan(). This function called rdev_scan() directly without honoring the WIPHY_FLAG_SPLIT_SCAN_6GHZ flag. Use cfg80211_scan instead, this will split the scan if the low level driver asked to. Signed-off-by: Emmanuel Grumbach Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://msgid.link/20240605135233.33f03661476a.I7b5be20a55aafe012cd9ddb3b4ba2d46b256ace4@changeid Signed-off-by: Johannes Berg --- net/wireless/sme.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/wireless/sme.c b/net/wireless/sme.c index a8ad55f11133..e419aa8c4a5a 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -5,7 +5,7 @@ * (for nl80211's connect() and wext) * * Copyright 2009 Johannes Berg - * Copyright (C) 2009, 2020, 2022-2023 Intel Corporation. All rights reserved. + * Copyright (C) 2009, 2020, 2022-2024 Intel Corporation. All rights reserved. * Copyright 2017 Intel Deutschland GmbH */ @@ -130,7 +130,7 @@ static int cfg80211_conn_scan(struct wireless_dev *wdev) rdev->scan_req = request; - err = rdev_scan(rdev, request); + err = cfg80211_scan(rdev); if (!err) { wdev->conn->state = CFG80211_CONN_SCANNING; nl80211_send_scan_start(rdev, wdev); -- cgit v1.2.3-58-ga151 From cce4c40b960673f9e020835def310f1e89d3a940 Mon Sep 17 00:00:00 2001 From: Daniel Xu Date: Wed, 12 Jun 2024 09:58:33 -0600 Subject: bpf: treewide: Align kfunc signatures to prog point-of-view Previously, kfunc declarations in bpf_kfuncs.h (and others) used "user facing" types for kfuncs prototypes while the actual kfunc definitions used "kernel facing" types. 
More specifically: bpf_dynptr vs bpf_dynptr_kern, __sk_buff vs sk_buff, and xdp_md vs xdp_buff. It wasn't an issue before, as the verifier allows aliased types. However, since we are now generating kfunc prototypes in vmlinux.h (in addition to keeping bpf_kfuncs.h around), this conflict creates compilation errors. Fix this conflict by using "user facing" types in kfunc definitions. This results in more casts, but otherwise has no additional runtime cost. Note, similar to 5b268d1ebcdc ("bpf: Have bpf_rdonly_cast() take a const pointer"), we also make kfuncs take const arguments where appropriate in order to make the kfunc more permissive. Signed-off-by: Daniel Xu Link: https://lore.kernel.org/r/b58346a63a0e66bc9b7504da751b526b0b189a67.1718207789.git.dxu@dxuuu.xyz Signed-off-by: Alexei Starovoitov --- fs/verity/measure.c | 5 +-- include/linux/bpf.h | 8 ++--- kernel/bpf/crypto.c | 24 ++++++++----- kernel/bpf/helpers.c | 39 +++++++++++++++------- kernel/bpf/verifier.c | 2 +- kernel/trace/bpf_trace.c | 15 +++++---- net/core/filter.c | 32 +++++++++++------- .../testing/selftests/bpf/progs/ip_check_defrag.c | 10 +++--- .../selftests/bpf/progs/verifier_netfilter_ctx.c | 6 ++-- 9 files changed, 88 insertions(+), 53 deletions(-) (limited to 'net') diff --git a/fs/verity/measure.c b/fs/verity/measure.c index 3969d54158d1..175d2f1bc089 100644 --- a/fs/verity/measure.c +++ b/fs/verity/measure.c @@ -111,14 +111,15 @@ __bpf_kfunc_start_defs(); /** * bpf_get_fsverity_digest: read fsverity digest of file * @file: file to get digest from - * @digest_ptr: (out) dynptr for struct fsverity_digest + * @digest_p: (out) dynptr for struct fsverity_digest * * Read fsverity_digest of *file* into *digest_ptr*. * * Return: 0 on success, a negative value on error. */ -__bpf_kfunc int bpf_get_fsverity_digest(struct file *file, struct bpf_dynptr_kern *digest_ptr) +__bpf_kfunc int bpf_get_fsverity_digest(struct file *file, struct bpf_dynptr *digest_p) { + struct bpf_dynptr_kern *digest_ptr = (struct bpf_dynptr_kern *)digest_p; const struct inode *inode = file_inode(file); u32 dynptr_sz = __bpf_dynptr_size(digest_ptr); struct fsverity_digest *arg; diff --git a/include/linux/bpf.h b/include/linux/bpf.h index a834f4b761bc..f636b4998bf7 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -3265,8 +3265,8 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type, struct bpf_insn *insn_buf, struct bpf_prog *prog, u32 *target_size); -int bpf_dynptr_from_skb_rdonly(struct sk_buff *skb, u64 flags, - struct bpf_dynptr_kern *ptr); +int bpf_dynptr_from_skb_rdonly(struct __sk_buff *skb, u64 flags, + struct bpf_dynptr *ptr); #else static inline bool bpf_sock_common_is_valid_access(int off, int size, enum bpf_access_type type, @@ -3288,8 +3288,8 @@ static inline u32 bpf_sock_convert_ctx_access(enum bpf_access_type type, { return 0; } -static inline int bpf_dynptr_from_skb_rdonly(struct sk_buff *skb, u64 flags, - struct bpf_dynptr_kern *ptr) +static inline int bpf_dynptr_from_skb_rdonly(struct __sk_buff *skb, u64 flags, + struct bpf_dynptr *ptr) { return -EOPNOTSUPP; } diff --git a/kernel/bpf/crypto.c b/kernel/bpf/crypto.c index 2bee4af91e38..3c1de0e5c0bd 100644 --- a/kernel/bpf/crypto.c +++ b/kernel/bpf/crypto.c @@ -311,11 +311,15 @@ static int bpf_crypto_crypt(const struct bpf_crypto_ctx *ctx, * Decrypts provided buffer using IV data and the crypto context. Crypto context must be configured. 
*/ __bpf_kfunc int bpf_crypto_decrypt(struct bpf_crypto_ctx *ctx, - const struct bpf_dynptr_kern *src, - const struct bpf_dynptr_kern *dst, - const struct bpf_dynptr_kern *siv) + const struct bpf_dynptr *src, + const struct bpf_dynptr *dst, + const struct bpf_dynptr *siv) { - return bpf_crypto_crypt(ctx, src, dst, siv, true); + const struct bpf_dynptr_kern *src_kern = (struct bpf_dynptr_kern *)src; + const struct bpf_dynptr_kern *dst_kern = (struct bpf_dynptr_kern *)dst; + const struct bpf_dynptr_kern *siv_kern = (struct bpf_dynptr_kern *)siv; + + return bpf_crypto_crypt(ctx, src_kern, dst_kern, siv_kern, true); } /** @@ -328,11 +332,15 @@ __bpf_kfunc int bpf_crypto_decrypt(struct bpf_crypto_ctx *ctx, * Encrypts provided buffer using IV data and the crypto context. Crypto context must be configured. */ __bpf_kfunc int bpf_crypto_encrypt(struct bpf_crypto_ctx *ctx, - const struct bpf_dynptr_kern *src, - const struct bpf_dynptr_kern *dst, - const struct bpf_dynptr_kern *siv) + const struct bpf_dynptr *src, + const struct bpf_dynptr *dst, + const struct bpf_dynptr *siv) { - return bpf_crypto_crypt(ctx, src, dst, siv, false); + const struct bpf_dynptr_kern *src_kern = (struct bpf_dynptr_kern *)src; + const struct bpf_dynptr_kern *dst_kern = (struct bpf_dynptr_kern *)dst; + const struct bpf_dynptr_kern *siv_kern = (struct bpf_dynptr_kern *)siv; + + return bpf_crypto_crypt(ctx, src_kern, dst_kern, siv_kern, false); } __bpf_kfunc_end_defs(); diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 6f1abcb4b084..3ac521c48bba 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -2459,9 +2459,10 @@ __bpf_kfunc struct task_struct *bpf_task_from_pid(s32 pid) * provided buffer, with its contents containing the data, if unable to obtain * direct pointer) */ -__bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr_kern *ptr, u32 offset, +__bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr *p, u32 offset, void *buffer__opt, u32 buffer__szk) { + const struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p; enum bpf_dynptr_type type; u32 len = buffer__szk; int err; @@ -2543,9 +2544,11 @@ __bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr_kern *ptr, u32 offset * provided buffer, with its contents containing the data, if unable to obtain * direct pointer) */ -__bpf_kfunc void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr_kern *ptr, u32 offset, +__bpf_kfunc void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *p, u32 offset, void *buffer__opt, u32 buffer__szk) { + const struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p; + if (!ptr->data || __bpf_dynptr_is_rdonly(ptr)) return NULL; @@ -2571,11 +2574,12 @@ __bpf_kfunc void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr_kern *ptr, u32 o * will be copied out into the buffer and the user will need to call * bpf_dynptr_write() to commit changes. 
*/ - return bpf_dynptr_slice(ptr, offset, buffer__opt, buffer__szk); + return bpf_dynptr_slice(p, offset, buffer__opt, buffer__szk); } -__bpf_kfunc int bpf_dynptr_adjust(struct bpf_dynptr_kern *ptr, u32 start, u32 end) +__bpf_kfunc int bpf_dynptr_adjust(const struct bpf_dynptr *p, u32 start, u32 end) { + struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p; u32 size; if (!ptr->data || start > end) @@ -2592,36 +2596,45 @@ __bpf_kfunc int bpf_dynptr_adjust(struct bpf_dynptr_kern *ptr, u32 start, u32 en return 0; } -__bpf_kfunc bool bpf_dynptr_is_null(struct bpf_dynptr_kern *ptr) +__bpf_kfunc bool bpf_dynptr_is_null(const struct bpf_dynptr *p) { + struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p; + return !ptr->data; } -__bpf_kfunc bool bpf_dynptr_is_rdonly(struct bpf_dynptr_kern *ptr) +__bpf_kfunc bool bpf_dynptr_is_rdonly(const struct bpf_dynptr *p) { + struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p; + if (!ptr->data) return false; return __bpf_dynptr_is_rdonly(ptr); } -__bpf_kfunc __u32 bpf_dynptr_size(const struct bpf_dynptr_kern *ptr) +__bpf_kfunc __u32 bpf_dynptr_size(const struct bpf_dynptr *p) { + struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p; + if (!ptr->data) return -EINVAL; return __bpf_dynptr_size(ptr); } -__bpf_kfunc int bpf_dynptr_clone(struct bpf_dynptr_kern *ptr, - struct bpf_dynptr_kern *clone__uninit) +__bpf_kfunc int bpf_dynptr_clone(const struct bpf_dynptr *p, + struct bpf_dynptr *clone__uninit) { + struct bpf_dynptr_kern *clone = (struct bpf_dynptr_kern *)clone__uninit; + struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p; + if (!ptr->data) { - bpf_dynptr_set_null(clone__uninit); + bpf_dynptr_set_null(clone); return -EINVAL; } - *clone__uninit = *ptr; + *clone = *ptr; return 0; } @@ -2986,7 +2999,9 @@ late_initcall(kfunc_init); */ const void *__bpf_dynptr_data(const struct bpf_dynptr_kern *ptr, u32 len) { - return bpf_dynptr_slice(ptr, 0, NULL, len); + const struct bpf_dynptr *p = (struct bpf_dynptr *)ptr; + + return bpf_dynptr_slice(p, 0, NULL, len); } /* Get a pointer to dynptr data up to len bytes for read write access. If diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index dcac6119d810..acc9dd830807 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -10914,7 +10914,7 @@ enum { }; BTF_ID_LIST(kf_arg_btf_ids) -BTF_ID(struct, bpf_dynptr_kern) +BTF_ID(struct, bpf_dynptr) BTF_ID(struct, bpf_list_head) BTF_ID(struct, bpf_list_node) BTF_ID(struct, bpf_rb_root) diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index bc16e21a2a44..4b3fda456299 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1369,8 +1369,8 @@ __bpf_kfunc void bpf_key_put(struct bpf_key *bkey) #ifdef CONFIG_SYSTEM_DATA_VERIFICATION /** * bpf_verify_pkcs7_signature - verify a PKCS#7 signature - * @data_ptr: data to verify - * @sig_ptr: signature of the data + * @data_p: data to verify + * @sig_p: signature of the data * @trusted_keyring: keyring with keys trusted for signature verification * * Verify the PKCS#7 signature *sig_ptr* against the supplied *data_ptr* @@ -1378,10 +1378,12 @@ __bpf_kfunc void bpf_key_put(struct bpf_key *bkey) * * Return: 0 on success, a negative value on error. 
*/ -__bpf_kfunc int bpf_verify_pkcs7_signature(struct bpf_dynptr_kern *data_ptr, - struct bpf_dynptr_kern *sig_ptr, +__bpf_kfunc int bpf_verify_pkcs7_signature(struct bpf_dynptr *data_p, + struct bpf_dynptr *sig_p, struct bpf_key *trusted_keyring) { + struct bpf_dynptr_kern *data_ptr = (struct bpf_dynptr_kern *)data_p; + struct bpf_dynptr_kern *sig_ptr = (struct bpf_dynptr_kern *)sig_p; const void *data, *sig; u32 data_len, sig_len; int ret; @@ -1444,7 +1446,7 @@ __bpf_kfunc_start_defs(); * bpf_get_file_xattr - get xattr of a file * @file: file to get xattr from * @name__str: name of the xattr - * @value_ptr: output buffer of the xattr value + * @value_p: output buffer of the xattr value * * Get xattr *name__str* of *file* and store the output in *value_ptr*. * @@ -1453,8 +1455,9 @@ __bpf_kfunc_start_defs(); * Return: 0 on success, a negative value on error. */ __bpf_kfunc int bpf_get_file_xattr(struct file *file, const char *name__str, - struct bpf_dynptr_kern *value_ptr) + struct bpf_dynptr *value_p) { + struct bpf_dynptr_kern *value_ptr = (struct bpf_dynptr_kern *)value_p; struct dentry *dentry; u32 value_len; void *value; diff --git a/net/core/filter.c b/net/core/filter.c index 7c46ecba3b01..73722790cee3 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -11859,28 +11859,34 @@ bpf_sk_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) } __bpf_kfunc_start_defs(); -__bpf_kfunc int bpf_dynptr_from_skb(struct sk_buff *skb, u64 flags, - struct bpf_dynptr_kern *ptr__uninit) +__bpf_kfunc int bpf_dynptr_from_skb(struct __sk_buff *s, u64 flags, + struct bpf_dynptr *ptr__uninit) { + struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)ptr__uninit; + struct sk_buff *skb = (struct sk_buff *)s; + if (flags) { - bpf_dynptr_set_null(ptr__uninit); + bpf_dynptr_set_null(ptr); return -EINVAL; } - bpf_dynptr_init(ptr__uninit, skb, BPF_DYNPTR_TYPE_SKB, 0, skb->len); + bpf_dynptr_init(ptr, skb, BPF_DYNPTR_TYPE_SKB, 0, skb->len); return 0; } -__bpf_kfunc int bpf_dynptr_from_xdp(struct xdp_buff *xdp, u64 flags, - struct bpf_dynptr_kern *ptr__uninit) +__bpf_kfunc int bpf_dynptr_from_xdp(struct xdp_md *x, u64 flags, + struct bpf_dynptr *ptr__uninit) { + struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)ptr__uninit; + struct xdp_buff *xdp = (struct xdp_buff *)x; + if (flags) { - bpf_dynptr_set_null(ptr__uninit); + bpf_dynptr_set_null(ptr); return -EINVAL; } - bpf_dynptr_init(ptr__uninit, xdp, BPF_DYNPTR_TYPE_XDP, 0, xdp_get_buff_len(xdp)); + bpf_dynptr_init(ptr, xdp, BPF_DYNPTR_TYPE_XDP, 0, xdp_get_buff_len(xdp)); return 0; } @@ -11906,10 +11912,11 @@ __bpf_kfunc int bpf_sock_addr_set_sun_path(struct bpf_sock_addr_kern *sa_kern, return 0; } -__bpf_kfunc int bpf_sk_assign_tcp_reqsk(struct sk_buff *skb, struct sock *sk, +__bpf_kfunc int bpf_sk_assign_tcp_reqsk(struct __sk_buff *s, struct sock *sk, struct bpf_tcp_req_attrs *attrs, int attrs__sz) { #if IS_ENABLED(CONFIG_SYN_COOKIES) + struct sk_buff *skb = (struct sk_buff *)s; const struct request_sock_ops *ops; struct inet_request_sock *ireq; struct tcp_request_sock *treq; @@ -12004,16 +12011,17 @@ __bpf_kfunc int bpf_sk_assign_tcp_reqsk(struct sk_buff *skb, struct sock *sk, __bpf_kfunc_end_defs(); -int bpf_dynptr_from_skb_rdonly(struct sk_buff *skb, u64 flags, - struct bpf_dynptr_kern *ptr__uninit) +int bpf_dynptr_from_skb_rdonly(struct __sk_buff *skb, u64 flags, + struct bpf_dynptr *ptr__uninit) { + struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)ptr__uninit; int err; err = bpf_dynptr_from_skb(skb, flags, ptr__uninit); 
if (err) return err; - bpf_dynptr_set_rdonly(ptr__uninit); + bpf_dynptr_set_rdonly(ptr); return 0; } diff --git a/tools/testing/selftests/bpf/progs/ip_check_defrag.c b/tools/testing/selftests/bpf/progs/ip_check_defrag.c index 1c2b6c1616b0..645b2c9f7867 100644 --- a/tools/testing/selftests/bpf/progs/ip_check_defrag.c +++ b/tools/testing/selftests/bpf/progs/ip_check_defrag.c @@ -12,7 +12,7 @@ #define IP_OFFSET 0x1FFF #define NEXTHDR_FRAGMENT 44 -extern int bpf_dynptr_from_skb(struct sk_buff *skb, __u64 flags, +extern int bpf_dynptr_from_skb(struct __sk_buff *skb, __u64 flags, struct bpf_dynptr *ptr__uninit) __ksym; extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, uint32_t offset, void *buffer, uint32_t buffer__sz) __ksym; @@ -42,7 +42,7 @@ static bool is_frag_v6(struct ipv6hdr *ip6h) return ip6h->nexthdr == NEXTHDR_FRAGMENT; } -static int handle_v4(struct sk_buff *skb) +static int handle_v4(struct __sk_buff *skb) { struct bpf_dynptr ptr; u8 iph_buf[20] = {}; @@ -64,7 +64,7 @@ static int handle_v4(struct sk_buff *skb) return NF_ACCEPT; } -static int handle_v6(struct sk_buff *skb) +static int handle_v6(struct __sk_buff *skb) { struct bpf_dynptr ptr; struct ipv6hdr *ip6h; @@ -89,9 +89,9 @@ static int handle_v6(struct sk_buff *skb) SEC("netfilter") int defrag(struct bpf_nf_ctx *ctx) { - struct sk_buff *skb = ctx->skb; + struct __sk_buff *skb = (struct __sk_buff *)ctx->skb; - switch (bpf_ntohs(skb->protocol)) { + switch (bpf_ntohs(ctx->skb->protocol)) { case ETH_P_IP: return handle_v4(skb); case ETH_P_IPV6: diff --git a/tools/testing/selftests/bpf/progs/verifier_netfilter_ctx.c b/tools/testing/selftests/bpf/progs/verifier_netfilter_ctx.c index 65bba330e7e5..ab9f9f2620ed 100644 --- a/tools/testing/selftests/bpf/progs/verifier_netfilter_ctx.c +++ b/tools/testing/selftests/bpf/progs/verifier_netfilter_ctx.c @@ -79,7 +79,7 @@ int with_invalid_ctx_access_test5(struct bpf_nf_ctx *ctx) return NF_ACCEPT; } -extern int bpf_dynptr_from_skb(struct sk_buff *skb, __u64 flags, +extern int bpf_dynptr_from_skb(struct __sk_buff *skb, __u64 flags, struct bpf_dynptr *ptr__uninit) __ksym; extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, uint32_t offset, void *buffer, uint32_t buffer__sz) __ksym; @@ -90,8 +90,8 @@ __success __failure_unpriv __retval(0) int with_valid_ctx_access_test6(struct bpf_nf_ctx *ctx) { + struct __sk_buff *skb = (struct __sk_buff *)ctx->skb; const struct nf_hook_state *state = ctx->state; - struct sk_buff *skb = ctx->skb; const struct iphdr *iph; const struct tcphdr *th; u8 buffer_iph[20] = {}; @@ -99,7 +99,7 @@ int with_valid_ctx_access_test6(struct bpf_nf_ctx *ctx) struct bpf_dynptr ptr; uint8_t ihl; - if (skb->len <= 20 || bpf_dynptr_from_skb(skb, 0, &ptr)) + if (ctx->skb->len <= 20 || bpf_dynptr_from_skb(skb, 0, &ptr)) return NF_ACCEPT; iph = bpf_dynptr_slice(&ptr, 0, buffer_iph, sizeof(buffer_iph)); -- cgit v1.2.3-58-ga151 From b975d3ee5962237c1e2f5d5aeeaaf0dc2173486c Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 9 Jun 2024 00:10:39 +0200 Subject: net: add and use skb_get_hash_net Years ago flow dissector gained ability to delegate flow dissection to a bpf program, scoped per netns. Unfortunately, skb_get_hash() only gets an sk_buff argument instead of both net+skb. This means the flow dissector needs to obtain the netns pointer from somewhere else. The netns is derived from skb->dev, and if that is not available, from skb->sk. If neither is set, we hit a (benign) WARN_ON_ONCE(). 
Trying both dev and sk covers most cases, but not all, as recently reported by Christoph Paasch. In case of nf-generated tcp reset, both sk and dev are NULL: WARNING: .. net/core/flow_dissector.c:1104 skb_flow_dissect_flow_keys include/linux/skbuff.h:1536 [inline] skb_get_hash include/linux/skbuff.h:1578 [inline] nft_trace_init+0x7d/0x120 net/netfilter/nf_tables_trace.c:320 nft_do_chain+0xb26/0xb90 net/netfilter/nf_tables_core.c:268 nft_do_chain_ipv4+0x7a/0xa0 net/netfilter/nft_chain_filter.c:23 nf_hook_slow+0x57/0x160 net/netfilter/core.c:626 __ip_local_out+0x21d/0x260 net/ipv4/ip_output.c:118 ip_local_out+0x26/0x1e0 net/ipv4/ip_output.c:127 nf_send_reset+0x58c/0x700 net/ipv4/netfilter/nf_reject_ipv4.c:308 nft_reject_ipv4_eval+0x53/0x90 net/ipv4/netfilter/nft_reject_ipv4.c:30 [..] syzkaller did something like this: table inet filter { chain input { type filter hook input priority filter; policy accept; meta nftrace set 1 tcp dport 42 reject with tcp reset } chain output { type filter hook output priority filter; policy accept; # empty chain is enough } } ... then sends a tcp packet to port 42. Initial attempt to simply set skb->dev from nf_reject_ipv4 doesn't cover all cases: skbs generated via ipv4 igmp_send_report trigger similar splat. Moreover, Pablo Neira found that nft_hash.c uses __skb_get_hash_symmetric() which would trigger same warn splat for such skbs. Lets allow callers to pass the current netns explicitly. The nf_trace infrastructure is adjusted to use the new helper. __skb_get_hash_symmetric is handled in the next patch. Reported-by: Christoph Paasch Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/494 Reviewed-by: Willem de Bruijn Signed-off-by: Florian Westphal Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20240608221057.16070-2-fw@strlen.de Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 12 ++++++++++-- net/core/flow_dissector.c | 15 +++++++++++---- net/netfilter/nf_tables_trace.c | 2 +- 3 files changed, 22 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index fe7d8dbef77e..6e78019f899a 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1498,7 +1498,7 @@ __skb_set_sw_hash(struct sk_buff *skb, __u32 hash, bool is_l4) __skb_set_hash(skb, hash, true, is_l4); } -void __skb_get_hash(struct sk_buff *skb); +void __skb_get_hash_net(const struct net *net, struct sk_buff *skb); u32 __skb_get_hash_symmetric(const struct sk_buff *skb); u32 skb_get_poff(const struct sk_buff *skb); u32 __skb_get_poff(const struct sk_buff *skb, const void *data, @@ -1578,10 +1578,18 @@ void skb_flow_dissect_hash(const struct sk_buff *skb, struct flow_dissector *flow_dissector, void *target_container); +static inline __u32 skb_get_hash_net(const struct net *net, struct sk_buff *skb) +{ + if (!skb->l4_hash && !skb->sw_hash) + __skb_get_hash_net(net, skb); + + return skb->hash; +} + static inline __u32 skb_get_hash(struct sk_buff *skb) { if (!skb->l4_hash && !skb->sw_hash) - __skb_get_hash(skb); + __skb_get_hash_net(NULL, skb); return skb->hash; } diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 59fe46077b3c..702b4f0a70b6 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -1860,7 +1860,8 @@ u32 __skb_get_hash_symmetric(const struct sk_buff *skb) EXPORT_SYMBOL_GPL(__skb_get_hash_symmetric); /** - * __skb_get_hash: calculate a flow hash + * __skb_get_hash_net: calculate a flow hash + * @net: associated network namespace, derived from @skb if NULL 
* @skb: sk_buff to calculate flow hash from * * This function calculates a flow hash based on src/dst addresses @@ -1868,18 +1869,24 @@ EXPORT_SYMBOL_GPL(__skb_get_hash_symmetric); * on success, zero indicates no valid hash. Also, sets l4_hash in skb * if hash is a canonical 4-tuple hash over transport ports. */ -void __skb_get_hash(struct sk_buff *skb) +void __skb_get_hash_net(const struct net *net, struct sk_buff *skb) { struct flow_keys keys; u32 hash; + memset(&keys, 0, sizeof(keys)); + + __skb_flow_dissect(net, skb, &flow_keys_dissector, + &keys, NULL, 0, 0, 0, + FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL); + __flow_hash_secret_init(); - hash = ___skb_get_hash(skb, &keys, &hashrnd); + hash = __flow_hash_from_keys(&keys, &hashrnd); __skb_set_sw_hash(skb, hash, flow_keys_have_l4(&keys)); } -EXPORT_SYMBOL(__skb_get_hash); +EXPORT_SYMBOL(__skb_get_hash_net); __u32 skb_get_hash_perturb(const struct sk_buff *skb, const siphash_key_t *perturb) diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c index a83637e3f455..580c55268f65 100644 --- a/net/netfilter/nf_tables_trace.c +++ b/net/netfilter/nf_tables_trace.c @@ -317,7 +317,7 @@ void nft_trace_init(struct nft_traceinfo *info, const struct nft_pktinfo *pkt, net_get_random_once(&trace_key, sizeof(trace_key)); info->skbid = (u32)siphash_3u32(hash32_ptr(skb), - skb_get_hash(skb), + skb_get_hash_net(nft_net(pkt), skb), skb->skb_iif, &trace_key); } -- cgit v1.2.3-58-ga151 From d1dab4f71d372e00e2d34a9c32bf261623e3a95c Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 9 Jun 2024 00:10:40 +0200 Subject: net: add and use __skb_get_hash_symmetric_net Similar to previous patch: apply same logic for __skb_get_hash_symmetric and let callers pass the netns to the dissector core. Existing function is turned into a wrapper to avoid adjusting all callers, nft_hash.c uses new function. 
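As a usage illustration (not code from either patch in this pair), a hook that hashes locally generated skbs, where both skb->dev and skb->sk may be NULL, can now supply the namespace from its hook state. my_symhash_hook() below is a hypothetical caller sketch:

#include <linux/netfilter.h>
#include <linux/skbuff.h>

static unsigned int my_symhash_hook(void *priv, struct sk_buff *skb,
				    const struct nf_hook_state *state)
{
	/* Pass state->net explicitly instead of letting the dissector derive
	 * the netns from skb->dev/skb->sk, which are both NULL for e.g.
	 * nf-generated TCP resets. */
	u32 h = __skb_get_hash_symmetric_net(state->net, skb);

	pr_debug("symmetric flow hash: 0x%x\n", h);
	return NF_ACCEPT;
}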
Reviewed-by: Willem de Bruijn Signed-off-by: Florian Westphal Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20240608221057.16070-3-fw@strlen.de Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 8 +++++++- net/core/flow_dissector.c | 6 +++--- net/netfilter/nft_hash.c | 3 ++- 3 files changed, 12 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 6e78019f899a..813406a9bd6c 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1498,8 +1498,14 @@ __skb_set_sw_hash(struct sk_buff *skb, __u32 hash, bool is_l4) __skb_set_hash(skb, hash, true, is_l4); } +u32 __skb_get_hash_symmetric_net(const struct net *net, const struct sk_buff *skb); + +static inline u32 __skb_get_hash_symmetric(const struct sk_buff *skb) +{ + return __skb_get_hash_symmetric_net(NULL, skb); +} + void __skb_get_hash_net(const struct net *net, struct sk_buff *skb); -u32 __skb_get_hash_symmetric(const struct sk_buff *skb); u32 skb_get_poff(const struct sk_buff *skb); u32 __skb_get_poff(const struct sk_buff *skb, const void *data, const struct flow_keys_basic *keys, int hlen); diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 702b4f0a70b6..e479790db0f7 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -1845,19 +1845,19 @@ EXPORT_SYMBOL(make_flow_keys_digest); static struct flow_dissector flow_keys_dissector_symmetric __read_mostly; -u32 __skb_get_hash_symmetric(const struct sk_buff *skb) +u32 __skb_get_hash_symmetric_net(const struct net *net, const struct sk_buff *skb) { struct flow_keys keys; __flow_hash_secret_init(); memset(&keys, 0, sizeof(keys)); - __skb_flow_dissect(NULL, skb, &flow_keys_dissector_symmetric, + __skb_flow_dissect(net, skb, &flow_keys_dissector_symmetric, &keys, NULL, 0, 0, 0, 0); return __flow_hash_from_keys(&keys, &hashrnd); } -EXPORT_SYMBOL_GPL(__skb_get_hash_symmetric); +EXPORT_SYMBOL_GPL(__skb_get_hash_symmetric_net); /** * __skb_get_hash_net: calculate a flow hash diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index 92d47e469204..868d68302d22 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -51,7 +51,8 @@ static void nft_symhash_eval(const struct nft_expr *expr, struct sk_buff *skb = pkt->skb; u32 h; - h = reciprocal_scale(__skb_get_hash_symmetric(skb), priv->modulus); + h = reciprocal_scale(__skb_get_hash_symmetric_net(nft_net(pkt), skb), + priv->modulus); regs->data[priv->dreg] = h + priv->offset; } -- cgit v1.2.3-58-ga151 From 3e453ca122d483eb519f934b6624215f0536301c Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Fri, 7 Jun 2024 17:13:53 +0200 Subject: net: ipv4,ipv6: Pass multipath hash computation through a helper The following patches will add a sysctl to control multipath hash seed. In order to centralize the hash computation, add a helper, fib_multipath_hash_from_keys(), and have all IPv4 and IPv6 route.c invocations of flow_hash_from_keys() go through this helper instead. 
Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240607151357.421181-2-petrm@nvidia.com Signed-off-by: Jakub Kicinski --- include/net/ip_fib.h | 7 +++++++ net/ipv4/route.c | 12 ++++++------ net/ipv6/route.c | 12 ++++++------ 3 files changed, 19 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 9b2f69ba5e49..b8b3c07e8f7b 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -521,6 +521,13 @@ void fib_nhc_update_mtu(struct fib_nh_common *nhc, u32 new, u32 orig); int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4, const struct sk_buff *skb, struct flow_keys *flkeys); #endif + +static inline u32 fib_multipath_hash_from_keys(const struct net *net, + struct flow_keys *keys) +{ + return flow_hash_from_keys(keys); +} + int fib_check_nh(struct net *net, struct fib_nh *nh, u32 table, u8 scope, struct netlink_ext_ack *extack); void fib_select_multipath(struct fib_result *res, int hash); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index cb0bdf34ed50..54512acbead7 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1923,7 +1923,7 @@ static u32 fib_multipath_custom_hash_outer(const struct net *net, hash_keys.ports.dst = keys.ports.dst; *p_has_inner = !!(keys.control.flags & FLOW_DIS_ENCAPSULATION); - return flow_hash_from_keys(&hash_keys); + return fib_multipath_hash_from_keys(net, &hash_keys); } static u32 fib_multipath_custom_hash_inner(const struct net *net, @@ -1972,7 +1972,7 @@ static u32 fib_multipath_custom_hash_inner(const struct net *net, if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_PORT) hash_keys.ports.dst = keys.ports.dst; - return flow_hash_from_keys(&hash_keys); + return fib_multipath_hash_from_keys(net, &hash_keys); } static u32 fib_multipath_custom_hash_skb(const struct net *net, @@ -2009,7 +2009,7 @@ static u32 fib_multipath_custom_hash_fl4(const struct net *net, if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT) hash_keys.ports.dst = fl4->fl4_dport; - return flow_hash_from_keys(&hash_keys); + return fib_multipath_hash_from_keys(net, &hash_keys); } /* if skb is set it will be used and fl4 can be NULL */ @@ -2030,7 +2030,7 @@ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4, hash_keys.addrs.v4addrs.src = fl4->saddr; hash_keys.addrs.v4addrs.dst = fl4->daddr; } - mhash = flow_hash_from_keys(&hash_keys); + mhash = fib_multipath_hash_from_keys(net, &hash_keys); break; case 1: /* skb is currently provided only when forwarding */ @@ -2064,7 +2064,7 @@ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4, hash_keys.ports.dst = fl4->fl4_dport; hash_keys.basic.ip_proto = fl4->flowi4_proto; } - mhash = flow_hash_from_keys(&hash_keys); + mhash = fib_multipath_hash_from_keys(net, &hash_keys); break; case 2: memset(&hash_keys, 0, sizeof(hash_keys)); @@ -2095,7 +2095,7 @@ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4, hash_keys.addrs.v4addrs.src = fl4->saddr; hash_keys.addrs.v4addrs.dst = fl4->daddr; } - mhash = flow_hash_from_keys(&hash_keys); + mhash = fib_multipath_hash_from_keys(net, &hash_keys); break; case 3: if (skb) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index ad5fff5a210c..1916de615398 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2372,7 +2372,7 @@ static u32 rt6_multipath_custom_hash_outer(const struct net *net, hash_keys.ports.dst = keys.ports.dst; *p_has_inner = !!(keys.control.flags & 
FLOW_DIS_ENCAPSULATION); - return flow_hash_from_keys(&hash_keys); + return fib_multipath_hash_from_keys(net, &hash_keys); } static u32 rt6_multipath_custom_hash_inner(const struct net *net, @@ -2421,7 +2421,7 @@ static u32 rt6_multipath_custom_hash_inner(const struct net *net, if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_PORT) hash_keys.ports.dst = keys.ports.dst; - return flow_hash_from_keys(&hash_keys); + return fib_multipath_hash_from_keys(net, &hash_keys); } static u32 rt6_multipath_custom_hash_skb(const struct net *net, @@ -2460,7 +2460,7 @@ static u32 rt6_multipath_custom_hash_fl6(const struct net *net, if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT) hash_keys.ports.dst = fl6->fl6_dport; - return flow_hash_from_keys(&hash_keys); + return fib_multipath_hash_from_keys(net, &hash_keys); } /* if skb is set it will be used and fl6 can be NULL */ @@ -2482,7 +2482,7 @@ u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6, hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6); hash_keys.basic.ip_proto = fl6->flowi6_proto; } - mhash = flow_hash_from_keys(&hash_keys); + mhash = fib_multipath_hash_from_keys(net, &hash_keys); break; case 1: if (skb) { @@ -2514,7 +2514,7 @@ u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6, hash_keys.ports.dst = fl6->fl6_dport; hash_keys.basic.ip_proto = fl6->flowi6_proto; } - mhash = flow_hash_from_keys(&hash_keys); + mhash = fib_multipath_hash_from_keys(net, &hash_keys); break; case 2: memset(&hash_keys, 0, sizeof(hash_keys)); @@ -2551,7 +2551,7 @@ u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6, hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6); hash_keys.basic.ip_proto = fl6->flowi6_proto; } - mhash = flow_hash_from_keys(&hash_keys); + mhash = fib_multipath_hash_from_keys(net, &hash_keys); break; case 3: if (skb) -- cgit v1.2.3-58-ga151 From 4ee2a8cace3fb9a34aea6a56426f89d26dd514f3 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Fri, 7 Jun 2024 17:13:54 +0200 Subject: net: ipv4: Add a sysctl to set multipath hash seed When calculating hashes for the purpose of multipath forwarding, both IPv4 and IPv6 code currently fall back on flow_hash_from_keys(). That uses a randomly-generated seed. That's a fine choice by default, but unfortunately some deployments may need a tighter control over the seed used. In this patch, make the seed configurable by adding a new sysctl key, net.ipv4.fib_multipath_hash_seed to control the seed. This seed is used specifically for multipath forwarding and not for the other concerns that flow_hash_from_keys() is used for, such as queue selection. Expose the knob as sysctl because other such settings, such as headers to hash, are also handled that way. Like those, the multipath hash seed is a per-netns variable. Despite being placed in the net.ipv4 namespace, the multipath seed sysctl is used for both IPv4 and IPv6, similarly to e.g. a number of TCP variables. The seed used by flow_hash_from_keys() is a 128-bit quantity. However it seems that usually the seed is a much more modest value. 32 bits seem typical (Cisco, Cumulus), some systems go even lower. For that reason, and to decouple the user interface from implementation details, go with a 32-bit quantity, which is then quadruplicated to form the siphash key. 
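To make the "quadruplicated" wording concrete: with a user seed of 0x12345678 (an arbitrary example value), both 64-bit halves of the siphash key become 0x1234567812345678, so the 32-bit seed occupies all four 32-bit words of the 128-bit key. A minimal userspace sketch of the same expansion, mirroring the fib_multipath_hash_construct_key() helper added below:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t user_seed = 0x12345678;	/* example value only */
	uint64_t half = ((uint64_t)user_seed << 32) | user_seed;

	/* key = { half, half }: the 32-bit seed appears four times */
	printf("key[0] = key[1] = 0x%016llx\n", (unsigned long long)half);
	return 0;
}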
Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240607151357.421181-3-petrm@nvidia.com Signed-off-by: Jakub Kicinski --- Documentation/networking/ip-sysctl.rst | 14 ++++++++ include/net/flow_dissector.h | 2 ++ include/net/ip_fib.h | 23 +++++++++++- include/net/netns/ipv4.h | 8 +++++ net/core/flow_dissector.c | 7 ++++ net/ipv4/sysctl_net_ipv4.c | 66 ++++++++++++++++++++++++++++++++++ 6 files changed, 119 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index 6e99eccdb837..3616389c8c2d 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -131,6 +131,20 @@ fib_multipath_hash_fields - UNSIGNED INTEGER Default: 0x0007 (source IP, destination IP and IP protocol) +fib_multipath_hash_seed - UNSIGNED INTEGER + The seed value used when calculating hash for multipath routes. Applies + to both IPv4 and IPv6 datapath. Only present for kernels built with + CONFIG_IP_ROUTE_MULTIPATH enabled. + + When set to 0, the seed value used for multipath routing defaults to an + internal random-generated one. + + The actual hashing algorithm is not specified -- there is no guarantee + that a next hop distribution effected by a given seed will keep stable + across kernel versions. + + Default: 0 (random) + fib_sync_mem - UNSIGNED INTEGER Amount of dirty memory from fib entries that can be backlogged before synchronize_rcu is forced. diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 99626475c3f4..3e47e123934d 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -442,6 +442,8 @@ static inline bool flow_keys_have_l4(const struct flow_keys *keys) } u32 flow_hash_from_keys(struct flow_keys *keys); +u32 flow_hash_from_keys_seed(struct flow_keys *keys, + const siphash_key_t *keyval); void skb_flow_get_icmp_tci(const struct sk_buff *skb, struct flow_dissector_key_icmp *key_icmp, const void *data, int thoff, int hlen); diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index b8b3c07e8f7b..6e7984bfb986 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -520,13 +520,34 @@ void fib_nhc_update_mtu(struct fib_nh_common *nhc, u32 new, u32 orig); #ifdef CONFIG_IP_ROUTE_MULTIPATH int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4, const struct sk_buff *skb, struct flow_keys *flkeys); -#endif + +static void +fib_multipath_hash_construct_key(siphash_key_t *key, u32 mp_seed) +{ + u64 mp_seed_64 = mp_seed; + + key->key[0] = (mp_seed_64 << 32) | mp_seed_64; + key->key[1] = key->key[0]; +} +static inline u32 fib_multipath_hash_from_keys(const struct net *net, + struct flow_keys *keys) +{ + siphash_aligned_key_t hash_key; + u32 mp_seed; + + mp_seed = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_seed).mp_seed; + fib_multipath_hash_construct_key(&hash_key, mp_seed); + + return flow_hash_from_keys_seed(keys, &hash_key); +} +#else static inline u32 fib_multipath_hash_from_keys(const struct net *net, struct flow_keys *keys) { return flow_hash_from_keys(keys); } +#endif int fib_check_nh(struct net *net, struct fib_nh *nh, u32 table, u8 scope, struct netlink_ext_ack *extack); diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index a91bb971f901..5fcd61ada622 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -40,6 +40,13 @@ struct inet_timewait_death_row { struct tcp_fastopen_context; 
+#ifdef CONFIG_IP_ROUTE_MULTIPATH +struct sysctl_fib_multipath_hash_seed { + u32 user_seed; + u32 mp_seed; +}; +#endif + struct netns_ipv4 { /* Cacheline organization can be found documented in * Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst. @@ -246,6 +253,7 @@ struct netns_ipv4 { #endif #endif #ifdef CONFIG_IP_ROUTE_MULTIPATH + struct sysctl_fib_multipath_hash_seed sysctl_fib_multipath_hash_seed; u32 sysctl_fib_multipath_hash_fields; u8 sysctl_fib_multipath_use_neigh; u8 sysctl_fib_multipath_hash_policy; diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index e479790db0f7..e64a26379807 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -1806,6 +1806,13 @@ u32 flow_hash_from_keys(struct flow_keys *keys) } EXPORT_SYMBOL(flow_hash_from_keys); +u32 flow_hash_from_keys_seed(struct flow_keys *keys, + const siphash_key_t *keyval) +{ + return __flow_hash_from_keys(keys, keyval); +} +EXPORT_SYMBOL(flow_hash_from_keys_seed); + static inline u32 ___skb_get_hash(const struct sk_buff *skb, struct flow_keys *keys, const siphash_key_t *keyval) diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index bb64c0ef092d..9140d20eb2d4 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -464,6 +464,61 @@ static int proc_fib_multipath_hash_fields(struct ctl_table *table, int write, return ret; } + +static u32 proc_fib_multipath_hash_rand_seed __ro_after_init; + +static void proc_fib_multipath_hash_init_rand_seed(void) +{ + get_random_bytes(&proc_fib_multipath_hash_rand_seed, + sizeof(proc_fib_multipath_hash_rand_seed)); +} + +static void proc_fib_multipath_hash_set_seed(struct net *net, u32 user_seed) +{ + struct sysctl_fib_multipath_hash_seed new = { + .user_seed = user_seed, + .mp_seed = (user_seed ? 
user_seed : + proc_fib_multipath_hash_rand_seed), + }; + + WRITE_ONCE(net->ipv4.sysctl_fib_multipath_hash_seed, new); +} + +static int proc_fib_multipath_hash_seed(struct ctl_table *table, int write, + void *buffer, size_t *lenp, + loff_t *ppos) +{ + struct sysctl_fib_multipath_hash_seed *mphs; + struct net *net = table->data; + struct ctl_table tmp; + u32 user_seed; + int ret; + + mphs = &net->ipv4.sysctl_fib_multipath_hash_seed; + user_seed = mphs->user_seed; + + tmp = *table; + tmp.data = &user_seed; + + ret = proc_douintvec_minmax(&tmp, write, buffer, lenp, ppos); + + if (write && ret == 0) { + proc_fib_multipath_hash_set_seed(net, user_seed); + call_netevent_notifiers(NETEVENT_IPV4_MPATH_HASH_UPDATE, net); + } + + return ret; +} +#else + +static void proc_fib_multipath_hash_init_rand_seed(void) +{ +} + +static void proc_fib_multipath_hash_set_seed(struct net *net, u32 user_seed) +{ +} + #endif static struct ctl_table ipv4_table[] = { @@ -1072,6 +1127,13 @@ static struct ctl_table ipv4_net_table[] = { .extra1 = SYSCTL_ONE, .extra2 = &fib_multipath_hash_fields_all_mask, }, + { + .procname = "fib_multipath_hash_seed", + .data = &init_net, + .maxlen = sizeof(u32), + .mode = 0644, + .proc_handler = proc_fib_multipath_hash_seed, + }, #endif { .procname = "ip_unprivileged_port_start", @@ -1550,6 +1612,8 @@ static __net_init int ipv4_sysctl_init_net(struct net *net) if (!net->ipv4.sysctl_local_reserved_ports) goto err_ports; + proc_fib_multipath_hash_set_seed(net, 0); + return 0; err_ports: @@ -1584,6 +1648,8 @@ static __init int sysctl_ipv4_init(void) if (!hdr) return -ENOMEM; + proc_fib_multipath_hash_init_rand_seed(); + if (register_pernet_subsys(&ipv4_sysctl_ops)) { unregister_net_sysctl_table(hdr); return -ENOMEM; -- cgit v1.2.3-58-ga151 From a3cfe84cca28f205761a0450016593b0d728165e Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Thu, 6 Jun 2024 07:58:50 -0700 Subject: bpf: Add CHECKSUM_COMPLETE to bpf test progs Add special flag to validate that TC BPF program properly updates checksum information in skb. 
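For context, a selftest-style user of the new flag might look like the sketch below; it is an illustration under assumed names (run_with_csum_check(), prog_fd and pkt are placeholders), not code from this patch:

#include <bpf/bpf.h>
#include <bpf/libbpf.h>

/* Hypothetical helper: run a TC program under test on a raw packet with
 * checksum validation enabled. */
static int run_with_csum_check(int prog_fd, void *pkt, __u32 len)
{
	LIBBPF_OPTS(bpf_test_run_opts, topts,
		.data_in = pkt,
		.data_size_in = len,
		.flags = BPF_F_TEST_SKB_CHECKSUM_COMPLETE,
		.repeat = 1,
	);

	/* The kernel sets CHECKSUM_COMPLETE on the constructed skb before the
	 * run and re-checksums it afterwards; a mismatch fails the run with
	 * -EBADMSG. */
	return bpf_prog_test_run_opts(prog_fd, &topts);
}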
Signed-off-by: Vadim Fedorenko Signed-off-by: Daniel Borkmann Reviewed-by: Jakub Kicinski Acked-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20240606145851.229116-1-vadfed@meta.com --- include/uapi/linux/bpf.h | 2 ++ net/bpf/test_run.c | 28 +++++++++++++++++++++++++++- tools/include/uapi/linux/bpf.h | 2 ++ 3 files changed, 31 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 25ea393cf084..35bcf52dbc65 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1425,6 +1425,8 @@ enum { #define BPF_F_TEST_RUN_ON_CPU (1U << 0) /* If set, XDP frames will be transmitted after processing */ #define BPF_F_TEST_XDP_LIVE_FRAMES (1U << 1) +/* If set, apply CHECKSUM_COMPLETE to skb and validate the checksum */ +#define BPF_F_TEST_SKB_CHECKSUM_COMPLETE (1U << 2) /* type for BPF_ENABLE_STATS */ enum bpf_stats_type { diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 36ae54f57bf5..3c965e32fc33 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -983,7 +983,8 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, void *data; int ret; - if (kattr->test.flags || kattr->test.cpu || kattr->test.batch_size) + if ((kattr->test.flags & ~BPF_F_TEST_SKB_CHECKSUM_COMPLETE) || + kattr->test.cpu || kattr->test.batch_size) return -EINVAL; data = bpf_test_init(kattr, kattr->test.data_size_in, @@ -1031,6 +1032,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN); __skb_put(skb, size); + if (ctx && ctx->ifindex > 1) { dev = dev_get_by_index(net, ctx->ifindex); if (!dev) { @@ -1066,9 +1068,19 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, __skb_push(skb, hh_len); if (is_direct_pkt_access) bpf_compute_data_pointers(skb); + ret = convert___skb_to_skb(skb, ctx); if (ret) goto out; + + if (kattr->test.flags & BPF_F_TEST_SKB_CHECKSUM_COMPLETE) { + const int off = skb_network_offset(skb); + int len = skb->len - off; + + skb->csum = skb_checksum(skb, off, len, 0); + skb->ip_summed = CHECKSUM_COMPLETE; + } + ret = bpf_test_run(prog, skb, repeat, &retval, &duration, false); if (ret) goto out; @@ -1083,6 +1095,20 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, } memset(__skb_push(skb, hh_len), 0, hh_len); } + + if (kattr->test.flags & BPF_F_TEST_SKB_CHECKSUM_COMPLETE) { + const int off = skb_network_offset(skb); + int len = skb->len - off; + __wsum csum; + + csum = skb_checksum(skb, off, len, 0); + + if (csum_fold(skb->csum) != csum_fold(csum)) { + ret = -EBADMSG; + goto out; + } + } + convert_skb_to___skb(skb, ctx); size = skb->len; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 25ea393cf084..35bcf52dbc65 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1425,6 +1425,8 @@ enum { #define BPF_F_TEST_RUN_ON_CPU (1U << 0) /* If set, XDP frames will be transmitted after processing */ #define BPF_F_TEST_XDP_LIVE_FRAMES (1U << 1) +/* If set, apply CHECKSUM_COMPLETE to skb and validate the checksum */ +#define BPF_F_TEST_SKB_CHECKSUM_COMPLETE (1U << 2) /* type for BPF_ENABLE_STATS */ enum bpf_stats_type { -- cgit v1.2.3-58-ga151 From 5f703ce5c981ee02c00e210d5b155bbbfbf11263 Mon Sep 17 00:00:00 2001 From: Lukasz Majewski Date: Mon, 10 Jun 2024 15:39:14 +0200 Subject: net: hsr: Send supervisory frames to HSR network with ProxyNodeTable data This patch provides support for sending supervision HSR 
frames with MAC addresses stored in ProxyNodeTable when RedBox (i.e. HSR-SAN) is enabled. Supervision frames with RedBox MAC address (appended as second TLV) are only send for ProxyNodeTable nodes. This patch series shall be tested with hsr_redbox.sh script. Signed-off-by: Lukasz Majewski Reviewed-by: Wojciech Drewek Signed-off-by: David S. Miller --- net/hsr/hsr_device.c | 63 +++++++++++++++++++++++++++++++++++++++++--------- net/hsr/hsr_forward.c | 37 +++++++++++++++++++++++++++-- net/hsr/hsr_framereg.c | 12 ++++++++++ net/hsr/hsr_framereg.h | 2 ++ net/hsr/hsr_main.h | 4 +++- net/hsr/hsr_netlink.c | 1 + 6 files changed, 105 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index e6904288d40d..e4cc6b78dcfc 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -73,9 +73,15 @@ static void hsr_check_announce(struct net_device *hsr_dev) mod_timer(&hsr->announce_timer, jiffies + msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL)); } + + if (hsr->redbox && !timer_pending(&hsr->announce_proxy_timer)) + mod_timer(&hsr->announce_proxy_timer, jiffies + + msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL) / 2); } else { /* Deactivate the announce timer */ timer_delete(&hsr->announce_timer); + if (hsr->redbox) + timer_delete(&hsr->announce_proxy_timer); } } @@ -279,10 +285,11 @@ out: return NULL; } -static void send_hsr_supervision_frame(struct hsr_port *master, - unsigned long *interval) +static void send_hsr_supervision_frame(struct hsr_port *port, + unsigned long *interval, + const unsigned char *addr) { - struct hsr_priv *hsr = master->hsr; + struct hsr_priv *hsr = port->hsr; __u8 type = HSR_TLV_LIFE_CHECK; struct hsr_sup_payload *hsr_sp; struct hsr_sup_tlv *hsr_stlv; @@ -296,9 +303,9 @@ static void send_hsr_supervision_frame(struct hsr_port *master, hsr->announce_count++; } - skb = hsr_init_skb(master); + skb = hsr_init_skb(port); if (!skb) { - netdev_warn_once(master->dev, "HSR: Could not send supervision frame\n"); + netdev_warn_once(port->dev, "HSR: Could not send supervision frame\n"); return; } @@ -321,11 +328,12 @@ static void send_hsr_supervision_frame(struct hsr_port *master, hsr_stag->tlv.HSR_TLV_length = hsr->prot_version ? 
sizeof(struct hsr_sup_payload) : 12; - /* Payload: MacAddressA */ + /* Payload: MacAddressA / SAN MAC from ProxyNodeTable */ hsr_sp = skb_put(skb, sizeof(struct hsr_sup_payload)); - ether_addr_copy(hsr_sp->macaddress_A, master->dev->dev_addr); + ether_addr_copy(hsr_sp->macaddress_A, addr); - if (hsr->redbox) { + if (hsr->redbox && + hsr_is_node_in_db(&hsr->proxy_node_db, addr)) { hsr_stlv = skb_put(skb, sizeof(struct hsr_sup_tlv)); hsr_stlv->HSR_TLV_type = PRP_TLV_REDBOX_MAC; hsr_stlv->HSR_TLV_length = sizeof(struct hsr_sup_payload); @@ -340,13 +348,14 @@ static void send_hsr_supervision_frame(struct hsr_port *master, return; } - hsr_forward_skb(skb, master); + hsr_forward_skb(skb, port); spin_unlock_bh(&hsr->seqnr_lock); return; } static void send_prp_supervision_frame(struct hsr_port *master, - unsigned long *interval) + unsigned long *interval, + const unsigned char *addr) { struct hsr_priv *hsr = master->hsr; struct hsr_sup_payload *hsr_sp; @@ -396,7 +405,7 @@ static void hsr_announce(struct timer_list *t) rcu_read_lock(); master = hsr_port_get_hsr(hsr, HSR_PT_MASTER); - hsr->proto_ops->send_sv_frame(master, &interval); + hsr->proto_ops->send_sv_frame(master, &interval, master->dev->dev_addr); if (is_admin_up(master->dev)) mod_timer(&hsr->announce_timer, jiffies + interval); @@ -404,6 +413,37 @@ static void hsr_announce(struct timer_list *t) rcu_read_unlock(); } +/* Announce (supervision frame) timer function for RedBox + */ +static void hsr_proxy_announce(struct timer_list *t) +{ + struct hsr_priv *hsr = from_timer(hsr, t, announce_proxy_timer); + struct hsr_port *interlink; + unsigned long interval = 0; + struct hsr_node *node; + + rcu_read_lock(); + /* RedBOX sends supervisory frames to HSR network with MAC addresses + * of SAN nodes stored in ProxyNodeTable. + */ + interlink = hsr_port_get_hsr(hsr, HSR_PT_INTERLINK); + list_for_each_entry_rcu(node, &hsr->proxy_node_db, mac_list) { + if (hsr_addr_is_redbox(hsr, node->macaddress_A)) + continue; + hsr->proto_ops->send_sv_frame(interlink, &interval, + node->macaddress_A); + } + + if (is_admin_up(interlink->dev)) { + if (!interval) + interval = msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL); + + mod_timer(&hsr->announce_proxy_timer, jiffies + interval); + } + + rcu_read_unlock(); +} + void hsr_del_ports(struct hsr_priv *hsr) { struct hsr_port *port; @@ -590,6 +630,7 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], timer_setup(&hsr->announce_timer, hsr_announce, 0); timer_setup(&hsr->prune_timer, hsr_prune_nodes, 0); timer_setup(&hsr->prune_proxy_timer, hsr_prune_proxy_nodes, 0); + timer_setup(&hsr->announce_proxy_timer, hsr_proxy_announce, 0); ether_addr_copy(hsr->sup_multicast_addr, def_multicast_addr); hsr->sup_multicast_addr[ETH_ALEN - 1] = multicast_spec; diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c index 05a61b8286ec..960ef386bc3a 100644 --- a/net/hsr/hsr_forward.c +++ b/net/hsr/hsr_forward.c @@ -117,6 +117,35 @@ static bool is_supervision_frame(struct hsr_priv *hsr, struct sk_buff *skb) return true; } +static bool is_proxy_supervision_frame(struct hsr_priv *hsr, + struct sk_buff *skb) +{ + struct hsr_sup_payload *payload; + struct ethhdr *eth_hdr; + u16 total_length = 0; + + eth_hdr = (struct ethhdr *)skb_mac_header(skb); + + /* Get the HSR protocol revision. 
*/ + if (eth_hdr->h_proto == htons(ETH_P_HSR)) + total_length = sizeof(struct hsrv1_ethhdr_sp); + else + total_length = sizeof(struct hsrv0_ethhdr_sp); + + if (!pskb_may_pull(skb, total_length + sizeof(struct hsr_sup_payload))) + return false; + + skb_pull(skb, total_length); + payload = (struct hsr_sup_payload *)skb->data; + skb_push(skb, total_length); + + /* For RedBox (HSR-SAN) check if we have received the supervision + * frame with MAC addresses from own ProxyNodeTable. + */ + return hsr_is_node_in_db(&hsr->proxy_node_db, + payload->macaddress_A); +} + static struct sk_buff *create_stripped_skb_hsr(struct sk_buff *skb_in, struct hsr_frame_info *frame) { @@ -499,7 +528,8 @@ static void hsr_forward_do(struct hsr_frame_info *frame) frame->sequence_nr)) continue; - if (frame->is_supervision && port->type == HSR_PT_MASTER) { + if (frame->is_supervision && port->type == HSR_PT_MASTER && + !frame->is_proxy_supervision) { hsr_handle_sup_frame(frame); continue; } @@ -637,6 +667,9 @@ static int fill_frame_info(struct hsr_frame_info *frame, memset(frame, 0, sizeof(*frame)); frame->is_supervision = is_supervision_frame(port->hsr, skb); + if (frame->is_supervision && hsr->redbox) + frame->is_proxy_supervision = + is_proxy_supervision_frame(port->hsr, skb); n_db = &hsr->node_db; if (port->type == HSR_PT_INTERLINK) @@ -688,7 +721,7 @@ void hsr_forward_skb(struct sk_buff *skb, struct hsr_port *port) /* Gets called for ingress frames as well as egress from master port. * So check and increment stats for master port only here. */ - if (port->type == HSR_PT_MASTER) { + if (port->type == HSR_PT_MASTER || port->type == HSR_PT_INTERLINK) { port->dev->stats.tx_packets++; port->dev->stats.tx_bytes += skb->len; } diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c index 614df9649794..73bc6f659812 100644 --- a/net/hsr/hsr_framereg.c +++ b/net/hsr/hsr_framereg.c @@ -36,6 +36,14 @@ static bool seq_nr_after(u16 a, u16 b) #define seq_nr_before(a, b) seq_nr_after((b), (a)) #define seq_nr_before_or_eq(a, b) (!seq_nr_after((a), (b))) +bool hsr_addr_is_redbox(struct hsr_priv *hsr, unsigned char *addr) +{ + if (!hsr->redbox || !is_valid_ether_addr(hsr->macaddress_redbox)) + return false; + + return ether_addr_equal(addr, hsr->macaddress_redbox); +} + bool hsr_addr_is_self(struct hsr_priv *hsr, unsigned char *addr) { struct hsr_self_node *sn; @@ -591,6 +599,10 @@ void hsr_prune_proxy_nodes(struct timer_list *t) spin_lock_bh(&hsr->list_lock); list_for_each_entry_safe(node, tmp, &hsr->proxy_node_db, mac_list) { + /* Don't prune RedBox node. 
*/ + if (hsr_addr_is_redbox(hsr, node->macaddress_A)) + continue; + timestamp = node->time_in[HSR_PT_INTERLINK]; /* Prune old entries */ diff --git a/net/hsr/hsr_framereg.h b/net/hsr/hsr_framereg.h index 7619e31c1d2d..993fa950d814 100644 --- a/net/hsr/hsr_framereg.h +++ b/net/hsr/hsr_framereg.h @@ -22,6 +22,7 @@ struct hsr_frame_info { struct hsr_node *node_src; u16 sequence_nr; bool is_supervision; + bool is_proxy_supervision; bool is_vlan; bool is_local_dest; bool is_local_exclusive; @@ -35,6 +36,7 @@ struct hsr_node *hsr_get_node(struct hsr_port *port, struct list_head *node_db, enum hsr_port_type rx_port); void hsr_handle_sup_frame(struct hsr_frame_info *frame); bool hsr_addr_is_self(struct hsr_priv *hsr, unsigned char *addr); +bool hsr_addr_is_redbox(struct hsr_priv *hsr, unsigned char *addr); void hsr_addr_subst_source(struct hsr_node *node, struct sk_buff *skb); void hsr_addr_subst_dest(struct hsr_node *node_src, struct sk_buff *skb, diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h index 23850b16d1ea..ab1f8d35d9dc 100644 --- a/net/hsr/hsr_main.h +++ b/net/hsr/hsr_main.h @@ -170,7 +170,8 @@ struct hsr_node; struct hsr_proto_ops { /* format and send supervision frame */ - void (*send_sv_frame)(struct hsr_port *port, unsigned long *interval); + void (*send_sv_frame)(struct hsr_port *port, unsigned long *interval, + const unsigned char addr[ETH_ALEN]); void (*handle_san_frame)(bool san, enum hsr_port_type port, struct hsr_node *node); bool (*drop_frame)(struct hsr_frame_info *frame, struct hsr_port *port); @@ -197,6 +198,7 @@ struct hsr_priv { struct list_head proxy_node_db; /* RedBox HSR proxy nodes */ struct hsr_self_node __rcu *self_node; /* MACs of slaves */ struct timer_list announce_timer; /* Supervision frame dispatch */ + struct timer_list announce_proxy_timer; struct timer_list prune_timer; struct timer_list prune_proxy_timer; int announce_count; diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c index 898f18c6da53..f6ff0b61e08a 100644 --- a/net/hsr/hsr_netlink.c +++ b/net/hsr/hsr_netlink.c @@ -131,6 +131,7 @@ static void hsr_dellink(struct net_device *dev, struct list_head *head) del_timer_sync(&hsr->prune_timer); del_timer_sync(&hsr->prune_proxy_timer); del_timer_sync(&hsr->announce_timer); + timer_delete_sync(&hsr->announce_proxy_timer); hsr_debugfs_term(hsr); hsr_del_ports(hsr); -- cgit v1.2.3-58-ga151 From 404dbd26322f50c8123bf5bff9a409356889035f Mon Sep 17 00:00:00 2001 From: Chris Lew Date: Wed, 12 Jun 2024 12:01:56 +0530 Subject: net: qrtr: ns: Ignore ENODEV failures in ns Ignore the ENODEV failures returned by kernel_sendmsg(). These errors indicate that either the local port has been closed or the remote has gone down. Neither of these scenarios are fatal and will eventually be handled through packets that are later queued on the control port. 
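To illustrate the intent, the pattern applied throughout this patch can be read as a small helper: send the control packet, but treat -ENODEV as a benign outcome. The helper and its names below are hypothetical, purely for illustration; the patch itself adds the check inline at each call site.

	/* Hypothetical sketch of the error-handling pattern applied here:
	 * -ENODEV means the local port closed or the remote went away, both
	 * of which are cleaned up later through the control port, so the
	 * send is not treated as a failure.
	 */
	static int qrtr_ns_ctrl_send(struct socket *sock,
				     struct sockaddr_qrtr *to,
				     struct qrtr_ctrl_pkt *pkt)
	{
		struct msghdr msg = { .msg_name = to, .msg_namelen = sizeof(*to) };
		struct kvec iv = { .iov_base = pkt, .iov_len = sizeof(*pkt) };
		int ret;

		ret = kernel_sendmsg(sock, &msg, &iv, 1, sizeof(*pkt));
		if (ret == -ENODEV)	/* benign: port/remote already gone */
			return 0;
		if (ret < 0)
			pr_err("qrtr ns: control send failed (%d)\n", ret);
		return ret;
	}
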
Signed-off-by: Chris Lew Signed-off-by: Sarannya Sasikumar Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240612063156.1377210-1-quic_sarannya@quicinc.com Signed-off-by: Paolo Abeni --- net/qrtr/ns.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/qrtr/ns.c b/net/qrtr/ns.c index 654a3cc0d347..3de9350cbf30 100644 --- a/net/qrtr/ns.c +++ b/net/qrtr/ns.c @@ -132,8 +132,8 @@ static int service_announce_new(struct sockaddr_qrtr *dest, return kernel_sendmsg(qrtr_ns.sock, &msg, &iv, 1, sizeof(pkt)); } -static int service_announce_del(struct sockaddr_qrtr *dest, - struct qrtr_server *srv) +static void service_announce_del(struct sockaddr_qrtr *dest, + struct qrtr_server *srv) { struct qrtr_ctrl_pkt pkt; struct msghdr msg = { }; @@ -157,10 +157,10 @@ static int service_announce_del(struct sockaddr_qrtr *dest, msg.msg_namelen = sizeof(*dest); ret = kernel_sendmsg(qrtr_ns.sock, &msg, &iv, 1, sizeof(pkt)); - if (ret < 0) + if (ret < 0 && ret != -ENODEV) pr_err("failed to announce del service\n"); - return ret; + return; } static void lookup_notify(struct sockaddr_qrtr *to, struct qrtr_server *srv, @@ -188,7 +188,7 @@ static void lookup_notify(struct sockaddr_qrtr *to, struct qrtr_server *srv, msg.msg_namelen = sizeof(*to); ret = kernel_sendmsg(qrtr_ns.sock, &msg, &iv, 1, sizeof(pkt)); - if (ret < 0) + if (ret < 0 && ret != -ENODEV) pr_err("failed to send lookup notification\n"); } @@ -207,6 +207,9 @@ static int announce_servers(struct sockaddr_qrtr *sq) xa_for_each(&node->servers, index, srv) { ret = service_announce_new(sq, srv); if (ret < 0) { + if (ret == -ENODEV) + continue; + pr_err("failed to announce new service\n"); return ret; } @@ -369,7 +372,7 @@ static int ctrl_cmd_bye(struct sockaddr_qrtr *from) msg.msg_namelen = sizeof(sq); ret = kernel_sendmsg(qrtr_ns.sock, &msg, &iv, 1, sizeof(pkt)); - if (ret < 0) { + if (ret < 0 && ret != -ENODEV) { pr_err("failed to send bye cmd\n"); return ret; } @@ -443,7 +446,7 @@ static int ctrl_cmd_del_client(struct sockaddr_qrtr *from, msg.msg_namelen = sizeof(sq); ret = kernel_sendmsg(qrtr_ns.sock, &msg, &iv, 1, sizeof(pkt)); - if (ret < 0) { + if (ret < 0 && ret != -ENODEV) { pr_err("failed to send del client cmd\n"); return ret; } -- cgit v1.2.3-58-ga151 From 124e8c2b1b5d08a10d3a44ed082eaaf98a78c91f Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 12 Jun 2024 18:38:12 -0700 Subject: bpf: Relax tuple len requirement for sk helpers. __bpf_skc_lookup() safely handles incorrect values of tuple len, hence we can allow zero to be passed as tuple len. This patch alone doesn't make an observable verifier difference. It's a trivial improvement that might simplify bpf programs. 
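As a hypothetical example of the kind of program this relaxation is aimed at (not taken from the series, and assuming the helpers a program uses have all been switched to ARG_CONST_SIZE_OR_ZERO): a lookup whose tuple length is zero on some path can simply get a NULL result from the helper instead of having to be special-cased for the verifier.

	/* Hypothetical tc program: tuple_len stays 0 for non-IPv4 traffic,
	 * and the lookup then returns NULL rather than the verifier
	 * rejecting a possibly-zero size argument.
	 */
	#include <linux/bpf.h>
	#include <linux/if_ether.h>
	#include <linux/pkt_cls.h>
	#include <bpf/bpf_helpers.h>
	#include <bpf/bpf_endian.h>

	SEC("tc")
	int maybe_lookup(struct __sk_buff *skb)
	{
		struct bpf_sock_tuple tuple = {};
		struct bpf_sock *sk;
		__u32 tuple_len = 0;

		if (skb->protocol == bpf_htons(ETH_P_IP))
			tuple_len = sizeof(tuple.ipv4);	/* tuple.ipv4 filled elsewhere */

		sk = bpf_sk_lookup_tcp(skb, &tuple, tuple_len,
				       BPF_F_CURRENT_NETNS, 0);
		if (sk)
			bpf_sk_release(sk);
		return TC_ACT_OK;
	}

	char _license[] SEC("license") = "GPL";
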
Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Eduard Zingerman Link: https://lore.kernel.org/bpf/20240613013815.953-2-alexei.starovoitov@gmail.com --- net/core/filter.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/core/filter.c b/net/core/filter.c index 73722790cee3..f1c37c85b858 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -6815,7 +6815,7 @@ static const struct bpf_func_proto bpf_skc_lookup_tcp_proto = { .ret_type = RET_PTR_TO_SOCK_COMMON_OR_NULL, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, - .arg3_type = ARG_CONST_SIZE, + .arg3_type = ARG_CONST_SIZE_OR_ZERO, .arg4_type = ARG_ANYTHING, .arg5_type = ARG_ANYTHING, }; @@ -6834,7 +6834,7 @@ static const struct bpf_func_proto bpf_sk_lookup_tcp_proto = { .ret_type = RET_PTR_TO_SOCKET_OR_NULL, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, - .arg3_type = ARG_CONST_SIZE, + .arg3_type = ARG_CONST_SIZE_OR_ZERO, .arg4_type = ARG_ANYTHING, .arg5_type = ARG_ANYTHING, }; @@ -6853,7 +6853,7 @@ static const struct bpf_func_proto bpf_sk_lookup_udp_proto = { .ret_type = RET_PTR_TO_SOCKET_OR_NULL, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, - .arg3_type = ARG_CONST_SIZE, + .arg3_type = ARG_CONST_SIZE_OR_ZERO, .arg4_type = ARG_ANYTHING, .arg5_type = ARG_ANYTHING, }; @@ -6877,7 +6877,7 @@ static const struct bpf_func_proto bpf_tc_skc_lookup_tcp_proto = { .ret_type = RET_PTR_TO_SOCK_COMMON_OR_NULL, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, - .arg3_type = ARG_CONST_SIZE, + .arg3_type = ARG_CONST_SIZE_OR_ZERO, .arg4_type = ARG_ANYTHING, .arg5_type = ARG_ANYTHING, }; @@ -6901,7 +6901,7 @@ static const struct bpf_func_proto bpf_tc_sk_lookup_tcp_proto = { .ret_type = RET_PTR_TO_SOCKET_OR_NULL, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, - .arg3_type = ARG_CONST_SIZE, + .arg3_type = ARG_CONST_SIZE_OR_ZERO, .arg4_type = ARG_ANYTHING, .arg5_type = ARG_ANYTHING, }; @@ -6925,7 +6925,7 @@ static const struct bpf_func_proto bpf_tc_sk_lookup_udp_proto = { .ret_type = RET_PTR_TO_SOCKET_OR_NULL, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, - .arg3_type = ARG_CONST_SIZE, + .arg3_type = ARG_CONST_SIZE_OR_ZERO, .arg4_type = ARG_ANYTHING, .arg5_type = ARG_ANYTHING, }; @@ -6963,7 +6963,7 @@ static const struct bpf_func_proto bpf_xdp_sk_lookup_udp_proto = { .ret_type = RET_PTR_TO_SOCKET_OR_NULL, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, - .arg3_type = ARG_CONST_SIZE, + .arg3_type = ARG_CONST_SIZE_OR_ZERO, .arg4_type = ARG_ANYTHING, .arg5_type = ARG_ANYTHING, }; @@ -6987,7 +6987,7 @@ static const struct bpf_func_proto bpf_xdp_skc_lookup_tcp_proto = { .ret_type = RET_PTR_TO_SOCK_COMMON_OR_NULL, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, - .arg3_type = ARG_CONST_SIZE, + .arg3_type = ARG_CONST_SIZE_OR_ZERO, .arg4_type = ARG_ANYTHING, .arg5_type = ARG_ANYTHING, }; @@ -7011,7 +7011,7 @@ static const struct bpf_func_proto bpf_xdp_sk_lookup_tcp_proto = { .ret_type = RET_PTR_TO_SOCKET_OR_NULL, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, - .arg3_type = ARG_CONST_SIZE, + .arg3_type = ARG_CONST_SIZE_OR_ZERO, .arg4_type = ARG_ANYTHING, .arg5_type = ARG_ANYTHING, }; @@ -7031,7 +7031,7 @@ static const struct bpf_func_proto bpf_sock_addr_skc_lookup_tcp_proto = { .ret_type = RET_PTR_TO_SOCK_COMMON_OR_NULL, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = 
ARG_PTR_TO_MEM | MEM_RDONLY, - .arg3_type = ARG_CONST_SIZE, + .arg3_type = ARG_CONST_SIZE_OR_ZERO, .arg4_type = ARG_ANYTHING, .arg5_type = ARG_ANYTHING, }; @@ -7050,7 +7050,7 @@ static const struct bpf_func_proto bpf_sock_addr_sk_lookup_tcp_proto = { .ret_type = RET_PTR_TO_SOCKET_OR_NULL, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, - .arg3_type = ARG_CONST_SIZE, + .arg3_type = ARG_CONST_SIZE_OR_ZERO, .arg4_type = ARG_ANYTHING, .arg5_type = ARG_ANYTHING, }; @@ -7069,7 +7069,7 @@ static const struct bpf_func_proto bpf_sock_addr_sk_lookup_udp_proto = { .ret_type = RET_PTR_TO_SOCKET_OR_NULL, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, - .arg3_type = ARG_CONST_SIZE, + .arg3_type = ARG_CONST_SIZE_OR_ZERO, .arg4_type = ARG_ANYTHING, .arg5_type = ARG_ANYTHING, }; -- cgit v1.2.3-58-ga151 From afc5625e20971e8294a35b2f284a91b0601f9fce Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 13 Jun 2024 21:21:42 +0300 Subject: atm: clean up a put_user() calls Unlike copy_from_user(), put_user() and get_user() return -EFAULT on error. Use the error code directly instead of setting it. Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/04a018e8-7433-4f67-8ddd-9357a0114f87@moroto.mountain Signed-off-by: Jakub Kicinski --- net/atm/ioctl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/atm/ioctl.c b/net/atm/ioctl.c index f81f8d56f5c0..0f7a39aeccc8 100644 --- a/net/atm/ioctl.c +++ b/net/atm/ioctl.c @@ -68,7 +68,7 @@ static int do_vcc_ioctl(struct socket *sock, unsigned int cmd, goto done; } error = put_user(sk->sk_sndbuf - sk_wmem_alloc_get(sk), - (int __user *)argp) ? -EFAULT : 0; + (int __user *)argp); goto done; case SIOCINQ: { @@ -83,7 +83,7 @@ static int do_vcc_ioctl(struct socket *sock, unsigned int cmd, skb = skb_peek(&sk->sk_receive_queue); amount = skb ? skb->len : 0; spin_unlock_irq(&sk->sk_receive_queue.lock); - error = put_user(amount, (int __user *)argp) ? -EFAULT : 0; + error = put_user(amount, (int __user *)argp); goto done; } case ATM_SETSC: -- cgit v1.2.3-58-ga151 From 934c29999b57b835d65442da6f741d5e27f3b584 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 13 Jun 2024 14:35:04 +0300 Subject: net: micro-optimize skb_datagram_iter We only use the mapping in a single context in a short and contained scope, so kmap_local_page is sufficient and cheaper. This will also allow skb_datagram_iter to be called from softirq context. 
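For reference, a minimal sketch of the local-mapping pattern this converts to (illustrative only, not code from the patch):

	#include <linux/types.h>
	#include <linux/string.h>
	#include <linux/highmem.h>

	/* Illustrative only: map a page just for the duration of a short,
	 * contained copy. kmap_local_page() is CPU-local and safe in
	 * atomic/softirq context; kmap() draws from a global mapping pool
	 * on HIGHMEM kernels and may sleep, so it cannot be used there.
	 */
	static void copy_from_frag_page(struct page *page, unsigned int offset,
					void *dst, size_t len)
	{
		u8 *vaddr = kmap_local_page(page);

		memcpy(dst, vaddr + offset, len);
		kunmap_local(vaddr);
	}
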
Signed-off-by: Sagi Grimberg Link: https://lore.kernel.org/r/20240613113504.1079860-1-sagi@grimberg.me Signed-off-by: Jakub Kicinski --- net/core/datagram.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/datagram.c b/net/core/datagram.c index e614cfd8e14a..95f242591fd2 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -417,14 +417,14 @@ static int __skb_datagram_iter(const struct sk_buff *skb, int offset, end = start + skb_frag_size(frag); if ((copy = end - offset) > 0) { struct page *page = skb_frag_page(frag); - u8 *vaddr = kmap(page); + u8 *vaddr = kmap_local_page(page); if (copy > len) copy = len; n = INDIRECT_CALL_1(cb, simple_copy_to_iter, vaddr + skb_frag_off(frag) + offset - start, copy, data, to); - kunmap(page); + kunmap_local(vaddr); offset += n; if (n != copy) goto short_copy; -- cgit v1.2.3-58-ga151 From 54fcc6189dfb822eea984fa2b3e477a02447279d Mon Sep 17 00:00:00 2001 From: Antony Antony Date: Tue, 11 Jun 2024 08:31:29 +0200 Subject: xfrm: Fix input error path memory access When there is a misconfiguration of input state slow path KASAN report error. Fix this error. west login: [ 52.987278] eth1: renamed from veth11 [ 53.078814] eth1: renamed from veth21 [ 53.181355] eth1: renamed from veth31 [ 54.921702] ================================================================== [ 54.922602] BUG: KASAN: wild-memory-access in xfrmi_rcv_cb+0x2d/0x295 [ 54.923393] Read of size 8 at addr 6b6b6b6b00000000 by task ping/512 [ 54.924169] [ 54.924386] CPU: 0 PID: 512 Comm: ping Not tainted 6.9.0-08574-gcd29a4313a1b #25 [ 54.925290] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2 04/01/2014 [ 54.926401] Call Trace: [ 54.926731] [ 54.927009] dump_stack_lvl+0x2a/0x3b [ 54.927478] kasan_report+0x84/0xa6 [ 54.927930] ? xfrmi_rcv_cb+0x2d/0x295 [ 54.928410] xfrmi_rcv_cb+0x2d/0x295 [ 54.928872] ? xfrm4_rcv_cb+0x3d/0x5e [ 54.929354] xfrm4_rcv_cb+0x46/0x5e [ 54.929804] xfrm_rcv_cb+0x7e/0xa1 [ 54.930240] xfrm_input+0x1b3a/0x1b96 [ 54.930715] ? xfrm_offload+0x41/0x41 [ 54.931182] ? raw_rcv+0x292/0x292 [ 54.931617] ? nf_conntrack_confirm+0xa2/0xa2 [ 54.932158] ? skb_sec_path+0xd/0x3f [ 54.932610] ? xfrmi_input+0x90/0xce [ 54.933066] xfrm4_esp_rcv+0x33/0x54 [ 54.933521] ip_protocol_deliver_rcu+0xd7/0x1b2 [ 54.934089] ip_local_deliver_finish+0x110/0x120 [ 54.934659] ? ip_protocol_deliver_rcu+0x1b2/0x1b2 [ 54.935248] NF_HOOK.constprop.0+0xf8/0x138 [ 54.935767] ? ip_sublist_rcv_finish+0x68/0x68 [ 54.936317] ? secure_tcpv6_ts_off+0x23/0x168 [ 54.936859] ? ip_protocol_deliver_rcu+0x1b2/0x1b2 [ 54.937454] ? __xfrm_policy_check2.constprop.0+0x18d/0x18d [ 54.938135] NF_HOOK.constprop.0+0xf8/0x138 [ 54.938663] ? ip_sublist_rcv_finish+0x68/0x68 [ 54.939220] ? __xfrm_policy_check2.constprop.0+0x18d/0x18d [ 54.939904] ? ip_local_deliver_finish+0x120/0x120 [ 54.940497] __netif_receive_skb_one_core+0xc9/0x107 [ 54.941121] ? __netif_receive_skb_list_core+0x1c2/0x1c2 [ 54.941771] ? blk_mq_start_stopped_hw_queues+0xc7/0xf9 [ 54.942413] ? blk_mq_start_stopped_hw_queue+0x38/0x38 [ 54.943044] ? virtqueue_get_buf_ctx+0x295/0x46b [ 54.943618] process_backlog+0xb3/0x187 [ 54.944102] __napi_poll.constprop.0+0x57/0x1a7 [ 54.944669] net_rx_action+0x1cb/0x380 [ 54.945150] ? __napi_poll.constprop.0+0x1a7/0x1a7 [ 54.945744] ? vring_new_virtqueue+0x17a/0x17a [ 54.946300] ? 
note_interrupt+0x2cd/0x367 [ 54.946805] handle_softirqs+0x13c/0x2c9 [ 54.947300] do_softirq+0x5f/0x7d [ 54.947727] [ 54.948014] [ 54.948300] __local_bh_enable_ip+0x48/0x62 [ 54.948832] __neigh_event_send+0x3fd/0x4ca [ 54.949361] neigh_resolve_output+0x1e/0x210 [ 54.949896] ip_finish_output2+0x4bf/0x4f0 [ 54.950410] ? __ip_finish_output+0x171/0x1b8 [ 54.950956] ip_send_skb+0x25/0x57 [ 54.951390] raw_sendmsg+0xf95/0x10c0 [ 54.951850] ? check_new_pages+0x45/0x71 [ 54.952343] ? raw_hash_sk+0x21b/0x21b [ 54.952815] ? kernel_init_pages+0x42/0x51 [ 54.953337] ? prep_new_page+0x44/0x51 [ 54.953811] ? get_page_from_freelist+0x72b/0x915 [ 54.954390] ? signal_pending_state+0x77/0x77 [ 54.954936] ? preempt_count_sub+0x14/0xb3 [ 54.955450] ? __might_resched+0x8a/0x240 [ 54.955951] ? __might_sleep+0x25/0xa0 [ 54.956424] ? first_zones_zonelist+0x2c/0x43 [ 54.956977] ? __rcu_read_lock+0x2d/0x3a [ 54.957476] ? __pte_offset_map+0x32/0xa4 [ 54.957980] ? __might_resched+0x8a/0x240 [ 54.958483] ? __might_sleep+0x25/0xa0 [ 54.958963] ? inet_send_prepare+0x54/0x54 [ 54.959478] ? sock_sendmsg_nosec+0x42/0x6c [ 54.960000] sock_sendmsg_nosec+0x42/0x6c [ 54.960502] __sys_sendto+0x15d/0x1cc [ 54.960966] ? __x64_sys_getpeername+0x44/0x44 [ 54.961522] ? __handle_mm_fault+0x679/0xae4 [ 54.962068] ? find_vma+0x6b/0x8b [ 54.962497] ? find_vma_intersection+0x8a/0x8a [ 54.963052] ? handle_mm_fault+0x38/0x154 [ 54.963556] ? handle_mm_fault+0xeb/0x154 [ 54.964059] ? preempt_latency_start+0x29/0x34 [ 54.964613] ? preempt_count_sub+0x14/0xb3 [ 54.965141] ? up_read+0x4b/0x5c [ 54.965557] __x64_sys_sendto+0x76/0x82 [ 54.966041] do_syscall_64+0x69/0xd5 [ 54.966497] entry_SYSCALL_64_after_hwframe+0x4b/0x53 [ 54.967119] RIP: 0033:0x7f2d2fec9a73 [ 54.967572] Code: 8b 15 a9 83 0c 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b8 0f 1f 00 80 3d 71 0b 0d 00 00 41 89 ca 74 14 b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 75 c3 0f 1f 40 00 55 48 83 ec 30 44 89 4c 24 [ 54.969747] RSP: 002b:00007ffe85756418 EFLAGS: 00000202 ORIG_RAX: 000000000000002c [ 54.970655] RAX: ffffffffffffffda RBX: 0000558bebad1340 RCX: 00007f2d2fec9a73 [ 54.971511] RDX: 0000000000000040 RSI: 0000558bebad73c0 RDI: 0000000000000003 [ 54.972366] RBP: 0000558bebad73c0 R08: 0000558bebad35c0 R09: 0000000000000010 [ 54.973234] R10: 0000000000000000 R11: 0000000000000202 R12: 0000000000000040 [ 54.974091] R13: 00007ffe85757b00 R14: 0000001d00000001 R15: 0000558bebad4680 [ 54.974951] [ 54.975244] ================================================================== [ 54.976133] Disabling lock debugging due to kernel taint [ 54.976784] Oops: stack segment: 0000 [#1] PREEMPT DEBUG_PAGEALLOC KASAN [ 54.977603] CPU: 0 PID: 512 Comm: ping Tainted: G B 6.9.0-08574-gcd29a4313a1b #25 [ 54.978654] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2 04/01/2014 [ 54.979750] RIP: 0010:xfrmi_rcv_cb+0x2d/0x295 [ 54.980293] Code: 00 00 41 57 41 56 41 89 f6 41 55 41 54 55 53 48 89 fb 51 85 f6 75 31 48 89 df e8 d7 e8 ff ff 48 89 c5 48 89 c7 e8 8b a4 4f ff <48> 8b 7d 00 48 89 ee e8 eb f3 ff ff 49 89 c5 b8 01 00 00 00 4d 85 [ 54.982462] RSP: 0018:ffffc90000007990 EFLAGS: 00010282 [ 54.983099] RAX: 0000000000000001 RBX: ffff8881126e9900 RCX: fffffbfff07b77cd [ 54.983948] RDX: fffffbfff07b77cd RSI: fffffbfff07b77cd RDI: ffffffff83dbbe60 [ 54.984794] RBP: 6b6b6b6b00000000 R08: 0000000000000008 R09: 0000000000000001 [ 54.985647] R10: ffffffff83dbbe67 R11: fffffbfff07b77cc R12: 00000000ffffffff [ 54.986512] R13: 00000000ffffffff R14: 00000000ffffffff R15: 0000000000000002 [ 
54.987365] FS: 00007f2d2fc0dc40(0000) GS:ffffffff82eb2000(0000) knlGS:0000000000000000 [ 54.988329] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 54.989026] CR2: 00007ffe85755ff8 CR3: 0000000109941000 CR4: 0000000000350ef0 [ 54.989897] Call Trace: [ 54.990223] [ 54.990500] ? __die_body+0x1a/0x56 [ 54.990950] ? die+0x30/0x49 [ 54.991326] ? do_trap+0x9b/0x132 [ 54.991751] ? do_error_trap+0x7d/0xaf [ 54.992223] ? exc_stack_segment+0x35/0x45 [ 54.992734] ? asm_exc_stack_segment+0x22/0x30 [ 54.993294] ? xfrmi_rcv_cb+0x2d/0x295 [ 54.993764] ? xfrm4_rcv_cb+0x3d/0x5e [ 54.994228] xfrm4_rcv_cb+0x46/0x5e [ 54.994670] xfrm_rcv_cb+0x7e/0xa1 [ 54.995106] xfrm_input+0x1b3a/0x1b96 [ 54.995572] ? xfrm_offload+0x41/0x41 [ 54.996038] ? raw_rcv+0x292/0x292 [ 54.996472] ? nf_conntrack_confirm+0xa2/0xa2 [ 54.997011] ? skb_sec_path+0xd/0x3f [ 54.997466] ? xfrmi_input+0x90/0xce [ 54.997925] xfrm4_esp_rcv+0x33/0x54 [ 54.998378] ip_protocol_deliver_rcu+0xd7/0x1b2 [ 54.998944] ip_local_deliver_finish+0x110/0x120 [ 54.999520] ? ip_protocol_deliver_rcu+0x1b2/0x1b2 [ 55.000111] NF_HOOK.constprop.0+0xf8/0x138 [ 55.000630] ? ip_sublist_rcv_finish+0x68/0x68 [ 55.001195] ? secure_tcpv6_ts_off+0x23/0x168 [ 55.001743] ? ip_protocol_deliver_rcu+0x1b2/0x1b2 [ 55.002331] ? __xfrm_policy_check2.constprop.0+0x18d/0x18d [ 55.003008] NF_HOOK.constprop.0+0xf8/0x138 [ 55.003527] ? ip_sublist_rcv_finish+0x68/0x68 [ 55.004078] ? __xfrm_policy_check2.constprop.0+0x18d/0x18d [ 55.004755] ? ip_local_deliver_finish+0x120/0x120 [ 55.005351] __netif_receive_skb_one_core+0xc9/0x107 [ 55.005972] ? __netif_receive_skb_list_core+0x1c2/0x1c2 [ 55.006626] ? blk_mq_start_stopped_hw_queues+0xc7/0xf9 [ 55.007266] ? blk_mq_start_stopped_hw_queue+0x38/0x38 [ 55.007899] ? virtqueue_get_buf_ctx+0x295/0x46b [ 55.008476] process_backlog+0xb3/0x187 [ 55.008961] __napi_poll.constprop.0+0x57/0x1a7 [ 55.009540] net_rx_action+0x1cb/0x380 [ 55.010020] ? __napi_poll.constprop.0+0x1a7/0x1a7 [ 55.010610] ? vring_new_virtqueue+0x17a/0x17a [ 55.011173] ? note_interrupt+0x2cd/0x367 [ 55.011675] handle_softirqs+0x13c/0x2c9 [ 55.012169] do_softirq+0x5f/0x7d [ 55.012597] [ 55.012882] [ 55.013179] __local_bh_enable_ip+0x48/0x62 [ 55.013704] __neigh_event_send+0x3fd/0x4ca [ 55.014227] neigh_resolve_output+0x1e/0x210 [ 55.014761] ip_finish_output2+0x4bf/0x4f0 [ 55.015278] ? __ip_finish_output+0x171/0x1b8 [ 55.015823] ip_send_skb+0x25/0x57 [ 55.016261] raw_sendmsg+0xf95/0x10c0 [ 55.016729] ? check_new_pages+0x45/0x71 [ 55.017229] ? raw_hash_sk+0x21b/0x21b [ 55.017708] ? kernel_init_pages+0x42/0x51 [ 55.018225] ? prep_new_page+0x44/0x51 [ 55.018704] ? get_page_from_freelist+0x72b/0x915 [ 55.019292] ? signal_pending_state+0x77/0x77 [ 55.019840] ? preempt_count_sub+0x14/0xb3 [ 55.020357] ? __might_resched+0x8a/0x240 [ 55.020860] ? __might_sleep+0x25/0xa0 [ 55.021345] ? first_zones_zonelist+0x2c/0x43 [ 55.021896] ? __rcu_read_lock+0x2d/0x3a [ 55.022396] ? __pte_offset_map+0x32/0xa4 [ 55.022901] ? __might_resched+0x8a/0x240 [ 55.023404] ? __might_sleep+0x25/0xa0 [ 55.023879] ? inet_send_prepare+0x54/0x54 [ 55.024391] ? sock_sendmsg_nosec+0x42/0x6c [ 55.024918] sock_sendmsg_nosec+0x42/0x6c [ 55.025428] __sys_sendto+0x15d/0x1cc [ 55.025892] ? __x64_sys_getpeername+0x44/0x44 [ 55.026441] ? __handle_mm_fault+0x679/0xae4 [ 55.026988] ? find_vma+0x6b/0x8b [ 55.027414] ? find_vma_intersection+0x8a/0x8a [ 55.027966] ? handle_mm_fault+0x38/0x154 [ 55.028470] ? handle_mm_fault+0xeb/0x154 [ 55.028972] ? preempt_latency_start+0x29/0x34 [ 55.029532] ? 
preempt_count_sub+0x14/0xb3 [ 55.030047] ? up_read+0x4b/0x5c [ 55.030463] __x64_sys_sendto+0x76/0x82 [ 55.030949] do_syscall_64+0x69/0xd5 [ 55.031406] entry_SYSCALL_64_after_hwframe+0x4b/0x53 [ 55.032028] RIP: 0033:0x7f2d2fec9a73 [ 55.032481] Code: 8b 15 a9 83 0c 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b8 0f 1f 00 80 3d 71 0b 0d 00 00 41 89 ca 74 14 b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 75 c3 0f 1f 40 00 55 48 83 ec 30 44 89 4c 24 [ 55.034660] RSP: 002b:00007ffe85756418 EFLAGS: 00000202 ORIG_RAX: 000000000000002c [ 55.035567] RAX: ffffffffffffffda RBX: 0000558bebad1340 RCX: 00007f2d2fec9a73 [ 55.036424] RDX: 0000000000000040 RSI: 0000558bebad73c0 RDI: 0000000000000003 [ 55.037293] RBP: 0000558bebad73c0 R08: 0000558bebad35c0 R09: 0000000000000010 [ 55.038153] R10: 0000000000000000 R11: 0000000000000202 R12: 0000000000000040 [ 55.039012] R13: 00007ffe85757b00 R14: 0000001d00000001 R15: 0000558bebad4680 [ 55.039871] [ 55.040167] Modules linked in: [ 55.040585] ---[ end trace 0000000000000000 ]--- [ 55.041164] RIP: 0010:xfrmi_rcv_cb+0x2d/0x295 [ 55.041714] Code: 00 00 41 57 41 56 41 89 f6 41 55 41 54 55 53 48 89 fb 51 85 f6 75 31 48 89 df e8 d7 e8 ff ff 48 89 c5 48 89 c7 e8 8b a4 4f ff <48> 8b 7d 00 48 89 ee e8 eb f3 ff ff 49 89 c5 b8 01 00 00 00 4d 85 [ 55.043889] RSP: 0018:ffffc90000007990 EFLAGS: 00010282 [ 55.044528] RAX: 0000000000000001 RBX: ffff8881126e9900 RCX: fffffbfff07b77cd [ 55.045386] RDX: fffffbfff07b77cd RSI: fffffbfff07b77cd RDI: ffffffff83dbbe60 [ 55.046250] RBP: 6b6b6b6b00000000 R08: 0000000000000008 R09: 0000000000000001 [ 55.047104] R10: ffffffff83dbbe67 R11: fffffbfff07b77cc R12: 00000000ffffffff [ 55.047960] R13: 00000000ffffffff R14: 00000000ffffffff R15: 0000000000000002 [ 55.048820] FS: 00007f2d2fc0dc40(0000) GS:ffffffff82eb2000(0000) knlGS:0000000000000000 [ 55.049805] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 55.050507] CR2: 00007ffe85755ff8 CR3: 0000000109941000 CR4: 0000000000350ef0 [ 55.051366] Kernel panic - not syncing: Fatal exception in interrupt [ 55.052136] Kernel Offset: disabled [ 55.052577] ---[ end Kernel panic - not syncing: Fatal exception in interrupt ]--- Fixes: 304b44f0d5a4 ("xfrm: Add dir validation to "in" data path lookup") Signed-off-by: Antony Antony Reviewed-by: Simon Horman Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_input.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index d2ea18dcb0cb..63c004103912 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -585,8 +585,11 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) } if (unlikely(x->dir && x->dir != XFRM_SA_DIR_IN)) { + secpath_reset(skb); XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEDIRERROR); + xfrm_audit_state_notfound(skb, family, spi, seq); xfrm_state_put(x); + x = NULL; goto drop; } -- cgit v1.2.3-58-ga151 From 15f5fe9e84839dcc9eaa69b08ced9d24cb464369 Mon Sep 17 00:00:00 2001 From: Antony Antony Date: Tue, 11 Jun 2024 08:32:15 +0200 Subject: xfrm: Log input direction mismatch error in one place Previously, the offload data path decrypted the packet before checking the direction, leading to error logging and packet dropping. However, dropped packets wouldn't be visible in tcpdump or audit log. With this fix, the offload path, upon noticing SA direction mismatch, will pass the packet to the stack without decrypting it. The L3 layer will then log the error, audit, and drop ESP without decrypting or decapsulating it. 
This also ensures that the slow path records the error and audit log, making dropped packets visible in tcpdump. Fixes: 304b44f0d5a4 ("xfrm: Add dir validation to "in" data path lookup") Signed-off-by: Antony Antony Reviewed-by: Simon Horman Signed-off-by: Steffen Klassert --- net/ipv4/esp4_offload.c | 7 +++++++ net/ipv6/esp6_offload.c | 7 +++++++ net/xfrm/xfrm_input.c | 5 ----- 3 files changed, 14 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c index b3271957ad9a..3f28ecbdcaef 100644 --- a/net/ipv4/esp4_offload.c +++ b/net/ipv4/esp4_offload.c @@ -56,6 +56,13 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head, x = xfrm_state_lookup(dev_net(skb->dev), skb->mark, (xfrm_address_t *)&ip_hdr(skb)->daddr, spi, IPPROTO_ESP, AF_INET); + + if (unlikely(x && x->dir && x->dir != XFRM_SA_DIR_IN)) { + /* non-offload path will record the error and audit log */ + xfrm_state_put(x); + x = NULL; + } + if (!x) goto out_reset; diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c index 527b7caddbc6..919ebfabbe4e 100644 --- a/net/ipv6/esp6_offload.c +++ b/net/ipv6/esp6_offload.c @@ -83,6 +83,13 @@ static struct sk_buff *esp6_gro_receive(struct list_head *head, x = xfrm_state_lookup(dev_net(skb->dev), skb->mark, (xfrm_address_t *)&ipv6_hdr(skb)->daddr, spi, IPPROTO_ESP, AF_INET6); + + if (unlikely(x && x->dir && x->dir != XFRM_SA_DIR_IN)) { + /* non-offload path will record the error and audit log */ + xfrm_state_put(x); + x = NULL; + } + if (!x) goto out_reset; diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 63c004103912..e95462b982b0 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -474,11 +474,6 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) if (encap_type < 0 || (xo && xo->flags & XFRM_GRO)) { x = xfrm_input_state(skb); - if (unlikely(x->dir && x->dir != XFRM_SA_DIR_IN)) { - XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEDIRERROR); - goto drop; - } - if (unlikely(x->km.state != XFRM_STATE_VALID)) { if (x->km.state == XFRM_STATE_ACQ) XFRM_INC_STATS(net, LINUX_MIB_XFRMACQUIREERROR); -- cgit v1.2.3-58-ga151 From d0e35656d83458d668593930f1568d464dde429c Mon Sep 17 00:00:00 2001 From: "D. Wythe" Date: Fri, 14 Jun 2024 02:00:28 +0800 Subject: net/smc: refactoring initialization of smc sock This patch aims to isolate the shared components of SMC socket allocation by introducing smc_sk_init() for sock initialization and __smc_create_clcsk() for the initialization of clcsock. This is in preparation for the subsequent implementation of the AF_INET version of SMC. Signed-off-by: D. Wythe Reviewed-by: Tony Lu Reviewed-by: Wenjia Zhang Reviewed-by: Dust Li Tested-by: Niklas Schnelle Tested-by: Wenjia Zhang Signed-off-by: David S. Miller --- net/smc/af_smc.c | 86 +++++++++++++++++++++++++++++++------------------------- net/smc/smc.h | 5 ++++ 2 files changed, 53 insertions(+), 38 deletions(-) (limited to 'net') diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index c5f98c6b2561..0508ea20b3c3 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -361,25 +361,15 @@ static void smc_destruct(struct sock *sk) return; } -static struct sock *smc_sock_alloc(struct net *net, struct socket *sock, - int protocol) +void smc_sk_init(struct net *net, struct sock *sk, int protocol) { - struct smc_sock *smc; - struct proto *prot; - struct sock *sk; - - prot = (protocol == SMCPROTO_SMC6) ? 
&smc_proto6 : &smc_proto; - sk = sk_alloc(net, PF_SMC, GFP_KERNEL, prot, 0); - if (!sk) - return NULL; + struct smc_sock *smc = smc_sk(sk); - sock_init_data(sock, sk); /* sets sk_refcnt to 1 */ sk->sk_state = SMC_INIT; sk->sk_destruct = smc_destruct; sk->sk_protocol = protocol; WRITE_ONCE(sk->sk_sndbuf, 2 * READ_ONCE(net->smc.sysctl_wmem)); WRITE_ONCE(sk->sk_rcvbuf, 2 * READ_ONCE(net->smc.sysctl_rmem)); - smc = smc_sk(sk); INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work); INIT_WORK(&smc->connect_work, smc_connect_work); INIT_DELAYED_WORK(&smc->conn.tx_work, smc_tx_work); @@ -389,6 +379,24 @@ static struct sock *smc_sock_alloc(struct net *net, struct socket *sock, sk->sk_prot->hash(sk); mutex_init(&smc->clcsock_release_lock); smc_init_saved_callbacks(smc); + smc->limit_smc_hs = net->smc.limit_smc_hs; + smc->use_fallback = false; /* assume rdma capability first */ + smc->fallback_rsn = 0; +} + +static struct sock *smc_sock_alloc(struct net *net, struct socket *sock, + int protocol) +{ + struct proto *prot; + struct sock *sk; + + prot = (protocol == SMCPROTO_SMC6) ? &smc_proto6 : &smc_proto; + sk = sk_alloc(net, PF_SMC, GFP_KERNEL, prot, 0); + if (!sk) + return NULL; + + sock_init_data(sock, sk); /* sets sk_refcnt to 1 */ + smc_sk_init(net, sk, protocol); return sk; } @@ -3303,6 +3311,31 @@ static const struct proto_ops smc_sock_ops = { .splice_read = smc_splice_read, }; +int smc_create_clcsk(struct net *net, struct sock *sk, int family) +{ + struct smc_sock *smc = smc_sk(sk); + int rc; + + rc = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP, + &smc->clcsock); + if (rc) { + sk_common_release(sk); + return rc; + } + + /* smc_clcsock_release() does not wait smc->clcsock->sk's + * destruction; its sk_state might not be TCP_CLOSE after + * smc->sk is close()d, and TCP timers can be fired later, + * which need net ref. + */ + sk = smc->clcsock->sk; + __netns_tracker_free(net, &sk->ns_tracker, false); + sk->sk_net_refcnt = 1; + get_net_track(net, &sk->ns_tracker, GFP_KERNEL); + sock_inuse_add(net, 1); + return 0; +} + static int __smc_create(struct net *net, struct socket *sock, int protocol, int kern, struct socket *clcsock) { @@ -3328,35 +3361,12 @@ static int __smc_create(struct net *net, struct socket *sock, int protocol, /* create internal TCP socket for CLC handshake and fallback */ smc = smc_sk(sk); - smc->use_fallback = false; /* assume rdma capability first */ - smc->fallback_rsn = 0; - - /* default behavior from limit_smc_hs in every net namespace */ - smc->limit_smc_hs = net->smc.limit_smc_hs; rc = 0; - if (!clcsock) { - rc = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP, - &smc->clcsock); - if (rc) { - sk_common_release(sk); - goto out; - } - - /* smc_clcsock_release() does not wait smc->clcsock->sk's - * destruction; its sk_state might not be TCP_CLOSE after - * smc->sk is close()d, and TCP timers can be fired later, - * which need net ref. 
- */ - sk = smc->clcsock->sk; - __netns_tracker_free(net, &sk->ns_tracker, false); - sk->sk_net_refcnt = 1; - get_net_track(net, &sk->ns_tracker, GFP_KERNEL); - sock_inuse_add(net, 1); - } else { + if (clcsock) smc->clcsock = clcsock; - } - + else + rc = smc_create_clcsk(net, sk, family); out: return rc; } diff --git a/net/smc/smc.h b/net/smc/smc.h index 18c8b7870198..3edec1e133d8 100644 --- a/net/smc/smc.h +++ b/net/smc/smc.h @@ -34,6 +34,11 @@ extern struct proto smc_proto; extern struct proto smc_proto6; +/* smc sock initialization */ +void smc_sk_init(struct net *net, struct sock *sk, int protocol); +/* clcsock initialization */ +int smc_create_clcsk(struct net *net, struct sock *sk, int family); + #ifdef ATOMIC64_INIT #define KERNEL_HAS_ATOMIC64 #endif -- cgit v1.2.3-58-ga151 From 13543d02c90d6195b31bef8fb51dfeff77c0b368 Mon Sep 17 00:00:00 2001 From: "D. Wythe" Date: Fri, 14 Jun 2024 02:00:29 +0800 Subject: net/smc: expose smc proto operations Externalize smc proto operations (smc_xxx) to allow access from files other than af_smc.c This is in preparation for the subsequent implementation of the AF_INET version of SMC. Signed-off-by: D. Wythe Reviewed-by: Wenjia Zhang Reviewed-by: Dust Li Tested-by: Niklas Schnelle Tested-by: Wenjia Zhang Signed-off-by: David S. Miller --- net/smc/af_smc.c | 60 ++++++++++++++++++++++++++++---------------------------- net/smc/smc.h | 33 +++++++++++++++++++++++++++++++ 2 files changed, 63 insertions(+), 30 deletions(-) (limited to 'net') diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 0508ea20b3c3..7ee6f37813d6 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -170,15 +170,15 @@ static bool smc_hs_congested(const struct sock *sk) return false; } -static struct smc_hashinfo smc_v4_hashinfo = { +struct smc_hashinfo smc_v4_hashinfo = { .lock = __RW_LOCK_UNLOCKED(smc_v4_hashinfo.lock), }; -static struct smc_hashinfo smc_v6_hashinfo = { +struct smc_hashinfo smc_v6_hashinfo = { .lock = __RW_LOCK_UNLOCKED(smc_v6_hashinfo.lock), }; -static int smc_hash_sk(struct sock *sk) +int smc_hash_sk(struct sock *sk) { struct smc_hashinfo *h = sk->sk_prot->h.smc_hash; struct hlist_head *head; @@ -193,7 +193,7 @@ static int smc_hash_sk(struct sock *sk) return 0; } -static void smc_unhash_sk(struct sock *sk) +void smc_unhash_sk(struct sock *sk) { struct smc_hashinfo *h = sk->sk_prot->h.smc_hash; @@ -207,7 +207,7 @@ static void smc_unhash_sk(struct sock *sk) * work which we didn't do because of user hold the sock_lock in the * BH context */ -static void smc_release_cb(struct sock *sk) +void smc_release_cb(struct sock *sk) { struct smc_sock *smc = smc_sk(sk); @@ -307,7 +307,7 @@ static int __smc_release(struct smc_sock *smc) return rc; } -static int smc_release(struct socket *sock) +int smc_release(struct socket *sock) { struct sock *sk = sock->sk; struct smc_sock *smc; @@ -401,8 +401,8 @@ static struct sock *smc_sock_alloc(struct net *net, struct socket *sock, return sk; } -static int smc_bind(struct socket *sock, struct sockaddr *uaddr, - int addr_len) +int smc_bind(struct socket *sock, struct sockaddr *uaddr, + int addr_len) { struct sockaddr_in *addr = (struct sockaddr_in *)uaddr; struct sock *sk = sock->sk; @@ -1631,8 +1631,8 @@ out: release_sock(&smc->sk); } -static int smc_connect(struct socket *sock, struct sockaddr *addr, - int alen, int flags) +int smc_connect(struct socket *sock, struct sockaddr *addr, + int alen, int flags) { struct sock *sk = sock->sk; struct smc_sock *smc; @@ -2613,7 +2613,7 @@ out: read_unlock_bh(&listen_clcsock->sk_callback_lock); 
} -static int smc_listen(struct socket *sock, int backlog) +int smc_listen(struct socket *sock, int backlog) { struct sock *sk = sock->sk; struct smc_sock *smc; @@ -2678,8 +2678,8 @@ out: return rc; } -static int smc_accept(struct socket *sock, struct socket *new_sock, - struct proto_accept_arg *arg) +int smc_accept(struct socket *sock, struct socket *new_sock, + struct proto_accept_arg *arg) { struct sock *sk = sock->sk, *nsk; DECLARE_WAITQUEUE(wait, current); @@ -2748,8 +2748,8 @@ out: return rc; } -static int smc_getname(struct socket *sock, struct sockaddr *addr, - int peer) +int smc_getname(struct socket *sock, struct sockaddr *addr, + int peer) { struct smc_sock *smc; @@ -2762,7 +2762,7 @@ static int smc_getname(struct socket *sock, struct sockaddr *addr, return smc->clcsock->ops->getname(smc->clcsock, addr, peer); } -static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) +int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; struct smc_sock *smc; @@ -2800,8 +2800,8 @@ out: return rc; } -static int smc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, - int flags) +int smc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, + int flags) { struct sock *sk = sock->sk; struct smc_sock *smc; @@ -2850,8 +2850,8 @@ static __poll_t smc_accept_poll(struct sock *parent) return mask; } -static __poll_t smc_poll(struct file *file, struct socket *sock, - poll_table *wait) +__poll_t smc_poll(struct file *file, struct socket *sock, + poll_table *wait) { struct sock *sk = sock->sk; struct smc_sock *smc; @@ -2903,7 +2903,7 @@ static __poll_t smc_poll(struct file *file, struct socket *sock, return mask; } -static int smc_shutdown(struct socket *sock, int how) +int smc_shutdown(struct socket *sock, int how) { struct sock *sk = sock->sk; bool do_shutdown = true; @@ -3043,8 +3043,8 @@ static int __smc_setsockopt(struct socket *sock, int level, int optname, return rc; } -static int smc_setsockopt(struct socket *sock, int level, int optname, - sockptr_t optval, unsigned int optlen) +int smc_setsockopt(struct socket *sock, int level, int optname, + sockptr_t optval, unsigned int optlen) { struct sock *sk = sock->sk; struct smc_sock *smc; @@ -3130,8 +3130,8 @@ out: return rc; } -static int smc_getsockopt(struct socket *sock, int level, int optname, - char __user *optval, int __user *optlen) +int smc_getsockopt(struct socket *sock, int level, int optname, + char __user *optval, int __user *optlen) { struct smc_sock *smc; int rc; @@ -3156,8 +3156,8 @@ static int smc_getsockopt(struct socket *sock, int level, int optname, return rc; } -static int smc_ioctl(struct socket *sock, unsigned int cmd, - unsigned long arg) +int smc_ioctl(struct socket *sock, unsigned int cmd, + unsigned long arg) { union smc_host_cursor cons, urg; struct smc_connection *conn; @@ -3243,9 +3243,9 @@ static int smc_ioctl(struct socket *sock, unsigned int cmd, * Note that subsequent recv() calls have to wait till all splice() processing * completed. 
*/ -static ssize_t smc_splice_read(struct socket *sock, loff_t *ppos, - struct pipe_inode_info *pipe, size_t len, - unsigned int flags) +ssize_t smc_splice_read(struct socket *sock, loff_t *ppos, + struct pipe_inode_info *pipe, size_t len, + unsigned int flags) { struct sock *sk = sock->sk; struct smc_sock *smc; diff --git a/net/smc/smc.h b/net/smc/smc.h index 3edec1e133d8..34b781e463c4 100644 --- a/net/smc/smc.h +++ b/net/smc/smc.h @@ -34,6 +34,39 @@ extern struct proto smc_proto; extern struct proto smc_proto6; +extern struct smc_hashinfo smc_v4_hashinfo; +extern struct smc_hashinfo smc_v6_hashinfo; + +int smc_hash_sk(struct sock *sk); +void smc_unhash_sk(struct sock *sk); +void smc_release_cb(struct sock *sk); + +int smc_release(struct socket *sock); +int smc_bind(struct socket *sock, struct sockaddr *uaddr, + int addr_len); +int smc_connect(struct socket *sock, struct sockaddr *addr, + int alen, int flags); +int smc_accept(struct socket *sock, struct socket *new_sock, + struct proto_accept_arg *arg); +int smc_getname(struct socket *sock, struct sockaddr *addr, + int peer); +__poll_t smc_poll(struct file *file, struct socket *sock, + poll_table *wait); +int smc_ioctl(struct socket *sock, unsigned int cmd, + unsigned long arg); +int smc_listen(struct socket *sock, int backlog); +int smc_shutdown(struct socket *sock, int how); +int smc_setsockopt(struct socket *sock, int level, int optname, + sockptr_t optval, unsigned int optlen); +int smc_getsockopt(struct socket *sock, int level, int optname, + char __user *optval, int __user *optlen); +int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len); +int smc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, + int flags); +ssize_t smc_splice_read(struct socket *sock, loff_t *ppos, + struct pipe_inode_info *pipe, size_t len, + unsigned int flags); + /* smc sock initialization */ void smc_sk_init(struct net *net, struct sock *sk, int protocol); /* clcsock initialization */ -- cgit v1.2.3-58-ga151 From d25a92ccae6bed02327b63d138e12e7806830f78 Mon Sep 17 00:00:00 2001 From: "D. Wythe" Date: Fri, 14 Jun 2024 02:00:30 +0800 Subject: net/smc: Introduce IPPROTO_SMC This patch allows to create smc socket via AF_INET, similar to the following code, /* create v4 smc sock */ v4 = socket(AF_INET, SOCK_STREAM, IPPROTO_SMC); /* create v6 smc sock */ v6 = socket(AF_INET6, SOCK_STREAM, IPPROTO_SMC); There are several reasons why we believe it is appropriate here: 1. For smc sockets, it actually use IPv4 (AF-INET) or IPv6 (AF-INET6) address. There is no AF_SMC address at all. 2. Create smc socket in the AF_INET(6) path, which allows us to reuse the infrastructure of AF_INET(6) path, such as common ebpf hooks. Otherwise, smc have to implement it again in AF_SMC path. Signed-off-by: D. Wythe Reviewed-by: Wenjia Zhang Reviewed-by: Dust Li Tested-by: Niklas Schnelle Tested-by: Wenjia Zhang Signed-off-by: David S. 
Miller --- include/uapi/linux/in.h | 2 + net/smc/Makefile | 2 +- net/smc/af_smc.c | 16 ++++- net/smc/smc_inet.c | 159 ++++++++++++++++++++++++++++++++++++++++++++++++ net/smc/smc_inet.h | 22 +++++++ 5 files changed, 198 insertions(+), 3 deletions(-) create mode 100644 net/smc/smc_inet.c create mode 100644 net/smc/smc_inet.h (limited to 'net') diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h index e682ab628dfa..d358add1611c 100644 --- a/include/uapi/linux/in.h +++ b/include/uapi/linux/in.h @@ -81,6 +81,8 @@ enum { #define IPPROTO_ETHERNET IPPROTO_ETHERNET IPPROTO_RAW = 255, /* Raw IP packets */ #define IPPROTO_RAW IPPROTO_RAW + IPPROTO_SMC = 256, /* Shared Memory Communications */ +#define IPPROTO_SMC IPPROTO_SMC IPPROTO_MPTCP = 262, /* Multipath TCP connection */ #define IPPROTO_MPTCP IPPROTO_MPTCP IPPROTO_MAX diff --git a/net/smc/Makefile b/net/smc/Makefile index 2c510d543058..60f1c87d5212 100644 --- a/net/smc/Makefile +++ b/net/smc/Makefile @@ -4,6 +4,6 @@ obj-$(CONFIG_SMC) += smc.o obj-$(CONFIG_SMC_DIAG) += smc_diag.o smc-y := af_smc.o smc_pnet.o smc_ib.o smc_clc.o smc_core.o smc_wr.o smc_llc.o smc-y += smc_cdc.o smc_tx.o smc_rx.o smc_close.o smc_ism.o smc_netlink.o smc_stats.o -smc-y += smc_tracepoint.o +smc-y += smc_tracepoint.o smc_inet.o smc-$(CONFIG_SYSCTL) += smc_sysctl.o smc-$(CONFIG_SMC_LO) += smc_loopback.o diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 7ee6f37813d6..73a875573e7a 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -54,6 +54,7 @@ #include "smc_tracepoint.h" #include "smc_sysctl.h" #include "smc_loopback.h" +#include "smc_inet.h" static DEFINE_MUTEX(smc_server_lgr_pending); /* serialize link group * creation on server @@ -3575,10 +3576,15 @@ static int __init smc_init(void) pr_err("%s: tcp_ulp_register fails with %d\n", __func__, rc); goto out_lo; } - + rc = smc_inet_init(); + if (rc) { + pr_err("%s: smc_inet_init fails with %d\n", __func__, rc); + goto out_ulp; + } static_branch_enable(&tcp_have_smc); return 0; - +out_ulp: + tcp_unregister_ulp(&smc_ulp_ops); out_lo: smc_loopback_exit(); out_ib: @@ -3615,6 +3621,7 @@ out_pernet_subsys: static void __exit smc_exit(void) { static_branch_disable(&tcp_have_smc); + smc_inet_exit(); tcp_unregister_ulp(&smc_ulp_ops); sock_unregister(PF_SMC); smc_core_exit(); @@ -3642,4 +3649,9 @@ MODULE_DESCRIPTION("smc socket address family"); MODULE_LICENSE("GPL"); MODULE_ALIAS_NETPROTO(PF_SMC); MODULE_ALIAS_TCP_ULP("smc"); +/* 256 for IPPROTO_SMC and 1 for SOCK_STREAM */ +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, 256, 1); +#if IS_ENABLED(CONFIG_IPV6) +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET6, 256, 1); +#endif /* CONFIG_IPV6 */ MODULE_ALIAS_GENL_FAMILY(SMC_GENL_FAMILY_NAME); diff --git a/net/smc/smc_inet.c b/net/smc/smc_inet.c new file mode 100644 index 000000000000..bece346dd8e9 --- /dev/null +++ b/net/smc/smc_inet.c @@ -0,0 +1,159 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Shared Memory Communications over RDMA (SMC-R) and RoCE + * + * Definitions for the IPPROTO_SMC (socket related) + * + * Copyright IBM Corp. 2016, 2018 + * Copyright (c) 2024, Alibaba Inc. + * + * Author: D. 
Wythe + */ + +#include +#include + +#include "smc_inet.h" +#include "smc.h" + +static int smc_inet_init_sock(struct sock *sk); + +static struct proto smc_inet_prot = { + .name = "INET_SMC", + .owner = THIS_MODULE, + .init = smc_inet_init_sock, + .hash = smc_hash_sk, + .unhash = smc_unhash_sk, + .release_cb = smc_release_cb, + .obj_size = sizeof(struct smc_sock), + .h.smc_hash = &smc_v4_hashinfo, + .slab_flags = SLAB_TYPESAFE_BY_RCU, +}; + +static const struct proto_ops smc_inet_stream_ops = { + .family = PF_INET, + .owner = THIS_MODULE, + .release = smc_release, + .bind = smc_bind, + .connect = smc_connect, + .socketpair = sock_no_socketpair, + .accept = smc_accept, + .getname = smc_getname, + .poll = smc_poll, + .ioctl = smc_ioctl, + .listen = smc_listen, + .shutdown = smc_shutdown, + .setsockopt = smc_setsockopt, + .getsockopt = smc_getsockopt, + .sendmsg = smc_sendmsg, + .recvmsg = smc_recvmsg, + .mmap = sock_no_mmap, + .splice_read = smc_splice_read, +}; + +static struct inet_protosw smc_inet_protosw = { + .type = SOCK_STREAM, + .protocol = IPPROTO_SMC, + .prot = &smc_inet_prot, + .ops = &smc_inet_stream_ops, + .flags = INET_PROTOSW_ICSK, +}; + +#if IS_ENABLED(CONFIG_IPV6) +static struct proto smc_inet6_prot = { + .name = "INET6_SMC", + .owner = THIS_MODULE, + .init = smc_inet_init_sock, + .hash = smc_hash_sk, + .unhash = smc_unhash_sk, + .release_cb = smc_release_cb, + .obj_size = sizeof(struct smc_sock), + .h.smc_hash = &smc_v6_hashinfo, + .slab_flags = SLAB_TYPESAFE_BY_RCU, +}; + +static const struct proto_ops smc_inet6_stream_ops = { + .family = PF_INET6, + .owner = THIS_MODULE, + .release = smc_release, + .bind = smc_bind, + .connect = smc_connect, + .socketpair = sock_no_socketpair, + .accept = smc_accept, + .getname = smc_getname, + .poll = smc_poll, + .ioctl = smc_ioctl, + .listen = smc_listen, + .shutdown = smc_shutdown, + .setsockopt = smc_setsockopt, + .getsockopt = smc_getsockopt, + .sendmsg = smc_sendmsg, + .recvmsg = smc_recvmsg, + .mmap = sock_no_mmap, + .splice_read = smc_splice_read, +}; + +static struct inet_protosw smc_inet6_protosw = { + .type = SOCK_STREAM, + .protocol = IPPROTO_SMC, + .prot = &smc_inet6_prot, + .ops = &smc_inet6_stream_ops, + .flags = INET_PROTOSW_ICSK, +}; +#endif /* CONFIG_IPV6 */ + +static int smc_inet_init_sock(struct sock *sk) +{ + struct net *net = sock_net(sk); + + /* init common smc sock */ + smc_sk_init(net, sk, IPPROTO_SMC); + /* create clcsock */ + return smc_create_clcsk(net, sk, sk->sk_family); +} + +int __init smc_inet_init(void) +{ + int rc; + + rc = proto_register(&smc_inet_prot, 1); + if (rc) { + pr_err("%s: proto_register smc_inet_prot fails with %d\n", + __func__, rc); + return rc; + } + /* no return value */ + inet_register_protosw(&smc_inet_protosw); + +#if IS_ENABLED(CONFIG_IPV6) + rc = proto_register(&smc_inet6_prot, 1); + if (rc) { + pr_err("%s: proto_register smc_inet6_prot fails with %d\n", + __func__, rc); + goto out_inet6_prot; + } + rc = inet6_register_protosw(&smc_inet6_protosw); + if (rc) { + pr_err("%s: inet6_register_protosw smc_inet6_protosw fails with %d\n", + __func__, rc); + goto out_inet6_protosw; + } + return rc; +out_inet6_protosw: + proto_unregister(&smc_inet6_prot); +out_inet6_prot: + inet_unregister_protosw(&smc_inet_protosw); + proto_unregister(&smc_inet_prot); +#endif /* CONFIG_IPV6 */ + return rc; +} + +void smc_inet_exit(void) +{ +#if IS_ENABLED(CONFIG_IPV6) + inet6_unregister_protosw(&smc_inet6_protosw); + proto_unregister(&smc_inet6_prot); +#endif /* CONFIG_IPV6 */ + 
inet_unregister_protosw(&smc_inet_protosw); + proto_unregister(&smc_inet_prot); +} diff --git a/net/smc/smc_inet.h b/net/smc/smc_inet.h new file mode 100644 index 000000000000..a489c8a2b8ef --- /dev/null +++ b/net/smc/smc_inet.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Shared Memory Communications over RDMA (SMC-R) and RoCE + * + * Definitions for the IPPROTO_SMC (socket related) + + * Copyright IBM Corp. 2016 + * Copyright (c) 2024, Alibaba Inc. + * + * Author: D. Wythe + */ +#ifndef __INET_SMC +#define __INET_SMC + +/* Initialize protocol registration on IPPROTO_SMC, + * @return 0 on success + */ +int smc_inet_init(void); + +void smc_inet_exit(void); + +#endif /* __INET_SMC */ -- cgit v1.2.3-58-ga151 From dd89a81d850fa9a65f67b4527c0e420d15bf836c Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Fri, 14 Jun 2024 08:25:18 -0400 Subject: fou: remove warn in gue_gro_receive on unsupported protocol Drop the WARN_ON_ONCE inn gue_gro_receive if the encapsulated type is not known or does not have a GRO handler. Such a packet is easily constructed. Syzbot generates them and sets off this warning. Remove the warning as it is expected and not actionable. The warning was previously reduced from WARN_ON to WARN_ON_ONCE in commit 270136613bf7 ("fou: Do WARN_ON_ONCE in gue_gro_receive for bad proto callbacks"). Signed-off-by: Willem de Bruijn Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20240614122552.1649044-1-willemdebruijn.kernel@gmail.com Signed-off-by: Jakub Kicinski --- net/ipv4/fou_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/fou_core.c b/net/ipv4/fou_core.c index a8494f796dca..0abbc413e0fe 100644 --- a/net/ipv4/fou_core.c +++ b/net/ipv4/fou_core.c @@ -433,7 +433,7 @@ next_proto: offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads; ops = rcu_dereference(offloads[proto]); - if (WARN_ON_ONCE(!ops || !ops->callbacks.gro_receive)) + if (!ops || !ops->callbacks.gro_receive) goto out; pp = call_gro_receive(ops->callbacks.gro_receive, head, skb); -- cgit v1.2.3-58-ga151 From efb459303dd5dd6e198a0d58322dc04c3356dc23 Mon Sep 17 00:00:00 2001 From: Kory Maincent Date: Wed, 12 Jun 2024 17:04:02 +0200 Subject: net: Move dev_set_hwtstamp_phylib to net/core/dev.h This declaration was added to the header to be called from ethtool. ethtool is separated from core for code organization but it is not really a separate entity, it controls very core things. As ethtool is an internal stuff it is not wise to have it in netdevice.h. Move the declaration to net/core/dev.h instead. Remove the EXPORT_SYMBOL_GPL call as ethtool can not be built as a module. 
Reviewed-by: Willem de Bruijn Signed-off-by: Kory Maincent Link: https://lore.kernel.org/r/20240612-feature_ptp_netnext-v15-2-b2a086257b63@bootlin.com Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 3 --- net/core/dev.h | 4 ++++ net/core/dev_ioctl.c | 1 - 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 85111502cf8f..c83b390191d4 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3904,9 +3904,6 @@ int generic_hwtstamp_get_lower(struct net_device *dev, int generic_hwtstamp_set_lower(struct net_device *dev, struct kernel_hwtstamp_config *kernel_cfg, struct netlink_ext_ack *extack); -int dev_set_hwtstamp_phylib(struct net_device *dev, - struct kernel_hwtstamp_config *cfg, - struct netlink_ext_ack *extack); int dev_ethtool(struct net *net, struct ifreq *ifr, void __user *userdata); unsigned int dev_get_flags(const struct net_device *); int __dev_change_flags(struct net_device *dev, unsigned int flags, diff --git a/net/core/dev.h b/net/core/dev.h index b7b518bc2be5..58f88d28bc99 100644 --- a/net/core/dev.h +++ b/net/core/dev.h @@ -166,4 +166,8 @@ static inline void dev_xmit_recursion_dec(void) __this_cpu_dec(softnet_data.xmit.recursion); } +int dev_set_hwtstamp_phylib(struct net_device *dev, + struct kernel_hwtstamp_config *cfg, + struct netlink_ext_ack *extack); + #endif diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index 9a66cf5015f2..b9719ed3c3fd 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -363,7 +363,6 @@ int dev_set_hwtstamp_phylib(struct net_device *dev, return 0; } -EXPORT_SYMBOL_GPL(dev_set_hwtstamp_phylib); static int dev_set_hwtstamp(struct net_device *dev, struct ifreq *ifr) { -- cgit v1.2.3-58-ga151 From 9f1f70dd850038b37e8b103ef0cc6f5eaeb6a8fc Mon Sep 17 00:00:00 2001 From: Hongfu Li Date: Mon, 17 Jun 2024 15:54:35 +0800 Subject: rds:Simplify the allocation of slab caches Use the new KMEM_CACHE() macro instead of direct kmem_cache_create to simplify the creation of SLAB caches. Signed-off-by: Hongfu Li Reviewed-by: Zhu Yanjun Reviewed-by: Allison Henderson Signed-off-by: David S. Miller --- net/rds/tcp.c | 4 +--- net/rds/tcp_recv.c | 4 +--- 2 files changed, 2 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/rds/tcp.c b/net/rds/tcp.c index d8111ac83bb6..3dc6956f66f8 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -719,9 +719,7 @@ static int __init rds_tcp_init(void) { int ret; - rds_tcp_conn_slab = kmem_cache_create("rds_tcp_connection", - sizeof(struct rds_tcp_connection), - 0, 0, NULL); + rds_tcp_conn_slab = KMEM_CACHE(rds_tcp_connection, 0); if (!rds_tcp_conn_slab) { ret = -ENOMEM; goto out; diff --git a/net/rds/tcp_recv.c b/net/rds/tcp_recv.c index c00f04a1a534..7997a19d1da3 100644 --- a/net/rds/tcp_recv.c +++ b/net/rds/tcp_recv.c @@ -337,9 +337,7 @@ out: int rds_tcp_recv_init(void) { - rds_tcp_incoming_slab = kmem_cache_create("rds_tcp_incoming", - sizeof(struct rds_tcp_incoming), - 0, 0, NULL); + rds_tcp_incoming_slab = KMEM_CACHE(rds_tcp_incoming, 0); if (!rds_tcp_incoming_slab) return -ENOMEM; return 0; -- cgit v1.2.3-58-ga151 From c53795d48ee8f385c6a9e394651e7ee914baaeba Mon Sep 17 00:00:00 2001 From: Yan Zhai Date: Mon, 17 Jun 2024 11:09:04 -0700 Subject: net: add rx_sk to trace_kfree_skb skb does not include enough information to find out receiving sockets/services and netns/containers on packet drops. In theory skb->dev tells about netns, but it can get cleared/reused, e.g. 
by TCP stack for OOO packet lookup. Similarly, skb->sk often identifies a local sender, and tells nothing about a receiver. Allow passing an extra receiving socket to the tracepoint to improve the visibility on receiving drops. Signed-off-by: Yan Zhai Acked-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- include/trace/events/skb.h | 11 +++++++---- net/core/dev.c | 2 +- net/core/drop_monitor.c | 9 ++++++--- net/core/skbuff.c | 2 +- 4 files changed, 15 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h index 07e0715628ec..b877133cd93a 100644 --- a/include/trace/events/skb.h +++ b/include/trace/events/skb.h @@ -24,13 +24,14 @@ DEFINE_DROP_REASON(FN, FN) TRACE_EVENT(kfree_skb, TP_PROTO(struct sk_buff *skb, void *location, - enum skb_drop_reason reason), + enum skb_drop_reason reason, struct sock *rx_sk), - TP_ARGS(skb, location, reason), + TP_ARGS(skb, location, reason, rx_sk), TP_STRUCT__entry( __field(void *, skbaddr) __field(void *, location) + __field(void *, rx_sk) __field(unsigned short, protocol) __field(enum skb_drop_reason, reason) ), @@ -38,12 +39,14 @@ TRACE_EVENT(kfree_skb, TP_fast_assign( __entry->skbaddr = skb; __entry->location = location; + __entry->rx_sk = rx_sk; __entry->protocol = ntohs(skb->protocol); __entry->reason = reason; ), - TP_printk("skbaddr=%p protocol=%u location=%pS reason: %s", - __entry->skbaddr, __entry->protocol, __entry->location, + TP_printk("skbaddr=%p rx_sk=%p protocol=%u location=%pS reason: %s", + __entry->skbaddr, __entry->rx_sk, __entry->protocol, + __entry->location, __print_symbolic(__entry->reason, DEFINE_DROP_REASON(FN, FNe))) ); diff --git a/net/core/dev.c b/net/core/dev.c index c361a7b69da8..093d82bf0e28 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5234,7 +5234,7 @@ static __latent_entropy void net_tx_action(struct softirq_action *h) trace_consume_skb(skb, net_tx_action); else trace_kfree_skb(skb, net_tx_action, - get_kfree_skb_cb(skb)->reason); + get_kfree_skb_cb(skb)->reason, NULL); if (skb->fclone != SKB_FCLONE_UNAVAILABLE) __kfree_skb(skb); diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index 430ed18f8584..2e0ae3328232 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -109,7 +109,8 @@ static u32 net_dm_queue_len = 1000; struct net_dm_alert_ops { void (*kfree_skb_probe)(void *ignore, struct sk_buff *skb, void *location, - enum skb_drop_reason reason); + enum skb_drop_reason reason, + struct sock *rx_sk); void (*napi_poll_probe)(void *ignore, struct napi_struct *napi, int work, int budget); void (*work_item_func)(struct work_struct *work); @@ -264,7 +265,8 @@ out: static void trace_kfree_skb_hit(void *ignore, struct sk_buff *skb, void *location, - enum skb_drop_reason reason) + enum skb_drop_reason reason, + struct sock *rx_sk) { trace_drop_common(skb, location); } @@ -491,7 +493,8 @@ static const struct net_dm_alert_ops net_dm_alert_summary_ops = { static void net_dm_packet_trace_kfree_skb_hit(void *ignore, struct sk_buff *skb, void *location, - enum skb_drop_reason reason) + enum skb_drop_reason reason, + struct sock *rx_sk) { ktime_t tstamp = ktime_get_real(); struct per_cpu_dm_data *data; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index c8ac79851cd6..8973db4eabd4 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1203,7 +1203,7 @@ bool __kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason) if (reason == SKB_CONSUMED) trace_consume_skb(skb, __builtin_return_address(0)); else - 
trace_kfree_skb(skb, __builtin_return_address(0), reason); + trace_kfree_skb(skb, __builtin_return_address(0), reason, NULL); return true; } -- cgit v1.2.3-58-ga151 From ba8de796baf4bdc03530774fb284fe3c97875566 Mon Sep 17 00:00:00 2001 From: Yan Zhai Date: Mon, 17 Jun 2024 11:09:09 -0700 Subject: net: introduce sk_skb_reason_drop function Long used destructors kfree_skb and kfree_skb_reason do not pass receiving socket to packet drop tracepoints trace_kfree_skb. This makes it hard to track packet drops of a certain netns (container) or a socket (user application). The naming of these destructors are also not consistent with most sk/skb operating functions, i.e. functions named "sk_xxx" or "skb_xxx". Introduce a new functions sk_skb_reason_drop as drop-in replacement for kfree_skb_reason on local receiving path. Callers can now pass receiving sockets to the tracepoints. kfree_skb and kfree_skb_reason are still usable but they are now just inline helpers that call sk_skb_reason_drop. Note it is not feasible to do the same to consume_skb. Packets not dropped can flow through multiple receive handlers, and have multiple receiving sockets. Leave it untouched for now. Suggested-by: Eric Dumazet Signed-off-by: Yan Zhai Acked-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- include/linux/skbuff.h | 10 ++++++++-- net/core/skbuff.c | 22 ++++++++++++---------- 2 files changed, 20 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 813406a9bd6c..f4cda3fbdb75 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1251,8 +1251,14 @@ static inline bool skb_data_unref(const struct sk_buff *skb, return true; } -void __fix_address -kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason); +void __fix_address sk_skb_reason_drop(struct sock *sk, struct sk_buff *skb, + enum skb_drop_reason reason); + +static inline void +kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason) +{ + sk_skb_reason_drop(NULL, skb, reason); +} /** * kfree_skb - free an sk_buff with 'NOT_SPECIFIED' reason diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 8973db4eabd4..2315c088e91d 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1190,7 +1190,8 @@ void __kfree_skb(struct sk_buff *skb) EXPORT_SYMBOL(__kfree_skb); static __always_inline -bool __kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason) +bool __sk_skb_reason_drop(struct sock *sk, struct sk_buff *skb, + enum skb_drop_reason reason) { if (unlikely(!skb_unref(skb))) return false; @@ -1203,26 +1204,27 @@ bool __kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason) if (reason == SKB_CONSUMED) trace_consume_skb(skb, __builtin_return_address(0)); else - trace_kfree_skb(skb, __builtin_return_address(0), reason, NULL); + trace_kfree_skb(skb, __builtin_return_address(0), reason, sk); return true; } /** - * kfree_skb_reason - free an sk_buff with special reason + * sk_skb_reason_drop - free an sk_buff with special reason + * @sk: the socket to receive @skb, or NULL if not applicable * @skb: buffer to free * @reason: reason why this skb is dropped * - * Drop a reference to the buffer and free it if the usage count has - * hit zero. Meanwhile, pass the drop reason to 'kfree_skb' - * tracepoint. + * Drop a reference to the buffer and free it if the usage count has hit + * zero. Meanwhile, pass the receiving socket and drop reason to + * 'kfree_skb' tracepoint. 
*/ void __fix_address -kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason) +sk_skb_reason_drop(struct sock *sk, struct sk_buff *skb, enum skb_drop_reason reason) { - if (__kfree_skb_reason(skb, reason)) + if (__sk_skb_reason_drop(sk, skb, reason)) __kfree_skb(skb); } -EXPORT_SYMBOL(kfree_skb_reason); +EXPORT_SYMBOL(sk_skb_reason_drop); #define KFREE_SKB_BULK_SIZE 16 @@ -1261,7 +1263,7 @@ kfree_skb_list_reason(struct sk_buff *segs, enum skb_drop_reason reason) while (segs) { struct sk_buff *next = segs->next; - if (__kfree_skb_reason(segs, reason)) { + if (__sk_skb_reason_drop(NULL, segs, reason)) { skb_poison_list(segs); kfree_skb_add_bulk(segs, &sa, reason); } -- cgit v1.2.3-58-ga151 From 7467de17635fb9d240987a0b9574fb2283a53b73 Mon Sep 17 00:00:00 2001 From: Yan Zhai Date: Mon, 17 Jun 2024 11:09:13 -0700 Subject: ping: use sk_skb_reason_drop to free rx packets Replace kfree_skb_reason with sk_skb_reason_drop and pass the receiving socket to the tracepoint. Signed-off-by: Yan Zhai Acked-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- net/ipv4/ping.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 823306487a82..619ddc087957 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -946,7 +946,7 @@ static enum skb_drop_reason __ping_queue_rcv_skb(struct sock *sk, pr_debug("ping_queue_rcv_skb(sk=%p,sk->num=%d,skb=%p)\n", inet_sk(sk), inet_sk(sk)->inet_num, skb); if (sock_queue_rcv_skb_reason(sk, skb, &reason) < 0) { - kfree_skb_reason(skb, reason); + sk_skb_reason_drop(sk, skb, reason); pr_debug("ping_queue_rcv_skb -> failed\n"); return reason; } -- cgit v1.2.3-58-ga151 From ce9a2424e9da2cd4e790f2498621bc2aa5e5d298 Mon Sep 17 00:00:00 2001 From: Yan Zhai Date: Mon, 17 Jun 2024 11:09:16 -0700 Subject: net: raw: use sk_skb_reason_drop to free rx packets Replace kfree_skb_reason with sk_skb_reason_drop and pass the receiving socket to the tracepoint. Signed-off-by: Yan Zhai Acked-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- net/ipv4/raw.c | 4 ++-- net/ipv6/raw.c | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 1a0953650356..474dfd263c8b 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -301,7 +301,7 @@ static int raw_rcv_skb(struct sock *sk, struct sk_buff *skb) ipv4_pktinfo_prepare(sk, skb, true); if (sock_queue_rcv_skb_reason(sk, skb, &reason) < 0) { - kfree_skb_reason(skb, reason); + sk_skb_reason_drop(sk, skb, reason); return NET_RX_DROP; } @@ -312,7 +312,7 @@ int raw_rcv(struct sock *sk, struct sk_buff *skb) { if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) { atomic_inc(&sk->sk_drops); - kfree_skb_reason(skb, SKB_DROP_REASON_XFRM_POLICY); + sk_skb_reason_drop(sk, skb, SKB_DROP_REASON_XFRM_POLICY); return NET_RX_DROP; } nf_reset_ct(skb); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index f838366e8256..608fa9d05b55 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -362,14 +362,14 @@ static inline int rawv6_rcv_skb(struct sock *sk, struct sk_buff *skb) if ((raw6_sk(sk)->checksum || rcu_access_pointer(sk->sk_filter)) && skb_checksum_complete(skb)) { atomic_inc(&sk->sk_drops); - kfree_skb_reason(skb, SKB_DROP_REASON_SKB_CSUM); + sk_skb_reason_drop(sk, skb, SKB_DROP_REASON_SKB_CSUM); return NET_RX_DROP; } /* Charge it to the socket. 
*/ skb_dst_drop(skb); if (sock_queue_rcv_skb_reason(sk, skb, &reason) < 0) { - kfree_skb_reason(skb, reason); + sk_skb_reason_drop(sk, skb, reason); return NET_RX_DROP; } @@ -390,7 +390,7 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb) if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) { atomic_inc(&sk->sk_drops); - kfree_skb_reason(skb, SKB_DROP_REASON_XFRM_POLICY); + sk_skb_reason_drop(sk, skb, SKB_DROP_REASON_XFRM_POLICY); return NET_RX_DROP; } nf_reset_ct(skb); @@ -415,7 +415,7 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb) if (inet_test_bit(HDRINCL, sk)) { if (skb_checksum_complete(skb)) { atomic_inc(&sk->sk_drops); - kfree_skb_reason(skb, SKB_DROP_REASON_SKB_CSUM); + sk_skb_reason_drop(sk, skb, SKB_DROP_REASON_SKB_CSUM); return NET_RX_DROP; } } -- cgit v1.2.3-58-ga151 From 46a02aa357529d7b038096955976b14f7c44aa23 Mon Sep 17 00:00:00 2001 From: Yan Zhai Date: Mon, 17 Jun 2024 11:09:20 -0700 Subject: tcp: use sk_skb_reason_drop to free rx packets Replace kfree_skb_reason with sk_skb_reason_drop and pass the receiving socket to the tracepoint. Reported-by: kernel test robot Closes: https://lore.kernel.org/r/202406011539.jhwBd7DX-lkp@intel.com/ Signed-off-by: Yan Zhai Acked-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- net/ipv4/syncookies.c | 2 +- net/ipv4/tcp_input.c | 2 +- net/ipv4/tcp_ipv4.c | 6 +++--- net/ipv6/syncookies.c | 2 +- net/ipv6/tcp_ipv6.c | 6 +++--- 5 files changed, 9 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index b61d36810fe3..1948d15f1f28 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -496,6 +496,6 @@ out: out_free: reqsk_free(req); out_drop: - kfree_skb_reason(skb, reason); + sk_skb_reason_drop(sk, skb, reason); return NULL; } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index d0a1e34d69f6..f513d1f927ad 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4860,7 +4860,7 @@ static void tcp_drop_reason(struct sock *sk, struct sk_buff *skb, enum skb_drop_reason reason) { sk_drops_add(sk, skb); - kfree_skb_reason(skb, reason); + sk_skb_reason_drop(sk, skb, reason); } /* This one checks to see if we can put data from the diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index de0c8f43448a..8e49d69279d5 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1932,7 +1932,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) reset: tcp_v4_send_reset(rsk, skb, sk_rst_convert_drop_reason(reason)); discard: - kfree_skb_reason(skb, reason); + sk_skb_reason_drop(sk, skb, reason); /* Be careful here. If this function gets more complicated and * gcc suffers from register pressure on the x86, sk (in %ebx) * might be destroyed here. This current version compiles correctly, @@ -2168,8 +2168,8 @@ int tcp_v4_rcv(struct sk_buff *skb) int dif = inet_iif(skb); const struct iphdr *iph; const struct tcphdr *th; + struct sock *sk = NULL; bool refcounted; - struct sock *sk; int ret; u32 isn; @@ -2368,7 +2368,7 @@ bad_packet: discard_it: SKB_DR_OR(drop_reason, NOT_SPECIFIED); /* Discard frame. 
*/ - kfree_skb_reason(skb, drop_reason); + sk_skb_reason_drop(sk, skb, drop_reason); return 0; discard_and_relse: diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index bfad1e89b6a6..9d83eadd308b 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -275,6 +275,6 @@ out: out_free: reqsk_free(req); out_drop: - kfree_skb_reason(skb, reason); + sk_skb_reason_drop(sk, skb, reason); return NULL; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 223b71790e44..200fea92f12f 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1674,7 +1674,7 @@ reset: discard: if (opt_skb) __kfree_skb(opt_skb); - kfree_skb_reason(skb, reason); + sk_skb_reason_drop(sk, skb, reason); return 0; csum_err: reason = SKB_DROP_REASON_TCP_CSUM; @@ -1747,8 +1747,8 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb) int dif = inet6_iif(skb); const struct tcphdr *th; const struct ipv6hdr *hdr; + struct sock *sk = NULL; bool refcounted; - struct sock *sk; int ret; u32 isn; struct net *net = dev_net(skb->dev); @@ -1940,7 +1940,7 @@ bad_packet: discard_it: SKB_DR_OR(drop_reason, NOT_SPECIFIED); - kfree_skb_reason(skb, drop_reason); + sk_skb_reason_drop(sk, skb, drop_reason); return 0; discard_and_relse: -- cgit v1.2.3-58-ga151 From fc0cc9248843b37243fa5fd3287a121ec41d291f Mon Sep 17 00:00:00 2001 From: Yan Zhai Date: Mon, 17 Jun 2024 11:09:24 -0700 Subject: udp: use sk_skb_reason_drop to free rx packets Replace kfree_skb_reason with sk_skb_reason_drop and pass the receiving socket to the tracepoint. Reported-by: kernel test robot Closes: https://lore.kernel.org/r/202406011751.NpVN0sSk-lkp@intel.com/ Signed-off-by: Yan Zhai Acked-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- net/ipv4/udp.c | 10 +++++----- net/ipv6/udp.c | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index c9ca6d285347..d08bf16d476d 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2074,7 +2074,7 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) } UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite); trace_udp_fail_queue_rcv_skb(rc, sk, skb); - kfree_skb_reason(skb, drop_reason); + sk_skb_reason_drop(sk, skb, drop_reason); return -1; } @@ -2196,7 +2196,7 @@ csum_error: drop: __UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite); atomic_inc(&sk->sk_drops); - kfree_skb_reason(skb, drop_reason); + sk_skb_reason_drop(sk, skb, drop_reason); return -1; } @@ -2383,7 +2383,7 @@ static int udp_unicast_rcv_skb(struct sock *sk, struct sk_buff *skb, int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, int proto) { - struct sock *sk; + struct sock *sk = NULL; struct udphdr *uh; unsigned short ulen; struct rtable *rt = skb_rtable(skb); @@ -2460,7 +2460,7 @@ no_sk: * Hmm. We got an UDP packet to a port to which we * don't wanna listen. Ignore it. 
*/ - kfree_skb_reason(skb, drop_reason); + sk_skb_reason_drop(sk, skb, drop_reason); return 0; short_packet: @@ -2485,7 +2485,7 @@ csum_error: __UDP_INC_STATS(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE); drop: __UDP_INC_STATS(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE); - kfree_skb_reason(skb, drop_reason); + sk_skb_reason_drop(sk, skb, drop_reason); return 0; } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index c81a07ac0463..b56f0b9f4307 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -673,7 +673,7 @@ static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) } UDP6_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite); trace_udp_fail_queue_rcv_skb(rc, sk, skb); - kfree_skb_reason(skb, drop_reason); + sk_skb_reason_drop(sk, skb, drop_reason); return -1; } @@ -776,7 +776,7 @@ csum_error: drop: __UDP6_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite); atomic_inc(&sk->sk_drops); - kfree_skb_reason(skb, drop_reason); + sk_skb_reason_drop(sk, skb, drop_reason); return -1; } @@ -940,8 +940,8 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED; const struct in6_addr *saddr, *daddr; struct net *net = dev_net(skb->dev); + struct sock *sk = NULL; struct udphdr *uh; - struct sock *sk; bool refcounted; u32 ulen = 0; @@ -1033,7 +1033,7 @@ no_sk: __UDP6_INC_STATS(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE); icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); - kfree_skb_reason(skb, reason); + sk_skb_reason_drop(sk, skb, reason); return 0; short_packet: @@ -1054,7 +1054,7 @@ csum_error: __UDP6_INC_STATS(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE); discard: __UDP6_INC_STATS(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE); - kfree_skb_reason(skb, reason); + sk_skb_reason_drop(sk, skb, reason); return 0; } -- cgit v1.2.3-58-ga151 From e2e7d78d9a25c78dc829da400bcec857b8c41b78 Mon Sep 17 00:00:00 2001 From: Yan Zhai Date: Mon, 17 Jun 2024 11:09:27 -0700 Subject: af_packet: use sk_skb_reason_drop to free rx packets Replace kfree_skb_reason with sk_skb_reason_drop and pass the receiving socket to the tracepoint. Reported-by: kernel test robot Closes: https://lore.kernel.org/r/202406011859.Aacus8GV-lkp@intel.com/ Signed-off-by: Yan Zhai Acked-by: Jesper Dangaard Brouer Signed-off-by: David S. 
Miller --- net/packet/af_packet.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index fce390887591..42d29b8a84fc 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2121,7 +2121,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { enum skb_drop_reason drop_reason = SKB_CONSUMED; - struct sock *sk; + struct sock *sk = NULL; struct sockaddr_ll *sll; struct packet_sock *po; u8 *skb_head = skb->data; @@ -2226,7 +2226,7 @@ drop_n_restore: skb->len = skb_len; } drop: - kfree_skb_reason(skb, drop_reason); + sk_skb_reason_drop(sk, skb, drop_reason); return 0; } @@ -2234,7 +2234,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { enum skb_drop_reason drop_reason = SKB_CONSUMED; - struct sock *sk; + struct sock *sk = NULL; struct packet_sock *po; struct sockaddr_ll *sll; union tpacket_uhdr h; @@ -2494,7 +2494,7 @@ drop_n_restore: skb->len = skb_len; } drop: - kfree_skb_reason(skb, drop_reason); + sk_skb_reason_drop(sk, skb, drop_reason); return 0; drop_n_account: @@ -2503,7 +2503,7 @@ drop_n_account: drop_reason = SKB_DROP_REASON_PACKET_SOCK_ERROR; sk->sk_data_ready(sk); - kfree_skb_reason(copy_skb, drop_reason); + sk_skb_reason_drop(sk, copy_skb, drop_reason); goto drop_n_restore; } -- cgit v1.2.3-58-ga151 From 89f5e607772b4d6e467adf8bbe530a5036abb415 Mon Sep 17 00:00:00 2001 From: Lukasz Majewski Date: Tue, 18 Jun 2024 14:58:17 +0200 Subject: net: hsr: cosmetic: Remove extra white space This change just removes extra (i.e. not needed) white space in prp_drop_frame() function. No functional changes. Signed-off-by: Lukasz Majewski Reviewed-by: Hangbin Liu Link: https://lore.kernel.org/r/20240618125817.1111070-1-lukma@denx.de Signed-off-by: Jakub Kicinski --- net/hsr/hsr_forward.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c index 960ef386bc3a..b38060246e62 100644 --- a/net/hsr/hsr_forward.c +++ b/net/hsr/hsr_forward.c @@ -421,9 +421,9 @@ static int hsr_xmit(struct sk_buff *skb, struct hsr_port *port, bool prp_drop_frame(struct hsr_frame_info *frame, struct hsr_port *port) { return ((frame->port_rcv->type == HSR_PT_SLAVE_A && - port->type == HSR_PT_SLAVE_B) || + port->type == HSR_PT_SLAVE_B) || (frame->port_rcv->type == HSR_PT_SLAVE_B && - port->type == HSR_PT_SLAVE_A)); + port->type == HSR_PT_SLAVE_A)); } bool hsr_drop_frame(struct hsr_frame_info *frame, struct hsr_port *port) -- cgit v1.2.3-58-ga151 From ba63a7e08523be3496cc1b7e5f77d306144a3a40 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Sat, 20 Apr 2024 21:47:46 +0200 Subject: can: isotp: remove ISO 15675-2 specification version where possible With the new ISO 15765-2:2024 release the former documentation and comments have to be reworked. This patch removes the ISO specification version/date where possible. 
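As background for the ISO 15765-2 (ISO-TP) sockets whose documentation is reworked by this patch, a minimal user-space sketch is shown below; the interface name "can0" and the CAN identifiers are illustrative placeholders, not values taken from the patch.

#include <string.h>
#include <unistd.h>
#include <net/if.h>
#include <sys/socket.h>
#include <linux/can.h>
#include <linux/can/isotp.h>

int main(void)
{
	struct sockaddr_can addr = { 0 };
	unsigned char pdu[4095];
	int s;

	/* SOCK_DGRAM + CAN_ISOTP selects the ISO-TP transport protocol. */
	s = socket(PF_CAN, SOCK_DGRAM, CAN_ISOTP);
	if (s < 0)
		return 1;

	addr.can_family = AF_CAN;
	addr.can_ifindex = if_nametoindex("can0");	/* placeholder interface */
	addr.can_addr.tp.tx_id = 0x712;			/* we transmit on this CAN ID */
	addr.can_addr.tp.rx_id = 0x77a;			/* and receive on this one */

	if (bind(s, (struct sockaddr *)&addr, sizeof(addr)) < 0)
		return 1;

	/* The kernel segments the PDU into CAN CC/CAN FD frames and waits
	 * for the peer's flow control frames; reassembly on receive is
	 * likewise transparent to user space.
	 */
	memset(pdu, 0xaa, sizeof(pdu));
	write(s, pdu, sizeof(pdu));

	close(s);
	return 0;
}

PDUs larger than 4095 bytes are possible since FF_DL became a full 32-bit field; the static buffer referenced in the hunk below (DEFAULT_MAX_PDU_SIZE, about 8 kbyte) bounds what the module currently accepts.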
Signed-off-by: Oliver Hartkopp Acked-by: Vincent Mailhol Acked-by: Francesco Valla Link: https://lore.kernel.org/all/20240420194746.4885-1-socketcan@hartkopp.net Signed-off-by: Marc Kleine-Budde --- include/uapi/linux/can/isotp.h | 2 +- net/can/Kconfig | 11 +++++------ net/can/isotp.c | 11 ++++++----- 3 files changed, 12 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/include/uapi/linux/can/isotp.h b/include/uapi/linux/can/isotp.h index 6cde62371b6f..bd990917f7c4 100644 --- a/include/uapi/linux/can/isotp.h +++ b/include/uapi/linux/can/isotp.h @@ -2,7 +2,7 @@ /* * linux/can/isotp.h * - * Definitions for isotp CAN sockets (ISO 15765-2:2016) + * Definitions for ISO 15765-2 CAN transport protocol sockets * * Copyright (c) 2020 Volkswagen Group Electronic Research * All rights reserved. diff --git a/net/can/Kconfig b/net/can/Kconfig index cb56be8e3862..af64a6f76458 100644 --- a/net/can/Kconfig +++ b/net/can/Kconfig @@ -56,18 +56,17 @@ config CAN_GW source "net/can/j1939/Kconfig" config CAN_ISOTP - tristate "ISO 15765-2:2016 CAN transport protocol" + tristate "ISO 15765-2 CAN transport protocol" help CAN Transport Protocols offer support for segmented Point-to-Point communication between CAN nodes via two defined CAN Identifiers. + This protocol driver implements segmented data transfers for CAN CC + (aka Classical CAN, CAN 2.0B) and CAN FD frame types which were + introduced with ISO 15765-2:2016. As CAN frames can only transport a small amount of data bytes - (max. 8 bytes for 'classic' CAN and max. 64 bytes for CAN FD) this + (max. 8 bytes for CAN CC and max. 64 bytes for CAN FD) this segmentation is needed to transport longer Protocol Data Units (PDU) as needed e.g. for vehicle diagnosis (UDS, ISO 14229) or IP-over-CAN traffic. - This protocol driver implements data transfers according to - ISO 15765-2:2016 for 'classic' CAN and CAN FD frame types. - If you want to perform automotive vehicle diagnostic services (UDS), - say 'y'. endif diff --git a/net/can/isotp.c b/net/can/isotp.c index 25bac0fafc83..16046931542a 100644 --- a/net/can/isotp.c +++ b/net/can/isotp.c @@ -72,7 +72,7 @@ #include #include -MODULE_DESCRIPTION("PF_CAN isotp 15765-2:2016 protocol"); +MODULE_DESCRIPTION("PF_CAN ISO 15765-2 transport protocol"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Oliver Hartkopp "); MODULE_ALIAS("can-proto-6"); @@ -83,10 +83,11 @@ MODULE_ALIAS("can-proto-6"); (CAN_EFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG) : \ (CAN_SFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG)) -/* ISO 15765-2:2016 supports more than 4095 byte per ISO PDU as the FF_DL can - * take full 32 bit values (4 Gbyte). We would need some good concept to handle - * this between user space and kernel space. For now set the static buffer to - * something about 8 kbyte to be able to test this new functionality. +/* Since ISO 15765-2:2016 the CAN isotp protocol supports more than 4095 + * byte per ISO PDU as the FF_DL can take full 32 bit values (4 Gbyte). + * We would need some good concept to handle this between user space and + * kernel space. For now set the static buffer to something about 8 kbyte + * to be able to test this new functionality. */ #define DEFAULT_MAX_PDU_SIZE 8300 -- cgit v1.2.3-58-ga151 From a744e2d03a91507646ffff8a03a19a2f34a6798a Mon Sep 17 00:00:00 2001 From: James Chapman Date: Thu, 20 Jun 2024 12:22:37 +0100 Subject: l2tp: remove unused list_head member in l2tp_tunnel Remove an unused variable in struct l2tp_tunnel which was left behind by commit c4d48a58f32c5 ("l2tp: convert l2tp_tunnel_list to idr"). 
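For context on the IDR conversion referenced above, which the following patches extend to L2TP sessions, the reserve-then-publish pattern used there looks roughly like the sketch below; the structure and function names are illustrative, not the actual l2tp code.

#include <linux/idr.h>
#include <linux/spinlock.h>
#include <linux/gfp.h>
#include <linux/types.h>

struct my_obj {
	u32 id;
};

static DEFINE_SPINLOCK(obj_idr_lock);
static DEFINE_IDR(obj_idr);

static int obj_register(struct my_obj *obj, u32 id)
{
	int err;

	/* Reserve the ID with a NULL pointer so that concurrent lookups
	 * see "no object yet" while initialisation completes.
	 */
	spin_lock_bh(&obj_idr_lock);
	err = idr_alloc_u32(&obj_idr, NULL, &id, id, GFP_ATOMIC);
	spin_unlock_bh(&obj_idr_lock);
	if (err)
		return err == -ENOSPC ? -EEXIST : err;

	/* ... finish setting up obj ... */

	/* Publish: swap the NULL placeholder for the real pointer. */
	spin_lock_bh(&obj_idr_lock);
	idr_replace(&obj_idr, obj, id);
	spin_unlock_bh(&obj_idr_lock);

	return 0;
}

Lookup paths can then use idr_find() under rcu_read_lock_bh() and treat a NULL entry as "not yet registered", which is the approach the session getters introduced in the patches below take.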
Signed-off-by: James Chapman Reviewed-by: Tom Parkin Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 2 -- net/l2tp/l2tp_core.h | 1 - 2 files changed, 3 deletions(-) (limited to 'net') diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 88a34db265d8..69f8c9f5cdc7 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1462,8 +1462,6 @@ int l2tp_tunnel_create(int fd, int version, u32 tunnel_id, u32 peer_tunnel_id, /* Init delete workqueue struct */ INIT_WORK(&tunnel->del_work, l2tp_tunnel_del_work); - INIT_LIST_HEAD(&tunnel->list); - err = 0; err: if (tunnelp) diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 91ebf0a3f499..54dfba1eb91c 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -174,7 +174,6 @@ struct l2tp_tunnel { enum l2tp_encap_type encap; struct l2tp_stats stats; - struct list_head list; /* list node on per-namespace list of tunnels */ struct net *l2tp_net; /* the net we belong to */ refcount_t ref_count; -- cgit v1.2.3-58-ga151 From aa5e17e1f5ecb68d3f67a069f7345dbf1a8f274f Mon Sep 17 00:00:00 2001 From: James Chapman Date: Thu, 20 Jun 2024 12:22:38 +0100 Subject: l2tp: store l2tpv3 sessions in per-net IDR L2TPv3 sessions are currently held in one of two fixed-size hash lists: either a per-net hashlist (IP-encap), or a per-tunnel hashlist (UDP-encap), keyed by the L2TPv3 32-bit session_id. In order to lookup L2TPv3 sessions in UDP-encap tunnels efficiently without finding the tunnel first via sk_user_data, UDP sessions are now kept in a per-net session list, keyed by session ID. Convert the existing per-net hashlist to use an IDR for better performance when there are many sessions and have L2TPv3 UDP sessions use the same IDR. Although the L2TPv3 RFC states that the session ID alone identifies the session, our implementation has allowed the same session ID to be used in different L2TP UDP tunnels. To retain support for this, a new per-net session hashtable is used, keyed by the sock and session ID. If on creating a new session, a session already exists with that ID in the IDR, the colliding sessions are added to the new hashtable and the existing IDR entry is flagged. When looking up sessions, the approach is to first check the IDR and if no unflagged match is found, check the new hashtable. The sock is made available to session getters where session ID collisions are to be considered. In this way, the new hashtable is used only for session ID collisions so can be kept small. For managing session removal, we need a list of colliding sessions matching a given ID in order to update or remove the IDR entry of the ID. This is necessary to detect session ID collisions when future sessions are created. The list head is allocated on first collision of a given ID and refcounted. Signed-off-by: James Chapman Reviewed-by: Tom Parkin Signed-off-by: David S. 
Miller --- net/l2tp/l2tp_core.c | 240 +++++++++++++++++++++++++++++++++++++-------------- net/l2tp/l2tp_core.h | 18 ++-- net/l2tp/l2tp_ip.c | 2 +- net/l2tp/l2tp_ip6.c | 2 +- 4 files changed, 188 insertions(+), 74 deletions(-) (limited to 'net') diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 69f8c9f5cdc7..d6bffdb16466 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -107,11 +107,17 @@ struct l2tp_net { /* Lock for write access to l2tp_tunnel_idr */ spinlock_t l2tp_tunnel_idr_lock; struct idr l2tp_tunnel_idr; - struct hlist_head l2tp_session_hlist[L2TP_HASH_SIZE_2]; - /* Lock for write access to l2tp_session_hlist */ - spinlock_t l2tp_session_hlist_lock; + /* Lock for write access to l2tp_v3_session_idr/htable */ + spinlock_t l2tp_session_idr_lock; + struct idr l2tp_v3_session_idr; + struct hlist_head l2tp_v3_session_htable[16]; }; +static inline unsigned long l2tp_v3_session_hashkey(struct sock *sk, u32 session_id) +{ + return ((unsigned long)sk) + session_id; +} + #if IS_ENABLED(CONFIG_IPV6) static bool l2tp_sk_is_v6(struct sock *sk) { @@ -125,17 +131,6 @@ static inline struct l2tp_net *l2tp_pernet(const struct net *net) return net_generic(net, l2tp_net_id); } -/* Session hash global list for L2TPv3. - * The session_id SHOULD be random according to RFC3931, but several - * L2TP implementations use incrementing session_ids. So we do a real - * hash on the session_id, rather than a simple bitmask. - */ -static inline struct hlist_head * -l2tp_session_id_hash_2(struct l2tp_net *pn, u32 session_id) -{ - return &pn->l2tp_session_hlist[hash_32(session_id, L2TP_HASH_BITS_2)]; -} - /* Session hash list. * The session_id SHOULD be random according to RFC2661, but several * L2TP implementations (Cisco and Microsoft) use incrementing @@ -262,26 +257,40 @@ struct l2tp_session *l2tp_tunnel_get_session(struct l2tp_tunnel *tunnel, } EXPORT_SYMBOL_GPL(l2tp_tunnel_get_session); -struct l2tp_session *l2tp_session_get(const struct net *net, u32 session_id) +struct l2tp_session *l2tp_v3_session_get(const struct net *net, struct sock *sk, u32 session_id) { - struct hlist_head *session_list; + const struct l2tp_net *pn = l2tp_pernet(net); struct l2tp_session *session; - session_list = l2tp_session_id_hash_2(l2tp_pernet(net), session_id); - rcu_read_lock_bh(); - hlist_for_each_entry_rcu(session, session_list, global_hlist) - if (session->session_id == session_id) { - l2tp_session_inc_refcount(session); - rcu_read_unlock_bh(); + session = idr_find(&pn->l2tp_v3_session_idr, session_id); + if (session && !hash_hashed(&session->hlist) && + refcount_inc_not_zero(&session->ref_count)) { + rcu_read_unlock_bh(); + return session; + } - return session; + /* If we get here and session is non-NULL, the session_id + * collides with one in another tunnel. If sk is non-NULL, + * find the session matching sk. 
+ */ + if (session && sk) { + unsigned long key = l2tp_v3_session_hashkey(sk, session->session_id); + + hash_for_each_possible_rcu(pn->l2tp_v3_session_htable, session, + hlist, key) { + if (session->tunnel->sock == sk && + refcount_inc_not_zero(&session->ref_count)) { + rcu_read_unlock_bh(); + return session; + } } + } rcu_read_unlock_bh(); return NULL; } -EXPORT_SYMBOL_GPL(l2tp_session_get); +EXPORT_SYMBOL_GPL(l2tp_v3_session_get); struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth) { @@ -313,12 +322,12 @@ struct l2tp_session *l2tp_session_get_by_ifname(const struct net *net, const char *ifname) { struct l2tp_net *pn = l2tp_pernet(net); - int hash; + unsigned long session_id, tmp; struct l2tp_session *session; rcu_read_lock_bh(); - for (hash = 0; hash < L2TP_HASH_SIZE_2; hash++) { - hlist_for_each_entry_rcu(session, &pn->l2tp_session_hlist[hash], global_hlist) { + idr_for_each_entry_ul(&pn->l2tp_v3_session_idr, session, tmp, session_id) { + if (session) { if (!strcmp(session->ifname, ifname)) { l2tp_session_inc_refcount(session); rcu_read_unlock_bh(); @@ -334,13 +343,106 @@ struct l2tp_session *l2tp_session_get_by_ifname(const struct net *net, } EXPORT_SYMBOL_GPL(l2tp_session_get_by_ifname); +static void l2tp_session_coll_list_add(struct l2tp_session_coll_list *clist, + struct l2tp_session *session) +{ + l2tp_session_inc_refcount(session); + WARN_ON_ONCE(session->coll_list); + session->coll_list = clist; + spin_lock(&clist->lock); + list_add(&session->clist, &clist->list); + spin_unlock(&clist->lock); +} + +static int l2tp_session_collision_add(struct l2tp_net *pn, + struct l2tp_session *session1, + struct l2tp_session *session2) +{ + struct l2tp_session_coll_list *clist; + + lockdep_assert_held(&pn->l2tp_session_idr_lock); + + if (!session2) + return -EEXIST; + + /* If existing session is in IP-encap tunnel, refuse new session */ + if (session2->tunnel->encap == L2TP_ENCAPTYPE_IP) + return -EEXIST; + + clist = session2->coll_list; + if (!clist) { + /* First collision. Allocate list to manage the collided sessions + * and add the existing session to the list. + */ + clist = kmalloc(sizeof(*clist), GFP_ATOMIC); + if (!clist) + return -ENOMEM; + + spin_lock_init(&clist->lock); + INIT_LIST_HEAD(&clist->list); + refcount_set(&clist->ref_count, 1); + l2tp_session_coll_list_add(clist, session2); + } + + /* If existing session isn't already in the session hlist, add it. */ + if (!hash_hashed(&session2->hlist)) + hash_add(pn->l2tp_v3_session_htable, &session2->hlist, + session2->hlist_key); + + /* Add new session to the hlist and collision list */ + hash_add(pn->l2tp_v3_session_htable, &session1->hlist, + session1->hlist_key); + refcount_inc(&clist->ref_count); + l2tp_session_coll_list_add(clist, session1); + + return 0; +} + +static void l2tp_session_collision_del(struct l2tp_net *pn, + struct l2tp_session *session) +{ + struct l2tp_session_coll_list *clist = session->coll_list; + unsigned long session_key = session->session_id; + struct l2tp_session *session2; + + lockdep_assert_held(&pn->l2tp_session_idr_lock); + + hash_del(&session->hlist); + + if (clist) { + /* Remove session from its collision list. If there + * are other sessions with the same ID, replace this + * session's IDR entry with that session, otherwise + * remove the IDR entry. If this is the last session, + * the collision list data is freed. 
+ */ + spin_lock(&clist->lock); + list_del_init(&session->clist); + session2 = list_first_entry_or_null(&clist->list, struct l2tp_session, clist); + if (session2) { + void *old = idr_replace(&pn->l2tp_v3_session_idr, session2, session_key); + + WARN_ON_ONCE(IS_ERR_VALUE(old)); + } else { + void *removed = idr_remove(&pn->l2tp_v3_session_idr, session_key); + + WARN_ON_ONCE(removed != session); + } + session->coll_list = NULL; + spin_unlock(&clist->lock); + if (refcount_dec_and_test(&clist->ref_count)) + kfree(clist); + l2tp_session_dec_refcount(session); + } +} + int l2tp_session_register(struct l2tp_session *session, struct l2tp_tunnel *tunnel) { + struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net); struct l2tp_session *session_walk; - struct hlist_head *g_head; struct hlist_head *head; - struct l2tp_net *pn; + u32 session_key; int err; head = l2tp_session_id_hash(tunnel, session->session_id); @@ -358,39 +460,45 @@ int l2tp_session_register(struct l2tp_session *session, } if (tunnel->version == L2TP_HDR_VER_3) { - pn = l2tp_pernet(tunnel->l2tp_net); - g_head = l2tp_session_id_hash_2(pn, session->session_id); - - spin_lock_bh(&pn->l2tp_session_hlist_lock); - + session_key = session->session_id; + spin_lock_bh(&pn->l2tp_session_idr_lock); + err = idr_alloc_u32(&pn->l2tp_v3_session_idr, NULL, + &session_key, session_key, GFP_ATOMIC); /* IP encap expects session IDs to be globally unique, while - * UDP encap doesn't. + * UDP encap doesn't. This isn't per the RFC, which says that + * sessions are identified only by the session ID, but is to + * support existing userspace which depends on it. */ - hlist_for_each_entry(session_walk, g_head, global_hlist) - if (session_walk->session_id == session->session_id && - (session_walk->tunnel->encap == L2TP_ENCAPTYPE_IP || - tunnel->encap == L2TP_ENCAPTYPE_IP)) { - err = -EEXIST; - goto err_tlock_pnlock; - } + if (err == -ENOSPC && tunnel->encap == L2TP_ENCAPTYPE_UDP) { + struct l2tp_session *session2; - l2tp_tunnel_inc_refcount(tunnel); - hlist_add_head_rcu(&session->global_hlist, g_head); - - spin_unlock_bh(&pn->l2tp_session_hlist_lock); - } else { - l2tp_tunnel_inc_refcount(tunnel); + session2 = idr_find(&pn->l2tp_v3_session_idr, + session_key); + err = l2tp_session_collision_add(pn, session, session2); + } + spin_unlock_bh(&pn->l2tp_session_idr_lock); + if (err == -ENOSPC) + err = -EEXIST; } + if (err) + goto err_tlock; + + l2tp_tunnel_inc_refcount(tunnel); + hlist_add_head_rcu(&session->hlist, head); spin_unlock_bh(&tunnel->hlist_lock); + if (tunnel->version == L2TP_HDR_VER_3) { + spin_lock_bh(&pn->l2tp_session_idr_lock); + idr_replace(&pn->l2tp_v3_session_idr, session, session_key); + spin_unlock_bh(&pn->l2tp_session_idr_lock); + } + trace_register_session(session); return 0; -err_tlock_pnlock: - spin_unlock_bh(&pn->l2tp_session_hlist_lock); err_tlock: spin_unlock_bh(&tunnel->hlist_lock); @@ -1218,13 +1326,19 @@ static void l2tp_session_unhash(struct l2tp_session *session) hlist_del_init_rcu(&session->hlist); spin_unlock_bh(&tunnel->hlist_lock); - /* For L2TPv3 we have a per-net hash: remove from there, too */ - if (tunnel->version != L2TP_HDR_VER_2) { + /* For L2TPv3 we have a per-net IDR: remove from there, too */ + if (tunnel->version == L2TP_HDR_VER_3) { struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net); - - spin_lock_bh(&pn->l2tp_session_hlist_lock); - hlist_del_init_rcu(&session->global_hlist); - spin_unlock_bh(&pn->l2tp_session_hlist_lock); + struct l2tp_session *removed = session; + + spin_lock_bh(&pn->l2tp_session_idr_lock); + if 
(hash_hashed(&session->hlist)) + l2tp_session_collision_del(pn, session); + else + removed = idr_remove(&pn->l2tp_v3_session_idr, + session->session_id); + WARN_ON_ONCE(removed && removed != session); + spin_unlock_bh(&pn->l2tp_session_idr_lock); } synchronize_rcu(); @@ -1649,8 +1763,9 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn skb_queue_head_init(&session->reorder_q); + session->hlist_key = l2tp_v3_session_hashkey(tunnel->sock, session->session_id); INIT_HLIST_NODE(&session->hlist); - INIT_HLIST_NODE(&session->global_hlist); + INIT_LIST_HEAD(&session->clist); if (cfg) { session->pwtype = cfg->pw_type; @@ -1683,15 +1798,12 @@ EXPORT_SYMBOL_GPL(l2tp_session_create); static __net_init int l2tp_init_net(struct net *net) { struct l2tp_net *pn = net_generic(net, l2tp_net_id); - int hash; idr_init(&pn->l2tp_tunnel_idr); spin_lock_init(&pn->l2tp_tunnel_idr_lock); - for (hash = 0; hash < L2TP_HASH_SIZE_2; hash++) - INIT_HLIST_HEAD(&pn->l2tp_session_hlist[hash]); - - spin_lock_init(&pn->l2tp_session_hlist_lock); + idr_init(&pn->l2tp_v3_session_idr); + spin_lock_init(&pn->l2tp_session_idr_lock); return 0; } @@ -1701,7 +1813,6 @@ static __net_exit void l2tp_exit_net(struct net *net) struct l2tp_net *pn = l2tp_pernet(net); struct l2tp_tunnel *tunnel = NULL; unsigned long tunnel_id, tmp; - int hash; rcu_read_lock_bh(); idr_for_each_entry_ul(&pn->l2tp_tunnel_idr, tunnel, tmp, tunnel_id) { @@ -1714,8 +1825,7 @@ static __net_exit void l2tp_exit_net(struct net *net) flush_workqueue(l2tp_wq); rcu_barrier(); - for (hash = 0; hash < L2TP_HASH_SIZE_2; hash++) - WARN_ON_ONCE(!hlist_empty(&pn->l2tp_session_hlist[hash])); + idr_destroy(&pn->l2tp_v3_session_idr); idr_destroy(&pn->l2tp_tunnel_idr); } diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 54dfba1eb91c..bfccc4ca2644 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -23,10 +23,6 @@ #define L2TP_HASH_BITS 4 #define L2TP_HASH_SIZE BIT(L2TP_HASH_BITS) -/* System-wide session hash table size */ -#define L2TP_HASH_BITS_2 8 -#define L2TP_HASH_SIZE_2 BIT(L2TP_HASH_BITS_2) - struct sk_buff; struct l2tp_stats { @@ -61,6 +57,12 @@ struct l2tp_session_cfg { char *ifname; }; +struct l2tp_session_coll_list { + spinlock_t lock; /* for access to list */ + struct list_head list; + refcount_t ref_count; +}; + /* Represents a session (pseudowire) instance. * Tracks runtime state including cookies, dataplane packet sequencing, and IO statistics. * Is linked into a per-tunnel session hashlist; and in the case of an L2TPv3 session into @@ -88,8 +90,11 @@ struct l2tp_session { u32 nr_oos; /* NR of last OOS packet */ int nr_oos_count; /* for OOS recovery */ int nr_oos_count_max; - struct hlist_node hlist; /* hash list node */ refcount_t ref_count; + struct hlist_node hlist; /* per-net session hlist */ + unsigned long hlist_key; /* key for session hlist */ + struct l2tp_session_coll_list *coll_list; /* session collision list */ + struct list_head clist; /* for coll_list */ char name[L2TP_SESSION_NAME_MAX]; /* for logging */ char ifname[IFNAMSIZ]; @@ -102,7 +107,6 @@ struct l2tp_session { int reorder_skip; /* set if skip to next nr */ enum l2tp_pwtype pwtype; struct l2tp_stats stats; - struct hlist_node global_hlist; /* global hash list node */ /* Session receive handler for data packets. 
* Each pseudowire implementation should implement this callback in order to @@ -226,7 +230,7 @@ struct l2tp_tunnel *l2tp_tunnel_get_nth(const struct net *net, int nth); struct l2tp_session *l2tp_tunnel_get_session(struct l2tp_tunnel *tunnel, u32 session_id); -struct l2tp_session *l2tp_session_get(const struct net *net, u32 session_id); +struct l2tp_session *l2tp_v3_session_get(const struct net *net, struct sock *sk, u32 session_id); struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth); struct l2tp_session *l2tp_session_get_by_ifname(const struct net *net, const char *ifname); diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 19c8cc5289d5..e48aa177d74c 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -140,7 +140,7 @@ static int l2tp_ip_recv(struct sk_buff *skb) } /* Ok, this is a data packet. Lookup the session. */ - session = l2tp_session_get(net, session_id); + session = l2tp_v3_session_get(net, NULL, session_id); if (!session) goto discard; diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 8780ec64f376..d217ff1f229e 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -150,7 +150,7 @@ static int l2tp_ip6_recv(struct sk_buff *skb) } /* Ok, this is a data packet. Lookup the session. */ - session = l2tp_session_get(net, session_id); + session = l2tp_v3_session_get(net, NULL, session_id); if (!session) goto discard; -- cgit v1.2.3-58-ga151 From 2a3339f6c9636aa39f2493865e4664df1ef2baed Mon Sep 17 00:00:00 2001 From: James Chapman Date: Thu, 20 Jun 2024 12:22:39 +0100 Subject: l2tp: store l2tpv2 sessions in per-net IDR L2TPv2 sessions are currently kept in a per-tunnel hashlist, keyed by 16-bit session_id. When handling received L2TPv2 packets, we need to first derive the tunnel using the 16-bit tunnel_id or sock, then lookup the session in a per-tunnel hlist using the 16-bit session_id. We want to avoid using sk_user_data in the datapath and double lookups on every packet. So instead, use a per-net IDR to hold L2TPv2 sessions, keyed by a 32-bit value derived from the 16-bit tunnel_id and session_id. This will allow the L2TPv2 UDP receive datapath to lookup a session with a single lookup without deriving the tunnel first. L2TPv2 sessions are held in their own IDR to avoid potential key collisions with L2TPv3 sessions. Signed-off-by: James Chapman Reviewed-by: Tom Parkin Signed-off-by: David S. 
Miller --- net/l2tp/l2tp_core.c | 70 +++++++++++++++++++++++++++++++++++++++++----------- net/l2tp/l2tp_core.h | 1 + 2 files changed, 56 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index d6bffdb16466..6f30b347fd46 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -107,12 +107,18 @@ struct l2tp_net { /* Lock for write access to l2tp_tunnel_idr */ spinlock_t l2tp_tunnel_idr_lock; struct idr l2tp_tunnel_idr; - /* Lock for write access to l2tp_v3_session_idr/htable */ + /* Lock for write access to l2tp_v[23]_session_idr/htable */ spinlock_t l2tp_session_idr_lock; + struct idr l2tp_v2_session_idr; struct idr l2tp_v3_session_idr; struct hlist_head l2tp_v3_session_htable[16]; }; +static inline u32 l2tp_v2_session_key(u16 tunnel_id, u16 session_id) +{ + return ((u32)tunnel_id) << 16 | session_id; +} + static inline unsigned long l2tp_v3_session_hashkey(struct sock *sk, u32 session_id) { return ((unsigned long)sk) + session_id; @@ -292,6 +298,24 @@ struct l2tp_session *l2tp_v3_session_get(const struct net *net, struct sock *sk, } EXPORT_SYMBOL_GPL(l2tp_v3_session_get); +struct l2tp_session *l2tp_v2_session_get(const struct net *net, u16 tunnel_id, u16 session_id) +{ + u32 session_key = l2tp_v2_session_key(tunnel_id, session_id); + const struct l2tp_net *pn = l2tp_pernet(net); + struct l2tp_session *session; + + rcu_read_lock_bh(); + session = idr_find(&pn->l2tp_v2_session_idr, session_key); + if (session && refcount_inc_not_zero(&session->ref_count)) { + rcu_read_unlock_bh(); + return session; + } + rcu_read_unlock_bh(); + + return NULL; +} +EXPORT_SYMBOL_GPL(l2tp_v2_session_get); + struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth) { int hash; @@ -477,23 +501,32 @@ int l2tp_session_register(struct l2tp_session *session, err = l2tp_session_collision_add(pn, session, session2); } spin_unlock_bh(&pn->l2tp_session_idr_lock); - if (err == -ENOSPC) - err = -EEXIST; + } else { + session_key = l2tp_v2_session_key(tunnel->tunnel_id, + session->session_id); + spin_lock_bh(&pn->l2tp_session_idr_lock); + err = idr_alloc_u32(&pn->l2tp_v2_session_idr, NULL, + &session_key, session_key, GFP_ATOMIC); + spin_unlock_bh(&pn->l2tp_session_idr_lock); } - if (err) + if (err) { + if (err == -ENOSPC) + err = -EEXIST; goto err_tlock; + } l2tp_tunnel_inc_refcount(tunnel); hlist_add_head_rcu(&session->hlist, head); spin_unlock_bh(&tunnel->hlist_lock); - if (tunnel->version == L2TP_HDR_VER_3) { - spin_lock_bh(&pn->l2tp_session_idr_lock); + spin_lock_bh(&pn->l2tp_session_idr_lock); + if (tunnel->version == L2TP_HDR_VER_3) idr_replace(&pn->l2tp_v3_session_idr, session, session_key); - spin_unlock_bh(&pn->l2tp_session_idr_lock); - } + else + idr_replace(&pn->l2tp_v2_session_idr, session, session_key); + spin_unlock_bh(&pn->l2tp_session_idr_lock); trace_register_session(session); @@ -1321,25 +1354,30 @@ static void l2tp_session_unhash(struct l2tp_session *session) /* Remove the session from core hashes */ if (tunnel) { + struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net); + struct l2tp_session *removed = session; + /* Remove from the per-tunnel hash */ spin_lock_bh(&tunnel->hlist_lock); hlist_del_init_rcu(&session->hlist); spin_unlock_bh(&tunnel->hlist_lock); - /* For L2TPv3 we have a per-net IDR: remove from there, too */ + /* Remove from per-net IDR */ + spin_lock_bh(&pn->l2tp_session_idr_lock); if (tunnel->version == L2TP_HDR_VER_3) { - struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net); - struct l2tp_session *removed = 
session; - - spin_lock_bh(&pn->l2tp_session_idr_lock); if (hash_hashed(&session->hlist)) l2tp_session_collision_del(pn, session); else removed = idr_remove(&pn->l2tp_v3_session_idr, session->session_id); - WARN_ON_ONCE(removed && removed != session); - spin_unlock_bh(&pn->l2tp_session_idr_lock); + } else { + u32 session_key = l2tp_v2_session_key(tunnel->tunnel_id, + session->session_id); + removed = idr_remove(&pn->l2tp_v2_session_idr, + session_key); } + WARN_ON_ONCE(removed && removed != session); + spin_unlock_bh(&pn->l2tp_session_idr_lock); synchronize_rcu(); } @@ -1802,6 +1840,7 @@ static __net_init int l2tp_init_net(struct net *net) idr_init(&pn->l2tp_tunnel_idr); spin_lock_init(&pn->l2tp_tunnel_idr_lock); + idr_init(&pn->l2tp_v2_session_idr); idr_init(&pn->l2tp_v3_session_idr); spin_lock_init(&pn->l2tp_session_idr_lock); @@ -1825,6 +1864,7 @@ static __net_exit void l2tp_exit_net(struct net *net) flush_workqueue(l2tp_wq); rcu_barrier(); + idr_destroy(&pn->l2tp_v2_session_idr); idr_destroy(&pn->l2tp_v3_session_idr); idr_destroy(&pn->l2tp_tunnel_idr); } diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index bfccc4ca2644..d80f15f5b9fc 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -231,6 +231,7 @@ struct l2tp_session *l2tp_tunnel_get_session(struct l2tp_tunnel *tunnel, u32 session_id); struct l2tp_session *l2tp_v3_session_get(const struct net *net, struct sock *sk, u32 session_id); +struct l2tp_session *l2tp_v2_session_get(const struct net *net, u16 tunnel_id, u16 session_id); struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth); struct l2tp_session *l2tp_session_get_by_ifname(const struct net *net, const char *ifname); -- cgit v1.2.3-58-ga151 From ff6a2ac23cb027ff9980d633412db17d5f7a1e7c Mon Sep 17 00:00:00 2001 From: James Chapman Date: Thu, 20 Jun 2024 12:22:40 +0100 Subject: l2tp: refactor udp recv to lookup to not use sk_user_data Modify UDP decap to not use the tunnel pointer which comes from the sock's sk_user_data when parsing the L2TP header. By looking up the destination session using only the packet contents we avoid potential UDP 5-tuple aliasing issues which arise from depending on the socket that received the packet. Drop the useless error messages on short packet or on failing to find a session since the tunnel pointer might point to a different tunnel if multiple sockets use the same 5-tuple. Short packets (those not big enough to contain an L2TP header) are no longer counted in the tunnel's invalid counter because we can't derive the tunnel until we parse the l2tp header to lookup the session. l2tp_udp_encap_recv was a small wrapper around l2tp_udp_recv_core which used sk_user_data to derive a tunnel pointer in an RCU-safe way. But we no longer need the tunnel pointer, so remove that code and combine the two functions. Signed-off-by: James Chapman Reviewed-by: Tom Parkin Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 96 ++++++++++++---------------------------------------- 1 file changed, 21 insertions(+), 75 deletions(-) (limited to 'net') diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 6f30b347fd46..2c6378a9f384 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -926,19 +926,14 @@ static void l2tp_session_queue_purge(struct l2tp_session *session) } } -/* Internal UDP receive frame. Do the real work of receiving an L2TP data frame - * here. The skb is not on a list when we get here. - * Returns 0 if the packet was a data packet and was successfully passed on. 
- * Returns 1 if the packet was not a good data packet and could not be - * forwarded. All such packets are passed up to userspace to deal with. - */ -static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb) +/* UDP encapsulation receive handler. See net/ipv4/udp.c for details. */ +int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb) { struct l2tp_session *session = NULL; - struct l2tp_tunnel *orig_tunnel = tunnel; + struct l2tp_tunnel *tunnel = NULL; + struct net *net = sock_net(sk); unsigned char *ptr, *optr; u16 hdrflags; - u32 tunnel_id, session_id; u16 version; int length; @@ -948,11 +943,8 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb) __skb_pull(skb, sizeof(struct udphdr)); /* Short packet? */ - if (!pskb_may_pull(skb, L2TP_HDR_SIZE_MAX)) { - pr_debug_ratelimited("%s: recv short packet (len=%d)\n", - tunnel->name, skb->len); - goto invalid; - } + if (!pskb_may_pull(skb, L2TP_HDR_SIZE_MAX)) + goto pass; /* Point to L2TP header */ optr = skb->data; @@ -975,6 +967,8 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb) ptr += 2; if (version == L2TP_HDR_VER_2) { + u16 tunnel_id, session_id; + /* If length is present, skip it */ if (hdrflags & L2TP_HDRFLAG_L) ptr += 2; @@ -982,49 +976,35 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb) /* Extract tunnel and session ID */ tunnel_id = ntohs(*(__be16 *)ptr); ptr += 2; - - if (tunnel_id != tunnel->tunnel_id) { - /* We are receiving trafic for another tunnel, probably - * because we have several tunnels between the same - * IP/port quadruple, look it up. - */ - struct l2tp_tunnel *alt_tunnel; - - alt_tunnel = l2tp_tunnel_get(tunnel->l2tp_net, tunnel_id); - if (!alt_tunnel) - goto pass; - tunnel = alt_tunnel; - } - session_id = ntohs(*(__be16 *)ptr); ptr += 2; + + session = l2tp_v2_session_get(net, tunnel_id, session_id); } else { + u32 session_id; + ptr += 2; /* skip reserved bits */ - tunnel_id = tunnel->tunnel_id; session_id = ntohl(*(__be32 *)ptr); ptr += 4; - } - /* Check protocol version */ - if (version != tunnel->version) { - pr_debug_ratelimited("%s: recv protocol version mismatch: got %d expected %d\n", - tunnel->name, version, tunnel->version); - goto invalid; + session = l2tp_v3_session_get(net, sk, session_id); } - /* Find the session context */ - session = l2tp_tunnel_get_session(tunnel, session_id); if (!session || !session->recv_skb) { if (session) l2tp_session_dec_refcount(session); /* Not found? Pass to userspace to deal with */ - pr_debug_ratelimited("%s: no session found (%u/%u). Passing up.\n", - tunnel->name, tunnel_id, session_id); goto pass; } - if (tunnel->version == L2TP_HDR_VER_3 && + tunnel = session->tunnel; + + /* Check protocol version */ + if (version != tunnel->version) + goto invalid; + + if (version == L2TP_HDR_VER_3 && l2tp_v3_ensure_opt_in_linear(session, skb, &ptr, &optr)) { l2tp_session_dec_refcount(session); goto invalid; @@ -1033,9 +1013,6 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb) l2tp_recv_common(session, skb, ptr, optr, hdrflags, length); l2tp_session_dec_refcount(session); - if (tunnel != orig_tunnel) - l2tp_tunnel_dec_refcount(tunnel); - return 0; invalid: @@ -1045,42 +1022,11 @@ pass: /* Put UDP header back */ __skb_push(skb, sizeof(struct udphdr)); - if (tunnel != orig_tunnel) - l2tp_tunnel_dec_refcount(tunnel); - - return 1; -} - -/* UDP encapsulation receive and error receive handlers. - * See net/ipv4/udp.c for details. 
- * - * Note that these functions are called from inside an - * RCU-protected region, but without the socket being locked. - * - * Hence we use rcu_dereference_sk_user_data to access the - * tunnel data structure rather the usual l2tp_sk_to_tunnel - * accessor function. - */ -int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb) -{ - struct l2tp_tunnel *tunnel; - - tunnel = rcu_dereference_sk_user_data(sk); - if (!tunnel) - goto pass_up; - if (WARN_ON(tunnel->magic != L2TP_TUNNEL_MAGIC)) - goto pass_up; - - if (l2tp_udp_recv_core(tunnel, skb)) - goto pass_up; - - return 0; - -pass_up: return 1; } EXPORT_SYMBOL_GPL(l2tp_udp_encap_recv); +/* UDP encapsulation receive error handler. See net/ipv4/udp.c for details. */ static void l2tp_udp_encap_err_recv(struct sock *sk, struct sk_buff *skb, int err, __be16 port, u32 info, u8 *payload) { -- cgit v1.2.3-58-ga151 From c37e0138ca5f3be6b69c3020470aecb94eb5d773 Mon Sep 17 00:00:00 2001 From: James Chapman Date: Thu, 20 Jun 2024 12:22:41 +0100 Subject: l2tp: don't use sk_user_data in l2tp_udp_encap_err_recv If UDP sockets are aliased, sk might be the wrong socket. There's no benefit to using sk_user_data to do some checks on the associated tunnel context. Just report the error anyway, like udp core does. Signed-off-by: James Chapman Reviewed-by: Tom Parkin Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'net') diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 2c6378a9f384..cbc5de1373cd 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1030,12 +1030,6 @@ EXPORT_SYMBOL_GPL(l2tp_udp_encap_recv); static void l2tp_udp_encap_err_recv(struct sock *sk, struct sk_buff *skb, int err, __be16 port, u32 info, u8 *payload) { - struct l2tp_tunnel *tunnel; - - tunnel = rcu_dereference_sk_user_data(sk); - if (!tunnel || tunnel->fd < 0) - return; - sk->sk_err = err; sk_error_report(sk); -- cgit v1.2.3-58-ga151 From 5f77c18ea55601822f9c495135a5b5d4b499d647 Mon Sep 17 00:00:00 2001 From: James Chapman Date: Thu, 20 Jun 2024 12:22:42 +0100 Subject: l2tp: use IDR for all session lookups Add generic session getter which uses IDR. Replace all users of l2tp_tunnel_get_session which uses the per-tunnel session list to use the generic getter. Signed-off-by: James Chapman Reviewed-by: Tom Parkin Signed-off-by: David S. 
Miller --- net/l2tp/l2tp_core.c | 10 ++++++++++ net/l2tp/l2tp_core.h | 2 ++ net/l2tp/l2tp_netlink.c | 6 ++++-- net/l2tp/l2tp_ppp.c | 6 ++++-- 4 files changed, 20 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index cbc5de1373cd..0e826a0260fe 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -316,6 +316,16 @@ struct l2tp_session *l2tp_v2_session_get(const struct net *net, u16 tunnel_id, u } EXPORT_SYMBOL_GPL(l2tp_v2_session_get); +struct l2tp_session *l2tp_session_get(const struct net *net, struct sock *sk, int pver, + u32 tunnel_id, u32 session_id) +{ + if (pver == L2TP_HDR_VER_2) + return l2tp_v2_session_get(net, tunnel_id, session_id); + else + return l2tp_v3_session_get(net, sk, session_id); +} +EXPORT_SYMBOL_GPL(l2tp_session_get); + struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth) { int hash; diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index d80f15f5b9fc..0e7c9b0bcc1e 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -232,6 +232,8 @@ struct l2tp_session *l2tp_tunnel_get_session(struct l2tp_tunnel *tunnel, struct l2tp_session *l2tp_v3_session_get(const struct net *net, struct sock *sk, u32 session_id); struct l2tp_session *l2tp_v2_session_get(const struct net *net, u16 tunnel_id, u16 session_id); +struct l2tp_session *l2tp_session_get(const struct net *net, struct sock *sk, int pver, + u32 tunnel_id, u32 session_id); struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth); struct l2tp_session *l2tp_session_get_by_ifname(const struct net *net, const char *ifname); diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index a901fd14fe3b..d105030520f9 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -61,7 +61,8 @@ static struct l2tp_session *l2tp_nl_session_get(struct genl_info *info) session_id = nla_get_u32(info->attrs[L2TP_ATTR_SESSION_ID]); tunnel = l2tp_tunnel_get(net, tunnel_id); if (tunnel) { - session = l2tp_tunnel_get_session(tunnel, session_id); + session = l2tp_session_get(net, tunnel->sock, tunnel->version, + tunnel_id, session_id); l2tp_tunnel_dec_refcount(tunnel); } } @@ -635,7 +636,8 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf &cfg); if (ret >= 0) { - session = l2tp_tunnel_get_session(tunnel, session_id); + session = l2tp_session_get(net, tunnel->sock, tunnel->version, + tunnel_id, session_id); if (session) { ret = l2tp_session_notify(&l2tp_nl_family, info, session, L2TP_CMD_SESSION_CREATE); diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 6146e4e67bbb..3596290047b2 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -753,7 +753,8 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, if (tunnel->peer_tunnel_id == 0) tunnel->peer_tunnel_id = info.peer_tunnel_id; - session = l2tp_tunnel_get_session(tunnel, info.session_id); + session = l2tp_session_get(sock_net(sk), tunnel->sock, tunnel->version, + info.tunnel_id, info.session_id); if (session) { drop_refcnt = true; @@ -1045,7 +1046,8 @@ static int pppol2tp_tunnel_copy_stats(struct pppol2tp_ioc_stats *stats, /* If session_id is set, search the corresponding session in the * context of this tunnel and record the session's statistics. 
*/ - session = l2tp_tunnel_get_session(tunnel, stats->session_id); + session = l2tp_session_get(tunnel->l2tp_net, tunnel->sock, tunnel->version, + tunnel->tunnel_id, stats->session_id); if (!session) return -EBADR; -- cgit v1.2.3-58-ga151 From 8c6245af4fc5b6d244fb0f953d493e848d1e1387 Mon Sep 17 00:00:00 2001 From: James Chapman Date: Thu, 20 Jun 2024 12:22:43 +0100 Subject: l2tp: drop the now unused l2tp_tunnel_get_session All users of l2tp_tunnel_get_session are now gone so it can be removed. Signed-off-by: James Chapman Reviewed-by: Tom Parkin Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 22 ---------------------- net/l2tp/l2tp_core.h | 2 -- 2 files changed, 24 deletions(-) (limited to 'net') diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 0e826a0260fe..3ce689331542 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -241,28 +241,6 @@ struct l2tp_tunnel *l2tp_tunnel_get_nth(const struct net *net, int nth) } EXPORT_SYMBOL_GPL(l2tp_tunnel_get_nth); -struct l2tp_session *l2tp_tunnel_get_session(struct l2tp_tunnel *tunnel, - u32 session_id) -{ - struct hlist_head *session_list; - struct l2tp_session *session; - - session_list = l2tp_session_id_hash(tunnel, session_id); - - rcu_read_lock_bh(); - hlist_for_each_entry_rcu(session, session_list, hlist) - if (session->session_id == session_id) { - l2tp_session_inc_refcount(session); - rcu_read_unlock_bh(); - - return session; - } - rcu_read_unlock_bh(); - - return NULL; -} -EXPORT_SYMBOL_GPL(l2tp_tunnel_get_session); - struct l2tp_session *l2tp_v3_session_get(const struct net *net, struct sock *sk, u32 session_id) { const struct l2tp_net *pn = l2tp_pernet(net); diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 0e7c9b0bcc1e..bfff69f2e0a2 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -227,8 +227,6 @@ void l2tp_session_dec_refcount(struct l2tp_session *session); */ struct l2tp_tunnel *l2tp_tunnel_get(const struct net *net, u32 tunnel_id); struct l2tp_tunnel *l2tp_tunnel_get_nth(const struct net *net, int nth); -struct l2tp_session *l2tp_tunnel_get_session(struct l2tp_tunnel *tunnel, - u32 session_id); struct l2tp_session *l2tp_v3_session_get(const struct net *net, struct sock *sk, u32 session_id); struct l2tp_session *l2tp_v2_session_get(const struct net *net, u16 tunnel_id, u16 session_id); -- cgit v1.2.3-58-ga151 From d18d3f0a24fc4a3513495892ab1a3753628b341b Mon Sep 17 00:00:00 2001 From: James Chapman Date: Thu, 20 Jun 2024 12:22:44 +0100 Subject: l2tp: replace hlist with simple list for per-tunnel session list The per-tunnel session list is no longer used by the datapath. However, we still need a list of sessions in the tunnel for l2tp_session_get_nth, which is used by management code. (An alternative might be to walk each session IDR list, matching only sessions of a given tunnel.) Replace the per-tunnel hlist with a per-tunnel list. In functions which walk a list of sessions of a tunnel, walk this list instead. Signed-off-by: James Chapman Reviewed-by: Tom Parkin Signed-off-by: David S. 
Miller --- net/l2tp/l2tp_core.c | 109 +++++++++++++++++------------------------------- net/l2tp/l2tp_core.h | 19 ++++----- net/l2tp/l2tp_debugfs.c | 13 +++--- 3 files changed, 50 insertions(+), 91 deletions(-) (limited to 'net') diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 3ce689331542..be4bcbf291a1 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -39,7 +39,6 @@ #include #include #include -#include #include #include #include @@ -137,18 +136,6 @@ static inline struct l2tp_net *l2tp_pernet(const struct net *net) return net_generic(net, l2tp_net_id); } -/* Session hash list. - * The session_id SHOULD be random according to RFC2661, but several - * L2TP implementations (Cisco and Microsoft) use incrementing - * session_ids. So we do a real hash on the session_id, rather than a - * simple bitmask. - */ -static inline struct hlist_head * -l2tp_session_id_hash(struct l2tp_tunnel *tunnel, u32 session_id) -{ - return &tunnel->session_hlist[hash_32(session_id, L2TP_HASH_BITS)]; -} - static void l2tp_tunnel_free(struct l2tp_tunnel *tunnel) { trace_free_tunnel(tunnel); @@ -306,21 +293,17 @@ EXPORT_SYMBOL_GPL(l2tp_session_get); struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth) { - int hash; struct l2tp_session *session; int count = 0; rcu_read_lock_bh(); - for (hash = 0; hash < L2TP_HASH_SIZE; hash++) { - hlist_for_each_entry_rcu(session, &tunnel->session_hlist[hash], hlist) { - if (++count > nth) { - l2tp_session_inc_refcount(session); - rcu_read_unlock_bh(); - return session; - } + list_for_each_entry_rcu(session, &tunnel->session_list, list) { + if (++count > nth) { + l2tp_session_inc_refcount(session); + rcu_read_unlock_bh(); + return session; } } - rcu_read_unlock_bh(); return NULL; @@ -334,21 +317,23 @@ struct l2tp_session *l2tp_session_get_by_ifname(const struct net *net, const char *ifname) { struct l2tp_net *pn = l2tp_pernet(net); - unsigned long session_id, tmp; + unsigned long tunnel_id, tmp; struct l2tp_session *session; + struct l2tp_tunnel *tunnel; rcu_read_lock_bh(); - idr_for_each_entry_ul(&pn->l2tp_v3_session_idr, session, tmp, session_id) { - if (session) { - if (!strcmp(session->ifname, ifname)) { - l2tp_session_inc_refcount(session); - rcu_read_unlock_bh(); - - return session; + idr_for_each_entry_ul(&pn->l2tp_tunnel_idr, tunnel, tmp, tunnel_id) { + if (tunnel) { + list_for_each_entry_rcu(session, &tunnel->session_list, list) { + if (!strcmp(session->ifname, ifname)) { + l2tp_session_inc_refcount(session); + rcu_read_unlock_bh(); + + return session; + } } } } - rcu_read_unlock_bh(); return NULL; @@ -452,25 +437,15 @@ int l2tp_session_register(struct l2tp_session *session, struct l2tp_tunnel *tunnel) { struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net); - struct l2tp_session *session_walk; - struct hlist_head *head; u32 session_key; int err; - head = l2tp_session_id_hash(tunnel, session->session_id); - - spin_lock_bh(&tunnel->hlist_lock); + spin_lock_bh(&tunnel->list_lock); if (!tunnel->acpt_newsess) { err = -ENODEV; goto err_tlock; } - hlist_for_each_entry(session_walk, head, hlist) - if (session_walk->session_id == session->session_id) { - err = -EEXIST; - goto err_tlock; - } - if (tunnel->version == L2TP_HDR_VER_3) { session_key = session->session_id; spin_lock_bh(&pn->l2tp_session_idr_lock); @@ -506,8 +481,8 @@ int l2tp_session_register(struct l2tp_session *session, l2tp_tunnel_inc_refcount(tunnel); - hlist_add_head_rcu(&session->hlist, head); - spin_unlock_bh(&tunnel->hlist_lock); + list_add(&session->list, 
&tunnel->session_list); + spin_unlock_bh(&tunnel->list_lock); spin_lock_bh(&pn->l2tp_session_idr_lock); if (tunnel->version == L2TP_HDR_VER_3) @@ -521,7 +496,7 @@ int l2tp_session_register(struct l2tp_session *session, return 0; err_tlock: - spin_unlock_bh(&tunnel->hlist_lock); + spin_unlock_bh(&tunnel->list_lock); return err; } @@ -1275,20 +1250,19 @@ end: return; } -/* Remove an l2tp session from l2tp_core's hash lists. */ +/* Remove an l2tp session from l2tp_core's lists. */ static void l2tp_session_unhash(struct l2tp_session *session) { struct l2tp_tunnel *tunnel = session->tunnel; - /* Remove the session from core hashes */ if (tunnel) { struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net); struct l2tp_session *removed = session; - /* Remove from the per-tunnel hash */ - spin_lock_bh(&tunnel->hlist_lock); - hlist_del_init_rcu(&session->hlist); - spin_unlock_bh(&tunnel->hlist_lock); + /* Remove from the per-tunnel list */ + spin_lock_bh(&tunnel->list_lock); + list_del_init(&session->list); + spin_unlock_bh(&tunnel->list_lock); /* Remove from per-net IDR */ spin_lock_bh(&pn->l2tp_session_idr_lock); @@ -1316,28 +1290,19 @@ static void l2tp_session_unhash(struct l2tp_session *session) static void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel) { struct l2tp_session *session; - int hash; + struct list_head __rcu *pos; + struct list_head *tmp; - spin_lock_bh(&tunnel->hlist_lock); + spin_lock_bh(&tunnel->list_lock); tunnel->acpt_newsess = false; - for (hash = 0; hash < L2TP_HASH_SIZE; hash++) { -again: - hlist_for_each_entry_rcu(session, &tunnel->session_hlist[hash], hlist) { - hlist_del_init_rcu(&session->hlist); - - spin_unlock_bh(&tunnel->hlist_lock); - l2tp_session_delete(session); - spin_lock_bh(&tunnel->hlist_lock); - - /* Now restart from the beginning of this hash - * chain. We always remove a session from the - * list so we are guaranteed to make forward - * progress. - */ - goto again; - } + list_for_each_safe(pos, tmp, &tunnel->session_list) { + session = list_entry(pos, struct l2tp_session, list); + list_del_init(&session->list); + spin_unlock_bh(&tunnel->list_lock); + l2tp_session_delete(session); + spin_lock_bh(&tunnel->list_lock); } - spin_unlock_bh(&tunnel->hlist_lock); + spin_unlock_bh(&tunnel->list_lock); } /* Tunnel socket destroy hook for UDP encapsulation */ @@ -1531,8 +1496,9 @@ int l2tp_tunnel_create(int fd, int version, u32 tunnel_id, u32 peer_tunnel_id, tunnel->magic = L2TP_TUNNEL_MAGIC; sprintf(&tunnel->name[0], "tunl %u", tunnel_id); - spin_lock_init(&tunnel->hlist_lock); + spin_lock_init(&tunnel->list_lock); tunnel->acpt_newsess = true; + INIT_LIST_HEAD(&tunnel->session_list); tunnel->encap = encap; @@ -1732,6 +1698,7 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn session->hlist_key = l2tp_v3_session_hashkey(tunnel->sock, session->session_id); INIT_HLIST_NODE(&session->hlist); INIT_LIST_HEAD(&session->clist); + INIT_LIST_HEAD(&session->list); if (cfg) { session->pwtype = cfg->pw_type; diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index bfff69f2e0a2..8ac81bc1bc6f 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -19,10 +19,6 @@ #define L2TP_TUNNEL_MAGIC 0x42114DDA #define L2TP_SESSION_MAGIC 0x0C04EB7D -/* Per tunnel session hash table size */ -#define L2TP_HASH_BITS 4 -#define L2TP_HASH_SIZE BIT(L2TP_HASH_BITS) - struct sk_buff; struct l2tp_stats { @@ -65,8 +61,7 @@ struct l2tp_session_coll_list { /* Represents a session (pseudowire) instance. 
* Tracks runtime state including cookies, dataplane packet sequencing, and IO statistics. - * Is linked into a per-tunnel session hashlist; and in the case of an L2TPv3 session into - * an additional per-net ("global") hashlist. + * Is linked into a per-tunnel session list and a per-net ("global") IDR tree. */ #define L2TP_SESSION_NAME_MAX 32 struct l2tp_session { @@ -90,6 +85,7 @@ struct l2tp_session { u32 nr_oos; /* NR of last OOS packet */ int nr_oos_count; /* for OOS recovery */ int nr_oos_count_max; + struct list_head list; /* per-tunnel list node */ refcount_t ref_count; struct hlist_node hlist; /* per-net session hlist */ unsigned long hlist_key; /* key for session hlist */ @@ -118,7 +114,7 @@ struct l2tp_session { /* Session close handler. * Each pseudowire implementation may implement this callback in order to carry * out pseudowire-specific shutdown actions. - * The callback is called by core after unhashing the session and purging its + * The callback is called by core after unlisting the session and purging its * reorder queue. */ void (*session_close)(struct l2tp_session *session); @@ -154,7 +150,7 @@ struct l2tp_tunnel_cfg { /* Represents a tunnel instance. * Tracks runtime state including IO statistics. * Holds the tunnel socket (either passed from userspace or directly created by the kernel). - * Maintains a hashlist of sessions belonging to the tunnel instance. + * Maintains a list of sessions belonging to the tunnel instance. * Is linked into a per-net list of tunnels. */ #define L2TP_TUNNEL_NAME_MAX 20 @@ -164,12 +160,11 @@ struct l2tp_tunnel { unsigned long dead; struct rcu_head rcu; - spinlock_t hlist_lock; /* write-protection for session_hlist */ + spinlock_t list_lock; /* write-protection for session_list */ bool acpt_newsess; /* indicates whether this tunnel accepts - * new sessions. Protected by hlist_lock. + * new sessions. Protected by list_lock. */ - struct hlist_head session_hlist[L2TP_HASH_SIZE]; - /* hashed list of sessions, hashed by id */ + struct list_head session_list; /* list of sessions */ u32 tunnel_id; u32 peer_tunnel_id; int version; /* 2=>L2TPv2, 3=>L2TPv3 */ diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c index 4595b56d175d..8755ae521154 100644 --- a/net/l2tp/l2tp_debugfs.c +++ b/net/l2tp/l2tp_debugfs.c @@ -123,17 +123,14 @@ static void l2tp_dfs_seq_tunnel_show(struct seq_file *m, void *v) struct l2tp_tunnel *tunnel = v; struct l2tp_session *session; int session_count = 0; - int hash; rcu_read_lock_bh(); - for (hash = 0; hash < L2TP_HASH_SIZE; hash++) { - hlist_for_each_entry_rcu(session, &tunnel->session_hlist[hash], hlist) { - /* Session ID of zero is a dummy/reserved value used by pppol2tp */ - if (session->session_id == 0) - continue; + list_for_each_entry_rcu(session, &tunnel->session_list, list) { + /* Session ID of zero is a dummy/reserved value used by pppol2tp */ + if (session->session_id == 0) + continue; - session_count++; - } + session_count++; } rcu_read_unlock_bh(); -- cgit v1.2.3-58-ga151 From 43d7ca2907cb0e8a23d73934a5cf000e24b0aa22 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 20 Jun 2024 15:21:53 +0200 Subject: net: Use __napi_alloc_frag_align() instead of open coding it. The else condition within __netdev_alloc_frag_align() is an open coded __napi_alloc_frag_align(). Use __napi_alloc_frag_align() instead of open coding it. Move fragsz assignment before page_frag_alloc_align() invocation because __napi_alloc_frag_align() also contains this statement. 
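For context, the two public wrappers around these helpers differ only in the calling context they assume: napi_alloc_frag() may only be used where bottom halves are already disabled (NAPI poll), while netdev_alloc_frag() works from any context including hardirq, which is why the else branch can simply disable BH and defer to the NAPI variant. A minimal, purely illustrative sketch of that split in a hypothetical driver helper (my_build_rx_skb() and MY_RX_BUF_SZ are invented names, not from this patch):

#include <linux/skbuff.h>

#define MY_RX_BUF_SZ 2048

static struct sk_buff *my_build_rx_skb(bool in_napi_poll)
{
        void *data;

        if (in_napi_poll)
                data = napi_alloc_frag(MY_RX_BUF_SZ);   /* BH already disabled */
        else
                data = netdev_alloc_frag(MY_RX_BUF_SZ); /* any context, incl. hardirq */
        if (!data)
                return NULL;

        /* Wrap the page fragment in an skb; the frag is freed with the skb. */
        return build_skb(data, MY_RX_BUF_SZ);
}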
Signed-off-by: Sebastian Andrzej Siewior Link: https://patch.msgid.link/20240620132727.660738-4-bigeasy@linutronix.de Signed-off-by: Jakub Kicinski --- net/core/skbuff.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 2315c088e91d..1b52f69ad05e 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -318,19 +318,15 @@ void *__netdev_alloc_frag_align(unsigned int fragsz, unsigned int align_mask) { void *data; - fragsz = SKB_DATA_ALIGN(fragsz); if (in_hardirq() || irqs_disabled()) { struct page_frag_cache *nc = this_cpu_ptr(&netdev_alloc_cache); + fragsz = SKB_DATA_ALIGN(fragsz); data = __page_frag_alloc_align(nc, fragsz, GFP_ATOMIC, align_mask); } else { - struct napi_alloc_cache *nc; - local_bh_disable(); - nc = this_cpu_ptr(&napi_alloc_cache); - data = __page_frag_alloc_align(&nc->page, fragsz, GFP_ATOMIC, - align_mask); + data = __napi_alloc_frag_align(fragsz, align_mask); local_bh_enable(); } return data; -- cgit v1.2.3-58-ga151 From bdacf3e34945232037979b977d7f409f734a7226 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 20 Jun 2024 15:21:54 +0200 Subject: net: Use nested-BH locking for napi_alloc_cache. napi_alloc_cache is a per-CPU variable and relies on disabled BH for its locking. Without per-CPU locking in local_bh_disable() on PREEMPT_RT this data structure requires explicit locking. Add a local_lock_t to the data structure and use local_lock_nested_bh() for locking. This change adds only lockdep coverage and does not alter the functional behaviour for !PREEMPT_RT. Signed-off-by: Sebastian Andrzej Siewior Link: https://patch.msgid.link/20240620132727.660738-5-bigeasy@linutronix.de Signed-off-by: Jakub Kicinski --- net/core/skbuff.c | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 1b52f69ad05e..eb9a7e65b5c8 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -277,6 +277,7 @@ static void *page_frag_alloc_1k(struct page_frag_1k *nc, gfp_t gfp_mask) #endif struct napi_alloc_cache { + local_lock_t bh_lock; struct page_frag_cache page; struct page_frag_1k page_small; unsigned int skb_count; @@ -284,7 +285,9 @@ struct napi_alloc_cache { }; static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache); -static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache); +static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache) = { + .bh_lock = INIT_LOCAL_LOCK(bh_lock), +}; /* Double check that napi_get_frags() allocates skbs with * skb->head being backed by slab, not a page fragment. 
@@ -306,11 +309,16 @@ void napi_get_frags_check(struct napi_struct *napi) void *__napi_alloc_frag_align(unsigned int fragsz, unsigned int align_mask) { struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); + void *data; fragsz = SKB_DATA_ALIGN(fragsz); - return __page_frag_alloc_align(&nc->page, fragsz, GFP_ATOMIC, + local_lock_nested_bh(&napi_alloc_cache.bh_lock); + data = __page_frag_alloc_align(&nc->page, fragsz, GFP_ATOMIC, align_mask); + local_unlock_nested_bh(&napi_alloc_cache.bh_lock); + return data; + } EXPORT_SYMBOL(__napi_alloc_frag_align); @@ -338,16 +346,20 @@ static struct sk_buff *napi_skb_cache_get(void) struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); struct sk_buff *skb; + local_lock_nested_bh(&napi_alloc_cache.bh_lock); if (unlikely(!nc->skb_count)) { nc->skb_count = kmem_cache_alloc_bulk(net_hotdata.skbuff_cache, GFP_ATOMIC, NAPI_SKB_CACHE_BULK, nc->skb_cache); - if (unlikely(!nc->skb_count)) + if (unlikely(!nc->skb_count)) { + local_unlock_nested_bh(&napi_alloc_cache.bh_lock); return NULL; + } } skb = nc->skb_cache[--nc->skb_count]; + local_unlock_nested_bh(&napi_alloc_cache.bh_lock); kasan_mempool_unpoison_object(skb, kmem_cache_size(net_hotdata.skbuff_cache)); return skb; @@ -740,9 +752,13 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len, pfmemalloc = nc->pfmemalloc; } else { local_bh_disable(); + local_lock_nested_bh(&napi_alloc_cache.bh_lock); + nc = this_cpu_ptr(&napi_alloc_cache.page); data = page_frag_alloc(nc, len, gfp_mask); pfmemalloc = nc->pfmemalloc; + + local_unlock_nested_bh(&napi_alloc_cache.bh_lock); local_bh_enable(); } @@ -806,11 +822,11 @@ struct sk_buff *napi_alloc_skb(struct napi_struct *napi, unsigned int len) goto skb_success; } - nc = this_cpu_ptr(&napi_alloc_cache); - if (sk_memalloc_socks()) gfp_mask |= __GFP_MEMALLOC; + local_lock_nested_bh(&napi_alloc_cache.bh_lock); + nc = this_cpu_ptr(&napi_alloc_cache); if (NAPI_HAS_SMALL_PAGE_FRAG && len <= SKB_WITH_OVERHEAD(1024)) { /* we are artificially inflating the allocation size, but * that is not as bad as it may look like, as: @@ -832,6 +848,7 @@ struct sk_buff *napi_alloc_skb(struct napi_struct *napi, unsigned int len) data = page_frag_alloc(&nc->page, len, gfp_mask); pfmemalloc = nc->page.pfmemalloc; } + local_unlock_nested_bh(&napi_alloc_cache.bh_lock); if (unlikely(!data)) return NULL; @@ -1431,6 +1448,7 @@ static void napi_skb_cache_put(struct sk_buff *skb) if (!kasan_mempool_poison_object(skb)) return; + local_lock_nested_bh(&napi_alloc_cache.bh_lock); nc->skb_cache[nc->skb_count++] = skb; if (unlikely(nc->skb_count == NAPI_SKB_CACHE_SIZE)) { @@ -1442,6 +1460,7 @@ static void napi_skb_cache_put(struct sk_buff *skb) nc->skb_cache + NAPI_SKB_CACHE_HALF); nc->skb_count = NAPI_SKB_CACHE_HALF; } + local_unlock_nested_bh(&napi_alloc_cache.bh_lock); } void __napi_kfree_skb(struct sk_buff *skb, enum skb_drop_reason reason) -- cgit v1.2.3-58-ga151 From 585aa621af6cd11d3d6f830965af3e45ee79ea51 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 20 Jun 2024 15:21:55 +0200 Subject: net/tcp_sigpool: Use nested-BH locking for sigpool_scratch. sigpool_scratch is a per-CPU variable and relies on disabled BH for its locking. Without per-CPU locking in local_bh_disable() on PREEMPT_RT this data structure requires explicit locking. Make a struct with a pad member (original sigpool_scratch) and a local_lock_t and use local_lock_nested_bh() for locking. 
This change adds only lockdep coverage and does not alter the functional behaviour for !PREEMPT_RT. Cc: David Ahern Signed-off-by: Sebastian Andrzej Siewior Link: https://patch.msgid.link/20240620132727.660738-6-bigeasy@linutronix.de Signed-off-by: Jakub Kicinski --- net/ipv4/tcp_sigpool.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_sigpool.c b/net/ipv4/tcp_sigpool.c index 8512cb09ebc0..d8a4f192873a 100644 --- a/net/ipv4/tcp_sigpool.c +++ b/net/ipv4/tcp_sigpool.c @@ -10,7 +10,14 @@ #include static size_t __scratch_size; -static DEFINE_PER_CPU(void __rcu *, sigpool_scratch); +struct sigpool_scratch { + local_lock_t bh_lock; + void __rcu *pad; +}; + +static DEFINE_PER_CPU(struct sigpool_scratch, sigpool_scratch) = { + .bh_lock = INIT_LOCAL_LOCK(bh_lock), +}; struct sigpool_entry { struct crypto_ahash *hash; @@ -72,7 +79,7 @@ static int sigpool_reserve_scratch(size_t size) break; } - old_scratch = rcu_replace_pointer(per_cpu(sigpool_scratch, cpu), + old_scratch = rcu_replace_pointer(per_cpu(sigpool_scratch.pad, cpu), scratch, lockdep_is_held(&cpool_mutex)); if (!cpu_online(cpu) || !old_scratch) { kfree(old_scratch); @@ -93,7 +100,7 @@ static void sigpool_scratch_free(void) int cpu; for_each_possible_cpu(cpu) - kfree(rcu_replace_pointer(per_cpu(sigpool_scratch, cpu), + kfree(rcu_replace_pointer(per_cpu(sigpool_scratch.pad, cpu), NULL, lockdep_is_held(&cpool_mutex))); __scratch_size = 0; } @@ -277,7 +284,8 @@ int tcp_sigpool_start(unsigned int id, struct tcp_sigpool *c) __cond_acquires(RC /* Pairs with tcp_sigpool_reserve_scratch(), scratch area is * valid (allocated) until tcp_sigpool_end(). */ - c->scratch = rcu_dereference_bh(*this_cpu_ptr(&sigpool_scratch)); + local_lock_nested_bh(&sigpool_scratch.bh_lock); + c->scratch = rcu_dereference_bh(*this_cpu_ptr(&sigpool_scratch.pad)); return 0; } EXPORT_SYMBOL_GPL(tcp_sigpool_start); @@ -286,6 +294,7 @@ void tcp_sigpool_end(struct tcp_sigpool *c) __releases(RCU_BH) { struct crypto_ahash *hash = crypto_ahash_reqtfm(c->req); + local_unlock_nested_bh(&sigpool_scratch.bh_lock); rcu_read_unlock_bh(); ahash_request_free(c->req); crypto_free_ahash(hash); -- cgit v1.2.3-58-ga151 From ebad6d0334793f16a16e5320182f665292a31e0c Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 20 Jun 2024 15:21:56 +0200 Subject: net/ipv4: Use nested-BH locking for ipv4_tcp_sk. ipv4_tcp_sk is a per-CPU variable and relies on disabled BH for its locking. Without per-CPU locking in local_bh_disable() on PREEMPT_RT this data structure requires explicit locking. Make a struct with a sock member (original ipv4_tcp_sk) and a local_lock_t and use local_lock_nested_bh() for locking. This change adds only lockdep coverage and does not alter the functional behaviour for !PREEMPT_RT. 
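The conversion recipe is the same one used throughout this series: embed a local_lock_t in a struct next to the per-CPU data, initialise it with INIT_LOCAL_LOCK() in the DEFINE_PER_CPU() initialiser, and bracket every access with local_lock_nested_bh()/local_unlock_nested_bh() from a context that already runs with BH disabled. A minimal self-contained sketch of that pattern (my_scratch and my_fill_scratch() are invented names, not kernel symbols):

#include <linux/local_lock.h>
#include <linux/percpu.h>
#include <linux/string.h>
#include <linux/types.h>

struct my_scratch {
        local_lock_t bh_lock;
        char buf[256];
};

static DEFINE_PER_CPU(struct my_scratch, my_scratch) = {
        .bh_lock = INIT_LOCAL_LOCK(bh_lock),
};

/* Caller is expected to run with BH already disabled (softirq/NAPI). */
static void my_fill_scratch(const char *src, size_t len)
{
        struct my_scratch *s;

        if (len > sizeof(((struct my_scratch *)0)->buf))
                len = sizeof(((struct my_scratch *)0)->buf);

        local_lock_nested_bh(&my_scratch.bh_lock);
        s = this_cpu_ptr(&my_scratch);
        memcpy(s->buf, src, len);
        /* ... use s->buf while the lock is held ... */
        local_unlock_nested_bh(&my_scratch.bh_lock);
}

On !PREEMPT_RT the lock compiles away to lockdep annotations, so the existing BH-based serialisation is unchanged; on PREEMPT_RT it provides the explicit per-CPU locking that local_bh_disable() no longer implies.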
Cc: David Ahern Signed-off-by: Sebastian Andrzej Siewior Link: https://patch.msgid.link/20240620132727.660738-7-bigeasy@linutronix.de Signed-off-by: Jakub Kicinski --- include/net/sock.h | 5 +++++ net/ipv4/tcp_ipv4.c | 15 +++++++++++---- 2 files changed, 16 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/include/net/sock.h b/include/net/sock.h index b30ea0c342a6..cce23ac4d514 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -544,6 +544,11 @@ struct sock { netns_tracker ns_tracker; }; +struct sock_bh_locked { + struct sock *sock; + local_lock_t bh_lock; +}; + enum sk_pacing { SK_PACING_NONE = 0, SK_PACING_NEEDED = 1, diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 8e49d69279d5..fd17f25ff288 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -93,7 +93,9 @@ static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, struct inet_hashinfo tcp_hashinfo; EXPORT_SYMBOL(tcp_hashinfo); -static DEFINE_PER_CPU(struct sock *, ipv4_tcp_sk); +static DEFINE_PER_CPU(struct sock_bh_locked, ipv4_tcp_sk) = { + .bh_lock = INIT_LOCAL_LOCK(bh_lock), +}; static u32 tcp_v4_init_seq(const struct sk_buff *skb) { @@ -882,7 +884,9 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb, arg.tos = ip_hdr(skb)->tos; arg.uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL); local_bh_disable(); - ctl_sk = this_cpu_read(ipv4_tcp_sk); + local_lock_nested_bh(&ipv4_tcp_sk.bh_lock); + ctl_sk = this_cpu_read(ipv4_tcp_sk.sock); + sock_net_set(ctl_sk, net); if (sk) { ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ? @@ -907,6 +911,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb, sock_net_set(ctl_sk, &init_net); __TCP_INC_STATS(net, TCP_MIB_OUTSEGS); __TCP_INC_STATS(net, TCP_MIB_OUTRSTS); + local_unlock_nested_bh(&ipv4_tcp_sk.bh_lock); local_bh_enable(); #ifdef CONFIG_TCP_MD5SIG @@ -1002,7 +1007,8 @@ static void tcp_v4_send_ack(const struct sock *sk, arg.tos = tos; arg.uid = sock_net_uid(net, sk_fullsock(sk) ? sk : NULL); local_bh_disable(); - ctl_sk = this_cpu_read(ipv4_tcp_sk); + local_lock_nested_bh(&ipv4_tcp_sk.bh_lock); + ctl_sk = this_cpu_read(ipv4_tcp_sk.sock); sock_net_set(ctl_sk, net); ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ? inet_twsk(sk)->tw_mark : READ_ONCE(sk->sk_mark); @@ -1017,6 +1023,7 @@ static void tcp_v4_send_ack(const struct sock *sk, sock_net_set(ctl_sk, &init_net); __TCP_INC_STATS(net, TCP_MIB_OUTSEGS); + local_unlock_nested_bh(&ipv4_tcp_sk.bh_lock); local_bh_enable(); } @@ -3615,7 +3622,7 @@ void __init tcp_v4_init(void) sk->sk_clockid = CLOCK_MONOTONIC; - per_cpu(ipv4_tcp_sk, cpu) = sk; + per_cpu(ipv4_tcp_sk.sock, cpu) = sk; } if (register_pernet_subsys(&tcp_sk_ops)) panic("Failed to create the TCP control socket.\n"); -- cgit v1.2.3-58-ga151 From c67ef53a88db5bcc29a6da20dcbc09206d49d0a9 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 20 Jun 2024 15:21:57 +0200 Subject: netfilter: br_netfilter: Use nested-BH locking for brnf_frag_data_storage. brnf_frag_data_storage is a per-CPU variable and relies on disabled BH for its locking. Without per-CPU locking in local_bh_disable() on PREEMPT_RT this data structure requires explicit locking. Add a local_lock_t to the data structure and use local_lock_nested_bh() for locking. This change adds only lockdep coverage and does not alter the functional behaviour for !PREEMPT_RT. 
Cc: Florian Westphal Cc: Jozsef Kadlecsik Cc: Nikolay Aleksandrov Cc: Pablo Neira Ayuso Cc: Roopa Prabhu Cc: bridge@lists.linux.dev Cc: coreteam@netfilter.org Signed-off-by: Sebastian Andrzej Siewior Link: https://patch.msgid.link/20240620132727.660738-8-bigeasy@linutronix.de Signed-off-by: Jakub Kicinski --- net/bridge/br_netfilter_hooks.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index bf30c50b5689..3c9f6538990e 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -137,6 +137,7 @@ static inline bool is_pppoe_ipv6(const struct sk_buff *skb, #define NF_BRIDGE_MAX_MAC_HEADER_LENGTH (PPPOE_SES_HLEN + ETH_HLEN) struct brnf_frag_data { + local_lock_t bh_lock; char mac[NF_BRIDGE_MAX_MAC_HEADER_LENGTH]; u8 encap_size; u8 size; @@ -144,7 +145,9 @@ struct brnf_frag_data { __be16 vlan_proto; }; -static DEFINE_PER_CPU(struct brnf_frag_data, brnf_frag_data_storage); +static DEFINE_PER_CPU(struct brnf_frag_data, brnf_frag_data_storage) = { + .bh_lock = INIT_LOCAL_LOCK(bh_lock), +}; static void nf_bridge_info_free(struct sk_buff *skb) { @@ -850,6 +853,7 @@ static int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff { struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); unsigned int mtu, mtu_reserved; + int ret; mtu_reserved = nf_bridge_mtu_reduction(skb); mtu = skb->dev->mtu; @@ -882,6 +886,7 @@ static int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff IPCB(skb)->frag_max_size = nf_bridge->frag_max_size; + local_lock_nested_bh(&brnf_frag_data_storage.bh_lock); data = this_cpu_ptr(&brnf_frag_data_storage); if (skb_vlan_tag_present(skb)) { @@ -897,7 +902,9 @@ static int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff skb_copy_from_linear_data_offset(skb, -data->size, data->mac, data->size); - return br_nf_ip_fragment(net, sk, skb, br_nf_push_frag_xmit); + ret = br_nf_ip_fragment(net, sk, skb, br_nf_push_frag_xmit); + local_unlock_nested_bh(&brnf_frag_data_storage.bh_lock); + return ret; } if (IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) && skb->protocol == htons(ETH_P_IPV6)) { @@ -909,6 +916,7 @@ static int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff IP6CB(skb)->frag_max_size = nf_bridge->frag_max_size; + local_lock_nested_bh(&brnf_frag_data_storage.bh_lock); data = this_cpu_ptr(&brnf_frag_data_storage); data->encap_size = nf_bridge_encap_header_len(skb); data->size = ETH_HLEN + data->encap_size; @@ -916,8 +924,12 @@ static int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff skb_copy_from_linear_data_offset(skb, -data->size, data->mac, data->size); - if (v6ops) - return v6ops->fragment(net, sk, skb, br_nf_push_frag_xmit); + if (v6ops) { + ret = v6ops->fragment(net, sk, skb, br_nf_push_frag_xmit); + local_unlock_nested_bh(&brnf_frag_data_storage.bh_lock); + return ret; + } + local_unlock_nested_bh(&brnf_frag_data_storage.bh_lock); kfree_skb(skb); return -EMSGSIZE; -- cgit v1.2.3-58-ga151 From ecefbc09e8ee768ae85b7bb7a1de8c8287397d68 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 20 Jun 2024 15:21:58 +0200 Subject: net: softnet_data: Make xmit per task. Softirq is preemptible on PREEMPT_RT. Without a per-CPU lock in local_bh_disable() there is no guarantee that only one device is transmitting at a time. 
With preemption and multiple senders it is possible that the per-CPU `recursion' counter gets incremented by different threads and exceeds XMIT_RECURSION_LIMIT leading to a false positive recursion alert. The `more' member is subject to similar problems if set by one thread for one driver and wrongly used by another driver within another thread. Instead of adding a lock to protect the per-CPU variable it is simpler to make xmit per-task. Sending and receiving skbs happens always in thread context anyway. Having a lock to protected the per-CPU counter would block/ serialize two sending threads needlessly. It would also require a recursive lock to ensure that the owner can increment the counter further. Make the softnet_data.xmit a task_struct member on PREEMPT_RT. Add needed wrapper. Cc: Ben Segall Cc: Daniel Bristot de Oliveira Cc: Dietmar Eggemann Cc: Juri Lelli Cc: Mel Gorman Cc: Steven Rostedt Cc: Valentin Schneider Cc: Vincent Guittot Signed-off-by: Sebastian Andrzej Siewior Link: https://patch.msgid.link/20240620132727.660738-9-bigeasy@linutronix.de Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 42 +++++++++++++++++++++++++++++++----------- include/linux/netdevice_xmit.h | 13 +++++++++++++ include/linux/sched.h | 5 ++++- net/core/dev.c | 14 ++++++++++++++ net/core/dev.h | 18 ++++++++++++++++++ 5 files changed, 80 insertions(+), 12 deletions(-) create mode 100644 include/linux/netdevice_xmit.h (limited to 'net') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index c83b390191d4..f6fc9066147d 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -43,6 +43,7 @@ #include #include +#include #include #include #include @@ -3223,13 +3224,7 @@ struct softnet_data { struct sk_buff_head xfrm_backlog; #endif /* written and read only by owning cpu: */ - struct { - u16 recursion; - u8 more; -#ifdef CONFIG_NET_EGRESS - u8 skip_txqueue; -#endif - } xmit; + struct netdev_xmit xmit; #ifdef CONFIG_RPS /* input_queue_head should be written by cpu owning this struct, * and only read by other cpus. Worth using a cache line. 
@@ -3257,10 +3252,18 @@ struct softnet_data { DECLARE_PER_CPU_ALIGNED(struct softnet_data, softnet_data); +#ifndef CONFIG_PREEMPT_RT static inline int dev_recursion_level(void) { return this_cpu_read(softnet_data.xmit.recursion); } +#else +static inline int dev_recursion_level(void) +{ + return current->net_xmit.recursion; +} + +#endif void __netif_schedule(struct Qdisc *q); void netif_schedule_queue(struct netdev_queue *txq); @@ -4872,18 +4875,35 @@ static inline ktime_t netdev_get_tstamp(struct net_device *dev, return hwtstamps->hwtstamp; } -static inline netdev_tx_t __netdev_start_xmit(const struct net_device_ops *ops, - struct sk_buff *skb, struct net_device *dev, - bool more) +#ifndef CONFIG_PREEMPT_RT +static inline void netdev_xmit_set_more(bool more) { __this_cpu_write(softnet_data.xmit.more, more); - return ops->ndo_start_xmit(skb, dev); } static inline bool netdev_xmit_more(void) { return __this_cpu_read(softnet_data.xmit.more); } +#else +static inline void netdev_xmit_set_more(bool more) +{ + current->net_xmit.more = more; +} + +static inline bool netdev_xmit_more(void) +{ + return current->net_xmit.more; +} +#endif + +static inline netdev_tx_t __netdev_start_xmit(const struct net_device_ops *ops, + struct sk_buff *skb, struct net_device *dev, + bool more) +{ + netdev_xmit_set_more(more); + return ops->ndo_start_xmit(skb, dev); +} static inline netdev_tx_t netdev_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq, bool more) diff --git a/include/linux/netdevice_xmit.h b/include/linux/netdevice_xmit.h new file mode 100644 index 000000000000..38325e070296 --- /dev/null +++ b/include/linux/netdevice_xmit.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _LINUX_NETDEVICE_XMIT_H +#define _LINUX_NETDEVICE_XMIT_H + +struct netdev_xmit { + u16 recursion; + u8 more; +#ifdef CONFIG_NET_EGRESS + u8 skip_txqueue; +#endif +}; + +#endif diff --git a/include/linux/sched.h b/include/linux/sched.h index 61591ac6eab6..5187486c2522 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include @@ -975,7 +976,9 @@ struct task_struct { /* delay due to memory thrashing */ unsigned in_thrashing:1; #endif - +#ifdef CONFIG_PREEMPT_RT + struct netdev_xmit net_xmit; +#endif unsigned long atomic_flags; /* Flags requiring atomic access. 
*/ struct restart_block restart_block; diff --git a/net/core/dev.c b/net/core/dev.c index 093d82bf0e28..95b9e4cc1767 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3940,6 +3940,7 @@ netdev_tx_queue_mapping(struct net_device *dev, struct sk_buff *skb) return netdev_get_tx_queue(dev, netdev_cap_txqueue(dev, qm)); } +#ifndef CONFIG_PREEMPT_RT static bool netdev_xmit_txqueue_skipped(void) { return __this_cpu_read(softnet_data.xmit.skip_txqueue); @@ -3950,6 +3951,19 @@ void netdev_xmit_skip_txqueue(bool skip) __this_cpu_write(softnet_data.xmit.skip_txqueue, skip); } EXPORT_SYMBOL_GPL(netdev_xmit_skip_txqueue); + +#else +static bool netdev_xmit_txqueue_skipped(void) +{ + return current->net_xmit.skip_txqueue; +} + +void netdev_xmit_skip_txqueue(bool skip) +{ + current->net_xmit.skip_txqueue = skip; +} +EXPORT_SYMBOL_GPL(netdev_xmit_skip_txqueue); +#endif #endif /* CONFIG_NET_EGRESS */ #ifdef CONFIG_NET_XGRESS diff --git a/net/core/dev.h b/net/core/dev.h index 58f88d28bc99..5654325c5b71 100644 --- a/net/core/dev.h +++ b/net/core/dev.h @@ -150,6 +150,8 @@ struct napi_struct *napi_by_id(unsigned int napi_id); void kick_defer_list_purge(struct softnet_data *sd, unsigned int cpu); #define XMIT_RECURSION_LIMIT 8 + +#ifndef CONFIG_PREEMPT_RT static inline bool dev_xmit_recursion(void) { return unlikely(__this_cpu_read(softnet_data.xmit.recursion) > @@ -165,6 +167,22 @@ static inline void dev_xmit_recursion_dec(void) { __this_cpu_dec(softnet_data.xmit.recursion); } +#else +static inline bool dev_xmit_recursion(void) +{ + return unlikely(current->net_xmit.recursion > XMIT_RECURSION_LIMIT); +} + +static inline void dev_xmit_recursion_inc(void) +{ + current->net_xmit.recursion++; +} + +static inline void dev_xmit_recursion_dec(void) +{ + current->net_xmit.recursion--; +} +#endif int dev_set_hwtstamp_phylib(struct net_device *dev, struct kernel_hwtstamp_config *cfg, -- cgit v1.2.3-58-ga151 From a8760d0d1497878d01f901ef2038c7389187a8af Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 20 Jun 2024 15:21:59 +0200 Subject: dev: Remove PREEMPT_RT ifdefs from backlog_lock.*(). The backlog_napi locking (previously RPS) relies on explicit locking if either RPS or backlog NAPI is enabled. If both are disabled then locking was achieved by disabling interrupts except on PREEMPT_RT. PREEMPT_RT was excluded because the needed synchronisation was already provided local_bh_disable(). Since the introduction of backlog NAPI and making it mandatory for PREEMPT_RT the ifdef within backlog_lock.*() is obsolete and can be removed. Remove the ifdefs in backlog_lock.*(). 
Signed-off-by: Sebastian Andrzej Siewior Link: https://patch.msgid.link/20240620132727.660738-10-bigeasy@linutronix.de Signed-off-by: Jakub Kicinski --- net/core/dev.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 95b9e4cc1767..73c4d14e4feb 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -229,7 +229,7 @@ static inline void backlog_lock_irq_save(struct softnet_data *sd, { if (IS_ENABLED(CONFIG_RPS) || use_backlog_threads()) spin_lock_irqsave(&sd->input_pkt_queue.lock, *flags); - else if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + else local_irq_save(*flags); } @@ -237,7 +237,7 @@ static inline void backlog_lock_irq_disable(struct softnet_data *sd) { if (IS_ENABLED(CONFIG_RPS) || use_backlog_threads()) spin_lock_irq(&sd->input_pkt_queue.lock); - else if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + else local_irq_disable(); } @@ -246,7 +246,7 @@ static inline void backlog_unlock_irq_restore(struct softnet_data *sd, { if (IS_ENABLED(CONFIG_RPS) || use_backlog_threads()) spin_unlock_irqrestore(&sd->input_pkt_queue.lock, *flags); - else if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + else local_irq_restore(*flags); } @@ -254,7 +254,7 @@ static inline void backlog_unlock_irq_enable(struct softnet_data *sd) { if (IS_ENABLED(CONFIG_RPS) || use_backlog_threads()) spin_unlock_irq(&sd->input_pkt_queue.lock); - else if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + else local_irq_enable(); } -- cgit v1.2.3-58-ga151 From b22800f9d3b142bf2550dd47ff738b9feedc1093 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 20 Jun 2024 15:22:00 +0200 Subject: dev: Use nested-BH locking for softnet_data.process_queue. softnet_data::process_queue is a per-CPU variable and relies on disabled BH for its locking. Without per-CPU locking in local_bh_disable() on PREEMPT_RT this data structure requires explicit locking. softnet_data::input_queue_head can be updated lockless. This is fine because this value is only update CPU local by the local backlog_napi thread. Add a local_lock_t to softnet_data and use local_lock_nested_bh() for locking of process_queue. This change adds only lockdep coverage and does not alter the functional behaviour for !PREEMPT_RT. Signed-off-by: Sebastian Andrzej Siewior Link: https://patch.msgid.link/20240620132727.660738-11-bigeasy@linutronix.de Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 1 + net/core/dev.c | 12 +++++++++++- 2 files changed, 12 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f6fc9066147d..4e81660b4462 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3202,6 +3202,7 @@ static inline bool dev_has_header(const struct net_device *dev) struct softnet_data { struct list_head poll_list; struct sk_buff_head process_queue; + local_lock_t process_queue_bh_lock; /* stats */ unsigned int processed; diff --git a/net/core/dev.c b/net/core/dev.c index 73c4d14e4feb..8ef727c2ae2b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -449,7 +449,9 @@ static RAW_NOTIFIER_HEAD(netdev_chain); * queue in the local softnet handler. */ -DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data); +DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data) = { + .process_queue_bh_lock = INIT_LOCAL_LOCK(process_queue_bh_lock), +}; EXPORT_PER_CPU_SYMBOL(softnet_data); /* Page_pool has a lockless array/stack to alloc/recycle pages. 
@@ -5949,6 +5951,7 @@ static void flush_backlog(struct work_struct *work) } backlog_unlock_irq_enable(sd); + local_lock_nested_bh(&softnet_data.process_queue_bh_lock); skb_queue_walk_safe(&sd->process_queue, skb, tmp) { if (skb->dev->reg_state == NETREG_UNREGISTERING) { __skb_unlink(skb, &sd->process_queue); @@ -5956,6 +5959,7 @@ static void flush_backlog(struct work_struct *work) rps_input_queue_head_incr(sd); } } + local_unlock_nested_bh(&softnet_data.process_queue_bh_lock); local_bh_enable(); } @@ -6077,7 +6081,9 @@ static int process_backlog(struct napi_struct *napi, int quota) while (again) { struct sk_buff *skb; + local_lock_nested_bh(&softnet_data.process_queue_bh_lock); while ((skb = __skb_dequeue(&sd->process_queue))) { + local_unlock_nested_bh(&softnet_data.process_queue_bh_lock); rcu_read_lock(); __netif_receive_skb(skb); rcu_read_unlock(); @@ -6086,7 +6092,9 @@ static int process_backlog(struct napi_struct *napi, int quota) return work; } + local_lock_nested_bh(&softnet_data.process_queue_bh_lock); } + local_unlock_nested_bh(&softnet_data.process_queue_bh_lock); backlog_lock_irq_disable(sd); if (skb_queue_empty(&sd->input_pkt_queue)) { @@ -6101,8 +6109,10 @@ static int process_backlog(struct napi_struct *napi, int quota) napi->state &= NAPIF_STATE_THREADED; again = false; } else { + local_lock_nested_bh(&softnet_data.process_queue_bh_lock); skb_queue_splice_tail_init(&sd->input_pkt_queue, &sd->process_queue); + local_unlock_nested_bh(&softnet_data.process_queue_bh_lock); } backlog_unlock_irq_enable(sd); } -- cgit v1.2.3-58-ga151 From 3414adbd6a6ad3702d0bdc49081ee7c9e9e1c600 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 20 Jun 2024 15:22:01 +0200 Subject: lwt: Don't disable migration prio invoking BPF. There is no need to explicitly disable migration if bottom halves are also disabled. Disabling BH implies disabling migration. Remove migrate_disable() and rely solely on disabling BH to remain on the same CPU. Signed-off-by: Sebastian Andrzej Siewior Link: https://patch.msgid.link/20240620132727.660738-12-bigeasy@linutronix.de Signed-off-by: Jakub Kicinski --- net/core/lwt_bpf.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c index 4a0797f0a154..a94943681e5a 100644 --- a/net/core/lwt_bpf.c +++ b/net/core/lwt_bpf.c @@ -40,10 +40,9 @@ static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt, { int ret; - /* Migration disable and BH disable are needed to protect per-cpu - * redirect_info between BPF prog and skb_do_redirect(). + /* Disabling BH is needed to protect per-CPU bpf_redirect_info between + * BPF prog and skb_do_redirect(). */ - migrate_disable(); local_bh_disable(); bpf_compute_data_pointers(skb); ret = bpf_prog_run_save_cb(lwt->prog, skb); @@ -78,7 +77,6 @@ static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt, } local_bh_enable(); - migrate_enable(); return ret; } -- cgit v1.2.3-58-ga151 From d1542d4ae4dfdc47c9b3205ebe849ed23af213dd Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 20 Jun 2024 15:22:02 +0200 Subject: seg6: Use nested-BH locking for seg6_bpf_srh_states. The access to seg6_bpf_srh_states is protected by disabling preemption. Based on the code, the entry point is input_action_end_bpf() and every other function (the bpf helper functions bpf_lwt_seg6_*()), that is accessing seg6_bpf_srh_states, should be called from within input_action_end_bpf(). 
input_action_end_bpf() accesses seg6_bpf_srh_states first at the top of the function and then disables preemption. This looks wrong because if preemption needs to be disabled as part of the locking mechanism then the variable shouldn't be accessed beforehand. Looking at how it is used via test_lwt_seg6local.sh then input_action_end_bpf() is always invoked from softirq context. If this is always the case then the preempt_disable() statement is superfluous. If this is not always invoked from softirq then disabling only preemption is not sufficient. Replace the preempt_disable() statement with nested-BH locking. This is not an equivalent replacement as it assumes that the invocation of input_action_end_bpf() always occurs in softirq context and thus the preempt_disable() is superfluous. Add a local_lock_t the data structure and use local_lock_nested_bh() for locking. Add lockdep_assert_held() to ensure the lock is held while the per-CPU variable is referenced in the helper functions. Cc: Alexei Starovoitov Cc: Andrii Nakryiko Cc: David Ahern Cc: Hao Luo Cc: Jiri Olsa Cc: John Fastabend Cc: KP Singh Cc: Martin KaFai Lau Cc: Song Liu Cc: Stanislav Fomichev Cc: Yonghong Song Signed-off-by: Sebastian Andrzej Siewior Link: https://patch.msgid.link/20240620132727.660738-13-bigeasy@linutronix.de Signed-off-by: Jakub Kicinski --- include/net/seg6_local.h | 1 + net/core/filter.c | 3 +++ net/ipv6/seg6_local.c | 22 ++++++++++++++-------- 3 files changed, 18 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/include/net/seg6_local.h b/include/net/seg6_local.h index 3fab9dec2ec4..888c1ce6f527 100644 --- a/include/net/seg6_local.h +++ b/include/net/seg6_local.h @@ -19,6 +19,7 @@ extern int seg6_lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr, extern bool seg6_bpf_has_valid_srh(struct sk_buff *skb); struct seg6_bpf_srh_state { + local_lock_t bh_lock; struct ipv6_sr_hdr *srh; u16 hdrlen; bool valid; diff --git a/net/core/filter.c b/net/core/filter.c index b077e7467946..c4f5db324f04 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -6455,6 +6455,7 @@ BPF_CALL_4(bpf_lwt_seg6_store_bytes, struct sk_buff *, skb, u32, offset, void *srh_tlvs, *srh_end, *ptr; int srhoff = 0; + lockdep_assert_held(&srh_state->bh_lock); if (srh == NULL) return -EINVAL; @@ -6511,6 +6512,7 @@ BPF_CALL_4(bpf_lwt_seg6_action, struct sk_buff *, skb, int hdroff = 0; int err; + lockdep_assert_held(&srh_state->bh_lock); switch (action) { case SEG6_LOCAL_ACTION_END_X: if (!seg6_bpf_has_valid_srh(skb)) @@ -6587,6 +6589,7 @@ BPF_CALL_3(bpf_lwt_seg6_adjust_srh, struct sk_buff *, skb, u32, offset, int srhoff = 0; int ret; + lockdep_assert_held(&srh_state->bh_lock); if (unlikely(srh == NULL)) return -EINVAL; diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c index c434940131b1..c74705ead984 100644 --- a/net/ipv6/seg6_local.c +++ b/net/ipv6/seg6_local.c @@ -1380,7 +1380,9 @@ drop: return err; } -DEFINE_PER_CPU(struct seg6_bpf_srh_state, seg6_bpf_srh_states); +DEFINE_PER_CPU(struct seg6_bpf_srh_state, seg6_bpf_srh_states) = { + .bh_lock = INIT_LOCAL_LOCK(bh_lock), +}; bool seg6_bpf_has_valid_srh(struct sk_buff *skb) { @@ -1388,6 +1390,7 @@ bool seg6_bpf_has_valid_srh(struct sk_buff *skb) this_cpu_ptr(&seg6_bpf_srh_states); struct ipv6_sr_hdr *srh = srh_state->srh; + lockdep_assert_held(&srh_state->bh_lock); if (unlikely(srh == NULL)) return false; @@ -1408,8 +1411,7 @@ bool seg6_bpf_has_valid_srh(struct sk_buff *skb) static int input_action_end_bpf(struct sk_buff *skb, struct seg6_local_lwt *slwt) { - struct 
seg6_bpf_srh_state *srh_state = - this_cpu_ptr(&seg6_bpf_srh_states); + struct seg6_bpf_srh_state *srh_state; struct ipv6_sr_hdr *srh; int ret; @@ -1420,10 +1422,14 @@ static int input_action_end_bpf(struct sk_buff *skb, } advance_nextseg(srh, &ipv6_hdr(skb)->daddr); - /* preempt_disable is needed to protect the per-CPU buffer srh_state, - * which is also accessed by the bpf_lwt_seg6_* helpers + /* The access to the per-CPU buffer srh_state is protected by running + * always in softirq context (with disabled BH). On PREEMPT_RT the + * required locking is provided by the following local_lock_nested_bh() + * statement. It is also accessed by the bpf_lwt_seg6_* helpers via + * bpf_prog_run_save_cb(). */ - preempt_disable(); + local_lock_nested_bh(&seg6_bpf_srh_states.bh_lock); + srh_state = this_cpu_ptr(&seg6_bpf_srh_states); srh_state->srh = srh; srh_state->hdrlen = srh->hdrlen << 3; srh_state->valid = true; @@ -1446,15 +1452,15 @@ static int input_action_end_bpf(struct sk_buff *skb, if (srh_state->srh && !seg6_bpf_has_valid_srh(skb)) goto drop; + local_unlock_nested_bh(&seg6_bpf_srh_states.bh_lock); - preempt_enable(); if (ret != BPF_REDIRECT) seg6_lookup_nexthop(skb, NULL, 0); return dst_input(skb); drop: - preempt_enable(); + local_unlock_nested_bh(&seg6_bpf_srh_states.bh_lock); kfree_skb(skb); return -EINVAL; } -- cgit v1.2.3-58-ga151 From 78f520b7bbe579438dfc202226b3dac5607d8c7f Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 20 Jun 2024 15:22:03 +0200 Subject: net: Use nested-BH locking for bpf_scratchpad. bpf_scratchpad is a per-CPU variable and relies on disabled BH for its locking. Without per-CPU locking in local_bh_disable() on PREEMPT_RT this data structure requires explicit locking. Add a local_lock_t to the data structure and use local_lock_nested_bh() for locking. This change adds only lockdep coverage and does not alter the functional behaviour for !PREEMPT_RT. 
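bpf_sp is the kernel-side scratch buffer behind the bpf_csum_diff() helper, so the new lock is taken whenever a BPF program computes a checksum delta. A hypothetical tc program showing a typical caller of that helper (a sketch only, not part of this patch; it uses just the standard UAPI helpers):

#include <linux/bpf.h>
#include <linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>

SEC("tc")
int csum_diff_example(struct __sk_buff *skb)
{
        __be32 old_addr = bpf_htonl(0xc0a80001);        /* 192.168.0.1 */
        __be32 new_addr = bpf_htonl(0xc0a80002);        /* 192.168.0.2 */
        __s64 diff;

        /* Each call uses the kernel's per-CPU bpf_sp scratch buffer. */
        diff = bpf_csum_diff(&old_addr, sizeof(old_addr),
                             &new_addr, sizeof(new_addr), 0);
        if (diff < 0)
                return TC_ACT_SHOT;

        /* ... feed 'diff' to bpf_l3_csum_replace()/bpf_l4_csum_replace() ... */
        return TC_ACT_OK;
}

char LICENSE[] SEC("license") = "GPL";

The helper copies both input buffers into the per-CPU sp->diff area before folding them with csum_partial(), and that window is what the local_lock_t now covers.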
Cc: Alexei Starovoitov Cc: Andrii Nakryiko Cc: Hao Luo Cc: Jiri Olsa Cc: John Fastabend Cc: KP Singh Cc: Martin KaFai Lau Cc: Song Liu Cc: Stanislav Fomichev Cc: Yonghong Song Signed-off-by: Sebastian Andrzej Siewior Link: https://patch.msgid.link/20240620132727.660738-14-bigeasy@linutronix.de Signed-off-by: Jakub Kicinski --- net/core/filter.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/filter.c b/net/core/filter.c index c4f5db324f04..adb887195c3a 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1658,9 +1658,12 @@ struct bpf_scratchpad { __be32 diff[MAX_BPF_STACK / sizeof(__be32)]; u8 buff[MAX_BPF_STACK]; }; + local_lock_t bh_lock; }; -static DEFINE_PER_CPU(struct bpf_scratchpad, bpf_sp); +static DEFINE_PER_CPU(struct bpf_scratchpad, bpf_sp) = { + .bh_lock = INIT_LOCAL_LOCK(bh_lock), +}; static inline int __bpf_try_make_writable(struct sk_buff *skb, unsigned int write_len) @@ -2021,6 +2024,7 @@ BPF_CALL_5(bpf_csum_diff, __be32 *, from, u32, from_size, struct bpf_scratchpad *sp = this_cpu_ptr(&bpf_sp); u32 diff_size = from_size + to_size; int i, j = 0; + __wsum ret; /* This is quite flexible, some examples: * @@ -2034,12 +2038,15 @@ BPF_CALL_5(bpf_csum_diff, __be32 *, from, u32, from_size, diff_size > sizeof(sp->diff))) return -EINVAL; + local_lock_nested_bh(&bpf_sp.bh_lock); for (i = 0; i < from_size / sizeof(__be32); i++, j++) sp->diff[j] = ~from[i]; for (i = 0; i < to_size / sizeof(__be32); i++, j++) sp->diff[j] = to[i]; - return csum_partial(sp->diff, diff_size, seed); + ret = csum_partial(sp->diff, diff_size, seed); + local_unlock_nested_bh(&bpf_sp.bh_lock); + return ret; } static const struct bpf_func_proto bpf_csum_diff_proto = { -- cgit v1.2.3-58-ga151 From 401cb7dae8130fd34eb84648e02ab4c506df7d5e Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 20 Jun 2024 15:22:04 +0200 Subject: net: Reference bpf_redirect_info via task_struct on PREEMPT_RT. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The XDP redirect process is two staged: - bpf_prog_run_xdp() is invoked to run a eBPF program which inspects the packet and makes decisions. While doing that, the per-CPU variable bpf_redirect_info is used. - Afterwards xdp_do_redirect() is invoked and accesses bpf_redirect_info and it may also access other per-CPU variables like xskmap_flush_list. At the very end of the NAPI callback, xdp_do_flush() is invoked which does not access bpf_redirect_info but will touch the individual per-CPU lists. The per-CPU variables are only used in the NAPI callback hence disabling bottom halves is the only protection mechanism. Users from preemptible context (like cpu_map_kthread_run()) explicitly disable bottom halves for protections reasons. Without locking in local_bh_disable() on PREEMPT_RT this data structure requires explicit locking. PREEMPT_RT has forced-threaded interrupts enabled and every NAPI-callback runs in a thread. If each thread has its own data structure then locking can be avoided. Create a struct bpf_net_context which contains struct bpf_redirect_info. Define the variable on stack, use bpf_net_ctx_set() to save a pointer to it, bpf_net_ctx_clear() removes it again. The bpf_net_ctx_set() may nest. For instance a function can be used from within NET_RX_SOFTIRQ/ net_rx_action which uses bpf_net_ctx_set() and NET_TX_SOFTIRQ which does not. Therefore only the first invocations updates the pointer. 
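A rough sketch of that nesting rule, using the helpers this patch adds to include/linux/filter.h (outer_rx_path() and nested_tx_path() are invented names; the real call sites are the net/core/dev.c hunks below):

#include <linux/filter.h>

static void nested_tx_path(void)
{
        struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx;

        /* A context is already installed, so this returns NULL ... */
        bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx);
        /* ... BPF run here sees the *outer* on-stack context ... */
        bpf_net_ctx_clear(bpf_net_ctx);         /* NULL argument: no-op */
}

static void outer_rx_path(void)
{
        struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx;

        /* First caller: installs the on-stack context in current. */
        bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx);
        nested_tx_path();
        bpf_net_ctx_clear(bpf_net_ctx);         /* only the owner clears it */
}

Because bpf_net_ctx_set() returns NULL when a context is already installed and bpf_net_ctx_clear() ignores a NULL argument, only the outermost caller ever installs and removes the pointer.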
Use bpf_net_ctx_get_ri() as a wrapper to retrieve the current struct bpf_redirect_info. The returned data structure is zero initialized to ensure nothing is leaked from stack. This is done on first usage of the struct. bpf_net_ctx_set() sets bpf_redirect_info::kern_flags to 0 to note that initialisation is required. First invocation of bpf_net_ctx_get_ri() will memset() the data structure and update bpf_redirect_info::kern_flags. bpf_redirect_info::nh is excluded from memset because it is only used once BPF_F_NEIGH is set which also sets the nh member. The kern_flags is moved past nh to exclude it from memset. The pointer to bpf_net_context is saved task's task_struct. Using always the bpf_net_context approach has the advantage that there is almost zero differences between PREEMPT_RT and non-PREEMPT_RT builds. Cc: Andrii Nakryiko Cc: Eduard Zingerman Cc: Hao Luo Cc: Jiri Olsa Cc: John Fastabend Cc: KP Singh Cc: Martin KaFai Lau Cc: Song Liu Cc: Stanislav Fomichev Cc: Yonghong Song Acked-by: Alexei Starovoitov Acked-by: Jesper Dangaard Brouer Reviewed-by: Toke Høiland-Jørgensen Signed-off-by: Sebastian Andrzej Siewior Link: https://patch.msgid.link/20240620132727.660738-15-bigeasy@linutronix.de Signed-off-by: Jakub Kicinski --- include/linux/filter.h | 56 +++++++++++++++++++++++++++++++++++++++++--------- include/linux/sched.h | 3 +++ kernel/bpf/cpumap.c | 3 +++ kernel/bpf/devmap.c | 9 +++++++- kernel/fork.c | 1 + net/bpf/test_run.c | 11 +++++++++- net/core/dev.c | 29 +++++++++++++++++++++++++- net/core/filter.c | 44 +++++++++++---------------------------- net/core/lwt_bpf.c | 3 +++ 9 files changed, 114 insertions(+), 45 deletions(-) (limited to 'net') diff --git a/include/linux/filter.h b/include/linux/filter.h index b02aea291b7e..0a7f6e4a00b6 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -733,21 +733,59 @@ struct bpf_nh_params { }; }; +/* flags for bpf_redirect_info kern_flags */ +#define BPF_RI_F_RF_NO_DIRECT BIT(0) /* no napi_direct on return_frame */ +#define BPF_RI_F_RI_INIT BIT(1) + struct bpf_redirect_info { u64 tgt_index; void *tgt_value; struct bpf_map *map; u32 flags; - u32 kern_flags; u32 map_id; enum bpf_map_type map_type; struct bpf_nh_params nh; + u32 kern_flags; }; -DECLARE_PER_CPU(struct bpf_redirect_info, bpf_redirect_info); +struct bpf_net_context { + struct bpf_redirect_info ri; +}; -/* flags for bpf_redirect_info kern_flags */ -#define BPF_RI_F_RF_NO_DIRECT BIT(0) /* no napi_direct on return_frame */ +static inline struct bpf_net_context *bpf_net_ctx_set(struct bpf_net_context *bpf_net_ctx) +{ + struct task_struct *tsk = current; + + if (tsk->bpf_net_context != NULL) + return NULL; + bpf_net_ctx->ri.kern_flags = 0; + + tsk->bpf_net_context = bpf_net_ctx; + return bpf_net_ctx; +} + +static inline void bpf_net_ctx_clear(struct bpf_net_context *bpf_net_ctx) +{ + if (bpf_net_ctx) + current->bpf_net_context = NULL; +} + +static inline struct bpf_net_context *bpf_net_ctx_get(void) +{ + return current->bpf_net_context; +} + +static inline struct bpf_redirect_info *bpf_net_ctx_get_ri(void) +{ + struct bpf_net_context *bpf_net_ctx = bpf_net_ctx_get(); + + if (!(bpf_net_ctx->ri.kern_flags & BPF_RI_F_RI_INIT)) { + memset(&bpf_net_ctx->ri, 0, offsetof(struct bpf_net_context, ri.nh)); + bpf_net_ctx->ri.kern_flags |= BPF_RI_F_RI_INIT; + } + + return &bpf_net_ctx->ri; +} /* Compute the linear packet data range [data, data_end) which * will be accessed by various program types (cls_bpf, act_bpf, @@ -1018,25 +1056,23 @@ struct bpf_prog *bpf_patch_insn_single(struct 
bpf_prog *prog, u32 off, const struct bpf_insn *patch, u32 len); int bpf_remove_insns(struct bpf_prog *prog, u32 off, u32 cnt); -void bpf_clear_redirect_map(struct bpf_map *map); - static inline bool xdp_return_frame_no_direct(void) { - struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); + struct bpf_redirect_info *ri = bpf_net_ctx_get_ri(); return ri->kern_flags & BPF_RI_F_RF_NO_DIRECT; } static inline void xdp_set_return_frame_no_direct(void) { - struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); + struct bpf_redirect_info *ri = bpf_net_ctx_get_ri(); ri->kern_flags |= BPF_RI_F_RF_NO_DIRECT; } static inline void xdp_clear_return_frame_no_direct(void) { - struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); + struct bpf_redirect_info *ri = bpf_net_ctx_get_ri(); ri->kern_flags &= ~BPF_RI_F_RF_NO_DIRECT; } @@ -1592,7 +1628,7 @@ static __always_inline long __bpf_xdp_redirect_map(struct bpf_map *map, u64 inde u64 flags, const u64 flag_mask, void *lookup_elem(struct bpf_map *map, u32 key)) { - struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); + struct bpf_redirect_info *ri = bpf_net_ctx_get_ri(); const u64 action_mask = XDP_ABORTED | XDP_DROP | XDP_PASS | XDP_TX; /* Lower bits of the flags are used as return code on lookup failure */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 5187486c2522..5ff5e65a4627 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -54,6 +54,7 @@ struct bio_list; struct blk_plug; struct bpf_local_storage; struct bpf_run_ctx; +struct bpf_net_context; struct capture_control; struct cfs_rq; struct fs_struct; @@ -1509,6 +1510,8 @@ struct task_struct { /* Used for BPF run context */ struct bpf_run_ctx *bpf_ctx; #endif + /* Used by BPF for per-TASK xdp storage */ + struct bpf_net_context *bpf_net_context; #ifdef CONFIG_GCC_PLUGIN_STACKLEAK unsigned long lowest_stack; diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c index a8e34416e960..66974bd02710 100644 --- a/kernel/bpf/cpumap.c +++ b/kernel/bpf/cpumap.c @@ -240,12 +240,14 @@ static int cpu_map_bpf_prog_run(struct bpf_cpu_map_entry *rcpu, void **frames, int xdp_n, struct xdp_cpumap_stats *stats, struct list_head *list) { + struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx; int nframes; if (!rcpu->prog) return xdp_n; rcu_read_lock_bh(); + bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx); nframes = cpu_map_bpf_prog_run_xdp(rcpu, frames, xdp_n, stats); @@ -255,6 +257,7 @@ static int cpu_map_bpf_prog_run(struct bpf_cpu_map_entry *rcpu, void **frames, if (unlikely(!list_empty(list))) cpu_map_bpf_prog_run_skb(rcpu, list, stats); + bpf_net_ctx_clear(bpf_net_ctx); rcu_read_unlock_bh(); /* resched point, may call do_softirq() */ return nframes; diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index 7f3b34452243..fbfdfb60db8d 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -196,7 +196,14 @@ static void dev_map_free(struct bpf_map *map) list_del_rcu(&dtab->list); spin_unlock(&dev_map_lock); - bpf_clear_redirect_map(map); + /* bpf_redirect_info->map is assigned in __bpf_xdp_redirect_map() + * during NAPI callback and cleared after the XDP redirect. There is no + * explicit RCU read section which protects bpf_redirect_info->map but + * local_bh_disable() also marks the beginning an RCU section. This + * makes the complete softirq callback RCU protected. Thus after + * following synchronize_rcu() there no bpf_redirect_info->map == map + * assignment. 
+ */ synchronize_rcu(); /* Make sure prior __dev_map_entry_free() have completed. */ diff --git a/kernel/fork.c b/kernel/fork.c index 99076dbe27d8..f314bdd7e610 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2355,6 +2355,7 @@ __latent_entropy struct task_struct *copy_process( RCU_INIT_POINTER(p->bpf_storage, NULL); p->bpf_ctx = NULL; #endif + p->bpf_net_context = NULL; /* Perform scheduler related setup. Assign this task to a CPU. */ retval = sched_fork(clone_flags, p); diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 36ae54f57bf5..a6d7f790cdda 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -283,9 +283,10 @@ static int xdp_recv_frames(struct xdp_frame **frames, int nframes, static int xdp_test_run_batch(struct xdp_test_data *xdp, struct bpf_prog *prog, u32 repeat) { - struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); + struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx; int err = 0, act, ret, i, nframes = 0, batch_sz; struct xdp_frame **frames = xdp->frames; + struct bpf_redirect_info *ri; struct xdp_page_head *head; struct xdp_frame *frm; bool redirect = false; @@ -295,6 +296,8 @@ static int xdp_test_run_batch(struct xdp_test_data *xdp, struct bpf_prog *prog, batch_sz = min_t(u32, repeat, xdp->batch_size); local_bh_disable(); + bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx); + ri = bpf_net_ctx_get_ri(); xdp_set_return_frame_no_direct(); for (i = 0; i < batch_sz; i++) { @@ -359,6 +362,7 @@ out: } xdp_clear_return_frame_no_direct(); + bpf_net_ctx_clear(bpf_net_ctx); local_bh_enable(); return err; } @@ -394,6 +398,7 @@ static int bpf_test_run_xdp_live(struct bpf_prog *prog, struct xdp_buff *ctx, static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, u32 *retval, u32 *time, bool xdp) { + struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx; struct bpf_prog_array_item item = {.prog = prog}; struct bpf_run_ctx *old_ctx; struct bpf_cg_run_ctx run_ctx; @@ -419,10 +424,14 @@ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, do { run_ctx.prog_item = &item; local_bh_disable(); + bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx); + if (xdp) *retval = bpf_prog_run_xdp(prog, ctx); else *retval = bpf_prog_run(prog, ctx); + + bpf_net_ctx_clear(bpf_net_ctx); local_bh_enable(); } while (bpf_test_timer_continue(&t, 1, repeat, &ret, time)); bpf_reset_run_ctx(old_ctx); diff --git a/net/core/dev.c b/net/core/dev.c index 8ef727c2ae2b..b94fb4e63a28 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4045,10 +4045,13 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, { struct bpf_mprog_entry *entry = rcu_dereference_bh(skb->dev->tcx_ingress); enum skb_drop_reason drop_reason = SKB_DROP_REASON_TC_INGRESS; + struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx; int sch_ret; if (!entry) return skb; + + bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx); if (*pt_prev) { *ret = deliver_skb(skb, *pt_prev, orig_dev); *pt_prev = NULL; @@ -4077,10 +4080,12 @@ ingress_verdict: break; } *ret = NET_RX_SUCCESS; + bpf_net_ctx_clear(bpf_net_ctx); return NULL; case TC_ACT_SHOT: kfree_skb_reason(skb, drop_reason); *ret = NET_RX_DROP; + bpf_net_ctx_clear(bpf_net_ctx); return NULL; /* used by tc_run */ case TC_ACT_STOLEN: @@ -4090,8 +4095,10 @@ ingress_verdict: fallthrough; case TC_ACT_CONSUMED: *ret = NET_RX_SUCCESS; + bpf_net_ctx_clear(bpf_net_ctx); return NULL; } + bpf_net_ctx_clear(bpf_net_ctx); return skb; } @@ -4101,11 +4108,14 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev) { struct bpf_mprog_entry 
*entry = rcu_dereference_bh(dev->tcx_egress); enum skb_drop_reason drop_reason = SKB_DROP_REASON_TC_EGRESS; + struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx; int sch_ret; if (!entry) return skb; + bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx); + /* qdisc_skb_cb(skb)->pkt_len & tcx_set_ingress() was * already set by the caller. */ @@ -4121,10 +4131,12 @@ egress_verdict: /* No need to push/pop skb's mac_header here on egress! */ skb_do_redirect(skb); *ret = NET_XMIT_SUCCESS; + bpf_net_ctx_clear(bpf_net_ctx); return NULL; case TC_ACT_SHOT: kfree_skb_reason(skb, drop_reason); *ret = NET_XMIT_DROP; + bpf_net_ctx_clear(bpf_net_ctx); return NULL; /* used by tc_run */ case TC_ACT_STOLEN: @@ -4134,8 +4146,10 @@ egress_verdict: fallthrough; case TC_ACT_CONSUMED: *ret = NET_XMIT_SUCCESS; + bpf_net_ctx_clear(bpf_net_ctx); return NULL; } + bpf_net_ctx_clear(bpf_net_ctx); return skb; } @@ -6325,6 +6339,7 @@ enum { static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock, unsigned flags, u16 budget) { + struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx; bool skip_schedule = false; unsigned long timeout; int rc; @@ -6342,6 +6357,7 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock, clear_bit(NAPI_STATE_IN_BUSY_POLL, &napi->state); local_bh_disable(); + bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx); if (flags & NAPI_F_PREFER_BUSY_POLL) { napi->defer_hard_irqs_count = READ_ONCE(napi->dev->napi_defer_hard_irqs); @@ -6364,6 +6380,7 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock, netpoll_poll_unlock(have_poll_lock); if (rc == budget) __busy_poll_stop(napi, skip_schedule); + bpf_net_ctx_clear(bpf_net_ctx); local_bh_enable(); } @@ -6373,6 +6390,7 @@ static void __napi_busy_loop(unsigned int napi_id, { unsigned long start_time = loop_end ? 
busy_loop_current_time() : 0; int (*napi_poll)(struct napi_struct *napi, int budget); + struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx; void *have_poll_lock = NULL; struct napi_struct *napi; @@ -6391,6 +6409,7 @@ restart: int work = 0; local_bh_disable(); + bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx); if (!napi_poll) { unsigned long val = READ_ONCE(napi->state); @@ -6421,6 +6440,7 @@ count: __NET_ADD_STATS(dev_net(napi->dev), LINUX_MIB_BUSYPOLLRXPACKETS, work); skb_defer_free_flush(this_cpu_ptr(&softnet_data)); + bpf_net_ctx_clear(bpf_net_ctx); local_bh_enable(); if (!loop_end || loop_end(loop_end_arg, start_time)) @@ -6848,6 +6868,7 @@ static int napi_thread_wait(struct napi_struct *napi) static void napi_threaded_poll_loop(struct napi_struct *napi) { + struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx; struct softnet_data *sd; unsigned long last_qs = jiffies; @@ -6856,6 +6877,8 @@ static void napi_threaded_poll_loop(struct napi_struct *napi) void *have; local_bh_disable(); + bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx); + sd = this_cpu_ptr(&softnet_data); sd->in_napi_threaded_poll = true; @@ -6871,6 +6894,7 @@ static void napi_threaded_poll_loop(struct napi_struct *napi) net_rps_action_and_irq_enable(sd); } skb_defer_free_flush(sd); + bpf_net_ctx_clear(bpf_net_ctx); local_bh_enable(); if (!repoll) @@ -6896,10 +6920,12 @@ static __latent_entropy void net_rx_action(struct softirq_action *h) struct softnet_data *sd = this_cpu_ptr(&softnet_data); unsigned long time_limit = jiffies + usecs_to_jiffies(READ_ONCE(net_hotdata.netdev_budget_usecs)); + struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx; int budget = READ_ONCE(net_hotdata.netdev_budget); LIST_HEAD(list); LIST_HEAD(repoll); + bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx); start: sd->in_net_rx_action = true; local_irq_disable(); @@ -6952,7 +6978,8 @@ start: sd->in_net_rx_action = false; net_rps_action_and_irq_enable(sd); -end:; +end: + bpf_net_ctx_clear(bpf_net_ctx); } struct netdev_adjacent { diff --git a/net/core/filter.c b/net/core/filter.c index adb887195c3a..eb1c4425c06f 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2483,9 +2483,6 @@ static const struct bpf_func_proto bpf_clone_redirect_proto = { .arg3_type = ARG_ANYTHING, }; -DEFINE_PER_CPU(struct bpf_redirect_info, bpf_redirect_info); -EXPORT_PER_CPU_SYMBOL_GPL(bpf_redirect_info); - static struct net_device *skb_get_peer_dev(struct net_device *dev) { const struct net_device_ops *ops = dev->netdev_ops; @@ -2498,7 +2495,7 @@ static struct net_device *skb_get_peer_dev(struct net_device *dev) int skb_do_redirect(struct sk_buff *skb) { - struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); + struct bpf_redirect_info *ri = bpf_net_ctx_get_ri(); struct net *net = dev_net(skb->dev); struct net_device *dev; u32 flags = ri->flags; @@ -2531,7 +2528,7 @@ out_drop: BPF_CALL_2(bpf_redirect, u32, ifindex, u64, flags) { - struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); + struct bpf_redirect_info *ri = bpf_net_ctx_get_ri(); if (unlikely(flags & (~(BPF_F_INGRESS) | BPF_F_REDIRECT_INTERNAL))) return TC_ACT_SHOT; @@ -2552,7 +2549,7 @@ static const struct bpf_func_proto bpf_redirect_proto = { BPF_CALL_2(bpf_redirect_peer, u32, ifindex, u64, flags) { - struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); + struct bpf_redirect_info *ri = bpf_net_ctx_get_ri(); if (unlikely(flags)) return TC_ACT_SHOT; @@ -2574,7 +2571,7 @@ static const struct bpf_func_proto bpf_redirect_peer_proto = { BPF_CALL_4(bpf_redirect_neigh, u32, ifindex, struct 
bpf_redir_neigh *, params, int, plen, u64, flags) { - struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); + struct bpf_redirect_info *ri = bpf_net_ctx_get_ri(); if (unlikely((plen && plen < sizeof(*params)) || flags)) return TC_ACT_SHOT; @@ -4300,30 +4297,13 @@ void xdp_do_check_flushed(struct napi_struct *napi) } #endif -void bpf_clear_redirect_map(struct bpf_map *map) -{ - struct bpf_redirect_info *ri; - int cpu; - - for_each_possible_cpu(cpu) { - ri = per_cpu_ptr(&bpf_redirect_info, cpu); - /* Avoid polluting remote cacheline due to writes if - * not needed. Once we pass this test, we need the - * cmpxchg() to make sure it hasn't been changed in - * the meantime by remote CPU. - */ - if (unlikely(READ_ONCE(ri->map) == map)) - cmpxchg(&ri->map, map, NULL); - } -} - DEFINE_STATIC_KEY_FALSE(bpf_master_redirect_enabled_key); EXPORT_SYMBOL_GPL(bpf_master_redirect_enabled_key); u32 xdp_master_redirect(struct xdp_buff *xdp) { + struct bpf_redirect_info *ri = bpf_net_ctx_get_ri(); struct net_device *master, *slave; - struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); master = netdev_master_upper_dev_get_rcu(xdp->rxq->dev); slave = master->netdev_ops->ndo_xdp_get_xmit_slave(master, xdp); @@ -4395,7 +4375,7 @@ static __always_inline int __xdp_do_redirect_frame(struct bpf_redirect_info *ri, map = READ_ONCE(ri->map); /* The map pointer is cleared when the map is being torn - * down by bpf_clear_redirect_map() + * down by dev_map_free() */ if (unlikely(!map)) { err = -ENOENT; @@ -4440,7 +4420,7 @@ err: int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp, struct bpf_prog *xdp_prog) { - struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); + struct bpf_redirect_info *ri = bpf_net_ctx_get_ri(); enum bpf_map_type map_type = ri->map_type; if (map_type == BPF_MAP_TYPE_XSKMAP) @@ -4454,7 +4434,7 @@ EXPORT_SYMBOL_GPL(xdp_do_redirect); int xdp_do_redirect_frame(struct net_device *dev, struct xdp_buff *xdp, struct xdp_frame *xdpf, struct bpf_prog *xdp_prog) { - struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); + struct bpf_redirect_info *ri = bpf_net_ctx_get_ri(); enum bpf_map_type map_type = ri->map_type; if (map_type == BPF_MAP_TYPE_XSKMAP) @@ -4471,7 +4451,7 @@ static int xdp_do_generic_redirect_map(struct net_device *dev, enum bpf_map_type map_type, u32 map_id, u32 flags) { - struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); + struct bpf_redirect_info *ri = bpf_net_ctx_get_ri(); struct bpf_map *map; int err; @@ -4483,7 +4463,7 @@ static int xdp_do_generic_redirect_map(struct net_device *dev, map = READ_ONCE(ri->map); /* The map pointer is cleared when the map is being torn - * down by bpf_clear_redirect_map() + * down by dev_map_free() */ if (unlikely(!map)) { err = -ENOENT; @@ -4525,7 +4505,7 @@ err: int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb, struct xdp_buff *xdp, struct bpf_prog *xdp_prog) { - struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); + struct bpf_redirect_info *ri = bpf_net_ctx_get_ri(); enum bpf_map_type map_type = ri->map_type; void *fwd = ri->tgt_value; u32 map_id = ri->map_id; @@ -4561,7 +4541,7 @@ err: BPF_CALL_2(bpf_xdp_redirect, u32, ifindex, u64, flags) { - struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); + struct bpf_redirect_info *ri = bpf_net_ctx_get_ri(); if (unlikely(flags)) return XDP_ABORTED; diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c index a94943681e5a..afb05f58b64c 100644 --- a/net/core/lwt_bpf.c +++ b/net/core/lwt_bpf.c @@ 
-38,12 +38,14 @@ static inline struct bpf_lwt *bpf_lwt_lwtunnel(struct lwtunnel_state *lwt) static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt, struct dst_entry *dst, bool can_redirect) { + struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx; int ret; /* Disabling BH is needed to protect per-CPU bpf_redirect_info between * BPF prog and skb_do_redirect(). */ local_bh_disable(); + bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx); bpf_compute_data_pointers(skb); ret = bpf_prog_run_save_cb(lwt->prog, skb); @@ -76,6 +78,7 @@ static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt, break; } + bpf_net_ctx_clear(bpf_net_ctx); local_bh_enable(); return ret; -- cgit v1.2.3-58-ga151 From 3f9fe37d9e16a6cfd5f4d1f536686ea71db3196f Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 20 Jun 2024 15:22:05 +0200 Subject: net: Move per-CPU flush-lists to bpf_net_context on PREEMPT_RT. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The per-CPU flush lists, which are accessed from within the NAPI callback (xdp_do_flush() for instance), are per-CPU. There are subject to the same problem as struct bpf_redirect_info. Add the per-CPU lists cpu_map_flush_list, dev_map_flush_list and xskmap_map_flush_list to struct bpf_net_context. Add wrappers for the access. The lists initialized on first usage (similar to bpf_net_ctx_get_ri()). Cc: "Björn Töpel" Cc: Alexei Starovoitov Cc: Andrii Nakryiko Cc: Eduard Zingerman Cc: Hao Luo Cc: Jiri Olsa Cc: John Fastabend Cc: Jonathan Lemon Cc: KP Singh Cc: Maciej Fijalkowski Cc: Magnus Karlsson Cc: Martin KaFai Lau Cc: Song Liu Cc: Stanislav Fomichev Cc: Yonghong Song Acked-by: Jesper Dangaard Brouer Reviewed-by: Toke Høiland-Jørgensen Signed-off-by: Sebastian Andrzej Siewior Link: https://patch.msgid.link/20240620132727.660738-16-bigeasy@linutronix.de Signed-off-by: Jakub Kicinski --- include/linux/filter.h | 42 ++++++++++++++++++++++++++++++++++++++++++ kernel/bpf/cpumap.c | 19 +++---------------- kernel/bpf/devmap.c | 11 +++-------- net/xdp/xsk.c | 12 ++++-------- 4 files changed, 52 insertions(+), 32 deletions(-) (limited to 'net') diff --git a/include/linux/filter.h b/include/linux/filter.h index 0a7f6e4a00b6..c0349522de8f 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -736,6 +736,9 @@ struct bpf_nh_params { /* flags for bpf_redirect_info kern_flags */ #define BPF_RI_F_RF_NO_DIRECT BIT(0) /* no napi_direct on return_frame */ #define BPF_RI_F_RI_INIT BIT(1) +#define BPF_RI_F_CPU_MAP_INIT BIT(2) +#define BPF_RI_F_DEV_MAP_INIT BIT(3) +#define BPF_RI_F_XSK_MAP_INIT BIT(4) struct bpf_redirect_info { u64 tgt_index; @@ -750,6 +753,9 @@ struct bpf_redirect_info { struct bpf_net_context { struct bpf_redirect_info ri; + struct list_head cpu_map_flush_list; + struct list_head dev_map_flush_list; + struct list_head xskmap_map_flush_list; }; static inline struct bpf_net_context *bpf_net_ctx_set(struct bpf_net_context *bpf_net_ctx) @@ -787,6 +793,42 @@ static inline struct bpf_redirect_info *bpf_net_ctx_get_ri(void) return &bpf_net_ctx->ri; } +static inline struct list_head *bpf_net_ctx_get_cpu_map_flush_list(void) +{ + struct bpf_net_context *bpf_net_ctx = bpf_net_ctx_get(); + + if (!(bpf_net_ctx->ri.kern_flags & BPF_RI_F_CPU_MAP_INIT)) { + INIT_LIST_HEAD(&bpf_net_ctx->cpu_map_flush_list); + bpf_net_ctx->ri.kern_flags |= BPF_RI_F_CPU_MAP_INIT; + } + + return &bpf_net_ctx->cpu_map_flush_list; +} + +static inline struct list_head *bpf_net_ctx_get_dev_flush_list(void) +{ + struct 
bpf_net_context *bpf_net_ctx = bpf_net_ctx_get(); + + if (!(bpf_net_ctx->ri.kern_flags & BPF_RI_F_DEV_MAP_INIT)) { + INIT_LIST_HEAD(&bpf_net_ctx->dev_map_flush_list); + bpf_net_ctx->ri.kern_flags |= BPF_RI_F_DEV_MAP_INIT; + } + + return &bpf_net_ctx->dev_map_flush_list; +} + +static inline struct list_head *bpf_net_ctx_get_xskmap_flush_list(void) +{ + struct bpf_net_context *bpf_net_ctx = bpf_net_ctx_get(); + + if (!(bpf_net_ctx->ri.kern_flags & BPF_RI_F_XSK_MAP_INIT)) { + INIT_LIST_HEAD(&bpf_net_ctx->xskmap_map_flush_list); + bpf_net_ctx->ri.kern_flags |= BPF_RI_F_XSK_MAP_INIT; + } + + return &bpf_net_ctx->xskmap_map_flush_list; +} + /* Compute the linear packet data range [data, data_end) which * will be accessed by various program types (cls_bpf, act_bpf, * lwt, ...). Subsystems allowing direct data access must (!) diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c index 66974bd02710..068e994ed781 100644 --- a/kernel/bpf/cpumap.c +++ b/kernel/bpf/cpumap.c @@ -79,8 +79,6 @@ struct bpf_cpu_map { struct bpf_cpu_map_entry __rcu **cpu_map; }; -static DEFINE_PER_CPU(struct list_head, cpu_map_flush_list); - static struct bpf_map *cpu_map_alloc(union bpf_attr *attr) { u32 value_size = attr->value_size; @@ -709,7 +707,7 @@ static void bq_flush_to_queue(struct xdp_bulk_queue *bq) */ static void bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf) { - struct list_head *flush_list = this_cpu_ptr(&cpu_map_flush_list); + struct list_head *flush_list = bpf_net_ctx_get_cpu_map_flush_list(); struct xdp_bulk_queue *bq = this_cpu_ptr(rcpu->bulkq); if (unlikely(bq->count == CPU_MAP_BULK_SIZE)) @@ -761,7 +759,7 @@ trace: void __cpu_map_flush(void) { - struct list_head *flush_list = this_cpu_ptr(&cpu_map_flush_list); + struct list_head *flush_list = bpf_net_ctx_get_cpu_map_flush_list(); struct xdp_bulk_queue *bq, *tmp; list_for_each_entry_safe(bq, tmp, flush_list, flush_node) { @@ -775,20 +773,9 @@ void __cpu_map_flush(void) #ifdef CONFIG_DEBUG_NET bool cpu_map_check_flush(void) { - if (list_empty(this_cpu_ptr(&cpu_map_flush_list))) + if (list_empty(bpf_net_ctx_get_cpu_map_flush_list())) return false; __cpu_map_flush(); return true; } #endif - -static int __init cpu_map_init(void) -{ - int cpu; - - for_each_possible_cpu(cpu) - INIT_LIST_HEAD(&per_cpu(cpu_map_flush_list, cpu)); - return 0; -} - -subsys_initcall(cpu_map_init); diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index fbfdfb60db8d..317ac2d66ebd 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -83,7 +83,6 @@ struct bpf_dtab { u32 n_buckets; }; -static DEFINE_PER_CPU(struct list_head, dev_flush_list); static DEFINE_SPINLOCK(dev_map_lock); static LIST_HEAD(dev_map_list); @@ -415,7 +414,7 @@ out: */ void __dev_flush(void) { - struct list_head *flush_list = this_cpu_ptr(&dev_flush_list); + struct list_head *flush_list = bpf_net_ctx_get_dev_flush_list(); struct xdp_dev_bulk_queue *bq, *tmp; list_for_each_entry_safe(bq, tmp, flush_list, flush_node) { @@ -429,7 +428,7 @@ void __dev_flush(void) #ifdef CONFIG_DEBUG_NET bool dev_check_flush(void) { - if (list_empty(this_cpu_ptr(&dev_flush_list))) + if (list_empty(bpf_net_ctx_get_dev_flush_list())) return false; __dev_flush(); return true; @@ -460,7 +459,7 @@ static void *__dev_map_lookup_elem(struct bpf_map *map, u32 key) static void bq_enqueue(struct net_device *dev, struct xdp_frame *xdpf, struct net_device *dev_rx, struct bpf_prog *xdp_prog) { - struct list_head *flush_list = this_cpu_ptr(&dev_flush_list); + struct list_head *flush_list = 
bpf_net_ctx_get_dev_flush_list(); struct xdp_dev_bulk_queue *bq = this_cpu_ptr(dev->xdp_bulkq); if (unlikely(bq->count == DEV_MAP_BULK_SIZE)) @@ -1160,15 +1159,11 @@ static struct notifier_block dev_map_notifier = { static int __init dev_map_init(void) { - int cpu; - /* Assure tracepoint shadow struct _bpf_dtab_netdev is in sync */ BUILD_BUG_ON(offsetof(struct bpf_dtab_netdev, dev) != offsetof(struct _bpf_dtab_netdev, dev)); register_netdevice_notifier(&dev_map_notifier); - for_each_possible_cpu(cpu) - INIT_LIST_HEAD(&per_cpu(dev_flush_list, cpu)); return 0; } diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 7d1c0986f9bb..ed062e038389 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -35,8 +35,6 @@ #define TX_BATCH_SIZE 32 #define MAX_PER_SOCKET_BUDGET (TX_BATCH_SIZE) -static DEFINE_PER_CPU(struct list_head, xskmap_flush_list); - void xsk_set_rx_need_wakeup(struct xsk_buff_pool *pool) { if (pool->cached_need_wakeup & XDP_WAKEUP_RX) @@ -372,7 +370,7 @@ static int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp) int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp) { - struct list_head *flush_list = this_cpu_ptr(&xskmap_flush_list); + struct list_head *flush_list = bpf_net_ctx_get_xskmap_flush_list(); int err; err = xsk_rcv(xs, xdp); @@ -387,7 +385,7 @@ int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp) void __xsk_map_flush(void) { - struct list_head *flush_list = this_cpu_ptr(&xskmap_flush_list); + struct list_head *flush_list = bpf_net_ctx_get_xskmap_flush_list(); struct xdp_sock *xs, *tmp; list_for_each_entry_safe(xs, tmp, flush_list, flush_node) { @@ -399,7 +397,7 @@ void __xsk_map_flush(void) #ifdef CONFIG_DEBUG_NET bool xsk_map_check_flush(void) { - if (list_empty(this_cpu_ptr(&xskmap_flush_list))) + if (list_empty(bpf_net_ctx_get_xskmap_flush_list())) return false; __xsk_map_flush(); return true; @@ -1772,7 +1770,7 @@ static struct pernet_operations xsk_net_ops = { static int __init xsk_init(void) { - int err, cpu; + int err; err = proto_register(&xsk_proto, 0 /* no slab */); if (err) @@ -1790,8 +1788,6 @@ static int __init xsk_init(void) if (err) goto out_pernet; - for_each_possible_cpu(cpu) - INIT_LIST_HEAD(&per_cpu(xskmap_flush_list, cpu)); return 0; out_pernet: -- cgit v1.2.3-58-ga151 From 07b87f9eea0c30675084d50c82532d20168da009 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Thu, 20 Jun 2024 08:47:24 +0200 Subject: xfrm: Fix unregister netdevice hang on hardware offload. When offloading xfrm states to hardware, the offloading device is attached to the skbs secpath. If a skb is free is deferred, an unregister netdevice hangs because the netdevice is still refcounted. Fix this by removing the netdevice from the xfrm states when the netdevice is unregistered. To find all xfrm states that need to be cleared we add another list where skbs linked to that are unlinked from the lists (deleted) but not yet freed. 
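To make that flow concrete, here is an illustrative sketch (simplified names, not the exact code in the diff below): a state that has been deleted but not yet freed is parked on a dedicated GC list, so the unregister path can still find it and drop its device reference.

	static HLIST_HEAD(state_dev_gc_list);
	static DEFINE_SPINLOCK(state_dev_gc_lock);

	/* delete path: keep the offloaded state reachable until it is freed */
	static void state_dev_delete(struct xfrm_state *x)
	{
		spin_lock_bh(&state_dev_gc_lock);
		hlist_add_head(&x->dev_gclist, &state_dev_gc_list);
		spin_unlock_bh(&state_dev_gc_lock);
	}

	/* unregister path: clean up every queued state still pointing at dev */
	static void state_dev_gc(const struct net_device *dev)
	{
		struct hlist_node *tmp;
		struct xfrm_state *x;

		spin_lock_bh(&state_dev_gc_lock);
		hlist_for_each_entry_safe(x, tmp, &state_dev_gc_list, dev_gclist)
			if (READ_ONCE(x->xso.dev) == dev)
				hlist_del(&x->dev_gclist); /* then drop the dev ref */
		spin_unlock_bh(&state_dev_gc_lock);
	}

The real implementation additionally drops the lock around the free and restarts the walk; see the diff below.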
Fixes: d77e38e612a0 ("xfrm: Add an IPsec hardware offloading API") Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 36 +++++++++--------------------- net/xfrm/xfrm_state.c | 61 +++++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 69 insertions(+), 28 deletions(-) (limited to 'net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 77ebf5bcf0b9..7d4c2235252c 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -178,7 +178,10 @@ struct xfrm_state { struct hlist_node gclist; struct hlist_node bydst; }; - struct hlist_node bysrc; + union { + struct hlist_node dev_gclist; + struct hlist_node bysrc; + }; struct hlist_node byspi; struct hlist_node byseq; @@ -1588,7 +1591,7 @@ void xfrm_state_update_stats(struct net *net); static inline void xfrm_dev_state_update_stats(struct xfrm_state *x) { struct xfrm_dev_offload *xdo = &x->xso; - struct net_device *dev = xdo->dev; + struct net_device *dev = READ_ONCE(xdo->dev); if (dev && dev->xfrmdev_ops && dev->xfrmdev_ops->xdo_dev_state_update_stats) @@ -1946,13 +1949,16 @@ int xfrm_dev_policy_add(struct net *net, struct xfrm_policy *xp, struct xfrm_user_offload *xuo, u8 dir, struct netlink_ext_ack *extack); bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x); +void xfrm_dev_state_delete(struct xfrm_state *x); +void xfrm_dev_state_free(struct xfrm_state *x); static inline void xfrm_dev_state_advance_esn(struct xfrm_state *x) { struct xfrm_dev_offload *xso = &x->xso; + struct net_device *dev = READ_ONCE(xso->dev); - if (xso->dev && xso->dev->xfrmdev_ops->xdo_dev_state_advance_esn) - xso->dev->xfrmdev_ops->xdo_dev_state_advance_esn(x); + if (dev && dev->xfrmdev_ops->xdo_dev_state_advance_esn) + dev->xfrmdev_ops->xdo_dev_state_advance_esn(x); } static inline bool xfrm_dst_offload_ok(struct dst_entry *dst) @@ -1973,28 +1979,6 @@ static inline bool xfrm_dst_offload_ok(struct dst_entry *dst) return false; } -static inline void xfrm_dev_state_delete(struct xfrm_state *x) -{ - struct xfrm_dev_offload *xso = &x->xso; - - if (xso->dev) - xso->dev->xfrmdev_ops->xdo_dev_state_delete(x); -} - -static inline void xfrm_dev_state_free(struct xfrm_state *x) -{ - struct xfrm_dev_offload *xso = &x->xso; - struct net_device *dev = xso->dev; - - if (dev && dev->xfrmdev_ops) { - if (dev->xfrmdev_ops->xdo_dev_state_free) - dev->xfrmdev_ops->xdo_dev_state_free(x); - xso->dev = NULL; - xso->type = XFRM_DEV_OFFLOAD_UNSPECIFIED; - netdev_put(dev, &xso->dev_tracker); - } -} - static inline void xfrm_dev_policy_delete(struct xfrm_policy *x) { struct xfrm_dev_offload *xdo = &x->xdo; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 649bb739df0d..d531d2a1fae2 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -49,6 +49,7 @@ static struct kmem_cache *xfrm_state_cache __ro_after_init; static DECLARE_WORK(xfrm_state_gc_work, xfrm_state_gc_task); static HLIST_HEAD(xfrm_state_gc_list); +static HLIST_HEAD(xfrm_state_dev_gc_list); static inline bool xfrm_state_hold_rcu(struct xfrm_state __rcu *x) { @@ -214,6 +215,7 @@ static DEFINE_SPINLOCK(xfrm_state_afinfo_lock); static struct xfrm_state_afinfo __rcu *xfrm_state_afinfo[NPROTO]; static DEFINE_SPINLOCK(xfrm_state_gc_lock); +static DEFINE_SPINLOCK(xfrm_state_dev_gc_lock); int __xfrm_state_delete(struct xfrm_state *x); @@ -683,6 +685,40 @@ struct xfrm_state *xfrm_state_alloc(struct net *net) } EXPORT_SYMBOL(xfrm_state_alloc); +#ifdef CONFIG_XFRM_OFFLOAD +void xfrm_dev_state_delete(struct xfrm_state *x) +{ + struct xfrm_dev_offload *xso = &x->xso; + struct 
net_device *dev = READ_ONCE(xso->dev); + + if (dev) { + dev->xfrmdev_ops->xdo_dev_state_delete(x); + spin_lock_bh(&xfrm_state_dev_gc_lock); + hlist_add_head(&x->dev_gclist, &xfrm_state_dev_gc_list); + spin_unlock_bh(&xfrm_state_dev_gc_lock); + } +} + +void xfrm_dev_state_free(struct xfrm_state *x) +{ + struct xfrm_dev_offload *xso = &x->xso; + struct net_device *dev = READ_ONCE(xso->dev); + + if (dev && dev->xfrmdev_ops) { + spin_lock_bh(&xfrm_state_dev_gc_lock); + if (!hlist_unhashed(&x->dev_gclist)) + hlist_del(&x->dev_gclist); + spin_unlock_bh(&xfrm_state_dev_gc_lock); + + if (dev->xfrmdev_ops->xdo_dev_state_free) + dev->xfrmdev_ops->xdo_dev_state_free(x); + WRITE_ONCE(xso->dev, NULL); + xso->type = XFRM_DEV_OFFLOAD_UNSPECIFIED; + netdev_put(dev, &xso->dev_tracker); + } +} +#endif + void __xfrm_state_destroy(struct xfrm_state *x, bool sync) { WARN_ON(x->km.state != XFRM_STATE_DEAD); @@ -848,6 +884,9 @@ EXPORT_SYMBOL(xfrm_state_flush); int xfrm_dev_state_flush(struct net *net, struct net_device *dev, bool task_valid) { + struct xfrm_state *x; + struct hlist_node *tmp; + struct xfrm_dev_offload *xso; int i, err = 0, cnt = 0; spin_lock_bh(&net->xfrm.xfrm_state_lock); @@ -857,8 +896,6 @@ int xfrm_dev_state_flush(struct net *net, struct net_device *dev, bool task_vali err = -ESRCH; for (i = 0; i <= net->xfrm.state_hmask; i++) { - struct xfrm_state *x; - struct xfrm_dev_offload *xso; restart: hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) { xso = &x->xso; @@ -868,6 +905,8 @@ restart: spin_unlock_bh(&net->xfrm.xfrm_state_lock); err = xfrm_state_delete(x); + xfrm_dev_state_free(x); + xfrm_audit_state_delete(x, err ? 0 : 1, task_valid); xfrm_state_put(x); @@ -884,6 +923,24 @@ restart: out: spin_unlock_bh(&net->xfrm.xfrm_state_lock); + + spin_lock_bh(&xfrm_state_dev_gc_lock); +restart_gc: + hlist_for_each_entry_safe(x, tmp, &xfrm_state_dev_gc_list, dev_gclist) { + xso = &x->xso; + + if (xso->dev == dev) { + spin_unlock_bh(&xfrm_state_dev_gc_lock); + xfrm_dev_state_free(x); + spin_lock_bh(&xfrm_state_dev_gc_lock); + goto restart_gc; + } + + } + spin_unlock_bh(&xfrm_state_dev_gc_lock); + + xfrm_flush_gc(); + return err; } EXPORT_SYMBOL(xfrm_dev_state_flush); -- cgit v1.2.3-58-ga151 From 3955802f160b5c61ac00d7e54da8d746f2e4a2d5 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 20 Jun 2024 13:56:13 -0700 Subject: af_unix: Define locking order for unix_table_double_lock(). When created, AF_UNIX socket is put into net->unx.table.buckets[], and the hash is stored in sk->sk_hash. * unbound socket : 0 <= sk_hash <= UNIX_HASH_MOD When bind() is called, the socket could be moved to another bucket. * pathname socket : 0 <= sk_hash <= UNIX_HASH_MOD * abstract socket : UNIX_HASH_MOD + 1 <= sk_hash <= UNIX_HASH_MOD * 2 + 1 Then, we call unix_table_double_lock() which locks a single bucket or two. Let's define the order as unix_table_lock_cmp_fn() instead of using spin_lock_nested(). The locking is always done in ascending order of sk->sk_hash, which is the index of buckets/locks array allocated by kvmalloc_array(). sk_hash_A < sk_hash_B <=> &locks[sk_hash_A].dep_map < &locks[sk_hash_B].dep_map So, the relation of two sk->sk_hash can be derived from the addresses of dep_map in the array of locks. 
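A condensed sketch of what this looks like in practice (the real hunks are in the diff below): every bucket lock gets the same lockdep comparison callback at init time, and the callback simply compares the dep_map addresses, which encodes the ascending sk_hash order.

	#define cmp_ptr(l, r)	(((l) > (r)) - ((l) < (r)))

	static int table_lock_cmp_fn(const struct lockdep_map *a,
				     const struct lockdep_map *b)
	{
		/* lower &locks[hash].dep_map  <=>  lower hash  <=>  locked first */
		return cmp_ptr(a, b);
	}

	/* per-bucket init */
	spin_lock_init(&net->unx.table.locks[i]);
	lock_set_cmp_fn(&net->unx.table.locks[i], table_lock_cmp_fn, NULL);

With the order expressed this way, the double-lock helper can take both spinlocks with plain spin_lock() and lockdep still validates the nesting.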
Signed-off-by: Kuniyuki Iwashima Reviewed-by: Kent Overstreet Signed-off-by: Paolo Abeni --- net/unix/af_unix.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index e9c941e6a464..7889d4723959 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -126,6 +126,15 @@ static spinlock_t bsd_socket_locks[UNIX_HASH_SIZE / 2]; * hash table is protected with spinlock. * each socket state is protected by separate spinlock. */ +#ifdef CONFIG_PROVE_LOCKING +#define cmp_ptr(l, r) (((l) > (r)) - ((l) < (r))) + +static int unix_table_lock_cmp_fn(const struct lockdep_map *a, + const struct lockdep_map *b) +{ + return cmp_ptr(a, b); +} +#endif static unsigned int unix_unbound_hash(struct sock *sk) { @@ -168,7 +177,7 @@ static void unix_table_double_lock(struct net *net, swap(hash1, hash2); spin_lock(&net->unx.table.locks[hash1]); - spin_lock_nested(&net->unx.table.locks[hash2], SINGLE_DEPTH_NESTING); + spin_lock(&net->unx.table.locks[hash2]); } static void unix_table_double_unlock(struct net *net, @@ -3578,6 +3587,7 @@ static int __net_init unix_net_init(struct net *net) for (i = 0; i < UNIX_HASH_SIZE; i++) { spin_lock_init(&net->unx.table.locks[i]); + lock_set_cmp_fn(&net->unx.table.locks[i], unix_table_lock_cmp_fn, NULL); INIT_HLIST_HEAD(&net->unx.table.buckets[i]); } -- cgit v1.2.3-58-ga151 From ed99822817cb728eee8786c1c921c69c6be206fe Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 20 Jun 2024 13:56:14 -0700 Subject: af_unix: Define locking order for U_LOCK_SECOND in unix_state_double_lock(). unix_dgram_connect() and unix_dgram_{send,recv}msg() lock the socket and peer in ascending order of the socket address. Let's define the order as unix_state_lock_cmp_fn() instead of using unix_state_lock_nested(). Signed-off-by: Kuniyuki Iwashima Reviewed-by: Kent Overstreet Signed-off-by: Paolo Abeni --- net/unix/af_unix.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 7889d4723959..0657f599bbef 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -134,6 +134,18 @@ static int unix_table_lock_cmp_fn(const struct lockdep_map *a, { return cmp_ptr(a, b); } + +static int unix_state_lock_cmp_fn(const struct lockdep_map *_a, + const struct lockdep_map *_b) +{ + const struct unix_sock *a, *b; + + a = container_of(_a, struct unix_sock, lock.dep_map); + b = container_of(_b, struct unix_sock, lock.dep_map); + + /* unix_state_double_lock(): ascending address order. 
*/ + return cmp_ptr(a, b); +} #endif static unsigned int unix_unbound_hash(struct sock *sk) @@ -987,6 +999,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, u->path.dentry = NULL; u->path.mnt = NULL; spin_lock_init(&u->lock); + lock_set_cmp_fn(&u->lock, unix_state_lock_cmp_fn, NULL); mutex_init(&u->iolock); /* single task reading lock */ mutex_init(&u->bindlock); /* single task binding lock */ init_waitqueue_head(&u->peer_wait); @@ -1335,11 +1348,12 @@ static void unix_state_double_lock(struct sock *sk1, struct sock *sk2) unix_state_lock(sk1); return; } + if (sk1 > sk2) swap(sk1, sk2); unix_state_lock(sk1); - unix_state_lock_nested(sk2, U_LOCK_SECOND); + unix_state_lock(sk2); } static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2) -- cgit v1.2.3-58-ga151 From 1ca27e0c8c13ac50a4acf9cdf77069e2d94a547d Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 20 Jun 2024 13:56:15 -0700 Subject: af_unix: Don't retry after unix_state_lock_nested() in unix_stream_connect(). When a SOCK_(STREAM|SEQPACKET) socket connect()s to another one, we need to lock the two sockets to check their states in unix_stream_connect(). We use unix_state_lock() for the server and unix_state_lock_nested() for client with tricky sk->sk_state check to avoid deadlock. The possible deadlock scenario are the following: 1) Self connect() 2) Simultaneous connect() The former is simple, attempt to grab the same lock, and the latter is AB-BA deadlock. After the server's unix_state_lock(), we check the server socket's state, and if it's not TCP_LISTEN, connect() fails with -EINVAL. Then, we avoid the former deadlock by checking the client's state before unix_state_lock_nested(). If its state is not TCP_LISTEN, we can make sure that the client and the server are not identical based on the state. Also, the latter deadlock can be avoided in the same way. Due to the server sk->sk_state requirement, AB-BA deadlock could happen only with TCP_LISTEN sockets. So, if the client's state is TCP_LISTEN, we can give up the second lock to avoid the deadlock. CPU 1 CPU 2 CPU 3 connect(A -> B) connect(B -> A) listen(A) --- --- --- unix_state_lock(B) B->sk_state == TCP_LISTEN READ_ONCE(A->sk_state) == TCP_CLOSE ^^^^^^^^^ ok, will lock A unix_state_lock(A) .--------------' WRITE_ONCE(A->sk_state, TCP_LISTEN) | unix_state_unlock(A) | | unix_state_lock(A) | A->sk_sk_state == TCP_LISTEN | READ_ONCE(B->sk_state) == TCP_LISTEN v ^^^^^^^^^^ unix_state_lock_nested(A) Don't lock B !! Currently, while checking the client's state, we also check if it's TCP_ESTABLISHED, but this is unlikely and can be checked after we know the state is not TCP_CLOSE. Moreover, if it happens after the second lock, we now jump to the restart label, but it's unlikely that the server is not found during the retry, so the jump is mostly to revist the client state check. Let's remove the retry logic and check the state against TCP_CLOSE first. 
Signed-off-by: Kuniyuki Iwashima Signed-off-by: Paolo Abeni --- net/unix/af_unix.c | 34 +++++++++------------------------- 1 file changed, 9 insertions(+), 25 deletions(-) (limited to 'net') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 0657f599bbef..88f2c5d039c4 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1496,6 +1496,7 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, struct unix_sock *u = unix_sk(sk), *newu, *otheru; struct net *net = sock_net(sk); struct sk_buff *skb = NULL; + unsigned char state; long timeo; int err; @@ -1546,7 +1547,6 @@ restart: goto out; } - /* Latch state of peer */ unix_state_lock(other); /* Apparently VFS overslept socket death. Retry. */ @@ -1576,37 +1576,21 @@ restart: goto restart; } - /* Latch our state. - - It is tricky place. We need to grab our state lock and cannot - drop lock on peer. It is dangerous because deadlock is - possible. Connect to self case and simultaneous - attempt to connect are eliminated by checking socket - state. other is TCP_LISTEN, if sk is TCP_LISTEN we - check this before attempt to grab lock. - - Well, and we have to recheck the state after socket locked. + /* self connect and simultaneous connect are eliminated + * by rejecting TCP_LISTEN socket to avoid deadlock. */ - switch (READ_ONCE(sk->sk_state)) { - case TCP_CLOSE: - /* This is ok... continue with connect */ - break; - case TCP_ESTABLISHED: - /* Socket is already connected */ - err = -EISCONN; - goto out_unlock; - default: - err = -EINVAL; + state = READ_ONCE(sk->sk_state); + if (unlikely(state != TCP_CLOSE)) { + err = state == TCP_ESTABLISHED ? -EISCONN : -EINVAL; goto out_unlock; } unix_state_lock_nested(sk, U_LOCK_SECOND); - if (sk->sk_state != TCP_CLOSE) { + if (unlikely(sk->sk_state != TCP_CLOSE)) { + err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EINVAL; unix_state_unlock(sk); - unix_state_unlock(other); - sock_put(other); - goto restart; + goto out_unlock; } err = security_unix_stream_connect(sk, other, newsk); -- cgit v1.2.3-58-ga151 From 98f706de445b464f25220360210a4bcb9cc6c41a Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 20 Jun 2024 13:56:16 -0700 Subject: af_unix: Define locking order for U_LOCK_SECOND in unix_stream_connect(). While a SOCK_(STREAM|SEQPACKET) socket connect()s to another, we hold two locks of them by unix_state_lock() and unix_state_lock_nested() in unix_stream_connect(). Before unix_state_lock_nested(), the following is guaranteed by checking sk->sk_state: 1. The first socket is TCP_LISTEN 2. The second socket is not the first one 3. Simultaneous connect() must fail So, the client state can be TCP_CLOSE or TCP_LISTEN or TCP_ESTABLISHED. Let's define the expected states as unix_state_lock_cmp_fn() instead of using unix_state_lock_nested(). Note that 2. is detected by debug_spin_lock_before() and 3. cannot be expressed as lock_cmp_fn. Signed-off-by: Kuniyuki Iwashima Signed-off-by: Paolo Abeni --- include/net/af_unix.h | 1 - net/unix/af_unix.c | 37 ++++++++++++++++++++++++++++++++++++- 2 files changed, 36 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/include/net/af_unix.h b/include/net/af_unix.h index b6eedf7650da..fd813ad73ab8 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -98,7 +98,6 @@ struct unix_sock { #define unix_state_unlock(s) spin_unlock(&unix_sk(s)->lock) enum unix_socket_lock_class { U_LOCK_NORMAL, - U_LOCK_SECOND, /* for double locking, see unix_state_double_lock(). 
*/ U_LOCK_DIAG, /* used while dumping icons, see sk_diag_dump_icons(). */ U_LOCK_GC_LISTENER, /* used for listening socket while determining gc * candidates to close a small race window. diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 88f2c5d039c4..a092d6999ae0 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -143,6 +143,41 @@ static int unix_state_lock_cmp_fn(const struct lockdep_map *_a, a = container_of(_a, struct unix_sock, lock.dep_map); b = container_of(_b, struct unix_sock, lock.dep_map); + if (a->sk.sk_state == TCP_LISTEN) { + /* unix_stream_connect(): Before the 2nd unix_state_lock(), + * + * 1. a is TCP_LISTEN. + * 2. b is not a. + * 3. concurrent connect(b -> a) must fail. + * + * Except for 2. & 3., the b's state can be any possible + * value due to concurrent connect() or listen(). + * + * 2. is detected in debug_spin_lock_before(), and 3. cannot + * be expressed as lock_cmp_fn. + */ + switch (b->sk.sk_state) { + case TCP_CLOSE: + case TCP_ESTABLISHED: + case TCP_LISTEN: + return -1; + default: + /* Invalid case. */ + return 0; + } + } + + /* Should never happen. Just to be symmetric. */ + if (b->sk.sk_state == TCP_LISTEN) { + switch (b->sk.sk_state) { + case TCP_CLOSE: + case TCP_ESTABLISHED: + return 1; + default: + return 0; + } + } + /* unix_state_double_lock(): ascending address order. */ return cmp_ptr(a, b); } @@ -1585,7 +1620,7 @@ restart: goto out_unlock; } - unix_state_lock_nested(sk, U_LOCK_SECOND); + unix_state_lock(sk); if (unlikely(sk->sk_state != TCP_CLOSE)) { err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EINVAL; -- cgit v1.2.3-58-ga151 From b380b18102a0b72a9726077474a2915dfe8bbc08 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 20 Jun 2024 13:56:17 -0700 Subject: af_unix: Don't acquire unix_state_lock() for sock_i_ino(). sk_diag_dump_peer() and sk_diag_dump() call unix_state_lock() for sock_i_ino() which reads SOCK_INODE(sk->sk_socket)->i_ino, but it's protected by sk->sk_callback_lock. Let's remove unnecessary unix_state_lock(). 
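For context, sock_i_ino() takes that lock itself; roughly (simplified from net/core/sock.c, details may differ by kernel version):

	unsigned long sock_i_ino(struct sock *sk)
	{
		unsigned long ino;

		read_lock_bh(&sk->sk_callback_lock);
		ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
		read_unlock_bh(&sk->sk_callback_lock);
		return ino;
	}

so wrapping the call in unix_state_lock()/unix_state_unlock() adds no protection.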
Signed-off-by: Kuniyuki Iwashima Signed-off-by: Paolo Abeni --- net/unix/diag.c | 30 ++++++++++-------------------- 1 file changed, 10 insertions(+), 20 deletions(-) (limited to 'net') diff --git a/net/unix/diag.c b/net/unix/diag.c index 937edf4afed4..d2d66727b0da 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c @@ -47,9 +47,7 @@ static int sk_diag_dump_peer(struct sock *sk, struct sk_buff *nlskb) peer = unix_peer_get(sk); if (peer) { - unix_state_lock(peer); ino = sock_i_ino(peer); - unix_state_unlock(peer); sock_put(peer); return nla_put_u32(nlskb, UNIX_DIAG_PEER, ino); @@ -180,22 +178,6 @@ out_nlmsg_trim: return -EMSGSIZE; } -static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, struct unix_diag_req *req, - struct user_namespace *user_ns, - u32 portid, u32 seq, u32 flags) -{ - int sk_ino; - - unix_state_lock(sk); - sk_ino = sock_i_ino(sk); - unix_state_unlock(sk); - - if (!sk_ino) - return 0; - - return sk_diag_fill(sk, skb, req, user_ns, portid, seq, flags, sk_ino); -} - static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); @@ -213,14 +195,22 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) num = 0; spin_lock(&net->unx.table.locks[slot]); sk_for_each(sk, &net->unx.table.buckets[slot]) { + int sk_ino; + if (num < s_num) goto next; + if (!(req->udiag_states & (1 << READ_ONCE(sk->sk_state)))) goto next; - if (sk_diag_dump(sk, skb, req, sk_user_ns(skb->sk), + + sk_ino = sock_i_ino(sk); + if (!sk_ino) + goto next; + + if (sk_diag_fill(sk, skb, req, sk_user_ns(skb->sk), NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, - NLM_F_MULTI) < 0) { + NLM_F_MULTI, sk_ino) < 0) { spin_unlock(&net->unx.table.locks[slot]); goto done; } -- cgit v1.2.3-58-ga151 From c4da4661d985fd3cbaea3ea6101e2dd0d2ad4b74 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 20 Jun 2024 13:56:18 -0700 Subject: af_unix: Remove U_LOCK_DIAG. sk_diag_dump_icons() acquires embryo's lock by unix_state_lock_nested() to fetch its peer. The embryo's ->peer is set to NULL only when its parent listener is close()d. Then, unix_release_sock() is called for each embryo after unlinking skb by skb_dequeue(). In sk_diag_dump_icons(), we hold the parent's recvq lock, so we need not acquire unix_state_lock_nested(), and peer is always non-NULL. Let's remove unnecessary unix_state_lock_nested() and non-NULL test for peer. Signed-off-by: Kuniyuki Iwashima Signed-off-by: Paolo Abeni --- include/net/af_unix.h | 1 - net/unix/diag.c | 17 +++-------------- 2 files changed, 3 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/include/net/af_unix.h b/include/net/af_unix.h index fd813ad73ab8..c42645199cee 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -98,7 +98,6 @@ struct unix_sock { #define unix_state_unlock(s) spin_unlock(&unix_sk(s)->lock) enum unix_socket_lock_class { U_LOCK_NORMAL, - U_LOCK_DIAG, /* used while dumping icons, see sk_diag_dump_icons(). */ U_LOCK_GC_LISTENER, /* used for listening socket while determining gc * candidates to close a small race window. */ diff --git a/net/unix/diag.c b/net/unix/diag.c index d2d66727b0da..9138af8b465e 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c @@ -73,20 +73,9 @@ static int sk_diag_dump_icons(struct sock *sk, struct sk_buff *nlskb) buf = nla_data(attr); i = 0; - skb_queue_walk(&sk->sk_receive_queue, skb) { - struct sock *req, *peer; - - req = skb->sk; - /* - * The state lock is outer for the same sk's - * queue lock. 
With the other's queue locked it's - * OK to lock the state. - */ - unix_state_lock_nested(req, U_LOCK_DIAG); - peer = unix_sk(req)->peer; - buf[i++] = (peer ? sock_i_ino(peer) : 0); - unix_state_unlock(req); - } + skb_queue_walk(&sk->sk_receive_queue, skb) + buf[i++] = sock_i_ino(unix_peer(skb->sk)); + spin_unlock(&sk->sk_receive_queue.lock); } -- cgit v1.2.3-58-ga151 From 8647ece4814f3bfdb5f7a8e19f882c9b89299a07 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 20 Jun 2024 13:56:20 -0700 Subject: af_unix: Define locking order for U_RECVQ_LOCK_EMBRYO in unix_collect_skb(). While GC is cleaning up cyclic references by SCM_RIGHTS, unix_collect_skb() collects skb in the socket's recvq. If the socket is TCP_LISTEN, we need to collect skb in the embryo's queue. Then, both the listener's recvq lock and the embroy's one are held. The locking is always done in the listener -> embryo order. Let's define it as unix_recvq_lock_cmp_fn() instead of using spin_lock_nested(). Note that the reverse order is defined for consistency. Signed-off-by: Kuniyuki Iwashima Signed-off-by: Paolo Abeni --- net/unix/af_unix.c | 21 +++++++++++++++++++++ net/unix/garbage.c | 8 +------- 2 files changed, 22 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index a092d6999ae0..89675879038d 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -181,6 +181,25 @@ static int unix_state_lock_cmp_fn(const struct lockdep_map *_a, /* unix_state_double_lock(): ascending address order. */ return cmp_ptr(a, b); } + +static int unix_recvq_lock_cmp_fn(const struct lockdep_map *_a, + const struct lockdep_map *_b) +{ + const struct sock *a, *b; + + a = container_of(_a, struct sock, sk_receive_queue.lock.dep_map); + b = container_of(_b, struct sock, sk_receive_queue.lock.dep_map); + + /* unix_collect_skb(): listener -> embryo order. */ + if (a->sk_state == TCP_LISTEN && unix_sk(b)->listener == a) + return -1; + + /* Should never happen. Just to be symmetric. */ + if (b->sk_state == TCP_LISTEN && unix_sk(a)->listener == b) + return 1; + + return 0; +} #endif static unsigned int unix_unbound_hash(struct sock *sk) @@ -1028,6 +1047,8 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, sk->sk_write_space = unix_write_space; sk->sk_max_ack_backlog = READ_ONCE(net->unx.sysctl_max_dgram_qlen); sk->sk_destruct = unix_sock_destructor; + lock_set_cmp_fn(&sk->sk_receive_queue.lock, unix_recvq_lock_cmp_fn, NULL); + u = unix_sk(sk); u->listener = NULL; u->vertex = NULL; diff --git a/net/unix/garbage.c b/net/unix/garbage.c index dfe94a90ece4..eb8aa5171a68 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -337,11 +337,6 @@ static bool unix_vertex_dead(struct unix_vertex *vertex) return true; } -enum unix_recv_queue_lock_class { - U_RECVQ_LOCK_NORMAL, - U_RECVQ_LOCK_EMBRYO, -}; - static void unix_collect_queue(struct unix_sock *u, struct sk_buff_head *hitlist) { skb_queue_splice_init(&u->sk.sk_receive_queue, hitlist); @@ -375,8 +370,7 @@ static void unix_collect_skb(struct list_head *scc, struct sk_buff_head *hitlist skb_queue_walk(queue, skb) { struct sk_buff_head *embryo_queue = &skb->sk->sk_receive_queue; - /* listener -> embryo order, the inversion never happens. 
*/ - spin_lock_nested(&embryo_queue->lock, U_RECVQ_LOCK_EMBRYO); + spin_lock(&embryo_queue->lock); unix_collect_queue(unix_sk(skb->sk), hitlist); spin_unlock(&embryo_queue->lock); } -- cgit v1.2.3-58-ga151 From faf489e6896d645a679d3d90a2d1d5d12c6b3e13 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 20 Jun 2024 13:56:21 -0700 Subject: af_unix: Set sk_peer_pid/sk_peer_cred locklessly for new socket. init_peercred() is called in 3 places: 1. socketpair() : both sockets 2. connect() : child socket 3. listen() : listening socket The first two need not hold sk_peer_lock because no one can touch the socket. Let's set cred/pid without holding lock for the two cases and rename the old init_peercred() to update_peercred() to properly reflect the use case. Signed-off-by: Kuniyuki Iwashima Signed-off-by: Paolo Abeni --- net/unix/af_unix.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 89675879038d..d11664c2faad 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -750,6 +750,12 @@ static void unix_release_sock(struct sock *sk, int embrion) } static void init_peercred(struct sock *sk) +{ + sk->sk_peer_pid = get_pid(task_tgid(current)); + sk->sk_peer_cred = get_current_cred(); +} + +static void update_peercred(struct sock *sk) { const struct cred *old_cred; struct pid *old_pid; @@ -757,8 +763,7 @@ static void init_peercred(struct sock *sk) spin_lock(&sk->sk_peer_lock); old_pid = sk->sk_peer_pid; old_cred = sk->sk_peer_cred; - sk->sk_peer_pid = get_pid(task_tgid(current)); - sk->sk_peer_cred = get_current_cred(); + init_peercred(sk); spin_unlock(&sk->sk_peer_lock); put_pid(old_pid); @@ -810,7 +815,7 @@ static int unix_listen(struct socket *sock, int backlog) WRITE_ONCE(sk->sk_state, TCP_LISTEN); /* set credentials so connect can copy them */ - init_peercred(sk); + update_peercred(sk); err = 0; out_unlock: -- cgit v1.2.3-58-ga151 From e4bd881d987121dbf1a288641491955a53d9f8f7 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 20 Jun 2024 13:56:22 -0700 Subject: af_unix: Remove put_pid()/put_cred() in copy_peercred(). When (AF_UNIX, SOCK_STREAM) socket connect()s to a listening socket, the listener's sk_peer_pid/sk_peer_cred are copied to the client in copy_peercred(). Then, the client's sk_peer_pid and sk_peer_cred are always NULL, so we need not call put_pid() and put_cred() there. 
Signed-off-by: Kuniyuki Iwashima Signed-off-by: Paolo Abeni --- net/unix/af_unix.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'net') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index d11664c2faad..3d0ace7ca017 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -772,9 +772,6 @@ static void update_peercred(struct sock *sk) static void copy_peercred(struct sock *sk, struct sock *peersk) { - const struct cred *old_cred; - struct pid *old_pid; - if (sk < peersk) { spin_lock(&sk->sk_peer_lock); spin_lock_nested(&peersk->sk_peer_lock, SINGLE_DEPTH_NESTING); @@ -782,16 +779,12 @@ static void copy_peercred(struct sock *sk, struct sock *peersk) spin_lock(&peersk->sk_peer_lock); spin_lock_nested(&sk->sk_peer_lock, SINGLE_DEPTH_NESTING); } - old_pid = sk->sk_peer_pid; - old_cred = sk->sk_peer_cred; + sk->sk_peer_pid = get_pid(peersk->sk_peer_pid); sk->sk_peer_cred = get_cred(peersk->sk_peer_cred); spin_unlock(&sk->sk_peer_lock); spin_unlock(&peersk->sk_peer_lock); - - put_pid(old_pid); - put_cred(old_cred); } static int unix_listen(struct socket *sock, int backlog) -- cgit v1.2.3-58-ga151 From 22e5751b0524fedd4f345412d8d3394387471ab7 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 20 Jun 2024 13:56:23 -0700 Subject: af_unix: Don't use spin_lock_nested() in copy_peercred(). When (AF_UNIX, SOCK_STREAM) socket connect()s to a listening socket, the listener's sk_peer_pid/sk_peer_cred are copied to the client in copy_peercred(). Then, two sk_peer_locks are held there; one is client's and another is listener's. However, the latter is not needed because we hold the listner's unix_state_lock() there and unix_listen() cannot update the cred concurrently. Let's drop the unnecessary spin_lock() and use the bare spin_lock() for the client to protect concurrent read by getsockopt(SO_PEERCRED). Signed-off-by: Kuniyuki Iwashima Signed-off-by: Paolo Abeni --- net/unix/af_unix.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 3d0ace7ca017..103a7909cb1a 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -772,19 +772,12 @@ static void update_peercred(struct sock *sk) static void copy_peercred(struct sock *sk, struct sock *peersk) { - if (sk < peersk) { - spin_lock(&sk->sk_peer_lock); - spin_lock_nested(&peersk->sk_peer_lock, SINGLE_DEPTH_NESTING); - } else { - spin_lock(&peersk->sk_peer_lock); - spin_lock_nested(&sk->sk_peer_lock, SINGLE_DEPTH_NESTING); - } + lockdep_assert_held(&unix_sk(peersk)->lock); - sk->sk_peer_pid = get_pid(peersk->sk_peer_pid); + spin_lock(&sk->sk_peer_lock); + sk->sk_peer_pid = get_pid(peersk->sk_peer_pid); sk->sk_peer_cred = get_cred(peersk->sk_peer_cred); - spin_unlock(&sk->sk_peer_lock); - spin_unlock(&peersk->sk_peer_lock); } static int unix_listen(struct socket *sock, int backlog) -- cgit v1.2.3-58-ga151 From a8a8d89dbd2bd2b762b6d0226a1201ec33f7aeac Mon Sep 17 00:00:00 2001 From: James Chapman Date: Mon, 24 Jun 2024 09:29:45 +0100 Subject: l2tp: remove incorrect __rcu attribute This fixes a sparse warning. 
Fixes: d18d3f0a24fc ("l2tp: replace hlist with simple list for per-tunnel session list") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202406220754.evK8Hrjw-lkp@intel.com/ Signed-off-by: James Chapman Link: https://patch.msgid.link/20240624082945.1925009-1-jchapman@katalix.com Signed-off-by: Jakub Kicinski --- net/l2tp/l2tp_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index be4bcbf291a1..64f446f0930b 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1290,7 +1290,7 @@ static void l2tp_session_unhash(struct l2tp_session *session) static void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel) { struct l2tp_session *session; - struct list_head __rcu *pos; + struct list_head *pos; struct list_head *tmp; spin_lock_bh(&tunnel->list_lock); -- cgit v1.2.3-58-ga151 From 605efd54b50437ed9f3915690539d0afddca9d95 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 13 May 2024 15:00:41 +0200 Subject: netfilter: nf_tables: make struct nft_trans first member of derived subtypes There is 'struct nft_trans', the basic structure for all transactional objects, and the the various different transactional objects, such as nft_trans_table, chain, set, set_elem and so on. Right now 'struct nft_trans' uses a flexible member at the tail (data[]), and casting is needed to access the actual type-specific members. Change this to make the hierarchy visible in source code, i.e. make struct nft_trans the first member of all derived subtypes. This has several advantages: 1. pahole output reflects the real size needed by the particular subtype 2. allows to use container_of() to convert the base type to the actual object type instead of casting ->data to the overlay structure. 3. It makes it easy to add intermediate types. 'struct nft_trans' contains a 'binding_list' that is only needed by two subtypes, so it should be part of the two subtypes, not in the base structure. But that makes it hard to interate over the binding_list, because there is no common base structure. A follow patch moves the bind list to a new struct: struct nft_trans_binding { struct nft_trans nft_trans; struct list_head binding_list; }; ... and makes that structure the new 'first member' for both nft_trans_chain and nft_trans_set. No functional change intended in this patch. Some numbers: struct nft_trans { /* size: 88, cachelines: 2, members: 5 */ struct nft_trans_chain { /* size: 152, cachelines: 3, members: 10 */ struct nft_trans_elem { /* size: 112, cachelines: 2, members: 4 */ struct nft_trans_flowtable { /* size: 128, cachelines: 2, members: 5 */ struct nft_trans_obj { /* size: 112, cachelines: 2, members: 4 */ struct nft_trans_rule { /* size: 112, cachelines: 2, members: 5 */ struct nft_trans_set { /* size: 120, cachelines: 2, members: 8 */ struct nft_trans_table { /* size: 96, cachelines: 2, members: 2 */ Of particular interest is nft_trans_elem, which needs to be allocated once for each pending (to be added or removed) set element. Add BUILD_BUG_ON to check struct nft_trans is placed at the top of the container structure. 
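The layout change boils down to the classic first-member/container_of pattern; a minimal, generic illustration (not the nf_tables code itself):

	struct base {			/* the 'struct nft_trans' role */
		struct list_head list;
		int msg_type;
	};

	struct derived {		/* e.g. the 'nft_trans_elem' role */
		struct base base;	/* MUST stay the first member */
		void *priv;
	};

	static inline struct derived *to_derived(struct base *b)
	{
		/* type-checked conversion instead of casting an opaque
		 * data[] overlay; BUILD_BUG_ON(offsetof(struct derived,
		 * base) != 0) documents the first-member invariant */
		return container_of(b, struct derived, base);
	}

Compared with the old 'char data[]' overlay, pahole now reports the true size of each subtype and the conversion is type-checked by the compiler.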
Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 162 ++++++++++++++++++++++---------------- net/netfilter/nf_tables_api.c | 18 +++-- 2 files changed, 105 insertions(+), 75 deletions(-) (limited to 'net') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 2796153b03da..b25df037fceb 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1608,14 +1608,16 @@ static inline int nft_set_elem_is_dead(const struct nft_set_ext *ext) } /** - * struct nft_trans - nf_tables object update in transaction + * struct nft_trans - nf_tables object update in transaction * - * @list: used internally - * @binding_list: list of objects with possible bindings - * @msg_type: message type - * @put_net: ctx->net needs to be put - * @ctx: transaction context - * @data: internal information related to the transaction + * @list: used internally + * @binding_list: list of objects with possible bindings + * @msg_type: message type + * @put_net: ctx->net needs to be put + * @ctx: transaction context + * + * This is the information common to all objects in the transaction, + * this must always be the first member of derived sub-types. */ struct nft_trans { struct list_head list; @@ -1623,26 +1625,29 @@ struct nft_trans { int msg_type; bool put_net; struct nft_ctx ctx; - char data[]; }; struct nft_trans_rule { + struct nft_trans nft_trans; struct nft_rule *rule; struct nft_flow_rule *flow; u32 rule_id; bool bound; }; -#define nft_trans_rule(trans) \ - (((struct nft_trans_rule *)trans->data)->rule) -#define nft_trans_flow_rule(trans) \ - (((struct nft_trans_rule *)trans->data)->flow) -#define nft_trans_rule_id(trans) \ - (((struct nft_trans_rule *)trans->data)->rule_id) -#define nft_trans_rule_bound(trans) \ - (((struct nft_trans_rule *)trans->data)->bound) +#define nft_trans_container_rule(trans) \ + container_of(trans, struct nft_trans_rule, nft_trans) +#define nft_trans_rule(trans) \ + nft_trans_container_rule(trans)->rule +#define nft_trans_flow_rule(trans) \ + nft_trans_container_rule(trans)->flow +#define nft_trans_rule_id(trans) \ + nft_trans_container_rule(trans)->rule_id +#define nft_trans_rule_bound(trans) \ + nft_trans_container_rule(trans)->bound struct nft_trans_set { + struct nft_trans nft_trans; struct nft_set *set; u32 set_id; u32 gc_int; @@ -1652,22 +1657,25 @@ struct nft_trans_set { u32 size; }; -#define nft_trans_set(trans) \ - (((struct nft_trans_set *)trans->data)->set) -#define nft_trans_set_id(trans) \ - (((struct nft_trans_set *)trans->data)->set_id) -#define nft_trans_set_bound(trans) \ - (((struct nft_trans_set *)trans->data)->bound) -#define nft_trans_set_update(trans) \ - (((struct nft_trans_set *)trans->data)->update) -#define nft_trans_set_timeout(trans) \ - (((struct nft_trans_set *)trans->data)->timeout) -#define nft_trans_set_gc_int(trans) \ - (((struct nft_trans_set *)trans->data)->gc_int) -#define nft_trans_set_size(trans) \ - (((struct nft_trans_set *)trans->data)->size) +#define nft_trans_container_set(trans) \ + container_of(trans, struct nft_trans_set, nft_trans) +#define nft_trans_set(trans) \ + nft_trans_container_set(trans)->set +#define nft_trans_set_id(trans) \ + nft_trans_container_set(trans)->set_id +#define nft_trans_set_bound(trans) \ + nft_trans_container_set(trans)->bound +#define nft_trans_set_update(trans) \ + nft_trans_container_set(trans)->update +#define nft_trans_set_timeout(trans) \ + nft_trans_container_set(trans)->timeout +#define 
nft_trans_set_gc_int(trans) \ + nft_trans_container_set(trans)->gc_int +#define nft_trans_set_size(trans) \ + nft_trans_container_set(trans)->size struct nft_trans_chain { + struct nft_trans nft_trans; struct nft_chain *chain; bool update; char *name; @@ -1679,73 +1687,87 @@ struct nft_trans_chain { struct list_head hook_list; }; -#define nft_trans_chain(trans) \ - (((struct nft_trans_chain *)trans->data)->chain) -#define nft_trans_chain_update(trans) \ - (((struct nft_trans_chain *)trans->data)->update) -#define nft_trans_chain_name(trans) \ - (((struct nft_trans_chain *)trans->data)->name) -#define nft_trans_chain_stats(trans) \ - (((struct nft_trans_chain *)trans->data)->stats) -#define nft_trans_chain_policy(trans) \ - (((struct nft_trans_chain *)trans->data)->policy) -#define nft_trans_chain_bound(trans) \ - (((struct nft_trans_chain *)trans->data)->bound) -#define nft_trans_chain_id(trans) \ - (((struct nft_trans_chain *)trans->data)->chain_id) -#define nft_trans_basechain(trans) \ - (((struct nft_trans_chain *)trans->data)->basechain) -#define nft_trans_chain_hooks(trans) \ - (((struct nft_trans_chain *)trans->data)->hook_list) +#define nft_trans_container_chain(trans) \ + container_of(trans, struct nft_trans_chain, nft_trans) +#define nft_trans_chain(trans) \ + nft_trans_container_chain(trans)->chain +#define nft_trans_chain_update(trans) \ + nft_trans_container_chain(trans)->update +#define nft_trans_chain_name(trans) \ + nft_trans_container_chain(trans)->name +#define nft_trans_chain_stats(trans) \ + nft_trans_container_chain(trans)->stats +#define nft_trans_chain_policy(trans) \ + nft_trans_container_chain(trans)->policy +#define nft_trans_chain_bound(trans) \ + nft_trans_container_chain(trans)->bound +#define nft_trans_chain_id(trans) \ + nft_trans_container_chain(trans)->chain_id +#define nft_trans_basechain(trans) \ + nft_trans_container_chain(trans)->basechain +#define nft_trans_chain_hooks(trans) \ + nft_trans_container_chain(trans)->hook_list struct nft_trans_table { + struct nft_trans nft_trans; bool update; }; -#define nft_trans_table_update(trans) \ - (((struct nft_trans_table *)trans->data)->update) +#define nft_trans_container_table(trans) \ + container_of(trans, struct nft_trans_table, nft_trans) +#define nft_trans_table_update(trans) \ + nft_trans_container_table(trans)->update struct nft_trans_elem { + struct nft_trans nft_trans; struct nft_set *set; struct nft_elem_priv *elem_priv; bool bound; }; -#define nft_trans_elem_set(trans) \ - (((struct nft_trans_elem *)trans->data)->set) -#define nft_trans_elem_priv(trans) \ - (((struct nft_trans_elem *)trans->data)->elem_priv) -#define nft_trans_elem_set_bound(trans) \ - (((struct nft_trans_elem *)trans->data)->bound) +#define nft_trans_container_elem(t) \ + container_of(t, struct nft_trans_elem, nft_trans) +#define nft_trans_elem_set(trans) \ + nft_trans_container_elem(trans)->set +#define nft_trans_elem_priv(trans) \ + nft_trans_container_elem(trans)->elem_priv +#define nft_trans_elem_set_bound(trans) \ + nft_trans_container_elem(trans)->bound struct nft_trans_obj { + struct nft_trans nft_trans; struct nft_object *obj; struct nft_object *newobj; bool update; }; -#define nft_trans_obj(trans) \ - (((struct nft_trans_obj *)trans->data)->obj) -#define nft_trans_obj_newobj(trans) \ - (((struct nft_trans_obj *)trans->data)->newobj) -#define nft_trans_obj_update(trans) \ - (((struct nft_trans_obj *)trans->data)->update) +#define nft_trans_container_obj(t) \ + container_of(t, struct nft_trans_obj, nft_trans) +#define 
nft_trans_obj(trans) \ + nft_trans_container_obj(trans)->obj +#define nft_trans_obj_newobj(trans) \ + nft_trans_container_obj(trans)->newobj +#define nft_trans_obj_update(trans) \ + nft_trans_container_obj(trans)->update struct nft_trans_flowtable { + struct nft_trans nft_trans; struct nft_flowtable *flowtable; bool update; struct list_head hook_list; u32 flags; }; -#define nft_trans_flowtable(trans) \ - (((struct nft_trans_flowtable *)trans->data)->flowtable) -#define nft_trans_flowtable_update(trans) \ - (((struct nft_trans_flowtable *)trans->data)->update) -#define nft_trans_flowtable_hooks(trans) \ - (((struct nft_trans_flowtable *)trans->data)->hook_list) -#define nft_trans_flowtable_flags(trans) \ - (((struct nft_trans_flowtable *)trans->data)->flags) +#define nft_trans_container_flowtable(t) \ + container_of(t, struct nft_trans_flowtable, nft_trans) +#define nft_trans_flowtable(trans) \ + nft_trans_container_flowtable(trans)->flowtable +#define nft_trans_flowtable_update(trans) \ + nft_trans_container_flowtable(trans)->update +#define nft_trans_flowtable_hooks(trans) \ + nft_trans_container_flowtable(trans)->hook_list +#define nft_trans_flowtable_flags(trans) \ + nft_trans_container_flowtable(trans)->flags #define NFT_TRANS_GC_BATCHCOUNT 256 diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index be3b4c90d2ed..19edd1bcecef 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -153,7 +153,7 @@ static struct nft_trans *nft_trans_alloc_gfp(const struct nft_ctx *ctx, { struct nft_trans *trans; - trans = kzalloc(sizeof(struct nft_trans) + size, gfp); + trans = kzalloc(size, gfp); if (trans == NULL) return NULL; @@ -10348,7 +10348,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) trans->msg_type, GFP_KERNEL); break; case NFT_MSG_NEWSETELEM: - te = (struct nft_trans_elem *)trans->data; + te = nft_trans_container_elem(trans); nft_setelem_activate(net, te->set, te->elem_priv); nf_tables_setelem_notify(&trans->ctx, te->set, @@ -10363,7 +10363,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) break; case NFT_MSG_DELSETELEM: case NFT_MSG_DESTROYSETELEM: - te = (struct nft_trans_elem *)trans->data; + te = nft_trans_container_elem(trans); nf_tables_setelem_notify(&trans->ctx, te->set, te->elem_priv, @@ -10643,7 +10643,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) nft_trans_destroy(trans); break; } - te = (struct nft_trans_elem *)trans->data; + te = nft_trans_container_elem(trans); nft_setelem_remove(net, te->set, te->elem_priv); if (!nft_setelem_is_catchall(te->set, te->elem_priv)) atomic_dec(&te->set->nelems); @@ -10656,7 +10656,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) break; case NFT_MSG_DELSETELEM: case NFT_MSG_DESTROYSETELEM: - te = (struct nft_trans_elem *)trans->data; + te = nft_trans_container_elem(trans); if (!nft_setelem_active_next(net, te->set, te->elem_priv)) { nft_setelem_data_activate(net, te->set, te->elem_priv); @@ -11588,6 +11588,14 @@ static int __init nf_tables_module_init(void) { int err; + BUILD_BUG_ON(offsetof(struct nft_trans_table, nft_trans) != 0); + BUILD_BUG_ON(offsetof(struct nft_trans_chain, nft_trans) != 0); + BUILD_BUG_ON(offsetof(struct nft_trans_rule, nft_trans) != 0); + BUILD_BUG_ON(offsetof(struct nft_trans_set, nft_trans) != 0); + BUILD_BUG_ON(offsetof(struct nft_trans_elem, nft_trans) != 0); + BUILD_BUG_ON(offsetof(struct nft_trans_obj, nft_trans) != 0); + BUILD_BUG_ON(offsetof(struct 
nft_trans_flowtable, nft_trans) != 0); + err = register_pernet_subsys(&nf_tables_net_ops); if (err < 0) return err; -- cgit v1.2.3-58-ga151 From 17d8f3ad36a5fa5c93afab90ed03ba7ec748dd03 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 24 Jun 2024 20:53:16 +0200 Subject: netfilter: nf_tables: move bind list_head into relevant subtypes Only nft_trans_chain and nft_trans_set subtypes use the trans->binding_list member. Add a new common binding subtype and move the member there. This reduces size of all other subtypes by 16 bytes on 64bit platforms. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 26 +++++++++----- net/netfilter/nf_tables_api.c | 71 +++++++++++++++++++++++++++++++-------- 2 files changed, 75 insertions(+), 22 deletions(-) (limited to 'net') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index b25df037fceb..f72448095833 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1611,7 +1611,6 @@ static inline int nft_set_elem_is_dead(const struct nft_set_ext *ext) * struct nft_trans - nf_tables object update in transaction * * @list: used internally - * @binding_list: list of objects with possible bindings * @msg_type: message type * @put_net: ctx->net needs to be put * @ctx: transaction context @@ -1621,12 +1620,23 @@ static inline int nft_set_elem_is_dead(const struct nft_set_ext *ext) */ struct nft_trans { struct list_head list; - struct list_head binding_list; int msg_type; bool put_net; struct nft_ctx ctx; }; +/** + * struct nft_trans_binding - nf_tables object with binding support in transaction + * @nft_trans: base structure, MUST be first member + * @binding_list: list of objects with possible bindings + * + * This is the base type used by objects that can be bound to a chain. 
+ */ +struct nft_trans_binding { + struct nft_trans nft_trans; + struct list_head binding_list; +}; + struct nft_trans_rule { struct nft_trans nft_trans; struct nft_rule *rule; @@ -1647,7 +1657,7 @@ struct nft_trans_rule { nft_trans_container_rule(trans)->bound struct nft_trans_set { - struct nft_trans nft_trans; + struct nft_trans_binding nft_trans_binding; struct nft_set *set; u32 set_id; u32 gc_int; @@ -1657,8 +1667,8 @@ struct nft_trans_set { u32 size; }; -#define nft_trans_container_set(trans) \ - container_of(trans, struct nft_trans_set, nft_trans) +#define nft_trans_container_set(t) \ + container_of(t, struct nft_trans_set, nft_trans_binding.nft_trans) #define nft_trans_set(trans) \ nft_trans_container_set(trans)->set #define nft_trans_set_id(trans) \ @@ -1675,7 +1685,7 @@ struct nft_trans_set { nft_trans_container_set(trans)->size struct nft_trans_chain { - struct nft_trans nft_trans; + struct nft_trans_binding nft_trans_binding; struct nft_chain *chain; bool update; char *name; @@ -1687,8 +1697,8 @@ struct nft_trans_chain { struct list_head hook_list; }; -#define nft_trans_container_chain(trans) \ - container_of(trans, struct nft_trans_chain, nft_trans) +#define nft_trans_container_chain(t) \ + container_of(t, struct nft_trans_chain, nft_trans_binding.nft_trans) #define nft_trans_chain(trans) \ nft_trans_container_chain(trans)->chain #define nft_trans_chain_update(trans) \ diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 19edd1bcecef..c950938ef612 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -158,7 +158,6 @@ static struct nft_trans *nft_trans_alloc_gfp(const struct nft_ctx *ctx, return NULL; INIT_LIST_HEAD(&trans->list); - INIT_LIST_HEAD(&trans->binding_list); trans->msg_type = msg_type; trans->ctx = *ctx; @@ -171,10 +170,26 @@ static struct nft_trans *nft_trans_alloc(const struct nft_ctx *ctx, return nft_trans_alloc_gfp(ctx, msg_type, size, GFP_KERNEL); } +static struct nft_trans_binding *nft_trans_get_binding(struct nft_trans *trans) +{ + switch (trans->msg_type) { + case NFT_MSG_NEWCHAIN: + case NFT_MSG_NEWSET: + return container_of(trans, struct nft_trans_binding, nft_trans); + } + + return NULL; +} + static void nft_trans_list_del(struct nft_trans *trans) { + struct nft_trans_binding *trans_binding; + list_del(&trans->list); - list_del(&trans->binding_list); + + trans_binding = nft_trans_get_binding(trans); + if (trans_binding) + list_del(&trans_binding->binding_list); } static void nft_trans_destroy(struct nft_trans *trans) @@ -372,21 +387,26 @@ static void nf_tables_unregister_hook(struct net *net, static void nft_trans_commit_list_add_tail(struct net *net, struct nft_trans *trans) { struct nftables_pernet *nft_net = nft_pernet(net); + struct nft_trans_binding *binding; + + list_add_tail(&trans->list, &nft_net->commit_list); + + binding = nft_trans_get_binding(trans); + if (!binding) + return; switch (trans->msg_type) { case NFT_MSG_NEWSET: if (!nft_trans_set_update(trans) && nft_set_is_anonymous(nft_trans_set(trans))) - list_add_tail(&trans->binding_list, &nft_net->binding_list); + list_add_tail(&binding->binding_list, &nft_net->binding_list); break; case NFT_MSG_NEWCHAIN: if (!nft_trans_chain_update(trans) && nft_chain_binding(nft_trans_chain(trans))) - list_add_tail(&trans->binding_list, &nft_net->binding_list); + list_add_tail(&binding->binding_list, &nft_net->binding_list); break; } - - list_add_tail(&trans->list, &nft_net->commit_list); } static int nft_trans_table_add(struct nft_ctx *ctx, int 
msg_type) @@ -416,11 +436,27 @@ static int nft_deltable(struct nft_ctx *ctx) return err; } -static struct nft_trans *nft_trans_chain_add(struct nft_ctx *ctx, int msg_type) +static struct nft_trans * +nft_trans_alloc_chain(const struct nft_ctx *ctx, int msg_type) { + struct nft_trans_chain *trans_chain; struct nft_trans *trans; trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_chain)); + if (!trans) + return NULL; + + trans_chain = nft_trans_container_chain(trans); + INIT_LIST_HEAD(&trans_chain->nft_trans_binding.binding_list); + + return trans; +} + +static struct nft_trans *nft_trans_chain_add(struct nft_ctx *ctx, int msg_type) +{ + struct nft_trans *trans; + + trans = nft_trans_alloc_chain(ctx, msg_type); if (trans == NULL) return ERR_PTR(-ENOMEM); @@ -560,12 +596,16 @@ static int __nft_trans_set_add(const struct nft_ctx *ctx, int msg_type, struct nft_set *set, const struct nft_set_desc *desc) { + struct nft_trans_set *trans_set; struct nft_trans *trans; trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_set)); if (trans == NULL) return -ENOMEM; + trans_set = nft_trans_container_set(trans); + INIT_LIST_HEAD(&trans_set->nft_trans_binding.binding_list); + if (msg_type == NFT_MSG_NEWSET && ctx->nla[NFTA_SET_ID] && !desc) { nft_trans_set_id(trans) = ntohl(nla_get_be32(ctx->nla[NFTA_SET_ID])); @@ -2698,8 +2738,7 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy, } err = -ENOMEM; - trans = nft_trans_alloc(ctx, NFT_MSG_NEWCHAIN, - sizeof(struct nft_trans_chain)); + trans = nft_trans_alloc_chain(ctx, NFT_MSG_NEWCHAIN); if (trans == NULL) goto err_trans; @@ -2915,8 +2954,7 @@ static int nft_delchain_hook(struct nft_ctx *ctx, list_move(&hook->list, &chain_del_list); } - trans = nft_trans_alloc(ctx, NFT_MSG_DELCHAIN, - sizeof(struct nft_trans_chain)); + trans = nft_trans_alloc_chain(ctx, NFT_MSG_DELCHAIN); if (!trans) { err = -ENOMEM; goto err_chain_del_hook; @@ -10147,6 +10185,7 @@ static void nft_gc_seq_end(struct nftables_pernet *nft_net, unsigned int gc_seq) static int nf_tables_commit(struct net *net, struct sk_buff *skb) { struct nftables_pernet *nft_net = nft_pernet(net); + struct nft_trans_binding *trans_binding; struct nft_trans *trans, *next; unsigned int base_seq, gc_seq; LIST_HEAD(set_update_list); @@ -10161,7 +10200,8 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) return 0; } - list_for_each_entry(trans, &nft_net->binding_list, binding_list) { + list_for_each_entry(trans_binding, &nft_net->binding_list, binding_list) { + trans = &trans_binding->nft_trans; switch (trans->msg_type) { case NFT_MSG_NEWSET: if (!nft_trans_set_update(trans) && @@ -10179,6 +10219,9 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) return -EINVAL; } break; + default: + WARN_ONCE(1, "Unhandled bind type %d", trans->msg_type); + break; } } @@ -11589,9 +11632,9 @@ static int __init nf_tables_module_init(void) int err; BUILD_BUG_ON(offsetof(struct nft_trans_table, nft_trans) != 0); - BUILD_BUG_ON(offsetof(struct nft_trans_chain, nft_trans) != 0); + BUILD_BUG_ON(offsetof(struct nft_trans_chain, nft_trans_binding.nft_trans) != 0); BUILD_BUG_ON(offsetof(struct nft_trans_rule, nft_trans) != 0); - BUILD_BUG_ON(offsetof(struct nft_trans_set, nft_trans) != 0); + BUILD_BUG_ON(offsetof(struct nft_trans_set, nft_trans_binding.nft_trans) != 0); BUILD_BUG_ON(offsetof(struct nft_trans_elem, nft_trans) != 0); BUILD_BUG_ON(offsetof(struct nft_trans_obj, nft_trans) != 0); BUILD_BUG_ON(offsetof(struct nft_trans_flowtable, nft_trans) != 0); 
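The two patches above hinge on a single C idiom: every transaction subtype embeds the base struct as its first member, an accessor macro recovers the subtype from a base pointer with container_of(), and the BUILD_BUG_ON() checks at module init pin that member to offset zero so the conversion is effectively a cast. A minimal userspace sketch of the idiom follows; the names (trans, trans_rule, to_trans_rule) are invented for illustration and are not the kernel's.

#include <stddef.h>
#include <stdio.h>

/* plain-C stand-in for the kernel's container_of() */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct trans {			/* the common part, like struct nft_trans */
	int msg_type;
};

struct trans_rule {		/* a derived part, like struct nft_trans_rule */
	struct trans base;	/* must stay the first member */
	int rule_id;
};

/* compile-time cousin of the BUILD_BUG_ON(offsetof(...) != 0) checks above */
_Static_assert(offsetof(struct trans_rule, base) == 0,
	       "base must be the first member");

static struct trans_rule *to_trans_rule(struct trans *t)
{
	return container_of(t, struct trans_rule, base);
}

int main(void)
{
	struct trans_rule r = { .base.msg_type = 7, .rule_id = 42 };
	struct trans *t = &r.base;	/* generic pointer, e.g. taken off a list */

	printf("msg_type=%d rule_id=%d\n", t->msg_type, to_trans_rule(t)->rule_id);
	return 0;
}

Keeping the base at offset zero also means code that still treats the pointer as the base type keeps working unchanged while call sites are converted incrementally.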
-- cgit v1.2.3-58-ga151 From 06fcaca2ed1f7a1d02b18364fceac4525332f178 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 13 May 2024 15:00:44 +0200 Subject: netfilter: nf_tables: reduce trans->ctx.table references nft_ctx is huge, it should not be stored in nft_trans at all, most information is not needed. Preparation patch to remove trans->ctx, no change in behaviour intended. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 79 ++++++++++++++++++++++--------------------- 1 file changed, 41 insertions(+), 38 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index c950938ef612..60c435774db8 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -9472,14 +9472,15 @@ static void nft_chain_commit_drop_policy(struct nft_trans *trans) static void nft_chain_commit_update(struct nft_trans *trans) { + struct nft_table *table = trans->ctx.table; struct nft_base_chain *basechain; if (nft_trans_chain_name(trans)) { - rhltable_remove(&trans->ctx.table->chains_ht, + rhltable_remove(&table->chains_ht, &trans->ctx.chain->rhlhead, nft_chain_ht_params); swap(trans->ctx.chain->name, nft_trans_chain_name(trans)); - rhltable_insert_key(&trans->ctx.table->chains_ht, + rhltable_insert_key(&table->chains_ht, trans->ctx.chain->name, &trans->ctx.chain->rhlhead, nft_chain_ht_params); @@ -10237,9 +10238,10 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) /* 1. Allocate space for next generation rules_gen_X[] */ list_for_each_entry_safe(trans, next, &nft_net->commit_list, list) { + struct nft_table *table = trans->ctx.table; int ret; - ret = nf_tables_commit_audit_alloc(&adl, trans->ctx.table); + ret = nf_tables_commit_audit_alloc(&adl, table); if (ret) { nf_tables_commit_chain_prepare_cancel(net); nf_tables_commit_audit_free(&adl); @@ -10280,28 +10282,29 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) net->nft.gencursor = nft_gencursor_next(net); list_for_each_entry_safe(trans, next, &nft_net->commit_list, list) { - nf_tables_commit_audit_collect(&adl, trans->ctx.table, - trans->msg_type); + struct nft_table *table = trans->ctx.table; + + nf_tables_commit_audit_collect(&adl, table, trans->msg_type); switch (trans->msg_type) { case NFT_MSG_NEWTABLE: if (nft_trans_table_update(trans)) { - if (!(trans->ctx.table->flags & __NFT_TABLE_F_UPDATE)) { + if (!(table->flags & __NFT_TABLE_F_UPDATE)) { nft_trans_destroy(trans); break; } - if (trans->ctx.table->flags & NFT_TABLE_F_DORMANT) - nf_tables_table_disable(net, trans->ctx.table); + if (table->flags & NFT_TABLE_F_DORMANT) + nf_tables_table_disable(net, table); - trans->ctx.table->flags &= ~__NFT_TABLE_F_UPDATE; + table->flags &= ~__NFT_TABLE_F_UPDATE; } else { - nft_clear(net, trans->ctx.table); + nft_clear(net, table); } nf_tables_table_notify(&trans->ctx, NFT_MSG_NEWTABLE); nft_trans_destroy(trans); break; case NFT_MSG_DELTABLE: case NFT_MSG_DESTROYTABLE: - list_del_rcu(&trans->ctx.table->list); + list_del_rcu(&table->list); nf_tables_table_notify(&trans->ctx, trans->msg_type); break; case NFT_MSG_NEWCHAIN: @@ -10324,7 +10327,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) if (nft_trans_chain_update(trans)) { nf_tables_chain_notify(&trans->ctx, NFT_MSG_DELCHAIN, &nft_trans_chain_hooks(trans)); - if (!(trans->ctx.table->flags & NFT_TABLE_F_DORMANT)) { + if (!(table->flags & NFT_TABLE_F_DORMANT)) { nft_netdev_unregister_hooks(net, &nft_trans_chain_hooks(trans), true); 
@@ -10333,8 +10336,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) nft_chain_del(trans->ctx.chain); nf_tables_chain_notify(&trans->ctx, NFT_MSG_DELCHAIN, NULL); - nf_tables_unregister_hook(trans->ctx.net, - trans->ctx.table, + nf_tables_unregister_hook(trans->ctx.net, table, trans->ctx.chain); } break; @@ -10377,7 +10379,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) */ if (nft_set_is_anonymous(nft_trans_set(trans)) && !list_empty(&nft_trans_set(trans)->bindings)) - nft_use_dec(&trans->ctx.table->use); + nft_use_dec(&table->use); } nf_tables_set_notify(&trans->ctx, nft_trans_set(trans), NFT_MSG_NEWSET, GFP_KERNEL); @@ -10575,37 +10577,39 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) list_for_each_entry_safe_reverse(trans, next, &nft_net->commit_list, list) { + struct nft_table *table = trans->ctx.table; + switch (trans->msg_type) { case NFT_MSG_NEWTABLE: if (nft_trans_table_update(trans)) { - if (!(trans->ctx.table->flags & __NFT_TABLE_F_UPDATE)) { + if (!(table->flags & __NFT_TABLE_F_UPDATE)) { nft_trans_destroy(trans); break; } - if (trans->ctx.table->flags & __NFT_TABLE_F_WAS_DORMANT) { - nf_tables_table_disable(net, trans->ctx.table); - trans->ctx.table->flags |= NFT_TABLE_F_DORMANT; - } else if (trans->ctx.table->flags & __NFT_TABLE_F_WAS_AWAKEN) { - trans->ctx.table->flags &= ~NFT_TABLE_F_DORMANT; + if (table->flags & __NFT_TABLE_F_WAS_DORMANT) { + nf_tables_table_disable(net, table); + table->flags |= NFT_TABLE_F_DORMANT; + } else if (table->flags & __NFT_TABLE_F_WAS_AWAKEN) { + table->flags &= ~NFT_TABLE_F_DORMANT; } - if (trans->ctx.table->flags & __NFT_TABLE_F_WAS_ORPHAN) { - trans->ctx.table->flags &= ~NFT_TABLE_F_OWNER; - trans->ctx.table->nlpid = 0; + if (table->flags & __NFT_TABLE_F_WAS_ORPHAN) { + table->flags &= ~NFT_TABLE_F_OWNER; + table->nlpid = 0; } - trans->ctx.table->flags &= ~__NFT_TABLE_F_UPDATE; + table->flags &= ~__NFT_TABLE_F_UPDATE; nft_trans_destroy(trans); } else { - list_del_rcu(&trans->ctx.table->list); + list_del_rcu(&table->list); } break; case NFT_MSG_DELTABLE: case NFT_MSG_DESTROYTABLE: - nft_clear(trans->ctx.net, trans->ctx.table); + nft_clear(trans->ctx.net, table); nft_trans_destroy(trans); break; case NFT_MSG_NEWCHAIN: if (nft_trans_chain_update(trans)) { - if (!(trans->ctx.table->flags & NFT_TABLE_F_DORMANT)) { + if (!(table->flags & NFT_TABLE_F_DORMANT)) { nft_netdev_unregister_hooks(net, &nft_trans_chain_hooks(trans), true); @@ -10618,10 +10622,9 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) nft_trans_destroy(trans); break; } - nft_use_dec_restore(&trans->ctx.table->use); + nft_use_dec_restore(&table->use); nft_chain_del(trans->ctx.chain); - nf_tables_unregister_hook(trans->ctx.net, - trans->ctx.table, + nf_tables_unregister_hook(trans->ctx.net, table, trans->ctx.chain); } break; @@ -10631,7 +10634,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) list_splice(&nft_trans_chain_hooks(trans), &nft_trans_basechain(trans)->hook_list); } else { - nft_use_inc_restore(&trans->ctx.table->use); + nft_use_inc_restore(&table->use); nft_clear(trans->ctx.net, trans->ctx.chain); } nft_trans_destroy(trans); @@ -10664,7 +10667,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) nft_trans_destroy(trans); break; } - nft_use_dec_restore(&trans->ctx.table->use); + nft_use_dec_restore(&table->use); if (nft_trans_set_bound(trans)) { nft_trans_destroy(trans); break; @@ -10674,7 +10677,7 @@ static 
int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) break; case NFT_MSG_DELSET: case NFT_MSG_DESTROYSET: - nft_use_inc_restore(&trans->ctx.table->use); + nft_use_inc_restore(&table->use); nft_clear(trans->ctx.net, nft_trans_set(trans)); if (nft_trans_set(trans)->flags & (NFT_SET_MAP | NFT_SET_OBJECT)) nft_map_activate(&trans->ctx, nft_trans_set(trans)); @@ -10720,13 +10723,13 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) nft_obj_destroy(&trans->ctx, nft_trans_obj_newobj(trans)); nft_trans_destroy(trans); } else { - nft_use_dec_restore(&trans->ctx.table->use); + nft_use_dec_restore(&table->use); nft_obj_del(nft_trans_obj(trans)); } break; case NFT_MSG_DELOBJ: case NFT_MSG_DESTROYOBJ: - nft_use_inc_restore(&trans->ctx.table->use); + nft_use_inc_restore(&table->use); nft_clear(trans->ctx.net, nft_trans_obj(trans)); nft_trans_destroy(trans); break; @@ -10735,7 +10738,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) nft_unregister_flowtable_net_hooks(net, &nft_trans_flowtable_hooks(trans)); } else { - nft_use_dec_restore(&trans->ctx.table->use); + nft_use_dec_restore(&table->use); list_del_rcu(&nft_trans_flowtable(trans)->list); nft_unregister_flowtable_net_hooks(net, &nft_trans_flowtable(trans)->hook_list); @@ -10747,7 +10750,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) list_splice(&nft_trans_flowtable_hooks(trans), &nft_trans_flowtable(trans)->hook_list); } else { - nft_use_inc_restore(&trans->ctx.table->use); + nft_use_inc_restore(&table->use); nft_clear(trans->ctx.net, nft_trans_flowtable(trans)); } nft_trans_destroy(trans); -- cgit v1.2.3-58-ga151 From 8965d42bcf54d42cbc72fe34a9d0ec3f8527debd Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 13 May 2024 15:00:45 +0200 Subject: netfilter: nf_tables: pass nft_chain to destroy function, not nft_ctx It would be better not to store nft_ctx inside the nft_trans object; the netlink ctx structure is huge and most of its information is never needed in places that use trans->ctx. Avoid/reduce its usage if possible; no runtime behaviour change intended.
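The shape of the change is worth spelling out: the destroy path only needed the chain, and everything it previously read through the context (the table, and from it the family) is reachable from the chain's own back-pointer. A schematic sketch of that narrowing, using made-up types rather than the real nft_ctx/nft_chain API:

#include <stdlib.h>

struct table { int family; };
struct chain { struct table *table; };
struct ctx   { struct table *table; struct chain *chain; /* plus much more netlink state */ };

/* before: the wide context is passed in, but only ctx->chain is ever used */
static void chain_destroy_old(struct ctx *ctx)
{
	free(ctx->chain);
}

/* after: pass the object itself; it reaches its table (and family) on its own */
static void chain_destroy_new(struct chain *chain)
{
	int family = chain->table->family;	/* e.g. selects netdev-specific teardown */

	(void)family;
	free(chain);
}

int main(void)
{
	struct table t = { .family = 2 };
	struct chain *c1 = malloc(sizeof(*c1));
	struct chain *c2 = malloc(sizeof(*c2));
	struct ctx ctx = { .table = &t, .chain = c2 };

	if (!c1 || !c2)
		return 1;
	c1->table = &t;
	c2->table = &t;
	chain_destroy_new(c1);
	chain_destroy_old(&ctx);
	return 0;
}

Narrower parameters also make it obvious at the call site what a function can touch, which is what lets the later patches drop the embedded context entirely.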
Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 2 +- net/netfilter/nf_tables_api.c | 17 ++++++++--------- net/netfilter/nft_immediate.c | 2 +- 3 files changed, 10 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 1f0607b671ac..328fdc140551 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1171,7 +1171,7 @@ static inline bool nft_chain_is_bound(struct nft_chain *chain) int nft_chain_add(struct nft_table *table, struct nft_chain *chain); void nft_chain_del(struct nft_chain *chain); -void nf_tables_chain_destroy(struct nft_ctx *ctx); +void nf_tables_chain_destroy(struct nft_chain *chain); struct nft_stats { u64 bytes; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 60c435774db8..bdc2d7f781ca 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2118,9 +2118,9 @@ static void nf_tables_chain_free_chain_rules(struct nft_chain *chain) kvfree(chain->blob_next); } -void nf_tables_chain_destroy(struct nft_ctx *ctx) +void nf_tables_chain_destroy(struct nft_chain *chain) { - struct nft_chain *chain = ctx->chain; + const struct nft_table *table = chain->table; struct nft_hook *hook, *next; if (WARN_ON(chain->use > 0)) @@ -2132,7 +2132,7 @@ void nf_tables_chain_destroy(struct nft_ctx *ctx) if (nft_is_base_chain(chain)) { struct nft_base_chain *basechain = nft_base_chain(chain); - if (nft_base_chain_netdev(ctx->family, basechain->ops.hooknum)) { + if (nft_base_chain_netdev(table->family, basechain->ops.hooknum)) { list_for_each_entry_safe(hook, next, &basechain->hook_list, list) { list_del_rcu(&hook->list); @@ -2621,7 +2621,7 @@ err_chain_add: err_trans: nft_use_dec_restore(&table->use); err_destroy_chain: - nf_tables_chain_destroy(ctx); + nf_tables_chain_destroy(chain); return err; } @@ -9532,7 +9532,7 @@ static void nft_commit_release(struct nft_trans *trans) if (nft_trans_chain_update(trans)) nft_hooks_destroy(&nft_trans_chain_hooks(trans)); else - nf_tables_chain_destroy(&trans->ctx); + nf_tables_chain_destroy(nft_trans_chain(trans)); break; case NFT_MSG_DELRULE: case NFT_MSG_DESTROYRULE: @@ -10524,7 +10524,7 @@ static void nf_tables_abort_release(struct nft_trans *trans) if (nft_trans_chain_update(trans)) nft_hooks_destroy(&nft_trans_chain_hooks(trans)); else - nf_tables_chain_destroy(&trans->ctx); + nf_tables_chain_destroy(nft_trans_chain(trans)); break; case NFT_MSG_NEWRULE: nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans)); @@ -11411,7 +11411,7 @@ int __nft_release_basechain(struct nft_ctx *ctx) } nft_chain_del(ctx->chain); nft_use_dec(&ctx->table->use); - nf_tables_chain_destroy(ctx); + nf_tables_chain_destroy(ctx->chain); return 0; } @@ -11486,10 +11486,9 @@ static void __nft_release_table(struct net *net, struct nft_table *table) nft_obj_destroy(&ctx, obj); } list_for_each_entry_safe(chain, nc, &table->chains, list) { - ctx.chain = chain; nft_chain_del(chain); nft_use_dec(&table->use); - nf_tables_chain_destroy(&ctx); + nf_tables_chain_destroy(chain); } nf_tables_table_destroy(&ctx); } diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c index 6475c7abc1fe..ac2422c215e5 100644 --- a/net/netfilter/nft_immediate.c +++ b/net/netfilter/nft_immediate.c @@ -221,7 +221,7 @@ static void nft_immediate_destroy(const struct nft_ctx *ctx, list_del(&rule->list); nf_tables_rule_destroy(&chain_ctx, rule); } - 
nf_tables_chain_destroy(&chain_ctx); + nf_tables_chain_destroy(chain); break; default: break; -- cgit v1.2.3-58-ga151 From 0c2e0ee861de071c2e02c44dda5c44329ea8394d Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 13 May 2024 15:00:46 +0200 Subject: netfilter: nf_tables: pass more specific nft_trans_chain where possible These functions pass a pointer to the base object type, use the more specific one. No functional change intended. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 51 ++++++++++++++++++++++--------------------- 1 file changed, 26 insertions(+), 25 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index bdc2d7f781ca..62a4da955574 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -448,6 +448,7 @@ nft_trans_alloc_chain(const struct nft_ctx *ctx, int msg_type) trans_chain = nft_trans_container_chain(trans); INIT_LIST_HEAD(&trans_chain->nft_trans_binding.binding_list); + trans_chain->chain = ctx->chain; return trans; } @@ -468,7 +469,6 @@ static struct nft_trans *nft_trans_chain_add(struct nft_ctx *ctx, int msg_type) ntohl(nla_get_be32(ctx->nla[NFTA_CHAIN_ID])); } } - nft_trans_chain(trans) = ctx->chain; nft_trans_commit_list_add_tail(ctx->net, trans); return trans; @@ -2089,18 +2089,19 @@ static struct nft_stats __percpu *nft_stats_alloc(const struct nlattr *attr) return newstats; } -static void nft_chain_stats_replace(struct nft_trans *trans) +static void nft_chain_stats_replace(struct nft_trans_chain *trans) { - struct nft_base_chain *chain = nft_base_chain(trans->ctx.chain); + const struct nft_trans *t = &trans->nft_trans_binding.nft_trans; + struct nft_base_chain *chain = nft_base_chain(trans->chain); - if (!nft_trans_chain_stats(trans)) + if (!trans->stats) return; - nft_trans_chain_stats(trans) = - rcu_replace_pointer(chain->stats, nft_trans_chain_stats(trans), - lockdep_commit_lock_is_held(trans->ctx.net)); + trans->stats = + rcu_replace_pointer(chain->stats, trans->stats, + lockdep_commit_lock_is_held(t->ctx.net)); - if (!nft_trans_chain_stats(trans)) + if (!trans->stats) static_branch_inc(&nft_counters_enabled); } @@ -9456,47 +9457,47 @@ static int nf_tables_validate(struct net *net) * * We defer the drop policy until the transaction has been finalized. 
*/ -static void nft_chain_commit_drop_policy(struct nft_trans *trans) +static void nft_chain_commit_drop_policy(struct nft_trans_chain *trans) { struct nft_base_chain *basechain; - if (nft_trans_chain_policy(trans) != NF_DROP) + if (trans->policy != NF_DROP) return; - if (!nft_is_base_chain(trans->ctx.chain)) + if (!nft_is_base_chain(trans->chain)) return; - basechain = nft_base_chain(trans->ctx.chain); + basechain = nft_base_chain(trans->chain); basechain->policy = NF_DROP; } -static void nft_chain_commit_update(struct nft_trans *trans) +static void nft_chain_commit_update(struct nft_trans_chain *trans) { - struct nft_table *table = trans->ctx.table; + struct nft_table *table = trans->nft_trans_binding.nft_trans.ctx.table; struct nft_base_chain *basechain; - if (nft_trans_chain_name(trans)) { + if (trans->name) { rhltable_remove(&table->chains_ht, - &trans->ctx.chain->rhlhead, + &trans->chain->rhlhead, nft_chain_ht_params); - swap(trans->ctx.chain->name, nft_trans_chain_name(trans)); + swap(trans->chain->name, trans->name); rhltable_insert_key(&table->chains_ht, - trans->ctx.chain->name, - &trans->ctx.chain->rhlhead, + trans->chain->name, + &trans->chain->rhlhead, nft_chain_ht_params); } - if (!nft_is_base_chain(trans->ctx.chain)) + if (!nft_is_base_chain(trans->chain)) return; nft_chain_stats_replace(trans); - basechain = nft_base_chain(trans->ctx.chain); + basechain = nft_base_chain(trans->chain); - switch (nft_trans_chain_policy(trans)) { + switch (trans->policy) { case NF_DROP: case NF_ACCEPT: - basechain->policy = nft_trans_chain_policy(trans); + basechain->policy = trans->policy; break; } } @@ -10309,14 +10310,14 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) break; case NFT_MSG_NEWCHAIN: if (nft_trans_chain_update(trans)) { - nft_chain_commit_update(trans); + nft_chain_commit_update(nft_trans_container_chain(trans)); nf_tables_chain_notify(&trans->ctx, NFT_MSG_NEWCHAIN, &nft_trans_chain_hooks(trans)); list_splice(&nft_trans_chain_hooks(trans), &nft_trans_basechain(trans)->hook_list); /* trans destroyed after rcu grace period */ } else { - nft_chain_commit_drop_policy(trans); + nft_chain_commit_drop_policy(nft_trans_container_chain(trans)); nft_clear(net, trans->ctx.chain); nf_tables_chain_notify(&trans->ctx, NFT_MSG_NEWCHAIN, NULL); nft_trans_destroy(trans); -- cgit v1.2.3-58-ga151 From d4f6f3994e133ff49e89d7e108500f45224a79e1 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 13 May 2024 15:00:47 +0200 Subject: netfilter: nf_tables: avoid usage of embedded nft_ctx nft_ctx is stored in nft_trans object, but nft_ctx is large (48 bytes on 64-bit platforms), it should not be embedded in the transaction structures. Reduce its usage so we can remove it eventually. This replaces trans->ctx.chain with the chain pointer already available in nft_trans_chain structure. 
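After the binding patch, the chain subtype reaches the base transaction through two embedded structs (nft_trans_chain holds nft_trans_binding, which holds nft_trans), yet a single container_of() with a nested member path still recovers it, and because that path sits at offset zero the conversion costs nothing. A small standalone illustration of the nesting, again with invented names rather than the kernel structures:

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct trans { int msg_type; };

struct trans_binding {		/* like struct nft_trans_binding */
	struct trans base;
	int on_binding_list;
};

struct trans_chain {		/* like struct nft_trans_chain */
	struct trans_binding binding;
	const char *name;
};

int main(void)
{
	struct trans_chain tc = { .binding.base.msg_type = 3, .name = "c0" };
	struct trans *t = &tc.binding.base;	/* what generic code passes around */

	/* one container_of() despite two levels of embedding: the member
	 * path "binding.base" has offset zero, so this is just a cast */
	struct trans_chain *back = container_of(t, struct trans_chain, binding.base);

	printf("%s %d\n", back->name, t->msg_type);
	return 0;
}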
Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 10 +++++----- net/netfilter/nf_tables_offload.c | 16 ++++++++-------- 2 files changed, 13 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 62a4da955574..f4e39816104f 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -9682,10 +9682,10 @@ static void nf_tables_commit_chain_prepare_cancel(struct net *net) struct nft_trans *trans, *next; list_for_each_entry_safe(trans, next, &nft_net->commit_list, list) { - struct nft_chain *chain = trans->ctx.chain; - if (trans->msg_type == NFT_MSG_NEWRULE || trans->msg_type == NFT_MSG_DELRULE) { + struct nft_chain *chain = trans->ctx.chain; + kvfree(chain->blob_next); chain->blob_next = NULL; } @@ -10318,7 +10318,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) /* trans destroyed after rcu grace period */ } else { nft_chain_commit_drop_policy(nft_trans_container_chain(trans)); - nft_clear(net, trans->ctx.chain); + nft_clear(net, nft_trans_chain(trans)); nf_tables_chain_notify(&trans->ctx, NFT_MSG_NEWCHAIN, NULL); nft_trans_destroy(trans); } @@ -10334,11 +10334,11 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) true); } } else { - nft_chain_del(trans->ctx.chain); + nft_chain_del(nft_trans_chain(trans)); nf_tables_chain_notify(&trans->ctx, NFT_MSG_DELCHAIN, NULL); nf_tables_unregister_hook(trans->ctx.net, table, - trans->ctx.chain); + nft_trans_chain(trans)); } break; case NFT_MSG_NEWRULE: diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c index 12ab78fa5d84..8d892a0d2438 100644 --- a/net/netfilter/nf_tables_offload.c +++ b/net/netfilter/nf_tables_offload.c @@ -518,18 +518,18 @@ static void nft_flow_rule_offload_abort(struct net *net, switch (trans->msg_type) { case NFT_MSG_NEWCHAIN: - if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD) || + if (!(nft_trans_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD) || nft_trans_chain_update(trans)) continue; - err = nft_flow_offload_chain(trans->ctx.chain, NULL, + err = nft_flow_offload_chain(nft_trans_chain(trans), NULL, FLOW_BLOCK_UNBIND); break; case NFT_MSG_DELCHAIN: - if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)) + if (!(nft_trans_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD)) continue; - err = nft_flow_offload_chain(trans->ctx.chain, NULL, + err = nft_flow_offload_chain(nft_trans_chain(trans), NULL, FLOW_BLOCK_BIND); break; case NFT_MSG_NEWRULE: @@ -569,20 +569,20 @@ int nft_flow_rule_offload_commit(struct net *net) switch (trans->msg_type) { case NFT_MSG_NEWCHAIN: - if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD) || + if (!(nft_trans_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD) || nft_trans_chain_update(trans)) continue; policy = nft_trans_chain_policy(trans); - err = nft_flow_offload_chain(trans->ctx.chain, &policy, + err = nft_flow_offload_chain(nft_trans_chain(trans), &policy, FLOW_BLOCK_BIND); break; case NFT_MSG_DELCHAIN: - if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)) + if (!(nft_trans_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD)) continue; policy = nft_trans_chain_policy(trans); - err = nft_flow_offload_chain(trans->ctx.chain, &policy, + err = nft_flow_offload_chain(nft_trans_chain(trans), &policy, FLOW_BLOCK_UNBIND); break; case NFT_MSG_NEWRULE: -- cgit v1.2.3-58-ga151 From 13f20bc9ec4f9f25935bf52337d3d1708787bd55 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 24 Jun 2024 20:57:03 +0200 
Subject: netfilter: nf_tables: store chain pointer in rule transaction Currently the chain can be derived from trans->ctx.chain, but the ctx will go away soon. Thus add the chain pointer to nft_trans_rule structure itself. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 3 +++ net/netfilter/nf_tables_api.c | 21 +++++++++++---------- net/netfilter/nf_tables_offload.c | 16 ++++++++-------- 3 files changed, 22 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 328fdc140551..86e6bd63a205 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1640,6 +1640,7 @@ struct nft_trans_binding { struct nft_trans_rule { struct nft_trans nft_trans; struct nft_rule *rule; + struct nft_chain *chain; struct nft_flow_rule *flow; u32 rule_id; bool bound; @@ -1655,6 +1656,8 @@ struct nft_trans_rule { nft_trans_container_rule(trans)->rule_id #define nft_trans_rule_bound(trans) \ nft_trans_container_rule(trans)->bound +#define nft_trans_rule_chain(trans) \ + nft_trans_container_rule(trans)->chain struct nft_trans_set { struct nft_trans_binding nft_trans_binding; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index f4e39816104f..3e5980f0bf71 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -251,7 +251,7 @@ static void __nft_chain_trans_bind(const struct nft_ctx *ctx, nft_trans_chain_bound(trans) = bind; break; case NFT_MSG_NEWRULE: - if (trans->ctx.chain == chain) + if (nft_trans_rule_chain(trans) == chain) nft_trans_rule_bound(trans) = bind; break; } @@ -541,6 +541,7 @@ static struct nft_trans *nft_trans_rule_add(struct nft_ctx *ctx, int msg_type, ntohl(nla_get_be32(ctx->nla[NFTA_RULE_ID])); } nft_trans_rule(trans) = rule; + nft_trans_rule_chain(trans) = ctx->chain; nft_trans_commit_list_add_tail(ctx->net, trans); return trans; @@ -4227,7 +4228,7 @@ static struct nft_rule *nft_rule_lookup_byid(const struct net *net, list_for_each_entry(trans, &nft_net->commit_list, list) { if (trans->msg_type == NFT_MSG_NEWRULE && - trans->ctx.chain == chain && + nft_trans_rule_chain(trans) == chain && id == nft_trans_rule_id(trans)) return nft_trans_rule(trans); } @@ -9684,7 +9685,7 @@ static void nf_tables_commit_chain_prepare_cancel(struct net *net) list_for_each_entry_safe(trans, next, &nft_net->commit_list, list) { if (trans->msg_type == NFT_MSG_NEWRULE || trans->msg_type == NFT_MSG_DELRULE) { - struct nft_chain *chain = trans->ctx.chain; + struct nft_chain *chain = nft_trans_rule_chain(trans); kvfree(chain->blob_next); chain->blob_next = NULL; @@ -10250,7 +10251,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) } if (trans->msg_type == NFT_MSG_NEWRULE || trans->msg_type == NFT_MSG_DELRULE) { - chain = trans->ctx.chain; + chain = nft_trans_rule_chain(trans); ret = nf_tables_commit_chain_prepare(net, chain); if (ret < 0) { @@ -10346,7 +10347,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) nf_tables_rule_notify(&trans->ctx, nft_trans_rule(trans), NFT_MSG_NEWRULE); - if (trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD) + if (nft_trans_rule_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD) nft_flow_rule_destroy(nft_trans_flow_rule(trans)); nft_trans_destroy(trans); @@ -10361,7 +10362,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) nft_trans_rule(trans), NFT_TRANS_COMMIT); - if (trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD) + if 
(nft_trans_rule_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD) nft_flow_rule_destroy(nft_trans_flow_rule(trans)); break; case NFT_MSG_NEWSET: @@ -10645,20 +10646,20 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) nft_trans_destroy(trans); break; } - nft_use_dec_restore(&trans->ctx.chain->use); + nft_use_dec_restore(&nft_trans_rule_chain(trans)->use); list_del_rcu(&nft_trans_rule(trans)->list); nft_rule_expr_deactivate(&trans->ctx, nft_trans_rule(trans), NFT_TRANS_ABORT); - if (trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD) + if (nft_trans_rule_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD) nft_flow_rule_destroy(nft_trans_flow_rule(trans)); break; case NFT_MSG_DELRULE: case NFT_MSG_DESTROYRULE: - nft_use_inc_restore(&trans->ctx.chain->use); + nft_use_inc_restore(&nft_trans_rule_chain(trans)->use); nft_clear(trans->ctx.net, nft_trans_rule(trans)); nft_rule_expr_activate(&trans->ctx, nft_trans_rule(trans)); - if (trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD) + if (nft_trans_rule_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD) nft_flow_rule_destroy(nft_trans_flow_rule(trans)); nft_trans_destroy(trans); diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c index 8d892a0d2438..0619feb10abb 100644 --- a/net/netfilter/nf_tables_offload.c +++ b/net/netfilter/nf_tables_offload.c @@ -533,18 +533,18 @@ static void nft_flow_rule_offload_abort(struct net *net, FLOW_BLOCK_BIND); break; case NFT_MSG_NEWRULE: - if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)) + if (!(nft_trans_rule_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD)) continue; - err = nft_flow_offload_rule(trans->ctx.chain, + err = nft_flow_offload_rule(nft_trans_rule_chain(trans), nft_trans_rule(trans), NULL, FLOW_CLS_DESTROY); break; case NFT_MSG_DELRULE: - if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)) + if (!(nft_trans_rule_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD)) continue; - err = nft_flow_offload_rule(trans->ctx.chain, + err = nft_flow_offload_rule(nft_trans_rule_chain(trans), nft_trans_rule(trans), nft_trans_flow_rule(trans), FLOW_CLS_REPLACE); @@ -586,7 +586,7 @@ int nft_flow_rule_offload_commit(struct net *net) FLOW_BLOCK_UNBIND); break; case NFT_MSG_NEWRULE: - if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)) + if (!(nft_trans_rule_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD)) continue; if (trans->ctx.flags & NLM_F_REPLACE || @@ -594,16 +594,16 @@ int nft_flow_rule_offload_commit(struct net *net) err = -EOPNOTSUPP; break; } - err = nft_flow_offload_rule(trans->ctx.chain, + err = nft_flow_offload_rule(nft_trans_rule_chain(trans), nft_trans_rule(trans), nft_trans_flow_rule(trans), FLOW_CLS_REPLACE); break; case NFT_MSG_DELRULE: - if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)) + if (!(nft_trans_rule_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD)) continue; - err = nft_flow_offload_rule(trans->ctx.chain, + err = nft_flow_offload_rule(nft_trans_rule_chain(trans), nft_trans_rule(trans), NULL, FLOW_CLS_DESTROY); break; -- cgit v1.2.3-58-ga151 From 551b3886401c3b25fd7a3d5ae31fb8ae29423cc4 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 13 May 2024 15:00:49 +0200 Subject: netfilter: nf_tables: reduce trans->ctx.chain references These objects are the trans_chain subtype, so use the helper instead of referencing trans->ctx, which will be removed soon. 
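One call site converted below, nft_chain_lookup_byid(), shows why the helper matters: a chain created earlier in the same batch is not reachable through the normal tables yet, so it is found by walking the pending transaction list and matching the id stored in the chain transaction itself. A toy version of that scan, with simplified stand-in types:

#include <stdio.h>

struct chain { const char *name; };

struct trans {				/* simplified pending-transaction record */
	struct trans *next;		/* stand-in for the commit list */
	int msg_type;
	struct chain *chain;		/* fields of the chain subtype, reached */
	unsigned int chain_id;		/* via the helper in the real code      */
};

enum { MSG_NEWCHAIN = 1, MSG_NEWRULE = 2 };

static struct chain *chain_lookup_byid(struct trans *head, unsigned int id)
{
	for (struct trans *t = head; t; t = t->next) {
		if (t->msg_type == MSG_NEWCHAIN && t->chain_id == id)
			return t->chain;
	}
	return NULL;			/* the real code returns ERR_PTR(-ENOENT) */
}

int main(void)
{
	struct chain c = { .name = "c1" };
	struct trans t2 = { .next = NULL, .msg_type = MSG_NEWCHAIN, .chain = &c, .chain_id = 7 };
	struct trans t1 = { .next = &t2, .msg_type = MSG_NEWRULE };

	struct chain *found = chain_lookup_byid(&t1, 7);
	printf("%s\n", found ? found->name : "not found");
	return 0;
}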
Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 3e5980f0bf71..bd311b37fc61 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1262,7 +1262,7 @@ static bool nft_table_pending_update(const struct nft_ctx *ctx) ((trans->msg_type == NFT_MSG_NEWCHAIN && nft_trans_chain_update(trans)) || (trans->msg_type == NFT_MSG_DELCHAIN && - nft_is_base_chain(trans->ctx.chain)))) + nft_is_base_chain(nft_trans_chain(trans))))) return true; } @@ -2815,13 +2815,11 @@ static struct nft_chain *nft_chain_lookup_byid(const struct net *net, struct nft_trans *trans; list_for_each_entry(trans, &nft_net->commit_list, list) { - struct nft_chain *chain = trans->ctx.chain; - if (trans->msg_type == NFT_MSG_NEWCHAIN && - chain->table == table && + nft_trans_chain(trans)->table == table && id == nft_trans_chain_id(trans) && - nft_active_genmask(chain, genmask)) - return chain; + nft_active_genmask(nft_trans_chain(trans), genmask)) + return nft_trans_chain(trans); } return ERR_PTR(-ENOENT); } @@ -10625,9 +10623,9 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) break; } nft_use_dec_restore(&table->use); - nft_chain_del(trans->ctx.chain); + nft_chain_del(nft_trans_chain(trans)); nf_tables_unregister_hook(trans->ctx.net, table, - trans->ctx.chain); + nft_trans_chain(trans)); } break; case NFT_MSG_DELCHAIN: @@ -10637,7 +10635,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) &nft_trans_basechain(trans)->hook_list); } else { nft_use_inc_restore(&table->use); - nft_clear(trans->ctx.net, trans->ctx.chain); + nft_clear(trans->ctx.net, nft_trans_chain(trans)); } nft_trans_destroy(trans); break; -- cgit v1.2.3-58-ga151 From 0be908750162ed08ef0dfdec68e1619c8f693f83 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 13 May 2024 15:00:50 +0200 Subject: netfilter: nf_tables: pass nft_table to destroy function No functional change intended. 
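As with the chain teardown earlier, the part worth highlighting is the guard at the top of the destroy function: if the table's use counter says chains, sets or objects still reference it, the kernel warns and bails out instead of freeing it. A reduced model of that guard, with illustrative types instead of the kernel API:

#include <stdio.h>
#include <stdlib.h>

struct table {
	int use;	/* chains/sets/objects still referencing this table */
};

static void table_destroy(struct table *table)
{
	/* mirrors WARN_ON(table->use > 0): leaking on a bug is preferable
	 * to freeing memory that something else still points at */
	if (table->use > 0) {
		fprintf(stderr, "refusing to free table, use=%d\n", table->use);
		return;
	}
	free(table);
}

int main(void)
{
	struct table *t = calloc(1, sizeof(*t));

	if (!t)
		return 1;
	table_destroy(t);	/* use == 0, so it really is freed */
	return 0;
}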
Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index bd311b37fc61..6958f922f95a 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1656,15 +1656,15 @@ static int nf_tables_deltable(struct sk_buff *skb, const struct nfnl_info *info, return nft_flush_table(&ctx); } -static void nf_tables_table_destroy(struct nft_ctx *ctx) +static void nf_tables_table_destroy(struct nft_table *table) { - if (WARN_ON(ctx->table->use > 0)) + if (WARN_ON(table->use > 0)) return; - rhltable_destroy(&ctx->table->chains_ht); - kfree(ctx->table->name); - kfree(ctx->table->udata); - kfree(ctx->table); + rhltable_destroy(&table->chains_ht); + kfree(table->name); + kfree(table->udata); + kfree(table); } void nft_register_chain_type(const struct nft_chain_type *ctype) @@ -9521,7 +9521,7 @@ static void nft_commit_release(struct nft_trans *trans) switch (trans->msg_type) { case NFT_MSG_DELTABLE: case NFT_MSG_DESTROYTABLE: - nf_tables_table_destroy(&trans->ctx); + nf_tables_table_destroy(trans->ctx.table); break; case NFT_MSG_NEWCHAIN: free_percpu(nft_trans_chain_stats(trans)); @@ -10518,7 +10518,7 @@ static void nf_tables_abort_release(struct nft_trans *trans) { switch (trans->msg_type) { case NFT_MSG_NEWTABLE: - nf_tables_table_destroy(&trans->ctx); + nf_tables_table_destroy(trans->ctx.table); break; case NFT_MSG_NEWCHAIN: if (nft_trans_chain_update(trans)) @@ -11490,7 +11490,7 @@ static void __nft_release_table(struct net *net, struct nft_table *table) nft_use_dec(&table->use); nf_tables_chain_destroy(chain); } - nf_tables_table_destroy(&ctx); + nf_tables_table_destroy(table); } static void __nft_release_tables(struct net *net) -- cgit v1.2.3-58-ga151 From e169285f8c56b8d5702475de0582dc83650c6cee Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 13 May 2024 15:00:51 +0200 Subject: netfilter: nf_tables: do not store nft_ctx in transaction objects nft_ctx is huge and most of the information stored within isn't used at all. Remove nft_ctx member from the base transaction structure and store only what is needed. After this change, relevant struct sizes are: struct nft_trans_chain { /* size: 120 (-32), cachelines: 2, members: 10 */ struct nft_trans_elem { /* size: 72 (-40), cachelines: 2, members: 4 */ struct nft_trans_flowtable { /* size: 80 (-48), cachelines: 2, members: 5 */ struct nft_trans_obj { /* size: 72 (-40), cachelines: 2, members: 4 */ struct nft_trans_rule { /* size: 80 (-32), cachelines: 2, members: 6 */ struct nft_trans_set { /* size: 96 (-24), cachelines: 2, members: 8 */ struct nft_trans_table { /* size: 56 (-40), cachelines: 1, members: 2 */ struct nft_trans_elem can now be allocated from kmalloc-96 instead of kmalloc-128 slab. A further reduction by 8 bytes would even allow for kmalloc-64. 
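The mechanism behind these size reductions is the nft_ctx_update() helper added in the diff below: the commit and abort loops keep one context on the stack and re-derive its fields from the slimmed-down transaction record before handling each entry, instead of every transaction carrying a 48-byte context of its own. A rough, self-contained model of that rebuild-on-demand idea, with simplified types in place of the kernel's:

#include <stdio.h>

struct table { int family; };
struct chain { const char *name; };

/* slimmed-down per-transaction record: only what cannot be re-derived */
struct trans {
	struct table *table;
	struct chain *chain;		/* only set for chain/rule messages */
	int msg_type;
	unsigned int seq;
	unsigned char report:1;		/* single-bit flags share one byte */
	unsigned char put_net:1;
};

/* scratch context, rebuilt before handling each transaction */
struct ctx {
	struct table *table;
	struct chain *chain;
	int family;
	unsigned int seq;
	int report;
};

static void ctx_update(struct ctx *ctx, const struct trans *trans)
{
	ctx->table = trans->table;
	ctx->chain = trans->chain;		/* NULL unless a chain/rule op */
	ctx->family = trans->table->family;	/* derived, not stored per transaction */
	ctx->seq = trans->seq;
	ctx->report = trans->report;
}

int main(void)
{
	struct table t = { .family = 2 };
	struct chain c = { .name = "input" };
	struct trans batch[2] = {
		{ .table = &t, .chain = &c, .msg_type = 1, .seq = 10, .report = 1 },
		{ .table = &t, .chain = NULL, .msg_type = 2, .seq = 11 },
	};
	struct ctx ctx;

	for (int i = 0; i < 2; i++) {
		ctx_update(&ctx, &batch[i]);	/* one context reused for the whole batch */
		printf("seq=%u family=%d chain=%s\n", ctx.seq, ctx.family,
		       ctx.chain ? ctx.chain->name : "-");
	}
	return 0;
}

Deciding which fields can be derived (the family from the table, for example) and which must be stored is exactly the judgement the patch makes for each member of the old context.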
Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 43 ++++++++++-- net/netfilter/nf_tables_api.c | 140 ++++++++++++++++++++++---------------- net/netfilter/nf_tables_offload.c | 8 +-- 3 files changed, 125 insertions(+), 66 deletions(-) (limited to 'net') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 86e6bd63a205..1e8da1b882ac 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1611,18 +1611,26 @@ static inline int nft_set_elem_is_dead(const struct nft_set_ext *ext) * struct nft_trans - nf_tables object update in transaction * * @list: used internally + * @net: struct net + * @table: struct nft_table the object resides in * @msg_type: message type - * @put_net: ctx->net needs to be put - * @ctx: transaction context + * @seq: netlink sequence number + * @flags: modifiers to new request + * @report: notify via unicast netlink message + * @put_net: net needs to be put * * This is the information common to all objects in the transaction, * this must always be the first member of derived sub-types. */ struct nft_trans { struct list_head list; + struct net *net; + struct nft_table *table; int msg_type; - bool put_net; - struct nft_ctx ctx; + u32 seq; + u16 flags; + u8 report:1; + u8 put_net:1; }; /** @@ -1794,6 +1802,33 @@ struct nft_trans_gc { struct rcu_head rcu; }; +static inline void nft_ctx_update(struct nft_ctx *ctx, + const struct nft_trans *trans) +{ + switch (trans->msg_type) { + case NFT_MSG_NEWRULE: + case NFT_MSG_DELRULE: + case NFT_MSG_DESTROYRULE: + ctx->chain = nft_trans_rule_chain(trans); + break; + case NFT_MSG_NEWCHAIN: + case NFT_MSG_DELCHAIN: + case NFT_MSG_DESTROYCHAIN: + ctx->chain = nft_trans_chain(trans); + break; + default: + ctx->chain = NULL; + break; + } + + ctx->net = trans->net; + ctx->table = trans->table; + ctx->family = trans->table->family; + ctx->report = trans->report; + ctx->flags = trans->flags; + ctx->seq = trans->seq; +} + struct nft_trans_gc *nft_trans_gc_alloc(struct nft_set *set, unsigned int gc_seq, gfp_t gfp); void nft_trans_gc_destroy(struct nft_trans_gc *trans); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 6958f922f95a..02d75aefaa8e 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -159,7 +159,12 @@ static struct nft_trans *nft_trans_alloc_gfp(const struct nft_ctx *ctx, INIT_LIST_HEAD(&trans->list); trans->msg_type = msg_type; - trans->ctx = *ctx; + + trans->net = ctx->net; + trans->table = ctx->table; + trans->seq = ctx->seq; + trans->flags = ctx->flags; + trans->report = ctx->report; return trans; } @@ -1258,7 +1263,7 @@ static bool nft_table_pending_update(const struct nft_ctx *ctx) return true; list_for_each_entry(trans, &nft_net->commit_list, list) { - if (trans->ctx.table == ctx->table && + if (trans->table == ctx->table && ((trans->msg_type == NFT_MSG_NEWCHAIN && nft_trans_chain_update(trans)) || (trans->msg_type == NFT_MSG_DELCHAIN && @@ -2100,7 +2105,7 @@ static void nft_chain_stats_replace(struct nft_trans_chain *trans) trans->stats = rcu_replace_pointer(chain->stats, trans->stats, - lockdep_commit_lock_is_held(t->ctx.net)); + lockdep_commit_lock_is_held(t->net)); if (!trans->stats) static_branch_inc(&nft_counters_enabled); @@ -2766,7 +2771,7 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy, err = -EEXIST; list_for_each_entry(tmp, &nft_net->commit_list, list) { if (tmp->msg_type == NFT_MSG_NEWCHAIN && - 
tmp->ctx.table == table && + tmp->table == table && nft_trans_chain_update(tmp) && nft_trans_chain_name(tmp) && strcmp(name, nft_trans_chain_name(tmp)) == 0) { @@ -9472,7 +9477,7 @@ static void nft_chain_commit_drop_policy(struct nft_trans_chain *trans) static void nft_chain_commit_update(struct nft_trans_chain *trans) { - struct nft_table *table = trans->nft_trans_binding.nft_trans.ctx.table; + struct nft_table *table = trans->nft_trans_binding.nft_trans.table; struct nft_base_chain *basechain; if (trans->name) { @@ -9501,7 +9506,8 @@ static void nft_chain_commit_update(struct nft_trans_chain *trans) } } -static void nft_obj_commit_update(struct nft_trans *trans) +static void nft_obj_commit_update(const struct nft_ctx *ctx, + struct nft_trans *trans) { struct nft_object *newobj; struct nft_object *obj; @@ -9513,15 +9519,21 @@ static void nft_obj_commit_update(struct nft_trans *trans) return; obj->ops->update(obj, newobj); - nft_obj_destroy(&trans->ctx, newobj); + nft_obj_destroy(ctx, newobj); } static void nft_commit_release(struct nft_trans *trans) { + struct nft_ctx ctx = { + .net = trans->net, + }; + + nft_ctx_update(&ctx, trans); + switch (trans->msg_type) { case NFT_MSG_DELTABLE: case NFT_MSG_DESTROYTABLE: - nf_tables_table_destroy(trans->ctx.table); + nf_tables_table_destroy(trans->table); break; case NFT_MSG_NEWCHAIN: free_percpu(nft_trans_chain_stats(trans)); @@ -9536,21 +9548,21 @@ static void nft_commit_release(struct nft_trans *trans) break; case NFT_MSG_DELRULE: case NFT_MSG_DESTROYRULE: - nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans)); + nf_tables_rule_destroy(&ctx, nft_trans_rule(trans)); break; case NFT_MSG_DELSET: case NFT_MSG_DESTROYSET: - nft_set_destroy(&trans->ctx, nft_trans_set(trans)); + nft_set_destroy(&ctx, nft_trans_set(trans)); break; case NFT_MSG_DELSETELEM: case NFT_MSG_DESTROYSETELEM: - nf_tables_set_elem_destroy(&trans->ctx, + nf_tables_set_elem_destroy(&ctx, nft_trans_elem_set(trans), nft_trans_elem_priv(trans)); break; case NFT_MSG_DELOBJ: case NFT_MSG_DESTROYOBJ: - nft_obj_destroy(&trans->ctx, nft_trans_obj(trans)); + nft_obj_destroy(&ctx, nft_trans_obj(trans)); break; case NFT_MSG_DELFLOWTABLE: case NFT_MSG_DESTROYFLOWTABLE: @@ -9562,7 +9574,7 @@ static void nft_commit_release(struct nft_trans *trans) } if (trans->put_net) - put_net(trans->ctx.net); + put_net(trans->net); kfree(trans); } @@ -10042,7 +10054,7 @@ static void nf_tables_commit_release(struct net *net) trans = list_last_entry(&nft_net->commit_list, struct nft_trans, list); - get_net(trans->ctx.net); + get_net(trans->net); WARN_ON_ONCE(trans->put_net); trans->put_net = true; @@ -10186,6 +10198,7 @@ static void nft_gc_seq_end(struct nftables_pernet *nft_net, unsigned int gc_seq) static int nf_tables_commit(struct net *net, struct sk_buff *skb) { struct nftables_pernet *nft_net = nft_pernet(net); + const struct nlmsghdr *nlh = nlmsg_hdr(skb); struct nft_trans_binding *trans_binding; struct nft_trans *trans, *next; unsigned int base_seq, gc_seq; @@ -10193,6 +10206,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) struct nft_trans_elem *te; struct nft_chain *chain; struct nft_table *table; + struct nft_ctx ctx; LIST_HEAD(adl); int err; @@ -10201,6 +10215,8 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) return 0; } + nft_ctx_init(&ctx, net, skb, nlh, NFPROTO_UNSPEC, NULL, NULL, NULL); + list_for_each_entry(trans_binding, &nft_net->binding_list, binding_list) { trans = &trans_binding->nft_trans; switch (trans->msg_type) { @@ -10238,7 +10254,7 @@ 
static int nf_tables_commit(struct net *net, struct sk_buff *skb) /* 1. Allocate space for next generation rules_gen_X[] */ list_for_each_entry_safe(trans, next, &nft_net->commit_list, list) { - struct nft_table *table = trans->ctx.table; + struct nft_table *table = trans->table; int ret; ret = nf_tables_commit_audit_alloc(&adl, table); @@ -10282,7 +10298,9 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) net->nft.gencursor = nft_gencursor_next(net); list_for_each_entry_safe(trans, next, &nft_net->commit_list, list) { - struct nft_table *table = trans->ctx.table; + struct nft_table *table = trans->table; + + nft_ctx_update(&ctx, trans); nf_tables_commit_audit_collect(&adl, table, trans->msg_type); switch (trans->msg_type) { @@ -10299,18 +10317,18 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) } else { nft_clear(net, table); } - nf_tables_table_notify(&trans->ctx, NFT_MSG_NEWTABLE); + nf_tables_table_notify(&ctx, NFT_MSG_NEWTABLE); nft_trans_destroy(trans); break; case NFT_MSG_DELTABLE: case NFT_MSG_DESTROYTABLE: list_del_rcu(&table->list); - nf_tables_table_notify(&trans->ctx, trans->msg_type); + nf_tables_table_notify(&ctx, trans->msg_type); break; case NFT_MSG_NEWCHAIN: if (nft_trans_chain_update(trans)) { nft_chain_commit_update(nft_trans_container_chain(trans)); - nf_tables_chain_notify(&trans->ctx, NFT_MSG_NEWCHAIN, + nf_tables_chain_notify(&ctx, NFT_MSG_NEWCHAIN, &nft_trans_chain_hooks(trans)); list_splice(&nft_trans_chain_hooks(trans), &nft_trans_basechain(trans)->hook_list); @@ -10318,14 +10336,14 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) } else { nft_chain_commit_drop_policy(nft_trans_container_chain(trans)); nft_clear(net, nft_trans_chain(trans)); - nf_tables_chain_notify(&trans->ctx, NFT_MSG_NEWCHAIN, NULL); + nf_tables_chain_notify(&ctx, NFT_MSG_NEWCHAIN, NULL); nft_trans_destroy(trans); } break; case NFT_MSG_DELCHAIN: case NFT_MSG_DESTROYCHAIN: if (nft_trans_chain_update(trans)) { - nf_tables_chain_notify(&trans->ctx, NFT_MSG_DELCHAIN, + nf_tables_chain_notify(&ctx, NFT_MSG_DELCHAIN, &nft_trans_chain_hooks(trans)); if (!(table->flags & NFT_TABLE_F_DORMANT)) { nft_netdev_unregister_hooks(net, @@ -10334,16 +10352,15 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) } } else { nft_chain_del(nft_trans_chain(trans)); - nf_tables_chain_notify(&trans->ctx, NFT_MSG_DELCHAIN, + nf_tables_chain_notify(&ctx, NFT_MSG_DELCHAIN, NULL); - nf_tables_unregister_hook(trans->ctx.net, table, + nf_tables_unregister_hook(ctx.net, ctx.table, nft_trans_chain(trans)); } break; case NFT_MSG_NEWRULE: - nft_clear(trans->ctx.net, nft_trans_rule(trans)); - nf_tables_rule_notify(&trans->ctx, - nft_trans_rule(trans), + nft_clear(net, nft_trans_rule(trans)); + nf_tables_rule_notify(&ctx, nft_trans_rule(trans), NFT_MSG_NEWRULE); if (nft_trans_rule_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD) nft_flow_rule_destroy(nft_trans_flow_rule(trans)); @@ -10353,11 +10370,9 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) case NFT_MSG_DELRULE: case NFT_MSG_DESTROYRULE: list_del_rcu(&nft_trans_rule(trans)->list); - nf_tables_rule_notify(&trans->ctx, - nft_trans_rule(trans), + nf_tables_rule_notify(&ctx, nft_trans_rule(trans), trans->msg_type); - nft_rule_expr_deactivate(&trans->ctx, - nft_trans_rule(trans), + nft_rule_expr_deactivate(&ctx, nft_trans_rule(trans), NFT_TRANS_COMMIT); if (nft_trans_rule_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD) @@ -10381,7 +10396,7 @@ static int nf_tables_commit(struct net *net, struct 
sk_buff *skb) !list_empty(&nft_trans_set(trans)->bindings)) nft_use_dec(&table->use); } - nf_tables_set_notify(&trans->ctx, nft_trans_set(trans), + nf_tables_set_notify(&ctx, nft_trans_set(trans), NFT_MSG_NEWSET, GFP_KERNEL); nft_trans_destroy(trans); break; @@ -10389,14 +10404,14 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) case NFT_MSG_DESTROYSET: nft_trans_set(trans)->dead = 1; list_del_rcu(&nft_trans_set(trans)->list); - nf_tables_set_notify(&trans->ctx, nft_trans_set(trans), + nf_tables_set_notify(&ctx, nft_trans_set(trans), trans->msg_type, GFP_KERNEL); break; case NFT_MSG_NEWSETELEM: te = nft_trans_container_elem(trans); nft_setelem_activate(net, te->set, te->elem_priv); - nf_tables_setelem_notify(&trans->ctx, te->set, + nf_tables_setelem_notify(&ctx, te->set, te->elem_priv, NFT_MSG_NEWSETELEM); if (te->set->ops->commit && @@ -10410,7 +10425,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) case NFT_MSG_DESTROYSETELEM: te = nft_trans_container_elem(trans); - nf_tables_setelem_notify(&trans->ctx, te->set, + nf_tables_setelem_notify(&ctx, te->set, te->elem_priv, trans->msg_type); nft_setelem_remove(net, te->set, te->elem_priv); @@ -10426,13 +10441,13 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) break; case NFT_MSG_NEWOBJ: if (nft_trans_obj_update(trans)) { - nft_obj_commit_update(trans); - nf_tables_obj_notify(&trans->ctx, + nft_obj_commit_update(&ctx, trans); + nf_tables_obj_notify(&ctx, nft_trans_obj(trans), NFT_MSG_NEWOBJ); } else { nft_clear(net, nft_trans_obj(trans)); - nf_tables_obj_notify(&trans->ctx, + nf_tables_obj_notify(&ctx, nft_trans_obj(trans), NFT_MSG_NEWOBJ); nft_trans_destroy(trans); @@ -10441,14 +10456,14 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) case NFT_MSG_DELOBJ: case NFT_MSG_DESTROYOBJ: nft_obj_del(nft_trans_obj(trans)); - nf_tables_obj_notify(&trans->ctx, nft_trans_obj(trans), + nf_tables_obj_notify(&ctx, nft_trans_obj(trans), trans->msg_type); break; case NFT_MSG_NEWFLOWTABLE: if (nft_trans_flowtable_update(trans)) { nft_trans_flowtable(trans)->data.flags = nft_trans_flowtable_flags(trans); - nf_tables_flowtable_notify(&trans->ctx, + nf_tables_flowtable_notify(&ctx, nft_trans_flowtable(trans), &nft_trans_flowtable_hooks(trans), NFT_MSG_NEWFLOWTABLE); @@ -10456,7 +10471,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) &nft_trans_flowtable(trans)->hook_list); } else { nft_clear(net, nft_trans_flowtable(trans)); - nf_tables_flowtable_notify(&trans->ctx, + nf_tables_flowtable_notify(&ctx, nft_trans_flowtable(trans), NULL, NFT_MSG_NEWFLOWTABLE); @@ -10466,7 +10481,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) case NFT_MSG_DELFLOWTABLE: case NFT_MSG_DESTROYFLOWTABLE: if (nft_trans_flowtable_update(trans)) { - nf_tables_flowtable_notify(&trans->ctx, + nf_tables_flowtable_notify(&ctx, nft_trans_flowtable(trans), &nft_trans_flowtable_hooks(trans), trans->msg_type); @@ -10474,7 +10489,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) &nft_trans_flowtable_hooks(trans)); } else { list_del_rcu(&nft_trans_flowtable(trans)->list); - nf_tables_flowtable_notify(&trans->ctx, + nf_tables_flowtable_notify(&ctx, nft_trans_flowtable(trans), NULL, trans->msg_type); @@ -10516,9 +10531,13 @@ static void nf_tables_module_autoload(struct net *net) static void nf_tables_abort_release(struct nft_trans *trans) { + struct nft_ctx ctx = { }; + + nft_ctx_update(&ctx, trans); + switch (trans->msg_type) { case NFT_MSG_NEWTABLE: - 
nf_tables_table_destroy(trans->ctx.table); + nf_tables_table_destroy(trans->table); break; case NFT_MSG_NEWCHAIN: if (nft_trans_chain_update(trans)) @@ -10527,17 +10546,17 @@ static void nf_tables_abort_release(struct nft_trans *trans) nf_tables_chain_destroy(nft_trans_chain(trans)); break; case NFT_MSG_NEWRULE: - nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans)); + nf_tables_rule_destroy(&ctx, nft_trans_rule(trans)); break; case NFT_MSG_NEWSET: - nft_set_destroy(&trans->ctx, nft_trans_set(trans)); + nft_set_destroy(&ctx, nft_trans_set(trans)); break; case NFT_MSG_NEWSETELEM: nft_set_elem_destroy(nft_trans_elem_set(trans), nft_trans_elem_priv(trans), true); break; case NFT_MSG_NEWOBJ: - nft_obj_destroy(&trans->ctx, nft_trans_obj(trans)); + nft_obj_destroy(&ctx, nft_trans_obj(trans)); break; case NFT_MSG_NEWFLOWTABLE: if (nft_trans_flowtable_update(trans)) @@ -10569,6 +10588,9 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) struct nft_trans *trans, *next; LIST_HEAD(set_update_list); struct nft_trans_elem *te; + struct nft_ctx ctx = { + .net = net, + }; int err = 0; if (action == NFNL_ABORT_VALIDATE && @@ -10577,7 +10599,9 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) list_for_each_entry_safe_reverse(trans, next, &nft_net->commit_list, list) { - struct nft_table *table = trans->ctx.table; + struct nft_table *table = trans->table; + + nft_ctx_update(&ctx, trans); switch (trans->msg_type) { case NFT_MSG_NEWTABLE: @@ -10604,7 +10628,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) break; case NFT_MSG_DELTABLE: case NFT_MSG_DESTROYTABLE: - nft_clear(trans->ctx.net, table); + nft_clear(trans->net, table); nft_trans_destroy(trans); break; case NFT_MSG_NEWCHAIN: @@ -10624,7 +10648,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) } nft_use_dec_restore(&table->use); nft_chain_del(nft_trans_chain(trans)); - nf_tables_unregister_hook(trans->ctx.net, table, + nf_tables_unregister_hook(trans->net, table, nft_trans_chain(trans)); } break; @@ -10635,7 +10659,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) &nft_trans_basechain(trans)->hook_list); } else { nft_use_inc_restore(&table->use); - nft_clear(trans->ctx.net, nft_trans_chain(trans)); + nft_clear(trans->net, nft_trans_chain(trans)); } nft_trans_destroy(trans); break; @@ -10646,7 +10670,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) } nft_use_dec_restore(&nft_trans_rule_chain(trans)->use); list_del_rcu(&nft_trans_rule(trans)->list); - nft_rule_expr_deactivate(&trans->ctx, + nft_rule_expr_deactivate(&ctx, nft_trans_rule(trans), NFT_TRANS_ABORT); if (nft_trans_rule_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD) @@ -10655,8 +10679,8 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) case NFT_MSG_DELRULE: case NFT_MSG_DESTROYRULE: nft_use_inc_restore(&nft_trans_rule_chain(trans)->use); - nft_clear(trans->ctx.net, nft_trans_rule(trans)); - nft_rule_expr_activate(&trans->ctx, nft_trans_rule(trans)); + nft_clear(trans->net, nft_trans_rule(trans)); + nft_rule_expr_activate(&ctx, nft_trans_rule(trans)); if (nft_trans_rule_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD) nft_flow_rule_destroy(nft_trans_flow_rule(trans)); @@ -10678,9 +10702,9 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) case NFT_MSG_DELSET: case NFT_MSG_DESTROYSET: nft_use_inc_restore(&table->use); - nft_clear(trans->ctx.net, 
nft_trans_set(trans)); + nft_clear(trans->net, nft_trans_set(trans)); if (nft_trans_set(trans)->flags & (NFT_SET_MAP | NFT_SET_OBJECT)) - nft_map_activate(&trans->ctx, nft_trans_set(trans)); + nft_map_activate(&ctx, nft_trans_set(trans)); nft_trans_destroy(trans); break; @@ -10720,7 +10744,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) break; case NFT_MSG_NEWOBJ: if (nft_trans_obj_update(trans)) { - nft_obj_destroy(&trans->ctx, nft_trans_obj_newobj(trans)); + nft_obj_destroy(&ctx, nft_trans_obj_newobj(trans)); nft_trans_destroy(trans); } else { nft_use_dec_restore(&table->use); @@ -10730,7 +10754,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) case NFT_MSG_DELOBJ: case NFT_MSG_DESTROYOBJ: nft_use_inc_restore(&table->use); - nft_clear(trans->ctx.net, nft_trans_obj(trans)); + nft_clear(trans->net, nft_trans_obj(trans)); nft_trans_destroy(trans); break; case NFT_MSG_NEWFLOWTABLE: @@ -10751,7 +10775,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) &nft_trans_flowtable(trans)->hook_list); } else { nft_use_inc_restore(&table->use); - nft_clear(trans->ctx.net, nft_trans_flowtable(trans)); + nft_clear(trans->net, nft_trans_flowtable(trans)); } nft_trans_destroy(trans); break; diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c index 0619feb10abb..64675f1c7f29 100644 --- a/net/netfilter/nf_tables_offload.c +++ b/net/netfilter/nf_tables_offload.c @@ -513,7 +513,7 @@ static void nft_flow_rule_offload_abort(struct net *net, int err = 0; list_for_each_entry_continue_reverse(trans, &nft_net->commit_list, list) { - if (trans->ctx.family != NFPROTO_NETDEV) + if (trans->table->family != NFPROTO_NETDEV) continue; switch (trans->msg_type) { @@ -564,7 +564,7 @@ int nft_flow_rule_offload_commit(struct net *net) u8 policy; list_for_each_entry(trans, &nft_net->commit_list, list) { - if (trans->ctx.family != NFPROTO_NETDEV) + if (trans->table->family != NFPROTO_NETDEV) continue; switch (trans->msg_type) { @@ -589,8 +589,8 @@ int nft_flow_rule_offload_commit(struct net *net) if (!(nft_trans_rule_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD)) continue; - if (trans->ctx.flags & NLM_F_REPLACE || - !(trans->ctx.flags & NLM_F_APPEND)) { + if (trans->flags & NLM_F_REPLACE || + !(trans->flags & NLM_F_APPEND)) { err = -EOPNOTSUPP; break; } -- cgit v1.2.3-58-ga151 From 53796b03295cf7ab1fc8600016fa6dfbf4a494a0 Mon Sep 17 00:00:00 2001 From: Ismael Luceno Date: Thu, 23 May 2024 18:54:44 +0200 Subject: ipvs: Avoid unnecessary calls to skb_is_gso_sctp In the context of the SCTP SNAT/DNAT handler, these calls can only return true. 
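To see why the dropped test is redundant, a condensed before/after of the two call sites helps (the real hunks follow below); in these handlers an skb that is GSO at all is necessarily SCTP GSO, so skb_is_gso_sctp() can only confirm what skb_is_gso() already established:

	/* Before: the extra test never changes the outcome here, because a
	 * GSO skb reaching the SCTP SNAT/DNAT handlers always has
	 * SKB_GSO_SCTP set, i.e. skb_is_gso_sctp(skb) is true whenever
	 * skb_is_gso(skb) is.
	 */
	if (!skb_is_gso(skb) || !skb_is_gso_sctp(skb))
		sctp_nat_csum(skb, sctph, sctphoff);

	/* After: equivalent in this context, and one test cheaper. */
	if (!skb_is_gso(skb))
		sctp_nat_csum(skb, sctph, sctphoff);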
Fixes: e10d3ba4d434 ("ipvs: Fix checksumming on GSO of SCTP packets") Signed-off-by: Ismael Luceno Acked-by: Julian Anastasov Acked-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_proto_sctp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index 1e689c714127..83e452916403 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -126,7 +126,7 @@ sctp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, if (sctph->source != cp->vport || payload_csum || skb->ip_summed == CHECKSUM_PARTIAL) { sctph->source = cp->vport; - if (!skb_is_gso(skb) || !skb_is_gso_sctp(skb)) + if (!skb_is_gso(skb)) sctp_nat_csum(skb, sctph, sctphoff); } else { skb->ip_summed = CHECKSUM_UNNECESSARY; @@ -175,7 +175,7 @@ sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, (skb->ip_summed == CHECKSUM_PARTIAL && !(skb_dst(skb)->dev->features & NETIF_F_SCTP_CRC))) { sctph->dest = cp->dport; - if (!skb_is_gso(skb) || !skb_is_gso_sctp(skb)) + if (!skb_is_gso(skb)) sctp_nat_csum(skb, sctph, sctphoff); } else if (skb->ip_summed != CHECKSUM_PARTIAL) { skb->ip_summed = CHECKSUM_UNNECESSARY; -- cgit v1.2.3-58-ga151 From 0b88d1654d556264bcd24a9cb6383f0888e30131 Mon Sep 17 00:00:00 2001 From: Yunjian Wang Date: Fri, 31 May 2024 11:48:47 +0800 Subject: netfilter: nf_conncount: fix wrong variable type Now there is a issue is that code checks reports a warning: implicit narrowing conversion from type 'unsigned int' to small type 'u8' (the 'keylen' variable). Fix it by removing the 'keylen' variable. Signed-off-by: Yunjian Wang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conncount.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c index 8715617b02fe..34ba14e59e95 100644 --- a/net/netfilter/nf_conncount.c +++ b/net/netfilter/nf_conncount.c @@ -321,7 +321,6 @@ insert_tree(struct net *net, struct nf_conncount_rb *rbconn; struct nf_conncount_tuple *conn; unsigned int count = 0, gc_count = 0; - u8 keylen = data->keylen; bool do_gc = true; spin_lock_bh(&nf_conncount_locks[hash]); @@ -333,7 +332,7 @@ restart: rbconn = rb_entry(*rbnode, struct nf_conncount_rb, node); parent = *rbnode; - diff = key_diff(key, rbconn->key, keylen); + diff = key_diff(key, rbconn->key, data->keylen); if (diff < 0) { rbnode = &((*rbnode)->rb_left); } else if (diff > 0) { @@ -378,7 +377,7 @@ restart: conn->tuple = *tuple; conn->zone = *zone; - memcpy(rbconn->key, key, sizeof(u32) * keylen); + memcpy(rbconn->key, key, sizeof(u32) * data->keylen); nf_conncount_list_init(&rbconn->list); list_add(&conn->node, &rbconn->list.head); @@ -403,7 +402,6 @@ count_tree(struct net *net, struct rb_node *parent; struct nf_conncount_rb *rbconn; unsigned int hash; - u8 keylen = data->keylen; hash = jhash2(key, data->keylen, conncount_rnd) % CONNCOUNT_SLOTS; root = &data->root[hash]; @@ -414,7 +412,7 @@ count_tree(struct net *net, rbconn = rb_entry(parent, struct nf_conncount_rb, node); - diff = key_diff(key, rbconn->key, keylen); + diff = key_diff(key, rbconn->key, data->keylen); if (diff < 0) { parent = rcu_dereference_raw(parent->rb_left); } else if (diff > 0) { -- cgit v1.2.3-58-ga151 From fe87a8deaad46c9a45381624f7e5f2f4fd145721 Mon Sep 17 00:00:00 2001 From: Lin Ma Date: Fri, 31 May 2024 09:28:47 +0800 Subject: netfilter: cttimeout: remove 'l3num' attr check 
After commit dd2934a95701 ("netfilter: conntrack: remove l3->l4 mapping information"), the attribute of type `CTA_TIMEOUT_L3PROTO` is not used any more in function cttimeout_default_set. However, the previous commit ea9cf2a55a7b ("netfilter: cttimeout: remove set but not used variable 'l3num'") forgot to remove the attribute present check when removing the related variable. This commit removes that check to ensure consistency. Signed-off-by: Lin Ma Reviewed-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_cttimeout.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index f466af4f8531..eab4f476b47f 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -366,8 +366,7 @@ static int cttimeout_default_set(struct sk_buff *skb, __u8 l4num; int ret; - if (!cda[CTA_TIMEOUT_L3PROTO] || - !cda[CTA_TIMEOUT_L4PROTO] || + if (!cda[CTA_TIMEOUT_L4PROTO] || !cda[CTA_TIMEOUT_DATA]) return -EINVAL; -- cgit v1.2.3-58-ga151 From f750dfe825b904164688adeb147950e0e0c4d262 Mon Sep 17 00:00:00 2001 From: Heng Qi Date: Fri, 21 Jun 2024 18:13:51 +0800 Subject: ethtool: provide customized dim profile management The NetDIM library, currently leveraged by an array of NICs, delivers excellent acceleration benefits. Nevertheless, NICs vary significantly in their dim profile list prerequisites. Specifically, virtio-net backends may present diverse sw or hw device implementation, making a one-size-fits-all parameter list impractical. On Alibaba Cloud, the virtio DPU's performance under the default DIM profile falls short of expectations, partly due to a mismatch in parameter configuration. I also noticed that ice/idpf/ena and other NICs have customized profilelist or placed some restrictions on dim capabilities. Motivated by this, I tried adding new params for "ethtool -C" that provides a per-device control to modify and access a device's interrupt parameters. Usage ======== The target NIC is named ethx. Assume that ethx only declares support for rx profile setting (with DIM_PROFILE_RX flag set in profile_flags) and supports modification of usec and pkt fields. 1. Query the currently customized list of the device $ ethtool -c ethx ... rx-profile: {.usec = 1, .pkts = 256, .comps = n/a,}, {.usec = 8, .pkts = 256, .comps = n/a,}, {.usec = 64, .pkts = 256, .comps = n/a,}, {.usec = 128, .pkts = 256, .comps = n/a,}, {.usec = 256, .pkts = 256, .comps = n/a,} tx-profile: n/a 2. Tune $ ethtool -C ethx rx-profile 1,1,n_2,n,n_3,3,n_4,4,n_n,5,n "n" means do not modify this field. $ ethtool -c ethx ... rx-profile: {.usec = 1, .pkts = 1, .comps = n/a,}, {.usec = 2, .pkts = 256, .comps = n/a,}, {.usec = 3, .pkts = 3, .comps = n/a,}, {.usec = 4, .pkts = 4, .comps = n/a,}, {.usec = 256, .pkts = 5, .comps = n/a,} tx-profile: n/a 3. Hint If the device does not support some type of customized dim profiles, the corresponding "n/a" will display. If the "n/a" field is being modified, -EOPNOTSUPP will be reported. 
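For driver authors, the kernel-side half of this interface is the pair of helpers added to lib/dim (signatures in the include/linux/dim.h hunk below). A minimal probe/remove sketch might look like the following; the foo_* names and the choice of the existing EQE period-mode constant are illustrative assumptions, not part of this patch:

	#include <linux/dim.h>
	#include <linux/netdevice.h>

	/* Hypothetical DIM worker; a real driver would translate
	 * dim->profile_ix into its per-queue moderation registers here. */
	static void foo_rx_dim_work(struct work_struct *work)
	{
		struct dim *dim = container_of(work, struct dim, work);

		/* ... program hardware from the active rx profile ... */
		dim->state = DIM_START_MEASURE;
	}

	static int foo_register_dim(struct net_device *dev)
	{
		/* Only the RX profile is user-editable, and only its usec and
		 * pkts fields; everything else shows as "n/a" in ethtool -c. */
		return net_dim_init_irq_moder(dev, DIM_PROFILE_RX,
					      DIM_COALESCE_USEC | DIM_COALESCE_PKTS,
					      DIM_CQ_PERIOD_MODE_START_FROM_EQE,
					      DIM_CQ_PERIOD_MODE_START_FROM_EQE,
					      foo_rx_dim_work, NULL);
	}

	static void foo_unregister_dim(struct net_device *dev)
	{
		net_dim_free_irq_moder(dev);	/* lib/dim expects RTNL held */
	}

With a registration like this in place, ETHTOOL_A_COALESCE_RX_PROFILE becomes settable and the rx-profile rows shown above stop reading n/a.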
Signed-off-by: Heng Qi Reviewed-by: Simon Horman Link: https://patch.msgid.link/20240621101353.107425-4-hengqi@linux.alibaba.com Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/ethtool.yaml | 31 +++ Documentation/networking/ethtool-netlink.rst | 8 + Documentation/networking/net_dim.rst | 42 +++++ include/linux/dim.h | 58 ++++++ include/linux/ethtool.h | 4 +- include/linux/netdevice.h | 3 + include/uapi/linux/ethtool_netlink.h | 22 +++ lib/dim/net_dim.c | 70 +++++++ net/Kconfig | 1 + net/ethtool/coalesce.c | 273 ++++++++++++++++++++++++++- 10 files changed, 509 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml index 00dc61358be8..6c2ab3d1c22f 100644 --- a/Documentation/netlink/specs/ethtool.yaml +++ b/Documentation/netlink/specs/ethtool.yaml @@ -414,6 +414,26 @@ attribute-sets: name: combined-count type: u32 + - + name: irq-moderation + attributes: + - + name: usec + type: u32 + - + name: pkts + type: u32 + - + name: comps + type: u32 + - + name: profile + attributes: + - + name: irq-moderation + type: nest + multi-attr: true + nested-attributes: irq-moderation - name: coalesce attributes: @@ -502,6 +522,15 @@ attribute-sets: - name: tx-aggr-time-usecs type: u32 + - + name: rx-profile + type: nest + nested-attributes: profile + - + name: tx-profile + type: nest + nested-attributes: profile + - name: pause-stat attributes: @@ -1325,6 +1354,8 @@ operations: - tx-aggr-max-bytes - tx-aggr-max-frames - tx-aggr-time-usecs + - rx-profile + - tx-profile dump: *coalesce-get-op - name: coalesce-set diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index 160bfb0ae8ba..7ec08e903bab 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -1033,6 +1033,8 @@ Kernel response contents: ``ETHTOOL_A_COALESCE_TX_AGGR_MAX_BYTES`` u32 max aggr size, Tx ``ETHTOOL_A_COALESCE_TX_AGGR_MAX_FRAMES`` u32 max aggr packets, Tx ``ETHTOOL_A_COALESCE_TX_AGGR_TIME_USECS`` u32 time (us), aggr, Tx + ``ETHTOOL_A_COALESCE_RX_PROFILE`` nested profile of DIM, Rx + ``ETHTOOL_A_COALESCE_TX_PROFILE`` nested profile of DIM, Tx =========================================== ====== ======================= Attributes are only included in reply if their value is not zero or the @@ -1062,6 +1064,10 @@ block should be sent. This feature is mainly of interest for specific USB devices which does not cope well with frequent small-sized URBs transmissions. +``ETHTOOL_A_COALESCE_RX_PROFILE`` and ``ETHTOOL_A_COALESCE_TX_PROFILE`` refer +to DIM parameters, see `Generic Network Dynamic Interrupt Moderation (Net DIM) +`_. + COALESCE_SET ============ @@ -1098,6 +1104,8 @@ Request contents: ``ETHTOOL_A_COALESCE_TX_AGGR_MAX_BYTES`` u32 max aggr size, Tx ``ETHTOOL_A_COALESCE_TX_AGGR_MAX_FRAMES`` u32 max aggr packets, Tx ``ETHTOOL_A_COALESCE_TX_AGGR_TIME_USECS`` u32 time (us), aggr, Tx + ``ETHTOOL_A_COALESCE_RX_PROFILE`` nested profile of DIM, Rx + ``ETHTOOL_A_COALESCE_TX_PROFILE`` nested profile of DIM, Tx =========================================== ====== ======================= Request is rejected if it attributes declared as unsupported by driver (i.e. 
diff --git a/Documentation/networking/net_dim.rst b/Documentation/networking/net_dim.rst index 3bed9fd95336..8908fd7b0a8d 100644 --- a/Documentation/networking/net_dim.rst +++ b/Documentation/networking/net_dim.rst @@ -169,6 +169,48 @@ usage is not complete but it should make the outline of the usage clear. ... } + +Tuning DIM +========== + +Net DIM serves a range of network devices and delivers excellent acceleration +benefits. Yet, it has been observed that some preset configurations of DIM may +not align seamlessly with the varying specifications of network devices, and +this discrepancy has been identified as a factor to the suboptimal performance +outcomes of DIM-enabled network devices, related to a mismatch in profiles. + +To address this issue, Net DIM introduces a per-device control to modify and +access a device's ``rx-profile`` and ``tx-profile`` parameters: +Assume that the target network device is named ethx, and ethx only declares +support for RX profile setting and supports modification of ``usec`` field +and ``pkts`` field (See the data structure: +:c:type:`struct dim_cq_moder `). + +You can use ethtool to modify the current RX DIM profile where all +values are 64:: + + $ ethtool -C ethx rx-profile 1,1,n_2,2,n_3,n,n_n,4,n_n,n,n + +``n`` means do not modify this field, and ``_`` separates structure +elements of the profile array. + +Querying the current profiles using:: + + $ ethtool -c ethx + ... + rx-profile: + {.usec = 1, .pkts = 1, .comps = n/a,}, + {.usec = 2, .pkts = 2, .comps = n/a,}, + {.usec = 3, .pkts = 64, .comps = n/a,}, + {.usec = 64, .pkts = 4, .comps = n/a,}, + {.usec = 64, .pkts = 64, .comps = n/a,} + tx-profile: n/a + +If the network device does not support specific fields of DIM profiles, +the corresponding ``n/a`` will display. If the ``n/a`` field is being +modified, error messages will be reported. + + Dynamic Interrupt Moderation (DIM) library API ============================================== diff --git a/include/linux/dim.h b/include/linux/dim.h index 43398f5eade2..e0f39bd85432 100644 --- a/include/linux/dim.h +++ b/include/linux/dim.h @@ -10,6 +10,8 @@ #include #include +struct net_device; + /* Number of DIM profiles and period mode. */ #define NET_DIM_PARAMS_NUM_PROFILES 5 #define NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE 256 @@ -45,12 +47,45 @@ * @pkts: CQ packet counter suggestion (by DIM) * @comps: Completion counter * @cq_period_mode: CQ period count mode (from CQE/EQE) + * @rcu: for asynchronous kfree_rcu */ struct dim_cq_moder { u16 usec; u16 pkts; u16 comps; u8 cq_period_mode; + struct rcu_head rcu; +}; + +#define DIM_PROFILE_RX BIT(0) /* support rx profile modification */ +#define DIM_PROFILE_TX BIT(1) /* support tx profile modification */ + +#define DIM_COALESCE_USEC BIT(0) /* support usec field modification */ +#define DIM_COALESCE_PKTS BIT(1) /* support pkts field modification */ +#define DIM_COALESCE_COMPS BIT(2) /* support comps field modification */ + +/** + * struct dim_irq_moder - Structure for irq moderation information. + * Used to collect irq moderation related information. 
+ * + * @profile_flags: DIM_PROFILE_* + * @coal_flags: DIM_COALESCE_* for Rx and Tx + * @dim_rx_mode: Rx DIM period count mode: CQE or EQE + * @dim_tx_mode: Tx DIM period count mode: CQE or EQE + * @rx_profile: DIM profile list for Rx + * @tx_profile: DIM profile list for Tx + * @rx_dim_work: Rx DIM worker scheduled by net_dim() + * @tx_dim_work: Tx DIM worker scheduled by net_dim() + */ +struct dim_irq_moder { + u8 profile_flags; + u8 coal_flags; + u8 dim_rx_mode; + u8 dim_tx_mode; + struct dim_cq_moder __rcu *rx_profile; + struct dim_cq_moder __rcu *tx_profile; + void (*rx_dim_work)(struct work_struct *work); + void (*tx_dim_work)(struct work_struct *work); }; /** @@ -198,6 +233,29 @@ enum dim_step_result { DIM_ON_EDGE, }; +/** + * net_dim_init_irq_moder - collect information to initialize irq moderation + * @dev: target network device + * @profile_flags: Rx or Tx profile modification capability + * @coal_flags: irq moderation params flags + * @rx_mode: CQ period mode for Rx + * @tx_mode: CQ period mode for Tx + * @rx_dim_work: Rx worker called after dim decision + * @tx_dim_work: Tx worker called after dim decision + * + * Return: 0 on success or a negative error code. + */ +int net_dim_init_irq_moder(struct net_device *dev, u8 profile_flags, + u8 coal_flags, u8 rx_mode, u8 tx_mode, + void (*rx_dim_work)(struct work_struct *work), + void (*tx_dim_work)(struct work_struct *work)); + +/** + * net_dim_free_irq_moder - free fields for irq moderation + * @dev: target network device + */ +void net_dim_free_irq_moder(struct net_device *dev); + /** * dim_on_top - check if current state is a good place to stop (top location) * @dim: DIM context diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 6fd9107d3cc0..959196af7f5a 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -284,7 +284,9 @@ bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32, #define ETHTOOL_COALESCE_TX_AGGR_MAX_BYTES BIT(24) #define ETHTOOL_COALESCE_TX_AGGR_MAX_FRAMES BIT(25) #define ETHTOOL_COALESCE_TX_AGGR_TIME_USECS BIT(26) -#define ETHTOOL_COALESCE_ALL_PARAMS GENMASK(26, 0) +#define ETHTOOL_COALESCE_RX_PROFILE BIT(27) +#define ETHTOOL_COALESCE_TX_PROFILE BIT(28) +#define ETHTOOL_COALESCE_ALL_PARAMS GENMASK(28, 0) #define ETHTOOL_COALESCE_USECS \ (ETHTOOL_COALESCE_RX_USECS | ETHTOOL_COALESCE_TX_USECS) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 4e81660b4462..cc18acd3c58b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2402,6 +2402,9 @@ struct net_device { /** @page_pools: page pools created for this netdevice */ struct hlist_head page_pools; #endif + + /** @irq_moder: dim parameters used if IS_ENABLED(CONFIG_DIMLIB). 
*/ + struct dim_irq_moder *irq_moder; }; #define to_net_dev(d) container_of(d, struct net_device, dev) diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index b49b804b9495..d15856c7e001 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -415,12 +415,34 @@ enum { ETHTOOL_A_COALESCE_TX_AGGR_MAX_BYTES, /* u32 */ ETHTOOL_A_COALESCE_TX_AGGR_MAX_FRAMES, /* u32 */ ETHTOOL_A_COALESCE_TX_AGGR_TIME_USECS, /* u32 */ + /* nest - _A_PROFILE_IRQ_MODERATION */ + ETHTOOL_A_COALESCE_RX_PROFILE, + /* nest - _A_PROFILE_IRQ_MODERATION */ + ETHTOOL_A_COALESCE_TX_PROFILE, /* add new constants above here */ __ETHTOOL_A_COALESCE_CNT, ETHTOOL_A_COALESCE_MAX = (__ETHTOOL_A_COALESCE_CNT - 1) }; +enum { + ETHTOOL_A_PROFILE_UNSPEC, + /* nest, _A_IRQ_MODERATION_* */ + ETHTOOL_A_PROFILE_IRQ_MODERATION, + __ETHTOOL_A_PROFILE_CNT, + ETHTOOL_A_PROFILE_MAX = (__ETHTOOL_A_PROFILE_CNT - 1) +}; + +enum { + ETHTOOL_A_IRQ_MODERATION_UNSPEC, + ETHTOOL_A_IRQ_MODERATION_USEC, /* u32 */ + ETHTOOL_A_IRQ_MODERATION_PKTS, /* u32 */ + ETHTOOL_A_IRQ_MODERATION_COMPS, /* u32 */ + + __ETHTOOL_A_IRQ_MODERATION_CNT, + ETHTOOL_A_IRQ_MODERATION_MAX = (__ETHTOOL_A_IRQ_MODERATION_CNT - 1) +}; + /* PAUSE */ enum { diff --git a/lib/dim/net_dim.c b/lib/dim/net_dim.c index 67d5beb34dc3..0cd41277c7a3 100644 --- a/lib/dim/net_dim.c +++ b/lib/dim/net_dim.c @@ -4,6 +4,7 @@ */ #include +#include /* * Net DIM profiles: @@ -95,6 +96,75 @@ net_dim_get_def_tx_moderation(u8 cq_period_mode) } EXPORT_SYMBOL(net_dim_get_def_tx_moderation); +int net_dim_init_irq_moder(struct net_device *dev, u8 profile_flags, + u8 coal_flags, u8 rx_mode, u8 tx_mode, + void (*rx_dim_work)(struct work_struct *work), + void (*tx_dim_work)(struct work_struct *work)) +{ + struct dim_cq_moder *rxp = NULL, *txp; + struct dim_irq_moder *moder; + int len; + + dev->irq_moder = kzalloc(sizeof(*dev->irq_moder), GFP_KERNEL); + if (!dev->irq_moder) + return -ENOMEM; + + moder = dev->irq_moder; + len = NET_DIM_PARAMS_NUM_PROFILES * sizeof(*moder->rx_profile); + + moder->coal_flags = coal_flags; + moder->profile_flags = profile_flags; + + if (profile_flags & DIM_PROFILE_RX) { + moder->rx_dim_work = rx_dim_work; + moder->dim_rx_mode = rx_mode; + rxp = kmemdup(rx_profile[rx_mode], len, GFP_KERNEL); + if (!rxp) + goto free_moder; + + rcu_assign_pointer(moder->rx_profile, rxp); + } + + if (profile_flags & DIM_PROFILE_TX) { + moder->tx_dim_work = tx_dim_work; + moder->dim_tx_mode = tx_mode; + txp = kmemdup(tx_profile[tx_mode], len, GFP_KERNEL); + if (!txp) + goto free_rxp; + + rcu_assign_pointer(moder->tx_profile, txp); + } + + return 0; + +free_rxp: + kfree(rxp); +free_moder: + kfree(moder); + return -ENOMEM; +} +EXPORT_SYMBOL(net_dim_init_irq_moder); + +/* RTNL lock is held. 
*/ +void net_dim_free_irq_moder(struct net_device *dev) +{ + struct dim_cq_moder *rxp, *txp; + + if (!dev->irq_moder) + return; + + rxp = rtnl_dereference(dev->irq_moder->rx_profile); + txp = rtnl_dereference(dev->irq_moder->tx_profile); + + rcu_assign_pointer(dev->irq_moder->rx_profile, NULL); + rcu_assign_pointer(dev->irq_moder->tx_profile, NULL); + + kfree_rcu(rxp, rcu); + kfree_rcu(txp, rcu); + kfree(dev->irq_moder); +} +EXPORT_SYMBOL(net_dim_free_irq_moder); + static int net_dim_step(struct dim *dim) { if (dim->tired == (NET_DIM_PARAMS_NUM_PROFILES * 2)) diff --git a/net/Kconfig b/net/Kconfig index 9fe65fa26e48..d27d0deac0bf 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -508,6 +508,7 @@ config FAILOVER config ETHTOOL_NETLINK bool "Netlink interface for ethtool" + select DIMLIB default y help An alternative userspace interface for ethtool based on generic diff --git a/net/ethtool/coalesce.c b/net/ethtool/coalesce.c index 83112c1a71ae..759b16e3d134 100644 --- a/net/ethtool/coalesce.c +++ b/net/ethtool/coalesce.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only +#include #include "netlink.h" #include "common.h" @@ -82,6 +83,14 @@ static int coalesce_prepare_data(const struct ethnl_req_info *req_base, static int coalesce_reply_size(const struct ethnl_req_info *req_base, const struct ethnl_reply_data *reply_base) { + int modersz = nla_total_size(0) + /* _PROFILE_IRQ_MODERATION, nest */ + nla_total_size(sizeof(u32)) + /* _IRQ_MODERATION_USEC */ + nla_total_size(sizeof(u32)) + /* _IRQ_MODERATION_PKTS */ + nla_total_size(sizeof(u32)); /* _IRQ_MODERATION_COMPS */ + + int total_modersz = nla_total_size(0) + /* _{R,T}X_PROFILE, nest */ + modersz * NET_DIM_PARAMS_NUM_PROFILES; + return nla_total_size(sizeof(u32)) + /* _RX_USECS */ nla_total_size(sizeof(u32)) + /* _RX_MAX_FRAMES */ nla_total_size(sizeof(u32)) + /* _RX_USECS_IRQ */ @@ -108,7 +117,8 @@ static int coalesce_reply_size(const struct ethnl_req_info *req_base, nla_total_size(sizeof(u8)) + /* _USE_CQE_MODE_RX */ nla_total_size(sizeof(u32)) + /* _TX_AGGR_MAX_BYTES */ nla_total_size(sizeof(u32)) + /* _TX_AGGR_MAX_FRAMES */ - nla_total_size(sizeof(u32)); /* _TX_AGGR_TIME_USECS */ + nla_total_size(sizeof(u32)) + /* _TX_AGGR_TIME_USECS */ + total_modersz * 2; /* _{R,T}X_PROFILE */ } static bool coalesce_put_u32(struct sk_buff *skb, u16 attr_type, u32 val, @@ -127,14 +137,84 @@ static bool coalesce_put_bool(struct sk_buff *skb, u16 attr_type, u32 val, return nla_put_u8(skb, attr_type, !!val); } +/** + * coalesce_put_profile - fill reply with a nla nest with four child nla nests. + * @skb: socket buffer the message is stored in + * @attr_type: nest attr type ETHTOOL_A_COALESCE_*X_PROFILE + * @profile: data passed to userspace + * @coal_flags: modifiable parameters supported by the driver + * + * Put a dim profile nest attribute. Refer to ETHTOOL_A_PROFILE_IRQ_MODERATION. + * + * Return: 0 on success or a negative error code. 
+ */ +static int coalesce_put_profile(struct sk_buff *skb, u16 attr_type, + const struct dim_cq_moder *profile, + u8 coal_flags) +{ + struct nlattr *profile_attr, *moder_attr; + int i, ret; + + if (!profile || !coal_flags) + return 0; + + profile_attr = nla_nest_start(skb, attr_type); + if (!profile_attr) + return -EMSGSIZE; + + for (i = 0; i < NET_DIM_PARAMS_NUM_PROFILES; i++) { + moder_attr = nla_nest_start(skb, + ETHTOOL_A_PROFILE_IRQ_MODERATION); + if (!moder_attr) { + ret = -EMSGSIZE; + goto cancel_profile; + } + + if (coal_flags & DIM_COALESCE_USEC) { + ret = nla_put_u32(skb, ETHTOOL_A_IRQ_MODERATION_USEC, + profile[i].usec); + if (ret) + goto cancel_moder; + } + + if (coal_flags & DIM_COALESCE_PKTS) { + ret = nla_put_u32(skb, ETHTOOL_A_IRQ_MODERATION_PKTS, + profile[i].pkts); + if (ret) + goto cancel_moder; + } + + if (coal_flags & DIM_COALESCE_COMPS) { + ret = nla_put_u32(skb, ETHTOOL_A_IRQ_MODERATION_COMPS, + profile[i].comps); + if (ret) + goto cancel_moder; + } + + nla_nest_end(skb, moder_attr); + } + + nla_nest_end(skb, profile_attr); + + return 0; + +cancel_moder: + nla_nest_cancel(skb, moder_attr); +cancel_profile: + nla_nest_cancel(skb, profile_attr); + return ret; +} + static int coalesce_fill_reply(struct sk_buff *skb, const struct ethnl_req_info *req_base, const struct ethnl_reply_data *reply_base) { const struct coalesce_reply_data *data = COALESCE_REPDATA(reply_base); const struct kernel_ethtool_coalesce *kcoal = &data->kernel_coalesce; + struct dim_irq_moder *moder = req_base->dev->irq_moder; const struct ethtool_coalesce *coal = &data->coalesce; u32 supported = data->supported_params; + int ret = 0; if (coalesce_put_u32(skb, ETHTOOL_A_COALESCE_RX_USECS, coal->rx_coalesce_usecs, supported) || @@ -192,11 +272,41 @@ static int coalesce_fill_reply(struct sk_buff *skb, kcoal->tx_aggr_time_usecs, supported)) return -EMSGSIZE; - return 0; + if (!moder) + return 0; + + rcu_read_lock(); + if (moder->profile_flags & DIM_PROFILE_RX) { + ret = coalesce_put_profile(skb, ETHTOOL_A_COALESCE_RX_PROFILE, + rcu_dereference(moder->rx_profile), + moder->coal_flags); + if (ret) + goto out; + } + + if (moder->profile_flags & DIM_PROFILE_TX) + ret = coalesce_put_profile(skb, ETHTOOL_A_COALESCE_TX_PROFILE, + rcu_dereference(moder->tx_profile), + moder->coal_flags); + +out: + rcu_read_unlock(); + return ret; } /* COALESCE_SET */ +static const struct nla_policy coalesce_irq_moderation_policy[] = { + [ETHTOOL_A_IRQ_MODERATION_USEC] = { .type = NLA_U32 }, + [ETHTOOL_A_IRQ_MODERATION_PKTS] = { .type = NLA_U32 }, + [ETHTOOL_A_IRQ_MODERATION_COMPS] = { .type = NLA_U32 }, +}; + +static const struct nla_policy coalesce_profile_policy[] = { + [ETHTOOL_A_PROFILE_IRQ_MODERATION] = + NLA_POLICY_NESTED(coalesce_irq_moderation_policy), +}; + const struct nla_policy ethnl_coalesce_set_policy[] = { [ETHTOOL_A_COALESCE_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy), @@ -227,6 +337,10 @@ const struct nla_policy ethnl_coalesce_set_policy[] = { [ETHTOOL_A_COALESCE_TX_AGGR_MAX_BYTES] = { .type = NLA_U32 }, [ETHTOOL_A_COALESCE_TX_AGGR_MAX_FRAMES] = { .type = NLA_U32 }, [ETHTOOL_A_COALESCE_TX_AGGR_TIME_USECS] = { .type = NLA_U32 }, + [ETHTOOL_A_COALESCE_RX_PROFILE] = + NLA_POLICY_NESTED(coalesce_profile_policy), + [ETHTOOL_A_COALESCE_TX_PROFILE] = + NLA_POLICY_NESTED(coalesce_profile_policy), }; static int @@ -234,6 +348,7 @@ ethnl_set_coalesce_validate(struct ethnl_req_info *req_info, struct genl_info *info) { const struct ethtool_ops *ops = req_info->dev->ethtool_ops; + struct dim_irq_moder *irq_moder = 
req_info->dev->irq_moder; struct nlattr **tb = info->attrs; u32 supported_params; u16 a; @@ -243,6 +358,12 @@ ethnl_set_coalesce_validate(struct ethnl_req_info *req_info, /* make sure that only supported parameters are present */ supported_params = ops->supported_coalesce_params; + if (irq_moder && irq_moder->profile_flags & DIM_PROFILE_RX) + supported_params |= ETHTOOL_COALESCE_RX_PROFILE; + + if (irq_moder && irq_moder->profile_flags & DIM_PROFILE_TX) + supported_params |= ETHTOOL_COALESCE_TX_PROFILE; + for (a = ETHTOOL_A_COALESCE_RX_USECS; a < __ETHTOOL_A_COALESCE_CNT; a++) if (tb[a] && !(supported_params & attr_to_mask(a))) { NL_SET_ERR_MSG_ATTR(info->extack, tb[a], @@ -253,6 +374,138 @@ ethnl_set_coalesce_validate(struct ethnl_req_info *req_info, return 1; } +/** + * ethnl_update_irq_moder - update a specific field in the given profile + * @irq_moder: place that collects dim related information + * @irq_field: field in profile to modify + * @attr_type: attr type ETHTOOL_A_IRQ_MODERATION_* + * @tb: netlink attribute with new values or null + * @coal_bit: DIM_COALESCE_* bit from coal_flags + * @mod: pointer to bool for modification tracking + * @extack: netlink extended ack + * + * Return: 0 on success or a negative error code. + */ +static int ethnl_update_irq_moder(struct dim_irq_moder *irq_moder, + u16 *irq_field, u16 attr_type, + struct nlattr **tb, + u8 coal_bit, bool *mod, + struct netlink_ext_ack *extack) +{ + int ret = 0; + u32 val; + + if (!tb[attr_type]) + return 0; + + if (irq_moder->coal_flags & coal_bit) { + val = nla_get_u32(tb[attr_type]); + if (*irq_field == val) + return 0; + + *irq_field = val; + *mod = true; + } else { + NL_SET_BAD_ATTR(extack, tb[attr_type]); + ret = -EOPNOTSUPP; + } + + return ret; +} + +/** + * ethnl_update_profile - get a profile nest with child nests from userspace. + * @dev: netdevice to update the profile + * @dst: profile get from the driver and modified by ethnl_update_profile. + * @nests: nest attr ETHTOOL_A_COALESCE_*X_PROFILE to set profile. + * @mod: pointer to bool for modification tracking + * @extack: Netlink extended ack + * + * Layout of nests: + * Nested ETHTOOL_A_COALESCE_*X_PROFILE attr + * Nested ETHTOOL_A_PROFILE_IRQ_MODERATION attr + * ETHTOOL_A_IRQ_MODERATION_USEC attr + * ETHTOOL_A_IRQ_MODERATION_PKTS attr + * ETHTOOL_A_IRQ_MODERATION_COMPS attr + * ... + * Nested ETHTOOL_A_PROFILE_IRQ_MODERATION attr + * ETHTOOL_A_IRQ_MODERATION_USEC attr + * ETHTOOL_A_IRQ_MODERATION_PKTS attr + * ETHTOOL_A_IRQ_MODERATION_COMPS attr + * + * Return: 0 on success or a negative error code. 
+ */ +static int ethnl_update_profile(struct net_device *dev, + struct dim_cq_moder __rcu **dst, + const struct nlattr *nests, + bool *mod, + struct netlink_ext_ack *extack) +{ + int len_irq_moder = ARRAY_SIZE(coalesce_irq_moderation_policy); + struct nlattr *tb[ARRAY_SIZE(coalesce_irq_moderation_policy)]; + struct dim_irq_moder *irq_moder = dev->irq_moder; + struct dim_cq_moder *new_profile, *old_profile; + int ret, rem, i = 0, len; + struct nlattr *nest; + + if (!nests) + return 0; + + if (!*dst) + return -EOPNOTSUPP; + + old_profile = rtnl_dereference(*dst); + len = NET_DIM_PARAMS_NUM_PROFILES * sizeof(*old_profile); + new_profile = kmemdup(old_profile, len, GFP_KERNEL); + if (!new_profile) + return -ENOMEM; + + nla_for_each_nested_type(nest, ETHTOOL_A_PROFILE_IRQ_MODERATION, + nests, rem) { + ret = nla_parse_nested(tb, len_irq_moder - 1, nest, + coalesce_irq_moderation_policy, + extack); + if (ret) + goto err_out; + + ret = ethnl_update_irq_moder(irq_moder, &new_profile[i].usec, + ETHTOOL_A_IRQ_MODERATION_USEC, + tb, DIM_COALESCE_USEC, + mod, extack); + if (ret) + goto err_out; + + ret = ethnl_update_irq_moder(irq_moder, &new_profile[i].pkts, + ETHTOOL_A_IRQ_MODERATION_PKTS, + tb, DIM_COALESCE_PKTS, + mod, extack); + if (ret) + goto err_out; + + ret = ethnl_update_irq_moder(irq_moder, &new_profile[i].comps, + ETHTOOL_A_IRQ_MODERATION_COMPS, + tb, DIM_COALESCE_COMPS, + mod, extack); + if (ret) + goto err_out; + + i++; + } + + /* After the profile is modified, dim itself is a dynamic + * mechanism and will quickly fit to the appropriate + * coalescing parameters according to the new profile. + */ + rcu_assign_pointer(*dst, new_profile); + kfree_rcu(old_profile, rcu); + + return 0; + +err_out: + kfree(new_profile); + return ret; +} + static int __ethnl_set_coalesce(struct ethnl_req_info *req_info, struct genl_info *info, bool *dual_change) @@ -317,6 +570,22 @@ __ethnl_set_coalesce(struct ethnl_req_info *req_info, struct genl_info *info, ethnl_update_u32(&kernel_coalesce.tx_aggr_time_usecs, tb[ETHTOOL_A_COALESCE_TX_AGGR_TIME_USECS], &mod); + if (dev->irq_moder && dev->irq_moder->profile_flags & DIM_PROFILE_RX) { + ret = ethnl_update_profile(dev, &dev->irq_moder->rx_profile, + tb[ETHTOOL_A_COALESCE_RX_PROFILE], + &mod, info->extack); + if (ret < 0) + return ret; + } + + if (dev->irq_moder && dev->irq_moder->profile_flags & DIM_PROFILE_TX) { + ret = ethnl_update_profile(dev, &dev->irq_moder->tx_profile, + tb[ETHTOOL_A_COALESCE_TX_PROFILE], + &mod, info->extack); + if (ret < 0) + return ret; + } + /* Update operation modes */ ethnl_update_bool32(&coalesce.use_adaptive_rx_coalesce, tb[ETHTOOL_A_COALESCE_USE_ADAPTIVE_RX], &mod_mode); -- cgit v1.2.3-58-ga151 From dc494fdc1f0657b4786d4f556b0462fe5198506b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 12 Jun 2024 14:28:35 +0200 Subject: wifi: mac80211: refactor CSA queue block/unblock This code is duplicated many times, refactor it into new separate functions. 
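The shape of the refactor is easiest to see at one call site; condensed from the cfg.c hunk below, the open-coded stop/wake sequence collapses into a single helper that also owns the HANDLES_QUIET_CSA check and the csa_blocked_queues bookkeeping:

	/* before */
	if (params->block_tx &&
	    !ieee80211_hw_check(&local->hw, HANDLES_QUIET_CSA)) {
		ieee80211_stop_vif_queues(local, sdata,
					  IEEE80211_QUEUE_STOP_REASON_CSA);
		sdata->csa_blocked_queues = true;
	}

	/* after */
	if (params->block_tx)
		ieee80211_vif_block_queues_csa(sdata);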
Reviewed-by: Miriam Rachel Korenblit Link: https://patch.msgid.link/20240612143037.1ad22f10392d.If21490c2c67aae28f3c54038363181ee920ce3d1@changeid Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 20 ++++---------------- net/mac80211/ieee80211_i.h | 3 +++ net/mac80211/iface.c | 29 ++++++++++++++++++++++++----- net/mac80211/mlme.c | 33 ++++++--------------------------- 4 files changed, 37 insertions(+), 48 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 3236477424b8..7eb2e5bedb6f 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1615,11 +1615,7 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev, /* abort any running channel switch or color change */ link_conf->csa_active = false; link_conf->color_change_active = false; - if (sdata->csa_blocked_queues) { - ieee80211_wake_vif_queues(local, sdata, - IEEE80211_QUEUE_STOP_REASON_CSA); - sdata->csa_blocked_queues = false; - } + ieee80211_vif_unblock_queues_csa(sdata); ieee80211_free_next_beacon(link); @@ -3757,11 +3753,7 @@ static int __ieee80211_csa_finalize(struct ieee80211_link_data *link_data) ieee80211_link_info_change_notify(sdata, link_data, changed); - if (sdata->csa_blocked_queues) { - ieee80211_wake_vif_queues(local, sdata, - IEEE80211_QUEUE_STOP_REASON_CSA); - sdata->csa_blocked_queues = false; - } + ieee80211_vif_unblock_queues_csa(sdata); err = drv_post_channel_switch(link_data); if (err) @@ -4038,12 +4030,8 @@ __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, link_data->csa.chanreq = chanreq; link_conf->csa_active = true; - if (params->block_tx && - !ieee80211_hw_check(&local->hw, HANDLES_QUIET_CSA)) { - ieee80211_stop_vif_queues(local, sdata, - IEEE80211_QUEUE_STOP_REASON_CSA); - sdata->csa_blocked_queues = true; - } + if (params->block_tx) + ieee80211_vif_block_queues_csa(sdata); cfg80211_ch_switch_started_notify(sdata->dev, &link_data->csa.chanreq.oper, link_id, diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 3e735c9436d3..b482763e9f99 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1823,6 +1823,9 @@ ieee80211_have_rx_timestamp(struct ieee80211_rx_status *status) void ieee80211_vif_inc_num_mcast(struct ieee80211_sub_if_data *sdata); void ieee80211_vif_dec_num_mcast(struct ieee80211_sub_if_data *sdata); +void ieee80211_vif_block_queues_csa(struct ieee80211_sub_if_data *sdata); +void ieee80211_vif_unblock_queues_csa(struct ieee80211_sub_if_data *sdata); + /* This function returns the number of multicast stations connected to this * interface. It returns -1 if that number is not tracked, that is for netdevs * not in AP or AP_VLAN mode or when using 4addr. 
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 6b7580c61e0c..f06e165d6c7a 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -544,11 +544,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do sdata->vif.bss_conf.csa_active = false; if (sdata->vif.type == NL80211_IFTYPE_STATION) sdata->deflink.u.mgd.csa.waiting_bcn = false; - if (sdata->csa_blocked_queues) { - ieee80211_wake_vif_queues(local, sdata, - IEEE80211_QUEUE_STOP_REASON_CSA); - sdata->csa_blocked_queues = false; - } + ieee80211_vif_unblock_queues_csa(sdata); wiphy_work_cancel(local->hw.wiphy, &sdata->deflink.csa.finalize_work); wiphy_work_cancel(local->hw.wiphy, @@ -2345,3 +2341,26 @@ void ieee80211_vif_dec_num_mcast(struct ieee80211_sub_if_data *sdata) else if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) atomic_dec(&sdata->u.vlan.num_mcast_sta); } + +void ieee80211_vif_block_queues_csa(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_local *local = sdata->local; + + if (ieee80211_hw_check(&local->hw, HANDLES_QUIET_CSA)) + return; + + ieee80211_stop_vif_queues(local, sdata, + IEEE80211_QUEUE_STOP_REASON_CSA); + sdata->csa_blocked_queues = true; +} + +void ieee80211_vif_unblock_queues_csa(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_local *local = sdata->local; + + if (sdata->csa_blocked_queues) { + ieee80211_wake_vif_queues(local, sdata, + IEEE80211_QUEUE_STOP_REASON_CSA); + sdata->csa_blocked_queues = false; + } +} diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index ef3280fafbe9..ac376ec47a59 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2166,7 +2166,6 @@ static void ieee80211_csa_switch_work(struct wiphy *wiphy, static void ieee80211_chswitch_post_beacon(struct ieee80211_link_data *link) { struct ieee80211_sub_if_data *sdata = link->sdata; - struct ieee80211_local *local = sdata->local; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; int ret; @@ -2174,11 +2173,7 @@ static void ieee80211_chswitch_post_beacon(struct ieee80211_link_data *link) WARN_ON(!link->conf->csa_active); - if (sdata->csa_blocked_queues) { - ieee80211_wake_vif_queues(local, sdata, - IEEE80211_QUEUE_STOP_REASON_CSA); - sdata->csa_blocked_queues = false; - } + ieee80211_vif_unblock_queues_csa(sdata); link->conf->csa_active = false; link->u.mgd.csa.blocked_tx = false; @@ -2242,11 +2237,7 @@ ieee80211_sta_abort_chanswitch(struct ieee80211_link_data *link) ieee80211_link_unreserve_chanctx(link); - if (sdata->csa_blocked_queues) { - ieee80211_wake_vif_queues(local, sdata, - IEEE80211_QUEUE_STOP_REASON_CSA); - sdata->csa_blocked_queues = false; - } + ieee80211_vif_unblock_queues_csa(sdata); link->conf->csa_active = false; link->u.mgd.csa.blocked_tx = false; @@ -2571,12 +2562,8 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link, link->u.mgd.beacon_crc_valid = false; link->u.mgd.csa.blocked_tx = csa_ie.mode; - if (csa_ie.mode && - !ieee80211_hw_check(&local->hw, HANDLES_QUIET_CSA)) { - ieee80211_stop_vif_queues(local, sdata, - IEEE80211_QUEUE_STOP_REASON_CSA); - sdata->csa_blocked_queues = true; - } + if (csa_ie.mode) + ieee80211_vif_block_queues_csa(sdata); cfg80211_ch_switch_started_notify(sdata->dev, &csa_ie.chanreq.oper, link->link_id, csa_ie.count, @@ -3670,11 +3657,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, sdata->deflink.u.mgd.csa.blocked_tx = false; sdata->deflink.u.mgd.csa.waiting_bcn = false; sdata->deflink.u.mgd.csa.ignored_same_chan = false; - if (sdata->csa_blocked_queues) { - 
ieee80211_wake_vif_queues(local, sdata, - IEEE80211_QUEUE_STOP_REASON_CSA); - sdata->csa_blocked_queues = false; - } + ieee80211_vif_unblock_queues_csa(sdata); /* existing TX TSPEC sessions no longer exist */ memset(ifmgd->tx_tspec, 0, sizeof(ifmgd->tx_tspec)); @@ -4045,11 +4028,7 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata) sdata->vif.bss_conf.csa_active = false; sdata->deflink.u.mgd.csa.waiting_bcn = false; sdata->deflink.u.mgd.csa.blocked_tx = false; - if (sdata->csa_blocked_queues) { - ieee80211_wake_vif_queues(local, sdata, - IEEE80211_QUEUE_STOP_REASON_CSA); - sdata->csa_blocked_queues = false; - } + ieee80211_vif_unblock_queues_csa(sdata); ieee80211_report_disconnect(sdata, frame_buf, sizeof(frame_buf), tx, WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY, -- cgit v1.2.3-58-ga151 From 414e090bc41d27af8196d2f36c2e883d7b4f1478 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 12 Jun 2024 14:28:36 +0200 Subject: wifi: mac80211: restrict public action ECSA frame handling Public action extended channel switch announcement (ECSA) frames cannot be protected well, the spec is unclear about what should happen in the presence of stations that can receive protected dual and stations that cannot. Mitigate these issues by not treating public action frames as the absolute truth, only treat them as a hint to stop transmitting (quiet mode), and do the remainder of the CSA handling only when receiving the next beacon (or protected action frame) that contains the CSA; or, if it doesn't, simply stop being quiet and continue operating normally. This limits the exposure to malicious ECSA public action frames, since they cannot cause a disconnect now, only a short interruption in traffic. Reviewed-by: Miriam Rachel Korenblit Link: https://patch.msgid.link/20240612143037.ec7ccc45903e.Ife17d55c7ecbf98060f9c52889f3c8ba48798970@changeid Signed-off-by: Johannes Berg --- net/mac80211/ibss.c | 3 ++- net/mac80211/ieee80211_i.h | 4 +++- net/mac80211/mesh.c | 2 +- net/mac80211/mlme.c | 59 +++++++++++++++++++++++++++++++++++++++------- net/mac80211/spectmgmt.c | 15 +++++++----- 5 files changed, 65 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index bf338f3d4dd3..7db4c3ee7e6d 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -785,7 +785,8 @@ ieee80211_ibss_process_chanswitch(struct ieee80211_sub_if_data *sdata, err = ieee80211_parse_ch_switch_ie(sdata, elems, ifibss->chandef.chan->band, vht_cap_info, &conn, - ifibss->bssid, &csa_ie); + ifibss->bssid, false, + &csa_ie); /* can't switch to destination channel, fail */ if (err < 0) goto disconnect; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index b482763e9f99..cb3e28f88089 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -2218,6 +2218,8 @@ void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata, * @conn: contains information about own capabilities and restrictions * to decide which channel switch announcements can be accepted * @bssid: the currently connected bssid (for reporting) + * @unprot_action: whether the frame was an unprotected frame or not, + * used for reporting * @csa_ie: parsed 802.11 csa elements on count, mode, chandef and mesh ttl. * All of them will be filled with if success only. * Return: 0 on success, <0 on error and >0 if there is nothing to parse. 
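Condensed into a sketch, the new policy in ieee80211_sta_process_chanswitch() is roughly the following; this is a simplification, and the real hunks below also cover the other-link source and unblocking the queues again if the announced CSA never materialises in a beacon:

	switch (source) {
	case IEEE80211_CSA_SOURCE_UNPROT_ACTION:
		/* Unprotected ECSA public action frame: never disconnect or
		 * switch channel based on it alone.  At most go quiet if the
		 * frame requests it, then wait for a beacon or protected
		 * frame to confirm (or refute) the CSA. */
		if (csa_ie.mode)
			ieee80211_vif_block_queues_csa(sdata);
		return;
	case IEEE80211_CSA_SOURCE_BEACON:
	case IEEE80211_CSA_SOURCE_PROT_ACTION:
		/* Trusted sources: run the full channel-switch machinery. */
		break;
	}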
@@ -2227,7 +2229,7 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata, enum nl80211_band current_band, u32 vht_cap_info, struct ieee80211_conn_settings *conn, - u8 *bssid, + u8 *bssid, bool unprot_action, struct ieee80211_csa_ie *csa_ie); /* Suspend/resume and hw reconfiguration */ diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 6d4510221c98..f94e4be0be12 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -1312,7 +1312,7 @@ ieee80211_mesh_process_chnswitch(struct ieee80211_sub_if_data *sdata, memset(¶ms, 0, sizeof(params)); err = ieee80211_parse_ch_switch_ie(sdata, elems, sband->band, vht_cap_info, &conn, - sdata->vif.addr, + sdata->vif.addr, false, &csa_ie); if (err < 0) return false; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index ac376ec47a59..fff158b1615f 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2352,7 +2352,8 @@ ieee80211_sta_other_link_csa_disappeared(struct ieee80211_link_data *link, enum ieee80211_csa_source { IEEE80211_CSA_SOURCE_BEACON, IEEE80211_CSA_SOURCE_OTHER_LINK, - IEEE80211_CSA_SOURCE_ACTION, + IEEE80211_CSA_SOURCE_PROT_ACTION, + IEEE80211_CSA_SOURCE_UNPROT_ACTION, }; static void @@ -2393,7 +2394,9 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link, current_band, bss->vht_cap_info, &link->u.mgd.conn, - link->u.mgd.bssid, &csa_ie); + link->u.mgd.bssid, + source == IEEE80211_CSA_SOURCE_UNPROT_ACTION, + &csa_ie); if (res == 0) { ch_switch.block_tx = csa_ie.mode; ch_switch.chandef = csa_ie.chanreq.oper; @@ -2412,12 +2415,17 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link, res = 1; } - if (res < 0) + if (res < 0) { + /* ignore this case, not a protected frame */ + if (source == IEEE80211_CSA_SOURCE_UNPROT_ACTION) + return; goto drop_connection; + } if (link->conf->csa_active) { switch (source) { - case IEEE80211_CSA_SOURCE_ACTION: + case IEEE80211_CSA_SOURCE_PROT_ACTION: + case IEEE80211_CSA_SOURCE_UNPROT_ACTION: /* already processing - disregard action frames */ return; case IEEE80211_CSA_SOURCE_BEACON: @@ -2466,9 +2474,35 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link, } } - /* nothing to do at all - no active CSA nor a new one */ - if (res) + /* no active CSA nor a new one */ + if (res) { + /* + * However, we may have stopped queues when receiving a public + * action frame that couldn't be protected, if it had the quiet + * bit set. This is a trade-off, we want to be quiet as soon as + * possible, but also don't trust the public action frame much, + * as it can't be protected. + */ + if (unlikely(link->u.mgd.csa.blocked_tx)) { + link->u.mgd.csa.blocked_tx = false; + ieee80211_vif_unblock_queues_csa(sdata); + } + return; + } + + /* + * We don't really trust public action frames, but block queues (go to + * quiet mode) for them anyway, we should get a beacon soon to either + * know what the CSA really is, or figure out the public action frame + * was actually an attack. 
+ */ + if (source == IEEE80211_CSA_SOURCE_UNPROT_ACTION) { + if (csa_ie.mode) { + link->u.mgd.csa.blocked_tx = true; + ieee80211_vif_block_queues_csa(sdata); + } return; + } if (link->conf->chanreq.oper.chan->band != csa_ie.chanreq.oper.chan->band) { @@ -7453,12 +7487,16 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, mgmt->u.action.u.chan_switch.variable, ies_len, true, NULL); - if (elems && !elems->parse_error) + if (elems && !elems->parse_error) { + enum ieee80211_csa_source src = + IEEE80211_CSA_SOURCE_PROT_ACTION; + ieee80211_sta_process_chanswitch(link, rx_status->mactime, rx_status->device_timestamp, elems, elems, - IEEE80211_CSA_SOURCE_ACTION); + src); + } kfree(elems); } else if (mgmt->u.action.category == WLAN_CATEGORY_PUBLIC) { struct ieee802_11_elems *elems; @@ -7479,6 +7517,9 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, ies_len, true, NULL); if (elems && !elems->parse_error) { + enum ieee80211_csa_source src = + IEEE80211_CSA_SOURCE_UNPROT_ACTION; + /* for the handling code pretend it was an IE */ elems->ext_chansw_ie = &mgmt->u.action.u.ext_chan_switch.data; @@ -7487,7 +7528,7 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, rx_status->mactime, rx_status->device_timestamp, elems, elems, - IEEE80211_CSA_SOURCE_ACTION); + src); } kfree(elems); diff --git a/net/mac80211/spectmgmt.c b/net/mac80211/spectmgmt.c index df96d3db1c0e..e91ca4ccdd37 100644 --- a/net/mac80211/spectmgmt.c +++ b/net/mac80211/spectmgmt.c @@ -223,7 +223,7 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata, enum nl80211_band current_band, u32 vht_cap_info, struct ieee80211_conn_settings *conn, - u8 *bssid, + u8 *bssid, bool unprot_action, struct ieee80211_csa_ie *csa_ie) { enum nl80211_band new_band = current_band; @@ -258,8 +258,10 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata, if (!ieee80211_operating_class_to_band(new_op_class, &new_band)) { new_op_class = 0; - sdata_info(sdata, "cannot understand ECSA IE operating class, %d, ignoring\n", - ext_chansw_elem->new_operating_class); + if (!unprot_action) + sdata_info(sdata, + "cannot understand ECSA IE operating class, %d, ignoring\n", + ext_chansw_elem->new_operating_class); } else { new_chan_no = ext_chansw_elem->new_ch_num; csa_ie->count = ext_chansw_elem->count; @@ -293,9 +295,10 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata, new_freq = ieee80211_channel_to_frequency(new_chan_no, new_band); new_chan = ieee80211_get_channel(sdata->local->hw.wiphy, new_freq); if (!new_chan || new_chan->flags & IEEE80211_CHAN_DISABLED) { - sdata_info(sdata, - "BSS %pM switches to unsupported channel (%d MHz), disconnecting\n", - bssid, new_freq); + if (!unprot_action) + sdata_info(sdata, + "BSS %pM switches to unsupported channel (%d MHz), disconnecting\n", + bssid, new_freq); return -EINVAL; } -- cgit v1.2.3-58-ga151 From b777bdfc9be8a61ecf3c07962add3b5fc1279e98 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 12 Jun 2024 14:28:37 +0200 Subject: wifi: mac80211: handle protected dual of public action The code currently handles ECSA (extended channel switch announcement) public action frames. Handle also their protected dual, which actually is protected. 
Reviewed-by: Miriam Rachel Korenblit Link: https://patch.msgid.link/20240612143037.db642feb8b2e.I184fa5c9bffb68099171701e403c2aa733f60fde@changeid Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 22 ++++++++++++++-------- net/mac80211/rx.c | 1 + 2 files changed, 15 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index fff158b1615f..e76b887a46eb 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -7429,6 +7429,7 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, { struct ieee80211_link_data *link = &sdata->deflink; struct ieee80211_rx_status *rx_status; + struct ieee802_11_elems *elems; struct ieee80211_mgmt *mgmt; u16 fc; int ies_len; @@ -7472,9 +7473,8 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, !ether_addr_equal(mgmt->bssid, sdata->vif.cfg.ap_addr)) break; - if (mgmt->u.action.category == WLAN_CATEGORY_SPECTRUM_MGMT) { - struct ieee802_11_elems *elems; - + switch (mgmt->u.action.category) { + case WLAN_CATEGORY_SPECTRUM_MGMT: ies_len = skb->len - offsetof(struct ieee80211_mgmt, u.action.u.chan_switch.variable); @@ -7498,9 +7498,9 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, src); } kfree(elems); - } else if (mgmt->u.action.category == WLAN_CATEGORY_PUBLIC) { - struct ieee802_11_elems *elems; - + break; + case WLAN_CATEGORY_PUBLIC: + case WLAN_CATEGORY_PROTECTED_DUAL_OF_ACTION: ies_len = skb->len - offsetof(struct ieee80211_mgmt, u.action.u.ext_chan_switch.variable); @@ -7517,8 +7517,13 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, ies_len, true, NULL); if (elems && !elems->parse_error) { - enum ieee80211_csa_source src = - IEEE80211_CSA_SOURCE_UNPROT_ACTION; + enum ieee80211_csa_source src; + + if (mgmt->u.action.category == + WLAN_CATEGORY_PROTECTED_DUAL_OF_ACTION) + src = IEEE80211_CSA_SOURCE_PROT_ACTION; + else + src = IEEE80211_CSA_SOURCE_UNPROT_ACTION; /* for the handling code pretend it was an IE */ elems->ext_chansw_ie = @@ -7532,6 +7537,7 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, } kfree(elems); + break; } break; } diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 4914692750e5..0ff9062a130c 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -3617,6 +3617,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) break; case WLAN_CATEGORY_PUBLIC: + case WLAN_CATEGORY_PROTECTED_DUAL_OF_ACTION: if (len < IEEE80211_MIN_ACTION_SIZE + 1) goto invalid; if (sdata->vif.type != NL80211_IFTYPE_STATION) -- cgit v1.2.3-58-ga151 From 25af8ff51d7e027fbdf8ed84e349427311a6da4d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 12 Jun 2024 14:32:03 +0200 Subject: wifi: mac80211: optionally pass chandef to ieee80211_sta_cap_rx_bw() We'll need this function to take a new chandef in (some) channel switching cases, so prepare for that by allowing that to be passed and using it if so. Clean up the code a little bit while at it. 
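The mechanism is a thin-wrapper pattern (taken from the ieee80211_i.h hunk below): existing callers keep the old name and implicitly evaluate against the current chandef, while channel-switch code can ask about a chandef that has not been applied yet. The final caller shown here is hypothetical, for illustration only:

	enum ieee80211_sta_rx_bandwidth
	_ieee80211_sta_cap_rx_bw(struct link_sta_info *link_sta,
				 struct cfg80211_chan_def *chandef);

	static inline enum ieee80211_sta_rx_bandwidth
	ieee80211_sta_cap_rx_bw(struct link_sta_info *link_sta)
	{
		/* NULL means "use the link's currently operating chandef" */
		return _ieee80211_sta_cap_rx_bw(link_sta, NULL);
	}

	/* hypothetical: bandwidth the peer would get after a pending switch */
	bw = _ieee80211_sta_cap_rx_bw(link_sta, &link->reserved.oper);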
Reviewed-by: Miriam Rachel Korenblit Link: https://patch.msgid.link/20240612143418.772313f08b6a.If9708249e5870671e745d4c2b02e03b25092bea3@changeid Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 8 +++++++- net/mac80211/vht.c | 48 +++++++++++++++++++++++----------------------- 2 files changed, 31 insertions(+), 25 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index cb3e28f88089..e96404f9dc70 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -2158,7 +2158,13 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, const struct ieee80211_vht_cap *vht_cap_ie2, struct link_sta_info *link_sta); enum ieee80211_sta_rx_bandwidth -ieee80211_sta_cap_rx_bw(struct link_sta_info *link_sta); +_ieee80211_sta_cap_rx_bw(struct link_sta_info *link_sta, + struct cfg80211_chan_def *chandef); +static inline enum ieee80211_sta_rx_bandwidth +ieee80211_sta_cap_rx_bw(struct link_sta_info *link_sta) +{ + return _ieee80211_sta_cap_rx_bw(link_sta, NULL); +} enum ieee80211_sta_rx_bandwidth ieee80211_sta_cur_vht_bw(struct link_sta_info *link_sta); void ieee80211_sta_init_nss(struct link_sta_info *link_sta); diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c index 642891cafbaf..c280945fc9d6 100644 --- a/net/mac80211/vht.c +++ b/net/mac80211/vht.c @@ -351,7 +351,8 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, /* FIXME: move this to some better location - parses HE/EHT now */ enum ieee80211_sta_rx_bandwidth -ieee80211_sta_cap_rx_bw(struct link_sta_info *link_sta) +_ieee80211_sta_cap_rx_bw(struct link_sta_info *link_sta, + struct cfg80211_chan_def *chandef) { unsigned int link_id = link_sta->link_id; struct ieee80211_sub_if_data *sdata = link_sta->sta->sdata; @@ -361,44 +362,43 @@ ieee80211_sta_cap_rx_bw(struct link_sta_info *link_sta) u32 cap_width; if (he_cap->has_he) { - struct ieee80211_bss_conf *link_conf; - enum ieee80211_sta_rx_bandwidth ret; + enum nl80211_band band; u8 info; - rcu_read_lock(); - link_conf = rcu_dereference(sdata->vif.link_conf[link_id]); + if (chandef) { + band = chandef->chan->band; + } else { + struct ieee80211_bss_conf *link_conf; + + rcu_read_lock(); + link_conf = rcu_dereference(sdata->vif.link_conf[link_id]); + band = link_conf->chanreq.oper.chan->band; + rcu_read_unlock(); + } - if (eht_cap->has_eht && - link_conf->chanreq.oper.chan->band == NL80211_BAND_6GHZ) { + if (eht_cap->has_eht && band == NL80211_BAND_6GHZ) { info = eht_cap->eht_cap_elem.phy_cap_info[0]; - if (info & IEEE80211_EHT_PHY_CAP0_320MHZ_IN_6GHZ) { - ret = IEEE80211_STA_RX_BW_320; - goto out; - } + if (info & IEEE80211_EHT_PHY_CAP0_320MHZ_IN_6GHZ) + return IEEE80211_STA_RX_BW_320; } info = he_cap->he_cap_elem.phy_cap_info[0]; - if (link_conf->chanreq.oper.chan->band == NL80211_BAND_2GHZ) { + if (band == NL80211_BAND_2GHZ) { if (info & IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_IN_2G) - ret = IEEE80211_STA_RX_BW_40; - else - ret = IEEE80211_STA_RX_BW_20; - goto out; + return IEEE80211_STA_RX_BW_40; + return IEEE80211_STA_RX_BW_20; } if (info & IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G || info & IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_80PLUS80_MHZ_IN_5G) - ret = IEEE80211_STA_RX_BW_160; - else if (info & IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_80MHZ_IN_5G) - ret = IEEE80211_STA_RX_BW_80; - else - ret = IEEE80211_STA_RX_BW_20; -out: - rcu_read_unlock(); + return IEEE80211_STA_RX_BW_160; + + if (info & IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_80MHZ_IN_5G) + 
return IEEE80211_STA_RX_BW_80; - return ret; + return IEEE80211_STA_RX_BW_20; } if (!vht_cap->vht_supported) -- cgit v1.2.3-58-ga151 From 7d2bad829c449b8cb172d20cb51d62c586fca94d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 12 Jun 2024 14:32:04 +0200 Subject: wifi: mac80211: optionally pass chandef to ieee80211_sta_cur_vht_bw() We'll need this as well for channel switching cases, so add the ability now to pass the chandef to calculate for. Reviewed-by: Miriam Rachel Korenblit Link: https://patch.msgid.link/20240612143418.f70e05d9f306.Ifa0ce267de4f0ef3c21d063fb0cbf50e84d7d6ff@changeid Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 8 +++++++- net/mac80211/vht.c | 25 ++++++++++++++++--------- 2 files changed, 23 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index e96404f9dc70..33b8efff92c1 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -2166,7 +2166,13 @@ ieee80211_sta_cap_rx_bw(struct link_sta_info *link_sta) return _ieee80211_sta_cap_rx_bw(link_sta, NULL); } enum ieee80211_sta_rx_bandwidth -ieee80211_sta_cur_vht_bw(struct link_sta_info *link_sta); +_ieee80211_sta_cur_vht_bw(struct link_sta_info *link_sta, + struct cfg80211_chan_def *chandef); +static inline enum ieee80211_sta_rx_bandwidth +ieee80211_sta_cur_vht_bw(struct link_sta_info *link_sta) +{ + return _ieee80211_sta_cur_vht_bw(link_sta, NULL); +} void ieee80211_sta_init_nss(struct link_sta_info *link_sta); enum ieee80211_sta_rx_bandwidth ieee80211_chan_width_to_rx_bw(enum nl80211_chan_width width); diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c index c280945fc9d6..bf6ef45af757 100644 --- a/net/mac80211/vht.c +++ b/net/mac80211/vht.c @@ -503,22 +503,29 @@ ieee80211_chan_width_to_rx_bw(enum nl80211_chan_width width) /* FIXME: rename/move - this deals with everything not just VHT */ enum ieee80211_sta_rx_bandwidth -ieee80211_sta_cur_vht_bw(struct link_sta_info *link_sta) +_ieee80211_sta_cur_vht_bw(struct link_sta_info *link_sta, + struct cfg80211_chan_def *chandef) { struct sta_info *sta = link_sta->sta; - struct ieee80211_bss_conf *link_conf; enum nl80211_chan_width bss_width; enum ieee80211_sta_rx_bandwidth bw; - rcu_read_lock(); - link_conf = rcu_dereference(sta->sdata->vif.link_conf[link_sta->link_id]); - if (WARN_ON(!link_conf)) - bss_width = NL80211_CHAN_WIDTH_20_NOHT; - else + if (chandef) { + bss_width = chandef->width; + } else { + struct ieee80211_bss_conf *link_conf; + + rcu_read_lock(); + link_conf = rcu_dereference(sta->sdata->vif.link_conf[link_sta->link_id]); + if (WARN_ON_ONCE(!link_conf)) { + rcu_read_unlock(); + return IEEE80211_STA_RX_BW_20; + } bss_width = link_conf->chanreq.oper.width; - rcu_read_unlock(); + rcu_read_unlock(); + } - bw = ieee80211_sta_cap_rx_bw(link_sta); + bw = _ieee80211_sta_cap_rx_bw(link_sta, chandef); bw = min(bw, link_sta->cur_max_bandwidth); /* Don't consider AP's bandwidth for TDLS peers, section 11.23.1 of -- cgit v1.2.3-58-ga151 From b27512368591fc959768df1f7dacf2a96b1bd036 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 12 Jun 2024 14:32:05 +0200 Subject: wifi: mac80211: make ieee80211_chan_bw_change() able to use reserved Make ieee80211_chan_bw_change() able to use the reserved chanreq (really the chandef part of it) for the calculations, so it can be used _without_ applying the changes first. Remove the comment that indicates this is required, since it no longer is. However, this capability only gets used later. 
Also, this is not ideal, we really should not different so much between reserved and non-reserved usage, to simplify. That's a further cleanup later though. Reviewed-by: Miriam Rachel Korenblit Link: https://patch.msgid.link/20240612143418.1a08cf83b8cb.Ie567bb272eb25ce487651088f13ad041f549651c@changeid Signed-off-by: Johannes Berg --- net/mac80211/chan.c | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index ec16d7676088..a42ab3179d99 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -396,12 +396,9 @@ _ieee80211_recalc_chanctx_min_def(struct ieee80211_local *local, return IEEE80211_CHANCTX_CHANGE_MIN_WIDTH; } -/* calling this function is assuming that station vif is updated to - * lates changes by calling ieee80211_link_update_chanreq - */ static void ieee80211_chan_bw_change(struct ieee80211_local *local, struct ieee80211_chanctx *ctx, - bool narrowed) + bool reserved, bool narrowed) { struct sta_info *sta; struct ieee80211_supported_band *sband = @@ -418,13 +415,17 @@ static void ieee80211_chan_bw_change(struct ieee80211_local *local, continue; for (link_id = 0; link_id < ARRAY_SIZE(sta->sdata->link); link_id++) { - struct ieee80211_bss_conf *link_conf = - rcu_dereference(sdata->vif.link_conf[link_id]); + struct ieee80211_link_data *link = + rcu_dereference(sdata->link[link_id]); + struct ieee80211_bss_conf *link_conf; + struct cfg80211_chan_def *new_chandef; struct link_sta_info *link_sta; - if (!link_conf) + if (!link) continue; + link_conf = link->conf; + if (rcu_access_pointer(link_conf->chanctx_conf) != &ctx->conf) continue; @@ -432,7 +433,13 @@ static void ieee80211_chan_bw_change(struct ieee80211_local *local, if (!link_sta) continue; - new_sta_bw = ieee80211_sta_cur_vht_bw(link_sta); + if (reserved) + new_chandef = &link->reserved.oper; + else + new_chandef = &link_conf->chanreq.oper; + + new_sta_bw = _ieee80211_sta_cur_vht_bw(link_sta, + new_chandef); /* nothing change */ if (new_sta_bw == link_sta->pub->bandwidth) @@ -466,12 +473,12 @@ void ieee80211_recalc_chanctx_min_def(struct ieee80211_local *local, return; /* check is BW narrowed */ - ieee80211_chan_bw_change(local, ctx, true); + ieee80211_chan_bw_change(local, ctx, false, true); drv_change_chanctx(local, ctx, changed); /* check is BW wider */ - ieee80211_chan_bw_change(local, ctx, false); + ieee80211_chan_bw_change(local, ctx, false, false); } static void _ieee80211_change_chanctx(struct ieee80211_local *local, @@ -505,7 +512,7 @@ static void _ieee80211_change_chanctx(struct ieee80211_local *local, * due to maybe not returning from it, e.g in case new context was added * first time with all parameters up to date. 
*/ - ieee80211_chan_bw_change(local, old_ctx, true); + ieee80211_chan_bw_change(local, old_ctx, false, true); if (ieee80211_chanreq_identical(&ctx_req, chanreq)) { ieee80211_recalc_chanctx_min_def(local, ctx, rsvd_for); @@ -536,7 +543,7 @@ static void _ieee80211_change_chanctx(struct ieee80211_local *local, drv_change_chanctx(local, ctx, changed); /* check if BW is wider */ - ieee80211_chan_bw_change(local, old_ctx, false); + ieee80211_chan_bw_change(local, old_ctx, false, false); } static void ieee80211_change_chanctx(struct ieee80211_local *local, -- cgit v1.2.3-58-ga151 From dd7b1bdb5614419305089de399b7e6aa2fde8301 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 12 Jun 2024 14:32:06 +0200 Subject: wifi: mac80211: update STA/chandef width during switch In channel switch without an additional channel context, where the reassign logic kicks in, we also need to update the station bandwidth and chandef minimum width correctly to avoid having station rate control configured to wider bandwidth than the channel context. Do that now. Reviewed-by: Miriam Rachel Korenblit Link: https://patch.msgid.link/20240612143418.0bc3d28231b3.I51e76df86212057ca0469e235ba9bf4461cbee75@changeid Signed-off-by: Johannes Berg --- net/mac80211/chan.c | 67 +++++++++++++++++++++++++++++++++++----------- net/mac80211/ieee80211_i.h | 3 ++- net/mac80211/util.c | 2 +- 3 files changed, 55 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index a42ab3179d99..942c882f1a1d 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -295,17 +295,24 @@ ieee80211_get_max_required_bw(struct ieee80211_link_data *link) static enum nl80211_chan_width ieee80211_get_chanctx_max_required_bw(struct ieee80211_local *local, struct ieee80211_chanctx *ctx, - struct ieee80211_link_data *rsvd_for) + struct ieee80211_link_data *rsvd_for, + bool check_reserved) { struct ieee80211_sub_if_data *sdata; struct ieee80211_link_data *link; enum nl80211_chan_width max_bw = NL80211_CHAN_WIDTH_20_NOHT; + if (WARN_ON(check_reserved && rsvd_for)) + return ctx->conf.def.width; + for_each_sdata_link(local, link) { enum nl80211_chan_width width = NL80211_CHAN_WIDTH_20_NOHT; - if (link != rsvd_for && - rcu_access_pointer(link->conf->chanctx_conf) != &ctx->conf) + if (check_reserved) { + if (link->reserved_chanctx != ctx) + continue; + } else if (link != rsvd_for && + rcu_access_pointer(link->conf->chanctx_conf) != &ctx->conf) continue; switch (link->sdata->vif.type) { @@ -359,7 +366,8 @@ ieee80211_get_chanctx_max_required_bw(struct ieee80211_local *local, static u32 _ieee80211_recalc_chanctx_min_def(struct ieee80211_local *local, struct ieee80211_chanctx *ctx, - struct ieee80211_link_data *rsvd_for) + struct ieee80211_link_data *rsvd_for, + bool check_reserved) { enum nl80211_chan_width max_bw; struct cfg80211_chan_def min_def; @@ -379,7 +387,8 @@ _ieee80211_recalc_chanctx_min_def(struct ieee80211_local *local, return 0; } - max_bw = ieee80211_get_chanctx_max_required_bw(local, ctx, rsvd_for); + max_bw = ieee80211_get_chanctx_max_required_bw(local, ctx, rsvd_for, + check_reserved); /* downgrade chandef up to max_bw */ min_def = ctx->conf.def; @@ -465,9 +474,11 @@ static void ieee80211_chan_bw_change(struct ieee80211_local *local, */ void ieee80211_recalc_chanctx_min_def(struct ieee80211_local *local, struct ieee80211_chanctx *ctx, - struct ieee80211_link_data *rsvd_for) + struct ieee80211_link_data *rsvd_for, + bool check_reserved) { - u32 changed = _ieee80211_recalc_chanctx_min_def(local, ctx, 
rsvd_for); + u32 changed = _ieee80211_recalc_chanctx_min_def(local, ctx, rsvd_for, + check_reserved); if (!changed) return; @@ -515,7 +526,7 @@ static void _ieee80211_change_chanctx(struct ieee80211_local *local, ieee80211_chan_bw_change(local, old_ctx, false, true); if (ieee80211_chanreq_identical(&ctx_req, chanreq)) { - ieee80211_recalc_chanctx_min_def(local, ctx, rsvd_for); + ieee80211_recalc_chanctx_min_def(local, ctx, rsvd_for, false); return; } @@ -536,7 +547,7 @@ static void _ieee80211_change_chanctx(struct ieee80211_local *local, ctx->conf.ap = chanreq->ap; /* check if min chanctx also changed */ - changed |= _ieee80211_recalc_chanctx_min_def(local, ctx, rsvd_for); + changed |= _ieee80211_recalc_chanctx_min_def(local, ctx, rsvd_for, false); ieee80211_add_wbrf(local, &ctx->conf.def); @@ -663,7 +674,7 @@ ieee80211_alloc_chanctx(struct ieee80211_local *local, ctx->conf.rx_chains_dynamic = 1; ctx->mode = mode; ctx->conf.radar_enabled = false; - _ieee80211_recalc_chanctx_min_def(local, ctx, NULL); + _ieee80211_recalc_chanctx_min_def(local, ctx, NULL, false); return ctx; } @@ -845,7 +856,7 @@ static int ieee80211_assign_link_chanctx(struct ieee80211_link_data *link, if (new_ctx) { /* recalc considering the link we'll use it for now */ - ieee80211_recalc_chanctx_min_def(local, new_ctx, link); + ieee80211_recalc_chanctx_min_def(local, new_ctx, link, false); ret = drv_assign_vif_chanctx(local, sdata, link->conf, new_ctx); if (assign_on_failure || !ret) { @@ -868,12 +879,12 @@ static int ieee80211_assign_link_chanctx(struct ieee80211_link_data *link, ieee80211_recalc_chanctx_chantype(local, curr_ctx); ieee80211_recalc_smps_chanctx(local, curr_ctx); ieee80211_recalc_radar_chanctx(local, curr_ctx); - ieee80211_recalc_chanctx_min_def(local, curr_ctx, NULL); + ieee80211_recalc_chanctx_min_def(local, curr_ctx, NULL, false); } if (new_ctx && ieee80211_chanctx_num_assigned(local, new_ctx) > 0) { ieee80211_recalc_txpower(sdata, false); - ieee80211_recalc_chanctx_min_def(local, new_ctx, NULL); + ieee80211_recalc_chanctx_min_def(local, new_ctx, NULL, false); } if (conf) { @@ -1286,7 +1297,7 @@ ieee80211_link_use_reserved_reassign(struct ieee80211_link_data *link) if (ieee80211_chanctx_refcount(local, old_ctx) == 0) ieee80211_free_chanctx(local, old_ctx, false); - ieee80211_recalc_chanctx_min_def(local, new_ctx, NULL); + ieee80211_recalc_chanctx_min_def(local, new_ctx, NULL, false); ieee80211_recalc_smps_chanctx(local, new_ctx); ieee80211_recalc_radar_chanctx(local, new_ctx); @@ -1552,6 +1563,31 @@ static int ieee80211_vif_use_reserved_switch(struct ieee80211_local *local) goto err; } + /* update station rate control and min width before switch */ + list_for_each_entry(ctx, &local->chanctx_list, list) { + struct ieee80211_link_data *link; + + if (ctx->replace_state != IEEE80211_CHANCTX_REPLACES_OTHER) + continue; + + if (WARN_ON(!ctx->replace_ctx)) { + err = -EINVAL; + goto err; + } + + list_for_each_entry(link, &ctx->reserved_links, + reserved_chanctx_list) { + if (!ieee80211_link_has_in_place_reservation(link)) + continue; + + ieee80211_chan_bw_change(local, + ieee80211_link_get_chanctx(link), + true, true); + } + + ieee80211_recalc_chanctx_min_def(local, ctx, NULL, true); + } + /* * All necessary vifs are ready. Perform the switch now depending on * reservations and driver capabilities. 
@@ -1619,7 +1655,7 @@ static int ieee80211_vif_use_reserved_switch(struct ieee80211_local *local) ieee80211_recalc_chanctx_chantype(local, ctx); ieee80211_recalc_smps_chanctx(local, ctx); ieee80211_recalc_radar_chanctx(local, ctx); - ieee80211_recalc_chanctx_min_def(local, ctx, NULL); + ieee80211_recalc_chanctx_min_def(local, ctx, NULL, false); list_for_each_entry_safe(link, link_tmp, &ctx->reserved_links, reserved_chanctx_list) { @@ -1632,6 +1668,7 @@ static int ieee80211_vif_use_reserved_switch(struct ieee80211_local *local) link->reserved_chanctx = NULL; ieee80211_link_chanctx_reservation_complete(link); + ieee80211_chan_bw_change(local, ctx, false, false); } /* diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 33b8efff92c1..501f02809135 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -2624,7 +2624,8 @@ void ieee80211_recalc_smps_chanctx(struct ieee80211_local *local, struct ieee80211_chanctx *chanctx); void ieee80211_recalc_chanctx_min_def(struct ieee80211_local *local, struct ieee80211_chanctx *ctx, - struct ieee80211_link_data *rsvd_for); + struct ieee80211_link_data *rsvd_for, + bool check_reserved); bool ieee80211_is_radar_required(struct ieee80211_local *local); void ieee80211_dfs_cac_timer_work(struct wiphy *wiphy, struct wiphy_work *work); diff --git a/net/mac80211/util.c b/net/mac80211/util.c index c73ff7dfbdba..b3b8873a107b 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -2334,7 +2334,7 @@ void ieee80211_recalc_min_chandef(struct ieee80211_sub_if_data *sdata, chanctx = container_of(chanctx_conf, struct ieee80211_chanctx, conf); - ieee80211_recalc_chanctx_min_def(local, chanctx, NULL); + ieee80211_recalc_chanctx_min_def(local, chanctx, NULL, false); } } -- cgit v1.2.3-58-ga151 From d42fcaece03654a4b21d2da88d68ed913e0b6c46 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 12 Jun 2024 14:35:56 +0200 Subject: wifi: mac80211: add ieee80211_tdls_sta_link_id() We've open-coded this twice and will need it again, add ieee80211_tdls_sta_link_id() to get the one link ID for a TDLS STA. Reviewed-by: Miriam Rachel Korenblit Link: https://patch.msgid.link/20240612143707.9f8141ae1725.I343822bbba0ae08dedb2f54a0ce87f2ae5ebeb2b@changeid Signed-off-by: Johannes Berg --- net/mac80211/sta_info.h | 6 ++++++ net/mac80211/tx.c | 6 ++---- 2 files changed, 8 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index bd5e2f7146f6..9195d5a2de0a 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -727,6 +727,12 @@ struct sta_info { struct ieee80211_sta sta; }; +static inline int ieee80211_tdls_sta_link_id(struct sta_info *sta) +{ + /* TDLS STA can only have a single link */ + return sta->sta.valid_links ? __ffs(sta->sta.valid_links) : 0; +} + static inline enum nl80211_plink_state sta_plink_state(struct sta_info *sta) { #ifdef CONFIG_MAC80211_MESH diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index f861d99e5f05..72a9ba8bc5fd 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -2774,8 +2774,7 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata, if (tdls_peer) { /* For TDLS only one link can be valid with peer STA */ - int tdls_link_id = sta->sta.valid_links ? 
- __ffs(sta->sta.valid_links) : 0; + int tdls_link_id = ieee80211_tdls_sta_link_id(sta); struct ieee80211_link_data *link; /* DA SA BSSID */ @@ -3101,8 +3100,7 @@ void ieee80211_check_fast_xmit(struct sta_info *sta) case NL80211_IFTYPE_STATION: if (test_sta_flag(sta, WLAN_STA_TDLS_PEER)) { /* For TDLS only one link can be valid with peer STA */ - int tdls_link_id = sta->sta.valid_links ? - __ffs(sta->sta.valid_links) : 0; + int tdls_link_id = ieee80211_tdls_sta_link_id(sta); struct ieee80211_link_data *link; /* DA SA BSSID */ -- cgit v1.2.3-58-ga151 From 0b2d9d9aec2be212a28b7d14b5462c56d9adc3a3 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 12 Jun 2024 14:35:57 +0200 Subject: wifi: mac80211: correcty limit wider BW TDLS STAs When updating a channel context, the code can apply wider bandwidth TDLS STA channel definitions to each and every channel context used by the device, an approach that will surely lead to problems if there is ever more than one. Restrict the wider BW TDLS STA consideration to only TDLS STAs that are actually related to links using the channel context being updated. Fixes: 0fabfaafec3a ("mac80211: upgrade BW of TDLS peers when possible") Reviewed-by: Miriam Rachel Korenblit Link: https://patch.msgid.link/20240612143707.1ad989acecde.I5c75c94d95c3f4ea84f8ff4253189f4b13bad5c3@changeid Signed-off-by: Johannes Berg --- net/mac80211/chan.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'net') diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index 942c882f1a1d..06a65dc6f6c6 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -793,13 +793,24 @@ void ieee80211_recalc_chanctx_chantype(struct ieee80211_local *local, /* TDLS peers can sometimes affect the chandef width */ list_for_each_entry(sta, &local->sta_list, list) { + struct ieee80211_sub_if_data *sdata = sta->sdata; struct ieee80211_chan_req tdls_chanreq = {}; + int tdls_link_id; + if (!sta->uploaded || !test_sta_flag(sta, WLAN_STA_TDLS_WIDER_BW) || !test_sta_flag(sta, WLAN_STA_AUTHORIZED) || !sta->tdls_chandef.chan) continue; + tdls_link_id = ieee80211_tdls_sta_link_id(sta); + link = sdata_dereference(sdata->link[tdls_link_id], sdata); + if (!link) + continue; + + if (rcu_access_pointer(link->conf->chanctx_conf) != conf) + continue; + tdls_chanreq.oper = sta->tdls_chandef; /* note this always fills and returns &tmp if compat */ -- cgit v1.2.3-58-ga151 From 9cc88678db5be1d67c865dbe60afb56867698301 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 12 Jun 2024 14:38:10 +0200 Subject: wifi: mac80211: check SSID in beacon Check that the SSID in beacons is correct, if it's not hidden and beacon protection is enabled (otherwise there's no value). If it doesn't match, disconnect. 
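For illustration only, a simplified stand-alone sketch of the intended comparison (not the exact mac80211 code; names, types and check ordering are simplified): an SSID element only counts as a mismatch when it is present, not hidden (neither zero-length nor all-zero), and its length or bytes differ from the SSID we associated to.

    #include <stdbool.h>
    #include <stdio.h>
    #include <string.h>

    #define MAX_SSID_LEN 32

    /* Returns true only for a real (non-hidden) SSID that differs from ours. */
    static bool ssid_mismatch(const unsigned char *beacon_ssid, size_t beacon_len,
                              const unsigned char *our_ssid, size_t our_len)
    {
        static const unsigned char zero_ssid[MAX_SSID_LEN];

        if (!beacon_ssid)
            return false;                       /* no SSID element at all   */
        if (beacon_len == 0)
            return false;                       /* hidden SSID: zero length */
        if (beacon_len <= MAX_SSID_LEN &&
            memcmp(beacon_ssid, zero_ssid, beacon_len) == 0)
            return false;                       /* hidden SSID: zeroed out  */
        if (beacon_len != our_len)
            return true;
        return memcmp(beacon_ssid, our_ssid, our_len) != 0;
    }

    int main(void)
    {
        const unsigned char ours[] = "homenet";     /* length 7 */
        const unsigned char other[] = "worknet8";   /* length 8 */
        const unsigned char hidden[7] = { 0 };

        printf("%d\n", ssid_mismatch(other, 8, ours, 7));   /* 1: disconnect */
        printf("%d\n", ssid_mismatch(hidden, 7, ours, 7));  /* 0: hidden     */
        printf("%d\n", ssid_mismatch(ours, 7, ours, 7));    /* 0: match      */
        return 0;
    }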
Reviewed-by: Miriam Rachel Korenblit Link: https://patch.msgid.link/20240612143809.8b24a3d26a3d.I3e3ef31dbd2ec606be74d502a9d00dd9514c6885@changeid Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index e76b887a46eb..3d207d79d11f 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -6667,6 +6667,29 @@ handle: } } +static bool ieee80211_mgd_ssid_mismatch(struct ieee80211_sub_if_data *sdata, + const struct ieee802_11_elems *elems) +{ + struct ieee80211_vif_cfg *cfg = &sdata->vif.cfg; + static u8 zero_ssid[IEEE80211_MAX_SSID_LEN]; + + if (!elems->ssid) + return false; + + /* hidden SSID: zero length */ + if (elems->ssid_len == 0) + return false; + + if (elems->ssid_len != cfg->ssid_len) + return true; + + /* hidden SSID: zeroed out */ + if (memcmp(elems->ssid, zero_ssid, elems->ssid_len)) + return false; + + return memcmp(elems->ssid, cfg->ssid, cfg->ssid_len); +} + static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link, struct ieee80211_hdr *hdr, size_t len, struct ieee80211_rx_status *rx_status) @@ -6808,6 +6831,15 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link, elems = ieee802_11_parse_elems_full(&parse_params); if (!elems) return; + + if (rx_status->flag & RX_FLAG_DECRYPTED && + ieee80211_mgd_ssid_mismatch(sdata, elems)) { + sdata_info(sdata, "SSID mismatch for AP %pM, disconnect\n", + sdata->vif.cfg.ap_addr); + __ieee80211_disconnect(sdata); + return; + } + ncrc = elems->crc; if (ieee80211_hw_check(&local->hw, PS_NULLFUNC_STACK) && -- cgit v1.2.3-58-ga151 From 5036eaffed3e29fb35c796989efb3c88ac79d87b Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Fri, 14 Jun 2024 09:35:41 +0200 Subject: wifi: cfg80211: Always call tracing Call the tracing function even if the cfg80211 callbacks are not set. This would allow better understanding of user space actions. 
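For illustration only, a self-contained sketch of the shape each rdev_*() wrapper takes after this change (the ops struct, trace helpers and error numbering below are simplified stand-ins): the return value is seeded with -EOPNOTSUPP, the entry trace fires unconditionally, and only the driver callback itself is guarded, so the exit trace still records the -EOPNOTSUPP result for unimplemented ops.

    #include <stdio.h>

    #define EOPNOTSUPP 95   /* stand-in; the kernel defines this */

    /* Simplified stand-ins: the real code traces via tracepoints and
     * dispatches through rdev->ops. */
    struct ops {
        int (*set_wiphy_params)(unsigned int changed);
    };

    static void trace_set_wiphy_params(unsigned int changed)
    {
        printf("trace: set_wiphy_params changed=%#x\n", changed);
    }

    static void trace_return_int(int ret)
    {
        printf("trace: return %d\n", ret);
    }

    /* Pattern after the patch: trace first, call the op only if present. */
    static int rdev_set_wiphy_params(const struct ops *ops, unsigned int changed)
    {
        int ret = -EOPNOTSUPP;

        trace_set_wiphy_params(changed);
        if (ops->set_wiphy_params)
            ret = ops->set_wiphy_params(changed);
        trace_return_int(ret);
        return ret;
    }

    static int demo_op(unsigned int changed) { (void)changed; return 0; }

    int main(void)
    {
        struct ops with_op = { .set_wiphy_params = demo_op };
        struct ops without_op = { 0 };

        rdev_set_wiphy_params(&with_op, 0x4);    /* traced, returns 0           */
        rdev_set_wiphy_params(&without_op, 0x4); /* still traced, -EOPNOTSUPP   */
        return 0;
    }

The early "if (!rdev->ops->X) return -EOPNOTSUPP;" exits removed below are what previously made unimplemented ops invisible to tracing.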
Reviewed-by: Miriam Rachel Korenblit Signed-off-by: Ilan Peer Link: https://patch.msgid.link/20240614093541.018cb816e176.I28f68740a6b42144346f5c175c7874b0a669a364@changeid Signed-off-by: Johannes Berg --- net/wireless/rdev-ops.h | 56 +++++++++++++++++++------------------------------ 1 file changed, 21 insertions(+), 35 deletions(-) (limited to 'net') diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index 755af47b88b9..8f15658002ee 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -578,13 +578,11 @@ static inline int rdev_leave_ibss(struct cfg80211_registered_device *rdev, static inline int rdev_set_wiphy_params(struct cfg80211_registered_device *rdev, u32 changed) { - int ret; - - if (!rdev->ops->set_wiphy_params) - return -EOPNOTSUPP; + int ret = -EOPNOTSUPP; trace_rdev_set_wiphy_params(&rdev->wiphy, changed); - ret = rdev->ops->set_wiphy_params(&rdev->wiphy, changed); + if (rdev->ops->set_wiphy_params) + ret = rdev->ops->set_wiphy_params(&rdev->wiphy, changed); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } @@ -1425,13 +1423,11 @@ rdev_set_radar_background(struct cfg80211_registered_device *rdev, struct cfg80211_chan_def *chandef) { struct wiphy *wiphy = &rdev->wiphy; - int ret; - - if (!rdev->ops->set_radar_background) - return -EOPNOTSUPP; + int ret = -EOPNOTSUPP; trace_rdev_set_radar_background(wiphy, chandef); - ret = rdev->ops->set_radar_background(wiphy, chandef); + if (rdev->ops->set_radar_background) + ret = rdev->ops->set_radar_background(wiphy, chandef); trace_rdev_return_int(wiphy, ret); return ret; @@ -1468,13 +1464,11 @@ rdev_add_link_station(struct cfg80211_registered_device *rdev, struct net_device *dev, struct link_station_parameters *params) { - int ret; - - if (!rdev->ops->add_link_station) - return -EOPNOTSUPP; + int ret = -EOPNOTSUPP; trace_rdev_add_link_station(&rdev->wiphy, dev, params); - ret = rdev->ops->add_link_station(&rdev->wiphy, dev, params); + if (rdev->ops->add_link_station) + ret = rdev->ops->add_link_station(&rdev->wiphy, dev, params); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } @@ -1484,13 +1478,11 @@ rdev_mod_link_station(struct cfg80211_registered_device *rdev, struct net_device *dev, struct link_station_parameters *params) { - int ret; - - if (!rdev->ops->mod_link_station) - return -EOPNOTSUPP; + int ret = -EOPNOTSUPP; trace_rdev_mod_link_station(&rdev->wiphy, dev, params); - ret = rdev->ops->mod_link_station(&rdev->wiphy, dev, params); + if (rdev->ops->mod_link_station) + ret = rdev->ops->mod_link_station(&rdev->wiphy, dev, params); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } @@ -1500,13 +1492,11 @@ rdev_del_link_station(struct cfg80211_registered_device *rdev, struct net_device *dev, struct link_station_del_parameters *params) { - int ret; - - if (!rdev->ops->del_link_station) - return -EOPNOTSUPP; + int ret = -EOPNOTSUPP; trace_rdev_del_link_station(&rdev->wiphy, dev, params); - ret = rdev->ops->del_link_station(&rdev->wiphy, dev, params); + if (rdev->ops->del_link_station) + ret = rdev->ops->del_link_station(&rdev->wiphy, dev, params); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } @@ -1517,13 +1507,11 @@ rdev_set_hw_timestamp(struct cfg80211_registered_device *rdev, struct cfg80211_set_hw_timestamp *hwts) { struct wiphy *wiphy = &rdev->wiphy; - int ret; - - if (!rdev->ops->set_hw_timestamp) - return -EOPNOTSUPP; + int ret = -EOPNOTSUPP; trace_rdev_set_hw_timestamp(wiphy, dev, hwts); - ret = rdev->ops->set_hw_timestamp(wiphy, dev, hwts); + if (rdev->ops->set_hw_timestamp) 
+ ret = rdev->ops->set_hw_timestamp(wiphy, dev, hwts); trace_rdev_return_int(wiphy, ret); return ret; @@ -1535,13 +1523,11 @@ rdev_set_ttlm(struct cfg80211_registered_device *rdev, struct cfg80211_ttlm_params *params) { struct wiphy *wiphy = &rdev->wiphy; - int ret; - - if (!rdev->ops->set_ttlm) - return -EOPNOTSUPP; + int ret = -EOPNOTSUPP; trace_rdev_set_ttlm(wiphy, dev, params); - ret = rdev->ops->set_ttlm(wiphy, dev, params); + if (rdev->ops->set_ttlm) + ret = rdev->ops->set_ttlm(wiphy, dev, params); trace_rdev_return_int(wiphy, ret); return ret; -- cgit v1.2.3-58-ga151 From 1decf05d0f4de78ef67dc3f794709258c689e09e Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Tue, 18 Jun 2024 19:25:56 +0300 Subject: wifi: mac80211: inform the low level if drv_stop() is a suspend This will allow the low level driver to take different actions for different flows. Signed-off-by: Emmanuel Grumbach Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20240618192529.739036208b6e.Ie18a2fe8e02bf2717549d39420b350cfdaf3d317@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/admtek/adm8211.c | 2 +- drivers/net/wireless/ath/ar5523/ar5523.c | 2 +- drivers/net/wireless/ath/ath10k/mac.c | 2 +- drivers/net/wireless/ath/ath11k/mac.c | 2 +- drivers/net/wireless/ath/ath12k/mac.c | 2 +- drivers/net/wireless/ath/ath5k/base.c | 2 +- drivers/net/wireless/ath/ath5k/base.h | 2 +- drivers/net/wireless/ath/ath9k/htc_drv_main.c | 2 +- drivers/net/wireless/ath/ath9k/main.c | 2 +- drivers/net/wireless/ath/carl9170/main.c | 2 +- drivers/net/wireless/ath/wcn36xx/main.c | 2 +- drivers/net/wireless/atmel/at76c50x-usb.c | 2 +- drivers/net/wireless/broadcom/b43/main.c | 2 +- drivers/net/wireless/broadcom/b43legacy/main.c | 2 +- .../wireless/broadcom/brcm80211/brcmsmac/mac80211_if.c | 2 +- drivers/net/wireless/intel/iwlegacy/3945-mac.c | 2 +- drivers/net/wireless/intel/iwlegacy/4965-mac.c | 2 +- drivers/net/wireless/intel/iwlegacy/4965.h | 2 +- drivers/net/wireless/intel/iwlwifi/dvm/mac80211.c | 2 +- drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 2 +- drivers/net/wireless/intel/iwlwifi/mvm/mvm.h | 2 +- drivers/net/wireless/intersil/p54/main.c | 2 +- drivers/net/wireless/marvell/libertas_tf/main.c | 2 +- drivers/net/wireless/marvell/mwl8k.c | 4 ++-- drivers/net/wireless/mediatek/mt76/mt7603/main.c | 2 +- drivers/net/wireless/mediatek/mt76/mt7615/main.c | 2 +- drivers/net/wireless/mediatek/mt76/mt7615/usb.c | 2 +- drivers/net/wireless/mediatek/mt76/mt76x0/pci.c | 2 +- drivers/net/wireless/mediatek/mt76/mt76x0/usb.c | 2 +- drivers/net/wireless/mediatek/mt76/mt76x2/pci_main.c | 2 +- drivers/net/wireless/mediatek/mt76/mt76x2/usb_main.c | 2 +- drivers/net/wireless/mediatek/mt76/mt7915/main.c | 2 +- drivers/net/wireless/mediatek/mt76/mt7921/main.c | 4 ++-- drivers/net/wireless/mediatek/mt76/mt792x.h | 4 ++-- drivers/net/wireless/mediatek/mt76/mt792x_core.c | 2 +- drivers/net/wireless/mediatek/mt76/mt792x_usb.c | 4 ++-- drivers/net/wireless/mediatek/mt76/mt7996/main.c | 2 +- drivers/net/wireless/mediatek/mt7601u/main.c | 2 +- drivers/net/wireless/purelifi/plfxlc/mac.c | 2 +- drivers/net/wireless/purelifi/plfxlc/mac.h | 2 +- drivers/net/wireless/purelifi/plfxlc/usb.c | 4 ++-- drivers/net/wireless/ralink/rt2x00/rt2x00.h | 2 +- drivers/net/wireless/ralink/rt2x00/rt2x00mac.c | 2 +- drivers/net/wireless/realtek/rtl818x/rtl8180/dev.c | 2 +- drivers/net/wireless/realtek/rtl818x/rtl8187/dev.c | 2 +- drivers/net/wireless/realtek/rtl8xxxu/core.c | 2 +- drivers/net/wireless/realtek/rtlwifi/core.c | 4 ++-- 
drivers/net/wireless/realtek/rtw88/mac80211.c | 2 +- drivers/net/wireless/realtek/rtw89/mac80211.c | 2 +- drivers/net/wireless/rsi/rsi_91x_mac80211.c | 3 ++- drivers/net/wireless/silabs/wfx/sta.c | 2 +- drivers/net/wireless/silabs/wfx/sta.h | 2 +- drivers/net/wireless/st/cw1200/sta.c | 2 +- drivers/net/wireless/st/cw1200/sta.h | 2 +- drivers/net/wireless/ti/wl1251/main.c | 2 +- drivers/net/wireless/ti/wlcore/main.c | 2 +- drivers/net/wireless/virtual/mac80211_hwsim.c | 2 +- drivers/net/wireless/zydas/zd1211rw/zd_mac.c | 2 +- drivers/net/wireless/zydas/zd1211rw/zd_mac.h | 2 +- drivers/net/wireless/zydas/zd1211rw/zd_usb.c | 2 +- drivers/staging/vt6655/device_main.c | 2 +- drivers/staging/vt6656/main_usb.c | 2 +- include/net/mac80211.h | 2 +- net/mac80211/driver-ops.c | 6 +++--- net/mac80211/driver-ops.h | 2 +- net/mac80211/ieee80211_i.h | 2 +- net/mac80211/iface.c | 4 ++-- net/mac80211/pm.c | 4 ++-- net/mac80211/trace.h | 15 ++++++++++++--- net/mac80211/util.c | 4 ++-- 70 files changed, 93 insertions(+), 83 deletions(-) (limited to 'net') diff --git a/drivers/net/wireless/admtek/adm8211.c b/drivers/net/wireless/admtek/adm8211.c index e3fd48dd3909..a2d87c3ad196 100644 --- a/drivers/net/wireless/admtek/adm8211.c +++ b/drivers/net/wireless/admtek/adm8211.c @@ -1550,7 +1550,7 @@ fail: return retval; } -static void adm8211_stop(struct ieee80211_hw *dev) +static void adm8211_stop(struct ieee80211_hw *dev, bool suspend) { struct adm8211_priv *priv = dev->priv; diff --git a/drivers/net/wireless/ath/ar5523/ar5523.c b/drivers/net/wireless/ath/ar5523/ar5523.c index 5a55db349cb5..156f3650c006 100644 --- a/drivers/net/wireless/ath/ar5523/ar5523.c +++ b/drivers/net/wireless/ath/ar5523/ar5523.c @@ -1061,7 +1061,7 @@ err: return error; } -static void ar5523_stop(struct ieee80211_hw *hw) +static void ar5523_stop(struct ieee80211_hw *hw, bool suspend) { struct ar5523 *ar = hw->priv; diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index 3bf67b2ecd6d..a5da32e87106 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -5363,7 +5363,7 @@ err: return ret; } -static void ath10k_stop(struct ieee80211_hw *hw) +static void ath10k_stop(struct ieee80211_hw *hw, bool suspend) { struct ath10k *ar = hw->priv; u32 opt; diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c index a1800c75d32b..c9a13b88c804 100644 --- a/drivers/net/wireless/ath/ath11k/mac.c +++ b/drivers/net/wireless/ath/ath11k/mac.c @@ -6278,7 +6278,7 @@ err: return ret; } -static void ath11k_mac_op_stop(struct ieee80211_hw *hw) +static void ath11k_mac_op_stop(struct ieee80211_hw *hw, bool suspend) { struct ath11k *ar = hw->priv; struct htt_ppdu_stats_info *ppdu_stats, *tmp; diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index 509c02bffdae..bd3e021e2a81 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -6112,7 +6112,7 @@ static void ath12k_mac_stop(struct ath12k *ar) atomic_set(&ar->num_pending_mgmt_tx, 0); } -static void ath12k_mac_op_stop(struct ieee80211_hw *hw) +static void ath12k_mac_op_stop(struct ieee80211_hw *hw, bool suspend) { struct ath12k_hw *ah = ath12k_hw_to_ah(hw); struct ath12k *ar; diff --git a/drivers/net/wireless/ath/ath5k/base.c b/drivers/net/wireless/ath/ath5k/base.c index 9f534ed2fbb3..abe41330fb69 100644 --- a/drivers/net/wireless/ath/ath5k/base.c +++ b/drivers/net/wireless/ath/ath5k/base.c @@ -2847,7 +2847,7 @@ static void 
ath5k_stop_tasklets(struct ath5k_hw *ah) * if another thread does a system call and the thread doing the * stop is preempted). */ -void ath5k_stop(struct ieee80211_hw *hw) +void ath5k_stop(struct ieee80211_hw *hw, bool suspend) { struct ath5k_hw *ah = hw->priv; int ret; diff --git a/drivers/net/wireless/ath/ath5k/base.h b/drivers/net/wireless/ath/ath5k/base.h index 97469d0fbad7..594e5b945cb7 100644 --- a/drivers/net/wireless/ath/ath5k/base.h +++ b/drivers/net/wireless/ath/ath5k/base.h @@ -92,7 +92,7 @@ void ath5k_vif_iter(void *data, u8 *mac, struct ieee80211_vif *vif); bool ath5k_any_vif_assoc(struct ath5k_hw *ah); int ath5k_start(struct ieee80211_hw *hw); -void ath5k_stop(struct ieee80211_hw *hw); +void ath5k_stop(struct ieee80211_hw *hw, bool suspend); void ath5k_beacon_update_timers(struct ath5k_hw *ah, u64 bc_tsf); int ath5k_beacon_update(struct ieee80211_hw *hw, struct ieee80211_vif *vif); diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_main.c b/drivers/net/wireless/ath/ath9k/htc_drv_main.c index b389e19381c4..8a03bcc2789e 100644 --- a/drivers/net/wireless/ath/ath9k/htc_drv_main.c +++ b/drivers/net/wireless/ath/ath9k/htc_drv_main.c @@ -973,7 +973,7 @@ static int ath9k_htc_start(struct ieee80211_hw *hw) return ret; } -static void ath9k_htc_stop(struct ieee80211_hw *hw) +static void ath9k_htc_stop(struct ieee80211_hw *hw, bool suspend) { struct ath9k_htc_priv *priv = hw->priv; struct ath_hw *ah = priv->ah; diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index 01173aac3045..b92c89dad8de 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -895,7 +895,7 @@ static void ath9k_pending_key_del(struct ath_softc *sc, u8 keyix) ath_key_delete(common, keyix); } -static void ath9k_stop(struct ieee80211_hw *hw) +static void ath9k_stop(struct ieee80211_hw *hw, bool suspend) { struct ath_softc *sc = hw->priv; struct ath_hw *ah = sc->sc_ah; diff --git a/drivers/net/wireless/ath/carl9170/main.c b/drivers/net/wireless/ath/carl9170/main.c index 7e7797bf44b7..755c068e4197 100644 --- a/drivers/net/wireless/ath/carl9170/main.c +++ b/drivers/net/wireless/ath/carl9170/main.c @@ -439,7 +439,7 @@ static void carl9170_cancel_worker(struct ar9170 *ar) cancel_work_sync(&ar->ampdu_work); } -static void carl9170_op_stop(struct ieee80211_hw *hw) +static void carl9170_op_stop(struct ieee80211_hw *hw, bool suspend) { struct ar9170 *ar = hw->priv; diff --git a/drivers/net/wireless/ath/wcn36xx/main.c b/drivers/net/wireless/ath/wcn36xx/main.c index e760d8002e09..408776562a7e 100644 --- a/drivers/net/wireless/ath/wcn36xx/main.c +++ b/drivers/net/wireless/ath/wcn36xx/main.c @@ -278,7 +278,7 @@ out_err: return ret; } -static void wcn36xx_stop(struct ieee80211_hw *hw) +static void wcn36xx_stop(struct ieee80211_hw *hw, bool suspend) { struct wcn36xx *wcn = hw->priv; diff --git a/drivers/net/wireless/atmel/at76c50x-usb.c b/drivers/net/wireless/atmel/at76c50x-usb.c index baa53cfefe48..504e05ea30f2 100644 --- a/drivers/net/wireless/atmel/at76c50x-usb.c +++ b/drivers/net/wireless/atmel/at76c50x-usb.c @@ -1850,7 +1850,7 @@ error: return 0; } -static void at76_mac80211_stop(struct ieee80211_hw *hw) +static void at76_mac80211_stop(struct ieee80211_hw *hw, bool suspend) { struct at76_priv *priv = hw->priv; diff --git a/drivers/net/wireless/broadcom/b43/main.c b/drivers/net/wireless/broadcom/b43/main.c index badb2f494035..8e56dcf9309d 100644 --- a/drivers/net/wireless/broadcom/b43/main.c +++ b/drivers/net/wireless/broadcom/b43/main.c @@ -5078,7 
+5078,7 @@ static int b43_op_start(struct ieee80211_hw *hw) return err; } -static void b43_op_stop(struct ieee80211_hw *hw) +static void b43_op_stop(struct ieee80211_hw *hw, bool suspend) { struct b43_wl *wl = hw_to_b43_wl(hw); struct b43_wldev *dev = wl->current_dev; diff --git a/drivers/net/wireless/broadcom/b43legacy/main.c b/drivers/net/wireless/broadcom/b43legacy/main.c index 18eb610f600a..441d6440671b 100644 --- a/drivers/net/wireless/broadcom/b43legacy/main.c +++ b/drivers/net/wireless/broadcom/b43legacy/main.c @@ -3485,7 +3485,7 @@ out_mutex_unlock: return err; } -static void b43legacy_op_stop(struct ieee80211_hw *hw) +static void b43legacy_op_stop(struct ieee80211_hw *hw, bool suspend) { struct b43legacy_wl *wl = hw_to_b43legacy_wl(hw); struct b43legacy_wldev *dev = wl->current_dev; diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/mac80211_if.c b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/mac80211_if.c index 860ef9c11c46..9ac6f3c4bdc7 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/mac80211_if.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/mac80211_if.c @@ -457,7 +457,7 @@ static int brcms_ops_start(struct ieee80211_hw *hw) return err; } -static void brcms_ops_stop(struct ieee80211_hw *hw) +static void brcms_ops_stop(struct ieee80211_hw *hw, bool suspend) { struct brcms_info *wl = hw->priv; int status; diff --git a/drivers/net/wireless/intel/iwlegacy/3945-mac.c b/drivers/net/wireless/intel/iwlegacy/3945-mac.c index 075b705a8d7b..74fc76c00ebc 100644 --- a/drivers/net/wireless/intel/iwlegacy/3945-mac.c +++ b/drivers/net/wireless/intel/iwlegacy/3945-mac.c @@ -2813,7 +2813,7 @@ out_release_irq: } static void -il3945_mac_stop(struct ieee80211_hw *hw) +il3945_mac_stop(struct ieee80211_hw *hw, bool suspend) { struct il_priv *il = hw->priv; diff --git a/drivers/net/wireless/intel/iwlegacy/4965-mac.c b/drivers/net/wireless/intel/iwlegacy/4965-mac.c index d018f56be966..1600c344edbb 100644 --- a/drivers/net/wireless/intel/iwlegacy/4965-mac.c +++ b/drivers/net/wireless/intel/iwlegacy/4965-mac.c @@ -5820,7 +5820,7 @@ out: } void -il4965_mac_stop(struct ieee80211_hw *hw) +il4965_mac_stop(struct ieee80211_hw *hw, bool suspend) { struct il_priv *il = hw->priv; diff --git a/drivers/net/wireless/intel/iwlegacy/4965.h b/drivers/net/wireless/intel/iwlegacy/4965.h index 863e3792d153..951f2245fefb 100644 --- a/drivers/net/wireless/intel/iwlegacy/4965.h +++ b/drivers/net/wireless/intel/iwlegacy/4965.h @@ -151,7 +151,7 @@ void il4965_mac_tx(struct ieee80211_hw *hw, struct ieee80211_tx_control *control, struct sk_buff *skb); int il4965_mac_start(struct ieee80211_hw *hw); -void il4965_mac_stop(struct ieee80211_hw *hw); +void il4965_mac_stop(struct ieee80211_hw *hw, bool suspend); void il4965_configure_filter(struct ieee80211_hw *hw, unsigned int changed_flags, unsigned int *total_flags, u64 multicast); diff --git a/drivers/net/wireless/intel/iwlwifi/dvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/dvm/mac80211.c index 826f129ecefa..74d163e56511 100644 --- a/drivers/net/wireless/intel/iwlwifi/dvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/dvm/mac80211.c @@ -300,7 +300,7 @@ static int iwlagn_mac_start(struct ieee80211_hw *hw) return ret; } -static void iwlagn_mac_stop(struct ieee80211_hw *hw) +static void iwlagn_mac_stop(struct ieee80211_hw *hw, bool suspend) { struct iwl_priv *priv = IWL_MAC80211_GET_DVM(hw); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index 
81103a974b20..896e560f5a82 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -1376,7 +1376,7 @@ void __iwl_mvm_mac_stop(struct iwl_mvm *mvm) } } -void iwl_mvm_mac_stop(struct ieee80211_hw *hw) +void iwl_mvm_mac_stop(struct ieee80211_hw *hw, bool suspend) { struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index bce517effe55..54c3553db219 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -2818,7 +2818,7 @@ int iwl_mvm_op_set_antenna(struct ieee80211_hw *hw, u32 tx_ant, u32 rx_ant); int iwl_mvm_mac_start(struct ieee80211_hw *hw); void iwl_mvm_mac_reconfig_complete(struct ieee80211_hw *hw, enum ieee80211_reconfig_type reconfig_type); -void iwl_mvm_mac_stop(struct ieee80211_hw *hw); +void iwl_mvm_mac_stop(struct ieee80211_hw *hw, bool suspend); static inline int iwl_mvm_mac_config(struct ieee80211_hw *hw, u32 changed) { return 0; diff --git a/drivers/net/wireless/intersil/p54/main.c b/drivers/net/wireless/intersil/p54/main.c index 687841b2fa2a..42111bb53f58 100644 --- a/drivers/net/wireless/intersil/p54/main.c +++ b/drivers/net/wireless/intersil/p54/main.c @@ -197,7 +197,7 @@ out: return err; } -static void p54_stop(struct ieee80211_hw *dev) +static void p54_stop(struct ieee80211_hw *dev, bool suspend) { struct p54_common *priv = dev->priv; int i; diff --git a/drivers/net/wireless/marvell/libertas_tf/main.c b/drivers/net/wireless/marvell/libertas_tf/main.c index 9cca69fe04d7..b47a832b9ae2 100644 --- a/drivers/net/wireless/marvell/libertas_tf/main.c +++ b/drivers/net/wireless/marvell/libertas_tf/main.c @@ -267,7 +267,7 @@ static int lbtf_op_start(struct ieee80211_hw *hw) return 0; } -static void lbtf_op_stop(struct ieee80211_hw *hw) +static void lbtf_op_stop(struct ieee80211_hw *hw, bool suspend) { struct lbtf_private *priv = hw->priv; unsigned long flags; diff --git a/drivers/net/wireless/marvell/mwl8k.c b/drivers/net/wireless/marvell/mwl8k.c index 241a02a0accd..b130e057370f 100644 --- a/drivers/net/wireless/marvell/mwl8k.c +++ b/drivers/net/wireless/marvell/mwl8k.c @@ -4768,7 +4768,7 @@ static int mwl8k_start(struct ieee80211_hw *hw) return rc; } -static void mwl8k_stop(struct ieee80211_hw *hw) +static void mwl8k_stop(struct ieee80211_hw *hw, bool suspend) { struct mwl8k_priv *priv = hw->priv; int i; @@ -6023,7 +6023,7 @@ static int mwl8k_reload_firmware(struct ieee80211_hw *hw, char *fw_image) struct mwl8k_priv *priv = hw->priv; struct mwl8k_vif *vif, *tmp_vif; - mwl8k_stop(hw); + mwl8k_stop(hw, false); mwl8k_rxq_deinit(hw, 0); /* diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/main.c b/drivers/net/wireless/mediatek/mt76/mt7603/main.c index 9b49267b1eab..f35fa643c0da 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7603/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7603/main.c @@ -23,7 +23,7 @@ mt7603_start(struct ieee80211_hw *hw) } static void -mt7603_stop(struct ieee80211_hw *hw) +mt7603_stop(struct ieee80211_hw *hw, bool suspend) { struct mt7603_dev *dev = hw->priv; diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/main.c b/drivers/net/wireless/mediatek/mt76/mt7615/main.c index c27acaf0eb1c..50e262c1622f 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7615/main.c @@ -91,7 +91,7 @@ out: return ret; } -static void mt7615_stop(struct ieee80211_hw *hw) +static void mt7615_stop(struct 
ieee80211_hw *hw, bool suspend) { struct mt7615_dev *dev = mt7615_hw_dev(hw); struct mt7615_phy *phy = mt7615_hw_phy(hw); diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/usb.c b/drivers/net/wireless/mediatek/mt76/mt7615/usb.c index df737e1ff27b..9335ca0776fe 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/usb.c +++ b/drivers/net/wireless/mediatek/mt76/mt7615/usb.c @@ -79,7 +79,7 @@ static void mt7663u_copy(struct mt76_dev *dev, u32 offset, mutex_unlock(&usb->usb_ctrl_mtx); } -static void mt7663u_stop(struct ieee80211_hw *hw) +static void mt7663u_stop(struct ieee80211_hw *hw, bool suspend) { struct mt7615_phy *phy = mt7615_hw_phy(hw); struct mt7615_dev *dev = hw->priv; diff --git a/drivers/net/wireless/mediatek/mt76/mt76x0/pci.c b/drivers/net/wireless/mediatek/mt76/mt76x0/pci.c index 79b7996ad1a8..2ecee7c5c80d 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x0/pci.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x0/pci.c @@ -44,7 +44,7 @@ static void mt76x0e_stop_hw(struct mt76x02_dev *dev) mt76_clear(dev, MT_WPDMA_GLO_CFG, MT_WPDMA_GLO_CFG_RX_DMA_EN); } -static void mt76x0e_stop(struct ieee80211_hw *hw) +static void mt76x0e_stop(struct ieee80211_hw *hw, bool suspend) { struct mt76x02_dev *dev = hw->priv; diff --git a/drivers/net/wireless/mediatek/mt76/mt76x0/usb.c b/drivers/net/wireless/mediatek/mt76/mt76x0/usb.c index bba44f289b4e..390f502e97f0 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x0/usb.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x0/usb.c @@ -77,7 +77,7 @@ static void mt76x0u_cleanup(struct mt76x02_dev *dev) mt76u_queues_deinit(&dev->mt76); } -static void mt76x0u_stop(struct ieee80211_hw *hw) +static void mt76x0u_stop(struct ieee80211_hw *hw, bool suspend) { struct mt76x02_dev *dev = hw->priv; diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/pci_main.c b/drivers/net/wireless/mediatek/mt76/mt76x2/pci_main.c index bfc8c69f43fa..6accea551319 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x2/pci_main.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x2/pci_main.c @@ -24,7 +24,7 @@ mt76x2_start(struct ieee80211_hw *hw) } static void -mt76x2_stop(struct ieee80211_hw *hw) +mt76x2_stop(struct ieee80211_hw *hw, bool suspend) { struct mt76x02_dev *dev = hw->priv; diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/usb_main.c b/drivers/net/wireless/mediatek/mt76/mt76x2/usb_main.c index 9fe390fdd730..ba0241c36672 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x2/usb_main.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x2/usb_main.c @@ -22,7 +22,7 @@ static int mt76x2u_start(struct ieee80211_hw *hw) return 0; } -static void mt76x2u_stop(struct ieee80211_hw *hw) +static void mt76x2u_stop(struct ieee80211_hw *hw, bool suspend) { struct mt76x02_dev *dev = hw->priv; diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/main.c b/drivers/net/wireless/mediatek/mt76/mt7915/main.c index 2624edbb59a1..049223df9beb 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7915/main.c @@ -108,7 +108,7 @@ static int mt7915_start(struct ieee80211_hw *hw) return ret; } -static void mt7915_stop(struct ieee80211_hw *hw) +static void mt7915_stop(struct ieee80211_hw *hw, bool suspend) { struct mt7915_dev *dev = mt7915_hw_dev(hw); struct mt7915_phy *phy = mt7915_hw_phy(hw); diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/main.c b/drivers/net/wireless/mediatek/mt76/mt7921/main.c index 3e3ad3518d85..4f30426afbb7 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/main.c +++ 
b/drivers/net/wireless/mediatek/mt76/mt7921/main.c @@ -268,7 +268,7 @@ static int mt7921_start(struct ieee80211_hw *hw) return err; } -static void mt7921_stop(struct ieee80211_hw *hw) +static void mt7921_stop(struct ieee80211_hw *hw, bool suspend) { struct mt792x_dev *dev = mt792x_hw_dev(hw); int err = 0; @@ -281,7 +281,7 @@ static void mt7921_stop(struct ieee80211_hw *hw) return; } - mt792x_stop(hw); + mt792x_stop(hw, false); } static int diff --git a/drivers/net/wireless/mediatek/mt76/mt792x.h b/drivers/net/wireless/mediatek/mt76/mt792x.h index 20578497a405..cf14a38c5e72 100644 --- a/drivers/net/wireless/mediatek/mt76/mt792x.h +++ b/drivers/net/wireless/mediatek/mt76/mt792x.h @@ -251,7 +251,7 @@ static inline bool mt792x_dma_need_reinit(struct mt792x_dev *dev) #define mt792x_mutex_release(dev) \ mt76_connac_mutex_release(&(dev)->mt76, &(dev)->pm) -void mt792x_stop(struct ieee80211_hw *hw); +void mt792x_stop(struct ieee80211_hw *hw, bool suspend); void mt792x_pm_wake_work(struct work_struct *work); void mt792x_pm_power_save_work(struct work_struct *work); void mt792x_reset(struct mt76_dev *mdev); @@ -368,7 +368,7 @@ void mt792xu_wr(struct mt76_dev *dev, u32 addr, u32 val); u32 mt792xu_rmw(struct mt76_dev *dev, u32 addr, u32 mask, u32 val); void mt792xu_copy(struct mt76_dev *dev, u32 offset, const void *data, int len); void mt792xu_disconnect(struct usb_interface *usb_intf); -void mt792xu_stop(struct ieee80211_hw *hw); +void mt792xu_stop(struct ieee80211_hw *hw, bool suspend); static inline void mt792x_skb_add_usb_sdio_hdr(struct mt792x_dev *dev, struct sk_buff *skb, diff --git a/drivers/net/wireless/mediatek/mt76/mt792x_core.c b/drivers/net/wireless/mediatek/mt76/mt792x_core.c index a405af8d9052..4adca99eb9b8 100644 --- a/drivers/net/wireless/mediatek/mt76/mt792x_core.c +++ b/drivers/net/wireless/mediatek/mt76/mt792x_core.c @@ -91,7 +91,7 @@ void mt792x_tx(struct ieee80211_hw *hw, struct ieee80211_tx_control *control, } EXPORT_SYMBOL_GPL(mt792x_tx); -void mt792x_stop(struct ieee80211_hw *hw) +void mt792x_stop(struct ieee80211_hw *hw, bool suspend) { struct mt792x_dev *dev = mt792x_hw_dev(hw); struct mt792x_phy *phy = mt792x_hw_phy(hw); diff --git a/drivers/net/wireless/mediatek/mt76/mt792x_usb.c b/drivers/net/wireless/mediatek/mt76/mt792x_usb.c index b49668a4b784..76272a03b22e 100644 --- a/drivers/net/wireless/mediatek/mt76/mt792x_usb.c +++ b/drivers/net/wireless/mediatek/mt76/mt792x_usb.c @@ -285,12 +285,12 @@ int mt792xu_init_reset(struct mt792x_dev *dev) } EXPORT_SYMBOL_GPL(mt792xu_init_reset); -void mt792xu_stop(struct ieee80211_hw *hw) +void mt792xu_stop(struct ieee80211_hw *hw, bool suspend) { struct mt792x_dev *dev = mt792x_hw_dev(hw); mt76u_stop_tx(&dev->mt76); - mt792x_stop(hw); + mt792x_stop(hw, false); } EXPORT_SYMBOL_GPL(mt792xu_stop); diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/main.c b/drivers/net/wireless/mediatek/mt76/mt7996/main.c index 7c97140d8255..bce082038219 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/main.c @@ -93,7 +93,7 @@ static int mt7996_start(struct ieee80211_hw *hw) return ret; } -static void mt7996_stop(struct ieee80211_hw *hw) +static void mt7996_stop(struct ieee80211_hw *hw, bool suspend) { struct mt7996_dev *dev = mt7996_hw_dev(hw); struct mt7996_phy *phy = mt7996_hw_phy(hw); diff --git a/drivers/net/wireless/mediatek/mt7601u/main.c b/drivers/net/wireless/mediatek/mt7601u/main.c index a7330576486b..7570c6ceecea 100644 --- a/drivers/net/wireless/mediatek/mt7601u/main.c +++ 
b/drivers/net/wireless/mediatek/mt7601u/main.c @@ -28,7 +28,7 @@ out: return ret; } -static void mt7601u_stop(struct ieee80211_hw *hw) +static void mt7601u_stop(struct ieee80211_hw *hw, bool suspend) { struct mt7601u_dev *dev = hw->priv; diff --git a/drivers/net/wireless/purelifi/plfxlc/mac.c b/drivers/net/wireless/purelifi/plfxlc/mac.c index 641f847d47ab..eae93efa6150 100644 --- a/drivers/net/wireless/purelifi/plfxlc/mac.c +++ b/drivers/net/wireless/purelifi/plfxlc/mac.c @@ -111,7 +111,7 @@ int plfxlc_op_start(struct ieee80211_hw *hw) return 0; } -void plfxlc_op_stop(struct ieee80211_hw *hw) +void plfxlc_op_stop(struct ieee80211_hw *hw, bool suspend) { struct plfxlc_mac *mac = plfxlc_hw_mac(hw); diff --git a/drivers/net/wireless/purelifi/plfxlc/mac.h b/drivers/net/wireless/purelifi/plfxlc/mac.h index 49b92413729b..9384acddcf26 100644 --- a/drivers/net/wireless/purelifi/plfxlc/mac.h +++ b/drivers/net/wireless/purelifi/plfxlc/mac.h @@ -178,7 +178,7 @@ int plfxlc_mac_rx(struct ieee80211_hw *hw, const u8 *buffer, void plfxlc_mac_tx_failed(struct urb *urb); void plfxlc_mac_tx_to_dev(struct sk_buff *skb, int error); int plfxlc_op_start(struct ieee80211_hw *hw); -void plfxlc_op_stop(struct ieee80211_hw *hw); +void plfxlc_op_stop(struct ieee80211_hw *hw, bool suspend); int plfxlc_restore_settings(struct plfxlc_mac *mac); #endif /* PLFXLC_MAC_H */ diff --git a/drivers/net/wireless/purelifi/plfxlc/usb.c b/drivers/net/wireless/purelifi/plfxlc/usb.c index 311676c1ece0..15334940287d 100644 --- a/drivers/net/wireless/purelifi/plfxlc/usb.c +++ b/drivers/net/wireless/purelifi/plfxlc/usb.c @@ -408,7 +408,7 @@ void plfxlc_usb_init(struct plfxlc_usb *usb, struct ieee80211_hw *hw, void plfxlc_usb_release(struct plfxlc_usb *usb) { - plfxlc_op_stop(plfxlc_usb_to_hw(usb)); + plfxlc_op_stop(plfxlc_usb_to_hw(usb), false); plfxlc_usb_disable_tx(usb); plfxlc_usb_disable_rx(usb); usb_set_intfdata(usb->intf, NULL); @@ -761,7 +761,7 @@ static void plfxlc_usb_resume(struct plfxlc_usb *usb) static void plfxlc_usb_stop(struct plfxlc_usb *usb) { - plfxlc_op_stop(plfxlc_usb_to_hw(usb)); + plfxlc_op_stop(plfxlc_usb_to_hw(usb), false); plfxlc_usb_disable_tx(usb); plfxlc_usb_disable_rx(usb); diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00.h b/drivers/net/wireless/ralink/rt2x00/rt2x00.h index bb648f95dfdd..dfb4bb370f01 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2x00.h +++ b/drivers/net/wireless/ralink/rt2x00/rt2x00.h @@ -1450,7 +1450,7 @@ void rt2x00mac_tx(struct ieee80211_hw *hw, struct ieee80211_tx_control *control, struct sk_buff *skb); int rt2x00mac_start(struct ieee80211_hw *hw); -void rt2x00mac_stop(struct ieee80211_hw *hw); +void rt2x00mac_stop(struct ieee80211_hw *hw, bool suspend); void rt2x00mac_reconfig_complete(struct ieee80211_hw *hw, enum ieee80211_reconfig_type reconfig_type); int rt2x00mac_add_interface(struct ieee80211_hw *hw, diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00mac.c b/drivers/net/wireless/ralink/rt2x00/rt2x00mac.c index 75fda72c14ca..451632488805 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2x00mac.c +++ b/drivers/net/wireless/ralink/rt2x00/rt2x00mac.c @@ -178,7 +178,7 @@ int rt2x00mac_start(struct ieee80211_hw *hw) } EXPORT_SYMBOL_GPL(rt2x00mac_start); -void rt2x00mac_stop(struct ieee80211_hw *hw) +void rt2x00mac_stop(struct ieee80211_hw *hw, bool suspend) { struct rt2x00_dev *rt2x00dev = hw->priv; diff --git a/drivers/net/wireless/realtek/rtl818x/rtl8180/dev.c b/drivers/net/wireless/realtek/rtl818x/rtl8180/dev.c index 77b6cb7e1f6b..ded8d4d59289 100644 --- 
a/drivers/net/wireless/realtek/rtl818x/rtl8180/dev.c +++ b/drivers/net/wireless/realtek/rtl818x/rtl8180/dev.c @@ -1249,7 +1249,7 @@ static int rtl8180_start(struct ieee80211_hw *dev) return ret; } -static void rtl8180_stop(struct ieee80211_hw *dev) +static void rtl8180_stop(struct ieee80211_hw *dev, bool suspend) { struct rtl8180_priv *priv = dev->priv; u8 reg; diff --git a/drivers/net/wireless/realtek/rtl818x/rtl8187/dev.c b/drivers/net/wireless/realtek/rtl818x/rtl8187/dev.c index 78d99afa373d..220ac5bdf279 100644 --- a/drivers/net/wireless/realtek/rtl818x/rtl8187/dev.c +++ b/drivers/net/wireless/realtek/rtl818x/rtl8187/dev.c @@ -1019,7 +1019,7 @@ rtl8187_start_exit: return ret; } -static void rtl8187_stop(struct ieee80211_hw *dev) +static void rtl8187_stop(struct ieee80211_hw *dev, bool suspend) { struct rtl8187_priv *priv = dev->priv; struct sk_buff *skb; diff --git a/drivers/net/wireless/realtek/rtl8xxxu/core.c b/drivers/net/wireless/realtek/rtl8xxxu/core.c index 89a841b4e8d5..3685dbefc9bd 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/core.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/core.c @@ -7521,7 +7521,7 @@ error_out: return ret; } -static void rtl8xxxu_stop(struct ieee80211_hw *hw) +static void rtl8xxxu_stop(struct ieee80211_hw *hw, bool suspend) { struct rtl8xxxu_priv *priv = hw->priv; unsigned long flags; diff --git a/drivers/net/wireless/realtek/rtlwifi/core.c b/drivers/net/wireless/realtek/rtlwifi/core.c index 42b7db12b1bd..7537f04b1930 100644 --- a/drivers/net/wireless/realtek/rtlwifi/core.c +++ b/drivers/net/wireless/realtek/rtlwifi/core.c @@ -144,7 +144,7 @@ static int rtl_op_start(struct ieee80211_hw *hw) return err; } -static void rtl_op_stop(struct ieee80211_hw *hw) +static void rtl_op_stop(struct ieee80211_hw *hw, bool suspend) { struct rtl_priv *rtlpriv = rtl_priv(hw); struct rtl_mac *mac = rtl_mac(rtl_priv(hw)); @@ -547,7 +547,7 @@ static int rtl_op_suspend(struct ieee80211_hw *hw, rtlhal->enter_pnp_sleep = true; rtl_lps_leave(hw, true); - rtl_op_stop(hw); + rtl_op_stop(hw, false); device_set_wakeup_enable(wiphy_dev(hw->wiphy), true); return 0; } diff --git a/drivers/net/wireless/realtek/rtw88/mac80211.c b/drivers/net/wireless/realtek/rtw88/mac80211.c index 0acebbfa13c4..63326b352738 100644 --- a/drivers/net/wireless/realtek/rtw88/mac80211.c +++ b/drivers/net/wireless/realtek/rtw88/mac80211.c @@ -62,7 +62,7 @@ static int rtw_ops_start(struct ieee80211_hw *hw) return ret; } -static void rtw_ops_stop(struct ieee80211_hw *hw) +static void rtw_ops_stop(struct ieee80211_hw *hw, bool suspend) { struct rtw_dev *rtwdev = hw->priv; diff --git a/drivers/net/wireless/realtek/rtw89/mac80211.c b/drivers/net/wireless/realtek/rtw89/mac80211.c index 41b286da3d59..722d09e9fbb5 100644 --- a/drivers/net/wireless/realtek/rtw89/mac80211.c +++ b/drivers/net/wireless/realtek/rtw89/mac80211.c @@ -66,7 +66,7 @@ static int rtw89_ops_start(struct ieee80211_hw *hw) return ret; } -static void rtw89_ops_stop(struct ieee80211_hw *hw) +static void rtw89_ops_stop(struct ieee80211_hw *hw, bool suspend) { struct rtw89_dev *rtwdev = hw->priv; diff --git a/drivers/net/wireless/rsi/rsi_91x_mac80211.c b/drivers/net/wireless/rsi/rsi_91x_mac80211.c index 211fa25b9a78..3425a473b9a1 100644 --- a/drivers/net/wireless/rsi/rsi_91x_mac80211.c +++ b/drivers/net/wireless/rsi/rsi_91x_mac80211.c @@ -410,10 +410,11 @@ static int rsi_mac80211_start(struct ieee80211_hw *hw) /** * rsi_mac80211_stop() - This is the last handler that 802.11 module calls. * @hw: Pointer to the ieee80211_hw structure. 
+ * @suspend: true if the this was called from suspend flow. * * Return: None. */ -static void rsi_mac80211_stop(struct ieee80211_hw *hw) +static void rsi_mac80211_stop(struct ieee80211_hw *hw, bool suspend) { struct rsi_hw *adapter = hw->priv; struct rsi_common *common = adapter->priv; diff --git a/drivers/net/wireless/silabs/wfx/sta.c b/drivers/net/wireless/silabs/wfx/sta.c index a904602f02ce..216d43c8bd6e 100644 --- a/drivers/net/wireless/silabs/wfx/sta.c +++ b/drivers/net/wireless/silabs/wfx/sta.c @@ -805,7 +805,7 @@ int wfx_start(struct ieee80211_hw *hw) return 0; } -void wfx_stop(struct ieee80211_hw *hw) +void wfx_stop(struct ieee80211_hw *hw, bool suspend) { struct wfx_dev *wdev = hw->priv; diff --git a/drivers/net/wireless/silabs/wfx/sta.h b/drivers/net/wireless/silabs/wfx/sta.h index c478ddcb934b..7817c7c6f3dd 100644 --- a/drivers/net/wireless/silabs/wfx/sta.h +++ b/drivers/net/wireless/silabs/wfx/sta.h @@ -20,7 +20,7 @@ struct wfx_sta_priv { /* mac80211 interface */ int wfx_start(struct ieee80211_hw *hw); -void wfx_stop(struct ieee80211_hw *hw); +void wfx_stop(struct ieee80211_hw *hw, bool suspend); int wfx_config(struct ieee80211_hw *hw, u32 changed); int wfx_set_rts_threshold(struct ieee80211_hw *hw, u32 value); void wfx_set_default_unicast_key(struct ieee80211_hw *hw, struct ieee80211_vif *vif, int idx); diff --git a/drivers/net/wireless/st/cw1200/sta.c b/drivers/net/wireless/st/cw1200/sta.c index 8ef1d06b9bbd..c259da8161e4 100644 --- a/drivers/net/wireless/st/cw1200/sta.c +++ b/drivers/net/wireless/st/cw1200/sta.c @@ -90,7 +90,7 @@ out: return ret; } -void cw1200_stop(struct ieee80211_hw *dev) +void cw1200_stop(struct ieee80211_hw *dev, bool suspend) { struct cw1200_common *priv = dev->priv; LIST_HEAD(list); diff --git a/drivers/net/wireless/st/cw1200/sta.h b/drivers/net/wireless/st/cw1200/sta.h index a49f187c7049..b955b92cfd73 100644 --- a/drivers/net/wireless/st/cw1200/sta.h +++ b/drivers/net/wireless/st/cw1200/sta.h @@ -13,7 +13,7 @@ /* mac80211 API */ int cw1200_start(struct ieee80211_hw *dev); -void cw1200_stop(struct ieee80211_hw *dev); +void cw1200_stop(struct ieee80211_hw *dev, bool suspend); int cw1200_add_interface(struct ieee80211_hw *dev, struct ieee80211_vif *vif); void cw1200_remove_interface(struct ieee80211_hw *dev, diff --git a/drivers/net/wireless/ti/wl1251/main.c b/drivers/net/wireless/ti/wl1251/main.c index 0da2d29dd7bd..bb53d681c11b 100644 --- a/drivers/net/wireless/ti/wl1251/main.c +++ b/drivers/net/wireless/ti/wl1251/main.c @@ -415,7 +415,7 @@ out: return ret; } -static void wl1251_op_stop(struct ieee80211_hw *hw) +static void wl1251_op_stop(struct ieee80211_hw *hw, bool suspend) { struct wl1251 *wl = hw->priv; diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c index 35d1114a28aa..7e1d30f89855 100644 --- a/drivers/net/wireless/ti/wlcore/main.c +++ b/drivers/net/wireless/ti/wlcore/main.c @@ -2085,7 +2085,7 @@ static void wlcore_op_stop_locked(struct wl1271 *wl) memset(wl->reg_ch_conf_last, 0, sizeof(wl->reg_ch_conf_last)); } -static void wlcore_op_stop(struct ieee80211_hw *hw) +static void wlcore_op_stop(struct ieee80211_hw *hw, bool suspend) { struct wl1271 *wl = hw->priv; diff --git a/drivers/net/wireless/virtual/mac80211_hwsim.c b/drivers/net/wireless/virtual/mac80211_hwsim.c index fbf24870209d..8491eb32f760 100644 --- a/drivers/net/wireless/virtual/mac80211_hwsim.c +++ b/drivers/net/wireless/virtual/mac80211_hwsim.c @@ -2098,7 +2098,7 @@ static int mac80211_hwsim_start(struct ieee80211_hw *hw) } -static void 
mac80211_hwsim_stop(struct ieee80211_hw *hw) +static void mac80211_hwsim_stop(struct ieee80211_hw *hw, bool suspend) { struct mac80211_hwsim_data *data = hw->priv; int i; diff --git a/drivers/net/wireless/zydas/zd1211rw/zd_mac.c b/drivers/net/wireless/zydas/zd1211rw/zd_mac.c index 900c063bd724..f90c33d19b39 100644 --- a/drivers/net/wireless/zydas/zd1211rw/zd_mac.c +++ b/drivers/net/wireless/zydas/zd1211rw/zd_mac.c @@ -326,7 +326,7 @@ out: return r; } -void zd_op_stop(struct ieee80211_hw *hw) +void zd_op_stop(struct ieee80211_hw *hw, bool suspend) { struct zd_mac *mac = zd_hw_mac(hw); struct zd_chip *chip = &mac->chip; diff --git a/drivers/net/wireless/zydas/zd1211rw/zd_mac.h b/drivers/net/wireless/zydas/zd1211rw/zd_mac.h index 5ff84bdc5a4c..053748a474ec 100644 --- a/drivers/net/wireless/zydas/zd1211rw/zd_mac.h +++ b/drivers/net/wireless/zydas/zd1211rw/zd_mac.h @@ -303,7 +303,7 @@ void zd_mac_tx_failed(struct urb *urb); void zd_mac_tx_to_dev(struct sk_buff *skb, int error); int zd_op_start(struct ieee80211_hw *hw); -void zd_op_stop(struct ieee80211_hw *hw); +void zd_op_stop(struct ieee80211_hw *hw, bool suspend); int zd_restore_settings(struct zd_mac *mac); #ifdef DEBUG diff --git a/drivers/net/wireless/zydas/zd1211rw/zd_usb.c b/drivers/net/wireless/zydas/zd1211rw/zd_usb.c index 2ee4218da1c5..a8a94edf2a70 100644 --- a/drivers/net/wireless/zydas/zd1211rw/zd_usb.c +++ b/drivers/net/wireless/zydas/zd1211rw/zd_usb.c @@ -1476,7 +1476,7 @@ static void zd_usb_stop(struct zd_usb *usb) { dev_dbg_f(zd_usb_dev(usb), "\n"); - zd_op_stop(zd_usb_to_hw(usb)); + zd_op_stop(zd_usb_to_hw(usb), false); zd_usb_disable_tx(usb); zd_usb_disable_rx(usb); diff --git a/drivers/staging/vt6655/device_main.c b/drivers/staging/vt6655/device_main.c index 283804b49e91..3ff8103366c1 100644 --- a/drivers/staging/vt6655/device_main.c +++ b/drivers/staging/vt6655/device_main.c @@ -1339,7 +1339,7 @@ err_free_rings: return ret; } -static void vnt_stop(struct ieee80211_hw *hw) +static void vnt_stop(struct ieee80211_hw *hw, bool suspend) { struct vnt_private *priv = hw->priv; diff --git a/drivers/staging/vt6656/main_usb.c b/drivers/staging/vt6656/main_usb.c index 7bbed462f062..4f09e733e7a8 100644 --- a/drivers/staging/vt6656/main_usb.c +++ b/drivers/staging/vt6656/main_usb.c @@ -613,7 +613,7 @@ err: return ret; } -static void vnt_stop(struct ieee80211_hw *hw) +static void vnt_stop(struct ieee80211_hw *hw, bool suspend) { struct vnt_private *priv = hw->priv; int i; diff --git a/include/net/mac80211.h b/include/net/mac80211.h index ecfa65ade226..9c96e8ae9ef7 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -4444,7 +4444,7 @@ struct ieee80211_ops { struct ieee80211_tx_control *control, struct sk_buff *skb); int (*start)(struct ieee80211_hw *hw); - void (*stop)(struct ieee80211_hw *hw); + void (*stop)(struct ieee80211_hw *hw, bool suspend); #ifdef CONFIG_PM int (*suspend)(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan); int (*resume)(struct ieee80211_hw *hw); diff --git a/net/mac80211/driver-ops.c b/net/mac80211/driver-ops.c index dce37ba8ebe3..bef43990f07a 100644 --- a/net/mac80211/driver-ops.c +++ b/net/mac80211/driver-ops.c @@ -33,7 +33,7 @@ int drv_start(struct ieee80211_local *local) return ret; } -void drv_stop(struct ieee80211_local *local) +void drv_stop(struct ieee80211_local *local, bool suspend) { might_sleep(); lockdep_assert_wiphy(local->hw.wiphy); @@ -41,8 +41,8 @@ void drv_stop(struct ieee80211_local *local) if (WARN_ON(!local->started)) return; - trace_drv_stop(local); - 
local->ops->stop(&local->hw); + trace_drv_stop(local, suspend); + local->ops->stop(&local->hw, suspend); trace_drv_return_void(local); /* sync away all work on the tasklet before clearing started */ diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index d4e73d3630e0..d382d9729e85 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -88,7 +88,7 @@ static inline int drv_get_et_sset_count(struct ieee80211_sub_if_data *sdata, } int drv_start(struct ieee80211_local *local); -void drv_stop(struct ieee80211_local *local); +void drv_stop(struct ieee80211_local *local, bool suspend); #ifdef CONFIG_PM static inline int drv_suspend(struct ieee80211_local *local, diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 501f02809135..6349552e62a8 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -2246,7 +2246,7 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata, /* Suspend/resume and hw reconfiguration */ int ieee80211_reconfig(struct ieee80211_local *local); -void ieee80211_stop_device(struct ieee80211_local *local); +void ieee80211_stop_device(struct ieee80211_local *local, bool suspend); int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan); diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index f06e165d6c7a..64106097949a 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -698,7 +698,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do wiphy_delayed_work_flush(local->hw.wiphy, &local->scan_work); if (local->open_count == 0) { - ieee80211_stop_device(local); + ieee80211_stop_device(local, false); /* no reconfiguring after stop! */ return; @@ -1435,7 +1435,7 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) drv_remove_interface(local, sdata); err_stop: if (!local->open_count) - drv_stop(local); + drv_stop(local, false); err_del_bss: sdata->bss = NULL; if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c index c1fa26e09479..d823d58303e8 100644 --- a/net/mac80211/pm.c +++ b/net/mac80211/pm.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* * Portions - * Copyright (C) 2020-2021, 2023 Intel Corporation + * Copyright (C) 2020-2021, 2023-2024 Intel Corporation */ #include #include @@ -171,7 +171,7 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) WARN_ON(!list_empty(&local->chanctx_list)); /* stop hardware - this must stop RX */ - ieee80211_stop_device(local); + ieee80211_stop_device(local, true); suspend: local->suspended = true; diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index b26aacfbc622..dc498cd8cd91 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -328,9 +328,18 @@ TRACE_EVENT(drv_set_wakeup, TP_printk(LOCAL_PR_FMT " enabled:%d", LOCAL_PR_ARG, __entry->enabled) ); -DEFINE_EVENT(local_only_evt, drv_stop, - TP_PROTO(struct ieee80211_local *local), - TP_ARGS(local) +TRACE_EVENT(drv_stop, + TP_PROTO(struct ieee80211_local *local, bool suspend), + TP_ARGS(local, suspend), + TP_STRUCT__entry( + LOCAL_ENTRY + __field(bool, suspend) + ), + TP_fast_assign( + LOCAL_ASSIGN; + __entry->suspend = suspend; + ), + TP_printk(LOCAL_PR_FMT " suspend:%d", LOCAL_PR_ARG, __entry->suspend) ); DEFINE_EVENT(local_sdata_addr_evt, drv_add_interface, diff --git a/net/mac80211/util.c b/net/mac80211/util.c index b3b8873a107b..610f0a94796c 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1565,7 +1565,7 @@ 
u32 ieee80211_sta_get_rates(struct ieee80211_sub_if_data *sdata, return supp_rates; } -void ieee80211_stop_device(struct ieee80211_local *local) +void ieee80211_stop_device(struct ieee80211_local *local, bool suspend) { ieee80211_handle_queued_frames(local); @@ -1576,7 +1576,7 @@ void ieee80211_stop_device(struct ieee80211_local *local) flush_workqueue(local->workqueue); wiphy_work_flush(local->hw.wiphy, NULL); - drv_stop(local); + drv_stop(local, suspend); } static void ieee80211_flush_completed_scan(struct ieee80211_local *local, -- cgit v1.2.3-58-ga151 From 021d53a3d87eeb9dbba524ac515651242a2a7e3b Mon Sep 17 00:00:00 2001 From: Zong-Zhe Yang Date: Mon, 17 Jun 2024 19:52:17 +0800 Subject: wifi: mac80211: fix NULL dereference at band check in starting tx ba session In MLD connection, link_data/link_conf are dynamically allocated. They don't point to vif->bss_conf. So, there will be no chanreq assigned to vif->bss_conf and then the chan will be NULL. Tweak the code to check ht_supported/vht_supported/has_he/has_eht on sta deflink. Crash log (with rtw89 version under MLO development): [ 9890.526087] BUG: kernel NULL pointer dereference, address: 0000000000000000 [ 9890.526102] #PF: supervisor read access in kernel mode [ 9890.526105] #PF: error_code(0x0000) - not-present page [ 9890.526109] PGD 0 P4D 0 [ 9890.526114] Oops: 0000 [#1] PREEMPT SMP PTI [ 9890.526119] CPU: 2 PID: 6367 Comm: kworker/u16:2 Kdump: loaded Tainted: G OE 6.9.0 #1 [ 9890.526123] Hardware name: LENOVO 2356AD1/2356AD1, BIOS G7ETB3WW (2.73 ) 11/28/2018 [ 9890.526126] Workqueue: phy2 rtw89_core_ba_work [rtw89_core] [ 9890.526203] RIP: 0010:ieee80211_start_tx_ba_session (net/mac80211/agg-tx.c:618 (discriminator 1)) mac80211 [ 9890.526279] Code: f7 e8 d5 93 3e ea 48 83 c4 28 89 d8 5b 41 5c 41 5d 41 5e 41 5f 5d c3 cc cc cc cc 49 8b 84 24 e0 f1 ff ff 48 8b 80 90 1b 00 00 <83> 38 03 0f 84 37 fe ff ff bb ea ff ff ff eb cc 49 8b 84 24 10 f3 All code ======== 0: f7 e8 imul %eax 2: d5 (bad) 3: 93 xchg %eax,%ebx 4: 3e ea ds (bad) 6: 48 83 c4 28 add $0x28,%rsp a: 89 d8 mov %ebx,%eax c: 5b pop %rbx d: 41 5c pop %r12 f: 41 5d pop %r13 11: 41 5e pop %r14 13: 41 5f pop %r15 15: 5d pop %rbp 16: c3 retq 17: cc int3 18: cc int3 19: cc int3 1a: cc int3 1b: 49 8b 84 24 e0 f1 ff mov -0xe20(%r12),%rax 22: ff 23: 48 8b 80 90 1b 00 00 mov 0x1b90(%rax),%rax 2a:* 83 38 03 cmpl $0x3,(%rax) <-- trapping instruction 2d: 0f 84 37 fe ff ff je 0xfffffffffffffe6a 33: bb ea ff ff ff mov $0xffffffea,%ebx 38: eb cc jmp 0x6 3a: 49 rex.WB 3b: 8b .byte 0x8b 3c: 84 24 10 test %ah,(%rax,%rdx,1) 3f: f3 repz Code starting with the faulting instruction =========================================== 0: 83 38 03 cmpl $0x3,(%rax) 3: 0f 84 37 fe ff ff je 0xfffffffffffffe40 9: bb ea ff ff ff mov $0xffffffea,%ebx e: eb cc jmp 0xffffffffffffffdc 10: 49 rex.WB 11: 8b .byte 0x8b 12: 84 24 10 test %ah,(%rax,%rdx,1) 15: f3 repz [ 9890.526285] RSP: 0018:ffffb8db09013d68 EFLAGS: 00010246 [ 9890.526291] RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffff9308e0d656c8 [ 9890.526295] RDX: 0000000000000000 RSI: ffffffffab99460b RDI: ffffffffab9a7685 [ 9890.526300] RBP: ffffb8db09013db8 R08: 0000000000000000 R09: 0000000000000873 [ 9890.526304] R10: ffff9308e0d64800 R11: 0000000000000002 R12: ffff9308e5ff6e70 [ 9890.526308] R13: ffff930952500e20 R14: ffff9309192a8c00 R15: 0000000000000000 [ 9890.526313] FS: 0000000000000000(0000) GS:ffff930b4e700000(0000) knlGS:0000000000000000 [ 9890.526316] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 9890.526318] CR2: 0000000000000000 CR3: 
0000000391c58005 CR4: 00000000001706f0 [ 9890.526321] Call Trace: [ 9890.526324] [ 9890.526327] ? show_regs (arch/x86/kernel/dumpstack.c:479) [ 9890.526335] ? __die (arch/x86/kernel/dumpstack.c:421 arch/x86/kernel/dumpstack.c:434) [ 9890.526340] ? page_fault_oops (arch/x86/mm/fault.c:713) [ 9890.526347] ? search_module_extables (kernel/module/main.c:3256 (discriminator 3)) [ 9890.526353] ? ieee80211_start_tx_ba_session (net/mac80211/agg-tx.c:618 (discriminator 1)) mac80211 Signed-off-by: Zong-Zhe Yang Link: https://patch.msgid.link/20240617115217.22344-1-kevin_yang@realtek.com Signed-off-by: Johannes Berg --- net/mac80211/agg-tx.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index 21d55dc539f6..677bbbac9f16 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -616,7 +616,9 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, return -EINVAL; if (!pubsta->deflink.ht_cap.ht_supported && - sta->sdata->vif.bss_conf.chanreq.oper.chan->band != NL80211_BAND_6GHZ) + !pubsta->deflink.vht_cap.vht_supported && + !pubsta->deflink.he_cap.has_he && + !pubsta->deflink.eht_cap.has_eht) return -EINVAL; if (WARN_ON_ONCE(!local->ops->ampdu_action)) -- cgit v1.2.3-58-ga151 From 1bbdb7f7a4ebf2596b06a6bb84505a4a37a66d2e Mon Sep 17 00:00:00 2001 From: Zijun Hu Date: Thu, 13 Jun 2024 22:04:36 +0800 Subject: net: rfkill: Correct return value in invalid parameter case rfkill_set_hw_state_reason() does not return current combined block state when its parameter @reason is invalid, that is wrong according to its comments, fix it by correcting the value returned. Also reformat the WARN while at it. Signed-off-by: Zijun Hu Link: https://patch.msgid.link/1718287476-28227-1-git-send-email-quic_zijuhu@quicinc.com [edit/reformat commit message, remove unneeded variable] Signed-off-by: Johannes Berg --- net/rfkill/core.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/rfkill/core.c b/net/rfkill/core.c index c3feb4f49d09..7a5367628c05 100644 --- a/net/rfkill/core.c +++ b/net/rfkill/core.c @@ -546,10 +546,10 @@ bool rfkill_set_hw_state_reason(struct rfkill *rfkill, BUG_ON(!rfkill); - if (WARN(reason & - ~(RFKILL_HARD_BLOCK_SIGNAL | RFKILL_HARD_BLOCK_NOT_OWNER), - "hw_state reason not supported: 0x%lx", reason)) - return blocked; + if (WARN(reason & ~(RFKILL_HARD_BLOCK_SIGNAL | + RFKILL_HARD_BLOCK_NOT_OWNER), + "hw_state reason not supported: 0x%lx", reason)) + return rfkill_blocked(rfkill); spin_lock_irqsave(&rfkill->lock, flags); prev = !!(rfkill->hard_block_reasons & reason); -- cgit v1.2.3-58-ga151 From f531d13bdfe3f4f084aaa8acae2cb0f02295f5ae Mon Sep 17 00:00:00 2001 From: Eyal Birger Date: Mon, 27 May 2024 20:29:14 -0700 Subject: xfrm: support sending NAT keepalives in ESP in UDP states Add the ability to send out RFC-3948 NAT keepalives from the xfrm stack. To use, Userspace sets an XFRM_NAT_KEEPALIVE_INTERVAL integer property when creating XFRM outbound states which denotes the number of seconds between keepalive messages. Keepalive messages are sent from a per net delayed work which iterates over the xfrm states. The logic is guarded by the xfrm state spinlock due to the xfrm state walk iterator. Possible future enhancements: - Adding counters to keep track of sent keepalives. - deduplicate NAT keepalives between states sharing the same nat keepalive parameters. - provisioning hardware offloads for devices capable of implementing this. 
- revise xfrm state list to use an rcu list in order to avoid running this under spinlock. Suggested-by: Paul Wouters Tested-by: Paul Wouters Tested-by: Antony Antony Signed-off-by: Eyal Birger Signed-off-by: Steffen Klassert --- include/net/ipv6_stubs.h | 3 + include/net/netns/xfrm.h | 1 + include/net/xfrm.h | 10 ++ include/uapi/linux/xfrm.h | 1 + net/ipv6/af_inet6.c | 1 + net/ipv6/xfrm6_policy.c | 7 + net/xfrm/Makefile | 3 +- net/xfrm/xfrm_compat.c | 6 +- net/xfrm/xfrm_nat_keepalive.c | 292 ++++++++++++++++++++++++++++++++++++++++++ net/xfrm/xfrm_policy.c | 8 ++ net/xfrm/xfrm_state.c | 17 +++ net/xfrm/xfrm_user.c | 15 +++ 12 files changed, 361 insertions(+), 3 deletions(-) create mode 100644 net/xfrm/xfrm_nat_keepalive.c (limited to 'net') diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h index 485c39a89866..11cefd50704d 100644 --- a/include/net/ipv6_stubs.h +++ b/include/net/ipv6_stubs.h @@ -9,6 +9,7 @@ #include #include #include +#include /* structs from net/ip6_fib.h */ struct fib6_info; @@ -72,6 +73,8 @@ struct ipv6_stub { int (*output)(struct net *, struct sock *, struct sk_buff *)); struct net_device *(*ipv6_dev_find)(struct net *net, const struct in6_addr *addr, struct net_device *dev); + int (*ip6_xmit)(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, + __u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority); }; extern const struct ipv6_stub *ipv6_stub __read_mostly; diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index 423b52eca908..d489d9250bff 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -83,6 +83,7 @@ struct netns_xfrm { spinlock_t xfrm_policy_lock; struct mutex xfrm_cfg_mutex; + struct delayed_work nat_keepalive_work; }; #endif diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 77ebf5bcf0b9..46a214a76081 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -229,6 +229,10 @@ struct xfrm_state { struct xfrm_encap_tmpl *encap; struct sock __rcu *encap_sk; + /* NAT keepalive */ + u32 nat_keepalive_interval; /* seconds */ + time64_t nat_keepalive_expiration; + /* Data for care-of address */ xfrm_address_t *coaddr; @@ -2203,4 +2207,10 @@ static inline int register_xfrm_state_bpf(void) } #endif +int xfrm_nat_keepalive_init(unsigned short family); +void xfrm_nat_keepalive_fini(unsigned short family); +int xfrm_nat_keepalive_net_init(struct net *net); +int xfrm_nat_keepalive_net_fini(struct net *net); +void xfrm_nat_keepalive_state_updated(struct xfrm_state *x); + #endif /* _NET_XFRM_H */ diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h index d950d02ab791..f28701500714 100644 --- a/include/uapi/linux/xfrm.h +++ b/include/uapi/linux/xfrm.h @@ -321,6 +321,7 @@ enum xfrm_attr_type_t { XFRMA_IF_ID, /* __u32 */ XFRMA_MTIMER_THRESH, /* __u32 in seconds for input SA */ XFRMA_SA_DIR, /* __u8 */ + XFRMA_NAT_KEEPALIVE_INTERVAL, /* __u32 in seconds for NAT keepalive */ __XFRMA_MAX #define XFRMA_OUTPUT_MARK XFRMA_SET_MARK /* Compatibility */ diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 8041dc181bd4..2b893858b9a9 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -1060,6 +1060,7 @@ static const struct ipv6_stub ipv6_stub_impl = { .nd_tbl = &nd_tbl, .ipv6_fragment = ip6_fragment, .ipv6_dev_find = ipv6_dev_find, + .ip6_xmit = ip6_xmit, }; static const struct ipv6_bpf_stub ipv6_bpf_stub_impl = { diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index cc885d3aa9e5..6837ff05f11a 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ 
-284,8 +284,14 @@ int __init xfrm6_init(void) ret = register_pernet_subsys(&xfrm6_net_ops); if (ret) goto out_protocol; + + ret = xfrm_nat_keepalive_init(AF_INET6); + if (ret) + goto out_nat_keepalive; out: return ret; +out_nat_keepalive: + unregister_pernet_subsys(&xfrm6_net_ops); out_protocol: xfrm6_protocol_fini(); out_state: @@ -297,6 +303,7 @@ out_policy: void xfrm6_fini(void) { + xfrm_nat_keepalive_fini(AF_INET6); unregister_pernet_subsys(&xfrm6_net_ops); xfrm6_protocol_fini(); xfrm6_policy_fini(); diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile index 547cec77ba03..512e0b2f8514 100644 --- a/net/xfrm/Makefile +++ b/net/xfrm/Makefile @@ -13,7 +13,8 @@ endif obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_hash.o \ xfrm_input.o xfrm_output.o \ - xfrm_sysctl.o xfrm_replay.o xfrm_device.o + xfrm_sysctl.o xfrm_replay.o xfrm_device.o \ + xfrm_nat_keepalive.o obj-$(CONFIG_XFRM_STATISTICS) += xfrm_proc.o obj-$(CONFIG_XFRM_ALGO) += xfrm_algo.o obj-$(CONFIG_XFRM_USER) += xfrm_user.o diff --git a/net/xfrm/xfrm_compat.c b/net/xfrm/xfrm_compat.c index 703d4172c7d7..91357ccaf4af 100644 --- a/net/xfrm/xfrm_compat.c +++ b/net/xfrm/xfrm_compat.c @@ -131,6 +131,7 @@ static const struct nla_policy compat_policy[XFRMA_MAX+1] = { [XFRMA_IF_ID] = { .type = NLA_U32 }, [XFRMA_MTIMER_THRESH] = { .type = NLA_U32 }, [XFRMA_SA_DIR] = NLA_POLICY_RANGE(NLA_U8, XFRM_SA_DIR_IN, XFRM_SA_DIR_OUT), + [XFRMA_NAT_KEEPALIVE_INTERVAL] = { .type = NLA_U32 }, }; static struct nlmsghdr *xfrm_nlmsg_put_compat(struct sk_buff *skb, @@ -280,9 +281,10 @@ static int xfrm_xlate64_attr(struct sk_buff *dst, const struct nlattr *src) case XFRMA_IF_ID: case XFRMA_MTIMER_THRESH: case XFRMA_SA_DIR: + case XFRMA_NAT_KEEPALIVE_INTERVAL: return xfrm_nla_cpy(dst, src, nla_len(src)); default: - BUILD_BUG_ON(XFRMA_MAX != XFRMA_SA_DIR); + BUILD_BUG_ON(XFRMA_MAX != XFRMA_NAT_KEEPALIVE_INTERVAL); pr_warn_once("unsupported nla_type %d\n", src->nla_type); return -EOPNOTSUPP; } @@ -437,7 +439,7 @@ static int xfrm_xlate32_attr(void *dst, const struct nlattr *nla, int err; if (type > XFRMA_MAX) { - BUILD_BUG_ON(XFRMA_MAX != XFRMA_SA_DIR); + BUILD_BUG_ON(XFRMA_MAX != XFRMA_NAT_KEEPALIVE_INTERVAL); NL_SET_ERR_MSG(extack, "Bad attribute"); return -EOPNOTSUPP; } diff --git a/net/xfrm/xfrm_nat_keepalive.c b/net/xfrm/xfrm_nat_keepalive.c new file mode 100644 index 000000000000..82f0a301683f --- /dev/null +++ b/net/xfrm/xfrm_nat_keepalive.c @@ -0,0 +1,292 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * xfrm_nat_keepalive.c + * + * (c) 2024 Eyal Birger + */ + +#include +#include +#include + +static DEFINE_PER_CPU(struct sock *, nat_keepalive_sk_ipv4); +#if IS_ENABLED(CONFIG_IPV6) +static DEFINE_PER_CPU(struct sock *, nat_keepalive_sk_ipv6); +#endif + +struct nat_keepalive { + struct net *net; + u16 family; + xfrm_address_t saddr; + xfrm_address_t daddr; + __be16 encap_sport; + __be16 encap_dport; + __u32 smark; +}; + +static void nat_keepalive_init(struct nat_keepalive *ka, struct xfrm_state *x) +{ + ka->net = xs_net(x); + ka->family = x->props.family; + ka->saddr = x->props.saddr; + ka->daddr = x->id.daddr; + ka->encap_sport = x->encap->encap_sport; + ka->encap_dport = x->encap->encap_dport; + ka->smark = xfrm_smark_get(0, x); +} + +static int nat_keepalive_send_ipv4(struct sk_buff *skb, + struct nat_keepalive *ka) +{ + struct net *net = ka->net; + struct flowi4 fl4; + struct rtable *rt; + struct sock *sk; + __u8 tos = 0; + int err; + + flowi4_init_output(&fl4, 0 /* oif */, skb->mark, tos, + RT_SCOPE_UNIVERSE, IPPROTO_UDP, 0, + ka->daddr.a4, 
ka->saddr.a4, ka->encap_dport, + ka->encap_sport, sock_net_uid(net, NULL)); + + rt = ip_route_output_key(net, &fl4); + if (IS_ERR(rt)) + return PTR_ERR(rt); + + skb_dst_set(skb, &rt->dst); + + sk = *this_cpu_ptr(&nat_keepalive_sk_ipv4); + sock_net_set(sk, net); + err = ip_build_and_send_pkt(skb, sk, fl4.saddr, fl4.daddr, NULL, tos); + sock_net_set(sk, &init_net); + return err; +} + +#if IS_ENABLED(CONFIG_IPV6) +static int nat_keepalive_send_ipv6(struct sk_buff *skb, + struct nat_keepalive *ka, + struct udphdr *uh) +{ + struct net *net = ka->net; + struct dst_entry *dst; + struct flowi6 fl6; + struct sock *sk; + __wsum csum; + int err; + + csum = skb_checksum(skb, 0, skb->len, 0); + uh->check = csum_ipv6_magic(&ka->saddr.in6, &ka->daddr.in6, + skb->len, IPPROTO_UDP, csum); + if (uh->check == 0) + uh->check = CSUM_MANGLED_0; + + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_mark = skb->mark; + fl6.saddr = ka->saddr.in6; + fl6.daddr = ka->daddr.in6; + fl6.flowi6_proto = IPPROTO_UDP; + fl6.fl6_sport = ka->encap_sport; + fl6.fl6_dport = ka->encap_dport; + + sk = *this_cpu_ptr(&nat_keepalive_sk_ipv6); + sock_net_set(sk, net); + dst = ipv6_stub->ipv6_dst_lookup_flow(net, sk, &fl6, NULL); + if (IS_ERR(dst)) + return PTR_ERR(dst); + + skb_dst_set(skb, dst); + err = ipv6_stub->ip6_xmit(sk, skb, &fl6, skb->mark, NULL, 0, 0); + sock_net_set(sk, &init_net); + return err; +} +#endif + +static void nat_keepalive_send(struct nat_keepalive *ka) +{ + const int nat_ka_hdrs_len = max(sizeof(struct iphdr), + sizeof(struct ipv6hdr)) + + sizeof(struct udphdr); + const u8 nat_ka_payload = 0xFF; + int err = -EAFNOSUPPORT; + struct sk_buff *skb; + struct udphdr *uh; + + skb = alloc_skb(nat_ka_hdrs_len + sizeof(nat_ka_payload), GFP_ATOMIC); + if (unlikely(!skb)) + return; + + skb_reserve(skb, nat_ka_hdrs_len); + + skb_put_u8(skb, nat_ka_payload); + + uh = skb_push(skb, sizeof(*uh)); + uh->source = ka->encap_sport; + uh->dest = ka->encap_dport; + uh->len = htons(skb->len); + uh->check = 0; + + skb->mark = ka->smark; + + switch (ka->family) { + case AF_INET: + err = nat_keepalive_send_ipv4(skb, ka); + break; +#if IS_ENABLED(CONFIG_IPV6) + case AF_INET6: + err = nat_keepalive_send_ipv6(skb, ka, uh); + break; +#endif + } + if (err) + kfree_skb(skb); +} + +struct nat_keepalive_work_ctx { + time64_t next_run; + time64_t now; +}; + +static int nat_keepalive_work_single(struct xfrm_state *x, int count, void *ptr) +{ + struct nat_keepalive_work_ctx *ctx = ptr; + bool send_keepalive = false; + struct nat_keepalive ka; + time64_t next_run; + u32 interval; + int delta; + + interval = x->nat_keepalive_interval; + if (!interval) + return 0; + + spin_lock(&x->lock); + + delta = (int)(ctx->now - x->lastused); + if (delta < interval) { + x->nat_keepalive_expiration = ctx->now + interval - delta; + next_run = x->nat_keepalive_expiration; + } else if (x->nat_keepalive_expiration > ctx->now) { + next_run = x->nat_keepalive_expiration; + } else { + next_run = ctx->now + interval; + nat_keepalive_init(&ka, x); + send_keepalive = true; + } + + spin_unlock(&x->lock); + + if (send_keepalive) + nat_keepalive_send(&ka); + + if (!ctx->next_run || next_run < ctx->next_run) + ctx->next_run = next_run; + return 0; +} + +static void nat_keepalive_work(struct work_struct *work) +{ + struct nat_keepalive_work_ctx ctx; + struct xfrm_state_walk walk; + struct net *net; + + ctx.next_run = 0; + ctx.now = ktime_get_real_seconds(); + + net = container_of(work, struct net, xfrm.nat_keepalive_work.work); + xfrm_state_walk_init(&walk, IPPROTO_ESP, NULL); + 
xfrm_state_walk(net, &walk, nat_keepalive_work_single, &ctx); + xfrm_state_walk_done(&walk, net); + if (ctx.next_run) + schedule_delayed_work(&net->xfrm.nat_keepalive_work, + (ctx.next_run - ctx.now) * HZ); +} + +static int nat_keepalive_sk_init(struct sock * __percpu *socks, + unsigned short family) +{ + struct sock *sk; + int err, i; + + for_each_possible_cpu(i) { + err = inet_ctl_sock_create(&sk, family, SOCK_RAW, IPPROTO_UDP, + &init_net); + if (err < 0) + goto err; + + *per_cpu_ptr(socks, i) = sk; + } + + return 0; +err: + for_each_possible_cpu(i) + inet_ctl_sock_destroy(*per_cpu_ptr(socks, i)); + return err; +} + +static void nat_keepalive_sk_fini(struct sock * __percpu *socks) +{ + int i; + + for_each_possible_cpu(i) + inet_ctl_sock_destroy(*per_cpu_ptr(socks, i)); +} + +void xfrm_nat_keepalive_state_updated(struct xfrm_state *x) +{ + struct net *net; + + if (!x->nat_keepalive_interval) + return; + + net = xs_net(x); + schedule_delayed_work(&net->xfrm.nat_keepalive_work, 0); +} + +int __net_init xfrm_nat_keepalive_net_init(struct net *net) +{ + INIT_DELAYED_WORK(&net->xfrm.nat_keepalive_work, nat_keepalive_work); + return 0; +} + +int xfrm_nat_keepalive_net_fini(struct net *net) +{ + cancel_delayed_work_sync(&net->xfrm.nat_keepalive_work); + return 0; +} + +int xfrm_nat_keepalive_init(unsigned short family) +{ + int err = -EAFNOSUPPORT; + + switch (family) { + case AF_INET: + err = nat_keepalive_sk_init(&nat_keepalive_sk_ipv4, PF_INET); + break; +#if IS_ENABLED(CONFIG_IPV6) + case AF_INET6: + err = nat_keepalive_sk_init(&nat_keepalive_sk_ipv6, PF_INET6); + break; +#endif + } + + if (err) + pr_err("xfrm nat keepalive init: failed to init err:%d\n", err); + return err; +} +EXPORT_SYMBOL_GPL(xfrm_nat_keepalive_init); + +void xfrm_nat_keepalive_fini(unsigned short family) +{ + switch (family) { + case AF_INET: + nat_keepalive_sk_fini(&nat_keepalive_sk_ipv4); + break; +#if IS_ENABLED(CONFIG_IPV6) + case AF_INET6: + nat_keepalive_sk_fini(&nat_keepalive_sk_ipv6); + break; +#endif + } +} +EXPORT_SYMBOL_GPL(xfrm_nat_keepalive_fini); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 475b904fe68b..6603d3bd171f 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -4289,8 +4289,14 @@ static int __net_init xfrm_net_init(struct net *net) if (rv < 0) goto out_sysctl; + rv = xfrm_nat_keepalive_net_init(net); + if (rv < 0) + goto out_nat_keepalive; + return 0; +out_nat_keepalive: + xfrm_sysctl_fini(net); out_sysctl: xfrm_policy_fini(net); out_policy: @@ -4303,6 +4309,7 @@ out_statistics: static void __net_exit xfrm_net_exit(struct net *net) { + xfrm_nat_keepalive_net_fini(net); xfrm_sysctl_fini(net); xfrm_policy_fini(net); xfrm_state_fini(net); @@ -4364,6 +4371,7 @@ void __init xfrm_init(void) #endif register_xfrm_state_bpf(); + xfrm_nat_keepalive_init(AF_INET); } #ifdef CONFIG_AUDITSYSCALL diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 649bb739df0d..abadc857cd45 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -715,6 +715,7 @@ int __xfrm_state_delete(struct xfrm_state *x) if (x->id.spi) hlist_del_rcu(&x->byspi); net->xfrm.state_num--; + xfrm_nat_keepalive_state_updated(x); spin_unlock(&net->xfrm.xfrm_state_lock); if (x->encap_sk) @@ -1453,6 +1454,7 @@ static void __xfrm_state_insert(struct xfrm_state *x) net->xfrm.state_num++; xfrm_hash_grow_check(net, x->bydst.next != NULL); + xfrm_nat_keepalive_state_updated(x); } /* net->xfrm.xfrm_state_lock is held */ @@ -2871,6 +2873,21 @@ int __xfrm_init_state(struct xfrm_state *x, bool 
init_replay, bool offload, goto error; } + if (x->nat_keepalive_interval) { + if (x->dir != XFRM_SA_DIR_OUT) { + NL_SET_ERR_MSG(extack, "NAT keepalive is only supported for outbound SAs"); + err = -EINVAL; + goto error; + } + + if (!x->encap || x->encap->encap_type != UDP_ENCAP_ESPINUDP) { + NL_SET_ERR_MSG(extack, + "NAT keepalive is only supported for UDP encapsulation"); + err = -EINVAL; + goto error; + } + } + error: return err; } diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index e83c687bd64e..a552cfa623ea 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -833,6 +833,10 @@ static struct xfrm_state *xfrm_state_construct(struct net *net, if (attrs[XFRMA_SA_DIR]) x->dir = nla_get_u8(attrs[XFRMA_SA_DIR]); + if (attrs[XFRMA_NAT_KEEPALIVE_INTERVAL]) + x->nat_keepalive_interval = + nla_get_u32(attrs[XFRMA_NAT_KEEPALIVE_INTERVAL]); + err = __xfrm_init_state(x, false, attrs[XFRMA_OFFLOAD_DEV], extack); if (err) goto error; @@ -1288,6 +1292,13 @@ static int copy_to_user_state_extra(struct xfrm_state *x, } if (x->dir) ret = nla_put_u8(skb, XFRMA_SA_DIR, x->dir); + + if (x->nat_keepalive_interval) { + ret = nla_put_u32(skb, XFRMA_NAT_KEEPALIVE_INTERVAL, + x->nat_keepalive_interval); + if (ret) + goto out; + } out: return ret; } @@ -3165,6 +3176,7 @@ const struct nla_policy xfrma_policy[XFRMA_MAX+1] = { [XFRMA_IF_ID] = { .type = NLA_U32 }, [XFRMA_MTIMER_THRESH] = { .type = NLA_U32 }, [XFRMA_SA_DIR] = NLA_POLICY_RANGE(NLA_U8, XFRM_SA_DIR_IN, XFRM_SA_DIR_OUT), + [XFRMA_NAT_KEEPALIVE_INTERVAL] = { .type = NLA_U32 }, }; EXPORT_SYMBOL_GPL(xfrma_policy); @@ -3474,6 +3486,9 @@ static inline unsigned int xfrm_sa_len(struct xfrm_state *x) if (x->dir) l += nla_total_size(sizeof(x->dir)); + if (x->nat_keepalive_interval) + l += nla_total_size(sizeof(x->nat_keepalive_interval)); + return l; } -- cgit v1.2.3-58-ga151 From 2d5f6801db8ec0ce97bc520ecd51a7be06a35042 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 26 Jun 2024 10:01:53 +0300 Subject: Revert "net: micro-optimize skb_datagram_iter" This reverts commit 934c29999b57b835d65442da6f741d5e27f3b584. This triggered a usercopy BUG() in systems with HIGHMEM, reported by the test robot in: https://lore.kernel.org/oe-lkp/202406161539.b5ff7b20-oliver.sang@intel.com Signed-off-by: Sagi Grimberg Link: https://patch.msgid.link/20240626070153.759257-1-sagi@grimberg.me Signed-off-by: Jakub Kicinski --- net/core/datagram.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/datagram.c b/net/core/datagram.c index 95f242591fd2..e614cfd8e14a 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -417,14 +417,14 @@ static int __skb_datagram_iter(const struct sk_buff *skb, int offset, end = start + skb_frag_size(frag); if ((copy = end - offset) > 0) { struct page *page = skb_frag_page(frag); - u8 *vaddr = kmap_local_page(page); + u8 *vaddr = kmap(page); if (copy > len) copy = len; n = INDIRECT_CALL_1(cb, simple_copy_to_iter, vaddr + skb_frag_off(frag) + offset - start, copy, data, to); - kunmap_local(vaddr); + kunmap(page); offset += n; if (n != copy) goto short_copy; -- cgit v1.2.3-58-ga151 From a7e5793035792cc46a1a4b0a783655ffa897dfe9 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 27 Jun 2024 10:44:11 +0200 Subject: wifi: nl80211: don't give key data to userspace When a key is requested by userspace, there's really no need to include the key data, the sequence counter is really what userspace needs in this case. The fact that it's included is just a historic quirk. 
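For illustration only (not part of this patch): after this change a userspace consumer of NL80211_CMD_GET_KEY can only rely on the sequence counter. A minimal reply-handler sketch, assuming the libnl-3 genl helpers; the callback name is made up and the socket/request setup is omitted:

  #include <stdint.h>
  #include <netlink/genl/genl.h>
  #include <netlink/attr.h>
  #include <linux/nl80211.h>

  /* Hypothetical NL80211_CMD_GET_KEY reply callback: key material is no
   * longer reported, so only the nested sequence counter (PN) is parsed.
   */
  static int get_key_reply_cb(struct nl_msg *msg, void *arg)
  {
  	struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg));
  	struct nlattr *tb[NL80211_ATTR_MAX + 1];
  	struct nlattr *key[NL80211_KEY_MAX + 1];

  	nla_parse(tb, NL80211_ATTR_MAX, genlmsg_attrdata(gnlh, 0),
  		  genlmsg_attrlen(gnlh, 0), NULL);

  	if (!tb[NL80211_ATTR_KEY] ||
  	    nla_parse_nested(key, NL80211_KEY_MAX, tb[NL80211_ATTR_KEY], NULL))
  		return NL_SKIP;

  	if (key[NL80211_KEY_SEQ]) {
  		const uint8_t *seq = nla_data(key[NL80211_KEY_SEQ]);
  		int seq_len = nla_len(key[NL80211_KEY_SEQ]);

  		/* use seq/seq_len; NL80211_KEY_DATA is intentionally absent */
  		(void)seq;
  		(void)seq_len;
  	}

  	return NL_SKIP;
  }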
Remove the key data. Reviewed-by: Miriam Rachel Korenblit Link: https://patch.msgid.link/20240627104411.b6a4f097e4ea.I7e6cc976cb9e8a80ef25a3351330f313373b4578@changeid Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 19d81200a2a2..674368d028f3 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -4487,10 +4487,7 @@ static void get_key_callback(void *c, struct key_params *params) struct nlattr *key; struct get_key_cookie *cookie = c; - if ((params->key && - nla_put(cookie->msg, NL80211_ATTR_KEY_DATA, - params->key_len, params->key)) || - (params->seq && + if ((params->seq && nla_put(cookie->msg, NL80211_ATTR_KEY_SEQ, params->seq_len, params->seq)) || (params->cipher && @@ -4502,10 +4499,7 @@ static void get_key_callback(void *c, struct key_params *params) if (!key) goto nla_put_failure; - if ((params->key && - nla_put(cookie->msg, NL80211_KEY_DATA, - params->key_len, params->key)) || - (params->seq && + if ((params->seq && nla_put(cookie->msg, NL80211_KEY_SEQ, params->seq_len, params->seq)) || (params->cipher && -- cgit v1.2.3-58-ga151 From 0a3d5991438fcf89c3aebf55c30231faee31567e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 27 Jun 2024 10:44:12 +0200 Subject: wifi: mac80211: remove key data from get_key callback This is now unused in nl80211, so there's no need to set the pointer/length. Reviewed-by: Miriam Rachel Korenblit Link: https://patch.msgid.link/20240627104411.f8ac49a8cfe0.Ic01f775903f0acd10b9ba77eb39d4ed50e709173@changeid Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 7eb2e5bedb6f..3d49b3ee3a2b 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -742,9 +742,6 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev, break; } - params.key = key->conf.key; - params.key_len = key->conf.keylen; - callback(cookie, ¶ms); err = 0; -- cgit v1.2.3-58-ga151 From 3f5d7ff7c533af6f06a39becb77a7afbfaa8b782 Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Thu, 27 Jun 2024 10:46:00 +0200 Subject: wifi: mac80211: Use the link BSS configuration for beacon processing The beacon processing should be fully done in the context of the link. This also resolves a bug with CQM handling with MLO as in such a case the RSSI thresholds configuration is maintained in the link context and not in the interface context. 
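As an illustrative sketch of the rule this enforces (simplified, not the actual mac80211 code; use_cqm_thresholds() is a made-up consumer of the values): per-link state such as the CQM RSSI thresholds has to be read through link->conf, because with MLO the link's bss_conf is allocated separately and vif->bss_conf does not describe it:

  /* Sketch only, assuming the mac80211-internal ieee80211_link_data type. */
  static void example_beacon_cqm(struct ieee80211_link_data *link)
  {
  	struct ieee80211_bss_conf *bss_conf = link->conf;	/* per-link */

  	/* MLO-safe: thresholds are kept in the link context */
  	use_cqm_thresholds(bss_conf->cqm_rssi_thold, bss_conf->cqm_rssi_hyst);

  	/*
  	 * Not MLO-safe: with dynamically allocated links, vif.bss_conf is
  	 * not this link's configuration:
  	 *
  	 *	use_cqm_thresholds(link->sdata->vif.bss_conf.cqm_rssi_thold,
  	 *			   link->sdata->vif.bss_conf.cqm_rssi_hyst);
  	 */
  }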
Reviewed-by: Miriam Rachel Korenblit Signed-off-by: Ilan Peer Link: https://patch.msgid.link/20240627104600.bb2f0f697881.I675b6a8a186b717f3eef79113c27361fd1a7622c@changeid Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 3d207d79d11f..80a10cd49565 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -6696,7 +6696,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link, { struct ieee80211_sub_if_data *sdata = link->sdata; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf; + struct ieee80211_bss_conf *bss_conf = link->conf; struct ieee80211_vif_cfg *vif_cfg = &sdata->vif.cfg; struct ieee80211_mgmt *mgmt = (void *) hdr; size_t baselen; @@ -6740,7 +6740,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link, parse_params.len = len - baselen; rcu_read_lock(); - chanctx_conf = rcu_dereference(link->conf->chanctx_conf); + chanctx_conf = rcu_dereference(bss_conf->chanctx_conf); if (!chanctx_conf) { rcu_read_unlock(); return; @@ -6770,11 +6770,11 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link, ifmgd->assoc_data->need_beacon = false; if (ieee80211_hw_check(&local->hw, TIMING_BEACON_ONLY) && !ieee80211_is_s1g_beacon(hdr->frame_control)) { - link->conf->sync_tsf = + bss_conf->sync_tsf = le64_to_cpu(mgmt->u.beacon.timestamp); - link->conf->sync_device_ts = + bss_conf->sync_device_ts = rx_status->device_timestamp; - link->conf->sync_dtim_count = elems->dtim_count; + bss_conf->sync_dtim_count = elems->dtim_count; } if (elems->mbssid_config_ie) @@ -6798,7 +6798,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link, } if (!ifmgd->associated || - !ieee80211_rx_our_beacon(bssid, link->conf->bss)) + !ieee80211_rx_our_beacon(bssid, bss_conf->bss)) return; bssid = link->u.mgd.bssid; @@ -6825,7 +6825,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link, */ if (!ieee80211_is_s1g_beacon(hdr->frame_control)) ncrc = crc32_be(0, (void *)&mgmt->u.beacon.beacon_int, 4); - parse_params.bss = link->conf->bss; + parse_params.bss = bss_conf->bss; parse_params.filter = care_about_ies; parse_params.crc = ncrc; elems = ieee802_11_parse_elems_full(&parse_params); @@ -6906,11 +6906,11 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link, */ if (ieee80211_hw_check(&local->hw, TIMING_BEACON_ONLY) && !ieee80211_is_s1g_beacon(hdr->frame_control)) { - link->conf->sync_tsf = + bss_conf->sync_tsf = le64_to_cpu(mgmt->u.beacon.timestamp); - link->conf->sync_device_ts = + bss_conf->sync_device_ts = rx_status->device_timestamp; - link->conf->sync_dtim_count = elems->dtim_count; + bss_conf->sync_dtim_count = elems->dtim_count; } if ((ncrc == link->u.mgd.beacon_crc && link->u.mgd.beacon_crc_valid) || @@ -6973,10 +6973,10 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link, goto free; } - if (WARN_ON(!link->conf->chanreq.oper.chan)) + if (WARN_ON(!bss_conf->chanreq.oper.chan)) goto free; - sband = local->hw.wiphy->bands[link->conf->chanreq.oper.chan->band]; + sband = local->hw.wiphy->bands[bss_conf->chanreq.oper.chan->band]; changed |= ieee80211_recalc_twt_req(sdata, sband, link, link_sta, elems); -- cgit v1.2.3-58-ga151 From 8c62617295d3c4cd03f1a02c3b9bf9d4e6d6e0c6 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 27 Jun 2024 13:25:27 +0200 Subject: wifi: 
mac80211: remove DEAUTH_NEED_MGD_TX_PREP This flag is annoying because it puts a lot of logic into mac80211 that could just as well be in the driver (only iwlmvm uses it) and the implementation is also broken for MLO. Remove the flag in favour of calling drv_mgd_prepare_tx() without any conditions even for the deauth-while-assoc case. The drivers that implement it can take the appropriate actions, which for the only user of DEAUTH_NEED_MGD_TX_PREP (iwlmvm) is a bit more tricky than the implementation in mac80211 is anyway, and all others have no need and can just exit if info->was_assoc is set. Reviewed-by: Miriam Rachel Korenblit Link: https://patch.msgid.link/20240627132527.94924bcc9c9e.I328a219e45f2e2724cd52e75bb9feee3bf21a463@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 12 +++++----- .../net/wireless/intel/iwlwifi/mvm/mld-mac80211.c | 2 ++ drivers/net/wireless/intel/iwlwifi/mvm/mvm.h | 4 ++++ .../net/wireless/intel/iwlwifi/mvm/time-event.c | 2 ++ include/net/mac80211.h | 22 +++++------------- net/mac80211/debugfs.c | 1 - net/mac80211/main.c | 3 --- net/mac80211/mlme.c | 26 +++------------------- 8 files changed, 23 insertions(+), 49 deletions(-) (limited to 'net') diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index 90cf0ec69682..60bfe42d5386 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -383,12 +383,6 @@ int iwl_mvm_mac_setup_register(struct iwl_mvm *mvm) if (!mvm->mld_api_is_used) ieee80211_hw_set(hw, TIMING_BEACON_ONLY); - /* We should probably have this, but mac80211 - * currently doesn't support it for MLO. - */ - if (!(hw->wiphy->flags & WIPHY_FLAG_SUPPORTS_MLO)) - ieee80211_hw_set(hw, DEAUTH_NEED_MGD_TX_PREP); - /* * On older devices, enabling TX A-MSDU occasionally leads to * something getting messed up, the command read from the FIFO @@ -2853,6 +2847,8 @@ static void iwl_mvm_bss_info_changed_station(struct iwl_mvm *mvm, if (changes & BSS_CHANGED_ASSOC) { if (vif->cfg.assoc) { + mvmvif->session_prot_connection_loss = false; + /* clear statistics to get clean beacon counter */ iwl_mvm_request_statistics(mvm, true); for_each_mvm_vif_valid_link(mvmvif, i) @@ -4268,8 +4264,12 @@ void iwl_mvm_mac_mgd_prepare_tx(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_prep_tx_info *info) { + struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw); + if (info->was_assoc && !mvmvif->session_prot_connection_loss) + return; + guard(mvm)(mvm); iwl_mvm_protect_assoc(mvm, vif, info->duration, info->link_id); } diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c index ebf313e161f4..3c99396ad369 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c @@ -873,6 +873,8 @@ static void iwl_mvm_mld_vif_cfg_changed_station(struct iwl_mvm *mvm, if (changes & BSS_CHANGED_ASSOC) { if (vif->cfg.assoc) { + mvmvif->session_prot_connection_loss = false; + /* clear statistics to get clean beacon counter */ iwl_mvm_request_statistics(mvm, true); iwl_mvm_sf_update(mvm, vif, false); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index 362973fdeac0..393ce424c196 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ 
b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -450,6 +450,9 @@ struct iwl_mvm_esr_exit { * @unblock_esr_tpt_wk: work for unblocking EMLSR when tpt is high enough. * @roc_activity: currently running ROC activity for this vif (or * ROC_NUM_ACTIVITIES if no activity is running). + * @session_prot_connection_loss: the connection was lost due to session + * protection ending without receiving a beacon, so we need to now + * protect the deauth separately */ struct iwl_mvm_vif { struct iwl_mvm *mvm; @@ -463,6 +466,7 @@ struct iwl_mvm_vif { bool pm_enabled; bool monitor_active; bool esr_active; + bool session_prot_connection_loss; u8 low_latency: 6; u8 low_latency_actual: 1; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c index 2773dfa8baa9..77b0cae8566f 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c @@ -222,6 +222,8 @@ static bool iwl_mvm_te_check_disconnect(struct iwl_mvm *mvm, iwl_dbg_tlv_time_point(&mvm->fwrt, IWL_FW_INI_TIME_POINT_ASSOC_FAILED, NULL); + + mvmvif->session_prot_connection_loss = true; } iwl_mvm_connection_loss(mvm, vif, errmsg); diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 9c96e8ae9ef7..bd0f8aefa797 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2767,14 +2767,6 @@ struct ieee80211_txq { * @IEEE80211_HW_SUPPORTS_TDLS_BUFFER_STA: Hardware supports buffer STA on * TDLS links. * - * @IEEE80211_HW_DEAUTH_NEED_MGD_TX_PREP: The driver requires the - * mgd_prepare_tx() callback to be called before transmission of a - * deauthentication frame in case the association was completed but no - * beacon was heard. This is required in multi-channel scenarios, where the - * virtual interface might not be given air time for the transmission of - * the frame, as it is not synced with the AP/P2P GO yet, and thus the - * deauthentication frame might not be transmitted. - * * @IEEE80211_HW_DOESNT_SUPPORT_QOS_NDP: The driver (or firmware) doesn't * support QoS NDP for AP probing - that's most likely a driver bug. * @@ -2874,7 +2866,6 @@ enum ieee80211_hw_flags { IEEE80211_HW_REPORTS_LOW_ACK, IEEE80211_HW_SUPPORTS_TX_FRAG, IEEE80211_HW_SUPPORTS_TDLS_BUFFER_STA, - IEEE80211_HW_DEAUTH_NEED_MGD_TX_PREP, IEEE80211_HW_DOESNT_SUPPORT_QOS_NDP, IEEE80211_HW_BUFF_MMPDU_TXQ, IEEE80211_HW_SUPPORTS_VHT_EXT_NSS_BW, @@ -3787,13 +3778,15 @@ enum ieee80211_reconfig_type { * @success: whether the frame exchange was successful, only * used with the mgd_complete_tx() method, and then only * valid for auth and (re)assoc. + * @was_assoc: set if this call is due to deauth/disassoc + * while just having been associated * @link_id: the link id on which the frame will be TX'ed. * Only used with the mgd_prepare_tx() method. */ struct ieee80211_prep_tx_info { u16 duration; u16 subtype; - u8 success:1; + u8 success:1, was_assoc:1; int link_id; }; @@ -4242,12 +4235,9 @@ struct ieee80211_prep_tx_info { * yet it need not necessarily be given airtime, in particular since any * transmission to a P2P GO needs to be synchronized against the GO's * powersave state. mac80211 will call this function before transmitting a - * management frame prior to having successfully associated to allow the - * driver to give it channel time for the transmission, to get a response - * and to be able to synchronize with the GO. 
- * For drivers that set %IEEE80211_HW_DEAUTH_NEED_MGD_TX_PREP, mac80211 - * would also call this function before transmitting a deauthentication - * frame in case that no beacon was heard from the AP/P2P GO. + * management frame prior to transmitting that frame to allow the driver + * to give it channel time for the transmission, to get a response and be + * able to synchronize with the GO. * The callback will be called before each transmission and upon return * mac80211 will transmit the frame right away. * Additional information is passed in the &struct ieee80211_prep_tx_info diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index 98310188f330..02b5476a4376 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -483,7 +483,6 @@ static const char *hw_flag_names[] = { FLAG(REPORTS_LOW_ACK), FLAG(SUPPORTS_TX_FRAG), FLAG(SUPPORTS_TDLS_BUFFER_STA), - FLAG(DEAUTH_NEED_MGD_TX_PREP), FLAG(DOESNT_SUPPORT_QOS_NDP), FLAG(BUFF_MMPDU_TXQ), FLAG(SUPPORTS_VHT_EXT_NSS_BW), diff --git a/net/mac80211/main.c b/net/mac80211/main.c index a9aefc83d30a..7578ea56c12f 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -1161,9 +1161,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) if (WARN_ON(!ieee80211_hw_check(hw, AP_LINK_PS))) return -EINVAL; - - if (WARN_ON(ieee80211_hw_check(hw, DEAUTH_NEED_MGD_TX_PREP))) - return -EINVAL; } #ifdef CONFIG_PM diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 80a10cd49565..4779a18ab75d 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -3521,6 +3521,8 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, u64 changed = 0; struct ieee80211_prep_tx_info info = { .subtype = stype, + .was_assoc = true, + .link_id = ffs(sdata->vif.active_links) - 1, }; lockdep_assert_wiphy(local->hw.wiphy); @@ -3569,29 +3571,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, /* deauthenticate/disassociate now */ if (tx || frame_buf) { - /* - * In multi channel scenarios guarantee that the virtual - * interface is granted immediate airtime to transmit the - * deauthentication frame by calling mgd_prepare_tx, if the - * driver requested so. - */ - if (ieee80211_hw_check(&local->hw, DEAUTH_NEED_MGD_TX_PREP)) { - for (link_id = 0; link_id < ARRAY_SIZE(sdata->link); - link_id++) { - struct ieee80211_link_data *link; - - link = sdata_dereference(sdata->link[link_id], - sdata); - if (!link) - continue; - if (link->u.mgd.have_beacon) - break; - } - if (link_id == IEEE80211_MLD_MAX_NUM_LINKS) { - info.link_id = ffs(sdata->vif.active_links) - 1; - drv_mgd_prepare_tx(sdata->local, sdata, &info); - } - } + drv_mgd_prepare_tx(sdata->local, sdata, &info); ieee80211_send_deauth_disassoc(sdata, sdata->vif.cfg.ap_addr, sdata->vif.cfg.ap_addr, stype, -- cgit v1.2.3-58-ga151 From d7d4cfc4c97c7cf49cb2893ef60e8ab59dcac047 Mon Sep 17 00:00:00 2001 From: Danielle Ratson Date: Thu, 27 Jun 2024 17:08:51 +0300 Subject: ethtool: Add flashing transceiver modules' firmware notifications ability Add progress notifications ability to user space while flashing modules' firmware by implementing the interface between the user space and the kernel. Signed-off-by: Danielle Ratson Reviewed-by: Petr Machata Reviewed-by: Andrew Lunn Signed-off-by: David S. 
Miller --- net/ethtool/module.c | 117 ++++++++++++++++++++++++++++++++++++++++++++++++ net/ethtool/module_fw.h | 31 +++++++++++++ net/ethtool/netlink.c | 5 +++ net/ethtool/netlink.h | 1 + 4 files changed, 154 insertions(+) create mode 100644 net/ethtool/module_fw.h (limited to 'net') diff --git a/net/ethtool/module.c b/net/ethtool/module.c index ceb575efc290..ba728b4a38a1 100644 --- a/net/ethtool/module.c +++ b/net/ethtool/module.c @@ -5,6 +5,7 @@ #include "netlink.h" #include "common.h" #include "bitset.h" +#include "module_fw.h" struct module_req_info { struct ethnl_req_info base; @@ -158,3 +159,119 @@ const struct ethnl_request_ops ethnl_module_request_ops = { .set = ethnl_set_module, .set_ntf_cmd = ETHTOOL_MSG_MODULE_NTF, }; + +/* MODULE_FW_FLASH_NTF */ + +static int +ethnl_module_fw_flash_ntf_put_err(struct sk_buff *skb, char *err_msg, + char *sub_err_msg) +{ + int err_msg_len, sub_err_msg_len, total_len; + struct nlattr *attr; + + if (!err_msg) + return 0; + + err_msg_len = strlen(err_msg); + total_len = err_msg_len + 2; /* For period and NUL. */ + + if (sub_err_msg) { + sub_err_msg_len = strlen(sub_err_msg); + total_len += sub_err_msg_len + 2; /* For ", ". */ + } + + attr = nla_reserve(skb, ETHTOOL_A_MODULE_FW_FLASH_STATUS_MSG, + total_len); + if (!attr) + return -ENOMEM; + + if (sub_err_msg) + sprintf(nla_data(attr), "%s, %s.", err_msg, sub_err_msg); + else + sprintf(nla_data(attr), "%s.", err_msg); + + return 0; +} + +static void +ethnl_module_fw_flash_ntf(struct net_device *dev, + enum ethtool_module_fw_flash_status status, + struct ethnl_module_fw_flash_ntf_params *ntf_params, + char *err_msg, char *sub_err_msg, + u64 done, u64 total) +{ + struct sk_buff *skb; + void *hdr; + int ret; + + if (ntf_params->closed_sock) + return; + + skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!skb) + return; + + hdr = ethnl_unicast_put(skb, ntf_params->portid, ntf_params->seq, + ETHTOOL_MSG_MODULE_FW_FLASH_NTF); + if (!hdr) + goto err_skb; + + ret = ethnl_fill_reply_header(skb, dev, + ETHTOOL_A_MODULE_FW_FLASH_HEADER); + if (ret < 0) + goto err_skb; + + if (nla_put_u32(skb, ETHTOOL_A_MODULE_FW_FLASH_STATUS, status)) + goto err_skb; + + ret = ethnl_module_fw_flash_ntf_put_err(skb, err_msg, sub_err_msg); + if (ret < 0) + goto err_skb; + + if (nla_put_uint(skb, ETHTOOL_A_MODULE_FW_FLASH_DONE, done)) + goto err_skb; + + if (nla_put_uint(skb, ETHTOOL_A_MODULE_FW_FLASH_TOTAL, total)) + goto err_skb; + + genlmsg_end(skb, hdr); + genlmsg_unicast(dev_net(dev), skb, ntf_params->portid); + return; + +err_skb: + nlmsg_free(skb); +} + +void ethnl_module_fw_flash_ntf_err(struct net_device *dev, + struct ethnl_module_fw_flash_ntf_params *params, + char *err_msg, char *sub_err_msg) +{ + ethnl_module_fw_flash_ntf(dev, ETHTOOL_MODULE_FW_FLASH_STATUS_ERROR, + params, err_msg, sub_err_msg, 0, 0); +} + +void +ethnl_module_fw_flash_ntf_start(struct net_device *dev, + struct ethnl_module_fw_flash_ntf_params *params) +{ + ethnl_module_fw_flash_ntf(dev, ETHTOOL_MODULE_FW_FLASH_STATUS_STARTED, + params, NULL, NULL, 0, 0); +} + +void +ethnl_module_fw_flash_ntf_complete(struct net_device *dev, + struct ethnl_module_fw_flash_ntf_params *params) +{ + ethnl_module_fw_flash_ntf(dev, ETHTOOL_MODULE_FW_FLASH_STATUS_COMPLETED, + params, NULL, NULL, 0, 0); +} + +void +ethnl_module_fw_flash_ntf_in_progress(struct net_device *dev, + struct ethnl_module_fw_flash_ntf_params *params, + u64 done, u64 total) +{ + ethnl_module_fw_flash_ntf(dev, + ETHTOOL_MODULE_FW_FLASH_STATUS_IN_PROGRESS, + params, NULL, NULL, done, total); +} diff 
--git a/net/ethtool/module_fw.h b/net/ethtool/module_fw.h new file mode 100644 index 000000000000..ee4a291ac1d4 --- /dev/null +++ b/net/ethtool/module_fw.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include + +/** + * struct ethnl_module_fw_flash_ntf_params - module firmware flashing + * notifications parameters + * @portid: Netlink portid of sender. + * @seq: Sequence number of sender. + * @closed_sock: Indicates whether the socket was closed from user space. + */ +struct ethnl_module_fw_flash_ntf_params { + u32 portid; + u32 seq; + bool closed_sock; +}; + +void +ethnl_module_fw_flash_ntf_err(struct net_device *dev, + struct ethnl_module_fw_flash_ntf_params *params, + char *err_msg, char *sub_err_msg); +void +ethnl_module_fw_flash_ntf_start(struct net_device *dev, + struct ethnl_module_fw_flash_ntf_params *params); +void +ethnl_module_fw_flash_ntf_complete(struct net_device *dev, + struct ethnl_module_fw_flash_ntf_params *params); +void +ethnl_module_fw_flash_ntf_in_progress(struct net_device *dev, + struct ethnl_module_fw_flash_ntf_params *params, + u64 done, u64 total); diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index bd04f28d5cf4..393ce668fb04 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -239,6 +239,11 @@ void *ethnl_bcastmsg_put(struct sk_buff *skb, u8 cmd) cmd); } +void *ethnl_unicast_put(struct sk_buff *skb, u32 portid, u32 seq, u8 cmd) +{ + return genlmsg_put(skb, portid, seq, ðtool_genl_family, 0, cmd); +} + int ethnl_multicast(struct sk_buff *skb, struct net_device *dev) { return genlmsg_multicast_netns(ðtool_genl_family, dev_net(dev), skb, diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index 9a333a8d04c1..5e6c6a7b7adc 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -21,6 +21,7 @@ struct sk_buff *ethnl_reply_init(size_t payload, struct net_device *dev, u8 cmd, void **ehdrp); void *ethnl_dump_put(struct sk_buff *skb, struct netlink_callback *cb, u8 cmd); void *ethnl_bcastmsg_put(struct sk_buff *skb, u8 cmd); +void *ethnl_unicast_put(struct sk_buff *skb, u32 portid, u32 seq, u8 cmd); int ethnl_multicast(struct sk_buff *skb, struct net_device *dev); /** -- cgit v1.2.3-58-ga151 From 31e0aa99dc02b2b038a270b0670fc8201b69ec8a Mon Sep 17 00:00:00 2001 From: Danielle Ratson Date: Thu, 27 Jun 2024 17:08:52 +0300 Subject: ethtool: Veto some operations during firmware flashing process Some operations cannot be performed during the firmware flashing process. For example: - Port must be down during the whole flashing process to avoid packet loss while committing reset for example. - Writing to EEPROM interrupts the flashing process, so operations like ethtool dump, module reset, get and set power mode should be vetoed. - Split port firmware flashing should be vetoed. In order to veto those scenarios, add a flag in 'struct net_device' that indicates when a firmware flash is taking place on the module and use it to prevent interruptions during the process. Signed-off-by: Danielle Ratson Reviewed-by: Petr Machata Reviewed-by: Andrew Lunn Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 4 +++- net/ethtool/eeprom.c | 6 ++++++ net/ethtool/ioctl.c | 12 ++++++++++++ net/ethtool/netlink.c | 12 ++++++++++++ 4 files changed, 33 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index cc18acd3c58b..1e3401093c13 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1990,6 +1990,8 @@ enum netdev_reg_state { * * @threaded: napi threaded mode is enabled * + * @module_fw_flash_in_progress: Module firmware flashing is in progress. + * * @net_notifier_list: List of per-net netdev notifier block * that follow this device when it is moved * to another network namespace. @@ -2374,7 +2376,7 @@ struct net_device { bool proto_down; bool threaded; unsigned wol_enabled:1; - + unsigned module_fw_flash_in_progress:1; struct list_head net_notifier_list; #if IS_ENABLED(CONFIG_MACSEC) diff --git a/net/ethtool/eeprom.c b/net/ethtool/eeprom.c index 6209c3a9c8f7..f36811b3ecf1 100644 --- a/net/ethtool/eeprom.c +++ b/net/ethtool/eeprom.c @@ -91,6 +91,12 @@ static int get_module_eeprom_by_page(struct net_device *dev, { const struct ethtool_ops *ops = dev->ethtool_ops; + if (dev->module_fw_flash_in_progress) { + NL_SET_ERR_MSG(extack, + "Module firmware flashing is in progress"); + return -EBUSY; + } + if (dev->sfp_bus) return sfp_get_module_eeprom_by_page(dev->sfp_bus, page_data, extack); diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index e645d751a5e8..1cca372c0d80 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -658,6 +658,9 @@ static int ethtool_get_settings(struct net_device *dev, void __user *useraddr) if (!dev->ethtool_ops->get_link_ksettings) return -EOPNOTSUPP; + if (dev->module_fw_flash_in_progress) + return -EBUSY; + memset(&link_ksettings, 0, sizeof(link_ksettings)); err = dev->ethtool_ops->get_link_ksettings(dev, &link_ksettings); if (err < 0) @@ -1449,6 +1452,9 @@ static int ethtool_reset(struct net_device *dev, char __user *useraddr) if (!dev->ethtool_ops->reset) return -EOPNOTSUPP; + if (dev->module_fw_flash_in_progress) + return -EBUSY; + if (copy_from_user(&reset, useraddr, sizeof(reset))) return -EFAULT; @@ -2462,6 +2468,9 @@ int ethtool_get_module_info_call(struct net_device *dev, const struct ethtool_ops *ops = dev->ethtool_ops; struct phy_device *phydev = dev->phydev; + if (dev->module_fw_flash_in_progress) + return -EBUSY; + if (dev->sfp_bus) return sfp_get_module_info(dev->sfp_bus, modinfo); @@ -2499,6 +2508,9 @@ int ethtool_get_module_eeprom_call(struct net_device *dev, const struct ethtool_ops *ops = dev->ethtool_ops; struct phy_device *phydev = dev->phydev; + if (dev->module_fw_flash_in_progress) + return -EBUSY; + if (dev->sfp_bus) return sfp_get_module_eeprom(dev->sfp_bus, ee, data); diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index 393ce668fb04..a5907bbde427 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -765,10 +765,22 @@ static void ethnl_notify_features(struct netdev_notifier_info *info) static int ethnl_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { + struct netdev_notifier_info *info = ptr; + struct netlink_ext_ack *extack; + struct net_device *dev; + + dev = netdev_notifier_info_to_dev(info); + extack = netdev_notifier_info_to_extack(info); + switch (event) { case NETDEV_FEAT_CHANGE: ethnl_notify_features(ptr); break; + case NETDEV_PRE_UP: + if (dev->module_fw_flash_in_progress) { + NL_SET_ERR_MSG(extack, "Can't set port up while flashing module firmware"); + 
return NOTIFY_BAD; + } } return NOTIFY_DONE; -- cgit v1.2.3-58-ga151 From a39c84d796254e6b1662ca0c46dbc313379e9291 Mon Sep 17 00:00:00 2001 From: Danielle Ratson Date: Thu, 27 Jun 2024 17:08:54 +0300 Subject: ethtool: cmis_cdb: Add a layer for supporting CDB commands CDB (Command Data Block Message Communication) reads and writes are performed on memory map pages 9Fh-AFh according to the CMIS standard, section 8.20 of revision 5.2. Page 9Fh is used to specify the CDB command to be executed and also provides an area for a local payload (LPL). According to the CMIS standard, the firmware update process is done using a CDB commands sequence that will be implemented in the next patch. The kernel interface that will implement the firmware update using CDB command will include 2 layers that will be added under ethtool: * The upper layer that will be triggered from the module layer, is cmis_fw_update. * The lower one is cmis_cdb. In the future there might be more operations to implement using CDB commands. Therefore, the idea is to keep the CDB interface clean and the cmis_fw_update specific to the CDB commands handling it. These two layers will communicate using the API the consists of three functions: - struct ethtool_cmis_cdb * ethtool_cmis_cdb_init(struct net_device *dev, struct ethtool_module_fw_flash_params *params); - void ethtool_cmis_cdb_fini(struct ethtool_cmis_cdb *cdb); - int ethtool_cmis_cdb_execute_cmd(struct net_device *dev, struct ethtool_cmis_cdb_cmd_args *args); Add the CDB layer to support initializing, finishing and executing CDB commands: * The initialization process will include creating of an ethtool_cmis_cdb instance, querying the module CDB support, entering and validating the password from user space (CMD 0x0000) and querying the module features (CMD 0x0040). * The finishing API will simply free the ethtool_cmis_cdb instance. * The executing process will write the CDB command to EEPROM using set_module_eeprom_by_page() that was presented earlier, and will process the reply from EEPROM. Signed-off-by: Danielle Ratson Reviewed-by: Petr Machata Signed-off-by: David S. Miller --- net/ethtool/Makefile | 2 +- net/ethtool/cmis.h | 117 ++++++++++ net/ethtool/cmis_cdb.c | 602 ++++++++++++++++++++++++++++++++++++++++++++++++ net/ethtool/module_fw.h | 10 + 4 files changed, 730 insertions(+), 1 deletion(-) create mode 100644 net/ethtool/cmis.h create mode 100644 net/ethtool/cmis_cdb.c (limited to 'net') diff --git a/net/ethtool/Makefile b/net/ethtool/Makefile index 504f954a1b28..38806b3ecf83 100644 --- a/net/ethtool/Makefile +++ b/net/ethtool/Makefile @@ -8,4 +8,4 @@ ethtool_nl-y := netlink.o bitset.o strset.o linkinfo.o linkmodes.o rss.o \ linkstate.o debug.o wol.o features.o privflags.o rings.o \ channels.o coalesce.o pause.o eee.o tsinfo.o cabletest.o \ tunnels.o fec.o eeprom.o stats.o phc_vclocks.o mm.o \ - module.o pse-pd.o plca.o mm.o + module.o cmis_cdb.o pse-pd.o plca.o mm.o diff --git a/net/ethtool/cmis.h b/net/ethtool/cmis.h new file mode 100644 index 000000000000..295f5d0df915 --- /dev/null +++ b/net/ethtool/cmis.h @@ -0,0 +1,117 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#define ETHTOOL_CMIS_CDB_LPL_MAX_PL_LENGTH 120 +#define ETHTOOL_CMIS_CDB_CMD_PAGE 0x9F +#define ETHTOOL_CMIS_CDB_PAGE_I2C_ADDR 0x50 + +/** + * struct ethtool_cmis_cdb - CDB commands parameters + * @cmis_rev: CMIS revision major. + * @read_write_len_ext: Allowable additional number of byte octets to the LPL + * in a READ or a WRITE CDB commands. 
+ * @max_completion_time: Maximum CDB command completion time in msec. + */ +struct ethtool_cmis_cdb { + u8 cmis_rev; + u8 read_write_len_ext; + u16 max_completion_time; +}; + +enum ethtool_cmis_cdb_cmd_id { + ETHTOOL_CMIS_CDB_CMD_QUERY_STATUS = 0x0000, + ETHTOOL_CMIS_CDB_CMD_MODULE_FEATURES = 0x0040, +}; + +/** + * struct ethtool_cmis_cdb_request - CDB commands request fields as decribed in + * the CMIS standard + * @id: Command ID. + * @epl_len: EPL memory length. + * @lpl_len: LPL memory length. + * @chk_code: Check code for the previous field and the payload. + * @resv1: Added to match the CMIS standard request continuity. + * @resv2: Added to match the CMIS standard request continuity. + * @payload: Payload for the CDB commands. + */ +struct ethtool_cmis_cdb_request { + __be16 id; + struct_group(body, + __be16 epl_len; + u8 lpl_len; + u8 chk_code; + u8 resv1; + u8 resv2; + u8 payload[ETHTOOL_CMIS_CDB_LPL_MAX_PL_LENGTH]; + ); +}; + +#define CDB_F_COMPLETION_VALID BIT(0) +#define CDB_F_STATUS_VALID BIT(1) + +/** + * struct ethtool_cmis_cdb_cmd_args - CDB commands execution arguments + * @req: CDB command fields as described in the CMIS standard. + * @max_duration: Maximum duration time for command completion in msec. + * @read_write_len_ext: Allowable additional number of byte octets to the LPL + * in a READ or a WRITE commands. + * @msleep_pre_rpl: Waiting time before checking reply in msec. + * @rpl_exp_len: Expected reply length in bytes. + * @flags: Validation flags for CDB commands. + * @err_msg: Error message to be sent to user space. + */ +struct ethtool_cmis_cdb_cmd_args { + struct ethtool_cmis_cdb_request req; + u16 max_duration; + u8 read_write_len_ext; + u8 msleep_pre_rpl; + u8 rpl_exp_len; + u8 flags; + char *err_msg; +}; + +/** + * struct ethtool_cmis_cdb_rpl_hdr - CDB commands reply header arguments + * @rpl_len: Reply length. + * @rpl_chk_code: Reply check code. + */ +struct ethtool_cmis_cdb_rpl_hdr { + u8 rpl_len; + u8 rpl_chk_code; +}; + +/** + * struct ethtool_cmis_cdb_rpl - CDB commands reply arguments + * @hdr: CDB commands reply header arguments. + * @payload: Payload for the CDB commands reply. 
+ */ +struct ethtool_cmis_cdb_rpl { + struct ethtool_cmis_cdb_rpl_hdr hdr; + u8 payload[ETHTOOL_CMIS_CDB_LPL_MAX_PL_LENGTH]; +}; + +u32 ethtool_cmis_get_max_payload_size(u8 num_of_byte_octs); + +void ethtool_cmis_cdb_compose_args(struct ethtool_cmis_cdb_cmd_args *args, + enum ethtool_cmis_cdb_cmd_id cmd, u8 *pl, + u8 lpl_len, u16 max_duration, + u8 read_write_len_ext, u16 msleep_pre_rpl, + u8 rpl_exp_len, u8 flags); + +void ethtool_cmis_cdb_check_completion_flag(u8 cmis_rev, u8 *flags); + +void ethtool_cmis_page_init(struct ethtool_module_eeprom *page_data, + u8 page, u32 offset, u32 length); +void ethtool_cmis_page_fini(struct ethtool_module_eeprom *page_data); + +struct ethtool_cmis_cdb * +ethtool_cmis_cdb_init(struct net_device *dev, + const struct ethtool_module_fw_flash_params *params, + struct ethnl_module_fw_flash_ntf_params *ntf_params); +void ethtool_cmis_cdb_fini(struct ethtool_cmis_cdb *cdb); + +int ethtool_cmis_wait_for_cond(struct net_device *dev, u8 flags, u8 flag, + u16 max_duration, u32 offset, + bool (*cond_success)(u8), bool (*cond_fail)(u8), u8 *state); + +int ethtool_cmis_cdb_execute_cmd(struct net_device *dev, + struct ethtool_cmis_cdb_cmd_args *args); diff --git a/net/ethtool/cmis_cdb.c b/net/ethtool/cmis_cdb.c new file mode 100644 index 000000000000..1bb08783b60d --- /dev/null +++ b/net/ethtool/cmis_cdb.c @@ -0,0 +1,602 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include +#include + +#include "common.h" +#include "module_fw.h" +#include "cmis.h" + +/* For accessing the LPL field on page 9Fh, the allowable length extension is + * min(i, 15) byte octets where i specifies the allowable additional number of + * byte octets in a READ or a WRITE. + */ +u32 ethtool_cmis_get_max_payload_size(u8 num_of_byte_octs) +{ + return 8 * (1 + min_t(u8, num_of_byte_octs, 15)); +} + +void ethtool_cmis_cdb_compose_args(struct ethtool_cmis_cdb_cmd_args *args, + enum ethtool_cmis_cdb_cmd_id cmd, u8 *pl, + u8 lpl_len, u16 max_duration, + u8 read_write_len_ext, u16 msleep_pre_rpl, + u8 rpl_exp_len, u8 flags) +{ + args->req.id = cpu_to_be16(cmd); + args->req.lpl_len = lpl_len; + if (pl) + memcpy(args->req.payload, pl, args->req.lpl_len); + + args->max_duration = max_duration; + args->read_write_len_ext = + ethtool_cmis_get_max_payload_size(read_write_len_ext); + args->msleep_pre_rpl = msleep_pre_rpl; + args->rpl_exp_len = rpl_exp_len; + args->flags = flags; + args->err_msg = NULL; +} + +void ethtool_cmis_page_init(struct ethtool_module_eeprom *page_data, + u8 page, u32 offset, u32 length) +{ + page_data->page = page; + page_data->offset = offset; + page_data->length = length; + page_data->i2c_address = ETHTOOL_CMIS_CDB_PAGE_I2C_ADDR; +} + +#define CMIS_REVISION_PAGE 0x00 +#define CMIS_REVISION_OFFSET 0x01 + +struct cmis_rev_rpl { + u8 rev; +}; + +static u8 cmis_rev_rpl_major(struct cmis_rev_rpl *rpl) +{ + return rpl->rev >> 4; +} + +static int cmis_rev_major_get(struct net_device *dev, u8 *rev_major) +{ + const struct ethtool_ops *ops = dev->ethtool_ops; + struct ethtool_module_eeprom page_data = {0}; + struct netlink_ext_ack extack = {}; + struct cmis_rev_rpl rpl = {}; + int err; + + ethtool_cmis_page_init(&page_data, CMIS_REVISION_PAGE, + CMIS_REVISION_OFFSET, sizeof(rpl)); + page_data.data = (u8 *)&rpl; + + err = ops->get_module_eeprom_by_page(dev, &page_data, &extack); + if (err < 0) { + if (extack._msg) + netdev_err(dev, "%s\n", extack._msg); + return err; + } + + *rev_major = cmis_rev_rpl_major(&rpl); + + return 0; +} + +#define CMIS_CDB_ADVERTISEMENT_PAGE 0x01 +#define 
CMIS_CDB_ADVERTISEMENT_OFFSET 0xA3 + +/* Based on section 8.4.11 "CDB Messaging Support Advertisement" in CMIS + * standard revision 5.2. + */ +struct cmis_cdb_advert_rpl { + u8 inst_supported; + u8 read_write_len_ext; + u8 resv1; + u8 resv2; +}; + +static u8 cmis_cdb_advert_rpl_inst_supported(struct cmis_cdb_advert_rpl *rpl) +{ + return rpl->inst_supported >> 6; +} + +static int cmis_cdb_advertisement_get(struct ethtool_cmis_cdb *cdb, + struct net_device *dev) +{ + const struct ethtool_ops *ops = dev->ethtool_ops; + struct ethtool_module_eeprom page_data = {}; + struct cmis_cdb_advert_rpl rpl = {}; + struct netlink_ext_ack extack = {}; + int err; + + ethtool_cmis_page_init(&page_data, CMIS_CDB_ADVERTISEMENT_PAGE, + CMIS_CDB_ADVERTISEMENT_OFFSET, sizeof(rpl)); + page_data.data = (u8 *)&rpl; + + err = ops->get_module_eeprom_by_page(dev, &page_data, &extack); + if (err < 0) { + if (extack._msg) + netdev_err(dev, "%s\n", extack._msg); + return err; + } + + if (!cmis_cdb_advert_rpl_inst_supported(&rpl)) + return -EOPNOTSUPP; + + cdb->read_write_len_ext = rpl.read_write_len_ext; + + return 0; +} + +#define CMIS_PASSWORD_ENTRY_PAGE 0x00 +#define CMIS_PASSWORD_ENTRY_OFFSET 0x7A + +struct cmis_password_entry_pl { + __be32 password; +}; + +/* See section 9.3.1 "CMD 0000h: Query Status" in CMIS standard revision 5.2. + * struct cmis_cdb_query_status_pl and struct cmis_cdb_query_status_rpl are + * structured layouts of the flat arrays, + * struct ethtool_cmis_cdb_request::payload and + * struct ethtool_cmis_cdb_rpl::payload respectively. + */ +struct cmis_cdb_query_status_pl { + u16 response_delay; +}; + +struct cmis_cdb_query_status_rpl { + u8 length; + u8 status; +}; + +static int +cmis_cdb_validate_password(struct ethtool_cmis_cdb *cdb, + struct net_device *dev, + const struct ethtool_module_fw_flash_params *params, + struct ethnl_module_fw_flash_ntf_params *ntf_params) +{ + const struct ethtool_ops *ops = dev->ethtool_ops; + struct cmis_cdb_query_status_pl qs_pl = {0}; + struct ethtool_module_eeprom page_data = {}; + struct ethtool_cmis_cdb_cmd_args args = {}; + struct cmis_password_entry_pl pe_pl = {}; + struct cmis_cdb_query_status_rpl *rpl; + struct netlink_ext_ack extack = {}; + int err; + + ethtool_cmis_page_init(&page_data, CMIS_PASSWORD_ENTRY_PAGE, + CMIS_PASSWORD_ENTRY_OFFSET, sizeof(pe_pl)); + page_data.data = (u8 *)&pe_pl; + + pe_pl = *((struct cmis_password_entry_pl *)page_data.data); + pe_pl.password = params->password; + err = ops->set_module_eeprom_by_page(dev, &page_data, &extack); + if (err < 0) { + if (extack._msg) + netdev_err(dev, "%s\n", extack._msg); + return err; + } + + ethtool_cmis_cdb_compose_args(&args, ETHTOOL_CMIS_CDB_CMD_QUERY_STATUS, + (u8 *)&qs_pl, sizeof(qs_pl), 0, + cdb->read_write_len_ext, 1000, + sizeof(*rpl), + CDB_F_COMPLETION_VALID | CDB_F_STATUS_VALID); + + err = ethtool_cmis_cdb_execute_cmd(dev, &args); + if (err < 0) { + ethnl_module_fw_flash_ntf_err(dev, ntf_params, + "Query Status command failed", + args.err_msg); + return err; + } + + rpl = (struct cmis_cdb_query_status_rpl *)args.req.payload; + if (!rpl->length || !rpl->status) { + ethnl_module_fw_flash_ntf_err(dev, ntf_params, + "Password was not accepted", + NULL); + return -EINVAL; + } + + return 0; +} + +/* Some CDB commands asserts the CDB completion flag only from CMIS + * revision 5. Therefore, check the relevant validity flag only when + * the revision supports it. + */ +void ethtool_cmis_cdb_check_completion_flag(u8 cmis_rev, u8 *flags) +{ + *flags |= cmis_rev >= 5 ? 
CDB_F_COMPLETION_VALID : 0; +} + +#define CMIS_CDB_MODULE_FEATURES_RESV_DATA 34 + +/* See section 9.4.1 "CMD 0040h: Module Features" in CMIS standard revision 5.2. + * struct cmis_cdb_module_features_rpl is structured layout of the flat + * array, ethtool_cmis_cdb_rpl::payload. + */ +struct cmis_cdb_module_features_rpl { + u8 resv1[CMIS_CDB_MODULE_FEATURES_RESV_DATA]; + __be16 max_completion_time; +}; + +static u16 +cmis_cdb_module_features_completion_time(struct cmis_cdb_module_features_rpl *rpl) +{ + return be16_to_cpu(rpl->max_completion_time); +} + +static int cmis_cdb_module_features_get(struct ethtool_cmis_cdb *cdb, + struct net_device *dev, + struct ethnl_module_fw_flash_ntf_params *ntf_params) +{ + struct ethtool_cmis_cdb_cmd_args args = {}; + struct cmis_cdb_module_features_rpl *rpl; + u8 flags = CDB_F_STATUS_VALID; + int err; + + ethtool_cmis_cdb_check_completion_flag(cdb->cmis_rev, &flags); + ethtool_cmis_cdb_compose_args(&args, + ETHTOOL_CMIS_CDB_CMD_MODULE_FEATURES, + NULL, 0, 0, cdb->read_write_len_ext, + 1000, sizeof(*rpl), flags); + + err = ethtool_cmis_cdb_execute_cmd(dev, &args); + if (err < 0) { + ethnl_module_fw_flash_ntf_err(dev, ntf_params, + "Module Features command failed", + args.err_msg); + return err; + } + + rpl = (struct cmis_cdb_module_features_rpl *)args.req.payload; + cdb->max_completion_time = + cmis_cdb_module_features_completion_time(rpl); + + return 0; +} + +struct ethtool_cmis_cdb * +ethtool_cmis_cdb_init(struct net_device *dev, + const struct ethtool_module_fw_flash_params *params, + struct ethnl_module_fw_flash_ntf_params *ntf_params) +{ + struct ethtool_cmis_cdb *cdb; + int err; + + cdb = kzalloc(sizeof(*cdb), GFP_KERNEL); + if (!cdb) + return ERR_PTR(-ENOMEM); + + err = cmis_rev_major_get(dev, &cdb->cmis_rev); + if (err < 0) + goto err; + + if (cdb->cmis_rev < 4) { + ethnl_module_fw_flash_ntf_err(dev, ntf_params, + "CMIS revision doesn't support module firmware flashing", + NULL); + err = -EOPNOTSUPP; + goto err; + } + + err = cmis_cdb_advertisement_get(cdb, dev); + if (err < 0) + goto err; + + if (params->password_valid) { + err = cmis_cdb_validate_password(cdb, dev, params, ntf_params); + if (err < 0) + goto err; + } + + err = cmis_cdb_module_features_get(cdb, dev, ntf_params); + if (err < 0) + goto err; + + return cdb; + +err: + ethtool_cmis_cdb_fini(cdb); + return ERR_PTR(err); +} + +void ethtool_cmis_cdb_fini(struct ethtool_cmis_cdb *cdb) +{ + kfree(cdb); +} + +static bool is_completed(u8 data) +{ + return !!(data & 0x40); +} + +#define CMIS_CDB_STATUS_SUCCESS 0x01 + +static bool status_success(u8 data) +{ + return data == CMIS_CDB_STATUS_SUCCESS; +} + +#define CMIS_CDB_STATUS_FAIL 0x40 + +static bool status_fail(u8 data) +{ + return data & CMIS_CDB_STATUS_FAIL; +} + +struct cmis_wait_for_cond_rpl { + u8 state; +}; + +static int +ethtool_cmis_module_poll(struct net_device *dev, + struct cmis_wait_for_cond_rpl *rpl, u32 offset, + bool (*cond_success)(u8), bool (*cond_fail)(u8)) +{ + const struct ethtool_ops *ops = dev->ethtool_ops; + struct ethtool_module_eeprom page_data = {0}; + struct netlink_ext_ack extack = {}; + int err; + + ethtool_cmis_page_init(&page_data, 0, offset, sizeof(rpl)); + page_data.data = (u8 *)rpl; + + err = ops->get_module_eeprom_by_page(dev, &page_data, &extack); + if (err < 0) { + if (extack._msg) + netdev_err_once(dev, "%s\n", extack._msg); + return -EBUSY; + } + + if ((*cond_success)(rpl->state)) + return 0; + + if (*cond_fail && (*cond_fail)(rpl->state)) + return -EIO; + + return -EBUSY; +} + +int 
ethtool_cmis_wait_for_cond(struct net_device *dev, u8 flags, u8 flag, + u16 max_duration, u32 offset, + bool (*cond_success)(u8), bool (*cond_fail)(u8), + u8 *state) +{ + struct cmis_wait_for_cond_rpl rpl = {}; + unsigned long end; + int err; + + if (!(flags & flag)) + return 0; + + if (max_duration == 0) + max_duration = U16_MAX; + + end = jiffies + msecs_to_jiffies(max_duration); + do { + err = ethtool_cmis_module_poll(dev, &rpl, offset, cond_success, + cond_fail); + if (err != -EBUSY) + goto out; + + msleep(20); + } while (time_before(jiffies, end)); + + err = ethtool_cmis_module_poll(dev, &rpl, offset, cond_success, + cond_fail); + if (err == -EBUSY) + err = -ETIMEDOUT; + +out: + *state = rpl.state; + return err; +} + +#define CMIS_CDB_COMPLETION_FLAG_OFFSET 0x08 + +static int cmis_cdb_wait_for_completion(struct net_device *dev, + struct ethtool_cmis_cdb_cmd_args *args) +{ + u8 flag; + int err; + + /* Some vendors demand waiting time before checking completion flag + * in some CDB commands. + */ + msleep(args->msleep_pre_rpl); + + err = ethtool_cmis_wait_for_cond(dev, args->flags, + CDB_F_COMPLETION_VALID, + args->max_duration, + CMIS_CDB_COMPLETION_FLAG_OFFSET, + is_completed, NULL, &flag); + if (err < 0) + args->err_msg = "Completion Flag did not set on time"; + + return err; +} + +#define CMIS_CDB_STATUS_OFFSET 0x25 + +static void cmis_cdb_status_fail_msg_get(u8 status, char **err_msg) +{ + switch (status) { + case 0b10000001: + *err_msg = "CDB Status is in progress: Busy capturing command"; + break; + case 0b10000010: + *err_msg = + "CDB Status is in progress: Busy checking/validating command"; + break; + case 0b10000011: + *err_msg = "CDB Status is in progress: Busy executing"; + break; + case 0b01000000: + *err_msg = "CDB status failed: no specific failure"; + break; + case 0b01000010: + *err_msg = + "CDB status failed: Parameter range error or parameter not supported"; + break; + case 0b01000101: + *err_msg = "CDB status failed: CdbChkCode error"; + break; + default: + *err_msg = "Unknown failure reason"; + } +}; + +static int cmis_cdb_wait_for_status(struct net_device *dev, + struct ethtool_cmis_cdb_cmd_args *args) +{ + u8 status; + int err; + + /* Some vendors demand waiting time before checking status in some + * CDB commands. 
+ */ + msleep(args->msleep_pre_rpl); + + err = ethtool_cmis_wait_for_cond(dev, args->flags, CDB_F_STATUS_VALID, + args->max_duration, + CMIS_CDB_STATUS_OFFSET, + status_success, status_fail, &status); + if (err < 0 && !args->err_msg) + cmis_cdb_status_fail_msg_get(status, &args->err_msg); + + return err; +} + +#define CMIS_CDB_REPLY_OFFSET 0x86 + +static int cmis_cdb_process_reply(struct net_device *dev, + struct ethtool_module_eeprom *page_data, + struct ethtool_cmis_cdb_cmd_args *args) +{ + u8 rpl_hdr_len = sizeof(struct ethtool_cmis_cdb_rpl_hdr); + u8 rpl_exp_len = args->rpl_exp_len + rpl_hdr_len; + const struct ethtool_ops *ops = dev->ethtool_ops; + struct netlink_ext_ack extack = {}; + struct ethtool_cmis_cdb_rpl *rpl; + int err; + + if (!args->rpl_exp_len) + return 0; + + ethtool_cmis_page_init(page_data, ETHTOOL_CMIS_CDB_CMD_PAGE, + CMIS_CDB_REPLY_OFFSET, rpl_exp_len); + page_data->data = kmalloc(page_data->length, GFP_KERNEL); + if (!page_data->data) + return -ENOMEM; + + err = ops->get_module_eeprom_by_page(dev, page_data, &extack); + if (err < 0) { + if (extack._msg) + netdev_err(dev, "%s\n", extack._msg); + goto out; + } + + rpl = (struct ethtool_cmis_cdb_rpl *)page_data->data; + if ((args->rpl_exp_len > rpl->hdr.rpl_len + rpl_hdr_len) || + !rpl->hdr.rpl_chk_code) { + err = -EIO; + goto out; + } + + args->req.lpl_len = rpl->hdr.rpl_len; + memcpy(args->req.payload, rpl->payload, args->req.lpl_len); + +out: + kfree(page_data->data); + return err; +} + +static int +__ethtool_cmis_cdb_execute_cmd(struct net_device *dev, + struct ethtool_module_eeprom *page_data, + u8 page, u32 offset, u32 length, void *data) +{ + const struct ethtool_ops *ops = dev->ethtool_ops; + struct netlink_ext_ack extack = {}; + int err; + + ethtool_cmis_page_init(page_data, page, offset, length); + page_data->data = kmemdup(data, page_data->length, GFP_KERNEL); + if (!page_data->data) + return -ENOMEM; + + err = ops->set_module_eeprom_by_page(dev, page_data, &extack); + if (err < 0) { + if (extack._msg) + netdev_err(dev, "%s\n", extack._msg); + } + + kfree(page_data->data); + return err; +} + +static u8 cmis_cdb_calc_checksum(const void *data, size_t size) +{ + const u8 *bytes = (const u8 *)data; + u8 checksum = 0; + + for (size_t i = 0; i < size; i++) + checksum += bytes[i]; + + return ~checksum; +} + +#define CMIS_CDB_CMD_ID_OFFSET 0x80 + +int ethtool_cmis_cdb_execute_cmd(struct net_device *dev, + struct ethtool_cmis_cdb_cmd_args *args) +{ + struct ethtool_module_eeprom page_data = {}; + u32 offset; + int err; + + args->req.chk_code = + cmis_cdb_calc_checksum(&args->req, sizeof(args->req)); + + if (args->req.lpl_len > args->read_write_len_ext) { + args->err_msg = "LPL length is longer than CDB read write length extension allows"; + return -EINVAL; + } + + /* According to the CMIS standard, there are two options to trigger the + * CDB commands. The default option is triggering the command by writing + * the CMDID bytes. Therefore, the command will be split to 2 calls: + * First, with everything except the CMDID field and then the CMDID + * field. 
+ */ + offset = CMIS_CDB_CMD_ID_OFFSET + + offsetof(struct ethtool_cmis_cdb_request, body); + err = __ethtool_cmis_cdb_execute_cmd(dev, &page_data, + ETHTOOL_CMIS_CDB_CMD_PAGE, offset, + sizeof(args->req.body), + &args->req.body); + if (err < 0) + return err; + + offset = CMIS_CDB_CMD_ID_OFFSET + + offsetof(struct ethtool_cmis_cdb_request, id); + err = __ethtool_cmis_cdb_execute_cmd(dev, &page_data, + ETHTOOL_CMIS_CDB_CMD_PAGE, offset, + sizeof(args->req.id), + &args->req.id); + if (err < 0) + return err; + + err = cmis_cdb_wait_for_completion(dev, args); + if (err < 0) + return err; + + err = cmis_cdb_wait_for_status(dev, args); + if (err < 0) + return err; + + return cmis_cdb_process_reply(dev, &page_data, args); +} diff --git a/net/ethtool/module_fw.h b/net/ethtool/module_fw.h index ee4a291ac1d4..6c86d05ab6cf 100644 --- a/net/ethtool/module_fw.h +++ b/net/ethtool/module_fw.h @@ -15,6 +15,16 @@ struct ethnl_module_fw_flash_ntf_params { bool closed_sock; }; +/** + * struct ethtool_module_fw_flash_params - module firmware flashing parameters + * @password: Module password. Only valid when @pass_valid is set. + * @password_valid: Whether the module password is valid or not. + */ +struct ethtool_module_fw_flash_params { + __be32 password; + u8 password_valid:1; +}; + void ethnl_module_fw_flash_ntf_err(struct net_device *dev, struct ethnl_module_fw_flash_ntf_params *params, -- cgit v1.2.3-58-ga151 From c4f78134d45c9619339c96b4bea380b1d0699788 Mon Sep 17 00:00:00 2001 From: Danielle Ratson Date: Thu, 27 Jun 2024 17:08:55 +0300 Subject: ethtool: cmis_fw_update: add a layer for supporting firmware update using CDB According to the CMIS standard, the firmware update process is done using a CDB commands sequence. Implement a work that will be triggered from the module layer in the next patch the will initiate and execute all the CDB commands in order, to eventually complete the firmware update process. This flashing process includes, writing the firmware image, running the new firmware image and committing it after testing, so that it will run upon reset. This work will also notify user space about the progress of the firmware update process. Signed-off-by: Danielle Ratson Reviewed-by: Petr Machata Signed-off-by: David S. 
Miller --- net/ethtool/Makefile | 2 +- net/ethtool/cmis.h | 7 + net/ethtool/cmis_fw_update.c | 399 +++++++++++++++++++++++++++++++++++++++++++ net/ethtool/module_fw.h | 31 ++++ 4 files changed, 438 insertions(+), 1 deletion(-) create mode 100644 net/ethtool/cmis_fw_update.c (limited to 'net') diff --git a/net/ethtool/Makefile b/net/ethtool/Makefile index 38806b3ecf83..9a190635fe95 100644 --- a/net/ethtool/Makefile +++ b/net/ethtool/Makefile @@ -8,4 +8,4 @@ ethtool_nl-y := netlink.o bitset.o strset.o linkinfo.o linkmodes.o rss.o \ linkstate.o debug.o wol.o features.o privflags.o rings.o \ channels.o coalesce.o pause.o eee.o tsinfo.o cabletest.o \ tunnels.o fec.o eeprom.o stats.o phc_vclocks.o mm.o \ - module.o cmis_cdb.o pse-pd.o plca.o mm.o + module.o cmis_fw_update.o cmis_cdb.o pse-pd.o plca.o mm.o diff --git a/net/ethtool/cmis.h b/net/ethtool/cmis.h index 295f5d0df915..e71cc3e1b7eb 100644 --- a/net/ethtool/cmis.h +++ b/net/ethtool/cmis.h @@ -20,6 +20,12 @@ struct ethtool_cmis_cdb { enum ethtool_cmis_cdb_cmd_id { ETHTOOL_CMIS_CDB_CMD_QUERY_STATUS = 0x0000, ETHTOOL_CMIS_CDB_CMD_MODULE_FEATURES = 0x0040, + ETHTOOL_CMIS_CDB_CMD_FW_MANAGMENT_FEATURES = 0x0041, + ETHTOOL_CMIS_CDB_CMD_START_FW_DOWNLOAD = 0x0101, + ETHTOOL_CMIS_CDB_CMD_WRITE_FW_BLOCK_LPL = 0x0103, + ETHTOOL_CMIS_CDB_CMD_COMPLETE_FW_DOWNLOAD = 0x0107, + ETHTOOL_CMIS_CDB_CMD_RUN_FW_IMAGE = 0x0109, + ETHTOOL_CMIS_CDB_CMD_COMMIT_FW_IMAGE = 0x010A, }; /** @@ -47,6 +53,7 @@ struct ethtool_cmis_cdb_request { #define CDB_F_COMPLETION_VALID BIT(0) #define CDB_F_STATUS_VALID BIT(1) +#define CDB_F_MODULE_STATE_VALID BIT(2) /** * struct ethtool_cmis_cdb_cmd_args - CDB commands execution arguments diff --git a/net/ethtool/cmis_fw_update.c b/net/ethtool/cmis_fw_update.c new file mode 100644 index 000000000000..ae4b4b28a601 --- /dev/null +++ b/net/ethtool/cmis_fw_update.c @@ -0,0 +1,399 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include +#include + +#include "common.h" +#include "module_fw.h" +#include "cmis.h" + +struct cmis_fw_update_fw_mng_features { + u8 start_cmd_payload_size; + u16 max_duration_start; + u16 max_duration_write; + u16 max_duration_complete; +}; + +/* See section 9.4.2 "CMD 0041h: Firmware Management Features" in CMIS standard + * revision 5.2. + * struct cmis_cdb_fw_mng_features_rpl is a structured layout of the flat + * array, ethtool_cmis_cdb_rpl::payload. 
+ */ +struct cmis_cdb_fw_mng_features_rpl { + u8 resv1; + u8 resv2; + u8 start_cmd_payload_size; + u8 resv3; + u8 read_write_len_ext; + u8 write_mechanism; + u8 resv4; + u8 resv5; + __be16 max_duration_start; + __be16 resv6; + __be16 max_duration_write; + __be16 max_duration_complete; + __be16 resv7; +}; + +#define CMIS_CDB_FW_WRITE_MECHANISM_LPL 0x01 + +static int +cmis_fw_update_fw_mng_features_get(struct ethtool_cmis_cdb *cdb, + struct net_device *dev, + struct cmis_fw_update_fw_mng_features *fw_mng, + struct ethnl_module_fw_flash_ntf_params *ntf_params) +{ + struct ethtool_cmis_cdb_cmd_args args = {}; + struct cmis_cdb_fw_mng_features_rpl *rpl; + u8 flags = CDB_F_STATUS_VALID; + int err; + + ethtool_cmis_cdb_check_completion_flag(cdb->cmis_rev, &flags); + ethtool_cmis_cdb_compose_args(&args, + ETHTOOL_CMIS_CDB_CMD_FW_MANAGMENT_FEATURES, + NULL, 0, cdb->max_completion_time, + cdb->read_write_len_ext, 1000, + sizeof(*rpl), flags); + + err = ethtool_cmis_cdb_execute_cmd(dev, &args); + if (err < 0) { + ethnl_module_fw_flash_ntf_err(dev, ntf_params, + "FW Management Features command failed", + args.err_msg); + return err; + } + + rpl = (struct cmis_cdb_fw_mng_features_rpl *)args.req.payload; + if (!(rpl->write_mechanism == CMIS_CDB_FW_WRITE_MECHANISM_LPL)) { + ethnl_module_fw_flash_ntf_err(dev, ntf_params, + "Write LPL is not supported", + NULL); + return -EOPNOTSUPP; + } + + /* Above, we used read_write_len_ext that we got from CDB + * advertisement. Update it with the value that we got from module + * features query, which is specific for Firmware Management Commands + * (IDs 0100h-01FFh). + */ + cdb->read_write_len_ext = rpl->read_write_len_ext; + fw_mng->start_cmd_payload_size = rpl->start_cmd_payload_size; + fw_mng->max_duration_start = be16_to_cpu(rpl->max_duration_start); + fw_mng->max_duration_write = be16_to_cpu(rpl->max_duration_write); + fw_mng->max_duration_complete = be16_to_cpu(rpl->max_duration_complete); + + return 0; +} + +/* See section 9.7.2 "CMD 0101h: Start Firmware Download" in CMIS standard + * revision 5.2. + * struct cmis_cdb_start_fw_download_pl is a structured layout of the + * flat array, ethtool_cmis_cdb_request::payload. 
+ */ +struct cmis_cdb_start_fw_download_pl { + __struct_group(cmis_cdb_start_fw_download_pl_h, head, /* no attrs */, + __be32 image_size; + __be32 resv1; + ); + u8 vendor_data[ETHTOOL_CMIS_CDB_LPL_MAX_PL_LENGTH - + sizeof(struct cmis_cdb_start_fw_download_pl_h)]; +}; + +static int +cmis_fw_update_start_download(struct ethtool_cmis_cdb *cdb, + struct ethtool_cmis_fw_update_params *fw_update, + struct cmis_fw_update_fw_mng_features *fw_mng) +{ + u8 vendor_data_size = fw_mng->start_cmd_payload_size; + struct cmis_cdb_start_fw_download_pl pl = {}; + struct ethtool_cmis_cdb_cmd_args args = {}; + u8 lpl_len; + int err; + + pl.image_size = cpu_to_be32(fw_update->fw->size); + memcpy(pl.vendor_data, fw_update->fw->data, vendor_data_size); + + lpl_len = offsetof(struct cmis_cdb_start_fw_download_pl, + vendor_data[vendor_data_size]); + + ethtool_cmis_cdb_compose_args(&args, + ETHTOOL_CMIS_CDB_CMD_START_FW_DOWNLOAD, + (u8 *)&pl, lpl_len, + fw_mng->max_duration_start, + cdb->read_write_len_ext, 1000, 0, + CDB_F_COMPLETION_VALID | CDB_F_STATUS_VALID); + + err = ethtool_cmis_cdb_execute_cmd(fw_update->dev, &args); + if (err < 0) + ethnl_module_fw_flash_ntf_err(fw_update->dev, + &fw_update->ntf_params, + "Start FW download command failed", + args.err_msg); + + return err; +} + +/* See section 9.7.4 "CMD 0103h: Write Firmware Block LPL" in CMIS standard + * revision 5.2. + * struct cmis_cdb_write_fw_block_lpl_pl is a structured layout of the + * flat array, ethtool_cmis_cdb_request::payload. + */ +struct cmis_cdb_write_fw_block_lpl_pl { + __be32 block_address; + u8 fw_block[ETHTOOL_CMIS_CDB_LPL_MAX_PL_LENGTH - sizeof(__be32)]; +}; + +static int +cmis_fw_update_write_image(struct ethtool_cmis_cdb *cdb, + struct ethtool_cmis_fw_update_params *fw_update, + struct cmis_fw_update_fw_mng_features *fw_mng) +{ + u8 start = fw_mng->start_cmd_payload_size; + u32 offset, max_block_size, max_lpl_len; + u32 image_size = fw_update->fw->size; + int err; + + max_lpl_len = min_t(u32, + ethtool_cmis_get_max_payload_size(cdb->read_write_len_ext), + ETHTOOL_CMIS_CDB_LPL_MAX_PL_LENGTH); + max_block_size = + max_lpl_len - sizeof_field(struct cmis_cdb_write_fw_block_lpl_pl, + block_address); + + for (offset = start; offset < image_size; offset += max_block_size) { + struct cmis_cdb_write_fw_block_lpl_pl pl = { + .block_address = cpu_to_be32(offset - start), + }; + struct ethtool_cmis_cdb_cmd_args args = {}; + u32 block_size, lpl_len; + + ethnl_module_fw_flash_ntf_in_progress(fw_update->dev, + &fw_update->ntf_params, + offset - start, + image_size); + block_size = min_t(u32, max_block_size, image_size - offset); + memcpy(pl.fw_block, &fw_update->fw->data[offset], block_size); + lpl_len = block_size + + sizeof_field(struct cmis_cdb_write_fw_block_lpl_pl, + block_address); + + ethtool_cmis_cdb_compose_args(&args, + ETHTOOL_CMIS_CDB_CMD_WRITE_FW_BLOCK_LPL, + (u8 *)&pl, lpl_len, + fw_mng->max_duration_write, + cdb->read_write_len_ext, 1, 0, + CDB_F_COMPLETION_VALID | CDB_F_STATUS_VALID); + + err = ethtool_cmis_cdb_execute_cmd(fw_update->dev, &args); + if (err < 0) { + ethnl_module_fw_flash_ntf_err(fw_update->dev, + &fw_update->ntf_params, + "Write FW block LPL command failed", + args.err_msg); + return err; + } + } + + return 0; +} + +static int +cmis_fw_update_complete_download(struct ethtool_cmis_cdb *cdb, + struct net_device *dev, + struct cmis_fw_update_fw_mng_features *fw_mng, + struct ethnl_module_fw_flash_ntf_params *ntf_params) +{ + struct ethtool_cmis_cdb_cmd_args args = {}; + int err; + + 
ethtool_cmis_cdb_compose_args(&args, + ETHTOOL_CMIS_CDB_CMD_COMPLETE_FW_DOWNLOAD, + NULL, 0, fw_mng->max_duration_complete, + cdb->read_write_len_ext, 1000, 0, + CDB_F_COMPLETION_VALID | CDB_F_STATUS_VALID); + + err = ethtool_cmis_cdb_execute_cmd(dev, &args); + if (err < 0) + ethnl_module_fw_flash_ntf_err(dev, ntf_params, + "Complete FW download command failed", + args.err_msg); + + return err; +} + +static int +cmis_fw_update_download_image(struct ethtool_cmis_cdb *cdb, + struct ethtool_cmis_fw_update_params *fw_update, + struct cmis_fw_update_fw_mng_features *fw_mng) +{ + int err; + + err = cmis_fw_update_start_download(cdb, fw_update, fw_mng); + if (err < 0) + return err; + + err = cmis_fw_update_write_image(cdb, fw_update, fw_mng); + if (err < 0) + return err; + + err = cmis_fw_update_complete_download(cdb, fw_update->dev, fw_mng, + &fw_update->ntf_params); + if (err < 0) + return err; + + return 0; +} + +enum { + CMIS_MODULE_LOW_PWR = 1, + CMIS_MODULE_READY = 3, +}; + +static bool module_is_ready(u8 data) +{ + u8 state = (data >> 1) & 7; + + return state == CMIS_MODULE_READY || state == CMIS_MODULE_LOW_PWR; +} + +#define CMIS_MODULE_READY_MAX_DURATION_MSEC 1000 +#define CMIS_MODULE_STATE_OFFSET 3 + +static int +cmis_fw_update_wait_for_module_state(struct net_device *dev, u8 flags) +{ + u8 state; + + return ethtool_cmis_wait_for_cond(dev, flags, CDB_F_MODULE_STATE_VALID, + CMIS_MODULE_READY_MAX_DURATION_MSEC, + CMIS_MODULE_STATE_OFFSET, + module_is_ready, NULL, &state); +} + +/* See section 9.7.10 "CMD 0109h: Run Firmware Image" in CMIS standard + * revision 5.2. + * struct cmis_cdb_run_fw_image_pl is a structured layout of the flat + * array, ethtool_cmis_cdb_request::payload. + */ +struct cmis_cdb_run_fw_image_pl { + u8 resv1; + u8 image_to_run; + u16 delay_to_reset; +}; + +static int +cmis_fw_update_run_image(struct ethtool_cmis_cdb *cdb, struct net_device *dev, + struct ethnl_module_fw_flash_ntf_params *ntf_params) +{ + struct ethtool_cmis_cdb_cmd_args args = {}; + struct cmis_cdb_run_fw_image_pl pl = {0}; + int err; + + ethtool_cmis_cdb_compose_args(&args, ETHTOOL_CMIS_CDB_CMD_RUN_FW_IMAGE, + (u8 *)&pl, sizeof(pl), + cdb->max_completion_time, + cdb->read_write_len_ext, 1000, 0, + CDB_F_MODULE_STATE_VALID); + + err = ethtool_cmis_cdb_execute_cmd(dev, &args); + if (err < 0) { + ethnl_module_fw_flash_ntf_err(dev, ntf_params, + "Run image command failed", + args.err_msg); + return err; + } + + err = cmis_fw_update_wait_for_module_state(dev, args.flags); + if (err < 0) + ethnl_module_fw_flash_ntf_err(dev, ntf_params, + "Module is not ready on time after reset", + NULL); + + return err; +} + +static int +cmis_fw_update_commit_image(struct ethtool_cmis_cdb *cdb, + struct net_device *dev, + struct ethnl_module_fw_flash_ntf_params *ntf_params) +{ + struct ethtool_cmis_cdb_cmd_args args = {}; + int err; + + ethtool_cmis_cdb_compose_args(&args, + ETHTOOL_CMIS_CDB_CMD_COMMIT_FW_IMAGE, + NULL, 0, cdb->max_completion_time, + cdb->read_write_len_ext, 1000, 0, + CDB_F_COMPLETION_VALID | CDB_F_STATUS_VALID); + + err = ethtool_cmis_cdb_execute_cmd(dev, &args); + if (err < 0) + ethnl_module_fw_flash_ntf_err(dev, ntf_params, + "Commit image command failed", + args.err_msg); + + return err; +} + +static int cmis_fw_update_reset(struct net_device *dev) +{ + __u32 reset_data = ETH_RESET_PHY; + + return dev->ethtool_ops->reset(dev, &reset_data); +} + +void +ethtool_cmis_fw_update(struct ethtool_cmis_fw_update_params *fw_update) +{ + struct ethnl_module_fw_flash_ntf_params *ntf_params = + 
&fw_update->ntf_params; + struct cmis_fw_update_fw_mng_features fw_mng = {0}; + struct net_device *dev = fw_update->dev; + struct ethtool_cmis_cdb *cdb; + int err; + + cdb = ethtool_cmis_cdb_init(dev, &fw_update->params, ntf_params); + if (IS_ERR(cdb)) + goto err_send_ntf; + + ethnl_module_fw_flash_ntf_start(dev, ntf_params); + + err = cmis_fw_update_fw_mng_features_get(cdb, dev, &fw_mng, ntf_params); + if (err < 0) + goto err_cdb_fini; + + err = cmis_fw_update_download_image(cdb, fw_update, &fw_mng); + if (err < 0) + goto err_cdb_fini; + + err = cmis_fw_update_run_image(cdb, dev, ntf_params); + if (err < 0) + goto err_cdb_fini; + + /* The CDB command "Run Firmware Image" resets the firmware, so the new + * one might have different settings. + * Free the old CDB instance, and init a new one. + */ + ethtool_cmis_cdb_fini(cdb); + + cdb = ethtool_cmis_cdb_init(dev, &fw_update->params, ntf_params); + if (IS_ERR(cdb)) + goto err_send_ntf; + + err = cmis_fw_update_commit_image(cdb, dev, ntf_params); + if (err < 0) + goto err_cdb_fini; + + err = cmis_fw_update_reset(dev); + if (err < 0) + goto err_cdb_fini; + + ethnl_module_fw_flash_ntf_complete(dev, ntf_params); + ethtool_cmis_cdb_fini(cdb); + return; + +err_cdb_fini: + ethtool_cmis_cdb_fini(cdb); +err_send_ntf: + ethnl_module_fw_flash_ntf_err(dev, ntf_params, NULL, NULL); +} diff --git a/net/ethtool/module_fw.h b/net/ethtool/module_fw.h index 6c86d05ab6cf..d0fc2529b60e 100644 --- a/net/ethtool/module_fw.h +++ b/net/ethtool/module_fw.h @@ -25,6 +25,35 @@ struct ethtool_module_fw_flash_params { u8 password_valid:1; }; +/** + * struct ethtool_cmis_fw_update_params - CMIS firmware update specific + * parameters + * @dev: Pointer to the net_device to be flashed. + * @params: Module firmware flashing parameters. + * @ntf_params: Module firmware flashing notification parameters. + * @fw: Firmware to flash. + */ +struct ethtool_cmis_fw_update_params { + struct net_device *dev; + struct ethtool_module_fw_flash_params params; + struct ethnl_module_fw_flash_ntf_params ntf_params; + const struct firmware *fw; +}; + +/** + * struct ethtool_module_fw_flash - module firmware flashing + * @list: List node for &module_fw_flash_work_list. + * @dev_tracker: Refcount tracker for @dev. + * @work: The flashing firmware work. + * @fw_update: CMIS firmware update specific parameters. + */ +struct ethtool_module_fw_flash { + struct list_head list; + netdevice_tracker dev_tracker; + struct work_struct work; + struct ethtool_cmis_fw_update_params fw_update; +}; + void ethnl_module_fw_flash_ntf_err(struct net_device *dev, struct ethnl_module_fw_flash_ntf_params *params, @@ -39,3 +68,5 @@ void ethnl_module_fw_flash_ntf_in_progress(struct net_device *dev, struct ethnl_module_fw_flash_ntf_params *params, u64 done, u64 total); + +void ethtool_cmis_fw_update(struct ethtool_cmis_fw_update_params *params); -- cgit v1.2.3-58-ga151 From 32b4c8b53ee7799e34a2b1634d32d3ce1e36c44e Mon Sep 17 00:00:00 2001 From: Danielle Ratson Date: Thu, 27 Jun 2024 17:08:56 +0300 Subject: ethtool: Add ability to flash transceiver modules' firmware Add the ability to flash the modules' firmware by implementing the interface between the user space and the kernel. 
Example from a succeeding implementation: # ethtool --flash-module-firmware swp40 file test.bin Transceiver module firmware flashing started for device swp40 Transceiver module firmware flashing in progress for device swp40 Progress: 99% Transceiver module firmware flashing completed for device swp40 In addition, add infrastructure that allows modules to set socket-specific private data. This ensures that when a socket is closed from user space during the flashing process, the right socket halts sending notifications to user space until the work item is completed. Signed-off-by: Danielle Ratson Reviewed-by: Petr Machata Signed-off-by: David S. Miller --- net/ethtool/module.c | 277 ++++++++++++++++++++++++++++++++++++++++++++++++ net/ethtool/module_fw.h | 3 + net/ethtool/netlink.c | 39 +++++++ net/ethtool/netlink.h | 15 +++ 4 files changed, 334 insertions(+) (limited to 'net') diff --git a/net/ethtool/module.c b/net/ethtool/module.c index ba728b4a38a1..6b7448df08d5 100644 --- a/net/ethtool/module.c +++ b/net/ethtool/module.c @@ -1,6 +1,9 @@ // SPDX-License-Identifier: GPL-2.0-only #include +#include +#include +#include #include "netlink.h" #include "common.h" @@ -34,6 +37,12 @@ static int module_get_power_mode(struct net_device *dev, if (!ops->get_module_power_mode) return 0; + if (dev->module_fw_flash_in_progress) { + NL_SET_ERR_MSG(extack, + "Module firmware flashing is in progress"); + return -EBUSY; + } + return ops->get_module_power_mode(dev, &data->power, extack); } @@ -110,6 +119,12 @@ ethnl_set_module_validate(struct ethnl_req_info *req_info, if (!tb[ETHTOOL_A_MODULE_POWER_MODE_POLICY]) return 0; + if (req_info->dev->module_fw_flash_in_progress) { + NL_SET_ERR_MSG(info->extack, + "Module firmware flashing is in progress"); + return -EBUSY; + } + if (!ops->get_module_power_mode || !ops->set_module_power_mode) { NL_SET_ERR_MSG_ATTR(info->extack, tb[ETHTOOL_A_MODULE_POWER_MODE_POLICY], @@ -160,6 +175,268 @@ const struct ethnl_request_ops ethnl_module_request_ops = { .set_ntf_cmd = ETHTOOL_MSG_MODULE_NTF, }; +/* MODULE_FW_FLASH_ACT */ + +const struct nla_policy +ethnl_module_fw_flash_act_policy[ETHTOOL_A_MODULE_FW_FLASH_PASSWORD + 1] = { + [ETHTOOL_A_MODULE_FW_FLASH_HEADER] = + NLA_POLICY_NESTED(ethnl_header_policy), + [ETHTOOL_A_MODULE_FW_FLASH_FILE_NAME] = { .type = NLA_NUL_STRING }, + [ETHTOOL_A_MODULE_FW_FLASH_PASSWORD] = { .type = NLA_U32 }, +}; + +static LIST_HEAD(module_fw_flash_work_list); +static DEFINE_SPINLOCK(module_fw_flash_work_list_lock); + +static int +module_flash_fw_work_list_add(struct ethtool_module_fw_flash *module_fw, + struct genl_info *info) +{ + struct ethtool_module_fw_flash *work; + + /* First, check if already registered. 
*/ + spin_lock(&module_fw_flash_work_list_lock); + list_for_each_entry(work, &module_fw_flash_work_list, list) { + if (work->fw_update.ntf_params.portid == info->snd_portid && + work->fw_update.dev == module_fw->fw_update.dev) { + spin_unlock(&module_fw_flash_work_list_lock); + return -EALREADY; + } + } + + list_add_tail(&module_fw->list, &module_fw_flash_work_list); + spin_unlock(&module_fw_flash_work_list_lock); + + return 0; +} + +static void module_flash_fw_work_list_del(struct list_head *list) +{ + spin_lock(&module_fw_flash_work_list_lock); + list_del(list); + spin_unlock(&module_fw_flash_work_list_lock); +} + +static void module_flash_fw_work(struct work_struct *work) +{ + struct ethtool_module_fw_flash *module_fw; + + module_fw = container_of(work, struct ethtool_module_fw_flash, work); + + ethtool_cmis_fw_update(&module_fw->fw_update); + + module_flash_fw_work_list_del(&module_fw->list); + module_fw->fw_update.dev->module_fw_flash_in_progress = false; + netdev_put(module_fw->fw_update.dev, &module_fw->dev_tracker); + release_firmware(module_fw->fw_update.fw); + kfree(module_fw); +} + +#define MODULE_EEPROM_PHYS_ID_PAGE 0 +#define MODULE_EEPROM_PHYS_ID_I2C_ADDR 0x50 + +static int module_flash_fw_work_init(struct ethtool_module_fw_flash *module_fw, + struct net_device *dev, + struct netlink_ext_ack *extack) +{ + const struct ethtool_ops *ops = dev->ethtool_ops; + struct ethtool_module_eeprom page_data = {}; + u8 phys_id; + int err; + + /* Fetch the SFF-8024 Identifier Value. For all supported standards, it + * is located at I2C address 0x50, byte 0. See section 4.1 in SFF-8024, + * revision 4.9. + */ + page_data.page = MODULE_EEPROM_PHYS_ID_PAGE; + page_data.offset = SFP_PHYS_ID; + page_data.length = sizeof(phys_id); + page_data.i2c_address = MODULE_EEPROM_PHYS_ID_I2C_ADDR; + page_data.data = &phys_id; + + err = ops->get_module_eeprom_by_page(dev, &page_data, extack); + if (err < 0) + return err; + + switch (phys_id) { + case SFF8024_ID_QSFP_DD: + case SFF8024_ID_OSFP: + case SFF8024_ID_DSFP: + case SFF8024_ID_QSFP_PLUS_CMIS: + case SFF8024_ID_SFP_DD_CMIS: + case SFF8024_ID_SFP_PLUS_CMIS: + INIT_WORK(&module_fw->work, module_flash_fw_work); + break; + default: + NL_SET_ERR_MSG(extack, + "Module type does not support firmware flashing"); + return -EOPNOTSUPP; + } + + return 0; +} + +void ethnl_module_fw_flash_sock_destroy(struct ethnl_sock_priv *sk_priv) +{ + struct ethtool_module_fw_flash *work; + + spin_lock(&module_fw_flash_work_list_lock); + list_for_each_entry(work, &module_fw_flash_work_list, list) { + if (work->fw_update.dev == sk_priv->dev && + work->fw_update.ntf_params.portid == sk_priv->portid) { + work->fw_update.ntf_params.closed_sock = true; + break; + } + } + spin_unlock(&module_fw_flash_work_list_lock); +} + +static int +module_flash_fw_schedule(struct net_device *dev, const char *file_name, + struct ethtool_module_fw_flash_params *params, + struct sk_buff *skb, struct genl_info *info) +{ + struct ethtool_cmis_fw_update_params *fw_update; + struct ethtool_module_fw_flash *module_fw; + int err; + + module_fw = kzalloc(sizeof(*module_fw), GFP_KERNEL); + if (!module_fw) + return -ENOMEM; + + fw_update = &module_fw->fw_update; + fw_update->params = *params; + err = request_firmware_direct(&fw_update->fw, + file_name, &dev->dev); + if (err) { + NL_SET_ERR_MSG(info->extack, + "Failed to request module firmware image"); + goto err_free; + } + + err = module_flash_fw_work_init(module_fw, dev, info->extack); + if (err < 0) + goto err_release_firmware; + + 
dev->module_fw_flash_in_progress = true; + netdev_hold(dev, &module_fw->dev_tracker, GFP_KERNEL); + fw_update->dev = dev; + fw_update->ntf_params.portid = info->snd_portid; + fw_update->ntf_params.seq = info->snd_seq; + fw_update->ntf_params.closed_sock = false; + + err = ethnl_sock_priv_set(skb, dev, fw_update->ntf_params.portid, + ETHTOOL_SOCK_TYPE_MODULE_FW_FLASH); + if (err < 0) + goto err_release_firmware; + + err = module_flash_fw_work_list_add(module_fw, info); + if (err < 0) + goto err_release_firmware; + + schedule_work(&module_fw->work); + + return 0; + +err_release_firmware: + release_firmware(fw_update->fw); +err_free: + kfree(module_fw); + return err; +} + +static int module_flash_fw(struct net_device *dev, struct nlattr **tb, + struct sk_buff *skb, struct genl_info *info) +{ + struct ethtool_module_fw_flash_params params = {}; + const char *file_name; + struct nlattr *attr; + + if (GENL_REQ_ATTR_CHECK(info, ETHTOOL_A_MODULE_FW_FLASH_FILE_NAME)) + return -EINVAL; + + file_name = nla_data(tb[ETHTOOL_A_MODULE_FW_FLASH_FILE_NAME]); + + attr = tb[ETHTOOL_A_MODULE_FW_FLASH_PASSWORD]; + if (attr) { + params.password = cpu_to_be32(nla_get_u32(attr)); + params.password_valid = true; + } + + return module_flash_fw_schedule(dev, file_name, ¶ms, skb, info); +} + +static int ethnl_module_fw_flash_validate(struct net_device *dev, + struct netlink_ext_ack *extack) +{ + struct devlink_port *devlink_port = dev->devlink_port; + const struct ethtool_ops *ops = dev->ethtool_ops; + + if (!ops->set_module_eeprom_by_page || + !ops->get_module_eeprom_by_page) { + NL_SET_ERR_MSG(extack, + "Flashing module firmware is not supported by this device"); + return -EOPNOTSUPP; + } + + if (!ops->reset) { + NL_SET_ERR_MSG(extack, + "Reset module is not supported by this device, so flashing is not permitted"); + return -EOPNOTSUPP; + } + + if (dev->module_fw_flash_in_progress) { + NL_SET_ERR_MSG(extack, "Module firmware flashing already in progress"); + return -EBUSY; + } + + if (dev->flags & IFF_UP) { + NL_SET_ERR_MSG(extack, "Netdevice is up, so flashing is not permitted"); + return -EBUSY; + } + + if (devlink_port && devlink_port->attrs.split) { + NL_SET_ERR_MSG(extack, "Can't perform firmware flashing on a split port"); + return -EOPNOTSUPP; + } + + return 0; +} + +int ethnl_act_module_fw_flash(struct sk_buff *skb, struct genl_info *info) +{ + struct ethnl_req_info req_info = {}; + struct nlattr **tb = info->attrs; + struct net_device *dev; + int ret; + + ret = ethnl_parse_header_dev_get(&req_info, + tb[ETHTOOL_A_MODULE_FW_FLASH_HEADER], + genl_info_net(info), info->extack, + true); + if (ret < 0) + return ret; + dev = req_info.dev; + + rtnl_lock(); + ret = ethnl_ops_begin(dev); + if (ret < 0) + goto out_rtnl; + + ret = ethnl_module_fw_flash_validate(dev, info->extack); + if (ret < 0) + goto out_rtnl; + + ret = module_flash_fw(dev, tb, skb, info); + + ethnl_ops_complete(dev); + +out_rtnl: + rtnl_unlock(); + ethnl_parse_header_dev_put(&req_info); + return ret; +} + /* MODULE_FW_FLASH_NTF */ static int diff --git a/net/ethtool/module_fw.h b/net/ethtool/module_fw.h index d0fc2529b60e..634543a12d0c 100644 --- a/net/ethtool/module_fw.h +++ b/net/ethtool/module_fw.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ #include +#include "netlink.h" /** * struct ethnl_module_fw_flash_ntf_params - module firmware flashing @@ -54,6 +55,8 @@ struct ethtool_module_fw_flash { struct ethtool_cmis_fw_update_params fw_update; }; +void ethnl_module_fw_flash_sock_destroy(struct ethnl_sock_priv *sk_priv); + void 
ethnl_module_fw_flash_ntf_err(struct net_device *dev, struct ethnl_module_fw_flash_ntf_params *params, diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index a5907bbde427..81fe2e5b95f6 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -4,6 +4,7 @@ #include #include #include "netlink.h" +#include "module_fw.h" static struct genl_family ethtool_genl_family; @@ -30,6 +31,35 @@ const struct nla_policy ethnl_header_policy_stats[] = { ETHTOOL_FLAGS_STATS), }; +int ethnl_sock_priv_set(struct sk_buff *skb, struct net_device *dev, u32 portid, + enum ethnl_sock_type type) +{ + struct ethnl_sock_priv *sk_priv; + + sk_priv = genl_sk_priv_get(ðtool_genl_family, NETLINK_CB(skb).sk); + if (IS_ERR(sk_priv)) + return PTR_ERR(sk_priv); + + sk_priv->dev = dev; + sk_priv->portid = portid; + sk_priv->type = type; + + return 0; +} + +static void ethnl_sock_priv_destroy(void *priv) +{ + struct ethnl_sock_priv *sk_priv = priv; + + switch (sk_priv->type) { + case ETHTOOL_SOCK_TYPE_MODULE_FW_FLASH: + ethnl_module_fw_flash_sock_destroy(sk_priv); + break; + default: + break; + } +} + int ethnl_ops_begin(struct net_device *dev) { int ret; @@ -1142,6 +1172,13 @@ static const struct genl_ops ethtool_genl_ops[] = { .policy = ethnl_mm_set_policy, .maxattr = ARRAY_SIZE(ethnl_mm_set_policy) - 1, }, + { + .cmd = ETHTOOL_MSG_MODULE_FW_FLASH_ACT, + .flags = GENL_UNS_ADMIN_PERM, + .doit = ethnl_act_module_fw_flash, + .policy = ethnl_module_fw_flash_act_policy, + .maxattr = ARRAY_SIZE(ethnl_module_fw_flash_act_policy) - 1, + }, }; static const struct genl_multicast_group ethtool_nl_mcgrps[] = { @@ -1158,6 +1195,8 @@ static struct genl_family ethtool_genl_family __ro_after_init = { .resv_start_op = ETHTOOL_MSG_MODULE_GET + 1, .mcgrps = ethtool_nl_mcgrps, .n_mcgrps = ARRAY_SIZE(ethtool_nl_mcgrps), + .sock_priv_size = sizeof(struct ethnl_sock_priv), + .sock_priv_destroy = ethnl_sock_priv_destroy, }; /* module setup */ diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index 5e6c6a7b7adc..46ec273a87c5 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -284,6 +284,19 @@ struct ethnl_reply_data { int ethnl_ops_begin(struct net_device *dev); void ethnl_ops_complete(struct net_device *dev); +enum ethnl_sock_type { + ETHTOOL_SOCK_TYPE_MODULE_FW_FLASH, +}; + +struct ethnl_sock_priv { + struct net_device *dev; + u32 portid; + enum ethnl_sock_type type; +}; + +int ethnl_sock_priv_set(struct sk_buff *skb, struct net_device *dev, u32 portid, + enum ethnl_sock_type type); + /** * struct ethnl_request_ops - unified handling of GET and SET requests * @request_cmd: command id for request (GET) @@ -442,6 +455,7 @@ extern const struct nla_policy ethnl_plca_set_cfg_policy[ETHTOOL_A_PLCA_MAX + 1] extern const struct nla_policy ethnl_plca_get_status_policy[ETHTOOL_A_PLCA_HEADER + 1]; extern const struct nla_policy ethnl_mm_get_policy[ETHTOOL_A_MM_HEADER + 1]; extern const struct nla_policy ethnl_mm_set_policy[ETHTOOL_A_MM_MAX + 1]; +extern const struct nla_policy ethnl_module_fw_flash_act_policy[ETHTOOL_A_MODULE_FW_FLASH_PASSWORD + 1]; int ethnl_set_features(struct sk_buff *skb, struct genl_info *info); int ethnl_act_cable_test(struct sk_buff *skb, struct genl_info *info); @@ -449,6 +463,7 @@ int ethnl_act_cable_test_tdr(struct sk_buff *skb, struct genl_info *info); int ethnl_tunnel_info_doit(struct sk_buff *skb, struct genl_info *info); int ethnl_tunnel_info_start(struct netlink_callback *cb); int ethnl_tunnel_info_dumpit(struct sk_buff *skb, struct netlink_callback *cb); +int 
ethnl_act_module_fw_flash(struct sk_buff *skb, struct genl_info *info); extern const char stats_std_names[__ETHTOOL_STATS_CNT][ETH_GSTRING_LEN]; extern const char stats_eth_phy_names[__ETHTOOL_A_STATS_ETH_PHY_CNT][ETH_GSTRING_LEN]; -- cgit v1.2.3-58-ga151 From f4ebd03496f6b67940b0af92ce885c1d0dc9e121 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Thu, 27 Jun 2024 00:35:05 +0200 Subject: netfilter: xt_recent: Lift restrictions on max hitcount value Support tracking of up to 65535 packets per table entry instead of just 255 to better facilitate longer term tracking or higher throughput scenarios. Note how this aligns sizes of struct recent_entry's 'nstamps' and 'index' fields when 'nstamps' was larger before. This is unnecessary as the value of 'nstamps' grows along with that of 'index' after being initialized to 1 (see recent_entry_update()). Its value will thus never exceed that of 'index' and therefore does not need to provide space for larger values. Requested-by: Fabio Link: https://bugzilla.netfilter.org/show_bug.cgi?id=1745 Signed-off-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_recent.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index ef93e0d3bee0..588a5e6ad899 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -59,9 +59,9 @@ MODULE_PARM_DESC(ip_list_gid, "default owning group of /proc/net/xt_recent/* fil /* retained for backwards compatibility */ static unsigned int ip_pkt_list_tot __read_mostly; module_param(ip_pkt_list_tot, uint, 0400); -MODULE_PARM_DESC(ip_pkt_list_tot, "number of packets per IP address to remember (max. 255)"); +MODULE_PARM_DESC(ip_pkt_list_tot, "number of packets per IP address to remember (max. 65535)"); -#define XT_RECENT_MAX_NSTAMPS 256 +#define XT_RECENT_MAX_NSTAMPS 65536 struct recent_entry { struct list_head list; @@ -69,7 +69,7 @@ struct recent_entry { union nf_inet_addr addr; u_int16_t family; u_int8_t ttl; - u_int8_t index; + u_int16_t index; u_int16_t nstamps; unsigned long stamps[]; }; @@ -80,7 +80,7 @@ struct recent_table { union nf_inet_addr mask; unsigned int refcnt; unsigned int entries; - u8 nstamps_max_mask; + u_int16_t nstamps_max_mask; struct list_head lru_list; struct list_head iphash[]; }; -- cgit v1.2.3-58-ga151 From 10154dbded6d6a2fecaebdfda206609de0f121a9 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Wed, 26 Jun 2024 19:51:26 +0200 Subject: udp: Allow GSO transmit from devices with no checksum offload Today sending a UDP GSO packet from a TUN device results in an EIO error: import fcntl, os, struct from socket import * TUNSETIFF = 0x400454CA IFF_TUN = 0x0001 IFF_NO_PI = 0x1000 UDP_SEGMENT = 103 tun_fd = os.open("/dev/net/tun", os.O_RDWR) ifr = struct.pack("16sH", b"tun0", IFF_TUN | IFF_NO_PI) fcntl.ioctl(tun_fd, TUNSETIFF, ifr) os.system("ip addr add 192.0.2.1/24 dev tun0") os.system("ip link set dev tun0 up") s = socket(AF_INET, SOCK_DGRAM) s.setsockopt(SOL_UDP, UDP_SEGMENT, 1200) s.sendto(b"x" * 3000, ("192.0.2.2", 9)) # EIO This is due to a check in the udp stack if the egress device offers checksum offload. While TUN/TAP devices, by default, don't advertise this capability because it requires support from the TUN/TAP reader. However, the GSO stack has a software fallback for checksum calculation, which we can use. This way we don't force UDP_SEGMENT users to handle the EIO error and implement a segmentation fallback. 
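Concretely, the EIO comes from a guard in udp_send_skb() (udp_v6_send_skb() carries an identical check); the hunks below simply drop the CHECKSUM_PARTIAL condition from it:

    /* net/ipv4/udp.c, before this patch -- quoted from the diff below */
    if (skb->ip_summed != CHECKSUM_PARTIAL || is_udplite ||
        dst_xfrm(skb_dst(skb))) {
        kfree_skb(skb);
        return -EIO;    /* what the reproducer above hits on tun0 */
    }
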
Lift the restriction so that UDP_SEGMENT can be used with any egress device. We also need to adjust the UDP GSO code to match the GSO stack expectation about ip_summed field, as set in commit 8d63bee643f1 ("net: avoid skb_warn_bad_offload false positives on UFO"). Otherwise we will hit the bad offload check. Users should, however, expect a potential performance impact when batch-sending packets with UDP_SEGMENT without checksum offload on the egress device. In such case the packet payload is read twice: first during the sendmsg syscall when copying data from user memory, and then in the GSO stack for checksum computation. This double memory read can be less efficient than a regular sendmsg where the checksum is calculated during the initial data copy from user memory. Signed-off-by: Jakub Sitnicki Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20240626-linux-udpgso-v2-1-422dfcbd6b48@cloudflare.com Signed-off-by: Jakub Kicinski --- net/ipv4/udp.c | 3 +-- net/ipv4/udp_offload.c | 8 ++++++++ net/ipv6/udp.c | 3 +-- 3 files changed, 10 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index d08bf16d476d..ed97df6af14d 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -938,8 +938,7 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4, kfree_skb(skb); return -EINVAL; } - if (skb->ip_summed != CHECKSUM_PARTIAL || is_udplite || - dst_xfrm(skb_dst(skb))) { + if (is_udplite || dst_xfrm(skb_dst(skb))) { kfree_skb(skb); return -EIO; } diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 59448a2dbf2c..aa2e0a28ca61 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -357,6 +357,14 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb, else uh->check = gso_make_checksum(seg, ~check) ? : CSUM_MANGLED_0; + /* On the TX path, CHECKSUM_NONE and CHECKSUM_UNNECESSARY have the same + * meaning. However, check for bad offloads in the GSO stack expects the + * latter, if the checksum was calculated in software. To vouch for the + * segment skbs we actually need to set it on the gso_skb. + */ + if (gso_skb->ip_summed == CHECKSUM_NONE) + gso_skb->ip_summed = CHECKSUM_UNNECESSARY; + /* update refcount for the packet */ if (copy_dtor) { int delta = sum_truesize - gso_skb->truesize; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index b56f0b9f4307..b5456394cc67 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1257,8 +1257,7 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6, kfree_skb(skb); return -EINVAL; } - if (skb->ip_summed != CHECKSUM_PARTIAL || is_udplite || - dst_xfrm(skb_dst(skb))) { + if (is_udplite || dst_xfrm(skb_dst(skb))) { kfree_skb(skb); return -EIO; } -- cgit v1.2.3-58-ga151 From 3ebbd9f6de7ec6d538639ebb657246f629ace81e Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Thu, 27 Jun 2024 16:33:46 +0100 Subject: net: move ethtool-related netdev state into its own struct net_dev->ethtool is a pointer to new struct ethtool_netdev_state, which currently contains only the wol_enabled field. 
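The conversion in driver code is mechanical; a minimal before/after sketch (the real hunks for r8169, ngbe and the phylib/phylink code follow below):

    /* before: the flag lived directly in struct net_device */
    netdev->wol_enabled = wolopts ? 1 : 0;

    /* after: the flag sits behind the new per-netdev ethtool state */
    netdev->ethtool->wol_enabled = wolopts ? 1 : 0;

Because dev->ethtool is now a pointer, the core also has to allocate and free the structure along with the netdev itself, which is presumably what the small net/core/dev.c change in the diffstat is for.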
Suggested-by: Jakub Kicinski Signed-off-by: Edward Cree Reviewed-by: Przemek Kitszel Link: https://patch.msgid.link/293a562278371de7534ed1eb17531838ca090633.1719502239.git.ecree.xilinx@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/realtek/r8169_main.c | 4 ++-- drivers/net/ethernet/wangxun/ngbe/ngbe_ethtool.c | 4 ++-- drivers/net/ethernet/wangxun/ngbe/ngbe_main.c | 2 +- drivers/net/phy/phy.c | 2 +- drivers/net/phy/phy_device.c | 5 +++-- drivers/net/phy/phylink.c | 2 +- include/linux/ethtool.h | 8 ++++++++ include/linux/netdevice.h | 8 +++++--- net/core/dev.c | 4 ++++ net/ethtool/ioctl.c | 2 +- net/ethtool/wol.c | 2 +- 11 files changed, 29 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 9246ea2118ff..714d2e804694 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -1608,7 +1608,7 @@ static void __rtl8169_set_wol(struct rtl8169_private *tp, u32 wolopts) if (!tp->dash_enabled) { rtl_set_d3_pll_down(tp, !wolopts); - tp->dev->wol_enabled = wolopts ? 1 : 0; + tp->dev->ethtool->wol_enabled = wolopts ? 1 : 0; } } @@ -5478,7 +5478,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) rtl_set_d3_pll_down(tp, true); } else { rtl_set_d3_pll_down(tp, false); - dev->wol_enabled = 1; + dev->ethtool->wol_enabled = 1; } jumbo_max = rtl_jumbo_max(tp); diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_ethtool.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_ethtool.c index 46a5a3e95202..e868f7ef4920 100644 --- a/drivers/net/ethernet/wangxun/ngbe/ngbe_ethtool.c +++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_ethtool.c @@ -37,9 +37,9 @@ static int ngbe_set_wol(struct net_device *netdev, wx->wol = 0; if (wol->wolopts & WAKE_MAGIC) wx->wol = WX_PSR_WKUP_CTL_MAG; - netdev->wol_enabled = !!(wx->wol); + netdev->ethtool->wol_enabled = !!(wx->wol); wr32(wx, WX_PSR_WKUP_CTL, wx->wol); - device_set_wakeup_enable(&pdev->dev, netdev->wol_enabled); + device_set_wakeup_enable(&pdev->dev, netdev->ethtool->wol_enabled); return 0; } diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c index e894e01d030d..a8119de60deb 100644 --- a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c +++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c @@ -650,7 +650,7 @@ static int ngbe_probe(struct pci_dev *pdev, if (wx->wol_hw_supported) wx->wol = NGBE_PSR_WKUP_CTL_MAG; - netdev->wol_enabled = !!(wx->wol); + netdev->ethtool->wol_enabled = !!(wx->wol); wr32(wx, NGBE_PSR_WKUP_CTL, wx->wol); device_set_wakeup_enable(&pdev->dev, wx->wol); diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index c4236564c1cd..785182fa5fe0 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -1309,7 +1309,7 @@ static irqreturn_t phy_interrupt(int irq, void *phy_dat) if (netdev) { struct device *parent = netdev->dev.parent; - if (netdev->wol_enabled) + if (netdev->ethtool->wol_enabled) pm_system_wakeup(); else if (device_may_wakeup(&netdev->dev)) pm_wakeup_dev_event(&netdev->dev, 0, true); diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 6c6ec9475709..473cbc1d497b 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -296,7 +296,7 @@ static bool mdio_bus_phy_may_suspend(struct phy_device *phydev) if (!netdev) goto out; - if (netdev->wol_enabled) + if (netdev->ethtool->wol_enabled) return false; /* As long as not all affected network drivers 
support the @@ -1984,7 +1984,8 @@ int phy_suspend(struct phy_device *phydev) return 0; phy_ethtool_get_wol(phydev, &wol); - phydev->wol_enabled = wol.wolopts || (netdev && netdev->wol_enabled); + phydev->wol_enabled = wol.wolopts || + (netdev && netdev->ethtool->wol_enabled); /* If the device has WOL enabled, we cannot suspend the PHY */ if (phydev->wol_enabled && !(phydrv->flags & PHY_ALWAYS_CALL_SUSPEND)) return -EBUSY; diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 6c24c48dcf0f..51c526d227fa 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -2282,7 +2282,7 @@ void phylink_suspend(struct phylink *pl, bool mac_wol) { ASSERT_RTNL(); - if (mac_wol && (!pl->netdev || pl->netdev->wol_enabled)) { + if (mac_wol && (!pl->netdev || pl->netdev->ethtool->wol_enabled)) { /* Wake-on-Lan enabled, MAC handling */ mutex_lock(&pl->state_mutex); diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index c7f6f2bc9cac..374639e661d1 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -1004,6 +1004,14 @@ int ethtool_virtdev_set_link_ksettings(struct net_device *dev, const struct ethtool_link_ksettings *cmd, u32 *dev_speed, u8 *dev_duplex); +/** + * struct ethtool_netdev_state - per-netdevice state for ethtool features + * @wol_enabled: Wake-on-LAN is enabled + */ +struct ethtool_netdev_state { + unsigned wol_enabled:1; +}; + struct phy_device; struct phy_tdr_config; struct phy_plca_cfg; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1e3401093c13..3c719f0d5f5a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -80,6 +80,7 @@ struct xdp_buff; struct xdp_frame; struct xdp_metadata_ops; struct xdp_md; +struct ethtool_netdev_state; typedef u32 xdp_features_t; @@ -1986,8 +1987,6 @@ enum netdev_reg_state { * switch driver and used to set the phys state of the * switch port. * - * @wol_enabled: Wake-on-LAN is enabled - * * @threaded: napi threaded mode is enabled * * @module_fw_flash_in_progress: Module firmware flashing is in progress. 
@@ -2001,6 +2000,7 @@ enum netdev_reg_state { * @udp_tunnel_nic_info: static structure describing the UDP tunnel * offload capabilities of the device * @udp_tunnel_nic: UDP tunnel offload state + * @ethtool: ethtool related state * @xdp_state: stores info on attached XDP BPF programs * * @nested_level: Used as a parameter of spin_lock_nested() of @@ -2375,7 +2375,7 @@ struct net_device { struct lock_class_key *qdisc_tx_busylock; bool proto_down; bool threaded; - unsigned wol_enabled:1; + unsigned module_fw_flash_in_progress:1; struct list_head net_notifier_list; @@ -2386,6 +2386,8 @@ struct net_device { const struct udp_tunnel_nic_info *udp_tunnel_nic_info; struct udp_tunnel_nic *udp_tunnel_nic; + struct ethtool_netdev_state *ethtool; + /* protected by rtnl_lock */ struct bpf_xdp_entity xdp_state[__MAX_XDP_MODE]; diff --git a/net/core/dev.c b/net/core/dev.c index 0a23d7da7fbc..c0c6e200e17f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -11116,6 +11116,9 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, dev->real_num_rx_queues = rxqs; if (netif_alloc_rx_queues(dev)) goto free_all; + dev->ethtool = kzalloc(sizeof(*dev->ethtool), GFP_KERNEL_ACCOUNT); + if (!dev->ethtool) + goto free_all; strcpy(dev->name, name); dev->name_assign_type = name_assign_type; @@ -11166,6 +11169,7 @@ void free_netdev(struct net_device *dev) return; } + kfree(dev->ethtool); netif_free_tx_queues(dev); netif_free_rx_queues(dev); diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 1cca372c0d80..94059ce9e5f2 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -1509,7 +1509,7 @@ static int ethtool_set_wol(struct net_device *dev, char __user *useraddr) if (ret) return ret; - dev->wol_enabled = !!wol.wolopts; + dev->ethtool->wol_enabled = !!wol.wolopts; ethtool_notify(dev, ETHTOOL_MSG_WOL_NTF, NULL); return 0; diff --git a/net/ethtool/wol.c b/net/ethtool/wol.c index 0ed56c9ac1bc..a39d8000d808 100644 --- a/net/ethtool/wol.c +++ b/net/ethtool/wol.c @@ -137,7 +137,7 @@ ethnl_set_wol(struct ethnl_req_info *req_info, struct genl_info *info) ret = dev->ethtool_ops->set_wol(dev, &wol); if (ret) return ret; - dev->wol_enabled = !!wol.wolopts; + dev->ethtool->wol_enabled = !!wol.wolopts; return 1; } -- cgit v1.2.3-58-ga151 From 6ad2962f8adfd53fca52dce7f830783e95d99ce7 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Thu, 27 Jun 2024 16:33:47 +0100 Subject: net: ethtool: attach an XArray of custom RSS contexts to a netdevice Each context stores the RXFH settings (indir, key, and hfunc) as well as optionally some driver private data. Delete any still-existing contexts at netdev unregister time. 
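A minimal sketch of how a driver could consume one of these contexts through the accessors added below (the driver-private struct and function are hypothetical):

	struct my_rss_priv {
		u32 hw_ctx_id;		/* lives in the ctx driver-private area */
	};

	static void my_apply_rss_ctx(struct net_device *dev,
				     struct ethtool_rxfh_context *ctx)
	{
		struct my_rss_priv *priv = ethtool_rxfh_context_priv(ctx);
		u32 *indir = ethtool_rxfh_context_indir(ctx);
		u8 *key = ethtool_rxfh_context_key(ctx);

		/* program priv->hw_ctx_id with indir[0..ctx->indir_size) and
		 * key[0..ctx->key_size)
		 */
	}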
Signed-off-by: Edward Cree Reviewed-by: Przemek Kitszel Link: https://patch.msgid.link/cbd1c402cec38f2e03124f2ab65b4ae4e08bd90d.1719502240.git.ecree.xilinx@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/ethtool.h | 42 ++++++++++++++++++++++++++++++++++++++++++ net/core/dev.c | 27 +++++++++++++++++++++++++++ 2 files changed, 69 insertions(+) (limited to 'net') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 374639e661d1..c741d6403364 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -159,6 +159,46 @@ static inline u32 ethtool_rxfh_indir_default(u32 index, u32 n_rx_rings) return index % n_rx_rings; } +/** + * struct ethtool_rxfh_context - a custom RSS context configuration + * @indir_size: Number of u32 entries in indirection table + * @key_size: Size of hash key, in bytes + * @priv_size: Size of driver private data, in bytes + * @hfunc: RSS hash function identifier. One of the %ETH_RSS_HASH_* + * @input_xfrm: Defines how the input data is transformed. Valid values are one + * of %RXH_XFRM_*. + * @indir_configured: indir has been specified (at create time or subsequently) + * @key_configured: hkey has been specified (at create time or subsequently) + */ +struct ethtool_rxfh_context { + u32 indir_size; + u32 key_size; + u16 priv_size; + u8 hfunc; + u8 input_xfrm; + u8 indir_configured:1; + u8 key_configured:1; + /* private: driver private data, indirection table, and hash key are + * stored sequentially in @data area. Use below helpers to access. + */ + u8 data[] __aligned(sizeof(void *)); +}; + +static inline void *ethtool_rxfh_context_priv(struct ethtool_rxfh_context *ctx) +{ + return ctx->data; +} + +static inline u32 *ethtool_rxfh_context_indir(struct ethtool_rxfh_context *ctx) +{ + return (u32 *)(ctx->data + ALIGN(ctx->priv_size, sizeof(u32))); +} + +static inline u8 *ethtool_rxfh_context_key(struct ethtool_rxfh_context *ctx) +{ + return (u8 *)(ethtool_rxfh_context_indir(ctx) + ctx->indir_size); +} + /* declare a link mode bitmap */ #define __ETHTOOL_DECLARE_LINK_MODE_MASK(name) \ DECLARE_BITMAP(name, __ETHTOOL_LINK_MODE_MASK_NBITS) @@ -1006,9 +1046,11 @@ int ethtool_virtdev_set_link_ksettings(struct net_device *dev, /** * struct ethtool_netdev_state - per-netdevice state for ethtool features + * @rss_ctx: XArray of custom RSS contexts * @wol_enabled: Wake-on-LAN is enabled */ struct ethtool_netdev_state { + struct xarray rss_ctx; unsigned wol_enabled:1; }; diff --git a/net/core/dev.c b/net/core/dev.c index c0c6e200e17f..bce810f8ae53 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -10336,6 +10336,9 @@ int register_netdevice(struct net_device *dev) if (ret) return ret; + /* rss ctx ID 0 is reserved for the default context, start from 1 */ + xa_init_flags(&dev->ethtool->rss_ctx, XA_FLAGS_ALLOC1); + spin_lock_init(&dev->addr_list_lock); netdev_set_addr_lockdep_class(dev); @@ -11235,6 +11238,28 @@ void synchronize_net(void) } EXPORT_SYMBOL(synchronize_net); +static void netdev_rss_contexts_free(struct net_device *dev) +{ + struct ethtool_rxfh_context *ctx; + unsigned long context; + + xa_for_each(&dev->ethtool->rss_ctx, context, ctx) { + struct ethtool_rxfh_param rxfh; + + rxfh.indir = ethtool_rxfh_context_indir(ctx); + rxfh.key = ethtool_rxfh_context_key(ctx); + rxfh.hfunc = ctx->hfunc; + rxfh.input_xfrm = ctx->input_xfrm; + rxfh.rss_context = context; + rxfh.rss_delete = true; + + xa_erase(&dev->ethtool->rss_ctx, context); + dev->ethtool_ops->set_rxfh(dev, &rxfh, NULL); + kfree(ctx); + } + xa_destroy(&dev->ethtool->rss_ctx); +} + /** 
* unregister_netdevice_queue - remove device from the kernel * @dev: device @@ -11338,6 +11363,8 @@ void unregister_netdevice_many_notify(struct list_head *head, netdev_name_node_alt_flush(dev); netdev_name_node_free(dev->name_node); + netdev_rss_contexts_free(dev); + call_netdevice_notifiers(NETDEV_PRE_UNINIT, dev); if (dev->netdev_ops->ndo_uninit) -- cgit v1.2.3-58-ga151 From eac9122f0c41b832065e01977c34946ec8e76c24 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Thu, 27 Jun 2024 16:33:48 +0100 Subject: net: ethtool: record custom RSS contexts in the XArray Since drivers are still choosing the context IDs, we have to force the XArray to use the ID they've chosen rather than picking one ourselves, and handle the case where they give us an ID that's already in use. Signed-off-by: Edward Cree Reviewed-by: Przemek Kitszel Link: https://patch.msgid.link/801f5faa4cec87c65b2c6e27fb220c944bce593a.1719502240.git.ecree.xilinx@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/ethtool.h | 14 ++++++++++ net/ethtool/ioctl.c | 74 ++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 87 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index c741d6403364..43a2a143034f 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -199,6 +199,17 @@ static inline u8 *ethtool_rxfh_context_key(struct ethtool_rxfh_context *ctx) return (u8 *)(ethtool_rxfh_context_indir(ctx) + ctx->indir_size); } +static inline size_t ethtool_rxfh_context_size(u32 indir_size, u32 key_size, + u16 priv_size) +{ + size_t indir_bytes = array_size(indir_size, sizeof(u32)); + size_t flex_len; + + flex_len = size_add(size_add(indir_bytes, key_size), + ALIGN(priv_size, sizeof(u32))); + return struct_size_t(struct ethtool_rxfh_context, data, flex_len); +} + /* declare a link mode bitmap */ #define __ETHTOOL_DECLARE_LINK_MODE_MASK(name) \ DECLARE_BITMAP(name, __ETHTOOL_LINK_MODE_MASK_NBITS) @@ -710,6 +721,8 @@ struct ethtool_rxfh_param { * contexts. * @cap_rss_sym_xor_supported: indicates if the driver supports symmetric-xor * RSS. + * @rxfh_priv_size: size of the driver private data area the core should + * allocate for an RSS context (in &struct ethtool_rxfh_context). * @supported_coalesce_params: supported types of interrupt coalescing. * @supported_ring_params: supported ring params. * @get_drvinfo: Report driver/device information. 
Modern drivers no @@ -895,6 +908,7 @@ struct ethtool_ops { u32 cap_link_lanes_supported:1; u32 cap_rss_ctx_supported:1; u32 cap_rss_sym_xor_supported:1; + u16 rxfh_priv_size; u32 supported_coalesce_params; u32 supported_ring_params; void (*get_drvinfo)(struct net_device *, struct ethtool_drvinfo *); diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 94059ce9e5f2..e901a20e97f5 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -1281,10 +1281,12 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, const struct ethtool_ops *ops = dev->ethtool_ops; u32 dev_indir_size = 0, dev_key_size = 0, i; struct ethtool_rxfh_param rxfh_dev = {}; + struct ethtool_rxfh_context *ctx = NULL; struct netlink_ext_ack *extack = NULL; struct ethtool_rxnfc rx_rings; struct ethtool_rxfh rxfh; u32 indir_bytes = 0; + bool create = false; u8 *rss_config; int ret; @@ -1312,6 +1314,7 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, if ((rxfh.input_xfrm & RXH_XFRM_SYM_XOR) && !ops->cap_rss_sym_xor_supported) return -EOPNOTSUPP; + create = rxfh.rss_context == ETH_RXFH_CONTEXT_ALLOC; /* If either indir, hash key or function is valid, proceed further. * Must request at least one change: indir size, hash key, function @@ -1377,13 +1380,42 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, } } + if (create) { + if (rxfh_dev.rss_delete) { + ret = -EINVAL; + goto out; + } + ctx = kzalloc(ethtool_rxfh_context_size(dev_indir_size, + dev_key_size, + ops->rxfh_priv_size), + GFP_KERNEL_ACCOUNT); + if (!ctx) { + ret = -ENOMEM; + goto out; + } + ctx->indir_size = dev_indir_size; + ctx->key_size = dev_key_size; + ctx->hfunc = rxfh.hfunc; + ctx->input_xfrm = rxfh.input_xfrm; + ctx->priv_size = ops->rxfh_priv_size; + } else if (rxfh.rss_context) { + ctx = xa_load(&dev->ethtool->rss_ctx, rxfh.rss_context); + if (!ctx) { + ret = -ENOENT; + goto out; + } + } rxfh_dev.hfunc = rxfh.hfunc; rxfh_dev.rss_context = rxfh.rss_context; rxfh_dev.input_xfrm = rxfh.input_xfrm; ret = ops->set_rxfh(dev, &rxfh_dev, extack); - if (ret) + if (ret) { + if (create) + /* failed to create, free our new tracking entry */ + kfree(ctx); goto out; + } if (copy_to_user(useraddr + offsetof(struct ethtool_rxfh, rss_context), &rxfh_dev.rss_context, sizeof(rxfh_dev.rss_context))) @@ -1396,6 +1428,46 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, else if (rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE) dev->priv_flags |= IFF_RXFH_CONFIGURED; } + /* Update rss_ctx tracking */ + if (create) { + /* Ideally this should happen before calling the driver, + * so that we can fail more cleanly; but we don't have the + * context ID until the driver picks it, so we have to + * wait until after. 
+ */ + if (WARN_ON(xa_load(&dev->ethtool->rss_ctx, rxfh.rss_context))) { + /* context ID reused, our tracking is screwed */ + kfree(ctx); + goto out; + } + /* Allocate the exact ID the driver gave us */ + if (xa_is_err(xa_store(&dev->ethtool->rss_ctx, rxfh.rss_context, + ctx, GFP_KERNEL))) { + kfree(ctx); + goto out; + } + ctx->indir_configured = rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE; + ctx->key_configured = !!rxfh.key_size; + } + if (rxfh_dev.rss_delete) { + WARN_ON(xa_erase(&dev->ethtool->rss_ctx, rxfh.rss_context) != ctx); + kfree(ctx); + } else if (ctx) { + if (rxfh_dev.indir) { + for (i = 0; i < dev_indir_size; i++) + ethtool_rxfh_context_indir(ctx)[i] = rxfh_dev.indir[i]; + ctx->indir_configured = 1; + } + if (rxfh_dev.key) { + memcpy(ethtool_rxfh_context_key(ctx), rxfh_dev.key, + dev_key_size); + ctx->key_configured = 1; + } + if (rxfh_dev.hfunc != ETH_RSS_HASH_NO_CHANGE) + ctx->hfunc = rxfh_dev.hfunc; + if (rxfh_dev.input_xfrm != RXH_XFRM_NO_CHANGE) + ctx->input_xfrm = rxfh_dev.input_xfrm; + } out: kfree(rss_config); -- cgit v1.2.3-58-ga151 From 847a8ab186767be6ee95643f9739fa9d0f839589 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Thu, 27 Jun 2024 16:33:49 +0100 Subject: net: ethtool: let the core choose RSS context IDs Add a new API to create/modify/remove RSS contexts, that passes in the newly-chosen context ID (not as a pointer) rather than leaving the driver to choose it on create. Also pass in the ctx, allowing drivers to easily use its private data area to store their hardware-specific state. Keep the existing .set_rxfh API for now as a fallback, but deprecate it for custom contexts (rss_context != 0). Signed-off-by: Edward Cree Reviewed-by: Przemek Kitszel Link: https://patch.msgid.link/45f1fe61df2163c091ec394c9f52000c8b16cc3b.1719502240.git.ecree.xilinx@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/ethtool.h | 40 ++++++++++++++++++++++++++++++++++++++++ net/core/dev.c | 6 +++++- net/ethtool/ioctl.c | 46 ++++++++++++++++++++++++++++++++++------------ 3 files changed, 79 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 43a2a143034f..4292a25b2427 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -723,6 +723,10 @@ struct ethtool_rxfh_param { * RSS. * @rxfh_priv_size: size of the driver private data area the core should * allocate for an RSS context (in &struct ethtool_rxfh_context). + * @rxfh_max_context_id: maximum (exclusive) supported RSS context ID. If this + * is zero then the core may choose any (nonzero) ID, otherwise the core + * will only use IDs strictly less than this value, as the @rss_context + * argument to @create_rxfh_context and friends. * @supported_coalesce_params: supported types of interrupt coalescing. * @supported_ring_params: supported ring params. * @get_drvinfo: Report driver/device information. Modern drivers no @@ -819,6 +823,32 @@ struct ethtool_rxfh_param { * will remain unchanged. * Returns a negative error code or zero. An error code must be returned * if at least one unsupported change was requested. + * @create_rxfh_context: Create a new RSS context with the specified RX flow + * hash indirection table, hash key, and hash function. + * The &struct ethtool_rxfh_context for this context is passed in @ctx; + * note that the indir table, hkey and hfunc are not yet populated as + * of this call. The driver does not need to update these; the core + * will do so if this op succeeds. 
+ * However, if @rxfh.indir is set to %NULL, the driver must update the + * indir table in @ctx with the (default or inherited) table actually in + * use; similarly, if @rxfh.key is %NULL, @rxfh.hfunc is + * %ETH_RSS_HASH_NO_CHANGE, or @rxfh.input_xfrm is %RXH_XFRM_NO_CHANGE, + * the driver should update the corresponding information in @ctx. + * If the driver provides this method, it must also provide + * @modify_rxfh_context and @remove_rxfh_context. + * Returns a negative error code or zero. + * @modify_rxfh_context: Reconfigure the specified RSS context. Allows setting + * the contents of the RX flow hash indirection table, hash key, and/or + * hash function associated with the given context. + * Parameters which are set to %NULL or zero will remain unchanged. + * The &struct ethtool_rxfh_context for this context is passed in @ctx; + * note that it will still contain the *old* settings. The driver does + * not need to update these; the core will do so if this op succeeds. + * Returns a negative error code or zero. An error code must be returned + * if at least one unsupported change was requested. + * @remove_rxfh_context: Remove the specified RSS context. + * The &struct ethtool_rxfh_context for this context is passed in @ctx. + * Returns a negative error code or zero. * @get_channels: Get number of channels. * @set_channels: Set number of channels. Returns a negative error code or * zero. @@ -909,6 +939,7 @@ struct ethtool_ops { u32 cap_rss_ctx_supported:1; u32 cap_rss_sym_xor_supported:1; u16 rxfh_priv_size; + u32 rxfh_max_context_id; u32 supported_coalesce_params; u32 supported_ring_params; void (*get_drvinfo)(struct net_device *, struct ethtool_drvinfo *); @@ -971,6 +1002,15 @@ struct ethtool_ops { int (*get_rxfh)(struct net_device *, struct ethtool_rxfh_param *); int (*set_rxfh)(struct net_device *, struct ethtool_rxfh_param *, struct netlink_ext_ack *extack); + int (*create_rxfh_context)(struct net_device *, + struct ethtool_rxfh_context *ctx, + const struct ethtool_rxfh_param *rxfh); + int (*modify_rxfh_context)(struct net_device *, + struct ethtool_rxfh_context *ctx, + const struct ethtool_rxfh_param *rxfh); + int (*remove_rxfh_context)(struct net_device *, + struct ethtool_rxfh_context *ctx, + u32 rss_context); void (*get_channels)(struct net_device *, struct ethtool_channels *); int (*set_channels)(struct net_device *, struct ethtool_channels *); int (*get_dump_flag)(struct net_device *, struct ethtool_dump *); diff --git a/net/core/dev.c b/net/core/dev.c index bce810f8ae53..8481dc4f2196 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -11254,7 +11254,11 @@ static void netdev_rss_contexts_free(struct net_device *dev) rxfh.rss_delete = true; xa_erase(&dev->ethtool->rss_ctx, context); - dev->ethtool_ops->set_rxfh(dev, &rxfh, NULL); + if (dev->ethtool_ops->create_rxfh_context) + dev->ethtool_ops->remove_rxfh_context(dev, ctx, + context); + else + dev->ethtool_ops->set_rxfh(dev, &rxfh, NULL); kfree(ctx); } xa_destroy(&dev->ethtool->rss_ctx); diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index e901a20e97f5..46b50ddde013 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -1395,9 +1395,24 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, } ctx->indir_size = dev_indir_size; ctx->key_size = dev_key_size; - ctx->hfunc = rxfh.hfunc; - ctx->input_xfrm = rxfh.input_xfrm; ctx->priv_size = ops->rxfh_priv_size; + /* Initialise to an empty context */ + ctx->hfunc = ETH_RSS_HASH_NO_CHANGE; + ctx->input_xfrm = RXH_XFRM_NO_CHANGE; + if 
(ops->create_rxfh_context) { + u32 limit = ops->rxfh_max_context_id ?: U32_MAX; + u32 ctx_id; + + /* driver uses new API, core allocates ID */ + ret = xa_alloc(&dev->ethtool->rss_ctx, &ctx_id, ctx, + XA_LIMIT(1, limit), GFP_KERNEL_ACCOUNT); + if (ret < 0) { + kfree(ctx); + goto out; + } + WARN_ON(!ctx_id); /* can't happen */ + rxfh.rss_context = ctx_id; + } } else if (rxfh.rss_context) { ctx = xa_load(&dev->ethtool->rss_ctx, rxfh.rss_context); if (!ctx) { @@ -1409,11 +1424,24 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, rxfh_dev.rss_context = rxfh.rss_context; rxfh_dev.input_xfrm = rxfh.input_xfrm; - ret = ops->set_rxfh(dev, &rxfh_dev, extack); - if (ret) { + if (rxfh.rss_context && ops->create_rxfh_context) { if (create) + ret = ops->create_rxfh_context(dev, ctx, &rxfh_dev); + else if (rxfh_dev.rss_delete) + ret = ops->remove_rxfh_context(dev, ctx, + rxfh.rss_context); + else + ret = ops->modify_rxfh_context(dev, ctx, &rxfh_dev); + } else { + ret = ops->set_rxfh(dev, &rxfh_dev, extack); + } + if (ret) { + if (create) { /* failed to create, free our new tracking entry */ + if (ops->create_rxfh_context) + xa_erase(&dev->ethtool->rss_ctx, rxfh.rss_context); kfree(ctx); + } goto out; } @@ -1429,12 +1457,8 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, dev->priv_flags |= IFF_RXFH_CONFIGURED; } /* Update rss_ctx tracking */ - if (create) { - /* Ideally this should happen before calling the driver, - * so that we can fail more cleanly; but we don't have the - * context ID until the driver picks it, so we have to - * wait until after. - */ + if (create && !ops->create_rxfh_context) { + /* driver uses old API, it chose context ID */ if (WARN_ON(xa_load(&dev->ethtool->rss_ctx, rxfh.rss_context))) { /* context ID reused, our tracking is screwed */ kfree(ctx); @@ -1446,8 +1470,6 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, kfree(ctx); goto out; } - ctx->indir_configured = rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE; - ctx->key_configured = !!rxfh.key_size; } if (rxfh_dev.rss_delete) { WARN_ON(xa_erase(&dev->ethtool->rss_ctx, rxfh.rss_context) != ctx); -- cgit v1.2.3-58-ga151 From 30a32cdf6b130356805b3193a6208de25cbb2015 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Thu, 27 Jun 2024 16:33:50 +0100 Subject: net: ethtool: add an extack parameter to new rxfh_context APIs Currently passed as NULL, but will allow drivers to report back errors when ethnl support for these ops is added. 
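A hypothetical driver hook showing the intended use of the new argument (the capability check and message are illustrative only):

	static int my_create_rxfh_context(struct net_device *dev,
					  struct ethtool_rxfh_context *ctx,
					  const struct ethtool_rxfh_param *rxfh,
					  struct netlink_ext_ack *extack)
	{
		if (rxfh->hfunc != ETH_RSS_HASH_TOP) {
			NL_SET_ERR_MSG_MOD(extack, "only Toeplitz hashing is supported");
			return -EOPNOTSUPP;
		}
		/* ... program the context in hardware ... */
		return 0;
	}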
Signed-off-by: Edward Cree Reviewed-by: Przemek Kitszel Link: https://patch.msgid.link/6e0012347d175fdd1280363d7bfa76a2f2777e17.1719502240.git.ecree.xilinx@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/ethtool.h | 9 ++++++--- net/core/dev.c | 2 +- net/ethtool/ioctl.c | 9 ++++++--- 3 files changed, 13 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 4292a25b2427..9cdbc8e3ed5c 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -1004,13 +1004,16 @@ struct ethtool_ops { struct netlink_ext_ack *extack); int (*create_rxfh_context)(struct net_device *, struct ethtool_rxfh_context *ctx, - const struct ethtool_rxfh_param *rxfh); + const struct ethtool_rxfh_param *rxfh, + struct netlink_ext_ack *extack); int (*modify_rxfh_context)(struct net_device *, struct ethtool_rxfh_context *ctx, - const struct ethtool_rxfh_param *rxfh); + const struct ethtool_rxfh_param *rxfh, + struct netlink_ext_ack *extack); int (*remove_rxfh_context)(struct net_device *, struct ethtool_rxfh_context *ctx, - u32 rss_context); + u32 rss_context, + struct netlink_ext_ack *extack); void (*get_channels)(struct net_device *, struct ethtool_channels *); int (*set_channels)(struct net_device *, struct ethtool_channels *); int (*get_dump_flag)(struct net_device *, struct ethtool_dump *); diff --git a/net/core/dev.c b/net/core/dev.c index 8481dc4f2196..2daed4464c08 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -11256,7 +11256,7 @@ static void netdev_rss_contexts_free(struct net_device *dev) xa_erase(&dev->ethtool->rss_ctx, context); if (dev->ethtool_ops->create_rxfh_context) dev->ethtool_ops->remove_rxfh_context(dev, ctx, - context); + context, NULL); else dev->ethtool_ops->set_rxfh(dev, &rxfh, NULL); kfree(ctx); diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 46b50ddde013..82c610e9e6b2 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -1426,12 +1426,15 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, if (rxfh.rss_context && ops->create_rxfh_context) { if (create) - ret = ops->create_rxfh_context(dev, ctx, &rxfh_dev); + ret = ops->create_rxfh_context(dev, ctx, &rxfh_dev, + extack); else if (rxfh_dev.rss_delete) ret = ops->remove_rxfh_context(dev, ctx, - rxfh.rss_context); + rxfh.rss_context, + extack); else - ret = ops->modify_rxfh_context(dev, ctx, &rxfh_dev); + ret = ops->modify_rxfh_context(dev, ctx, &rxfh_dev, + extack); } else { ret = ops->set_rxfh(dev, &rxfh_dev, extack); } -- cgit v1.2.3-58-ga151 From 87925151191b64d9623e63ccf11e517eacc99d7d Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Thu, 27 Jun 2024 16:33:51 +0100 Subject: net: ethtool: add a mutex protecting RSS contexts While this is not needed to serialise the ethtool entry points (which are all under RTNL), drivers may have cause to asynchronously access dev->ethtool->rss_ctx; taking dev->ethtool->rss_lock allows them to do this safely without needing to take the RTNL. 
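For example, a driver restoring its RSS state after a reset could walk the XArray under the new lock instead of requiring RTNL (sketch; the restore helper is hypothetical):

	static void my_restore_all_rss_contexts(struct net_device *dev)
	{
		struct ethtool_rxfh_context *ctx;
		unsigned long context;

		mutex_lock(&dev->ethtool->rss_lock);
		xa_for_each(&dev->ethtool->rss_ctx, context, ctx)
			my_restore_one_context(dev, context, ctx);
		mutex_unlock(&dev->ethtool->rss_lock);
	}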
Signed-off-by: Edward Cree Reviewed-by: Przemek Kitszel Link: https://patch.msgid.link/7f9c15eb7525bf87af62c275dde3a8570ee8bf0a.1719502240.git.ecree.xilinx@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/ethtool.h | 3 +++ net/core/dev.c | 5 +++++ net/ethtool/ioctl.c | 7 +++++++ 3 files changed, 15 insertions(+) (limited to 'net') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 9cdbc8e3ed5c..f74bb0cf8ed1 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -1104,10 +1104,13 @@ int ethtool_virtdev_set_link_ksettings(struct net_device *dev, /** * struct ethtool_netdev_state - per-netdevice state for ethtool features * @rss_ctx: XArray of custom RSS contexts + * @rss_lock: Protects entries in @rss_ctx. May be taken from + * within RTNL. * @wol_enabled: Wake-on-LAN is enabled */ struct ethtool_netdev_state { struct xarray rss_ctx; + struct mutex rss_lock; unsigned wol_enabled:1; }; diff --git a/net/core/dev.c b/net/core/dev.c index 2daed4464c08..385c4091aa77 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -10338,6 +10338,7 @@ int register_netdevice(struct net_device *dev) /* rss ctx ID 0 is reserved for the default context, start from 1 */ xa_init_flags(&dev->ethtool->rss_ctx, XA_FLAGS_ALLOC1); + mutex_init(&dev->ethtool->rss_lock); spin_lock_init(&dev->addr_list_lock); netdev_set_addr_lockdep_class(dev); @@ -11243,6 +11244,7 @@ static void netdev_rss_contexts_free(struct net_device *dev) struct ethtool_rxfh_context *ctx; unsigned long context; + mutex_lock(&dev->ethtool->rss_lock); xa_for_each(&dev->ethtool->rss_ctx, context, ctx) { struct ethtool_rxfh_param rxfh; @@ -11262,6 +11264,7 @@ static void netdev_rss_contexts_free(struct net_device *dev) kfree(ctx); } xa_destroy(&dev->ethtool->rss_ctx); + mutex_unlock(&dev->ethtool->rss_lock); } /** @@ -11374,6 +11377,8 @@ void unregister_netdevice_many_notify(struct list_head *head, if (dev->netdev_ops->ndo_uninit) dev->netdev_ops->ndo_uninit(dev); + mutex_destroy(&dev->ethtool->rss_lock); + if (skb) rtmsg_ifinfo_send(skb, dev, GFP_KERNEL, portid, nlh); diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 82c610e9e6b2..939ccd106fe1 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -1285,6 +1285,7 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, struct netlink_ext_ack *extack = NULL; struct ethtool_rxnfc rx_rings; struct ethtool_rxfh rxfh; + bool locked = false; /* dev->ethtool->rss_lock taken */ u32 indir_bytes = 0; bool create = false; u8 *rss_config; @@ -1380,6 +1381,10 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, } } + if (rxfh.rss_context) { + mutex_lock(&dev->ethtool->rss_lock); + locked = true; + } if (create) { if (rxfh_dev.rss_delete) { ret = -EINVAL; @@ -1495,6 +1500,8 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, } out: + if (locked) + mutex_unlock(&dev->ethtool->rss_lock); kfree(rss_config); return ret; } -- cgit v1.2.3-58-ga151 From 7964e7884643d56d4ecd0e2b92190f8a361cea48 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Thu, 27 Jun 2024 16:33:53 +0100 Subject: net: ethtool: use the tracking array for get_rxfh on custom RSS contexts On 'ethtool -x' with rss_context != 0, instead of calling the driver to read the RSS settings for the context, just get the settings from the rss_ctx xarray, and return them to the user with no driver involvement. 
Signed-off-by: Edward Cree Reviewed-by: Przemek Kitszel Link: https://patch.msgid.link/2d0190fa29638f307ea720f882ebd41f6f867694.1719502240.git.ecree.xilinx@gmail.com Signed-off-by: Jakub Kicinski --- net/ethtool/ioctl.c | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 939ccd106fe1..d8795ed07ba3 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -1202,6 +1202,7 @@ static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev, const struct ethtool_ops *ops = dev->ethtool_ops; struct ethtool_rxfh_param rxfh_dev = {}; u32 user_indir_size, user_key_size; + struct ethtool_rxfh_context *ctx; struct ethtool_rxfh rxfh; u32 indir_bytes; u8 *rss_config; @@ -1249,11 +1250,26 @@ static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev, if (user_key_size) rxfh_dev.key = rss_config + indir_bytes; - rxfh_dev.rss_context = rxfh.rss_context; - - ret = dev->ethtool_ops->get_rxfh(dev, &rxfh_dev); - if (ret) - goto out; + if (rxfh.rss_context) { + ctx = xa_load(&dev->ethtool->rss_ctx, rxfh.rss_context); + if (!ctx) { + ret = -ENOENT; + goto out; + } + if (rxfh_dev.indir) + memcpy(rxfh_dev.indir, ethtool_rxfh_context_indir(ctx), + indir_bytes); + if (rxfh_dev.key) + memcpy(rxfh_dev.key, ethtool_rxfh_context_key(ctx), + user_key_size); + rxfh_dev.hfunc = ctx->hfunc; + rxfh_dev.input_xfrm = ctx->input_xfrm; + ret = 0; + } else { + ret = dev->ethtool_ops->get_rxfh(dev, &rxfh_dev); + if (ret) + goto out; + } if (copy_to_user(useraddr + offsetof(struct ethtool_rxfh, hfunc), &rxfh_dev.hfunc, sizeof(rxfh.hfunc))) { -- cgit v1.2.3-58-ga151 From 2d5317753e5f02a66e6d0afb9b25105d0beab1be Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Fri, 28 Jun 2024 10:46:25 +0200 Subject: xfrm: Export symbol xfrm_dev_state_delete. This fixes a build failure if xfrm_user is build as a module. Fixes: 07b87f9eea0c ("xfrm: Fix unregister netdevice hang on hardware offload.") Reported-by: Mark Brown Tested-by: Leon Romanovsky Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_state.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index d531d2a1fae2..936f9348e5f6 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -698,6 +698,7 @@ void xfrm_dev_state_delete(struct xfrm_state *x) spin_unlock_bh(&xfrm_state_dev_gc_lock); } } +EXPORT_SYMBOL_GPL(xfrm_dev_state_delete); void xfrm_dev_state_free(struct xfrm_state *x) { -- cgit v1.2.3-58-ga151 From 74d6529b78f7a440a10aa7f4904ca9f27d1d2f3c Mon Sep 17 00:00:00 2001 From: Heng Qi Date: Fri, 28 Jun 2024 12:40:18 +0800 Subject: net: ethtool: Fix the panic caused by dev being null when dumping coalesce syzbot reported a general protection fault caused by a null pointer dereference in coalesce_fill_reply(). The issue occurs when req_base->dev is null, leading to an invalid memory access. This panic occurs if dumping coalesce when no device name is specified. Fixes: f750dfe825b9 ("ethtool: provide customized dim profile management") Reported-by: syzbot+e77327e34cdc8c36b7d3@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=e77327e34cdc8c36b7d3 Signed-off-by: Heng Qi Reviewed-by: Simon Horman Signed-off-by: David S. 
Miller --- net/ethtool/coalesce.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ethtool/coalesce.c b/net/ethtool/coalesce.c index 759b16e3d134..3e18ca1ccc5e 100644 --- a/net/ethtool/coalesce.c +++ b/net/ethtool/coalesce.c @@ -211,9 +211,9 @@ static int coalesce_fill_reply(struct sk_buff *skb, { const struct coalesce_reply_data *data = COALESCE_REPDATA(reply_base); const struct kernel_ethtool_coalesce *kcoal = &data->kernel_coalesce; - struct dim_irq_moder *moder = req_base->dev->irq_moder; const struct ethtool_coalesce *coal = &data->coalesce; u32 supported = data->supported_params; + struct dim_irq_moder *moder; int ret = 0; if (coalesce_put_u32(skb, ETHTOOL_A_COALESCE_RX_USECS, @@ -272,9 +272,10 @@ static int coalesce_fill_reply(struct sk_buff *skb, kcoal->tx_aggr_time_usecs, supported)) return -EMSGSIZE; - if (!moder) + if (!req_base->dev || !req_base->dev->irq_moder) return 0; + moder = req_base->dev->irq_moder; rcu_read_lock(); if (moder->profile_flags & DIM_PROFILE_RX) { ret = coalesce_put_profile(skb, ETHTOOL_A_COALESCE_RX_PROFILE, -- cgit v1.2.3-58-ga151 From 89cc8f1c5f22568142b7ad118c738204708e4207 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 30 Jun 2024 00:26:48 +0200 Subject: netfilter: nf_tables: Add flowtable map for xdp offload This adds a small internal mapping table so that a new bpf (xdp) kfunc can perform lookups in a flowtable. As-is, xdp program has access to the device pointer, but no way to do a lookup in a flowtable -- there is no way to obtain the needed struct without questionable stunts. This allows to obtain an nf_flowtable pointer given a net_device structure. In order to keep backward compatibility, the infrastructure allows the user to add a given device to multiple flowtables, but it will always return the first added mapping performing the lookup since it assumes the right configuration is 1:1 mapping between flowtables and net_devices. 
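A minimal sketch of the consumer side enabled by the helper added below (assumes an already-populated flow tuple; the mapping entries are RCU-protected, so this must run under rcu_read_lock()):

	static struct flow_offload_tuple_rhash *
	my_lookup(const struct net_device *dev, struct flow_offload_tuple *tuple)
	{
		struct nf_flowtable *ft = nf_flowtable_by_dev(dev);

		/* NULL when dev has not been bound to any flowtable */
		return ft ? flow_offload_lookup(ft, tuple) : NULL;
	}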
Co-developed-by: Lorenzo Bianconi Signed-off-by: Florian Westphal Signed-off-by: Lorenzo Bianconi Signed-off-by: Daniel Borkmann Acked-by: Pablo Neira Ayuso Link: https://lore.kernel.org/bpf/9f20e2c36f494b3bf177328718367f636bb0b2ab.1719698275.git.lorenzo@kernel.org --- include/net/netfilter/nf_flow_table.h | 5 ++ net/netfilter/Makefile | 2 +- net/netfilter/nf_flow_table_offload.c | 2 +- net/netfilter/nf_flow_table_xdp.c | 147 ++++++++++++++++++++++++++++++++++ 4 files changed, 154 insertions(+), 2 deletions(-) create mode 100644 net/netfilter/nf_flow_table_xdp.c (limited to 'net') diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index 9abb7ee40d72..d845745207d2 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -305,6 +305,11 @@ struct flow_ports { __be16 source, dest; }; +struct nf_flowtable *nf_flowtable_by_dev(const struct net_device *dev); +int nf_flow_offload_xdp_setup(struct nf_flowtable *flowtable, + struct net_device *dev, + enum flow_block_command cmd); + unsigned int nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state); unsigned int nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 614815a3ed73..18046872a38a 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -142,7 +142,7 @@ obj-$(CONFIG_NFT_FWD_NETDEV) += nft_fwd_netdev.o # flow table infrastructure obj-$(CONFIG_NF_FLOW_TABLE) += nf_flow_table.o nf_flow_table-objs := nf_flow_table_core.o nf_flow_table_ip.o \ - nf_flow_table_offload.o + nf_flow_table_offload.o nf_flow_table_xdp.o nf_flow_table-$(CONFIG_NF_FLOW_TABLE_PROCFS) += nf_flow_table_procfs.o obj-$(CONFIG_NF_FLOW_TABLE_INET) += nf_flow_table_inet.o diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c index a010b25076ca..ff1a4e36c2b5 100644 --- a/net/netfilter/nf_flow_table_offload.c +++ b/net/netfilter/nf_flow_table_offload.c @@ -1192,7 +1192,7 @@ int nf_flow_table_offload_setup(struct nf_flowtable *flowtable, int err; if (!nf_flowtable_hw_offload(flowtable)) - return 0; + return nf_flow_offload_xdp_setup(flowtable, dev, cmd); if (dev->netdev_ops->ndo_setup_tc) err = nf_flow_table_offload_cmd(&bo, flowtable, dev, cmd, diff --git a/net/netfilter/nf_flow_table_xdp.c b/net/netfilter/nf_flow_table_xdp.c new file mode 100644 index 000000000000..e1252d042699 --- /dev/null +++ b/net/netfilter/nf_flow_table_xdp.c @@ -0,0 +1,147 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include +#include +#include +#include +#include +#include +#include + +struct flow_offload_xdp_ft { + struct list_head head; + struct nf_flowtable *ft; + struct rcu_head rcuhead; +}; + +struct flow_offload_xdp { + struct hlist_node hnode; + unsigned long net_device_addr; + struct list_head head; +}; + +#define NF_XDP_HT_BITS 4 +static DEFINE_HASHTABLE(nf_xdp_hashtable, NF_XDP_HT_BITS); +static DEFINE_MUTEX(nf_xdp_hashtable_lock); + +/* caller must hold rcu read lock */ +struct nf_flowtable *nf_flowtable_by_dev(const struct net_device *dev) +{ + unsigned long key = (unsigned long)dev; + struct flow_offload_xdp *iter; + + hash_for_each_possible_rcu(nf_xdp_hashtable, iter, hnode, key) { + if (key == iter->net_device_addr) { + struct flow_offload_xdp_ft *ft_elem; + + /* The user is supposed to insert a given net_device + * just into a single nf_flowtable so we always return + * the first element here. 
+ */ + ft_elem = list_first_or_null_rcu(&iter->head, + struct flow_offload_xdp_ft, + head); + return ft_elem ? ft_elem->ft : NULL; + } + } + + return NULL; +} + +static int nf_flowtable_by_dev_insert(struct nf_flowtable *ft, + const struct net_device *dev) +{ + struct flow_offload_xdp *iter, *elem = NULL; + unsigned long key = (unsigned long)dev; + struct flow_offload_xdp_ft *ft_elem; + + ft_elem = kzalloc(sizeof(*ft_elem), GFP_KERNEL_ACCOUNT); + if (!ft_elem) + return -ENOMEM; + + ft_elem->ft = ft; + + mutex_lock(&nf_xdp_hashtable_lock); + + hash_for_each_possible(nf_xdp_hashtable, iter, hnode, key) { + if (key == iter->net_device_addr) { + elem = iter; + break; + } + } + + if (!elem) { + elem = kzalloc(sizeof(*elem), GFP_KERNEL_ACCOUNT); + if (!elem) + goto err_unlock; + + elem->net_device_addr = key; + INIT_LIST_HEAD(&elem->head); + hash_add_rcu(nf_xdp_hashtable, &elem->hnode, key); + } + list_add_tail_rcu(&ft_elem->head, &elem->head); + + mutex_unlock(&nf_xdp_hashtable_lock); + + return 0; + +err_unlock: + mutex_unlock(&nf_xdp_hashtable_lock); + kfree(ft_elem); + + return -ENOMEM; +} + +static void nf_flowtable_by_dev_remove(struct nf_flowtable *ft, + const struct net_device *dev) +{ + struct flow_offload_xdp *iter, *elem = NULL; + unsigned long key = (unsigned long)dev; + + mutex_lock(&nf_xdp_hashtable_lock); + + hash_for_each_possible(nf_xdp_hashtable, iter, hnode, key) { + if (key == iter->net_device_addr) { + elem = iter; + break; + } + } + + if (elem) { + struct flow_offload_xdp_ft *ft_elem, *ft_next; + + list_for_each_entry_safe(ft_elem, ft_next, &elem->head, head) { + if (ft_elem->ft == ft) { + list_del_rcu(&ft_elem->head); + kfree_rcu(ft_elem, rcuhead); + } + } + + if (list_empty(&elem->head)) + hash_del_rcu(&elem->hnode); + else + elem = NULL; + } + + mutex_unlock(&nf_xdp_hashtable_lock); + + if (elem) { + synchronize_rcu(); + kfree(elem); + } +} + +int nf_flow_offload_xdp_setup(struct nf_flowtable *flowtable, + struct net_device *dev, + enum flow_block_command cmd) +{ + switch (cmd) { + case FLOW_BLOCK_BIND: + return nf_flowtable_by_dev_insert(flowtable, dev); + case FLOW_BLOCK_UNBIND: + nf_flowtable_by_dev_remove(flowtable, dev); + return 0; + } + + WARN_ON_ONCE(1); + return 0; +} -- cgit v1.2.3-58-ga151 From 391bb6594fd3a567efb1cd3efc8136c78c4c9e31 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Sun, 30 Jun 2024 00:26:49 +0200 Subject: netfilter: Add bpf_xdp_flow_lookup kfunc Introduce bpf_xdp_flow_lookup kfunc in order to perform the lookup of a given flowtable entry based on a fib tuple of incoming traffic. bpf_xdp_flow_lookup can be used as building block to offload in xdp the processing of sw flowtable when hw flowtable is not available. 
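A minimal sketch of an XDP program calling the new kfunc (assumes kernel types are available via vmlinux.h plus libbpf helpers and the usual uapi constants; the packet parsing that fills the fib tuple is omitted):

	extern struct flow_offload_tuple_rhash *
	bpf_xdp_flow_lookup(struct xdp_md *ctx, struct bpf_fib_lookup *fib_tuple,
			    struct bpf_flowtable_opts *opts, u32 opts__sz) __ksym;

	SEC("xdp")
	int xdp_flowtable_check(struct xdp_md *ctx)
	{
		struct bpf_flowtable_opts opts = {};
		struct bpf_fib_lookup tuple = {
			.family		= AF_INET,
			.l4_protocol	= IPPROTO_TCP,
			.ifindex	= ctx->ingress_ifindex,
			/* src/dst addresses and ports parsed from the packet */
		};
		struct flow_offload_tuple_rhash *tuplehash;

		tuplehash = bpf_xdp_flow_lookup(ctx, &tuple, &opts, sizeof(opts));
		if (!tuplehash)
			return XDP_PASS;	/* no sw flowtable entry for this flow */

		/* an entry exists and has been refreshed; the program may now
		 * rewrite headers and forward the packet itself instead of
		 * handing it to the stack
		 */
		return XDP_PASS;
	}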
Signed-off-by: Lorenzo Bianconi Signed-off-by: Daniel Borkmann Acked-by: Kumar Kartikeya Dwivedi Acked-by: Pablo Neira Ayuso Link: https://lore.kernel.org/bpf/55d38a4e5856f6d1509d823ff4e98aaa6d356097.1719698275.git.lorenzo@kernel.org --- include/net/netfilter/nf_flow_table.h | 10 +++ net/netfilter/Makefile | 5 ++ net/netfilter/nf_flow_table_bpf.c | 121 ++++++++++++++++++++++++++++++++++ net/netfilter/nf_flow_table_inet.c | 2 +- 4 files changed, 137 insertions(+), 1 deletion(-) create mode 100644 net/netfilter/nf_flow_table_bpf.c (limited to 'net') diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index d845745207d2..b63d53bb9dd6 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -315,6 +315,16 @@ unsigned int nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, unsigned int nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state); +#if (IS_BUILTIN(CONFIG_NF_FLOW_TABLE) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) || \ + (IS_MODULE(CONFIG_NF_FLOW_TABLE) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES)) +extern int nf_flow_register_bpf(void); +#else +static inline int nf_flow_register_bpf(void) +{ + return 0; +} +#endif + #define MODULE_ALIAS_NF_FLOWTABLE(family) \ MODULE_ALIAS("nf-flowtable-" __stringify(family)) diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 18046872a38a..f0aa4d7ef499 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -144,6 +144,11 @@ obj-$(CONFIG_NF_FLOW_TABLE) += nf_flow_table.o nf_flow_table-objs := nf_flow_table_core.o nf_flow_table_ip.o \ nf_flow_table_offload.o nf_flow_table_xdp.o nf_flow_table-$(CONFIG_NF_FLOW_TABLE_PROCFS) += nf_flow_table_procfs.o +ifeq ($(CONFIG_NF_FLOW_TABLE),m) +nf_flow_table-$(CONFIG_DEBUG_INFO_BTF_MODULES) += nf_flow_table_bpf.o +else ifeq ($(CONFIG_NF_FLOW_TABLE),y) +nf_flow_table-$(CONFIG_DEBUG_INFO_BTF) += nf_flow_table_bpf.o +endif obj-$(CONFIG_NF_FLOW_TABLE_INET) += nf_flow_table_inet.o diff --git a/net/netfilter/nf_flow_table_bpf.c b/net/netfilter/nf_flow_table_bpf.c new file mode 100644 index 000000000000..4a5f5195f2d2 --- /dev/null +++ b/net/netfilter/nf_flow_table_bpf.c @@ -0,0 +1,121 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Unstable Flow Table Helpers for XDP hook + * + * These are called from the XDP programs. + * Note that it is allowed to break compatibility for these functions since + * the interface they are exposed through to BPF programs is explicitly + * unstable. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +/* bpf_flowtable_opts - options for bpf flowtable helpers + * @error: out parameter, set for any encountered error + */ +struct bpf_flowtable_opts { + s32 error; +}; + +enum { + NF_BPF_FLOWTABLE_OPTS_SZ = 4, +}; + +__diag_push(); +__diag_ignore_all("-Wmissing-prototypes", + "Global functions as their definitions will be in nf_flow_table BTF"); + +__bpf_kfunc_start_defs(); + +static struct flow_offload_tuple_rhash * +bpf_xdp_flow_tuple_lookup(struct net_device *dev, + struct flow_offload_tuple *tuple, __be16 proto) +{ + struct flow_offload_tuple_rhash *tuplehash; + struct nf_flowtable *nf_flow_table; + struct flow_offload *nf_flow; + + nf_flow_table = nf_flowtable_by_dev(dev); + if (!nf_flow_table) + return ERR_PTR(-ENOENT); + + tuplehash = flow_offload_lookup(nf_flow_table, tuple); + if (!tuplehash) + return ERR_PTR(-ENOENT); + + nf_flow = container_of(tuplehash, struct flow_offload, + tuplehash[tuplehash->tuple.dir]); + flow_offload_refresh(nf_flow_table, nf_flow, false); + + return tuplehash; +} + +__bpf_kfunc struct flow_offload_tuple_rhash * +bpf_xdp_flow_lookup(struct xdp_md *ctx, struct bpf_fib_lookup *fib_tuple, + struct bpf_flowtable_opts *opts, u32 opts_len) +{ + struct xdp_buff *xdp = (struct xdp_buff *)ctx; + struct flow_offload_tuple tuple = { + .iifidx = fib_tuple->ifindex, + .l3proto = fib_tuple->family, + .l4proto = fib_tuple->l4_protocol, + .src_port = fib_tuple->sport, + .dst_port = fib_tuple->dport, + }; + struct flow_offload_tuple_rhash *tuplehash; + __be16 proto; + + if (opts_len != NF_BPF_FLOWTABLE_OPTS_SZ) { + opts->error = -EINVAL; + return NULL; + } + + switch (fib_tuple->family) { + case AF_INET: + tuple.src_v4.s_addr = fib_tuple->ipv4_src; + tuple.dst_v4.s_addr = fib_tuple->ipv4_dst; + proto = htons(ETH_P_IP); + break; + case AF_INET6: + tuple.src_v6 = *(struct in6_addr *)&fib_tuple->ipv6_src; + tuple.dst_v6 = *(struct in6_addr *)&fib_tuple->ipv6_dst; + proto = htons(ETH_P_IPV6); + break; + default: + opts->error = -EAFNOSUPPORT; + return NULL; + } + + tuplehash = bpf_xdp_flow_tuple_lookup(xdp->rxq->dev, &tuple, proto); + if (IS_ERR(tuplehash)) { + opts->error = PTR_ERR(tuplehash); + return NULL; + } + + return tuplehash; +} + +__diag_pop() + +__bpf_kfunc_end_defs(); + +BTF_KFUNCS_START(nf_ft_kfunc_set) +BTF_ID_FLAGS(func, bpf_xdp_flow_lookup, KF_TRUSTED_ARGS | KF_RET_NULL) +BTF_KFUNCS_END(nf_ft_kfunc_set) + +static const struct btf_kfunc_id_set nf_flow_kfunc_set = { + .owner = THIS_MODULE, + .set = &nf_ft_kfunc_set, +}; + +int nf_flow_register_bpf(void) +{ + return register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, + &nf_flow_kfunc_set); +} +EXPORT_SYMBOL_GPL(nf_flow_register_bpf); diff --git a/net/netfilter/nf_flow_table_inet.c b/net/netfilter/nf_flow_table_inet.c index 6eef15648b7b..88787b45e30d 100644 --- a/net/netfilter/nf_flow_table_inet.c +++ b/net/netfilter/nf_flow_table_inet.c @@ -98,7 +98,7 @@ static int __init nf_flow_inet_module_init(void) nft_register_flowtable_type(&flowtable_ipv6); nft_register_flowtable_type(&flowtable_inet); - return 0; + return nf_flow_register_bpf(); } static void __exit nf_flow_inet_module_exit(void) -- cgit v1.2.3-58-ga151 From 9e2db9d3993e270b24fbc4ce1ca7e09756e8df25 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 27 Jun 2024 13:59:41 +0100 Subject: net: always try to set ubuf in skb_zerocopy_iter_stream skb_zcopy_set() does nothing if there is already a ubuf_info associated with an skb, and since ->link_skb should have set it several lines 
above the check here essentially does nothing and can be removed. It's also safer this way, because even if the callback is faulty we'll have it set. Signed-off-by: Pavel Begunkov Reviewed-by: Willem de Bruijn Reviewed-by: Jens Axboe Signed-off-by: Paolo Abeni --- net/core/skbuff.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index eb9a7e65b5c8..52986e1ce13e 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1899,8 +1899,7 @@ int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb, return err; } - if (!uarg->ops->link_skb) - skb_zcopy_set(skb, uarg, NULL); + skb_zcopy_set(skb, uarg, NULL); return skb->len - orig_len; } EXPORT_SYMBOL_GPL(skb_zerocopy_iter_stream); -- cgit v1.2.3-58-ga151 From 7fb05423fed41686ccc1a76c20d486728f62023f Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 27 Jun 2024 13:59:42 +0100 Subject: net: split __zerocopy_sg_from_iter() Split a function out of __zerocopy_sg_from_iter() that only cares about the traditional path with refcounted pages and doesn't need to know about ->sg_from_iter. A preparation patch, we'll improve on the function later. Signed-off-by: Pavel Begunkov Reviewed-by: Willem de Bruijn Reviewed-by: Jens Axboe Signed-off-by: Paolo Abeni --- net/core/datagram.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/core/datagram.c b/net/core/datagram.c index e614cfd8e14a..ef81d6ecbe1e 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -610,16 +610,10 @@ fault: } EXPORT_SYMBOL(skb_copy_datagram_from_iter); -int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk, - struct sk_buff *skb, struct iov_iter *from, - size_t length) +static int zerocopy_fill_skb_from_iter(struct sock *sk, struct sk_buff *skb, + struct iov_iter *from, size_t length) { - int frag; - - if (msg && msg->msg_ubuf && msg->sg_from_iter) - return msg->sg_from_iter(sk, skb, from, length); - - frag = skb_shinfo(skb)->nr_frags; + int frag = skb_shinfo(skb)->nr_frags; while (length && iov_iter_count(from)) { struct page *head, *last_head = NULL; @@ -692,6 +686,16 @@ int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk, } return 0; } + +int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk, + struct sk_buff *skb, struct iov_iter *from, + size_t length) +{ + if (msg && msg->msg_ubuf && msg->sg_from_iter) + return msg->sg_from_iter(sk, skb, from, length); + else + return zerocopy_fill_skb_from_iter(sk, skb, from, length); +} EXPORT_SYMBOL(__zerocopy_sg_from_iter); /** -- cgit v1.2.3-58-ga151 From aeb320fc05c74e1d3b429aa0e3a777b8a931c189 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 27 Jun 2024 13:59:43 +0100 Subject: net: batch zerocopy_fill_skb_from_iter accounting Instead of accounting every page range against the socket separately, do it in batch based on the change in skb->truesize. It's also moved into __zerocopy_sg_from_iter(), so that zerocopy_fill_skb_from_iter() is simpler and responsible for setting frags but not the accounting. 
Signed-off-by: Pavel Begunkov Reviewed-by: Willem de Bruijn Reviewed-by: Jens Axboe Signed-off-by: Paolo Abeni --- net/core/datagram.c | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/core/datagram.c b/net/core/datagram.c index ef81d6ecbe1e..b0dccefd4a09 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -610,7 +610,7 @@ fault: } EXPORT_SYMBOL(skb_copy_datagram_from_iter); -static int zerocopy_fill_skb_from_iter(struct sock *sk, struct sk_buff *skb, +static int zerocopy_fill_skb_from_iter(struct sk_buff *skb, struct iov_iter *from, size_t length) { int frag = skb_shinfo(skb)->nr_frags; @@ -621,7 +621,6 @@ static int zerocopy_fill_skb_from_iter(struct sock *sk, struct sk_buff *skb, int refs, order, n = 0; size_t start; ssize_t copied; - unsigned long truesize; if (frag == MAX_SKB_FRAGS) return -EMSGSIZE; @@ -633,17 +632,9 @@ static int zerocopy_fill_skb_from_iter(struct sock *sk, struct sk_buff *skb, length -= copied; - truesize = PAGE_ALIGN(copied + start); skb->data_len += copied; skb->len += copied; - skb->truesize += truesize; - if (sk && sk->sk_type == SOCK_STREAM) { - sk_wmem_queued_add(sk, truesize); - if (!skb_zcopy_pure(skb)) - sk_mem_charge(sk, truesize); - } else { - refcount_add(truesize, &skb->sk->sk_wmem_alloc); - } + skb->truesize += PAGE_ALIGN(copied + start); head = compound_head(pages[n]); order = compound_order(head); @@ -691,10 +682,24 @@ int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk, struct sk_buff *skb, struct iov_iter *from, size_t length) { + unsigned long orig_size = skb->truesize; + unsigned long truesize; + int ret; + if (msg && msg->msg_ubuf && msg->sg_from_iter) return msg->sg_from_iter(sk, skb, from, length); - else - return zerocopy_fill_skb_from_iter(sk, skb, from, length); + + ret = zerocopy_fill_skb_from_iter(skb, from, length); + truesize = skb->truesize - orig_size; + + if (sk && sk->sk_type == SOCK_STREAM) { + sk_wmem_queued_add(sk, truesize); + if (!skb_zcopy_pure(skb)) + sk_mem_charge(sk, truesize); + } else { + refcount_add(truesize, &skb->sk->sk_wmem_alloc); + } + return ret; } EXPORT_SYMBOL(__zerocopy_sg_from_iter); -- cgit v1.2.3-58-ga151 From 060f4ba6e40338a70932603a3564903acf5f5734 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 27 Jun 2024 13:59:44 +0100 Subject: io_uring/net: move charging socket out of zc io_uring Currently, io_uring's io_sg_from_iter() duplicates the part of __zerocopy_sg_from_iter() charging pages to the socket. It'd be too easy to miss while changing it in net/, the chunk is not the most straightforward for outside users and full of internal implementation details. io_uring is not a good place to keep it, deduplicate it by moving out of the callback into __zerocopy_sg_from_iter(). 
Signed-off-by: Pavel Begunkov Reviewed-by: Willem de Bruijn Reviewed-by: Jens Axboe Signed-off-by: Paolo Abeni --- include/linux/skbuff.h | 3 +++ include/linux/socket.h | 2 +- io_uring/net.c | 16 ++++------------ net/core/datagram.c | 10 +++++----- 4 files changed, 13 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index f4cda3fbdb75..9c29bdd5596d 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1703,6 +1703,9 @@ int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk, struct sk_buff *skb, struct iov_iter *from, size_t length); +int zerocopy_fill_skb_from_iter(struct sk_buff *skb, + struct iov_iter *from, size_t length); + static inline int skb_zerocopy_iter_dgram(struct sk_buff *skb, struct msghdr *msg, int len) { diff --git a/include/linux/socket.h b/include/linux/socket.h index 89d16b90370b..2a1ff91d1914 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -76,7 +76,7 @@ struct msghdr { __kernel_size_t msg_controllen; /* ancillary data buffer length */ struct kiocb *msg_iocb; /* ptr to iocb for async requests */ struct ubuf_info *msg_ubuf; - int (*sg_from_iter)(struct sock *sk, struct sk_buff *skb, + int (*sg_from_iter)(struct sk_buff *skb, struct iov_iter *from, size_t length); }; diff --git a/io_uring/net.c b/io_uring/net.c index 7c98c4d50946..84a7602bcef1 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -1265,14 +1265,14 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) return io_sendmsg_prep_setup(req, req->opcode == IORING_OP_SENDMSG_ZC); } -static int io_sg_from_iter_iovec(struct sock *sk, struct sk_buff *skb, +static int io_sg_from_iter_iovec(struct sk_buff *skb, struct iov_iter *from, size_t length) { skb_zcopy_downgrade_managed(skb); - return __zerocopy_sg_from_iter(NULL, sk, skb, from, length); + return zerocopy_fill_skb_from_iter(skb, from, length); } -static int io_sg_from_iter(struct sock *sk, struct sk_buff *skb, +static int io_sg_from_iter(struct sk_buff *skb, struct iov_iter *from, size_t length) { struct skb_shared_info *shinfo = skb_shinfo(skb); @@ -1285,7 +1285,7 @@ static int io_sg_from_iter(struct sock *sk, struct sk_buff *skb, if (!frag) shinfo->flags |= SKBFL_MANAGED_FRAG_REFS; else if (unlikely(!skb_zcopy_managed(skb))) - return __zerocopy_sg_from_iter(NULL, sk, skb, from, length); + return zerocopy_fill_skb_from_iter(skb, from, length); bi.bi_size = min(from->count, length); bi.bi_bvec_done = from->iov_offset; @@ -1312,14 +1312,6 @@ static int io_sg_from_iter(struct sock *sk, struct sk_buff *skb, skb->data_len += copied; skb->len += copied; skb->truesize += truesize; - - if (sk && sk->sk_type == SOCK_STREAM) { - sk_wmem_queued_add(sk, truesize); - if (!skb_zcopy_pure(skb)) - sk_mem_charge(sk, truesize); - } else { - refcount_add(truesize, &skb->sk->sk_wmem_alloc); - } return ret; } diff --git a/net/core/datagram.c b/net/core/datagram.c index b0dccefd4a09..16507b7cc4fb 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -610,8 +610,8 @@ fault: } EXPORT_SYMBOL(skb_copy_datagram_from_iter); -static int zerocopy_fill_skb_from_iter(struct sk_buff *skb, - struct iov_iter *from, size_t length) +int zerocopy_fill_skb_from_iter(struct sk_buff *skb, + struct iov_iter *from, size_t length) { int frag = skb_shinfo(skb)->nr_frags; @@ -687,11 +687,11 @@ int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk, int ret; if (msg && msg->msg_ubuf && msg->sg_from_iter) - return msg->sg_from_iter(sk, skb, from, length); + ret = 
msg->sg_from_iter(skb, from, length); + else + ret = zerocopy_fill_skb_from_iter(skb, from, length); - ret = zerocopy_fill_skb_from_iter(skb, from, length); truesize = skb->truesize - orig_size; - if (sk && sk->sk_type == SOCK_STREAM) { sk_wmem_queued_add(sk, truesize); if (!skb_zcopy_pure(skb)) -- cgit v1.2.3-58-ga151 From 2ca58ed21cefdda45520a0a2b1980c008efe9874 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 27 Jun 2024 13:59:45 +0100 Subject: net: limit scope of a skb_zerocopy_iter_stream var skb_zerocopy_iter_stream() only uses @orig_uarg in the !link_skb path, and we can move the local variable in the appropriate block. Signed-off-by: Pavel Begunkov Reviewed-by: Willem de Bruijn Reviewed-by: Jens Axboe Signed-off-by: Paolo Abeni --- net/core/skbuff.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 52986e1ce13e..0ed4d00d258c 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1871,7 +1871,6 @@ int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb, struct msghdr *msg, int len, struct ubuf_info *uarg) { - struct ubuf_info *orig_uarg = skb_zcopy(skb); int err, orig_len = skb->len; if (uarg->ops->link_skb) { @@ -1879,6 +1878,8 @@ int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb, if (err) return err; } else { + struct ubuf_info *orig_uarg = skb_zcopy(skb); + /* An skb can only point to one uarg. This edge case happens * when TCP appends to an skb, but zerocopy_realloc triggered * a new alloc. -- cgit v1.2.3-58-ga151 From d7f39aee79f04eeaa42085728423501b33ac5be5 Mon Sep 17 00:00:00 2001 From: David Wei Date: Wed, 26 Jun 2024 20:01:59 -0700 Subject: page_pool: export page_pool_disable_direct_recycling() 56ef27e3 unexported page_pool_unlink_napi() and renamed it to page_pool_disable_direct_recycling(). This is because there was no in-tree user of page_pool_unlink_napi(). Since then Rx queue API and an implementation in bnxt got merged. In the bnxt implementation, it broadly follows the following steps: allocate new queue memory + page pool, stop old rx queue, swap, then destroy old queue memory + page pool. The existing NAPI instance is re-used so when the old page pool that is no longer used but still linked to this shared NAPI instance is destroyed, it will trigger warnings. In my initial patches I unlinked a page pool from a NAPI instance directly. Instead, export page_pool_disable_direct_recycling() and call that instead to avoid having a driver touch a core struct. 
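A hedged sketch of the intended call site (the function names below are made up; the real user is the bnxt queue restart path): when the old Rx queue is stopped but its NAPI instance will be reused by the replacement page pool, the driver detaches the old pool through the exported helper instead of touching core fields, and destroys it only later:

        #include <net/page_pool/types.h>

        static void my_queue_stop(struct page_pool *old_pool)
        {
                /* The NAPI instance stays alive and will serve the new pool,
                 * so detach the old pool from it here rather than poking
                 * napi fields from driver code.
                 */
                page_pool_disable_direct_recycling(old_pool);
        }

        /* Later, once the replacement queue and pool have taken over: */
        static void my_queue_mem_free(struct page_pool *old_pool)
        {
                page_pool_destroy(old_pool);
        }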
Suggested-by: Jakub Kicinski Signed-off-by: David Wei Reviewed-by: Jakub Kicinski Signed-off-by: Paolo Abeni --- include/net/page_pool/types.h | 1 + net/core/page_pool.c | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'net')
diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h index 7e8477057f3d..9093a964fc33 100644 --- a/include/net/page_pool/types.h +++ b/include/net/page_pool/types.h @@ -229,6 +229,7 @@ struct page_pool *page_pool_create_percpu(const struct page_pool_params *params, struct xdp_mem_info; #ifdef CONFIG_PAGE_POOL +void page_pool_disable_direct_recycling(struct page_pool *pool); void page_pool_destroy(struct page_pool *pool); void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *), const struct xdp_mem_info *mem);
diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 3927a0a7fa9a..5f1ed6f2ca8f 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -1014,7 +1014,7 @@ void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *), pool->xdp_mem_id = mem->id; } -static void page_pool_disable_direct_recycling(struct page_pool *pool) +void page_pool_disable_direct_recycling(struct page_pool *pool) { /* Disable direct recycling based on pool->cpuid. * Paired with READ_ONCE() in page_pool_napi_local(). @@ -1032,6 +1032,7 @@ static void page_pool_disable_direct_recycling(struct page_pool *pool) WRITE_ONCE(pool->p.napi, NULL); } +EXPORT_SYMBOL(page_pool_disable_direct_recycling); void page_pool_destroy(struct page_pool *pool) {
-- cgit v1.2.3-58-ga151
From d839a73179ae91c07f5f2f97ccb9c69b2b7c3306 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 28 Jun 2024 12:18:55 +0200 Subject: net: Optimize xdp_do_flush() with bpf_net_context infos. Every NIC driver utilizing XDP should invoke xdp_do_flush() after processing all packets. With the introduction of the bpf_net_context logic the flush lists (for dev, CPU-map and xsk) are lazily initialized only if used. However xdp_do_flush() tries to flush all three of them, so all three lists are always initialized and the likely empty lists are "iterated". Without the usage of XDP but with CONFIG_DEBUG_NET the lists are also initialized due to xdp_do_check_flushed(). Jakub suggested using the hints in bpf_net_context to avoid invoking the flush functions for lists that were never used. This also avoids initializing the lists that are otherwise unused. Introduce bpf_net_ctx_get_all_used_flush_lists() to return the individual lists that are non-empty. Use the logic in xdp_do_flush() and xdp_do_check_flushed(). Remove the no-longer-needed .*_check_flush() helpers.
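For reference, a hedged sketch of the calling convention this optimises (my_napi_poll and the driver internals are hypothetical): a NAPI poll that may have redirected packets via XDP ends with one xdp_do_flush(), which now only walks the flush lists that were actually initialised and are non-empty:

        #include <linux/filter.h>

        static int my_napi_poll(struct napi_struct *napi, int budget)
        {
                int done = 0;

                /* ... receive up to 'budget' packets, run the XDP program
                 *     and possibly xdp_do_redirect() some of them ...
                 */

                /* One flush per poll; unused dev/cpumap/xsk lists are skipped. */
                xdp_do_flush();
                return done;
        }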
Suggested-by: Jakub Kicinski Signed-off-by: Sebastian Andrzej Siewior Reviewed-by: Jakub Kicinski Signed-off-by: Paolo Abeni --- include/linux/bpf.h | 10 ++++------ include/linux/filter.h | 27 +++++++++++++++++++++++++++ include/net/xdp_sock.h | 14 ++------------ kernel/bpf/cpumap.c | 13 +------------ kernel/bpf/devmap.c | 13 +------------ net/core/filter.c | 33 +++++++++++++++++++++++++-------- net/xdp/xsk.c | 13 +------------ 7 files changed, 61 insertions(+), 62 deletions(-) (limited to 'net') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index a834f4b761bc..f5c6bc9093a6 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2494,7 +2494,7 @@ struct sk_buff; struct bpf_dtab_netdev; struct bpf_cpu_map_entry; -void __dev_flush(void); +void __dev_flush(struct list_head *flush_list); int dev_xdp_enqueue(struct net_device *dev, struct xdp_frame *xdpf, struct net_device *dev_rx); int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_frame *xdpf, @@ -2507,7 +2507,7 @@ int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb, struct bpf_prog *xdp_prog, struct bpf_map *map, bool exclude_ingress); -void __cpu_map_flush(void); +void __cpu_map_flush(struct list_head *flush_list); int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf, struct net_device *dev_rx); int cpu_map_generic_redirect(struct bpf_cpu_map_entry *rcpu, @@ -2644,8 +2644,6 @@ void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data, void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr); void bpf_dynptr_set_rdonly(struct bpf_dynptr_kern *ptr); -bool dev_check_flush(void); -bool cpu_map_check_flush(void); #else /* !CONFIG_BPF_SYSCALL */ static inline struct bpf_prog *bpf_prog_get(u32 ufd) { @@ -2738,7 +2736,7 @@ static inline struct bpf_token *bpf_token_get_from_fd(u32 ufd) return ERR_PTR(-EOPNOTSUPP); } -static inline void __dev_flush(void) +static inline void __dev_flush(struct list_head *flush_list) { } @@ -2784,7 +2782,7 @@ int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb, return 0; } -static inline void __cpu_map_flush(void) +static inline void __cpu_map_flush(struct list_head *flush_list) { } diff --git a/include/linux/filter.h b/include/linux/filter.h index c0349522de8f..02ddcfdf94c4 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -829,6 +829,33 @@ static inline struct list_head *bpf_net_ctx_get_xskmap_flush_list(void) return &bpf_net_ctx->xskmap_map_flush_list; } +static inline void bpf_net_ctx_get_all_used_flush_lists(struct list_head **lh_map, + struct list_head **lh_dev, + struct list_head **lh_xsk) +{ + struct bpf_net_context *bpf_net_ctx = bpf_net_ctx_get(); + u32 kern_flags = bpf_net_ctx->ri.kern_flags; + struct list_head *lh; + + *lh_map = *lh_dev = *lh_xsk = NULL; + + if (!IS_ENABLED(CONFIG_BPF_SYSCALL)) + return; + + lh = &bpf_net_ctx->dev_map_flush_list; + if (kern_flags & BPF_RI_F_DEV_MAP_INIT && !list_empty(lh)) + *lh_dev = lh; + + lh = &bpf_net_ctx->cpu_map_flush_list; + if (kern_flags & BPF_RI_F_CPU_MAP_INIT && !list_empty(lh)) + *lh_map = lh; + + lh = &bpf_net_ctx->xskmap_map_flush_list; + if (IS_ENABLED(CONFIG_XDP_SOCKETS) && + kern_flags & BPF_RI_F_XSK_MAP_INIT && !list_empty(lh)) + *lh_xsk = lh; +} + /* Compute the linear packet data range [data, data_end) which * will be accessed by various program types (cls_bpf, act_bpf, * lwt, ...). Subsystems allowing direct data access must (!) 
diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h index 3d54de168a6d..bfe625b55d55 100644 --- a/include/net/xdp_sock.h +++ b/include/net/xdp_sock.h @@ -121,7 +121,7 @@ struct xsk_tx_metadata_ops { int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp); int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp); -void __xsk_map_flush(void); +void __xsk_map_flush(struct list_head *flush_list); /** * xsk_tx_metadata_to_compl - Save enough relevant metadata information @@ -206,7 +206,7 @@ static inline int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp) return -EOPNOTSUPP; } -static inline void __xsk_map_flush(void) +static inline void __xsk_map_flush(struct list_head *flush_list) { } @@ -228,14 +228,4 @@ static inline void xsk_tx_metadata_complete(struct xsk_tx_metadata_compl *compl, } #endif /* CONFIG_XDP_SOCKETS */ - -#if defined(CONFIG_XDP_SOCKETS) && defined(CONFIG_DEBUG_NET) -bool xsk_map_check_flush(void); -#else -static inline bool xsk_map_check_flush(void) -{ - return false; -} -#endif - #endif /* _LINUX_XDP_SOCK_H */ diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c index 068e994ed781..4acf90cd79eb 100644 --- a/kernel/bpf/cpumap.c +++ b/kernel/bpf/cpumap.c @@ -757,9 +757,8 @@ trace: return ret; } -void __cpu_map_flush(void) +void __cpu_map_flush(struct list_head *flush_list) { - struct list_head *flush_list = bpf_net_ctx_get_cpu_map_flush_list(); struct xdp_bulk_queue *bq, *tmp; list_for_each_entry_safe(bq, tmp, flush_list, flush_node) { @@ -769,13 +768,3 @@ void __cpu_map_flush(void) wake_up_process(bq->obj->kthread); } } - -#ifdef CONFIG_DEBUG_NET -bool cpu_map_check_flush(void) -{ - if (list_empty(bpf_net_ctx_get_cpu_map_flush_list())) - return false; - __cpu_map_flush(); - return true; -} -#endif diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index 317ac2d66ebd..9ca47eaacdd5 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -412,9 +412,8 @@ out: * driver before returning from its napi->poll() routine. See the comment above * xdp_do_flush() in filter.c. */ -void __dev_flush(void) +void __dev_flush(struct list_head *flush_list) { - struct list_head *flush_list = bpf_net_ctx_get_dev_flush_list(); struct xdp_dev_bulk_queue *bq, *tmp; list_for_each_entry_safe(bq, tmp, flush_list, flush_node) { @@ -425,16 +424,6 @@ void __dev_flush(void) } } -#ifdef CONFIG_DEBUG_NET -bool dev_check_flush(void) -{ - if (list_empty(bpf_net_ctx_get_dev_flush_list())) - return false; - __dev_flush(); - return true; -} -#endif - /* Elements are kept alive by RCU; either by rcu_read_lock() (from syscall) or * by local_bh_disable() (from XDP calls inside NAPI). The * rcu_read_lock_bh_held() below makes lockdep accept both. 
diff --git a/net/core/filter.c b/net/core/filter.c index eb1c4425c06f..403d23faf22e 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4277,22 +4277,39 @@ static const struct bpf_func_proto bpf_xdp_adjust_meta_proto = { */ void xdp_do_flush(void) { - __dev_flush(); - __cpu_map_flush(); - __xsk_map_flush(); + struct list_head *lh_map, *lh_dev, *lh_xsk; + + bpf_net_ctx_get_all_used_flush_lists(&lh_map, &lh_dev, &lh_xsk); + if (lh_dev) + __dev_flush(lh_dev); + if (lh_map) + __cpu_map_flush(lh_map); + if (lh_xsk) + __xsk_map_flush(lh_xsk); } EXPORT_SYMBOL_GPL(xdp_do_flush); #if defined(CONFIG_DEBUG_NET) && defined(CONFIG_BPF_SYSCALL) void xdp_do_check_flushed(struct napi_struct *napi) { - bool ret; + struct list_head *lh_map, *lh_dev, *lh_xsk; + bool missed = false; - ret = dev_check_flush(); - ret |= cpu_map_check_flush(); - ret |= xsk_map_check_flush(); + bpf_net_ctx_get_all_used_flush_lists(&lh_map, &lh_dev, &lh_xsk); + if (lh_dev) { + __dev_flush(lh_dev); + missed = true; + } + if (lh_map) { + __cpu_map_flush(lh_map); + missed = true; + } + if (lh_xsk) { + __xsk_map_flush(lh_xsk); + missed = true; + } - WARN_ONCE(ret, "Missing xdp_do_flush() invocation after NAPI by %ps\n", + WARN_ONCE(missed, "Missing xdp_do_flush() invocation after NAPI by %ps\n", napi->poll); } #endif diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index ed062e038389..de9c0322bc29 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -383,9 +383,8 @@ int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp) return 0; } -void __xsk_map_flush(void) +void __xsk_map_flush(struct list_head *flush_list) { - struct list_head *flush_list = bpf_net_ctx_get_xskmap_flush_list(); struct xdp_sock *xs, *tmp; list_for_each_entry_safe(xs, tmp, flush_list, flush_node) { @@ -394,16 +393,6 @@ void __xsk_map_flush(void) } } -#ifdef CONFIG_DEBUG_NET -bool xsk_map_check_flush(void) -{ - if (list_empty(bpf_net_ctx_get_xskmap_flush_list())) - return false; - __xsk_map_flush(); - return true; -} -#endif - void xsk_tx_completed(struct xsk_buff_pool *pool, u32 nb_entries) { xskq_prod_submit_n(pool->cq, nb_entries); -- cgit v1.2.3-58-ga151 From e3d69f585d651aba877e18866de7e8cfa2476caa Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 28 Jun 2024 12:18:56 +0200 Subject: net: Move flush list retrieval to where it is used. The bpf_net_ctx_get_.*_flush_list() are used at the top of the function. This means the variable is always assigned even if unused. By moving the function to where it is used, it is possible to delay the initialisation until it is unavoidable. Not sure how much this gains in reality but by looking at bq_enqueue() (in devmap.c) gcc pushes one register less to the stack. \o/. Move flush list retrieval to where it is used. 
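Condensed from the cpumap hunk below (error and full-queue handling omitted), the resulting shape is: the flush list is only looked up on the branch that actually links the bulk queue into it.

        static void bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf)
        {
                struct xdp_bulk_queue *bq = this_cpu_ptr(rcpu->bulkq);

                bq->q[bq->count++] = xdpf;

                if (!bq->flush_node.prev) {
                        /* First frame for this queue in this run: only now
                         * is the flush list really needed.
                         */
                        struct list_head *flush_list = bpf_net_ctx_get_cpu_map_flush_list();

                        list_add(&bq->flush_node, flush_list);
                }
        }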
Signed-off-by: Sebastian Andrzej Siewior Acked-by: Jesper Dangaard Brouer Reviewed-by: Jakub Kicinski Signed-off-by: Paolo Abeni --- kernel/bpf/cpumap.c | 6 ++++-- kernel/bpf/devmap.c | 3 ++- net/xdp/xsk.c | 6 ++++-- 3 files changed, 10 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c index 4acf90cd79eb..fbdf5a1aabfe 100644 --- a/kernel/bpf/cpumap.c +++ b/kernel/bpf/cpumap.c @@ -707,7 +707,6 @@ static void bq_flush_to_queue(struct xdp_bulk_queue *bq) */ static void bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf) { - struct list_head *flush_list = bpf_net_ctx_get_cpu_map_flush_list(); struct xdp_bulk_queue *bq = this_cpu_ptr(rcpu->bulkq); if (unlikely(bq->count == CPU_MAP_BULK_SIZE)) @@ -724,8 +723,11 @@ static void bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf) */ bq->q[bq->count++] = xdpf; - if (!bq->flush_node.prev) + if (!bq->flush_node.prev) { + struct list_head *flush_list = bpf_net_ctx_get_cpu_map_flush_list(); + list_add(&bq->flush_node, flush_list); + } } int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf, diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index 9ca47eaacdd5..b18d4a14a0a7 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -448,7 +448,6 @@ static void *__dev_map_lookup_elem(struct bpf_map *map, u32 key) static void bq_enqueue(struct net_device *dev, struct xdp_frame *xdpf, struct net_device *dev_rx, struct bpf_prog *xdp_prog) { - struct list_head *flush_list = bpf_net_ctx_get_dev_flush_list(); struct xdp_dev_bulk_queue *bq = this_cpu_ptr(dev->xdp_bulkq); if (unlikely(bq->count == DEV_MAP_BULK_SIZE)) @@ -462,6 +461,8 @@ static void bq_enqueue(struct net_device *dev, struct xdp_frame *xdpf, * are only ever modified together. */ if (!bq->dev_rx) { + struct list_head *flush_list = bpf_net_ctx_get_dev_flush_list(); + bq->dev_rx = dev_rx; bq->xdp_prog = xdp_prog; list_add(&bq->flush_node, flush_list); diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index de9c0322bc29..7e16336044b2 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -370,15 +370,17 @@ static int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp) int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp) { - struct list_head *flush_list = bpf_net_ctx_get_xskmap_flush_list(); int err; err = xsk_rcv(xs, xdp); if (err) return err; - if (!xs->flush_node.prev) + if (!xs->flush_node.prev) { + struct list_head *flush_list = bpf_net_ctx_get_xskmap_flush_list(); + list_add(&xs->flush_node, flush_list); + } return 0; } -- cgit v1.2.3-58-ga151 From 4dec64c52e24c2c9a15f81c115f1be5ea35121cb Mon Sep 17 00:00:00 2001 From: Mina Almasry Date: Fri, 28 Jun 2024 00:32:42 +0000 Subject: page_pool: convert to use netmem Abstract the memory type from the page_pool so we can later add support for new memory types. Convert the page_pool to use the new netmem type abstraction, rather than use struct page directly. As of this patch the netmem type is a no-op abstraction: it's always a struct page underneath. All the page pool internals are converted to use struct netmem instead of struct page, and the page pool now exports 2 APIs: 1. The existing struct page API. 2. The new struct netmem API. Keeping the existing API is transitional; we do not want to refactor all the current drivers using the page pool at once. The netmem abstraction is currently a no-op. 
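As a rough illustration (not taken from the patch; the function name, locals and GFP flag are assumptions), a consumer can now work in either representation while netmem is still page-backed:

        #include <net/page_pool/helpers.h>
        #include <net/page_pool/types.h>

        static void example_fill(struct page_pool *pool)
        {
                netmem_ref netmem;
                struct page *page;

                netmem = page_pool_alloc_netmem(pool, GFP_ATOMIC);
                if (!netmem)
                        return;

                /* Still a real struct page underneath in this patch. */
                page = netmem_to_page(netmem);

                /* ... hand the buffer to the device or the stack ... */

                page_pool_put_full_netmem(pool, netmem, false);
        }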
The page_pool uses page_to_netmem() to convert allocated pages to netmem, and uses netmem_to_page() to convert the netmem back to pages to pass to mm APIs, Follow up patches to this series add non-paged netmem support to the page_pool. This change is factored out on its own to limit the code churn to this 1 patch, for ease of code review. Signed-off-by: Mina Almasry Reviewed-by: Pavel Begunkov Link: https://patch.msgid.link/20240628003253.1694510-6-almasrymina@google.com Signed-off-by: Jakub Kicinski --- include/linux/skbuff_ref.h | 4 +- include/net/netmem.h | 15 ++ include/net/page_pool/helpers.h | 91 +++++++++--- include/net/page_pool/types.h | 14 +- include/trace/events/page_pool.h | 30 ++-- net/bpf/test_run.c | 5 +- net/core/page_pool.c | 304 +++++++++++++++++++++------------------ net/core/skbuff.c | 8 +- 8 files changed, 287 insertions(+), 184 deletions(-) (limited to 'net') diff --git a/include/linux/skbuff_ref.h b/include/linux/skbuff_ref.h index 11f0a4063403..16c241a23472 100644 --- a/include/linux/skbuff_ref.h +++ b/include/linux/skbuff_ref.h @@ -32,13 +32,13 @@ static inline void skb_frag_ref(struct sk_buff *skb, int f) __skb_frag_ref(&skb_shinfo(skb)->frags[f]); } -bool napi_pp_put_page(struct page *page); +bool napi_pp_put_page(netmem_ref netmem); static inline void skb_page_unref(struct page *page, bool recycle) { #ifdef CONFIG_PAGE_POOL - if (recycle && napi_pp_put_page(page)) + if (recycle && napi_pp_put_page(page_to_netmem(page))) return; #endif put_page(page); diff --git a/include/net/netmem.h b/include/net/netmem.h index d8b810245c1d..46cc9b89ac79 100644 --- a/include/net/netmem.h +++ b/include/net/netmem.h @@ -38,4 +38,19 @@ static inline netmem_ref page_to_netmem(struct page *page) return (__force netmem_ref)page; } +static inline int netmem_ref_count(netmem_ref netmem) +{ + return page_ref_count(netmem_to_page(netmem)); +} + +static inline unsigned long netmem_to_pfn(netmem_ref netmem) +{ + return page_to_pfn(netmem_to_page(netmem)); +} + +static inline netmem_ref netmem_compound_head(netmem_ref netmem) +{ + return page_to_netmem(compound_head(netmem_to_page(netmem))); +} + #endif /* _NET_NETMEM_H */ diff --git a/include/net/page_pool/helpers.h b/include/net/page_pool/helpers.h index 873631c79ab1..2b43a893c619 100644 --- a/include/net/page_pool/helpers.h +++ b/include/net/page_pool/helpers.h @@ -55,6 +55,8 @@ #include #include +#include +#include #ifdef CONFIG_PAGE_POOL_STATS /* Deprecated driver-facing API, use netlink instead */ @@ -212,6 +214,11 @@ page_pool_get_dma_dir(const struct page_pool *pool) return pool->p.dma_dir; } +static inline void page_pool_fragment_netmem(netmem_ref netmem, long nr) +{ + atomic_long_set(&netmem_to_page(netmem)->pp_ref_count, nr); +} + /** * page_pool_fragment_page() - split a fresh page into fragments * @page: page to split @@ -232,11 +239,12 @@ page_pool_get_dma_dir(const struct page_pool *pool) */ static inline void page_pool_fragment_page(struct page *page, long nr) { - atomic_long_set(&page->pp_ref_count, nr); + page_pool_fragment_netmem(page_to_netmem(page), nr); } -static inline long page_pool_unref_page(struct page *page, long nr) +static inline long page_pool_unref_netmem(netmem_ref netmem, long nr) { + struct page *page = netmem_to_page(netmem); long ret; /* If nr == pp_ref_count then we have cleared all remaining @@ -279,15 +287,41 @@ static inline long page_pool_unref_page(struct page *page, long nr) return ret; } +static inline long page_pool_unref_page(struct page *page, long nr) +{ + return 
page_pool_unref_netmem(page_to_netmem(page), nr); +} + +static inline void page_pool_ref_netmem(netmem_ref netmem) +{ + atomic_long_inc(&netmem_to_page(netmem)->pp_ref_count); +} + static inline void page_pool_ref_page(struct page *page) { - atomic_long_inc(&page->pp_ref_count); + page_pool_ref_netmem(page_to_netmem(page)); } -static inline bool page_pool_is_last_ref(struct page *page) +static inline bool page_pool_is_last_ref(netmem_ref netmem) { /* If page_pool_unref_page() returns 0, we were the last user */ - return page_pool_unref_page(page, 1) == 0; + return page_pool_unref_netmem(netmem, 1) == 0; +} + +static inline void page_pool_put_netmem(struct page_pool *pool, + netmem_ref netmem, + unsigned int dma_sync_size, + bool allow_direct) +{ + /* When page_pool isn't compiled-in, net/core/xdp.c doesn't + * allow registering MEM_TYPE_PAGE_POOL, but shield linker. + */ +#ifdef CONFIG_PAGE_POOL + if (!page_pool_is_last_ref(netmem)) + return; + + page_pool_put_unrefed_netmem(pool, netmem, dma_sync_size, allow_direct); +#endif } /** @@ -308,15 +342,15 @@ static inline void page_pool_put_page(struct page_pool *pool, unsigned int dma_sync_size, bool allow_direct) { - /* When page_pool isn't compiled-in, net/core/xdp.c doesn't - * allow registering MEM_TYPE_PAGE_POOL, but shield linker. - */ -#ifdef CONFIG_PAGE_POOL - if (!page_pool_is_last_ref(page)) - return; + page_pool_put_netmem(pool, page_to_netmem(page), dma_sync_size, + allow_direct); +} - page_pool_put_unrefed_page(pool, page, dma_sync_size, allow_direct); -#endif +static inline void page_pool_put_full_netmem(struct page_pool *pool, + netmem_ref netmem, + bool allow_direct) +{ + page_pool_put_netmem(pool, netmem, -1, allow_direct); } /** @@ -331,7 +365,7 @@ static inline void page_pool_put_page(struct page_pool *pool, static inline void page_pool_put_full_page(struct page_pool *pool, struct page *page, bool allow_direct) { - page_pool_put_page(pool, page, -1, allow_direct); + page_pool_put_netmem(pool, page_to_netmem(page), -1, allow_direct); } /** @@ -365,6 +399,18 @@ static inline void page_pool_free_va(struct page_pool *pool, void *va, page_pool_put_page(pool, virt_to_head_page(va), -1, allow_direct); } +static inline dma_addr_t page_pool_get_dma_addr_netmem(netmem_ref netmem) +{ + struct page *page = netmem_to_page(netmem); + + dma_addr_t ret = page->dma_addr; + + if (PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA) + ret <<= PAGE_SHIFT; + + return ret; +} + /** * page_pool_get_dma_addr() - Retrieve the stored DMA address. 
* @page: page allocated from a page pool @@ -374,16 +420,14 @@ static inline void page_pool_free_va(struct page_pool *pool, void *va, */ static inline dma_addr_t page_pool_get_dma_addr(const struct page *page) { - dma_addr_t ret = page->dma_addr; - - if (PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA) - ret <<= PAGE_SHIFT; - - return ret; + return page_pool_get_dma_addr_netmem(page_to_netmem((struct page *)page)); } -static inline bool page_pool_set_dma_addr(struct page *page, dma_addr_t addr) +static inline bool page_pool_set_dma_addr_netmem(netmem_ref netmem, + dma_addr_t addr) { + struct page *page = netmem_to_page(netmem); + if (PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA) { page->dma_addr = addr >> PAGE_SHIFT; @@ -419,6 +463,11 @@ static inline void page_pool_dma_sync_for_cpu(const struct page_pool *pool, page_pool_get_dma_dir(pool)); } +static inline bool page_pool_set_dma_addr(struct page *page, dma_addr_t addr) +{ + return page_pool_set_dma_addr_netmem(page_to_netmem(page), addr); +} + static inline bool page_pool_put(struct page_pool *pool) { return refcount_dec_and_test(&pool->user_cnt); diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h index 9093a964fc33..b70bcc14ceda 100644 --- a/include/net/page_pool/types.h +++ b/include/net/page_pool/types.h @@ -6,6 +6,7 @@ #include #include #include +#include #define PP_FLAG_DMA_MAP BIT(0) /* Should page_pool do the DMA * map/unmap @@ -40,7 +41,7 @@ #define PP_ALLOC_CACHE_REFILL 64 struct pp_alloc_cache { u32 count; - struct page *cache[PP_ALLOC_CACHE_SIZE]; + netmem_ref cache[PP_ALLOC_CACHE_SIZE]; }; /** @@ -73,7 +74,7 @@ struct page_pool_params { struct net_device *netdev; unsigned int flags; /* private: used by test code only */ - void (*init_callback)(struct page *page, void *arg); + void (*init_callback)(netmem_ref netmem, void *arg); void *init_arg; ); }; @@ -151,7 +152,7 @@ struct page_pool { */ __cacheline_group_begin(frag) __aligned(4 * sizeof(long)); long frag_users; - struct page *frag_page; + netmem_ref frag_page; unsigned int frag_offset; __cacheline_group_end(frag); @@ -220,8 +221,12 @@ struct page_pool { }; struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp); +netmem_ref page_pool_alloc_netmem(struct page_pool *pool, gfp_t gfp); struct page *page_pool_alloc_frag(struct page_pool *pool, unsigned int *offset, unsigned int size, gfp_t gfp); +netmem_ref page_pool_alloc_frag_netmem(struct page_pool *pool, + unsigned int *offset, unsigned int size, + gfp_t gfp); struct page_pool *page_pool_create(const struct page_pool_params *params); struct page_pool *page_pool_create_percpu(const struct page_pool_params *params, int cpuid); @@ -252,6 +257,9 @@ static inline void page_pool_put_page_bulk(struct page_pool *pool, void **data, } #endif +void page_pool_put_unrefed_netmem(struct page_pool *pool, netmem_ref netmem, + unsigned int dma_sync_size, + bool allow_direct); void page_pool_put_unrefed_page(struct page_pool *pool, struct page *page, unsigned int dma_sync_size, bool allow_direct); diff --git a/include/trace/events/page_pool.h b/include/trace/events/page_pool.h index 6834356b2d2a..543e54e432a1 100644 --- a/include/trace/events/page_pool.h +++ b/include/trace/events/page_pool.h @@ -42,51 +42,53 @@ TRACE_EVENT(page_pool_release, TRACE_EVENT(page_pool_state_release, TP_PROTO(const struct page_pool *pool, - const struct page *page, u32 release), + netmem_ref netmem, u32 release), - TP_ARGS(pool, page, release), + TP_ARGS(pool, netmem, release), TP_STRUCT__entry( __field(const struct page_pool *, pool) - 
__field(const struct page *, page) + __field(unsigned long, netmem) __field(u32, release) __field(unsigned long, pfn) ), TP_fast_assign( __entry->pool = pool; - __entry->page = page; + __entry->netmem = (__force unsigned long)netmem; __entry->release = release; - __entry->pfn = page_to_pfn(page); + __entry->pfn = netmem_to_pfn(netmem); ), - TP_printk("page_pool=%p page=%p pfn=0x%lx release=%u", - __entry->pool, __entry->page, __entry->pfn, __entry->release) + TP_printk("page_pool=%p netmem=%p pfn=0x%lx release=%u", + __entry->pool, (void *)__entry->netmem, + __entry->pfn, __entry->release) ); TRACE_EVENT(page_pool_state_hold, TP_PROTO(const struct page_pool *pool, - const struct page *page, u32 hold), + netmem_ref netmem, u32 hold), - TP_ARGS(pool, page, hold), + TP_ARGS(pool, netmem, hold), TP_STRUCT__entry( __field(const struct page_pool *, pool) - __field(const struct page *, page) + __field(unsigned long, netmem) __field(u32, hold) __field(unsigned long, pfn) ), TP_fast_assign( __entry->pool = pool; - __entry->page = page; + __entry->netmem = (__force unsigned long)netmem; __entry->hold = hold; - __entry->pfn = page_to_pfn(page); + __entry->pfn = netmem_to_pfn(netmem); ), - TP_printk("page_pool=%p page=%p pfn=0x%lx hold=%u", - __entry->pool, __entry->page, __entry->pfn, __entry->hold) + TP_printk("page_pool=%p netmem=%p pfn=0x%lx hold=%u", + __entry->pool, (void *)__entry->netmem, + __entry->pfn, __entry->hold) ); TRACE_EVENT(page_pool_update_nid, diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index a6d7f790cdda..26417ab34ff4 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -127,9 +127,10 @@ struct xdp_test_data { #define TEST_XDP_FRAME_SIZE (PAGE_SIZE - sizeof(struct xdp_page_head)) #define TEST_XDP_MAX_BATCH 256 -static void xdp_test_run_init_page(struct page *page, void *arg) +static void xdp_test_run_init_page(netmem_ref netmem, void *arg) { - struct xdp_page_head *head = phys_to_virt(page_to_phys(page)); + struct xdp_page_head *head = + phys_to_virt(page_to_phys(netmem_to_page(netmem))); struct xdp_buff *new_ctx, *orig_ctx; u32 headroom = XDP_PACKET_HEADROOM; struct xdp_test_data *xdp = arg; diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 5f1ed6f2ca8f..dc52f078adde 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -327,19 +327,18 @@ struct page_pool *page_pool_create(const struct page_pool_params *params) } EXPORT_SYMBOL(page_pool_create); -static void page_pool_return_page(struct page_pool *pool, struct page *page); +static void page_pool_return_page(struct page_pool *pool, netmem_ref netmem); -noinline -static struct page *page_pool_refill_alloc_cache(struct page_pool *pool) +static noinline netmem_ref page_pool_refill_alloc_cache(struct page_pool *pool) { struct ptr_ring *r = &pool->ring; - struct page *page; + netmem_ref netmem; int pref_nid; /* preferred NUMA node */ /* Quicker fallback, avoid locks when ring is empty */ if (__ptr_ring_empty(r)) { alloc_stat_inc(pool, empty); - return NULL; + return 0; } /* Softirq guarantee CPU and thus NUMA node is stable. 
This, @@ -354,57 +353,57 @@ static struct page *page_pool_refill_alloc_cache(struct page_pool *pool) /* Refill alloc array, but only if NUMA match */ do { - page = __ptr_ring_consume(r); - if (unlikely(!page)) + netmem = (__force netmem_ref)__ptr_ring_consume(r); + if (unlikely(!netmem)) break; - if (likely(page_to_nid(page) == pref_nid)) { - pool->alloc.cache[pool->alloc.count++] = page; + if (likely(page_to_nid(netmem_to_page(netmem)) == pref_nid)) { + pool->alloc.cache[pool->alloc.count++] = netmem; } else { /* NUMA mismatch; * (1) release 1 page to page-allocator and * (2) break out to fallthrough to alloc_pages_node. * This limit stress on page buddy alloactor. */ - page_pool_return_page(pool, page); + page_pool_return_page(pool, netmem); alloc_stat_inc(pool, waive); - page = NULL; + netmem = 0; break; } } while (pool->alloc.count < PP_ALLOC_CACHE_REFILL); /* Return last page */ if (likely(pool->alloc.count > 0)) { - page = pool->alloc.cache[--pool->alloc.count]; + netmem = pool->alloc.cache[--pool->alloc.count]; alloc_stat_inc(pool, refill); } - return page; + return netmem; } /* fast path */ -static struct page *__page_pool_get_cached(struct page_pool *pool) +static netmem_ref __page_pool_get_cached(struct page_pool *pool) { - struct page *page; + netmem_ref netmem; /* Caller MUST guarantee safe non-concurrent access, e.g. softirq */ if (likely(pool->alloc.count)) { /* Fast-path */ - page = pool->alloc.cache[--pool->alloc.count]; + netmem = pool->alloc.cache[--pool->alloc.count]; alloc_stat_inc(pool, fast); } else { - page = page_pool_refill_alloc_cache(pool); + netmem = page_pool_refill_alloc_cache(pool); } - return page; + return netmem; } static void __page_pool_dma_sync_for_device(const struct page_pool *pool, - const struct page *page, + netmem_ref netmem, u32 dma_sync_size) { #if defined(CONFIG_HAS_DMA) && defined(CONFIG_DMA_NEED_SYNC) - dma_addr_t dma_addr = page_pool_get_dma_addr(page); + dma_addr_t dma_addr = page_pool_get_dma_addr_netmem(netmem); dma_sync_size = min(dma_sync_size, pool->p.max_len); __dma_sync_single_for_device(pool->p.dev, dma_addr + pool->p.offset, @@ -414,14 +413,14 @@ static void __page_pool_dma_sync_for_device(const struct page_pool *pool, static __always_inline void page_pool_dma_sync_for_device(const struct page_pool *pool, - const struct page *page, + netmem_ref netmem, u32 dma_sync_size) { if (pool->dma_sync && dma_dev_need_sync(pool->p.dev)) - __page_pool_dma_sync_for_device(pool, page, dma_sync_size); + __page_pool_dma_sync_for_device(pool, netmem, dma_sync_size); } -static bool page_pool_dma_map(struct page_pool *pool, struct page *page) +static bool page_pool_dma_map(struct page_pool *pool, netmem_ref netmem) { dma_addr_t dma; @@ -430,17 +429,17 @@ static bool page_pool_dma_map(struct page_pool *pool, struct page *page) * into page private data (i.e 32bit cpu with 64bit DMA caps) * This mapping is kept for lifetime of page, until leaving pool. 
*/ - dma = dma_map_page_attrs(pool->p.dev, page, 0, - (PAGE_SIZE << pool->p.order), - pool->p.dma_dir, DMA_ATTR_SKIP_CPU_SYNC | - DMA_ATTR_WEAK_ORDERING); + dma = dma_map_page_attrs(pool->p.dev, netmem_to_page(netmem), 0, + (PAGE_SIZE << pool->p.order), pool->p.dma_dir, + DMA_ATTR_SKIP_CPU_SYNC | + DMA_ATTR_WEAK_ORDERING); if (dma_mapping_error(pool->p.dev, dma)) return false; - if (page_pool_set_dma_addr(page, dma)) + if (page_pool_set_dma_addr_netmem(netmem, dma)) goto unmap_failed; - page_pool_dma_sync_for_device(pool, page, pool->p.max_len); + page_pool_dma_sync_for_device(pool, netmem, pool->p.max_len); return true; @@ -452,9 +451,10 @@ unmap_failed: return false; } -static void page_pool_set_pp_info(struct page_pool *pool, - struct page *page) +static void page_pool_set_pp_info(struct page_pool *pool, netmem_ref netmem) { + struct page *page = netmem_to_page(netmem); + page->pp = pool; page->pp_magic |= PP_SIGNATURE; @@ -464,13 +464,15 @@ static void page_pool_set_pp_info(struct page_pool *pool, * is dirtying the same cache line as the page->pp_magic above, so * the overhead is negligible. */ - page_pool_fragment_page(page, 1); + page_pool_fragment_netmem(netmem, 1); if (pool->has_init_callback) - pool->slow.init_callback(page, pool->slow.init_arg); + pool->slow.init_callback(netmem, pool->slow.init_arg); } -static void page_pool_clear_pp_info(struct page *page) +static void page_pool_clear_pp_info(netmem_ref netmem) { + struct page *page = netmem_to_page(netmem); + page->pp_magic = 0; page->pp = NULL; } @@ -485,34 +487,34 @@ static struct page *__page_pool_alloc_page_order(struct page_pool *pool, if (unlikely(!page)) return NULL; - if (pool->dma_map && unlikely(!page_pool_dma_map(pool, page))) { + if (pool->dma_map && unlikely(!page_pool_dma_map(pool, page_to_netmem(page)))) { put_page(page); return NULL; } alloc_stat_inc(pool, slow_high_order); - page_pool_set_pp_info(pool, page); + page_pool_set_pp_info(pool, page_to_netmem(page)); /* Track how many pages are held 'in-flight' */ pool->pages_state_hold_cnt++; - trace_page_pool_state_hold(pool, page, pool->pages_state_hold_cnt); + trace_page_pool_state_hold(pool, page_to_netmem(page), + pool->pages_state_hold_cnt); return page; } /* slow path */ -noinline -static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool, - gfp_t gfp) +static noinline netmem_ref __page_pool_alloc_pages_slow(struct page_pool *pool, + gfp_t gfp) { const int bulk = PP_ALLOC_CACHE_REFILL; unsigned int pp_order = pool->p.order; bool dma_map = pool->dma_map; - struct page *page; + netmem_ref netmem; int i, nr_pages; /* Don't support bulk alloc for high-order pages */ if (unlikely(pp_order)) - return __page_pool_alloc_page_order(pool, gfp); + return page_to_netmem(__page_pool_alloc_page_order(pool, gfp)); /* Unnecessary as alloc cache is empty, but guarantees zero count */ if (unlikely(pool->alloc.count > 0)) @@ -521,56 +523,63 @@ static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool, /* Mark empty alloc.cache slots "empty" for alloc_pages_bulk_array */ memset(&pool->alloc.cache, 0, sizeof(void *) * bulk); - nr_pages = alloc_pages_bulk_array_node(gfp, pool->p.nid, bulk, - pool->alloc.cache); + nr_pages = alloc_pages_bulk_array_node(gfp, + pool->p.nid, bulk, + (struct page **)pool->alloc.cache); if (unlikely(!nr_pages)) - return NULL; + return 0; /* Pages have been filled into alloc.cache array, but count is zero and * page element have not been (possibly) DMA mapped. 
*/ for (i = 0; i < nr_pages; i++) { - page = pool->alloc.cache[i]; - if (dma_map && unlikely(!page_pool_dma_map(pool, page))) { - put_page(page); + netmem = pool->alloc.cache[i]; + if (dma_map && unlikely(!page_pool_dma_map(pool, netmem))) { + put_page(netmem_to_page(netmem)); continue; } - page_pool_set_pp_info(pool, page); - pool->alloc.cache[pool->alloc.count++] = page; + page_pool_set_pp_info(pool, netmem); + pool->alloc.cache[pool->alloc.count++] = netmem; /* Track how many pages are held 'in-flight' */ pool->pages_state_hold_cnt++; - trace_page_pool_state_hold(pool, page, + trace_page_pool_state_hold(pool, netmem, pool->pages_state_hold_cnt); } /* Return last page */ if (likely(pool->alloc.count > 0)) { - page = pool->alloc.cache[--pool->alloc.count]; + netmem = pool->alloc.cache[--pool->alloc.count]; alloc_stat_inc(pool, slow); } else { - page = NULL; + netmem = 0; } /* When page just alloc'ed is should/must have refcnt 1. */ - return page; + return netmem; } /* For using page_pool replace: alloc_pages() API calls, but provide * synchronization guarantee for allocation side. */ -struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp) +netmem_ref page_pool_alloc_netmem(struct page_pool *pool, gfp_t gfp) { - struct page *page; + netmem_ref netmem; /* Fast-path: Get a page from cache */ - page = __page_pool_get_cached(pool); - if (page) - return page; + netmem = __page_pool_get_cached(pool); + if (netmem) + return netmem; /* Slow-path: cache empty, do real allocation */ - page = __page_pool_alloc_pages_slow(pool, gfp); - return page; + netmem = __page_pool_alloc_pages_slow(pool, gfp); + return netmem; +} +EXPORT_SYMBOL(page_pool_alloc_netmem); + +struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp) +{ + return netmem_to_page(page_pool_alloc_netmem(pool, gfp)); } EXPORT_SYMBOL(page_pool_alloc_pages); ALLOW_ERROR_INJECTION(page_pool_alloc_pages, NULL); @@ -599,8 +608,8 @@ s32 page_pool_inflight(const struct page_pool *pool, bool strict) return inflight; } -static __always_inline -void __page_pool_release_page_dma(struct page_pool *pool, struct page *page) +static __always_inline void __page_pool_release_page_dma(struct page_pool *pool, + netmem_ref netmem) { dma_addr_t dma; @@ -610,13 +619,13 @@ void __page_pool_release_page_dma(struct page_pool *pool, struct page *page) */ return; - dma = page_pool_get_dma_addr(page); + dma = page_pool_get_dma_addr_netmem(netmem); /* When page is unmapped, it cannot be returned to our pool */ dma_unmap_page_attrs(pool->p.dev, dma, PAGE_SIZE << pool->p.order, pool->p.dma_dir, DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING); - page_pool_set_dma_addr(page, 0); + page_pool_set_dma_addr_netmem(netmem, 0); } /* Disconnects a page (from a page_pool). API users can have a need @@ -624,35 +633,34 @@ void __page_pool_release_page_dma(struct page_pool *pool, struct page *page) * a regular page (that will eventually be returned to the normal * page-allocator via put_page). */ -void page_pool_return_page(struct page_pool *pool, struct page *page) +void page_pool_return_page(struct page_pool *pool, netmem_ref netmem) { int count; - __page_pool_release_page_dma(pool, page); - - page_pool_clear_pp_info(page); + __page_pool_release_page_dma(pool, netmem); /* This may be the last page returned, releasing the pool, so * it is not safe to reference pool afterwards. 
*/ count = atomic_inc_return_relaxed(&pool->pages_state_release_cnt); - trace_page_pool_state_release(pool, page, count); + trace_page_pool_state_release(pool, netmem, count); - put_page(page); + page_pool_clear_pp_info(netmem); + put_page(netmem_to_page(netmem)); /* An optimization would be to call __free_pages(page, pool->p.order) * knowing page is not part of page-cache (thus avoiding a * __page_cache_release() call). */ } -static bool page_pool_recycle_in_ring(struct page_pool *pool, struct page *page) +static bool page_pool_recycle_in_ring(struct page_pool *pool, netmem_ref netmem) { int ret; /* BH protection not needed if current is softirq */ if (in_softirq()) - ret = ptr_ring_produce(&pool->ring, page); + ret = ptr_ring_produce(&pool->ring, (__force void *)netmem); else - ret = ptr_ring_produce_bh(&pool->ring, page); + ret = ptr_ring_produce_bh(&pool->ring, (__force void *)netmem); if (!ret) { recycle_stat_inc(pool, ring); @@ -667,7 +675,7 @@ static bool page_pool_recycle_in_ring(struct page_pool *pool, struct page *page) * * Caller must provide appropriate safe context. */ -static bool page_pool_recycle_in_cache(struct page *page, +static bool page_pool_recycle_in_cache(netmem_ref netmem, struct page_pool *pool) { if (unlikely(pool->alloc.count == PP_ALLOC_CACHE_SIZE)) { @@ -676,14 +684,15 @@ static bool page_pool_recycle_in_cache(struct page *page, } /* Caller MUST have verified/know (page_ref_count(page) == 1) */ - pool->alloc.cache[pool->alloc.count++] = page; + pool->alloc.cache[pool->alloc.count++] = netmem; recycle_stat_inc(pool, cached); return true; } -static bool __page_pool_page_can_be_recycled(const struct page *page) +static bool __page_pool_page_can_be_recycled(netmem_ref netmem) { - return page_ref_count(page) == 1 && !page_is_pfmemalloc(page); + return page_ref_count(netmem_to_page(netmem)) == 1 && + !page_is_pfmemalloc(netmem_to_page(netmem)); } /* If the page refcnt == 1, this will try to recycle the page. @@ -692,8 +701,8 @@ static bool __page_pool_page_can_be_recycled(const struct page *page) * If the page refcnt != 1, then the page will be returned to memory * subsystem. */ -static __always_inline struct page * -__page_pool_put_page(struct page_pool *pool, struct page *page, +static __always_inline netmem_ref +__page_pool_put_page(struct page_pool *pool, netmem_ref netmem, unsigned int dma_sync_size, bool allow_direct) { lockdep_assert_no_hardirq(); @@ -707,16 +716,16 @@ __page_pool_put_page(struct page_pool *pool, struct page *page, * page is NOT reusable when allocated when system is under * some pressure. (page_is_pfmemalloc) */ - if (likely(__page_pool_page_can_be_recycled(page))) { + if (likely(__page_pool_page_can_be_recycled(netmem))) { /* Read barrier done in page_ref_count / READ_ONCE */ - page_pool_dma_sync_for_device(pool, page, dma_sync_size); + page_pool_dma_sync_for_device(pool, netmem, dma_sync_size); - if (allow_direct && page_pool_recycle_in_cache(page, pool)) - return NULL; + if (allow_direct && page_pool_recycle_in_cache(netmem, pool)) + return 0; /* Page found as candidate for recycling */ - return page; + return netmem; } /* Fallback/non-XDP mode: API user have elevated refcnt. * @@ -732,9 +741,9 @@ __page_pool_put_page(struct page_pool *pool, struct page *page, * will be invoking put_page. 
*/ recycle_stat_inc(pool, released_refcnt); - page_pool_return_page(pool, page); + page_pool_return_page(pool, netmem); - return NULL; + return 0; } static bool page_pool_napi_local(const struct page_pool *pool) @@ -760,19 +769,28 @@ static bool page_pool_napi_local(const struct page_pool *pool) return napi && READ_ONCE(napi->list_owner) == cpuid; } -void page_pool_put_unrefed_page(struct page_pool *pool, struct page *page, - unsigned int dma_sync_size, bool allow_direct) +void page_pool_put_unrefed_netmem(struct page_pool *pool, netmem_ref netmem, + unsigned int dma_sync_size, bool allow_direct) { if (!allow_direct) allow_direct = page_pool_napi_local(pool); - page = __page_pool_put_page(pool, page, dma_sync_size, allow_direct); - if (page && !page_pool_recycle_in_ring(pool, page)) { + netmem = + __page_pool_put_page(pool, netmem, dma_sync_size, allow_direct); + if (netmem && !page_pool_recycle_in_ring(pool, netmem)) { /* Cache full, fallback to free pages */ recycle_stat_inc(pool, ring_full); - page_pool_return_page(pool, page); + page_pool_return_page(pool, netmem); } } +EXPORT_SYMBOL(page_pool_put_unrefed_netmem); + +void page_pool_put_unrefed_page(struct page_pool *pool, struct page *page, + unsigned int dma_sync_size, bool allow_direct) +{ + page_pool_put_unrefed_netmem(pool, page_to_netmem(page), dma_sync_size, + allow_direct); +} EXPORT_SYMBOL(page_pool_put_unrefed_page); /** @@ -800,16 +818,16 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data, allow_direct = page_pool_napi_local(pool); for (i = 0; i < count; i++) { - struct page *page = virt_to_head_page(data[i]); + netmem_ref netmem = page_to_netmem(virt_to_head_page(data[i])); /* It is not the last user for the page frag case */ - if (!page_pool_is_last_ref(page)) + if (!page_pool_is_last_ref(netmem)) continue; - page = __page_pool_put_page(pool, page, -1, allow_direct); + netmem = __page_pool_put_page(pool, netmem, -1, allow_direct); /* Approved for bulk recycling in ptr_ring cache */ - if (page) - data[bulk_len++] = page; + if (netmem) + data[bulk_len++] = (__force void *)netmem; } if (!bulk_len) @@ -835,98 +853,106 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data, * since put_page() with refcnt == 1 can be an expensive operation */ for (; i < bulk_len; i++) - page_pool_return_page(pool, data[i]); + page_pool_return_page(pool, (__force netmem_ref)data[i]); } EXPORT_SYMBOL(page_pool_put_page_bulk); -static struct page *page_pool_drain_frag(struct page_pool *pool, - struct page *page) +static netmem_ref page_pool_drain_frag(struct page_pool *pool, + netmem_ref netmem) { long drain_count = BIAS_MAX - pool->frag_users; /* Some user is still using the page frag */ - if (likely(page_pool_unref_page(page, drain_count))) - return NULL; + if (likely(page_pool_unref_netmem(netmem, drain_count))) + return 0; - if (__page_pool_page_can_be_recycled(page)) { - page_pool_dma_sync_for_device(pool, page, -1); - return page; + if (__page_pool_page_can_be_recycled(netmem)) { + page_pool_dma_sync_for_device(pool, netmem, -1); + return netmem; } - page_pool_return_page(pool, page); - return NULL; + page_pool_return_page(pool, netmem); + return 0; } static void page_pool_free_frag(struct page_pool *pool) { long drain_count = BIAS_MAX - pool->frag_users; - struct page *page = pool->frag_page; + netmem_ref netmem = pool->frag_page; - pool->frag_page = NULL; + pool->frag_page = 0; - if (!page || page_pool_unref_page(page, drain_count)) + if (!netmem || page_pool_unref_netmem(netmem, drain_count)) return; - 
page_pool_return_page(pool, page); + page_pool_return_page(pool, netmem); } -struct page *page_pool_alloc_frag(struct page_pool *pool, - unsigned int *offset, - unsigned int size, gfp_t gfp) +netmem_ref page_pool_alloc_frag_netmem(struct page_pool *pool, + unsigned int *offset, unsigned int size, + gfp_t gfp) { unsigned int max_size = PAGE_SIZE << pool->p.order; - struct page *page = pool->frag_page; + netmem_ref netmem = pool->frag_page; if (WARN_ON(size > max_size)) - return NULL; + return 0; size = ALIGN(size, dma_get_cache_alignment()); *offset = pool->frag_offset; - if (page && *offset + size > max_size) { - page = page_pool_drain_frag(pool, page); - if (page) { + if (netmem && *offset + size > max_size) { + netmem = page_pool_drain_frag(pool, netmem); + if (netmem) { alloc_stat_inc(pool, fast); goto frag_reset; } } - if (!page) { - page = page_pool_alloc_pages(pool, gfp); - if (unlikely(!page)) { - pool->frag_page = NULL; - return NULL; + if (!netmem) { + netmem = page_pool_alloc_netmem(pool, gfp); + if (unlikely(!netmem)) { + pool->frag_page = 0; + return 0; } - pool->frag_page = page; + pool->frag_page = netmem; frag_reset: pool->frag_users = 1; *offset = 0; pool->frag_offset = size; - page_pool_fragment_page(page, BIAS_MAX); - return page; + page_pool_fragment_netmem(netmem, BIAS_MAX); + return netmem; } pool->frag_users++; pool->frag_offset = *offset + size; alloc_stat_inc(pool, fast); - return page; + return netmem; +} +EXPORT_SYMBOL(page_pool_alloc_frag_netmem); + +struct page *page_pool_alloc_frag(struct page_pool *pool, unsigned int *offset, + unsigned int size, gfp_t gfp) +{ + return netmem_to_page(page_pool_alloc_frag_netmem(pool, offset, size, + gfp)); } EXPORT_SYMBOL(page_pool_alloc_frag); static void page_pool_empty_ring(struct page_pool *pool) { - struct page *page; + netmem_ref netmem; /* Empty recycle ring */ - while ((page = ptr_ring_consume_bh(&pool->ring))) { + while ((netmem = (__force netmem_ref)ptr_ring_consume_bh(&pool->ring))) { /* Verify the refcnt invariant of cached pages */ - if (!(page_ref_count(page) == 1)) + if (!(page_ref_count(netmem_to_page(netmem)) == 1)) pr_crit("%s() page_pool refcnt %d violation\n", - __func__, page_ref_count(page)); + __func__, netmem_ref_count(netmem)); - page_pool_return_page(pool, page); + page_pool_return_page(pool, netmem); } } @@ -942,7 +968,7 @@ static void __page_pool_destroy(struct page_pool *pool) static void page_pool_empty_alloc_cache_once(struct page_pool *pool) { - struct page *page; + netmem_ref netmem; if (pool->destroy_cnt) return; @@ -952,8 +978,8 @@ static void page_pool_empty_alloc_cache_once(struct page_pool *pool) * call concurrently. */ while (pool->alloc.count) { - page = pool->alloc.cache[--pool->alloc.count]; - page_pool_return_page(pool, page); + netmem = pool->alloc.cache[--pool->alloc.count]; + page_pool_return_page(pool, netmem); } } @@ -1060,15 +1086,15 @@ EXPORT_SYMBOL(page_pool_destroy); /* Caller must provide appropriate safe context, e.g. NAPI. 
*/ void page_pool_update_nid(struct page_pool *pool, int new_nid) { - struct page *page; + netmem_ref netmem; trace_page_pool_update_nid(pool, new_nid); pool->p.nid = new_nid; /* Flush pool alloc cache, as refill will check NUMA node */ while (pool->alloc.count) { - page = pool->alloc.cache[--pool->alloc.count]; - page_pool_return_page(pool, page); + netmem = pool->alloc.cache[--pool->alloc.count]; + page_pool_return_page(pool, netmem); } } EXPORT_SYMBOL(page_pool_update_nid); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 0ed4d00d258c..83f8cd8aa2d1 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1015,8 +1015,10 @@ int skb_cow_data_for_xdp(struct page_pool *pool, struct sk_buff **pskb, EXPORT_SYMBOL(skb_cow_data_for_xdp); #if IS_ENABLED(CONFIG_PAGE_POOL) -bool napi_pp_put_page(struct page *page) +bool napi_pp_put_page(netmem_ref netmem) { + struct page *page = netmem_to_page(netmem); + page = compound_head(page); /* page->pp_magic is OR'ed with PP_SIGNATURE after the allocation @@ -1029,7 +1031,7 @@ bool napi_pp_put_page(struct page *page) if (unlikely(!is_pp_page(page))) return false; - page_pool_put_full_page(page->pp, page, false); + page_pool_put_full_netmem(page->pp, page_to_netmem(page), false); return true; } @@ -1040,7 +1042,7 @@ static bool skb_pp_recycle(struct sk_buff *skb, void *data) { if (!IS_ENABLED(CONFIG_PAGE_POOL) || !skb->pp_recycle) return false; - return napi_pp_put_page(virt_to_page(data)); + return napi_pp_put_page(page_to_netmem(virt_to_page(data))); } /** -- cgit v1.2.3-58-ga151 From cda91d5b911a5a168a1c6e6917afda43b0e458c8 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 1 Jul 2024 13:48:49 -0400 Subject: sctp: cancel a blocking accept when shutdown a listen socket As David Laight noticed, "In a multithreaded program it is reasonable to have a thread blocked in accept(). With TCP a subsequent shutdown(listen_fd, SHUT_RDWR) causes the accept to fail. But nothing happens for SCTP." sctp_disconnect() is eventually called when shutdown a listen socket, but nothing is done in this function. This patch sets RCV_SHUTDOWN flag in sk->sk_shutdown there, and adds the check (sk->sk_shutdown & RCV_SHUTDOWN) to break and return in sctp_accept(). Note that shutdown() is only supported on TCP-style SCTP socket. Reported-by: David Laight Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/sctp/socket.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index c009383369b2..32f76f1298da 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4834,10 +4834,14 @@ int sctp_inet_connect(struct socket *sock, struct sockaddr *uaddr, return sctp_connect(sock->sk, uaddr, addr_len, flags); } -/* FIXME: Write comments. */ +/* Only called when shutdown a listening SCTP socket. 
*/ static int sctp_disconnect(struct sock *sk, int flags) { - return -EOPNOTSUPP; /* STUB */ + if (!sctp_style(sk, TCP)) + return -EOPNOTSUPP; + + sk->sk_shutdown |= RCV_SHUTDOWN; + return 0; } /* 4.1.4 accept() - TCP Style Syntax @@ -4866,7 +4870,8 @@ static struct sock *sctp_accept(struct sock *sk, struct proto_accept_arg *arg) goto out; } - if (!sctp_sstate(sk, LISTENING)) { + if (!sctp_sstate(sk, LISTENING) || + (sk->sk_shutdown & RCV_SHUTDOWN)) { error = -EINVAL; goto out; } @@ -9393,7 +9398,8 @@ static int sctp_wait_for_accept(struct sock *sk, long timeo) } err = -EINVAL; - if (!sctp_sstate(sk, LISTENING)) + if (!sctp_sstate(sk, LISTENING) || + (sk->sk_shutdown & RCV_SHUTDOWN)) break; err = 0; -- cgit v1.2.3-58-ga151 From 1a16cdf77e0d7de0fb640e65d65c0898b38c1b4b Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 2 Jul 2024 09:41:57 -0700 Subject: net: ethtool: fix compat with old RSS context API Device driver gets access to rxfh_dev, while rxfh is just a local copy of user space params. We need to check what RSS context ID driver assigned in rxfh_dev, not rxfh. Using rxfh leads to trying to store all contexts at index 0xffffffff. From the user perspective it leads to "driver chose duplicate ID" warnings when second context is added and inability to access any contexts even tho they were successfully created - xa_load() for the actual context ID will return NULL, and syscall will return -ENOENT. Looks like a rebasing mistake, since rxfh_dev was added relatively recently by commit fb6e30a72539 ("net: ethtool: pass a pointer to parameters to get/set_rxfh ethtool ops"). Fixes: eac9122f0c41 ("net: ethtool: record custom RSS contexts in the XArray") Reviewed-by: Edward Cree Link: https://patch.msgid.link/20240702164157.4018425-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/ethtool/ioctl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index d8795ed07ba3..46f0497ae6bc 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -1483,13 +1483,13 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, /* Update rss_ctx tracking */ if (create && !ops->create_rxfh_context) { /* driver uses old API, it chose context ID */ - if (WARN_ON(xa_load(&dev->ethtool->rss_ctx, rxfh.rss_context))) { + if (WARN_ON(xa_load(&dev->ethtool->rss_ctx, rxfh_dev.rss_context))) { /* context ID reused, our tracking is screwed */ kfree(ctx); goto out; } /* Allocate the exact ID the driver gave us */ - if (xa_is_err(xa_store(&dev->ethtool->rss_ctx, rxfh.rss_context, + if (xa_is_err(xa_store(&dev->ethtool->rss_ctx, rxfh_dev.rss_context, ctx, GFP_KERNEL))) { kfree(ctx); goto out; -- cgit v1.2.3-58-ga151 From 47c130130de2fd68d9e4f591b0ea25975bdad68a Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Wed, 3 Jul 2024 08:11:48 +0200 Subject: l2tp: Remove duplicate included header file trace.h Remove duplicate included header file trace.h and the following warning reported by make includecheck: trace.h is included more than once Compile-tested only. 
Signed-off-by: Thorsten Blum Reviewed-by: Michal Kubiak Link: https://patch.msgid.link/20240703061147.691973-2-thorsten.blum@toblux.com Signed-off-by: Paolo Abeni --- net/l2tp/l2tp_core.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 64f446f0930b..a99032076e04 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -60,7 +60,6 @@ #include #include "l2tp_core.h" -#include "trace.h" #define CREATE_TRACE_POINTS #include "trace.h" -- cgit v1.2.3-58-ga151 From caa93b7c25945d302689de07bd404655db93ae6e Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Wed, 3 Jul 2024 13:18:49 +0100 Subject: ethtool: move firmware flashing flag to struct ethtool_netdev_state Commit 31e0aa99dc02 ("ethtool: Veto some operations during firmware flashing process") added a flag module_fw_flash_in_progress to struct net_device. As this is ethtool related state, move it to the recently created struct ethtool_netdev_state, accessed via the 'ethtool' member of struct net_device. Suggested-by: Jakub Kicinski Signed-off-by: Edward Cree Reviewed-by: Michal Kubiak Reviewed-by: Ido Schimmel Link: https://patch.msgid.link/20240703121849.652893-1-edward.cree@amd.com Signed-off-by: Jakub Kicinski --- include/linux/ethtool.h | 2 ++ include/linux/netdevice.h | 3 --- net/ethtool/eeprom.c | 2 +- net/ethtool/ioctl.c | 8 ++++---- net/ethtool/module.c | 10 +++++----- net/ethtool/netlink.c | 2 +- 6 files changed, 13 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index f74bb0cf8ed1..3a99238ef895 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -1107,11 +1107,13 @@ int ethtool_virtdev_set_link_ksettings(struct net_device *dev, * @rss_lock: Protects entries in @rss_ctx. May be taken from * within RTNL. * @wol_enabled: Wake-on-LAN is enabled + * @module_fw_flash_in_progress: Module firmware flashing is in progress. */ struct ethtool_netdev_state { struct xarray rss_ctx; struct mutex rss_lock; unsigned wol_enabled:1; + unsigned module_fw_flash_in_progress:1; }; struct phy_device; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 3c719f0d5f5a..93558645c6d0 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1989,8 +1989,6 @@ enum netdev_reg_state { * * @threaded: napi threaded mode is enabled * - * @module_fw_flash_in_progress: Module firmware flashing is in progress. - * * @net_notifier_list: List of per-net netdev notifier block * that follow this device when it is moved * to another network namespace. 
@@ -2376,7 +2374,6 @@ struct net_device { bool proto_down; bool threaded; - unsigned module_fw_flash_in_progress:1; struct list_head net_notifier_list; #if IS_ENABLED(CONFIG_MACSEC) diff --git a/net/ethtool/eeprom.c b/net/ethtool/eeprom.c index f36811b3ecf1..3b8209e930fd 100644 --- a/net/ethtool/eeprom.c +++ b/net/ethtool/eeprom.c @@ -91,7 +91,7 @@ static int get_module_eeprom_by_page(struct net_device *dev, { const struct ethtool_ops *ops = dev->ethtool_ops; - if (dev->module_fw_flash_in_progress) { + if (dev->ethtool->module_fw_flash_in_progress) { NL_SET_ERR_MSG(extack, "Module firmware flashing is in progress"); return -EBUSY; diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 46f0497ae6bc..d72b0fec89af 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -658,7 +658,7 @@ static int ethtool_get_settings(struct net_device *dev, void __user *useraddr) if (!dev->ethtool_ops->get_link_ksettings) return -EOPNOTSUPP; - if (dev->module_fw_flash_in_progress) + if (dev->ethtool->module_fw_flash_in_progress) return -EBUSY; memset(&link_ksettings, 0, sizeof(link_ksettings)); @@ -1572,7 +1572,7 @@ static int ethtool_reset(struct net_device *dev, char __user *useraddr) if (!dev->ethtool_ops->reset) return -EOPNOTSUPP; - if (dev->module_fw_flash_in_progress) + if (dev->ethtool->module_fw_flash_in_progress) return -EBUSY; if (copy_from_user(&reset, useraddr, sizeof(reset))) @@ -2588,7 +2588,7 @@ int ethtool_get_module_info_call(struct net_device *dev, const struct ethtool_ops *ops = dev->ethtool_ops; struct phy_device *phydev = dev->phydev; - if (dev->module_fw_flash_in_progress) + if (dev->ethtool->module_fw_flash_in_progress) return -EBUSY; if (dev->sfp_bus) @@ -2628,7 +2628,7 @@ int ethtool_get_module_eeprom_call(struct net_device *dev, const struct ethtool_ops *ops = dev->ethtool_ops; struct phy_device *phydev = dev->phydev; - if (dev->module_fw_flash_in_progress) + if (dev->ethtool->module_fw_flash_in_progress) return -EBUSY; if (dev->sfp_bus) diff --git a/net/ethtool/module.c b/net/ethtool/module.c index 6b7448df08d5..aba78436d350 100644 --- a/net/ethtool/module.c +++ b/net/ethtool/module.c @@ -37,7 +37,7 @@ static int module_get_power_mode(struct net_device *dev, if (!ops->get_module_power_mode) return 0; - if (dev->module_fw_flash_in_progress) { + if (dev->ethtool->module_fw_flash_in_progress) { NL_SET_ERR_MSG(extack, "Module firmware flashing is in progress"); return -EBUSY; @@ -119,7 +119,7 @@ ethnl_set_module_validate(struct ethnl_req_info *req_info, if (!tb[ETHTOOL_A_MODULE_POWER_MODE_POLICY]) return 0; - if (req_info->dev->module_fw_flash_in_progress) { + if (req_info->dev->ethtool->module_fw_flash_in_progress) { NL_SET_ERR_MSG(info->extack, "Module firmware flashing is in progress"); return -EBUSY; @@ -226,7 +226,7 @@ static void module_flash_fw_work(struct work_struct *work) ethtool_cmis_fw_update(&module_fw->fw_update); module_flash_fw_work_list_del(&module_fw->list); - module_fw->fw_update.dev->module_fw_flash_in_progress = false; + module_fw->fw_update.dev->ethtool->module_fw_flash_in_progress = false; netdev_put(module_fw->fw_update.dev, &module_fw->dev_tracker); release_firmware(module_fw->fw_update.fw); kfree(module_fw); @@ -318,7 +318,7 @@ module_flash_fw_schedule(struct net_device *dev, const char *file_name, if (err < 0) goto err_release_firmware; - dev->module_fw_flash_in_progress = true; + dev->ethtool->module_fw_flash_in_progress = true; netdev_hold(dev, &module_fw->dev_tracker, GFP_KERNEL); fw_update->dev = dev; fw_update->ntf_params.portid = 
info->snd_portid; @@ -385,7 +385,7 @@ static int ethnl_module_fw_flash_validate(struct net_device *dev, return -EOPNOTSUPP; } - if (dev->module_fw_flash_in_progress) { + if (dev->ethtool->module_fw_flash_in_progress) { NL_SET_ERR_MSG(extack, "Module firmware flashing already in progress"); return -EBUSY; } diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index 81fe2e5b95f6..cb1eea00e349 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -807,7 +807,7 @@ static int ethnl_netdev_event(struct notifier_block *this, unsigned long event, ethnl_notify_features(ptr); break; case NETDEV_PRE_UP: - if (dev->module_fw_flash_in_progress) { + if (dev->ethtool->module_fw_flash_in_progress) { NL_SET_ERR_MSG(extack, "Can't set port up while flashing module firmware"); return NOTIFY_BAD; } -- cgit v1.2.3-58-ga151 From c7f79f2620b7776586c626edf21eb6ed6ed3d1eb Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 3 Jul 2024 12:46:34 +0200 Subject: openvswitch: prepare for stolen verdict coming from conntrack and nat engine At this time, conntrack either returns NF_ACCEPT or NF_DROP. To improve debugging it would be nice to be able to replace the NF_DROP verdict with the NF_DROP_REASON() helper. This helper releases the skb instantly (so drop_monitor can pinpoint the precise location) and returns NF_STOLEN. Prepare call sites to deal with this before introducing such changes in the conntrack and nat core. Signed-off-by: Florian Westphal Reviewed-by: Aaron Conole Signed-off-by: David S. Miller --- net/openvswitch/conntrack.c | 47 +++++++++++++++++++++++++++++++++++---------- 1 file changed, 37 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 3b980bf2770b..8eb1d644b741 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -679,6 +679,8 @@ static int ovs_ct_nat(struct net *net, struct sw_flow_key *key, action |= BIT(NF_NAT_MANIP_DST); err = nf_ct_nat(skb, ct, ctinfo, &action, &info->range, info->commit); + if (err != NF_ACCEPT) + return err; if (action & BIT(NF_NAT_MANIP_SRC)) ovs_nat_update_key(key, skb, NF_NAT_MANIP_SRC); @@ -697,6 +699,22 @@ static int ovs_ct_nat(struct net *net, struct sw_flow_key *key, } #endif +static int verdict_to_errno(unsigned int verdict) +{ + switch (verdict & NF_VERDICT_MASK) { + case NF_ACCEPT: + return 0; + case NF_DROP: + return -EINVAL; + case NF_STOLEN: + return -EINPROGRESS; + default: + break; + } + + return -EINVAL; +} + /* Pass 'skb' through conntrack in 'net', using zone configured in 'info', if * not done already. Update key with new CT state after passing the packet * through conntrack. @@ -735,7 +753,7 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key, err = nf_conntrack_in(skb, &state); if (err != NF_ACCEPT) - return -ENOENT; + return verdict_to_errno(err); /* Clear CT state NAT flags to mark that we have not yet done * NAT after the nf_conntrack_in() call. We can actually clear @@ -762,9 +780,12 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key, * the key->ct_state.
*/ if (info->nat && !(key->ct_state & OVS_CS_F_NAT_MASK) && - (nf_ct_is_confirmed(ct) || info->commit) && - ovs_ct_nat(net, key, info, skb, ct, ctinfo) != NF_ACCEPT) { - return -EINVAL; + (nf_ct_is_confirmed(ct) || info->commit)) { + int err = ovs_ct_nat(net, key, info, skb, ct, ctinfo); + + err = verdict_to_errno(err); + if (err) + return err; } /* Userspace may decide to perform a ct lookup without a helper @@ -795,9 +816,12 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key, * - When committing an unconfirmed connection. */ if ((nf_ct_is_confirmed(ct) ? !cached || add_helper : - info->commit) && - nf_ct_helper(skb, ct, ctinfo, info->family) != NF_ACCEPT) { - return -EINVAL; + info->commit)) { + int err = nf_ct_helper(skb, ct, ctinfo, info->family); + + err = verdict_to_errno(err); + if (err) + return err; } if (nf_ct_protonum(ct) == IPPROTO_TCP && @@ -1001,10 +1025,9 @@ static int ovs_ct_commit(struct net *net, struct sw_flow_key *key, /* This will take care of sending queued events even if the connection * is already confirmed. */ - if (nf_conntrack_confirm(skb) != NF_ACCEPT) - return -EINVAL; + err = nf_conntrack_confirm(skb); - return 0; + return verdict_to_errno(err); } /* Returns 0 on success, -EINPROGRESS if 'skb' is stolen, or other nonzero @@ -1039,6 +1062,10 @@ int ovs_ct_execute(struct net *net, struct sk_buff *skb, else err = ovs_ct_lookup(net, key, info, skb); + /* conntrack core returned NF_STOLEN */ + if (err == -EINPROGRESS) + return err; + skb_push_rcsum(skb, nh_ofs); if (err) ovs_kfree_skb_reason(skb, OVS_DROP_CONNTRACK); -- cgit v1.2.3-58-ga151 From fecef4cd42c689a200bdd39e6fffa71475904bc1 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 4 Jul 2024 16:48:15 +0200 Subject: tun: Assign missing bpf_net_context. During the introduction of struct bpf_net_context handling for XDP-redirect, the tun driver was missed. Jakub also pointed out that there is another call chain to do_xdp_generic() originating from netif_receive_skb() and drivers may use it outside of the NAPI context. Set the bpf_net_context before invoking the BPF XDP program within the TUN driver. Also set the bpf_net_context in do_xdp_generic() if an XDP program is available.
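The bracketing pattern is the same in every affected path: establish an on-stack context before any XDP program can run and clear it on each exit. A minimal sketch, using only the two helpers visible in the diff below (the surrounding locking mirrors the tun and dev.c call sites; everything else is illustrative):

	struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx;

	rcu_read_lock();
	bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx);	/* on-stack context */

	/* ... run the XDP program / call do_xdp_generic() here ... */

	bpf_net_ctx_clear(bpf_net_ctx);			/* on every exit path */
	rcu_read_unlock();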
Reported-by: syzbot+0b5c75599f1d872bea6f@syzkaller.appspotmail.com Reported-by: syzbot+5ae46b237278e2369cac@syzkaller.appspotmail.com Reported-by: syzbot+c1e04a422bbc0f0f2921@syzkaller.appspotmail.com Fixes: 401cb7dae8130 ("net: Reference bpf_redirect_info via task_struct on PREEMPT_RT.") Signed-off-by: Sebastian Andrzej Siewior Link: https://patch.msgid.link/20240704144815.j8xQda5r@linutronix.de Signed-off-by: Jakub Kicinski --- drivers/net/tun.c | 7 +++++++ net/core/dev.c | 5 +++++ 2 files changed, 12 insertions(+) (limited to 'net') diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 9254bca2813d..9b24861464bc 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1661,6 +1661,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun, int len, int *skb_xdp) { struct page_frag *alloc_frag = ¤t->task_frag; + struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx; struct bpf_prog *xdp_prog; int buflen = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); char *buf; @@ -1700,6 +1701,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun, local_bh_disable(); rcu_read_lock(); + bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx); xdp_prog = rcu_dereference(tun->xdp_prog); if (xdp_prog) { struct xdp_buff xdp; @@ -1728,12 +1730,14 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun, pad = xdp.data - xdp.data_hard_start; len = xdp.data_end - xdp.data; } + bpf_net_ctx_clear(bpf_net_ctx); rcu_read_unlock(); local_bh_enable(); return __tun_build_skb(tfile, alloc_frag, buf, buflen, len, pad); out: + bpf_net_ctx_clear(bpf_net_ctx); rcu_read_unlock(); local_bh_enable(); return NULL; @@ -2566,6 +2570,7 @@ static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len) if (m->msg_controllen == sizeof(struct tun_msg_ctl) && ctl && ctl->type == TUN_MSG_PTR) { + struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx; struct tun_page tpage; int n = ctl->num; int flush = 0, queued = 0; @@ -2574,6 +2579,7 @@ static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len) local_bh_disable(); rcu_read_lock(); + bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx); for (i = 0; i < n; i++) { xdp = &((struct xdp_buff *)ctl->ptr)[i]; @@ -2588,6 +2594,7 @@ static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len) if (tfile->napi_enabled && queued > 0) napi_schedule(&tfile->napi); + bpf_net_ctx_clear(bpf_net_ctx); rcu_read_unlock(); local_bh_enable(); diff --git a/net/core/dev.c b/net/core/dev.c index 385c4091aa77..73e5af6943c3 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5126,11 +5126,14 @@ static DEFINE_STATIC_KEY_FALSE(generic_xdp_needed_key); int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff **pskb) { + struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx; + if (xdp_prog) { struct xdp_buff xdp; u32 act; int err; + bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx); act = netif_receive_generic_xdp(pskb, &xdp, xdp_prog); if (act != XDP_PASS) { switch (act) { @@ -5144,11 +5147,13 @@ int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff **pskb) generic_xdp_tx(*pskb, xdp_prog); break; } + bpf_net_ctx_clear(bpf_net_ctx); return XDP_DROP; } } return XDP_PASS; out_redir: + bpf_net_ctx_clear(bpf_net_ctx); kfree_skb_reason(*pskb, SKB_DROP_REASON_XDP); return XDP_DROP; } -- cgit v1.2.3-58-ga151 From 093b0f366567aa3fed85c316f832607069202b23 Mon Sep 17 00:00:00 2001 From: Adrian Moreno Date: Thu, 4 Jul 2024 10:56:52 +0200 Subject: net: psample: add user cookie Add a user cookie to the sample metadata so that sample emitters can provide more 
contextual information to samples. If present, send the user cookie in a new attribute: PSAMPLE_ATTR_USER_COOKIE. Reviewed-by: Michal Kubiak Acked-by: Eelco Chaudron Reviewed-by: Simon Horman Reviewed-by: Ido Schimmel Signed-off-by: Adrian Moreno Link: https://patch.msgid.link/20240704085710.353845-2-amorenoz@redhat.com Signed-off-by: Jakub Kicinski --- include/net/psample.h | 2 ++ include/uapi/linux/psample.h | 1 + net/psample/psample.c | 9 ++++++++- 3 files changed, 11 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/include/net/psample.h b/include/net/psample.h index 0509d2d6be67..2ac71260a546 100644 --- a/include/net/psample.h +++ b/include/net/psample.h @@ -25,6 +25,8 @@ struct psample_metadata { out_tc_occ_valid:1, latency_valid:1, unused:5; + const u8 *user_cookie; + u32 user_cookie_len; }; struct psample_group *psample_group_get(struct net *net, u32 group_num); diff --git a/include/uapi/linux/psample.h b/include/uapi/linux/psample.h index e585db5bf2d2..e80637e1d97b 100644 --- a/include/uapi/linux/psample.h +++ b/include/uapi/linux/psample.h @@ -19,6 +19,7 @@ enum { PSAMPLE_ATTR_LATENCY, /* u64, nanoseconds */ PSAMPLE_ATTR_TIMESTAMP, /* u64, nanoseconds */ PSAMPLE_ATTR_PROTO, /* u16 */ + PSAMPLE_ATTR_USER_COOKIE, /* binary, user provided data */ __PSAMPLE_ATTR_MAX }; diff --git a/net/psample/psample.c b/net/psample/psample.c index a5d9b8446f77..b37488f426bc 100644 --- a/net/psample/psample.c +++ b/net/psample/psample.c @@ -386,7 +386,9 @@ void psample_sample_packet(struct psample_group *group, struct sk_buff *skb, nla_total_size(sizeof(u32)) + /* group_num */ nla_total_size(sizeof(u32)) + /* seq */ nla_total_size_64bit(sizeof(u64)) + /* timestamp */ - nla_total_size(sizeof(u16)); /* protocol */ + nla_total_size(sizeof(u16)) + /* protocol */ + (md->user_cookie_len ? + nla_total_size(md->user_cookie_len) : 0); /* user cookie */ #ifdef CONFIG_INET tun_info = skb_tunnel_info(skb); @@ -486,6 +488,11 @@ void psample_sample_packet(struct psample_group *group, struct sk_buff *skb, } #endif + if (md->user_cookie && md->user_cookie_len && + nla_put(nl_skb, PSAMPLE_ATTR_USER_COOKIE, md->user_cookie_len, + md->user_cookie)) + goto error; + genlmsg_end(nl_skb, data); genlmsg_multicast_netns(&psample_nl_family, group->net, nl_skb, 0, PSAMPLE_NL_MCGRP_SAMPLE, GFP_ATOMIC); -- cgit v1.2.3-58-ga151 From 03448444ae5c6e1de1ae7ff9fa19dc320eeb2d49 Mon Sep 17 00:00:00 2001 From: Adrian Moreno Date: Thu, 4 Jul 2024 10:56:53 +0200 Subject: net: sched: act_sample: add action cookie to sample If the action has a user_cookie, pass it along to the sample so it can be easily identified. 
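From the psample side this only means filling the two new metadata fields before emitting the sample; a rough sketch follows (the cookie source, buffer size and length variables are illustrative, only the psample names are real):

	struct psample_metadata md = {};
	u8 cookie[16];				/* illustrative stack buffer */

	memcpy(cookie, my_cookie_bytes, my_cookie_len);	/* hypothetical source */
	md.user_cookie = cookie;
	md.user_cookie_len = my_cookie_len;

	psample_sample_packet(group, skb, rate, &md);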
Reviewed-by: Michal Kubiak Reviewed-by: Aaron Conole Acked-by: Eelco Chaudron Reviewed-by: Ido Schimmel Signed-off-by: Adrian Moreno Link: https://patch.msgid.link/20240704085710.353845-3-amorenoz@redhat.com Signed-off-by: Jakub Kicinski --- net/sched/act_sample.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'net') diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c index a69b53d54039..2ceb4d141b71 100644 --- a/net/sched/act_sample.c +++ b/net/sched/act_sample.c @@ -167,7 +167,9 @@ TC_INDIRECT_SCOPE int tcf_sample_act(struct sk_buff *skb, { struct tcf_sample *s = to_sample(a); struct psample_group *psample_group; + u8 cookie_data[TC_COOKIE_MAX_SIZE]; struct psample_metadata md = {}; + struct tc_cookie *user_cookie; int retval; tcf_lastuse_update(&s->tcf_tm); @@ -189,6 +191,16 @@ TC_INDIRECT_SCOPE int tcf_sample_act(struct sk_buff *skb, if (skb_at_tc_ingress(skb) && tcf_sample_dev_ok_push(skb->dev)) skb_push(skb, skb->mac_len); + rcu_read_lock(); + user_cookie = rcu_dereference(a->user_cookie); + if (user_cookie) { + memcpy(cookie_data, user_cookie->data, + user_cookie->len); + md.user_cookie = cookie_data; + md.user_cookie_len = user_cookie->len; + } + rcu_read_unlock(); + md.trunc_size = s->truncate ? s->trunc_size : skb->len; psample_sample_packet(psample_group, skb, s->rate, &md); -- cgit v1.2.3-58-ga151 From c35d86a23029f1186e3c7a65df7c38b762fb0434 Mon Sep 17 00:00:00 2001 From: Adrian Moreno Date: Thu, 4 Jul 2024 10:56:54 +0200 Subject: net: psample: skip packet copy if no listeners If nobody is listening on the multicast group, generating the sample, which involves copying packet data, seems completely unnecessary. Return fast in this case. Reviewed-by: Aaron Conole Acked-by: Eelco Chaudron Reviewed-by: Ido Schimmel Reviewed-by: Simon Horman Signed-off-by: Adrian Moreno Link: https://patch.msgid.link/20240704085710.353845-4-amorenoz@redhat.com Signed-off-by: Jakub Kicinski --- net/psample/psample.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/psample/psample.c b/net/psample/psample.c index b37488f426bc..1c76f3e48dcd 100644 --- a/net/psample/psample.c +++ b/net/psample/psample.c @@ -376,6 +376,10 @@ void psample_sample_packet(struct psample_group *group, struct sk_buff *skb, void *data; int ret; + if (!genl_has_listeners(&psample_nl_family, group->net, + PSAMPLE_NL_MCGRP_SAMPLE)) + return; + meta_len = (in_ifindex ? nla_total_size(sizeof(u16)) : 0) + (out_ifindex ? nla_total_size(sizeof(u16)) : 0) + (md->out_tc_valid ? nla_total_size(sizeof(u16)) : 0) + -- cgit v1.2.3-58-ga151 From 7b1b2b60c63f070e0dfbe072ccaae13168b38d01 Mon Sep 17 00:00:00 2001 From: Adrian Moreno Date: Thu, 4 Jul 2024 10:56:55 +0200 Subject: net: psample: allow using rate as probability Although not explicitly documented in the psample module itself, the definition of PSAMPLE_ATTR_SAMPLE_RATE seems inherited from act_sample. Quoting tc-sample(8): "RATE of 100 will lead to an average of one sampled packet out of every 100 observed." With these semantics, the rates that we can express with an unsigned 32-bit number are very unevenly distributed and concentrated towards "sampling few packets". For example, we can express a probability of 2.32E-8% but we cannot express anything between 100% and 50%. For sampling applications that are capable of sampling a decent number of packets, these sampling rate semantics are not very useful.
Add a new flag to the uAPI that indicates that the sampling rate is expressed in scaled probability, this is: - 0 is 0% probability, no packets get sampled. - U32_MAX is 100% probability, all packets get sampled. Reviewed-by: Aaron Conole Acked-by: Eelco Chaudron Reviewed-by: Ido Schimmel Signed-off-by: Adrian Moreno Link: https://patch.msgid.link/20240704085710.353845-5-amorenoz@redhat.com Signed-off-by: Jakub Kicinski --- include/net/psample.h | 3 ++- include/uapi/linux/psample.h | 10 +++++++++- net/psample/psample.c | 3 +++ 3 files changed, 14 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/include/net/psample.h b/include/net/psample.h index 2ac71260a546..c52e9ebd88dd 100644 --- a/include/net/psample.h +++ b/include/net/psample.h @@ -24,7 +24,8 @@ struct psample_metadata { u8 out_tc_valid:1, out_tc_occ_valid:1, latency_valid:1, - unused:5; + rate_as_probability:1, + unused:4; const u8 *user_cookie; u32 user_cookie_len; }; diff --git a/include/uapi/linux/psample.h b/include/uapi/linux/psample.h index e80637e1d97b..b765f0e81f20 100644 --- a/include/uapi/linux/psample.h +++ b/include/uapi/linux/psample.h @@ -8,7 +8,11 @@ enum { PSAMPLE_ATTR_ORIGSIZE, PSAMPLE_ATTR_SAMPLE_GROUP, PSAMPLE_ATTR_GROUP_SEQ, - PSAMPLE_ATTR_SAMPLE_RATE, + PSAMPLE_ATTR_SAMPLE_RATE, /* u32, ratio between observed and + * sampled packets or scaled probability + * if PSAMPLE_ATTR_SAMPLE_PROBABILITY + * is set. + */ PSAMPLE_ATTR_DATA, PSAMPLE_ATTR_GROUP_REFCOUNT, PSAMPLE_ATTR_TUNNEL, @@ -20,6 +24,10 @@ enum { PSAMPLE_ATTR_TIMESTAMP, /* u64, nanoseconds */ PSAMPLE_ATTR_PROTO, /* u16 */ PSAMPLE_ATTR_USER_COOKIE, /* binary, user provided data */ + PSAMPLE_ATTR_SAMPLE_PROBABILITY,/* no argument, interpret rate in + * PSAMPLE_ATTR_SAMPLE_RATE as a + * probability scaled 0 - U32_MAX. + */ __PSAMPLE_ATTR_MAX }; diff --git a/net/psample/psample.c b/net/psample/psample.c index 1c76f3e48dcd..f48b5b9cd409 100644 --- a/net/psample/psample.c +++ b/net/psample/psample.c @@ -497,6 +497,9 @@ void psample_sample_packet(struct psample_group *group, struct sk_buff *skb, md->user_cookie)) goto error; + if (md->rate_as_probability) + nla_put_flag(skb, PSAMPLE_ATTR_SAMPLE_PROBABILITY); + genlmsg_end(nl_skb, data); genlmsg_multicast_netns(&psample_nl_family, group->net, nl_skb, 0, PSAMPLE_NL_MCGRP_SAMPLE, GFP_ATOMIC); -- cgit v1.2.3-58-ga151 From aae0b82b46cb5004bdf82a000c004d69a0885c33 Mon Sep 17 00:00:00 2001 From: Adrian Moreno Date: Thu, 4 Jul 2024 10:56:56 +0200 Subject: net: openvswitch: add psample action Add support for a new action: psample. This action accepts a u32 group id and a variable-length cookie and uses the psample multicast group to make the packet available for observability. The maximum length of the user-defined cookie is set to 16, same as tc_cookie, to discourage using cookies that will not be offloadable. 
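As an aside on the scaled-probability encoding introduced two patches earlier: converting a classic "1 out of every N packets" rate into it is plain integer arithmetic. An illustrative helper, not part of any patch in this series:

	/* Illustrative only: map "1 out of every N packets" to the
	 * scaled-probability encoding (0 = never, U32_MAX = always).
	 */
	static inline u32 sample_rate_to_probability(u32 rate_1_in_n)
	{
		if (!rate_1_in_n)
			return 0;			/* no valid rate, sample nothing */
		return U32_MAX / rate_1_in_n;		/* N=1 -> 100%, N=2 -> ~50% */
	}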
Reviewed-by: Michal Kubiak Reviewed-by: Aaron Conole Reviewed-by: Ilya Maximets Acked-by: Eelco Chaudron Signed-off-by: Adrian Moreno Link: https://patch.msgid.link/20240704085710.353845-6-amorenoz@redhat.com Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/ovs_flow.yaml | 17 +++++++++++ include/uapi/linux/openvswitch.h | 28 ++++++++++++++++++ net/openvswitch/Kconfig | 1 + net/openvswitch/actions.c | 48 +++++++++++++++++++++++++++++++ net/openvswitch/flow_netlink.c | 32 ++++++++++++++++++++- 5 files changed, 125 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/Documentation/netlink/specs/ovs_flow.yaml b/Documentation/netlink/specs/ovs_flow.yaml index 4fdfc6b5cae9..46f5d1cd8a5f 100644 --- a/Documentation/netlink/specs/ovs_flow.yaml +++ b/Documentation/netlink/specs/ovs_flow.yaml @@ -727,6 +727,12 @@ attribute-sets: name: dec-ttl type: nest nested-attributes: dec-ttl-attrs + - + name: psample + type: nest + nested-attributes: psample-attrs + doc: | + Sends a packet sample to psample for external observation. - name: tunnel-key-attrs enum-name: ovs-tunnel-key-attr @@ -938,6 +944,17 @@ attribute-sets: - name: gbp type: u32 + - + name: psample-attrs + enum-name: ovs-psample-attr + name-prefix: ovs-psample-attr- + attributes: + - + name: group + type: u32 + - + name: cookie + type: binary operations: name-prefix: ovs-flow-cmd- diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index efc82c318fa2..3dd653748725 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -914,6 +914,31 @@ struct check_pkt_len_arg { }; #endif +#define OVS_PSAMPLE_COOKIE_MAX_SIZE 16 +/** + * enum ovs_psample_attr - Attributes for %OVS_ACTION_ATTR_PSAMPLE + * action. + * + * @OVS_PSAMPLE_ATTR_GROUP: 32-bit number to identify the source of the + * sample. + * @OVS_PSAMPLE_ATTR_COOKIE: An optional variable-length binary cookie that + * contains user-defined metadata. The maximum length is + * OVS_PSAMPLE_COOKIE_MAX_SIZE bytes. + * + * Sends the packet to the psample multicast group with the specified group and + * cookie. It is possible to combine this action with the + * %OVS_ACTION_ATTR_TRUNC action to limit the size of the sample. + */ +enum ovs_psample_attr { + OVS_PSAMPLE_ATTR_GROUP = 1, /* u32 number. */ + OVS_PSAMPLE_ATTR_COOKIE, /* Optional, user specified cookie. */ + + /* private: */ + __OVS_PSAMPLE_ATTR_MAX +}; + +#define OVS_PSAMPLE_ATTR_MAX (__OVS_PSAMPLE_ATTR_MAX - 1) + /** * enum ovs_action_attr - Action types. * @@ -966,6 +991,8 @@ struct check_pkt_len_arg { * of l3 tunnel flag in the tun_flags field of OVS_ACTION_ATTR_ADD_MPLS * argument. * @OVS_ACTION_ATTR_DROP: Explicit drop action. + * @OVS_ACTION_ATTR_PSAMPLE: Send a sample of the packet to external observers + * via psample. * * Only a single header can be set with a single %OVS_ACTION_ATTR_SET. Not all * fields within a header are modifiable, e.g. the IPv4 protocol and fragment @@ -1004,6 +1031,7 @@ enum ovs_action_attr { OVS_ACTION_ATTR_ADD_MPLS, /* struct ovs_action_add_mpls. */ OVS_ACTION_ATTR_DEC_TTL, /* Nested OVS_DEC_TTL_ATTR_*. */ OVS_ACTION_ATTR_DROP, /* u32 error code. */ + OVS_ACTION_ATTR_PSAMPLE, /* Nested OVS_PSAMPLE_ATTR_*. */ __OVS_ACTION_ATTR_MAX, /* Nothing past this will be accepted * from userspace. 
*/ diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig index 29a7081858cd..2535f3f9f462 100644 --- a/net/openvswitch/Kconfig +++ b/net/openvswitch/Kconfig @@ -10,6 +10,7 @@ config OPENVSWITCH (NF_CONNTRACK && ((!NF_DEFRAG_IPV6 || NF_DEFRAG_IPV6) && \ (!NF_NAT || NF_NAT) && \ (!NETFILTER_CONNCOUNT || NETFILTER_CONNCOUNT))) + depends on PSAMPLE || !PSAMPLE select LIBCRC32C select MPLS select NET_MPLS_GSO diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 964225580824..892d7e48fc5b 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -24,6 +24,11 @@ #include #include #include + +#if IS_ENABLED(CONFIG_PSAMPLE) +#include +#endif + #include #include "datapath.h" @@ -1299,6 +1304,40 @@ static int execute_dec_ttl(struct sk_buff *skb, struct sw_flow_key *key) return 0; } +#if IS_ENABLED(CONFIG_PSAMPLE) +static void execute_psample(struct datapath *dp, struct sk_buff *skb, + const struct nlattr *attr) +{ + struct psample_group psample_group = {}; + struct psample_metadata md = {}; + const struct nlattr *a; + int rem; + + nla_for_each_attr(a, nla_data(attr), nla_len(attr), rem) { + switch (nla_type(a)) { + case OVS_PSAMPLE_ATTR_GROUP: + psample_group.group_num = nla_get_u32(a); + break; + + case OVS_PSAMPLE_ATTR_COOKIE: + md.user_cookie = nla_data(a); + md.user_cookie_len = nla_len(a); + break; + } + } + + psample_group.net = ovs_dp_get_net(dp); + md.in_ifindex = OVS_CB(skb)->input_vport->dev->ifindex; + md.trunc_size = skb->len - OVS_CB(skb)->cutlen; + + psample_sample_packet(&psample_group, skb, 0, &md); +} +#else +static void execute_psample(struct datapath *dp, struct sk_buff *skb, + const struct nlattr *attr) +{} +#endif + /* Execute a list of actions against 'skb'. */ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, struct sw_flow_key *key, @@ -1502,6 +1541,15 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, ovs_kfree_skb_reason(skb, reason); return 0; } + + case OVS_ACTION_ATTR_PSAMPLE: + execute_psample(dp, skb, a); + OVS_CB(skb)->cutlen = 0; + if (nla_is_last(a, rem)) { + consume_skb(skb); + return 0; + } + break; } if (unlikely(err)) { diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index f224d9bcea5e..c92bdc4dfe19 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -64,6 +64,7 @@ static bool actions_may_change_flow(const struct nlattr *actions) case OVS_ACTION_ATTR_TRUNC: case OVS_ACTION_ATTR_USERSPACE: case OVS_ACTION_ATTR_DROP: + case OVS_ACTION_ATTR_PSAMPLE: break; case OVS_ACTION_ATTR_CT: @@ -2409,7 +2410,7 @@ static void ovs_nla_free_nested_actions(const struct nlattr *actions, int len) /* Whenever new actions are added, the need to update this * function should be considered. */ - BUILD_BUG_ON(OVS_ACTION_ATTR_MAX != 24); + BUILD_BUG_ON(OVS_ACTION_ATTR_MAX != 25); if (!actions) return; @@ -3157,6 +3158,28 @@ static int validate_and_copy_check_pkt_len(struct net *net, return 0; } +static int validate_psample(const struct nlattr *attr) +{ + static const struct nla_policy policy[OVS_PSAMPLE_ATTR_MAX + 1] = { + [OVS_PSAMPLE_ATTR_GROUP] = { .type = NLA_U32 }, + [OVS_PSAMPLE_ATTR_COOKIE] = { + .type = NLA_BINARY, + .len = OVS_PSAMPLE_COOKIE_MAX_SIZE, + }, + }; + struct nlattr *a[OVS_PSAMPLE_ATTR_MAX + 1]; + int err; + + if (!IS_ENABLED(CONFIG_PSAMPLE)) + return -EOPNOTSUPP; + + err = nla_parse_nested(a, OVS_PSAMPLE_ATTR_MAX, attr, policy, NULL); + if (err) + return err; + + return a[OVS_PSAMPLE_ATTR_GROUP] ? 
0 : -EINVAL; +} + static int copy_action(const struct nlattr *from, struct sw_flow_actions **sfa, bool log) { @@ -3212,6 +3235,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, [OVS_ACTION_ATTR_ADD_MPLS] = sizeof(struct ovs_action_add_mpls), [OVS_ACTION_ATTR_DEC_TTL] = (u32)-1, [OVS_ACTION_ATTR_DROP] = sizeof(u32), + [OVS_ACTION_ATTR_PSAMPLE] = (u32)-1, }; const struct ovs_action_push_vlan *vlan; int type = nla_type(a); @@ -3490,6 +3514,12 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, return -EINVAL; break; + case OVS_ACTION_ATTR_PSAMPLE: + err = validate_psample(a); + if (err) + return err; + break; + default: OVS_NLERR(log, "Unknown Action type %d", type); return -EINVAL; -- cgit v1.2.3-58-ga151 From 71763d8a8203c28178d7be7f18af73d4dddb36ba Mon Sep 17 00:00:00 2001 From: Adrian Moreno Date: Thu, 4 Jul 2024 10:56:57 +0200 Subject: net: openvswitch: store sampling probability in cb. When a packet sample is observed, the sampling rate that was used is important to estimate the real frequency of such event. Store the probability of the parent sample action in the skb's cb area and use it in psample action to pass it down to psample module. Reviewed-by: Aaron Conole Acked-by: Eelco Chaudron Reviewed-by: Ilya Maximets Signed-off-by: Adrian Moreno Link: https://patch.msgid.link/20240704085710.353845-7-amorenoz@redhat.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/openvswitch.h | 3 ++- net/openvswitch/actions.c | 20 +++++++++++++++++--- net/openvswitch/datapath.h | 3 +++ net/openvswitch/vport.c | 1 + 4 files changed, 23 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 3dd653748725..3a701bd1f31b 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -649,7 +649,8 @@ enum ovs_flow_attr { * Actions are passed as nested attributes. * * Executes the specified actions with the given probability on a per-packet - * basis. + * basis. Nested actions will be able to access the probability value of the + * parent @OVS_ACTION_ATTR_SAMPLE. */ enum ovs_sample_attr { OVS_SAMPLE_ATTR_UNSPEC, diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 892d7e48fc5b..101f9a23792c 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -1048,12 +1048,15 @@ static int sample(struct datapath *dp, struct sk_buff *skb, struct nlattr *sample_arg; int rem = nla_len(attr); const struct sample_arg *arg; + u32 init_probability; bool clone_flow_key; + int err; /* The first action is always 'OVS_SAMPLE_ATTR_ARG'. */ sample_arg = nla_data(attr); arg = nla_data(sample_arg); actions = nla_next(sample_arg, &rem); + init_probability = OVS_CB(skb)->probability; if ((arg->probability != U32_MAX) && (!arg->probability || get_random_u32() > arg->probability)) { @@ -1062,9 +1065,16 @@ static int sample(struct datapath *dp, struct sk_buff *skb, return 0; } + OVS_CB(skb)->probability = arg->probability; + clone_flow_key = !arg->exec; - return clone_execute(dp, skb, key, 0, actions, rem, last, - clone_flow_key); + err = clone_execute(dp, skb, key, 0, actions, rem, last, + clone_flow_key); + + if (!last) + OVS_CB(skb)->probability = init_probability; + + return err; } /* When 'last' is true, clone() should always consume the 'skb'. 
@@ -1311,6 +1321,7 @@ static void execute_psample(struct datapath *dp, struct sk_buff *skb, struct psample_group psample_group = {}; struct psample_metadata md = {}; const struct nlattr *a; + u32 rate; int rem; nla_for_each_attr(a, nla_data(attr), nla_len(attr), rem) { @@ -1329,8 +1340,11 @@ static void execute_psample(struct datapath *dp, struct sk_buff *skb, psample_group.net = ovs_dp_get_net(dp); md.in_ifindex = OVS_CB(skb)->input_vport->dev->ifindex; md.trunc_size = skb->len - OVS_CB(skb)->cutlen; + md.rate_as_probability = 1; + + rate = OVS_CB(skb)->probability ? OVS_CB(skb)->probability : U32_MAX; - psample_sample_packet(&psample_group, skb, 0, &md); + psample_sample_packet(&psample_group, skb, rate, &md); } #else static void execute_psample(struct datapath *dp, struct sk_buff *skb, diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index 0cd29971a907..9ca6231ea647 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -115,12 +115,15 @@ struct datapath { * fragmented. * @acts_origlen: The netlink size of the flow actions applied to this skb. * @cutlen: The number of bytes from the packet end to be removed. + * @probability: The sampling probability that was applied to this skb; 0 means + * no sampling has occurred; U32_MAX means 100% probability. */ struct ovs_skb_cb { struct vport *input_vport; u16 mru; u16 acts_origlen; u32 cutlen; + u32 probability; }; #define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb) diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 972ae01a70f7..8732f6e51ae5 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -500,6 +500,7 @@ int ovs_vport_receive(struct vport *vport, struct sk_buff *skb, OVS_CB(skb)->input_vport = vport; OVS_CB(skb)->mru = 0; OVS_CB(skb)->cutlen = 0; + OVS_CB(skb)->probability = 0; if (unlikely(dev_net(skb->dev) != ovs_dp_get_net(vport->dp))) { u32 mark; -- cgit v1.2.3-58-ga151 From e46296002113b4556baffd5dc68c4f9e22dae13a Mon Sep 17 00:00:00 2001 From: "Kory Maincent (Dent Project)" Date: Thu, 4 Jul 2024 10:11:56 +0200 Subject: net: ethtool: pse-pd: Expand C33 PSE status with class, power and extended state This update expands the status information provided by ethtool for PSE c33. It includes details such as the detected class, current power delivered, and extended state information. Reviewed-by: Oleksij Rempel Signed-off-by: Kory Maincent Link: https://patch.msgid.link/20240704-feature_poe_power_cap-v6-1-320003204264@bootlin.com Signed-off-by: Jakub Kicinski --- Documentation/networking/ethtool-netlink.rst | 37 ++++++ include/linux/ethtool.h | 15 +++ include/linux/pse-pd/pse.h | 8 ++ include/uapi/linux/ethtool.h | 191 +++++++++++++++++++++++++++ include/uapi/linux/ethtool_netlink.h | 4 + net/ethtool/pse-pd.c | 32 ++++- 6 files changed, 286 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index bfe2eda8580d..0656ad4be000 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -1740,6 +1740,13 @@ Kernel response contents: PSE functions. ``ETHTOOL_A_C33_PSE_PW_D_STATUS`` u32 power detection status of the PoE PSE. + ``ETHTOOL_A_C33_PSE_PW_CLASS`` u32 power class of the PoE PSE. + ``ETHTOOL_A_C33_PSE_ACTUAL_PW`` u32 actual power drawn on the + PoE PSE. + ``ETHTOOL_A_C33_PSE_EXT_STATE`` u32 power extended state of the + PoE PSE. + ``ETHTOOL_A_C33_PSE_EXT_SUBSTATE`` u32 power extended substatus of + the PoE PSE. 
====================================== ====== ============================= When set, the optional ``ETHTOOL_A_PODL_PSE_ADMIN_STATE`` attribute identifies @@ -1772,6 +1779,36 @@ The same goes for ``ETHTOOL_A_C33_PSE_ADMIN_PW_D_STATUS`` implementing .. kernel-doc:: include/uapi/linux/ethtool.h :identifiers: ethtool_c33_pse_pw_d_status +When set, the optional ``ETHTOOL_A_C33_PSE_PW_CLASS`` attribute identifies +the power class of the C33 PSE. It depends on the class negotiated between +the PSE and the PD. This option is corresponding to ``IEEE 802.3-2022`` +30.9.1.1.8 aPSEPowerClassification. + +When set, the optional ``ETHTOOL_A_C33_PSE_ACTUAL_PW`` attribute identifies +This option is corresponding to ``IEEE 802.3-2022`` 30.9.1.1.23 aPSEActualPower. +Actual power is reported in mW. + +When set, the optional ``ETHTOOL_A_C33_PSE_EXT_STATE`` attribute identifies +the extended error state of the C33 PSE. Possible values are: + +.. kernel-doc:: include/uapi/linux/ethtool.h + :identifiers: ethtool_c33_pse_ext_state + +When set, the optional ``ETHTOOL_A_C33_PSE_EXT_SUBSTATE`` attribute identifies +the extended error state of the C33 PSE. Possible values are: +Possible values are: + +.. kernel-doc:: include/uapi/linux/ethtool.h + :identifiers: ethtool_c33_pse_ext_substate_class_num_events + ethtool_c33_pse_ext_substate_error_condition + ethtool_c33_pse_ext_substate_mr_pse_enable + ethtool_c33_pse_ext_substate_option_detect_ted + ethtool_c33_pse_ext_substate_option_vport_lim + ethtool_c33_pse_ext_substate_ovld_detected + ethtool_c33_pse_ext_substate_pd_dll_power_type + ethtool_c33_pse_ext_substate_power_not_available + ethtool_c33_pse_ext_substate_short_detected + PSE_SET ======= diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 3a99238ef895..3e70f5d9e0bb 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -1273,4 +1273,19 @@ struct ethtool_forced_speed_map { void ethtool_forced_speed_maps_init(struct ethtool_forced_speed_map *maps, u32 size); + +/* C33 PSE extended state and substate. */ +struct ethtool_c33_pse_ext_state_info { + enum ethtool_c33_pse_ext_state c33_pse_ext_state; + union { + enum ethtool_c33_pse_ext_substate_error_condition error_condition; + enum ethtool_c33_pse_ext_substate_mr_pse_enable mr_pse_enable; + enum ethtool_c33_pse_ext_substate_option_detect_ted option_detect_ted; + enum ethtool_c33_pse_ext_substate_option_vport_lim option_vport_lim; + enum ethtool_c33_pse_ext_substate_ovld_detected ovld_detected; + enum ethtool_c33_pse_ext_substate_power_not_available power_not_available; + enum ethtool_c33_pse_ext_substate_short_detected short_detected; + u32 __c33_pse_ext_substate; + }; +}; #endif /* _LINUX_ETHTOOL_H */ diff --git a/include/linux/pse-pd/pse.h b/include/linux/pse-pd/pse.h index 6eec24ffa866..38b9308e5e7a 100644 --- a/include/linux/pse-pd/pse.h +++ b/include/linux/pse-pd/pse.h @@ -36,12 +36,20 @@ struct pse_control_config { * functions. IEEE 802.3-2022 30.9.1.1.2 aPSEAdminState * @c33_pw_status: power detection status of the PSE. 
* IEEE 802.3-2022 30.9.1.1.5 aPSEPowerDetectionStatus: + * @c33_pw_class: detected class of a powered PD + * IEEE 802.3-2022 30.9.1.1.8 aPSEPowerClassification + * @c33_actual_pw: power currently delivered by the PSE in mW + * IEEE 802.3-2022 30.9.1.1.23 aPSEActualPower + * @c33_ext_state_info: extended state information of the PSE */ struct pse_control_status { enum ethtool_podl_pse_admin_state podl_admin_state; enum ethtool_podl_pse_pw_d_status podl_pw_status; enum ethtool_c33_pse_admin_state c33_admin_state; enum ethtool_c33_pse_pw_d_status c33_pw_status; + u32 c33_pw_class; + u32 c33_actual_pw; + struct ethtool_c33_pse_ext_state_info c33_ext_state_info; }; /** diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index e011384c915c..230110b97029 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -752,6 +752,197 @@ enum ethtool_module_power_mode { ETHTOOL_MODULE_POWER_MODE_HIGH, }; +/** + * enum ethtool_c33_pse_ext_state - groups of PSE extended states + * functions. IEEE 802.3-2022 33.2.4.4 Variables + * + * @ETHTOOL_C33_PSE_EXT_STATE_ERROR_CONDITION: Group of error_condition states + * @ETHTOOL_C33_PSE_EXT_STATE_MR_MPS_VALID: Group of mr_mps_valid states + * @ETHTOOL_C33_PSE_EXT_STATE_MR_PSE_ENABLE: Group of mr_pse_enable states + * @ETHTOOL_C33_PSE_EXT_STATE_OPTION_DETECT_TED: Group of option_detect_ted + * states + * @ETHTOOL_C33_PSE_EXT_STATE_OPTION_VPORT_LIM: Group of option_vport_lim states + * @ETHTOOL_C33_PSE_EXT_STATE_OVLD_DETECTED: Group of ovld_detected states + * @ETHTOOL_C33_PSE_EXT_STATE_PD_DLL_POWER_TYPE: Group of pd_dll_power_type + * states + * @ETHTOOL_C33_PSE_EXT_STATE_POWER_NOT_AVAILABLE: Group of power_not_available + * states + * @ETHTOOL_C33_PSE_EXT_STATE_SHORT_DETECTED: Group of short_detected states + */ +enum ethtool_c33_pse_ext_state { + ETHTOOL_C33_PSE_EXT_STATE_ERROR_CONDITION = 1, + ETHTOOL_C33_PSE_EXT_STATE_MR_MPS_VALID, + ETHTOOL_C33_PSE_EXT_STATE_MR_PSE_ENABLE, + ETHTOOL_C33_PSE_EXT_STATE_OPTION_DETECT_TED, + ETHTOOL_C33_PSE_EXT_STATE_OPTION_VPORT_LIM, + ETHTOOL_C33_PSE_EXT_STATE_OVLD_DETECTED, + ETHTOOL_C33_PSE_EXT_STATE_PD_DLL_POWER_TYPE, + ETHTOOL_C33_PSE_EXT_STATE_POWER_NOT_AVAILABLE, + ETHTOOL_C33_PSE_EXT_STATE_SHORT_DETECTED, +}; + +/** + * enum ethtool_c33_pse_ext_substate_mr_mps_valid - mr_mps_valid states + * functions. IEEE 802.3-2022 33.2.4.4 Variables + * + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_MR_MPS_VALID_DETECTED_UNDERLOAD: Underload + * state + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_MR_MPS_VALID_CONNECTION_OPEN: Port is not + * connected + * + * The PSE monitors either the DC or AC Maintain Power Signature + * (MPS, see 33.2.9.1). This variable indicates the presence or absence of + * a valid MPS. + */ +enum ethtool_c33_pse_ext_substate_mr_mps_valid { + ETHTOOL_C33_PSE_EXT_SUBSTATE_MR_MPS_VALID_DETECTED_UNDERLOAD = 1, + ETHTOOL_C33_PSE_EXT_SUBSTATE_MR_MPS_VALID_CONNECTION_OPEN, +}; + +/** + * enum ethtool_c33_pse_ext_substate_error_condition - error_condition states + * functions. 
IEEE 802.3-2022 33.2.4.4 Variables + * + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_NON_EXISTING_PORT: Non-existing + * port number + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_UNDEFINED_PORT: Undefined port + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_INTERNAL_HW_FAULT: Internal + * hardware fault + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_COMM_ERROR_AFTER_FORCE_ON: + * Communication error after force on + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_UNKNOWN_PORT_STATUS: Unknown + * port status + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_HOST_CRASH_TURN_OFF: Host + * crash turn off + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_HOST_CRASH_FORCE_SHUTDOWN: + * Host crash force shutdown + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_CONFIG_CHANGE: Configuration + * change + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_DETECTED_OVER_TEMP: Over + * temperature detected + * + * error_condition is a variable indicating the status of + * implementation-specific fault conditions or optionally other system faults + * that prevent the PSE from meeting the specifications in Table 33–11 and that + * require the PSE not to source power. These error conditions are different + * from those monitored by the state diagrams in Figure 33–10. + */ +enum ethtool_c33_pse_ext_substate_error_condition { + ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_NON_EXISTING_PORT = 1, + ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_UNDEFINED_PORT, + ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_INTERNAL_HW_FAULT, + ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_COMM_ERROR_AFTER_FORCE_ON, + ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_UNKNOWN_PORT_STATUS, + ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_HOST_CRASH_TURN_OFF, + ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_HOST_CRASH_FORCE_SHUTDOWN, + ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_CONFIG_CHANGE, + ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_DETECTED_OVER_TEMP, +}; + +/** + * enum ethtool_c33_pse_ext_substate_mr_pse_enable - mr_pse_enable states + * functions. IEEE 802.3-2022 33.2.4.4 Variables + * + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_MR_PSE_ENABLE_DISABLE_PIN_ACTIVE: Disable + * pin active + * + * mr_pse_enable is control variable that selects PSE operation and test + * functions. + */ +enum ethtool_c33_pse_ext_substate_mr_pse_enable { + ETHTOOL_C33_PSE_EXT_SUBSTATE_MR_PSE_ENABLE_DISABLE_PIN_ACTIVE = 1, +}; + +/** + * enum ethtool_c33_pse_ext_substate_option_detect_ted - option_detect_ted + * states functions. IEEE 802.3-2022 33.2.4.4 Variables + * + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_OPTION_DETECT_TED_DET_IN_PROCESS: Detection + * in process + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_OPTION_DETECT_TED_CONNECTION_CHECK_ERROR: + * Connection check error + * + * option_detect_ted is a variable indicating if detection can be performed + * by the PSE during the ted_timer interval. + */ +enum ethtool_c33_pse_ext_substate_option_detect_ted { + ETHTOOL_C33_PSE_EXT_SUBSTATE_OPTION_DETECT_TED_DET_IN_PROCESS = 1, + ETHTOOL_C33_PSE_EXT_SUBSTATE_OPTION_DETECT_TED_CONNECTION_CHECK_ERROR, +}; + +/** + * enum ethtool_c33_pse_ext_substate_option_vport_lim - option_vport_lim states + * functions. 
IEEE 802.3-2022 33.2.4.4 Variables + * + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_OPTION_VPORT_LIM_HIGH_VOLTAGE: Main supply + * voltage is high + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_OPTION_VPORT_LIM_LOW_VOLTAGE: Main supply + * voltage is low + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_OPTION_VPORT_LIM_VOLTAGE_INJECTION: Voltage + * injection into the port + * + * option_vport_lim is an optional variable indicates if VPSE is out of the + * operating range during normal operating state. + */ +enum ethtool_c33_pse_ext_substate_option_vport_lim { + ETHTOOL_C33_PSE_EXT_SUBSTATE_OPTION_VPORT_LIM_HIGH_VOLTAGE = 1, + ETHTOOL_C33_PSE_EXT_SUBSTATE_OPTION_VPORT_LIM_LOW_VOLTAGE, + ETHTOOL_C33_PSE_EXT_SUBSTATE_OPTION_VPORT_LIM_VOLTAGE_INJECTION, +}; + +/** + * enum ethtool_c33_pse_ext_substate_ovld_detected - ovld_detected states + * functions. IEEE 802.3-2022 33.2.4.4 Variables + * + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_OVLD_DETECTED_OVERLOAD: Overload state + * + * ovld_detected is a variable indicating if the PSE output current has been + * in an overload condition (see 33.2.7.6) for at least TCUT of a one-second + * sliding time. + */ +enum ethtool_c33_pse_ext_substate_ovld_detected { + ETHTOOL_C33_PSE_EXT_SUBSTATE_OVLD_DETECTED_OVERLOAD = 1, +}; + +/** + * enum ethtool_c33_pse_ext_substate_power_not_available - power_not_available + * states functions. IEEE 802.3-2022 33.2.4.4 Variables + * + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_POWER_NOT_AVAILABLE_BUDGET_EXCEEDED: Power + * budget exceeded for the controller + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_POWER_NOT_AVAILABLE_PORT_PW_LIMIT_EXCEEDS_CONTROLLER_BUDGET: + * Configured port power limit exceeded controller power budget + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_POWER_NOT_AVAILABLE_PD_REQUEST_EXCEEDS_PORT_LIMIT: + * Power request from PD exceeds port limit + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_POWER_NOT_AVAILABLE_HW_PW_LIMIT: Power + * denied due to Hardware power limit + * + * power_not_available is a variable that is asserted in an + * implementation-dependent manner when the PSE is no longer capable of + * sourcing sufficient power to support the attached PD. Sufficient power + * is defined by classification; see 33.2.6. + */ +enum ethtool_c33_pse_ext_substate_power_not_available { + ETHTOOL_C33_PSE_EXT_SUBSTATE_POWER_NOT_AVAILABLE_BUDGET_EXCEEDED = 1, + ETHTOOL_C33_PSE_EXT_SUBSTATE_POWER_NOT_AVAILABLE_PORT_PW_LIMIT_EXCEEDS_CONTROLLER_BUDGET, + ETHTOOL_C33_PSE_EXT_SUBSTATE_POWER_NOT_AVAILABLE_PD_REQUEST_EXCEEDS_PORT_LIMIT, + ETHTOOL_C33_PSE_EXT_SUBSTATE_POWER_NOT_AVAILABLE_HW_PW_LIMIT, +}; + +/** + * enum ethtool_c33_pse_ext_substate_short_detected - short_detected states + * functions. IEEE 802.3-2022 33.2.4.4 Variables + * + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_SHORT_DETECTED_SHORT_CONDITION: Short + * condition was detected + * + * short_detected is a variable indicating if the PSE output current has been + * in a short circuit condition for TLIM within a sliding window (see 33.2.7.7). + */ +enum ethtool_c33_pse_ext_substate_short_detected { + ETHTOOL_C33_PSE_EXT_SUBSTATE_SHORT_DETECTED_SHORT_CONDITION = 1, +}; + /** * enum ethtool_pse_types - Types of PSE controller. 
* @ETHTOOL_PSE_UNKNOWN: Type of PSE controller is unknown diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 840dabdc9d88..b8895da001bc 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -939,6 +939,10 @@ enum { ETHTOOL_A_C33_PSE_ADMIN_STATE, /* u32 */ ETHTOOL_A_C33_PSE_ADMIN_CONTROL, /* u32 */ ETHTOOL_A_C33_PSE_PW_D_STATUS, /* u32 */ + ETHTOOL_A_C33_PSE_PW_CLASS, /* u32 */ + ETHTOOL_A_C33_PSE_ACTUAL_PW, /* u32 */ + ETHTOOL_A_C33_PSE_EXT_STATE, /* u32 */ + ETHTOOL_A_C33_PSE_EXT_SUBSTATE, /* u32 */ /* add new constants above here */ __ETHTOOL_A_PSE_CNT, diff --git a/net/ethtool/pse-pd.c b/net/ethtool/pse-pd.c index 2c981d443f27..d2a1c14d789f 100644 --- a/net/ethtool/pse-pd.c +++ b/net/ethtool/pse-pd.c @@ -86,7 +86,16 @@ static int pse_reply_size(const struct ethnl_req_info *req_base, len += nla_total_size(sizeof(u32)); /* _C33_PSE_ADMIN_STATE */ if (st->c33_pw_status > 0) len += nla_total_size(sizeof(u32)); /* _C33_PSE_PW_D_STATUS */ - + if (st->c33_pw_class > 0) + len += nla_total_size(sizeof(u32)); /* _C33_PSE_PW_CLASS */ + if (st->c33_actual_pw > 0) + len += nla_total_size(sizeof(u32)); /* _C33_PSE_ACTUAL_PW */ + if (st->c33_ext_state_info.c33_pse_ext_state > 0) { + len += nla_total_size(sizeof(u32)); /* _C33_PSE_EXT_STATE */ + if (st->c33_ext_state_info.__c33_pse_ext_substate > 0) + /* _C33_PSE_EXT_SUBSTATE */ + len += nla_total_size(sizeof(u32)); + } return len; } @@ -117,6 +126,27 @@ static int pse_fill_reply(struct sk_buff *skb, st->c33_pw_status)) return -EMSGSIZE; + if (st->c33_pw_class > 0 && + nla_put_u32(skb, ETHTOOL_A_C33_PSE_PW_CLASS, + st->c33_pw_class)) + return -EMSGSIZE; + + if (st->c33_actual_pw > 0 && + nla_put_u32(skb, ETHTOOL_A_C33_PSE_ACTUAL_PW, + st->c33_actual_pw)) + return -EMSGSIZE; + + if (st->c33_ext_state_info.c33_pse_ext_state > 0) { + if (nla_put_u32(skb, ETHTOOL_A_C33_PSE_EXT_STATE, + st->c33_ext_state_info.c33_pse_ext_state)) + return -EMSGSIZE; + + if (st->c33_ext_state_info.__c33_pse_ext_substate > 0 && + nla_put_u32(skb, ETHTOOL_A_C33_PSE_EXT_SUBSTATE, + st->c33_ext_state_info.__c33_pse_ext_substate)) + return -EMSGSIZE; + } + return 0; } -- cgit v1.2.3-58-ga151 From 30d7b6727724ce3729f2cb5b8be985d2d1931d2b Mon Sep 17 00:00:00 2001 From: "Kory Maincent (Dent Project)" Date: Thu, 4 Jul 2024 10:12:00 +0200 Subject: net: ethtool: Add new power limit get and set features This patch expands the status information provided by ethtool for PSE c33 with available power limit and available power limit ranges. It also adds a call to pse_ethtool_set_pw_limit() to configure the PSE control power limit. 
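For orientation, a rough sketch of what a PSE driver's status callback might hand back for the new fields (the numeric values and the single-range layout are hypothetical; the ranges buffer is later freed by the ethtool core, as the pse_cleanup_data() hunk below shows):

	/* Hypothetical fragment of a driver's get_status implementation. */
	struct ethtool_c33_pse_pw_limit_range *range;

	range = kzalloc(sizeof(*range), GFP_KERNEL);
	if (!range)
		return -ENOMEM;
	range->min = 4000;			/* example values, in mW */
	range->max = 30000;

	status->c33_pw_limit_ranges = range;
	status->c33_pw_limit_nb_ranges = 1;
	status->c33_avail_pw_limit = 15400;	/* currently configured limit, mW */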
Reviewed-by: Oleksij Rempel Signed-off-by: Kory Maincent Link: https://patch.msgid.link/20240704-feature_poe_power_cap-v6-5-320003204264@bootlin.com Signed-off-by: Jakub Kicinski --- Documentation/networking/ethtool-netlink.rst | 64 ++++++++++++++------ include/linux/ethtool.h | 5 ++ include/uapi/linux/ethtool_netlink.h | 8 +++ net/ethtool/pse-pd.c | 89 +++++++++++++++++++++++++--- 4 files changed, 141 insertions(+), 25 deletions(-) (limited to 'net') diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index 0656ad4be000..3ab423b80e91 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -1730,24 +1730,28 @@ Request contents: Kernel response contents: - ====================================== ====== ============================= - ``ETHTOOL_A_PSE_HEADER`` nested reply header - ``ETHTOOL_A_PODL_PSE_ADMIN_STATE`` u32 Operational state of the PoDL - PSE functions - ``ETHTOOL_A_PODL_PSE_PW_D_STATUS`` u32 power detection status of the - PoDL PSE. - ``ETHTOOL_A_C33_PSE_ADMIN_STATE`` u32 Operational state of the PoE - PSE functions. - ``ETHTOOL_A_C33_PSE_PW_D_STATUS`` u32 power detection status of the - PoE PSE. - ``ETHTOOL_A_C33_PSE_PW_CLASS`` u32 power class of the PoE PSE. - ``ETHTOOL_A_C33_PSE_ACTUAL_PW`` u32 actual power drawn on the - PoE PSE. - ``ETHTOOL_A_C33_PSE_EXT_STATE`` u32 power extended state of the - PoE PSE. - ``ETHTOOL_A_C33_PSE_EXT_SUBSTATE`` u32 power extended substatus of - the PoE PSE. - ====================================== ====== ============================= + ========================================== ====== ============================= + ``ETHTOOL_A_PSE_HEADER`` nested reply header + ``ETHTOOL_A_PODL_PSE_ADMIN_STATE`` u32 Operational state of the PoDL + PSE functions + ``ETHTOOL_A_PODL_PSE_PW_D_STATUS`` u32 power detection status of the + PoDL PSE. + ``ETHTOOL_A_C33_PSE_ADMIN_STATE`` u32 Operational state of the PoE + PSE functions. + ``ETHTOOL_A_C33_PSE_PW_D_STATUS`` u32 power detection status of the + PoE PSE. + ``ETHTOOL_A_C33_PSE_PW_CLASS`` u32 power class of the PoE PSE. + ``ETHTOOL_A_C33_PSE_ACTUAL_PW`` u32 actual power drawn on the + PoE PSE. + ``ETHTOOL_A_C33_PSE_EXT_STATE`` u32 power extended state of the + PoE PSE. + ``ETHTOOL_A_C33_PSE_EXT_SUBSTATE`` u32 power extended substatus of + the PoE PSE. + ``ETHTOOL_A_C33_PSE_AVAIL_PW_LIMIT`` u32 currently configured power + limit of the PoE PSE. + ``ETHTOOL_A_C33_PSE_PW_LIMIT_RANGES`` nested Supported power limit + configuration ranges. + ========================================== ====== ============================= When set, the optional ``ETHTOOL_A_PODL_PSE_ADMIN_STATE`` attribute identifies the operational state of the PoDL PSE functions. The operational state of the @@ -1809,6 +1813,16 @@ Possible values are: ethtool_c33_pse_ext_substate_power_not_available ethtool_c33_pse_ext_substate_short_detected +When set, the optional ``ETHTOOL_A_C33_PSE_AVAIL_PW_LIMIT`` attribute +identifies the C33 PSE power limit in mW. + +When set the optional ``ETHTOOL_A_C33_PSE_PW_LIMIT_RANGES`` nested attribute +identifies the C33 PSE power limit ranges through +``ETHTOOL_A_C33_PSE_PWR_VAL_LIMIT_RANGE_MIN`` and +``ETHTOOL_A_C33_PSE_PWR_VAL_LIMIT_RANGE_MAX``. +If the controller works with fixed classes, the min and max values will be +equal. 
+ PSE_SET ======= @@ -1820,6 +1834,8 @@ Request contents: ``ETHTOOL_A_PSE_HEADER`` nested request header ``ETHTOOL_A_PODL_PSE_ADMIN_CONTROL`` u32 Control PoDL PSE Admin state ``ETHTOOL_A_C33_PSE_ADMIN_CONTROL`` u32 Control PSE Admin state + ``ETHTOOL_A_C33_PSE_AVAIL_PWR_LIMIT`` u32 Control PoE PSE available + power limit ====================================== ====== ============================= When set, the optional ``ETHTOOL_A_PODL_PSE_ADMIN_CONTROL`` attribute is used @@ -1830,6 +1846,18 @@ to control PoDL PSE Admin functions. This option is implementing The same goes for ``ETHTOOL_A_C33_PSE_ADMIN_CONTROL`` implementing ``IEEE 802.3-2022`` 30.9.1.2.1 acPSEAdminControl. +When set, the optional ``ETHTOOL_A_C33_PSE_AVAIL_PWR_LIMIT`` attribute is +used to control the available power value limit for C33 PSE in milliwatts. +This attribute corresponds to the `pse_available_power` variable described in +``IEEE 802.3-2022`` 33.2.4.4 Variables and `pse_avail_pwr` in 145.2.5.4 +Variables, which are described in power classes. + +It was decided to use milliwatts for this interface to unify it with other +power monitoring interfaces, which also use milliwatts, and to align with +various existing products that document power consumption in watts rather than +classes. If power limit configuration based on classes is needed, the +conversion can be done in user space, for example by ethtool. + RSS_GET ======= diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 3e70f5d9e0bb..e213b5508da6 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -1288,4 +1288,9 @@ struct ethtool_c33_pse_ext_state_info { u32 __c33_pse_ext_substate; }; }; + +struct ethtool_c33_pse_pw_limit_range { + u32 min; + u32 max; +}; #endif /* _LINUX_ETHTOOL_H */ diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index b8895da001bc..6d5bdcc67631 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -930,6 +930,12 @@ enum { }; /* Power Sourcing Equipment */ +enum { + ETHTOOL_A_C33_PSE_PW_LIMIT_UNSPEC, + ETHTOOL_A_C33_PSE_PW_LIMIT_MIN, /* u32 */ + ETHTOOL_A_C33_PSE_PW_LIMIT_MAX, /* u32 */ +}; + enum { ETHTOOL_A_PSE_UNSPEC, ETHTOOL_A_PSE_HEADER, /* nest - _A_HEADER_* */ @@ -943,6 +949,8 @@ enum { ETHTOOL_A_C33_PSE_ACTUAL_PW, /* u32 */ ETHTOOL_A_C33_PSE_EXT_STATE, /* u32 */ ETHTOOL_A_C33_PSE_EXT_SUBSTATE, /* u32 */ + ETHTOOL_A_C33_PSE_AVAIL_PW_LIMIT, /* u32 */ + ETHTOOL_A_C33_PSE_PW_LIMIT_RANGES, /* nest - _C33_PSE_PW_LIMIT_* */ /* add new constants above here */ __ETHTOOL_A_PSE_CNT, diff --git a/net/ethtool/pse-pd.c b/net/ethtool/pse-pd.c index d2a1c14d789f..ba46c9c8b12d 100644 --- a/net/ethtool/pse-pd.c +++ b/net/ethtool/pse-pd.c @@ -96,9 +96,46 @@ static int pse_reply_size(const struct ethnl_req_info *req_base, /* _C33_PSE_EXT_SUBSTATE */ len += nla_total_size(sizeof(u32)); } + if (st->c33_avail_pw_limit > 0) + /* _C33_AVAIL_PSE_PW_LIMIT */ + len += nla_total_size(sizeof(u32)); + if (st->c33_pw_limit_nb_ranges > 0) + /* _C33_PSE_PW_LIMIT_RANGES */ + len += st->c33_pw_limit_nb_ranges * + (nla_total_size(0) + + nla_total_size(sizeof(u32)) * 2); + return len; } +static int pse_put_pw_limit_ranges(struct sk_buff *skb, + const struct pse_control_status *st) +{ + const struct ethtool_c33_pse_pw_limit_range *pw_limit_ranges; + int i; + + pw_limit_ranges = st->c33_pw_limit_ranges; + for (i = 0; i < st->c33_pw_limit_nb_ranges; i++) { + struct nlattr *nest; + + nest = nla_nest_start(skb, ETHTOOL_A_C33_PSE_PW_LIMIT_RANGES); + if (!nest) + 
return -EMSGSIZE; + + if (nla_put_u32(skb, ETHTOOL_A_C33_PSE_PW_LIMIT_MIN, + pw_limit_ranges->min) || + nla_put_u32(skb, ETHTOOL_A_C33_PSE_PW_LIMIT_MAX, + pw_limit_ranges->max)) { + nla_nest_cancel(skb, nest); + return -EMSGSIZE; + } + nla_nest_end(skb, nest); + pw_limit_ranges++; + } + + return 0; +} + static int pse_fill_reply(struct sk_buff *skb, const struct ethnl_req_info *req_base, const struct ethnl_reply_data *reply_base) @@ -147,9 +184,25 @@ static int pse_fill_reply(struct sk_buff *skb, return -EMSGSIZE; } + if (st->c33_avail_pw_limit > 0 && + nla_put_u32(skb, ETHTOOL_A_C33_PSE_AVAIL_PW_LIMIT, + st->c33_avail_pw_limit)) + return -EMSGSIZE; + + if (st->c33_pw_limit_nb_ranges > 0 && + pse_put_pw_limit_ranges(skb, st)) + return -EMSGSIZE; + return 0; } +static void pse_cleanup_data(struct ethnl_reply_data *reply_base) +{ + const struct pse_reply_data *data = PSE_REPDATA(reply_base); + + kfree(data->status.c33_pw_limit_ranges); +} + /* PSE_SET */ const struct nla_policy ethnl_pse_set_policy[ETHTOOL_A_PSE_MAX + 1] = { @@ -160,6 +213,7 @@ const struct nla_policy ethnl_pse_set_policy[ETHTOOL_A_PSE_MAX + 1] = { [ETHTOOL_A_C33_PSE_ADMIN_CONTROL] = NLA_POLICY_RANGE(NLA_U32, ETHTOOL_C33_PSE_ADMIN_STATE_DISABLED, ETHTOOL_C33_PSE_ADMIN_STATE_ENABLED), + [ETHTOOL_A_C33_PSE_AVAIL_PW_LIMIT] = { .type = NLA_U32 }, }; static int @@ -202,19 +256,39 @@ static int ethnl_set_pse(struct ethnl_req_info *req_info, struct genl_info *info) { struct net_device *dev = req_info->dev; - struct pse_control_config config = {}; struct nlattr **tb = info->attrs; struct phy_device *phydev; + int ret = 0; phydev = dev->phydev; + + if (tb[ETHTOOL_A_C33_PSE_AVAIL_PW_LIMIT]) { + unsigned int pw_limit; + + pw_limit = nla_get_u32(tb[ETHTOOL_A_C33_PSE_AVAIL_PW_LIMIT]); + ret = pse_ethtool_set_pw_limit(phydev->psec, info->extack, + pw_limit); + if (ret) + return ret; + } + /* These values are already validated by the ethnl_pse_set_policy */ - if (pse_has_podl(phydev->psec)) - config.podl_admin_control = nla_get_u32(tb[ETHTOOL_A_PODL_PSE_ADMIN_CONTROL]); - if (pse_has_c33(phydev->psec)) - config.c33_admin_control = nla_get_u32(tb[ETHTOOL_A_C33_PSE_ADMIN_CONTROL]); + if (tb[ETHTOOL_A_PODL_PSE_ADMIN_CONTROL] || + tb[ETHTOOL_A_C33_PSE_ADMIN_CONTROL]) { + struct pse_control_config config = {}; + + if (pse_has_podl(phydev->psec)) + config.podl_admin_control = nla_get_u32(tb[ETHTOOL_A_PODL_PSE_ADMIN_CONTROL]); + if (pse_has_c33(phydev->psec)) + config.c33_admin_control = nla_get_u32(tb[ETHTOOL_A_C33_PSE_ADMIN_CONTROL]); + + ret = pse_ethtool_set_config(phydev->psec, info->extack, + &config); + if (ret) + return ret; + } - /* Return errno directly - PSE has no notification */ - return pse_ethtool_set_config(phydev->psec, info->extack, &config); + return ret; } const struct ethnl_request_ops ethnl_pse_request_ops = { @@ -227,6 +301,7 @@ const struct ethnl_request_ops ethnl_pse_request_ops = { .prepare_data = pse_prepare_data, .reply_size = pse_reply_size, .fill_reply = pse_fill_reply, + .cleanup_data = pse_cleanup_data, .set_validate = ethnl_set_pse_validate, .set = ethnl_set_pse, -- cgit v1.2.3-58-ga151 From 3abbd7ed8b768ff37798e81910b47c2e05156d49 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 4 Jul 2024 13:29:20 +0200 Subject: act_ct: prepare for stolen verdict coming from conntrack and nat engine At this time, conntrack either returns NF_ACCEPT or NF_DROP. 
To improve debuging it would be nice to be able to replace NF_DROP verdict with NF_DROP_REASON() helper, This helper releases the skb instantly (so drop_monitor can pinpoint exact location) and returns NF_STOLEN. Prepare call sites to deal with this before introducing such changes in conntrack and nat core. Signed-off-by: Florian Westphal Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- net/sched/act_ct.c | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index 2a96d9c1db65..a6b7c514a181 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -944,6 +944,8 @@ static int tcf_ct_act_nat(struct sk_buff *skb, action |= BIT(NF_NAT_MANIP_DST); err = nf_ct_nat(skb, ct, ctinfo, &action, range, commit); + if (err != NF_ACCEPT) + return err & NF_VERDICT_MASK; if (action & BIT(NF_NAT_MANIP_SRC)) tc_skb_cb(skb)->post_ct_snat = 1; @@ -1035,7 +1037,7 @@ TC_INDIRECT_SCOPE int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a, state.pf = family; err = nf_conntrack_in(skb, &state); if (err != NF_ACCEPT) - goto out_push; + goto nf_error; } do_nat: @@ -1047,7 +1049,7 @@ do_nat: err = tcf_ct_act_nat(skb, ct, ctinfo, p->ct_action, &p->range, commit); if (err != NF_ACCEPT) - goto drop; + goto nf_error; if (!nf_ct_is_confirmed(ct) && commit && p->helper && !nfct_help(ct)) { err = __nf_ct_try_assign_helper(ct, p->tmpl, GFP_ATOMIC); @@ -1061,8 +1063,9 @@ do_nat: } if (nf_ct_is_confirmed(ct) ? ((!cached && !skip_add) || add_helper) : commit) { - if (nf_ct_helper(skb, ct, ctinfo, family) != NF_ACCEPT) - goto drop; + err = nf_ct_helper(skb, ct, ctinfo, family); + if (err != NF_ACCEPT) + goto nf_error; } if (commit) { @@ -1075,8 +1078,9 @@ do_nat: /* This will take care of sending queued events * even if the connection is already confirmed. */ - if (nf_conntrack_confirm(skb) != NF_ACCEPT) - goto drop; + err = nf_conntrack_confirm(skb); + if (err != NF_ACCEPT) + goto nf_error; } if (!skip_add) @@ -1100,6 +1104,21 @@ out_frag: drop: tcf_action_inc_drop_qstats(&c->common); return TC_ACT_SHOT; + +nf_error: + /* some verdicts store extra data in upper bits, such + * as errno or queue number. + */ + switch (err & NF_VERDICT_MASK) { + case NF_DROP: + goto drop; + case NF_STOLEN: + tcf_action_inc_drop_qstats(&c->common); + return TC_ACT_CONSUMED; + default: + DEBUG_NET_WARN_ON_ONCE(1); + goto drop; + } } static const struct nla_policy ct_policy[TCA_CT_MAX + 1] = { -- cgit v1.2.3-58-ga151 From 9199b915e9fad7f5eff6160d24ff6b38e970107d Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Mon, 8 Jul 2024 09:58:11 +0300 Subject: xfrm: fix netdev reference count imbalance In cited commit, netdev_tracker_alloc() is called for the newly allocated xfrm state, but dev_hold() is missed, which causes netdev reference count imbalance, because netdev_put() is called when the state is freed in xfrm_dev_state_free(). Fix the issue by replacing netdev_tracker_alloc() with netdev_hold(). 
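As a rough sketch of the pairing this restores (the helper names below are made up for illustration; only netdev_hold(), netdev_put() and netdev_tracker_alloc() are real APIs): netdev_tracker_alloc() registers a tracker without taking a reference, so the netdev_put() issued when the state is freed would underflow, while netdev_hold() takes the reference and registers the tracker in one step.

	/* hypothetical helpers, shown only to illustrate the balanced pair */
	static void offload_bind_dev(struct xfrm_dev_offload *xso,
				     struct net_device *dev)
	{
		xso->dev = dev;
		/* takes a reference *and* registers the tracker */
		netdev_hold(dev, &xso->dev_tracker, GFP_ATOMIC);
	}

	static void offload_unbind_dev(struct xfrm_dev_offload *xso)
	{
		/* balanced by the netdev_hold() above */
		netdev_put(xso->dev, &xso->dev_tracker);
		xso->dev = NULL;
	}
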
Fixes: f8a70afafc17 ("xfrm: add TX datapath support for IPsec packet offload mode") Signed-off-by: Jianbo Liu Reviewed-by: Cosmin Ratiu Signed-off-by: Leon Romanovsky Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_state.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 936f9348e5f6..67b2a399a48a 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1331,8 +1331,7 @@ found: xso->dev = xdo->dev; xso->real_dev = xdo->real_dev; xso->flags = XFRM_DEV_OFFLOAD_FLAG_ACQ; - netdev_tracker_alloc(xso->dev, &xso->dev_tracker, - GFP_ATOMIC); + netdev_hold(xso->dev, &xso->dev_tracker, GFP_ATOMIC); error = xso->dev->xfrmdev_ops->xdo_dev_state_add(x, NULL); if (error) { xso->dir = 0; -- cgit v1.2.3-58-ga151 From 89a2aefe4b084686c2ffc1ee939585111ea4fc0f Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Mon, 8 Jul 2024 09:58:12 +0300 Subject: xfrm: call xfrm_dev_policy_delete when kill policy xfrm_policy_kill() is called at different places to delete xfrm policy. It will call xfrm_pol_put(). But xfrm_dev_policy_delete() is not called to free the policy offloaded to hardware. The three commits cited here are to handle this issue by calling xfrm_dev_policy_delete() outside xfrm_get_policy(). But they didn't cover all the cases. An example, which is not handled for now, is xfrm_policy_insert(). It is called when XFRM_MSG_UPDPOLICY request is received. Old policy is replaced by new one, but the offloaded policy is not deleted, so driver doesn't have the chance to release hardware resources. To resolve this issue for all cases, move xfrm_dev_policy_delete() into xfrm_policy_kill(), so the offloaded policy can be deleted from hardware when it is called, which avoids hardware resources leakage. 
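In rough terms (retire_policy() below is a made-up name, used only to show the resulting call pattern), every path that retires a policy now only needs to call xfrm_policy_kill():

	/* illustration only */
	static void retire_policy(struct xfrm_policy *pol)
	{
		/* before: each call site had to remember this, and some didn't:
		 *   xfrm_dev_policy_delete(pol);
		 */
		xfrm_policy_kill(pol);	/* now also releases the offloaded entry */
	}
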
Fixes: 919e43fad516 ("xfrm: add an interface to offload policy") Fixes: bf06fcf4be0f ("xfrm: add missed call to delete offloaded policies") Fixes: 982c3aca8bac ("xfrm: delete offloaded policy") Signed-off-by: Jianbo Liu Reviewed-by: Cosmin Ratiu Signed-off-by: Leon Romanovsky Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_policy.c | 5 ++--- net/xfrm/xfrm_user.c | 1 - 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 475b904fe68b..10f68d572885 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -452,6 +452,8 @@ EXPORT_SYMBOL(xfrm_policy_destroy); static void xfrm_policy_kill(struct xfrm_policy *policy) { + xfrm_dev_policy_delete(policy); + write_lock_bh(&policy->lock); policy->walk.dead = 1; write_unlock_bh(&policy->lock); @@ -1850,7 +1852,6 @@ again: __xfrm_policy_unlink(pol, dir); spin_unlock_bh(&net->xfrm.xfrm_policy_lock); - xfrm_dev_policy_delete(pol); cnt++; xfrm_audit_policy_delete(pol, 1, task_valid); xfrm_policy_kill(pol); @@ -1891,7 +1892,6 @@ again: __xfrm_policy_unlink(pol, dir); spin_unlock_bh(&net->xfrm.xfrm_policy_lock); - xfrm_dev_policy_delete(pol); cnt++; xfrm_audit_policy_delete(pol, 1, task_valid); xfrm_policy_kill(pol); @@ -2342,7 +2342,6 @@ int xfrm_policy_delete(struct xfrm_policy *pol, int dir) pol = __xfrm_policy_unlink(pol, dir); spin_unlock_bh(&net->xfrm.xfrm_policy_lock); if (pol) { - xfrm_dev_policy_delete(pol); xfrm_policy_kill(pol); return 0; } diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index e83c687bd64e..77355422ce82 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -2455,7 +2455,6 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, NETLINK_CB(skb).portid); } } else { - xfrm_dev_policy_delete(xp); xfrm_audit_policy_delete(xp, err ? 0 : 1, true); if (err != 0) -- cgit v1.2.3-58-ga151 From 34ce9c8b8ada0c03ea8f2a99fcc7a1b297ccaa95 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 2 Jul 2024 13:35:56 +0200 Subject: wifi: nl80211: split helper function from nl80211_put_iface_combinations Create a helper function that puts the data from struct ieee80211_iface_combination to a nl80211 message. This will be used for adding per-radio interface combination data. 
Signed-off-by: Felix Fietkau Link: https://patch.msgid.link/22a0eee19dbcf98627239328bc66decd3395122c.1719919832.git-series.nbd@nbd.name Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 111 ++++++++++++++++++++++++++----------------------- 1 file changed, 59 insertions(+), 52 deletions(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 674368d028f3..deacd5f3f256 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1631,71 +1631,78 @@ nla_put_failure: return -ENOBUFS; } -static int nl80211_put_iface_combinations(struct wiphy *wiphy, - struct sk_buff *msg, - bool large) +static int nl80211_put_ifcomb_data(struct sk_buff *msg, bool large, int idx, + const struct ieee80211_iface_combination *c) { - struct nlattr *nl_combis; - int i, j; + struct nlattr *nl_combi, *nl_limits; + int i; - nl_combis = nla_nest_start_noflag(msg, - NL80211_ATTR_INTERFACE_COMBINATIONS); - if (!nl_combis) + nl_combi = nla_nest_start_noflag(msg, idx); + if (!nl_combi) goto nla_put_failure; - for (i = 0; i < wiphy->n_iface_combinations; i++) { - const struct ieee80211_iface_combination *c; - struct nlattr *nl_combi, *nl_limits; + nl_limits = nla_nest_start_noflag(msg, NL80211_IFACE_COMB_LIMITS); + if (!nl_limits) + goto nla_put_failure; - c = &wiphy->iface_combinations[i]; + for (i = 0; i < c->n_limits; i++) { + struct nlattr *nl_limit; - nl_combi = nla_nest_start_noflag(msg, i + 1); - if (!nl_combi) + nl_limit = nla_nest_start_noflag(msg, i + 1); + if (!nl_limit) goto nla_put_failure; - - nl_limits = nla_nest_start_noflag(msg, - NL80211_IFACE_COMB_LIMITS); - if (!nl_limits) + if (nla_put_u32(msg, NL80211_IFACE_LIMIT_MAX, c->limits[i].max)) goto nla_put_failure; + if (nl80211_put_iftypes(msg, NL80211_IFACE_LIMIT_TYPES, + c->limits[i].types)) + goto nla_put_failure; + nla_nest_end(msg, nl_limit); + } - for (j = 0; j < c->n_limits; j++) { - struct nlattr *nl_limit; + nla_nest_end(msg, nl_limits); - nl_limit = nla_nest_start_noflag(msg, j + 1); - if (!nl_limit) - goto nla_put_failure; - if (nla_put_u32(msg, NL80211_IFACE_LIMIT_MAX, - c->limits[j].max)) - goto nla_put_failure; - if (nl80211_put_iftypes(msg, NL80211_IFACE_LIMIT_TYPES, - c->limits[j].types)) - goto nla_put_failure; - nla_nest_end(msg, nl_limit); - } + if (c->beacon_int_infra_match && + nla_put_flag(msg, NL80211_IFACE_COMB_STA_AP_BI_MATCH)) + goto nla_put_failure; + if (nla_put_u32(msg, NL80211_IFACE_COMB_NUM_CHANNELS, + c->num_different_channels) || + nla_put_u32(msg, NL80211_IFACE_COMB_MAXNUM, + c->max_interfaces)) + goto nla_put_failure; + if (large && + (nla_put_u32(msg, NL80211_IFACE_COMB_RADAR_DETECT_WIDTHS, + c->radar_detect_widths) || + nla_put_u32(msg, NL80211_IFACE_COMB_RADAR_DETECT_REGIONS, + c->radar_detect_regions))) + goto nla_put_failure; + if (c->beacon_int_min_gcd && + nla_put_u32(msg, NL80211_IFACE_COMB_BI_MIN_GCD, + c->beacon_int_min_gcd)) + goto nla_put_failure; - nla_nest_end(msg, nl_limits); + nla_nest_end(msg, nl_combi); - if (c->beacon_int_infra_match && - nla_put_flag(msg, NL80211_IFACE_COMB_STA_AP_BI_MATCH)) - goto nla_put_failure; - if (nla_put_u32(msg, NL80211_IFACE_COMB_NUM_CHANNELS, - c->num_different_channels) || - nla_put_u32(msg, NL80211_IFACE_COMB_MAXNUM, - c->max_interfaces)) - goto nla_put_failure; - if (large && - (nla_put_u32(msg, NL80211_IFACE_COMB_RADAR_DETECT_WIDTHS, - c->radar_detect_widths) || - nla_put_u32(msg, NL80211_IFACE_COMB_RADAR_DETECT_REGIONS, - c->radar_detect_regions))) - goto nla_put_failure; - if (c->beacon_int_min_gcd && - nla_put_u32(msg, 
NL80211_IFACE_COMB_BI_MIN_GCD, - c->beacon_int_min_gcd)) - goto nla_put_failure; + return 0; +nla_put_failure: + return -ENOBUFS; +} - nla_nest_end(msg, nl_combi); - } +static int nl80211_put_iface_combinations(struct wiphy *wiphy, + struct sk_buff *msg, + bool large) +{ + struct nlattr *nl_combis; + int i; + + nl_combis = nla_nest_start_noflag(msg, + NL80211_ATTR_INTERFACE_COMBINATIONS); + if (!nl_combis) + goto nla_put_failure; + + for (i = 0; i < wiphy->n_iface_combinations; i++) + if (nl80211_put_ifcomb_data(msg, large, i + 1, + &wiphy->iface_combinations[i])) + goto nla_put_failure; nla_nest_end(msg, nl_combis); -- cgit v1.2.3-58-ga151 From 574e609c4e6a0843a9ed53de79e00da8fb3e7437 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Thu, 4 Jul 2024 15:09:47 +0200 Subject: wifi: mac80211: clear vif drv_priv after remove_interface when stopping Avoid reusing stale driver data when an interface is brought down and up again. In order to avoid having to duplicate the memset in every single driver, do it here. Signed-off-by: Felix Fietkau Link: https://patch.msgid.link/20240704130947.48609-1-nbd@nbd.name Signed-off-by: Johannes Berg --- net/mac80211/iface.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 6d969d9f1ac9..97aee0a1a39a 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -689,8 +689,12 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do fallthrough; default: - if (going_down) - drv_remove_interface(local, sdata); + if (!going_down) + break; + drv_remove_interface(local, sdata); + + /* Clear private driver data to prevent reuse */ + memset(sdata->vif.drv_priv, 0, local->hw.vif_data_size); } ieee80211_recalc_ps(local); -- cgit v1.2.3-58-ga151 From 7cd445635562a38c1e3534e3e691c25d2737be54 Mon Sep 17 00:00:00 2001 From: Michael-CY Lee Date: Fri, 5 Jul 2024 15:43:26 +0800 Subject: wifi: mac80211: cancel color change finalize work when link is stopped The color change finalize work might be called after the link is stopped, which might lead to a kernel crash. Signed-off-by: Michael-CY Lee Link: https://patch.msgid.link/20240705074326.11172-1-michael-cy.lee@mediatek.com Signed-off-by: Johannes Berg --- net/mac80211/link.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/mac80211/link.c b/net/mac80211/link.c index 2e6e92defbca..1a211b8d4057 100644 --- a/net/mac80211/link.c +++ b/net/mac80211/link.c @@ -71,6 +71,8 @@ void ieee80211_link_stop(struct ieee80211_link_data *link) ieee80211_mgd_stop_link(link); cancel_delayed_work_sync(&link->color_collision_detect_work); + wiphy_work_cancel(link->sdata->local->hw.wiphy, + &link->color_change_finalize_work); wiphy_work_cancel(link->sdata->local->hw.wiphy, &link->csa.finalize_work); ieee80211_link_release_channel(link); -- cgit v1.2.3-58-ga151 From 4044b23781104801f70c4a4ec3ca090730a161c5 Mon Sep 17 00:00:00 2001 From: Michael-CY Lee Date: Fri, 5 Jul 2024 15:43:46 +0800 Subject: wifi: mac80211: do not check BSS color collision in certain cases Do not check BSS color collision in following cases 1. already under a color change 2. 
color change is disabled Signed-off-by: Michael-CY Lee Link: https://patch.msgid.link/20240705074346.11228-1-michael-cy.lee@mediatek.com Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 0ff9062a130c..59ad24a71141 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -3358,6 +3358,7 @@ static void ieee80211_rx_check_bss_color_collision(struct ieee80211_rx_data *rx) { struct ieee80211_mgmt *mgmt = (void *)rx->skb->data; + struct ieee80211_bss_conf *bss_conf; const struct element *ie; size_t baselen; @@ -3368,7 +3369,9 @@ ieee80211_rx_check_bss_color_collision(struct ieee80211_rx_data *rx) if (ieee80211_hw_check(&rx->local->hw, DETECTS_COLOR_COLLISION)) return; - if (rx->link->conf->csa_active) + bss_conf = rx->link->conf; + if (bss_conf->csa_active || bss_conf->color_change_active || + !bss_conf->he_bss_color.enabled) return; baselen = mgmt->u.beacon.variable - rx->skb->data; @@ -3380,7 +3383,6 @@ ieee80211_rx_check_bss_color_collision(struct ieee80211_rx_data *rx) rx->skb->len - baselen); if (ie && ie->datalen >= sizeof(struct ieee80211_he_operation) && ie->datalen >= ieee80211_he_oper_size(ie->data + 1)) { - struct ieee80211_bss_conf *bss_conf = rx->link->conf; const struct ieee80211_he_operation *he_oper; u8 color; -- cgit v1.2.3-58-ga151 From 946b6c48cca48591fb495508c5dbfade767173d0 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 5 Jul 2024 13:42:06 +0200 Subject: net: page_pool: fix warning code WARN_ON_ONCE("string") doesn't really do what appears to be intended, so fix that. Signed-off-by: Johannes Berg Fixes: 90de47f020db ("page_pool: fragment API support for 32-bit arch with 64-bit DMA") Link: https://patch.msgid.link/20240705134221.2f4de205caa1.I28496dc0f2ced580282d1fb892048017c4491e21@changeid Signed-off-by: Jakub Kicinski --- net/core/page_pool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/page_pool.c b/net/core/page_pool.c index dc52f078adde..855271a6cad2 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -444,7 +444,7 @@ static bool page_pool_dma_map(struct page_pool *pool, netmem_ref netmem) return true; unmap_failed: - WARN_ON_ONCE("unexpected DMA address, please report to netdev@"); + WARN_ONCE(1, "unexpected DMA address, please report to netdev@"); dma_unmap_page_attrs(pool->p.dev, dma, PAGE_SIZE << pool->p.order, pool->p.dma_dir, DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING); -- cgit v1.2.3-58-ga151 From f8ad00f3fb2af98f29aacd7ceb4ecdd5ad3c9a7f Mon Sep 17 00:00:00 2001 From: James Chapman Date: Thu, 4 Jul 2024 16:25:08 +0100 Subject: l2tp: fix possible UAF when cleaning up tunnels syzbot reported a UAF caused by a race when the L2TP work queue closes a tunnel at the same time as a userspace thread closes a session in that tunnel. Tunnel cleanup is handled by a work queue which iterates through the sessions contained within a tunnel, and closes them in turn. Meanwhile, a userspace thread may arbitrarily close a session via either netlink command or by closing the pppox socket in the case of l2tp_ppp. The race condition may occur when l2tp_tunnel_closeall walks the list of sessions in the tunnel and deletes each one. Currently this is implemented using list_for_each_safe, but because the list spinlock is dropped in the loop body it's possible for other threads to manipulate the list during list_for_each_safe's list walk. 
This can lead to the list iterator being corrupted, leading to list_for_each_safe spinning. One sequence of events which may lead to this is as follows: * A tunnel is created, containing two sessions A and B. * A thread closes the tunnel, triggering tunnel cleanup via the work queue. * l2tp_tunnel_closeall runs in the context of the work queue. It removes session A from the tunnel session list, then drops the list lock. At this point the list_for_each_safe temporary variable is pointing to the other session on the list, which is session B, and the list can be manipulated by other threads since the list lock has been released. * Userspace closes session B, which removes the session from its parent tunnel via l2tp_session_delete. Since l2tp_tunnel_closeall has released the tunnel list lock, l2tp_session_delete is able to call list_del_init on the session B list node. * Back on the work queue, l2tp_tunnel_closeall resumes execution and will now spin forever on the same list entry until the underlying session structure is freed, at which point UAF occurs. The solution is to iterate over the tunnel's session list using list_first_entry_not_null to avoid the possibility of the list iterator pointing at a list item which may be removed during the walk. Also, have l2tp_tunnel_closeall ref each session while it processes it to prevent another thread from freeing it. cpu1 cpu2 --- --- pppol2tp_release() spin_lock_bh(&tunnel->list_lock); for (;;) { session = list_first_entry_or_null(&tunnel->session_list, struct l2tp_session, list); if (!session) break; list_del_init(&session->list); spin_unlock_bh(&tunnel->list_lock); l2tp_session_delete(session); l2tp_session_delete(session); spin_lock_bh(&tunnel->list_lock); } spin_unlock_bh(&tunnel->list_lock); Calling l2tp_session_delete on the same session twice isn't a problem per-se, but if cpu2 manages to destruct the socket and unref the session to zero before cpu1 progresses then it would lead to UAF. 
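For illustration, a condensed sketch of the pre-fix loop, kept only to show where the saved iterator goes stale once the lock is dropped:

	list_for_each_safe(pos, tmp, &tunnel->session_list) {
		session = list_entry(pos, struct l2tp_session, list);
		list_del_init(&session->list);
		spin_unlock_bh(&tunnel->list_lock);
		/* window: another thread may list_del_init() the *next*
		 * session (@tmp) here; list_del_init() leaves its ->next
		 * pointing at itself, so once @pos advances to it the walk
		 * never reaches the list head again and spins forever.
		 */
		l2tp_session_delete(session);
		spin_lock_bh(&tunnel->list_lock);
	}
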
Reported-by: syzbot+b471b7c936301a59745b@syzkaller.appspotmail.com Reported-by: syzbot+c041b4ce3a6dfd1e63e2@syzkaller.appspotmail.com Fixes: d18d3f0a24fc ("l2tp: replace hlist with simple list for per-tunnel session list") Signed-off-by: James Chapman Signed-off-by: Tom Parkin Link: https://patch.msgid.link/20240704152508.1923908-1-jchapman@katalix.com Signed-off-by: Paolo Abeni --- net/l2tp/l2tp_core.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index a99032076e04..29dfbd70c79c 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1289,17 +1289,20 @@ static void l2tp_session_unhash(struct l2tp_session *session) static void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel) { struct l2tp_session *session; - struct list_head *pos; - struct list_head *tmp; spin_lock_bh(&tunnel->list_lock); tunnel->acpt_newsess = false; - list_for_each_safe(pos, tmp, &tunnel->session_list) { - session = list_entry(pos, struct l2tp_session, list); + for (;;) { + session = list_first_entry_or_null(&tunnel->session_list, + struct l2tp_session, list); + if (!session) + break; + l2tp_session_inc_refcount(session); list_del_init(&session->list); spin_unlock_bh(&tunnel->list_lock); l2tp_session_delete(session); spin_lock_bh(&tunnel->list_lock); + l2tp_session_dec_refcount(session); } spin_unlock_bh(&tunnel->list_lock); } -- cgit v1.2.3-58-ga151 From 19b815ed71aadee9a2d31b7a700ef61ae8048010 Mon Sep 17 00:00:00 2001 From: Zong-Zhe Yang Date: Tue, 9 Jul 2024 15:35:31 +0800 Subject: wifi: mac80211: chanctx emulation set CHANGE_CHANNEL when in_reconfig Chanctx emulation didn't info IEEE80211_CONF_CHANGE_CHANNEL to drivers during ieee80211_restart_hw (ieee80211_emulate_add_chanctx). It caused non-chanctx drivers to not stand on the correct channel after recovery. RX then behaved abnormally. Finally, disconnection/reconnection occurred. So, set IEEE80211_CONF_CHANGE_CHANNEL when in_reconfig. Signed-off-by: Zong-Zhe Yang Link: https://patch.msgid.link/20240709073531.30565-1-kevin_yang@realtek.com Cc: stable@vger.kernel.org Fixes: 0a44dfc07074 ("wifi: mac80211: simplify non-chanctx drivers") Signed-off-by: Johannes Berg --- net/mac80211/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 7578ea56c12f..85a267bdb3e3 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -148,7 +148,7 @@ static u32 ieee80211_calc_hw_conf_chan(struct ieee80211_local *local, offchannel_flag ^= local->hw.conf.flags & IEEE80211_CONF_OFFCHANNEL; /* force it also for scanning, since drivers might config differently */ - if (offchannel_flag || local->scanning || + if (offchannel_flag || local->scanning || local->in_reconfig || !cfg80211_chandef_identical(&local->hw.conf.chandef, &chandef)) { local->hw.conf.chandef = chandef; changed |= IEEE80211_CONF_CHANGE_CHANNEL; -- cgit v1.2.3-58-ga151 From e6c06ca8f21d1cdb444c708e385d86a54bc5fc60 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 9 Jul 2024 10:38:30 +0200 Subject: wifi: cfg80211: add support for advertising multiple radios belonging to a wiphy The prerequisite for MLO support in cfg80211/mac80211 is that all the links participating in MLO must be from the same wiphy/ieee80211_hw. To meet this expectation, some drivers may need to group multiple discrete hardware each acting as a link in MLO under single wiphy. 
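As a driver-facing illustration (the array contents and example_* names below are made up; only struct wiphy_radio, struct wiphy_radio_freq_range, wiphy->radio and wiphy->n_radio come from this change), a device with two discrete radios could describe itself roughly like this:

	static const struct wiphy_radio_freq_range example_2g_range[] = {
		{ .start_freq = 2400000, .end_freq = 2500000 },	/* kHz */
	};

	static const struct wiphy_radio_freq_range example_5g_range[] = {
		{ .start_freq = 5150000, .end_freq = 5895000 },
	};

	static const struct wiphy_radio example_radios[] = {
		{
			.freq_range = example_2g_range,
			.n_freq_range = ARRAY_SIZE(example_2g_range),
			.iface_combinations = example_2g_comb,	/* hypothetical */
			.n_iface_combinations = ARRAY_SIZE(example_2g_comb),
		},
		{
			.freq_range = example_5g_range,
			.n_freq_range = ARRAY_SIZE(example_5g_range),
			.iface_combinations = example_5g_comb,	/* hypothetical */
			.n_iface_combinations = ARRAY_SIZE(example_5g_comb),
		},
	};

	/* during wiphy setup */
	wiphy->radio = example_radios;
	wiphy->n_radio = ARRAY_SIZE(example_radios);

Each such entry is then exported to user space under NL80211_ATTR_WIPHY_RADIOS.
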
With this change, supported frequencies and interface combinations of each individual radio are reported to user space. This allows user space to figure out the limitations of what combination of channels can be used concurrently. Even for non-MLO devices, this improves support for devices capable of running on multiple channels at the same time. Signed-off-by: Felix Fietkau Link: https://patch.msgid.link/18a88f9ce82b1c9f7c12f1672430eaf2bb0be295.1720514221.git-series.nbd@nbd.name Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 42 +++++++++++++++- include/uapi/linux/nl80211.h | 65 +++++++++++++++++++++++++ net/wireless/nl80211.c | 113 ++++++++++++++++++++++++++++++++++++++----- 3 files changed, 208 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 6f992aff74ae..a00cf80e61dc 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -5046,7 +5046,9 @@ struct ieee80211_iface_limit { * struct ieee80211_iface_combination - possible interface combination * * With this structure the driver can describe which interface - * combinations it supports concurrently. + * combinations it supports concurrently. When set in a struct wiphy_radio, + * the combinations refer to combinations of interfaces currently active on + * that radio. * * Examples: * @@ -5406,6 +5408,38 @@ struct wiphy_iftype_akm_suites { int n_akm_suites; }; +/** + * struct wiphy_radio_freq_range - wiphy frequency range + * @start_freq: start range edge frequency (kHz) + * @end_freq: end range edge frequency (kHz) + */ +struct wiphy_radio_freq_range { + u32 start_freq; + u32 end_freq; +}; + + +/** + * struct wiphy_radio - physical radio of a wiphy + * This structure describes a physical radio belonging to a wiphy. + * It is used to describe concurrent-channel capabilities. Only one channel + * can be active on the radio described by struct wiphy_radio. + * + * @freq_range: frequency range that the radio can operate on. + * @n_freq_range: number of elements in @freq_range + * + * @iface_combinations: Valid interface combinations array, should not + * list single interface types. + * @n_iface_combinations: number of entries in @iface_combinations array. + */ +struct wiphy_radio { + const struct wiphy_radio_freq_range *freq_range; + int n_freq_range; + + const struct ieee80211_iface_combination *iface_combinations; + int n_iface_combinations; +}; + #define CFG80211_HW_TIMESTAMP_ALL_PEERS 0xffff /** @@ -5624,6 +5658,9 @@ struct wiphy_iftype_akm_suites { * A value of %CFG80211_HW_TIMESTAMP_ALL_PEERS indicates the driver * supports enabling HW timestamping for all peers (i.e. no need to * specify a mac address). + * + * @radio: radios belonging to this wiphy + * @n_radio: number of radios */ struct wiphy { struct mutex mtx; @@ -5774,6 +5811,9 @@ struct wiphy { u16 hw_timestamp_max_peers; + int n_radio; + const struct wiphy_radio *radio; + char priv[] __aligned(NETDEV_ALIGN); }; diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 6ae3997061b6..f97f5adc8d51 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2052,6 +2052,10 @@ enum nl80211_commands { * @NL80211_ATTR_INTERFACE_COMBINATIONS: Nested attribute listing the supported * interface combinations. In each nested item, it contains attributes * defined in &enum nl80211_if_combination_attrs. + * If the wiphy uses multiple radios (@NL80211_ATTR_WIPHY_RADIOS is set), + * this attribute contains the interface combinations of the first radio. 
+ * See @NL80211_ATTR_WIPHY_INTERFACE_COMBINATIONS for the global wiphy + * combinations for the sum of all radios. * @NL80211_ATTR_SOFTWARE_IFTYPES: Nested attribute (just like * %NL80211_ATTR_SUPPORTED_IFTYPES) containing the interface types that * are managed in software: interfaces of these types aren't subject to @@ -2856,6 +2860,14 @@ enum nl80211_commands { * %NL80211_CMD_ASSOCIATE indicating the SPP A-MSDUs * are used on this connection * + * @NL80211_ATTR_WIPHY_RADIOS: Nested attribute describing physical radios + * belonging to this wiphy. See &enum nl80211_wiphy_radio_attrs. + * + * @NL80211_ATTR_WIPHY_INTERFACE_COMBINATIONS: Nested attribute listing the + * supported interface combinations for all radios combined. In each + * nested item, it contains attributes defined in + * &enum nl80211_if_combination_attrs. + * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -3401,6 +3413,9 @@ enum nl80211_attrs { NL80211_ATTR_ASSOC_SPP_AMSDU, + NL80211_ATTR_WIPHY_RADIOS, + NL80211_ATTR_WIPHY_INTERFACE_COMBINATIONS, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -8005,4 +8020,54 @@ enum nl80211_ap_settings_flags { NL80211_AP_SETTINGS_SA_QUERY_OFFLOAD_SUPPORT = 1 << 1, }; +/** + * enum nl80211_wiphy_radio_attrs - wiphy radio attributes + * + * @__NL80211_WIPHY_RADIO_ATTR_INVALID: Invalid + * + * @NL80211_WIPHY_RADIO_ATTR_INDEX: Index of this radio (u32) + * @NL80211_WIPHY_RADIO_ATTR_FREQ_RANGE: Frequency range supported by this + * radio. Attribute may be present multiple times. + * @NL80211_WIPHY_RADIO_ATTR_INTERFACE_COMBINATION: Supported interface + * combination for this radio. Attribute may be present multiple times + * and contains attributes defined in &enum nl80211_if_combination_attrs. + * + * @__NL80211_WIPHY_RADIO_ATTR_LAST: Internal + * @NL80211_WIPHY_RADIO_ATTR_MAX: Highest attribute + */ +enum nl80211_wiphy_radio_attrs { + __NL80211_WIPHY_RADIO_ATTR_INVALID, + + NL80211_WIPHY_RADIO_ATTR_INDEX, + NL80211_WIPHY_RADIO_ATTR_FREQ_RANGE, + NL80211_WIPHY_RADIO_ATTR_INTERFACE_COMBINATION, + + /* keep last */ + __NL80211_WIPHY_RADIO_ATTR_LAST, + NL80211_WIPHY_RADIO_ATTR_MAX = __NL80211_WIPHY_RADIO_ATTR_LAST - 1, +}; + +/** + * enum nl80211_wiphy_radio_freq_range - wiphy radio frequency range + * + * @__NL80211_WIPHY_RADIO_FREQ_ATTR_INVALID: Invalid + * + * @NL80211_WIPHY_RADIO_FREQ_ATTR_START: Frequency range start (u32). + * The unit is kHz. + * @NL80211_WIPHY_RADIO_FREQ_ATTR_END: Frequency range end (u32). + * The unit is kHz. 
+ * + * @__NL80211_WIPHY_RADIO_FREQ_ATTR_LAST: Internal + * @NL80211_WIPHY_RADIO_FREQ_ATTR_MAX: Highest attribute + */ +enum nl80211_wiphy_radio_freq_range { + __NL80211_WIPHY_RADIO_FREQ_ATTR_INVALID, + + NL80211_WIPHY_RADIO_FREQ_ATTR_START, + NL80211_WIPHY_RADIO_FREQ_ATTR_END, + + __NL80211_WIPHY_RADIO_FREQ_ATTR_LAST, + NL80211_WIPHY_RADIO_FREQ_ATTR_MAX = __NL80211_WIPHY_RADIO_FREQ_ATTR_LAST - 1, +}; + #endif /* __LINUX_NL80211_H */ diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index deacd5f3f256..7397a372c78e 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1632,16 +1632,18 @@ nla_put_failure: } static int nl80211_put_ifcomb_data(struct sk_buff *msg, bool large, int idx, - const struct ieee80211_iface_combination *c) + const struct ieee80211_iface_combination *c, + u16 nested) { struct nlattr *nl_combi, *nl_limits; int i; - nl_combi = nla_nest_start_noflag(msg, idx); + nl_combi = nla_nest_start_noflag(msg, idx | nested); if (!nl_combi) goto nla_put_failure; - nl_limits = nla_nest_start_noflag(msg, NL80211_IFACE_COMB_LIMITS); + nl_limits = nla_nest_start_noflag(msg, NL80211_IFACE_COMB_LIMITS | + nested); if (!nl_limits) goto nla_put_failure; @@ -1689,19 +1691,26 @@ nla_put_failure: static int nl80211_put_iface_combinations(struct wiphy *wiphy, struct sk_buff *msg, - bool large) + int attr, int radio, + bool large, u16 nested) { + const struct ieee80211_iface_combination *c; struct nlattr *nl_combis; - int i; + int i, n; - nl_combis = nla_nest_start_noflag(msg, - NL80211_ATTR_INTERFACE_COMBINATIONS); + nl_combis = nla_nest_start_noflag(msg, attr | nested); if (!nl_combis) goto nla_put_failure; - for (i = 0; i < wiphy->n_iface_combinations; i++) - if (nl80211_put_ifcomb_data(msg, large, i + 1, - &wiphy->iface_combinations[i])) + if (radio >= 0) { + c = wiphy->radio[0].iface_combinations; + n = wiphy->radio[0].n_iface_combinations; + } else { + c = wiphy->iface_combinations; + n = wiphy->n_iface_combinations; + } + for (i = 0; i < n; i++) + if (nl80211_put_ifcomb_data(msg, large, i + 1, &c[i], nested)) goto nla_put_failure; nla_nest_end(msg, nl_combis); @@ -2408,6 +2417,80 @@ fail: return -ENOBUFS; } +static int nl80211_put_radio(struct wiphy *wiphy, struct sk_buff *msg, int idx) +{ + const struct wiphy_radio *r = &wiphy->radio[idx]; + struct nlattr *radio, *freq; + int i; + + radio = nla_nest_start(msg, idx); + if (!radio) + return -ENOBUFS; + + if (nla_put_u32(msg, NL80211_WIPHY_RADIO_ATTR_INDEX, idx)) + goto nla_put_failure; + + for (i = 0; i < r->n_freq_range; i++) { + const struct wiphy_radio_freq_range *range = &r->freq_range[i]; + + freq = nla_nest_start(msg, NL80211_WIPHY_RADIO_ATTR_FREQ_RANGE); + if (!freq) + goto nla_put_failure; + + if (nla_put_u32(msg, NL80211_WIPHY_RADIO_FREQ_ATTR_START, + range->start_freq) || + nla_put_u32(msg, NL80211_WIPHY_RADIO_FREQ_ATTR_END, + range->end_freq)) + goto nla_put_failure; + + nla_nest_end(msg, freq); + } + + for (i = 0; i < r->n_iface_combinations; i++) + if (nl80211_put_ifcomb_data(msg, true, + NL80211_WIPHY_RADIO_ATTR_INTERFACE_COMBINATION, + &r->iface_combinations[i], + NLA_F_NESTED)) + goto nla_put_failure; + + nla_nest_end(msg, radio); + + return 0; + +nla_put_failure: + return -ENOBUFS; +} + +static int nl80211_put_radios(struct wiphy *wiphy, struct sk_buff *msg) +{ + struct nlattr *radios; + int i; + + if (!wiphy->n_radio) + return 0; + + radios = nla_nest_start(msg, NL80211_ATTR_WIPHY_RADIOS); + if (!radios) + return -ENOBUFS; + + for (i = 0; i < wiphy->n_radio; i++) + if (nl80211_put_radio(wiphy, 
msg, i)) + goto fail; + + nla_nest_end(msg, radios); + + if (nl80211_put_iface_combinations(wiphy, msg, + NL80211_ATTR_WIPHY_INTERFACE_COMBINATIONS, + -1, true, NLA_F_NESTED)) + return -ENOBUFS; + + return 0; + +fail: + nla_nest_cancel(msg, radios); + return -ENOBUFS; +} + struct nl80211_dump_wiphy_state { s64 filter_wiphy; long start; @@ -2703,7 +2786,9 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev, goto nla_put_failure; if (nl80211_put_iface_combinations(&rdev->wiphy, msg, - state->split)) + NL80211_ATTR_INTERFACE_COMBINATIONS, + rdev->wiphy.n_radio ? 0 : -1, + state->split, 0)) goto nla_put_failure; state->split_start++; @@ -3017,6 +3102,12 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev, rdev->wiphy.hw_timestamp_max_peers)) goto nla_put_failure; + state->split_start++; + break; + case 17: + if (nl80211_put_radios(&rdev->wiphy, msg)) + goto nla_put_failure; + /* done */ state->split_start = 0; break; -- cgit v1.2.3-58-ga151 From abb4cfe3661aa05426916b21164f88ca5a405a3a Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 9 Jul 2024 10:38:31 +0200 Subject: wifi: cfg80211: extend interface combination check for multi-radio Add a field in struct iface_combination_params to check per-radio interface combinations instead of per-wiphy ones. Signed-off-by: Felix Fietkau Link: https://patch.msgid.link/32b28da89c2d759b0324deeefe2be4cee91de18e.1720514221.git-series.nbd@nbd.name Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 5 +++++ net/mac80211/util.c | 5 ++++- net/wireless/rdev-ops.h | 12 ++++++++++++ net/wireless/util.c | 33 ++++++++++++++++++++++++++------- 4 files changed, 47 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index a00cf80e61dc..4767e2c76b01 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1598,6 +1598,7 @@ struct cfg80211_color_change_settings { * * Used to pass interface combination parameters * + * @radio_idx: wiphy radio index or -1 for global * @num_different_channels: the number of different channels we want * to use for verification * @radar_detect: a bitmap where each bit corresponds to a channel @@ -1611,6 +1612,7 @@ struct cfg80211_color_change_settings { * the verification */ struct iface_combination_params { + int radio_idx; int num_different_channels; u8 radar_detect; int iftype_num[NUM_NL80211_IFTYPES]; @@ -4580,6 +4582,8 @@ struct mgmt_frame_regs { * * @set_hw_timestamp: Enable/disable HW timestamping of TM/FTM frames. * @set_ttlm: set the TID to link mapping. + * @get_radio_mask: get bitmask of radios in use. 
+ * (invoked with the wiphy mutex held) */ struct cfg80211_ops { int (*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow); @@ -4941,6 +4945,7 @@ struct cfg80211_ops { struct cfg80211_set_hw_timestamp *hwts); int (*set_ttlm)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_ttlm_params *params); + u32 (*get_radio_mask)(struct wiphy *wiphy, struct net_device *dev); }; /* diff --git a/net/mac80211/util.c b/net/mac80211/util.c index c6d5f73119d8..27f0db2e9796 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -3944,6 +3944,7 @@ int ieee80211_check_combinations(struct ieee80211_sub_if_data *sdata, int total = 1; struct iface_combination_params params = { .radar_detect = radar_detect, + .radio_idx = -1, }; lockdep_assert_wiphy(local->hw.wiphy); @@ -4034,7 +4035,9 @@ int ieee80211_max_num_channels(struct ieee80211_local *local) struct ieee80211_chanctx *ctx; u32 max_num_different_channels = 1; int err; - struct iface_combination_params params = {0}; + struct iface_combination_params params = { + .radio_idx = -1, + }; lockdep_assert_wiphy(local->hw.wiphy); diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index 8f15658002ee..ec3f4aa1c807 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -1532,4 +1532,16 @@ rdev_set_ttlm(struct cfg80211_registered_device *rdev, return ret; } + +static inline u32 +rdev_get_radio_mask(struct cfg80211_registered_device *rdev, + struct net_device *dev) +{ + struct wiphy *wiphy = &rdev->wiphy; + + if (!rdev->ops->get_radio_mask) + return 0; + + return rdev->ops->get_radio_mask(wiphy, dev); +} #endif /* __CFG80211_RDEV_OPS */ diff --git a/net/wireless/util.c b/net/wireless/util.c index af6ec719567f..2492f259621f 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -2307,13 +2307,16 @@ static int cfg80211_wdev_bi(struct wireless_dev *wdev) static void cfg80211_calculate_bi_data(struct wiphy *wiphy, u32 new_beacon_int, u32 *beacon_int_gcd, - bool *beacon_int_different) + bool *beacon_int_different, + int radio_idx) { + struct cfg80211_registered_device *rdev; struct wireless_dev *wdev; *beacon_int_gcd = 0; *beacon_int_different = false; + rdev = wiphy_to_rdev(wiphy); list_for_each_entry(wdev, &wiphy->wdev_list, list) { int wdev_bi; @@ -2321,6 +2324,11 @@ static void cfg80211_calculate_bi_data(struct wiphy *wiphy, u32 new_beacon_int, if (wdev->valid_links) continue; + /* skip wdevs not active on the given wiphy radio */ + if (radio_idx >= 0 && + !(rdev_get_radio_mask(rdev, wdev->netdev) & BIT(radio_idx))) + continue; + wdev_bi = cfg80211_wdev_bi(wdev); if (!wdev_bi) @@ -2368,14 +2376,19 @@ int cfg80211_iter_combinations(struct wiphy *wiphy, void *data), void *data) { + const struct wiphy_radio *radio = NULL; + const struct ieee80211_iface_combination *c, *cs; const struct ieee80211_regdomain *regdom; enum nl80211_dfs_regions region = 0; - int i, j, iftype; + int i, j, n, iftype; int num_interfaces = 0; u32 used_iftypes = 0; u32 beacon_int_gcd; bool beacon_int_different; + if (params->radio_idx >= 0) + radio = &wiphy->radio[params->radio_idx]; + /* * This is a bit strange, since the iteration used to rely only on * the data given by the driver, but here it now relies on context, @@ -2387,7 +2400,8 @@ int cfg80211_iter_combinations(struct wiphy *wiphy, * interfaces (while being brought up) and channel/radar data. 
*/ cfg80211_calculate_bi_data(wiphy, params->new_beacon_int, - &beacon_int_gcd, &beacon_int_different); + &beacon_int_gcd, &beacon_int_different, + params->radio_idx); if (params->radar_detect) { rcu_read_lock(); @@ -2404,13 +2418,18 @@ int cfg80211_iter_combinations(struct wiphy *wiphy, used_iftypes |= BIT(iftype); } - for (i = 0; i < wiphy->n_iface_combinations; i++) { - const struct ieee80211_iface_combination *c; + if (radio) { + cs = radio->iface_combinations; + n = radio->n_iface_combinations; + } else { + cs = wiphy->iface_combinations; + n = wiphy->n_iface_combinations; + } + for (i = 0; i < n; i++) { struct ieee80211_iface_limit *limits; u32 all_iftypes = 0; - c = &wiphy->iface_combinations[i]; - + c = &cs[i]; if (num_interfaces > c->max_interfaces) continue; if (params->num_different_channels > c->num_different_channels) -- cgit v1.2.3-58-ga151 From 510dba80ed669d6123901ccf0476706122b008b1 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 9 Jul 2024 10:38:32 +0200 Subject: wifi: cfg80211: add helper for checking if a chandef is valid on a radio Check if the full channel width is in the radio's frequency range. Signed-off-by: Felix Fietkau Link: https://patch.msgid.link/7c8ea146feb6f37cee62e5ba6be5370403695797.1720514221.git-series.nbd@nbd.name [add missing Return: documentation] Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 11 +++++++++++ net/wireless/util.c | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+) (limited to 'net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 4767e2c76b01..192d72c8b465 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -6508,6 +6508,17 @@ static inline bool cfg80211_channel_is_psc(struct ieee80211_channel *chan) return ieee80211_frequency_to_channel(chan->center_freq) % 16 == 5; } +/** + * cfg80211_radio_chandef_valid - Check if the radio supports the chandef + * + * @radio: wiphy radio + * @chandef: chandef for current channel + * + * Return: whether or not the given chandef is valid for the given radio + */ +bool cfg80211_radio_chandef_valid(const struct wiphy_radio *radio, + const struct cfg80211_chan_def *chandef); + /** * ieee80211_get_response_rate - get basic rate for a given rate * diff --git a/net/wireless/util.c b/net/wireless/util.c index 2492f259621f..9a7c3adc8a3b 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -2886,3 +2886,38 @@ cfg80211_get_iftype_ext_capa(struct wiphy *wiphy, enum nl80211_iftype type) return NULL; } EXPORT_SYMBOL(cfg80211_get_iftype_ext_capa); + +static bool +ieee80211_radio_freq_range_valid(const struct wiphy_radio *radio, + u32 freq, u32 width) +{ + const struct wiphy_radio_freq_range *r; + int i; + + for (i = 0; i < radio->n_freq_range; i++) { + r = &radio->freq_range[i]; + if (freq - width / 2 >= r->start_freq && + freq + width / 2 <= r->end_freq) + return true; + } + + return false; +} + +bool cfg80211_radio_chandef_valid(const struct wiphy_radio *radio, + const struct cfg80211_chan_def *chandef) +{ + u32 freq, width; + + freq = ieee80211_chandef_to_khz(chandef); + width = nl80211_chan_width_to_mhz(chandef->width); + if (!ieee80211_radio_freq_range_valid(radio, freq, width)) + return false; + + freq = MHZ_TO_KHZ(chandef->center_freq2); + if (freq && !ieee80211_radio_freq_range_valid(radio, freq, width)) + return false; + + return true; +} +EXPORT_SYMBOL(cfg80211_radio_chandef_valid); -- cgit v1.2.3-58-ga151 From a01b1e9f9955679fa04adead06c100022e313e55 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 9 Jul 2024 
10:38:33 +0200 Subject: wifi: mac80211: add support for DFS with multiple radios DFS can be supported with multi-channel combinations, as long as each DFS capable radio only supports one channel. Signed-off-by: Felix Fietkau Link: https://patch.msgid.link/4d27a4adca99fa832af1f7cda4f2e71016bd9fda.1720514221.git-series.nbd@nbd.name Signed-off-by: Johannes Berg --- net/mac80211/main.c | 50 +++++++++++++++++++++++++++++++------------------- 1 file changed, 31 insertions(+), 19 deletions(-) (limited to 'net') diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 85a267bdb3e3..a3104b6ea6f0 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -1091,6 +1091,27 @@ static int ieee80211_init_cipher_suites(struct ieee80211_local *local) return 0; } +static bool +ieee80211_ifcomb_check(const struct ieee80211_iface_combination *c, int n_comb) +{ + int i, j; + + for (i = 0; i < n_comb; i++, c++) { + /* DFS is not supported with multi-channel combinations yet */ + if (c->radar_detect_widths && + c->num_different_channels > 1) + return false; + + /* mac80211 doesn't support more than one IBSS interface */ + for (j = 0; j < c->n_limits; j++) + if ((c->limits[j].types & BIT(NL80211_IFTYPE_ADHOC)) && + c->limits[j].max > 1) + return false; + } + + return true; +} + int ieee80211_register_hw(struct ieee80211_hw *hw) { struct ieee80211_local *local = hw_to_local(hw); @@ -1177,17 +1198,20 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) if (comb->num_different_channels > 1) return -EINVAL; } - } else { - /* DFS is not supported with multi-channel combinations yet */ - for (i = 0; i < local->hw.wiphy->n_iface_combinations; i++) { - const struct ieee80211_iface_combination *comb; + } - comb = &local->hw.wiphy->iface_combinations[i]; + if (hw->wiphy->n_radio) { + for (i = 0; i < hw->wiphy->n_radio; i++) { + const struct wiphy_radio *radio = &hw->wiphy->radio[i]; - if (comb->radar_detect_widths && - comb->num_different_channels > 1) + if (!ieee80211_ifcomb_check(radio->iface_combinations, + radio->n_iface_combinations)) return -EINVAL; } + } else { + if (!ieee80211_ifcomb_check(hw->wiphy->iface_combinations, + hw->wiphy->n_iface_combinations)) + return -EINVAL; } /* Only HW csum features are currently compatible with mac80211 */ @@ -1317,18 +1341,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) hw->wiphy->interface_modes |= BIT(NL80211_IFTYPE_MONITOR); hw->wiphy->software_iftypes |= BIT(NL80211_IFTYPE_MONITOR); - /* mac80211 doesn't support more than one IBSS interface right now */ - for (i = 0; i < hw->wiphy->n_iface_combinations; i++) { - const struct ieee80211_iface_combination *c; - int j; - - c = &hw->wiphy->iface_combinations[i]; - - for (j = 0; j < c->n_limits; j++) - if ((c->limits[j].types & BIT(NL80211_IFTYPE_ADHOC)) && - c->limits[j].max > 1) - return -EINVAL; - } local->int_scan_req = kzalloc(sizeof(*local->int_scan_req) + sizeof(void *) * channels, GFP_KERNEL); -- cgit v1.2.3-58-ga151 From 2920bc8d916d30b5273ec16e6878f13b24e3851f Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 9 Jul 2024 10:38:34 +0200 Subject: wifi: mac80211: add radio index to ieee80211_chanctx_conf Will be used to explicitly assign a channel context to a wiphy radio. 
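A rough sketch of how a multi-radio driver might consume the new field (the example_* names are hypothetical); radio_idx stays -1 when the context is not tied to a specific radio, as in the mac80211 hunks below:

	static int example_add_chanctx(struct ieee80211_hw *hw,
				       struct ieee80211_chanctx_conf *conf)
	{
		struct example_dev *ed = hw->priv;	/* hypothetical */
		int idx = conf->radio_idx;

		if (idx < 0)
			idx = 0;	/* context not bound to a specific radio */

		return example_program_channel(&ed->radio[idx], &conf->def);
	}
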
Signed-off-by: Felix Fietkau Link: https://patch.msgid.link/59f76f57d935f155099276be22badfa671d5bfd9.1720514221.git-series.nbd@nbd.name Signed-off-by: Johannes Berg --- include/net/mac80211.h | 2 ++ net/mac80211/chan.c | 8 +++++--- 2 files changed, 7 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index bd0f8aefa797..e78ccbe38d6d 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -250,6 +250,7 @@ struct ieee80211_chan_req { * @min_def: the minimum channel definition currently required. * @ap: the channel definition the AP actually is operating as, * for use with (wider bandwidth) OFDMA + * @radio_idx: index of the wiphy radio used used for this channel * @rx_chains_static: The number of RX chains that must always be * active on the channel to receive MIMO transmissions * @rx_chains_dynamic: The number of RX chains that must be enabled @@ -264,6 +265,7 @@ struct ieee80211_chanctx_conf { struct cfg80211_chan_def min_def; struct cfg80211_chan_def ap; + int radio_idx; u8 rx_chains_static, rx_chains_dynamic; bool radar_enabled; diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index 06a65dc6f6c6..6c4d02cb07bb 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -656,7 +656,8 @@ ieee80211_chanctx_radar_required(struct ieee80211_local *local, static struct ieee80211_chanctx * ieee80211_alloc_chanctx(struct ieee80211_local *local, const struct ieee80211_chan_req *chanreq, - enum ieee80211_chanctx_mode mode) + enum ieee80211_chanctx_mode mode, + int radio_idx) { struct ieee80211_chanctx *ctx; @@ -674,6 +675,7 @@ ieee80211_alloc_chanctx(struct ieee80211_local *local, ctx->conf.rx_chains_dynamic = 1; ctx->mode = mode; ctx->conf.radar_enabled = false; + ctx->conf.radio_idx = radio_idx; _ieee80211_recalc_chanctx_min_def(local, ctx, NULL, false); return ctx; @@ -714,7 +716,7 @@ ieee80211_new_chanctx(struct ieee80211_local *local, lockdep_assert_wiphy(local->hw.wiphy); - ctx = ieee80211_alloc_chanctx(local, chanreq, mode); + ctx = ieee80211_alloc_chanctx(local, chanreq, mode, -1); if (!ctx) return ERR_PTR(-ENOMEM); @@ -1155,7 +1157,7 @@ int ieee80211_link_reserve_chanctx(struct ieee80211_link_data *link, !list_empty(&curr_ctx->reserved_links)) return -EBUSY; - new_ctx = ieee80211_alloc_chanctx(local, chanreq, mode); + new_ctx = ieee80211_alloc_chanctx(local, chanreq, mode, -1); if (!new_ctx) return -ENOMEM; -- cgit v1.2.3-58-ga151 From 0874bcd0e1c97db0bada32df0934d5cf2507bedd Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 9 Jul 2024 10:38:35 +0200 Subject: wifi: mac80211: extend ifcomb check functions for multi-radio Add support for counting global and per-radio max/current number of channels, as well as checking radio-specific interface combinations. 
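In rough terms (sketch only, local variable names are illustrative), callers can now scope the checks to one radio, with -1 keeping the previous wiphy-wide behaviour:

	/* restrict the interface combination check to a single radio ... */
	err = ieee80211_check_combinations(sdata, &chanreq->oper, mode,
					   radar_detect_width, radio_idx);
	if (err < 0)
		return err;

	/* ... and the concurrent-channel budget likewise becomes per-radio */
	max_channels = ieee80211_max_num_channels(local, radio_idx);
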
Signed-off-by: Felix Fietkau Link: https://patch.msgid.link/e76307f8ce562a91a74faab274ae01f6a5ba0a2e.1720514221.git-series.nbd@nbd.name Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 7 ++- net/mac80211/chan.c | 17 ++++-- net/mac80211/ibss.c | 2 +- net/mac80211/ieee80211_i.h | 5 +- net/mac80211/iface.c | 2 +- net/mac80211/util.c | 146 +++++++++++++++++++++++++++++---------------- 6 files changed, 114 insertions(+), 65 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 3d49b3ee3a2b..85cb71de370f 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -263,7 +263,7 @@ static int ieee80211_start_p2p_device(struct wiphy *wiphy, lockdep_assert_wiphy(sdata->local->hw.wiphy); - ret = ieee80211_check_combinations(sdata, NULL, 0, 0); + ret = ieee80211_check_combinations(sdata, NULL, 0, 0, -1); if (ret < 0) return ret; @@ -285,7 +285,7 @@ static int ieee80211_start_nan(struct wiphy *wiphy, lockdep_assert_wiphy(sdata->local->hw.wiphy); - ret = ieee80211_check_combinations(sdata, NULL, 0, 0); + ret = ieee80211_check_combinations(sdata, NULL, 0, 0, -1); if (ret < 0) return ret; @@ -4008,7 +4008,7 @@ __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, goto out; /* if reservation is invalid then this will fail */ - err = ieee80211_check_combinations(sdata, NULL, chanctx->mode, 0); + err = ieee80211_check_combinations(sdata, NULL, chanctx->mode, 0, -1); if (err) { ieee80211_link_unreserve_chanctx(link_data); goto out; @@ -5203,4 +5203,5 @@ const struct cfg80211_ops mac80211_config_ops = { .del_link_station = ieee80211_del_link_station, .set_hw_timestamp = ieee80211_set_hw_timestamp, .set_ttlm = ieee80211_set_ttlm, + .get_radio_mask = ieee80211_get_radio_mask, }; diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index 6c4d02cb07bb..12bad02da561 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -47,24 +47,29 @@ int ieee80211_chanctx_refcount(struct ieee80211_local *local, ieee80211_chanctx_num_reserved(local, ctx); } -static int ieee80211_num_chanctx(struct ieee80211_local *local) +static int ieee80211_num_chanctx(struct ieee80211_local *local, int radio_idx) { struct ieee80211_chanctx *ctx; int num = 0; lockdep_assert_wiphy(local->hw.wiphy); - list_for_each_entry(ctx, &local->chanctx_list, list) + list_for_each_entry(ctx, &local->chanctx_list, list) { + if (radio_idx >= 0 && ctx->conf.radio_idx != radio_idx) + continue; num++; + } return num; } -static bool ieee80211_can_create_new_chanctx(struct ieee80211_local *local) +static bool ieee80211_can_create_new_chanctx(struct ieee80211_local *local, + int radio_idx) { lockdep_assert_wiphy(local->hw.wiphy); - return ieee80211_num_chanctx(local) < ieee80211_max_num_channels(local); + return ieee80211_num_chanctx(local, radio_idx) < + ieee80211_max_num_channels(local, radio_idx); } static struct ieee80211_chanctx * @@ -1101,7 +1106,7 @@ int ieee80211_link_reserve_chanctx(struct ieee80211_link_data *link, new_ctx = ieee80211_find_reservation_chanctx(local, chanreq, mode); if (!new_ctx) { - if (ieee80211_can_create_new_chanctx(local)) { + if (ieee80211_can_create_new_chanctx(local, -1)) { new_ctx = ieee80211_new_chanctx(local, chanreq, mode, false); if (IS_ERR(new_ctx)) @@ -1822,7 +1827,7 @@ int _ieee80211_link_use_channel(struct ieee80211_link_data *link, link->radar_required = ret; ret = ieee80211_check_combinations(sdata, &chanreq->oper, mode, - radar_detect_width); + radar_detect_width, -1); if (ret < 0) goto out; diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 
7db4c3ee7e6d..3f74bbceeca5 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -1746,7 +1746,7 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata, IEEE80211_CHANCTX_SHARED : IEEE80211_CHANCTX_EXCLUSIVE; ret = ieee80211_check_combinations(sdata, ¶ms->chandef, chanmode, - radar_detect_width); + radar_detect_width, -1); if (ret < 0) return ret; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 6349552e62a8..a3485e4c6132 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -2640,8 +2640,9 @@ void ieee80211_recalc_dtim(struct ieee80211_local *local, int ieee80211_check_combinations(struct ieee80211_sub_if_data *sdata, const struct cfg80211_chan_def *chandef, enum ieee80211_chanctx_mode chanmode, - u8 radar_detect); -int ieee80211_max_num_channels(struct ieee80211_local *local); + u8 radar_detect, int radio_idx); +int ieee80211_max_num_channels(struct ieee80211_local *local, int radio_idx); +u32 ieee80211_get_radio_mask(struct wiphy *wiphy, struct net_device *dev); void ieee80211_recalc_chanctx_chantype(struct ieee80211_local *local, struct ieee80211_chanctx *ctx); diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 97aee0a1a39a..b4ad66af3af3 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -397,7 +397,7 @@ static int ieee80211_check_concurrent_iface(struct ieee80211_sub_if_data *sdata, } } - return ieee80211_check_combinations(sdata, NULL, 0, 0); + return ieee80211_check_combinations(sdata, NULL, 0, 0, -1); } static int ieee80211_check_queues(struct ieee80211_sub_if_data *sdata, diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 27f0db2e9796..ced19ce7c51a 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -3932,20 +3932,103 @@ static u8 ieee80211_chanctx_radar_detect(struct ieee80211_local *local, return radar_detect; } +static u32 +__ieee80211_get_radio_mask(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_bss_conf *link_conf; + struct ieee80211_chanctx_conf *conf; + unsigned int link_id; + u32 mask = 0; + + for_each_vif_active_link(&sdata->vif, link_conf, link_id) { + conf = sdata_dereference(link_conf->chanctx_conf, sdata); + if (!conf || conf->radio_idx < 0) + continue; + + mask |= BIT(conf->radio_idx); + } + + return mask; +} + +u32 ieee80211_get_radio_mask(struct wiphy *wiphy, struct net_device *dev) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + + return __ieee80211_get_radio_mask(sdata); +} + +static bool +ieee80211_sdata_uses_radio(struct ieee80211_sub_if_data *sdata, int radio_idx) +{ + if (radio_idx < 0) + return true; + + return __ieee80211_get_radio_mask(sdata) & BIT(radio_idx); +} + +static int +ieee80211_fill_ifcomb_params(struct ieee80211_local *local, + struct iface_combination_params *params, + const struct cfg80211_chan_def *chandef, + struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_sub_if_data *sdata_iter; + struct ieee80211_chanctx *ctx; + int total = !!sdata; + + list_for_each_entry(ctx, &local->chanctx_list, list) { + if (ctx->replace_state == IEEE80211_CHANCTX_WILL_BE_REPLACED) + continue; + + if (params->radio_idx >= 0 && + ctx->conf.radio_idx != params->radio_idx) + continue; + + params->radar_detect |= + ieee80211_chanctx_radar_detect(local, ctx); + + if (chandef && ctx->mode != IEEE80211_CHANCTX_EXCLUSIVE && + cfg80211_chandef_compatible(chandef, &ctx->conf.def)) + continue; + + params->num_different_channels++; + } + + list_for_each_entry(sdata_iter, &local->interfaces, list) { + struct wireless_dev 
*wdev_iter; + + wdev_iter = &sdata_iter->wdev; + + if (sdata_iter == sdata || + !ieee80211_sdata_running(sdata_iter) || + cfg80211_iftype_allowed(local->hw.wiphy, + wdev_iter->iftype, 0, 1)) + continue; + + if (!ieee80211_sdata_uses_radio(sdata_iter, params->radio_idx)) + continue; + + params->iftype_num[wdev_iter->iftype]++; + total++; + } + + return total; +} + int ieee80211_check_combinations(struct ieee80211_sub_if_data *sdata, const struct cfg80211_chan_def *chandef, enum ieee80211_chanctx_mode chanmode, - u8 radar_detect) + u8 radar_detect, int radio_idx) { + bool shared = chanmode == IEEE80211_CHANCTX_SHARED; struct ieee80211_local *local = sdata->local; - struct ieee80211_sub_if_data *sdata_iter; enum nl80211_iftype iftype = sdata->wdev.iftype; - struct ieee80211_chanctx *ctx; - int total = 1; struct iface_combination_params params = { .radar_detect = radar_detect, - .radio_idx = -1, + .radio_idx = radio_idx, }; + int total; lockdep_assert_wiphy(local->hw.wiphy); @@ -3982,37 +4065,9 @@ int ieee80211_check_combinations(struct ieee80211_sub_if_data *sdata, if (iftype != NL80211_IFTYPE_UNSPECIFIED) params.iftype_num[iftype] = 1; - list_for_each_entry(ctx, &local->chanctx_list, list) { - if (ctx->replace_state == IEEE80211_CHANCTX_WILL_BE_REPLACED) - continue; - params.radar_detect |= - ieee80211_chanctx_radar_detect(local, ctx); - if (ctx->mode == IEEE80211_CHANCTX_EXCLUSIVE) { - params.num_different_channels++; - continue; - } - if (chandef && chanmode == IEEE80211_CHANCTX_SHARED && - cfg80211_chandef_compatible(chandef, - &ctx->conf.def)) - continue; - params.num_different_channels++; - } - - list_for_each_entry_rcu(sdata_iter, &local->interfaces, list) { - struct wireless_dev *wdev_iter; - - wdev_iter = &sdata_iter->wdev; - - if (sdata_iter == sdata || - !ieee80211_sdata_running(sdata_iter) || - cfg80211_iftype_allowed(local->hw.wiphy, - wdev_iter->iftype, 0, 1)) - continue; - - params.iftype_num[wdev_iter->iftype]++; - total++; - } - + total = ieee80211_fill_ifcomb_params(local, ¶ms, + shared ? 
chandef : NULL, + sdata); if (total == 1 && !params.radar_detect) return 0; @@ -4029,30 +4084,17 @@ ieee80211_iter_max_chans(const struct ieee80211_iface_combination *c, c->num_different_channels); } -int ieee80211_max_num_channels(struct ieee80211_local *local) +int ieee80211_max_num_channels(struct ieee80211_local *local, int radio_idx) { - struct ieee80211_sub_if_data *sdata; - struct ieee80211_chanctx *ctx; u32 max_num_different_channels = 1; int err; struct iface_combination_params params = { - .radio_idx = -1, + .radio_idx = radio_idx, }; lockdep_assert_wiphy(local->hw.wiphy); - list_for_each_entry(ctx, &local->chanctx_list, list) { - if (ctx->replace_state == IEEE80211_CHANCTX_WILL_BE_REPLACED) - continue; - - params.num_different_channels++; - - params.radar_detect |= - ieee80211_chanctx_radar_detect(local, ctx); - } - - list_for_each_entry_rcu(sdata, &local->interfaces, list) - params.iftype_num[sdata->wdev.iftype]++; + ieee80211_fill_ifcomb_params(local, ¶ms, NULL, NULL); err = cfg80211_iter_combinations(local->hw.wiphy, ¶ms, ieee80211_iter_max_chans, -- cgit v1.2.3-58-ga151 From 6265c67f2668047c97834c5c434f6abcf86e3406 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 9 Jul 2024 10:38:36 +0200 Subject: wifi: mac80211: move code in ieee80211_link_reserve_chanctx to a helper Reduces indentation in preparation for further changes Signed-off-by: Felix Fietkau Link: https://patch.msgid.link/cce95007092336254d51570f4a27e05a6f150a53.1720514221.git-series.nbd@nbd.name Signed-off-by: Johannes Berg --- net/mac80211/chan.c | 141 +++++++++++++++++++++++++++------------------------- 1 file changed, 72 insertions(+), 69 deletions(-) (limited to 'net') diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index 12bad02da561..9aa3b9e25228 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -1089,6 +1089,71 @@ int ieee80211_link_unreserve_chanctx(struct ieee80211_link_data *link) return 0; } +static struct ieee80211_chanctx * +ieee80211_replace_chanctx(struct ieee80211_local *local, + const struct ieee80211_chan_req *chanreq, + enum ieee80211_chanctx_mode mode, + struct ieee80211_chanctx *curr_ctx) +{ + struct ieee80211_chanctx *new_ctx, *ctx; + + if (!curr_ctx || (curr_ctx->replace_state == + IEEE80211_CHANCTX_WILL_BE_REPLACED) || + !list_empty(&curr_ctx->reserved_links)) { + /* + * Another link already requested this context for a + * reservation. Find another one hoping all links assigned + * to it will also switch soon enough. + * + * TODO: This needs a little more work as some cases + * (more than 2 chanctx capable devices) may fail which could + * otherwise succeed provided some channel context juggling was + * performed. + * + * Consider ctx1..3, link1..6, each ctx has 2 links. link1 and + * link2 from ctx1 request new different chandefs starting 2 + * in-place reserations with ctx4 and ctx5 replacing ctx1 and + * ctx2 respectively. Next link5 and link6 from ctx3 reserve + * ctx4. If link3 and link4 remain on ctx2 as they are then this + * fails unless `replace_ctx` from ctx5 is replaced with ctx3. + */ + list_for_each_entry(ctx, &local->chanctx_list, list) { + if (ctx->replace_state != + IEEE80211_CHANCTX_REPLACE_NONE) + continue; + + if (!list_empty(&ctx->reserved_links)) + continue; + + curr_ctx = ctx; + break; + } + } + + /* + * If that's true then all available contexts already have reservations + * and cannot be used. 
+ */ + if (!curr_ctx || (curr_ctx->replace_state == + IEEE80211_CHANCTX_WILL_BE_REPLACED) || + !list_empty(&curr_ctx->reserved_links)) + return ERR_PTR(-EBUSY); + + new_ctx = ieee80211_alloc_chanctx(local, chanreq, mode, -1); + if (!new_ctx) + return ERR_PTR(-ENOMEM); + + new_ctx->replace_ctx = curr_ctx; + new_ctx->replace_state = IEEE80211_CHANCTX_REPLACES_OTHER; + + curr_ctx->replace_ctx = new_ctx; + curr_ctx->replace_state = IEEE80211_CHANCTX_WILL_BE_REPLACED; + + list_add_rcu(&new_ctx->list, &local->chanctx_list); + + return new_ctx; +} + int ieee80211_link_reserve_chanctx(struct ieee80211_link_data *link, const struct ieee80211_chan_req *chanreq, enum ieee80211_chanctx_mode mode, @@ -1096,7 +1161,7 @@ int ieee80211_link_reserve_chanctx(struct ieee80211_link_data *link, { struct ieee80211_sub_if_data *sdata = link->sdata; struct ieee80211_local *local = sdata->local; - struct ieee80211_chanctx *new_ctx, *curr_ctx, *ctx; + struct ieee80211_chanctx *new_ctx, *curr_ctx; lockdep_assert_wiphy(local->hw.wiphy); @@ -1106,76 +1171,14 @@ int ieee80211_link_reserve_chanctx(struct ieee80211_link_data *link, new_ctx = ieee80211_find_reservation_chanctx(local, chanreq, mode); if (!new_ctx) { - if (ieee80211_can_create_new_chanctx(local, -1)) { + if (ieee80211_can_create_new_chanctx(local, -1)) new_ctx = ieee80211_new_chanctx(local, chanreq, mode, false); - if (IS_ERR(new_ctx)) - return PTR_ERR(new_ctx); - } else { - if (!curr_ctx || - (curr_ctx->replace_state == - IEEE80211_CHANCTX_WILL_BE_REPLACED) || - !list_empty(&curr_ctx->reserved_links)) { - /* - * Another link already requested this context - * for a reservation. Find another one hoping - * all links assigned to it will also switch - * soon enough. - * - * TODO: This needs a little more work as some - * cases (more than 2 chanctx capable devices) - * may fail which could otherwise succeed - * provided some channel context juggling was - * performed. - * - * Consider ctx1..3, link1..6, each ctx has 2 - * links. link1 and link2 from ctx1 request new - * different chandefs starting 2 in-place - * reserations with ctx4 and ctx5 replacing - * ctx1 and ctx2 respectively. Next link5 and - * link6 from ctx3 reserve ctx4. If link3 and - * link4 remain on ctx2 as they are then this - * fails unless `replace_ctx` from ctx5 is - * replaced with ctx3. - */ - list_for_each_entry(ctx, &local->chanctx_list, - list) { - if (ctx->replace_state != - IEEE80211_CHANCTX_REPLACE_NONE) - continue; - - if (!list_empty(&ctx->reserved_links)) - continue; - - curr_ctx = ctx; - break; - } - } - - /* - * If that's true then all available contexts already - * have reservations and cannot be used. 
- */ - if (!curr_ctx || - (curr_ctx->replace_state == - IEEE80211_CHANCTX_WILL_BE_REPLACED) || - !list_empty(&curr_ctx->reserved_links)) - return -EBUSY; - - new_ctx = ieee80211_alloc_chanctx(local, chanreq, mode, -1); - if (!new_ctx) - return -ENOMEM; - - new_ctx->replace_ctx = curr_ctx; - new_ctx->replace_state = - IEEE80211_CHANCTX_REPLACES_OTHER; - - curr_ctx->replace_ctx = new_ctx; - curr_ctx->replace_state = - IEEE80211_CHANCTX_WILL_BE_REPLACED; - - list_add_rcu(&new_ctx->list, &local->chanctx_list); - } + else + new_ctx = ieee80211_replace_chanctx(local, chanreq, + mode, curr_ctx); + if (IS_ERR(new_ctx)) + return PTR_ERR(new_ctx); } list_add(&link->reserved_chanctx_list, &new_ctx->reserved_links); -- cgit v1.2.3-58-ga151 From 27d4c03441eb951142c79fff0a25dd8ba3263875 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 9 Jul 2024 10:38:37 +0200 Subject: wifi: mac80211: add wiphy radio assignment and validation Validate number of channels and interface combinations per radio. Assign each channel context to a radio. Signed-off-by: Felix Fietkau Link: https://patch.msgid.link/1d3e9ba70a30ce18aaff337f0a76d7aeb311bafb.1720514221.git-series.nbd@nbd.name Signed-off-by: Johannes Berg --- net/mac80211/chan.c | 52 +++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 47 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index 9aa3b9e25228..e8567723e94d 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -714,14 +714,15 @@ static struct ieee80211_chanctx * ieee80211_new_chanctx(struct ieee80211_local *local, const struct ieee80211_chan_req *chanreq, enum ieee80211_chanctx_mode mode, - bool assign_on_failure) + bool assign_on_failure, + int radio_idx) { struct ieee80211_chanctx *ctx; int err; lockdep_assert_wiphy(local->hw.wiphy); - ctx = ieee80211_alloc_chanctx(local, chanreq, mode, -1); + ctx = ieee80211_alloc_chanctx(local, chanreq, mode, radio_idx); if (!ctx) return ERR_PTR(-ENOMEM); @@ -1096,6 +1097,8 @@ ieee80211_replace_chanctx(struct ieee80211_local *local, struct ieee80211_chanctx *curr_ctx) { struct ieee80211_chanctx *new_ctx, *ctx; + struct wiphy *wiphy = local->hw.wiphy; + const struct wiphy_radio *radio; if (!curr_ctx || (curr_ctx->replace_state == IEEE80211_CHANCTX_WILL_BE_REPLACED) || @@ -1125,6 +1128,12 @@ ieee80211_replace_chanctx(struct ieee80211_local *local, if (!list_empty(&ctx->reserved_links)) continue; + if (ctx->conf.radio_idx >= 0) { + radio = &wiphy->radio[ctx->conf.radio_idx]; + if (!cfg80211_radio_chandef_valid(radio, &chanreq->oper)) + continue; + } + curr_ctx = ctx; break; } @@ -1154,6 +1163,34 @@ ieee80211_replace_chanctx(struct ieee80211_local *local, return new_ctx; } +static bool +ieee80211_find_available_radio(struct ieee80211_local *local, + const struct ieee80211_chan_req *chanreq, + int *radio_idx) +{ + struct wiphy *wiphy = local->hw.wiphy; + const struct wiphy_radio *radio; + int i; + + *radio_idx = -1; + if (!wiphy->n_radio) + return true; + + for (i = 0; i < wiphy->n_radio; i++) { + radio = &wiphy->radio[i]; + if (!cfg80211_radio_chandef_valid(radio, &chanreq->oper)) + continue; + + if (!ieee80211_can_create_new_chanctx(local, i)) + continue; + + *radio_idx = i; + return true; + } + + return false; +} + int ieee80211_link_reserve_chanctx(struct ieee80211_link_data *link, const struct ieee80211_chan_req *chanreq, enum ieee80211_chanctx_mode mode, @@ -1162,6 +1199,7 @@ int ieee80211_link_reserve_chanctx(struct ieee80211_link_data *link, struct ieee80211_sub_if_data *sdata = 
link->sdata; struct ieee80211_local *local = sdata->local; struct ieee80211_chanctx *new_ctx, *curr_ctx; + int radio_idx; lockdep_assert_wiphy(local->hw.wiphy); @@ -1171,9 +1209,10 @@ int ieee80211_link_reserve_chanctx(struct ieee80211_link_data *link, new_ctx = ieee80211_find_reservation_chanctx(local, chanreq, mode); if (!new_ctx) { - if (ieee80211_can_create_new_chanctx(local, -1)) + if (ieee80211_can_create_new_chanctx(local, -1) && + ieee80211_find_available_radio(local, chanreq, &radio_idx)) new_ctx = ieee80211_new_chanctx(local, chanreq, mode, - false); + false, radio_idx); else new_ctx = ieee80211_replace_chanctx(local, chanreq, mode, curr_ctx); @@ -1810,6 +1849,7 @@ int _ieee80211_link_use_channel(struct ieee80211_link_data *link, struct ieee80211_chanctx *ctx; u8 radar_detect_width = 0; bool reserved = false; + int radio_idx; int ret; lockdep_assert_wiphy(local->hw.wiphy); @@ -1840,9 +1880,11 @@ int _ieee80211_link_use_channel(struct ieee80211_link_data *link, /* Note: context is now reserved */ if (ctx) reserved = true; + else if (!ieee80211_find_available_radio(local, chanreq, &radio_idx)) + ctx = ERR_PTR(-EBUSY); else ctx = ieee80211_new_chanctx(local, chanreq, mode, - assign_on_failure); + assign_on_failure, radio_idx); if (IS_ERR(ctx)) { ret = PTR_ERR(ctx); goto out; -- cgit v1.2.3-58-ga151 From 0787ab206f802deb7edbae32ba021d6b1101472a Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Sat, 6 Jul 2024 09:11:33 +0200 Subject: udp: Remove duplicate included header file trace/events/udp.h Remove duplicate included header file trace/events/udp.h and the following warning reported by make includecheck: trace/events/udp.h is included more than once Compile-tested only. Signed-off-by: Thorsten Blum Reviewed-by: Simon Horman Link: https://patch.msgid.link/20240706071132.274352-2-thorsten.blum@toblux.com Signed-off-by: Paolo Abeni --- net/ipv6/udp.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index b5456394cc67..6602a2e9cdb5 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -46,7 +46,6 @@ #include #include #include -#include #include #include #include -- cgit v1.2.3-58-ga151 From 0d9e699d3421b5db83eb0cb7a708882440ee78ef Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Mon, 8 Jul 2024 08:27:19 +0100 Subject: net: tls: Pass union tls_crypto_context pointer to memzero_explicit Pass union tls_crypto_context pointer, rather than struct tls_crypto_info pointer, to memzero_explicit(). The address of the pointer is the same before and after. But the new construct means that the size of the dereferenced pointer type matches the size being zeroed. Which aids static analysis. As reported by Smatch: .../tls_main.c:842 do_tls_setsockopt_conf() error: memzero_explicit() 'crypto_info' too small (4 vs 56) No functional change intended. Compile tested only. 
Reviewed-by: Przemek Kitszel Signed-off-by: Simon Horman Link: https://patch.msgid.link/20240708-tls-memzero-v2-1-9694eaf31b79@kernel.org Signed-off-by: Jakub Kicinski --- net/tls/tls_main.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 90b7f253d363..6b4b9f2749a6 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -616,6 +616,7 @@ static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval, struct tls_crypto_info *alt_crypto_info; struct tls_context *ctx = tls_get_ctx(sk); const struct tls_cipher_desc *cipher_desc; + union tls_crypto_context *crypto_ctx; int rc = 0; int conf; @@ -623,13 +624,15 @@ static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval, return -EINVAL; if (tx) { - crypto_info = &ctx->crypto_send.info; + crypto_ctx = &ctx->crypto_send; alt_crypto_info = &ctx->crypto_recv.info; } else { - crypto_info = &ctx->crypto_recv.info; + crypto_ctx = &ctx->crypto_recv; alt_crypto_info = &ctx->crypto_send.info; } + crypto_info = &crypto_ctx->info; + /* Currently we don't support set crypto info more than one time */ if (TLS_CRYPTO_INFO_READY(crypto_info)) return -EBUSY; @@ -710,7 +713,7 @@ static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval, return 0; err_crypto_info: - memzero_explicit(crypto_info, sizeof(union tls_crypto_context)); + memzero_explicit(crypto_ctx, sizeof(*crypto_ctx)); return rc; } -- cgit v1.2.3-58-ga151 From 408ac28c62f0b869d5477be12dd9eddc0c37242e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 9 Jul 2024 16:08:50 +0200 Subject: wifi: mac80211: fix AP chandef capturing in CSA When the CSA is announced with only HT elements, the AP chandef isn't captured correctly, leading to crashes in the later code that checks for TPE changes during CSA. Capture the AP chandef correctly in both cases to fix this. Reported-by: Jouni Malinen Fixes: 4540568136fe ("wifi: mac80211: handle TPE element during CSA") Link: https://patch.msgid.link/20240709160851.47805f24624d.I024091f701447f7921e93bb23b46e01c2f46347d@changeid Signed-off-by: Johannes Berg --- net/mac80211/spectmgmt.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/spectmgmt.c b/net/mac80211/spectmgmt.c index e91ca4ccdd37..073ff9e0f397 100644 --- a/net/mac80211/spectmgmt.c +++ b/net/mac80211/spectmgmt.c @@ -343,6 +343,9 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata, break; } + /* capture the AP configuration */ + csa_ie->chanreq.ap = csa_ie->chanreq.oper; + /* parse one of the Elements to build a new chandef */ memset(&new_chandef, 0, sizeof(new_chandef)); new_chandef.chan = new_chan; @@ -369,11 +372,11 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata, validate_chandef_by_ht_vht_oper(sdata, conn, vht_cap_info, &new_chandef); - /* capture the AP chandef before (potential) downgrading */ - csa_ie->chanreq.ap = new_chandef; - /* if data is there validate the bandwidth & use it */ if (new_chandef.chan) { + /* capture the AP chandef before (potential) downgrading */ + csa_ie->chanreq.ap = new_chandef; + if (conn->bw_limit < IEEE80211_CONN_BW_LIMIT_320 && new_chandef.width == NL80211_CHAN_WIDTH_320) ieee80211_chandef_downgrade(&new_chandef, NULL); -- cgit v1.2.3-58-ga151 From c13fda93aca118b8e5cd202e339046728ee7dddb Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 10 Jul 2024 16:16:31 +0200 Subject: bpf: Remove tst_run from lwt_seg6local_prog_ops. 
syzbot reported that the lwt_seg6 related BPF ops can be invoked via bpf_test_run() without entering input_action_end_bpf() first. Martin KaFai Lau said that the self test for BPF_PROG_TYPE_LWT_SEG6LOCAL probably didn't work since it was introduced in commit 004d4b274e2a ("ipv6: sr: Add seg6local action End.BPF"). The reason is that the per-CPU variable seg6_bpf_srh_states::srh is never assigned in the self test case but each BPF function expects it. Remove test_run for BPF_PROG_TYPE_LWT_SEG6LOCAL. Suggested-by: Martin KaFai Lau Reported-by: syzbot+608a2acde8c5a101d07d@syzkaller.appspotmail.com Fixes: d1542d4ae4df ("seg6: Use nested-BH locking for seg6_bpf_srh_states.") Fixes: 004d4b274e2a ("ipv6: sr: Add seg6local action End.BPF") Signed-off-by: Sebastian Andrzej Siewior Acked-by: Daniel Borkmann Link: https://lore.kernel.org/r/20240710141631.FbmHcQaX@linutronix.de Signed-off-by: Martin KaFai Lau --- net/core/filter.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/core/filter.c b/net/core/filter.c index d767880c276d..4cf1d34f7617 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -11053,7 +11053,6 @@ const struct bpf_verifier_ops lwt_seg6local_verifier_ops = { }; const struct bpf_prog_ops lwt_seg6local_prog_ops = { - .test_run = bpf_prog_test_run_skb, }; const struct bpf_verifier_ops cg_sock_verifier_ops = { -- cgit v1.2.3-58-ga151 From 39daa09d34ada1bc7227d68def63e0a2105b5496 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Thu, 20 Jun 2024 15:53:35 +0200 Subject: page_pool: use __cacheline_group_{begin, end}_aligned() Instead of doing __cacheline_group_begin() __aligned(), use the new __cacheline_group_{begin,end}_aligned(), so that it will take care of the group alignment itself. Also replace open-coded `4 * sizeof(long)` in two places with a definition. Signed-off-by: Alexander Lobakin Signed-off-by: Tony Nguyen --- include/net/page_pool/types.h | 22 ++++++++++++---------- net/core/page_pool.c | 3 ++- 2 files changed, 14 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h index b70bcc14ceda..50569fed7868 100644 --- a/include/net/page_pool/types.h +++ b/include/net/page_pool/types.h @@ -129,6 +129,16 @@ struct page_pool_stats { }; #endif +/* The whole frag API block must stay within one cacheline. On 32-bit systems, + * sizeof(long) == sizeof(int), so that the block size is ``3 * sizeof(long)``. + * On 64-bit systems, the actual size is ``2 * sizeof(long) + sizeof(int)``. + * The closest pow-2 to both of them is ``4 * sizeof(long)``, so just use that + * one for simplicity. + * Having it aligned to a cacheline boundary may be excessive and doesn't bring + * any good.
- */ - __cacheline_group_begin(frag) __aligned(4 * sizeof(long)); + __cacheline_group_begin_aligned(frag, PAGE_POOL_FRAG_GROUP_ALIGN); long frag_users; netmem_ref frag_page; unsigned int frag_offset; - __cacheline_group_end(frag); + __cacheline_group_end_aligned(frag, PAGE_POOL_FRAG_GROUP_ALIGN); struct delayed_work release_dw; void (*disconnect)(void *pool); diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 855271a6cad2..2abe6e919224 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -178,7 +178,8 @@ static void page_pool_struct_check(void) CACHELINE_ASSERT_GROUP_MEMBER(struct page_pool, frag, frag_users); CACHELINE_ASSERT_GROUP_MEMBER(struct page_pool, frag, frag_page); CACHELINE_ASSERT_GROUP_MEMBER(struct page_pool, frag, frag_offset); - CACHELINE_ASSERT_GROUP_SIZE(struct page_pool, frag, 4 * sizeof(long)); + CACHELINE_ASSERT_GROUP_SIZE(struct page_pool, frag, + PAGE_POOL_FRAG_GROUP_ALIGN); } static int page_pool_init(struct page_pool *pool, -- cgit v1.2.3-58-ga151 From 2899d58462ba868287d6ff3acad3675e7adf934f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 10 Jul 2024 10:40:42 -0700 Subject: ethtool: fail closed if we can't get max channel used in indirection tables Commit 0d1b7d6c9274 ("bnxt: fix crashes when reducing ring count with active RSS contexts") proves that allowing indirection table to contain channels with out of bounds IDs may lead to crashes. Currently the max channel check in the core gets skipped if driver can't fetch the indirection table or when we can't allocate memory. Both of those conditions should be extremely rare but if they do happen we should try to be safe and fail the channel change. Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20240710174043.754664-2-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/ethtool/channels.c | 6 ++---- net/ethtool/common.c | 26 +++++++++++++++----------- net/ethtool/common.h | 2 +- net/ethtool/ioctl.c | 4 +--- 4 files changed, 19 insertions(+), 19 deletions(-) (limited to 'net') diff --git a/net/ethtool/channels.c b/net/ethtool/channels.c index 7b4bbd674bae..cee188da54f8 100644 --- a/net/ethtool/channels.c +++ b/net/ethtool/channels.c @@ -171,11 +171,9 @@ ethnl_set_channels(struct ethnl_req_info *req_info, struct genl_info *info) */ if (ethtool_get_max_rxnfc_channel(dev, &max_rxnfc_in_use)) max_rxnfc_in_use = 0; - if (!netif_is_rxfh_configured(dev) || - ethtool_get_max_rxfh_channel(dev, &max_rxfh_in_use)) - max_rxfh_in_use = 0; + max_rxfh_in_use = ethtool_get_max_rxfh_channel(dev); if (channels.combined_count + channels.rx_count <= max_rxfh_in_use) { - GENL_SET_ERR_MSG(info, "requested channel counts are too low for existing indirection table settings"); + GENL_SET_ERR_MSG_FMT(info, "requested channel counts are too low for existing indirection table (%d)", max_rxfh_in_use); return -EINVAL; } if (channels.combined_count + channels.rx_count <= max_rxnfc_in_use) { diff --git a/net/ethtool/common.c b/net/ethtool/common.c index 6b2a360dcdf0..8a62375ebd1f 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ -587,35 +587,39 @@ err_free_info: return err; } -int ethtool_get_max_rxfh_channel(struct net_device *dev, u32 *max) +u32 ethtool_get_max_rxfh_channel(struct net_device *dev) { struct ethtool_rxfh_param rxfh = {}; - u32 dev_size, current_max = 0; + u32 dev_size, current_max; int ret; + if (!netif_is_rxfh_configured(dev)) + return 0; + if (!dev->ethtool_ops->get_rxfh_indir_size || !dev->ethtool_ops->get_rxfh) - return -EOPNOTSUPP; + return 0; dev_size = 
dev->ethtool_ops->get_rxfh_indir_size(dev); if (dev_size == 0) - return -EOPNOTSUPP; + return 0; rxfh.indir = kcalloc(dev_size, sizeof(rxfh.indir[0]), GFP_USER); if (!rxfh.indir) - return -ENOMEM; + return U32_MAX; ret = dev->ethtool_ops->get_rxfh(dev, &rxfh); - if (ret) - goto out; + if (ret) { + current_max = U32_MAX; + goto out_free; + } + current_max = 0; while (dev_size--) current_max = max(current_max, rxfh.indir[dev_size]); - *max = current_max; - -out: +out_free: kfree(rxfh.indir); - return ret; + return current_max; } int ethtool_check_ops(const struct ethtool_ops *ops) diff --git a/net/ethtool/common.h b/net/ethtool/common.h index 28b8aaaf9bcb..b55705a9ad5a 100644 --- a/net/ethtool/common.h +++ b/net/ethtool/common.h @@ -42,7 +42,7 @@ int __ethtool_get_link(struct net_device *dev); bool convert_legacy_settings_to_link_ksettings( struct ethtool_link_ksettings *link_ksettings, const struct ethtool_cmd *legacy_settings); -int ethtool_get_max_rxfh_channel(struct net_device *dev, u32 *max); +u32 ethtool_get_max_rxfh_channel(struct net_device *dev); int ethtool_get_max_rxnfc_channel(struct net_device *dev, u64 *max); int __ethtool_get_ts_info(struct net_device *dev, struct ethtool_ts_info *info); diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index d72b0fec89af..615812ff8974 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -2049,9 +2049,7 @@ static noinline_for_stack int ethtool_set_channels(struct net_device *dev, * indirection table/rxnfc settings */ if (ethtool_get_max_rxnfc_channel(dev, &max_rxnfc_in_use)) max_rxnfc_in_use = 0; - if (!netif_is_rxfh_configured(dev) || - ethtool_get_max_rxfh_channel(dev, &max_rxfh_in_use)) - max_rxfh_in_use = 0; + max_rxfh_in_use = ethtool_get_max_rxfh_channel(dev); if (channels.combined_count + channels.rx_count <= max_t(u64, max_rxnfc_in_use, max_rxfh_in_use)) return -EINVAL; -- cgit v1.2.3-58-ga151 From 24ac7e5440815bb03bdfa9bc7e43a412b050dbaa Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 10 Jul 2024 10:40:43 -0700 Subject: ethtool: use the rss context XArray in ring deactivation safety-check ethtool_get_max_rxfh_channel() gets called when user requests deactivating Rx channels. Check the additional RSS contexts, too. While we do track whether RSS context has an indirection table explicitly set by the user, no driver looks at that bit. Assume drivers won't auto-regenerate the additional tables, to be safe. 
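For illustration, a minimal sketch of what the deactivation safety check amounts to once additional contexts are scanned (simplified; the real checks live in ethnl_set_channels() and ethtool_set_channels(), and the context scan itself is in the diff below):

	u32 max_in_use;

	/* highest ring index referenced by the main RSS table or by any
	 * additional RSS context; U32_MAX if it could not be determined
	 */
	max_in_use = ethtool_get_max_rxfh_channel(dev);
	if (channels.combined_count + channels.rx_count <= max_in_use)
		return -EINVAL;	/* a ring still used for RSS would disappear */
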
Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20240710174043.754664-3-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/ethtool/common.c | 33 +++++++++++++++++++++++++++++---- 1 file changed, 29 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ethtool/common.c b/net/ethtool/common.c index 8a62375ebd1f..7bda9600efcf 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ -587,21 +587,47 @@ err_free_info: return err; } +static u32 ethtool_get_max_rss_ctx_channel(struct net_device *dev) +{ + struct ethtool_rxfh_context *ctx; + unsigned long context; + u32 max_ring = 0; + + mutex_lock(&dev->ethtool->rss_lock); + xa_for_each(&dev->ethtool->rss_ctx, context, ctx) { + u32 i, *tbl; + + tbl = ethtool_rxfh_context_indir(ctx); + for (i = 0; i < ctx->indir_size; i++) + max_ring = max(max_ring, tbl[i]); + } + mutex_unlock(&dev->ethtool->rss_lock); + + return max_ring; +} + u32 ethtool_get_max_rxfh_channel(struct net_device *dev) { struct ethtool_rxfh_param rxfh = {}; u32 dev_size, current_max; int ret; + /* While we do track whether RSS context has an indirection + * table explicitly set by the user, no driver looks at that bit. + * Assume drivers won't auto-regenerate the additional tables, + * to be safe. + */ + current_max = ethtool_get_max_rss_ctx_channel(dev); + if (!netif_is_rxfh_configured(dev)) - return 0; + return current_max; if (!dev->ethtool_ops->get_rxfh_indir_size || !dev->ethtool_ops->get_rxfh) - return 0; + return current_max; dev_size = dev->ethtool_ops->get_rxfh_indir_size(dev); if (dev_size == 0) - return 0; + return current_max; rxfh.indir = kcalloc(dev_size, sizeof(rxfh.indir[0]), GFP_USER); if (!rxfh.indir) @@ -613,7 +639,6 @@ u32 ethtool_get_max_rxfh_channel(struct net_device *dev) goto out_free; } - current_max = 0; while (dev_size--) current_max = max(current_max, rxfh.indir[dev_size]); -- cgit v1.2.3-58-ga151 From b07593edd2faf310f9b758896f4f1c6054515be4 Mon Sep 17 00:00:00 2001 From: Chen Ni Date: Tue, 9 Jul 2024 15:28:38 +0800 Subject: net/sched: act_skbmod: convert comma to semicolon Replace a comma between expression statements by a semicolon. Signed-off-by: Chen Ni Reviewed-by: Simon Horman Link: https://patch.msgid.link/20240709072838.1152880-1-nichen@iscas.ac.cn Signed-off-by: Jakub Kicinski --- net/sched/act_skbmod.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c index cd0accaf844a..dc0229693461 100644 --- a/net/sched/act_skbmod.c +++ b/net/sched/act_skbmod.c @@ -246,7 +246,7 @@ static int tcf_skbmod_dump(struct sk_buff *skb, struct tc_action *a, memset(&opt, 0, sizeof(opt)); opt.index = d->tcf_index; - opt.refcnt = refcount_read(&d->tcf_refcnt) - ref, + opt.refcnt = refcount_read(&d->tcf_refcnt) - ref; opt.bindcnt = atomic_read(&d->tcf_bindcnt) - bind; spin_lock_bh(&d->tcf_lock); opt.action = d->tcf_action; -- cgit v1.2.3-58-ga151 From cef4902b0fadfc4181176ef5713f0b7cf2a40d8f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 10 Jul 2024 15:16:53 +0000 Subject: net: reduce rtnetlink_rcv_msg() stack usage IFLA_MAX is increasing slowly but surely. Some compilers use more than 512 bytes of stack in rtnetlink_rcv_msg() because it calls rtnl_calcit() for RTM_GETLINK message. Use noinline_for_stack attribute to not inline rtnl_calcit(), and directly use nla_for_each_attr_type() (Jakub suggestion) because we only care about IFLA_EXT_MASK at this stage. 
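A minimal sketch of the attribute walk this relies on (the actual change is in the diff below); the point is that nla_for_each_attr_type() only needs a cursor on the stack, not a struct nlattr *tb[IFLA_MAX + 1] table:

	struct nlattr *nla;
	u32 ext_filter_mask = 0;
	int rem;

	/* walk only the IFLA_EXT_MASK attributes of the request */
	nla_for_each_attr_type(nla, IFLA_EXT_MASK,
			       nlmsg_attrdata(nlh, hdrlen),
			       nlmsg_attrlen(nlh, hdrlen), rem) {
		if (nla_len(nla) == sizeof(u32))
			ext_filter_mask = nla_get_u32(nla);
	}
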
Signed-off-by: Eric Dumazet Reviewed-by: Simon Horman Link: https://patch.msgid.link/20240710151653.3786604-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/core/rtnetlink.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index eabfc8290f5e..87e67194f240 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3969,22 +3969,28 @@ static int rtnl_dellinkprop(struct sk_buff *skb, struct nlmsghdr *nlh, return rtnl_linkprop(RTM_DELLINKPROP, skb, nlh, extack); } -static u32 rtnl_calcit(struct sk_buff *skb, struct nlmsghdr *nlh) +static noinline_for_stack u32 rtnl_calcit(struct sk_buff *skb, + struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); size_t min_ifinfo_dump_size = 0; - struct nlattr *tb[IFLA_MAX+1]; u32 ext_filter_mask = 0; struct net_device *dev; - int hdrlen; + struct nlattr *nla; + int hdrlen, rem; /* Same kernel<->userspace interface hack as in rtnl_dump_ifinfo. */ hdrlen = nlmsg_len(nlh) < sizeof(struct ifinfomsg) ? sizeof(struct rtgenmsg) : sizeof(struct ifinfomsg); - if (nlmsg_parse_deprecated(nlh, hdrlen, tb, IFLA_MAX, ifla_policy, NULL) >= 0) { - if (tb[IFLA_EXT_MASK]) - ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]); + if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen)) + return NLMSG_GOODSIZE; + + nla_for_each_attr_type(nla, IFLA_EXT_MASK, + nlmsg_attrdata(nlh, hdrlen), + nlmsg_attrlen(nlh, hdrlen), rem) { + if (nla_len(nla) == sizeof(u32)) + ext_filter_mask = nla_get_u32(nla); } if (!ext_filter_mask) -- cgit v1.2.3-58-ga151 From 8341eee81c794db0d8dd503c2b0ea2f55eba7334 Mon Sep 17 00:00:00 2001 From: Adrian Moreno Date: Wed, 10 Jul 2024 19:10:04 +0200 Subject: net: psample: fix flag being set in wrong skb A typo makes PSAMPLE_ATTR_SAMPLE_RATE netlink flag be added to the wrong sk_buff. Fix the error and make the input sk_buff pointer "const" so that it doesn't happen again. 
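For illustration only, the const qualifier is what turns a repeat of this mistake into a build-time diagnostic:

	/* wrong: writes the flag into the sampled packet itself; with the
	 * packet now passed as const this triggers a discarded-qualifiers
	 * warning
	 */
	nla_put_flag(skb, PSAMPLE_ATTR_SAMPLE_PROBABILITY);

	/* intended: the flag belongs in the netlink notification skb */
	nla_put_flag(nl_skb, PSAMPLE_ATTR_SAMPLE_PROBABILITY);
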
Acked-by: Eelco Chaudron Fixes: 7b1b2b60c63f ("net: psample: allow using rate as probability") Signed-off-by: Adrian Moreno Reviewed-by: Ido Schimmel Reviewed-by: Antoine Tenart Link: https://patch.msgid.link/20240710171004.2164034-1-amorenoz@redhat.com Signed-off-by: Jakub Kicinski --- include/net/psample.h | 8 +++++--- net/psample/psample.c | 7 ++++--- 2 files changed, 9 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/include/net/psample.h b/include/net/psample.h index c52e9ebd88dd..5071b5fc2b59 100644 --- a/include/net/psample.h +++ b/include/net/psample.h @@ -38,13 +38,15 @@ struct sk_buff; #if IS_ENABLED(CONFIG_PSAMPLE) -void psample_sample_packet(struct psample_group *group, struct sk_buff *skb, - u32 sample_rate, const struct psample_metadata *md); +void psample_sample_packet(struct psample_group *group, + const struct sk_buff *skb, u32 sample_rate, + const struct psample_metadata *md); #else static inline void psample_sample_packet(struct psample_group *group, - struct sk_buff *skb, u32 sample_rate, + const struct sk_buff *skb, + u32 sample_rate, const struct psample_metadata *md) { } diff --git a/net/psample/psample.c b/net/psample/psample.c index f48b5b9cd409..a0ddae8a65f9 100644 --- a/net/psample/psample.c +++ b/net/psample/psample.c @@ -360,8 +360,9 @@ static int psample_tunnel_meta_len(struct ip_tunnel_info *tun_info) } #endif -void psample_sample_packet(struct psample_group *group, struct sk_buff *skb, - u32 sample_rate, const struct psample_metadata *md) +void psample_sample_packet(struct psample_group *group, + const struct sk_buff *skb, u32 sample_rate, + const struct psample_metadata *md) { ktime_t tstamp = ktime_get_real(); int out_ifindex = md->out_ifindex; @@ -498,7 +499,7 @@ void psample_sample_packet(struct psample_group *group, struct sk_buff *skb, goto error; if (md->rate_as_probability) - nla_put_flag(skb, PSAMPLE_ATTR_SAMPLE_PROBABILITY); + nla_put_flag(nl_skb, PSAMPLE_ATTR_SAMPLE_PROBABILITY); genlmsg_end(nl_skb, data); genlmsg_multicast_netns(&psample_nl_family, group->net, nl_skb, 0, -- cgit v1.2.3-58-ga151 From 13cabc47f8ae69d24653f32c28399d493fde0a56 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Wed, 10 Jul 2024 04:30:28 -0700 Subject: netdevice: define and allocate &net_device _properly_ In fact, this structure contains a flexible array at the end, but historically its size, alignment etc., is calculated manually. There are several instances of the structure embedded into other structures, but also there's ongoing effort to remove them and we could in the meantime declare &net_device properly. Declare the array explicitly, use struct_size() and store the array size inside the structure, so that __counted_by() can be applied. Don't use PTR_ALIGN(), as SLUB itself tries its best to ensure the allocated buffer is aligned to what the user expects. Also, change its alignment from %NETDEV_ALIGN to the cacheline size as per several suggestions on the netdev ML. bloat-o-meter for vmlinux: free_netdev 445 440 -5 netdev_freemem 24 - -24 alloc_netdev_mqs 1481 1450 -31 On x86_64 with several NICs of different vendors, I was never able to get a &net_device pointer not aligned to the cacheline size after the change. 
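Reduced to a minimal, hypothetical example (struct foo and foo_alloc() are made up for illustration; the real conversion of alloc_netdev_mqs() is in the diff below), the allocation pattern is:

	struct foo {
		u32 priv_len;
		u8 priv[] __counted_by(priv_len);
	};

	static struct foo *foo_alloc(u32 priv_size)
	{
		struct foo *f;

		/* struct_size() computes sizeof(*f) + priv_size with overflow
		 * checking; __counted_by() tells fortified accessors the
		 * run-time bound of f->priv
		 */
		f = kvzalloc(struct_size(f, priv, priv_size), GFP_KERNEL);
		if (!f)
			return NULL;

		f->priv_len = priv_size;
		return f;
	}
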
Signed-off-by: Alexander Lobakin Signed-off-by: Breno Leitao Reviewed-by: Przemek Kitszel Reviewed-by: Eric Dumazet Reviewed-by: Kees Cook Link: https://patch.msgid.link/20240710113036.2125584-1-leitao@debian.org Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 15 +++++++++------ net/core/dev.c | 30 ++++++------------------------ net/core/net-sysfs.c | 2 +- 3 files changed, 16 insertions(+), 31 deletions(-) (limited to 'net') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 93558645c6d0..607009150b5f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1819,7 +1819,8 @@ enum netdev_reg_state { * @priv_flags: Like 'flags' but invisible to userspace, * see if.h for the definitions * @gflags: Global flags ( kept as legacy ) - * @padded: How much padding added by alloc_netdev() + * @priv_len: Size of the ->priv flexible array + * @priv: Flexible array containing private data * @operstate: RFC2863 operstate * @link_mode: Mapping policy to operstate * @if_port: Selectable AUI, TP, ... @@ -2199,10 +2200,10 @@ struct net_device { unsigned short neigh_priv_len; unsigned short dev_id; unsigned short dev_port; - unsigned short padded; + int irq; + u32 priv_len; spinlock_t addr_list_lock; - int irq; struct netdev_hw_addr_list uc; struct netdev_hw_addr_list mc; @@ -2406,7 +2407,10 @@ struct net_device { /** @irq_moder: dim parameters used if IS_ENABLED(CONFIG_DIMLIB). */ struct dim_irq_moder *irq_moder; -}; + + u8 priv[] ____cacheline_aligned + __counted_by(priv_len); +} ____cacheline_aligned; #define to_net_dev(d) container_of(d, struct net_device, dev) /* @@ -2596,7 +2600,7 @@ void dev_net_set(struct net_device *dev, struct net *net) */ static inline void *netdev_priv(const struct net_device *dev) { - return (char *)dev + ALIGN(sizeof(struct net_device), NETDEV_ALIGN); + return (void *)dev->priv; } /* Set the sysfs physical device reference for the network logical device @@ -3127,7 +3131,6 @@ static inline void unregister_netdevice(struct net_device *dev) int netdev_refcnt_read(const struct net_device *dev); void free_netdev(struct net_device *dev); -void netdev_freemem(struct net_device *dev); void init_dummy_netdev(struct net_device *dev); struct net_device *netdev_get_xmit_slave(struct net_device *dev, diff --git a/net/core/dev.c b/net/core/dev.c index 73e5af6943c3..6ea1d20676fb 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -11006,13 +11006,6 @@ void netdev_sw_irq_coalesce_default_on(struct net_device *dev) } EXPORT_SYMBOL_GPL(netdev_sw_irq_coalesce_default_on); -void netdev_freemem(struct net_device *dev) -{ - char *addr = (char *)dev - dev->padded; - - kvfree(addr); -} - /** * alloc_netdev_mqs - allocate network device * @sizeof_priv: size of private data to allocate space for @@ -11032,8 +11025,6 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, unsigned int txqs, unsigned int rxqs) { struct net_device *dev; - unsigned int alloc_size; - struct net_device *p; BUG_ON(strlen(name) >= sizeof(dev->name)); @@ -11047,21 +11038,12 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, return NULL; } - alloc_size = sizeof(struct net_device); - if (sizeof_priv) { - /* ensure 32-byte alignment of private area */ - alloc_size = ALIGN(alloc_size, NETDEV_ALIGN); - alloc_size += sizeof_priv; - } - /* ensure 32-byte alignment of whole construct */ - alloc_size += NETDEV_ALIGN - 1; - - p = kvzalloc(alloc_size, GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL); - if (!p) + dev = kvzalloc(struct_size(dev, priv, 
sizeof_priv), + GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL); + if (!dev) return NULL; - dev = PTR_ALIGN(p, NETDEV_ALIGN); - dev->padded = (char *)dev - (char *)p; + dev->priv_len = sizeof_priv; ref_tracker_dir_init(&dev->refcnt_tracker, 128, name); #ifdef CONFIG_PCPU_DEV_REFCNT @@ -11148,7 +11130,7 @@ free_pcpu: free_percpu(dev->pcpu_refcnt); free_dev: #endif - netdev_freemem(dev); + kvfree(dev); return NULL; } EXPORT_SYMBOL(alloc_netdev_mqs); @@ -11203,7 +11185,7 @@ void free_netdev(struct net_device *dev) /* Compatibility with error handling in drivers */ if (dev->reg_state == NETREG_UNINITIALIZED || dev->reg_state == NETREG_DUMMY) { - netdev_freemem(dev); + kvfree(dev); return; } diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 4c27a360c294..0e2084ce7b75 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -2028,7 +2028,7 @@ static void netdev_release(struct device *d) * device is dead and about to be freed. */ kfree(rcu_access_pointer(dev->ifalias)); - netdev_freemem(dev); + kvfree(dev); } static const void *net_namespace(const struct device *d) -- cgit v1.2.3-58-ga151 From 534ea0a95e2d7b31eb1e1248d04edaffed75e366 Mon Sep 17 00:00:00 2001 From: Shigeru Yoshida Date: Tue, 9 Jul 2024 23:34:10 +0900 Subject: tipc: Remove unused struct declaration struct tipc_name_table in core.h is not used. Remove this declaration. Signed-off-by: Shigeru Yoshida Reviewed-by: Tung Nguyen Signed-off-by: David S. Miller --- net/tipc/core.h | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/tipc/core.h b/net/tipc/core.h index 7eccd97e0609..7f3fe3401c45 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -72,7 +72,6 @@ struct tipc_node; struct tipc_bearer; struct tipc_bc_base; struct tipc_link; -struct tipc_name_table; struct tipc_topsrv; struct tipc_monitor; #ifdef CONFIG_TIPC_CRYPTO -- cgit v1.2.3-58-ga151 From b6c67967897e2e02482f7bbd26232090a6e30ae5 Mon Sep 17 00:00:00 2001 From: Shigeru Yoshida Date: Tue, 9 Jul 2024 23:36:32 +0900 Subject: tipc: Consolidate redundant functions link_is_up() and tipc_link_is_up() have the same functionality. Consolidate these functions. Signed-off-by: Shigeru Yoshida Reviewed-by: Tung Nguyen Signed-off-by: David S. 
Miller --- net/tipc/link.c | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/tipc/link.c b/net/tipc/link.c index 0716eb5c8a31..5c2088a469ce 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -241,13 +241,6 @@ enum { LINK_SYNCHING = 0xc << 24 }; -/* Link FSM state checking routines - */ -static int link_is_up(struct tipc_link *l) -{ - return l->state & (LINK_ESTABLISHED | LINK_SYNCHING); -} - static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, struct sk_buff_head *xmitq); static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, @@ -274,7 +267,7 @@ static void tipc_link_update_cwin(struct tipc_link *l, int released, */ bool tipc_link_is_up(struct tipc_link *l) { - return link_is_up(l); + return l->state & (LINK_ESTABLISHED | LINK_SYNCHING); } bool tipc_link_peer_is_down(struct tipc_link *l) @@ -1790,7 +1783,7 @@ int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb, rcv_nxt = l->rcv_nxt; win_lim = rcv_nxt + TIPC_MAX_LINK_WIN; - if (unlikely(!link_is_up(l))) { + if (unlikely(!tipc_link_is_up(l))) { if (l->state == LINK_ESTABLISHING) rc = TIPC_LINK_UP_EVT; kfree_skb(skb); @@ -1848,7 +1841,7 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, struct tipc_link *bcl = l->bc_rcvlink; struct tipc_msg *hdr; struct sk_buff *skb; - bool node_up = link_is_up(bcl); + bool node_up = tipc_link_is_up(bcl); u16 glen = 0, bc_rcvgap = 0; int dlen = 0; void *data; @@ -2163,7 +2156,7 @@ bool tipc_link_validate_msg(struct tipc_link *l, struct tipc_msg *hdr) if (session != curr_session) return false; /* Extra sanity check */ - if (!link_is_up(l) && msg_ack(hdr)) + if (!tipc_link_is_up(l) && msg_ack(hdr)) return false; if (!(l->peer_caps & TIPC_LINK_PROTO_SEQNO)) return true; @@ -2261,7 +2254,7 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, } /* ACTIVATE_MSG serves as PEER_RESET if link is already down */ - if (mtyp == RESET_MSG || !link_is_up(l)) + if (mtyp == RESET_MSG || !tipc_link_is_up(l)) rc = tipc_link_fsm_evt(l, LINK_PEER_RESET_EVT); /* ACTIVATE_MSG takes up link if it was already locally reset */ @@ -2300,7 +2293,7 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, if (msg_probe(hdr)) l->stats.recv_probes++; - if (!link_is_up(l)) { + if (!tipc_link_is_up(l)) { if (l->state == LINK_ESTABLISHING) rc = TIPC_LINK_UP_EVT; break; @@ -2387,7 +2380,7 @@ void tipc_link_bc_init_rcv(struct tipc_link *l, struct tipc_msg *hdr) int mtyp = msg_type(hdr); u16 peers_snd_nxt = msg_bc_snd_nxt(hdr); - if (link_is_up(l)) + if (tipc_link_is_up(l)) return; if (msg_user(hdr) == BCAST_PROTOCOL) { @@ -2415,7 +2408,7 @@ int tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr, u16 peers_snd_nxt = msg_bc_snd_nxt(hdr); int rc = 0; - if (!link_is_up(l)) + if (!tipc_link_is_up(l)) return rc; if (!msg_peer_node_is_up(hdr)) @@ -2475,7 +2468,7 @@ int tipc_link_bc_ack_rcv(struct tipc_link *r, u16 acked, u16 gap, bool unused = false; int rc = 0; - if (!link_is_up(r) || !r->bc_peer_is_up) + if (!tipc_link_is_up(r) || !r->bc_peer_is_up) return 0; if (gap) { @@ -2873,7 +2866,7 @@ void tipc_link_set_tolerance(struct tipc_link *l, u32 tol, l->tolerance = tol; if (l->bc_rcvlink) l->bc_rcvlink->tolerance = tol; - if (link_is_up(l)) + if (tipc_link_is_up(l)) tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, tol, 0, xmitq); } -- cgit v1.2.3-58-ga151 From 2146b7dd354c2a1384381ca3cd5751bfff6137d6 Mon Sep 17 00:00:00 2001 From: James 
Chapman Date: Tue, 9 Jul 2024 17:28:39 +0100 Subject: l2tp: fix l2tp_session_register with colliding l2tpv3 IDs When handling colliding L2TPv3 session IDs, we use the existing session IDR entry and link the new session on that using session->coll_list. However, when using an existing IDR entry, we must not do the idr_replace step. Fixes: aa5e17e1f5ec ("l2tp: store l2tpv3 sessions in per-net IDR") Signed-off-by: James Chapman Signed-off-by: Tom Parkin Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 29dfbd70c79c..1c1decce7f06 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -436,6 +436,7 @@ int l2tp_session_register(struct l2tp_session *session, struct l2tp_tunnel *tunnel) { struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net); + struct l2tp_session *other_session = NULL; u32 session_key; int err; @@ -456,11 +457,10 @@ int l2tp_session_register(struct l2tp_session *session, * support existing userspace which depends on it. */ if (err == -ENOSPC && tunnel->encap == L2TP_ENCAPTYPE_UDP) { - struct l2tp_session *session2; - - session2 = idr_find(&pn->l2tp_v3_session_idr, - session_key); - err = l2tp_session_collision_add(pn, session, session2); + other_session = idr_find(&pn->l2tp_v3_session_idr, + session_key); + err = l2tp_session_collision_add(pn, session, + other_session); } spin_unlock_bh(&pn->l2tp_session_idr_lock); } else { @@ -484,10 +484,12 @@ int l2tp_session_register(struct l2tp_session *session, spin_unlock_bh(&tunnel->list_lock); spin_lock_bh(&pn->l2tp_session_idr_lock); - if (tunnel->version == L2TP_HDR_VER_3) - idr_replace(&pn->l2tp_v3_session_idr, session, session_key); - else + if (tunnel->version == L2TP_HDR_VER_3) { + if (!other_session) + idr_replace(&pn->l2tp_v3_session_idr, session, session_key); + } else { idr_replace(&pn->l2tp_v2_session_idr, session, session_key); + } spin_unlock_bh(&pn->l2tp_session_idr_lock); trace_register_session(session); -- cgit v1.2.3-58-ga151 From f7e8542d71e908706855a9f3e9e7844fea1feea0 Mon Sep 17 00:00:00 2001 From: Mike Yu Date: Fri, 12 Jul 2024 10:51:22 +0800 Subject: xfrm: Support crypto offload for inbound IPv6 ESP packets not in GRO path IPsec crypt offload supports outbound IPv6 ESP packets, but it doesn't support inbound IPv6 ESP packets. This change enables the crypto offload for inbound IPv6 ESP packets that are not handled through GRO code path. If HW drivers add the offload information to the skb, the packet will be handled in the crypto offload rx code path. Apart from the change in crypto offload rx code path, the change in xfrm_policy_check is also needed. 
Exampe of RX data path: +-----------+ +-------+ | HW Driver |-->| wlan0 |--------+ +-----------+ +-------+ | v +---------------+ +------+ +------>| Network Stack |-->| Apps | | +---------------+ +------+ | | | v +--------+ +------------+ | ipsec1 |<--| XFRM Stack | +--------+ +------------+ Test: Enabled both in/out IPsec crypto offload, and verified IPv6 ESP packets on Android device on both wifi/cellular network Signed-off-by: Mike Yu Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_input.c | 2 +- net/xfrm/xfrm_policy.c | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index d2ea18dcb0cb..ba8deb0235ba 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -471,7 +471,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) struct xfrm_offload *xo = xfrm_offload(skb); struct sec_path *sp; - if (encap_type < 0 || (xo && xo->flags & XFRM_GRO)) { + if (encap_type < 0 || (xo && (xo->flags & XFRM_GRO || encap_type == 0))) { x = xfrm_input_state(skb); if (unlikely(x->dir && x->dir != XFRM_SA_DIR_IN)) { diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 6603d3bd171f..2a9a31f2a9c1 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -3718,12 +3718,15 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, pol = xfrm_in_fwd_icmp(skb, &fl, family, if_id); if (!pol) { + const bool is_crypto_offload = sp && + (xfrm_input_state(skb)->xso.type == XFRM_DEV_OFFLOAD_CRYPTO); + if (net->xfrm.policy_default[dir] == XFRM_USERPOLICY_BLOCK) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOPOLS); return 0; } - if (sp && secpath_has_nontransport(sp, 0, &xerr_idx)) { + if (sp && secpath_has_nontransport(sp, 0, &xerr_idx) && !is_crypto_offload) { xfrm_secpath_reject(xerr_idx, skb, &fl); XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOPOLS); return 0; -- cgit v1.2.3-58-ga151 From a10fb4a84a693891b2e205684456d0825363fc24 Mon Sep 17 00:00:00 2001 From: Mike Yu Date: Fri, 12 Jul 2024 10:51:23 +0800 Subject: xfrm: Allow UDP encapsulation in crypto offload control path Unblock this limitation so that SAs with encapsulation specified can be passed to HW drivers. HW drivers can still reject the SA in their implementation of xdo_dev_state_add if the encapsulation is not supported. Test: Verified on Android device Signed-off-by: Mike Yu Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_device.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index 2455a76a1cff..9a44d363ba62 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -261,9 +261,9 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, is_packet_offload = xuo->flags & XFRM_OFFLOAD_PACKET; - /* We don't yet support UDP encapsulation and TFC padding. */ - if ((!is_packet_offload && x->encap) || x->tfcpad) { - NL_SET_ERR_MSG(extack, "Encapsulation and TFC padding can't be offloaded"); + /* We don't yet support TFC padding. */ + if (x->tfcpad) { + NL_SET_ERR_MSG(extack, "TFC padding can't be offloaded"); return -EINVAL; } -- cgit v1.2.3-58-ga151 From 4ecbac84b5b8c4ad033b473782582568810baf61 Mon Sep 17 00:00:00 2001 From: Mike Yu Date: Fri, 12 Jul 2024 10:51:24 +0800 Subject: xfrm: Support crypto offload for inbound IPv4 UDP-encapsulated ESP packet If xfrm_input() is called with UDP_ENCAP_ESPINUDP, the packet is already processed in UDP layer that removes the UDP header. 
Therefore, there should be no much difference to treat it as an ESP packet in the XFRM stack. Test: Enabled dir=in IPsec crypto offload, and verified IPv4 UDP-encapsulated ESP packets on both wifi/cellular network Signed-off-by: Mike Yu Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_input.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index ba8deb0235ba..7cee9c0a2cdc 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -471,7 +471,8 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) struct xfrm_offload *xo = xfrm_offload(skb); struct sec_path *sp; - if (encap_type < 0 || (xo && (xo->flags & XFRM_GRO || encap_type == 0))) { + if (encap_type < 0 || (xo && (xo->flags & XFRM_GRO || encap_type == 0 || + encap_type == UDP_ENCAP_ESPINUDP))) { x = xfrm_input_state(skb); if (unlikely(x->dir && x->dir != XFRM_SA_DIR_IN)) { -- cgit v1.2.3-58-ga151 From 447bc4b1906f100e65c662528b7ae4e1dc2e9b80 Mon Sep 17 00:00:00 2001 From: Mike Yu Date: Fri, 12 Jul 2024 10:51:25 +0800 Subject: xfrm: Support crypto offload for outbound IPv4 UDP-encapsulated ESP packet esp_xmit() is already able to handle UDP encapsulation through the call to esp_output_head(). However, the ESP header and the outer IP header are not correct and need to be corrected. Test: Enabled both dir=in/out IPsec crypto offload, and verified IPv4 UDP-encapsulated ESP packets on both wifi/cellular network Signed-off-by: Mike Yu Signed-off-by: Steffen Klassert --- net/ipv4/esp4.c | 8 +++++++- net/ipv4/esp4_offload.c | 17 ++++++++++++++++- 2 files changed, 23 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 3968d3f98e08..73981595f062 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -349,6 +349,7 @@ static struct ip_esp_hdr *esp_output_udp_encap(struct sk_buff *skb, { struct udphdr *uh; unsigned int len; + struct xfrm_offload *xo = xfrm_offload(skb); len = skb->len + esp->tailen - skb_transport_offset(skb); if (len + sizeof(struct iphdr) > IP_MAX_MTU) @@ -360,7 +361,12 @@ static struct ip_esp_hdr *esp_output_udp_encap(struct sk_buff *skb, uh->len = htons(len); uh->check = 0; - *skb_mac_header(skb) = IPPROTO_UDP; + /* For IPv4 ESP with UDP encapsulation, if xo is not null, the skb is in the crypto offload + * data path, which means that esp_output_udp_encap is called outside of the XFRM stack. + * In this case, the mac header doesn't point to the IPv4 protocol field, so don't set it. 
+ */ + if (!xo || encap_type != UDP_ENCAP_ESPINUDP) + *skb_mac_header(skb) = IPPROTO_UDP; return (struct ip_esp_hdr *)(uh + 1); } diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c index b3271957ad9a..a37d18858c72 100644 --- a/net/ipv4/esp4_offload.c +++ b/net/ipv4/esp4_offload.c @@ -264,6 +264,7 @@ static int esp_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features_ struct esp_info esp; bool hw_offload = true; __u32 seq; + int encap_type = 0; esp.inplace = true; @@ -296,8 +297,10 @@ static int esp_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features_ esp.esph = ip_esp_hdr(skb); + if (x->encap) + encap_type = x->encap->encap_type; - if (!hw_offload || !skb_is_gso(skb)) { + if (!hw_offload || !skb_is_gso(skb) || (hw_offload && encap_type == UDP_ENCAP_ESPINUDP)) { esp.nfrags = esp_output_head(x, skb, &esp); if (esp.nfrags < 0) return esp.nfrags; @@ -324,6 +327,18 @@ static int esp_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features_ esp.seqno = cpu_to_be64(seq + ((u64)xo->seq.hi << 32)); + if (hw_offload && encap_type == UDP_ENCAP_ESPINUDP) { + /* In the XFRM stack, the encapsulation protocol is set to iphdr->protocol by + * setting *skb_mac_header(skb) (see esp_output_udp_encap()) where skb->mac_header + * points to iphdr->protocol (see xfrm4_tunnel_encap_add()). + * However, in esp_xmit(), skb->mac_header doesn't point to iphdr->protocol. + * Therefore, the protocol field needs to be corrected. + */ + ip_hdr(skb)->protocol = IPPROTO_UDP; + + esph->seq_no = htonl(seq); + } + ip_hdr(skb)->tot_len = htons(skb->len); ip_send_check(ip_hdr(skb)); -- cgit v1.2.3-58-ga151 From d69ba6bbaf1f606ac354e925571a54d025e32aae Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 11 Jul 2024 15:07:03 -0700 Subject: net: ethtool: let drivers remove lost RSS contexts RSS contexts may get lost from a device, in various extreme circumstances. Specifically if the firmware leaks resources and resets, or crashes and either recovers in partially working state or the crash causes a different FW version to run - creating the context again may fail. Drivers should do their absolute best to prevent this from happening. When it does, however, telling user that a context exists, when it can't possibly be used any more is counter productive. Add a helper for drivers to discard contexts. Print an error, in the future netlink notification will also be sent. More robust approaches were proposed, like keeping the contexts but marking them as "dead" (but possibly resurrected by next reset). That may be better but it's unclear at this stage whether the effort is worth the benefits. 
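A hypothetical driver-side use, for illustration only (foo_restore_rss_ctx() and the surrounding reset flow are made up; the helper expects RTNL or the ethtool RSS lock to be held, as the WARN_ONCE in the diff below shows):

	/* Called for each additional RSS context that was programmed before
	 * a firmware reset, with RTNL held.
	 */
	static void foo_restore_one_rss_ctx(struct net_device *dev, u32 ctx_id)
	{
		/* if the recovered firmware refuses to recreate the context,
		 * tell the core to stop advertising it
		 */
		if (foo_restore_rss_ctx(dev, ctx_id))
			ethtool_rxfh_context_lost(dev, ctx_id);
	}
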
Reviewed-by: Pavan Chebbi Link: https://patch.msgid.link/20240711220713.283778-2-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/ethtool.h | 2 ++ net/ethtool/common.c | 14 ++++++++++++++ 2 files changed, 16 insertions(+) (limited to 'net') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index e213b5508da6..89da0254ccd4 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -210,6 +210,8 @@ static inline size_t ethtool_rxfh_context_size(u32 indir_size, u32 key_size, return struct_size_t(struct ethtool_rxfh_context, data, flex_len); } +void ethtool_rxfh_context_lost(struct net_device *dev, u32 context_id); + /* declare a link mode bitmap */ #define __ETHTOOL_DECLARE_LINK_MODE_MASK(name) \ DECLARE_BITMAP(name, __ETHTOOL_LINK_MODE_MASK_NBITS) diff --git a/net/ethtool/common.c b/net/ethtool/common.c index 7bda9600efcf..67d06cd002a5 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ -741,3 +741,17 @@ ethtool_forced_speed_maps_init(struct ethtool_forced_speed_map *maps, u32 size) } } EXPORT_SYMBOL_GPL(ethtool_forced_speed_maps_init); + +void ethtool_rxfh_context_lost(struct net_device *dev, u32 context_id) +{ + struct ethtool_rxfh_context *ctx; + + WARN_ONCE(!rtnl_is_locked() && + !lockdep_is_held_type(&dev->ethtool->rss_lock, -1), + "RSS context lock assertion failed\n"); + + netdev_err(dev, "device error, RSS context %d lost\n", context_id); + ctx = xa_erase(&dev->ethtool->rss_ctx, context_id); + kfree(ctx); +} +EXPORT_SYMBOL(ethtool_rxfh_context_lost); -- cgit v1.2.3-58-ga151 From 28c8757a792bbbc76407777bd0303862daa75057 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 11 Jul 2024 15:07:04 -0700 Subject: net: ethtool: let drivers declare max size of RSS indir table and key Some drivers (bnxt but I think also mlx5 from ML discussions) change the size of the indirection table depending on the number of Rx rings. Decouple the max table size from the size of the currently used table, so that we can reserve space in the context for table growth. Static members in ethtool_ops are good enough for now, we can add callbacks to read the max size more dynamically if someone needs that. Reviewed-by: Pavan Chebbi Link: https://patch.msgid.link/20240711220713.283778-3-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/ethtool.h | 20 ++++++++------------ net/ethtool/ioctl.c | 46 ++++++++++++++++++++++++++++++++++++---------- 2 files changed, 44 insertions(+), 22 deletions(-) (limited to 'net') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 89da0254ccd4..a1ee76936f53 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -181,6 +181,7 @@ struct ethtool_rxfh_context { /* private: driver private data, indirection table, and hash key are * stored sequentially in @data area. Use below helpers to access. 
*/ + u32 key_off; u8 data[] __aligned(sizeof(void *)); }; @@ -196,18 +197,7 @@ static inline u32 *ethtool_rxfh_context_indir(struct ethtool_rxfh_context *ctx) static inline u8 *ethtool_rxfh_context_key(struct ethtool_rxfh_context *ctx) { - return (u8 *)(ethtool_rxfh_context_indir(ctx) + ctx->indir_size); -} - -static inline size_t ethtool_rxfh_context_size(u32 indir_size, u32 key_size, - u16 priv_size) -{ - size_t indir_bytes = array_size(indir_size, sizeof(u32)); - size_t flex_len; - - flex_len = size_add(size_add(indir_bytes, key_size), - ALIGN(priv_size, sizeof(u32))); - return struct_size_t(struct ethtool_rxfh_context, data, flex_len); + return &ctx->data[ctx->key_off]; } void ethtool_rxfh_context_lost(struct net_device *dev, u32 context_id); @@ -723,6 +713,10 @@ struct ethtool_rxfh_param { * contexts. * @cap_rss_sym_xor_supported: indicates if the driver supports symmetric-xor * RSS. + * @rxfh_indir_space: max size of RSS indirection tables, if indirection table + * size as returned by @get_rxfh_indir_size may change during lifetime + * of the device. Leave as 0 if the table size is constant. + * @rxfh_key_space: same as @rxfh_indir_space, but for the key. * @rxfh_priv_size: size of the driver private data area the core should * allocate for an RSS context (in &struct ethtool_rxfh_context). * @rxfh_max_context_id: maximum (exclusive) supported RSS context ID. If this @@ -940,6 +934,8 @@ struct ethtool_ops { u32 cap_link_lanes_supported:1; u32 cap_rss_ctx_supported:1; u32 cap_rss_sym_xor_supported:1; + u32 rxfh_indir_space; + u16 rxfh_key_space; u16 rxfh_priv_size; u32 rxfh_max_context_id; u32 supported_coalesce_params; diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 615812ff8974..0732710a4836 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -1290,6 +1290,40 @@ out: return ret; } +static struct ethtool_rxfh_context * +ethtool_rxfh_ctx_alloc(const struct ethtool_ops *ops, + u32 indir_size, u32 key_size) +{ + size_t indir_bytes, flex_len, key_off, size; + struct ethtool_rxfh_context *ctx; + u32 priv_bytes, indir_max; + u16 key_max; + + key_max = max(key_size, ops->rxfh_key_space); + indir_max = max(indir_size, ops->rxfh_indir_space); + + priv_bytes = ALIGN(ops->rxfh_priv_size, sizeof(u32)); + indir_bytes = array_size(indir_max, sizeof(u32)); + + key_off = size_add(priv_bytes, indir_bytes); + flex_len = size_add(key_off, key_max); + size = struct_size_t(struct ethtool_rxfh_context, data, flex_len); + + ctx = kzalloc(size, GFP_KERNEL_ACCOUNT); + if (!ctx) + return NULL; + + ctx->indir_size = indir_size; + ctx->key_size = key_size; + ctx->key_off = key_off; + ctx->priv_size = ops->rxfh_priv_size; + + ctx->hfunc = ETH_RSS_HASH_NO_CHANGE; + ctx->input_xfrm = RXH_XFRM_NO_CHANGE; + + return ctx; +} + static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, void __user *useraddr) { @@ -1406,20 +1440,12 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, ret = -EINVAL; goto out; } - ctx = kzalloc(ethtool_rxfh_context_size(dev_indir_size, - dev_key_size, - ops->rxfh_priv_size), - GFP_KERNEL_ACCOUNT); + ctx = ethtool_rxfh_ctx_alloc(ops, dev_indir_size, dev_key_size); if (!ctx) { ret = -ENOMEM; goto out; } - ctx->indir_size = dev_indir_size; - ctx->key_size = dev_key_size; - ctx->priv_size = ops->rxfh_priv_size; - /* Initialise to an empty context */ - ctx->hfunc = ETH_RSS_HASH_NO_CHANGE; - ctx->input_xfrm = RXH_XFRM_NO_CHANGE; + if (ops->create_rxfh_context) { u32 limit = ops->rxfh_max_context_id ?: U32_MAX; u32 ctx_id; -- cgit 
v1.2.3-58-ga151 From 23e89e8ee7be73e21200947885a6d3a109a2c58d Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 10 Jul 2024 10:12:45 -0700 Subject: tcp: Don't drop SYN+ACK for simultaneous connect(). RFC 9293 states that in the case of simultaneous connect(), the connection gets established when SYN+ACK is received. [0] TCP Peer A TCP Peer B 1. CLOSED CLOSED 2. SYN-SENT --> ... 3. SYN-RECEIVED <-- <-- SYN-SENT 4. ... --> SYN-RECEIVED 5. SYN-RECEIVED --> ... 6. ESTABLISHED <-- <-- SYN-RECEIVED 7. ... --> ESTABLISHED However, since commit 0c24604b68fc ("tcp: implement RFC 5961 4.2"), such a SYN+ACK is dropped in tcp_validate_incoming() and responded with Challenge ACK. For example, the write() syscall in the following packetdrill script fails with -EAGAIN, and wrong SNMP stats get incremented. 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 +0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress) +0 > S 0:0(0) +0 < S 0:0(0) win 1000 +0 > S. 0:0(0) ack 1 +0 < S. 0:0(0) ack 1 win 1000 +0 write(3, ..., 100) = 100 +0 > P. 1:101(100) ack 1 -- # packetdrill cross-synack.pkt cross-synack.pkt:13: runtime error in write call: Expected result 100 but got -1 with errno 11 (Resource temporarily unavailable) # nstat ... TcpExtTCPChallengeACK 1 0.0 TcpExtTCPSYNChallenge 1 0.0 The problem is that bpf_skops_established() is triggered by the Challenge ACK instead of SYN+ACK. This causes the bpf prog to miss the chance to check if the peer supports a TCP option that is expected to be exchanged in SYN and SYN+ACK. Let's accept a bare SYN+ACK for active-open TCP_SYN_RECV sockets to avoid such a situation. Note that tcp_ack_snd_check() in tcp_rcv_state_process() is skipped not to send an unnecessary ACK, but this could be a bit risky for net.git, so this targets for net-next. Link: https://www.rfc-editor.org/rfc/rfc9293.html#section-3.5-7 [0] Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20240710171246.87533-2-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- net/ipv4/tcp_input.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index e0f54b9be850..ff9ab3d01ced 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5998,6 +5998,11 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, * RFC 5961 4.2 : Send a challenge ack */ if (th->syn) { + if (sk->sk_state == TCP_SYN_RECV && sk->sk_socket && th->ack && + TCP_SKB_CB(skb)->seq + 1 == TCP_SKB_CB(skb)->end_seq && + TCP_SKB_CB(skb)->seq + 1 == tp->rcv_nxt && + TCP_SKB_CB(skb)->ack_seq == tp->snd_nxt) + goto pass; syn_challenge: if (syn_inerr) TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS); @@ -6007,6 +6012,7 @@ syn_challenge: goto discard; } +pass: bpf_skops_parse_hdr(sk, skb); return true; @@ -6813,6 +6819,9 @@ tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) tcp_fast_path_on(tp); if (sk->sk_shutdown & SEND_SHUTDOWN) tcp_shutdown(sk, SEND_SHUTDOWN); + + if (sk->sk_socket) + goto consume; break; case TCP_FIN_WAIT1: { -- cgit v1.2.3-58-ga151 From 275a63c9fe10f39066782d2d775d7d3efb20b01f Mon Sep 17 00:00:00 2001 From: Danielle Ratson Date: Thu, 11 Jul 2024 11:09:34 +0300 Subject: net: ethtool: Monotonically increase the message sequence number Currently, during the module firmware flashing process, unicast notifications are sent from the kernel using the same sequence number, making it impossible for user space to track missed notifications. 
Monotonically increase the message sequence number, so the order of notifications could be tracked effectively. Signed-off-by: Danielle Ratson Reviewed-by: Ido Schimmel Reviewed-by: Jakub Kicinski Link: https://patch.msgid.link/20240711080934.2071869-1-danieller@nvidia.com Signed-off-by: Jakub Kicinski --- net/ethtool/module.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ethtool/module.c b/net/ethtool/module.c index aba78436d350..6988e07bdcd6 100644 --- a/net/ethtool/module.c +++ b/net/ethtool/module.c @@ -488,7 +488,7 @@ ethnl_module_fw_flash_ntf(struct net_device *dev, if (!skb) return; - hdr = ethnl_unicast_put(skb, ntf_params->portid, ntf_params->seq, + hdr = ethnl_unicast_put(skb, ntf_params->portid, ++ntf_params->seq, ETHTOOL_MSG_MODULE_FW_FLASH_NTF); if (!hdr) goto err_skb; -- cgit v1.2.3-58-ga151 From 93c3a96c301f0b1ac0bafb5e74bef58e79937648 Mon Sep 17 00:00:00 2001 From: Kory Maincent Date: Thu, 11 Jul 2024 15:55:18 +0200 Subject: net: pse-pd: Do not return EOPNOSUPP if config is null For a PSE supporting both c33 and PoDL, setting config for one type of PoE leaves the other type's config null. Currently, this case returns EOPNOTSUPP, which is incorrect. Instead, we should do nothing if the configuration is empty. Signed-off-by: Kory Maincent Fixes: d83e13761d5b ("net: pse-pd: Use regulator framework within PSE framework") Link: https://patch.msgid.link/20240711-fix_pse_pd_deref-v3-1-edd78fc4fe42@bootlin.com Signed-off-by: Jakub Kicinski --- drivers/net/pse-pd/pse_core.c | 4 ++-- net/ethtool/pse-pd.c | 4 +++- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/drivers/net/pse-pd/pse_core.c b/drivers/net/pse-pd/pse_core.c index 795ab264eaf2..513cd7f85933 100644 --- a/drivers/net/pse-pd/pse_core.c +++ b/drivers/net/pse-pd/pse_core.c @@ -719,13 +719,13 @@ int pse_ethtool_set_config(struct pse_control *psec, { int err = 0; - if (pse_has_c33(psec)) { + if (pse_has_c33(psec) && config->c33_admin_control) { err = pse_ethtool_c33_set_config(psec, config); if (err) return err; } - if (pse_has_podl(psec)) + if (pse_has_podl(psec) && config->podl_admin_control) err = pse_ethtool_podl_set_config(psec, config); return err; diff --git a/net/ethtool/pse-pd.c b/net/ethtool/pse-pd.c index 2c981d443f27..982995ff1628 100644 --- a/net/ethtool/pse-pd.c +++ b/net/ethtool/pse-pd.c @@ -183,7 +183,9 @@ ethnl_set_pse(struct ethnl_req_info *req_info, struct genl_info *info) if (pse_has_c33(phydev->psec)) config.c33_admin_control = nla_get_u32(tb[ETHTOOL_A_C33_PSE_ADMIN_CONTROL]); - /* Return errno directly - PSE has no notification */ + /* Return errno directly - PSE has no notification + * pse_ethtool_set_config() will do nothing if the config is null + */ return pse_ethtool_set_config(phydev->psec, info->extack, &config); } -- cgit v1.2.3-58-ga151 From 4cddb0f15ea9c62f81b4889ea69a99368cc63a86 Mon Sep 17 00:00:00 2001 From: Kory Maincent Date: Thu, 11 Jul 2024 15:55:19 +0200 Subject: net: ethtool: pse-pd: Fix possible null-deref Fix a possible null dereference when a PSE supports both c33 and PoDL, but only one of the netlink attributes is specified. The c33 or PoDL PSE capabilities are already validated in the ethnl_set_pse_validate() call. 
Signed-off-by: Kory Maincent Reported-by: Jakub Kicinski Closes: https://lore.kernel.org/netdev/20240705184116.13d8235a@kernel.org/ Fixes: 4d18e3ddf427 ("net: ethtool: pse-pd: Expand pse commands with the PSE PoE interface") Link: https://patch.msgid.link/20240711-fix_pse_pd_deref-v3-2-edd78fc4fe42@bootlin.com Signed-off-by: Jakub Kicinski --- net/ethtool/pse-pd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ethtool/pse-pd.c b/net/ethtool/pse-pd.c index 982995ff1628..776ac96cdadc 100644 --- a/net/ethtool/pse-pd.c +++ b/net/ethtool/pse-pd.c @@ -178,9 +178,9 @@ ethnl_set_pse(struct ethnl_req_info *req_info, struct genl_info *info) phydev = dev->phydev; /* These values are already validated by the ethnl_pse_set_policy */ - if (pse_has_podl(phydev->psec)) + if (tb[ETHTOOL_A_PODL_PSE_ADMIN_CONTROL]) config.podl_admin_control = nla_get_u32(tb[ETHTOOL_A_PODL_PSE_ADMIN_CONTROL]); - if (pse_has_c33(phydev->psec)) + if (tb[ETHTOOL_A_C33_PSE_ADMIN_CONTROL]) config.c33_admin_control = nla_get_u32(tb[ETHTOOL_A_C33_PSE_ADMIN_CONTROL]); /* Return errno directly - PSE has no notification -- cgit v1.2.3-58-ga151 From 6807352353561187a718e87204458999dbcbba1b Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Wed, 10 Jul 2024 10:14:27 +0200 Subject: ipv4: fix source address selection with route leak By default, an address assigned to the output interface is selected when the source address is not specified. This is problematic when a route, configured in a vrf, uses an interface from another vrf (aka route leak). The original vrf does not own the selected source address. Let's add a check against the output interface and call the appropriate function to select the source address. CC: stable@vger.kernel.org Fixes: 8cbb512c923d ("net: Add source address lookup op for VRF") Signed-off-by: Nicolas Dichtel Reviewed-by: David Ahern Link: https://patch.msgid.link/20240710081521.3809742-2-nicolas.dichtel@6wind.com Signed-off-by: Jakub Kicinski --- net/ipv4/fib_semantics.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index f669da98d11d..8956026bc0a2 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -2270,6 +2270,15 @@ void fib_select_path(struct net *net, struct fib_result *res, fib_select_default(fl4, res); check_saddr: - if (!fl4->saddr) - fl4->saddr = fib_result_prefsrc(net, res); + if (!fl4->saddr) { + struct net_device *l3mdev; + + l3mdev = dev_get_by_index_rcu(net, fl4->flowi4_l3mdev); + + if (!l3mdev || + l3mdev_master_dev_rcu(FIB_RES_DEV(*res)) == l3mdev) + fl4->saddr = fib_result_prefsrc(net, res); + else + fl4->saddr = inet_select_addr(l3mdev, 0, RT_SCOPE_LINK); + } } -- cgit v1.2.3-58-ga151 From 252442f2ae317d109ef0b4b39ce0608c09563042 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Wed, 10 Jul 2024 10:14:28 +0200 Subject: ipv6: fix source address selection with route leak By default, an address assigned to the output interface is selected when the source address is not specified. This is problematic when a route, configured in a vrf, uses an interface from another vrf (aka route leak). The original vrf does not own the selected source address. Let's add a check against the output interface and call the appropriate function to select the source address. 
CC: stable@vger.kernel.org Fixes: 0d240e7811c4 ("net: vrf: Implement get_saddr for IPv6") Signed-off-by: Nicolas Dichtel Link: https://patch.msgid.link/20240710081521.3809742-3-nicolas.dichtel@6wind.com Signed-off-by: Jakub Kicinski --- include/net/ip6_route.h | 20 ++++++++++++++------ net/ipv6/ip6_output.c | 1 + net/ipv6/route.c | 2 +- 3 files changed, 16 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index a18ed24fed94..6dbdf60b342f 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -127,18 +127,26 @@ void rt6_age_exceptions(struct fib6_info *f6i, struct fib6_gc_args *gc_args, static inline int ip6_route_get_saddr(struct net *net, struct fib6_info *f6i, const struct in6_addr *daddr, - unsigned int prefs, + unsigned int prefs, int l3mdev_index, struct in6_addr *saddr) { + struct net_device *l3mdev; + struct net_device *dev; + bool same_vrf; int err = 0; - if (f6i && f6i->fib6_prefsrc.plen) { + rcu_read_lock(); + + l3mdev = dev_get_by_index_rcu(net, l3mdev_index); + if (!f6i || !f6i->fib6_prefsrc.plen || l3mdev) + dev = f6i ? fib6_info_nh_dev(f6i) : NULL; + same_vrf = !l3mdev || l3mdev_master_dev_rcu(dev) == l3mdev; + if (f6i && f6i->fib6_prefsrc.plen && same_vrf) *saddr = f6i->fib6_prefsrc.addr; - } else { - struct net_device *dev = f6i ? fib6_info_nh_dev(f6i) : NULL; + else + err = ipv6_dev_get_saddr(net, same_vrf ? dev : l3mdev, daddr, prefs, saddr); - err = ipv6_dev_get_saddr(net, dev, daddr, prefs, saddr); - } + rcu_read_unlock(); return err; } diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 27d8725445e3..784424ac4147 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1124,6 +1124,7 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk, from = rt ? rcu_dereference(rt->from) : NULL; err = ip6_route_get_saddr(net, from, &fl6->daddr, sk ? READ_ONCE(inet6_sk(sk)->srcprefs) : 0, + fl6->flowi6_l3mdev, &fl6->saddr); rcu_read_unlock(); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 8d72ca0b086d..c9a9506b714d 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -5689,7 +5689,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb, goto nla_put_failure; } else if (dest) { struct in6_addr saddr_buf; - if (ip6_route_get_saddr(net, rt, dest, 0, &saddr_buf) == 0 && + if (ip6_route_get_saddr(net, rt, dest, 0, 0, &saddr_buf) == 0 && nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf)) goto nla_put_failure; } -- cgit v1.2.3-58-ga151 From abb9a68d2c64dd9b128ae1f2e635e4d805e7ce64 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Wed, 10 Jul 2024 10:14:29 +0200 Subject: ipv6: take care of scope when choosing the src addr When the source address is selected, the scope must be checked. For example, if a loopback address is assigned to the vrf device, it must not be chosen for packets sent outside. 
CC: stable@vger.kernel.org Fixes: afbac6010aec ("net: ipv6: Address selection needs to consider L3 domains") Signed-off-by: Nicolas Dichtel Reviewed-by: David Ahern Link: https://patch.msgid.link/20240710081521.3809742-4-nicolas.dichtel@6wind.com Signed-off-by: Jakub Kicinski --- net/ipv6/addrconf.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 5c424a0e7232..4f2c5cc31015 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1873,7 +1873,8 @@ int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev, master, &dst, scores, hiscore_idx); - if (scores[hiscore_idx].ifa) + if (scores[hiscore_idx].ifa && + scores[hiscore_idx].scopedist >= 0) goto out; } -- cgit v1.2.3-58-ga151 From 0ece498c27d8cd2fdad6f49a6abc34b8badd8fbc Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Fri, 10 May 2024 10:36:45 -0400 Subject: Bluetooth: MGMT: Make MGMT_OP_LOAD_CONN_PARAM update existing connection This makes MGMT_OP_LOAD_CONN_PARAM update existing connection by dectecting the request is just for one connection, parameters already exists and there is a connection. Since this is a new behavior the revision is also updated to enable userspace to detect it. Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_sync.h | 3 +++ net/bluetooth/hci_sync.c | 18 +++++++++++++++ net/bluetooth/mgmt.c | 50 ++++++++++++++++++++++++++++++++++++++-- 3 files changed, 69 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index 534c3386e714..20168732f20e 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -138,6 +138,7 @@ int hci_suspend_sync(struct hci_dev *hdev); int hci_resume_sync(struct hci_dev *hdev); struct hci_conn; +struct hci_conn_params; int hci_abort_conn_sync(struct hci_dev *hdev, struct hci_conn *conn, u8 reason); @@ -156,3 +157,5 @@ int hci_connect_acl_sync(struct hci_dev *hdev, struct hci_conn *conn); int hci_connect_le_sync(struct hci_dev *hdev, struct hci_conn *conn); int hci_cancel_connect_sync(struct hci_dev *hdev, struct hci_conn *conn); +int hci_le_conn_update_sync(struct hci_dev *hdev, struct hci_conn *conn, + struct hci_conn_params *params); diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index eea34e6a236f..82db6092965b 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -6724,3 +6724,21 @@ int hci_cancel_connect_sync(struct hci_dev *hdev, struct hci_conn *conn) return -ENOENT; } + +int hci_le_conn_update_sync(struct hci_dev *hdev, struct hci_conn *conn, + struct hci_conn_params *params) +{ + struct hci_cp_le_conn_update cp; + + memset(&cp, 0, sizeof(cp)); + cp.handle = cpu_to_le16(conn->handle); + cp.conn_interval_min = cpu_to_le16(params->conn_min_interval); + cp.conn_interval_max = cpu_to_le16(params->conn_max_interval); + cp.conn_latency = cpu_to_le16(params->conn_latency); + cp.supervision_timeout = cpu_to_le16(params->supervision_timeout); + cp.min_ce_len = cpu_to_le16(0x0000); + cp.max_ce_len = cpu_to_le16(0x0000); + + return __hci_cmd_sync_status(hdev, HCI_OP_LE_CONN_UPDATE, + sizeof(cp), &cp, HCI_CMD_TIMEOUT); +} diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 80f220b7e19d..20eca8a9c681 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -42,7 +42,7 @@ #include "aosp.h" #define MGMT_VERSION 1 -#define MGMT_REVISION 22 +#define MGMT_REVISION 23 static const u16 mgmt_commands[] = { 
MGMT_OP_READ_INDEX_LIST, @@ -7813,6 +7813,18 @@ unlock: return err; } +static int conn_update_sync(struct hci_dev *hdev, void *data) +{ + struct hci_conn_params *params = data; + struct hci_conn *conn; + + conn = hci_conn_hash_lookup_le(hdev, ¶ms->addr, params->addr_type); + if (!conn) + return -ECANCELED; + + return hci_le_conn_update_sync(hdev, conn, params); +} + static int load_conn_param(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { @@ -7846,13 +7858,15 @@ static int load_conn_param(struct sock *sk, struct hci_dev *hdev, void *data, hci_dev_lock(hdev); - hci_conn_params_clear_disabled(hdev); + if (param_count > 1) + hci_conn_params_clear_disabled(hdev); for (i = 0; i < param_count; i++) { struct mgmt_conn_param *param = &cp->params[i]; struct hci_conn_params *hci_param; u16 min, max, latency, timeout; u8 addr_type; + bool update; bt_dev_dbg(hdev, "Adding %pMR (type %u)", ¶m->addr.bdaddr, param->addr.type); @@ -7879,6 +7893,19 @@ static int load_conn_param(struct sock *sk, struct hci_dev *hdev, void *data, continue; } + /* Detect when the loading is for an existing parameter then + * attempt to trigger the connection update procedure. + */ + if (!i && param_count == 1) { + hci_param = hci_conn_params_lookup(hdev, + ¶m->addr.bdaddr, + addr_type); + if (hci_param) + update = true; + else + hci_conn_params_clear_disabled(hdev); + } + hci_param = hci_conn_params_add(hdev, ¶m->addr.bdaddr, addr_type); if (!hci_param) { @@ -7890,6 +7917,25 @@ static int load_conn_param(struct sock *sk, struct hci_dev *hdev, void *data, hci_param->conn_max_interval = max; hci_param->conn_latency = latency; hci_param->supervision_timeout = timeout; + + /* Check if we need to trigger a connection update */ + if (update) { + struct hci_conn *conn; + + /* Lookup for existing connection as central and check + * if parameters match and if they don't then trigger + * a connection update. + */ + conn = hci_conn_hash_lookup_le(hdev, &hci_param->addr, + addr_type); + if (conn && conn->role == HCI_ROLE_MASTER && + (conn->le_conn_min_interval != min || + conn->le_conn_max_interval != max || + conn->le_conn_latency != latency || + conn->le_supv_timeout != timeout)) + hci_cmd_sync_queue(hdev, conn_update_sync, + hci_param, NULL); + } } hci_dev_unlock(hdev); -- cgit v1.2.3-58-ga151 From 8f7dfe171c576aaec4911cc59feaed26d79c7c7f Mon Sep 17 00:00:00 2001 From: Erick Archer Date: Sat, 18 May 2024 10:30:38 +0200 Subject: Bluetooth: hci_core: Prefer struct_size over open coded arithmetic This is an effort to get rid of all multiplications from allocation functions in order to prevent integer overflows [1][2]. As the "dl" variable is a pointer to "struct hci_dev_list_req" and this structure ends in a flexible array: struct hci_dev_list_req { [...] struct hci_dev_req dev_req[]; /* hci_dev_req structures */ }; the preferred way in the kernel is to use the struct_size() helper to do the arithmetic instead of the calculation "size + count * size" in the kzalloc() and copy_to_user() functions. At the same time, prepare for the coming implementation by GCC and Clang of the __counted_by attribute. Flexible array members annotated with __counted_by can have their accesses bounds-checked at run-time via CONFIG_UBSAN_BOUNDS (for array indexing) and CONFIG_FORTIFY_SOURCE (for strcpy/memcpy-family functions). In this case, it is important to note that the logic needs a little refactoring to ensure that the "dev_num" member is initialized before the first access to the flex array. 
Specifically, add the assignment before the list_for_each_entry() loop. Also remove the "size" variable as it is no longer needed. This way, the code is more readable and safer. This code was detected with the help of Coccinelle, and audited and modified manually. Link: https://www.kernel.org/doc/html/latest/process/deprecated.html#open-coded-arithmetic-in-allocator-arguments [1] Link: https://github.com/KSPP/linux/issues/160 [2] Reviewed-by: Kees Cook Signed-off-by: Erick Archer Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_sock.h | 2 +- net/bluetooth/hci_core.c | 11 ++++------- 2 files changed, 5 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/include/net/bluetooth/hci_sock.h b/include/net/bluetooth/hci_sock.h index 9949870f7d78..13e8cd4414a1 100644 --- a/include/net/bluetooth/hci_sock.h +++ b/include/net/bluetooth/hci_sock.h @@ -144,7 +144,7 @@ struct hci_dev_req { struct hci_dev_list_req { __u16 dev_num; - struct hci_dev_req dev_req[]; /* hci_dev_req structures */ + struct hci_dev_req dev_req[] __counted_by(dev_num); }; struct hci_conn_list_req { diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index c644b30977bd..a2cad8a982f6 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -801,7 +801,7 @@ int hci_get_dev_list(void __user *arg) struct hci_dev *hdev; struct hci_dev_list_req *dl; struct hci_dev_req *dr; - int n = 0, size, err; + int n = 0, err; __u16 dev_num; if (get_user(dev_num, (__u16 __user *) arg)) @@ -810,12 +810,11 @@ int hci_get_dev_list(void __user *arg) if (!dev_num || dev_num > (PAGE_SIZE * 2) / sizeof(*dr)) return -EINVAL; - size = sizeof(*dl) + dev_num * sizeof(*dr); - - dl = kzalloc(size, GFP_KERNEL); + dl = kzalloc(struct_size(dl, dev_req, dev_num), GFP_KERNEL); if (!dl) return -ENOMEM; + dl->dev_num = dev_num; dr = dl->dev_req; read_lock(&hci_dev_list_lock); @@ -838,9 +837,7 @@ int hci_get_dev_list(void __user *arg) read_unlock(&hci_dev_list_lock); dl->dev_num = n; - size = sizeof(*dl) + n * sizeof(*dr); - - err = copy_to_user(arg, dl, size); + err = copy_to_user(arg, dl, struct_size(dl, dev_req, n)); kfree(dl); return err ? -EFAULT : 0; -- cgit v1.2.3-58-ga151 From c61e41121036aa610e904ef60f8520e10455ee8c Mon Sep 17 00:00:00 2001 From: Erick Archer Date: Sat, 18 May 2024 10:30:39 +0200 Subject: Bluetooth: hci_core: Prefer array indexing over pointer arithmetic Refactor the list_for_each_entry() loop of hci_get_dev_list() function to use array indexing instead of pointer arithmetic. This way, the code is more readable and idiomatic. Reviewed-by: Kees Cook Signed-off-by: Erick Archer Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index a2cad8a982f6..55bdc365916f 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -828,8 +828,8 @@ int hci_get_dev_list(void __user *arg) if (hci_dev_test_flag(hdev, HCI_AUTO_OFF)) flags &= ~BIT(HCI_UP); - (dr + n)->dev_id = hdev->id; - (dr + n)->dev_opt = flags; + dr[n].dev_id = hdev->id; + dr[n].dev_opt = flags; if (++n >= dev_num) break; -- cgit v1.2.3-58-ga151 From 7d2c7ddba6238e6a14cd89ef869878dd22f2a661 Mon Sep 17 00:00:00 2001 From: Erick Archer Date: Fri, 17 May 2024 19:21:49 +0200 Subject: tty: rfcomm: prefer struct_size over open coded arithmetic This is an effort to get rid of all multiplications from allocation functions in order to prevent integer overflows [1][2]. 
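In generic terms, the struct_size()/__counted_by pattern applied by this series looks roughly as follows (an illustrative sketch using a made-up structure, not code from these patches):

        struct item {
                u32 val;
        };

        struct item_list {
                u16 count;
                struct item items[] __counted_by(count);
        };

        static struct item_list *alloc_items(u16 n)
        {
                /* struct_size() replaces "sizeof(*l) + n * sizeof(*l->items)"
                 * and saturates instead of silently overflowing
                 */
                struct item_list *l = kzalloc(struct_size(l, items, n), GFP_KERNEL);

                if (l)
                        l->count = n;   /* set before any items[] access */
                return l;
        }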
As the "dl" variable is a pointer to "struct rfcomm_dev_list_req" and this structure ends in a flexible array: struct rfcomm_dev_list_req { [...] struct rfcomm_dev_info dev_info[]; }; the preferred way in the kernel is to use the struct_size() helper to do the arithmetic instead of the calculation "size + count * size" in the kzalloc() and copy_to_user() functions. At the same time, prepare for the coming implementation by GCC and Clang of the __counted_by attribute. Flexible array members annotated with __counted_by can have their accesses bounds-checked at run-time via CONFIG_UBSAN_BOUNDS (for array indexing) and CONFIG_FORTIFY_SOURCE (for strcpy/memcpy-family functions). In this case, it is important to note that the logic needs a little refactoring to ensure that the "dev_num" member is initialized before the first access to the flex array. Specifically, add the assignment before the list_for_each_entry() loop. Also remove the "size" variable as it is no longer needed. This way, the code is more readable and safer. This code was detected with the help of Coccinelle, and audited and modified manually. Link: https://www.kernel.org/doc/html/latest/process/deprecated.html#open-coded-arithmetic-in-allocator-arguments [1] Link: https://github.com/KSPP/linux/issues/160 [2] Reviewed-by: Kees Cook Signed-off-by: Erick Archer Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/rfcomm.h | 2 +- net/bluetooth/rfcomm/tty.c | 11 ++++------- 2 files changed, 5 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/include/net/bluetooth/rfcomm.h b/include/net/bluetooth/rfcomm.h index 99d26879b02a..c05882476900 100644 --- a/include/net/bluetooth/rfcomm.h +++ b/include/net/bluetooth/rfcomm.h @@ -355,7 +355,7 @@ struct rfcomm_dev_info { struct rfcomm_dev_list_req { u16 dev_num; - struct rfcomm_dev_info dev_info[]; + struct rfcomm_dev_info dev_info[] __counted_by(dev_num); }; int rfcomm_dev_ioctl(struct sock *sk, unsigned int cmd, void __user *arg); diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c index 69c75c041fe1..44b781e7569e 100644 --- a/net/bluetooth/rfcomm/tty.c +++ b/net/bluetooth/rfcomm/tty.c @@ -504,7 +504,7 @@ static int rfcomm_get_dev_list(void __user *arg) struct rfcomm_dev *dev; struct rfcomm_dev_list_req *dl; struct rfcomm_dev_info *di; - int n = 0, size, err; + int n = 0, err; u16 dev_num; BT_DBG(""); @@ -515,12 +515,11 @@ static int rfcomm_get_dev_list(void __user *arg) if (!dev_num || dev_num > (PAGE_SIZE * 4) / sizeof(*di)) return -EINVAL; - size = sizeof(*dl) + dev_num * sizeof(*di); - - dl = kzalloc(size, GFP_KERNEL); + dl = kzalloc(struct_size(dl, dev_info, dev_num), GFP_KERNEL); if (!dl) return -ENOMEM; + dl->dev_num = dev_num; di = dl->dev_info; mutex_lock(&rfcomm_dev_lock); @@ -542,9 +541,7 @@ static int rfcomm_get_dev_list(void __user *arg) mutex_unlock(&rfcomm_dev_lock); dl->dev_num = n; - size = sizeof(*dl) + n * sizeof(*di); - - err = copy_to_user(arg, dl, size); + err = copy_to_user(arg, dl, struct_size(dl, dev_info, n)); kfree(dl); return err ? -EFAULT : 0; -- cgit v1.2.3-58-ga151 From b1c7cd6caaf6209ed9f7df59b61a2ac885eb79b3 Mon Sep 17 00:00:00 2001 From: Erick Archer Date: Fri, 17 May 2024 19:21:50 +0200 Subject: tty: rfcomm: prefer array indexing over pointer arithmetic Refactor the list_for_each_entry() loop of rfcomm_get_dev_list() function to use array indexing instead of pointer arithmetic. This way, the code is more readable and idiomatic. 
Reviewed-by: Kees Cook Signed-off-by: Erick Archer Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/rfcomm/tty.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c index 44b781e7569e..af80d599c337 100644 --- a/net/bluetooth/rfcomm/tty.c +++ b/net/bluetooth/rfcomm/tty.c @@ -527,12 +527,12 @@ static int rfcomm_get_dev_list(void __user *arg) list_for_each_entry(dev, &rfcomm_dev_list, list) { if (!tty_port_get(&dev->port)) continue; - (di + n)->id = dev->id; - (di + n)->flags = dev->flags; - (di + n)->state = dev->dlc->state; - (di + n)->channel = dev->channel; - bacpy(&(di + n)->src, &dev->src); - bacpy(&(di + n)->dst, &dev->dst); + di[n].id = dev->id; + di[n].flags = dev->flags; + di[n].state = dev->dlc->state; + di[n].channel = dev->channel; + bacpy(&di[n].src, &dev->src); + bacpy(&di[n].dst, &dev->dst); tty_port_put(&dev->port); if (++n >= dev_num) break; -- cgit v1.2.3-58-ga151 From 183469bccf68cb05dfb56cc47e73520217854bab Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 24 May 2024 13:14:02 +0300 Subject: Bluetooth: MGMT: Uninitialized variable in load_conn_param() The "update" variable needs to be initialized to false. Fixes: 0ece498c27d8 ("Bluetooth: MGMT: Make MGMT_OP_LOAD_CONN_PARAM update existing connection") Signed-off-by: Dan Carpenter Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/mgmt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 20eca8a9c681..3ab1558ff391 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -7865,8 +7865,8 @@ static int load_conn_param(struct sock *sk, struct hci_dev *hdev, void *data, struct mgmt_conn_param *param = &cp->params[i]; struct hci_conn_params *hci_param; u16 min, max, latency, timeout; + bool update = false; u8 addr_type; - bool update; bt_dev_dbg(hdev, "Adding %pMR (type %u)", ¶m->addr.bdaddr, param->addr.type); -- cgit v1.2.3-58-ga151 From d4cc4ee48167b4c25abf49ead19412c29a1ad9f4 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Sat, 1 Jun 2024 00:26:14 +0100 Subject: Bluetooth: iso: remove unused struct 'iso_list_data' 'iso_list_data' has been unused since the original commit ccf74f2390d6 ("Bluetooth: Add BTPROTO_ISO socket type"). Remove it. Signed-off-by: Dr. David Alan Gilbert Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/iso.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'net') diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c index 398fb81f7a13..d5e00d0dd1a0 100644 --- a/net/bluetooth/iso.c +++ b/net/bluetooth/iso.c @@ -1720,11 +1720,6 @@ static void iso_sock_ready(struct sock *sk) release_sock(sk); } -struct iso_list_data { - struct hci_conn *hcon; - int count; -}; - static bool iso_match_big(struct sock *sk, void *data) { struct hci_evt_le_big_sync_estabilished *ev = data; -- cgit v1.2.3-58-ga151 From f25b7fd36cc3a850e006aed686f5bbecd200de1b Mon Sep 17 00:00:00 2001 From: Ying Hsu Date: Wed, 29 May 2024 08:00:00 +0000 Subject: Bluetooth: Add vendor-specific packet classification for ISO data When HCI raw sockets are opened, the Bluetooth kernel module doesn't track CIS/BIS connections. User-space applications have to identify ISO data by maintaining connection information and look up the mapping for each ACL data packet received. Besides, btsnoop log captured in kernel couldn't tell ISO data from ACL data in this case. 
To avoid additional lookups, this patch introduces vendor-specific packet classification for Intel BT controllers to distinguish ISO data packets from ACL data packets. Signed-off-by: Ying Hsu Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btintel.c | 25 +++++++++++++++++++++++-- include/net/bluetooth/hci_core.h | 1 + net/bluetooth/hci_core.c | 16 ++++++++++++++++ 3 files changed, 40 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/drivers/bluetooth/btintel.c b/drivers/bluetooth/btintel.c index 27e03951e68b..ff33e1aa2929 100644 --- a/drivers/bluetooth/btintel.c +++ b/drivers/bluetooth/btintel.c @@ -2549,6 +2549,24 @@ static void btintel_set_dsm_reset_method(struct hci_dev *hdev, data->acpi_reset_method = btintel_acpi_reset_method; } +#define BTINTEL_ISODATA_HANDLE_BASE 0x900 + +static u8 btintel_classify_pkt_type(struct hci_dev *hdev, struct sk_buff *skb) +{ + /* + * Distinguish ISO data packets form ACL data packets + * based on their connection handle value range. + */ + if (hci_skb_pkt_type(skb) == HCI_ACLDATA_PKT) { + __u16 handle = __le16_to_cpu(hci_acl_hdr(skb)->handle); + + if (hci_handle(handle) >= BTINTEL_ISODATA_HANDLE_BASE) + return HCI_ISODATA_PKT; + } + + return hci_skb_pkt_type(skb); +} + int btintel_bootloader_setup_tlv(struct hci_dev *hdev, struct intel_version_tlv *ver) { @@ -2989,11 +3007,14 @@ static int btintel_setup_combined(struct hci_dev *hdev) err = btintel_bootloader_setup(hdev, &ver); btintel_register_devcoredump_support(hdev); break; + case 0x18: /* GfP2 */ + case 0x1c: /* GaP */ + /* Re-classify packet type for controllers with LE audio */ + hdev->classify_pkt_type = btintel_classify_pkt_type; + fallthrough; case 0x17: - case 0x18: case 0x19: case 0x1b: - case 0x1c: case 0x1e: /* Display version information of TLV type */ btintel_version_info_tlv(hdev, &ver_tlv); diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index c43716edf205..f7de2681d457 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -649,6 +649,7 @@ struct hci_dev { int (*get_codec_config_data)(struct hci_dev *hdev, __u8 type, struct bt_codec *codec, __u8 *vnd_len, __u8 **vnd_data); + u8 (*classify_pkt_type)(struct hci_dev *hdev, struct sk_buff *skb); }; #define HCI_PHY_HANDLE(handle) (handle & 0xff) diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 55bdc365916f..144e85ebfbdb 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -2909,15 +2909,31 @@ int hci_reset_dev(struct hci_dev *hdev) } EXPORT_SYMBOL(hci_reset_dev); +static u8 hci_dev_classify_pkt_type(struct hci_dev *hdev, struct sk_buff *skb) +{ + if (hdev->classify_pkt_type) + return hdev->classify_pkt_type(hdev, skb); + + return hci_skb_pkt_type(skb); +} + /* Receive frame from HCI drivers */ int hci_recv_frame(struct hci_dev *hdev, struct sk_buff *skb) { + u8 dev_pkt_type; + if (!hdev || (!test_bit(HCI_UP, &hdev->flags) && !test_bit(HCI_INIT, &hdev->flags))) { kfree_skb(skb); return -ENXIO; } + /* Check if the driver agree with packet type classification */ + dev_pkt_type = hci_dev_classify_pkt_type(hdev, skb); + if (hci_skb_pkt_type(skb) != dev_pkt_type) { + hci_skb_pkt_type(skb) = dev_pkt_type; + } + switch (hci_skb_pkt_type(skb)) { case HCI_EVENT_PKT: break; -- cgit v1.2.3-58-ga151 From 5a820b13db3988122080f8de2920721f770c37a0 Mon Sep 17 00:00:00 2001 From: Iulia Tanasescu Date: Mon, 17 Jun 2024 11:52:06 +0300 Subject: Bluetooth: hci_event: Set QoS encryption from BIGInfo report On a Broadcast Sink, after 
synchronizing to the PA transimitted by a Broadcast Source, the BIGInfo advertising reports emitted by the Controller hold the encryption field, which indicates whether the Broadcast Source is transmitting encrypted streams. This updates the PA sync hcon QoS with the encryption value reported in the BIGInfo report, so that this information is accurate if the userspace tries to access the QoS struct via getsockopt. Fixes: 1d11d70d1f6b ("Bluetooth: ISO: Pass BIG encryption info through QoS") Signed-off-by: Iulia Tanasescu Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_event.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 93f7ac905cec..4611a67d7dcc 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -6988,6 +6988,8 @@ static void hci_le_big_info_adv_report_evt(struct hci_dev *hdev, void *data, if (!pa_sync) goto unlock; + pa_sync->iso_qos.bcast.encryption = ev->encryption; + /* Notify iso layer */ hci_connect_cfm(pa_sync, 0); -- cgit v1.2.3-58-ga151 From da63f331353c9e1e6dc29e49e28f8f4fe5d642fd Mon Sep 17 00:00:00 2001 From: Dmitry Antipov Date: Tue, 18 Jun 2024 21:59:32 +0300 Subject: Bluetooth: hci_core, hci_sync: cleanup struct discovery_state After commit 78db544b5d27 ("Bluetooth: hci_core: Remove le_restart_scan work"), 'scan_start' and 'scan_duration' of 'struct discovery_state' are still initialized but actually unused. So remove the aforementioned fields and adjust 'hci_discovery_filter_clear()' and 'le_scan_disable()' accordingly. Compile tested only. Fixes: 78db544b5d27 ("Bluetooth: hci_core: Remove le_restart_scan work") Signed-off-by: Dmitry Antipov Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 4 ---- net/bluetooth/hci_sync.c | 2 -- 2 files changed, 6 deletions(-) (limited to 'net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index f7de2681d457..eaeaf3dc07aa 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -91,8 +91,6 @@ struct discovery_state { s8 rssi; u16 uuid_count; u8 (*uuids)[16]; - unsigned long scan_start; - unsigned long scan_duration; unsigned long name_resolve_timeout; }; @@ -891,8 +889,6 @@ static inline void hci_discovery_filter_clear(struct hci_dev *hdev) hdev->discovery.uuid_count = 0; kfree(hdev->discovery.uuids); hdev->discovery.uuids = NULL; - hdev->discovery.scan_start = 0; - hdev->discovery.scan_duration = 0; } bool hci_discovery_active(struct hci_dev *hdev); diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 82db6092965b..ccad43f10415 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -371,8 +371,6 @@ static void le_scan_disable(struct work_struct *work) goto _return; } - hdev->discovery.scan_start = 0; - /* If we were running LE only scan, change discovery state. If * we were running both LE and BR/EDR inquiry simultaneously, * and BR/EDR inquiry is already finished, stop discovery, -- cgit v1.2.3-58-ga151 From 79eecf631c14e7f4057186570ac20e2cfac3802e Mon Sep 17 00:00:00 2001 From: Chengen Du Date: Sat, 13 Jul 2024 19:47:35 +0800 Subject: af_packet: Handle outgoing VLAN packets without hardware offloading The issue initially stems from libpcap. The ethertype will be overwritten as the VLAN TPID if the network interface lacks hardware VLAN offloading. 
In the outbound packet path, if hardware VLAN offloading is unavailable, the VLAN tag is inserted into the payload but then cleared from the sk_buff struct. Consequently, this can lead to a false negative when checking for the presence of a VLAN tag, causing the packet sniffing outcome to lack VLAN tag information (i.e., TCI-TPID). As a result, the packet capturing tool may be unable to parse packets as expected. The TCI-TPID is missing because the prb_fill_vlan_info() function does not modify the tp_vlan_tci/tp_vlan_tpid values, as the information is in the payload and not in the sk_buff struct. The skb_vlan_tag_present() function only checks vlan_all in the sk_buff struct. In cooked mode, the L2 header is stripped, preventing the packet capturing tool from determining the correct TCI-TPID value. Additionally, the protocol in SLL is incorrect, which means the packet capturing tool cannot parse the L3 header correctly. Link: https://github.com/the-tcpdump-group/libpcap/issues/1105 Link: https://lore.kernel.org/netdev/20240520070348.26725-1-chengen.du@canonical.com/T/#u Fixes: 393e52e33c6c ("packet: deliver VLAN TCI to userspace") Cc: stable@vger.kernel.org Signed-off-by: Chengen Du Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20240713114735.62360-1-chengen.du@canonical.com Signed-off-by: Jakub Kicinski --- net/packet/af_packet.c | 86 ++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 84 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index ea3ebc160e25..4692a9ef110b 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -538,6 +538,61 @@ static void *packet_current_frame(struct packet_sock *po, return packet_lookup_frame(po, rb, rb->head, status); } +static u16 vlan_get_tci(struct sk_buff *skb, struct net_device *dev) +{ + u8 *skb_orig_data = skb->data; + int skb_orig_len = skb->len; + struct vlan_hdr vhdr, *vh; + unsigned int header_len; + + if (!dev) + return 0; + + /* In the SOCK_DGRAM scenario, skb data starts at the network + * protocol, which is after the VLAN headers. The outer VLAN + * header is at the hard_header_len offset in non-variable + * length link layer headers. If it's a VLAN device, the + * min_header_len should be used to exclude the VLAN header + * size. 
+ */ + if (dev->min_header_len == dev->hard_header_len) + header_len = dev->hard_header_len; + else if (is_vlan_dev(dev)) + header_len = dev->min_header_len; + else + return 0; + + skb_push(skb, skb->data - skb_mac_header(skb)); + vh = skb_header_pointer(skb, header_len, sizeof(vhdr), &vhdr); + if (skb_orig_data != skb->data) { + skb->data = skb_orig_data; + skb->len = skb_orig_len; + } + if (unlikely(!vh)) + return 0; + + return ntohs(vh->h_vlan_TCI); +} + +static __be16 vlan_get_protocol_dgram(struct sk_buff *skb) +{ + __be16 proto = skb->protocol; + + if (unlikely(eth_type_vlan(proto))) { + u8 *skb_orig_data = skb->data; + int skb_orig_len = skb->len; + + skb_push(skb, skb->data - skb_mac_header(skb)); + proto = __vlan_get_protocol(skb, proto, NULL); + if (skb_orig_data != skb->data) { + skb->data = skb_orig_data; + skb->len = skb_orig_len; + } + } + + return proto; +} + static void prb_del_retire_blk_timer(struct tpacket_kbdq_core *pkc) { del_timer_sync(&pkc->retire_blk_timer); @@ -1007,10 +1062,16 @@ static void prb_clear_rxhash(struct tpacket_kbdq_core *pkc, static void prb_fill_vlan_info(struct tpacket_kbdq_core *pkc, struct tpacket3_hdr *ppd) { + struct packet_sock *po = container_of(pkc, struct packet_sock, rx_ring.prb_bdqc); + if (skb_vlan_tag_present(pkc->skb)) { ppd->hv1.tp_vlan_tci = skb_vlan_tag_get(pkc->skb); ppd->hv1.tp_vlan_tpid = ntohs(pkc->skb->vlan_proto); ppd->tp_status = TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID; + } else if (unlikely(po->sk.sk_type == SOCK_DGRAM && eth_type_vlan(pkc->skb->protocol))) { + ppd->hv1.tp_vlan_tci = vlan_get_tci(pkc->skb, pkc->skb->dev); + ppd->hv1.tp_vlan_tpid = ntohs(pkc->skb->protocol); + ppd->tp_status = TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID; } else { ppd->hv1.tp_vlan_tci = 0; ppd->hv1.tp_vlan_tpid = 0; @@ -2428,6 +2489,10 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, h.h2->tp_vlan_tci = skb_vlan_tag_get(skb); h.h2->tp_vlan_tpid = ntohs(skb->vlan_proto); status |= TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID; + } else if (unlikely(sk->sk_type == SOCK_DGRAM && eth_type_vlan(skb->protocol))) { + h.h2->tp_vlan_tci = vlan_get_tci(skb, skb->dev); + h.h2->tp_vlan_tpid = ntohs(skb->protocol); + status |= TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID; } else { h.h2->tp_vlan_tci = 0; h.h2->tp_vlan_tpid = 0; @@ -2457,7 +2522,8 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, sll->sll_halen = dev_parse_header(skb, sll->sll_addr); sll->sll_family = AF_PACKET; sll->sll_hatype = dev->type; - sll->sll_protocol = skb->protocol; + sll->sll_protocol = (sk->sk_type == SOCK_DGRAM) ? + vlan_get_protocol_dgram(skb) : skb->protocol; sll->sll_pkttype = skb->pkt_type; if (unlikely(packet_sock_flag(po, PACKET_SOCK_ORIGDEV))) sll->sll_ifindex = orig_dev->ifindex; @@ -3482,7 +3548,8 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, /* Original length was stored in sockaddr_ll fields */ origlen = PACKET_SKB_CB(skb)->sa.origlen; sll->sll_family = AF_PACKET; - sll->sll_protocol = skb->protocol; + sll->sll_protocol = (sock->type == SOCK_DGRAM) ? 
+ vlan_get_protocol_dgram(skb) : skb->protocol; } sock_recv_cmsgs(msg, sk, skb); @@ -3539,6 +3606,21 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, aux.tp_vlan_tci = skb_vlan_tag_get(skb); aux.tp_vlan_tpid = ntohs(skb->vlan_proto); aux.tp_status |= TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID; + } else if (unlikely(sock->type == SOCK_DGRAM && eth_type_vlan(skb->protocol))) { + struct sockaddr_ll *sll = &PACKET_SKB_CB(skb)->sa.ll; + struct net_device *dev; + + rcu_read_lock(); + dev = dev_get_by_index_rcu(sock_net(sk), sll->sll_ifindex); + if (dev) { + aux.tp_vlan_tci = vlan_get_tci(skb, dev); + aux.tp_vlan_tpid = ntohs(skb->protocol); + aux.tp_status |= TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID; + } else { + aux.tp_vlan_tci = 0; + aux.tp_vlan_tpid = 0; + } + rcu_read_unlock(); } else { aux.tp_vlan_tci = 0; aux.tp_vlan_tpid = 0; -- cgit v1.2.3-58-ga151 From 2c1583290b08c5aa9005178a573be8f329de2976 Mon Sep 17 00:00:00 2001 From: "Kamil Horák (2N)" Date: Fri, 12 Jul 2024 17:07:06 +0200 Subject: net: phy: bcm54811: New link mode for BroadR-Reach MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce a new link mode necessary for 10 MBit single-pair connection in BroadR-Reach mode on bcm5481x PHY by Broadcom. This new link mode, 10baseT1BRR, is known as 1BR10 in the Broadcom terminology. Another link mode to be used is 1BR100 and it is already present as 100baseT1, because Broadcom's 1BR100 became 100baseT1 (IEEE 802.3bw). Signed-off-by: Kamil Horák (2N) Reviewed-by: Florian Fainelli Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20240712150709.3134474-2-kamilh@axis.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/phy-core.c | 3 ++- include/uapi/linux/ethtool.h | 1 + net/ethtool/common.c | 3 +++ 3 files changed, 6 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/drivers/net/phy/phy-core.c b/drivers/net/phy/phy-core.c index a235ea2264a7..1f98b6a96c15 100644 --- a/drivers/net/phy/phy-core.c +++ b/drivers/net/phy/phy-core.c @@ -13,7 +13,7 @@ */ const char *phy_speed_to_str(int speed) { - BUILD_BUG_ON_MSG(__ETHTOOL_LINK_MODE_MASK_NBITS != 102, + BUILD_BUG_ON_MSG(__ETHTOOL_LINK_MODE_MASK_NBITS != 103, "Enum ethtool_link_mode_bit_indices and phylib are out of sync. 
" "If a speed or mode has been added please update phy_speed_to_str " "and the PHY settings array.\n"); @@ -266,6 +266,7 @@ static const struct phy_setting settings[] = { PHY_SETTING( 10, FULL, 10baseT1S_Full ), PHY_SETTING( 10, HALF, 10baseT1S_Half ), PHY_SETTING( 10, HALF, 10baseT1S_P2MP_Half ), + PHY_SETTING( 10, FULL, 10baseT1BRR_Full ), }; #undef PHY_SETTING diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 230110b97029..4a0a6e703483 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -2054,6 +2054,7 @@ enum ethtool_link_mode_bit_indices { ETHTOOL_LINK_MODE_10baseT1S_Full_BIT = 99, ETHTOOL_LINK_MODE_10baseT1S_Half_BIT = 100, ETHTOOL_LINK_MODE_10baseT1S_P2MP_Half_BIT = 101, + ETHTOOL_LINK_MODE_10baseT1BRR_Full_BIT = 102, /* must be last entry */ __ETHTOOL_LINK_MODE_MASK_NBITS diff --git a/net/ethtool/common.c b/net/ethtool/common.c index 67d06cd002a5..5f99a32e4fe5 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ -211,6 +211,7 @@ const char link_mode_names[][ETH_GSTRING_LEN] = { __DEFINE_LINK_MODE_NAME(10, T1S, Full), __DEFINE_LINK_MODE_NAME(10, T1S, Half), __DEFINE_LINK_MODE_NAME(10, T1S_P2MP, Half), + __DEFINE_LINK_MODE_NAME(10, T1BRR, Full), }; static_assert(ARRAY_SIZE(link_mode_names) == __ETHTOOL_LINK_MODE_MASK_NBITS); @@ -251,6 +252,7 @@ static_assert(ARRAY_SIZE(link_mode_names) == __ETHTOOL_LINK_MODE_MASK_NBITS); #define __LINK_MODE_LANES_T1S_P2MP 1 #define __LINK_MODE_LANES_VR8 8 #define __LINK_MODE_LANES_DR8_2 8 +#define __LINK_MODE_LANES_T1BRR 1 #define __DEFINE_LINK_MODE_PARAMS(_speed, _type, _duplex) \ [ETHTOOL_LINK_MODE(_speed, _type, _duplex)] = { \ @@ -374,6 +376,7 @@ const struct link_mode_info link_mode_params[] = { __DEFINE_LINK_MODE_PARAMS(10, T1S, Full), __DEFINE_LINK_MODE_PARAMS(10, T1S, Half), __DEFINE_LINK_MODE_PARAMS(10, T1S_P2MP, Half), + __DEFINE_LINK_MODE_PARAMS(10, T1BRR, Full), }; static_assert(ARRAY_SIZE(link_mode_params) == __ETHTOOL_LINK_MODE_MASK_NBITS); -- cgit v1.2.3-58-ga151 From 59a931c5b732ca5fc2ca727f5a72aeabaafa85ec Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Fri, 12 Jul 2024 09:51:16 +0000 Subject: xdp: fix invalid wait context of page_pool_destroy() If the driver uses a page pool, it creates a page pool with page_pool_create(). The reference count of page pool is 1 as default. A page pool will be destroyed only when a reference count reaches 0. page_pool_destroy() is used to destroy page pool, it decreases a reference count. When a page pool is destroyed, ->disconnect() is called, which is mem_allocator_disconnect(). This function internally acquires mutex_lock(). If the driver uses XDP, it registers a memory model with xdp_rxq_info_reg_mem_model(). The xdp_rxq_info_reg_mem_model() internally increases a page pool reference count if a memory model is a page pool. Now the reference count is 2. To destroy a page pool, the driver should call both page_pool_destroy() and xdp_unreg_mem_model(). The xdp_unreg_mem_model() internally calls page_pool_destroy(). Only page_pool_destroy() decreases a reference count. If a driver calls page_pool_destroy() then xdp_unreg_mem_model(), we will face an invalid wait context warning. Because xdp_unreg_mem_model() calls page_pool_destroy() with rcu_read_lock(). The page_pool_destroy() internally acquires mutex_lock(). 
Splat looks like: ============================= [ BUG: Invalid wait context ] 6.10.0-rc6+ #4 Tainted: G W ----------------------------- ethtool/1806 is trying to lock: ffffffff90387b90 (mem_id_lock){+.+.}-{4:4}, at: mem_allocator_disconnect+0x73/0x150 other info that might help us debug this: context-{5:5} 3 locks held by ethtool/1806: stack backtrace: CPU: 0 PID: 1806 Comm: ethtool Tainted: G W 6.10.0-rc6+ #4 f916f41f172891c800f2fed Hardware name: ASUS System Product Name/PRIME Z690-P D4, BIOS 0603 11/01/2021 Call Trace: dump_stack_lvl+0x7e/0xc0 __lock_acquire+0x1681/0x4de0 ? _printk+0x64/0xe0 ? __pfx_mark_lock.part.0+0x10/0x10 ? __pfx___lock_acquire+0x10/0x10 lock_acquire+0x1b3/0x580 ? mem_allocator_disconnect+0x73/0x150 ? __wake_up_klogd.part.0+0x16/0xc0 ? __pfx_lock_acquire+0x10/0x10 ? dump_stack_lvl+0x91/0xc0 __mutex_lock+0x15c/0x1690 ? mem_allocator_disconnect+0x73/0x150 ? __pfx_prb_read_valid+0x10/0x10 ? mem_allocator_disconnect+0x73/0x150 ? __pfx_llist_add_batch+0x10/0x10 ? console_unlock+0x193/0x1b0 ? lockdep_hardirqs_on+0xbe/0x140 ? __pfx___mutex_lock+0x10/0x10 ? tick_nohz_tick_stopped+0x16/0x90 ? __irq_work_queue_local+0x1e5/0x330 ? irq_work_queue+0x39/0x50 ? __wake_up_klogd.part.0+0x79/0xc0 ? mem_allocator_disconnect+0x73/0x150 mem_allocator_disconnect+0x73/0x150 ? __pfx_mem_allocator_disconnect+0x10/0x10 ? mark_held_locks+0xa5/0xf0 ? rcu_is_watching+0x11/0xb0 page_pool_release+0x36e/0x6d0 page_pool_destroy+0xd7/0x440 xdp_unreg_mem_model+0x1a7/0x2a0 ? __pfx_xdp_unreg_mem_model+0x10/0x10 ? kfree+0x125/0x370 ? bnxt_free_ring.isra.0+0x2eb/0x500 ? bnxt_free_mem+0x5ac/0x2500 xdp_rxq_info_unreg+0x4a/0xd0 bnxt_free_mem+0x1356/0x2500 bnxt_close_nic+0xf0/0x3b0 ? __pfx_bnxt_close_nic+0x10/0x10 ? ethnl_parse_bit+0x2c6/0x6d0 ? __pfx___nla_validate_parse+0x10/0x10 ? __pfx_ethnl_parse_bit+0x10/0x10 bnxt_set_features+0x2a8/0x3e0 __netdev_update_features+0x4dc/0x1370 ? ethnl_parse_bitset+0x4ff/0x750 ? __pfx_ethnl_parse_bitset+0x10/0x10 ? __pfx___netdev_update_features+0x10/0x10 ? mark_held_locks+0xa5/0xf0 ? _raw_spin_unlock_irqrestore+0x42/0x70 ? __pm_runtime_resume+0x7d/0x110 ethnl_set_features+0x32d/0xa20 To fix this problem, it uses rhashtable_lookup_fast() instead of rhashtable_lookup() with rcu_read_lock(). Using xa without rcu_read_lock() here is safe. xa is freed by __xdp_mem_allocator_rcu_free() and this is called by call_rcu() of mem_xa_remove(). The mem_xa_remove() is called by page_pool_destroy() if a reference count reaches 0. The xa is already protected by the reference count mechanism well in the control plane. So removing rcu_read_lock() for page_pool_destroy() is safe. 
Fixes: c3f812cea0d7 ("page_pool: do not release pool until inflight == 0.") Signed-off-by: Taehee Yoo Reviewed-by: Jakub Kicinski Link: https://patch.msgid.link/20240712095116.3801586-1-ap420073@gmail.com Signed-off-by: Jakub Kicinski --- net/core/xdp.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net') diff --git a/net/core/xdp.c b/net/core/xdp.c index 022c12059cf2..bcc5551c6424 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -127,10 +127,8 @@ void xdp_unreg_mem_model(struct xdp_mem_info *mem) return; if (type == MEM_TYPE_PAGE_POOL) { - rcu_read_lock(); - xa = rhashtable_lookup(mem_id_ht, &id, mem_id_rht_params); + xa = rhashtable_lookup_fast(mem_id_ht, &id, mem_id_rht_params); page_pool_destroy(xa->page_pool); - rcu_read_unlock(); } } EXPORT_SYMBOL_GPL(xdp_unreg_mem_model); -- cgit v1.2.3-58-ga151 From 0a1868b93fad5938dbcca77286b25bf211c49f7a Mon Sep 17 00:00:00 2001 From: Elliot Ayrey Date: Fri, 12 Jul 2024 13:31:33 +1200 Subject: net: bridge: mst: Check vlan state for egress decision If a port is blocking in the common instance but forwarding in an MST instance, traffic egressing the bridge will be dropped because the state of the common instance is overriding that of the MST instance. Fix this by skipping the port state check in MST mode to allow checking the vlan state via br_allowed_egress(). This is similar to what happens in br_handle_frame_finish() when checking ingress traffic, which was introduced in the change below. Fixes: ec7328b59176 ("net: bridge: mst: Multiple Spanning Tree (MST) mode") Signed-off-by: Elliot Ayrey Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/bridge/br_forward.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index d97064d460dc..e19b583ff2c6 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -25,8 +25,8 @@ static inline int should_deliver(const struct net_bridge_port *p, vg = nbp_vlan_group_rcu(p); return ((p->flags & BR_HAIRPIN_MODE) || skb->dev != p->dev) && - p->state == BR_STATE_FORWARDING && br_allowed_egress(vg, skb) && - nbp_switchdev_allowed_egress(p, skb) && + (br_mst_is_enabled(p->br) || p->state == BR_STATE_FORWARDING) && + br_allowed_egress(vg, skb) && nbp_switchdev_allowed_egress(p, skb) && !br_skb_isolated(p, skb); } -- cgit v1.2.3-58-ga151 From dcfe7673787b4bfea2c213df443d312aa754757b Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sat, 13 Jul 2024 23:16:09 +0200 Subject: net: dsa: tag_sja1105: absorb logic for not overwriting precise info into dsa_8021q_rcv() In both sja1105_rcv() and sja1110_rcv(), we may have precise source port information coming from parallel hardware mechanisms, in addition to the tag_8021q header. Only sja1105_rcv() has extra logic to not overwrite that precise info with what's present in the VLAN tag. This is because sja1110_rcv() gets by, by having a reversed set of checks when assigning skb->dev. When the source port is imprecise (vbid >=1), source_port and switch_id will be set to zeroes by dsa_8021q_rcv(), which might be problematic. But by checking for vbid >= 1 first, sja1110_rcv() fends that off. We would like to make more code common between sja1105_rcv() and sja1110_rcv(), and for that, we need to make sure that sja1110_rcv() also goes through the precise source port preservation logic. 
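As a rough sketch of the calling convention this establishes for a tag_8021q-based tagger's rcv path (illustrative only; foo_tagger_rcv() is hypothetical and the real call sites are in the diff below):

        static struct sk_buff *foo_tagger_rcv(struct sk_buff *skb,
                                              struct net_device *netdev)
        {
                int source_port = -1, switch_id = -1, vbid = -1;

                /* a hardware-specific header (e.g. the INCL_SRCPT option on
                 * sja1105) would set source_port/switch_id here when precise
                 * information is available; otherwise they stay -1
                 */

                dsa_8021q_rcv(skb, &source_port, &switch_id, &vbid);

                if (vbid >= 1)
                        skb->dev = dsa_tag_8021q_find_port_by_vbid(netdev, vbid);
                else if (source_port == -1 || switch_id == -1)
                        return NULL;    /* no usable source information */
                else
                        skb->dev = dsa_conduit_find_user(netdev, switch_id,
                                                         source_port);

                return skb->dev ? skb : NULL;
        }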
Signed-off-by: Vladimir Oltean Signed-off-by: Pawel Dembicki Reviewed-by: Vladimir Oltean Tested-by: Vladimir Oltean Link: https://patch.msgid.link/20240713211620.1125910-4-paweldembicki@gmail.com Signed-off-by: Jakub Kicinski --- net/dsa/tag_8021q.c | 32 +++++++++++++++++++++++++++++--- net/dsa/tag_sja1105.c | 23 +++-------------------- 2 files changed, 32 insertions(+), 23 deletions(-) (limited to 'net') diff --git a/net/dsa/tag_8021q.c b/net/dsa/tag_8021q.c index 71b26ae6db39..3cb0293793a5 100644 --- a/net/dsa/tag_8021q.c +++ b/net/dsa/tag_8021q.c @@ -497,9 +497,21 @@ struct net_device *dsa_tag_8021q_find_port_by_vbid(struct net_device *conduit, } EXPORT_SYMBOL_GPL(dsa_tag_8021q_find_port_by_vbid); +/** + * dsa_8021q_rcv - Decode source information from tag_8021q header + * @skb: RX socket buffer + * @source_port: pointer to storage for precise source port information. + * If this is known already from outside tag_8021q, the pre-initialized + * value is preserved. If not known, pass -1. + * @switch_id: similar to source_port. + * @vbid: pointer to storage for imprecise bridge ID. Must be pre-initialized + * with -1. If a positive value is returned, the source_port and switch_id + * are invalid. + */ void dsa_8021q_rcv(struct sk_buff *skb, int *source_port, int *switch_id, int *vbid) { + int tmp_source_port, tmp_switch_id, tmp_vbid; u16 vid, tci; if (skb_vlan_tag_present(skb)) { @@ -513,11 +525,25 @@ void dsa_8021q_rcv(struct sk_buff *skb, int *source_port, int *switch_id, vid = tci & VLAN_VID_MASK; - *source_port = dsa_8021q_rx_source_port(vid); - *switch_id = dsa_8021q_rx_switch_id(vid); + tmp_source_port = dsa_8021q_rx_source_port(vid); + tmp_switch_id = dsa_8021q_rx_switch_id(vid); + tmp_vbid = dsa_tag_8021q_rx_vbid(vid); + + /* Precise source port information is unknown when receiving from a + * VLAN-unaware bridging domain, and tmp_source_port and tmp_switch_id + * are zeroes in this case. + * + * Preserve the source information from hardware-specific mechanisms, + * if available. This allows us to not overwrite a valid source port + * and switch ID with less precise values. + */ + if (tmp_vbid == 0 && *source_port == -1) + *source_port = tmp_source_port; + if (tmp_vbid == 0 && *switch_id == -1) + *switch_id = tmp_switch_id; if (vbid) - *vbid = dsa_tag_8021q_rx_vbid(vid); + *vbid = tmp_vbid; skb->priority = (tci & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; } diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c index 1aba1d05c27a..48886d4b7e3e 100644 --- a/net/dsa/tag_sja1105.c +++ b/net/dsa/tag_sja1105.c @@ -524,30 +524,13 @@ static struct sk_buff *sja1105_rcv(struct sk_buff *skb, /* Normal data plane traffic and link-local frames are tagged with * a tag_8021q VLAN which we have to strip */ - if (sja1105_skb_has_tag_8021q(skb)) { - int tmp_source_port = -1, tmp_switch_id = -1; - - sja1105_vlan_rcv(skb, &tmp_source_port, &tmp_switch_id, &vbid, - &vid); - /* Preserve the source information from the INCL_SRCPT option, - * if available. This allows us to not overwrite a valid source - * port and switch ID with zeroes when receiving link-local - * frames from a VLAN-unaware bridged port (non-zero vbid) or a - * VLAN-aware bridged port (non-zero vid). Furthermore, the - * tag_8021q source port information is only of trust when the - * vbid is 0 (precise port). Otherwise, tmp_source_port and - * tmp_switch_id will be zeroes. 
- */ - if (vbid == 0 && source_port == -1) - source_port = tmp_source_port; - if (vbid == 0 && switch_id == -1) - switch_id = tmp_switch_id; - } else if (source_port == -1 && switch_id == -1) { + if (sja1105_skb_has_tag_8021q(skb)) + sja1105_vlan_rcv(skb, &source_port, &switch_id, &vbid, &vid); + else if (source_port == -1 && switch_id == -1) /* Packets with no source information have no chance of * getting accepted, drop them straight away. */ return NULL; - } if (source_port != -1 && switch_id != -1) skb->dev = dsa_conduit_find_user(netdev, switch_id, source_port); -- cgit v1.2.3-58-ga151 From 0064b863abdc7bd5e209cdbce7619464069458fe Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sat, 13 Jul 2024 23:16:10 +0200 Subject: net: dsa: tag_sja1105: absorb entire sja1105_vlan_rcv() into dsa_8021q_rcv() tag_sja1105 has a wrapper over dsa_8021q_rcv(): sja1105_vlan_rcv(), which determines whether the packet came from a bridge with vlan_filtering=1 (the case resolved via dsa_find_designated_bridge_port_by_vid()), or if it contains a tag_8021q header. Looking at a new tagger implementation for vsc73xx, based also on tag_8021q, it is becoming clear that the logic is needed there as well. So instead of forcing each tagger to wrap around dsa_8021q_rcv(), let's merge the logic into the core. Signed-off-by: Vladimir Oltean Reviewed-by: Vladimir Oltean Tested-by: Vladimir Oltean Signed-off-by: Pawel Dembicki Link: https://patch.msgid.link/20240713211620.1125910-5-paweldembicki@gmail.com Signed-off-by: Jakub Kicinski --- net/dsa/tag_8021q.c | 34 ++++++++++++++++++++++++++++------ net/dsa/tag_8021q.h | 2 +- net/dsa/tag_ocelot_8021q.c | 2 +- net/dsa/tag_sja1105.c | 32 ++++---------------------------- 4 files changed, 34 insertions(+), 36 deletions(-) (limited to 'net') diff --git a/net/dsa/tag_8021q.c b/net/dsa/tag_8021q.c index 3cb0293793a5..2d1c554a63ff 100644 --- a/net/dsa/tag_8021q.c +++ b/net/dsa/tag_8021q.c @@ -507,27 +507,48 @@ EXPORT_SYMBOL_GPL(dsa_tag_8021q_find_port_by_vbid); * @vbid: pointer to storage for imprecise bridge ID. Must be pre-initialized * with -1. If a positive value is returned, the source_port and switch_id * are invalid. + * @vid: pointer to storage for original VID, in case tag_8021q decoding failed. + * + * If the packet has a tag_8021q header, decode it and set @source_port, + * @switch_id and @vbid, and strip the header. Otherwise set @vid and keep the + * header in the hwaccel area of the packet. 
*/ void dsa_8021q_rcv(struct sk_buff *skb, int *source_port, int *switch_id, - int *vbid) + int *vbid, int *vid) { int tmp_source_port, tmp_switch_id, tmp_vbid; - u16 vid, tci; + __be16 vlan_proto; + u16 tmp_vid, tci; if (skb_vlan_tag_present(skb)) { + vlan_proto = skb->vlan_proto; tci = skb_vlan_tag_get(skb); __vlan_hwaccel_clear_tag(skb); } else { + struct vlan_ethhdr *hdr = vlan_eth_hdr(skb); + + vlan_proto = hdr->h_vlan_proto; skb_push_rcsum(skb, ETH_HLEN); __skb_vlan_pop(skb, &tci); skb_pull_rcsum(skb, ETH_HLEN); } - vid = tci & VLAN_VID_MASK; + tmp_vid = tci & VLAN_VID_MASK; + if (!vid_is_dsa_8021q(tmp_vid)) { + /* Not a tag_8021q frame, so return the VID to the + * caller for further processing, and put the tag back + */ + if (vid) + *vid = tmp_vid; + + __vlan_hwaccel_put_tag(skb, vlan_proto, tci); + + return; + } - tmp_source_port = dsa_8021q_rx_source_port(vid); - tmp_switch_id = dsa_8021q_rx_switch_id(vid); - tmp_vbid = dsa_tag_8021q_rx_vbid(vid); + tmp_source_port = dsa_8021q_rx_source_port(tmp_vid); + tmp_switch_id = dsa_8021q_rx_switch_id(tmp_vid); + tmp_vbid = dsa_tag_8021q_rx_vbid(tmp_vid); /* Precise source port information is unknown when receiving from a * VLAN-unaware bridging domain, and tmp_source_port and tmp_switch_id @@ -546,5 +567,6 @@ void dsa_8021q_rcv(struct sk_buff *skb, int *source_port, int *switch_id, *vbid = tmp_vbid; skb->priority = (tci & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; + return; } EXPORT_SYMBOL_GPL(dsa_8021q_rcv); diff --git a/net/dsa/tag_8021q.h b/net/dsa/tag_8021q.h index 41f7167ac520..0c6671d7c1c2 100644 --- a/net/dsa/tag_8021q.h +++ b/net/dsa/tag_8021q.h @@ -14,7 +14,7 @@ struct sk_buff *dsa_8021q_xmit(struct sk_buff *skb, struct net_device *netdev, u16 tpid, u16 tci); void dsa_8021q_rcv(struct sk_buff *skb, int *source_port, int *switch_id, - int *vbid); + int *vbid, int *vid); struct net_device *dsa_tag_8021q_find_port_by_vbid(struct net_device *conduit, int vbid); diff --git a/net/dsa/tag_ocelot_8021q.c b/net/dsa/tag_ocelot_8021q.c index b059381310fe..8e8b1bef6af6 100644 --- a/net/dsa/tag_ocelot_8021q.c +++ b/net/dsa/tag_ocelot_8021q.c @@ -81,7 +81,7 @@ static struct sk_buff *ocelot_rcv(struct sk_buff *skb, { int src_port, switch_id; - dsa_8021q_rcv(skb, &src_port, &switch_id, NULL); + dsa_8021q_rcv(skb, &src_port, &switch_id, NULL, NULL); skb->dev = dsa_conduit_find_user(netdev, switch_id, src_port); if (!skb->dev) diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c index 48886d4b7e3e..7639ccb94d35 100644 --- a/net/dsa/tag_sja1105.c +++ b/net/dsa/tag_sja1105.c @@ -472,37 +472,14 @@ static bool sja1110_skb_has_inband_control_extension(const struct sk_buff *skb) return ntohs(eth_hdr(skb)->h_proto) == ETH_P_SJA1110; } -/* If the VLAN in the packet is a tag_8021q one, set @source_port and - * @switch_id and strip the header. Otherwise set @vid and keep it in the - * packet. 
- */ -static void sja1105_vlan_rcv(struct sk_buff *skb, int *source_port, - int *switch_id, int *vbid, u16 *vid) -{ - struct vlan_ethhdr *hdr = vlan_eth_hdr(skb); - u16 vlan_tci; - - if (skb_vlan_tag_present(skb)) - vlan_tci = skb_vlan_tag_get(skb); - else - vlan_tci = ntohs(hdr->h_vlan_TCI); - - if (vid_is_dsa_8021q(vlan_tci & VLAN_VID_MASK)) - return dsa_8021q_rcv(skb, source_port, switch_id, vbid); - - /* Try our best with imprecise RX */ - *vid = vlan_tci & VLAN_VID_MASK; -} - static struct sk_buff *sja1105_rcv(struct sk_buff *skb, struct net_device *netdev) { - int source_port = -1, switch_id = -1, vbid = -1; + int source_port = -1, switch_id = -1, vbid = -1, vid = -1; struct sja1105_meta meta = {0}; struct ethhdr *hdr; bool is_link_local; bool is_meta; - u16 vid; hdr = eth_hdr(skb); is_link_local = sja1105_is_link_local(skb); @@ -525,7 +502,7 @@ static struct sk_buff *sja1105_rcv(struct sk_buff *skb, * a tag_8021q VLAN which we have to strip */ if (sja1105_skb_has_tag_8021q(skb)) - sja1105_vlan_rcv(skb, &source_port, &switch_id, &vbid, &vid); + dsa_8021q_rcv(skb, &source_port, &switch_id, &vbid, &vid); else if (source_port == -1 && switch_id == -1) /* Packets with no source information have no chance of * getting accepted, drop them straight away. @@ -660,9 +637,8 @@ static struct sk_buff *sja1110_rcv_inband_control_extension(struct sk_buff *skb, static struct sk_buff *sja1110_rcv(struct sk_buff *skb, struct net_device *netdev) { - int source_port = -1, switch_id = -1, vbid = -1; + int source_port = -1, switch_id = -1, vbid = -1, vid = -1; bool host_only = false; - u16 vid = 0; if (sja1110_skb_has_inband_control_extension(skb)) { skb = sja1110_rcv_inband_control_extension(skb, &source_port, @@ -674,7 +650,7 @@ static struct sk_buff *sja1110_rcv(struct sk_buff *skb, /* Packets with in-band control extensions might still have RX VLANs */ if (likely(sja1105_skb_has_tag_8021q(skb))) - sja1105_vlan_rcv(skb, &source_port, &switch_id, &vbid, &vid); + dsa_8021q_rcv(skb, &source_port, &switch_id, &vbid, &vid); if (vbid >= 1) skb->dev = dsa_tag_8021q_find_port_by_vbid(netdev, vbid); -- cgit v1.2.3-58-ga151 From 823e5cc141c6408b470b8302dd8b5082636fbfaf Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sat, 13 Jul 2024 23:16:11 +0200 Subject: net: dsa: tag_sja1105: prefer precise source port info on SJA1110 too Now that dsa_8021q_rcv() handles better the case where we don't overwrite the precise source information if it comes from an external (non-tag_8021q) source, we can now unify the call sequence between sja1105_rcv() and sja1110_rcv(). This is a preparatory change for creating a higher-level wrapper for the entire sequence which will live in tag_8021q. 
Signed-off-by: Vladimir Oltean Signed-off-by: Pawel Dembicki Reviewed-by: Vladimir Oltean Link: https://patch.msgid.link/20240713211620.1125910-6-paweldembicki@gmail.com Signed-off-by: Jakub Kicinski --- net/dsa/tag_sja1105.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c index 7639ccb94d35..35a6346549f2 100644 --- a/net/dsa/tag_sja1105.c +++ b/net/dsa/tag_sja1105.c @@ -652,12 +652,12 @@ static struct sk_buff *sja1110_rcv(struct sk_buff *skb, if (likely(sja1105_skb_has_tag_8021q(skb))) dsa_8021q_rcv(skb, &source_port, &switch_id, &vbid, &vid); - if (vbid >= 1) + if (source_port != -1 && switch_id != -1) + skb->dev = dsa_conduit_find_user(netdev, switch_id, source_port); + else if (vbid >= 1) skb->dev = dsa_tag_8021q_find_port_by_vbid(netdev, vbid); - else if (source_port == -1 || switch_id == -1) - skb->dev = dsa_find_designated_bridge_port_by_vid(netdev, vid); else - skb->dev = dsa_conduit_find_user(netdev, switch_id, source_port); + skb->dev = dsa_find_designated_bridge_port_by_vid(netdev, vid); if (!skb->dev) { netdev_warn(netdev, "Couldn't decode source port\n"); return NULL; -- cgit v1.2.3-58-ga151 From d124cf54df6fafa5ab63d3ee40d64bd54487b5cc Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sat, 13 Jul 2024 23:16:12 +0200 Subject: net: dsa: tag_sja1105: refactor skb->dev assignment to dsa_tag_8021q_find_user() A new tagging protocol implementation based on tag_8021q is on the horizon, and it appears that it also has to open-code the complicated logic of finding a source port based on a VLAN header. Create a single dsa_tag_8021q_find_user() and make sja1105 call it. Signed-off-by: Vladimir Oltean Signed-off-by: Pawel Dembicki Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20240713211620.1125910-7-paweldembicki@gmail.com Signed-off-by: Jakub Kicinski --- net/dsa/tag_8021q.c | 19 ++++++++++++++++--- net/dsa/tag_8021q.h | 5 +++-- net/dsa/tag_sja1105.c | 17 +++++------------ 3 files changed, 24 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/dsa/tag_8021q.c b/net/dsa/tag_8021q.c index 2d1c554a63ff..c0eee113a2b9 100644 --- a/net/dsa/tag_8021q.c +++ b/net/dsa/tag_8021q.c @@ -468,8 +468,8 @@ struct sk_buff *dsa_8021q_xmit(struct sk_buff *skb, struct net_device *netdev, } EXPORT_SYMBOL_GPL(dsa_8021q_xmit); -struct net_device *dsa_tag_8021q_find_port_by_vbid(struct net_device *conduit, - int vbid) +static struct net_device * +dsa_tag_8021q_find_port_by_vbid(struct net_device *conduit, int vbid) { struct dsa_port *cpu_dp = conduit->dsa_ptr; struct dsa_switch_tree *dst = cpu_dp->dst; @@ -495,7 +495,20 @@ struct net_device *dsa_tag_8021q_find_port_by_vbid(struct net_device *conduit, return NULL; } -EXPORT_SYMBOL_GPL(dsa_tag_8021q_find_port_by_vbid); + +struct net_device *dsa_tag_8021q_find_user(struct net_device *conduit, + int source_port, int switch_id, + int vid, int vbid) +{ + /* Always prefer precise source port information, if available */ + if (source_port != -1 && switch_id != -1) + return dsa_conduit_find_user(conduit, switch_id, source_port); + else if (vbid >= 1) + return dsa_tag_8021q_find_port_by_vbid(conduit, vbid); + + return dsa_find_designated_bridge_port_by_vid(conduit, vid); +} +EXPORT_SYMBOL_GPL(dsa_tag_8021q_find_user); /** * dsa_8021q_rcv - Decode source information from tag_8021q header diff --git a/net/dsa/tag_8021q.h b/net/dsa/tag_8021q.h index 0c6671d7c1c2..27b8906f99ec 100644 --- a/net/dsa/tag_8021q.h +++ b/net/dsa/tag_8021q.h @@ -16,8 +16,9 
@@ struct sk_buff *dsa_8021q_xmit(struct sk_buff *skb, struct net_device *netdev, void dsa_8021q_rcv(struct sk_buff *skb, int *source_port, int *switch_id, int *vbid, int *vid); -struct net_device *dsa_tag_8021q_find_port_by_vbid(struct net_device *conduit, - int vbid); +struct net_device *dsa_tag_8021q_find_user(struct net_device *conduit, + int source_port, int switch_id, + int vid, int vbid); int dsa_switch_tag_8021q_vlan_add(struct dsa_switch *ds, struct dsa_notifier_tag_8021q_vlan_info *info); diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c index 35a6346549f2..3e902af7eea6 100644 --- a/net/dsa/tag_sja1105.c +++ b/net/dsa/tag_sja1105.c @@ -509,12 +509,8 @@ static struct sk_buff *sja1105_rcv(struct sk_buff *skb, */ return NULL; - if (source_port != -1 && switch_id != -1) - skb->dev = dsa_conduit_find_user(netdev, switch_id, source_port); - else if (vbid >= 1) - skb->dev = dsa_tag_8021q_find_port_by_vbid(netdev, vbid); - else - skb->dev = dsa_find_designated_bridge_port_by_vid(netdev, vid); + skb->dev = dsa_tag_8021q_find_user(netdev, source_port, switch_id, + vid, vbid); if (!skb->dev) { netdev_warn(netdev, "Couldn't decode source port\n"); return NULL; @@ -652,12 +648,9 @@ static struct sk_buff *sja1110_rcv(struct sk_buff *skb, if (likely(sja1105_skb_has_tag_8021q(skb))) dsa_8021q_rcv(skb, &source_port, &switch_id, &vbid, &vid); - if (source_port != -1 && switch_id != -1) - skb->dev = dsa_conduit_find_user(netdev, switch_id, source_port); - else if (vbid >= 1) - skb->dev = dsa_tag_8021q_find_port_by_vbid(netdev, vbid); - else - skb->dev = dsa_find_designated_bridge_port_by_vid(netdev, vid); + skb->dev = dsa_tag_8021q_find_user(netdev, source_port, switch_id, + vid, vbid); + if (!skb->dev) { netdev_warn(netdev, "Couldn't decode source port\n"); return NULL; -- cgit v1.2.3-58-ga151 From 6c87e1a4792804efce8ab3dfdb6e9ada314ec6dd Mon Sep 17 00:00:00 2001 From: Pawel Dembicki Date: Sat, 13 Jul 2024 23:16:13 +0200 Subject: net: dsa: vsc73xx: introduce tag 8021q for vsc73xx This commit introduces a new tagger based on 802.1q tagging. It's designed for the vsc73xx driver. The VSC73xx family doesn't have any tag support for the RGMII port, but it could be based on VLANs. 
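The diff below only adds the tagging protocol itself; the switch driver advertises it separately through the usual DSA operation. A hedged sketch of what that driver-side hook could look like (hypothetical here, since the actual vsc73xx driver conversion is a separate patch and not part of this hunk):

static enum dsa_tag_protocol
vsc73xx_get_tag_protocol(struct dsa_switch *ds, int port,
			 enum dsa_tag_protocol mprot)
{
	/* All user ports use the new 802.1Q-based tagging scheme. */
	return DSA_TAG_PROTO_VSC73XX_8021Q;
}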
Reviewed-by: Florian Fainelli Reviewed-by: Vladimir Oltean Signed-off-by: Pawel Dembicki Reviewed-by: Linus Walleij Link: https://patch.msgid.link/20240713211620.1125910-8-paweldembicki@gmail.com Signed-off-by: Jakub Kicinski --- include/net/dsa.h | 2 ++ net/dsa/Kconfig | 6 ++++ net/dsa/Makefile | 1 + net/dsa/tag_vsc73xx_8021q.c | 68 +++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 77 insertions(+) create mode 100644 net/dsa/tag_vsc73xx_8021q.c (limited to 'net') diff --git a/include/net/dsa.h b/include/net/dsa.h index f9ae3ca66b6f..5a5a03a7b4c3 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -53,6 +53,7 @@ struct tc_action; #define DSA_TAG_PROTO_RTL8_4T_VALUE 25 #define DSA_TAG_PROTO_RZN1_A5PSW_VALUE 26 #define DSA_TAG_PROTO_LAN937X_VALUE 27 +#define DSA_TAG_PROTO_VSC73XX_8021Q_VALUE 28 enum dsa_tag_protocol { DSA_TAG_PROTO_NONE = DSA_TAG_PROTO_NONE_VALUE, @@ -83,6 +84,7 @@ enum dsa_tag_protocol { DSA_TAG_PROTO_RTL8_4T = DSA_TAG_PROTO_RTL8_4T_VALUE, DSA_TAG_PROTO_RZN1_A5PSW = DSA_TAG_PROTO_RZN1_A5PSW_VALUE, DSA_TAG_PROTO_LAN937X = DSA_TAG_PROTO_LAN937X_VALUE, + DSA_TAG_PROTO_VSC73XX_8021Q = DSA_TAG_PROTO_VSC73XX_8021Q_VALUE, }; struct dsa_switch; diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig index 8d5bf869eb14..2dfe9063613f 100644 --- a/net/dsa/Kconfig +++ b/net/dsa/Kconfig @@ -166,6 +166,12 @@ config NET_DSA_TAG_TRAILER Say Y or M if you want to enable support for tagging frames at with a trailed. e.g. Marvell 88E6060. +config NET_DSA_TAG_VSC73XX_8021Q + tristate "Tag driver for Microchip/Vitesse VSC73xx family of switches, using VLAN" + help + Say Y or M if you want to enable support for tagging frames with a + custom VLAN-based header. + config NET_DSA_TAG_XRS700X tristate "Tag driver for XRS700x switches" help diff --git a/net/dsa/Makefile b/net/dsa/Makefile index 8a1894a42552..555c07cfeb71 100644 --- a/net/dsa/Makefile +++ b/net/dsa/Makefile @@ -37,6 +37,7 @@ obj-$(CONFIG_NET_DSA_TAG_RTL8_4) += tag_rtl8_4.o obj-$(CONFIG_NET_DSA_TAG_RZN1_A5PSW) += tag_rzn1_a5psw.o obj-$(CONFIG_NET_DSA_TAG_SJA1105) += tag_sja1105.o obj-$(CONFIG_NET_DSA_TAG_TRAILER) += tag_trailer.o +obj-$(CONFIG_NET_DSA_TAG_VSC73XX_8021Q) += tag_vsc73xx_8021q.o obj-$(CONFIG_NET_DSA_TAG_XRS700X) += tag_xrs700x.o # for tracing framework to find trace.h diff --git a/net/dsa/tag_vsc73xx_8021q.c b/net/dsa/tag_vsc73xx_8021q.c new file mode 100644 index 000000000000..af121a9aff7f --- /dev/null +++ b/net/dsa/tag_vsc73xx_8021q.c @@ -0,0 +1,68 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* Copyright (C) 2024 Pawel Dembicki + */ +#include + +#include "tag.h" +#include "tag_8021q.h" + +#define VSC73XX_8021Q_NAME "vsc73xx-8021q" + +static struct sk_buff * +vsc73xx_xmit(struct sk_buff *skb, struct net_device *netdev) +{ + struct dsa_port *dp = dsa_user_to_port(netdev); + u16 queue_mapping = skb_get_queue_mapping(skb); + u16 tx_vid = dsa_tag_8021q_standalone_vid(dp); + u8 pcp; + + if (skb->offload_fwd_mark) { + unsigned int bridge_num = dsa_port_bridge_num_get(dp); + struct net_device *br = dsa_port_bridge_dev_get(dp); + + if (br_vlan_enabled(br)) + return skb; + + tx_vid = dsa_tag_8021q_bridge_vid(bridge_num); + } + + pcp = netdev_txq_to_tc(netdev, queue_mapping); + + return dsa_8021q_xmit(skb, netdev, ETH_P_8021Q, + ((pcp << VLAN_PRIO_SHIFT) | tx_vid)); +} + +static struct sk_buff * +vsc73xx_rcv(struct sk_buff *skb, struct net_device *netdev) +{ + int src_port = -1, switch_id = -1, vbid = -1, vid = -1; + + dsa_8021q_rcv(skb, &src_port, &switch_id, &vbid, &vid); + + skb->dev = 
dsa_tag_8021q_find_user(netdev, src_port, switch_id, + vid, vbid); + if (!skb->dev) { + dev_warn_ratelimited(&netdev->dev, + "Couldn't decode source port\n"); + return NULL; + } + + dsa_default_offload_fwd_mark(skb); + + return skb; +} + +static const struct dsa_device_ops vsc73xx_8021q_netdev_ops = { + .name = VSC73XX_8021Q_NAME, + .proto = DSA_TAG_PROTO_VSC73XX_8021Q, + .xmit = vsc73xx_xmit, + .rcv = vsc73xx_rcv, + .needed_headroom = VLAN_HLEN, + .promisc_on_conduit = true, +}; + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("DSA tag driver for VSC73XX family of switches, using VLAN"); +MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_VSC73XX_8021Q, VSC73XX_8021Q_NAME); + +module_dsa_tag_driver(vsc73xx_8021q_netdev_ops); -- cgit v1.2.3-58-ga151 From 85aabd1fe9d6af4dc5d11a2d8be567ec45d1dc5e Mon Sep 17 00:00:00 2001 From: Pawel Dembicki Date: Sat, 13 Jul 2024 23:16:16 +0200 Subject: net: dsa: prepare 'dsa_tag_8021q_bridge_join' for standalone use The 'dsa_tag_8021q_bridge_join' could be used as a generic implementation of the 'ds->ops->port_bridge_join()' function. However, it is necessary to synchronize their arguments. This patch also moves the 'tx_fwd_offload' flag configuration line into 'dsa_tag_8021q_bridge_join' body. Currently, every (sja1105) driver sets it, and the future vsc73xx implementation will also need it for simplification. Suggested-by: Vladimir Oltean Signed-off-by: Pawel Dembicki Reviewed-by: Vladimir Oltean Link: https://patch.msgid.link/20240713211620.1125910-11-paweldembicki@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/sja1105/sja1105_main.c | 5 ++--- include/linux/dsa/8021q.h | 3 ++- net/dsa/tag_8021q.c | 5 ++++- 3 files changed, 8 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index 0c55a29d7dd3..c7282ce3d11c 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -2133,14 +2133,13 @@ static int sja1105_bridge_join(struct dsa_switch *ds, int port, if (rc) return rc; - rc = dsa_tag_8021q_bridge_join(ds, port, bridge); + rc = dsa_tag_8021q_bridge_join(ds, port, bridge, tx_fwd_offload, + extack); if (rc) { sja1105_bridge_member(ds, port, bridge, false); return rc; } - *tx_fwd_offload = true; - return 0; } diff --git a/include/linux/dsa/8021q.h b/include/linux/dsa/8021q.h index 1dda2a13b832..d13aabdeb4b2 100644 --- a/include/linux/dsa/8021q.h +++ b/include/linux/dsa/8021q.h @@ -18,7 +18,8 @@ int dsa_tag_8021q_register(struct dsa_switch *ds, __be16 proto); void dsa_tag_8021q_unregister(struct dsa_switch *ds); int dsa_tag_8021q_bridge_join(struct dsa_switch *ds, int port, - struct dsa_bridge bridge); + struct dsa_bridge bridge, bool *tx_fwd_offload, + struct netlink_ext_ack *extack); void dsa_tag_8021q_bridge_leave(struct dsa_switch *ds, int port, struct dsa_bridge bridge); diff --git a/net/dsa/tag_8021q.c b/net/dsa/tag_8021q.c index c0eee113a2b9..3ee53e28ec2e 100644 --- a/net/dsa/tag_8021q.c +++ b/net/dsa/tag_8021q.c @@ -286,7 +286,8 @@ int dsa_switch_tag_8021q_vlan_del(struct dsa_switch *ds, * be used for VLAN-unaware bridging. 
*/ int dsa_tag_8021q_bridge_join(struct dsa_switch *ds, int port, - struct dsa_bridge bridge) + struct dsa_bridge bridge, bool *tx_fwd_offload, + struct netlink_ext_ack *extack) { struct dsa_port *dp = dsa_to_port(ds, port); u16 standalone_vid, bridge_vid; @@ -304,6 +305,8 @@ int dsa_tag_8021q_bridge_join(struct dsa_switch *ds, int port, dsa_port_tag_8021q_vlan_del(dp, standalone_vid, false); + *tx_fwd_offload = true; + return 0; } EXPORT_SYMBOL_GPL(dsa_tag_8021q_bridge_join); -- cgit v1.2.3-58-ga151 From 3ba74b2f288bbc17c0c2a58ab219e1df19f80153 Mon Sep 17 00:00:00 2001 From: Dmitry Antipov Date: Fri, 21 Jun 2024 16:01:55 +0300 Subject: Bluetooth: hci_core: cleanup struct hci_dev Remove unused and set but otherwise unused 'discovery_old_state' and 'sco_last_tx' members of 'struct hci_dev'. The first one is a leftover after commit 182ee45da083 ("Bluetooth: hci_sync: Rework hci_suspend_notifier"); the second one is originated from ancient 2.4.19 and I was unable to find any actual use since that. Signed-off-by: Dmitry Antipov Reviewed-by: Paul Menzel Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 2 -- net/bluetooth/hci_sync.c | 1 - 2 files changed, 3 deletions(-) (limited to 'net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index eaeaf3dc07aa..31020891fc68 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -476,7 +476,6 @@ struct hci_dev { unsigned int iso_pkts; unsigned long acl_last_tx; - unsigned long sco_last_tx; unsigned long le_last_tx; __u8 le_tx_def_phys; @@ -528,7 +527,6 @@ struct hci_dev { struct discovery_state discovery; - int discovery_old_state; bool discovery_paused; int advertising_old_state; bool advertising_paused; diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index ccad43f10415..76b283b8e90d 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -5853,7 +5853,6 @@ static int hci_pause_discovery_sync(struct hci_dev *hdev) return err; hdev->discovery_paused = true; - hdev->discovery_old_state = old_state; hci_discovery_set_state(hdev, DISCOVERY_STOPPED); return 0; -- cgit v1.2.3-58-ga151 From 87be7b189b2c50d4b51512f59e4e97db4eedee8a Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 1 Jul 2024 12:07:46 -0400 Subject: Bluetooth: Fix usage of __hci_cmd_sync_status __hci_cmd_sync_status shall only be used if hci_req_sync_lock is _not_ required which is not the case of hci_dev_cmd so it needs to use hci_cmd_sync_status which uses hci_req_sync_lock internally. 
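In other words, the locked wrapper is roughly the following (paraphrased from net/bluetooth/hci_sync.c, not part of this patch):

int hci_cmd_sync_status(struct hci_dev *hdev, u16 opcode, u32 plen,
			const void *param, u32 timeout)
{
	int err;

	/* Serialize against other sync requests; __hci_cmd_sync_status()
	 * assumes the caller already holds this lock, which hci_dev_cmd()
	 * does not.
	 */
	hci_req_sync_lock(hdev);
	err = __hci_cmd_sync_status(hdev, opcode, plen, param, timeout);
	hci_req_sync_unlock(hdev);

	return err;
}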
Fixes: f1a8f402f13f ("Bluetooth: L2CAP: Fix deadlock") Reported-by: Pauli Virtanen Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_core.c | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 144e85ebfbdb..db9b9bb7d74d 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -718,8 +718,8 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg) switch (cmd) { case HCISETAUTH: - err = __hci_cmd_sync_status(hdev, HCI_OP_WRITE_AUTH_ENABLE, - 1, &dr.dev_opt, HCI_CMD_TIMEOUT); + err = hci_cmd_sync_status(hdev, HCI_OP_WRITE_AUTH_ENABLE, + 1, &dr.dev_opt, HCI_CMD_TIMEOUT); break; case HCISETENCRYPT: @@ -730,23 +730,21 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg) if (!test_bit(HCI_AUTH, &hdev->flags)) { /* Auth must be enabled first */ - err = __hci_cmd_sync_status(hdev, - HCI_OP_WRITE_AUTH_ENABLE, - 1, &dr.dev_opt, - HCI_CMD_TIMEOUT); + err = hci_cmd_sync_status(hdev, + HCI_OP_WRITE_AUTH_ENABLE, + 1, &dr.dev_opt, + HCI_CMD_TIMEOUT); if (err) break; } - err = __hci_cmd_sync_status(hdev, HCI_OP_WRITE_ENCRYPT_MODE, - 1, &dr.dev_opt, - HCI_CMD_TIMEOUT); + err = hci_cmd_sync_status(hdev, HCI_OP_WRITE_ENCRYPT_MODE, + 1, &dr.dev_opt, HCI_CMD_TIMEOUT); break; case HCISETSCAN: - err = __hci_cmd_sync_status(hdev, HCI_OP_WRITE_SCAN_ENABLE, - 1, &dr.dev_opt, - HCI_CMD_TIMEOUT); + err = hci_cmd_sync_status(hdev, HCI_OP_WRITE_SCAN_ENABLE, + 1, &dr.dev_opt, HCI_CMD_TIMEOUT); /* Ensure that the connectable and discoverable states * get correctly modified as this was a non-mgmt change. @@ -758,9 +756,8 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg) case HCISETLINKPOL: policy = cpu_to_le16(dr.dev_opt); - err = __hci_cmd_sync_status(hdev, HCI_OP_WRITE_DEF_LINK_POLICY, - 2, &policy, - HCI_CMD_TIMEOUT); + err = hci_cmd_sync_status(hdev, HCI_OP_WRITE_DEF_LINK_POLICY, + 2, &policy, HCI_CMD_TIMEOUT); break; case HCISETLINKMODE: -- cgit v1.2.3-58-ga151 From 92048ab2e2e6cc90ad1cc9f55deb5cec4d731793 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 1 Jul 2024 16:00:08 -0400 Subject: Bluetooth: hci_core: Remove usage of hci_req_sync hci_request functions are considered deprecated so this replaces the usage of hci_req_sync with hci_inquiry_sync. 
Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_sync.h | 2 ++ net/bluetooth/hci_core.c | 29 ++++------------------------- net/bluetooth/hci_sync.c | 10 +++++----- 3 files changed, 11 insertions(+), 30 deletions(-) (limited to 'net') diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index 20168732f20e..620e6014beb2 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -131,6 +131,8 @@ int hci_update_discoverable(struct hci_dev *hdev); int hci_update_connectable_sync(struct hci_dev *hdev); +int hci_inquiry_sync(struct hci_dev *hdev, u8 length, u8 num_rsp); + int hci_start_discovery_sync(struct hci_dev *hdev); int hci_stop_discovery_sync(struct hci_dev *hdev); diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index db9b9bb7d74d..cce755a84ea7 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -312,33 +312,12 @@ static int inquiry_cache_dump(struct hci_dev *hdev, int num, __u8 *buf) return copied; } -static int hci_inq_req(struct hci_request *req, unsigned long opt) -{ - struct hci_inquiry_req *ir = (struct hci_inquiry_req *) opt; - struct hci_dev *hdev = req->hdev; - struct hci_cp_inquiry cp; - - BT_DBG("%s", hdev->name); - - if (test_bit(HCI_INQUIRY, &hdev->flags)) - return 0; - - /* Start Inquiry */ - memcpy(&cp.lap, &ir->lap, 3); - cp.length = ir->length; - cp.num_rsp = ir->num_rsp; - hci_req_add(req, HCI_OP_INQUIRY, sizeof(cp), &cp); - - return 0; -} - int hci_inquiry(void __user *arg) { __u8 __user *ptr = arg; struct hci_inquiry_req ir; struct hci_dev *hdev; int err = 0, do_inquiry = 0, max_rsp; - long timeo; __u8 *buf; if (copy_from_user(&ir, ptr, sizeof(ir))) @@ -377,11 +356,11 @@ int hci_inquiry(void __user *arg) } hci_dev_unlock(hdev); - timeo = ir.length * msecs_to_jiffies(2000); - if (do_inquiry) { - err = hci_req_sync(hdev, hci_inq_req, (unsigned long) &ir, - timeo, NULL); + hci_req_sync_lock(hdev); + err = hci_inquiry_sync(hdev, ir.length, ir.num_rsp); + hci_req_sync_unlock(hdev); + if (err < 0) goto done; diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 76b283b8e90d..46b394842a27 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -347,10 +347,9 @@ static int scan_disable_sync(struct hci_dev *hdev, void *data) return hci_scan_disable_sync(hdev); } -static int hci_inquiry_sync(struct hci_dev *hdev, u8 length); static int interleaved_inquiry_sync(struct hci_dev *hdev, void *data) { - return hci_inquiry_sync(hdev, DISCOV_INTERLEAVED_INQUIRY_LEN); + return hci_inquiry_sync(hdev, DISCOV_INTERLEAVED_INQUIRY_LEN, 0); } static void le_scan_disable(struct work_struct *work) @@ -5662,7 +5661,7 @@ int hci_update_connectable_sync(struct hci_dev *hdev) return hci_update_passive_scan_sync(hdev); } -static int hci_inquiry_sync(struct hci_dev *hdev, u8 length) +int hci_inquiry_sync(struct hci_dev *hdev, u8 length, u8 num_rsp) { const u8 giac[3] = { 0x33, 0x8b, 0x9e }; const u8 liac[3] = { 0x00, 0x8b, 0x9e }; @@ -5685,6 +5684,7 @@ static int hci_inquiry_sync(struct hci_dev *hdev, u8 length) memcpy(&cp.lap, giac, sizeof(cp.lap)); cp.length = length; + cp.num_rsp = num_rsp; return __hci_cmd_sync_status(hdev, HCI_OP_INQUIRY, sizeof(cp), &cp, HCI_CMD_TIMEOUT); @@ -5771,7 +5771,7 @@ static int hci_start_interleaved_discovery_sync(struct hci_dev *hdev) if (err) return err; - return hci_inquiry_sync(hdev, DISCOV_BREDR_INQUIRY_LEN); + return hci_inquiry_sync(hdev, DISCOV_BREDR_INQUIRY_LEN, 0); } int hci_start_discovery_sync(struct hci_dev 
*hdev) @@ -5783,7 +5783,7 @@ int hci_start_discovery_sync(struct hci_dev *hdev) switch (hdev->discovery.type) { case DISCOV_TYPE_BREDR: - return hci_inquiry_sync(hdev, DISCOV_BREDR_INQUIRY_LEN); + return hci_inquiry_sync(hdev, DISCOV_BREDR_INQUIRY_LEN, 0); case DISCOV_TYPE_INTERLEAVED: /* When running simultaneous discovery, the LE scanning time * should occupy the whole discovery time sine BR/EDR inquiry -- cgit v1.2.3-58-ga151 From 176cbeceb5c5a740216a6be3e751e76aaddf94b9 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 1 Jul 2024 16:13:56 -0400 Subject: Bluetooth: hci_core: Don't use hci_prepare_cmd This replaces the instance of hci_prepare_cmd with hci_cmd_sync_alloc since the former is part of hci_request.c which is considered deprecated. Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_sync.h | 4 ++++ net/bluetooth/hci_core.c | 4 ++-- net/bluetooth/hci_sync.c | 5 ++--- 3 files changed, 8 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index 620e6014beb2..a8d88247ac89 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -20,6 +20,10 @@ struct hci_cmd_sync_work_entry { }; struct adv_info; + +struct sk_buff *hci_cmd_sync_alloc(struct hci_dev *hdev, u16 opcode, u32 plen, + const void *param, struct sock *sk); + /* Function with sync suffix shall not be called with hdev->lock held as they * wait the command to complete and in the meantime an event could be received * which could attempt to acquire hdev->lock causing a deadlock. diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index cce755a84ea7..9bdd3e1f8cfc 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -3054,7 +3054,7 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, BT_DBG("%s opcode 0x%4.4x plen %d", hdev->name, opcode, plen); - skb = hci_prepare_cmd(hdev, opcode, plen, param); + skb = hci_cmd_sync_alloc(hdev, opcode, plen, param, NULL); if (!skb) { bt_dev_err(hdev, "no memory for command"); return -ENOMEM; @@ -3089,7 +3089,7 @@ int __hci_cmd_send(struct hci_dev *hdev, u16 opcode, u32 plen, return -EINVAL; } - skb = hci_prepare_cmd(hdev, opcode, plen, param); + skb = hci_cmd_sync_alloc(hdev, opcode, plen, param, NULL); if (!skb) { bt_dev_err(hdev, "no memory for command (opcode 0x%4.4x)", opcode); diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 46b394842a27..0c8888448d3c 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -49,9 +49,8 @@ static void hci_cmd_sync_complete(struct hci_dev *hdev, u8 result, u16 opcode, wake_up_interruptible(&hdev->req_wait_q); } -static struct sk_buff *hci_cmd_sync_alloc(struct hci_dev *hdev, u16 opcode, - u32 plen, const void *param, - struct sock *sk) +struct sk_buff *hci_cmd_sync_alloc(struct hci_dev *hdev, u16 opcode, u32 plen, + const void *param, struct sock *sk) { int len = HCI_COMMAND_HDR_SIZE + plen; struct hci_command_hdr *hdr; -- cgit v1.2.3-58-ga151 From 8bedf130c265384fb136b19b20504239240c51bf Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 1 Jul 2024 16:50:30 -0400 Subject: Bluetooth: hci_sync: Move handling of interleave_scan This moves handling of interleave_scan work to hci_sync.c since hci_request.c is deprecated. 
Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_core.c | 3 +-- net/bluetooth/hci_sync.c | 53 +++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 49 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 9bdd3e1f8cfc..fc0424ae551e 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -2552,7 +2552,6 @@ struct hci_dev *hci_alloc_dev_priv(int sizeof_priv) INIT_DELAYED_WORK(&hdev->ncmd_timer, hci_ncmd_timeout); hci_devcd_setup(hdev); - hci_request_setup(hdev); hci_init_sysfs(hdev); discovery_init(hdev); @@ -4074,7 +4073,7 @@ static void hci_send_cmd_sync(struct hci_dev *hdev, struct sk_buff *skb) return; } - if (hci_req_status_pend(hdev) && + if (hdev->req_status == HCI_REQ_PEND && !hci_dev_test_and_set_flag(hdev, HCI_CMD_PENDING)) { kfree_skb(hdev->req_skb); hdev->req_skb = skb_clone(hdev->sent_cmd, GFP_KERNEL); diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 0c8888448d3c..63548c4cc1ac 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -566,6 +566,53 @@ unlock: hci_dev_unlock(hdev); } +static bool is_interleave_scanning(struct hci_dev *hdev) +{ + return hdev->interleave_scan_state != INTERLEAVE_SCAN_NONE; +} + +static int hci_passive_scan_sync(struct hci_dev *hdev); + +static void interleave_scan_work(struct work_struct *work) +{ + struct hci_dev *hdev = container_of(work, struct hci_dev, + interleave_scan.work); + unsigned long timeout; + + if (hdev->interleave_scan_state == INTERLEAVE_SCAN_ALLOWLIST) { + timeout = msecs_to_jiffies(hdev->advmon_allowlist_duration); + } else if (hdev->interleave_scan_state == INTERLEAVE_SCAN_NO_FILTER) { + timeout = msecs_to_jiffies(hdev->advmon_no_filter_duration); + } else { + bt_dev_err(hdev, "unexpected error"); + return; + } + + hci_passive_scan_sync(hdev); + + hci_dev_lock(hdev); + + switch (hdev->interleave_scan_state) { + case INTERLEAVE_SCAN_ALLOWLIST: + bt_dev_dbg(hdev, "next state: allowlist"); + hdev->interleave_scan_state = INTERLEAVE_SCAN_NO_FILTER; + break; + case INTERLEAVE_SCAN_NO_FILTER: + bt_dev_dbg(hdev, "next state: no filter"); + hdev->interleave_scan_state = INTERLEAVE_SCAN_ALLOWLIST; + break; + case INTERLEAVE_SCAN_NONE: + bt_dev_err(hdev, "unexpected error"); + } + + hci_dev_unlock(hdev); + + /* Don't continue interleaving if it was canceled */ + if (is_interleave_scanning(hdev)) + queue_delayed_work(hdev->req_workqueue, + &hdev->interleave_scan, timeout); +} + void hci_cmd_sync_init(struct hci_dev *hdev) { INIT_WORK(&hdev->cmd_sync_work, hci_cmd_sync_work); @@ -577,6 +624,7 @@ void hci_cmd_sync_init(struct hci_dev *hdev) INIT_WORK(&hdev->reenable_adv_work, reenable_adv); INIT_DELAYED_WORK(&hdev->le_scan_disable, le_scan_disable); INIT_DELAYED_WORK(&hdev->adv_instance_expire, adv_timeout_expire); + INIT_DELAYED_WORK(&hdev->interleave_scan, interleave_scan_work); } static void _hci_cmd_sync_cancel_entry(struct hci_dev *hdev, @@ -2110,11 +2158,6 @@ static void hci_start_interleave_scan(struct hci_dev *hdev) &hdev->interleave_scan, 0); } -static bool is_interleave_scanning(struct hci_dev *hdev) -{ - return hdev->interleave_scan_state != INTERLEAVE_SCAN_NONE; -} - static void cancel_interleave_scan(struct hci_dev *hdev) { bt_dev_dbg(hdev, "cancelling interleave scan"); -- cgit v1.2.3-58-ga151 From f2d89775358606c7ab6b6b6c4a02fe1e8cd270b1 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 1 Jul 2024 16:52:57 -0400 Subject: Bluetooth: hci_sync: Remove remaining dependencies of 
hci_request This removes the dependencies of hci_req_init and hci_request_cancel_all from hci_sync.c. Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_sync.h | 17 +++++++++++++++++ net/bluetooth/hci_request.h | 17 ----------------- net/bluetooth/hci_sync.c | 14 +++++++++++--- 3 files changed, 28 insertions(+), 20 deletions(-) (limited to 'net') diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index a8d88247ac89..75e052909b5f 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -8,6 +8,23 @@ #define UINT_PTR(_handle) ((void *)((uintptr_t)_handle)) #define PTR_UINT(_ptr) ((uintptr_t)((void *)_ptr)) +#define HCI_REQ_DONE 0 +#define HCI_REQ_PEND 1 +#define HCI_REQ_CANCELED 2 + +#define hci_req_sync_lock(hdev) mutex_lock(&hdev->req_lock) +#define hci_req_sync_unlock(hdev) mutex_unlock(&hdev->req_lock) + +struct hci_request { + struct hci_dev *hdev; + struct sk_buff_head cmd_q; + + /* If something goes wrong when building the HCI request, the error + * value is stored in this field. + */ + int err; +}; + typedef int (*hci_cmd_sync_work_func_t)(struct hci_dev *hdev, void *data); typedef void (*hci_cmd_sync_work_destroy_t)(struct hci_dev *hdev, void *data, int err); diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h index c91f2838f542..b730da4a8b47 100644 --- a/net/bluetooth/hci_request.h +++ b/net/bluetooth/hci_request.h @@ -22,23 +22,6 @@ #include -#define HCI_REQ_DONE 0 -#define HCI_REQ_PEND 1 -#define HCI_REQ_CANCELED 2 - -#define hci_req_sync_lock(hdev) mutex_lock(&hdev->req_lock) -#define hci_req_sync_unlock(hdev) mutex_unlock(&hdev->req_lock) - -struct hci_request { - struct hci_dev *hdev; - struct sk_buff_head cmd_q; - - /* If something goes wrong when building the HCI request, the error - * value is stored in this field. - */ - int err; -}; - void hci_req_init(struct hci_request *req, struct hci_dev *hdev); void hci_req_purge(struct hci_request *req); bool hci_req_status_pend(struct hci_dev *hdev); diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 63548c4cc1ac..cd2ed16da8a4 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -12,7 +12,6 @@ #include #include -#include "hci_request.h" #include "hci_codec.h" #include "hci_debugfs.h" #include "smp.h" @@ -146,6 +145,13 @@ static int hci_cmd_sync_run(struct hci_request *req) return 0; } +static void hci_request_init(struct hci_request *req, struct hci_dev *hdev) +{ + skb_queue_head_init(&req->cmd_q); + req->hdev = hdev; + req->err = 0; +} + /* This function requires the caller holds hdev->req_lock. 
*/ struct sk_buff *__hci_cmd_sync_sk(struct hci_dev *hdev, u16 opcode, u32 plen, const void *param, u8 event, u32 timeout, @@ -157,7 +163,7 @@ struct sk_buff *__hci_cmd_sync_sk(struct hci_dev *hdev, u16 opcode, u32 plen, bt_dev_dbg(hdev, "Opcode 0x%4.4x", opcode); - hci_req_init(&req, hdev); + hci_request_init(&req, hdev); hci_cmd_sync_add(&req, opcode, plen, param, event, sk); @@ -5056,7 +5062,9 @@ int hci_dev_close_sync(struct hci_dev *hdev) cancel_delayed_work(&hdev->ncmd_timer); cancel_delayed_work(&hdev->le_scan_disable); - hci_request_cancel_all(hdev); + hci_cmd_sync_cancel_sync(hdev, ENODEV); + + cancel_interleave_scan(hdev); if (hdev->adv_instance_timeout) { cancel_delayed_work_sync(&hdev->adv_instance_expire); -- cgit v1.2.3-58-ga151 From 936daee9cf08c5e58c9a0fe687f52adb2d80e87d Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 1 Jul 2024 17:10:41 -0400 Subject: Bluetooth: Remove hci_request.{c,h} This removes hci_request.{c,h} since it shall no longer be used. Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/bluetooth.h | 4 + net/bluetooth/Makefile | 3 +- net/bluetooth/hci_conn.c | 1 - net/bluetooth/hci_core.c | 1 - net/bluetooth/hci_debugfs.c | 1 - net/bluetooth/hci_event.c | 1 - net/bluetooth/hci_request.c | 903 -------------------------------------- net/bluetooth/hci_request.h | 54 --- net/bluetooth/mgmt.c | 1 - net/bluetooth/msft.c | 1 - 10 files changed, 5 insertions(+), 965 deletions(-) delete mode 100644 net/bluetooth/hci_request.c delete mode 100644 net/bluetooth/hci_request.h (limited to 'net') diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index b3228bd6cd6b..5d655e109b2c 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -441,6 +441,10 @@ typedef void (*hci_req_complete_t)(struct hci_dev *hdev, u8 status, u16 opcode); typedef void (*hci_req_complete_skb_t)(struct hci_dev *hdev, u8 status, u16 opcode, struct sk_buff *skb); +void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status, + hci_req_complete_t *req_complete, + hci_req_complete_skb_t *req_complete_skb); + #define HCI_REQ_START BIT(0) #define HCI_REQ_SKB BIT(1) diff --git a/net/bluetooth/Makefile b/net/bluetooth/Makefile index 628d448d78be..5a3835b7dfcd 100644 --- a/net/bluetooth/Makefile +++ b/net/bluetooth/Makefile @@ -14,8 +14,7 @@ bluetooth_6lowpan-y := 6lowpan.o bluetooth-y := af_bluetooth.o hci_core.o hci_conn.o hci_event.o mgmt.o \ hci_sock.o hci_sysfs.o l2cap_core.o l2cap_sock.o smp.o lib.o \ - ecdh_helper.o hci_request.o mgmt_util.o mgmt_config.o hci_codec.o \ - eir.o hci_sync.o + ecdh_helper.o mgmt_util.o mgmt_config.o hci_codec.o eir.o hci_sync.o bluetooth-$(CONFIG_DEV_COREDUMP) += coredump.o diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 080053a85b4d..8e48ccd2af30 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -34,7 +34,6 @@ #include #include -#include "hci_request.h" #include "smp.h" #include "eir.h" diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index fc0424ae551e..8a4ebd93adfc 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -40,7 +40,6 @@ #include #include -#include "hci_request.h" #include "hci_debugfs.h" #include "smp.h" #include "leds.h" diff --git a/net/bluetooth/hci_debugfs.c b/net/bluetooth/hci_debugfs.c index ce3ff2fa72e5..f625074d1f00 100644 --- a/net/bluetooth/hci_debugfs.c +++ b/net/bluetooth/hci_debugfs.c @@ -28,7 +28,6 @@ #include #include "smp.h" -#include "hci_request.h" #include 
"hci_debugfs.h" #define DEFINE_QUIRK_ATTRIBUTE(__name, __quirk) \ diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 4611a67d7dcc..dce8035ca799 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -33,7 +33,6 @@ #include #include -#include "hci_request.h" #include "hci_debugfs.h" #include "hci_codec.h" #include "smp.h" diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c deleted file mode 100644 index efea25eb56ce..000000000000 --- a/net/bluetooth/hci_request.c +++ /dev/null @@ -1,903 +0,0 @@ -/* - BlueZ - Bluetooth protocol stack for Linux - - Copyright (C) 2014 Intel Corporation - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License version 2 as - published by the Free Software Foundation; - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. - IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY - CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES - WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - - ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, - COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS - SOFTWARE IS DISCLAIMED. -*/ - -#include - -#include -#include -#include - -#include "smp.h" -#include "hci_request.h" -#include "msft.h" -#include "eir.h" - -void hci_req_init(struct hci_request *req, struct hci_dev *hdev) -{ - skb_queue_head_init(&req->cmd_q); - req->hdev = hdev; - req->err = 0; -} - -void hci_req_purge(struct hci_request *req) -{ - skb_queue_purge(&req->cmd_q); -} - -bool hci_req_status_pend(struct hci_dev *hdev) -{ - return hdev->req_status == HCI_REQ_PEND; -} - -static int req_run(struct hci_request *req, hci_req_complete_t complete, - hci_req_complete_skb_t complete_skb) -{ - struct hci_dev *hdev = req->hdev; - struct sk_buff *skb; - unsigned long flags; - - bt_dev_dbg(hdev, "length %u", skb_queue_len(&req->cmd_q)); - - /* If an error occurred during request building, remove all HCI - * commands queued on the HCI request queue. 
- */ - if (req->err) { - skb_queue_purge(&req->cmd_q); - return req->err; - } - - /* Do not allow empty requests */ - if (skb_queue_empty(&req->cmd_q)) - return -ENODATA; - - skb = skb_peek_tail(&req->cmd_q); - if (complete) { - bt_cb(skb)->hci.req_complete = complete; - } else if (complete_skb) { - bt_cb(skb)->hci.req_complete_skb = complete_skb; - bt_cb(skb)->hci.req_flags |= HCI_REQ_SKB; - } - - spin_lock_irqsave(&hdev->cmd_q.lock, flags); - skb_queue_splice_tail(&req->cmd_q, &hdev->cmd_q); - spin_unlock_irqrestore(&hdev->cmd_q.lock, flags); - - queue_work(hdev->workqueue, &hdev->cmd_work); - - return 0; -} - -int hci_req_run(struct hci_request *req, hci_req_complete_t complete) -{ - return req_run(req, complete, NULL); -} - -int hci_req_run_skb(struct hci_request *req, hci_req_complete_skb_t complete) -{ - return req_run(req, NULL, complete); -} - -void hci_req_sync_complete(struct hci_dev *hdev, u8 result, u16 opcode, - struct sk_buff *skb) -{ - bt_dev_dbg(hdev, "result 0x%2.2x", result); - - if (hdev->req_status == HCI_REQ_PEND) { - hdev->req_result = result; - hdev->req_status = HCI_REQ_DONE; - if (skb) { - kfree_skb(hdev->req_skb); - hdev->req_skb = skb_get(skb); - } - wake_up_interruptible(&hdev->req_wait_q); - } -} - -/* Execute request and wait for completion. */ -int __hci_req_sync(struct hci_dev *hdev, int (*func)(struct hci_request *req, - unsigned long opt), - unsigned long opt, u32 timeout, u8 *hci_status) -{ - struct hci_request req; - int err = 0; - - bt_dev_dbg(hdev, "start"); - - hci_req_init(&req, hdev); - - hdev->req_status = HCI_REQ_PEND; - - err = func(&req, opt); - if (err) { - if (hci_status) - *hci_status = HCI_ERROR_UNSPECIFIED; - return err; - } - - err = hci_req_run_skb(&req, hci_req_sync_complete); - if (err < 0) { - hdev->req_status = 0; - - /* ENODATA means the HCI request command queue is empty. - * This can happen when a request with conditionals doesn't - * trigger any commands to be sent. This is normal behavior - * and should not trigger an error return. - */ - if (err == -ENODATA) { - if (hci_status) - *hci_status = 0; - return 0; - } - - if (hci_status) - *hci_status = HCI_ERROR_UNSPECIFIED; - - return err; - } - - err = wait_event_interruptible_timeout(hdev->req_wait_q, - hdev->req_status != HCI_REQ_PEND, timeout); - - if (err == -ERESTARTSYS) - return -EINTR; - - switch (hdev->req_status) { - case HCI_REQ_DONE: - err = -bt_to_errno(hdev->req_result); - if (hci_status) - *hci_status = hdev->req_result; - break; - - case HCI_REQ_CANCELED: - err = -hdev->req_result; - if (hci_status) - *hci_status = HCI_ERROR_UNSPECIFIED; - break; - - default: - err = -ETIMEDOUT; - if (hci_status) - *hci_status = HCI_ERROR_UNSPECIFIED; - break; - } - - kfree_skb(hdev->req_skb); - hdev->req_skb = NULL; - hdev->req_status = hdev->req_result = 0; - - bt_dev_dbg(hdev, "end: err %d", err); - - return err; -} - -int hci_req_sync(struct hci_dev *hdev, int (*req)(struct hci_request *req, - unsigned long opt), - unsigned long opt, u32 timeout, u8 *hci_status) -{ - int ret; - - /* Serialize all requests */ - hci_req_sync_lock(hdev); - /* check the state after obtaing the lock to protect the HCI_UP - * against any races from hci_dev_do_close when the controller - * gets removed. 
- */ - if (test_bit(HCI_UP, &hdev->flags)) - ret = __hci_req_sync(hdev, req, opt, timeout, hci_status); - else - ret = -ENETDOWN; - hci_req_sync_unlock(hdev); - - return ret; -} - -struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode, u32 plen, - const void *param) -{ - int len = HCI_COMMAND_HDR_SIZE + plen; - struct hci_command_hdr *hdr; - struct sk_buff *skb; - - skb = bt_skb_alloc(len, GFP_ATOMIC); - if (!skb) - return NULL; - - hdr = skb_put(skb, HCI_COMMAND_HDR_SIZE); - hdr->opcode = cpu_to_le16(opcode); - hdr->plen = plen; - - if (plen) - skb_put_data(skb, param, plen); - - bt_dev_dbg(hdev, "skb len %d", skb->len); - - hci_skb_pkt_type(skb) = HCI_COMMAND_PKT; - hci_skb_opcode(skb) = opcode; - - return skb; -} - -/* Queue a command to an asynchronous HCI request */ -void hci_req_add_ev(struct hci_request *req, u16 opcode, u32 plen, - const void *param, u8 event) -{ - struct hci_dev *hdev = req->hdev; - struct sk_buff *skb; - - bt_dev_dbg(hdev, "opcode 0x%4.4x plen %d", opcode, plen); - - /* If an error occurred during request building, there is no point in - * queueing the HCI command. We can simply return. - */ - if (req->err) - return; - - skb = hci_prepare_cmd(hdev, opcode, plen, param); - if (!skb) { - bt_dev_err(hdev, "no memory for command (opcode 0x%4.4x)", - opcode); - req->err = -ENOMEM; - return; - } - - if (skb_queue_empty(&req->cmd_q)) - bt_cb(skb)->hci.req_flags |= HCI_REQ_START; - - hci_skb_event(skb) = event; - - skb_queue_tail(&req->cmd_q, skb); -} - -void hci_req_add(struct hci_request *req, u16 opcode, u32 plen, - const void *param) -{ - bt_dev_dbg(req->hdev, "HCI_REQ-0x%4.4x", opcode); - hci_req_add_ev(req, opcode, plen, param, 0); -} - -static void start_interleave_scan(struct hci_dev *hdev) -{ - hdev->interleave_scan_state = INTERLEAVE_SCAN_NO_FILTER; - queue_delayed_work(hdev->req_workqueue, - &hdev->interleave_scan, 0); -} - -static bool is_interleave_scanning(struct hci_dev *hdev) -{ - return hdev->interleave_scan_state != INTERLEAVE_SCAN_NONE; -} - -static void cancel_interleave_scan(struct hci_dev *hdev) -{ - bt_dev_dbg(hdev, "cancelling interleave scan"); - - cancel_delayed_work_sync(&hdev->interleave_scan); - - hdev->interleave_scan_state = INTERLEAVE_SCAN_NONE; -} - -/* Return true if interleave_scan wasn't started until exiting this function, - * otherwise, return false - */ -static bool __hci_update_interleaved_scan(struct hci_dev *hdev) -{ - /* Do interleaved scan only if all of the following are true: - * - There is at least one ADV monitor - * - At least one pending LE connection or one device to be scanned for - * - Monitor offloading is not supported - * If so, we should alternate between allowlist scan and one without - * any filters to save power. 
- */ - bool use_interleaving = hci_is_adv_monitoring(hdev) && - !(list_empty(&hdev->pend_le_conns) && - list_empty(&hdev->pend_le_reports)) && - hci_get_adv_monitor_offload_ext(hdev) == - HCI_ADV_MONITOR_EXT_NONE; - bool is_interleaving = is_interleave_scanning(hdev); - - if (use_interleaving && !is_interleaving) { - start_interleave_scan(hdev); - bt_dev_dbg(hdev, "starting interleave scan"); - return true; - } - - if (!use_interleaving && is_interleaving) - cancel_interleave_scan(hdev); - - return false; -} - -void hci_req_add_le_scan_disable(struct hci_request *req, bool rpa_le_conn) -{ - struct hci_dev *hdev = req->hdev; - - if (hdev->scanning_paused) { - bt_dev_dbg(hdev, "Scanning is paused for suspend"); - return; - } - - if (use_ext_scan(hdev)) { - struct hci_cp_le_set_ext_scan_enable cp; - - memset(&cp, 0, sizeof(cp)); - cp.enable = LE_SCAN_DISABLE; - hci_req_add(req, HCI_OP_LE_SET_EXT_SCAN_ENABLE, sizeof(cp), - &cp); - } else { - struct hci_cp_le_set_scan_enable cp; - - memset(&cp, 0, sizeof(cp)); - cp.enable = LE_SCAN_DISABLE; - hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp); - } - - /* Disable address resolution */ - if (hci_dev_test_flag(hdev, HCI_LL_RPA_RESOLUTION) && !rpa_le_conn) { - __u8 enable = 0x00; - - hci_req_add(req, HCI_OP_LE_SET_ADDR_RESOLV_ENABLE, 1, &enable); - } -} - -static void del_from_accept_list(struct hci_request *req, bdaddr_t *bdaddr, - u8 bdaddr_type) -{ - struct hci_cp_le_del_from_accept_list cp; - - cp.bdaddr_type = bdaddr_type; - bacpy(&cp.bdaddr, bdaddr); - - bt_dev_dbg(req->hdev, "Remove %pMR (0x%x) from accept list", &cp.bdaddr, - cp.bdaddr_type); - hci_req_add(req, HCI_OP_LE_DEL_FROM_ACCEPT_LIST, sizeof(cp), &cp); - - if (use_ll_privacy(req->hdev)) { - struct smp_irk *irk; - - irk = hci_find_irk_by_addr(req->hdev, bdaddr, bdaddr_type); - if (irk) { - struct hci_cp_le_del_from_resolv_list cp; - - cp.bdaddr_type = bdaddr_type; - bacpy(&cp.bdaddr, bdaddr); - - hci_req_add(req, HCI_OP_LE_DEL_FROM_RESOLV_LIST, - sizeof(cp), &cp); - } - } -} - -/* Adds connection to accept list if needed. On error, returns -1. 
*/ -static int add_to_accept_list(struct hci_request *req, - struct hci_conn_params *params, u8 *num_entries, - bool allow_rpa) -{ - struct hci_cp_le_add_to_accept_list cp; - struct hci_dev *hdev = req->hdev; - - /* Already in accept list */ - if (hci_bdaddr_list_lookup(&hdev->le_accept_list, ¶ms->addr, - params->addr_type)) - return 0; - - /* Select filter policy to accept all advertising */ - if (*num_entries >= hdev->le_accept_list_size) - return -1; - - /* Accept list can not be used with RPAs */ - if (!allow_rpa && - !hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY) && - hci_find_irk_by_addr(hdev, ¶ms->addr, params->addr_type)) { - return -1; - } - - /* During suspend, only wakeable devices can be in accept list */ - if (hdev->suspended && - !(params->flags & HCI_CONN_FLAG_REMOTE_WAKEUP)) - return 0; - - *num_entries += 1; - cp.bdaddr_type = params->addr_type; - bacpy(&cp.bdaddr, ¶ms->addr); - - bt_dev_dbg(hdev, "Add %pMR (0x%x) to accept list", &cp.bdaddr, - cp.bdaddr_type); - hci_req_add(req, HCI_OP_LE_ADD_TO_ACCEPT_LIST, sizeof(cp), &cp); - - if (use_ll_privacy(hdev)) { - struct smp_irk *irk; - - irk = hci_find_irk_by_addr(hdev, ¶ms->addr, - params->addr_type); - if (irk) { - struct hci_cp_le_add_to_resolv_list cp; - - cp.bdaddr_type = params->addr_type; - bacpy(&cp.bdaddr, ¶ms->addr); - memcpy(cp.peer_irk, irk->val, 16); - - if (hci_dev_test_flag(hdev, HCI_PRIVACY)) - memcpy(cp.local_irk, hdev->irk, 16); - else - memset(cp.local_irk, 0, 16); - - hci_req_add(req, HCI_OP_LE_ADD_TO_RESOLV_LIST, - sizeof(cp), &cp); - } - } - - return 0; -} - -static u8 update_accept_list(struct hci_request *req) -{ - struct hci_dev *hdev = req->hdev; - struct hci_conn_params *params; - struct bdaddr_list *b; - u8 num_entries = 0; - bool pend_conn, pend_report; - /* We allow usage of accept list even with RPAs in suspend. In the worst - * case, we won't be able to wake from devices that use the privacy1.2 - * features. Additionally, once we support privacy1.2 and IRK - * offloading, we can update this to also check for those conditions. - */ - bool allow_rpa = hdev->suspended; - - if (use_ll_privacy(hdev)) - allow_rpa = true; - - /* Go through the current accept list programmed into the - * controller one by one and check if that address is still - * in the list of pending connections or list of devices to - * report. If not present in either list, then queue the - * command to remove it from the controller. - */ - list_for_each_entry(b, &hdev->le_accept_list, list) { - pend_conn = hci_pend_le_action_lookup(&hdev->pend_le_conns, - &b->bdaddr, - b->bdaddr_type); - pend_report = hci_pend_le_action_lookup(&hdev->pend_le_reports, - &b->bdaddr, - b->bdaddr_type); - - /* If the device is not likely to connect or report, - * remove it from the accept list. - */ - if (!pend_conn && !pend_report) { - del_from_accept_list(req, &b->bdaddr, b->bdaddr_type); - continue; - } - - /* Accept list can not be used with RPAs */ - if (!allow_rpa && - !hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY) && - hci_find_irk_by_addr(hdev, &b->bdaddr, b->bdaddr_type)) { - return 0x00; - } - - num_entries++; - } - - /* Since all no longer valid accept list entries have been - * removed, walk through the list of pending connections - * and ensure that any new device gets programmed into - * the controller. - * - * If the list of the devices is larger than the list of - * available accept list entries in the controller, then - * just abort and return filer policy value to not use the - * accept list. 
- */ - list_for_each_entry(params, &hdev->pend_le_conns, action) { - if (add_to_accept_list(req, params, &num_entries, allow_rpa)) - return 0x00; - } - - /* After adding all new pending connections, walk through - * the list of pending reports and also add these to the - * accept list if there is still space. Abort if space runs out. - */ - list_for_each_entry(params, &hdev->pend_le_reports, action) { - if (add_to_accept_list(req, params, &num_entries, allow_rpa)) - return 0x00; - } - - /* Use the allowlist unless the following conditions are all true: - * - We are not currently suspending - * - There are 1 or more ADV monitors registered and it's not offloaded - * - Interleaved scanning is not currently using the allowlist - */ - if (!idr_is_empty(&hdev->adv_monitors_idr) && !hdev->suspended && - hci_get_adv_monitor_offload_ext(hdev) == HCI_ADV_MONITOR_EXT_NONE && - hdev->interleave_scan_state != INTERLEAVE_SCAN_ALLOWLIST) - return 0x00; - - /* Select filter policy to use accept list */ - return 0x01; -} - -static bool scan_use_rpa(struct hci_dev *hdev) -{ - return hci_dev_test_flag(hdev, HCI_PRIVACY); -} - -static void hci_req_start_scan(struct hci_request *req, u8 type, u16 interval, - u16 window, u8 own_addr_type, u8 filter_policy, - bool filter_dup, bool addr_resolv) -{ - struct hci_dev *hdev = req->hdev; - - if (hdev->scanning_paused) { - bt_dev_dbg(hdev, "Scanning is paused for suspend"); - return; - } - - if (use_ll_privacy(hdev) && addr_resolv) { - u8 enable = 0x01; - - hci_req_add(req, HCI_OP_LE_SET_ADDR_RESOLV_ENABLE, 1, &enable); - } - - /* Use ext scanning if set ext scan param and ext scan enable is - * supported - */ - if (use_ext_scan(hdev)) { - struct hci_cp_le_set_ext_scan_params *ext_param_cp; - struct hci_cp_le_set_ext_scan_enable ext_enable_cp; - struct hci_cp_le_scan_phy_params *phy_params; - u8 data[sizeof(*ext_param_cp) + sizeof(*phy_params) * 2]; - u32 plen; - - ext_param_cp = (void *)data; - phy_params = (void *)ext_param_cp->data; - - memset(ext_param_cp, 0, sizeof(*ext_param_cp)); - ext_param_cp->own_addr_type = own_addr_type; - ext_param_cp->filter_policy = filter_policy; - - plen = sizeof(*ext_param_cp); - - if (scan_1m(hdev) || scan_2m(hdev)) { - ext_param_cp->scanning_phys |= LE_SCAN_PHY_1M; - - memset(phy_params, 0, sizeof(*phy_params)); - phy_params->type = type; - phy_params->interval = cpu_to_le16(interval); - phy_params->window = cpu_to_le16(window); - - plen += sizeof(*phy_params); - phy_params++; - } - - if (scan_coded(hdev)) { - ext_param_cp->scanning_phys |= LE_SCAN_PHY_CODED; - - memset(phy_params, 0, sizeof(*phy_params)); - phy_params->type = type; - phy_params->interval = cpu_to_le16(interval); - phy_params->window = cpu_to_le16(window); - - plen += sizeof(*phy_params); - phy_params++; - } - - hci_req_add(req, HCI_OP_LE_SET_EXT_SCAN_PARAMS, - plen, ext_param_cp); - - memset(&ext_enable_cp, 0, sizeof(ext_enable_cp)); - ext_enable_cp.enable = LE_SCAN_ENABLE; - ext_enable_cp.filter_dup = filter_dup; - - hci_req_add(req, HCI_OP_LE_SET_EXT_SCAN_ENABLE, - sizeof(ext_enable_cp), &ext_enable_cp); - } else { - struct hci_cp_le_set_scan_param param_cp; - struct hci_cp_le_set_scan_enable enable_cp; - - memset(¶m_cp, 0, sizeof(param_cp)); - param_cp.type = type; - param_cp.interval = cpu_to_le16(interval); - param_cp.window = cpu_to_le16(window); - param_cp.own_address_type = own_addr_type; - param_cp.filter_policy = filter_policy; - hci_req_add(req, HCI_OP_LE_SET_SCAN_PARAM, sizeof(param_cp), - ¶m_cp); - - memset(&enable_cp, 0, sizeof(enable_cp)); - 
enable_cp.enable = LE_SCAN_ENABLE; - enable_cp.filter_dup = filter_dup; - hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(enable_cp), - &enable_cp); - } -} - -static void set_random_addr(struct hci_request *req, bdaddr_t *rpa); -static int hci_update_random_address(struct hci_request *req, - bool require_privacy, bool use_rpa, - u8 *own_addr_type) -{ - struct hci_dev *hdev = req->hdev; - int err; - - /* If privacy is enabled use a resolvable private address. If - * current RPA has expired or there is something else than - * the current RPA in use, then generate a new one. - */ - if (use_rpa) { - /* If Controller supports LL Privacy use own address type is - * 0x03 - */ - if (use_ll_privacy(hdev)) - *own_addr_type = ADDR_LE_DEV_RANDOM_RESOLVED; - else - *own_addr_type = ADDR_LE_DEV_RANDOM; - - if (rpa_valid(hdev)) - return 0; - - err = smp_generate_rpa(hdev, hdev->irk, &hdev->rpa); - if (err < 0) { - bt_dev_err(hdev, "failed to generate new RPA"); - return err; - } - - set_random_addr(req, &hdev->rpa); - - return 0; - } - - /* In case of required privacy without resolvable private address, - * use an non-resolvable private address. This is useful for active - * scanning and non-connectable advertising. - */ - if (require_privacy) { - bdaddr_t nrpa; - - while (true) { - /* The non-resolvable private address is generated - * from random six bytes with the two most significant - * bits cleared. - */ - get_random_bytes(&nrpa, 6); - nrpa.b[5] &= 0x3f; - - /* The non-resolvable private address shall not be - * equal to the public address. - */ - if (bacmp(&hdev->bdaddr, &nrpa)) - break; - } - - *own_addr_type = ADDR_LE_DEV_RANDOM; - set_random_addr(req, &nrpa); - return 0; - } - - /* If forcing static address is in use or there is no public - * address use the static address as random address (but skip - * the HCI command if the current random address is already the - * static one. - * - * In case BR/EDR has been disabled on a dual-mode controller - * and a static address has been configured, then use that - * address instead of the public BR/EDR address. - */ - if (hci_dev_test_flag(hdev, HCI_FORCE_STATIC_ADDR) || - !bacmp(&hdev->bdaddr, BDADDR_ANY) || - (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED) && - bacmp(&hdev->static_addr, BDADDR_ANY))) { - *own_addr_type = ADDR_LE_DEV_RANDOM; - if (bacmp(&hdev->static_addr, &hdev->random_addr)) - hci_req_add(req, HCI_OP_LE_SET_RANDOM_ADDR, 6, - &hdev->static_addr); - return 0; - } - - /* Neither privacy nor static address is being used so use a - * public address. - */ - *own_addr_type = ADDR_LE_DEV_PUBLIC; - - return 0; -} - -/* Ensure to call hci_req_add_le_scan_disable() first to disable the - * controller based address resolution to be able to reconfigure - * resolving list. - */ -void hci_req_add_le_passive_scan(struct hci_request *req) -{ - struct hci_dev *hdev = req->hdev; - u8 own_addr_type; - u8 filter_policy; - u16 window, interval; - /* Default is to enable duplicates filter */ - u8 filter_dup = LE_SCAN_FILTER_DUP_ENABLE; - /* Background scanning should run with address resolution */ - bool addr_resolv = true; - - if (hdev->scanning_paused) { - bt_dev_dbg(hdev, "Scanning is paused for suspend"); - return; - } - - /* Set require_privacy to false since no SCAN_REQ are send - * during passive scanning. Not using an non-resolvable address - * here is important so that peer devices using direct - * advertising with our address will be correctly reported - * by the controller. 
- */ - if (hci_update_random_address(req, false, scan_use_rpa(hdev), - &own_addr_type)) - return; - - if (hdev->enable_advmon_interleave_scan && - __hci_update_interleaved_scan(hdev)) - return; - - bt_dev_dbg(hdev, "interleave state %d", hdev->interleave_scan_state); - /* Adding or removing entries from the accept list must - * happen before enabling scanning. The controller does - * not allow accept list modification while scanning. - */ - filter_policy = update_accept_list(req); - - /* When the controller is using random resolvable addresses and - * with that having LE privacy enabled, then controllers with - * Extended Scanner Filter Policies support can now enable support - * for handling directed advertising. - * - * So instead of using filter polices 0x00 (no accept list) - * and 0x01 (accept list enabled) use the new filter policies - * 0x02 (no accept list) and 0x03 (accept list enabled). - */ - if (hci_dev_test_flag(hdev, HCI_PRIVACY) && - (hdev->le_features[0] & HCI_LE_EXT_SCAN_POLICY)) - filter_policy |= 0x02; - - if (hdev->suspended) { - window = hdev->le_scan_window_suspend; - interval = hdev->le_scan_int_suspend; - } else if (hci_is_le_conn_scanning(hdev)) { - window = hdev->le_scan_window_connect; - interval = hdev->le_scan_int_connect; - } else if (hci_is_adv_monitoring(hdev)) { - window = hdev->le_scan_window_adv_monitor; - interval = hdev->le_scan_int_adv_monitor; - - /* Disable duplicates filter when scanning for advertisement - * monitor for the following reasons. - * - * For HW pattern filtering (ex. MSFT), Realtek and Qualcomm - * controllers ignore RSSI_Sampling_Period when the duplicates - * filter is enabled. - * - * For SW pattern filtering, when we're not doing interleaved - * scanning, it is necessary to disable duplicates filter, - * otherwise hosts can only receive one advertisement and it's - * impossible to know if a peer is still in range. 
- */ - filter_dup = LE_SCAN_FILTER_DUP_DISABLE; - } else { - window = hdev->le_scan_window; - interval = hdev->le_scan_interval; - } - - bt_dev_dbg(hdev, "LE passive scan with accept list = %d", - filter_policy); - hci_req_start_scan(req, LE_SCAN_PASSIVE, interval, window, - own_addr_type, filter_policy, filter_dup, - addr_resolv); -} - -static int hci_req_add_le_interleaved_scan(struct hci_request *req, - unsigned long opt) -{ - struct hci_dev *hdev = req->hdev; - int ret = 0; - - hci_dev_lock(hdev); - - if (hci_dev_test_flag(hdev, HCI_LE_SCAN)) - hci_req_add_le_scan_disable(req, false); - hci_req_add_le_passive_scan(req); - - switch (hdev->interleave_scan_state) { - case INTERLEAVE_SCAN_ALLOWLIST: - bt_dev_dbg(hdev, "next state: allowlist"); - hdev->interleave_scan_state = INTERLEAVE_SCAN_NO_FILTER; - break; - case INTERLEAVE_SCAN_NO_FILTER: - bt_dev_dbg(hdev, "next state: no filter"); - hdev->interleave_scan_state = INTERLEAVE_SCAN_ALLOWLIST; - break; - case INTERLEAVE_SCAN_NONE: - BT_ERR("unexpected error"); - ret = -1; - } - - hci_dev_unlock(hdev); - - return ret; -} - -static void interleave_scan_work(struct work_struct *work) -{ - struct hci_dev *hdev = container_of(work, struct hci_dev, - interleave_scan.work); - u8 status; - unsigned long timeout; - - if (hdev->interleave_scan_state == INTERLEAVE_SCAN_ALLOWLIST) { - timeout = msecs_to_jiffies(hdev->advmon_allowlist_duration); - } else if (hdev->interleave_scan_state == INTERLEAVE_SCAN_NO_FILTER) { - timeout = msecs_to_jiffies(hdev->advmon_no_filter_duration); - } else { - bt_dev_err(hdev, "unexpected error"); - return; - } - - hci_req_sync(hdev, hci_req_add_le_interleaved_scan, 0, - HCI_CMD_TIMEOUT, &status); - - /* Don't continue interleaving if it was canceled */ - if (is_interleave_scanning(hdev)) - queue_delayed_work(hdev->req_workqueue, - &hdev->interleave_scan, timeout); -} - -static void set_random_addr(struct hci_request *req, bdaddr_t *rpa) -{ - struct hci_dev *hdev = req->hdev; - - /* If we're advertising or initiating an LE connection we can't - * go ahead and change the random address at this time. This is - * because the eventual initiator address used for the - * subsequently created connection will be undefined (some - * controllers use the new address and others the one we had - * when the operation started). - * - * In this kind of scenario skip the update and let the random - * address be updated at the next cycle. 
- */ - if (hci_dev_test_flag(hdev, HCI_LE_ADV) || - hci_lookup_le_connect(hdev)) { - bt_dev_dbg(hdev, "Deferring random address update"); - hci_dev_set_flag(hdev, HCI_RPA_EXPIRED); - return; - } - - hci_req_add(req, HCI_OP_LE_SET_RANDOM_ADDR, 6, rpa); -} - -void hci_request_setup(struct hci_dev *hdev) -{ - INIT_DELAYED_WORK(&hdev->interleave_scan, interleave_scan_work); -} - -void hci_request_cancel_all(struct hci_dev *hdev) -{ - hci_cmd_sync_cancel_sync(hdev, ENODEV); - - cancel_interleave_scan(hdev); -} diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h deleted file mode 100644 index b730da4a8b47..000000000000 --- a/net/bluetooth/hci_request.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - BlueZ - Bluetooth protocol stack for Linux - Copyright (C) 2014 Intel Corporation - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License version 2 as - published by the Free Software Foundation; - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. - IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY - CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES - WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - - ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, - COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS - SOFTWARE IS DISCLAIMED. -*/ - -#include - -void hci_req_init(struct hci_request *req, struct hci_dev *hdev); -void hci_req_purge(struct hci_request *req); -bool hci_req_status_pend(struct hci_dev *hdev); -int hci_req_run(struct hci_request *req, hci_req_complete_t complete); -int hci_req_run_skb(struct hci_request *req, hci_req_complete_skb_t complete); -void hci_req_sync_complete(struct hci_dev *hdev, u8 result, u16 opcode, - struct sk_buff *skb); -void hci_req_add(struct hci_request *req, u16 opcode, u32 plen, - const void *param); -void hci_req_add_ev(struct hci_request *req, u16 opcode, u32 plen, - const void *param, u8 event); -void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status, - hci_req_complete_t *req_complete, - hci_req_complete_skb_t *req_complete_skb); - -int hci_req_sync(struct hci_dev *hdev, int (*req)(struct hci_request *req, - unsigned long opt), - unsigned long opt, u32 timeout, u8 *hci_status); -int __hci_req_sync(struct hci_dev *hdev, int (*func)(struct hci_request *req, - unsigned long opt), - unsigned long opt, u32 timeout, u8 *hci_status); - -struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode, u32 plen, - const void *param); - -void hci_req_add_le_scan_disable(struct hci_request *req, bool rpa_le_conn); -void hci_req_add_le_passive_scan(struct hci_request *req); - -void hci_request_setup(struct hci_dev *hdev); -void hci_request_cancel_all(struct hci_dev *hdev); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 3ab1558ff391..40d4887c7f79 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -33,7 +33,6 @@ #include #include -#include "hci_request.h" #include "smp.h" #include "mgmt_util.h" #include "mgmt_config.h" diff --git a/net/bluetooth/msft.c b/net/bluetooth/msft.c index d039683d3bdd..5a8ccc491b14 100644 --- a/net/bluetooth/msft.c 
+++ b/net/bluetooth/msft.c @@ -7,7 +7,6 @@ #include #include -#include "hci_request.h" #include "mgmt_util.h" #include "msft.h" -- cgit v1.2.3-58-ga151 From 2dd35600590148d843367c04975acad3c1a527c3 Mon Sep 17 00:00:00 2001 From: Kory Maincent Date: Tue, 9 Jul 2024 15:53:36 +0200 Subject: net: Change the API of PHY default timestamp to MAC Change the API to select MAC default time stamping instead of the PHY. Indeed the PHY is closer to the wire therefore theoretically it has less delay than the MAC timestamping but the reality is different. Due to lower time stamping clock frequency, latency in the MDIO bus and no PHC hardware synchronization between different PHY, the PHY PTP is often less precise than the MAC. The exception is for PHY designed specially for PTP case but these devices are not very widespread. For not breaking the compatibility default_timestamp flag has been introduced in phy_device that is set by the phy driver to know we are using the old API behavior. Reviewed-by: Rahul Rameshbabu Signed-off-by: Kory Maincent Link: https://patch.msgid.link/20240709-feature_ptp_netnext-v17-4-b5317f50df2a@bootlin.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/bcm-phy-ptp.c | 3 +++ drivers/net/phy/dp83640.c | 2 ++ drivers/net/phy/micrel.c | 6 ++++++ drivers/net/phy/mscc/mscc_ptp.c | 3 +++ drivers/net/phy/nxp-c45-tja11xx.c | 3 +++ include/linux/phy.h | 19 +++++++++++++++++++ net/core/dev_ioctl.c | 8 +++----- net/core/timestamping.c | 5 +++-- net/ethtool/common.c | 2 +- 9 files changed, 43 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/drivers/net/phy/bcm-phy-ptp.c b/drivers/net/phy/bcm-phy-ptp.c index 617d384d4551..d3e825c951ee 100644 --- a/drivers/net/phy/bcm-phy-ptp.c +++ b/drivers/net/phy/bcm-phy-ptp.c @@ -931,6 +931,9 @@ struct bcm_ptp_private *bcm_ptp_probe(struct phy_device *phydev) return ERR_CAST(clock); priv->ptp_clock = clock; + /* Timestamp selected by default to keep legacy API */ + phydev->default_timestamp = true; + priv->phydev = phydev; bcm_ptp_init(priv); diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c index 5c42c47dc564..d3e72d5c1472 100644 --- a/drivers/net/phy/dp83640.c +++ b/drivers/net/phy/dp83640.c @@ -1447,6 +1447,8 @@ static int dp83640_probe(struct phy_device *phydev) for (i = 0; i < MAX_RXTS; i++) list_add(&dp83640->rx_pool_data[i].list, &dp83640->rxpool); + /* Timestamp selected by default to keep legacy API */ + phydev->default_timestamp = true; phydev->mii_ts = &dp83640->mii_ts; phydev->priv = dp83640; diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index ebafedde0ab7..8d57225d8575 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -3781,6 +3781,9 @@ static void lan8814_ptp_init(struct phy_device *phydev) ptp_priv->mii_ts.ts_info = lan8814_ts_info; phydev->mii_ts = &ptp_priv->mii_ts; + + /* Timestamp selected by default to keep legacy API */ + phydev->default_timestamp = true; } static int lan8814_ptp_probe_once(struct phy_device *phydev) @@ -5279,6 +5282,9 @@ static int lan8841_probe(struct phy_device *phydev) phydev->mii_ts = &ptp_priv->mii_ts; + /* Timestamp selected by default to keep legacy API */ + phydev->default_timestamp = true; + return 0; } diff --git a/drivers/net/phy/mscc/mscc_ptp.c b/drivers/net/phy/mscc/mscc_ptp.c index eb0b032cb613..e66d20eff7c4 100644 --- a/drivers/net/phy/mscc/mscc_ptp.c +++ b/drivers/net/phy/mscc/mscc_ptp.c @@ -1570,6 +1570,9 @@ int vsc8584_ptp_probe(struct phy_device *phydev) return PTR_ERR(vsc8531->load_save); } + /* Timestamp selected by 
default to keep legacy API */ + phydev->default_timestamp = true; + vsc8531->ptp->phydev = phydev; return 0; diff --git a/drivers/net/phy/nxp-c45-tja11xx.c b/drivers/net/phy/nxp-c45-tja11xx.c index 3cf614b4cd52..d18c133e6013 100644 --- a/drivers/net/phy/nxp-c45-tja11xx.c +++ b/drivers/net/phy/nxp-c45-tja11xx.c @@ -1660,6 +1660,9 @@ static int nxp_c45_probe(struct phy_device *phydev) priv->mii_ts.ts_info = nxp_c45_ts_info; phydev->mii_ts = &priv->mii_ts; ret = nxp_c45_init_ptp_clock(priv); + + /* Timestamp selected by default to keep legacy API */ + phydev->default_timestamp = true; } else { phydev_dbg(phydev, "PTP support not enabled even if the phy supports it"); } diff --git a/include/linux/phy.h b/include/linux/phy.h index bd68f9d8e74f..e7a38137211c 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -616,6 +616,8 @@ struct macsec_ops; * handling shall be postponed until PHY has resumed * @irq_rerun: Flag indicating interrupts occurred while PHY was suspended, * requiring a rerun of the interrupt handler after resume + * @default_timestamp: Flag indicating whether we are using the phy + * timestamp as the default one * @interface: enum phy_interface_t value * @possible_interfaces: bitmap if interface modes that the attached PHY * will switch between depending on media speed. @@ -681,6 +683,8 @@ struct phy_device { unsigned irq_suspended:1; unsigned irq_rerun:1; + unsigned default_timestamp:1; + int rate_matching; enum phy_state state; @@ -1625,6 +1629,21 @@ static inline void phy_txtstamp(struct phy_device *phydev, struct sk_buff *skb, phydev->mii_ts->txtstamp(phydev->mii_ts, skb, type); } +/** + * phy_is_default_hwtstamp - Is the PHY hwtstamp the default timestamp + * @phydev: Pointer to phy_device + * + * This is used to get default timestamping device taking into account + * the new API choice, which is selecting the timestamping from MAC by + * default if the phydev does not have default_timestamp flag enabled. + * + * Return: True if phy is the default hw timestamp, false otherwise. + */ +static inline bool phy_is_default_hwtstamp(struct phy_device *phydev) +{ + return phy_has_hwtstamp(phydev) && phydev->default_timestamp; +} + /** * phy_is_internal - Convenience function for testing if a PHY is internal * @phydev: the phy_device struct diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index b9719ed3c3fd..8592c052c0f4 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -259,9 +259,7 @@ static int dev_eth_ioctl(struct net_device *dev, * @dev: Network device * @cfg: Timestamping configuration structure * - * Helper for enforcing a common policy that phylib timestamping, if available, - * should take precedence in front of hardware timestamping provided by the - * netdev. + * Helper for calling the default hardware provider timestamping. * * Note: phy_mii_ioctl() only handles SIOCSHWTSTAMP (not SIOCGHWTSTAMP), and * there only exists a phydev->mii_ts->hwtstamp() method. 
So this will return @@ -271,7 +269,7 @@ static int dev_eth_ioctl(struct net_device *dev, static int dev_get_hwtstamp_phylib(struct net_device *dev, struct kernel_hwtstamp_config *cfg) { - if (phy_has_hwtstamp(dev->phydev)) + if (phy_is_default_hwtstamp(dev->phydev)) return phy_hwtstamp_get(dev->phydev, cfg); return dev->netdev_ops->ndo_hwtstamp_get(dev, cfg); @@ -327,7 +325,7 @@ int dev_set_hwtstamp_phylib(struct net_device *dev, struct netlink_ext_ack *extack) { const struct net_device_ops *ops = dev->netdev_ops; - bool phy_ts = phy_has_hwtstamp(dev->phydev); + bool phy_ts = phy_is_default_hwtstamp(dev->phydev); struct kernel_hwtstamp_config old_cfg = {}; bool changed = false; int err; diff --git a/net/core/timestamping.c b/net/core/timestamping.c index 04840697fe79..3717fb152ecc 100644 --- a/net/core/timestamping.c +++ b/net/core/timestamping.c @@ -25,7 +25,8 @@ void skb_clone_tx_timestamp(struct sk_buff *skb) struct sk_buff *clone; unsigned int type; - if (!skb->sk) + if (!skb->sk || !skb->dev || + !phy_is_default_hwtstamp(skb->dev->phydev)) return; type = classify(skb); @@ -47,7 +48,7 @@ bool skb_defer_rx_timestamp(struct sk_buff *skb) struct mii_timestamper *mii_ts; unsigned int type; - if (!skb->dev || !skb->dev->phydev || !skb->dev->phydev->mii_ts) + if (!skb->dev || !phy_is_default_hwtstamp(skb->dev->phydev)) return false; if (skb_headroom(skb) < ETH_HLEN) diff --git a/net/ethtool/common.c b/net/ethtool/common.c index 6b2a360dcdf0..01b7550f12c6 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ -637,7 +637,7 @@ int __ethtool_get_ts_info(struct net_device *dev, struct ethtool_ts_info *info) memset(info, 0, sizeof(*info)); info->cmd = ETHTOOL_GET_TS_INFO; - if (phy_has_tsinfo(phydev)) + if (phy_is_default_hwtstamp(phydev) && phy_has_tsinfo(phydev)) return phy_ts_info(phydev, info); if (ops->get_ts_info) return ops->get_ts_info(dev, info); -- cgit v1.2.3-58-ga151 From 2111375b85ad173d58e7b8604246a3de60950ac8 Mon Sep 17 00:00:00 2001 From: Kory Maincent Date: Tue, 9 Jul 2024 15:53:38 +0200 Subject: net: Add struct kernel_ethtool_ts_info In prevision to add new UAPI for hwtstamp we will be limited to the struct ethtool_ts_info that is currently passed in fixed binary format through the ETHTOOL_GET_TS_INFO ethtool ioctl. It would be good if new kernel code already started operating on an extensible kernel variant of that structure, similar in concept to struct kernel_hwtstamp_config vs struct hwtstamp_config. Since struct ethtool_ts_info is in include/uapi/linux/ethtool.h, here we introduce the kernel-only structure in include/linux/ethtool.h. The manual copy is then made in the function called by ETHTOOL_GET_TS_INFO. 
Acked-by: Shannon Nelson Acked-by: Alexandra Winter Signed-off-by: Kory Maincent Link: https://patch.msgid.link/20240709-feature_ptp_netnext-v17-6-b5317f50df2a@bootlin.com Signed-off-by: Jakub Kicinski --- drivers/net/bonding/bond_main.c | 4 ++-- drivers/net/can/dev/dev.c | 2 +- drivers/net/can/peak_canfd/peak_canfd.c | 2 +- drivers/net/can/usb/gs_usb.c | 2 +- drivers/net/can/usb/peak_usb/pcan_usb_core.c | 2 +- drivers/net/can/usb/peak_usb/pcan_usb_core.h | 2 +- drivers/net/dsa/hirschmann/hellcreek_hwtstamp.c | 2 +- drivers/net/dsa/hirschmann/hellcreek_hwtstamp.h | 2 +- drivers/net/dsa/microchip/ksz_ptp.c | 2 +- drivers/net/dsa/microchip/ksz_ptp.h | 2 +- drivers/net/dsa/mv88e6xxx/hwtstamp.c | 2 +- drivers/net/dsa/mv88e6xxx/hwtstamp.h | 4 ++-- drivers/net/dsa/ocelot/felix.c | 2 +- drivers/net/dsa/sja1105/sja1105_ptp.c | 2 +- drivers/net/dsa/sja1105/sja1105_ptp.h | 2 +- drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c | 2 +- .../net/ethernet/aquantia/atlantic/aq_ethtool.c | 2 +- .../net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c | 2 +- drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 2 +- drivers/net/ethernet/broadcom/tg3.c | 2 +- drivers/net/ethernet/cadence/macb.h | 2 +- drivers/net/ethernet/cadence/macb_main.c | 4 ++-- drivers/net/ethernet/cavium/liquidio/lio_ethtool.c | 2 +- .../net/ethernet/cavium/thunder/nicvf_ethtool.c | 2 +- drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c | 2 +- drivers/net/ethernet/cisco/enic/enic_ethtool.c | 2 +- drivers/net/ethernet/engleder/tsnep_ethtool.c | 2 +- drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c | 2 +- .../net/ethernet/freescale/dpaa2/dpaa2-ethtool.c | 2 +- .../net/ethernet/freescale/enetc/enetc_ethtool.c | 2 +- drivers/net/ethernet/freescale/fec_main.c | 2 +- drivers/net/ethernet/freescale/gianfar_ethtool.c | 2 +- .../net/ethernet/fungible/funeth/funeth_ethtool.c | 2 +- drivers/net/ethernet/hisilicon/hns3/hnae3.h | 2 +- drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c | 2 +- .../net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c | 2 +- .../net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.h | 2 +- drivers/net/ethernet/intel/e1000e/ethtool.c | 2 +- drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 2 +- drivers/net/ethernet/intel/ice/ice_ethtool.c | 2 +- drivers/net/ethernet/intel/igb/igb_ethtool.c | 2 +- drivers/net/ethernet/intel/igc/igc_ethtool.c | 2 +- drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c | 2 +- drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 2 +- .../ethernet/marvell/octeontx2/nic/otx2_ethtool.c | 2 +- drivers/net/ethernet/mellanox/mlx4/en_ethtool.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en.h | 2 +- .../net/ethernet/mellanox/mlx5/core/en_ethtool.c | 4 ++-- .../ethernet/mellanox/mlx5/core/ipoib/ethtool.c | 2 +- drivers/net/ethernet/mellanox/mlxsw/spectrum.h | 2 +- .../net/ethernet/mellanox/mlxsw/spectrum_ethtool.c | 2 +- drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c | 4 ++-- drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.h | 10 ++++----- drivers/net/ethernet/microchip/lan743x_ethtool.c | 2 +- .../ethernet/microchip/lan966x/lan966x_ethtool.c | 2 +- .../net/ethernet/microchip/sparx5/sparx5_ethtool.c | 2 +- drivers/net/ethernet/mscc/ocelot_net.c | 2 +- drivers/net/ethernet/mscc/ocelot_ptp.c | 2 +- .../net/ethernet/pensando/ionic/ionic_ethtool.c | 2 +- drivers/net/ethernet/qlogic/qede/qede_ethtool.c | 2 +- drivers/net/ethernet/qlogic/qede/qede_ptp.c | 2 +- drivers/net/ethernet/qlogic/qede/qede_ptp.h | 2 +- drivers/net/ethernet/renesas/ravb_main.c | 2 +- drivers/net/ethernet/renesas/rswitch.c | 2 +- 
drivers/net/ethernet/renesas/rtsn.c | 2 +- drivers/net/ethernet/sfc/ethtool.c | 2 +- drivers/net/ethernet/sfc/falcon/nic.h | 2 +- drivers/net/ethernet/sfc/ptp.c | 2 +- drivers/net/ethernet/sfc/ptp.h | 5 +++-- drivers/net/ethernet/sfc/siena/ethtool.c | 2 +- drivers/net/ethernet/sfc/siena/ptp.c | 2 +- drivers/net/ethernet/sfc/siena/ptp.h | 4 ++-- .../net/ethernet/stmicro/stmmac/stmmac_ethtool.c | 2 +- drivers/net/ethernet/ti/am65-cpsw-ethtool.c | 2 +- drivers/net/ethernet/ti/cpsw_ethtool.c | 4 ++-- drivers/net/ethernet/ti/cpsw_priv.h | 2 +- drivers/net/ethernet/ti/icssg/icssg_ethtool.c | 2 +- drivers/net/ethernet/ti/netcp_ethss.c | 4 ++-- drivers/net/ethernet/xscale/ixp4xx_eth.c | 2 +- drivers/net/macvlan.c | 2 +- drivers/net/netdevsim/ethtool.c | 2 +- drivers/net/phy/bcm-phy-ptp.c | 2 +- drivers/net/phy/dp83640.c | 2 +- drivers/net/phy/micrel.c | 4 ++-- drivers/net/phy/mscc/mscc_ptp.c | 2 +- drivers/net/phy/nxp-c45-tja11xx.c | 2 +- drivers/ptp/ptp_ines.c | 2 +- drivers/s390/net/qeth_ethtool.c | 2 +- include/linux/can/dev.h | 2 +- include/linux/ethtool.h | 25 +++++++++++++++++++--- include/linux/mii_timestamper.h | 2 +- include/linux/phy.h | 2 +- include/net/dsa.h | 2 +- include/soc/mscc/ocelot.h | 2 +- net/8021q/vlan_dev.c | 2 +- net/dsa/user.c | 2 +- net/ethtool/common.c | 6 +++--- net/ethtool/common.h | 2 +- net/ethtool/ioctl.c | 14 +++++++++--- net/ethtool/tsinfo.c | 6 +++--- net/sched/sch_taprio.c | 2 +- 101 files changed, 151 insertions(+), 123 deletions(-) (limited to 'net') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index d19aabf5d4fb..af9ddd3902cc 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -5755,10 +5755,10 @@ static void bond_ethtool_get_drvinfo(struct net_device *bond_dev, } static int bond_ethtool_get_ts_info(struct net_device *bond_dev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct bonding *bond = netdev_priv(bond_dev); - struct ethtool_ts_info ts_info; + struct kernel_ethtool_ts_info ts_info; struct net_device *real_dev; bool sw_tx_support = false; struct list_head *iter; diff --git a/drivers/net/can/dev/dev.c b/drivers/net/can/dev/dev.c index 83e724e0ab87..87828f953073 100644 --- a/drivers/net/can/dev/dev.c +++ b/drivers/net/can/dev/dev.c @@ -376,7 +376,7 @@ EXPORT_SYMBOL(can_eth_ioctl_hwts); * supporting hardware timestamps */ int can_ethtool_op_get_ts_info_hwts(struct net_device *dev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE | diff --git a/drivers/net/can/peak_canfd/peak_canfd.c b/drivers/net/can/peak_canfd/peak_canfd.c index 31c9c127e24b..b50005397463 100644 --- a/drivers/net/can/peak_canfd/peak_canfd.c +++ b/drivers/net/can/peak_canfd/peak_canfd.c @@ -777,7 +777,7 @@ static const struct net_device_ops peak_canfd_netdev_ops = { }; static int peak_get_ts_info(struct net_device *dev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE | diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c index 340297e3bec7..bc86e9b329fd 100644 --- a/drivers/net/can/usb/gs_usb.c +++ b/drivers/net/can/usb/gs_usb.c @@ -1148,7 +1148,7 @@ static int gs_usb_set_phys_id(struct net_device *netdev, } static int gs_usb_get_ts_info(struct net_device *netdev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct gs_can *dev = netdev_priv(netdev); diff --git 
a/drivers/net/can/usb/peak_usb/pcan_usb_core.c b/drivers/net/can/usb/peak_usb/pcan_usb_core.c index 1efa39e134f4..3d68fef46ded 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb_core.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb_core.c @@ -897,7 +897,7 @@ int peak_usb_set_eeprom(struct net_device *netdev, return 0; } -int pcan_get_ts_info(struct net_device *dev, struct ethtool_ts_info *info) +int pcan_get_ts_info(struct net_device *dev, struct kernel_ethtool_ts_info *info) { info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE | diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_core.h b/drivers/net/can/usb/peak_usb/pcan_usb_core.h index f6cf84bb718f..abab00930b9d 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb_core.h +++ b/drivers/net/can/usb/peak_usb/pcan_usb_core.h @@ -145,7 +145,7 @@ void peak_usb_get_ts_time(struct peak_time_ref *time_ref, u32 ts, ktime_t *tv); int peak_usb_netif_rx_64(struct sk_buff *skb, u32 ts_low, u32 ts_high); void peak_usb_async_complete(struct urb *urb); void peak_usb_restart_complete(struct peak_usb_device *dev); -int pcan_get_ts_info(struct net_device *dev, struct ethtool_ts_info *info); +int pcan_get_ts_info(struct net_device *dev, struct kernel_ethtool_ts_info *info); /* common 32-bit CAN channel ID ethtool management */ int peak_usb_get_eeprom_len(struct net_device *netdev); diff --git a/drivers/net/dsa/hirschmann/hellcreek_hwtstamp.c b/drivers/net/dsa/hirschmann/hellcreek_hwtstamp.c index bd7aacc71a63..ca2500aba96f 100644 --- a/drivers/net/dsa/hirschmann/hellcreek_hwtstamp.c +++ b/drivers/net/dsa/hirschmann/hellcreek_hwtstamp.c @@ -16,7 +16,7 @@ #include "hellcreek_ptp.h" int hellcreek_get_ts_info(struct dsa_switch *ds, int port, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct hellcreek *hellcreek = ds->priv; diff --git a/drivers/net/dsa/hirschmann/hellcreek_hwtstamp.h b/drivers/net/dsa/hirschmann/hellcreek_hwtstamp.h index 71af77efb28b..7d88da2134f2 100644 --- a/drivers/net/dsa/hirschmann/hellcreek_hwtstamp.h +++ b/drivers/net/dsa/hirschmann/hellcreek_hwtstamp.h @@ -48,7 +48,7 @@ void hellcreek_port_txtstamp(struct dsa_switch *ds, int port, struct sk_buff *skb); int hellcreek_get_ts_info(struct dsa_switch *ds, int port, - struct ethtool_ts_info *info); + struct kernel_ethtool_ts_info *info); long hellcreek_hwtstamp_work(struct ptp_clock_info *ptp); diff --git a/drivers/net/dsa/microchip/ksz_ptp.c b/drivers/net/dsa/microchip/ksz_ptp.c index 1fe105913c75..f0bd46e5d4ec 100644 --- a/drivers/net/dsa/microchip/ksz_ptp.c +++ b/drivers/net/dsa/microchip/ksz_ptp.c @@ -293,7 +293,7 @@ static int ksz_ptp_enable_mode(struct ksz_device *dev) /* The function is return back the capability of timestamping feature when * requested through ethtool -T utility */ -int ksz_get_ts_info(struct dsa_switch *ds, int port, struct ethtool_ts_info *ts) +int ksz_get_ts_info(struct dsa_switch *ds, int port, struct kernel_ethtool_ts_info *ts) { struct ksz_device *dev = ds->priv; struct ksz_ptp_data *ptp_data; diff --git a/drivers/net/dsa/microchip/ksz_ptp.h b/drivers/net/dsa/microchip/ksz_ptp.h index 0ca8ca4f804e..2f1783c0d723 100644 --- a/drivers/net/dsa/microchip/ksz_ptp.h +++ b/drivers/net/dsa/microchip/ksz_ptp.h @@ -38,7 +38,7 @@ int ksz_ptp_clock_register(struct dsa_switch *ds); void ksz_ptp_clock_unregister(struct dsa_switch *ds); int ksz_get_ts_info(struct dsa_switch *ds, int port, - struct ethtool_ts_info *ts); + struct kernel_ethtool_ts_info *ts); int ksz_hwtstamp_get(struct dsa_switch *ds, int port, struct ifreq *ifr); int 
ksz_hwtstamp_set(struct dsa_switch *ds, int port, struct ifreq *ifr); void ksz_port_txtstamp(struct dsa_switch *ds, int port, struct sk_buff *skb); diff --git a/drivers/net/dsa/mv88e6xxx/hwtstamp.c b/drivers/net/dsa/mv88e6xxx/hwtstamp.c index 331b4ca089ff..49e6e1355142 100644 --- a/drivers/net/dsa/mv88e6xxx/hwtstamp.c +++ b/drivers/net/dsa/mv88e6xxx/hwtstamp.c @@ -64,7 +64,7 @@ static int mv88e6xxx_ptp_read(struct mv88e6xxx_chip *chip, int addr, #define TX_TSTAMP_TIMEOUT msecs_to_jiffies(40) int mv88e6xxx_get_ts_info(struct dsa_switch *ds, int port, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { const struct mv88e6xxx_ptp_ops *ptp_ops; struct mv88e6xxx_chip *chip; diff --git a/drivers/net/dsa/mv88e6xxx/hwtstamp.h b/drivers/net/dsa/mv88e6xxx/hwtstamp.h index cf7fb6d660b1..85acc758e3eb 100644 --- a/drivers/net/dsa/mv88e6xxx/hwtstamp.h +++ b/drivers/net/dsa/mv88e6xxx/hwtstamp.h @@ -121,7 +121,7 @@ void mv88e6xxx_port_txtstamp(struct dsa_switch *ds, int port, struct sk_buff *skb); int mv88e6xxx_get_ts_info(struct dsa_switch *ds, int port, - struct ethtool_ts_info *info); + struct kernel_ethtool_ts_info *info); int mv88e6xxx_hwtstamp_setup(struct mv88e6xxx_chip *chip); void mv88e6xxx_hwtstamp_free(struct mv88e6xxx_chip *chip); @@ -157,7 +157,7 @@ static inline void mv88e6xxx_port_txtstamp(struct dsa_switch *ds, int port, } static inline int mv88e6xxx_get_ts_info(struct dsa_switch *ds, int port, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { return -EOPNOTSUPP; } diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c index d12c4e85baa7..e554699f06d4 100644 --- a/drivers/net/dsa/ocelot/felix.c +++ b/drivers/net/dsa/ocelot/felix.c @@ -1234,7 +1234,7 @@ static int felix_get_sset_count(struct dsa_switch *ds, int port, int sset) } static int felix_get_ts_info(struct dsa_switch *ds, int port, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct ocelot *ocelot = ds->priv; diff --git a/drivers/net/dsa/sja1105/sja1105_ptp.c b/drivers/net/dsa/sja1105/sja1105_ptp.c index a7d41e781398..a1f4ca6ad888 100644 --- a/drivers/net/dsa/sja1105/sja1105_ptp.c +++ b/drivers/net/dsa/sja1105/sja1105_ptp.c @@ -111,7 +111,7 @@ int sja1105_hwtstamp_get(struct dsa_switch *ds, int port, struct ifreq *ifr) } int sja1105_get_ts_info(struct dsa_switch *ds, int port, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct sja1105_private *priv = ds->priv; struct sja1105_ptp_data *ptp_data = &priv->ptp_data; diff --git a/drivers/net/dsa/sja1105/sja1105_ptp.h b/drivers/net/dsa/sja1105/sja1105_ptp.h index 416461ee95d2..8add2bd5f728 100644 --- a/drivers/net/dsa/sja1105/sja1105_ptp.h +++ b/drivers/net/dsa/sja1105/sja1105_ptp.h @@ -101,7 +101,7 @@ void sja1105pqrs_ptp_cmd_packing(u8 *buf, struct sja1105_ptp_cmd *cmd, enum packing_op op); int sja1105_get_ts_info(struct dsa_switch *ds, int port, - struct ethtool_ts_info *ts); + struct kernel_ethtool_ts_info *ts); void sja1105_ptp_txtstamp_skb(struct dsa_switch *ds, int slot, struct sk_buff *clone); diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c b/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c index 58e7e88aae5b..21407a26f806 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c @@ -577,7 +577,7 @@ static int xgbe_set_rxfh(struct net_device *netdev, } static int xgbe_get_ts_info(struct net_device *netdev, - struct ethtool_ts_info *ts_info) + struct kernel_ethtool_ts_info *ts_info) { struct xgbe_prv_data 
*pdata = netdev_priv(netdev); diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c index a2606ee3b0a5..d0aecd1d7357 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c @@ -652,7 +652,7 @@ static int aq_ethtool_set_wol(struct net_device *ndev, } static int aq_ethtool_get_ts_info(struct net_device *ndev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct aq_nic_s *aq_nic = netdev_priv(ndev); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c index 58956ed8f531..c7b56a5e5425 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c @@ -3634,7 +3634,7 @@ static int bnx2x_set_channels(struct net_device *dev, } static int bnx2x_get_ts_info(struct net_device *dev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct bnx2x *bp = netdev_priv(dev); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index bf157f6cc042..fcbf38f79c23 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -5013,7 +5013,7 @@ static int bnxt_get_dump_data(struct net_device *dev, struct ethtool_dump *dump, } static int bnxt_get_ts_info(struct net_device *dev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct bnxt *bp = netdev_priv(dev); struct bnxt_ptp_cfg *ptp; diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 1589a49b876c..0ec5f01551f9 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -6141,7 +6141,7 @@ static void tg3_refclk_write(struct tg3 *tp, u64 newval) static inline void tg3_full_lock(struct tg3 *tp, int irq_sync); static inline void tg3_full_unlock(struct tg3 *tp); -static int tg3_get_ts_info(struct net_device *dev, struct ethtool_ts_info *info) +static int tg3_get_ts_info(struct net_device *dev, struct kernel_ethtool_ts_info *info) { struct tg3 *tp = netdev_priv(dev); diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h index 122663ff7834..ea71612f6b36 100644 --- a/drivers/net/ethernet/cadence/macb.h +++ b/drivers/net/ethernet/cadence/macb.h @@ -1168,7 +1168,7 @@ struct macb_ptp_info { s32 (*get_ptp_max_adj)(void); unsigned int (*get_tsu_rate)(struct macb *bp); int (*get_ts_info)(struct net_device *dev, - struct ethtool_ts_info *info); + struct kernel_ethtool_ts_info *info); int (*get_hwtst)(struct net_device *netdev, struct kernel_hwtstamp_config *tstamp_config); int (*set_hwtst)(struct net_device *netdev, diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index cecc3d6e630f..11665be3a22c 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -3399,7 +3399,7 @@ static s32 gem_get_ptp_max_adj(void) } static int gem_get_ts_info(struct net_device *dev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct macb *bp = netdev_priv(dev); @@ -3440,7 +3440,7 @@ static struct macb_ptp_info gem_ptp_info = { #endif static int macb_get_ts_info(struct net_device *netdev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct macb *bp = netdev_priv(netdev); diff --git 
a/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c b/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c index d3e07b6ed5e1..5835965dbc32 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c @@ -2497,7 +2497,7 @@ ret_intrmod: } static int lio_get_ts_info(struct net_device *netdev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct lio *lio = GET_LIO(netdev); diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c b/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c index 34125b8cd935..6a04d2530176 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c @@ -836,7 +836,7 @@ static int nicvf_set_pauseparam(struct net_device *dev, } static int nicvf_get_ts_info(struct net_device *netdev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct nicvf *nic = netdev_priv(netdev); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c index 47eecde36285..3d091947ae00 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c @@ -1550,7 +1550,7 @@ out_free_fw: return ret; } -static int get_ts_info(struct net_device *dev, struct ethtool_ts_info *ts_info) +static int get_ts_info(struct net_device *dev, struct kernel_ethtool_ts_info *ts_info) { struct port_info *pi = netdev_priv(dev); struct adapter *adapter = pi->adapter; diff --git a/drivers/net/ethernet/cisco/enic/enic_ethtool.c b/drivers/net/ethernet/cisco/enic/enic_ethtool.c index a42f3f280f3e..f2f1055880b2 100644 --- a/drivers/net/ethernet/cisco/enic/enic_ethtool.c +++ b/drivers/net/ethernet/cisco/enic/enic_ethtool.c @@ -599,7 +599,7 @@ static int enic_set_rxfh(struct net_device *netdev, } static int enic_get_ts_info(struct net_device *netdev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE | SOF_TIMESTAMPING_RX_SOFTWARE | diff --git a/drivers/net/ethernet/engleder/tsnep_ethtool.c b/drivers/net/ethernet/engleder/tsnep_ethtool.c index 65ec1abc9442..9aa286ba1f00 100644 --- a/drivers/net/ethernet/engleder/tsnep_ethtool.c +++ b/drivers/net/ethernet/engleder/tsnep_ethtool.c @@ -305,7 +305,7 @@ static void tsnep_ethtool_get_channels(struct net_device *netdev, } static int tsnep_ethtool_get_ts_info(struct net_device *netdev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct tsnep_adapter *adapter = netdev_priv(netdev); diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c b/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c index 5bd0b36d1feb..0e923d805732 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c @@ -394,7 +394,7 @@ static int dpaa_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) } static int dpaa_get_ts_info(struct net_device *net_dev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct device *dev = net_dev->dev.parent; struct device_node *mac_node = dev->of_node; diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c index e80e9388c71f..7f476519b7ad 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c @@ -794,7 +794,7 @@ int dpaa2_phc_index = -1; 
EXPORT_SYMBOL(dpaa2_phc_index); static int dpaa2_eth_get_ts_info(struct net_device *dev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { if (!dpaa2_ptp) return ethtool_op_get_ts_info(dev, info); diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c index f7753ea5b57e..5e684b23c5f5 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c @@ -841,7 +841,7 @@ static int enetc_set_coalesce(struct net_device *ndev, } static int enetc_get_ts_info(struct net_device *ndev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { int *phc_idx; diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index fb19295529a2..a923cb95cdc6 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -2768,7 +2768,7 @@ static void fec_enet_get_regs(struct net_device *ndev, } static int fec_enet_get_ts_info(struct net_device *ndev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct fec_enet_private *fep = netdev_priv(ndev); diff --git a/drivers/net/ethernet/freescale/gianfar_ethtool.c b/drivers/net/ethernet/freescale/gianfar_ethtool.c index 7a15b9245698..f581402ad740 100644 --- a/drivers/net/ethernet/freescale/gianfar_ethtool.c +++ b/drivers/net/ethernet/freescale/gianfar_ethtool.c @@ -1448,7 +1448,7 @@ static int gfar_get_nfc(struct net_device *dev, struct ethtool_rxnfc *cmd, } static int gfar_get_ts_info(struct net_device *dev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct gfar_private *priv = netdev_priv(dev); struct platform_device *ptp_dev; diff --git a/drivers/net/ethernet/fungible/funeth/funeth_ethtool.c b/drivers/net/ethernet/fungible/funeth/funeth_ethtool.c index 4edd0adfc6c7..7f081e6e8c87 100644 --- a/drivers/net/ethernet/fungible/funeth/funeth_ethtool.c +++ b/drivers/net/ethernet/fungible/funeth/funeth_ethtool.c @@ -1040,7 +1040,7 @@ static int fun_set_rxfh(struct net_device *netdev, } static int fun_get_ts_info(struct net_device *netdev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { info->so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE | SOF_TIMESTAMPING_RX_HARDWARE | diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h index 7cebb08bd320..27dbe367f3d3 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h @@ -786,7 +786,7 @@ struct hnae3_ae_ops { void (*get_rx_hwts)(struct hnae3_handle *handle, struct sk_buff *skb, u32 nsec, u32 sec); int (*get_ts_info)(struct hnae3_handle *handle, - struct ethtool_ts_info *info); + struct kernel_ethtool_ts_info *info); int (*get_link_diagnosis_info)(struct hnae3_handle *handle, u32 *status_code); void (*clean_vf_config)(struct hnae3_ae_dev *ae_dev, int num_vfs); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c index 941cb529d671..b1e988347347 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c @@ -2009,7 +2009,7 @@ static int hns3_set_tunable(struct net_device *netdev, ETHTOOL_RING_USE_TX_PUSH) static int hns3_get_ts_info(struct net_device *netdev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct hnae3_handle *handle = hns3_get_handle(netdev); diff 
--git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c index 507d7ce26d83..5fff8ed388f8 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c @@ -378,7 +378,7 @@ int hclge_ptp_set_cfg(struct hclge_dev *hdev, struct ifreq *ifr) } int hclge_ptp_get_ts_info(struct hnae3_handle *handle, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct hclge_vport *vport = hclge_get_vport(handle); struct hclge_dev *hdev = vport->back; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.h index bbee74cd8404..63483636c074 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.h @@ -138,6 +138,6 @@ int hclge_ptp_set_cfg(struct hclge_dev *hdev, struct ifreq *ifr); int hclge_ptp_init(struct hclge_dev *hdev); void hclge_ptp_uninit(struct hclge_dev *hdev); int hclge_ptp_get_ts_info(struct hnae3_handle *handle, - struct ethtool_ts_info *info); + struct kernel_ethtool_ts_info *info); int hclge_ptp_cfg_qry(struct hclge_dev *hdev, u32 *cfg); #endif diff --git a/drivers/net/ethernet/intel/e1000e/ethtool.c b/drivers/net/ethernet/intel/e1000e/ethtool.c index 85da20778e0f..9364bc2b4eb1 100644 --- a/drivers/net/ethernet/intel/e1000e/ethtool.c +++ b/drivers/net/ethernet/intel/e1000e/ethtool.c @@ -2263,7 +2263,7 @@ static int e1000e_set_eee(struct net_device *netdev, struct ethtool_keee *edata) } static int e1000e_get_ts_info(struct net_device *netdev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct e1000_adapter *adapter = netdev_priv(netdev); diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 4e28785c9fb2..1d0d2e526adb 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -2546,7 +2546,7 @@ static void i40e_get_strings(struct net_device *netdev, u32 stringset, } static int i40e_get_ts_info(struct net_device *dev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct i40e_pf *pf = i40e_netdev_to_pf(dev); diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index 62c8205fceba..b8f142f04330 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -3434,7 +3434,7 @@ ice_set_rxfh(struct net_device *netdev, struct ethtool_rxfh_param *rxfh, } static int -ice_get_ts_info(struct net_device *dev, struct ethtool_ts_info *info) +ice_get_ts_info(struct net_device *dev, struct kernel_ethtool_ts_info *info) { struct ice_pf *pf = ice_netdev_to_pf(dev); diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index 61d72250c0ed..06b9970dffad 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -2381,7 +2381,7 @@ static void igb_get_strings(struct net_device *netdev, u32 stringset, u8 *data) } static int igb_get_ts_info(struct net_device *dev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct igb_adapter *adapter = netdev_priv(dev); diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c index 0cd2bd695db1..9c3adb18f922 100644 --- 
a/drivers/net/ethernet/intel/igc/igc_ethtool.c +++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c @@ -1559,7 +1559,7 @@ static int igc_ethtool_set_channels(struct net_device *netdev, } static int igc_ethtool_get_ts_info(struct net_device *dev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct igc_adapter *adapter = netdev_priv(dev); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index 6e6e6f1847b6..4cac76254966 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -3170,7 +3170,7 @@ static int ixgbe_set_rxfh(struct net_device *netdev, } static int ixgbe_get_ts_info(struct net_device *dev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct ixgbe_adapter *adapter = netdev_priv(dev); diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 9adf4301c9b1..8c45ad983abc 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -5259,7 +5259,7 @@ static int mvpp2_get_ts_config(struct mvpp2_port *port, struct ifreq *ifr) } static int mvpp2_ethtool_get_ts_info(struct net_device *dev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct mvpp2_port *port = netdev_priv(dev); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c index 7f786de61014..0db62eb0dab3 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c @@ -954,7 +954,7 @@ static u32 otx2_get_link(struct net_device *netdev) } static int otx2_get_ts_info(struct net_device *netdev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct otx2_nic *pfvf = netdev_priv(netdev); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index 0606f18e5bbe..943d6918c2ec 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -1884,7 +1884,7 @@ out: } static int mlx4_en_get_ts_info(struct net_device *dev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 6a343a8f162f..5fd82c67b6ab 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -1191,7 +1191,7 @@ int mlx5e_set_per_queue_coalesce(struct net_device *dev, u32 queue, u32 mlx5e_ethtool_get_rxfh_key_size(struct mlx5e_priv *priv); u32 mlx5e_ethtool_get_rxfh_indir_size(struct mlx5e_priv *priv); int mlx5e_ethtool_get_ts_info(struct mlx5e_priv *priv, - struct ethtool_ts_info *info); + struct kernel_ethtool_ts_info *info); int mlx5e_ethtool_flash_device(struct mlx5e_priv *priv, struct ethtool_flash *flash); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 3320f12ba2db..cbb3945529d7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -1658,7 +1658,7 @@ static int mlx5e_set_pauseparam(struct net_device *netdev, } int mlx5e_ethtool_get_ts_info(struct mlx5e_priv 
*priv, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct mlx5_core_dev *mdev = priv->mdev; @@ -1682,7 +1682,7 @@ int mlx5e_ethtool_get_ts_info(struct mlx5e_priv *priv, } static int mlx5e_get_ts_info(struct net_device *dev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct mlx5e_priv *priv = netdev_priv(dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c index 779d92b762d3..905bdbaffb9a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c @@ -136,7 +136,7 @@ static int mlx5i_get_coalesce(struct net_device *netdev, } static int mlx5i_get_ts_info(struct net_device *netdev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct mlx5e_priv *priv = mlx5i_epriv(netdev); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index bb0586b45c8d..8d3c61287696 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -238,7 +238,7 @@ struct mlxsw_sp_ptp_ops { struct hwtstamp_config *config); void (*shaper_work)(struct work_struct *work); int (*get_ts_info)(struct mlxsw_sp *mlxsw_sp, - struct ethtool_ts_info *info); + struct kernel_ethtool_ts_info *info); int (*get_stats_count)(void); void (*get_stats_strings)(u8 **p); void (*get_stats)(struct mlxsw_sp_port *mlxsw_sp_port, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c index c79da1411d33..2bed8c86b7cf 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c @@ -1082,7 +1082,7 @@ mlxsw_sp_set_module_eeprom_by_page(struct net_device *dev, } static int -mlxsw_sp_get_ts_info(struct net_device *netdev, struct ethtool_ts_info *info) +mlxsw_sp_get_ts_info(struct net_device *netdev, struct kernel_ethtool_ts_info *info) { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(netdev); struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c index cbb6c75a6620..5b174cb95eb8 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c @@ -1276,7 +1276,7 @@ int mlxsw_sp1_ptp_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port, } int mlxsw_sp1_ptp_get_ts_info(struct mlxsw_sp *mlxsw_sp, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { info->phc_index = ptp_clock_index(mlxsw_sp->clock->ptp); @@ -1661,7 +1661,7 @@ err_get_message_types: } int mlxsw_sp2_ptp_get_ts_info(struct mlxsw_sp *mlxsw_sp, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { info->phc_index = ptp_clock_index(mlxsw_sp->clock->ptp); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.h index a8b88230959a..769095d4932d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.h @@ -11,7 +11,7 @@ struct mlxsw_sp; struct mlxsw_sp_port; struct mlxsw_sp_ptp_clock; -static inline int mlxsw_sp_ptp_get_ts_info_noptp(struct ethtool_ts_info *info) +static inline int mlxsw_sp_ptp_get_ts_info_noptp(struct kernel_ethtool_ts_info *info) { info->so_timestamping = 
SOF_TIMESTAMPING_RX_SOFTWARE | SOF_TIMESTAMPING_SOFTWARE; @@ -50,7 +50,7 @@ int mlxsw_sp1_ptp_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port, void mlxsw_sp1_ptp_shaper_work(struct work_struct *work); int mlxsw_sp1_ptp_get_ts_info(struct mlxsw_sp *mlxsw_sp, - struct ethtool_ts_info *info); + struct kernel_ethtool_ts_info *info); int mlxsw_sp1_get_stats_count(void); void mlxsw_sp1_get_stats_strings(u8 **p); @@ -84,7 +84,7 @@ int mlxsw_sp2_ptp_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port, struct hwtstamp_config *config); int mlxsw_sp2_ptp_get_ts_info(struct mlxsw_sp *mlxsw_sp, - struct ethtool_ts_info *info); + struct kernel_ethtool_ts_info *info); int mlxsw_sp2_ptp_txhdr_construct(struct mlxsw_core *mlxsw_core, struct mlxsw_sp_port *mlxsw_sp_port, @@ -152,7 +152,7 @@ static inline void mlxsw_sp1_ptp_shaper_work(struct work_struct *work) } static inline int mlxsw_sp1_ptp_get_ts_info(struct mlxsw_sp *mlxsw_sp, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { return mlxsw_sp_ptp_get_ts_info_noptp(info); } @@ -227,7 +227,7 @@ mlxsw_sp2_ptp_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port, } static inline int mlxsw_sp2_ptp_get_ts_info(struct mlxsw_sp *mlxsw_sp, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { return mlxsw_sp_ptp_get_ts_info_noptp(info); } diff --git a/drivers/net/ethernet/microchip/lan743x_ethtool.c b/drivers/net/ethernet/microchip/lan743x_ethtool.c index 0d1740d64676..3a63ec091413 100644 --- a/drivers/net/ethernet/microchip/lan743x_ethtool.c +++ b/drivers/net/ethernet/microchip/lan743x_ethtool.c @@ -1029,7 +1029,7 @@ static int lan743x_ethtool_set_rxfh(struct net_device *netdev, } static int lan743x_ethtool_get_ts_info(struct net_device *netdev, - struct ethtool_ts_info *ts_info) + struct kernel_ethtool_ts_info *ts_info) { struct lan743x_adapter *adapter = netdev_priv(netdev); diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_ethtool.c b/drivers/net/ethernet/microchip/lan966x/lan966x_ethtool.c index c0fc85ac5db3..aec7066d83b3 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_ethtool.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_ethtool.c @@ -538,7 +538,7 @@ static int lan966x_set_pauseparam(struct net_device *dev, } static int lan966x_get_ts_info(struct net_device *dev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct lan966x_port *port = netdev_priv(dev); struct lan966x *lan966x = port->lan966x; diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_ethtool.c b/drivers/net/ethernet/microchip/sparx5/sparx5_ethtool.c index a06dc5a9b355..4f800c1a435d 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_ethtool.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_ethtool.c @@ -1183,7 +1183,7 @@ static void sparx5_config_port_stats(struct sparx5 *sparx5, int portno) } static int sparx5_get_ts_info(struct net_device *dev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct sparx5_port *port = netdev_priv(dev); struct sparx5 *sparx5 = port->sparx5; diff --git a/drivers/net/ethernet/mscc/ocelot_net.c b/drivers/net/ethernet/mscc/ocelot_net.c index 21a87a3fc556..7c9540a71725 100644 --- a/drivers/net/ethernet/mscc/ocelot_net.c +++ b/drivers/net/ethernet/mscc/ocelot_net.c @@ -980,7 +980,7 @@ static int ocelot_port_get_sset_count(struct net_device *dev, int sset) } static int ocelot_port_get_ts_info(struct net_device *dev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct ocelot_port_private *priv = 
netdev_priv(dev); struct ocelot *ocelot = priv->port.ocelot; diff --git a/drivers/net/ethernet/mscc/ocelot_ptp.c b/drivers/net/ethernet/mscc/ocelot_ptp.c index cb32234a5bf1..b3c28260adf8 100644 --- a/drivers/net/ethernet/mscc/ocelot_ptp.c +++ b/drivers/net/ethernet/mscc/ocelot_ptp.c @@ -580,7 +580,7 @@ int ocelot_hwstamp_set(struct ocelot *ocelot, int port, struct ifreq *ifr) EXPORT_SYMBOL(ocelot_hwstamp_set); int ocelot_get_ts_info(struct ocelot *ocelot, int port, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { info->phc_index = ocelot->ptp_clock ? ptp_clock_index(ocelot->ptp_clock) : -1; diff --git a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c index 185a03514ae3..4619fd74f3e3 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c @@ -977,7 +977,7 @@ static int ionic_get_module_eeprom(struct net_device *netdev, } static int ionic_get_ts_info(struct net_device *netdev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct ionic_lif *lif = netdev_priv(netdev); struct ionic *ionic = lif->ionic; diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c index f497f6ca1018..97b059be1041 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c @@ -1137,7 +1137,7 @@ static int qede_set_channels(struct net_device *dev, } static int qede_get_ts_info(struct net_device *dev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct qede_dev *edev = netdev_priv(dev); diff --git a/drivers/net/ethernet/qlogic/qede/qede_ptp.c b/drivers/net/ethernet/qlogic/qede/qede_ptp.c index 747cc5e2bb78..63e3dac4d5f7 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ptp.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ptp.c @@ -321,7 +321,7 @@ int qede_ptp_hw_ts(struct qede_dev *edev, struct ifreq *ifr) sizeof(config)) ? 
-EFAULT : 0; } -int qede_ptp_get_ts_info(struct qede_dev *edev, struct ethtool_ts_info *info) +int qede_ptp_get_ts_info(struct qede_dev *edev, struct kernel_ethtool_ts_info *info) { struct qede_ptp *ptp = edev->ptp; diff --git a/drivers/net/ethernet/qlogic/qede/qede_ptp.h b/drivers/net/ethernet/qlogic/qede/qede_ptp.h index 1db0f021c645..adafc894797e 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ptp.h +++ b/drivers/net/ethernet/qlogic/qede/qede_ptp.h @@ -17,7 +17,7 @@ void qede_ptp_tx_ts(struct qede_dev *edev, struct sk_buff *skb); int qede_ptp_hw_ts(struct qede_dev *edev, struct ifreq *req); void qede_ptp_disable(struct qede_dev *edev); int qede_ptp_enable(struct qede_dev *edev); -int qede_ptp_get_ts_info(struct qede_dev *edev, struct ethtool_ts_info *ts); +int qede_ptp_get_ts_info(struct qede_dev *edev, struct kernel_ethtool_ts_info *ts); static inline void qede_ptp_record_rx_ts(struct qede_dev *edev, union eth_rx_cqe *cqe, diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 6605e4f4af53..c02fb296bf7d 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -1737,7 +1737,7 @@ static int ravb_set_ringparam(struct net_device *ndev, } static int ravb_get_ts_info(struct net_device *ndev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct ravb_private *priv = netdev_priv(ndev); const struct ravb_hw_info *hw_info = priv->info; diff --git a/drivers/net/ethernet/renesas/rswitch.c b/drivers/net/ethernet/renesas/rswitch.c index 24c90d8f5a44..ff50e20856ec 100644 --- a/drivers/net/ethernet/renesas/rswitch.c +++ b/drivers/net/ethernet/renesas/rswitch.c @@ -1809,7 +1809,7 @@ static const struct net_device_ops rswitch_netdev_ops = { .ndo_set_mac_address = eth_mac_addr, }; -static int rswitch_get_ts_info(struct net_device *ndev, struct ethtool_ts_info *info) +static int rswitch_get_ts_info(struct net_device *ndev, struct kernel_ethtool_ts_info *info) { struct rswitch_device *rdev = netdev_priv(ndev); diff --git a/drivers/net/ethernet/renesas/rtsn.c b/drivers/net/ethernet/renesas/rtsn.c index ad69d47463cb..577227c007ab 100644 --- a/drivers/net/ethernet/renesas/rtsn.c +++ b/drivers/net/ethernet/renesas/rtsn.c @@ -1213,7 +1213,7 @@ static const struct net_device_ops rtsn_netdev_ops = { }; static int rtsn_get_ts_info(struct net_device *ndev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct rtsn_private *priv = netdev_priv(ndev); diff --git a/drivers/net/ethernet/sfc/ethtool.c b/drivers/net/ethernet/sfc/ethtool.c index 0f5c68b8bab7..7c887160e2ef 100644 --- a/drivers/net/ethernet/sfc/ethtool.c +++ b/drivers/net/ethernet/sfc/ethtool.c @@ -226,7 +226,7 @@ static void efx_ethtool_get_fec_stats(struct net_device *net_dev, } static int efx_ethtool_get_ts_info(struct net_device *net_dev, - struct ethtool_ts_info *ts_info) + struct kernel_ethtool_ts_info *ts_info) { struct efx_nic *efx = efx_netdev_priv(net_dev); diff --git a/drivers/net/ethernet/sfc/falcon/nic.h b/drivers/net/ethernet/sfc/falcon/nic.h index 9f413474bd9f..ada6e036fd97 100644 --- a/drivers/net/ethernet/sfc/falcon/nic.h +++ b/drivers/net/ethernet/sfc/falcon/nic.h @@ -297,7 +297,7 @@ static inline struct falcon_board *falcon_board(struct ef4_nic *efx) return &data->board; } -struct ethtool_ts_info; +struct kernel_ethtool_ts_info; extern const struct ef4_nic_type falcon_a1_nic_type; extern const struct ef4_nic_type falcon_b0_nic_type; diff --git a/drivers/net/ethernet/sfc/ptp.c 
b/drivers/net/ethernet/sfc/ptp.c index c3bffbf0ba2b..6fd2fdbaa418 100644 --- a/drivers/net/ethernet/sfc/ptp.c +++ b/drivers/net/ethernet/sfc/ptp.c @@ -1864,7 +1864,7 @@ static int efx_ptp_ts_init(struct efx_nic *efx, struct kernel_hwtstamp_config *i return 0; } -void efx_ptp_get_ts_info(struct efx_nic *efx, struct ethtool_ts_info *ts_info) +void efx_ptp_get_ts_info(struct efx_nic *efx, struct kernel_ethtool_ts_info *ts_info) { struct efx_ptp_data *ptp = efx->ptp_data; struct efx_nic *primary = efx->primary; diff --git a/drivers/net/ethernet/sfc/ptp.h b/drivers/net/ethernet/sfc/ptp.h index 2f30dbb490d2..6946203499ef 100644 --- a/drivers/net/ethernet/sfc/ptp.h +++ b/drivers/net/ethernet/sfc/ptp.h @@ -12,7 +12,7 @@ #include #include "net_driver.h" -struct ethtool_ts_info; +struct kernel_ethtool_ts_info; int efx_ptp_probe(struct efx_nic *efx, struct efx_channel *channel); void efx_ptp_defer_probe_with_channel(struct efx_nic *efx); struct efx_channel *efx_ptp_channel(struct efx_nic *efx); @@ -23,7 +23,8 @@ int efx_ptp_set_ts_config(struct efx_nic *efx, struct netlink_ext_ack *extack); int efx_ptp_get_ts_config(struct efx_nic *efx, struct kernel_hwtstamp_config *config); -void efx_ptp_get_ts_info(struct efx_nic *efx, struct ethtool_ts_info *ts_info); +void efx_ptp_get_ts_info(struct efx_nic *efx, + struct kernel_ethtool_ts_info *ts_info); bool efx_ptp_is_ptp_tx(struct efx_nic *efx, struct sk_buff *skb); int efx_ptp_get_mode(struct efx_nic *efx); int efx_ptp_change_mode(struct efx_nic *efx, bool enable_wanted, diff --git a/drivers/net/ethernet/sfc/siena/ethtool.c b/drivers/net/ethernet/sfc/siena/ethtool.c index 14dd3893bdef..4c182d4edfc2 100644 --- a/drivers/net/ethernet/sfc/siena/ethtool.c +++ b/drivers/net/ethernet/sfc/siena/ethtool.c @@ -226,7 +226,7 @@ static void efx_ethtool_get_fec_stats(struct net_device *net_dev, } static int efx_ethtool_get_ts_info(struct net_device *net_dev, - struct ethtool_ts_info *ts_info) + struct kernel_ethtool_ts_info *ts_info) { struct efx_nic *efx = netdev_priv(net_dev); diff --git a/drivers/net/ethernet/sfc/siena/ptp.c b/drivers/net/ethernet/sfc/siena/ptp.c index 4b5e2f0ba350..c473a4b6dd44 100644 --- a/drivers/net/ethernet/sfc/siena/ptp.c +++ b/drivers/net/ethernet/sfc/siena/ptp.c @@ -1780,7 +1780,7 @@ static int efx_ptp_ts_init(struct efx_nic *efx, } void efx_siena_ptp_get_ts_info(struct efx_nic *efx, - struct ethtool_ts_info *ts_info) + struct kernel_ethtool_ts_info *ts_info) { struct efx_ptp_data *ptp = efx->ptp_data; struct efx_nic *primary = efx->primary; diff --git a/drivers/net/ethernet/sfc/siena/ptp.h b/drivers/net/ethernet/sfc/siena/ptp.h index 6352f84424f6..b6133e7c5608 100644 --- a/drivers/net/ethernet/sfc/siena/ptp.h +++ b/drivers/net/ethernet/sfc/siena/ptp.h @@ -12,7 +12,7 @@ #include #include "net_driver.h" -struct ethtool_ts_info; +struct kernel_ethtool_ts_info; void efx_siena_ptp_defer_probe_with_channel(struct efx_nic *efx); struct efx_channel *efx_siena_ptp_channel(struct efx_nic *efx); int efx_siena_ptp_set_ts_config(struct efx_nic *efx, @@ -21,7 +21,7 @@ int efx_siena_ptp_set_ts_config(struct efx_nic *efx, int efx_siena_ptp_get_ts_config(struct efx_nic *efx, struct kernel_hwtstamp_config *config); void efx_siena_ptp_get_ts_info(struct efx_nic *efx, - struct ethtool_ts_info *ts_info); + struct kernel_ethtool_ts_info *ts_info); bool efx_siena_ptp_is_ptp_tx(struct efx_nic *efx, struct sk_buff *skb); int efx_siena_ptp_get_mode(struct efx_nic *efx); int efx_siena_ptp_change_mode(struct efx_nic *efx, bool enable_wanted, diff --git 
a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c index 18468c0228f0..7008219fd88d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c @@ -1199,7 +1199,7 @@ static int stmmac_set_channels(struct net_device *dev, } static int stmmac_get_ts_info(struct net_device *dev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct stmmac_priv *priv = netdev_priv(dev); diff --git a/drivers/net/ethernet/ti/am65-cpsw-ethtool.c b/drivers/net/ethernet/ti/am65-cpsw-ethtool.c index a1d0935d1ebe..b60976947da5 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-ethtool.c +++ b/drivers/net/ethernet/ti/am65-cpsw-ethtool.c @@ -692,7 +692,7 @@ static void am65_cpsw_get_eth_mac_stats(struct net_device *ndev, }; static int am65_cpsw_get_ethtool_ts_info(struct net_device *ndev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct am65_cpsw_common *common = am65_ndev_to_common(ndev); unsigned int ptp_v2_filter; diff --git a/drivers/net/ethernet/ti/cpsw_ethtool.c b/drivers/net/ethernet/ti/cpsw_ethtool.c index f7b283353ba2..53ed23d68722 100644 --- a/drivers/net/ethernet/ti/cpsw_ethtool.c +++ b/drivers/net/ethernet/ti/cpsw_ethtool.c @@ -717,7 +717,7 @@ err: } #if IS_ENABLED(CONFIG_TI_CPTS) -int cpsw_get_ts_info(struct net_device *ndev, struct ethtool_ts_info *info) +int cpsw_get_ts_info(struct net_device *ndev, struct kernel_ethtool_ts_info *info) { struct cpsw_common *cpsw = ndev_to_cpsw(ndev); @@ -738,7 +738,7 @@ int cpsw_get_ts_info(struct net_device *ndev, struct ethtool_ts_info *info) return 0; } #else -int cpsw_get_ts_info(struct net_device *ndev, struct ethtool_ts_info *info) +int cpsw_get_ts_info(struct net_device *ndev, struct kernel_ethtool_ts_info *info) { info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE | diff --git a/drivers/net/ethernet/ti/cpsw_priv.h b/drivers/net/ethernet/ti/cpsw_priv.h index 7efa72502c86..1f448290b9f4 100644 --- a/drivers/net/ethernet/ti/cpsw_priv.h +++ b/drivers/net/ethernet/ti/cpsw_priv.h @@ -510,6 +510,6 @@ int cpsw_set_ringparam(struct net_device *ndev, int cpsw_set_channels_common(struct net_device *ndev, struct ethtool_channels *chs, cpdma_handler_fn rx_handler); -int cpsw_get_ts_info(struct net_device *ndev, struct ethtool_ts_info *info); +int cpsw_get_ts_info(struct net_device *ndev, struct kernel_ethtool_ts_info *info); #endif /* DRIVERS_NET_ETHERNET_TI_CPSW_PRIV_H_ */ diff --git a/drivers/net/ethernet/ti/icssg/icssg_ethtool.c b/drivers/net/ethernet/ti/icssg/icssg_ethtool.c index c8d0f45cc5b1..524723ced9ed 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_ethtool.c +++ b/drivers/net/ethernet/ti/icssg/icssg_ethtool.c @@ -110,7 +110,7 @@ static void emac_get_ethtool_stats(struct net_device *ndev, } static int emac_get_ts_info(struct net_device *ndev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct prueth_emac *emac = netdev_priv(ndev); diff --git a/drivers/net/ethernet/ti/netcp_ethss.c b/drivers/net/ethernet/ti/netcp_ethss.c index 02cb6474f6dc..d286709ca3b9 100644 --- a/drivers/net/ethernet/ti/netcp_ethss.c +++ b/drivers/net/ethernet/ti/netcp_ethss.c @@ -1999,7 +1999,7 @@ static int keystone_set_link_ksettings(struct net_device *ndev, #if IS_ENABLED(CONFIG_TI_CPTS) static int keystone_get_ts_info(struct net_device *ndev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct netcp_intf *netcp = netdev_priv(ndev); struct gbe_intf *gbe_intf; @@ 
-2027,7 +2027,7 @@ static int keystone_get_ts_info(struct net_device *ndev, } #else static int keystone_get_ts_info(struct net_device *ndev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE | diff --git a/drivers/net/ethernet/xscale/ixp4xx_eth.c b/drivers/net/ethernet/xscale/ixp4xx_eth.c index 8aff6a73ca0a..56df37f8d50a 100644 --- a/drivers/net/ethernet/xscale/ixp4xx_eth.c +++ b/drivers/net/ethernet/xscale/ixp4xx_eth.c @@ -1015,7 +1015,7 @@ static void ixp4xx_get_drvinfo(struct net_device *dev, } static int ixp4xx_get_ts_info(struct net_device *dev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct port *port = netdev_priv(dev); diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 67b7ef2d463f..24298a33e0e9 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -1084,7 +1084,7 @@ static int macvlan_ethtool_get_link_ksettings(struct net_device *dev, } static int macvlan_ethtool_get_ts_info(struct net_device *dev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct net_device *real_dev = macvlan_dev_real_dev(dev); diff --git a/drivers/net/netdevsim/ethtool.c b/drivers/net/netdevsim/ethtool.c index 3f9c9327f149..1436905bc106 100644 --- a/drivers/net/netdevsim/ethtool.c +++ b/drivers/net/netdevsim/ethtool.c @@ -148,7 +148,7 @@ nsim_get_fec_stats(struct net_device *dev, struct ethtool_fec_stats *fec_stats) } static int nsim_get_ts_info(struct net_device *dev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct netdevsim *ns = netdev_priv(dev); diff --git a/drivers/net/phy/bcm-phy-ptp.c b/drivers/net/phy/bcm-phy-ptp.c index d3e825c951ee..874a1b64b115 100644 --- a/drivers/net/phy/bcm-phy-ptp.c +++ b/drivers/net/phy/bcm-phy-ptp.c @@ -841,7 +841,7 @@ static int bcm_ptp_hwtstamp(struct mii_timestamper *mii_ts, } static int bcm_ptp_ts_info(struct mii_timestamper *mii_ts, - struct ethtool_ts_info *ts_info) + struct kernel_ethtool_ts_info *ts_info) { struct bcm_ptp_private *priv = mii2priv(mii_ts); diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c index d3e72d5c1472..075d2beea716 100644 --- a/drivers/net/phy/dp83640.c +++ b/drivers/net/phy/dp83640.c @@ -1395,7 +1395,7 @@ static void dp83640_txtstamp(struct mii_timestamper *mii_ts, } static int dp83640_ts_info(struct mii_timestamper *mii_ts, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct dp83640_private *dp83640 = container_of(mii_ts, struct dp83640_private, mii_ts); diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 8d57225d8575..dd519805deee 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -2552,7 +2552,7 @@ static void lan8814_ptp_tx_ts_get(struct phy_device *phydev, *seq_id = lanphy_read_page_reg(phydev, 5, PTP_TX_MSG_HEADER2); } -static int lan8814_ts_info(struct mii_timestamper *mii_ts, struct ethtool_ts_info *info) +static int lan8814_ts_info(struct mii_timestamper *mii_ts, struct kernel_ethtool_ts_info *info) { struct kszphy_ptp_priv *ptp_priv = container_of(mii_ts, struct kszphy_ptp_priv, mii_ts); struct phy_device *phydev = ptp_priv->phydev; @@ -4317,7 +4317,7 @@ static irqreturn_t lan8841_handle_interrupt(struct phy_device *phydev) } static int lan8841_ts_info(struct mii_timestamper *mii_ts, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct kszphy_ptp_priv *ptp_priv; diff --git a/drivers/net/phy/mscc/mscc_ptp.c 
b/drivers/net/phy/mscc/mscc_ptp.c index e66d20eff7c4..c1ddae36a2ae 100644 --- a/drivers/net/phy/mscc/mscc_ptp.c +++ b/drivers/net/phy/mscc/mscc_ptp.c @@ -1134,7 +1134,7 @@ static int vsc85xx_hwtstamp(struct mii_timestamper *mii_ts, } static int vsc85xx_ts_info(struct mii_timestamper *mii_ts, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct vsc8531_private *vsc8531 = container_of(mii_ts, struct vsc8531_private, mii_ts); diff --git a/drivers/net/phy/nxp-c45-tja11xx.c b/drivers/net/phy/nxp-c45-tja11xx.c index d18c133e6013..5af5ade4fc64 100644 --- a/drivers/net/phy/nxp-c45-tja11xx.c +++ b/drivers/net/phy/nxp-c45-tja11xx.c @@ -1058,7 +1058,7 @@ nxp_c45_no_ptp_irq: } static int nxp_c45_ts_info(struct mii_timestamper *mii_ts, - struct ethtool_ts_info *ts_info) + struct kernel_ethtool_ts_info *ts_info) { struct nxp_c45_phy *priv = container_of(mii_ts, struct nxp_c45_phy, mii_ts); diff --git a/drivers/ptp/ptp_ines.c b/drivers/ptp/ptp_ines.c index 385643f3f8fe..e6f7d2bf8dde 100644 --- a/drivers/ptp/ptp_ines.c +++ b/drivers/ptp/ptp_ines.c @@ -556,7 +556,7 @@ static bool ines_timestamp_expired(struct ines_timestamp *ts) } static int ines_ts_info(struct mii_timestamper *mii_ts, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { info->so_timestamping = SOF_TIMESTAMPING_TX_HARDWARE | diff --git a/drivers/s390/net/qeth_ethtool.c b/drivers/s390/net/qeth_ethtool.c index c1caf7734c3e..f184c58ecf24 100644 --- a/drivers/s390/net/qeth_ethtool.c +++ b/drivers/s390/net/qeth_ethtool.c @@ -247,7 +247,7 @@ static int qeth_set_channels(struct net_device *dev, } static int qeth_get_ts_info(struct net_device *dev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct qeth_card *card = dev->ml_priv; diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index 1b92aed49363..23492213ea35 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -186,7 +186,7 @@ void close_candev(struct net_device *dev); int can_change_mtu(struct net_device *dev, int new_mtu); int can_eth_ioctl_hwts(struct net_device *netdev, struct ifreq *ifr, int cmd); int can_ethtool_op_get_ts_info_hwts(struct net_device *dev, - struct ethtool_ts_info *info); + struct kernel_ethtool_ts_info *info); int register_candev(struct net_device *dev); void unregister_candev(struct net_device *dev); diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index e213b5508da6..6b38bbda5790 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -18,6 +18,7 @@ #include #include #include +#include struct compat_ethtool_rx_flow_spec { u32 flow_type; @@ -713,6 +714,22 @@ struct ethtool_rxfh_param { u8 input_xfrm; }; +/** + * struct kernel_ethtool_ts_info - kernel copy of struct ethtool_ts_info + * @cmd: command number = %ETHTOOL_GET_TS_INFO + * @so_timestamping: bit mask of the sum of the supported SO_TIMESTAMPING flags + * @phc_index: device index of the associated PHC, or -1 if there is none + * @tx_types: bit mask of the supported hwtstamp_tx_types enumeration values + * @rx_filters: bit mask of the supported hwtstamp_rx_filters enumeration values + */ +struct kernel_ethtool_ts_info { + u32 cmd; + u32 so_timestamping; + int phc_index; + enum hwtstamp_tx_types tx_types; + enum hwtstamp_rx_filters rx_filters; +}; + /** * struct ethtool_ops - optional netdev operations * @cap_link_lanes_supported: indicates if the driver supports lanes @@ -1020,7 +1037,7 @@ struct ethtool_ops { int (*get_dump_data)(struct net_device *, struct ethtool_dump *, void *); 
int (*set_dump)(struct net_device *, struct ethtool_dump *); - int (*get_ts_info)(struct net_device *, struct ethtool_ts_info *); + int (*get_ts_info)(struct net_device *, struct kernel_ethtool_ts_info *); void (*get_ts_stats)(struct net_device *dev, struct ethtool_ts_stats *ts_stats); int (*get_module_info)(struct net_device *, @@ -1181,7 +1198,8 @@ int ethtool_get_phc_vclocks(struct net_device *dev, int **vclock_index); /* Some generic methods drivers may use in their ethtool_ops */ u32 ethtool_op_get_link(struct net_device *dev); -int ethtool_op_get_ts_info(struct net_device *dev, struct ethtool_ts_info *eti); +int ethtool_op_get_ts_info(struct net_device *dev, + struct kernel_ethtool_ts_info *eti); /** * ethtool_mm_frag_size_add_to_min - Translate (standard) additional fragment @@ -1230,7 +1248,8 @@ static inline int ethtool_mm_frag_size_min_to_add(u32 val_min, u32 *val_add, * @info: buffer to hold the result * Returns zero on success, non-zero otherwise. */ -int ethtool_get_ts_info_by_layer(struct net_device *dev, struct ethtool_ts_info *info); +int ethtool_get_ts_info_by_layer(struct net_device *dev, + struct kernel_ethtool_ts_info *info); /** * ethtool_sprintf - Write formatted string to ethtool string data diff --git a/include/linux/mii_timestamper.h b/include/linux/mii_timestamper.h index 26b04f73f214..995db62570f9 100644 --- a/include/linux/mii_timestamper.h +++ b/include/linux/mii_timestamper.h @@ -59,7 +59,7 @@ struct mii_timestamper { struct phy_device *phydev); int (*ts_info)(struct mii_timestamper *mii_ts, - struct ethtool_ts_info *ts_info); + struct kernel_ethtool_ts_info *ts_info); struct device *device; }; diff --git a/include/linux/phy.h b/include/linux/phy.h index e7a38137211c..04ae5c811cfb 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1618,7 +1618,7 @@ static inline bool phy_rxtstamp(struct phy_device *phydev, struct sk_buff *skb, } static inline int phy_ts_info(struct phy_device *phydev, - struct ethtool_ts_info *tsinfo) + struct kernel_ethtool_ts_info *tsinfo) { return phydev->mii_ts->ts_info(phydev->mii_ts, tsinfo); } diff --git a/include/net/dsa.h b/include/net/dsa.h index f9ae3ca66b6f..96efdd9f90c9 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -934,7 +934,7 @@ struct dsa_switch_ops { * ethtool timestamp info */ int (*get_ts_info)(struct dsa_switch *ds, int port, - struct ethtool_ts_info *ts); + struct kernel_ethtool_ts_info *ts); /* * ethtool MAC merge layer diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index 1e1b40f4e664..6a37b29f4b4c 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -1016,7 +1016,7 @@ void ocelot_port_get_eth_mac_stats(struct ocelot *ocelot, int port, void ocelot_port_get_eth_phy_stats(struct ocelot *ocelot, int port, struct ethtool_eth_phy_stats *phy_stats); int ocelot_get_ts_info(struct ocelot *ocelot, int port, - struct ethtool_ts_info *info); + struct kernel_ethtool_ts_info *info); void ocelot_set_ageing_time(struct ocelot *ocelot, unsigned int msecs); int ocelot_port_vlan_filtering(struct ocelot *ocelot, int port, bool enabled, struct netlink_ext_ack *extack); diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 3efba4f857ac..217be32426b5 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -677,7 +677,7 @@ static void vlan_ethtool_get_drvinfo(struct net_device *dev, } static int vlan_ethtool_get_ts_info(struct net_device *dev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { const struct vlan_dev_priv *vlan = 
vlan_dev_priv(dev); return ethtool_get_ts_info_by_layer(vlan->real_dev, info); diff --git a/net/dsa/user.c b/net/dsa/user.c index e8f56a40b614..f5adfa1d978a 100644 --- a/net/dsa/user.c +++ b/net/dsa/user.c @@ -1729,7 +1729,7 @@ static int dsa_user_set_rxnfc(struct net_device *dev, } static int dsa_user_get_ts_info(struct net_device *dev, - struct ethtool_ts_info *ts) + struct kernel_ethtool_ts_info *ts) { struct dsa_user_priv *p = netdev_priv(dev); struct dsa_switch *ds = p->dp->ds; diff --git a/net/ethtool/common.c b/net/ethtool/common.c index 01b7550f12c6..461017a37955 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ -629,7 +629,7 @@ int ethtool_check_ops(const struct ethtool_ops *ops) return 0; } -int __ethtool_get_ts_info(struct net_device *dev, struct ethtool_ts_info *info) +int __ethtool_get_ts_info(struct net_device *dev, struct kernel_ethtool_ts_info *info) { const struct ethtool_ops *ops = dev->ethtool_ops; struct phy_device *phydev = dev->phydev; @@ -651,7 +651,7 @@ int __ethtool_get_ts_info(struct net_device *dev, struct ethtool_ts_info *info) int ethtool_get_phc_vclocks(struct net_device *dev, int **vclock_index) { - struct ethtool_ts_info info = { }; + struct kernel_ethtool_ts_info info = { }; int num = 0; if (!__ethtool_get_ts_info(dev, &info)) @@ -661,7 +661,7 @@ int ethtool_get_phc_vclocks(struct net_device *dev, int **vclock_index) } EXPORT_SYMBOL(ethtool_get_phc_vclocks); -int ethtool_get_ts_info_by_layer(struct net_device *dev, struct ethtool_ts_info *info) +int ethtool_get_ts_info_by_layer(struct net_device *dev, struct kernel_ethtool_ts_info *info) { return __ethtool_get_ts_info(dev, info); } diff --git a/net/ethtool/common.h b/net/ethtool/common.h index 28b8aaaf9bcb..b9daeecbd84d 100644 --- a/net/ethtool/common.h +++ b/net/ethtool/common.h @@ -44,7 +44,7 @@ bool convert_legacy_settings_to_link_ksettings( const struct ethtool_cmd *legacy_settings); int ethtool_get_max_rxfh_channel(struct net_device *dev, u32 *max); int ethtool_get_max_rxnfc_channel(struct net_device *dev, u64 *max); -int __ethtool_get_ts_info(struct net_device *dev, struct ethtool_ts_info *info); +int __ethtool_get_ts_info(struct net_device *dev, struct kernel_ethtool_ts_info *info); extern const struct ethtool_phy_ops *ethtool_phy_ops; extern const struct ethtool_pse_ops *ethtool_pse_ops; diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index d72b0fec89af..bc8988ddf51c 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -65,7 +65,8 @@ u32 ethtool_op_get_link(struct net_device *dev) } EXPORT_SYMBOL(ethtool_op_get_link); -int ethtool_op_get_ts_info(struct net_device *dev, struct ethtool_ts_info *info) +int ethtool_op_get_ts_info(struct net_device *dev, + struct kernel_ethtool_ts_info *info) { info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE | @@ -2569,13 +2570,20 @@ out: static int ethtool_get_ts_info(struct net_device *dev, void __user *useraddr) { - struct ethtool_ts_info info; + struct kernel_ethtool_ts_info kernel_info; + struct ethtool_ts_info info = {}; int err; - err = __ethtool_get_ts_info(dev, &info); + err = __ethtool_get_ts_info(dev, &kernel_info); if (err) return err; + info.cmd = kernel_info.cmd; + info.so_timestamping = kernel_info.so_timestamping; + info.phc_index = kernel_info.phc_index; + info.tx_types = kernel_info.tx_types; + info.rx_filters = kernel_info.rx_filters; + if (copy_to_user(useraddr, &info, sizeof(info))) return -EFAULT; diff --git a/net/ethtool/tsinfo.c b/net/ethtool/tsinfo.c index 57d496287e52..03d12d6f79ca 100644 --- 
a/net/ethtool/tsinfo.c +++ b/net/ethtool/tsinfo.c @@ -12,7 +12,7 @@ struct tsinfo_req_info { struct tsinfo_reply_data { struct ethnl_reply_data base; - struct ethtool_ts_info ts_info; + struct kernel_ethtool_ts_info ts_info; struct ethtool_ts_stats stats; }; @@ -55,7 +55,7 @@ static int tsinfo_reply_size(const struct ethnl_req_info *req_base, { const struct tsinfo_reply_data *data = TSINFO_REPDATA(reply_base); bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS; - const struct ethtool_ts_info *ts_info = &data->ts_info; + const struct kernel_ethtool_ts_info *ts_info = &data->ts_info; int len = 0; int ret; @@ -136,7 +136,7 @@ static int tsinfo_fill_reply(struct sk_buff *skb, { const struct tsinfo_reply_data *data = TSINFO_REPDATA(reply_base); bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS; - const struct ethtool_ts_info *ts_info = &data->ts_info; + const struct kernel_ethtool_ts_info *ts_info = &data->ts_info; int ret; if (ts_info->so_timestamping) { diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index b284a06b5a75..cc2df9f8c14a 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -1610,7 +1610,7 @@ static int taprio_parse_clockid(struct Qdisc *sch, struct nlattr **tb, if (FULL_OFFLOAD_IS_ENABLED(q->flags)) { const struct ethtool_ops *ops = dev->ethtool_ops; - struct ethtool_ts_info info = { + struct kernel_ethtool_ts_info info = { .cmd = ETHTOOL_GET_TS_INFO, .phc_index = -1, }; -- cgit v1.2.3-58-ga151 From 70de41ef78573ce958ac04ecc2b5671851723c59 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 14 Jul 2024 18:05:56 +0200 Subject: llc: Constify struct llc_conn_state_trans 'struct llc_conn_state_trans' are not modified in this driver. Constifying this structure moves some data to a read-only section, so increase overall security. 
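As a rough standalone sketch of that effect (a hypothetical demo file, not part of this patch), a const-qualified static object with a compile-time initializer is emitted into .rodata instead of .data; GNU size(1) counts .rodata under its "text" column, which is why the object-file figures below show text growing while data shrinks once the transition tables become const:

/* const_demo.c - hypothetical illustration, not kernel code.
 * Build and inspect with:
 *   gcc -c const_demo.c && size const_demo.o
 * Dropping "const" from readonly_trans moves it from .rodata
 * (reported as "text" by size) back into .data.
 */
struct demo_trans {
	int ev;
	int next_state;
};

/* Writable: initialized, non-const static => placed in .data */
static struct demo_trans writable_trans = { .ev = 1, .next_state = 2 };

/* Read-only: const with a static initializer => placed in .rodata */
static const struct demo_trans readonly_trans = { .ev = 3, .next_state = 4 };

/* Reference both objects so the compiler keeps them in the object file. */
const struct demo_trans *demo_get_readonly(void)
{
	return &readonly_trans;
}

struct demo_trans *demo_get_writable(void)
{
	return &writable_trans;
}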
On a x86_64, with allmodconfig, as an example: Before: ====== text data bss dec hex filename 13923 10896 32 24851 6113 net/llc/llc_c_st.o After: ===== text data bss dec hex filename 21859 3328 0 25187 6263 net/llc/llc_c_st.o Signed-off-by: Christophe JAILLET Reviewed-by: Simon Horman Link: https://patch.msgid.link/87cda89e4c9414e71d1a54bb1eb491b0e7f70375.1720973029.git.christophe.jaillet@wanadoo.fr Signed-off-by: Jakub Kicinski --- include/net/llc_c_st.h | 4 +- net/llc/llc_c_st.c | 500 ++++++++++++++++++++++++------------------------- net/llc/llc_conn.c | 20 +- 3 files changed, 262 insertions(+), 262 deletions(-) (limited to 'net') diff --git a/include/net/llc_c_st.h b/include/net/llc_c_st.h index 53823d61d8b6..a4bea0f33188 100644 --- a/include/net/llc_c_st.h +++ b/include/net/llc_c_st.h @@ -44,8 +44,8 @@ struct llc_conn_state_trans { }; struct llc_conn_state { - u8 current_state; - struct llc_conn_state_trans **transitions; + u8 current_state; + const struct llc_conn_state_trans **transitions; }; extern struct llc_conn_state llc_conn_state_table[]; diff --git a/net/llc/llc_c_st.c b/net/llc/llc_c_st.c index 2467573b5f84..1c267db304df 100644 --- a/net/llc/llc_c_st.c +++ b/net/llc/llc_c_st.c @@ -42,7 +42,7 @@ static const llc_conn_action_t llc_common_actions_1[] = { [5] = NULL, }; -static struct llc_conn_state_trans llc_common_state_trans_1 = { +static const struct llc_conn_state_trans llc_common_state_trans_1 = { .ev = llc_conn_ev_disc_req, .next_state = LLC_CONN_STATE_D_CONN, .ev_qualifiers = NONE, @@ -59,7 +59,7 @@ static const llc_conn_action_t llc_common_actions_2[] = { [5] = NULL, }; -static struct llc_conn_state_trans llc_common_state_trans_2 = { +static const struct llc_conn_state_trans llc_common_state_trans_2 = { .ev = llc_conn_ev_rst_req, .next_state = LLC_CONN_STATE_RESET, .ev_qualifiers = NONE, @@ -79,7 +79,7 @@ static const llc_conn_action_t llc_common_actions_3[] = { [8] = NULL, }; -static struct llc_conn_state_trans llc_common_state_trans_3 = { +static const struct llc_conn_state_trans llc_common_state_trans_3 = { .ev = llc_conn_ev_rx_sabme_cmd_pbit_set_x, .next_state = LLC_CONN_STATE_NORMAL, .ev_qualifiers = NONE, @@ -95,7 +95,7 @@ static const llc_conn_action_t llc_common_actions_4[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_common_state_trans_4 = { +static const struct llc_conn_state_trans llc_common_state_trans_4 = { .ev = llc_conn_ev_rx_disc_cmd_pbit_set_x, .next_state = LLC_CONN_STATE_ADM, .ev_qualifiers = NONE, @@ -114,7 +114,7 @@ static const llc_conn_action_t llc_common_actions_5[] = { [7] = NULL, }; -static struct llc_conn_state_trans llc_common_state_trans_5 = { +static const struct llc_conn_state_trans llc_common_state_trans_5 = { .ev = llc_conn_ev_rx_frmr_rsp_fbit_set_x, .next_state = LLC_CONN_STATE_RESET, .ev_qualifiers = NONE, @@ -129,7 +129,7 @@ static const llc_conn_action_t llc_common_actions_6[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_common_state_trans_6 = { +static const struct llc_conn_state_trans llc_common_state_trans_6 = { .ev = llc_conn_ev_rx_dm_rsp_fbit_set_x, .next_state = LLC_CONN_STATE_ADM, .ev_qualifiers = NONE, @@ -145,7 +145,7 @@ static const llc_conn_action_t llc_common_actions_7a[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_common_state_trans_7a = { +static const struct llc_conn_state_trans llc_common_state_trans_7a = { .ev = llc_conn_ev_rx_zzz_cmd_pbit_set_x_inval_nr, .next_state = LLC_CONN_STATE_ERROR, .ev_qualifiers = NONE, @@ -161,7 +161,7 @@ static const llc_conn_action_t 
llc_common_actions_7b[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_common_state_trans_7b = { +static const struct llc_conn_state_trans llc_common_state_trans_7b = { .ev = llc_conn_ev_rx_i_cmd_pbit_set_x_inval_ns, .next_state = LLC_CONN_STATE_ERROR, .ev_qualifiers = NONE, @@ -177,7 +177,7 @@ static const llc_conn_action_t llc_common_actions_8a[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_common_state_trans_8a = { +static const struct llc_conn_state_trans llc_common_state_trans_8a = { .ev = llc_conn_ev_rx_zzz_rsp_fbit_set_x_inval_nr, .next_state = LLC_CONN_STATE_ERROR, .ev_qualifiers = NONE, @@ -193,7 +193,7 @@ static const llc_conn_action_t llc_common_actions_8b[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_common_state_trans_8b = { +static const struct llc_conn_state_trans llc_common_state_trans_8b = { .ev = llc_conn_ev_rx_i_rsp_fbit_set_x_inval_ns, .next_state = LLC_CONN_STATE_ERROR, .ev_qualifiers = NONE, @@ -209,7 +209,7 @@ static const llc_conn_action_t llc_common_actions_8c[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_common_state_trans_8c = { +static const struct llc_conn_state_trans llc_common_state_trans_8c = { .ev = llc_conn_ev_rx_bad_pdu, .next_state = LLC_CONN_STATE_ERROR, .ev_qualifiers = NONE, @@ -225,7 +225,7 @@ static const llc_conn_action_t llc_common_actions_9[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_common_state_trans_9 = { +static const struct llc_conn_state_trans llc_common_state_trans_9 = { .ev = llc_conn_ev_rx_ua_rsp_fbit_set_x, .next_state = LLC_CONN_STATE_ERROR, .ev_qualifiers = NONE, @@ -247,7 +247,7 @@ static const llc_conn_action_t llc_common_actions_10[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_common_state_trans_10 = { +static const struct llc_conn_state_trans llc_common_state_trans_10 = { .ev = llc_conn_ev_rx_xxx_rsp_fbit_set_1, .next_state = LLC_CONN_STATE_ERROR, .ev_qualifiers = llc_common_ev_qfyrs_10, @@ -270,7 +270,7 @@ static const llc_conn_action_t llc_common_actions_11a[] = { [5] = NULL, }; -static struct llc_conn_state_trans llc_common_state_trans_11a = { +static const struct llc_conn_state_trans llc_common_state_trans_11a = { .ev = llc_conn_ev_p_tmr_exp, .next_state = LLC_CONN_STATE_RESET, .ev_qualifiers = llc_common_ev_qfyrs_11a, @@ -292,7 +292,7 @@ static const llc_conn_action_t llc_common_actions_11b[] = { [5] = NULL, }; -static struct llc_conn_state_trans llc_common_state_trans_11b = { +static const struct llc_conn_state_trans llc_common_state_trans_11b = { .ev = llc_conn_ev_ack_tmr_exp, .next_state = LLC_CONN_STATE_RESET, .ev_qualifiers = llc_common_ev_qfyrs_11b, @@ -314,7 +314,7 @@ static const llc_conn_action_t llc_common_actions_11c[] = { [5] = NULL, }; -static struct llc_conn_state_trans llc_common_state_trans_11c = { +static const struct llc_conn_state_trans llc_common_state_trans_11c = { .ev = llc_conn_ev_rej_tmr_exp, .next_state = LLC_CONN_STATE_RESET, .ev_qualifiers = llc_common_ev_qfyrs_11c, @@ -336,7 +336,7 @@ static const llc_conn_action_t llc_common_actions_11d[] = { [5] = NULL, }; -static struct llc_conn_state_trans llc_common_state_trans_11d = { +static const struct llc_conn_state_trans llc_common_state_trans_11d = { .ev = llc_conn_ev_busy_tmr_exp, .next_state = LLC_CONN_STATE_RESET, .ev_qualifiers = llc_common_ev_qfyrs_11d, @@ -347,7 +347,7 @@ static struct llc_conn_state_trans llc_common_state_trans_11d = { * Common dummy state transition; must be last entry for all state * transition groups - it'll be on .bss, so will be zeroed. 
*/ -static struct llc_conn_state_trans llc_common_state_trans_end; +static const struct llc_conn_state_trans llc_common_state_trans_end; /* LLC_CONN_STATE_ADM transitions */ /* State transitions for LLC_CONN_EV_CONN_REQ event */ @@ -359,7 +359,7 @@ static const llc_conn_action_t llc_adm_actions_1[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_adm_state_trans_1 = { +static const struct llc_conn_state_trans llc_adm_state_trans_1 = { .ev = llc_conn_ev_conn_req, .next_state = LLC_CONN_STATE_SETUP, .ev_qualifiers = NONE, @@ -378,7 +378,7 @@ static const llc_conn_action_t llc_adm_actions_2[] = { [7] = NULL, }; -static struct llc_conn_state_trans llc_adm_state_trans_2 = { +static const struct llc_conn_state_trans llc_adm_state_trans_2 = { .ev = llc_conn_ev_rx_sabme_cmd_pbit_set_x, .next_state = LLC_CONN_STATE_NORMAL, .ev_qualifiers = NONE, @@ -392,7 +392,7 @@ static const llc_conn_action_t llc_adm_actions_3[] = { [2] = NULL, }; -static struct llc_conn_state_trans llc_adm_state_trans_3 = { +static const struct llc_conn_state_trans llc_adm_state_trans_3 = { .ev = llc_conn_ev_rx_disc_cmd_pbit_set_x, .next_state = LLC_CONN_STATE_ADM, .ev_qualifiers = NONE, @@ -406,7 +406,7 @@ static const llc_conn_action_t llc_adm_actions_4[] = { [2] = NULL, }; -static struct llc_conn_state_trans llc_adm_state_trans_4 = { +static const struct llc_conn_state_trans llc_adm_state_trans_4 = { .ev = llc_conn_ev_rx_xxx_cmd_pbit_set_1, .next_state = LLC_CONN_STATE_ADM, .ev_qualifiers = NONE, @@ -419,7 +419,7 @@ static const llc_conn_action_t llc_adm_actions_5[] = { [1] = NULL, }; -static struct llc_conn_state_trans llc_adm_state_trans_5 = { +static const struct llc_conn_state_trans llc_adm_state_trans_5 = { .ev = llc_conn_ev_rx_any_frame, .next_state = LLC_CONN_OUT_OF_SVC, .ev_qualifiers = NONE, @@ -430,7 +430,7 @@ static struct llc_conn_state_trans llc_adm_state_trans_5 = { * Array of pointers; * one to each transition */ -static struct llc_conn_state_trans *llc_adm_state_transitions[] = { +static const struct llc_conn_state_trans *llc_adm_state_transitions[] = { [0] = &llc_adm_state_trans_1, /* Request */ [1] = &llc_common_state_trans_end, [2] = &llc_common_state_trans_end, /* local_busy */ @@ -453,7 +453,7 @@ static const llc_conn_action_t llc_setup_actions_1[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_setup_state_trans_1 = { +static const struct llc_conn_state_trans llc_setup_state_trans_1 = { .ev = llc_conn_ev_rx_sabme_cmd_pbit_set_x, .next_state = LLC_CONN_STATE_SETUP, .ev_qualifiers = NONE, @@ -477,7 +477,7 @@ static const llc_conn_action_t llc_setup_actions_2[] = { [6] = NULL, }; -static struct llc_conn_state_trans llc_setup_state_trans_2 = { +static const struct llc_conn_state_trans llc_setup_state_trans_2 = { .ev = llc_conn_ev_rx_ua_rsp_fbit_set_x, .next_state = LLC_CONN_STATE_NORMAL, .ev_qualifiers = llc_setup_ev_qfyrs_2, @@ -498,7 +498,7 @@ static const llc_conn_action_t llc_setup_actions_3[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_setup_state_trans_3 = { +static const struct llc_conn_state_trans llc_setup_state_trans_3 = { .ev = llc_conn_ev_ack_tmr_exp, .next_state = LLC_CONN_STATE_NORMAL, .ev_qualifiers = llc_setup_ev_qfyrs_3, @@ -519,7 +519,7 @@ static const llc_conn_action_t llc_setup_actions_4[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_setup_state_trans_4 = { +static const struct llc_conn_state_trans llc_setup_state_trans_4 = { .ev = llc_conn_ev_rx_disc_cmd_pbit_set_x, .next_state = LLC_CONN_STATE_ADM, .ev_qualifiers = llc_setup_ev_qfyrs_4, 
@@ -539,7 +539,7 @@ static const llc_conn_action_t llc_setup_actions_5[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_setup_state_trans_5 = { +static const struct llc_conn_state_trans llc_setup_state_trans_5 = { .ev = llc_conn_ev_rx_dm_rsp_fbit_set_x, .next_state = LLC_CONN_STATE_ADM, .ev_qualifiers = llc_setup_ev_qfyrs_5, @@ -560,7 +560,7 @@ static const llc_conn_action_t llc_setup_actions_7[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_setup_state_trans_7 = { +static const struct llc_conn_state_trans llc_setup_state_trans_7 = { .ev = llc_conn_ev_ack_tmr_exp, .next_state = LLC_CONN_STATE_SETUP, .ev_qualifiers = llc_setup_ev_qfyrs_7, @@ -581,7 +581,7 @@ static const llc_conn_action_t llc_setup_actions_8[] = { [2] = NULL, }; -static struct llc_conn_state_trans llc_setup_state_trans_8 = { +static const struct llc_conn_state_trans llc_setup_state_trans_8 = { .ev = llc_conn_ev_ack_tmr_exp, .next_state = LLC_CONN_STATE_ADM, .ev_qualifiers = llc_setup_ev_qfyrs_8, @@ -592,7 +592,7 @@ static struct llc_conn_state_trans llc_setup_state_trans_8 = { * Array of pointers; * one to each transition */ -static struct llc_conn_state_trans *llc_setup_state_transitions[] = { +static const struct llc_conn_state_trans *llc_setup_state_transitions[] = { [0] = &llc_common_state_trans_end, /* Request */ [1] = &llc_common_state_trans_end, /* local busy */ [2] = &llc_common_state_trans_end, /* init_pf_cycle */ @@ -622,7 +622,7 @@ static const llc_conn_action_t llc_normal_actions_1[] = { [2] = NULL, }; -static struct llc_conn_state_trans llc_normal_state_trans_1 = { +static const struct llc_conn_state_trans llc_normal_state_trans_1 = { .ev = llc_conn_ev_data_req, .next_state = LLC_CONN_STATE_NORMAL, .ev_qualifiers = llc_normal_ev_qfyrs_1, @@ -643,7 +643,7 @@ static const llc_conn_action_t llc_normal_actions_2[] = { [2] = NULL, }; -static struct llc_conn_state_trans llc_normal_state_trans_2 = { +static const struct llc_conn_state_trans llc_normal_state_trans_2 = { .ev = llc_conn_ev_data_req, .next_state = LLC_CONN_STATE_NORMAL, .ev_qualifiers = llc_normal_ev_qfyrs_2, @@ -660,7 +660,7 @@ static const llc_conn_ev_qfyr_t llc_normal_ev_qfyrs_2_1[] = { /* just one member, NULL, .bss zeroes it */ static const llc_conn_action_t llc_normal_actions_2_1[1]; -static struct llc_conn_state_trans llc_normal_state_trans_2_1 = { +static const struct llc_conn_state_trans llc_normal_state_trans_2_1 = { .ev = llc_conn_ev_data_req, .next_state = LLC_CONN_STATE_NORMAL, .ev_qualifiers = llc_normal_ev_qfyrs_2_1, @@ -680,7 +680,7 @@ static const llc_conn_action_t llc_normal_actions_3[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_normal_state_trans_3 = { +static const struct llc_conn_state_trans llc_normal_state_trans_3 = { .ev = llc_conn_ev_local_busy_detected, .next_state = LLC_CONN_STATE_BUSY, .ev_qualifiers = llc_normal_ev_qfyrs_3, @@ -700,7 +700,7 @@ static const llc_conn_action_t llc_normal_actions_4[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_normal_state_trans_4 = { +static const struct llc_conn_state_trans llc_normal_state_trans_4 = { .ev = llc_conn_ev_local_busy_detected, .next_state = LLC_CONN_STATE_BUSY, .ev_qualifiers = llc_normal_ev_qfyrs_4, @@ -723,7 +723,7 @@ static const llc_conn_action_t llc_normal_actions_5a[] = { [6] = NULL, }; -static struct llc_conn_state_trans llc_normal_state_trans_5a = { +static const struct llc_conn_state_trans llc_normal_state_trans_5a = { .ev = llc_conn_ev_rx_i_cmd_pbit_set_0_unexpd_ns, .next_state = LLC_CONN_STATE_REJ, .ev_qualifiers = 
llc_normal_ev_qfyrs_5a, @@ -746,7 +746,7 @@ static const llc_conn_action_t llc_normal_actions_5b[] = { [6] = NULL, }; -static struct llc_conn_state_trans llc_normal_state_trans_5b = { +static const struct llc_conn_state_trans llc_normal_state_trans_5b = { .ev = llc_conn_ev_rx_i_rsp_fbit_set_0_unexpd_ns, .next_state = LLC_CONN_STATE_REJ, .ev_qualifiers = llc_normal_ev_qfyrs_5b, @@ -769,7 +769,7 @@ static const llc_conn_action_t llc_normal_actions_5c[] = { [6] = NULL, }; -static struct llc_conn_state_trans llc_normal_state_trans_5c = { +static const struct llc_conn_state_trans llc_normal_state_trans_5c = { .ev = llc_conn_ev_rx_i_rsp_fbit_set_1_unexpd_ns, .next_state = LLC_CONN_STATE_REJ, .ev_qualifiers = llc_normal_ev_qfyrs_5c, @@ -790,7 +790,7 @@ static const llc_conn_action_t llc_normal_actions_6a[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_normal_state_trans_6a = { +static const struct llc_conn_state_trans llc_normal_state_trans_6a = { .ev = llc_conn_ev_rx_i_cmd_pbit_set_0_unexpd_ns, .next_state = LLC_CONN_STATE_REJ, .ev_qualifiers = llc_normal_ev_qfyrs_6a, @@ -811,7 +811,7 @@ static const llc_conn_action_t llc_normal_actions_6b[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_normal_state_trans_6b = { +static const struct llc_conn_state_trans llc_normal_state_trans_6b = { .ev = llc_conn_ev_rx_i_rsp_fbit_set_0_unexpd_ns, .next_state = LLC_CONN_STATE_REJ, .ev_qualifiers = llc_normal_ev_qfyrs_6b, @@ -827,7 +827,7 @@ static const llc_conn_action_t llc_normal_actions_7[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_normal_state_trans_7 = { +static const struct llc_conn_state_trans llc_normal_state_trans_7 = { .ev = llc_conn_ev_rx_i_cmd_pbit_set_1_unexpd_ns, .next_state = LLC_CONN_STATE_REJ, .ev_qualifiers = NONE, @@ -850,7 +850,7 @@ static const llc_conn_action_t llc_normal_actions_8[] = { [6] = NULL, }; -static struct llc_conn_state_trans llc_normal_state_trans_8a = { +static const struct llc_conn_state_trans llc_normal_state_trans_8a = { .ev = llc_conn_ev_rx_i_rsp_fbit_set_x, .next_state = LLC_CONN_STATE_NORMAL, .ev_qualifiers = llc_normal_ev_qfyrs_8a, @@ -863,7 +863,7 @@ static const llc_conn_ev_qfyr_t llc_normal_ev_qfyrs_8b[] = { [1] = NULL, }; -static struct llc_conn_state_trans llc_normal_state_trans_8b = { +static const struct llc_conn_state_trans llc_normal_state_trans_8b = { .ev = llc_conn_ev_rx_i_cmd_pbit_set_0, .next_state = LLC_CONN_STATE_NORMAL, .ev_qualifiers = llc_normal_ev_qfyrs_8b, @@ -884,7 +884,7 @@ static const llc_conn_action_t llc_normal_actions_9a[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_normal_state_trans_9a = { +static const struct llc_conn_state_trans llc_normal_state_trans_9a = { .ev = llc_conn_ev_rx_i_rsp_fbit_set_0, .next_state = LLC_CONN_STATE_NORMAL, .ev_qualifiers = llc_normal_ev_qfyrs_9a, @@ -905,7 +905,7 @@ static const llc_conn_action_t llc_normal_actions_9b[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_normal_state_trans_9b = { +static const struct llc_conn_state_trans llc_normal_state_trans_9b = { .ev = llc_conn_ev_rx_i_cmd_pbit_set_0, .next_state = LLC_CONN_STATE_NORMAL, .ev_qualifiers = llc_normal_ev_qfyrs_9b, @@ -922,7 +922,7 @@ static const llc_conn_action_t llc_normal_actions_10[] = { [5] = NULL, }; -static struct llc_conn_state_trans llc_normal_state_trans_10 = { +static const struct llc_conn_state_trans llc_normal_state_trans_10 = { .ev = llc_conn_ev_rx_i_cmd_pbit_set_1, .next_state = LLC_CONN_STATE_NORMAL, .ev_qualifiers = NONE, @@ -937,7 +937,7 @@ static const 
llc_conn_action_t llc_normal_actions_11a[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_normal_state_trans_11a = { +static const struct llc_conn_state_trans llc_normal_state_trans_11a = { .ev = llc_conn_ev_rx_rr_cmd_pbit_set_0, .next_state = LLC_CONN_STATE_NORMAL, .ev_qualifiers = NONE, @@ -952,7 +952,7 @@ static const llc_conn_action_t llc_normal_actions_11b[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_normal_state_trans_11b = { +static const struct llc_conn_state_trans llc_normal_state_trans_11b = { .ev = llc_conn_ev_rx_rr_rsp_fbit_set_0, .next_state = LLC_CONN_STATE_NORMAL, .ev_qualifiers = NONE, @@ -973,7 +973,7 @@ static const llc_conn_action_t llc_normal_actions_11c[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_normal_state_trans_11c = { +static const struct llc_conn_state_trans llc_normal_state_trans_11c = { .ev = llc_conn_ev_rx_rr_rsp_fbit_set_1, .next_state = LLC_CONN_STATE_NORMAL, .ev_qualifiers = llc_normal_ev_qfyrs_11c, @@ -990,7 +990,7 @@ static const llc_conn_action_t llc_normal_actions_12[] = { [5] = NULL, }; -static struct llc_conn_state_trans llc_normal_state_trans_12 = { +static const struct llc_conn_state_trans llc_normal_state_trans_12 = { .ev = llc_conn_ev_rx_rr_cmd_pbit_set_1, .next_state = LLC_CONN_STATE_NORMAL, .ev_qualifiers = NONE, @@ -1005,7 +1005,7 @@ static const llc_conn_action_t llc_normal_actions_13a[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_normal_state_trans_13a = { +static const struct llc_conn_state_trans llc_normal_state_trans_13a = { .ev = llc_conn_ev_rx_rnr_cmd_pbit_set_0, .next_state = LLC_CONN_STATE_NORMAL, .ev_qualifiers = NONE, @@ -1020,7 +1020,7 @@ static const llc_conn_action_t llc_normal_actions_13b[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_normal_state_trans_13b = { +static const struct llc_conn_state_trans llc_normal_state_trans_13b = { .ev = llc_conn_ev_rx_rnr_rsp_fbit_set_0, .next_state = LLC_CONN_STATE_NORMAL, .ev_qualifiers = NONE, @@ -1040,7 +1040,7 @@ static const llc_conn_action_t llc_normal_actions_13c[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_normal_state_trans_13c = { +static const struct llc_conn_state_trans llc_normal_state_trans_13c = { .ev = llc_conn_ev_rx_rnr_rsp_fbit_set_1, .next_state = LLC_CONN_STATE_NORMAL, .ev_qualifiers = llc_normal_ev_qfyrs_13c, @@ -1057,7 +1057,7 @@ static const llc_conn_action_t llc_normal_actions_14[] = { [5] = NULL, }; -static struct llc_conn_state_trans llc_normal_state_trans_14 = { +static const struct llc_conn_state_trans llc_normal_state_trans_14 = { .ev = llc_conn_ev_rx_rnr_cmd_pbit_set_1, .next_state = LLC_CONN_STATE_NORMAL, .ev_qualifiers = NONE, @@ -1080,7 +1080,7 @@ static const llc_conn_action_t llc_normal_actions_15a[] = { [6] = NULL, }; -static struct llc_conn_state_trans llc_normal_state_trans_15a = { +static const struct llc_conn_state_trans llc_normal_state_trans_15a = { .ev = llc_conn_ev_rx_rej_cmd_pbit_set_0, .next_state = LLC_CONN_STATE_NORMAL, .ev_qualifiers = llc_normal_ev_qfyrs_15a, @@ -1103,7 +1103,7 @@ static const llc_conn_action_t llc_normal_actions_15b[] = { [6] = NULL, }; -static struct llc_conn_state_trans llc_normal_state_trans_15b = { +static const struct llc_conn_state_trans llc_normal_state_trans_15b = { .ev = llc_conn_ev_rx_rej_rsp_fbit_set_x, .next_state = LLC_CONN_STATE_NORMAL, .ev_qualifiers = llc_normal_ev_qfyrs_15b, @@ -1125,7 +1125,7 @@ static const llc_conn_action_t llc_normal_actions_16a[] = { [5] = NULL, }; -static struct llc_conn_state_trans 
llc_normal_state_trans_16a = { +static const struct llc_conn_state_trans llc_normal_state_trans_16a = { .ev = llc_conn_ev_rx_rej_cmd_pbit_set_0, .next_state = LLC_CONN_STATE_NORMAL, .ev_qualifiers = llc_normal_ev_qfyrs_16a, @@ -1147,7 +1147,7 @@ static const llc_conn_action_t llc_normal_actions_16b[] = { [5] = NULL, }; -static struct llc_conn_state_trans llc_normal_state_trans_16b = { +static const struct llc_conn_state_trans llc_normal_state_trans_16b = { .ev = llc_conn_ev_rx_rej_rsp_fbit_set_0, .next_state = LLC_CONN_STATE_NORMAL, .ev_qualifiers = llc_normal_ev_qfyrs_16b, @@ -1164,7 +1164,7 @@ static const llc_conn_action_t llc_normal_actions_17[] = { [5] = NULL, }; -static struct llc_conn_state_trans llc_normal_state_trans_17 = { +static const struct llc_conn_state_trans llc_normal_state_trans_17 = { .ev = llc_conn_ev_rx_rej_cmd_pbit_set_1, .next_state = LLC_CONN_STATE_NORMAL, .ev_qualifiers = NONE, @@ -1183,7 +1183,7 @@ static const llc_conn_action_t llc_normal_actions_18[] = { [2] = NULL, }; -static struct llc_conn_state_trans llc_normal_state_trans_18 = { +static const struct llc_conn_state_trans llc_normal_state_trans_18 = { .ev = llc_conn_ev_init_p_f_cycle, .next_state = LLC_CONN_STATE_NORMAL, .ev_qualifiers = llc_normal_ev_qfyrs_18, @@ -1205,7 +1205,7 @@ static const llc_conn_action_t llc_normal_actions_19[] = { [5] = NULL, }; -static struct llc_conn_state_trans llc_normal_state_trans_19 = { +static const struct llc_conn_state_trans llc_normal_state_trans_19 = { .ev = llc_conn_ev_p_tmr_exp, .next_state = LLC_CONN_STATE_AWAIT, .ev_qualifiers = llc_normal_ev_qfyrs_19, @@ -1228,7 +1228,7 @@ static const llc_conn_action_t llc_normal_actions_20a[] = { [5] = NULL, }; -static struct llc_conn_state_trans llc_normal_state_trans_20a = { +static const struct llc_conn_state_trans llc_normal_state_trans_20a = { .ev = llc_conn_ev_ack_tmr_exp, .next_state = LLC_CONN_STATE_AWAIT, .ev_qualifiers = llc_normal_ev_qfyrs_20a, @@ -1251,7 +1251,7 @@ static const llc_conn_action_t llc_normal_actions_20b[] = { [5] = NULL, }; -static struct llc_conn_state_trans llc_normal_state_trans_20b = { +static const struct llc_conn_state_trans llc_normal_state_trans_20b = { .ev = llc_conn_ev_busy_tmr_exp, .next_state = LLC_CONN_STATE_AWAIT, .ev_qualifiers = llc_normal_ev_qfyrs_20b, @@ -1270,7 +1270,7 @@ static const llc_conn_action_t llc_normal_actions_21[] = { [2] = NULL, }; -static struct llc_conn_state_trans llc_normal_state_trans_21 = { +static const struct llc_conn_state_trans llc_normal_state_trans_21 = { .ev = llc_conn_ev_tx_buffer_full, .next_state = LLC_CONN_STATE_NORMAL, .ev_qualifiers = llc_normal_ev_qfyrs_21, @@ -1281,7 +1281,7 @@ static struct llc_conn_state_trans llc_normal_state_trans_21 = { * Array of pointers; * one to each transition */ -static struct llc_conn_state_trans *llc_normal_state_transitions[] = { +static const struct llc_conn_state_trans *llc_normal_state_transitions[] = { [0] = &llc_normal_state_trans_1, /* Requests */ [1] = &llc_normal_state_trans_2, [2] = &llc_normal_state_trans_2_1, @@ -1354,7 +1354,7 @@ static const llc_conn_action_t llc_busy_actions_1[] = { [2] = NULL, }; -static struct llc_conn_state_trans llc_busy_state_trans_1 = { +static const struct llc_conn_state_trans llc_busy_state_trans_1 = { .ev = llc_conn_ev_data_req, .next_state = LLC_CONN_STATE_BUSY, .ev_qualifiers = llc_busy_ev_qfyrs_1, @@ -1374,7 +1374,7 @@ static const llc_conn_action_t llc_busy_actions_2[] = { [2] = NULL, }; -static struct llc_conn_state_trans llc_busy_state_trans_2 = { +static const struct 
llc_conn_state_trans llc_busy_state_trans_2 = { .ev = llc_conn_ev_data_req, .next_state = LLC_CONN_STATE_BUSY, .ev_qualifiers = llc_busy_ev_qfyrs_2, @@ -1391,7 +1391,7 @@ static const llc_conn_ev_qfyr_t llc_busy_ev_qfyrs_2_1[] = { /* just one member, NULL, .bss zeroes it */ static const llc_conn_action_t llc_busy_actions_2_1[1]; -static struct llc_conn_state_trans llc_busy_state_trans_2_1 = { +static const struct llc_conn_state_trans llc_busy_state_trans_2_1 = { .ev = llc_conn_ev_data_req, .next_state = LLC_CONN_STATE_BUSY, .ev_qualifiers = llc_busy_ev_qfyrs_2_1, @@ -1411,7 +1411,7 @@ static const llc_conn_action_t llc_busy_actions_3[] = { [2] = NULL, }; -static struct llc_conn_state_trans llc_busy_state_trans_3 = { +static const struct llc_conn_state_trans llc_busy_state_trans_3 = { .ev = llc_conn_ev_local_busy_cleared, .next_state = LLC_CONN_STATE_REJ, .ev_qualifiers = llc_busy_ev_qfyrs_3, @@ -1431,7 +1431,7 @@ static const llc_conn_action_t llc_busy_actions_4[] = { [2] = NULL, }; -static struct llc_conn_state_trans llc_busy_state_trans_4 = { +static const struct llc_conn_state_trans llc_busy_state_trans_4 = { .ev = llc_conn_ev_local_busy_cleared, .next_state = LLC_CONN_STATE_REJ, .ev_qualifiers = llc_busy_ev_qfyrs_4, @@ -1450,7 +1450,7 @@ static const llc_conn_action_t llc_busy_actions_5[] = { [1] = NULL, }; -static struct llc_conn_state_trans llc_busy_state_trans_5 = { +static const struct llc_conn_state_trans llc_busy_state_trans_5 = { .ev = llc_conn_ev_local_busy_cleared, .next_state = LLC_CONN_STATE_NORMAL, .ev_qualifiers = llc_busy_ev_qfyrs_5, @@ -1469,7 +1469,7 @@ static const llc_conn_action_t llc_busy_actions_6[] = { [1] = NULL, }; -static struct llc_conn_state_trans llc_busy_state_trans_6 = { +static const struct llc_conn_state_trans llc_busy_state_trans_6 = { .ev = llc_conn_ev_local_busy_cleared, .next_state = LLC_CONN_STATE_NORMAL, .ev_qualifiers = llc_busy_ev_qfyrs_6, @@ -1488,7 +1488,7 @@ static const llc_conn_action_t llc_busy_actions_7[] = { [1] = NULL, }; -static struct llc_conn_state_trans llc_busy_state_trans_7 = { +static const struct llc_conn_state_trans llc_busy_state_trans_7 = { .ev = llc_conn_ev_local_busy_cleared, .next_state = LLC_CONN_STATE_REJ, .ev_qualifiers = llc_busy_ev_qfyrs_7, @@ -1507,7 +1507,7 @@ static const llc_conn_action_t llc_busy_actions_8[] = { [1] = NULL, }; -static struct llc_conn_state_trans llc_busy_state_trans_8 = { +static const struct llc_conn_state_trans llc_busy_state_trans_8 = { .ev = llc_conn_ev_local_busy_cleared, .next_state = LLC_CONN_STATE_REJ, .ev_qualifiers = llc_busy_ev_qfyrs_8, @@ -1529,7 +1529,7 @@ static const llc_conn_action_t llc_busy_actions_9a[] = { [5] = NULL, }; -static struct llc_conn_state_trans llc_busy_state_trans_9a = { +static const struct llc_conn_state_trans llc_busy_state_trans_9a = { .ev = llc_conn_ev_rx_i_rsp_fbit_set_x_unexpd_ns, .next_state = LLC_CONN_STATE_BUSY, .ev_qualifiers = llc_busy_ev_qfyrs_9a, @@ -1551,7 +1551,7 @@ static const llc_conn_action_t llc_busy_actions_9b[] = { [5] = NULL, }; -static struct llc_conn_state_trans llc_busy_state_trans_9b = { +static const struct llc_conn_state_trans llc_busy_state_trans_9b = { .ev = llc_conn_ev_rx_i_cmd_pbit_set_0_unexpd_ns, .next_state = LLC_CONN_STATE_BUSY, .ev_qualifiers = llc_busy_ev_qfyrs_9b, @@ -1571,7 +1571,7 @@ static const llc_conn_action_t llc_busy_actions_10a[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_busy_state_trans_10a = { +static const struct llc_conn_state_trans llc_busy_state_trans_10a = { .ev = 
llc_conn_ev_rx_i_rsp_fbit_set_0_unexpd_ns, .next_state = LLC_CONN_STATE_BUSY, .ev_qualifiers = llc_busy_ev_qfyrs_10a, @@ -1591,7 +1591,7 @@ static const llc_conn_action_t llc_busy_actions_10b[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_busy_state_trans_10b = { +static const struct llc_conn_state_trans llc_busy_state_trans_10b = { .ev = llc_conn_ev_rx_i_cmd_pbit_set_0_unexpd_ns, .next_state = LLC_CONN_STATE_BUSY, .ev_qualifiers = llc_busy_ev_qfyrs_10b, @@ -1606,7 +1606,7 @@ static const llc_conn_action_t llc_busy_actions_11[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_busy_state_trans_11 = { +static const struct llc_conn_state_trans llc_busy_state_trans_11 = { .ev = llc_conn_ev_rx_i_cmd_pbit_set_1_unexpd_ns, .next_state = LLC_CONN_STATE_BUSY, .ev_qualifiers = NONE, @@ -1624,7 +1624,7 @@ static const llc_conn_action_t llc_busy_actions_12[] = { [6] = NULL, }; -static struct llc_conn_state_trans llc_busy_state_trans_12 = { +static const struct llc_conn_state_trans llc_busy_state_trans_12 = { .ev = llc_conn_ev_rx_i_cmd_pbit_set_1, .next_state = LLC_CONN_STATE_BUSY, .ev_qualifiers = NONE, @@ -1649,7 +1649,7 @@ static const llc_conn_action_t llc_busy_actions_13a[] = { [8] = NULL, }; -static struct llc_conn_state_trans llc_busy_state_trans_13a = { +static const struct llc_conn_state_trans llc_busy_state_trans_13a = { .ev = llc_conn_ev_rx_i_rsp_fbit_set_x, .next_state = LLC_CONN_STATE_BUSY, .ev_qualifiers = llc_busy_ev_qfyrs_13a, @@ -1674,7 +1674,7 @@ static const llc_conn_action_t llc_busy_actions_13b[] = { [8] = NULL, }; -static struct llc_conn_state_trans llc_busy_state_trans_13b = { +static const struct llc_conn_state_trans llc_busy_state_trans_13b = { .ev = llc_conn_ev_rx_i_cmd_pbit_set_0, .next_state = LLC_CONN_STATE_BUSY, .ev_qualifiers = llc_busy_ev_qfyrs_13b, @@ -1697,7 +1697,7 @@ static const llc_conn_action_t llc_busy_actions_14a[] = { [6] = NULL, }; -static struct llc_conn_state_trans llc_busy_state_trans_14a = { +static const struct llc_conn_state_trans llc_busy_state_trans_14a = { .ev = llc_conn_ev_rx_i_rsp_fbit_set_0, .next_state = LLC_CONN_STATE_BUSY, .ev_qualifiers = llc_busy_ev_qfyrs_14a, @@ -1720,7 +1720,7 @@ static const llc_conn_action_t llc_busy_actions_14b[] = { [6] = NULL, }; -static struct llc_conn_state_trans llc_busy_state_trans_14b = { +static const struct llc_conn_state_trans llc_busy_state_trans_14b = { .ev = llc_conn_ev_rx_i_cmd_pbit_set_0, .next_state = LLC_CONN_STATE_BUSY, .ev_qualifiers = llc_busy_ev_qfyrs_14b, @@ -1735,7 +1735,7 @@ static const llc_conn_action_t llc_busy_actions_15a[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_busy_state_trans_15a = { +static const struct llc_conn_state_trans llc_busy_state_trans_15a = { .ev = llc_conn_ev_rx_rr_cmd_pbit_set_0, .next_state = LLC_CONN_STATE_BUSY, .ev_qualifiers = NONE, @@ -1750,7 +1750,7 @@ static const llc_conn_action_t llc_busy_actions_15b[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_busy_state_trans_15b = { +static const struct llc_conn_state_trans llc_busy_state_trans_15b = { .ev = llc_conn_ev_rx_rr_rsp_fbit_set_0, .next_state = LLC_CONN_STATE_BUSY, .ev_qualifiers = NONE, @@ -1770,7 +1770,7 @@ static const llc_conn_action_t llc_busy_actions_15c[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_busy_state_trans_15c = { +static const struct llc_conn_state_trans llc_busy_state_trans_15c = { .ev = llc_conn_ev_rx_rr_rsp_fbit_set_1, .next_state = LLC_CONN_STATE_BUSY, .ev_qualifiers = llc_busy_ev_qfyrs_15c, @@ -1785,7 +1785,7 @@ static const 
llc_conn_action_t llc_busy_actions_16[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_busy_state_trans_16 = { +static const struct llc_conn_state_trans llc_busy_state_trans_16 = { .ev = llc_conn_ev_rx_rr_cmd_pbit_set_1, .next_state = LLC_CONN_STATE_BUSY, .ev_qualifiers = NONE, @@ -1800,7 +1800,7 @@ static const llc_conn_action_t llc_busy_actions_17a[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_busy_state_trans_17a = { +static const struct llc_conn_state_trans llc_busy_state_trans_17a = { .ev = llc_conn_ev_rx_rnr_cmd_pbit_set_0, .next_state = LLC_CONN_STATE_BUSY, .ev_qualifiers = NONE, @@ -1815,7 +1815,7 @@ static const llc_conn_action_t llc_busy_actions_17b[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_busy_state_trans_17b = { +static const struct llc_conn_state_trans llc_busy_state_trans_17b = { .ev = llc_conn_ev_rx_rnr_rsp_fbit_set_0, .next_state = LLC_CONN_STATE_BUSY, .ev_qualifiers = NONE, @@ -1835,7 +1835,7 @@ static const llc_conn_action_t llc_busy_actions_17c[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_busy_state_trans_17c = { +static const struct llc_conn_state_trans llc_busy_state_trans_17c = { .ev = llc_conn_ev_rx_rnr_rsp_fbit_set_1, .next_state = LLC_CONN_STATE_BUSY, .ev_qualifiers = llc_busy_ev_qfyrs_17c, @@ -1850,7 +1850,7 @@ static const llc_conn_action_t llc_busy_actions_18[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_busy_state_trans_18 = { +static const struct llc_conn_state_trans llc_busy_state_trans_18 = { .ev = llc_conn_ev_rx_rnr_cmd_pbit_set_1, .next_state = LLC_CONN_STATE_BUSY, .ev_qualifiers = NONE, @@ -1872,7 +1872,7 @@ static const llc_conn_action_t llc_busy_actions_19a[] = { [5] = NULL, }; -static struct llc_conn_state_trans llc_busy_state_trans_19a = { +static const struct llc_conn_state_trans llc_busy_state_trans_19a = { .ev = llc_conn_ev_rx_rej_cmd_pbit_set_0, .next_state = LLC_CONN_STATE_BUSY, .ev_qualifiers = llc_busy_ev_qfyrs_19a, @@ -1894,7 +1894,7 @@ static const llc_conn_action_t llc_busy_actions_19b[] = { [5] = NULL, }; -static struct llc_conn_state_trans llc_busy_state_trans_19b = { +static const struct llc_conn_state_trans llc_busy_state_trans_19b = { .ev = llc_conn_ev_rx_rej_rsp_fbit_set_x, .next_state = LLC_CONN_STATE_BUSY, .ev_qualifiers = llc_busy_ev_qfyrs_19b, @@ -1915,7 +1915,7 @@ static const llc_conn_action_t llc_busy_actions_20a[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_busy_state_trans_20a = { +static const struct llc_conn_state_trans llc_busy_state_trans_20a = { .ev = llc_conn_ev_rx_rej_cmd_pbit_set_0, .next_state = LLC_CONN_STATE_BUSY, .ev_qualifiers = llc_busy_ev_qfyrs_20a, @@ -1936,7 +1936,7 @@ static const llc_conn_action_t llc_busy_actions_20b[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_busy_state_trans_20b = { +static const struct llc_conn_state_trans llc_busy_state_trans_20b = { .ev = llc_conn_ev_rx_rej_rsp_fbit_set_0, .next_state = LLC_CONN_STATE_BUSY, .ev_qualifiers = llc_busy_ev_qfyrs_20b, @@ -1953,7 +1953,7 @@ static const llc_conn_action_t llc_busy_actions_21[] = { [5] = NULL, }; -static struct llc_conn_state_trans llc_busy_state_trans_21 = { +static const struct llc_conn_state_trans llc_busy_state_trans_21 = { .ev = llc_conn_ev_rx_rej_cmd_pbit_set_1, .next_state = LLC_CONN_STATE_BUSY, .ev_qualifiers = NONE, @@ -1972,7 +1972,7 @@ static const llc_conn_action_t llc_busy_actions_22[] = { [2] = NULL, }; -static struct llc_conn_state_trans llc_busy_state_trans_22 = { +static const struct llc_conn_state_trans 
llc_busy_state_trans_22 = { .ev = llc_conn_ev_init_p_f_cycle, .next_state = LLC_CONN_STATE_BUSY, .ev_qualifiers = llc_busy_ev_qfyrs_22, @@ -1993,7 +1993,7 @@ static const llc_conn_action_t llc_busy_actions_23[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_busy_state_trans_23 = { +static const struct llc_conn_state_trans llc_busy_state_trans_23 = { .ev = llc_conn_ev_p_tmr_exp, .next_state = LLC_CONN_STATE_AWAIT_BUSY, .ev_qualifiers = llc_busy_ev_qfyrs_23, @@ -2015,7 +2015,7 @@ static const llc_conn_action_t llc_busy_actions_24a[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_busy_state_trans_24a = { +static const struct llc_conn_state_trans llc_busy_state_trans_24a = { .ev = llc_conn_ev_ack_tmr_exp, .next_state = LLC_CONN_STATE_AWAIT_BUSY, .ev_qualifiers = llc_busy_ev_qfyrs_24a, @@ -2037,7 +2037,7 @@ static const llc_conn_action_t llc_busy_actions_24b[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_busy_state_trans_24b = { +static const struct llc_conn_state_trans llc_busy_state_trans_24b = { .ev = llc_conn_ev_busy_tmr_exp, .next_state = LLC_CONN_STATE_AWAIT_BUSY, .ev_qualifiers = llc_busy_ev_qfyrs_24b, @@ -2060,7 +2060,7 @@ static const llc_conn_action_t llc_busy_actions_25[] = { [5] = NULL, }; -static struct llc_conn_state_trans llc_busy_state_trans_25 = { +static const struct llc_conn_state_trans llc_busy_state_trans_25 = { .ev = llc_conn_ev_rej_tmr_exp, .next_state = LLC_CONN_STATE_AWAIT_BUSY, .ev_qualifiers = llc_busy_ev_qfyrs_25, @@ -2079,7 +2079,7 @@ static const llc_conn_action_t llc_busy_actions_26[] = { [1] = NULL, }; -static struct llc_conn_state_trans llc_busy_state_trans_26 = { +static const struct llc_conn_state_trans llc_busy_state_trans_26 = { .ev = llc_conn_ev_rej_tmr_exp, .next_state = LLC_CONN_STATE_BUSY, .ev_qualifiers = llc_busy_ev_qfyrs_26, @@ -2090,7 +2090,7 @@ static struct llc_conn_state_trans llc_busy_state_trans_26 = { * Array of pointers; * one to each transition */ -static struct llc_conn_state_trans *llc_busy_state_transitions[] = { +static const struct llc_conn_state_trans *llc_busy_state_transitions[] = { [0] = &llc_common_state_trans_1, /* Request */ [1] = &llc_common_state_trans_2, [2] = &llc_busy_state_trans_1, @@ -2166,7 +2166,7 @@ static const llc_conn_action_t llc_reject_actions_1[] = { [1] = NULL, }; -static struct llc_conn_state_trans llc_reject_state_trans_1 = { +static const struct llc_conn_state_trans llc_reject_state_trans_1 = { .ev = llc_conn_ev_data_req, .next_state = LLC_CONN_STATE_REJ, .ev_qualifiers = llc_reject_ev_qfyrs_1, @@ -2185,7 +2185,7 @@ static const llc_conn_action_t llc_reject_actions_2[] = { [1] = NULL, }; -static struct llc_conn_state_trans llc_reject_state_trans_2 = { +static const struct llc_conn_state_trans llc_reject_state_trans_2 = { .ev = llc_conn_ev_data_req, .next_state = LLC_CONN_STATE_REJ, .ev_qualifiers = llc_reject_ev_qfyrs_2, @@ -2202,7 +2202,7 @@ static const llc_conn_ev_qfyr_t llc_reject_ev_qfyrs_2_1[] = { /* just one member, NULL, .bss zeroes it */ static const llc_conn_action_t llc_reject_actions_2_1[1]; -static struct llc_conn_state_trans llc_reject_state_trans_2_1 = { +static const struct llc_conn_state_trans llc_reject_state_trans_2_1 = { .ev = llc_conn_ev_data_req, .next_state = LLC_CONN_STATE_REJ, .ev_qualifiers = llc_reject_ev_qfyrs_2_1, @@ -2222,7 +2222,7 @@ static const llc_conn_action_t llc_reject_actions_3[] = { [2] = NULL, }; -static struct llc_conn_state_trans llc_reject_state_trans_3 = { +static const struct llc_conn_state_trans llc_reject_state_trans_3 = { .ev 
= llc_conn_ev_local_busy_detected, .next_state = LLC_CONN_STATE_BUSY, .ev_qualifiers = llc_reject_ev_qfyrs_3, @@ -2241,7 +2241,7 @@ static const llc_conn_action_t llc_reject_actions_4[] = { [2] = NULL, }; -static struct llc_conn_state_trans llc_reject_state_trans_4 = { +static const struct llc_conn_state_trans llc_reject_state_trans_4 = { .ev = llc_conn_ev_local_busy_detected, .next_state = LLC_CONN_STATE_BUSY, .ev_qualifiers = llc_reject_ev_qfyrs_4, @@ -2256,7 +2256,7 @@ static const llc_conn_action_t llc_reject_actions_5a[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_reject_state_trans_5a = { +static const struct llc_conn_state_trans llc_reject_state_trans_5a = { .ev = llc_conn_ev_rx_i_cmd_pbit_set_0_unexpd_ns, .next_state = LLC_CONN_STATE_REJ, .ev_qualifiers = NONE, @@ -2271,7 +2271,7 @@ static const llc_conn_action_t llc_reject_actions_5b[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_reject_state_trans_5b = { +static const struct llc_conn_state_trans llc_reject_state_trans_5b = { .ev = llc_conn_ev_rx_i_rsp_fbit_set_0_unexpd_ns, .next_state = LLC_CONN_STATE_REJ, .ev_qualifiers = NONE, @@ -2291,7 +2291,7 @@ static const llc_conn_action_t llc_reject_actions_5c[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_reject_state_trans_5c = { +static const struct llc_conn_state_trans llc_reject_state_trans_5c = { .ev = llc_conn_ev_rx_i_rsp_fbit_set_1_unexpd_ns, .next_state = LLC_CONN_STATE_REJ, .ev_qualifiers = llc_reject_ev_qfyrs_5c, @@ -2305,7 +2305,7 @@ static const llc_conn_action_t llc_reject_actions_6[] = { [2] = NULL, }; -static struct llc_conn_state_trans llc_reject_state_trans_6 = { +static const struct llc_conn_state_trans llc_reject_state_trans_6 = { .ev = llc_conn_ev_rx_i_cmd_pbit_set_1_unexpd_ns, .next_state = LLC_CONN_STATE_REJ, .ev_qualifiers = NONE, @@ -2330,7 +2330,7 @@ static const llc_conn_action_t llc_reject_actions_7a[] = { }; -static struct llc_conn_state_trans llc_reject_state_trans_7a = { +static const struct llc_conn_state_trans llc_reject_state_trans_7a = { .ev = llc_conn_ev_rx_i_rsp_fbit_set_x, .next_state = LLC_CONN_STATE_NORMAL, .ev_qualifiers = llc_reject_ev_qfyrs_7a, @@ -2354,7 +2354,7 @@ static const llc_conn_action_t llc_reject_actions_7b[] = { [7] = NULL, }; -static struct llc_conn_state_trans llc_reject_state_trans_7b = { +static const struct llc_conn_state_trans llc_reject_state_trans_7b = { .ev = llc_conn_ev_rx_i_cmd_pbit_set_0, .next_state = LLC_CONN_STATE_NORMAL, .ev_qualifiers = llc_reject_ev_qfyrs_7b, @@ -2376,7 +2376,7 @@ static const llc_conn_action_t llc_reject_actions_8a[] = { [5] = NULL, }; -static struct llc_conn_state_trans llc_reject_state_trans_8a = { +static const struct llc_conn_state_trans llc_reject_state_trans_8a = { .ev = llc_conn_ev_rx_i_rsp_fbit_set_0, .next_state = LLC_CONN_STATE_NORMAL, .ev_qualifiers = llc_reject_ev_qfyrs_8a, @@ -2398,7 +2398,7 @@ static const llc_conn_action_t llc_reject_actions_8b[] = { [5] = NULL, }; -static struct llc_conn_state_trans llc_reject_state_trans_8b = { +static const struct llc_conn_state_trans llc_reject_state_trans_8b = { .ev = llc_conn_ev_rx_i_cmd_pbit_set_0, .next_state = LLC_CONN_STATE_NORMAL, .ev_qualifiers = llc_reject_ev_qfyrs_8b, @@ -2415,7 +2415,7 @@ static const llc_conn_action_t llc_reject_actions_9[] = { [5] = NULL, }; -static struct llc_conn_state_trans llc_reject_state_trans_9 = { +static const struct llc_conn_state_trans llc_reject_state_trans_9 = { .ev = llc_conn_ev_rx_i_cmd_pbit_set_1, .next_state = LLC_CONN_STATE_NORMAL, .ev_qualifiers = NONE, @@ 
-2430,7 +2430,7 @@ static const llc_conn_action_t llc_reject_actions_10a[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_reject_state_trans_10a = { +static const struct llc_conn_state_trans llc_reject_state_trans_10a = { .ev = llc_conn_ev_rx_rr_cmd_pbit_set_0, .next_state = LLC_CONN_STATE_REJ, .ev_qualifiers = NONE, @@ -2445,7 +2445,7 @@ static const llc_conn_action_t llc_reject_actions_10b[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_reject_state_trans_10b = { +static const struct llc_conn_state_trans llc_reject_state_trans_10b = { .ev = llc_conn_ev_rx_rr_rsp_fbit_set_0, .next_state = LLC_CONN_STATE_REJ, .ev_qualifiers = NONE, @@ -2465,7 +2465,7 @@ static const llc_conn_action_t llc_reject_actions_10c[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_reject_state_trans_10c = { +static const struct llc_conn_state_trans llc_reject_state_trans_10c = { .ev = llc_conn_ev_rx_rr_rsp_fbit_set_1, .next_state = LLC_CONN_STATE_REJ, .ev_qualifiers = llc_reject_ev_qfyrs_10c, @@ -2480,7 +2480,7 @@ static const llc_conn_action_t llc_reject_actions_11[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_reject_state_trans_11 = { +static const struct llc_conn_state_trans llc_reject_state_trans_11 = { .ev = llc_conn_ev_rx_rr_cmd_pbit_set_1, .next_state = LLC_CONN_STATE_REJ, .ev_qualifiers = NONE, @@ -2495,7 +2495,7 @@ static const llc_conn_action_t llc_reject_actions_12a[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_reject_state_trans_12a = { +static const struct llc_conn_state_trans llc_reject_state_trans_12a = { .ev = llc_conn_ev_rx_rnr_cmd_pbit_set_0, .next_state = LLC_CONN_STATE_REJ, .ev_qualifiers = NONE, @@ -2510,7 +2510,7 @@ static const llc_conn_action_t llc_reject_actions_12b[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_reject_state_trans_12b = { +static const struct llc_conn_state_trans llc_reject_state_trans_12b = { .ev = llc_conn_ev_rx_rnr_rsp_fbit_set_0, .next_state = LLC_CONN_STATE_REJ, .ev_qualifiers = NONE, @@ -2530,7 +2530,7 @@ static const llc_conn_action_t llc_reject_actions_12c[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_reject_state_trans_12c = { +static const struct llc_conn_state_trans llc_reject_state_trans_12c = { .ev = llc_conn_ev_rx_rnr_rsp_fbit_set_1, .next_state = LLC_CONN_STATE_REJ, .ev_qualifiers = llc_reject_ev_qfyrs_12c, @@ -2545,7 +2545,7 @@ static const llc_conn_action_t llc_reject_actions_13[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_reject_state_trans_13 = { +static const struct llc_conn_state_trans llc_reject_state_trans_13 = { .ev = llc_conn_ev_rx_rnr_cmd_pbit_set_1, .next_state = LLC_CONN_STATE_REJ, .ev_qualifiers = NONE, @@ -2567,7 +2567,7 @@ static const llc_conn_action_t llc_reject_actions_14a[] = { [5] = NULL, }; -static struct llc_conn_state_trans llc_reject_state_trans_14a = { +static const struct llc_conn_state_trans llc_reject_state_trans_14a = { .ev = llc_conn_ev_rx_rej_cmd_pbit_set_0, .next_state = LLC_CONN_STATE_REJ, .ev_qualifiers = llc_reject_ev_qfyrs_14a, @@ -2589,7 +2589,7 @@ static const llc_conn_action_t llc_reject_actions_14b[] = { [5] = NULL, }; -static struct llc_conn_state_trans llc_reject_state_trans_14b = { +static const struct llc_conn_state_trans llc_reject_state_trans_14b = { .ev = llc_conn_ev_rx_rej_rsp_fbit_set_x, .next_state = LLC_CONN_STATE_REJ, .ev_qualifiers = llc_reject_ev_qfyrs_14b, @@ -2610,7 +2610,7 @@ static const llc_conn_action_t llc_reject_actions_15a[] = { [4] = NULL, }; -static struct llc_conn_state_trans 
llc_reject_state_trans_15a = { +static const struct llc_conn_state_trans llc_reject_state_trans_15a = { .ev = llc_conn_ev_rx_rej_cmd_pbit_set_0, .next_state = LLC_CONN_STATE_REJ, .ev_qualifiers = llc_reject_ev_qfyrs_15a, @@ -2631,7 +2631,7 @@ static const llc_conn_action_t llc_reject_actions_15b[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_reject_state_trans_15b = { +static const struct llc_conn_state_trans llc_reject_state_trans_15b = { .ev = llc_conn_ev_rx_rej_rsp_fbit_set_0, .next_state = LLC_CONN_STATE_REJ, .ev_qualifiers = llc_reject_ev_qfyrs_15b, @@ -2647,7 +2647,7 @@ static const llc_conn_action_t llc_reject_actions_16[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_reject_state_trans_16 = { +static const struct llc_conn_state_trans llc_reject_state_trans_16 = { .ev = llc_conn_ev_rx_rej_cmd_pbit_set_1, .next_state = LLC_CONN_STATE_REJ, .ev_qualifiers = NONE, @@ -2666,7 +2666,7 @@ static const llc_conn_action_t llc_reject_actions_17[] = { [2] = NULL, }; -static struct llc_conn_state_trans llc_reject_state_trans_17 = { +static const struct llc_conn_state_trans llc_reject_state_trans_17 = { .ev = llc_conn_ev_init_p_f_cycle, .next_state = LLC_CONN_STATE_REJ, .ev_qualifiers = llc_reject_ev_qfyrs_17, @@ -2688,7 +2688,7 @@ static const llc_conn_action_t llc_reject_actions_18[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_reject_state_trans_18 = { +static const struct llc_conn_state_trans llc_reject_state_trans_18 = { .ev = llc_conn_ev_rej_tmr_exp, .next_state = LLC_CONN_STATE_REJ, .ev_qualifiers = llc_reject_ev_qfyrs_18, @@ -2710,7 +2710,7 @@ static const llc_conn_action_t llc_reject_actions_19[] = { [5] = NULL, }; -static struct llc_conn_state_trans llc_reject_state_trans_19 = { +static const struct llc_conn_state_trans llc_reject_state_trans_19 = { .ev = llc_conn_ev_p_tmr_exp, .next_state = LLC_CONN_STATE_AWAIT_REJ, .ev_qualifiers = llc_reject_ev_qfyrs_19, @@ -2733,7 +2733,7 @@ static const llc_conn_action_t llc_reject_actions_20a[] = { [5] = NULL, }; -static struct llc_conn_state_trans llc_reject_state_trans_20a = { +static const struct llc_conn_state_trans llc_reject_state_trans_20a = { .ev = llc_conn_ev_ack_tmr_exp, .next_state = LLC_CONN_STATE_AWAIT_REJ, .ev_qualifiers = llc_reject_ev_qfyrs_20a, @@ -2756,7 +2756,7 @@ static const llc_conn_action_t llc_reject_actions_20b[] = { [5] = NULL, }; -static struct llc_conn_state_trans llc_reject_state_trans_20b = { +static const struct llc_conn_state_trans llc_reject_state_trans_20b = { .ev = llc_conn_ev_busy_tmr_exp, .next_state = LLC_CONN_STATE_AWAIT_REJ, .ev_qualifiers = llc_reject_ev_qfyrs_20b, @@ -2767,7 +2767,7 @@ static struct llc_conn_state_trans llc_reject_state_trans_20b = { * Array of pointers; * one to each transition */ -static struct llc_conn_state_trans *llc_reject_state_transitions[] = { +static const struct llc_conn_state_trans *llc_reject_state_transitions[] = { [0] = &llc_common_state_trans_1, /* Request */ [1] = &llc_common_state_trans_2, [2] = &llc_common_state_trans_end, @@ -2834,7 +2834,7 @@ static const llc_conn_ev_qfyr_t llc_await_ev_qfyrs_1_0[] = { /* just one member, NULL, .bss zeroes it */ static const llc_conn_action_t llc_await_actions_1_0[1]; -static struct llc_conn_state_trans llc_await_state_trans_1_0 = { +static const struct llc_conn_state_trans llc_await_state_trans_1_0 = { .ev = llc_conn_ev_data_req, .next_state = LLC_CONN_STATE_AWAIT, .ev_qualifiers = llc_await_ev_qfyrs_1_0, @@ -2848,7 +2848,7 @@ static const llc_conn_action_t llc_await_actions_1[] = { [2] = NULL, 
}; -static struct llc_conn_state_trans llc_await_state_trans_1 = { +static const struct llc_conn_state_trans llc_await_state_trans_1 = { .ev = llc_conn_ev_local_busy_detected, .next_state = LLC_CONN_STATE_AWAIT_BUSY, .ev_qualifiers = NONE, @@ -2867,7 +2867,7 @@ static const llc_conn_action_t llc_await_actions_2[] = { [7] = NULL, }; -static struct llc_conn_state_trans llc_await_state_trans_2 = { +static const struct llc_conn_state_trans llc_await_state_trans_2 = { .ev = llc_conn_ev_rx_i_rsp_fbit_set_1_unexpd_ns, .next_state = LLC_CONN_STATE_REJ, .ev_qualifiers = NONE, @@ -2883,7 +2883,7 @@ static const llc_conn_action_t llc_await_actions_3a[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_await_state_trans_3a = { +static const struct llc_conn_state_trans llc_await_state_trans_3a = { .ev = llc_conn_ev_rx_i_cmd_pbit_set_0_unexpd_ns, .next_state = LLC_CONN_STATE_AWAIT_REJ, .ev_qualifiers = NONE, @@ -2899,7 +2899,7 @@ static const llc_conn_action_t llc_await_actions_3b[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_await_state_trans_3b = { +static const struct llc_conn_state_trans llc_await_state_trans_3b = { .ev = llc_conn_ev_rx_i_rsp_fbit_set_0_unexpd_ns, .next_state = LLC_CONN_STATE_AWAIT_REJ, .ev_qualifiers = NONE, @@ -2916,7 +2916,7 @@ static const llc_conn_action_t llc_await_actions_4[] = { [5] = NULL, }; -static struct llc_conn_state_trans llc_await_state_trans_4 = { +static const struct llc_conn_state_trans llc_await_state_trans_4 = { .ev = llc_conn_ev_rx_i_cmd_pbit_set_1_unexpd_ns, .next_state = LLC_CONN_STATE_AWAIT_REJ, .ev_qualifiers = NONE, @@ -2935,7 +2935,7 @@ static const llc_conn_action_t llc_await_actions_5[] = { [7] = NULL, }; -static struct llc_conn_state_trans llc_await_state_trans_5 = { +static const struct llc_conn_state_trans llc_await_state_trans_5 = { .ev = llc_conn_ev_rx_i_rsp_fbit_set_1, .next_state = LLC_CONN_STATE_NORMAL, .ev_qualifiers = NONE, @@ -2952,7 +2952,7 @@ static const llc_conn_action_t llc_await_actions_6a[] = { [5] = NULL, }; -static struct llc_conn_state_trans llc_await_state_trans_6a = { +static const struct llc_conn_state_trans llc_await_state_trans_6a = { .ev = llc_conn_ev_rx_i_rsp_fbit_set_0, .next_state = LLC_CONN_STATE_AWAIT, .ev_qualifiers = NONE, @@ -2969,7 +2969,7 @@ static const llc_conn_action_t llc_await_actions_6b[] = { [5] = NULL, }; -static struct llc_conn_state_trans llc_await_state_trans_6b = { +static const struct llc_conn_state_trans llc_await_state_trans_6b = { .ev = llc_conn_ev_rx_i_cmd_pbit_set_0, .next_state = LLC_CONN_STATE_AWAIT, .ev_qualifiers = NONE, @@ -2986,7 +2986,7 @@ static const llc_conn_action_t llc_await_actions_7[] = { [5] = NULL, }; -static struct llc_conn_state_trans llc_await_state_trans_7 = { +static const struct llc_conn_state_trans llc_await_state_trans_7 = { .ev = llc_conn_ev_rx_i_cmd_pbit_set_1, .next_state = LLC_CONN_STATE_AWAIT, .ev_qualifiers = NONE, @@ -3003,7 +3003,7 @@ static const llc_conn_action_t llc_await_actions_8a[] = { [5] = NULL, }; -static struct llc_conn_state_trans llc_await_state_trans_8a = { +static const struct llc_conn_state_trans llc_await_state_trans_8a = { .ev = llc_conn_ev_rx_rr_rsp_fbit_set_1, .next_state = LLC_CONN_STATE_NORMAL, .ev_qualifiers = NONE, @@ -3020,7 +3020,7 @@ static const llc_conn_action_t llc_await_actions_8b[] = { [5] = NULL, }; -static struct llc_conn_state_trans llc_await_state_trans_8b = { +static const struct llc_conn_state_trans llc_await_state_trans_8b = { .ev = llc_conn_ev_rx_rej_rsp_fbit_set_1, .next_state = LLC_CONN_STATE_NORMAL, 
.ev_qualifiers = NONE, @@ -3035,7 +3035,7 @@ static const llc_conn_action_t llc_await_actions_9a[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_await_state_trans_9a = { +static const struct llc_conn_state_trans llc_await_state_trans_9a = { .ev = llc_conn_ev_rx_rr_cmd_pbit_set_0, .next_state = LLC_CONN_STATE_AWAIT, .ev_qualifiers = NONE, @@ -3050,7 +3050,7 @@ static const llc_conn_action_t llc_await_actions_9b[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_await_state_trans_9b = { +static const struct llc_conn_state_trans llc_await_state_trans_9b = { .ev = llc_conn_ev_rx_rr_rsp_fbit_set_0, .next_state = LLC_CONN_STATE_AWAIT, .ev_qualifiers = NONE, @@ -3065,7 +3065,7 @@ static const llc_conn_action_t llc_await_actions_9c[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_await_state_trans_9c = { +static const struct llc_conn_state_trans llc_await_state_trans_9c = { .ev = llc_conn_ev_rx_rej_cmd_pbit_set_0, .next_state = LLC_CONN_STATE_AWAIT, .ev_qualifiers = NONE, @@ -3080,7 +3080,7 @@ static const llc_conn_action_t llc_await_actions_9d[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_await_state_trans_9d = { +static const struct llc_conn_state_trans llc_await_state_trans_9d = { .ev = llc_conn_ev_rx_rej_rsp_fbit_set_0, .next_state = LLC_CONN_STATE_AWAIT, .ev_qualifiers = NONE, @@ -3096,7 +3096,7 @@ static const llc_conn_action_t llc_await_actions_10a[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_await_state_trans_10a = { +static const struct llc_conn_state_trans llc_await_state_trans_10a = { .ev = llc_conn_ev_rx_rr_cmd_pbit_set_1, .next_state = LLC_CONN_STATE_AWAIT, .ev_qualifiers = NONE, @@ -3112,7 +3112,7 @@ static const llc_conn_action_t llc_await_actions_10b[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_await_state_trans_10b = { +static const struct llc_conn_state_trans llc_await_state_trans_10b = { .ev = llc_conn_ev_rx_rej_cmd_pbit_set_1, .next_state = LLC_CONN_STATE_AWAIT, .ev_qualifiers = NONE, @@ -3128,7 +3128,7 @@ static const llc_conn_action_t llc_await_actions_11[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_await_state_trans_11 = { +static const struct llc_conn_state_trans llc_await_state_trans_11 = { .ev = llc_conn_ev_rx_rnr_rsp_fbit_set_1, .next_state = LLC_CONN_STATE_NORMAL, .ev_qualifiers = NONE, @@ -3143,7 +3143,7 @@ static const llc_conn_action_t llc_await_actions_12a[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_await_state_trans_12a = { +static const struct llc_conn_state_trans llc_await_state_trans_12a = { .ev = llc_conn_ev_rx_rnr_cmd_pbit_set_0, .next_state = LLC_CONN_STATE_AWAIT, .ev_qualifiers = NONE, @@ -3158,7 +3158,7 @@ static const llc_conn_action_t llc_await_actions_12b[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_await_state_trans_12b = { +static const struct llc_conn_state_trans llc_await_state_trans_12b = { .ev = llc_conn_ev_rx_rnr_rsp_fbit_set_0, .next_state = LLC_CONN_STATE_AWAIT, .ev_qualifiers = NONE, @@ -3174,7 +3174,7 @@ static const llc_conn_action_t llc_await_actions_13[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_await_state_trans_13 = { +static const struct llc_conn_state_trans llc_await_state_trans_13 = { .ev = llc_conn_ev_rx_rnr_cmd_pbit_set_1, .next_state = LLC_CONN_STATE_AWAIT, .ev_qualifiers = NONE, @@ -3194,7 +3194,7 @@ static const llc_conn_action_t llc_await_actions_14[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_await_state_trans_14 = { +static const struct llc_conn_state_trans 
llc_await_state_trans_14 = { .ev = llc_conn_ev_p_tmr_exp, .next_state = LLC_CONN_STATE_AWAIT, .ev_qualifiers = llc_await_ev_qfyrs_14, @@ -3205,7 +3205,7 @@ static struct llc_conn_state_trans llc_await_state_trans_14 = { * Array of pointers; * one to each transition */ -static struct llc_conn_state_trans *llc_await_state_transitions[] = { +static const struct llc_conn_state_trans *llc_await_state_transitions[] = { [0] = &llc_common_state_trans_1, /* Request */ [1] = &llc_common_state_trans_2, [2] = &llc_await_state_trans_1_0, @@ -3263,7 +3263,7 @@ static const llc_conn_ev_qfyr_t llc_await_busy_ev_qfyrs_1_0[] = { /* just one member, NULL, .bss zeroes it */ static const llc_conn_action_t llc_await_busy_actions_1_0[1]; -static struct llc_conn_state_trans llc_await_busy_state_trans_1_0 = { +static const struct llc_conn_state_trans llc_await_busy_state_trans_1_0 = { .ev = llc_conn_ev_data_req, .next_state = LLC_CONN_STATE_AWAIT_BUSY, .ev_qualifiers = llc_await_busy_ev_qfyrs_1_0, @@ -3282,7 +3282,7 @@ static const llc_conn_action_t llc_await_busy_actions_1[] = { [2] = NULL, }; -static struct llc_conn_state_trans llc_await_busy_state_trans_1 = { +static const struct llc_conn_state_trans llc_await_busy_state_trans_1 = { .ev = llc_conn_ev_local_busy_cleared, .next_state = LLC_CONN_STATE_AWAIT_REJ, .ev_qualifiers = llc_await_busy_ev_qfyrs_1, @@ -3300,7 +3300,7 @@ static const llc_conn_action_t llc_await_busy_actions_2[] = { [1] = NULL, }; -static struct llc_conn_state_trans llc_await_busy_state_trans_2 = { +static const struct llc_conn_state_trans llc_await_busy_state_trans_2 = { .ev = llc_conn_ev_local_busy_cleared, .next_state = LLC_CONN_STATE_AWAIT, .ev_qualifiers = llc_await_busy_ev_qfyrs_2, @@ -3318,7 +3318,7 @@ static const llc_conn_action_t llc_await_busy_actions_3[] = { [1] = NULL, }; -static struct llc_conn_state_trans llc_await_busy_state_trans_3 = { +static const struct llc_conn_state_trans llc_await_busy_state_trans_3 = { .ev = llc_conn_ev_local_busy_cleared, .next_state = LLC_CONN_STATE_AWAIT_REJ, .ev_qualifiers = llc_await_busy_ev_qfyrs_3, @@ -3337,7 +3337,7 @@ static const llc_conn_action_t llc_await_busy_actions_4[] = { [7] = NULL, }; -static struct llc_conn_state_trans llc_await_busy_state_trans_4 = { +static const struct llc_conn_state_trans llc_await_busy_state_trans_4 = { .ev = llc_conn_ev_rx_i_rsp_fbit_set_1_unexpd_ns, .next_state = LLC_CONN_STATE_BUSY, .ev_qualifiers = NONE, @@ -3353,7 +3353,7 @@ static const llc_conn_action_t llc_await_busy_actions_5a[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_await_busy_state_trans_5a = { +static const struct llc_conn_state_trans llc_await_busy_state_trans_5a = { .ev = llc_conn_ev_rx_i_cmd_pbit_set_0_unexpd_ns, .next_state = LLC_CONN_STATE_AWAIT_BUSY, .ev_qualifiers = NONE, @@ -3369,7 +3369,7 @@ static const llc_conn_action_t llc_await_busy_actions_5b[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_await_busy_state_trans_5b = { +static const struct llc_conn_state_trans llc_await_busy_state_trans_5b = { .ev = llc_conn_ev_rx_i_rsp_fbit_set_0_unexpd_ns, .next_state = LLC_CONN_STATE_AWAIT_BUSY, .ev_qualifiers = NONE, @@ -3385,7 +3385,7 @@ static const llc_conn_action_t llc_await_busy_actions_6[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_await_busy_state_trans_6 = { +static const struct llc_conn_state_trans llc_await_busy_state_trans_6 = { .ev = llc_conn_ev_rx_i_cmd_pbit_set_1_unexpd_ns, .next_state = LLC_CONN_STATE_AWAIT_BUSY, .ev_qualifiers = NONE, @@ -3406,7 +3406,7 @@ static const 
llc_conn_action_t llc_await_busy_actions_7[] = { [9] = NULL, }; -static struct llc_conn_state_trans llc_await_busy_state_trans_7 = { +static const struct llc_conn_state_trans llc_await_busy_state_trans_7 = { .ev = llc_conn_ev_rx_i_rsp_fbit_set_1, .next_state = LLC_CONN_STATE_BUSY, .ev_qualifiers = NONE, @@ -3424,7 +3424,7 @@ static const llc_conn_action_t llc_await_busy_actions_8a[] = { [6] = NULL, }; -static struct llc_conn_state_trans llc_await_busy_state_trans_8a = { +static const struct llc_conn_state_trans llc_await_busy_state_trans_8a = { .ev = llc_conn_ev_rx_i_rsp_fbit_set_0, .next_state = LLC_CONN_STATE_AWAIT_BUSY, .ev_qualifiers = NONE, @@ -3442,7 +3442,7 @@ static const llc_conn_action_t llc_await_busy_actions_8b[] = { [6] = NULL, }; -static struct llc_conn_state_trans llc_await_busy_state_trans_8b = { +static const struct llc_conn_state_trans llc_await_busy_state_trans_8b = { .ev = llc_conn_ev_rx_i_cmd_pbit_set_0, .next_state = LLC_CONN_STATE_AWAIT_BUSY, .ev_qualifiers = NONE, @@ -3460,7 +3460,7 @@ static const llc_conn_action_t llc_await_busy_actions_9[] = { [6] = NULL, }; -static struct llc_conn_state_trans llc_await_busy_state_trans_9 = { +static const struct llc_conn_state_trans llc_await_busy_state_trans_9 = { .ev = llc_conn_ev_rx_i_cmd_pbit_set_1, .next_state = LLC_CONN_STATE_AWAIT_BUSY, .ev_qualifiers = NONE, @@ -3477,7 +3477,7 @@ static const llc_conn_action_t llc_await_busy_actions_10a[] = { [5] = NULL, }; -static struct llc_conn_state_trans llc_await_busy_state_trans_10a = { +static const struct llc_conn_state_trans llc_await_busy_state_trans_10a = { .ev = llc_conn_ev_rx_rr_rsp_fbit_set_1, .next_state = LLC_CONN_STATE_BUSY, .ev_qualifiers = NONE, @@ -3494,7 +3494,7 @@ static const llc_conn_action_t llc_await_busy_actions_10b[] = { [5] = NULL, }; -static struct llc_conn_state_trans llc_await_busy_state_trans_10b = { +static const struct llc_conn_state_trans llc_await_busy_state_trans_10b = { .ev = llc_conn_ev_rx_rej_rsp_fbit_set_1, .next_state = LLC_CONN_STATE_BUSY, .ev_qualifiers = NONE, @@ -3509,7 +3509,7 @@ static const llc_conn_action_t llc_await_busy_actions_11a[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_await_busy_state_trans_11a = { +static const struct llc_conn_state_trans llc_await_busy_state_trans_11a = { .ev = llc_conn_ev_rx_rr_cmd_pbit_set_0, .next_state = LLC_CONN_STATE_AWAIT_BUSY, .ev_qualifiers = NONE, @@ -3524,7 +3524,7 @@ static const llc_conn_action_t llc_await_busy_actions_11b[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_await_busy_state_trans_11b = { +static const struct llc_conn_state_trans llc_await_busy_state_trans_11b = { .ev = llc_conn_ev_rx_rr_rsp_fbit_set_0, .next_state = LLC_CONN_STATE_AWAIT_BUSY, .ev_qualifiers = NONE, @@ -3539,7 +3539,7 @@ static const llc_conn_action_t llc_await_busy_actions_11c[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_await_busy_state_trans_11c = { +static const struct llc_conn_state_trans llc_await_busy_state_trans_11c = { .ev = llc_conn_ev_rx_rej_cmd_pbit_set_0, .next_state = LLC_CONN_STATE_AWAIT_BUSY, .ev_qualifiers = NONE, @@ -3554,7 +3554,7 @@ static const llc_conn_action_t llc_await_busy_actions_11d[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_await_busy_state_trans_11d = { +static const struct llc_conn_state_trans llc_await_busy_state_trans_11d = { .ev = llc_conn_ev_rx_rej_rsp_fbit_set_0, .next_state = LLC_CONN_STATE_AWAIT_BUSY, .ev_qualifiers = NONE, @@ -3570,7 +3570,7 @@ static const llc_conn_action_t llc_await_busy_actions_12a[] = { [4] = 
NULL, }; -static struct llc_conn_state_trans llc_await_busy_state_trans_12a = { +static const struct llc_conn_state_trans llc_await_busy_state_trans_12a = { .ev = llc_conn_ev_rx_rr_cmd_pbit_set_1, .next_state = LLC_CONN_STATE_AWAIT_BUSY, .ev_qualifiers = NONE, @@ -3586,7 +3586,7 @@ static const llc_conn_action_t llc_await_busy_actions_12b[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_await_busy_state_trans_12b = { +static const struct llc_conn_state_trans llc_await_busy_state_trans_12b = { .ev = llc_conn_ev_rx_rej_cmd_pbit_set_1, .next_state = LLC_CONN_STATE_AWAIT_BUSY, .ev_qualifiers = NONE, @@ -3602,7 +3602,7 @@ static const llc_conn_action_t llc_await_busy_actions_13[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_await_busy_state_trans_13 = { +static const struct llc_conn_state_trans llc_await_busy_state_trans_13 = { .ev = llc_conn_ev_rx_rnr_rsp_fbit_set_1, .next_state = LLC_CONN_STATE_BUSY, .ev_qualifiers = NONE, @@ -3617,7 +3617,7 @@ static const llc_conn_action_t llc_await_busy_actions_14a[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_await_busy_state_trans_14a = { +static const struct llc_conn_state_trans llc_await_busy_state_trans_14a = { .ev = llc_conn_ev_rx_rnr_cmd_pbit_set_0, .next_state = LLC_CONN_STATE_AWAIT_BUSY, .ev_qualifiers = NONE, @@ -3632,7 +3632,7 @@ static const llc_conn_action_t llc_await_busy_actions_14b[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_await_busy_state_trans_14b = { +static const struct llc_conn_state_trans llc_await_busy_state_trans_14b = { .ev = llc_conn_ev_rx_rnr_rsp_fbit_set_0, .next_state = LLC_CONN_STATE_AWAIT_BUSY, .ev_qualifiers = NONE, @@ -3648,7 +3648,7 @@ static const llc_conn_action_t llc_await_busy_actions_15[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_await_busy_state_trans_15 = { +static const struct llc_conn_state_trans llc_await_busy_state_trans_15 = { .ev = llc_conn_ev_rx_rnr_cmd_pbit_set_1, .next_state = LLC_CONN_STATE_AWAIT_BUSY, .ev_qualifiers = NONE, @@ -3668,7 +3668,7 @@ static const llc_conn_action_t llc_await_busy_actions_16[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_await_busy_state_trans_16 = { +static const struct llc_conn_state_trans llc_await_busy_state_trans_16 = { .ev = llc_conn_ev_p_tmr_exp, .next_state = LLC_CONN_STATE_AWAIT_BUSY, .ev_qualifiers = llc_await_busy_ev_qfyrs_16, @@ -3679,7 +3679,7 @@ static struct llc_conn_state_trans llc_await_busy_state_trans_16 = { * Array of pointers; * one to each transition */ -static struct llc_conn_state_trans *llc_await_busy_state_transitions[] = { +static const struct llc_conn_state_trans *llc_await_busy_state_transitions[] = { [0] = &llc_common_state_trans_1, /* Request */ [1] = &llc_common_state_trans_2, [2] = &llc_await_busy_state_trans_1_0, @@ -3739,7 +3739,7 @@ static const llc_conn_ev_qfyr_t llc_await_reject_ev_qfyrs_1_0[] = { /* just one member, NULL, .bss zeroes it */ static const llc_conn_action_t llc_await_reject_actions_1_0[1]; -static struct llc_conn_state_trans llc_await_reject_state_trans_1_0 = { +static const struct llc_conn_state_trans llc_await_reject_state_trans_1_0 = { .ev = llc_conn_ev_data_req, .next_state = LLC_CONN_STATE_AWAIT_REJ, .ev_qualifiers = llc_await_reject_ev_qfyrs_1_0, @@ -3753,7 +3753,7 @@ static const llc_conn_action_t llc_await_rejct_actions_1[] = { [2] = NULL }; -static struct llc_conn_state_trans llc_await_rejct_state_trans_1 = { +static const struct llc_conn_state_trans llc_await_rejct_state_trans_1 = { .ev = llc_conn_ev_local_busy_detected, 
.next_state = LLC_CONN_STATE_AWAIT_BUSY, .ev_qualifiers = NONE, @@ -3767,7 +3767,7 @@ static const llc_conn_action_t llc_await_rejct_actions_2a[] = { [2] = NULL }; -static struct llc_conn_state_trans llc_await_rejct_state_trans_2a = { +static const struct llc_conn_state_trans llc_await_rejct_state_trans_2a = { .ev = llc_conn_ev_rx_i_cmd_pbit_set_0_unexpd_ns, .next_state = LLC_CONN_STATE_AWAIT_REJ, .ev_qualifiers = NONE, @@ -3781,7 +3781,7 @@ static const llc_conn_action_t llc_await_rejct_actions_2b[] = { [2] = NULL }; -static struct llc_conn_state_trans llc_await_rejct_state_trans_2b = { +static const struct llc_conn_state_trans llc_await_rejct_state_trans_2b = { .ev = llc_conn_ev_rx_i_rsp_fbit_set_0_unexpd_ns, .next_state = LLC_CONN_STATE_AWAIT_REJ, .ev_qualifiers = NONE, @@ -3796,7 +3796,7 @@ static const llc_conn_action_t llc_await_rejct_actions_3[] = { [3] = NULL }; -static struct llc_conn_state_trans llc_await_rejct_state_trans_3 = { +static const struct llc_conn_state_trans llc_await_rejct_state_trans_3 = { .ev = llc_conn_ev_rx_i_cmd_pbit_set_1_unexpd_ns, .next_state = LLC_CONN_STATE_AWAIT_REJ, .ev_qualifiers = NONE, @@ -3816,7 +3816,7 @@ static const llc_conn_action_t llc_await_rejct_actions_4[] = { [8] = NULL, }; -static struct llc_conn_state_trans llc_await_rejct_state_trans_4 = { +static const struct llc_conn_state_trans llc_await_rejct_state_trans_4 = { .ev = llc_conn_ev_rx_i_rsp_fbit_set_1, .next_state = LLC_CONN_STATE_NORMAL, .ev_qualifiers = NONE, @@ -3834,7 +3834,7 @@ static const llc_conn_action_t llc_await_rejct_actions_5a[] = { [6] = NULL, }; -static struct llc_conn_state_trans llc_await_rejct_state_trans_5a = { +static const struct llc_conn_state_trans llc_await_rejct_state_trans_5a = { .ev = llc_conn_ev_rx_i_rsp_fbit_set_0, .next_state = LLC_CONN_STATE_AWAIT, .ev_qualifiers = NONE, @@ -3852,7 +3852,7 @@ static const llc_conn_action_t llc_await_rejct_actions_5b[] = { [6] = NULL, }; -static struct llc_conn_state_trans llc_await_rejct_state_trans_5b = { +static const struct llc_conn_state_trans llc_await_rejct_state_trans_5b = { .ev = llc_conn_ev_rx_i_cmd_pbit_set_0, .next_state = LLC_CONN_STATE_AWAIT, .ev_qualifiers = NONE, @@ -3870,7 +3870,7 @@ static const llc_conn_action_t llc_await_rejct_actions_6[] = { [6] = NULL, }; -static struct llc_conn_state_trans llc_await_rejct_state_trans_6 = { +static const struct llc_conn_state_trans llc_await_rejct_state_trans_6 = { .ev = llc_conn_ev_rx_i_cmd_pbit_set_1, .next_state = LLC_CONN_STATE_AWAIT, .ev_qualifiers = NONE, @@ -3887,7 +3887,7 @@ static const llc_conn_action_t llc_await_rejct_actions_7a[] = { [5] = NULL, }; -static struct llc_conn_state_trans llc_await_rejct_state_trans_7a = { +static const struct llc_conn_state_trans llc_await_rejct_state_trans_7a = { .ev = llc_conn_ev_rx_rr_rsp_fbit_set_1, .next_state = LLC_CONN_STATE_REJ, .ev_qualifiers = NONE, @@ -3904,7 +3904,7 @@ static const llc_conn_action_t llc_await_rejct_actions_7b[] = { [5] = NULL, }; -static struct llc_conn_state_trans llc_await_rejct_state_trans_7b = { +static const struct llc_conn_state_trans llc_await_rejct_state_trans_7b = { .ev = llc_conn_ev_rx_rej_rsp_fbit_set_1, .next_state = LLC_CONN_STATE_REJ, .ev_qualifiers = NONE, @@ -3921,7 +3921,7 @@ static const llc_conn_action_t llc_await_rejct_actions_7c[] = { [5] = NULL, }; -static struct llc_conn_state_trans llc_await_rejct_state_trans_7c = { +static const struct llc_conn_state_trans llc_await_rejct_state_trans_7c = { .ev = llc_conn_ev_rx_i_rsp_fbit_set_1_unexpd_ns, .next_state = LLC_CONN_STATE_REJ, 
.ev_qualifiers = NONE, @@ -3936,7 +3936,7 @@ static const llc_conn_action_t llc_await_rejct_actions_8a[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_await_rejct_state_trans_8a = { +static const struct llc_conn_state_trans llc_await_rejct_state_trans_8a = { .ev = llc_conn_ev_rx_rr_cmd_pbit_set_0, .next_state = LLC_CONN_STATE_AWAIT_REJ, .ev_qualifiers = NONE, @@ -3951,7 +3951,7 @@ static const llc_conn_action_t llc_await_rejct_actions_8b[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_await_rejct_state_trans_8b = { +static const struct llc_conn_state_trans llc_await_rejct_state_trans_8b = { .ev = llc_conn_ev_rx_rr_rsp_fbit_set_0, .next_state = LLC_CONN_STATE_AWAIT_REJ, .ev_qualifiers = NONE, @@ -3966,7 +3966,7 @@ static const llc_conn_action_t llc_await_rejct_actions_8c[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_await_rejct_state_trans_8c = { +static const struct llc_conn_state_trans llc_await_rejct_state_trans_8c = { .ev = llc_conn_ev_rx_rej_cmd_pbit_set_0, .next_state = LLC_CONN_STATE_AWAIT_REJ, .ev_qualifiers = NONE, @@ -3981,7 +3981,7 @@ static const llc_conn_action_t llc_await_rejct_actions_8d[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_await_rejct_state_trans_8d = { +static const struct llc_conn_state_trans llc_await_rejct_state_trans_8d = { .ev = llc_conn_ev_rx_rej_rsp_fbit_set_0, .next_state = LLC_CONN_STATE_AWAIT_REJ, .ev_qualifiers = NONE, @@ -3997,7 +3997,7 @@ static const llc_conn_action_t llc_await_rejct_actions_9a[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_await_rejct_state_trans_9a = { +static const struct llc_conn_state_trans llc_await_rejct_state_trans_9a = { .ev = llc_conn_ev_rx_rr_cmd_pbit_set_1, .next_state = LLC_CONN_STATE_AWAIT_REJ, .ev_qualifiers = NONE, @@ -4013,7 +4013,7 @@ static const llc_conn_action_t llc_await_rejct_actions_9b[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_await_rejct_state_trans_9b = { +static const struct llc_conn_state_trans llc_await_rejct_state_trans_9b = { .ev = llc_conn_ev_rx_rej_cmd_pbit_set_1, .next_state = LLC_CONN_STATE_AWAIT_REJ, .ev_qualifiers = NONE, @@ -4029,7 +4029,7 @@ static const llc_conn_action_t llc_await_rejct_actions_10[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_await_rejct_state_trans_10 = { +static const struct llc_conn_state_trans llc_await_rejct_state_trans_10 = { .ev = llc_conn_ev_rx_rnr_rsp_fbit_set_1, .next_state = LLC_CONN_STATE_REJ, .ev_qualifiers = NONE, @@ -4044,7 +4044,7 @@ static const llc_conn_action_t llc_await_rejct_actions_11a[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_await_rejct_state_trans_11a = { +static const struct llc_conn_state_trans llc_await_rejct_state_trans_11a = { .ev = llc_conn_ev_rx_rnr_cmd_pbit_set_0, .next_state = LLC_CONN_STATE_AWAIT_REJ, .ev_qualifiers = NONE, @@ -4059,7 +4059,7 @@ static const llc_conn_action_t llc_await_rejct_actions_11b[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_await_rejct_state_trans_11b = { +static const struct llc_conn_state_trans llc_await_rejct_state_trans_11b = { .ev = llc_conn_ev_rx_rnr_rsp_fbit_set_0, .next_state = LLC_CONN_STATE_AWAIT_REJ, .ev_qualifiers = NONE, @@ -4075,7 +4075,7 @@ static const llc_conn_action_t llc_await_rejct_actions_12[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_await_rejct_state_trans_12 = { +static const struct llc_conn_state_trans llc_await_rejct_state_trans_12 = { .ev = llc_conn_ev_rx_rnr_cmd_pbit_set_1, .next_state = LLC_CONN_STATE_AWAIT_REJ, .ev_qualifiers = NONE, @@ 
-4095,7 +4095,7 @@ static const llc_conn_action_t llc_await_rejct_actions_13[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_await_rejct_state_trans_13 = { +static const struct llc_conn_state_trans llc_await_rejct_state_trans_13 = { .ev = llc_conn_ev_p_tmr_exp, .next_state = LLC_CONN_STATE_AWAIT_REJ, .ev_qualifiers = llc_await_rejct_ev_qfyrs_13, @@ -4106,7 +4106,7 @@ static struct llc_conn_state_trans llc_await_rejct_state_trans_13 = { * Array of pointers; * one to each transition */ -static struct llc_conn_state_trans *llc_await_rejct_state_transitions[] = { +static const struct llc_conn_state_trans *llc_await_rejct_state_transitions[] = { [0] = &llc_await_reject_state_trans_1_0, [1] = &llc_common_state_trans_1, /* requests */ [2] = &llc_common_state_trans_2, @@ -4171,7 +4171,7 @@ static const llc_conn_action_t llc_d_conn_actions_1[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_d_conn_state_trans_1 = { +static const struct llc_conn_state_trans llc_d_conn_state_trans_1 = { .ev = llc_conn_ev_rx_sabme_cmd_pbit_set_x, .next_state = LLC_CONN_STATE_ADM, .ev_qualifiers = llc_d_conn_ev_qfyrs_1, @@ -4194,7 +4194,7 @@ static const llc_conn_action_t llc_d_conn_actions_1_1[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_d_conn_state_trans_1_1 = { +static const struct llc_conn_state_trans llc_d_conn_state_trans_1_1 = { .ev = llc_conn_ev_rx_sabme_cmd_pbit_set_x, .next_state = LLC_CONN_STATE_ADM, .ev_qualifiers = llc_d_conn_ev_qfyrs_1_1, @@ -4218,7 +4218,7 @@ static const llc_conn_action_t llc_d_conn_actions_2[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_d_conn_state_trans_2 = { +static const struct llc_conn_state_trans llc_d_conn_state_trans_2 = { .ev = llc_conn_ev_rx_ua_rsp_fbit_set_x, .next_state = LLC_CONN_STATE_ADM, .ev_qualifiers = llc_d_conn_ev_qfyrs_2, @@ -4241,7 +4241,7 @@ static const llc_conn_action_t llc_d_conn_actions_2_1[] = { [2] = NULL, }; -static struct llc_conn_state_trans llc_d_conn_state_trans_2_1 = { +static const struct llc_conn_state_trans llc_d_conn_state_trans_2_1 = { .ev = llc_conn_ev_rx_ua_rsp_fbit_set_x, .next_state = LLC_CONN_STATE_ADM, .ev_qualifiers = llc_d_conn_ev_qfyrs_2_1, @@ -4254,7 +4254,7 @@ static const llc_conn_action_t llc_d_conn_actions_3[] = { [1] = NULL, }; -static struct llc_conn_state_trans llc_d_conn_state_trans_3 = { +static const struct llc_conn_state_trans llc_d_conn_state_trans_3 = { .ev = llc_conn_ev_rx_disc_cmd_pbit_set_x, .next_state = LLC_CONN_STATE_D_CONN, .ev_qualifiers = NONE, @@ -4277,7 +4277,7 @@ static const llc_conn_action_t llc_d_conn_actions_4[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_d_conn_state_trans_4 = { +static const struct llc_conn_state_trans llc_d_conn_state_trans_4 = { .ev = llc_conn_ev_rx_dm_rsp_fbit_set_x, .next_state = LLC_CONN_STATE_ADM, .ev_qualifiers = llc_d_conn_ev_qfyrs_4, @@ -4299,7 +4299,7 @@ static const llc_conn_action_t llc_d_conn_actions_4_1[] = { [2] = NULL, }; -static struct llc_conn_state_trans llc_d_conn_state_trans_4_1 = { +static const struct llc_conn_state_trans llc_d_conn_state_trans_4_1 = { .ev = llc_conn_ev_rx_dm_rsp_fbit_set_x, .next_state = LLC_CONN_STATE_ADM, .ev_qualifiers = llc_d_conn_ev_qfyrs_4_1, @@ -4318,7 +4318,7 @@ static const llc_conn_ev_qfyr_t llc_d_conn_ev_qfyrs_5[] = { /* just one member, NULL, .bss zeroes it */ static const llc_conn_action_t llc_d_conn_actions_5[1]; -static struct llc_conn_state_trans llc_d_conn_state_trans_5 = { +static const struct llc_conn_state_trans llc_d_conn_state_trans_5 = { .ev = 
llc_conn_ev_data_req, .next_state = LLC_CONN_STATE_D_CONN, .ev_qualifiers = llc_d_conn_ev_qfyrs_5, @@ -4338,7 +4338,7 @@ static const llc_conn_action_t llc_d_conn_actions_6[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_d_conn_state_trans_6 = { +static const struct llc_conn_state_trans llc_d_conn_state_trans_6 = { .ev = llc_conn_ev_ack_tmr_exp, .next_state = LLC_CONN_STATE_D_CONN, .ev_qualifiers = llc_d_conn_ev_qfyrs_6, @@ -4359,7 +4359,7 @@ static const llc_conn_action_t llc_d_conn_actions_7[] = { [2] = NULL, }; -static struct llc_conn_state_trans llc_d_conn_state_trans_7 = { +static const struct llc_conn_state_trans llc_d_conn_state_trans_7 = { .ev = llc_conn_ev_ack_tmr_exp, .next_state = LLC_CONN_STATE_ADM, .ev_qualifiers = llc_d_conn_ev_qfyrs_7, @@ -4379,7 +4379,7 @@ static const llc_conn_action_t llc_d_conn_actions_8[] = { [1] = NULL, }; -static struct llc_conn_state_trans llc_d_conn_state_trans_8 = { +static const struct llc_conn_state_trans llc_d_conn_state_trans_8 = { .ev = llc_conn_ev_ack_tmr_exp, .next_state = LLC_CONN_STATE_ADM, .ev_qualifiers = llc_d_conn_ev_qfyrs_8, @@ -4390,7 +4390,7 @@ static struct llc_conn_state_trans llc_d_conn_state_trans_8 = { * Array of pointers; * one to each transition */ -static struct llc_conn_state_trans *llc_d_conn_state_transitions[] = { +static const struct llc_conn_state_trans *llc_d_conn_state_transitions[] = { [0] = &llc_d_conn_state_trans_5, /* Request */ [1] = &llc_common_state_trans_end, [2] = &llc_common_state_trans_end, /* Local busy */ @@ -4419,7 +4419,7 @@ static const llc_conn_action_t llc_rst_actions_1[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_rst_state_trans_1 = { +static const struct llc_conn_state_trans llc_rst_state_trans_1 = { .ev = llc_conn_ev_rx_sabme_cmd_pbit_set_x, .next_state = LLC_CONN_STATE_RESET, .ev_qualifiers = NONE, @@ -4447,7 +4447,7 @@ static const llc_conn_action_t llc_rst_actions_2[] = { [7] = NULL, }; -static struct llc_conn_state_trans llc_rst_state_trans_2 = { +static const struct llc_conn_state_trans llc_rst_state_trans_2 = { .ev = llc_conn_ev_rx_ua_rsp_fbit_set_x, .next_state = LLC_CONN_STATE_NORMAL, .ev_qualifiers = llc_rst_ev_qfyrs_2, @@ -4475,7 +4475,7 @@ static const llc_conn_action_t llc_rst_actions_2_1[] = { [7] = NULL, }; -static struct llc_conn_state_trans llc_rst_state_trans_2_1 = { +static const struct llc_conn_state_trans llc_rst_state_trans_2_1 = { .ev = llc_conn_ev_rx_ua_rsp_fbit_set_x, .next_state = LLC_CONN_STATE_NORMAL, .ev_qualifiers = llc_rst_ev_qfyrs_2_1, @@ -4495,7 +4495,7 @@ static const llc_conn_action_t llc_rst_actions_3[] = { [2] = NULL, }; -static struct llc_conn_state_trans llc_rst_state_trans_3 = { +static const struct llc_conn_state_trans llc_rst_state_trans_3 = { .ev = llc_conn_ev_ack_tmr_exp, .next_state = LLC_CONN_STATE_NORMAL, .ev_qualifiers = llc_rst_ev_qfyrs_3, @@ -4518,7 +4518,7 @@ static const llc_conn_action_t llc_rst_actions_4[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_rst_state_trans_4 = { +static const struct llc_conn_state_trans llc_rst_state_trans_4 = { .ev = llc_conn_ev_rx_disc_cmd_pbit_set_x, .next_state = LLC_CONN_STATE_ADM, .ev_qualifiers = llc_rst_ev_qfyrs_4, @@ -4541,7 +4541,7 @@ static const llc_conn_action_t llc_rst_actions_4_1[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_rst_state_trans_4_1 = { +static const struct llc_conn_state_trans llc_rst_state_trans_4_1 = { .ev = llc_conn_ev_rx_disc_cmd_pbit_set_x, .next_state = LLC_CONN_STATE_ADM, .ev_qualifiers = llc_rst_ev_qfyrs_4_1, @@ -4564,7 
+4564,7 @@ static const llc_conn_action_t llc_rst_actions_5[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_rst_state_trans_5 = { +static const struct llc_conn_state_trans llc_rst_state_trans_5 = { .ev = llc_conn_ev_rx_dm_rsp_fbit_set_x, .next_state = LLC_CONN_STATE_ADM, .ev_qualifiers = llc_rst_ev_qfyrs_5, @@ -4586,7 +4586,7 @@ static const llc_conn_action_t llc_rst_actions_5_1[] = { [2] = NULL, }; -static struct llc_conn_state_trans llc_rst_state_trans_5_1 = { +static const struct llc_conn_state_trans llc_rst_state_trans_5_1 = { .ev = llc_conn_ev_rx_dm_rsp_fbit_set_x, .next_state = LLC_CONN_STATE_ADM, .ev_qualifiers = llc_rst_ev_qfyrs_5_1, @@ -4602,7 +4602,7 @@ static const llc_conn_ev_qfyr_t llc_rst_ev_qfyrs_6[] = { /* just one member, NULL, .bss zeroes it */ static const llc_conn_action_t llc_rst_actions_6[1]; -static struct llc_conn_state_trans llc_rst_state_trans_6 = { +static const struct llc_conn_state_trans llc_rst_state_trans_6 = { .ev = llc_conn_ev_data_req, .next_state = LLC_CONN_STATE_RESET, .ev_qualifiers = llc_rst_ev_qfyrs_6, @@ -4623,7 +4623,7 @@ static const llc_conn_action_t llc_rst_actions_7[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_rst_state_trans_7 = { +static const struct llc_conn_state_trans llc_rst_state_trans_7 = { .ev = llc_conn_ev_ack_tmr_exp, .next_state = LLC_CONN_STATE_RESET, .ev_qualifiers = llc_rst_ev_qfyrs_7, @@ -4644,7 +4644,7 @@ static const llc_conn_action_t llc_rst_actions_8[] = { [2] = NULL, }; -static struct llc_conn_state_trans llc_rst_state_trans_8 = { +static const struct llc_conn_state_trans llc_rst_state_trans_8 = { .ev = llc_conn_ev_ack_tmr_exp, .next_state = LLC_CONN_STATE_ADM, .ev_qualifiers = llc_rst_ev_qfyrs_8, @@ -4665,7 +4665,7 @@ static const llc_conn_action_t llc_rst_actions_8_1[] = { [2] = NULL, }; -static struct llc_conn_state_trans llc_rst_state_trans_8_1 = { +static const struct llc_conn_state_trans llc_rst_state_trans_8_1 = { .ev = llc_conn_ev_ack_tmr_exp, .next_state = LLC_CONN_STATE_ADM, .ev_qualifiers = llc_rst_ev_qfyrs_8_1, @@ -4676,7 +4676,7 @@ static struct llc_conn_state_trans llc_rst_state_trans_8_1 = { * Array of pointers; * one to each transition */ -static struct llc_conn_state_trans *llc_rst_state_transitions[] = { +static const struct llc_conn_state_trans *llc_rst_state_transitions[] = { [0] = &llc_rst_state_trans_6, /* Request */ [1] = &llc_common_state_trans_end, [2] = &llc_common_state_trans_end, /* Local busy */ @@ -4710,7 +4710,7 @@ static const llc_conn_action_t llc_error_actions_1[] = { [8] = NULL, }; -static struct llc_conn_state_trans llc_error_state_trans_1 = { +static const struct llc_conn_state_trans llc_error_state_trans_1 = { .ev = llc_conn_ev_rx_sabme_cmd_pbit_set_x, .next_state = LLC_CONN_STATE_NORMAL, .ev_qualifiers = NONE, @@ -4726,7 +4726,7 @@ static const llc_conn_action_t llc_error_actions_2[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_error_state_trans_2 = { +static const struct llc_conn_state_trans llc_error_state_trans_2 = { .ev = llc_conn_ev_rx_disc_cmd_pbit_set_x, .next_state = LLC_CONN_STATE_ADM, .ev_qualifiers = NONE, @@ -4741,7 +4741,7 @@ static const llc_conn_action_t llc_error_actions_3[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_error_state_trans_3 = { +static const struct llc_conn_state_trans llc_error_state_trans_3 = { .ev = llc_conn_ev_rx_dm_rsp_fbit_set_x, .next_state = LLC_CONN_STATE_ADM, .ev_qualifiers = NONE, @@ -4757,7 +4757,7 @@ static const llc_conn_action_t llc_error_actions_4[] = { [4] = NULL, }; -static struct 
llc_conn_state_trans llc_error_state_trans_4 = { +static const struct llc_conn_state_trans llc_error_state_trans_4 = { .ev = llc_conn_ev_rx_frmr_rsp_fbit_set_x, .next_state = LLC_CONN_STATE_RESET, .ev_qualifiers = NONE, @@ -4770,7 +4770,7 @@ static const llc_conn_action_t llc_error_actions_5[] = { [1] = NULL, }; -static struct llc_conn_state_trans llc_error_state_trans_5 = { +static const struct llc_conn_state_trans llc_error_state_trans_5 = { .ev = llc_conn_ev_rx_xxx_cmd_pbit_set_x, .next_state = LLC_CONN_STATE_ERROR, .ev_qualifiers = NONE, @@ -4778,7 +4778,7 @@ static struct llc_conn_state_trans llc_error_state_trans_5 = { }; /* State transitions for LLC_CONN_EV_RX_XXX_RSP_Fbit_SET_X event */ -static struct llc_conn_state_trans llc_error_state_trans_6 = { +static const struct llc_conn_state_trans llc_error_state_trans_6 = { .ev = llc_conn_ev_rx_xxx_rsp_fbit_set_x, .next_state = LLC_CONN_STATE_ERROR, .ev_qualifiers = NONE, @@ -4798,7 +4798,7 @@ static const llc_conn_action_t llc_error_actions_7[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_error_state_trans_7 = { +static const struct llc_conn_state_trans llc_error_state_trans_7 = { .ev = llc_conn_ev_ack_tmr_exp, .next_state = LLC_CONN_STATE_ERROR, .ev_qualifiers = llc_error_ev_qfyrs_7, @@ -4820,7 +4820,7 @@ static const llc_conn_action_t llc_error_actions_8[] = { [5] = NULL, }; -static struct llc_conn_state_trans llc_error_state_trans_8 = { +static const struct llc_conn_state_trans llc_error_state_trans_8 = { .ev = llc_conn_ev_ack_tmr_exp, .next_state = LLC_CONN_STATE_RESET, .ev_qualifiers = llc_error_ev_qfyrs_8, @@ -4836,7 +4836,7 @@ static const llc_conn_ev_qfyr_t llc_error_ev_qfyrs_9[] = { /* just one member, NULL, .bss zeroes it */ static const llc_conn_action_t llc_error_actions_9[1]; -static struct llc_conn_state_trans llc_error_state_trans_9 = { +static const struct llc_conn_state_trans llc_error_state_trans_9 = { .ev = llc_conn_ev_data_req, .next_state = LLC_CONN_STATE_ERROR, .ev_qualifiers = llc_error_ev_qfyrs_9, @@ -4847,7 +4847,7 @@ static struct llc_conn_state_trans llc_error_state_trans_9 = { * Array of pointers; * one to each transition */ -static struct llc_conn_state_trans *llc_error_state_transitions[] = { +static const struct llc_conn_state_trans *llc_error_state_transitions[] = { [0] = &llc_error_state_trans_9, /* Request */ [1] = &llc_common_state_trans_end, [2] = &llc_common_state_trans_end, /* Local busy */ @@ -4873,7 +4873,7 @@ static const llc_conn_action_t llc_temp_actions_1[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_temp_state_trans_1 = { +static const struct llc_conn_state_trans llc_temp_state_trans_1 = { .ev = llc_conn_ev_disc_req, .next_state = LLC_CONN_STATE_ADM, .ev_qualifiers = NONE, @@ -4884,7 +4884,7 @@ static struct llc_conn_state_trans llc_temp_state_trans_1 = { * Array of pointers; * one to each transition */ -static struct llc_conn_state_trans *llc_temp_state_transitions[] = { +static const struct llc_conn_state_trans *llc_temp_state_transitions[] = { [0] = &llc_temp_state_trans_1, /* requests */ [1] = &llc_common_state_trans_end, [2] = &llc_common_state_trans_end, /* local busy */ diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index 0a3f5e0bec00..afc6974eafda 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -34,10 +34,10 @@ static int llc_find_offset(int state, int ev_type); static void llc_conn_send_pdus(struct sock *sk); static int llc_conn_service(struct sock *sk, struct sk_buff *skb); static int llc_exec_conn_trans_actions(struct sock *sk, - 
struct llc_conn_state_trans *trans, + const struct llc_conn_state_trans *trans, struct sk_buff *ev); -static struct llc_conn_state_trans *llc_qualify_conn_ev(struct sock *sk, - struct sk_buff *skb); +static const struct llc_conn_state_trans *llc_qualify_conn_ev(struct sock *sk, + struct sk_buff *skb); /* Offset table on connection states transition diagram */ static int llc_offset_table[NBR_CONN_STATES][NBR_CONN_EV]; @@ -356,9 +356,9 @@ static void llc_conn_send_pdus(struct sock *sk) */ static int llc_conn_service(struct sock *sk, struct sk_buff *skb) { - int rc = 1; + const struct llc_conn_state_trans *trans; struct llc_sock *llc = llc_sk(sk); - struct llc_conn_state_trans *trans; + int rc = 1; if (llc->state > NBR_CONN_STATES) goto out; @@ -384,10 +384,10 @@ out: * This function finds transition that matches with happened event. * Returns pointer to found transition on success, %NULL otherwise. */ -static struct llc_conn_state_trans *llc_qualify_conn_ev(struct sock *sk, - struct sk_buff *skb) +static const struct llc_conn_state_trans *llc_qualify_conn_ev(struct sock *sk, + struct sk_buff *skb) { - struct llc_conn_state_trans **next_trans; + const struct llc_conn_state_trans **next_trans; const llc_conn_ev_qfyr_t *next_qualifier; struct llc_conn_state_ev *ev = llc_conn_ev(skb); struct llc_sock *llc = llc_sk(sk); @@ -432,7 +432,7 @@ static struct llc_conn_state_trans *llc_qualify_conn_ev(struct sock *sk, * success, 1 to indicate failure of at least one action. */ static int llc_exec_conn_trans_actions(struct sock *sk, - struct llc_conn_state_trans *trans, + const struct llc_conn_state_trans *trans, struct sk_buff *skb) { int rc = 0; @@ -635,8 +635,8 @@ u8 llc_data_accept_state(u8 state) */ static u16 __init llc_find_next_offset(struct llc_conn_state *state, u16 offset) { + const struct llc_conn_state_trans **next_trans; u16 cnt = 0; - struct llc_conn_state_trans **next_trans; for (next_trans = state->transitions + offset; (*next_trans)->ev; next_trans++) -- cgit v1.2.3-58-ga151 From 0970bf676f86c2c4d9bf7e672f5504d390c9fce6 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 14 Jul 2024 18:15:20 +0200 Subject: llc: Constify struct llc_sap_state_trans 'struct llc_sap_state_trans' are not modified in this driver. Constifying this structure moves some data to a read-only section, so increase overall security. 
On a x86_64, with allmodconfig, as an example: Before: ====== text data bss dec hex filename 339 456 24 819 333 net/llc/llc_s_st.o After: ===== text data bss dec hex filename 683 144 0 827 33b net/llc/llc_s_st.o Signed-off-by: Christophe JAILLET Reviewed-by: Simon Horman Link: https://patch.msgid.link/9d17587639195ee94b74ff06a11ef97d1833ee52.1720973710.git.christophe.jaillet@wanadoo.fr Signed-off-by: Jakub Kicinski --- include/net/llc_s_st.h | 4 ++-- net/llc/llc_s_st.c | 26 +++++++++++++------------- net/llc/llc_sap.c | 12 ++++++------ 3 files changed, 21 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/include/net/llc_s_st.h b/include/net/llc_s_st.h index ed5b2fa40d32..fca49d483d20 100644 --- a/include/net/llc_s_st.h +++ b/include/net/llc_s_st.h @@ -29,8 +29,8 @@ struct llc_sap_state_trans { }; struct llc_sap_state { - u8 curr_state; - struct llc_sap_state_trans **transitions; + u8 curr_state; + const struct llc_sap_state_trans **transitions; }; /* only access to SAP state table */ diff --git a/net/llc/llc_s_st.c b/net/llc/llc_s_st.c index 308c616883a4..acccc827c562 100644 --- a/net/llc/llc_s_st.c +++ b/net/llc/llc_s_st.c @@ -24,7 +24,7 @@ * last entry for this state * all members are zeros, .bss zeroes it */ -static struct llc_sap_state_trans llc_sap_state_trans_end; +static const struct llc_sap_state_trans llc_sap_state_trans_end; /* state LLC_SAP_STATE_INACTIVE transition for * LLC_SAP_EV_ACTIVATION_REQ event @@ -34,14 +34,14 @@ static const llc_sap_action_t llc_sap_inactive_state_actions_1[] = { [1] = NULL, }; -static struct llc_sap_state_trans llc_sap_inactive_state_trans_1 = { +static const struct llc_sap_state_trans llc_sap_inactive_state_trans_1 = { .ev = llc_sap_ev_activation_req, .next_state = LLC_SAP_STATE_ACTIVE, .ev_actions = llc_sap_inactive_state_actions_1, }; /* array of pointers; one to each transition */ -static struct llc_sap_state_trans *llc_sap_inactive_state_transitions[] = { +static const struct llc_sap_state_trans *llc_sap_inactive_state_transitions[] = { [0] = &llc_sap_inactive_state_trans_1, [1] = &llc_sap_state_trans_end, }; @@ -52,7 +52,7 @@ static const llc_sap_action_t llc_sap_active_state_actions_1[] = { [1] = NULL, }; -static struct llc_sap_state_trans llc_sap_active_state_trans_1 = { +static const struct llc_sap_state_trans llc_sap_active_state_trans_1 = { .ev = llc_sap_ev_rx_ui, .next_state = LLC_SAP_STATE_ACTIVE, .ev_actions = llc_sap_active_state_actions_1, @@ -64,7 +64,7 @@ static const llc_sap_action_t llc_sap_active_state_actions_2[] = { [1] = NULL, }; -static struct llc_sap_state_trans llc_sap_active_state_trans_2 = { +static const struct llc_sap_state_trans llc_sap_active_state_trans_2 = { .ev = llc_sap_ev_unitdata_req, .next_state = LLC_SAP_STATE_ACTIVE, .ev_actions = llc_sap_active_state_actions_2, @@ -76,7 +76,7 @@ static const llc_sap_action_t llc_sap_active_state_actions_3[] = { [1] = NULL, }; -static struct llc_sap_state_trans llc_sap_active_state_trans_3 = { +static const struct llc_sap_state_trans llc_sap_active_state_trans_3 = { .ev = llc_sap_ev_xid_req, .next_state = LLC_SAP_STATE_ACTIVE, .ev_actions = llc_sap_active_state_actions_3, @@ -88,7 +88,7 @@ static const llc_sap_action_t llc_sap_active_state_actions_4[] = { [1] = NULL, }; -static struct llc_sap_state_trans llc_sap_active_state_trans_4 = { +static const struct llc_sap_state_trans llc_sap_active_state_trans_4 = { .ev = llc_sap_ev_rx_xid_c, .next_state = LLC_SAP_STATE_ACTIVE, .ev_actions = llc_sap_active_state_actions_4, @@ -100,7 +100,7 @@ static const 
llc_sap_action_t llc_sap_active_state_actions_5[] = { [1] = NULL, }; -static struct llc_sap_state_trans llc_sap_active_state_trans_5 = { +static const struct llc_sap_state_trans llc_sap_active_state_trans_5 = { .ev = llc_sap_ev_rx_xid_r, .next_state = LLC_SAP_STATE_ACTIVE, .ev_actions = llc_sap_active_state_actions_5, @@ -112,7 +112,7 @@ static const llc_sap_action_t llc_sap_active_state_actions_6[] = { [1] = NULL, }; -static struct llc_sap_state_trans llc_sap_active_state_trans_6 = { +static const struct llc_sap_state_trans llc_sap_active_state_trans_6 = { .ev = llc_sap_ev_test_req, .next_state = LLC_SAP_STATE_ACTIVE, .ev_actions = llc_sap_active_state_actions_6, @@ -124,7 +124,7 @@ static const llc_sap_action_t llc_sap_active_state_actions_7[] = { [1] = NULL, }; -static struct llc_sap_state_trans llc_sap_active_state_trans_7 = { +static const struct llc_sap_state_trans llc_sap_active_state_trans_7 = { .ev = llc_sap_ev_rx_test_c, .next_state = LLC_SAP_STATE_ACTIVE, .ev_actions = llc_sap_active_state_actions_7 @@ -136,7 +136,7 @@ static const llc_sap_action_t llc_sap_active_state_actions_8[] = { [1] = NULL, }; -static struct llc_sap_state_trans llc_sap_active_state_trans_8 = { +static const struct llc_sap_state_trans llc_sap_active_state_trans_8 = { .ev = llc_sap_ev_rx_test_r, .next_state = LLC_SAP_STATE_ACTIVE, .ev_actions = llc_sap_active_state_actions_8, @@ -150,14 +150,14 @@ static const llc_sap_action_t llc_sap_active_state_actions_9[] = { [1] = NULL, }; -static struct llc_sap_state_trans llc_sap_active_state_trans_9 = { +static const struct llc_sap_state_trans llc_sap_active_state_trans_9 = { .ev = llc_sap_ev_deactivation_req, .next_state = LLC_SAP_STATE_INACTIVE, .ev_actions = llc_sap_active_state_actions_9 }; /* array of pointers; one to each transition */ -static struct llc_sap_state_trans *llc_sap_active_state_transitions[] = { +static const struct llc_sap_state_trans *llc_sap_active_state_transitions[] = { [0] = &llc_sap_active_state_trans_2, [1] = &llc_sap_active_state_trans_1, [2] = &llc_sap_active_state_trans_3, diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c index 116c0e479183..6cd03c2ae7d5 100644 --- a/net/llc/llc_sap.c +++ b/net/llc/llc_sap.c @@ -114,12 +114,12 @@ void llc_sap_rtn_pdu(struct llc_sap *sap, struct sk_buff *skb) * Returns the pointer to found transition on success or %NULL for * failure. */ -static struct llc_sap_state_trans *llc_find_sap_trans(struct llc_sap *sap, - struct sk_buff *skb) +static const struct llc_sap_state_trans *llc_find_sap_trans(struct llc_sap *sap, + struct sk_buff *skb) { int i = 0; - struct llc_sap_state_trans *rc = NULL; - struct llc_sap_state_trans **next_trans; + const struct llc_sap_state_trans *rc = NULL; + const struct llc_sap_state_trans **next_trans; struct llc_sap_state *curr_state = &llc_sap_state_table[sap->state - 1]; /* * Search thru events for this state until list exhausted or until @@ -143,7 +143,7 @@ static struct llc_sap_state_trans *llc_find_sap_trans(struct llc_sap *sap, * Returns 0 for success and 1 for failure of at least one action. 
*/ static int llc_exec_sap_trans_actions(struct llc_sap *sap, - struct llc_sap_state_trans *trans, + const struct llc_sap_state_trans *trans, struct sk_buff *skb) { int rc = 0; @@ -166,8 +166,8 @@ static int llc_exec_sap_trans_actions(struct llc_sap *sap, */ static int llc_sap_next_state(struct llc_sap *sap, struct sk_buff *skb) { + const struct llc_sap_state_trans *trans; int rc = 1; - struct llc_sap_state_trans *trans; if (sap->state > LLC_NR_SAP_STATES) goto out; -- cgit v1.2.3-58-ga151 From fcb4bb07a9276bfb63c82ce5e8c4f8ff23646635 Mon Sep 17 00:00:00 2001 From: Asbjørn Sloth Tønnesen Date: Sat, 13 Jul 2024 02:19:01 +0000 Subject: net/sched: cls_flower: prepare fl_{set,dump}_key_flags() for ENC_FLAGS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Prepare fl_set_key_flags/fl_dump_key_flags() for use with TCA_FLOWER_KEY_ENC_FLAGS{,_MASK}. This patch adds an encap argument, similar to fl_set_key_ip()/fl_dump_key_ip(); it determines the flower keys based on the encap argument and uses them in the rest of the two functions. Since these functions are, so far, only called with encap set to false, there is no functional change. Signed-off-by: Asbjørn Sloth Tønnesen Tested-by: Davide Caratti Reviewed-by: Davide Caratti Link: https://patch.msgid.link/20240713021911.1631517-5-ast@fiberby.net Signed-off-by: Jakub Kicinski --- net/sched/cls_flower.c | 40 ++++++++++++++++++++++++++++++---------- 1 file changed, 30 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index eef570c577ac..6a5cecfd9561 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -1166,19 +1166,28 @@ static void fl_set_key_flag(u32 flower_key, u32 flower_mask, } } -static int fl_set_key_flags(struct nlattr **tb, u32 *flags_key, +static int fl_set_key_flags(struct nlattr **tb, bool encap, u32 *flags_key, u32 *flags_mask, struct netlink_ext_ack *extack) { + int fl_key, fl_mask; u32 key, mask; + if (encap) { + fl_key = TCA_FLOWER_KEY_ENC_FLAGS; + fl_mask = TCA_FLOWER_KEY_ENC_FLAGS_MASK; + } else { + fl_key = TCA_FLOWER_KEY_FLAGS; + fl_mask = TCA_FLOWER_KEY_FLAGS_MASK; + } + /* mask is mandatory for flags */ - if (!tb[TCA_FLOWER_KEY_FLAGS_MASK]) { + if (NL_REQ_ATTR_CHECK(extack, NULL, tb, fl_mask)) { NL_SET_ERR_MSG(extack, "Missing flags mask"); return -EINVAL; } - key = be32_to_cpu(nla_get_be32(tb[TCA_FLOWER_KEY_FLAGS])); - mask = be32_to_cpu(nla_get_be32(tb[TCA_FLOWER_KEY_FLAGS_MASK])); + key = be32_to_cpu(nla_get_be32(tb[fl_key])); + mask = be32_to_cpu(nla_get_be32(tb[fl_mask])); *flags_key = 0; *flags_mask = 0; @@ -2086,7 +2095,7 @@ static int fl_set_key(struct net *net, struct nlattr **tb, return ret; if (tb[TCA_FLOWER_KEY_FLAGS]) { - ret = fl_set_key_flags(tb, &key->control.flags, + ret = fl_set_key_flags(tb, false, &key->control.flags, &mask->control.flags, extack); if (ret) return ret; @@ -3084,12 +3093,22 @@ static void fl_get_key_flag(u32 dissector_key, u32 dissector_mask, } } -static int fl_dump_key_flags(struct sk_buff *skb, u32 flags_key, u32 flags_mask) +static int fl_dump_key_flags(struct sk_buff *skb, bool encap, + u32 flags_key, u32 flags_mask) { - u32 key, mask; + int fl_key, fl_mask; __be32 _key, _mask; + u32 key, mask; int err; + if (encap) { + fl_key = TCA_FLOWER_KEY_ENC_FLAGS; + fl_mask = TCA_FLOWER_KEY_ENC_FLAGS_MASK; + } else { + fl_key = TCA_FLOWER_KEY_FLAGS; + fl_mask = TCA_FLOWER_KEY_FLAGS_MASK; + } + if (!memchr_inv(&flags_mask, 0, sizeof(flags_mask))) return 0; @@ -3105,11 +3124,11 @@ static
int fl_dump_key_flags(struct sk_buff *skb, u32 flags_key, u32 flags_mask) _key = cpu_to_be32(key); _mask = cpu_to_be32(mask); - err = nla_put(skb, TCA_FLOWER_KEY_FLAGS, 4, &_key); + err = nla_put(skb, fl_key, 4, &_key); if (err) return err; - return nla_put(skb, TCA_FLOWER_KEY_FLAGS_MASK, 4, &_mask); + return nla_put(skb, fl_mask, 4, &_mask); } static int fl_dump_key_geneve_opt(struct sk_buff *skb, @@ -3632,7 +3651,8 @@ static int fl_dump_key(struct sk_buff *skb, struct net *net, if (fl_dump_key_ct(skb, &key->ct, &mask->ct)) goto nla_put_failure; - if (fl_dump_key_flags(skb, key->control.flags, mask->control.flags)) + if (fl_dump_key_flags(skb, false, key->control.flags, + mask->control.flags)) goto nla_put_failure; if (fl_dump_key_val(skb, &key->hash.hash, TCA_FLOWER_KEY_HASH, -- cgit v1.2.3-58-ga151 From 0e83a7875d691f5b76c87d9223b22e5a0d34511e Mon Sep 17 00:00:00 2001 From: Asbjørn Sloth Tønnesen Date: Sat, 13 Jul 2024 02:19:02 +0000 Subject: net/sched: cls_flower: add policy for TCA_FLOWER_KEY_FLAGS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This policy guards fl_set_key_flags() from seeing flags not used in the context of TCA_FLOWER_KEY_FLAGS. In order for the policy check to be performed with the correct endianness, we also need to change the attribute type to NLA_BE32 (thanks, Davide). TCA_FLOWER_KEY_FLAGS{,_MASK} already has a be32 comment in include/uapi/linux/pkt_cls.h. Signed-off-by: Asbjørn Sloth Tønnesen Tested-by: Davide Caratti Reviewed-by: Davide Caratti Link: https://patch.msgid.link/20240713021911.1631517-6-ast@fiberby.net Signed-off-by: Jakub Kicinski --- net/sched/cls_flower.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 6a5cecfd9561..fc9a9a0b4897 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -41,6 +41,10 @@ #define TCA_FLOWER_KEY_CT_FLAGS_MASK \ (TCA_FLOWER_KEY_CT_FLAGS_MAX - 1) +#define TCA_FLOWER_KEY_FLAGS_POLICY_MASK \ + (TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT | \ + TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST) + #define TUNNEL_FLAGS_PRESENT (\ _BITUL(IP_TUNNEL_CSUM_BIT) | \ _BITUL(IP_TUNNEL_DONT_FRAGMENT_BIT) | \ @@ -676,8 +680,10 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = { [TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK] = { .type = NLA_U16 }, [TCA_FLOWER_KEY_ENC_UDP_DST_PORT] = { .type = NLA_U16 }, [TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK] = { .type = NLA_U16 }, - [TCA_FLOWER_KEY_FLAGS] = { .type = NLA_U32 }, - [TCA_FLOWER_KEY_FLAGS_MASK] = { .type = NLA_U32 }, + [TCA_FLOWER_KEY_FLAGS] = NLA_POLICY_MASK(NLA_BE32, + TCA_FLOWER_KEY_FLAGS_POLICY_MASK), + [TCA_FLOWER_KEY_FLAGS_MASK] = NLA_POLICY_MASK(NLA_BE32, + TCA_FLOWER_KEY_FLAGS_POLICY_MASK), [TCA_FLOWER_KEY_ICMPV4_TYPE] = { .type = NLA_U8 }, [TCA_FLOWER_KEY_ICMPV4_TYPE_MASK] = { .type = NLA_U8 }, [TCA_FLOWER_KEY_ICMPV4_CODE] = { .type = NLA_U8 }, -- cgit v1.2.3-58-ga151 From 4d0aed380f9ddf24dfb1d06a05096b778442c403 Mon Sep 17 00:00:00 2001 From: Asbjørn Sloth Tønnesen Date: Sat, 13 Jul 2024 02:19:03 +0000 Subject: flow_dissector: prepare for encapsulated control flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rename skb_flow_dissect_set_enc_addr_type() to skb_flow_dissect_set_enc_control(), and make it set both addr_type and flags in FLOW_DISSECTOR_KEY_ENC_CONTROL.
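As a side note (not part of this patch), a minimal consumer-side sketch of what the renamed helper provides: once skb_flow_dissect_tunnel_info() has populated FLOW_DISSECTOR_KEY_ENC_CONTROL, a dissector user can read both the outer address family and the tunnel control flags from the same key. The helper name below is made up purely for illustration.

static bool enc_tunnel_has_flag(struct flow_dissector *flow_dissector,
				void *target_container, u32 flag)
{
	struct flow_dissector_key_control *ctrl;

	if (!dissector_uses_key(flow_dissector,
				FLOW_DISSECTOR_KEY_ENC_CONTROL))
		return false;

	ctrl = skb_flow_dissector_target(flow_dissector,
					 FLOW_DISSECTOR_KEY_ENC_CONTROL,
					 target_container);

	/* ctrl->addr_type still identifies IPv4 vs IPv6; ctrl->flags now
	 * carries the bits set by skb_flow_dissect_set_enc_control().
	 */
	return ctrl->flags & flag;
}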
Signed-off-by: Asbjørn Sloth Tønnesen Tested-by: Davide Caratti Reviewed-by: Davide Caratti Link: https://patch.msgid.link/20240713021911.1631517-7-ast@fiberby.net Signed-off-by: Jakub Kicinski --- net/core/flow_dissector.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index e64a26379807..1614c6708ea7 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -299,9 +299,10 @@ void skb_flow_dissect_meta(const struct sk_buff *skb, EXPORT_SYMBOL(skb_flow_dissect_meta); static void -skb_flow_dissect_set_enc_addr_type(enum flow_dissector_key_id type, - struct flow_dissector *flow_dissector, - void *target_container) +skb_flow_dissect_set_enc_control(enum flow_dissector_key_id type, + u32 ctrl_flags, + struct flow_dissector *flow_dissector, + void *target_container) { struct flow_dissector_key_control *ctrl; @@ -312,6 +313,7 @@ skb_flow_dissect_set_enc_addr_type(enum flow_dissector_key_id type, FLOW_DISSECTOR_KEY_ENC_CONTROL, target_container); ctrl->addr_type = type; + ctrl->flags = ctrl_flags; } void @@ -367,6 +369,7 @@ skb_flow_dissect_tunnel_info(const struct sk_buff *skb, { struct ip_tunnel_info *info; struct ip_tunnel_key *key; + u32 ctrl_flags = 0; /* A quick check to see if there might be something to do. */ if (!dissector_uses_key(flow_dissector, @@ -395,9 +398,9 @@ skb_flow_dissect_tunnel_info(const struct sk_buff *skb, switch (ip_tunnel_info_af(info)) { case AF_INET: - skb_flow_dissect_set_enc_addr_type(FLOW_DISSECTOR_KEY_IPV4_ADDRS, - flow_dissector, - target_container); + skb_flow_dissect_set_enc_control(FLOW_DISSECTOR_KEY_IPV4_ADDRS, + ctrl_flags, flow_dissector, + target_container); if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) { struct flow_dissector_key_ipv4_addrs *ipv4; @@ -410,9 +413,9 @@ skb_flow_dissect_tunnel_info(const struct sk_buff *skb, } break; case AF_INET6: - skb_flow_dissect_set_enc_addr_type(FLOW_DISSECTOR_KEY_IPV6_ADDRS, - flow_dissector, - target_container); + skb_flow_dissect_set_enc_control(FLOW_DISSECTOR_KEY_IPV6_ADDRS, + ctrl_flags, flow_dissector, + target_container); if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS)) { struct flow_dissector_key_ipv6_addrs *ipv6; -- cgit v1.2.3-58-ga151 From 03afeb613bfe6b0c28e8b843959f716a3d2c42df Mon Sep 17 00:00:00 2001 From: Asbjørn Sloth Tønnesen Date: Sat, 13 Jul 2024 02:19:04 +0000 Subject: flow_dissector: set encapsulated control flags from tun_flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Set the new FLOW_DIS_F_TUNNEL_* encapsulated control flags, based on if their counter-part is set in tun_flags. These flags are not userspace visible yet, as the code to dump encapsulated control flags will first be added, and later activated in the following patches. 
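As an aside (not part of the patch), the mapping is strictly one-to-one; an equivalent table-driven form of the four test_bit() checks added below could look like the following, with all names purely illustrative:

static const struct {
	unsigned int tun_bit;
	u32 dis_flag;
} tun_to_dis_flag[] = {
	{ IP_TUNNEL_CSUM_BIT,		FLOW_DIS_F_TUNNEL_CSUM },
	{ IP_TUNNEL_DONT_FRAGMENT_BIT,	FLOW_DIS_F_TUNNEL_DONT_FRAGMENT },
	{ IP_TUNNEL_OAM_BIT,		FLOW_DIS_F_TUNNEL_OAM },
	{ IP_TUNNEL_CRIT_OPT_BIT,	FLOW_DIS_F_TUNNEL_CRIT_OPT },
};

static u32 tun_flags_to_ctrl_flags(const unsigned long *tun_flags)
{
	u32 ctrl_flags = 0;
	int i;

	/* key->tun_flags is an IP_TUNNEL_DECLARE_FLAGS() bitmap, so
	 * test_bit() is the accessor for each source bit.
	 */
	for (i = 0; i < ARRAY_SIZE(tun_to_dis_flag); i++)
		if (test_bit(tun_to_dis_flag[i].tun_bit, tun_flags))
			ctrl_flags |= tun_to_dis_flag[i].dis_flag;

	return ctrl_flags;
}

The open-coded checks in the patch match the style of the surrounding dissector code; the table is shown only to make the 1:1 correspondence explicit.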
Signed-off-by: Asbjørn Sloth Tønnesen Tested-by: Davide Caratti Reviewed-by: Davide Caratti Link: https://patch.msgid.link/20240713021911.1631517-8-ast@fiberby.net Signed-off-by: Jakub Kicinski --- net/core/flow_dissector.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'net') diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 1614c6708ea7..a0263a4c5489 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -396,6 +396,15 @@ skb_flow_dissect_tunnel_info(const struct sk_buff *skb, key = &info->key; + if (test_bit(IP_TUNNEL_CSUM_BIT, key->tun_flags)) + ctrl_flags |= FLOW_DIS_F_TUNNEL_CSUM; + if (test_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, key->tun_flags)) + ctrl_flags |= FLOW_DIS_F_TUNNEL_DONT_FRAGMENT; + if (test_bit(IP_TUNNEL_OAM_BIT, key->tun_flags)) + ctrl_flags |= FLOW_DIS_F_TUNNEL_OAM; + if (test_bit(IP_TUNNEL_CRIT_OPT_BIT, key->tun_flags)) + ctrl_flags |= FLOW_DIS_F_TUNNEL_CRIT_OPT; + switch (ip_tunnel_info_af(info)) { case AF_INET: skb_flow_dissect_set_enc_control(FLOW_DISSECTOR_KEY_IPV4_ADDRS, -- cgit v1.2.3-58-ga151 From 988f8723d398cf122c112342d12d67ba31cadf3b Mon Sep 17 00:00:00 2001 From: Asbjørn Sloth Tønnesen Date: Sat, 13 Jul 2024 02:19:05 +0000 Subject: net/sched: cls_flower: add tunnel flags to fl_{set,dump}_key_flags() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Prepare to set and dump the tunnel flags. This code won't see any of these flags yet, as these flags aren't allowed by the NLA_POLICY_MASK, and the functions doesn't get called with encap set to true yet. Signed-off-by: Asbjørn Sloth Tønnesen Tested-by: Davide Caratti Reviewed-by: Davide Caratti Link: https://patch.msgid.link/20240713021911.1631517-9-ast@fiberby.net Signed-off-by: Jakub Kicinski --- net/sched/cls_flower.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'net') diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index fc9a9a0b4897..2a440f11fe1f 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -1204,6 +1204,21 @@ static int fl_set_key_flags(struct nlattr **tb, bool encap, u32 *flags_key, TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST, FLOW_DIS_FIRST_FRAG); + fl_set_key_flag(key, mask, flags_key, flags_mask, + TCA_FLOWER_KEY_FLAGS_TUNNEL_CSUM, + FLOW_DIS_F_TUNNEL_CSUM); + + fl_set_key_flag(key, mask, flags_key, flags_mask, + TCA_FLOWER_KEY_FLAGS_TUNNEL_DONT_FRAGMENT, + FLOW_DIS_F_TUNNEL_DONT_FRAGMENT); + + fl_set_key_flag(key, mask, flags_key, flags_mask, + TCA_FLOWER_KEY_FLAGS_TUNNEL_OAM, FLOW_DIS_F_TUNNEL_OAM); + + fl_set_key_flag(key, mask, flags_key, flags_mask, + TCA_FLOWER_KEY_FLAGS_TUNNEL_CRIT_OPT, + FLOW_DIS_F_TUNNEL_CRIT_OPT); + return 0; } @@ -3127,6 +3142,21 @@ static int fl_dump_key_flags(struct sk_buff *skb, bool encap, TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST, FLOW_DIS_FIRST_FRAG); + fl_get_key_flag(flags_key, flags_mask, &key, &mask, + TCA_FLOWER_KEY_FLAGS_TUNNEL_CSUM, + FLOW_DIS_F_TUNNEL_CSUM); + + fl_get_key_flag(flags_key, flags_mask, &key, &mask, + TCA_FLOWER_KEY_FLAGS_TUNNEL_DONT_FRAGMENT, + FLOW_DIS_F_TUNNEL_DONT_FRAGMENT); + + fl_get_key_flag(flags_key, flags_mask, &key, &mask, + TCA_FLOWER_KEY_FLAGS_TUNNEL_OAM, FLOW_DIS_F_TUNNEL_OAM); + + fl_get_key_flag(flags_key, flags_mask, &key, &mask, + TCA_FLOWER_KEY_FLAGS_TUNNEL_CRIT_OPT, + FLOW_DIS_F_TUNNEL_CRIT_OPT); + _key = cpu_to_be32(key); _mask = cpu_to_be32(mask); -- cgit v1.2.3-58-ga151 From 11036bd7a0b3b05c5e1f43d107ddb02abf83adb9 Mon Sep 17 00:00:00 2001 From: Asbjørn Sloth Tønnesen Date: Sat, 13 
Jul 2024 02:19:06 +0000 Subject: net/sched: cls_flower: rework TCA_FLOWER_KEY_ENC_FLAGS usage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch changes how TCA_FLOWER_KEY_ENC_FLAGS is used, so that it is used with TCA_FLOWER_KEY_FLAGS_* flags, in the same way as TCA_FLOWER_KEY_FLAGS is currently used. Where TCA_FLOWER_KEY_FLAGS uses {key,mask}->control.flags, then TCA_FLOWER_KEY_ENC_FLAGS now uses {key,mask}->enc_control.flags, therefore {key,mask}->enc_flags is now unused. As the generic fl_set_key_flags/fl_dump_key_flags() is used with encap set to true, then fl_{set,dump}_key_enc_flags() is removed. This breaks unreleased userspace API (net-next since 2024-06-04). Signed-off-by: Asbjørn Sloth Tønnesen Tested-by: Davide Caratti Reviewed-by: Davide Caratti Link: https://patch.msgid.link/20240713021911.1631517-10-ast@fiberby.net Signed-off-by: Jakub Kicinski --- include/uapi/linux/pkt_cls.h | 4 ++-- net/sched/cls_flower.c | 56 ++++++++++---------------------------------- 2 files changed, 15 insertions(+), 45 deletions(-) (limited to 'net') diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 3dc4388e944c..d36d9cdf0c00 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -554,8 +554,8 @@ enum { TCA_FLOWER_KEY_SPI, /* be32 */ TCA_FLOWER_KEY_SPI_MASK, /* be32 */ - TCA_FLOWER_KEY_ENC_FLAGS, /* u32 */ - TCA_FLOWER_KEY_ENC_FLAGS_MASK, /* u32 */ + TCA_FLOWER_KEY_ENC_FLAGS, /* be32 */ + TCA_FLOWER_KEY_ENC_FLAGS_MASK, /* be32 */ __TCA_FLOWER_MAX, }; diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 2a440f11fe1f..e2239ab01355 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -45,11 +45,11 @@ (TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT | \ TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST) -#define TUNNEL_FLAGS_PRESENT (\ - _BITUL(IP_TUNNEL_CSUM_BIT) | \ - _BITUL(IP_TUNNEL_DONT_FRAGMENT_BIT) | \ - _BITUL(IP_TUNNEL_OAM_BIT) | \ - _BITUL(IP_TUNNEL_CRIT_OPT_BIT)) +#define TCA_FLOWER_KEY_ENC_FLAGS_POLICY_MASK \ + (TCA_FLOWER_KEY_FLAGS_TUNNEL_CSUM | \ + TCA_FLOWER_KEY_FLAGS_TUNNEL_DONT_FRAGMENT | \ + TCA_FLOWER_KEY_FLAGS_TUNNEL_OAM | \ + TCA_FLOWER_KEY_FLAGS_TUNNEL_CRIT_OPT) struct fl_flow_key { struct flow_dissector_key_meta meta; @@ -745,10 +745,10 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = { [TCA_FLOWER_KEY_SPI_MASK] = { .type = NLA_U32 }, [TCA_FLOWER_L2_MISS] = NLA_POLICY_MAX(NLA_U8, 1), [TCA_FLOWER_KEY_CFM] = { .type = NLA_NESTED }, - [TCA_FLOWER_KEY_ENC_FLAGS] = NLA_POLICY_MASK(NLA_U32, - TUNNEL_FLAGS_PRESENT), - [TCA_FLOWER_KEY_ENC_FLAGS_MASK] = NLA_POLICY_MASK(NLA_U32, - TUNNEL_FLAGS_PRESENT), + [TCA_FLOWER_KEY_ENC_FLAGS] = NLA_POLICY_MASK(NLA_BE32, + TCA_FLOWER_KEY_ENC_FLAGS_POLICY_MASK), + [TCA_FLOWER_KEY_ENC_FLAGS_MASK] = NLA_POLICY_MASK(NLA_BE32, + TCA_FLOWER_KEY_ENC_FLAGS_POLICY_MASK), }; static const struct nla_policy @@ -1866,21 +1866,6 @@ static int fl_set_key_cfm(struct nlattr **tb, return 0; } -static int fl_set_key_enc_flags(struct nlattr **tb, u32 *flags_key, - u32 *flags_mask, struct netlink_ext_ack *extack) -{ - /* mask is mandatory for flags */ - if (NL_REQ_ATTR_CHECK(extack, NULL, tb, TCA_FLOWER_KEY_ENC_FLAGS_MASK)) { - NL_SET_ERR_MSG(extack, "missing enc_flags mask"); - return -EINVAL; - } - - *flags_key = nla_get_u32(tb[TCA_FLOWER_KEY_ENC_FLAGS]); - *flags_mask = nla_get_u32(tb[TCA_FLOWER_KEY_ENC_FLAGS_MASK]); - - return 0; -} - static int fl_set_key(struct net *net, struct nlattr **tb, struct fl_flow_key *key, struct fl_flow_key *mask, struct 
netlink_ext_ack *extack) @@ -2123,8 +2108,8 @@ static int fl_set_key(struct net *net, struct nlattr **tb, } if (tb[TCA_FLOWER_KEY_ENC_FLAGS]) - ret = fl_set_key_enc_flags(tb, &key->enc_flags.flags, - &mask->enc_flags.flags, extack); + ret = fl_set_key_flags(tb, true, &key->enc_control.flags, + &mask->enc_control.flags, extack); return ret; } @@ -3381,22 +3366,6 @@ err_cfm_opts: return err; } -static int fl_dump_key_enc_flags(struct sk_buff *skb, - struct flow_dissector_key_enc_flags *key, - struct flow_dissector_key_enc_flags *mask) -{ - if (!memchr_inv(mask, 0, sizeof(*mask))) - return 0; - - if (nla_put_u32(skb, TCA_FLOWER_KEY_ENC_FLAGS, key->flags)) - return -EMSGSIZE; - - if (nla_put_u32(skb, TCA_FLOWER_KEY_ENC_FLAGS_MASK, mask->flags)) - return -EMSGSIZE; - - return 0; -} - static int fl_dump_key_options(struct sk_buff *skb, int enc_opt_type, struct flow_dissector_key_enc_opts *enc_opts) { @@ -3699,7 +3668,8 @@ static int fl_dump_key(struct sk_buff *skb, struct net *net, if (fl_dump_key_cfm(skb, &key->cfm, &mask->cfm)) goto nla_put_failure; - if (fl_dump_key_enc_flags(skb, &key->enc_flags, &mask->enc_flags)) + if (fl_dump_key_flags(skb, true, key->enc_control.flags, + mask->enc_control.flags)) goto nla_put_failure; return 0; -- cgit v1.2.3-58-ga151 From db5271d50ec155abf287a27fa84e2e33a81dbd55 Mon Sep 17 00:00:00 2001 From: Asbjørn Sloth Tønnesen Date: Sat, 13 Jul 2024 02:19:08 +0000 Subject: flow_dissector: cleanup FLOW_DISSECTOR_KEY_ENC_FLAGS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that TCA_FLOWER_KEY_ENC_FLAGS is unused, and its former data is stored behind TCA_FLOWER_KEY_ENC_CONTROL, remove the last bits of FLOW_DISSECTOR_KEY_ENC_FLAGS. FLOW_DISSECTOR_KEY_ENC_FLAGS is unreleased and has been in net-next since 2024-06-04.
Signed-off-by: Asbjørn Sloth Tønnesen Tested-by: Davide Caratti Reviewed-by: Davide Caratti Link: https://patch.msgid.link/20240713021911.1631517-12-ast@fiberby.net Signed-off-by: Jakub Kicinski --- include/net/flow_dissector.h | 9 --------- include/net/ip_tunnels.h | 12 ------------ net/core/flow_dissector.c | 16 +--------------- net/sched/cls_flower.c | 3 --- 4 files changed, 1 insertion(+), 39 deletions(-) (limited to 'net') diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 460ea65b9e59..ced79dc8e856 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -342,14 +342,6 @@ struct flow_dissector_key_cfm { #define FLOW_DIS_CFM_MDL_MASK GENMASK(7, 5) #define FLOW_DIS_CFM_MDL_MAX 7 -/** - * struct flow_dissector_key_enc_flags: tunnel metadata control flags - * @flags: tunnel control flags - */ -struct flow_dissector_key_enc_flags { - u32 flags; -}; - enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_CONTROL, /* struct flow_dissector_key_control */ FLOW_DISSECTOR_KEY_BASIC, /* struct flow_dissector_key_basic */ @@ -384,7 +376,6 @@ enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_L2TPV3, /* struct flow_dissector_key_l2tpv3 */ FLOW_DISSECTOR_KEY_CFM, /* struct flow_dissector_key_cfm */ FLOW_DISSECTOR_KEY_IPSEC, /* struct flow_dissector_key_ipsec */ - FLOW_DISSECTOR_KEY_ENC_FLAGS, /* struct flow_dissector_key_enc_flags */ FLOW_DISSECTOR_KEY_MAX, }; diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 3877315cf8b8..1db2417b8ff5 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -247,18 +247,6 @@ static inline bool ip_tunnel_is_options_present(const unsigned long *flags) return ip_tunnel_flags_intersect(flags, present); } -static inline void ip_tunnel_set_encflags_present(unsigned long *flags) -{ - IP_TUNNEL_DECLARE_FLAGS(present) = { }; - - __set_bit(IP_TUNNEL_CSUM_BIT, present); - __set_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, present); - __set_bit(IP_TUNNEL_OAM_BIT, present); - __set_bit(IP_TUNNEL_CRIT_OPT_BIT, present); - - ip_tunnel_flags_or(flags, flags, present); -} - static inline bool ip_tunnel_flags_is_be16_compat(const unsigned long *flags) { IP_TUNNEL_DECLARE_FLAGS(supp) = { }; diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index a0263a4c5489..1a9ca129fddd 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -385,9 +385,7 @@ skb_flow_dissect_tunnel_info(const struct sk_buff *skb, !dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_IP) && !dissector_uses_key(flow_dissector, - FLOW_DISSECTOR_KEY_ENC_OPTS) && - !dissector_uses_key(flow_dissector, - FLOW_DISSECTOR_KEY_ENC_FLAGS)) + FLOW_DISSECTOR_KEY_ENC_OPTS)) return; info = skb_tunnel_info(skb); @@ -489,18 +487,6 @@ skb_flow_dissect_tunnel_info(const struct sk_buff *skb, IP_TUNNEL_GENEVE_OPT_BIT); enc_opt->dst_opt_type = val < __IP_TUNNEL_FLAG_NUM ? 
val : 0; } - - if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_FLAGS)) { - struct flow_dissector_key_enc_flags *enc_flags; - IP_TUNNEL_DECLARE_FLAGS(flags) = {}; - - enc_flags = skb_flow_dissector_target(flow_dissector, - FLOW_DISSECTOR_KEY_ENC_FLAGS, - target_container); - ip_tunnel_set_encflags_present(flags); - ip_tunnel_flags_and(flags, flags, info->key.tun_flags); - enc_flags->flags = bitmap_read(flags, IP_TUNNEL_CSUM_BIT, 32); - } } EXPORT_SYMBOL(skb_flow_dissect_tunnel_info); diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index e2239ab01355..897d6b683cc6 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -85,7 +85,6 @@ struct fl_flow_key { struct flow_dissector_key_l2tpv3 l2tpv3; struct flow_dissector_key_ipsec ipsec; struct flow_dissector_key_cfm cfm; - struct flow_dissector_key_enc_flags enc_flags; } __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */ struct fl_flow_mask_range { @@ -2223,8 +2222,6 @@ static void fl_init_dissector(struct flow_dissector *dissector, FLOW_DISSECTOR_KEY_IPSEC, ipsec); FL_KEY_SET_IF_MASKED(mask, keys, cnt, FLOW_DISSECTOR_KEY_CFM, cfm); - FL_KEY_SET_IF_MASKED(mask, keys, cnt, - FLOW_DISSECTOR_KEY_ENC_FLAGS, enc_flags); skb_flow_dissector_init(dissector, keys, cnt); } -- cgit v1.2.3-58-ga151 From 706bf4f44c6d2ae2fdeefeb816b2c35a173ecfa4 Mon Sep 17 00:00:00 2001 From: Asbjørn Sloth Tønnesen Date: Sat, 13 Jul 2024 02:19:09 +0000 Subject: flow_dissector: set encapsulation control flags for non-IP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make sure to set encapsulated control flags also for non-IP packets, such that it's possible to allow matching on e.g. TUNNEL_OAM on a geneve packet carrying a non-IP packet. 
Suggested-by: Davide Caratti Signed-off-by: Asbjørn Sloth Tønnesen Tested-by: Davide Caratti Reviewed-by: Davide Caratti Link: https://patch.msgid.link/20240713021911.1631517-13-ast@fiberby.net Signed-off-by: Jakub Kicinski --- net/core/flow_dissector.c | 4 ++++ net/sched/cls_flower.c | 3 ++- 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 1a9ca129fddd..ada1e39b557e 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -434,6 +434,10 @@ skb_flow_dissect_tunnel_info(const struct sk_buff *skb, ipv6->dst = key->u.ipv6.dst; } break; + default: + skb_flow_dissect_set_enc_control(0, ctrl_flags, flow_dissector, + target_container); + break; } if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) { diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 897d6b683cc6..38b2df387c1e 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -2199,7 +2199,8 @@ static void fl_init_dissector(struct flow_dissector *dissector, FL_KEY_SET_IF_MASKED(mask, keys, cnt, FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS, enc_ipv6); if (FL_KEY_IS_MASKED(mask, enc_ipv4) || - FL_KEY_IS_MASKED(mask, enc_ipv6)) + FL_KEY_IS_MASKED(mask, enc_ipv6) || + FL_KEY_IS_MASKED(mask, enc_control)) FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_ENC_CONTROL, enc_control); FL_KEY_SET_IF_MASKED(mask, keys, cnt, -- cgit v1.2.3-58-ga151 From 536b97acddd702edacbe5f89c95dcd5a145b490f Mon Sep 17 00:00:00 2001 From: Asbjørn Sloth Tønnesen Date: Sat, 13 Jul 2024 02:19:10 +0000 Subject: net/sched: cls_flower: propagate tca[TCA_OPTIONS] to NL_REQ_ATTR_CHECK MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit NL_REQ_ATTR_CHECK() is used in fl_set_key_flags() to set extended attributes about the origin of an error, this patch propagates tca[TCA_OPTIONS] through. 
Before this patch: $ sudo ./tools/net/ynl/cli.py --spec Documentation/netlink/specs/tc.yaml \ --do newtfilter --json '{ "chain": 0, "family": 0, "handle": 4, "ifindex": 22, "info": 262152, "kind": "flower", "options": { "flags": 0, "key-enc-flags": 8, "key-eth-type": 2048 }, "parent": 4294967283 }' Netlink error: Invalid argument nl_len = 68 (52) nl_flags = 0x300 nl_type = 2 error: -22 extack: {'msg': 'Missing flags mask', 'miss-type': 111} After this patch: [same cmd] Netlink error: Invalid argument nl_len = 76 (60) nl_flags = 0x300 nl_type = 2 error: -22 extack: {'msg': 'Missing flags mask', 'miss-type': 111, 'miss-nest': 56} Suggested-by: Jakub Kicinski Signed-off-by: Asbjørn Sloth Tønnesen Link: https://patch.msgid.link/20240713021911.1631517-14-ast@fiberby.net Signed-off-by: Jakub Kicinski --- net/sched/cls_flower.c | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 38b2df387c1e..e280c27cb9f9 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -1171,8 +1171,9 @@ static void fl_set_key_flag(u32 flower_key, u32 flower_mask, } } -static int fl_set_key_flags(struct nlattr **tb, bool encap, u32 *flags_key, - u32 *flags_mask, struct netlink_ext_ack *extack) +static int fl_set_key_flags(struct nlattr *tca_opts, struct nlattr **tb, + bool encap, u32 *flags_key, u32 *flags_mask, + struct netlink_ext_ack *extack) { int fl_key, fl_mask; u32 key, mask; @@ -1186,7 +1187,7 @@ static int fl_set_key_flags(struct nlattr **tb, bool encap, u32 *flags_key, } /* mask is mandatory for flags */ - if (NL_REQ_ATTR_CHECK(extack, NULL, tb, fl_mask)) { + if (NL_REQ_ATTR_CHECK(extack, tca_opts, tb, fl_mask)) { NL_SET_ERR_MSG(extack, "Missing flags mask"); return -EINVAL; } @@ -1865,9 +1866,9 @@ static int fl_set_key_cfm(struct nlattr **tb, return 0; } -static int fl_set_key(struct net *net, struct nlattr **tb, - struct fl_flow_key *key, struct fl_flow_key *mask, - struct netlink_ext_ack *extack) +static int fl_set_key(struct net *net, struct nlattr *tca_opts, + struct nlattr **tb, struct fl_flow_key *key, + struct fl_flow_key *mask, struct netlink_ext_ack *extack) { __be16 ethertype; int ret = 0; @@ -2100,14 +2101,16 @@ static int fl_set_key(struct net *net, struct nlattr **tb, return ret; if (tb[TCA_FLOWER_KEY_FLAGS]) { - ret = fl_set_key_flags(tb, false, &key->control.flags, + ret = fl_set_key_flags(tca_opts, tb, false, + &key->control.flags, &mask->control.flags, extack); if (ret) return ret; } if (tb[TCA_FLOWER_KEY_ENC_FLAGS]) - ret = fl_set_key_flags(tb, true, &key->enc_control.flags, + ret = fl_set_key_flags(tca_opts, tb, true, + &key->enc_control.flags, &mask->enc_control.flags, extack); return ret; @@ -2358,6 +2361,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, { struct cls_fl_head *head = fl_head_dereference(tp); bool rtnl_held = !(flags & TCA_ACT_FLAGS_NO_RTNL); + struct nlattr *tca_opts = tca[TCA_OPTIONS]; struct cls_fl_filter *fold = *arg; bool bound_to_filter = false; struct cls_fl_filter *fnew; @@ -2366,7 +2370,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, bool in_ht; int err; - if (!tca[TCA_OPTIONS]) { + if (!tca_opts) { err = -EINVAL; goto errout_fold; } @@ -2384,7 +2388,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, } err = nla_parse_nested_deprecated(tb, TCA_FLOWER_MAX, - tca[TCA_OPTIONS], fl_policy, NULL); + tca_opts, fl_policy, NULL); if (err < 0) goto errout_tb; @@ -2460,7 +2464,7 @@ static int 
fl_change(struct net *net, struct sk_buff *in_skb, bound_to_filter = true; } - err = fl_set_key(net, tb, &fnew->key, &mask->key, extack); + err = fl_set_key(net, tca_opts, tb, &fnew->key, &mask->key, extack); if (err) goto unbind_filter; @@ -2800,18 +2804,19 @@ static void *fl_tmplt_create(struct net *net, struct tcf_chain *chain, struct nlattr **tca, struct netlink_ext_ack *extack) { + struct nlattr *tca_opts = tca[TCA_OPTIONS]; struct fl_flow_tmplt *tmplt; struct nlattr **tb; int err; - if (!tca[TCA_OPTIONS]) + if (!tca_opts) return ERR_PTR(-EINVAL); tb = kcalloc(TCA_FLOWER_MAX + 1, sizeof(struct nlattr *), GFP_KERNEL); if (!tb) return ERR_PTR(-ENOBUFS); err = nla_parse_nested_deprecated(tb, TCA_FLOWER_MAX, - tca[TCA_OPTIONS], fl_policy, NULL); + tca_opts, fl_policy, NULL); if (err) goto errout_tb; @@ -2821,7 +2826,8 @@ static void *fl_tmplt_create(struct net *net, struct tcf_chain *chain, goto errout_tb; } tmplt->chain = chain; - err = fl_set_key(net, tb, &tmplt->dummy_key, &tmplt->mask, extack); + err = fl_set_key(net, tca_opts, tb, &tmplt->dummy_key, + &tmplt->mask, extack); if (err) goto errout_tmplt; -- cgit v1.2.3-58-ga151 From 3f45181358e4df50a40ea1bb51b00a1f295f915e Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Sun, 14 Jul 2024 09:17:19 -0700 Subject: tcp: Don't access uninit tcp_rsk(req)->ao_keyid in tcp_create_openreq_child(). syzkaller reported KMSAN splat in tcp_create_openreq_child(). [0] The uninit variable is tcp_rsk(req)->ao_keyid. tcp_rsk(req)->ao_keyid is initialised only when tcp_conn_request() finds a valid TCP AO option in SYN. Then, tcp_rsk(req)->used_tcp_ao is set accordingly. Let's not read tcp_rsk(req)->ao_keyid when tcp_rsk(req)->used_tcp_ao is false. [0]: BUG: KMSAN: uninit-value in tcp_create_openreq_child+0x198b/0x1ff0 net/ipv4/tcp_minisocks.c:610 tcp_create_openreq_child+0x198b/0x1ff0 net/ipv4/tcp_minisocks.c:610 tcp_v4_syn_recv_sock+0x18e/0x2170 net/ipv4/tcp_ipv4.c:1754 tcp_check_req+0x1a3e/0x20c0 net/ipv4/tcp_minisocks.c:852 tcp_v4_rcv+0x26a4/0x53a0 net/ipv4/tcp_ipv4.c:2265 ip_protocol_deliver_rcu+0x884/0x1270 net/ipv4/ip_input.c:205 ip_local_deliver_finish+0x30f/0x530 net/ipv4/ip_input.c:233 NF_HOOK include/linux/netfilter.h:314 [inline] ip_local_deliver+0x230/0x4c0 net/ipv4/ip_input.c:254 dst_input include/net/dst.h:460 [inline] ip_sublist_rcv_finish net/ipv4/ip_input.c:580 [inline] ip_list_rcv_finish net/ipv4/ip_input.c:631 [inline] ip_sublist_rcv+0x10f7/0x13e0 net/ipv4/ip_input.c:639 ip_list_rcv+0x952/0x9c0 net/ipv4/ip_input.c:674 __netif_receive_skb_list_ptype net/core/dev.c:5703 [inline] __netif_receive_skb_list_core+0xd92/0x11d0 net/core/dev.c:5751 __netif_receive_skb_list net/core/dev.c:5803 [inline] netif_receive_skb_list_internal+0xd8f/0x1350 net/core/dev.c:5895 gro_normal_list include/net/gro.h:515 [inline] napi_complete_done+0x3f2/0x990 net/core/dev.c:6246 e1000_clean+0x1fa4/0x5e50 drivers/net/ethernet/intel/e1000/e1000_main.c:3808 __napi_poll+0xd9/0x990 net/core/dev.c:6771 napi_poll net/core/dev.c:6840 [inline] net_rx_action+0x90f/0x17e0 net/core/dev.c:6962 handle_softirqs+0x152/0x6b0 kernel/softirq.c:554 __do_softirq kernel/softirq.c:588 [inline] invoke_softirq kernel/softirq.c:428 [inline] __irq_exit_rcu kernel/softirq.c:637 [inline] irq_exit_rcu+0x5d/0x120 kernel/softirq.c:649 common_interrupt+0x83/0x90 arch/x86/kernel/irq.c:278 asm_common_interrupt+0x26/0x40 arch/x86/include/asm/idtentry.h:693 __msan_instrument_asm_store+0xd6/0xe0 arch_atomic_inc arch/x86/include/asm/atomic.h:53 [inline] raw_atomic_inc 
include/linux/atomic/atomic-arch-fallback.h:992 [inline] atomic_inc include/linux/atomic/atomic-instrumented.h:436 [inline] page_ref_inc include/linux/page_ref.h:153 [inline] folio_ref_inc include/linux/page_ref.h:160 [inline] filemap_map_order0_folio mm/filemap.c:3596 [inline] filemap_map_pages+0x11c7/0x2270 mm/filemap.c:3644 do_fault_around mm/memory.c:4879 [inline] do_read_fault mm/memory.c:4912 [inline] do_fault mm/memory.c:5051 [inline] do_pte_missing mm/memory.c:3897 [inline] handle_pte_fault mm/memory.c:5381 [inline] __handle_mm_fault mm/memory.c:5524 [inline] handle_mm_fault+0x3677/0x6f00 mm/memory.c:5689 do_user_addr_fault+0x1373/0x2b20 arch/x86/mm/fault.c:1338 handle_page_fault arch/x86/mm/fault.c:1481 [inline] exc_page_fault+0x54/0xc0 arch/x86/mm/fault.c:1539 asm_exc_page_fault+0x26/0x30 arch/x86/include/asm/idtentry.h:623 Uninit was stored to memory at: tcp_create_openreq_child+0x1984/0x1ff0 net/ipv4/tcp_minisocks.c:611 tcp_v4_syn_recv_sock+0x18e/0x2170 net/ipv4/tcp_ipv4.c:1754 tcp_check_req+0x1a3e/0x20c0 net/ipv4/tcp_minisocks.c:852 tcp_v4_rcv+0x26a4/0x53a0 net/ipv4/tcp_ipv4.c:2265 ip_protocol_deliver_rcu+0x884/0x1270 net/ipv4/ip_input.c:205 ip_local_deliver_finish+0x30f/0x530 net/ipv4/ip_input.c:233 NF_HOOK include/linux/netfilter.h:314 [inline] ip_local_deliver+0x230/0x4c0 net/ipv4/ip_input.c:254 dst_input include/net/dst.h:460 [inline] ip_sublist_rcv_finish net/ipv4/ip_input.c:580 [inline] ip_list_rcv_finish net/ipv4/ip_input.c:631 [inline] ip_sublist_rcv+0x10f7/0x13e0 net/ipv4/ip_input.c:639 ip_list_rcv+0x952/0x9c0 net/ipv4/ip_input.c:674 __netif_receive_skb_list_ptype net/core/dev.c:5703 [inline] __netif_receive_skb_list_core+0xd92/0x11d0 net/core/dev.c:5751 __netif_receive_skb_list net/core/dev.c:5803 [inline] netif_receive_skb_list_internal+0xd8f/0x1350 net/core/dev.c:5895 gro_normal_list include/net/gro.h:515 [inline] napi_complete_done+0x3f2/0x990 net/core/dev.c:6246 e1000_clean+0x1fa4/0x5e50 drivers/net/ethernet/intel/e1000/e1000_main.c:3808 __napi_poll+0xd9/0x990 net/core/dev.c:6771 napi_poll net/core/dev.c:6840 [inline] net_rx_action+0x90f/0x17e0 net/core/dev.c:6962 handle_softirqs+0x152/0x6b0 kernel/softirq.c:554 __do_softirq kernel/softirq.c:588 [inline] invoke_softirq kernel/softirq.c:428 [inline] __irq_exit_rcu kernel/softirq.c:637 [inline] irq_exit_rcu+0x5d/0x120 kernel/softirq.c:649 common_interrupt+0x83/0x90 arch/x86/kernel/irq.c:278 asm_common_interrupt+0x26/0x40 arch/x86/include/asm/idtentry.h:693 Uninit was created at: __alloc_pages_noprof+0x82d/0xcb0 mm/page_alloc.c:4706 __alloc_pages_node_noprof include/linux/gfp.h:269 [inline] alloc_pages_node_noprof include/linux/gfp.h:296 [inline] alloc_slab_page mm/slub.c:2265 [inline] allocate_slab mm/slub.c:2428 [inline] new_slab+0x2af/0x14e0 mm/slub.c:2481 ___slab_alloc+0xf73/0x3150 mm/slub.c:3667 __slab_alloc mm/slub.c:3757 [inline] __slab_alloc_node mm/slub.c:3810 [inline] slab_alloc_node mm/slub.c:3990 [inline] kmem_cache_alloc_noprof+0x53a/0x9f0 mm/slub.c:4009 reqsk_alloc_noprof net/ipv4/inet_connection_sock.c:920 [inline] inet_reqsk_alloc+0x63/0x700 net/ipv4/inet_connection_sock.c:951 tcp_conn_request+0x339/0x4860 net/ipv4/tcp_input.c:7177 tcp_v4_conn_request+0x13b/0x190 net/ipv4/tcp_ipv4.c:1719 tcp_rcv_state_process+0x2dd/0x4a10 net/ipv4/tcp_input.c:6711 tcp_v4_do_rcv+0xbee/0x10d0 net/ipv4/tcp_ipv4.c:1932 tcp_v4_rcv+0x3fad/0x53a0 net/ipv4/tcp_ipv4.c:2334 ip_protocol_deliver_rcu+0x884/0x1270 net/ipv4/ip_input.c:205 ip_local_deliver_finish+0x30f/0x530 net/ipv4/ip_input.c:233 NF_HOOK 
include/linux/netfilter.h:314 [inline] ip_local_deliver+0x230/0x4c0 net/ipv4/ip_input.c:254 dst_input include/net/dst.h:460 [inline] ip_sublist_rcv_finish net/ipv4/ip_input.c:580 [inline] ip_list_rcv_finish net/ipv4/ip_input.c:631 [inline] ip_sublist_rcv+0x10f7/0x13e0 net/ipv4/ip_input.c:639 ip_list_rcv+0x952/0x9c0 net/ipv4/ip_input.c:674 __netif_receive_skb_list_ptype net/core/dev.c:5703 [inline] __netif_receive_skb_list_core+0xd92/0x11d0 net/core/dev.c:5751 __netif_receive_skb_list net/core/dev.c:5803 [inline] netif_receive_skb_list_internal+0xd8f/0x1350 net/core/dev.c:5895 gro_normal_list include/net/gro.h:515 [inline] napi_complete_done+0x3f2/0x990 net/core/dev.c:6246 e1000_clean+0x1fa4/0x5e50 drivers/net/ethernet/intel/e1000/e1000_main.c:3808 __napi_poll+0xd9/0x990 net/core/dev.c:6771 napi_poll net/core/dev.c:6840 [inline] net_rx_action+0x90f/0x17e0 net/core/dev.c:6962 handle_softirqs+0x152/0x6b0 kernel/softirq.c:554 __do_softirq kernel/softirq.c:588 [inline] invoke_softirq kernel/softirq.c:428 [inline] __irq_exit_rcu kernel/softirq.c:637 [inline] irq_exit_rcu+0x5d/0x120 kernel/softirq.c:649 common_interrupt+0x83/0x90 arch/x86/kernel/irq.c:278 asm_common_interrupt+0x26/0x40 arch/x86/include/asm/idtentry.h:693 CPU: 0 PID: 239 Comm: modprobe Tainted: G B 6.10.0-rc7-01816-g852e42cc2dd4 #3 1107521f0c7b55c9309062382d0bda9f604dbb6d Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014 Fixes: 06b22ef29591 ("net/tcp: Wire TCP-AO to request sockets") Reported-by: syzkaller Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Acked-by: Dmitry Safonov <0x7f454c46@gmail.com> Link: https://patch.msgid.link/20240714161719.6528-1-kuniyu@amazon.com Signed-off-by: Paolo Abeni --- net/ipv4/tcp_minisocks.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index bc67f6b9efae..a19a9dbd3409 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -514,9 +514,6 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, const struct tcp_sock *oldtp; struct tcp_sock *newtp; u32 seq; -#ifdef CONFIG_TCP_AO - struct tcp_ao_key *ao_key; -#endif if (!newsk) return NULL; @@ -607,10 +604,14 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, #endif #ifdef CONFIG_TCP_AO newtp->ao_info = NULL; - ao_key = treq->af_specific->ao_lookup(sk, req, - tcp_rsk(req)->ao_keyid, -1); - if (ao_key) - newtp->tcp_header_len += tcp_ao_len_aligned(ao_key); + + if (tcp_rsk_used_ao(req)) { + struct tcp_ao_key *ao_key; + + ao_key = treq->af_specific->ao_lookup(sk, req, tcp_rsk(req)->ao_keyid, -1); + if (ao_key) + newtp->tcp_header_len += tcp_ao_len_aligned(ao_key); + } #endif if (skb->len >= TCP_MSS_DEFAULT + newtp->tcp_header_len) newicsk->icsk_ack.last_seg_size = skb->len - newtp->tcp_header_len; -- cgit v1.2.3-58-ga151 From a3bfc095060b51f0198755020daf80d1e14413b1 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 13 Jul 2024 21:11:15 -0700 Subject: tcp: Replace strncpy() with strscpy() Replace the deprecated[1] uses of strncpy() in tcp_ca_get_name_by_key() and tcp_get_default_congestion_control(). The callers use the results as standard C strings (via nla_put_string() and proc handlers respectively), so trailing padding is not needed. Since passing the destination buffer arguments decays it to a pointer, the size can't be trivially determined by the compiler. 
ca->name is the same length in both cases, so strscpy() won't fail (when ca->name is NUL-terminated). Include the length explicitly instead of using the 2-argument strscpy(). Link: https://github.com/KSPP/linux/issues/90 [1] Signed-off-by: Kees Cook Reviewed-by: Simon Horman Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20240714041111.it.918-kees@kernel.org Signed-off-by: Jakub Kicinski --- net/ipv4/tcp_cong.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 48617d99abb0..0306d257fa64 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -202,9 +202,10 @@ char *tcp_ca_get_name_by_key(u32 key, char *buffer) rcu_read_lock(); ca = tcp_ca_find_key(key); - if (ca) - ret = strncpy(buffer, ca->name, - TCP_CA_NAME_MAX); + if (ca) { + strscpy(buffer, ca->name, TCP_CA_NAME_MAX); + ret = buffer; + } rcu_read_unlock(); return ret; @@ -337,7 +338,7 @@ void tcp_get_default_congestion_control(struct net *net, char *name) rcu_read_lock(); ca = rcu_dereference(net->ipv4.tcp_congestion_control); - strncpy(name, ca->name, TCP_CA_NAME_MAX); + strscpy(name, ca->name, TCP_CA_NAME_MAX); rcu_read_unlock(); } -- cgit v1.2.3-58-ga151
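For readers less familiar with the kernel string helpers, a minimal sketch of the behavioural difference this patch relies on; the helper below is hypothetical and only illustrates the two calls with a TCP_CA_NAME_MAX-sized destination:

static void copy_ca_name(char *dst, const char *src)
{
	/* strncpy(): zero-pads the tail of dst, but leaves it without a
	 * NUL terminator when strlen(src) >= TCP_CA_NAME_MAX.
	 */
	strncpy(dst, src, TCP_CA_NAME_MAX);

	/* strscpy(): always NUL-terminates (for a non-zero size) and
	 * reports truncation, so dst can be handed directly to
	 * nla_put_string() or a proc handler.
	 */
	if (strscpy(dst, src, TCP_CA_NAME_MAX) == -E2BIG)
		pr_debug("congestion control name truncated\n");
}

As the commit message notes, ca->name fits in the same TCP_CA_NAME_MAX bytes, so the truncation branch cannot trigger in the converted callers.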