diff options
author | Paolo Bonzini <pbonzini@redhat.com> | 2022-02-05 00:58:25 -0500 |
---|---|---|
committer | Paolo Bonzini <pbonzini@redhat.com> | 2022-02-05 00:58:25 -0500 |
commit | 7e6a6b400db8048bd1c06e497e338388413cf5bc (patch) | |
tree | 794f9fcdc7a1bfb9a2812e90fc76809d810203b2 /include/net | |
parent | 6e37ec8825a113bc2dd1b280be10e5ac6eb4f6b1 (diff) | |
parent | 1dd498e5e26ad71e3e9130daf72cfb6a693fee03 (diff) |
Merge tag 'kvmarm-fixes-5.17-2' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into HEAD
KVM/arm64 fixes for 5.17, take #2
- A couple of fixes when handling an exception while a SError has been
delivered
- Workaround for Cortex-A510's single-step[ erratum
Diffstat (limited to 'include/net')
72 files changed, 1451 insertions, 441 deletions
diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h index 9c6ec78e47a5..24a509f559ee 100644 --- a/include/net/9p/9p.h +++ b/include/net/9p/9p.h @@ -551,6 +551,4 @@ struct p9_fcall { int p9_errstr2errno(char *errstr, int len); int p9_error_init(void); -int p9_trans_fd_init(void); -void p9_trans_fd_exit(void); #endif /* NET_9P_H */ diff --git a/include/net/9p/transport.h b/include/net/9p/transport.h index 15a4e6a9dbf7..ff842f963071 100644 --- a/include/net/9p/transport.h +++ b/include/net/9p/transport.h @@ -54,7 +54,7 @@ struct p9_trans_module { void v9fs_register_trans(struct p9_trans_module *m); void v9fs_unregister_trans(struct p9_trans_module *m); -struct p9_trans_module *v9fs_get_trans_by_name(char *s); +struct p9_trans_module *v9fs_get_trans_by_name(const char *s); struct p9_trans_module *v9fs_get_default_trans(void); void v9fs_put_trans(struct p9_trans_module *m); diff --git a/include/net/act_api.h b/include/net/act_api.h index b5b624c7e488..3049cb69c025 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -7,6 +7,7 @@ */ #include <linux/refcount.h> +#include <net/flow_offload.h> #include <net/sch_generic.h> #include <net/pkt_sched.h> #include <net/net_namespace.h> @@ -44,6 +45,7 @@ struct tc_action { u8 hw_stats; u8 used_hw_stats; bool used_hw_stats_valid; + u32 in_hw_count; }; #define tcf_index common.tcfa_index #define tcf_refcnt common.tcfa_refcnt @@ -88,6 +90,16 @@ static inline void tcf_tm_dump(struct tcf_t *dtm, const struct tcf_t *stm) dtm->expires = jiffies_to_clock_t(stm->expires); } +static inline enum flow_action_hw_stats tc_act_hw_stats(u8 hw_stats) +{ + if (WARN_ON_ONCE(hw_stats > TCA_ACT_HW_STATS_ANY)) + return FLOW_ACTION_HW_STATS_DONT_CARE; + else if (!hw_stats) + return FLOW_ACTION_HW_STATS_DISABLED; + + return hw_stats; +} + #ifdef CONFIG_NET_CLS_ACT #define ACT_P_CREATED 1 @@ -121,6 +133,8 @@ struct tc_action_ops { struct psample_group * (*get_psample_group)(const struct tc_action *a, tc_action_priv_destructor *destructor); + int (*offload_act_setup)(struct tc_action *act, void *entry_data, + u32 *index_inc, bool bind); }; struct tc_action_net { @@ -189,7 +203,7 @@ int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions, int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla, struct nlattr *est, struct tc_action *actions[], int init_res[], size_t *attr_size, - u32 flags, struct netlink_ext_ack *extack); + u32 flags, u32 fl_flags, struct netlink_ext_ack *extack); struct tc_action_ops *tc_action_load_ops(struct nlattr *nla, bool police, bool rtnl_held, struct netlink_ext_ack *extack); @@ -240,6 +254,9 @@ void tcf_action_update_stats(struct tc_action *a, u64 bytes, u64 packets, u64 drops, bool hw); int tcf_action_copy_stats(struct sk_buff *, struct tc_action *, int); +int tcf_action_update_hw_stats(struct tc_action *action); +int tcf_action_reoffload_cb(flow_indr_block_bind_cb_t *cb, + void *cb_priv, bool add); int tcf_action_check_ctrlact(int action, struct tcf_proto *tp, struct tcf_chain **handle, struct netlink_ext_ack *newchain); @@ -251,6 +268,14 @@ DECLARE_STATIC_KEY_FALSE(tcf_frag_xmit_count); #endif int tcf_dev_queue_xmit(struct sk_buff *skb, int (*xmit)(struct sk_buff *skb)); + +#else /* !CONFIG_NET_CLS_ACT */ + +static inline int tcf_action_reoffload_cb(flow_indr_block_bind_cb_t *cb, + void *cb_priv, bool add) { + return 0; +} + #endif /* CONFIG_NET_CLS_ACT */ static inline void tcf_action_stats_update(struct tc_action *a, u64 bytes, diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 78ea3e332688..e7ce719838b5 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -6,6 +6,8 @@ #define RTR_SOLICITATION_INTERVAL (4*HZ) #define RTR_SOLICITATION_MAX_INTERVAL (3600*HZ) /* 1 hour */ +#define MIN_VALID_LIFETIME (2*3600) /* 2 hours */ + #define TEMP_VALID_LIFETIME (7*86400) #define TEMP_PREFERRED_LIFETIME (86400) #define REGEN_MAX_RETRY (3) diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 7d142e8a0550..a7ef624ed726 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -20,13 +20,12 @@ struct sock *unix_peer_get(struct sock *sk); #define UNIX_HASH_BITS 8 extern unsigned int unix_tot_inflight; -extern spinlock_t unix_table_lock; +extern spinlock_t unix_table_locks[2 * UNIX_HASH_SIZE]; extern struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE]; struct unix_address { refcount_t refcnt; int len; - unsigned int hash; struct sockaddr_un name[]; }; diff --git a/include/net/arp.h b/include/net/arp.h index 4950191f6b2b..031374ac2f22 100644 --- a/include/net/arp.h +++ b/include/net/arp.h @@ -53,13 +53,7 @@ static inline void __ipv4_confirm_neigh(struct net_device *dev, u32 key) rcu_read_lock_bh(); n = __ipv4_neigh_lookup_noref(dev, key); - if (n) { - unsigned long now = jiffies; - - /* avoid dirtying neighbour */ - if (READ_ONCE(n->confirmed) != now) - WRITE_ONCE(n->confirmed, now); - } + neigh_confirm(n); rcu_read_unlock_bh(); } diff --git a/include/net/ax25.h b/include/net/ax25.h index 03d409de61ad..526e49589197 100644 --- a/include/net/ax25.h +++ b/include/net/ax25.h @@ -229,7 +229,10 @@ struct ctl_table; typedef struct ax25_dev { struct ax25_dev *next; + struct net_device *dev; + netdevice_tracker dev_tracker; + struct net_device *forward; struct ctl_table_header *sysheader; int values[AX25_MAX_VALUES]; diff --git a/include/net/bareudp.h b/include/net/bareudp.h index dc65a0d71d9b..17610c8d6361 100644 --- a/include/net/bareudp.h +++ b/include/net/bareudp.h @@ -3,21 +3,10 @@ #ifndef __NET_BAREUDP_H #define __NET_BAREUDP_H +#include <linux/netdevice.h> #include <linux/types.h> -#include <linux/skbuff.h> #include <net/rtnetlink.h> -struct bareudp_conf { - __be16 ethertype; - __be16 port; - u16 sport_min; - bool multi_proto_mode; -}; - -struct net_device *bareudp_dev_create(struct net *net, const char *name, - u8 name_assign_type, - struct bareudp_conf *info); - static inline bool netif_is_bareudp(const struct net_device *dev) { return dev->rtnl_link_ops && diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 3271870fd85e..4b3d0b16c185 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -380,6 +380,7 @@ typedef void (*hci_req_complete_skb_t)(struct hci_dev *hdev, u8 status, #define HCI_REQ_SKB BIT(1) struct hci_ctrl { + struct sock *sk; u16 opcode; u8 req_flags; u8 req_event; @@ -389,6 +390,11 @@ struct hci_ctrl { }; }; +struct mgmt_ctrl { + struct hci_dev *hdev; + u16 opcode; +}; + struct bt_skb_cb { u8 pkt_type; u8 force_active; @@ -398,6 +404,7 @@ struct bt_skb_cb { struct l2cap_ctrl l2cap; struct sco_ctrl sco; struct hci_ctrl hci; + struct mgmt_ctrl mgmt; }; }; #define bt_cb(skb) ((struct bt_skb_cb *)((skb)->cb)) @@ -405,6 +412,8 @@ struct bt_skb_cb { #define hci_skb_pkt_type(skb) bt_cb((skb))->pkt_type #define hci_skb_expect(skb) bt_cb((skb))->expect #define hci_skb_opcode(skb) bt_cb((skb))->hci.opcode +#define hci_skb_event(skb) bt_cb((skb))->hci.req_event +#define hci_skb_sk(skb) bt_cb((skb))->hci.sk static inline struct sk_buff *bt_skb_alloc(unsigned int len, gfp_t how) { diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 63065bc01b76..35c073d44ec5 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -246,6 +246,15 @@ enum { * HCI after resume. */ HCI_QUIRK_NO_SUSPEND_NOTIFIER, + + /* + * When this quirk is set, LE tx power is not queried on startup + * and the min/max tx power values default to HCI_TX_POWER_INVALID. + * + * This quirk can be set before hci_register_dev is called or + * during the hdev->setup vendor callback. + */ + HCI_QUIRK_BROKEN_READ_TRANSMIT_POWER, }; /* HCI device flags */ @@ -332,6 +341,7 @@ enum { HCI_FORCE_NO_MITM, HCI_QUALITY_REPORT, HCI_OFFLOAD_CODECS_ENABLED, + HCI_LE_SIMULTANEOUS_ROLES, __HCI_NUM_FLAGS, }; @@ -566,6 +576,7 @@ enum { #define HCI_ERROR_INVALID_LL_PARAMS 0x1e #define HCI_ERROR_UNSPECIFIED 0x1f #define HCI_ERROR_ADVERTISING_TIMEOUT 0x3c +#define HCI_ERROR_CANCELLED_BY_HOST 0x44 /* Flow control modes */ #define HCI_FLOW_CTL_MODE_PACKET_BASED 0x00 @@ -1046,8 +1057,8 @@ struct hci_cp_read_stored_link_key { } __packed; struct hci_rp_read_stored_link_key { __u8 status; - __u8 max_keys; - __u8 num_keys; + __le16 max_keys; + __le16 num_keys; } __packed; #define HCI_OP_DELETE_STORED_LINK_KEY 0x0c12 @@ -1057,7 +1068,7 @@ struct hci_cp_delete_stored_link_key { } __packed; struct hci_rp_delete_stored_link_key { __u8 status; - __u8 num_keys; + __le16 num_keys; } __packed; #define HCI_MAX_NAME_LENGTH 248 @@ -1930,6 +1941,16 @@ struct hci_rp_le_read_transmit_power { __s8 max_le_tx_power; } __packed; +#define HCI_NETWORK_PRIVACY 0x00 +#define HCI_DEVICE_PRIVACY 0x01 + +#define HCI_OP_LE_SET_PRIVACY_MODE 0x204e +struct hci_cp_le_set_privacy_mode { + __u8 bdaddr_type; + bdaddr_t bdaddr; + __u8 mode; +} __packed; + #define HCI_OP_LE_READ_BUFFER_SIZE_V2 0x2060 struct hci_rp_le_read_buffer_size_v2 { __u8 status; @@ -2011,6 +2032,10 @@ struct hci_cp_le_reject_cis { } __packed; /* ---- HCI Events ---- */ +struct hci_ev_status { + __u8 status; +} __packed; + #define HCI_EV_INQUIRY_COMPLETE 0x01 #define HCI_EV_INQUIRY_RESULT 0x02 @@ -2023,6 +2048,11 @@ struct inquiry_info { __le16 clock_offset; } __packed; +struct hci_ev_inquiry_result { + __u8 num; + struct inquiry_info info[]; +}; + #define HCI_EV_CONN_COMPLETE 0x03 struct hci_ev_conn_complete { __u8 status; @@ -2134,7 +2164,7 @@ struct hci_comp_pkts_info { } __packed; struct hci_ev_num_comp_pkts { - __u8 num_hndl; + __u8 num; struct hci_comp_pkts_info handles[]; } __packed; @@ -2184,7 +2214,7 @@ struct hci_ev_pscan_rep_mode { } __packed; #define HCI_EV_INQUIRY_RESULT_WITH_RSSI 0x22 -struct inquiry_info_with_rssi { +struct inquiry_info_rssi { bdaddr_t bdaddr; __u8 pscan_rep_mode; __u8 pscan_period_mode; @@ -2192,7 +2222,7 @@ struct inquiry_info_with_rssi { __le16 clock_offset; __s8 rssi; } __packed; -struct inquiry_info_with_rssi_and_pscan_mode { +struct inquiry_info_rssi_pscan { bdaddr_t bdaddr; __u8 pscan_rep_mode; __u8 pscan_period_mode; @@ -2201,6 +2231,10 @@ struct inquiry_info_with_rssi_and_pscan_mode { __le16 clock_offset; __s8 rssi; } __packed; +struct hci_ev_inquiry_result_rssi { + __u8 num; + __u8 data[]; +} __packed; #define HCI_EV_REMOTE_EXT_FEATURES 0x23 struct hci_ev_remote_ext_features { @@ -2255,6 +2289,11 @@ struct extended_inquiry_info { __u8 data[240]; } __packed; +struct hci_ev_ext_inquiry_result { + __u8 num; + struct extended_inquiry_info info[]; +} __packed; + #define HCI_EV_KEY_REFRESH_COMPLETE 0x30 struct hci_ev_key_refresh_complete { __u8 status; @@ -2422,13 +2461,18 @@ struct hci_ev_le_conn_complete { #define HCI_EV_LE_ADVERTISING_REPORT 0x02 struct hci_ev_le_advertising_info { - __u8 evt_type; + __u8 type; __u8 bdaddr_type; bdaddr_t bdaddr; __u8 length; __u8 data[]; } __packed; +struct hci_ev_le_advertising_report { + __u8 num; + struct hci_ev_le_advertising_info info[]; +} __packed; + #define HCI_EV_LE_CONN_UPDATE_COMPLETE 0x03 struct hci_ev_le_conn_update_complete { __u8 status; @@ -2472,7 +2516,7 @@ struct hci_ev_le_data_len_change { #define HCI_EV_LE_DIRECT_ADV_REPORT 0x0B struct hci_ev_le_direct_adv_info { - __u8 evt_type; + __u8 type; __u8 bdaddr_type; bdaddr_t bdaddr; __u8 direct_addr_type; @@ -2480,6 +2524,11 @@ struct hci_ev_le_direct_adv_info { __s8 rssi; } __packed; +struct hci_ev_le_direct_adv_report { + __u8 num; + struct hci_ev_le_direct_adv_info info[]; +} __packed; + #define HCI_EV_LE_PHY_UPDATE_COMPLETE 0x0c struct hci_ev_le_phy_update_complete { __u8 status; @@ -2489,8 +2538,8 @@ struct hci_ev_le_phy_update_complete { } __packed; #define HCI_EV_LE_EXT_ADV_REPORT 0x0d -struct hci_ev_le_ext_adv_report { - __le16 evt_type; +struct hci_ev_le_ext_adv_info { + __le16 type; __u8 bdaddr_type; bdaddr_t bdaddr; __u8 primary_phy; @@ -2498,11 +2547,16 @@ struct hci_ev_le_ext_adv_report { __u8 sid; __u8 tx_power; __s8 rssi; - __le16 interval; - __u8 direct_addr_type; + __le16 interval; + __u8 direct_addr_type; bdaddr_t direct_addr; - __u8 length; - __u8 data[]; + __u8 length; + __u8 data[]; +} __packed; + +struct hci_ev_le_ext_adv_report { + __u8 num; + struct hci_ev_le_ext_adv_info info[]; } __packed; #define HCI_EV_LE_ENHANCED_CONN_COMPLETE 0x0a diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index dd8840e70e25..586f69d084a2 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -30,6 +30,7 @@ #include <linux/rculist.h> #include <net/bluetooth/hci.h> +#include <net/bluetooth/hci_sync.h> #include <net/bluetooth/hci_sock.h> /* HCI priority */ @@ -87,6 +88,7 @@ struct discovery_state { u8 (*uuids)[16]; unsigned long scan_start; unsigned long scan_duration; + unsigned long name_resolve_timeout; }; #define SUSPEND_NOTIFIER_TIMEOUT msecs_to_jiffies(2000) /* 2 seconds */ @@ -150,22 +152,22 @@ struct bdaddr_list_with_irk { u8 local_irk[16]; }; -struct bdaddr_list_with_flags { - struct list_head list; - bdaddr_t bdaddr; - u8 bdaddr_type; - u32 current_flags; -}; - enum hci_conn_flags { HCI_CONN_FLAG_REMOTE_WAKEUP, - HCI_CONN_FLAG_MAX -}; + HCI_CONN_FLAG_DEVICE_PRIVACY, -#define hci_conn_test_flag(nr, flags) ((flags) & (1U << nr)) + __HCI_CONN_NUM_FLAGS, +}; /* Make sure number of flags doesn't exceed sizeof(current_flags) */ -static_assert(HCI_CONN_FLAG_MAX < 32); +static_assert(__HCI_CONN_NUM_FLAGS < 32); + +struct bdaddr_list_with_flags { + struct list_head list; + bdaddr_t bdaddr; + u8 bdaddr_type; + DECLARE_BITMAP(flags, __HCI_CONN_NUM_FLAGS); +}; struct bt_uuid { struct list_head list; @@ -351,8 +353,8 @@ struct hci_dev { __u16 lmp_subver; __u16 voice_setting; __u8 num_iac; - __u8 stored_max_keys; - __u8 stored_num_keys; + __u16 stored_max_keys; + __u16 stored_num_keys; __u8 io_capability; __s8 inq_tx_power; __u8 err_data_reporting; @@ -475,6 +477,10 @@ struct hci_dev { struct work_struct power_on; struct delayed_work power_off; struct work_struct error_reset; + struct work_struct cmd_sync_work; + struct list_head cmd_sync_work_list; + struct mutex cmd_sync_work_lock; + struct work_struct cmd_sync_cancel_work; __u16 discov_timeout; struct delayed_work discov_off; @@ -489,10 +495,7 @@ struct hci_dev { struct work_struct tx_work; struct work_struct discov_update; - struct work_struct bg_scan_update; struct work_struct scan_update; - struct work_struct connectable_update; - struct work_struct discoverable_update; struct delayed_work le_scan_disable; struct delayed_work le_scan_restart; @@ -519,7 +522,6 @@ struct hci_dev { bool advertising_paused; struct notifier_block suspend_notifier; - struct work_struct suspend_prepare; enum suspended_state suspend_state_next; enum suspended_state suspend_state; bool scanning_paused; @@ -528,9 +530,6 @@ struct hci_dev { bdaddr_t wake_addr; u8 wake_addr_type; - wait_queue_head_t suspend_wait_q; - DECLARE_BITMAP(suspend_tasks, __SUSPEND_NUM_TASKS); - struct hci_conn_hash conn_hash; struct list_head mgmt_pending; @@ -562,6 +561,7 @@ struct hci_dev { struct rfkill *rfkill; DECLARE_BITMAP(dev_flags, __HCI_NUM_FLAGS); + DECLARE_BITMAP(conn_flags, __HCI_CONN_NUM_FLAGS); __s8 adv_tx_power; __u8 adv_data[HCI_MAX_EXT_AD_LENGTH]; @@ -603,6 +603,7 @@ struct hci_dev { #if IS_ENABLED(CONFIG_BT_AOSPEXT) bool aosp_capable; + bool aosp_quality_report; #endif int (*open)(struct hci_dev *hdev); @@ -756,7 +757,8 @@ struct hci_conn_params { struct hci_conn *conn; bool explicit_connect; - u32 current_flags; + DECLARE_BITMAP(flags, __HCI_CONN_NUM_FLAGS); + u8 privacy_mode; }; extern struct list_head hci_dev_list; @@ -781,6 +783,12 @@ extern struct mutex hci_cb_list_lock; hci_dev_clear_flag(hdev, HCI_QUALITY_REPORT); \ } while (0) +#define hci_dev_le_state_simultaneous(hdev) \ + (test_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks) && \ + (hdev->le_states[4] & 0x08) && /* Central */ \ + (hdev->le_states[4] & 0x40) && /* Peripheral */ \ + (hdev->le_states[3] & 0x10)) /* Simultaneous */ + /* ----- HCI interface to upper protocols ----- */ int l2cap_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr); int l2cap_disconn_ind(struct hci_conn *hcon); @@ -1119,8 +1127,7 @@ struct hci_conn *hci_connect_le_scan(struct hci_dev *hdev, bdaddr_t *dst, enum conn_reasons conn_reason); struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, u8 dst_type, bool dst_resolved, u8 sec_level, - u16 conn_timeout, u8 role, - bdaddr_t *direct_rpa); + u16 conn_timeout, u8 role); struct hci_conn *hci_connect_acl(struct hci_dev *hdev, bdaddr_t *dst, u8 sec_level, u8 auth_type, enum conn_reasons conn_reason); @@ -1461,8 +1468,14 @@ void hci_conn_del_sysfs(struct hci_conn *conn); #define scan_coded(dev) (((dev)->le_tx_def_phys & HCI_LE_SET_PHY_CODED) || \ ((dev)->le_rx_def_phys & HCI_LE_SET_PHY_CODED)) +#define ll_privacy_capable(dev) ((dev)->le_features[0] & HCI_LE_LL_PRIVACY) + /* Use LL Privacy based address resolution if supported */ -#define use_ll_privacy(dev) ((dev)->le_features[0] & HCI_LE_LL_PRIVACY) +#define use_ll_privacy(dev) (ll_privacy_capable(dev) && \ + hci_dev_test_flag(dev, HCI_ENABLE_LL_PRIVACY)) + +#define privacy_mode_capable(dev) (use_ll_privacy(dev) && \ + (hdev->commands[39] & 0x04)) /* Use enhanced synchronous connection if command is supported */ #define enhanced_sco_capable(dev) ((dev)->commands[29] & 0x08) @@ -1690,10 +1703,6 @@ static inline int hci_check_conn_params(u16 min, u16 max, u16 latency, int hci_register_cb(struct hci_cb *hcb); int hci_unregister_cb(struct hci_cb *hcb); -struct sk_buff *__hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen, - const void *param, u32 timeout); -struct sk_buff *__hci_cmd_sync_ev(struct hci_dev *hdev, u16 opcode, u32 plen, - const void *param, u8 event, u32 timeout); int __hci_cmd_send(struct hci_dev *hdev, u16 opcode, u32 plen, const void *param); @@ -1704,9 +1713,6 @@ void hci_send_sco(struct hci_conn *conn, struct sk_buff *skb); void *hci_sent_cmd_data(struct hci_dev *hdev, __u16 opcode); -struct sk_buff *hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen, - const void *param, u32 timeout); - u32 hci_conn_get_phy(struct hci_conn *conn); /* ----- HCI Sockets ----- */ @@ -1765,6 +1771,8 @@ void hci_mgmt_chan_unregister(struct hci_mgmt_chan *c); #define DISCOV_LE_FAST_ADV_INT_MIN 0x00A0 /* 100 msec */ #define DISCOV_LE_FAST_ADV_INT_MAX 0x00F0 /* 150 msec */ +#define NAME_RESOLVE_DURATION msecs_to_jiffies(10240) /* 10.24 sec */ + void mgmt_fill_version_info(void *ver); int mgmt_new_settings(struct hci_dev *hdev); void mgmt_index_added(struct hci_dev *hdev); @@ -1806,7 +1814,6 @@ int mgmt_user_passkey_notify(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 entered); void mgmt_auth_failed(struct hci_conn *conn, u8 status); void mgmt_auth_enable_complete(struct hci_dev *hdev, u8 status); -void mgmt_ssp_enable_complete(struct hci_dev *hdev, u8 enable, u8 status); void mgmt_set_class_of_dev_complete(struct hci_dev *hdev, u8 *dev_class, u8 status); void mgmt_set_local_name_complete(struct hci_dev *hdev, u8 *name, u8 status); @@ -1831,8 +1838,6 @@ void mgmt_new_conn_param(struct hci_dev *hdev, bdaddr_t *bdaddr, u16 max_interval, u16 latency, u16 timeout); void mgmt_smp_complete(struct hci_conn *conn, bool complete); bool mgmt_get_connectable(struct hci_dev *hdev); -void mgmt_set_connectable_complete(struct hci_dev *hdev, u8 status); -void mgmt_set_discoverable_complete(struct hci_dev *hdev, u8 status); u8 mgmt_get_adv_discov_flags(struct hci_dev *hdev); void mgmt_advertising_added(struct sock *sk, struct hci_dev *hdev, u8 instance); diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h new file mode 100644 index 000000000000..2492e3b46a8f --- /dev/null +++ b/include/net/bluetooth/hci_sync.h @@ -0,0 +1,108 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * BlueZ - Bluetooth protocol stack for Linux + * + * Copyright (C) 2021 Intel Corporation + */ + +typedef int (*hci_cmd_sync_work_func_t)(struct hci_dev *hdev, void *data); +typedef void (*hci_cmd_sync_work_destroy_t)(struct hci_dev *hdev, void *data, + int err); + +struct hci_cmd_sync_work_entry { + struct list_head list; + hci_cmd_sync_work_func_t func; + void *data; + hci_cmd_sync_work_destroy_t destroy; +}; + +/* Function with sync suffix shall not be called with hdev->lock held as they + * wait the command to complete and in the meantime an event could be received + * which could attempt to acquire hdev->lock causing a deadlock. + */ +struct sk_buff *__hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen, + const void *param, u32 timeout); +struct sk_buff *hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen, + const void *param, u32 timeout); +struct sk_buff *__hci_cmd_sync_ev(struct hci_dev *hdev, u16 opcode, u32 plen, + const void *param, u8 event, u32 timeout); +struct sk_buff *__hci_cmd_sync_sk(struct hci_dev *hdev, u16 opcode, u32 plen, + const void *param, u8 event, u32 timeout, + struct sock *sk); +int __hci_cmd_sync_status(struct hci_dev *hdev, u16 opcode, u32 plen, + const void *param, u32 timeout); +int __hci_cmd_sync_status_sk(struct hci_dev *hdev, u16 opcode, u32 plen, + const void *param, u8 event, u32 timeout, + struct sock *sk); + +void hci_cmd_sync_init(struct hci_dev *hdev); +void hci_cmd_sync_clear(struct hci_dev *hdev); +void hci_cmd_sync_cancel(struct hci_dev *hdev, int err); +void __hci_cmd_sync_cancel(struct hci_dev *hdev, int err); + +int hci_cmd_sync_queue(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, + void *data, hci_cmd_sync_work_destroy_t destroy); + +int hci_update_eir_sync(struct hci_dev *hdev); +int hci_update_class_sync(struct hci_dev *hdev); + +int hci_update_eir_sync(struct hci_dev *hdev); +int hci_update_class_sync(struct hci_dev *hdev); +int hci_update_name_sync(struct hci_dev *hdev); +int hci_write_ssp_mode_sync(struct hci_dev *hdev, u8 mode); + +int hci_update_random_address_sync(struct hci_dev *hdev, bool require_privacy, + bool rpa, u8 *own_addr_type); + +int hci_update_scan_rsp_data_sync(struct hci_dev *hdev, u8 instance); +int hci_update_adv_data_sync(struct hci_dev *hdev, u8 instance); +int hci_schedule_adv_instance_sync(struct hci_dev *hdev, u8 instance, + bool force); + +int hci_setup_ext_adv_instance_sync(struct hci_dev *hdev, u8 instance); +int hci_start_ext_adv_sync(struct hci_dev *hdev, u8 instance); +int hci_enable_ext_advertising_sync(struct hci_dev *hdev, u8 instance); +int hci_enable_advertising_sync(struct hci_dev *hdev); +int hci_enable_advertising(struct hci_dev *hdev); + +int hci_remove_advertising_sync(struct hci_dev *hdev, struct sock *sk, + u8 instance, bool force); +int hci_disable_advertising_sync(struct hci_dev *hdev); + +int hci_update_passive_scan_sync(struct hci_dev *hdev); +int hci_update_passive_scan(struct hci_dev *hdev); +int hci_read_rssi_sync(struct hci_dev *hdev, __le16 handle); +int hci_read_tx_power_sync(struct hci_dev *hdev, __le16 handle, u8 type); +int hci_write_sc_support_sync(struct hci_dev *hdev, u8 val); +int hci_read_clock_sync(struct hci_dev *hdev, struct hci_cp_read_clock *cp); + +int hci_write_fast_connectable_sync(struct hci_dev *hdev, bool enable); +int hci_update_scan_sync(struct hci_dev *hdev); + +int hci_write_le_host_supported_sync(struct hci_dev *hdev, u8 le, u8 simul); +int hci_remove_ext_adv_instance_sync(struct hci_dev *hdev, u8 instance, + struct sock *sk); +struct sk_buff *hci_read_local_oob_data_sync(struct hci_dev *hdev, bool ext, + struct sock *sk); + +int hci_reset_sync(struct hci_dev *hdev); +int hci_dev_open_sync(struct hci_dev *hdev); +int hci_dev_close_sync(struct hci_dev *hdev); + +int hci_powered_update_sync(struct hci_dev *hdev); +int hci_set_powered_sync(struct hci_dev *hdev, u8 val); + +int hci_update_discoverable_sync(struct hci_dev *hdev); +int hci_update_discoverable(struct hci_dev *hdev); + +int hci_update_connectable_sync(struct hci_dev *hdev); + +int hci_start_discovery_sync(struct hci_dev *hdev); +int hci_stop_discovery_sync(struct hci_dev *hdev); + +int hci_suspend_sync(struct hci_dev *hdev); +int hci_resume_sync(struct hci_dev *hdev); + +struct hci_conn; + +int hci_le_create_conn_sync(struct hci_dev *hdev, struct hci_conn *conn); diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h index 23a0524061b7..107b25deae68 100644 --- a/include/net/bluetooth/mgmt.h +++ b/include/net/bluetooth/mgmt.h @@ -936,10 +936,11 @@ struct mgmt_ev_auth_failed { __u8 status; } __packed; -#define MGMT_DEV_FOUND_CONFIRM_NAME 0x01 -#define MGMT_DEV_FOUND_LEGACY_PAIRING 0x02 -#define MGMT_DEV_FOUND_NOT_CONNECTABLE 0x04 -#define MGMT_DEV_FOUND_INITIATED_CONN 0x08 +#define MGMT_DEV_FOUND_CONFIRM_NAME 0x01 +#define MGMT_DEV_FOUND_LEGACY_PAIRING 0x02 +#define MGMT_DEV_FOUND_NOT_CONNECTABLE 0x04 +#define MGMT_DEV_FOUND_INITIATED_CONN 0x08 +#define MGMT_DEV_FOUND_NAME_REQUEST_FAILED 0x10 #define MGMT_EV_DEVICE_FOUND 0x0012 struct mgmt_ev_device_found { diff --git a/include/net/bond_options.h b/include/net/bond_options.h index e64833a674eb..dd75c071f67e 100644 --- a/include/net/bond_options.h +++ b/include/net/bond_options.h @@ -65,6 +65,7 @@ enum { BOND_OPT_NUM_PEER_NOTIF_ALIAS, BOND_OPT_PEER_NOTIF_DELAY, BOND_OPT_LACP_ACTIVE, + BOND_OPT_MISSED_MAX, BOND_OPT_LAST }; diff --git a/include/net/bonding.h b/include/net/bonding.h index 15e083e18f75..83cfd2d70247 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -121,6 +121,7 @@ struct bond_params { int xmit_policy; int miimon; u8 num_peer_notif; + u8 missed_max; int arp_interval; int arp_validate; int arp_all_targets; @@ -345,7 +346,7 @@ static inline bool bond_uses_primary(struct bonding *bond) static inline struct net_device *bond_option_active_slave_get_rcu(struct bonding *bond) { - struct slave *slave = rcu_dereference(bond->curr_active_slave); + struct slave *slave = rcu_dereference_rtnl(bond->curr_active_slave); return bond_uses_primary(bond) && slave ? slave->dev : NULL; } diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 423f97b982ff..d19e48f9fdc6 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1188,17 +1188,6 @@ struct cfg80211_unsol_bcast_probe_resp { }; /** - * enum cfg80211_ap_settings_flags - AP settings flags - * - * Used by cfg80211_ap_settings - * - * @AP_SETTINGS_EXTERNAL_AUTH_SUPPORT: AP supports external authentication - */ -enum cfg80211_ap_settings_flags { - AP_SETTINGS_EXTERNAL_AUTH_SUPPORT = BIT(0), -}; - -/** * struct cfg80211_ap_settings - AP configuration * * Used to configure an AP interface. @@ -4072,6 +4061,15 @@ struct mgmt_frame_regs { * @set_fils_aad: Set FILS AAD data to the AP driver so that the driver can use * those to decrypt (Re)Association Request and encrypt (Re)Association * Response frame. + * + * @set_radar_background: Configure dedicated offchannel chain available for + * radar/CAC detection on some hw. This chain can't be used to transmit + * or receive frames and it is bounded to a running wdev. + * Background radar/CAC detection allows to avoid the CAC downtime + * switching to a different channel during CAC detection on the selected + * radar channel. + * The caller is expected to set chandef pointer to NULL in order to + * disable background CAC/radar detection. */ struct cfg80211_ops { int (*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow); @@ -4404,6 +4402,8 @@ struct cfg80211_ops { struct cfg80211_color_change_settings *params); int (*set_fils_aad)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_fils_aad *fils_aad); + int (*set_radar_background)(struct wiphy *wiphy, + struct cfg80211_chan_def *chandef); }; /* @@ -6375,17 +6375,6 @@ static inline void cfg80211_gen_new_bssid(const u8 *bssid, u8 max_bssid, } /** - * cfg80211_get_ies_channel_number - returns the channel number from ies - * @ie: IEs - * @ielen: length of IEs - * @band: enum nl80211_band of the channel - * - * Returns the channel number, or -1 if none could be determined. - */ -int cfg80211_get_ies_channel_number(const u8 *ie, size_t ielen, - enum nl80211_band band); - -/** * cfg80211_is_element_inherited - returns if element ID should be inherited * @element: element to check * @non_inherit_element: non inheritance element @@ -6421,6 +6410,19 @@ enum cfg80211_bss_frame_type { }; /** + * cfg80211_get_ies_channel_number - returns the channel number from ies + * @ie: IEs + * @ielen: length of IEs + * @band: enum nl80211_band of the channel + * @ftype: frame type + * + * Returns the channel number, or -1 if none could be determined. + */ +int cfg80211_get_ies_channel_number(const u8 *ie, size_t ielen, + enum nl80211_band band, + enum cfg80211_bss_frame_type ftype); + +/** * cfg80211_inform_bss_data - inform cfg80211 of a new BSS * * @wiphy: the wiphy reporting the BSS @@ -7594,15 +7596,33 @@ void cfg80211_cqm_txe_notify(struct net_device *dev, const u8 *peer, void cfg80211_cqm_beacon_loss_notify(struct net_device *dev, gfp_t gfp); /** - * cfg80211_radar_event - radar detection event + * __cfg80211_radar_event - radar detection event * @wiphy: the wiphy * @chandef: chandef for the current channel + * @offchan: the radar has been detected on the offchannel chain * @gfp: context flags * * This function is called when a radar is detected on the current chanenl. */ -void cfg80211_radar_event(struct wiphy *wiphy, - struct cfg80211_chan_def *chandef, gfp_t gfp); +void __cfg80211_radar_event(struct wiphy *wiphy, + struct cfg80211_chan_def *chandef, + bool offchan, gfp_t gfp); + +static inline void +cfg80211_radar_event(struct wiphy *wiphy, + struct cfg80211_chan_def *chandef, + gfp_t gfp) +{ + __cfg80211_radar_event(wiphy, chandef, false, gfp); +} + +static inline void +cfg80211_background_radar_event(struct wiphy *wiphy, + struct cfg80211_chan_def *chandef, + gfp_t gfp) +{ + __cfg80211_radar_event(wiphy, chandef, true, gfp); +} /** * cfg80211_sta_opmode_change_notify - STA's ht/vht operation mode change event @@ -7633,6 +7653,14 @@ void cfg80211_cac_event(struct net_device *netdev, const struct cfg80211_chan_def *chandef, enum nl80211_radar_event event, gfp_t gfp); +/** + * cfg80211_background_cac_abort - Channel Availability Check offchan abort event + * @wiphy: the wiphy + * + * This function is called by the driver when a Channel Availability Check + * (CAC) is aborted by a offchannel dedicated chain. + */ +void cfg80211_background_cac_abort(struct wiphy *wiphy); /** * cfg80211_gtk_rekey_notify - notify userspace about driver rekeying @@ -8250,6 +8278,18 @@ bool cfg80211_iftype_allowed(struct wiphy *wiphy, enum nl80211_iftype iftype, bool is_4addr, u8 check_swif); +/** + * cfg80211_assoc_comeback - notification of association that was + * temporarly rejected with a comeback + * @netdev: network device + * @bss: the bss entry with which association is in progress. + * @timeout: timeout interval value TUs. + * + * this function may sleep. the caller must hold the corresponding wdev's mutex. + */ +void cfg80211_assoc_comeback(struct net_device *netdev, + struct cfg80211_bss *bss, u32 timeout); + /* Logging, debugging and troubleshooting/diagnostic helpers. */ /* wiphy_printk helpers, similar to dev_printk */ diff --git a/include/net/checksum.h b/include/net/checksum.h index 5b96d5bd6e54..5218041e5c8f 100644 --- a/include/net/checksum.h +++ b/include/net/checksum.h @@ -180,4 +180,8 @@ static inline void remcsum_unadjust(__sum16 *psum, __wsum delta) *psum = csum_fold(csum_sub(delta, (__force __wsum)*psum)); } +static inline __wsum wsum_negate(__wsum val) +{ + return (__force __wsum)-((__force u32)val); +} #endif diff --git a/include/net/codel.h b/include/net/codel.h index a6c9e34e62b8..5fed2f16cb8d 100644 --- a/include/net/codel.h +++ b/include/net/codel.h @@ -44,8 +44,6 @@ #include <linux/types.h> #include <linux/ktime.h> #include <linux/skbuff.h> -#include <net/pkt_sched.h> -#include <net/inet_ecn.h> /* Controlling Queue Delay (CoDel) algorithm * ========================================= diff --git a/include/net/codel_impl.h b/include/net/codel_impl.h index 137d40d8cbeb..78a27ac73070 100644 --- a/include/net/codel_impl.h +++ b/include/net/codel_impl.h @@ -49,6 +49,8 @@ * Implemented on linux by Dave Taht and Eric Dumazet */ +#include <net/inet_ecn.h> + static void codel_params_init(struct codel_params *params) { params->interval = MS2TIME(100); diff --git a/include/net/codel_qdisc.h b/include/net/codel_qdisc.h index 098630f83a55..58b6d0ebea10 100644 --- a/include/net/codel_qdisc.h +++ b/include/net/codel_qdisc.h @@ -49,6 +49,8 @@ * Implemented on linux by Dave Taht and Eric Dumazet */ +#include <net/pkt_sched.h> + /* Qdiscs using codel plugin must use codel_skb_cb in their own cb[] */ struct codel_skb_cb { codel_time_t enqueue_time; diff --git a/include/net/devlink.h b/include/net/devlink.h index aab3d007c577..8d5349d2fb68 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -361,33 +361,6 @@ devlink_resource_size_params_init(struct devlink_resource_size_params *size_para typedef u64 devlink_resource_occ_get_t(void *priv); -/** - * struct devlink_resource - devlink resource - * @name: name of the resource - * @id: id, per devlink instance - * @size: size of the resource - * @size_new: updated size of the resource, reload is needed - * @size_valid: valid in case the total size of the resource is valid - * including its children - * @parent: parent resource - * @size_params: size parameters - * @list: parent list - * @resource_list: list of child resources - */ -struct devlink_resource { - const char *name; - u64 id; - u64 size; - u64 size_new; - bool size_valid; - struct devlink_resource *parent; - struct devlink_resource_size_params size_params; - struct list_head list; - struct list_head resource_list; - devlink_resource_occ_get_t *occ_get; - void *occ_get_priv; -}; - #define DEVLINK_RESOURCE_ID_PARENT_TOP 0 #define DEVLINK_RESOURCE_GENERIC_NAME_PORTS "physical_ports" @@ -485,6 +458,9 @@ enum devlink_param_generic_id { DEVLINK_PARAM_GENERIC_ID_ENABLE_ETH, DEVLINK_PARAM_GENERIC_ID_ENABLE_RDMA, DEVLINK_PARAM_GENERIC_ID_ENABLE_VNET, + DEVLINK_PARAM_GENERIC_ID_ENABLE_IWARP, + DEVLINK_PARAM_GENERIC_ID_IO_EQ_SIZE, + DEVLINK_PARAM_GENERIC_ID_EVENT_EQ_SIZE, /* add new param generic ids above here*/ __DEVLINK_PARAM_GENERIC_ID_MAX, @@ -534,6 +510,15 @@ enum devlink_param_generic_id { #define DEVLINK_PARAM_GENERIC_ENABLE_VNET_NAME "enable_vnet" #define DEVLINK_PARAM_GENERIC_ENABLE_VNET_TYPE DEVLINK_PARAM_TYPE_BOOL +#define DEVLINK_PARAM_GENERIC_ENABLE_IWARP_NAME "enable_iwarp" +#define DEVLINK_PARAM_GENERIC_ENABLE_IWARP_TYPE DEVLINK_PARAM_TYPE_BOOL + +#define DEVLINK_PARAM_GENERIC_IO_EQ_SIZE_NAME "io_eq_size" +#define DEVLINK_PARAM_GENERIC_IO_EQ_SIZE_TYPE DEVLINK_PARAM_TYPE_U32 + +#define DEVLINK_PARAM_GENERIC_EVENT_EQ_SIZE_NAME "event_eq_size" +#define DEVLINK_PARAM_GENERIC_EVENT_EQ_SIZE_TYPE DEVLINK_PARAM_TYPE_U32 + #define DEVLINK_PARAM_GENERIC(_id, _cmodes, _get, _set, _validate) \ { \ .id = DEVLINK_PARAM_GENERIC_ID_##_id, \ @@ -687,13 +672,17 @@ struct devlink_health_reporter_ops { * @trap_name: Trap name. * @trap_group_name: Trap group name. * @input_dev: Input netdevice. + * @dev_tracker: refcount tracker for @input_dev. * @fa_cookie: Flow action user cookie. * @trap_type: Trap type. */ struct devlink_trap_metadata { const char *trap_name; const char *trap_group_name; + struct net_device *input_dev; + netdevice_tracker dev_tracker; + const struct flow_action_cookie *fa_cookie; enum devlink_trap_type trap_type; }; @@ -1567,8 +1556,7 @@ int devlink_resource_register(struct devlink *devlink, u64 resource_id, u64 parent_resource_id, const struct devlink_resource_size_params *size_params); -void devlink_resources_unregister(struct devlink *devlink, - struct devlink_resource *resource); +void devlink_resources_unregister(struct devlink *devlink); int devlink_resource_size_get(struct devlink *devlink, u64 resource_id, u64 *p_resource_size); diff --git a/include/net/dsa.h b/include/net/dsa.h index eff5c44ba377..57b3e4e7413b 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -88,6 +88,8 @@ struct dsa_device_ops { struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev); void (*flow_dissect)(const struct sk_buff *skb, __be16 *proto, int *offset); + int (*connect)(struct dsa_switch *ds); + void (*disconnect)(struct dsa_switch *ds); unsigned int needed_headroom; unsigned int needed_tailroom; const char *name; @@ -117,6 +119,9 @@ struct dsa_netdevice_ops { struct dsa_switch_tree { struct list_head list; + /* List of switch ports */ + struct list_head ports; + /* Notifier chain for switch-wide events */ struct raw_notifier_head nh; @@ -126,8 +131,10 @@ struct dsa_switch_tree { /* Number of switches attached to this tree */ struct kref refcount; - /* Has this tree been applied to the hardware? */ - bool setup; + /* Maps offloaded LAG netdevs to a zero-based linear ID for + * drivers that need it. + */ + struct net_device **lags; /* Tagging protocol operations */ const struct dsa_device_ops *tag_ops; @@ -137,22 +144,19 @@ struct dsa_switch_tree { */ enum dsa_tag_protocol default_proto; + /* Has this tree been applied to the hardware? */ + bool setup; + /* * Configuration data for the platform device that owns * this dsa switch tree instance. */ struct dsa_platform_data *pd; - /* List of switch ports */ - struct list_head ports; - /* List of DSA links composing the routing table */ struct list_head rtable; - /* Maps offloaded LAG netdevs to a zero-based linear ID for - * drivers that need it. - */ - struct net_device **lags; + /* Length of "lags" array */ unsigned int lags_len; /* Track the largest switch index within a tree */ @@ -219,6 +223,12 @@ struct dsa_mall_tc_entry { }; }; +struct dsa_bridge { + struct net_device *dev; + unsigned int num; + bool tx_fwd_offload; + refcount_t refcount; +}; struct dsa_port { /* A CPU port is physically connected to a master device. @@ -238,6 +248,10 @@ struct dsa_port { struct dsa_switch_tree *dst; struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev); + struct dsa_switch *ds; + + unsigned int index; + enum { DSA_PORT_TYPE_UNUSED = 0, DSA_PORT_TYPE_CPU, @@ -245,36 +259,40 @@ struct dsa_port { DSA_PORT_TYPE_USER, } type; - struct dsa_switch *ds; - unsigned int index; const char *name; struct dsa_port *cpu_dp; u8 mac[ETH_ALEN]; + + u8 stp_state; + + /* Warning: the following bit fields are not atomic, and updating them + * can only be done from code paths where concurrency is not possible + * (probe time or under rtnl_lock). + */ + u8 vlan_filtering:1; + + /* Managed by DSA on user ports and by drivers on CPU and DSA ports */ + u8 learning:1; + + u8 lag_tx_enabled:1; + + u8 devlink_port_setup:1; + + u8 setup:1; + struct device_node *dn; unsigned int ageing_time; - bool vlan_filtering; - /* Managed by DSA on user ports and by drivers on CPU and DSA ports */ - bool learning; - u8 stp_state; - struct net_device *bridge_dev; - int bridge_num; + + struct dsa_bridge *bridge; struct devlink_port devlink_port; - bool devlink_port_setup; struct phylink *pl; struct phylink_config pl_config; struct net_device *lag_dev; - bool lag_tx_enabled; struct net_device *hsr_dev; struct list_head list; /* - * Give the switch driver somewhere to hang its per-port private data - * structures (accessible from the tagger). - */ - void *priv; - - /* * Original copy of the master netdev ethtool_ops */ const struct ethtool_ops *orig_ethtool_ops; @@ -290,8 +308,6 @@ struct dsa_port { struct mutex addr_lists_lock; struct list_head fdbs; struct list_head mdbs; - - bool setup; }; /* TODO: ideally DSA ports would have a single dp->link_dp member, @@ -313,8 +329,6 @@ struct dsa_mac_addr { }; struct dsa_switch { - bool setup; - struct device *dev; /* @@ -323,6 +337,57 @@ struct dsa_switch { struct dsa_switch_tree *dst; unsigned int index; + /* Warning: the following bit fields are not atomic, and updating them + * can only be done from code paths where concurrency is not possible + * (probe time or under rtnl_lock). + */ + u32 setup:1; + + /* Disallow bridge core from requesting different VLAN awareness + * settings on ports if not hardware-supported + */ + u32 vlan_filtering_is_global:1; + + /* Keep VLAN filtering enabled on ports not offloading any upper */ + u32 needs_standalone_vlan_filtering:1; + + /* Pass .port_vlan_add and .port_vlan_del to drivers even for bridges + * that have vlan_filtering=0. All drivers should ideally set this (and + * then the option would get removed), but it is unknown whether this + * would break things or not. + */ + u32 configure_vlan_while_not_filtering:1; + + /* If the switch driver always programs the CPU port as egress tagged + * despite the VLAN configuration indicating otherwise, then setting + * @untag_bridge_pvid will force the DSA receive path to pop the + * bridge's default_pvid VLAN tagged frames to offer a consistent + * behavior between a vlan_filtering=0 and vlan_filtering=1 bridge + * device. + */ + u32 untag_bridge_pvid:1; + + /* Let DSA manage the FDB entries towards the + * CPU, based on the software bridge database. + */ + u32 assisted_learning_on_cpu_port:1; + + /* In case vlan_filtering_is_global is set, the VLAN awareness state + * should be retrieved from here and not from the per-port settings. + */ + u32 vlan_filtering:1; + + /* MAC PCS does not provide link state change interrupt, and requires + * polling. Flag passed on to PHYLINK. + */ + u32 pcs_poll:1; + + /* For switches that only have the MRU configurable. To ensure the + * configured MTU is not exceeded, normalization of MRU on all bridged + * interfaces is needed. + */ + u32 mtu_enforcement_ingress:1; + /* Listener for switch fabric events */ struct notifier_block nb; @@ -332,6 +397,8 @@ struct dsa_switch { */ void *priv; + void *tagger_data; + /* * Configuration data for this switch. */ @@ -361,50 +428,6 @@ struct dsa_switch { /* Number of switch port queues */ unsigned int num_tx_queues; - /* Disallow bridge core from requesting different VLAN awareness - * settings on ports if not hardware-supported - */ - bool vlan_filtering_is_global; - - /* Keep VLAN filtering enabled on ports not offloading any upper. */ - bool needs_standalone_vlan_filtering; - - /* Pass .port_vlan_add and .port_vlan_del to drivers even for bridges - * that have vlan_filtering=0. All drivers should ideally set this (and - * then the option would get removed), but it is unknown whether this - * would break things or not. - */ - bool configure_vlan_while_not_filtering; - - /* If the switch driver always programs the CPU port as egress tagged - * despite the VLAN configuration indicating otherwise, then setting - * @untag_bridge_pvid will force the DSA receive path to pop the bridge's - * default_pvid VLAN tagged frames to offer a consistent behavior - * between a vlan_filtering=0 and vlan_filtering=1 bridge device. - */ - bool untag_bridge_pvid; - - /* Let DSA manage the FDB entries towards the CPU, based on the - * software bridge database. - */ - bool assisted_learning_on_cpu_port; - - /* In case vlan_filtering_is_global is set, the VLAN awareness state - * should be retrieved from here and not from the per-port settings. - */ - bool vlan_filtering; - - /* MAC PCS does not provide link state change interrupt, and requires - * polling. Flag passed on to PHYLINK. - */ - bool pcs_poll; - - /* For switches that only have the MRU configurable. To ensure the - * configured MTU is not exceeded, normalization of MRU on all bridged - * interfaces is needed. - */ - bool mtu_enforcement_ingress; - /* Drivers that benefit from having an ID associated with each * offloaded LAG should set this to the maximum number of * supported IDs. DSA will then maintain a mapping of _at @@ -413,14 +436,14 @@ struct dsa_switch { */ unsigned int num_lag_ids; - /* Drivers that support bridge forwarding offload should set this to - * the maximum number of bridges spanning the same switch tree (or all - * trees, in the case of cross-tree bridging support) that can be - * offloaded. + /* Drivers that support bridge forwarding offload or FDB isolation + * should set this to the maximum number of bridges spanning the same + * switch tree (or all trees, in the case of cross-tree bridging + * support) that can be offloaded. */ - unsigned int num_fwd_offloading_bridges; + unsigned int max_num_bridges; - size_t num_ports; + unsigned int num_ports; }; static inline struct dsa_port *dsa_to_port(struct dsa_switch *ds, int p) @@ -588,7 +611,7 @@ static inline bool dsa_port_is_vlan_filtering(const struct dsa_port *dp) static inline struct net_device *dsa_port_to_bridge_port(const struct dsa_port *dp) { - if (!dp->bridge_dev) + if (!dp->bridge) return NULL; if (dp->lag_dev) @@ -599,6 +622,76 @@ struct net_device *dsa_port_to_bridge_port(const struct dsa_port *dp) return dp->slave; } +static inline struct net_device * +dsa_port_bridge_dev_get(const struct dsa_port *dp) +{ + return dp->bridge ? dp->bridge->dev : NULL; +} + +static inline unsigned int dsa_port_bridge_num_get(struct dsa_port *dp) +{ + return dp->bridge ? dp->bridge->num : 0; +} + +static inline bool dsa_port_bridge_same(const struct dsa_port *a, + const struct dsa_port *b) +{ + struct net_device *br_a = dsa_port_bridge_dev_get(a); + struct net_device *br_b = dsa_port_bridge_dev_get(b); + + /* Standalone ports are not in the same bridge with one another */ + return (!br_a || !br_b) ? false : (br_a == br_b); +} + +static inline bool dsa_port_offloads_bridge_port(struct dsa_port *dp, + const struct net_device *dev) +{ + return dsa_port_to_bridge_port(dp) == dev; +} + +static inline bool +dsa_port_offloads_bridge_dev(struct dsa_port *dp, + const struct net_device *bridge_dev) +{ + /* DSA ports connected to a bridge, and event was emitted + * for the bridge. + */ + return dsa_port_bridge_dev_get(dp) == bridge_dev; +} + +static inline bool dsa_port_offloads_bridge(struct dsa_port *dp, + const struct dsa_bridge *bridge) +{ + return dsa_port_bridge_dev_get(dp) == bridge->dev; +} + +/* Returns true if any port of this tree offloads the given net_device */ +static inline bool dsa_tree_offloads_bridge_port(struct dsa_switch_tree *dst, + const struct net_device *dev) +{ + struct dsa_port *dp; + + list_for_each_entry(dp, &dst->ports, list) + if (dsa_port_offloads_bridge_port(dp, dev)) + return true; + + return false; +} + +/* Returns true if any port of this tree offloads the given bridge */ +static inline bool +dsa_tree_offloads_bridge_dev(struct dsa_switch_tree *dst, + const struct net_device *bridge_dev) +{ + struct dsa_port *dp; + + list_for_each_entry(dp, &dst->ports, list) + if (dsa_port_offloads_bridge_dev(dp, bridge_dev)) + return true; + + return false; +} + typedef int dsa_fdb_dump_cb_t(const unsigned char *addr, u16 vid, bool is_static, void *data); struct dsa_switch_ops { @@ -614,6 +707,13 @@ struct dsa_switch_ops { enum dsa_tag_protocol mprot); int (*change_tag_protocol)(struct dsa_switch *ds, int port, enum dsa_tag_protocol proto); + /* + * Method for switch drivers to connect to the tagging protocol driver + * in current use. The switch driver can provide handlers for certain + * types of packets for switch management. + */ + int (*connect_tag_protocol)(struct dsa_switch *ds, + enum dsa_tag_protocol proto); /* Optional switch-wide initialization and destruction methods */ int (*setup)(struct dsa_switch *ds); @@ -645,8 +745,8 @@ struct dsa_switch_ops { /* * PHYLINK integration */ - void (*phylink_get_interfaces)(struct dsa_switch *ds, int port, - unsigned long *supported_interfaces); + void (*phylink_get_caps)(struct dsa_switch *ds, int port, + struct phylink_config *config); void (*phylink_validate)(struct dsa_switch *ds, int port, unsigned long *supported, struct phylink_link_state *state); @@ -748,17 +848,10 @@ struct dsa_switch_ops { */ int (*set_ageing_time)(struct dsa_switch *ds, unsigned int msecs); int (*port_bridge_join)(struct dsa_switch *ds, int port, - struct net_device *bridge); + struct dsa_bridge bridge, + bool *tx_fwd_offload); void (*port_bridge_leave)(struct dsa_switch *ds, int port, - struct net_device *bridge); - /* Called right after .port_bridge_join() */ - int (*port_bridge_tx_fwd_offload)(struct dsa_switch *ds, int port, - struct net_device *bridge, - int bridge_num); - /* Called right before .port_bridge_leave() */ - void (*port_bridge_tx_fwd_unoffload)(struct dsa_switch *ds, int port, - struct net_device *bridge, - int bridge_num); + struct dsa_bridge bridge); void (*port_stp_state_set)(struct dsa_switch *ds, int port, u8 state); void (*port_fast_age)(struct dsa_switch *ds, int port); @@ -830,10 +923,10 @@ struct dsa_switch_ops { */ int (*crosschip_bridge_join)(struct dsa_switch *ds, int tree_index, int sw_index, int port, - struct net_device *br); + struct dsa_bridge bridge); void (*crosschip_bridge_leave)(struct dsa_switch *ds, int tree_index, int sw_index, int port, - struct net_device *br); + struct dsa_bridge bridge); int (*crosschip_lag_change)(struct dsa_switch *ds, int sw_index, int port); int (*crosschip_lag_join)(struct dsa_switch *ds, int sw_index, diff --git a/include/net/dst.h b/include/net/dst.h index a057319aabef..6aa252c3fc55 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -77,6 +77,7 @@ struct dst_entry { #ifndef CONFIG_64BIT atomic_t __refcnt; /* 32-bit offset 64 */ #endif + netdevice_tracker dev_tracker; }; struct dst_metrics { diff --git a/include/net/failover.h b/include/net/failover.h index bb15438f39c7..f2b42b4b9cd6 100644 --- a/include/net/failover.h +++ b/include/net/failover.h @@ -25,6 +25,7 @@ struct failover_ops { struct failover { struct list_head list; struct net_device __rcu *failover_dev; + netdevice_tracker dev_tracker; struct failover_ops __rcu *ops; }; diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index bd07484ab9dd..82da359bca03 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -91,7 +91,6 @@ struct fib_rules_ops { void (*flush_cache)(struct fib_rules_ops *ops); int nlgroup; - const struct nla_policy *policy; struct list_head rules_list; struct module *owner; struct net *fro_net; @@ -103,26 +102,6 @@ struct fib_rule_notifier_info { struct fib_rule *rule; }; -#define FRA_GENERIC_POLICY \ - [FRA_UNSPEC] = { .strict_start_type = FRA_DPORT_RANGE + 1 }, \ - [FRA_IIFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, \ - [FRA_OIFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, \ - [FRA_PRIORITY] = { .type = NLA_U32 }, \ - [FRA_FWMARK] = { .type = NLA_U32 }, \ - [FRA_TUN_ID] = { .type = NLA_U64 }, \ - [FRA_FWMASK] = { .type = NLA_U32 }, \ - [FRA_TABLE] = { .type = NLA_U32 }, \ - [FRA_SUPPRESS_PREFIXLEN] = { .type = NLA_U32 }, \ - [FRA_SUPPRESS_IFGROUP] = { .type = NLA_U32 }, \ - [FRA_GOTO] = { .type = NLA_U32 }, \ - [FRA_L3MDEV] = { .type = NLA_U8 }, \ - [FRA_UID_RANGE] = { .len = sizeof(struct fib_rule_uid_range) }, \ - [FRA_PROTOCOL] = { .type = NLA_U8 }, \ - [FRA_IP_PROTO] = { .type = NLA_U8 }, \ - [FRA_SPORT_RANGE] = { .len = sizeof(struct fib_rule_port_range) }, \ - [FRA_DPORT_RANGE] = { .len = sizeof(struct fib_rule_port_range) } - - static inline void fib_rule_get(struct fib_rule *rule) { refcount_inc(&rule->refcnt); diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index 3961461d9c8b..5b8c54eb7a6b 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -197,6 +197,7 @@ void flow_action_cookie_destroy(struct flow_action_cookie *cookie); struct flow_action_entry { enum flow_action_id id; + u32 hw_index; enum flow_action_hw_stats hw_stats; action_destr destructor; void *destructor_priv; @@ -232,7 +233,6 @@ struct flow_action_entry { bool truncate; } sample; struct { /* FLOW_ACTION_POLICE */ - u32 index; u32 burst; u64 rate_bytes_ps; u64 burst_pkt; @@ -267,7 +267,6 @@ struct flow_action_entry { u8 ttl; } mpls_mangle; struct { - u32 index; s32 prio; u64 basetime; u64 cycletime; @@ -552,6 +551,23 @@ struct flow_cls_offload { u32 classid; }; +enum offload_act_command { + FLOW_ACT_REPLACE, + FLOW_ACT_DESTROY, + FLOW_ACT_STATS, +}; + +struct flow_offload_action { + struct netlink_ext_ack *extack; /* NULL in FLOW_ACT_STATS process*/ + enum offload_act_command command; + enum flow_action_id id; + u32 index; + struct flow_stats stats; + struct flow_action action; +}; + +struct flow_offload_action *offload_action_alloc(unsigned int num_actions); + static inline struct flow_rule * flow_cls_offload_flow_rule(struct flow_cls_offload *flow_cmd) { diff --git a/include/net/gro.h b/include/net/gro.h index 01edaf3fdda0..8f75802d50fd 100644 --- a/include/net/gro.h +++ b/include/net/gro.h @@ -4,9 +4,368 @@ #define _NET_IPV6_GRO_H #include <linux/indirect_call_wrapper.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <net/ip6_checksum.h> +#include <linux/skbuff.h> +#include <net/udp.h> -struct list_head; -struct sk_buff; +struct napi_gro_cb { + /* Virtual address of skb_shinfo(skb)->frags[0].page + offset. */ + void *frag0; + + /* Length of frag0. */ + unsigned int frag0_len; + + /* This indicates where we are processing relative to skb->data. */ + int data_offset; + + /* This is non-zero if the packet cannot be merged with the new skb. */ + u16 flush; + + /* Save the IP ID here and check when we get to the transport layer */ + u16 flush_id; + + /* Number of segments aggregated. */ + u16 count; + + /* Start offset for remote checksum offload */ + u16 gro_remcsum_start; + + /* jiffies when first packet was created/queued */ + unsigned long age; + + /* Used in ipv6_gro_receive() and foo-over-udp */ + u16 proto; + + /* This is non-zero if the packet may be of the same flow. */ + u8 same_flow:1; + + /* Used in tunnel GRO receive */ + u8 encap_mark:1; + + /* GRO checksum is valid */ + u8 csum_valid:1; + + /* Number of checksums via CHECKSUM_UNNECESSARY */ + u8 csum_cnt:3; + + /* Free the skb? */ + u8 free:2; +#define NAPI_GRO_FREE 1 +#define NAPI_GRO_FREE_STOLEN_HEAD 2 + + /* Used in foo-over-udp, set in udp[46]_gro_receive */ + u8 is_ipv6:1; + + /* Used in GRE, set in fou/gue_gro_receive */ + u8 is_fou:1; + + /* Used to determine if flush_id can be ignored */ + u8 is_atomic:1; + + /* Number of gro_receive callbacks this packet already went through */ + u8 recursion_counter:4; + + /* GRO is done by frag_list pointer chaining. */ + u8 is_flist:1; + + /* used to support CHECKSUM_COMPLETE for tunneling protocols */ + __wsum csum; + + /* used in skb_gro_receive() slow path */ + struct sk_buff *last; +}; + +#define NAPI_GRO_CB(skb) ((struct napi_gro_cb *)(skb)->cb) + +#define GRO_RECURSION_LIMIT 15 +static inline int gro_recursion_inc_test(struct sk_buff *skb) +{ + return ++NAPI_GRO_CB(skb)->recursion_counter == GRO_RECURSION_LIMIT; +} + +typedef struct sk_buff *(*gro_receive_t)(struct list_head *, struct sk_buff *); +static inline struct sk_buff *call_gro_receive(gro_receive_t cb, + struct list_head *head, + struct sk_buff *skb) +{ + if (unlikely(gro_recursion_inc_test(skb))) { + NAPI_GRO_CB(skb)->flush |= 1; + return NULL; + } + + return cb(head, skb); +} + +typedef struct sk_buff *(*gro_receive_sk_t)(struct sock *, struct list_head *, + struct sk_buff *); +static inline struct sk_buff *call_gro_receive_sk(gro_receive_sk_t cb, + struct sock *sk, + struct list_head *head, + struct sk_buff *skb) +{ + if (unlikely(gro_recursion_inc_test(skb))) { + NAPI_GRO_CB(skb)->flush |= 1; + return NULL; + } + + return cb(sk, head, skb); +} + +static inline unsigned int skb_gro_offset(const struct sk_buff *skb) +{ + return NAPI_GRO_CB(skb)->data_offset; +} + +static inline unsigned int skb_gro_len(const struct sk_buff *skb) +{ + return skb->len - NAPI_GRO_CB(skb)->data_offset; +} + +static inline void skb_gro_pull(struct sk_buff *skb, unsigned int len) +{ + NAPI_GRO_CB(skb)->data_offset += len; +} + +static inline void *skb_gro_header_fast(struct sk_buff *skb, + unsigned int offset) +{ + return NAPI_GRO_CB(skb)->frag0 + offset; +} + +static inline int skb_gro_header_hard(struct sk_buff *skb, unsigned int hlen) +{ + return NAPI_GRO_CB(skb)->frag0_len < hlen; +} + +static inline void skb_gro_frag0_invalidate(struct sk_buff *skb) +{ + NAPI_GRO_CB(skb)->frag0 = NULL; + NAPI_GRO_CB(skb)->frag0_len = 0; +} + +static inline void *skb_gro_header_slow(struct sk_buff *skb, unsigned int hlen, + unsigned int offset) +{ + if (!pskb_may_pull(skb, hlen)) + return NULL; + + skb_gro_frag0_invalidate(skb); + return skb->data + offset; +} + +static inline void *skb_gro_network_header(struct sk_buff *skb) +{ + return (NAPI_GRO_CB(skb)->frag0 ?: skb->data) + + skb_network_offset(skb); +} + +static inline __wsum inet_gro_compute_pseudo(struct sk_buff *skb, int proto) +{ + const struct iphdr *iph = skb_gro_network_header(skb); + + return csum_tcpudp_nofold(iph->saddr, iph->daddr, + skb_gro_len(skb), proto, 0); +} + +static inline void skb_gro_postpull_rcsum(struct sk_buff *skb, + const void *start, unsigned int len) +{ + if (NAPI_GRO_CB(skb)->csum_valid) + NAPI_GRO_CB(skb)->csum = wsum_negate(csum_partial(start, len, + wsum_negate(NAPI_GRO_CB(skb)->csum))); +} + +/* GRO checksum functions. These are logical equivalents of the normal + * checksum functions (in skbuff.h) except that they operate on the GRO + * offsets and fields in sk_buff. + */ + +__sum16 __skb_gro_checksum_complete(struct sk_buff *skb); + +static inline bool skb_at_gro_remcsum_start(struct sk_buff *skb) +{ + return (NAPI_GRO_CB(skb)->gro_remcsum_start == skb_gro_offset(skb)); +} + +static inline bool __skb_gro_checksum_validate_needed(struct sk_buff *skb, + bool zero_okay, + __sum16 check) +{ + return ((skb->ip_summed != CHECKSUM_PARTIAL || + skb_checksum_start_offset(skb) < + skb_gro_offset(skb)) && + !skb_at_gro_remcsum_start(skb) && + NAPI_GRO_CB(skb)->csum_cnt == 0 && + (!zero_okay || check)); +} + +static inline __sum16 __skb_gro_checksum_validate_complete(struct sk_buff *skb, + __wsum psum) +{ + if (NAPI_GRO_CB(skb)->csum_valid && + !csum_fold(csum_add(psum, NAPI_GRO_CB(skb)->csum))) + return 0; + + NAPI_GRO_CB(skb)->csum = psum; + + return __skb_gro_checksum_complete(skb); +} + +static inline void skb_gro_incr_csum_unnecessary(struct sk_buff *skb) +{ + if (NAPI_GRO_CB(skb)->csum_cnt > 0) { + /* Consume a checksum from CHECKSUM_UNNECESSARY */ + NAPI_GRO_CB(skb)->csum_cnt--; + } else { + /* Update skb for CHECKSUM_UNNECESSARY and csum_level when we + * verified a new top level checksum or an encapsulated one + * during GRO. This saves work if we fallback to normal path. + */ + __skb_incr_checksum_unnecessary(skb); + } +} + +#define __skb_gro_checksum_validate(skb, proto, zero_okay, check, \ + compute_pseudo) \ +({ \ + __sum16 __ret = 0; \ + if (__skb_gro_checksum_validate_needed(skb, zero_okay, check)) \ + __ret = __skb_gro_checksum_validate_complete(skb, \ + compute_pseudo(skb, proto)); \ + if (!__ret) \ + skb_gro_incr_csum_unnecessary(skb); \ + __ret; \ +}) + +#define skb_gro_checksum_validate(skb, proto, compute_pseudo) \ + __skb_gro_checksum_validate(skb, proto, false, 0, compute_pseudo) + +#define skb_gro_checksum_validate_zero_check(skb, proto, check, \ + compute_pseudo) \ + __skb_gro_checksum_validate(skb, proto, true, check, compute_pseudo) + +#define skb_gro_checksum_simple_validate(skb) \ + __skb_gro_checksum_validate(skb, 0, false, 0, null_compute_pseudo) + +static inline bool __skb_gro_checksum_convert_check(struct sk_buff *skb) +{ + return (NAPI_GRO_CB(skb)->csum_cnt == 0 && + !NAPI_GRO_CB(skb)->csum_valid); +} + +static inline void __skb_gro_checksum_convert(struct sk_buff *skb, + __wsum pseudo) +{ + NAPI_GRO_CB(skb)->csum = ~pseudo; + NAPI_GRO_CB(skb)->csum_valid = 1; +} + +#define skb_gro_checksum_try_convert(skb, proto, compute_pseudo) \ +do { \ + if (__skb_gro_checksum_convert_check(skb)) \ + __skb_gro_checksum_convert(skb, \ + compute_pseudo(skb, proto)); \ +} while (0) + +struct gro_remcsum { + int offset; + __wsum delta; +}; + +static inline void skb_gro_remcsum_init(struct gro_remcsum *grc) +{ + grc->offset = 0; + grc->delta = 0; +} + +static inline void *skb_gro_remcsum_process(struct sk_buff *skb, void *ptr, + unsigned int off, size_t hdrlen, + int start, int offset, + struct gro_remcsum *grc, + bool nopartial) +{ + __wsum delta; + size_t plen = hdrlen + max_t(size_t, offset + sizeof(u16), start); + + BUG_ON(!NAPI_GRO_CB(skb)->csum_valid); + + if (!nopartial) { + NAPI_GRO_CB(skb)->gro_remcsum_start = off + hdrlen + start; + return ptr; + } + + ptr = skb_gro_header_fast(skb, off); + if (skb_gro_header_hard(skb, off + plen)) { + ptr = skb_gro_header_slow(skb, off + plen, off); + if (!ptr) + return NULL; + } + + delta = remcsum_adjust(ptr + hdrlen, NAPI_GRO_CB(skb)->csum, + start, offset); + + /* Adjust skb->csum since we changed the packet */ + NAPI_GRO_CB(skb)->csum = csum_add(NAPI_GRO_CB(skb)->csum, delta); + + grc->offset = off + hdrlen + offset; + grc->delta = delta; + + return ptr; +} + +static inline void skb_gro_remcsum_cleanup(struct sk_buff *skb, + struct gro_remcsum *grc) +{ + void *ptr; + size_t plen = grc->offset + sizeof(u16); + + if (!grc->delta) + return; + + ptr = skb_gro_header_fast(skb, grc->offset); + if (skb_gro_header_hard(skb, grc->offset + sizeof(u16))) { + ptr = skb_gro_header_slow(skb, plen, grc->offset); + if (!ptr) + return; + } + + remcsum_unadjust((__sum16 *)ptr, grc->delta); +} + +#ifdef CONFIG_XFRM_OFFLOAD +static inline void skb_gro_flush_final(struct sk_buff *skb, struct sk_buff *pp, int flush) +{ + if (PTR_ERR(pp) != -EINPROGRESS) + NAPI_GRO_CB(skb)->flush |= flush; +} +static inline void skb_gro_flush_final_remcsum(struct sk_buff *skb, + struct sk_buff *pp, + int flush, + struct gro_remcsum *grc) +{ + if (PTR_ERR(pp) != -EINPROGRESS) { + NAPI_GRO_CB(skb)->flush |= flush; + skb_gro_remcsum_cleanup(skb, grc); + skb->remcsum_offload = 0; + } +} +#else +static inline void skb_gro_flush_final(struct sk_buff *skb, struct sk_buff *pp, int flush) +{ + NAPI_GRO_CB(skb)->flush |= flush; +} +static inline void skb_gro_flush_final_remcsum(struct sk_buff *skb, + struct sk_buff *pp, + int flush, + struct gro_remcsum *grc) +{ + NAPI_GRO_CB(skb)->flush |= flush; + skb_gro_remcsum_cleanup(skb, grc); + skb->remcsum_offload = 0; +} +#endif INDIRECT_CALLABLE_DECLARE(struct sk_buff *ipv6_gro_receive(struct list_head *, struct sk_buff *)); @@ -15,6 +374,14 @@ INDIRECT_CALLABLE_DECLARE(struct sk_buff *inet_gro_receive(struct list_head *, struct sk_buff *)); INDIRECT_CALLABLE_DECLARE(int inet_gro_complete(struct sk_buff *, int)); +INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp4_gro_receive(struct list_head *, + struct sk_buff *)); +INDIRECT_CALLABLE_DECLARE(int udp4_gro_complete(struct sk_buff *, int)); + +INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp6_gro_receive(struct list_head *, + struct sk_buff *)); +INDIRECT_CALLABLE_DECLARE(int udp6_gro_complete(struct sk_buff *, int)); + #define indirect_call_gro_receive_inet(cb, f2, f1, head, skb) \ ({ \ unlikely(gro_recursion_inc_test(skb)) ? \ @@ -22,4 +389,54 @@ INDIRECT_CALLABLE_DECLARE(int inet_gro_complete(struct sk_buff *, int)); INDIRECT_CALL_INET(cb, f2, f1, head, skb); \ }) +struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb, + struct udphdr *uh, struct sock *sk); +int udp_gro_complete(struct sk_buff *skb, int nhoff, udp_lookup_t lookup); + +static inline struct udphdr *udp_gro_udphdr(struct sk_buff *skb) +{ + struct udphdr *uh; + unsigned int hlen, off; + + off = skb_gro_offset(skb); + hlen = off + sizeof(*uh); + uh = skb_gro_header_fast(skb, off); + if (skb_gro_header_hard(skb, hlen)) + uh = skb_gro_header_slow(skb, hlen, off); + + return uh; +} + +static inline __wsum ip6_gro_compute_pseudo(struct sk_buff *skb, int proto) +{ + const struct ipv6hdr *iph = skb_gro_network_header(skb); + + return ~csum_unfold(csum_ipv6_magic(&iph->saddr, &iph->daddr, + skb_gro_len(skb), proto, 0)); +} + +int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb); + +/* Pass the currently batched GRO_NORMAL SKBs up to the stack. */ +static inline void gro_normal_list(struct napi_struct *napi) +{ + if (!napi->rx_count) + return; + netif_receive_skb_list_internal(&napi->rx_list); + INIT_LIST_HEAD(&napi->rx_list); + napi->rx_count = 0; +} + +/* Queue one GRO_NORMAL SKB up for list processing. If batch size exceeded, + * pass the whole batch up to the stack. + */ +static inline void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb, int segs) +{ + list_add_tail(&skb->list, &napi->rx_list); + napi->rx_count += segs; + if (napi->rx_count >= gro_normal_batch) + gro_normal_list(napi); +} + + #endif /* _NET_IPV6_GRO_H */ diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index 653e7d0f65cb..f026cf08a8e8 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -160,6 +160,7 @@ struct ipv6_devstat { struct inet6_dev { struct net_device *dev; + netdevice_tracker dev_tracker; struct list_head addr_list; diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index fa6a87246a7b..4ad47d9f9d27 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -304,7 +304,7 @@ static inline __poll_t inet_csk_listen_poll(const struct sock *sk) (EPOLLIN | EPOLLRDNORM) : 0; } -int inet_csk_listen_start(struct sock *sk, int backlog); +int inet_csk_listen_start(struct sock *sk); void inet_csk_listen_stop(struct sock *sk); void inet_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr); diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 48cc5795ceda..63540be0fc34 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -117,8 +117,15 @@ int fqdir_init(struct fqdir **fqdirp, struct inet_frags *f, struct net *net); static inline void fqdir_pre_exit(struct fqdir *fqdir) { - fqdir->high_thresh = 0; /* prevent creation of new frags */ - fqdir->dead = true; + /* Prevent creation of new frags. + * Pairs with READ_ONCE() in inet_frag_find(). + */ + WRITE_ONCE(fqdir->high_thresh, 0); + + /* Pairs with READ_ONCE() in inet_frag_kill(), ip_expire() + * and ip6frag_expire_frag_queue(). + */ + WRITE_ONCE(fqdir->dead, true); } void fqdir_exit(struct fqdir *fqdir); diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 9e1111f5915b..234d70ae5f4c 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -372,4 +372,16 @@ static inline bool inet_can_nonlocal_bind(struct net *net, inet->freebind || inet->transparent; } +static inline bool inet_addr_valid_or_nonlocal(struct net *net, + struct inet_sock *inet, + __be32 addr, + int addr_type) +{ + return inet_can_nonlocal_bind(net, inet) || + addr == htonl(INADDR_ANY) || + addr_type == RTN_LOCAL || + addr_type == RTN_MULTICAST || + addr_type == RTN_BROADCAST; +} + #endif /* _INET_SOCK_H */ diff --git a/include/net/ip.h b/include/net/ip.h index b71e88507c4a..b51bae43b0dd 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -525,19 +525,18 @@ static inline void ip_select_ident_segs(struct net *net, struct sk_buff *skb, { struct iphdr *iph = ip_hdr(skb); + /* We had many attacks based on IPID, use the private + * generator as much as we can. + */ + if (sk && inet_sk(sk)->inet_daddr) { + iph->id = htons(inet_sk(sk)->inet_id); + inet_sk(sk)->inet_id += segs; + return; + } if ((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) { - /* This is only to work around buggy Windows95/2000 - * VJ compression implementations. If the ID field - * does not change, they drop every other packet in - * a TCP stream using header compression. - */ - if (sk && inet_sk(sk)->inet_daddr) { - iph->id = htons(inet_sk(sk)->inet_id); - inet_sk(sk)->inet_id += segs; - } else { - iph->id = 0; - } + iph->id = 0; } else { + /* Unfortunately we need the big hammer to get a suitable IPID */ __ip_select_ident(net, iph, segs); } } @@ -568,14 +567,6 @@ static inline void iph_to_flow_copy_v4addrs(struct flow_keys *flow, flow->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; } -static inline __wsum inet_gro_compute_pseudo(struct sk_buff *skb, int proto) -{ - const struct iphdr *iph = skb_gro_network_header(skb); - - return csum_tcpudp_nofold(iph->saddr, iph->daddr, - skb_gro_len(skb), proto, 0); -} - /* * Map a multicast IP onto multicast MAC for type ethernet. */ @@ -791,5 +782,6 @@ int ip_sock_set_mtu_discover(struct sock *sk, int val); void ip_sock_set_pktinfo(struct sock *sk); void ip_sock_set_recverr(struct sock *sk); void ip_sock_set_tos(struct sock *sk, int val); +void __ip_sock_set_tos(struct sock *sk, int val); #endif /* _IP_H */ diff --git a/include/net/ip6_checksum.h b/include/net/ip6_checksum.h index b3f4eaa88672..c8a96b888277 100644 --- a/include/net/ip6_checksum.h +++ b/include/net/ip6_checksum.h @@ -43,14 +43,6 @@ static inline __wsum ip6_compute_pseudo(struct sk_buff *skb, int proto) skb->len, proto, 0)); } -static inline __wsum ip6_gro_compute_pseudo(struct sk_buff *skb, int proto) -{ - const struct ipv6hdr *iph = skb_gro_network_header(skb); - - return ~csum_unfold(csum_ipv6_magic(&iph->saddr, &iph->daddr, - skb_gro_len(skb), proto, 0)); -} - static __inline__ __sum16 tcp_v6_check(int len, const struct in6_addr *saddr, const struct in6_addr *daddr, @@ -65,15 +57,9 @@ static inline void __tcp_v6_send_check(struct sk_buff *skb, { struct tcphdr *th = tcp_hdr(skb); - if (skb->ip_summed == CHECKSUM_PARTIAL) { - th->check = ~tcp_v6_check(skb->len, saddr, daddr, 0); - skb->csum_start = skb_transport_header(skb) - skb->head; - skb->csum_offset = offsetof(struct tcphdr, check); - } else { - th->check = tcp_v6_check(skb->len, saddr, daddr, - csum_partial(th, th->doff << 2, - skb->csum)); - } + th->check = ~tcp_v6_check(skb->len, saddr, daddr, 0); + skb->csum_start = skb_transport_header(skb) - skb->head; + skb->csum_offset = offsetof(struct tcphdr, check); } static inline void tcp_v6_gso_csum_prep(struct sk_buff *skb) diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 83b8070d1cc9..40ae8f1b18e5 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -20,6 +20,7 @@ #include <net/inetpeer.h> #include <net/fib_notifier.h> #include <linux/indirect_call_wrapper.h> +#include <uapi/linux/bpf.h> #ifdef CONFIG_IPV6_MULTIPLE_TABLES #define FIB6_TABLE_HASHSZ 256 @@ -281,7 +282,7 @@ static inline bool fib6_get_cookie_safe(const struct fib6_info *f6i, fn = rcu_dereference(f6i->fib6_node); if (fn) { - *cookie = fn->fn_sernum; + *cookie = READ_ONCE(fn->fn_sernum); /* pairs with smp_wmb() in __fib6_update_sernum_upto_root() */ smp_rmb(); status = true; diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 5efd0b71dc67..ca2d6b60e1ec 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -263,19 +263,19 @@ static inline bool ipv6_anycast_destination(const struct dst_entry *dst, int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, int (*output)(struct net *, struct sock *, struct sk_buff *)); -static inline unsigned int ip6_skb_dst_mtu(struct sk_buff *skb) +static inline unsigned int ip6_skb_dst_mtu(const struct sk_buff *skb) { - unsigned int mtu; - - struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ? + const struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ? inet6_sk(skb->sk) : NULL; + const struct dst_entry *dst = skb_dst(skb); + unsigned int mtu; if (np && np->pmtudisc >= IPV6_PMTUDISC_PROBE) { - mtu = READ_ONCE(skb_dst(skb)->dev->mtu); - mtu -= lwtunnel_headroom(skb_dst(skb)->lwtstate, mtu); - } else - mtu = dst_mtu(skb_dst(skb)); - + mtu = READ_ONCE(dst->dev->mtu); + mtu -= lwtunnel_headroom(dst->lwtstate, mtu); + } else { + mtu = dst_mtu(dst); + } return mtu; } diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h index 028eaea1c854..a38c4f1e4e5c 100644 --- a/include/net/ip6_tunnel.h +++ b/include/net/ip6_tunnel.h @@ -46,6 +46,7 @@ struct __ip6_tnl_parm { struct ip6_tnl { struct ip6_tnl __rcu *next; /* next tunnel in list */ struct net_device *dev; /* virtual device associated with tunnel */ + netdevice_tracker dev_tracker; struct net *net; /* netns for packet i/o */ struct __ip6_tnl_parm parms; /* tunnel configuration parameters */ struct flowi fl; /* flowi template for xmit */ diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 3417ba2d27ad..c4297704bbcb 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -79,6 +79,7 @@ struct fnhe_hash_bucket { struct fib_nh_common { struct net_device *nhc_dev; + netdevice_tracker nhc_dev_tracker; int nhc_oif; unsigned char nhc_scope; u8 nhc_family; @@ -111,6 +112,7 @@ struct fib_nh { int nh_saddr_genid; #define fib_nh_family nh_common.nhc_family #define fib_nh_dev nh_common.nhc_dev +#define fib_nh_dev_tracker nh_common.nhc_dev_tracker #define fib_nh_oif nh_common.nhc_oif #define fib_nh_flags nh_common.nhc_flags #define fib_nh_lws nh_common.nhc_lwtstate diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index bc3b13ec93c9..0219fe907b26 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -104,7 +104,10 @@ struct metadata_dst; struct ip_tunnel { struct ip_tunnel __rcu *next; struct hlist_node hash_node; + struct net_device *dev; + netdevice_tracker dev_tracker; + struct net *net; /* netns for packet i/o */ unsigned long err_time; /* Time when the last ICMP error diff --git a/include/net/ipv6.h b/include/net/ipv6.h index c19bf51ded1d..3afcb128e064 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -21,6 +21,8 @@ #include <net/snmp.h> #include <net/netns/hash.h> +struct ip_tunnel_info; + #define SIN6_LEN_RFC2133 24 #define IPV6_MAXPLEN 65535 @@ -345,9 +347,9 @@ struct ipcm6_cookie { struct sockcm_cookie sockc; __s16 hlimit; __s16 tclass; + __u16 gso_size; __s8 dontfrag; struct ipv6_txoptions *opt; - __u16 gso_size; }; static inline void ipcm6_init(struct ipcm6_cookie *ipc6) diff --git a/include/net/ipv6_frag.h b/include/net/ipv6_frag.h index 851029ecff13..0a4779175a52 100644 --- a/include/net/ipv6_frag.h +++ b/include/net/ipv6_frag.h @@ -67,7 +67,8 @@ ip6frag_expire_frag_queue(struct net *net, struct frag_queue *fq) struct sk_buff *head; rcu_read_lock(); - if (fq->q.fqdir->dead) + /* Paired with the WRITE_ONCE() in fqdir_pre_exit(). */ + if (READ_ONCE(fq->q.fqdir->dead)) goto out_rcu_unlock; spin_lock(&fq->q.lock); diff --git a/include/net/iucv/af_iucv.h b/include/net/iucv/af_iucv.h index ff06246dbbb9..df85d19fbf84 100644 --- a/include/net/iucv/af_iucv.h +++ b/include/net/iucv/af_iucv.h @@ -112,10 +112,12 @@ enum iucv_tx_notify { struct iucv_sock { struct sock sk; - char src_user_id[8]; - char src_name[8]; - char dst_user_id[8]; - char dst_name[8]; + struct_group(init, + char src_user_id[8]; + char src_name[8]; + char dst_user_id[8]; + char dst_name[8]; + ); struct list_head accept_q; spinlock_t accept_q_lock; struct sock *parent; diff --git a/include/net/llc_conn.h b/include/net/llc_conn.h index ea985aa7a6c5..2c1ea3414640 100644 --- a/include/net/llc_conn.h +++ b/include/net/llc_conn.h @@ -38,6 +38,7 @@ struct llc_sock { struct llc_addr laddr; /* lsap/mac pair */ struct llc_addr daddr; /* dsap/mac pair */ struct net_device *dev; /* device to send to remote */ + netdevice_tracker dev_tracker; u32 copied_seq; /* head of yet unread data */ u8 retry_count; /* number of retries */ u8 ack_must_be_send; diff --git a/include/net/mac80211.h b/include/net/mac80211.h index dd757f0987b0..c50221d7e82c 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1205,12 +1205,7 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info) /* clear the rate counts */ for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) info->status.rates[i].count = 0; - - BUILD_BUG_ON( - offsetof(struct ieee80211_tx_info, status.ack_signal) != 20); - memset(&info->status.ampdu_ack_len, 0, - sizeof(struct ieee80211_tx_info) - - offsetof(struct ieee80211_tx_info, status.ampdu_ack_len)); + memset_after(&info->status, 0, rates); } @@ -3944,6 +3939,16 @@ struct ieee80211_prep_tx_info { * twt structure. * @twt_teardown_request: Update the hw with TWT teardown request received * from the peer. + * @set_radar_background: Configure dedicated offchannel chain available for + * radar/CAC detection on some hw. This chain can't be used to transmit + * or receive frames and it is bounded to a running wdev. + * Background radar/CAC detection allows to avoid the CAC downtime + * switching to a different channel during CAC detection on the selected + * radar channel. + * The caller is expected to set chandef pointer to NULL in order to + * disable background CAC/radar detection. + * @net_fill_forward_path: Called from .ndo_fill_forward_path in order to + * resolve a path for hardware flow offloading */ struct ieee80211_ops { void (*tx)(struct ieee80211_hw *hw, @@ -4272,6 +4277,13 @@ struct ieee80211_ops { struct ieee80211_twt_setup *twt); void (*twt_teardown_request)(struct ieee80211_hw *hw, struct ieee80211_sta *sta, u8 flowid); + int (*set_radar_background)(struct ieee80211_hw *hw, + struct cfg80211_chan_def *chandef); + int (*net_fill_forward_path)(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + struct ieee80211_sta *sta, + struct net_device_path_ctx *ctx, + struct net_device_path *path); }; /** @@ -5602,6 +5614,9 @@ void ieee80211_iterate_active_interfaces_atomic(struct ieee80211_hw *hw, * This function iterates over the interfaces associated with a given * hardware that are currently active and calls the callback for them. * This version can only be used while holding the wiphy mutex. + * The driver must not call this with a lock held that it can also take in + * response to callbacks from mac80211, and it must not call this within + * callbacks made by mac80211 - both would result in deadlocks. * * @hw: the hardware struct of which the interfaces should be iterated over * @iter_flags: iteration flags, see &enum ieee80211_interface_iteration_flags @@ -5616,6 +5631,24 @@ void ieee80211_iterate_active_interfaces_mtx(struct ieee80211_hw *hw, void *data); /** + * ieee80211_iterate_stations - iterate stations + * + * This function iterates over all stations associated with a given + * hardware that are currently uploaded to the driver and calls the callback + * function for them. + * This function allows the iterator function to sleep, when the iterator + * function is atomic @ieee80211_iterate_stations_atomic can be used. + * + * @hw: the hardware struct of which the interfaces should be iterated over + * @iterator: the iterator function to call, cannot sleep + * @data: first argument of the iterator function + */ +void ieee80211_iterate_stations(struct ieee80211_hw *hw, + void (*iterator)(void *data, + struct ieee80211_sta *sta), + void *data); + +/** * ieee80211_iterate_stations_atomic - iterate stations * * This function iterates over all stations associated with a given @@ -6064,6 +6097,18 @@ void ieee80211_radar_detected(struct ieee80211_hw *hw); void ieee80211_chswitch_done(struct ieee80211_vif *vif, bool success); /** + * ieee80211_channel_switch_disconnect - disconnect due to channel switch error + * @vif &struct ieee80211_vif pointer from the add_interface callback. + * @block_tx: if %true, do not send deauth frame. + * + * Instruct mac80211 to disconnect due to a channel switch error. The channel + * switch can request to block the tx and so, we need to make sure we do not send + * a deauth frame in this case. + */ +void ieee80211_channel_switch_disconnect(struct ieee80211_vif *vif, + bool block_tx); + +/** * ieee80211_request_smps - request SM PS transition * @vif: &struct ieee80211_vif pointer from the add_interface callback. * @smps_mode: new SM PS mode diff --git a/include/net/ndisc.h b/include/net/ndisc.h index 04341d86585d..53cb8de0e589 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h @@ -411,13 +411,7 @@ static inline void __ipv6_confirm_neigh(struct net_device *dev, rcu_read_lock_bh(); n = __ipv6_neigh_lookup_noref(dev, pkey); - if (n) { - unsigned long now = jiffies; - - /* avoid dirtying neighbour */ - if (READ_ONCE(n->confirmed) != now) - WRITE_ONCE(n->confirmed, now); - } + neigh_confirm(n); rcu_read_unlock_bh(); } @@ -428,13 +422,7 @@ static inline void __ipv6_confirm_neigh_stub(struct net_device *dev, rcu_read_lock_bh(); n = __ipv6_neigh_lookup_noref_stub(dev, pkey); - if (n) { - unsigned long now = jiffies; - - /* avoid dirtying neighbour */ - if (READ_ONCE(n->confirmed) != now) - WRITE_ONCE(n->confirmed, now); - } + neigh_confirm(n); rcu_read_unlock_bh(); } diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 38a0c1d24570..937389e04c8e 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -70,6 +70,7 @@ enum { struct neigh_parms { possible_net_t net; struct net_device *dev; + netdevice_tracker dev_tracker; struct list_head list; int (*neigh_setup)(struct neighbour *); struct neigh_table *tbl; @@ -158,6 +159,7 @@ struct neighbour { struct list_head managed_list; struct rcu_head rcu; struct net_device *dev; + netdevice_tracker dev_tracker; u8 primary_key[0]; } __randomize_layout; @@ -173,6 +175,7 @@ struct pneigh_entry { struct pneigh_entry *next; possible_net_t net; struct net_device *dev; + netdevice_tracker dev_tracker; u32 flags; u8 protocol; u8 key[]; @@ -321,6 +324,17 @@ static inline struct neighbour *__neigh_lookup_noref(struct neigh_table *tbl, return ___neigh_lookup_noref(tbl, tbl->key_eq, tbl->hash, pkey, dev); } +static inline void neigh_confirm(struct neighbour *n) +{ + if (n) { + unsigned long now = jiffies; + + /* avoid dirtying neighbour */ + if (READ_ONCE(n->confirmed) != now) + WRITE_ONCE(n->confirmed, now); + } +} + void neigh_table_init(int index, struct neigh_table *tbl); int neigh_table_clear(int index, struct neigh_table *tbl); struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey, diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index bb5fa5914032..5b61c462e534 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -34,6 +34,7 @@ #include <net/netns/smc.h> #include <net/netns/bpf.h> #include <net/netns/mctp.h> +#include <net/net_trackers.h> #include <linux/ns_common.h> #include <linux/idr.h> #include <linux/skbuff.h> @@ -87,6 +88,7 @@ struct net { struct idr netns_ids; struct ns_common ns; + struct ref_tracker_dir refcnt_tracker; struct list_head dev_base_head; struct proc_dir_entry *proc_net; @@ -240,6 +242,7 @@ void ipx_unregister_sysctl(void); #ifdef CONFIG_NET_NS void __put_net(struct net *net); +/* Try using get_net_track() instead */ static inline struct net *get_net(struct net *net) { refcount_inc(&net->ns.count); @@ -258,6 +261,7 @@ static inline struct net *maybe_get_net(struct net *net) return net; } +/* Try using put_net_track() instead */ static inline void put_net(struct net *net) { if (refcount_dec_and_test(&net->ns.count)) @@ -308,6 +312,36 @@ static inline int check_net(const struct net *net) #endif +static inline void netns_tracker_alloc(struct net *net, + netns_tracker *tracker, gfp_t gfp) +{ +#ifdef CONFIG_NET_NS_REFCNT_TRACKER + ref_tracker_alloc(&net->refcnt_tracker, tracker, gfp); +#endif +} + +static inline void netns_tracker_free(struct net *net, + netns_tracker *tracker) +{ +#ifdef CONFIG_NET_NS_REFCNT_TRACKER + ref_tracker_free(&net->refcnt_tracker, tracker); +#endif +} + +static inline struct net *get_net_track(struct net *net, + netns_tracker *tracker, gfp_t gfp) +{ + get_net(net); + netns_tracker_alloc(net, tracker, gfp); + return net; +} + +static inline void put_net_track(struct net *net, netns_tracker *tracker) +{ + netns_tracker_free(net, tracker); + put_net(net); +} + typedef struct { #ifdef CONFIG_NET_NS struct net *net; diff --git a/include/net/net_trackers.h b/include/net/net_trackers.h new file mode 100644 index 000000000000..d94c76cf15a9 --- /dev/null +++ b/include/net/net_trackers.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __NET_NET_TRACKERS_H +#define __NET_NET_TRACKERS_H +#include <linux/ref_tracker.h> + +#ifdef CONFIG_NET_DEV_REFCNT_TRACKER +typedef struct ref_tracker *netdevice_tracker; +#else +typedef struct {} netdevice_tracker; +#endif + +#ifdef CONFIG_NET_NS_REFCNT_TRACKER +typedef struct ref_tracker *netns_tracker; +#else +typedef struct {} netns_tracker; +#endif + +#endif /* __NET_NET_TRACKERS_H */ diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index d24b0a34c8f0..8731d5bcb47d 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -76,6 +76,8 @@ struct nf_conn { * Hint, SKB address this struct and refcnt via skb->_nfct and * helpers nf_conntrack_get() and nf_conntrack_put(). * Helper nf_ct_put() equals nf_conntrack_put() by dec refcnt, + * except that the latter uses internal indirection and does not + * result in a conntrack module dependency. * beware nf_ct_get() is different and don't inc refcnt. */ struct nf_conntrack ct_general; @@ -95,6 +97,7 @@ struct nf_conn { unsigned long status; u16 cpu; + u16 local_origin:1; possible_net_t ct_net; #if IS_ENABLED(CONFIG_NF_NAT) @@ -169,11 +172,13 @@ nf_ct_get(const struct sk_buff *skb, enum ip_conntrack_info *ctinfo) return (struct nf_conn *)(nfct & NFCT_PTRMASK); } +void nf_ct_destroy(struct nf_conntrack *nfct); + /* decrement reference count on a conntrack */ static inline void nf_ct_put(struct nf_conn *ct) { - WARN_ON(!ct); - nf_conntrack_put(&ct->ct_general); + if (ct && refcount_dec_and_test(&ct->ct_general.use)) + nf_ct_destroy(&ct->ct_general); } /* Protocol module loading */ @@ -278,7 +283,7 @@ static inline unsigned long nf_ct_expires(const struct nf_conn *ct) { s32 timeout = READ_ONCE(ct->timeout) - nfct_time_stamp; - return timeout > 0 ? timeout : 0; + return max(timeout, 0); } static inline bool nf_ct_is_expired(const struct nf_conn *ct) diff --git a/include/net/netfilter/nf_conntrack_act_ct.h b/include/net/netfilter/nf_conntrack_act_ct.h new file mode 100644 index 000000000000..078d3c52c03f --- /dev/null +++ b/include/net/netfilter/nf_conntrack_act_ct.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _NF_CONNTRACK_ACT_CT_H +#define _NF_CONNTRACK_ACT_CT_H + +#include <net/netfilter/nf_conntrack.h> +#include <linux/netfilter/nf_conntrack_common.h> +#include <net/netfilter/nf_conntrack_extend.h> + +struct nf_conn_act_ct_ext { + int ifindex[IP_CT_DIR_MAX]; +}; + +static inline struct nf_conn_act_ct_ext *nf_conn_act_ct_ext_find(const struct nf_conn *ct) +{ +#if IS_ENABLED(CONFIG_NET_ACT_CT) + return nf_ct_ext_find(ct, NF_CT_EXT_ACT_CT); +#else + return NULL; +#endif +} + +static inline struct nf_conn_act_ct_ext *nf_conn_act_ct_ext_add(struct nf_conn *ct) +{ +#if IS_ENABLED(CONFIG_NET_ACT_CT) + struct nf_conn_act_ct_ext *act_ct = nf_ct_ext_find(ct, NF_CT_EXT_ACT_CT); + + if (act_ct) + return act_ct; + + act_ct = nf_ct_ext_add(ct, NF_CT_EXT_ACT_CT, GFP_ATOMIC); + return act_ct; +#else + return NULL; +#endif +} + +static inline void nf_conn_act_ct_ext_fill(struct sk_buff *skb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo) +{ +#if IS_ENABLED(CONFIG_NET_ACT_CT) + struct nf_conn_act_ct_ext *act_ct_ext; + + act_ct_ext = nf_conn_act_ct_ext_find(ct); + if (dev_net(skb->dev) == &init_net && act_ct_ext) + act_ct_ext->ifindex[CTINFO2DIR(ctinfo)] = skb->dev->ifindex; +#endif +} + +#endif /* _NF_CONNTRACK_ACT_CT_H */ diff --git a/include/net/netfilter/nf_conntrack_extend.h b/include/net/netfilter/nf_conntrack_extend.h index e1e588387103..c7515d82ab06 100644 --- a/include/net/netfilter/nf_conntrack_extend.h +++ b/include/net/netfilter/nf_conntrack_extend.h @@ -28,6 +28,9 @@ enum nf_ct_ext_id { #if IS_ENABLED(CONFIG_NETFILTER_SYNPROXY) NF_CT_EXT_SYNPROXY, #endif +#if IS_ENABLED(CONFIG_NET_ACT_CT) + NF_CT_EXT_ACT_CT, +#endif NF_CT_EXT_NUM, }; @@ -40,6 +43,7 @@ enum nf_ct_ext_id { #define NF_CT_EXT_TIMEOUT_TYPE struct nf_conn_timeout #define NF_CT_EXT_LABELS_TYPE struct nf_conn_labels #define NF_CT_EXT_SYNPROXY_TYPE struct nf_conn_synproxy +#define NF_CT_EXT_ACT_CT_TYPE struct nf_conn_act_ct_ext /* Extensions: optional stuff which isn't permanently in struct. */ struct nf_ct_ext { diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index a0d9e0b47ab8..eaf55da9a205 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -105,6 +105,8 @@ struct nft_data { }; } __attribute__((aligned(__alignof__(u64)))); +#define NFT_REG32_NUM 20 + /** * struct nft_regs - nf_tables register set * @@ -115,11 +117,21 @@ struct nft_data { */ struct nft_regs { union { - u32 data[20]; + u32 data[NFT_REG32_NUM]; struct nft_verdict verdict; }; }; +struct nft_regs_track { + struct { + const struct nft_expr *selector; + const struct nft_expr *bitwise; + } regs[NFT_REG32_NUM]; + + const struct nft_expr *cur; + const struct nft_expr *last; +}; + /* Store/load an u8, u16 or u64 integer to/from the u32 data register. * * Note, when using concatenations, register allocation happens at 32-bit @@ -346,6 +358,8 @@ int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src); void nft_expr_destroy(const struct nft_ctx *ctx, struct nft_expr *expr); int nft_expr_dump(struct sk_buff *skb, unsigned int attr, const struct nft_expr *expr); +bool nft_expr_reduce_bitwise(struct nft_regs_track *track, + const struct nft_expr *expr); struct nft_set_ext; @@ -884,6 +898,8 @@ struct nft_expr_ops { int (*validate)(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nft_data **data); + bool (*reduce)(struct nft_regs_track *track, + const struct nft_expr *expr); bool (*gc)(struct net *net, const struct nft_expr *expr); int (*offload)(struct nft_offload_ctx *ctx, @@ -974,6 +990,20 @@ static inline void nft_set_elem_update_expr(const struct nft_set_ext *ext, #define NFT_CHAIN_POLICY_UNSET U8_MAX +struct nft_rule_dp { + u64 is_last:1, + dlen:12, + handle:42; /* for tracing */ + unsigned char data[] + __attribute__((aligned(__alignof__(struct nft_expr)))); +}; + +struct nft_rule_blob { + unsigned long size; + unsigned char data[] + __attribute__((aligned(__alignof__(struct nft_rule_dp)))); +}; + /** * struct nft_chain - nf_tables chain * @@ -987,8 +1017,8 @@ static inline void nft_set_elem_update_expr(const struct nft_set_ext *ext, * @name: name of the chain */ struct nft_chain { - struct nft_rule *__rcu *rules_gen_0; - struct nft_rule *__rcu *rules_gen_1; + struct nft_rule_blob __rcu *blob_gen_0; + struct nft_rule_blob __rcu *blob_gen_1; struct list_head rules; struct list_head list; struct rhlist_head rhlhead; @@ -1003,7 +1033,7 @@ struct nft_chain { u8 *udata; /* Only used during control plane commit phase: */ - struct nft_rule **rules_next; + struct nft_rule_blob *blob_next; }; int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain); @@ -1321,7 +1351,7 @@ struct nft_traceinfo { const struct nft_pktinfo *pkt; const struct nft_base_chain *basechain; const struct nft_chain *chain; - const struct nft_rule *rule; + const struct nft_rule_dp *rule; const struct nft_verdict *verdict; enum nft_trace_types type; bool packet_dumped; diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h index 0fa5a6d98a00..b6fb1fdff9b2 100644 --- a/include/net/netfilter/nf_tables_core.h +++ b/include/net/netfilter/nf_tables_core.h @@ -7,6 +7,7 @@ extern struct nft_expr_type nft_imm_type; extern struct nft_expr_type nft_cmp_type; +extern struct nft_expr_type nft_counter_type; extern struct nft_expr_type nft_lookup_type; extern struct nft_expr_type nft_bitwise_type; extern struct nft_expr_type nft_byteorder_type; @@ -21,6 +22,7 @@ extern struct nft_expr_type nft_last_type; #ifdef CONFIG_NETWORK_SECMARK extern struct nft_object_type nft_secmark_obj_type; #endif +extern struct nft_object_type nft_counter_obj_type; int nf_tables_core_module_init(void); void nf_tables_core_module_exit(void); @@ -120,6 +122,8 @@ bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set, bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set, const u32 *key, const struct nft_set_ext **ext); +void nft_counter_init_seqcount(void); + struct nft_expr; struct nft_regs; struct nft_pktinfo; @@ -143,4 +147,6 @@ void nft_dynset_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt); void nft_rt_get_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt); +void nft_counter_eval(const struct nft_expr *expr, struct nft_regs *regs, + const struct nft_pktinfo *pkt); #endif /* _NET_NF_TABLES_CORE_H */ diff --git a/include/net/netns/bpf.h b/include/net/netns/bpf.h index 0ca6a1b87185..2c01a278d1eb 100644 --- a/include/net/netns/bpf.h +++ b/include/net/netns/bpf.h @@ -6,11 +6,18 @@ #ifndef __NETNS_BPF_H__ #define __NETNS_BPF_H__ -#include <linux/bpf-netns.h> +#include <linux/list.h> struct bpf_prog; struct bpf_prog_array; +enum netns_bpf_attach_type { + NETNS_BPF_INVALID = -1, + NETNS_BPF_FLOW_DISSECTOR = 0, + NETNS_BPF_SK_LOOKUP, + MAX_NETNS_BPF_ATTACH_TYPE +}; + struct netns_bpf { /* Array of programs to run compiled from progs or links */ struct bpf_prog_array __rcu *run_array[MAX_NETNS_BPF_ATTACH_TYPE]; diff --git a/include/net/netns/core.h b/include/net/netns/core.h index 36c2d998a43c..552bc25b1933 100644 --- a/include/net/netns/core.h +++ b/include/net/netns/core.h @@ -12,7 +12,6 @@ struct netns_core { int sysctl_somaxconn; #ifdef CONFIG_PROC_FS - int __percpu *sock_inuse; struct prot_inuse __percpu *prot_inuse; #endif }; diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 6c5b2efc4f17..78557643526e 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -85,6 +85,9 @@ struct netns_ipv4 { int sysctl_icmp_ratelimit; int sysctl_icmp_ratemask; + u32 ip_rt_min_pmtu; + int ip_rt_mtu_expires; + struct local_ports ip_local_ports; u8 sysctl_tcp_ecn; diff --git a/include/net/page_pool.h b/include/net/page_pool.h index a4082406a003..79a805542d0f 100644 --- a/include/net/page_pool.h +++ b/include/net/page_pool.h @@ -80,6 +80,8 @@ struct page_pool_params { enum dma_data_direction dma_dir; /* DMA mapping direction */ unsigned int max_len; /* max DMA sync memory size */ unsigned int offset; /* DMA addr offset */ + void (*init_callback)(struct page *page, void *arg); + void *init_arg; }; struct page_pool { @@ -94,6 +96,7 @@ struct page_pool { unsigned int frag_offset; struct page *frag_page; long frag_users; + u32 xdp_mem_id; /* * Data structure for allocation side @@ -168,9 +171,12 @@ bool page_pool_return_skb_page(struct page *page); struct page_pool *page_pool_create(const struct page_pool_params *params); +struct xdp_mem_info; + #ifdef CONFIG_PAGE_POOL void page_pool_destroy(struct page_pool *pool); -void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *)); +void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *), + struct xdp_mem_info *mem); void page_pool_release_page(struct page_pool *pool, struct page *page); void page_pool_put_page_bulk(struct page_pool *pool, void **data, int count); @@ -180,7 +186,8 @@ static inline void page_pool_destroy(struct page_pool *pool) } static inline void page_pool_use_xdp_mem(struct page_pool *pool, - void (*disconnect)(void *)) + void (*disconnect)(void *), + struct xdp_mem_info *mem) { } static inline void page_pool_release_page(struct page_pool *pool, diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 193f88ebf629..676cb8ea9e15 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -202,7 +202,8 @@ struct tcf_exts { __u32 type; /* for backward compat(TCA_OLD_COMPAT) */ int nr_actions; struct tc_action **actions; - struct net *net; + struct net *net; + netns_tracker ns_tracker; #endif /* Map to export classifier specific extension TLV types to the * generic extensions API. Unsupported extensions must be set to 0. @@ -217,6 +218,9 @@ static inline int tcf_exts_init(struct tcf_exts *exts, struct net *net, #ifdef CONFIG_NET_CLS_ACT exts->type = 0; exts->nr_actions = 0; + /* Note: we do not own yet a reference on net. + * This reference might be taken later from tcf_exts_get_net(). + */ exts->net = net; exts->actions = kcalloc(TCA_ACT_MAX_PRIO, sizeof(struct tc_action *), GFP_KERNEL); @@ -236,6 +240,8 @@ static inline bool tcf_exts_get_net(struct tcf_exts *exts) { #ifdef CONFIG_NET_CLS_ACT exts->net = maybe_get_net(exts->net); + if (exts->net) + netns_tracker_alloc(exts->net, &exts->ns_tracker, GFP_KERNEL); return exts->net != NULL; #else return true; @@ -246,7 +252,7 @@ static inline void tcf_exts_put_net(struct tcf_exts *exts) { #ifdef CONFIG_NET_CLS_ACT if (exts->net) - put_net(exts->net); + put_net_track(exts->net, &exts->ns_tracker); #endif } @@ -258,26 +264,31 @@ static inline void tcf_exts_put_net(struct tcf_exts *exts) for (; 0; (void)(i), (void)(a), (void)(exts)) #endif +#define tcf_act_for_each_action(i, a, actions) \ + for (i = 0; i < TCA_ACT_MAX_PRIO && ((a) = actions[i]); i++) + static inline void -tcf_exts_stats_update(const struct tcf_exts *exts, - u64 bytes, u64 packets, u64 drops, u64 lastuse, - u8 used_hw_stats, bool used_hw_stats_valid) +tcf_exts_hw_stats_update(const struct tcf_exts *exts, + u64 bytes, u64 packets, u64 drops, u64 lastuse, + u8 used_hw_stats, bool used_hw_stats_valid) { #ifdef CONFIG_NET_CLS_ACT int i; - preempt_disable(); - for (i = 0; i < exts->nr_actions; i++) { struct tc_action *a = exts->actions[i]; - tcf_action_stats_update(a, bytes, packets, drops, - lastuse, true); - a->used_hw_stats = used_hw_stats; - a->used_hw_stats_valid = used_hw_stats_valid; - } + /* if stats from hw, just skip */ + if (tcf_action_update_hw_stats(a)) { + preempt_disable(); + tcf_action_stats_update(a, bytes, packets, drops, + lastuse, true); + preempt_enable(); - preempt_enable(); + a->used_hw_stats = used_hw_stats; + a->used_hw_stats_valid = used_hw_stats_valid; + } + } #endif } @@ -321,6 +332,9 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, struct nlattr *rate_tlv, struct tcf_exts *exts, u32 flags, struct netlink_ext_ack *extack); +int tcf_exts_validate_ex(struct net *net, struct tcf_proto *tp, struct nlattr **tb, + struct nlattr *rate_tlv, struct tcf_exts *exts, + u32 flags, u32 fl_flags, struct netlink_ext_ack *extack); void tcf_exts_destroy(struct tcf_exts *exts); void tcf_exts_change(struct tcf_exts *dst, struct tcf_exts *src); int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts); @@ -532,9 +546,11 @@ tcf_match_indev(struct sk_buff *skb, int ifindex) return ifindex == skb->skb_iif; } -int tc_setup_flow_action(struct flow_action *flow_action, - const struct tcf_exts *exts); -void tc_cleanup_flow_action(struct flow_action *flow_action); +int tc_setup_offload_action(struct flow_action *flow_action, + const struct tcf_exts *exts); +void tc_cleanup_offload_action(struct flow_action *flow_action); +int tc_setup_action(struct flow_action *flow_action, + struct tc_action *actions[]); int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type, void *type_data, bool err_stop, bool rtnl_held); diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index 9e71691c491b..9e7b21c0b3a6 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -197,7 +197,9 @@ struct tc_skb_cb { struct qdisc_skb_cb qdisc_cb; u16 mru; - bool post_ct; + u8 post_ct:1; + u8 post_ct_snat:1; + u8 post_ct_dnat:1; u16 zone; /* Only valid if post_ct = true */ }; diff --git a/include/net/route.h b/include/net/route.h index 2e6c0e153e3a..25404fc2b483 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -43,6 +43,7 @@ #define RT_CONN_FLAGS(sk) (RT_TOS(inet_sk(sk)->tos) | sock_flag(sk, SOCK_LOCALROUTE)) #define RT_CONN_FLAGS_TOS(sk,tos) (RT_TOS(tos) | sock_flag(sk, SOCK_LOCALROUTE)) +struct ip_tunnel_info; struct fib_nh; struct fib_info; struct uncached_list; @@ -369,7 +370,7 @@ static inline struct neighbour *ip_neigh_gw4(struct net_device *dev, { struct neighbour *neigh; - neigh = __ipv4_neigh_lookup_noref(dev, daddr); + neigh = __ipv4_neigh_lookup_noref(dev, (__force u32)daddr); if (unlikely(!neigh)) neigh = __neigh_create(&arp_tbl, &daddr, dev, false); diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index c70e6d2b2fdd..472843eedbae 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -125,7 +125,7 @@ struct Qdisc { spinlock_t seqlock; struct rcu_head rcu; - + netdevice_tracker dev_tracker; /* private data */ long privdata[] ____cacheline_aligned; }; @@ -1244,6 +1244,7 @@ struct psched_ratecfg { u64 rate_bytes_ps; /* bytes per second */ u32 mult; u16 overhead; + u16 mpu; u8 linklayer; u8 shift; }; @@ -1253,6 +1254,9 @@ static inline u64 psched_l2t_ns(const struct psched_ratecfg *r, { len += r->overhead; + if (len < r->mpu) + len = r->mpu; + if (unlikely(r->linklayer == TC_LINKLAYER_ATM)) return ((u64)(DIV_ROUND_UP(len,48)*53) * r->mult) >> r->shift; @@ -1275,6 +1279,7 @@ static inline void psched_ratecfg_getrate(struct tc_ratespec *res, res->rate = min_t(u64, r->rate_bytes_ps, ~0U); res->overhead = r->overhead; + res->mpu = r->mpu; res->linklayer = (r->linklayer & TC_LINKLAYER_MASK); } diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 3ae61ce2eabd..bf3716fe83e0 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -509,8 +509,8 @@ static inline int sctp_ep_hashfn(struct net *net, __u16 lport) return (net_hash_mix(net) + lport) & (sctp_ep_hashsize - 1); } -#define sctp_for_each_hentry(epb, head) \ - hlist_for_each_entry(epb, head, node) +#define sctp_for_each_hentry(ep, head) \ + hlist_for_each_entry(ep, head, node) /* Is a socket of this style? */ #define sctp_style(sk, style) __sctp_style((sk), (SCTP_SOCKET_##style)) diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 8dabd8800006..350f250b0dc7 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -984,12 +984,10 @@ struct sctp_transport { } cacc; struct { - __u32 last_rtx_chunks; __u16 pmtu; __u16 probe_size; __u16 probe_high; - __u8 probe_count:3; - __u8 raise_count:5; + __u8 probe_count; __u8 state; } pl; /* plpmtud related */ @@ -1011,6 +1009,7 @@ void sctp_transport_reset_t3_rtx(struct sctp_transport *); void sctp_transport_reset_hb_timer(struct sctp_transport *); void sctp_transport_reset_reconf_timer(struct sctp_transport *transport); void sctp_transport_reset_probe_timer(struct sctp_transport *transport); +void sctp_transport_reset_raise_timer(struct sctp_transport *transport); int sctp_transport_hold(struct sctp_transport *); void sctp_transport_put(struct sctp_transport *); void sctp_transport_update_rto(struct sctp_transport *, __u32); @@ -1025,7 +1024,7 @@ bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu); void sctp_transport_immediate_rtx(struct sctp_transport *); void sctp_transport_dst_release(struct sctp_transport *t); void sctp_transport_dst_confirm(struct sctp_transport *t); -bool sctp_transport_pl_send(struct sctp_transport *t); +void sctp_transport_pl_send(struct sctp_transport *t); bool sctp_transport_pl_recv(struct sctp_transport *t); @@ -1244,10 +1243,6 @@ enum sctp_endpoint_type { */ struct sctp_ep_common { - /* Fields to help us manage our entries in the hash tables. */ - struct hlist_node node; - int hashent; - /* Runtime type information. What kind of endpoint is this? */ enum sctp_endpoint_type type; @@ -1299,6 +1294,10 @@ struct sctp_endpoint { /* Common substructure for endpoint and association. */ struct sctp_ep_common base; + /* Fields to help us manage our entries in the hash tables. */ + struct hlist_node node; + int hashent; + /* Associations: A list of current associations and mappings * to the data consumers for each association. This * may be in the form of a hash table or other diff --git a/include/net/sock.h b/include/net/sock.h index d47e9658da28..ff9b508d9c5f 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -56,13 +56,13 @@ #include <linux/wait.h> #include <linux/cgroup-defs.h> #include <linux/rbtree.h> -#include <linux/filter.h> #include <linux/rculist_nulls.h> #include <linux/poll.h> #include <linux/sockptr.h> #include <linux/indirect_call_wrapper.h> #include <linux/atomic.h> #include <linux/refcount.h> +#include <linux/llist.h> #include <net/dst.h> #include <net/checksum.h> #include <net/tcp_states.h> @@ -248,6 +248,7 @@ struct sock_common { }; struct bpf_local_storage; +struct sk_filter; /** * struct sock - network layer representation of sockets @@ -284,15 +285,14 @@ struct bpf_local_storage; * @sk_no_check_tx: %SO_NO_CHECK setting, set checksum in TX packets * @sk_no_check_rx: allow zero checksum in RX packets * @sk_route_caps: route capabilities (e.g. %NETIF_F_TSO) - * @sk_route_nocaps: forbidden route capabilities (e.g NETIF_F_GSO_MASK) - * @sk_route_forced_caps: static, forced route capabilities - * (set in tcp_init_sock()) + * @sk_gso_disabled: if set, NETIF_F_GSO_MASK is forbidden. * @sk_gso_type: GSO type (e.g. %SKB_GSO_TCPV4) * @sk_gso_max_size: Maximum GSO segment size to build * @sk_gso_max_segs: Maximum number of GSO segments * @sk_pacing_shift: scaling factor for TCP Small Queues * @sk_lingertime: %SO_LINGER l_linger setting * @sk_backlog: always used with the per-socket spinlock held + * @defer_list: head of llist storing skbs to be freed * @sk_callback_lock: used with the callbacks in the end of this struct * @sk_error_queue: rarely used * @sk_prot_creator: sk_prot of original sock creator (see ipv6_setsockopt, @@ -350,6 +350,7 @@ struct bpf_local_storage; * @sk_txtime_deadline_mode: set deadline mode for SO_TXTIME * @sk_txtime_report_errors: set report errors mode for SO_TXTIME * @sk_txtime_unused: unused txtime flags + * @ns_tracker: tracker for netns reference */ struct sock { /* @@ -391,6 +392,11 @@ struct sock { #define sk_flags __sk_common.skc_flags #define sk_rxhash __sk_common.skc_rxhash + /* early demux fields */ + struct dst_entry __rcu *sk_rx_dst; + int sk_rx_dst_ifindex; + u32 sk_rx_dst_cookie; + socket_lock_t sk_lock; atomic_t sk_drops; int sk_rcvlowat; @@ -410,6 +416,8 @@ struct sock { struct sk_buff *head; struct sk_buff *tail; } sk_backlog; + struct llist_head defer_list; + #define sk_rmem_alloc sk_backlog.rmem_alloc int sk_forward_alloc; @@ -431,9 +439,6 @@ struct sock { #ifdef CONFIG_XFRM struct xfrm_policy __rcu *sk_policy[2]; #endif - struct dst_entry __rcu *sk_rx_dst; - int sk_rx_dst_ifindex; - u32 sk_rx_dst_cookie; struct dst_entry __rcu *sk_dst_cache; atomic_t sk_omem_alloc; @@ -460,8 +465,6 @@ struct sock { unsigned long sk_max_pacing_rate; struct page_frag sk_frag; netdev_features_t sk_route_caps; - netdev_features_t sk_route_nocaps; - netdev_features_t sk_route_forced_caps; int sk_gso_type; unsigned int sk_gso_max_size; gfp_t sk_allocation; @@ -471,7 +474,7 @@ struct sock { * Because of non atomicity rules, all * changes are protected by socket lock. */ - u8 sk_padding : 1, + u8 sk_gso_disabled : 1, sk_kern_sock : 1, sk_no_check_tx : 1, sk_no_check_rx : 1, @@ -493,6 +496,7 @@ struct sock { u16 sk_busy_poll_budget; #endif spinlock_t sk_peer_lock; + int sk_bind_phc; struct pid *sk_peer_pid; const struct cred *sk_peer_cred; @@ -502,7 +506,6 @@ struct sock { seqlock_t sk_stamp_seq; #endif u16 sk_tsflags; - int sk_bind_phc; u8 sk_shutdown; u32 sk_tskey; atomic_t sk_zckey; @@ -536,6 +539,7 @@ struct sock { struct bpf_local_storage __rcu *sk_bpf_storage; #endif struct rcu_head sk_rcu; + netns_tracker ns_tracker; }; enum sk_pacing { @@ -1022,12 +1026,18 @@ static inline __must_check int sk_add_backlog(struct sock *sk, struct sk_buff *s int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb); +INDIRECT_CALLABLE_DECLARE(int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)); +INDIRECT_CALLABLE_DECLARE(int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)); + static inline int sk_backlog_rcv(struct sock *sk, struct sk_buff *skb) { if (sk_memalloc_socks() && skb_pfmemalloc(skb)) return __sk_backlog_rcv(sk, skb); - return sk->sk_backlog_rcv(sk, skb); + return INDIRECT_CALL_INET(sk->sk_backlog_rcv, + tcp_v6_do_rcv, + tcp_v4_do_rcv, + sk, skb); } static inline void sk_incoming_cpu_update(struct sock *sk) @@ -1199,6 +1209,7 @@ struct proto { void (*unhash)(struct sock *sk); void (*rehash)(struct sock *sk); int (*get_port)(struct sock *sk, unsigned short snum); + void (*put_port)(struct sock *sk); #ifdef CONFIG_BPF_SYSCALL int (*psock_update_sk_prot)(struct sock *sk, struct sk_psock *psock, @@ -1210,7 +1221,9 @@ struct proto { unsigned int inuse_idx; #endif +#if IS_ENABLED(CONFIG_MPTCP) int (*forward_alloc_get)(const struct sock *sk); +#endif bool (*stream_memory_free)(const struct sock *sk, int wake); bool (*sock_is_readable)(struct sock *sk); @@ -1299,10 +1312,11 @@ INDIRECT_CALLABLE_DECLARE(bool tcp_stream_memory_free(const struct sock *sk, int static inline int sk_forward_alloc_get(const struct sock *sk) { - if (!sk->sk_prot->forward_alloc_get) - return sk->sk_forward_alloc; - - return sk->sk_prot->forward_alloc_get(sk); +#if IS_ENABLED(CONFIG_MPTCP) + if (sk->sk_prot->forward_alloc_get) + return sk->sk_prot->forward_alloc_get(sk); +#endif + return sk->sk_forward_alloc; } static inline bool __sk_stream_memory_free(const struct sock *sk, int wake) @@ -1419,13 +1433,32 @@ proto_memory_pressure(struct proto *prot) #ifdef CONFIG_PROC_FS -/* Called with local bh disabled */ -void sock_prot_inuse_add(struct net *net, struct proto *prot, int inc); +#define PROTO_INUSE_NR 64 /* should be enough for the first time */ +struct prot_inuse { + int all; + int val[PROTO_INUSE_NR]; +}; + +static inline void sock_prot_inuse_add(const struct net *net, + const struct proto *prot, int val) +{ + this_cpu_add(net->core.prot_inuse->val[prot->inuse_idx], val); +} + +static inline void sock_inuse_add(const struct net *net, int val) +{ + this_cpu_add(net->core.prot_inuse->all, val); +} + int sock_prot_inuse_get(struct net *net, struct proto *proto); int sock_inuse_get(struct net *net); #else -static inline void sock_prot_inuse_add(struct net *net, struct proto *prot, - int inc) +static inline void sock_prot_inuse_add(const struct net *net, + const struct proto *prot, int val) +{ +} + +static inline void sock_inuse_add(const struct net *net, int val) { } #endif @@ -1603,16 +1636,6 @@ static inline void sk_mem_uncharge(struct sock *sk, int size) __sk_mem_reclaim(sk, SK_RECLAIM_CHUNK); } -static inline void sock_release_ownership(struct sock *sk) -{ - if (sk->sk_lock.owned) { - sk->sk_lock.owned = 0; - - /* The sk_lock has mutex_unlock() semantics: */ - mutex_release(&sk->sk_lock.dep_map, _RET_IP_); - } -} - /* * Macro so as to not evaluate some arguments when * lockdep is not enabled. @@ -1739,12 +1762,23 @@ static inline bool sock_owned_by_user_nocheck(const struct sock *sk) return sk->sk_lock.owned; } +static inline void sock_release_ownership(struct sock *sk) +{ + if (sock_owned_by_user_nocheck(sk)) { + sk->sk_lock.owned = 0; + + /* The sk_lock has mutex_unlock() semantics: */ + mutex_release(&sk->sk_lock.dep_map, _RET_IP_); + } +} + /* no reclassification while locks are held */ static inline bool sock_allow_reclassification(const struct sock *csk) { struct sock *sk = (struct sock *)csk; - return !sk->sk_lock.owned && !spin_is_locked(&sk->sk_lock.slock); + return !sock_owned_by_user_nocheck(sk) && + !spin_is_locked(&sk->sk_lock.slock); } struct sock *sk_alloc(struct net *net, int family, gfp_t priority, @@ -2118,13 +2152,10 @@ static inline void sock_confirm_neigh(struct sk_buff *skb, struct neighbour *n) { if (skb_get_dst_pending_confirm(skb)) { struct sock *sk = skb->sk; - unsigned long now = jiffies; - /* avoid dirtying neighbour */ - if (READ_ONCE(n->confirmed) != now) - WRITE_ONCE(n->confirmed, now); if (sk && READ_ONCE(sk->sk_dst_pending_confirm)) WRITE_ONCE(sk->sk_dst_pending_confirm, 0); + neigh_confirm(n); } } @@ -2137,10 +2168,10 @@ static inline bool sk_can_gso(const struct sock *sk) void sk_setup_caps(struct sock *sk, struct dst_entry *dst); -static inline void sk_nocaps_add(struct sock *sk, netdev_features_t flags) +static inline void sk_gso_disable(struct sock *sk) { - sk->sk_route_nocaps |= flags; - sk->sk_route_caps &= ~flags; + sk->sk_gso_disabled = 1; + sk->sk_route_caps &= ~NETIF_F_GSO_MASK; } static inline int skb_do_copy_data_nocache(struct sock *sk, struct sk_buff *skb, @@ -2654,6 +2685,11 @@ static inline void skb_setup_tx_timestamp(struct sk_buff *skb, __u16 tsflags) &skb_shinfo(skb)->tskey); } +static inline bool sk_is_tcp(const struct sock *sk) +{ + return sk->sk_type == SOCK_STREAM && sk->sk_protocol == IPPROTO_TCP; +} + /** * sk_eat_skb - Release a skb if it is no longer needed * @sk: socket to eat this skb from diff --git a/include/net/tc_act/tc_gate.h b/include/net/tc_act/tc_gate.h index 8bc6be81a7ad..c8fa11ebb397 100644 --- a/include/net/tc_act/tc_gate.h +++ b/include/net/tc_act/tc_gate.h @@ -60,11 +60,6 @@ static inline bool is_tcf_gate(const struct tc_action *a) return false; } -static inline u32 tcf_gate_index(const struct tc_action *a) -{ - return a->tcfa_index; -} - static inline s32 tcf_gate_prio(const struct tc_action *a) { s32 tcfg_prio; diff --git a/include/net/tc_act/tc_mirred.h b/include/net/tc_act/tc_mirred.h index 1cace4c69e44..32ce8ea36950 100644 --- a/include/net/tc_act/tc_mirred.h +++ b/include/net/tc_act/tc_mirred.h @@ -10,6 +10,7 @@ struct tcf_mirred { int tcfm_eaction; bool tcfm_mac_header_xmit; struct net_device __rcu *tcfm_dev; + netdevice_tracker tcfm_dev_tracker; struct list_head tcfm_list; }; #define to_mirred(a) ((struct tcf_mirred *)a) diff --git a/include/net/tcp.h b/include/net/tcp.h index 4da22b41bde6..b9fc978fb2ca 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1368,6 +1368,20 @@ static inline bool tcp_checksum_complete(struct sk_buff *skb) } bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb); + +#ifdef CONFIG_INET +void __sk_defer_free_flush(struct sock *sk); + +static inline void sk_defer_free_flush(struct sock *sk) +{ + if (llist_empty(&sk->defer_list)) + return; + __sk_defer_free_flush(sk); +} +#else +static inline void sk_defer_free_flush(struct sock *sk) {} +#endif + int tcp_filter(struct sock *sk, struct sk_buff *skb); void tcp_set_state(struct sock *sk, int state); void tcp_done(struct sock *sk); @@ -2172,9 +2186,13 @@ static inline void tcp_segs_in(struct tcp_sock *tp, const struct sk_buff *skb) u16 segs_in; segs_in = max_t(u16, 1, skb_shinfo(skb)->gso_segs); - tp->segs_in += segs_in; + + /* We update these fields while other threads might + * read them from tcp_get_info() + */ + WRITE_ONCE(tp->segs_in, tp->segs_in + segs_in); if (skb->len > tcp_hdrlen(skb)) - tp->data_segs_in += segs_in; + WRITE_ONCE(tp->data_segs_in, tp->data_segs_in + segs_in); } /* diff --git a/include/net/udp.h b/include/net/udp.h index 909ecf447e0f..f1c2a88c9005 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -167,36 +167,12 @@ static inline void udp_csum_pull_header(struct sk_buff *skb) typedef struct sock *(*udp_lookup_t)(const struct sk_buff *skb, __be16 sport, __be16 dport); -INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp4_gro_receive(struct list_head *, - struct sk_buff *)); -INDIRECT_CALLABLE_DECLARE(int udp4_gro_complete(struct sk_buff *, int)); -INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp6_gro_receive(struct list_head *, - struct sk_buff *)); -INDIRECT_CALLABLE_DECLARE(int udp6_gro_complete(struct sk_buff *, int)); INDIRECT_CALLABLE_DECLARE(void udp_v6_early_demux(struct sk_buff *)); INDIRECT_CALLABLE_DECLARE(int udpv6_rcv(struct sk_buff *)); -struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb, - struct udphdr *uh, struct sock *sk); -int udp_gro_complete(struct sk_buff *skb, int nhoff, udp_lookup_t lookup); - struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb, netdev_features_t features, bool is_ipv6); -static inline struct udphdr *udp_gro_udphdr(struct sk_buff *skb) -{ - struct udphdr *uh; - unsigned int hlen, off; - - off = skb_gro_offset(skb); - hlen = off + sizeof(*uh); - uh = skb_gro_header_fast(skb, off); - if (skb_gro_header_hard(skb, hlen)) - uh = skb_gro_header_slow(skb, hlen, off); - - return uh; -} - /* hash routines shared between UDPv4/6 and UDP-Litev4/6 */ static inline int udp_lib_hash(struct sock *sk) { diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 08537aa14f7c..5a934bebe630 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -10,6 +10,7 @@ #include <net/nexthop.h> #define IANA_VXLAN_UDP_PORT 4789 +#define IANA_VXLAN_GPE_UDP_PORT 4790 /* VXLAN protocol (RFC 7348) header: * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ diff --git a/include/net/xdp.h b/include/net/xdp.h index 447f9b1578f3..8f0812e4996d 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -260,6 +260,9 @@ bool xdp_rxq_info_is_reg(struct xdp_rxq_info *xdp_rxq); int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq, enum xdp_mem_type type, void *allocator); void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq); +int xdp_reg_mem_model(struct xdp_mem_info *mem, + enum xdp_mem_type type, void *allocator); +void xdp_unreg_mem_model(struct xdp_mem_info *mem); /* Drivers not supporting XDP metadata can use this helper, which * rejects any room expansion for metadata as a result. diff --git a/include/net/xdp_priv.h b/include/net/xdp_priv.h index a9d5b7603b89..a2d58b1a12e1 100644 --- a/include/net/xdp_priv.h +++ b/include/net/xdp_priv.h @@ -10,7 +10,6 @@ struct xdp_mem_allocator { union { void *allocator; struct page_pool *page_pool; - struct zero_copy_allocator *zc_alloc; }; struct rhash_head node; struct rcu_head rcu; diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h index fff069d2ed1b..3057e1a4a11c 100644 --- a/include/net/xdp_sock.h +++ b/include/net/xdp_sock.h @@ -6,6 +6,7 @@ #ifndef _LINUX_XDP_SOCK_H #define _LINUX_XDP_SOCK_H +#include <linux/bpf.h> #include <linux/workqueue.h> #include <linux/if_xdp.h> #include <linux/mutex.h> diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 2308210793a0..fdb41e8bb626 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -128,6 +128,7 @@ struct xfrm_state_walk { struct xfrm_state_offload { struct net_device *dev; + netdevice_tracker dev_tracker; struct net_device *real_dev; unsigned long offload_handle; unsigned int num_exthdrs; @@ -200,6 +201,11 @@ struct xfrm_state { struct xfrm_algo_aead *aead; const char *geniv; + /* mapping change rate limiting */ + __be16 new_mapping_sport; + u32 new_mapping; /* seconds */ + u32 mapping_maxage; /* seconds for input SA */ + /* Data for encapsulator */ struct xfrm_encap_tmpl *encap; struct sock __rcu *encap_sk; @@ -1162,7 +1168,7 @@ static inline int xfrm_route_forward(struct sk_buff *skb, unsigned short family) { struct net *net = dev_net(skb->dev); - if (xfrm_default_allow(net, XFRM_POLICY_FWD)) + if (xfrm_default_allow(net, XFRM_POLICY_OUT)) return !net->xfrm.policy_count[XFRM_POLICY_OUT] || (skb_dst(skb)->flags & DST_NOXFRM) || __xfrm_route_forward(skb, family); @@ -1913,7 +1919,7 @@ static inline void xfrm_dev_state_free(struct xfrm_state *x) if (dev->xfrmdev_ops->xdo_dev_state_free) dev->xfrmdev_ops->xdo_dev_state_free(x); xso->dev = NULL; - dev_put(dev); + dev_put_track(dev, &xso->dev_tracker); } } #else |