| author | Linus Torvalds <torvalds@linux-foundation.org> | 2019-09-18 12:34:53 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-09-18 12:34:53 -0700 |
| commit | 81160dda9a7aad13c04e78bb2cfd3c4630e3afab (patch) | |
| tree | 4bf79ffa9fc7dc5e2915ff978778c3402c491113 /net | |
| parent | 8b53c76533aa4356602aea98f98a2f3b4051464c (diff) | |
| parent | 1bab8d4c488be22d57f9dd09968c90a0ddc413bf (diff) | |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
Pull networking updates from David Miller:
1) Support IPV6 RA Captive Portal Identifier, from Maciej Żenczykowski.
2) Use bio_vec in the networking code instead of custom skb_frag_t, from
Matthew Wilcox (see the frag-accessor sketch after this list).
3) Make use of xmit_more in the r8169 driver, from Heiner Kallweit (a
doorbell sketch follows this list).
4) Add devmap_hash to xdp, from Toke Høiland-Jørgensen (see the map
sketch after this list).
5) Support all variants of 5750X bnxt_en chips, from Michael Chan.
6) More RTNL avoidance work in the core and mlx5 driver, from Vlad
Buslov.
7) Add TCP syn cookies bpf helper, from Petar Penkov.
8) Add 'nettest' to selftests and use it, from David Ahern.
9) Add extack support to drop_monitor, add packet alert mode and
support for HW drops, from Ido Schimmel.
10) Add VLAN offload to stmmac, from Jose Abreu.
11) Lots of devm_platform_ioremap_resource() conversions, from
YueHaibing (a conversion sketch follows this list).
12) Add IONIC driver, from Shannon Nelson.
13) Several kTLS cleanups, from Jakub Kicinski.
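For item 2, the switchover works because users go through the skb_frag accessors rather than poking skb_frag_t fields directly; the net/appletalk hunk in the diff below is one such conversion. A minimal sketch of the accessor pattern, assuming the 5.4-era <linux/skbuff.h> API (the helper name is illustrative):

```c
#include <linux/skbuff.h>
#include <linux/printk.h>

/* Walk a socket buffer's paged fragments via the accessor helpers.
 * skb_frag_off()/skb_frag_size()/skb_frag_page() replace direct access
 * to the old skb_frag_t fields such as page_offset.
 */
static void frag_walk(const struct sk_buff *skb)
{
	const struct skb_shared_info *shinfo = skb_shinfo(skb);
	int i;

	for (i = 0; i < shinfo->nr_frags; i++) {
		const skb_frag_t *frag = &shinfo->frags[i];

		pr_debug("frag %d: page=%p off=%u len=%u\n", i,
			 skb_frag_page(frag), skb_frag_off(frag),
			 skb_frag_size(frag));
	}
}
```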
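For item 3, xmit_more lets a driver batch doorbell writes: the stack signals that more frames will follow immediately, so the (often expensive) hardware kick can be deferred. A hedged sketch of the usual pattern in an ndo_start_xmit handler; foo_post_descriptor(), foo_ring_doorbell() and the single-queue assumption are illustrative stand-ins, not taken from the r8169 patches:

```c
#include <linux/netdevice.h>
#include <linux/skbuff.h>

/* Stand-ins for the driver's real descriptor/doorbell handling. */
static void foo_post_descriptor(struct net_device *dev, struct sk_buff *skb) { }
static void foo_ring_doorbell(struct net_device *dev) { }

static netdev_tx_t foo_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);

	foo_post_descriptor(dev, skb);

	/* Ring the doorbell only when the stack has nothing further
	 * queued behind this frame, or the queue was just stopped.
	 */
	if (!netdev_xmit_more() || netif_xmit_stopped(txq))
		foo_ring_doorbell(dev);

	return NETDEV_TX_OK;
}
```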
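For item 4, BPF_MAP_TYPE_DEVMAP_HASH is a devmap variant for XDP_REDIRECT that is keyed by an arbitrary u32 (for example an ifindex) instead of a dense array index. A minimal sketch assuming a libbpf recent enough for BTF-style map definitions; the map name and the use of the ingress ifindex as key are illustrative:

```c
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_DEVMAP_HASH);
	__uint(key_size, sizeof(__u32));	/* arbitrary key, e.g. an ifindex */
	__uint(value_size, sizeof(__u32));	/* target ifindex */
	__uint(max_entries, 64);
} tx_port SEC(".maps");

SEC("xdp")
int xdp_redirect_by_ifindex(struct xdp_md *ctx)
{
	/* Look up the egress device keyed by the ingress ifindex;
	 * returns XDP_REDIRECT on a hit, XDP_ABORTED otherwise.
	 */
	return bpf_redirect_map(&tx_port, ctx->ingress_ifindex, 0);
}

char _license[] SEC("license") = "GPL";
```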
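Item 11 is a tree-wide cleanup: devm_platform_ioremap_resource() collapses the platform_get_resource() + devm_ioremap_resource() pair into a single call. A sketch of the resulting probe pattern, with illustrative driver and register names:

```c
#include <linux/platform_device.h>
#include <linux/io.h>
#include <linux/err.h>

static int foo_probe(struct platform_device *pdev)
{
	void __iomem *base;

	/* One call replaces platform_get_resource(pdev, IORESOURCE_MEM, 0)
	 * followed by devm_ioremap_resource(&pdev->dev, res).
	 */
	base = devm_platform_ioremap_resource(pdev, 0);
	if (IS_ERR(base))
		return PTR_ERR(base);

	writel(0x1, base + 0x10);	/* illustrative register write */
	return 0;
}
```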
* git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (1930 commits)
mlxsw: spectrum_buffers: Add the ability to query the CPU port's shared buffer
mlxsw: spectrum: Register CPU port with devlink
mlxsw: spectrum_buffers: Prevent changing CPU port's configuration
net: ena: fix incorrect update of intr_delay_resolution
net: ena: fix retrieval of nonadaptive interrupt moderation intervals
net: ena: fix update of interrupt moderation register
net: ena: remove all old adaptive rx interrupt moderation code from ena_com
net: ena: remove ena_restore_ethtool_params() and relevant fields
net: ena: remove old adaptive interrupt moderation code from ena_netdev
net: ena: remove code duplication in ena_com_update_nonadaptive_moderation_interval_*()
net: ena: enable the interrupt_moderation in driver_supported_features
net: ena: reimplement set/get_coalesce()
net: ena: switch to dim algorithm for rx adaptive interrupt moderation (see the dim sketch after this list)
net: ena: add intr_moder_rx_interval to struct ena_com_dev and use it
net: phy: adin: implement Energy Detect Powerdown mode via phy-tunable
ethtool: implement Energy Detect Powerdown support via phy-tunable
xen-netfront: do not assume sk_buff_head list is empty in error handling
s390/ctcm: Delete unnecessary checks before the macro call “dev_kfree_skb”
net: ena: don't wake up tx queue when down
drop_monitor: Better sanitize notified packets
...
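The ena commits above drop the driver's private adaptive-moderation logic in favour of the shared dim library. A rough sketch of how a driver typically feeds dim from its rx path, assuming the 5.4-era <linux/dim.h> API; the ring structure and handler names are illustrative, not taken from the ena patches:

```c
#include <linux/dim.h>
#include <linux/workqueue.h>

struct my_rx_ring {
	struct dim dim;		/* per-ring dim state */
	u64 packets;		/* counters accumulated in the rx path */
	u64 bytes;
	u16 events;		/* completion/interrupt events */
};

/* Called from the NAPI poll routine once per poll cycle. */
static void my_rx_dim_update(struct my_rx_ring *ring)
{
	struct dim_sample sample;

	dim_update_sample(ring->events, ring->packets, ring->bytes, &sample);
	net_dim(&ring->dim, sample);	/* may schedule ring->dim.work */
}

/* Worker that programs the moderation profile dim selected. */
static void my_rx_dim_work(struct work_struct *work)
{
	struct dim *dim = container_of(work, struct dim, work);
	struct dim_cq_moder moder =
		net_dim_get_rx_moderation(dim->mode, dim->profile_ix);

	/* write moder.usec / moder.pkts to the device's rx interrupt
	 * moderation register here
	 */

	dim->state = DIM_START_MEASURE;
}
```

Setup would pair this with INIT_WORK(&ring->dim.work, my_rx_dim_work) and an initial dim.mode such as DIM_CQ_PERIOD_MODE_START_FROM_EQE.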
Diffstat (limited to 'net')
267 files changed, 14737 insertions, 3102 deletions
diff --git a/net/Kconfig b/net/Kconfig index 57f51a279ad6..3101bfcbdd7a 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -430,6 +430,7 @@ config NET_SOCK_MSG config NET_DEVLINK bool default n + imply NET_DROP_MONITOR config PAGE_POOL bool diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index a8cb6b2e20c1..4072e9d394d6 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -953,8 +953,8 @@ static unsigned long atalk_sum_skb(const struct sk_buff *skb, int offset, if (copy > len) copy = len; vaddr = kmap_atomic(skb_frag_page(frag)); - sum = atalk_sum_partial(vaddr + frag->page_offset + - offset - start, copy, sum); + sum = atalk_sum_partial(vaddr + skb_frag_off(frag) + + offset - start, copy, sum); kunmap_atomic(vaddr); if (!(len -= copy)) diff --git a/net/atm/mpoa_caches.c b/net/atm/mpoa_caches.c index 4bb418313720..3286f9d527d3 100644 --- a/net/atm/mpoa_caches.c +++ b/net/atm/mpoa_caches.c @@ -180,8 +180,7 @@ static int cache_hit(in_cache_entry *entry, struct mpoa_client *mpc) static void in_cache_put(in_cache_entry *entry) { if (refcount_dec_and_test(&entry->use)) { - memset(entry, 0, sizeof(in_cache_entry)); - kfree(entry); + kzfree(entry); } } @@ -416,8 +415,7 @@ static eg_cache_entry *eg_cache_get_by_src_ip(__be32 ipaddr, static void eg_cache_put(eg_cache_entry *entry) { if (refcount_dec_and_test(&entry->use)) { - memset(entry, 0, sizeof(eg_cache_entry)); - kfree(entry); + kzfree(entry); } } diff --git a/net/atm/pppoatm.c b/net/atm/pppoatm.c index bd3da9af5ef6..45d8e1d5d033 100644 --- a/net/atm/pppoatm.c +++ b/net/atm/pppoatm.c @@ -216,9 +216,7 @@ static void pppoatm_push(struct atm_vcc *atmvcc, struct sk_buff *skb) pvcc->chan.mtu += LLC_LEN; break; } - pr_debug("Couldn't autodetect yet (skb: %02X %02X %02X %02X %02X %02X)\n", - skb->data[0], skb->data[1], skb->data[2], - skb->data[3], skb->data[4], skb->data[5]); + pr_debug("Couldn't autodetect yet (skb: %6ph)\n", skb->data); goto error; case e_vc: break; diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c index 22672cb3e25d..64054edc2e3c 100644 --- a/net/batman-adv/bat_v.c +++ b/net/batman-adv/bat_v.c @@ -79,6 +79,7 @@ static int batadv_v_iface_enable(struct batadv_hard_iface *hard_iface) static void batadv_v_iface_disable(struct batadv_hard_iface *hard_iface) { + batadv_v_ogm_iface_disable(hard_iface); batadv_v_elp_iface_disable(hard_iface); } @@ -1081,6 +1082,12 @@ void batadv_v_hardif_init(struct batadv_hard_iface *hard_iface) */ atomic_set(&hard_iface->bat_v.throughput_override, 0); atomic_set(&hard_iface->bat_v.elp_interval, 500); + + hard_iface->bat_v.aggr_len = 0; + skb_queue_head_init(&hard_iface->bat_v.aggr_list); + spin_lock_init(&hard_iface->bat_v.aggr_list_lock); + INIT_DELAYED_WORK(&hard_iface->bat_v.aggr_wq, + batadv_v_ogm_aggr_work); } /** diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c index bc06e3cdfa84..dc4f7430cb5a 100644 --- a/net/batman-adv/bat_v_ogm.c +++ b/net/batman-adv/bat_v_ogm.c @@ -17,12 +17,14 @@ #include <linux/kernel.h> #include <linux/kref.h> #include <linux/list.h> +#include <linux/lockdep.h> #include <linux/netdevice.h> #include <linux/random.h> #include <linux/rculist.h> #include <linux/rcupdate.h> #include <linux/skbuff.h> #include <linux/slab.h> +#include <linux/spinlock.h> #include <linux/stddef.h> #include <linux/string.h> #include <linux/types.h> @@ -77,6 +79,20 @@ struct batadv_orig_node *batadv_v_ogm_orig_get(struct batadv_priv *bat_priv, } /** + * batadv_v_ogm_start_queue_timer() - restart the OGM aggregation timer + * @hard_iface: the interface to use 
to send the OGM + */ +static void batadv_v_ogm_start_queue_timer(struct batadv_hard_iface *hard_iface) +{ + unsigned int msecs = BATADV_MAX_AGGREGATION_MS * 1000; + + /* msecs * [0.9, 1.1] */ + msecs += prandom_u32() % (msecs / 5) - (msecs / 10); + queue_delayed_work(batadv_event_workqueue, &hard_iface->bat_v.aggr_wq, + msecs_to_jiffies(msecs / 1000)); +} + +/** * batadv_v_ogm_start_timer() - restart the OGM sending timer * @bat_priv: the bat priv with all the soft interface information */ @@ -116,6 +132,130 @@ static void batadv_v_ogm_send_to_if(struct sk_buff *skb, } /** + * batadv_v_ogm_len() - OGMv2 packet length + * @skb: the OGM to check + * + * Return: Length of the given OGMv2 packet, including tvlv length, excluding + * ethernet header length. + */ +static unsigned int batadv_v_ogm_len(struct sk_buff *skb) +{ + struct batadv_ogm2_packet *ogm_packet; + + ogm_packet = (struct batadv_ogm2_packet *)skb->data; + return BATADV_OGM2_HLEN + ntohs(ogm_packet->tvlv_len); +} + +/** + * batadv_v_ogm_queue_left() - check if given OGM still fits aggregation queue + * @skb: the OGM to check + * @hard_iface: the interface to use to send the OGM + * + * Caller needs to hold the hard_iface->bat_v.aggr_list_lock. + * + * Return: True, if the given OGMv2 packet still fits, false otherwise. + */ +static bool batadv_v_ogm_queue_left(struct sk_buff *skb, + struct batadv_hard_iface *hard_iface) +{ + unsigned int max = min_t(unsigned int, hard_iface->net_dev->mtu, + BATADV_MAX_AGGREGATION_BYTES); + unsigned int ogm_len = batadv_v_ogm_len(skb); + + lockdep_assert_held(&hard_iface->bat_v.aggr_list_lock); + + return hard_iface->bat_v.aggr_len + ogm_len <= max; +} + +/** + * batadv_v_ogm_aggr_list_free - free all elements in an aggregation queue + * @hard_iface: the interface holding the aggregation queue + * + * Empties the OGMv2 aggregation queue and frees all the skbs it contained. + * + * Caller needs to hold the hard_iface->bat_v.aggr_list_lock. + */ +static void batadv_v_ogm_aggr_list_free(struct batadv_hard_iface *hard_iface) +{ + struct sk_buff *skb; + + lockdep_assert_held(&hard_iface->bat_v.aggr_list_lock); + + while ((skb = skb_dequeue(&hard_iface->bat_v.aggr_list))) + kfree_skb(skb); + + hard_iface->bat_v.aggr_len = 0; +} + +/** + * batadv_v_ogm_aggr_send() - flush & send aggregation queue + * @hard_iface: the interface with the aggregation queue to flush + * + * Aggregates all OGMv2 packets currently in the aggregation queue into a + * single OGMv2 packet and transmits this aggregate. + * + * The aggregation queue is empty after this call. + * + * Caller needs to hold the hard_iface->bat_v.aggr_list_lock. 
+ */ +static void batadv_v_ogm_aggr_send(struct batadv_hard_iface *hard_iface) +{ + unsigned int aggr_len = hard_iface->bat_v.aggr_len; + struct sk_buff *skb_aggr; + unsigned int ogm_len; + struct sk_buff *skb; + + lockdep_assert_held(&hard_iface->bat_v.aggr_list_lock); + + if (!aggr_len) + return; + + skb_aggr = dev_alloc_skb(aggr_len + ETH_HLEN + NET_IP_ALIGN); + if (!skb_aggr) { + batadv_v_ogm_aggr_list_free(hard_iface); + return; + } + + skb_reserve(skb_aggr, ETH_HLEN + NET_IP_ALIGN); + skb_reset_network_header(skb_aggr); + + while ((skb = skb_dequeue(&hard_iface->bat_v.aggr_list))) { + hard_iface->bat_v.aggr_len -= batadv_v_ogm_len(skb); + + ogm_len = batadv_v_ogm_len(skb); + skb_put_data(skb_aggr, skb->data, ogm_len); + + consume_skb(skb); + } + + batadv_v_ogm_send_to_if(skb_aggr, hard_iface); +} + +/** + * batadv_v_ogm_queue_on_if() - queue a batman ogm on a given interface + * @skb: the OGM to queue + * @hard_iface: the interface to queue the OGM on + */ +static void batadv_v_ogm_queue_on_if(struct sk_buff *skb, + struct batadv_hard_iface *hard_iface) +{ + struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); + + if (!atomic_read(&bat_priv->aggregated_ogms)) { + batadv_v_ogm_send_to_if(skb, hard_iface); + return; + } + + spin_lock_bh(&hard_iface->bat_v.aggr_list_lock); + if (!batadv_v_ogm_queue_left(skb, hard_iface)) + batadv_v_ogm_aggr_send(hard_iface); + + hard_iface->bat_v.aggr_len += batadv_v_ogm_len(skb); + skb_queue_tail(&hard_iface->bat_v.aggr_list, skb); + spin_unlock_bh(&hard_iface->bat_v.aggr_list_lock); +} + +/** * batadv_v_ogm_send() - periodic worker broadcasting the own OGM * @work: work queue item */ @@ -210,7 +350,7 @@ static void batadv_v_ogm_send(struct work_struct *work) break; } - batadv_v_ogm_send_to_if(skb_tmp, hard_iface); + batadv_v_ogm_queue_on_if(skb_tmp, hard_iface); batadv_hardif_put(hard_iface); } rcu_read_unlock(); @@ -224,6 +364,27 @@ out: } /** + * batadv_v_ogm_aggr_work() - OGM queue periodic task per interface + * @work: work queue item + * + * Emits aggregated OGM message in regular intervals. + */ +void batadv_v_ogm_aggr_work(struct work_struct *work) +{ + struct batadv_hard_iface_bat_v *batv; + struct batadv_hard_iface *hard_iface; + + batv = container_of(work, struct batadv_hard_iface_bat_v, aggr_wq.work); + hard_iface = container_of(batv, struct batadv_hard_iface, bat_v); + + spin_lock_bh(&hard_iface->bat_v.aggr_list_lock); + batadv_v_ogm_aggr_send(hard_iface); + spin_unlock_bh(&hard_iface->bat_v.aggr_list_lock); + + batadv_v_ogm_start_queue_timer(hard_iface); +} + +/** * batadv_v_ogm_iface_enable() - prepare an interface for B.A.T.M.A.N. 
V * @hard_iface: the interface to prepare * @@ -235,12 +396,26 @@ int batadv_v_ogm_iface_enable(struct batadv_hard_iface *hard_iface) { struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); + batadv_v_ogm_start_queue_timer(hard_iface); batadv_v_ogm_start_timer(bat_priv); return 0; } /** + * batadv_v_ogm_iface_disable() - release OGM interface private resources + * @hard_iface: interface for which the resources have to be released + */ +void batadv_v_ogm_iface_disable(struct batadv_hard_iface *hard_iface) +{ + cancel_delayed_work_sync(&hard_iface->bat_v.aggr_wq); + + spin_lock_bh(&hard_iface->bat_v.aggr_list_lock); + batadv_v_ogm_aggr_list_free(hard_iface); + spin_unlock_bh(&hard_iface->bat_v.aggr_list_lock); +} + +/** * batadv_v_ogm_primary_iface_set() - set a new primary interface * @primary_iface: the new primary interface */ @@ -382,7 +557,7 @@ static void batadv_v_ogm_forward(struct batadv_priv *bat_priv, if_outgoing->net_dev->name, ntohl(ogm_forward->throughput), ogm_forward->ttl, if_incoming->net_dev->name); - batadv_v_ogm_send_to_if(skb, if_outgoing); + batadv_v_ogm_queue_on_if(skb, if_outgoing); out: if (orig_ifinfo) diff --git a/net/batman-adv/bat_v_ogm.h b/net/batman-adv/bat_v_ogm.h index 2a50df7fc2bf..bf16d040461d 100644 --- a/net/batman-adv/bat_v_ogm.h +++ b/net/batman-adv/bat_v_ogm.h @@ -11,10 +11,13 @@ #include <linux/skbuff.h> #include <linux/types.h> +#include <linux/workqueue.h> int batadv_v_ogm_init(struct batadv_priv *bat_priv); void batadv_v_ogm_free(struct batadv_priv *bat_priv); +void batadv_v_ogm_aggr_work(struct work_struct *work); int batadv_v_ogm_iface_enable(struct batadv_hard_iface *hard_iface); +void batadv_v_ogm_iface_disable(struct batadv_hard_iface *hard_iface); struct batadv_orig_node *batadv_v_ogm_orig_get(struct batadv_priv *bat_priv, const u8 *addr); void batadv_v_ogm_primary_iface_set(struct batadv_hard_iface *primary_iface); diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index 3d4c04d87ff3..6967f2e4c3f4 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -13,7 +13,7 @@ #define BATADV_DRIVER_DEVICE "batman-adv" #ifndef BATADV_SOURCE_VERSION -#define BATADV_SOURCE_VERSION "2019.3" +#define BATADV_SOURCE_VERSION "2019.4" #endif /* B.A.T.M.A.N. parameters */ diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index c7a2e77ca1da..a1146cb10919 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -943,10 +943,10 @@ static const struct net_device_ops batadv_netdev_ops = { static void batadv_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) { - strlcpy(info->driver, "B.A.T.M.A.N. advanced", sizeof(info->driver)); - strlcpy(info->version, BATADV_SOURCE_VERSION, sizeof(info->version)); - strlcpy(info->fw_version, "N/A", sizeof(info->fw_version)); - strlcpy(info->bus_info, "batman", sizeof(info->bus_info)); + strscpy(info->driver, "B.A.T.M.A.N. 
advanced", sizeof(info->driver)); + strscpy(info->version, BATADV_SOURCE_VERSION, sizeof(info->version)); + strscpy(info->fw_version, "N/A", sizeof(info->fw_version)); + strscpy(info->bus_info, "batman", sizeof(info->bus_info)); } /* Inspired by drivers/net/ethernet/dlink/sundance.c:1702 diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c index 1efcb97039cd..e5bbc28ed12c 100644 --- a/net/batman-adv/sysfs.c +++ b/net/batman-adv/sysfs.c @@ -1070,7 +1070,7 @@ static ssize_t batadv_store_mesh_iface(struct kobject *kobj, dev_hold(net_dev); INIT_WORK(&store_work->work, batadv_store_mesh_iface_work); store_work->net_dev = net_dev; - strlcpy(store_work->soft_iface_name, buff, + strscpy(store_work->soft_iface_name, buff, sizeof(store_work->soft_iface_name)); queue_work(batadv_event_workqueue, &store_work->work); diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 6ae139d74e0f..be7c02aa91e2 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -117,6 +117,18 @@ struct batadv_hard_iface_bat_v { /** @elp_wq: workqueue used to schedule ELP transmissions */ struct delayed_work elp_wq; + /** @aggr_wq: workqueue used to transmit queued OGM packets */ + struct delayed_work aggr_wq; + + /** @aggr_list: queue for to be aggregated OGM packets */ + struct sk_buff_head aggr_list; + + /** @aggr_len: size of the OGM aggregate (excluding ethernet header) */ + unsigned int aggr_len; + + /** @aggr_list_lock: protects aggr_list */ + spinlock_t aggr_list_lock; + /** * @throughput_override: throughput override to disable link * auto-detection diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c index 9d41de1ec90f..bb55d92691b0 100644 --- a/net/bluetooth/6lowpan.c +++ b/net/bluetooth/6lowpan.c @@ -583,7 +583,7 @@ static const struct net_device_ops netdev_ops = { .ndo_start_xmit = bt_xmit, }; -static struct header_ops header_ops = { +static const struct header_ops header_ops = { .create = header_create, }; diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 621f1a97d803..7f6a581b5b7e 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -1054,6 +1054,7 @@ void __hci_req_enable_advertising(struct hci_request *req) struct hci_cp_le_set_adv_param cp; u8 own_addr_type, enable = 0x01; bool connectable; + u16 adv_min_interval, adv_max_interval; u32 flags; flags = get_adv_instance_flags(hdev, hdev->cur_adv_instance); @@ -1087,16 +1088,30 @@ void __hci_req_enable_advertising(struct hci_request *req) return; memset(&cp, 0, sizeof(cp)); - cp.min_interval = cpu_to_le16(hdev->le_adv_min_interval); - cp.max_interval = cpu_to_le16(hdev->le_adv_max_interval); - if (connectable) + if (connectable) { cp.type = LE_ADV_IND; - else if (get_cur_adv_instance_scan_rsp_len(hdev)) - cp.type = LE_ADV_SCAN_IND; - else - cp.type = LE_ADV_NONCONN_IND; + adv_min_interval = hdev->le_adv_min_interval; + adv_max_interval = hdev->le_adv_max_interval; + } else { + if (get_cur_adv_instance_scan_rsp_len(hdev)) + cp.type = LE_ADV_SCAN_IND; + else + cp.type = LE_ADV_NONCONN_IND; + + if (!hci_dev_test_flag(hdev, HCI_DISCOVERABLE) || + hci_dev_test_flag(hdev, HCI_LIMITED_DISCOVERABLE)) { + adv_min_interval = DISCOV_LE_FAST_ADV_INT_MIN; + adv_max_interval = DISCOV_LE_FAST_ADV_INT_MAX; + } else { + adv_min_interval = hdev->le_adv_min_interval; + adv_max_interval = hdev->le_adv_max_interval; + } + } + + cp.min_interval = cpu_to_le16(adv_min_interval); + cp.max_interval = cpu_to_le16(adv_max_interval); cp.own_address_type = own_addr_type; cp.channel_map = 
hdev->le_adv_channel_map; diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 8d889969ae7e..bef84b95e2c4 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -267,7 +267,7 @@ static int hidp_get_raw_report(struct hid_device *hid, set_bit(HIDP_WAITING_FOR_RETURN, &session->flags); data[0] = report_number; ret = hidp_send_ctrl_message(session, report_type, data, 1); - if (ret) + if (ret < 0) goto err; /* Wait for the return of the report. The returned report @@ -343,7 +343,7 @@ static int hidp_set_raw_report(struct hid_device *hid, unsigned char reportnum, data[0] = reportnum; set_bit(HIDP_WAITING_FOR_SEND_ACK, &session->flags); ret = hidp_send_ctrl_message(session, report_type, data, count); - if (ret) + if (ret < 0) goto err; /* Wait for the ACK from the device. */ diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 150114e33b20..acb7c6d5643f 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -2588,7 +2588,6 @@ static int get_connections(struct sock *sk, struct hci_dev *hdev, void *data, { struct mgmt_rp_get_connections *rp; struct hci_conn *c; - size_t rp_len; int err; u16 i; @@ -2608,8 +2607,7 @@ static int get_connections(struct sock *sk, struct hci_dev *hdev, void *data, i++; } - rp_len = sizeof(*rp) + (i * sizeof(struct mgmt_addr_info)); - rp = kmalloc(rp_len, GFP_KERNEL); + rp = kmalloc(struct_size(rp, addr, i), GFP_KERNEL); if (!rp) { err = -ENOMEM; goto unlock; @@ -2629,10 +2627,8 @@ static int get_connections(struct sock *sk, struct hci_dev *hdev, void *data, rp->conn_count = cpu_to_le16(i); /* Recalculate length in case of filtered SCO connections, etc */ - rp_len = sizeof(*rp) + (i * sizeof(struct mgmt_addr_info)); - err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_GET_CONNECTIONS, 0, rp, - rp_len); + struct_size(rp, addr, i)); kfree(rp); diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 80e6f3a6864d..1153bbcdff72 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -377,6 +377,22 @@ out: return ret; } +static int verify_user_bpf_flow_keys(struct bpf_flow_keys *ctx) +{ + /* make sure the fields we don't use are zeroed */ + if (!range_is_zero(ctx, 0, offsetof(struct bpf_flow_keys, flags))) + return -EINVAL; + + /* flags is allowed */ + + if (!range_is_zero(ctx, offsetof(struct bpf_flow_keys, flags) + + FIELD_SIZEOF(struct bpf_flow_keys, flags), + sizeof(struct bpf_flow_keys))) + return -EINVAL; + + return 0; +} + int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr) @@ -384,9 +400,11 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, u32 size = kattr->test.data_size_in; struct bpf_flow_dissector ctx = {}; u32 repeat = kattr->test.repeat; + struct bpf_flow_keys *user_ctx; struct bpf_flow_keys flow_keys; u64 time_start, time_spent = 0; const struct ethhdr *eth; + unsigned int flags = 0; u32 retval, duration; void *data; int ret; @@ -395,9 +413,6 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, if (prog->type != BPF_PROG_TYPE_FLOW_DISSECTOR) return -EINVAL; - if (kattr->test.ctx_in || kattr->test.ctx_out) - return -EINVAL; - if (size < ETH_HLEN) return -EINVAL; @@ -410,6 +425,18 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, if (!repeat) repeat = 1; + user_ctx = bpf_ctx_init(kattr, sizeof(struct bpf_flow_keys)); + if (IS_ERR(user_ctx)) { + kfree(data); + return PTR_ERR(user_ctx); + } + if (user_ctx) { + ret = verify_user_bpf_flow_keys(user_ctx); + if (ret) + goto out; + flags = user_ctx->flags; + } + 
ctx.flow_keys = &flow_keys; ctx.data = data; ctx.data_end = (__u8 *)data + size; @@ -419,7 +446,7 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, time_start = ktime_get_ns(); for (i = 0; i < repeat; i++) { retval = bpf_flow_dissect(prog, &ctx, eth->h_proto, ETH_HLEN, - size); + size, flags); if (signal_pending(current)) { preempt_enable(); @@ -450,8 +477,12 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, ret = bpf_test_finish(kattr, uattr, &flow_keys, sizeof(flow_keys), retval, duration); + if (!ret) + ret = bpf_ctx_finish(kattr, uattr, user_ctx, + sizeof(struct bpf_flow_keys)); out: + kfree(user_ctx); kfree(data); return ret; } diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index 63f9c08625f0..da5ed4cf9233 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -60,6 +60,8 @@ static void __mdb_entry_fill_flags(struct br_mdb_entry *e, unsigned char flags) e->flags = 0; if (flags & MDB_PG_FLAGS_OFFLOAD) e->flags |= MDB_FLAGS_OFFLOAD; + if (flags & MDB_PG_FLAGS_FAST_LEAVE) + e->flags |= MDB_FLAGS_FAST_LEAVE; } static void __mdb_entry_to_br_ip(struct br_mdb_entry *entry, struct br_ip *ip) @@ -75,6 +77,53 @@ static void __mdb_entry_to_br_ip(struct br_mdb_entry *entry, struct br_ip *ip) #endif } +static int __mdb_fill_info(struct sk_buff *skb, + struct net_bridge_mdb_entry *mp, + struct net_bridge_port_group *p) +{ + struct timer_list *mtimer; + struct nlattr *nest_ent; + struct br_mdb_entry e; + u8 flags = 0; + int ifindex; + + memset(&e, 0, sizeof(e)); + if (p) { + ifindex = p->port->dev->ifindex; + mtimer = &p->timer; + flags = p->flags; + } else { + ifindex = mp->br->dev->ifindex; + mtimer = &mp->timer; + } + + __mdb_entry_fill_flags(&e, flags); + e.ifindex = ifindex; + e.vid = mp->addr.vid; + if (mp->addr.proto == htons(ETH_P_IP)) + e.addr.u.ip4 = mp->addr.u.ip4; +#if IS_ENABLED(CONFIG_IPV6) + if (mp->addr.proto == htons(ETH_P_IPV6)) + e.addr.u.ip6 = mp->addr.u.ip6; +#endif + e.addr.proto = mp->addr.proto; + nest_ent = nla_nest_start_noflag(skb, + MDBA_MDB_ENTRY_INFO); + if (!nest_ent) + return -EMSGSIZE; + + if (nla_put_nohdr(skb, sizeof(e), &e) || + nla_put_u32(skb, + MDBA_MDB_EATTR_TIMER, + br_timer_value(mtimer))) { + nla_nest_cancel(skb, nest_ent); + return -EMSGSIZE; + } + nla_nest_end(skb, nest_ent); + + return 0; +} + static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev) { @@ -93,7 +142,6 @@ static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb, hlist_for_each_entry_rcu(mp, &br->mdb_list, mdb_node) { struct net_bridge_port_group *p; struct net_bridge_port_group __rcu **pp; - struct net_bridge_port *port; if (idx < s_idx) goto skip; @@ -104,43 +152,24 @@ static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb, break; } + if (mp->host_joined) { + err = __mdb_fill_info(skb, mp, NULL); + if (err) { + nla_nest_cancel(skb, nest2); + break; + } + } + for (pp = &mp->ports; (p = rcu_dereference(*pp)) != NULL; pp = &p->next) { - struct nlattr *nest_ent; - struct br_mdb_entry e; - - port = p->port; - if (!port) + if (!p->port) continue; - memset(&e, 0, sizeof(e)); - e.ifindex = port->dev->ifindex; - e.vid = p->addr.vid; - __mdb_entry_fill_flags(&e, p->flags); - if (p->addr.proto == htons(ETH_P_IP)) - e.addr.u.ip4 = p->addr.u.ip4; -#if IS_ENABLED(CONFIG_IPV6) - if (p->addr.proto == htons(ETH_P_IPV6)) - e.addr.u.ip6 = p->addr.u.ip6; -#endif - e.addr.proto = p->addr.proto; - nest_ent = nla_nest_start_noflag(skb, - MDBA_MDB_ENTRY_INFO); - if (!nest_ent) { - 
nla_nest_cancel(skb, nest2); - err = -EMSGSIZE; - goto out; - } - if (nla_put_nohdr(skb, sizeof(e), &e) || - nla_put_u32(skb, - MDBA_MDB_EATTR_TIMER, - br_timer_value(&p->timer))) { - nla_nest_cancel(skb, nest_ent); + err = __mdb_fill_info(skb, mp, p); + if (err) { nla_nest_cancel(skb, nest2); - err = -EMSGSIZE; goto out; } - nla_nest_end(skb, nest_ent); } nla_nest_end(skb, nest2); skip: @@ -587,6 +616,19 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port, return err; } + /* host join */ + if (!port) { + /* don't allow any flags for host-joined groups */ + if (state) + return -EINVAL; + if (mp->host_joined) + return -EEXIST; + + br_multicast_host_join(mp, false); + + return 0; + } + for (pp = &mp->ports; (p = mlock_dereference(*pp, br)) != NULL; pp = &p->next) { @@ -611,19 +653,21 @@ static int __br_mdb_add(struct net *net, struct net_bridge *br, { struct br_ip ip; struct net_device *dev; - struct net_bridge_port *p; + struct net_bridge_port *p = NULL; int ret; if (!netif_running(br->dev) || !br_opt_get(br, BROPT_MULTICAST_ENABLED)) return -EINVAL; - dev = __dev_get_by_index(net, entry->ifindex); - if (!dev) - return -ENODEV; + if (entry->ifindex != br->dev->ifindex) { + dev = __dev_get_by_index(net, entry->ifindex); + if (!dev) + return -ENODEV; - p = br_port_get_rtnl(dev); - if (!p || p->br != br || p->state == BR_STATE_DISABLED) - return -EINVAL; + p = br_port_get_rtnl(dev); + if (!p || p->br != br || p->state == BR_STATE_DISABLED) + return -EINVAL; + } __mdb_entry_to_br_ip(entry, &ip); @@ -638,9 +682,9 @@ static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, { struct net *net = sock_net(skb->sk); struct net_bridge_vlan_group *vg; + struct net_bridge_port *p = NULL; struct net_device *dev, *pdev; struct br_mdb_entry *entry; - struct net_bridge_port *p; struct net_bridge_vlan *v; struct net_bridge *br; int err; @@ -651,18 +695,22 @@ static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, br = netdev_priv(dev); + if (entry->ifindex != br->dev->ifindex) { + pdev = __dev_get_by_index(net, entry->ifindex); + if (!pdev) + return -ENODEV; + + p = br_port_get_rtnl(pdev); + if (!p || p->br != br || p->state == BR_STATE_DISABLED) + return -EINVAL; + vg = nbp_vlan_group(p); + } else { + vg = br_vlan_group(br); + } + /* If vlan filtering is enabled and VLAN is not specified * install mdb entry on all vlans configured on the port. 
*/ - pdev = __dev_get_by_index(net, entry->ifindex); - if (!pdev) - return -ENODEV; - - p = br_port_get_rtnl(pdev); - if (!p || p->br != br || p->state == BR_STATE_DISABLED) - return -EINVAL; - - vg = nbp_vlan_group(p); if (br_vlan_enabled(br->dev) && vg && entry->vid == 0) { list_for_each_entry(v, &vg->vlan_list, vlist) { entry->vid = v->vid; @@ -698,6 +746,15 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry) if (!mp) goto unlock; + /* host leave */ + if (entry->ifindex == mp->br->dev->ifindex && mp->host_joined) { + br_multicast_host_leave(mp, false); + err = 0; + if (!mp->ports && netif_running(br->dev)) + mod_timer(&mp->timer, jiffies); + goto unlock; + } + for (pp = &mp->ports; (p = mlock_dereference(*pp, br)) != NULL; pp = &p->next) { @@ -730,9 +787,9 @@ static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, { struct net *net = sock_net(skb->sk); struct net_bridge_vlan_group *vg; + struct net_bridge_port *p = NULL; struct net_device *dev, *pdev; struct br_mdb_entry *entry; - struct net_bridge_port *p; struct net_bridge_vlan *v; struct net_bridge *br; int err; @@ -743,18 +800,22 @@ static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, br = netdev_priv(dev); + if (entry->ifindex != br->dev->ifindex) { + pdev = __dev_get_by_index(net, entry->ifindex); + if (!pdev) + return -ENODEV; + + p = br_port_get_rtnl(pdev); + if (!p || p->br != br || p->state == BR_STATE_DISABLED) + return -EINVAL; + vg = nbp_vlan_group(p); + } else { + vg = br_vlan_group(br); + } + /* If vlan filtering is enabled and VLAN is not specified * delete mdb entry on all vlans configured on the port. */ - pdev = __dev_get_by_index(net, entry->ifindex); - if (!pdev) - return -ENODEV; - - p = br_port_get_rtnl(pdev); - if (!p || p->br != br || p->state == BR_STATE_DISABLED) - return -EINVAL; - - vg = nbp_vlan_group(p); if (br_vlan_enabled(br->dev) && vg && entry->vid == 0) { list_for_each_entry(v, &vg->vlan_list, vlist) { entry->vid = v->vid; diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index f8cac3702712..ad12fe3fca8c 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -148,8 +148,7 @@ static void br_multicast_group_expired(struct timer_list *t) if (!netif_running(br->dev) || timer_pending(&mp->timer)) goto out; - mp->host_joined = false; - br_mdb_notify(br->dev, NULL, &mp->addr, RTM_DELMDB, 0); + br_multicast_host_leave(mp, true); if (mp->ports) goto out; @@ -512,6 +511,27 @@ static bool br_port_group_equal(struct net_bridge_port_group *p, return ether_addr_equal(src, p->eth_addr); } +void br_multicast_host_join(struct net_bridge_mdb_entry *mp, bool notify) +{ + if (!mp->host_joined) { + mp->host_joined = true; + if (notify) + br_mdb_notify(mp->br->dev, NULL, &mp->addr, + RTM_NEWMDB, 0); + } + mod_timer(&mp->timer, jiffies + mp->br->multicast_membership_interval); +} + +void br_multicast_host_leave(struct net_bridge_mdb_entry *mp, bool notify) +{ + if (!mp->host_joined) + return; + + mp->host_joined = false; + if (notify) + br_mdb_notify(mp->br->dev, NULL, &mp->addr, RTM_DELMDB, 0); +} + static int br_multicast_add_group(struct net_bridge *br, struct net_bridge_port *port, struct br_ip *group, @@ -534,11 +554,7 @@ static int br_multicast_add_group(struct net_bridge *br, goto err; if (!port) { - if (!mp->host_joined) { - mp->host_joined = true; - br_mdb_notify(br->dev, NULL, &mp->addr, RTM_NEWMDB, 0); - } - mod_timer(&mp->timer, now + br->multicast_membership_interval); + br_multicast_host_join(mp, true); goto out; } @@ -1396,7 
+1412,7 @@ br_multicast_leave_group(struct net_bridge *br, del_timer(&p->timer); kfree_rcu(p, rcu); br_mdb_notify(br->dev, port, group, RTM_DELMDB, - p->flags); + p->flags | MDB_PG_FLAGS_FAST_LEAVE); if (!mp->ports && !mp->host_joined && netif_running(br->dev)) diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 646504db0220..ce2ab14ee605 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -199,6 +199,7 @@ struct net_bridge_fdb_entry { #define MDB_PG_FLAGS_PERMANENT BIT(0) #define MDB_PG_FLAGS_OFFLOAD BIT(1) +#define MDB_PG_FLAGS_FAST_LEAVE BIT(2) struct net_bridge_port_group { struct net_bridge_port *port; @@ -701,6 +702,8 @@ void br_multicast_get_stats(const struct net_bridge *br, struct br_mcast_stats *dest); void br_mdb_init(void); void br_mdb_uninit(void); +void br_multicast_host_join(struct net_bridge_mdb_entry *mp, bool notify); +void br_multicast_host_leave(struct net_bridge_mdb_entry *mp, bool notify); #define mlock_dereference(X, br) \ rcu_dereference_protected(X, lockdep_is_held(&br->multicast_lock)) diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index f5b2aeebbfe9..bb98984cd27d 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -1281,6 +1281,8 @@ int br_vlan_get_info(const struct net_device *dev, u16 vid, p_vinfo->vid = vid; p_vinfo->flags = v->flags; + if (vid == br_get_pvid(vg)) + p_vinfo->flags |= BRIDGE_VLAN_INFO_PVID; return 0; } EXPORT_SYMBOL_GPL(br_vlan_get_info); diff --git a/net/bridge/netfilter/ebt_802_3.c b/net/bridge/netfilter/ebt_802_3.c index 2c8fe24400e5..68c2519bdc52 100644 --- a/net/bridge/netfilter/ebt_802_3.c +++ b/net/bridge/netfilter/ebt_802_3.c @@ -11,7 +11,13 @@ #include <linux/module.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter_bridge/ebtables.h> -#include <linux/netfilter_bridge/ebt_802_3.h> +#include <linux/skbuff.h> +#include <uapi/linux/netfilter_bridge/ebt_802_3.h> + +static struct ebt_802_3_hdr *ebt_802_3_hdr(const struct sk_buff *skb) +{ + return (struct ebt_802_3_hdr *)skb_mac_header(skb); +} static bool ebt_802_3_mt(const struct sk_buff *skb, struct xt_action_param *par) diff --git a/net/bridge/netfilter/nf_conntrack_bridge.c b/net/bridge/netfilter/nf_conntrack_bridge.c index 4f5444d2a526..8842798c29e6 100644 --- a/net/bridge/netfilter/nf_conntrack_bridge.c +++ b/net/bridge/netfilter/nf_conntrack_bridge.c @@ -17,7 +17,6 @@ #include <net/netfilter/nf_conntrack_bridge.h> #include <linux/netfilter/nf_tables.h> -#include <net/netfilter/ipv6/nf_defrag_ipv6.h> #include <net/netfilter/nf_tables.h> #include "../br_private.h" @@ -27,9 +26,9 @@ */ static int nf_br_ip_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, - struct nf_ct_bridge_frag_data *data, + struct nf_bridge_frag_data *data, int (*output)(struct net *, struct sock *sk, - const struct nf_ct_bridge_frag_data *data, + const struct nf_bridge_frag_data *data, struct sk_buff *)) { int frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size; @@ -279,7 +278,7 @@ static unsigned int nf_ct_bridge_pre(void *priv, struct sk_buff *skb, } static void nf_ct_bridge_frag_save(struct sk_buff *skb, - struct nf_ct_bridge_frag_data *data) + struct nf_bridge_frag_data *data) { if (skb_vlan_tag_present(skb)) { data->vlan_present = true; @@ -294,10 +293,10 @@ static void nf_ct_bridge_frag_save(struct sk_buff *skb, static unsigned int nf_ct_bridge_refrag(struct sk_buff *skb, const struct nf_hook_state *state, int (*output)(struct net *, struct sock *sk, - const struct nf_ct_bridge_frag_data *data, + const struct 
nf_bridge_frag_data *data, struct sk_buff *)) { - struct nf_ct_bridge_frag_data data; + struct nf_bridge_frag_data data; if (!BR_INPUT_SKB_CB(skb)->frag_max_size) return NF_ACCEPT; @@ -320,7 +319,7 @@ nf_ct_bridge_refrag(struct sk_buff *skb, const struct nf_hook_state *state, /* Actually only slow path refragmentation needs this. */ static int nf_ct_bridge_frag_restore(struct sk_buff *skb, - const struct nf_ct_bridge_frag_data *data) + const struct nf_bridge_frag_data *data) { int err; @@ -341,7 +340,7 @@ static int nf_ct_bridge_frag_restore(struct sk_buff *skb, } static int nf_ct_bridge_refrag_post(struct net *net, struct sock *sk, - const struct nf_ct_bridge_frag_data *data, + const struct nf_bridge_frag_data *data, struct sk_buff *skb) { int err; diff --git a/net/can/Kconfig b/net/can/Kconfig index 0f9fe846ddef..d77042752457 100644 --- a/net/can/Kconfig +++ b/net/can/Kconfig @@ -8,11 +8,12 @@ menuconfig CAN tristate "CAN bus subsystem support" ---help--- Controller Area Network (CAN) is a slow (up to 1Mbit/s) serial - communications protocol which was developed by Bosch in - 1991, mainly for automotive, but now widely used in marine - (NMEA2000), industrial, and medical applications. - More information on the CAN network protocol family PF_CAN - is contained in <Documentation/networking/can.rst>. + communications protocol. Development of the CAN bus started in + 1983 at Robert Bosch GmbH, and the protocol was officially + released in 1986. The CAN bus was originally mainly for automotive, + but is now widely used in marine (NMEA2000), industrial, and medical + applications. More information on the CAN network protocol family + PF_CAN is contained in <Documentation/networking/can.rst>. If you want CAN support you should say Y here and also to the specific driver for your controller(s) below. @@ -52,6 +53,8 @@ config CAN_GW They can be modified with AND/OR/XOR/SET operations as configured by the netlink configuration interface known e.g. from iptables. 
+source "net/can/j1939/Kconfig" + source "drivers/net/can/Kconfig" endif diff --git a/net/can/Makefile b/net/can/Makefile index 1242bbbfe57f..08bd217fc051 100644 --- a/net/can/Makefile +++ b/net/can/Makefile @@ -15,3 +15,5 @@ can-bcm-y := bcm.o obj-$(CONFIG_CAN_GW) += can-gw.o can-gw-y := gw.o + +obj-$(CONFIG_CAN_J1939) += j1939/ diff --git a/net/can/af_can.c b/net/can/af_can.c index 80281ef2ccbd..5518a7d9eed9 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -1,5 +1,5 @@ -/* - * af_can.c - Protocol family CAN core module +// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) +/* af_can.c - Protocol family CAN core module * (used by different CAN protocol modules) * * Copyright (c) 2002-2017 Volkswagen Group Electronic Research @@ -58,6 +58,7 @@ #include <linux/can.h> #include <linux/can/core.h> #include <linux/can/skb.h> +#include <linux/can/can-ml.h> #include <linux/ratelimit.h> #include <net/net_namespace.h> #include <net/sock.h> @@ -83,18 +84,7 @@ static DEFINE_MUTEX(proto_tab_lock); static atomic_t skbcounter = ATOMIC_INIT(0); -/* - * af_can socket functions - */ - -int can_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) -{ - switch (cmd) { - default: - return -ENOIOCTLCMD; - } -} -EXPORT_SYMBOL(can_ioctl); +/* af_can socket functions */ static void can_sock_destruct(struct sock *sk) { @@ -140,14 +130,13 @@ static int can_create(struct net *net, struct socket *sock, int protocol, err = request_module("can-proto-%d", protocol); - /* - * In case of error we only print a message but don't + /* In case of error we only print a message but don't * return the error code immediately. Below we will * return -EPROTONOSUPPORT */ if (err) - printk_ratelimited(KERN_ERR "can: request_module " - "(can-proto-%d) failed.\n", protocol); + pr_err_ratelimited("can: request_module (can-proto-%d) failed.\n", + protocol); cp = can_get_proto(protocol); } @@ -188,9 +177,7 @@ static int can_create(struct net *net, struct socket *sock, int protocol, return err; } -/* - * af_can tx path - */ +/* af_can tx path */ /** * can_send - transmit a CAN frame (optional with local loopback) @@ -212,7 +199,7 @@ int can_send(struct sk_buff *skb, int loop) { struct sk_buff *newskb = NULL; struct canfd_frame *cfd = (struct canfd_frame *)skb->data; - struct s_stats *can_stats = dev_net(skb->dev)->can.can_stats; + struct can_pkg_stats *pkg_stats = dev_net(skb->dev)->can.pkg_stats; int err = -EINVAL; if (skb->len == CAN_MTU) { @@ -223,11 +210,11 @@ int can_send(struct sk_buff *skb, int loop) skb->protocol = htons(ETH_P_CANFD); if (unlikely(cfd->len > CANFD_MAX_DLEN)) goto inval_skb; - } else + } else { goto inval_skb; + } - /* - * Make sure the CAN frame can pass the selected CAN netdevice. + /* Make sure the CAN frame can pass the selected CAN netdevice. * As structs can_frame and canfd_frame are similar, we can provide * CAN FD frames to legacy CAN drivers as long as the length is <= 8 */ @@ -258,8 +245,7 @@ int can_send(struct sk_buff *skb, int loop) /* indication for the CAN driver: do loopback */ skb->pkt_type = PACKET_LOOPBACK; - /* - * The reference to the originating sock may be required + /* The reference to the originating sock may be required * by the receiving socket to check whether the frame is * its own. 
Example: can_raw sockopt CAN_RAW_RECV_OWN_MSGS * Therefore we have to ensure that skb->sk remains the @@ -268,8 +254,7 @@ int can_send(struct sk_buff *skb, int loop) */ if (!(skb->dev->flags & IFF_ECHO)) { - /* - * If the interface is not capable to do loopback + /* If the interface is not capable to do loopback * itself, we do it here. */ newskb = skb_clone(skb, GFP_ATOMIC); @@ -301,8 +286,8 @@ int can_send(struct sk_buff *skb, int loop) netif_rx_ni(newskb); /* update statistics */ - can_stats->tx_frames++; - can_stats->tx_frames_delta++; + pkg_stats->tx_frames++; + pkg_stats->tx_frames_delta++; return 0; @@ -312,17 +297,17 @@ inval_skb: } EXPORT_SYMBOL(can_send); -/* - * af_can rx path - */ +/* af_can rx path */ -static struct can_dev_rcv_lists *find_dev_rcv_lists(struct net *net, - struct net_device *dev) +static struct can_dev_rcv_lists *can_dev_rcv_lists_find(struct net *net, + struct net_device *dev) { - if (!dev) - return net->can.can_rx_alldev_list; - else - return (struct can_dev_rcv_lists *)dev->ml_priv; + if (dev) { + struct can_ml_priv *ml_priv = dev->ml_priv; + return &ml_priv->dev_rcv_lists; + } else { + return net->can.rx_alldev_list; + } } /** @@ -349,7 +334,7 @@ static unsigned int effhash(canid_t can_id) } /** - * find_rcv_list - determine optimal filterlist inside device filter struct + * can_rcv_list_find - determine optimal filterlist inside device filter struct * @can_id: pointer to CAN identifier of a given can_filter * @mask: pointer to CAN mask of a given can_filter * @d: pointer to the device filter struct @@ -375,8 +360,8 @@ static unsigned int effhash(canid_t can_id) * Constistency checked mask. * Reduced can_id to have a preprocessed filter compare value. */ -static struct hlist_head *find_rcv_list(canid_t *can_id, canid_t *mask, - struct can_dev_rcv_lists *d) +static struct hlist_head *can_rcv_list_find(canid_t *can_id, canid_t *mask, + struct can_dev_rcv_lists *dev_rcv_lists) { canid_t inv = *can_id & CAN_INV_FILTER; /* save flag before masking */ @@ -384,7 +369,7 @@ static struct hlist_head *find_rcv_list(canid_t *can_id, canid_t *mask, if (*mask & CAN_ERR_FLAG) { /* clear CAN_ERR_FLAG in filter entry */ *mask &= CAN_ERR_MASK; - return &d->rx[RX_ERR]; + return &dev_rcv_lists->rx[RX_ERR]; } /* with cleared CAN_ERR_FLAG we have a simple mask/value filterpair */ @@ -400,27 +385,26 @@ static struct hlist_head *find_rcv_list(canid_t *can_id, canid_t *mask, /* inverse can_id/can_mask filter */ if (inv) - return &d->rx[RX_INV]; + return &dev_rcv_lists->rx[RX_INV]; /* mask == 0 => no condition testing at receive time */ if (!(*mask)) - return &d->rx[RX_ALL]; + return &dev_rcv_lists->rx[RX_ALL]; /* extra filterlists for the subscription of a single non-RTR can_id */ if (((*mask & CAN_EFF_RTR_FLAGS) == CAN_EFF_RTR_FLAGS) && !(*can_id & CAN_RTR_FLAG)) { - if (*can_id & CAN_EFF_FLAG) { if (*mask == (CAN_EFF_MASK | CAN_EFF_RTR_FLAGS)) - return &d->rx_eff[effhash(*can_id)]; + return &dev_rcv_lists->rx_eff[effhash(*can_id)]; } else { if (*mask == (CAN_SFF_MASK | CAN_EFF_RTR_FLAGS)) - return &d->rx_sff[*can_id]; + return &dev_rcv_lists->rx_sff[*can_id]; } } /* default: filter via can_id/can_mask */ - return &d->rx[RX_FIL]; + return &dev_rcv_lists->rx[RX_FIL]; } /** @@ -457,10 +441,10 @@ int can_rx_register(struct net *net, struct net_device *dev, canid_t can_id, canid_t mask, void (*func)(struct sk_buff *, void *), void *data, char *ident, struct sock *sk) { - struct receiver *r; - struct hlist_head *rl; - struct can_dev_rcv_lists *d; - struct s_pstats *can_pstats = 
net->can.can_pstats; + struct receiver *rcv; + struct hlist_head *rcv_list; + struct can_dev_rcv_lists *dev_rcv_lists; + struct can_rcv_lists_stats *rcv_lists_stats = net->can.rcv_lists_stats; int err = 0; /* insert new receiver (dev,canid,mask) -> (func,data) */ @@ -471,50 +455,42 @@ int can_rx_register(struct net *net, struct net_device *dev, canid_t can_id, if (dev && !net_eq(net, dev_net(dev))) return -ENODEV; - r = kmem_cache_alloc(rcv_cache, GFP_KERNEL); - if (!r) + rcv = kmem_cache_alloc(rcv_cache, GFP_KERNEL); + if (!rcv) return -ENOMEM; - spin_lock(&net->can.can_rcvlists_lock); + spin_lock_bh(&net->can.rcvlists_lock); - d = find_dev_rcv_lists(net, dev); - if (d) { - rl = find_rcv_list(&can_id, &mask, d); + dev_rcv_lists = can_dev_rcv_lists_find(net, dev); + rcv_list = can_rcv_list_find(&can_id, &mask, dev_rcv_lists); - r->can_id = can_id; - r->mask = mask; - r->matches = 0; - r->func = func; - r->data = data; - r->ident = ident; - r->sk = sk; + rcv->can_id = can_id; + rcv->mask = mask; + rcv->matches = 0; + rcv->func = func; + rcv->data = data; + rcv->ident = ident; + rcv->sk = sk; - hlist_add_head_rcu(&r->list, rl); - d->entries++; + hlist_add_head_rcu(&rcv->list, rcv_list); + dev_rcv_lists->entries++; - can_pstats->rcv_entries++; - if (can_pstats->rcv_entries_max < can_pstats->rcv_entries) - can_pstats->rcv_entries_max = can_pstats->rcv_entries; - } else { - kmem_cache_free(rcv_cache, r); - err = -ENODEV; - } - - spin_unlock(&net->can.can_rcvlists_lock); + rcv_lists_stats->rcv_entries++; + rcv_lists_stats->rcv_entries_max = max(rcv_lists_stats->rcv_entries_max, + rcv_lists_stats->rcv_entries); + spin_unlock_bh(&net->can.rcvlists_lock); return err; } EXPORT_SYMBOL(can_rx_register); -/* - * can_rx_delete_receiver - rcu callback for single receiver entry removal - */ +/* can_rx_delete_receiver - rcu callback for single receiver entry removal */ static void can_rx_delete_receiver(struct rcu_head *rp) { - struct receiver *r = container_of(rp, struct receiver, rcu); - struct sock *sk = r->sk; + struct receiver *rcv = container_of(rp, struct receiver, rcu); + struct sock *sk = rcv->sk; - kmem_cache_free(rcv_cache, r); + kmem_cache_free(rcv_cache, rcv); if (sk) sock_put(sk); } @@ -534,10 +510,10 @@ void can_rx_unregister(struct net *net, struct net_device *dev, canid_t can_id, canid_t mask, void (*func)(struct sk_buff *, void *), void *data) { - struct receiver *r = NULL; - struct hlist_head *rl; - struct s_pstats *can_pstats = net->can.can_pstats; - struct can_dev_rcv_lists *d; + struct receiver *rcv = NULL; + struct hlist_head *rcv_list; + struct can_rcv_lists_stats *rcv_lists_stats = net->can.rcv_lists_stats; + struct can_dev_rcv_lists *dev_rcv_lists; if (dev && dev->type != ARPHRD_CAN) return; @@ -545,86 +521,69 @@ void can_rx_unregister(struct net *net, struct net_device *dev, canid_t can_id, if (dev && !net_eq(net, dev_net(dev))) return; - spin_lock(&net->can.can_rcvlists_lock); + spin_lock_bh(&net->can.rcvlists_lock); - d = find_dev_rcv_lists(net, dev); - if (!d) { - pr_err("BUG: receive list not found for " - "dev %s, id %03X, mask %03X\n", - DNAME(dev), can_id, mask); - goto out; - } + dev_rcv_lists = can_dev_rcv_lists_find(net, dev); + rcv_list = can_rcv_list_find(&can_id, &mask, dev_rcv_lists); - rl = find_rcv_list(&can_id, &mask, d); - - /* - * Search the receiver list for the item to delete. This should + /* Search the receiver list for the item to delete. This should * exist, since no receiver may be unregistered that hasn't * been registered before. 
*/ - - hlist_for_each_entry_rcu(r, rl, list) { - if (r->can_id == can_id && r->mask == mask && - r->func == func && r->data == data) + hlist_for_each_entry_rcu(rcv, rcv_list, list) { + if (rcv->can_id == can_id && rcv->mask == mask && + rcv->func == func && rcv->data == data) break; } - /* - * Check for bugs in CAN protocol implementations using af_can.c: - * 'r' will be NULL if no matching list item was found for removal. + /* Check for bugs in CAN protocol implementations using af_can.c: + * 'rcv' will be NULL if no matching list item was found for removal. */ - - if (!r) { - WARN(1, "BUG: receive list entry not found for dev %s, " - "id %03X, mask %03X\n", DNAME(dev), can_id, mask); + if (!rcv) { + WARN(1, "BUG: receive list entry not found for dev %s, id %03X, mask %03X\n", + DNAME(dev), can_id, mask); goto out; } - hlist_del_rcu(&r->list); - d->entries--; + hlist_del_rcu(&rcv->list); + dev_rcv_lists->entries--; - if (can_pstats->rcv_entries > 0) - can_pstats->rcv_entries--; - - /* remove device structure requested by NETDEV_UNREGISTER */ - if (d->remove_on_zero_entries && !d->entries) { - kfree(d); - dev->ml_priv = NULL; - } + if (rcv_lists_stats->rcv_entries > 0) + rcv_lists_stats->rcv_entries--; out: - spin_unlock(&net->can.can_rcvlists_lock); + spin_unlock_bh(&net->can.rcvlists_lock); /* schedule the receiver item for deletion */ - if (r) { - if (r->sk) - sock_hold(r->sk); - call_rcu(&r->rcu, can_rx_delete_receiver); + if (rcv) { + if (rcv->sk) + sock_hold(rcv->sk); + call_rcu(&rcv->rcu, can_rx_delete_receiver); } } EXPORT_SYMBOL(can_rx_unregister); -static inline void deliver(struct sk_buff *skb, struct receiver *r) +static inline void deliver(struct sk_buff *skb, struct receiver *rcv) { - r->func(skb, r->data); - r->matches++; + rcv->func(skb, rcv->data); + rcv->matches++; } -static int can_rcv_filter(struct can_dev_rcv_lists *d, struct sk_buff *skb) +static int can_rcv_filter(struct can_dev_rcv_lists *dev_rcv_lists, struct sk_buff *skb) { - struct receiver *r; + struct receiver *rcv; int matches = 0; struct can_frame *cf = (struct can_frame *)skb->data; canid_t can_id = cf->can_id; - if (d->entries == 0) + if (dev_rcv_lists->entries == 0) return 0; if (can_id & CAN_ERR_FLAG) { /* check for error message frame entries only */ - hlist_for_each_entry_rcu(r, &d->rx[RX_ERR], list) { - if (can_id & r->mask) { - deliver(skb, r); + hlist_for_each_entry_rcu(rcv, &dev_rcv_lists->rx[RX_ERR], list) { + if (can_id & rcv->mask) { + deliver(skb, rcv); matches++; } } @@ -632,23 +591,23 @@ static int can_rcv_filter(struct can_dev_rcv_lists *d, struct sk_buff *skb) } /* check for unfiltered entries */ - hlist_for_each_entry_rcu(r, &d->rx[RX_ALL], list) { - deliver(skb, r); + hlist_for_each_entry_rcu(rcv, &dev_rcv_lists->rx[RX_ALL], list) { + deliver(skb, rcv); matches++; } /* check for can_id/mask entries */ - hlist_for_each_entry_rcu(r, &d->rx[RX_FIL], list) { - if ((can_id & r->mask) == r->can_id) { - deliver(skb, r); + hlist_for_each_entry_rcu(rcv, &dev_rcv_lists->rx[RX_FIL], list) { + if ((can_id & rcv->mask) == rcv->can_id) { + deliver(skb, rcv); matches++; } } /* check for inverted can_id/mask entries */ - hlist_for_each_entry_rcu(r, &d->rx[RX_INV], list) { - if ((can_id & r->mask) != r->can_id) { - deliver(skb, r); + hlist_for_each_entry_rcu(rcv, &dev_rcv_lists->rx[RX_INV], list) { + if ((can_id & rcv->mask) != rcv->can_id) { + deliver(skb, rcv); matches++; } } @@ -658,16 +617,16 @@ static int can_rcv_filter(struct can_dev_rcv_lists *d, struct sk_buff *skb) return matches; if (can_id & 
CAN_EFF_FLAG) { - hlist_for_each_entry_rcu(r, &d->rx_eff[effhash(can_id)], list) { - if (r->can_id == can_id) { - deliver(skb, r); + hlist_for_each_entry_rcu(rcv, &dev_rcv_lists->rx_eff[effhash(can_id)], list) { + if (rcv->can_id == can_id) { + deliver(skb, rcv); matches++; } } } else { can_id &= CAN_SFF_MASK; - hlist_for_each_entry_rcu(r, &d->rx_sff[can_id], list) { - deliver(skb, r); + hlist_for_each_entry_rcu(rcv, &dev_rcv_lists->rx_sff[can_id], list) { + deliver(skb, rcv); matches++; } } @@ -677,14 +636,14 @@ static int can_rcv_filter(struct can_dev_rcv_lists *d, struct sk_buff *skb) static void can_receive(struct sk_buff *skb, struct net_device *dev) { - struct can_dev_rcv_lists *d; + struct can_dev_rcv_lists *dev_rcv_lists; struct net *net = dev_net(dev); - struct s_stats *can_stats = net->can.can_stats; + struct can_pkg_stats *pkg_stats = net->can.pkg_stats; int matches; /* update statistics */ - can_stats->rx_frames++; - can_stats->rx_frames_delta++; + pkg_stats->rx_frames++; + pkg_stats->rx_frames_delta++; /* create non-zero unique skb identifier together with *skb */ while (!(can_skb_prv(skb)->skbcnt)) @@ -693,12 +652,11 @@ static void can_receive(struct sk_buff *skb, struct net_device *dev) rcu_read_lock(); /* deliver the packet to sockets listening on all devices */ - matches = can_rcv_filter(net->can.can_rx_alldev_list, skb); + matches = can_rcv_filter(net->can.rx_alldev_list, skb); /* find receive list for this device */ - d = find_dev_rcv_lists(net, dev); - if (d) - matches += can_rcv_filter(d, skb); + dev_rcv_lists = can_dev_rcv_lists_find(net, dev); + matches += can_rcv_filter(dev_rcv_lists, skb); rcu_read_unlock(); @@ -706,8 +664,8 @@ static void can_receive(struct sk_buff *skb, struct net_device *dev) consume_skb(skb); if (matches > 0) { - can_stats->matches++; - can_stats->matches_delta++; + pkg_stats->matches++; + pkg_stats->matches_delta++; } } @@ -729,7 +687,7 @@ static int can_rcv(struct sk_buff *skb, struct net_device *dev, } static int canfd_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt, struct net_device *orig_dev) + struct packet_type *pt, struct net_device *orig_dev) { struct canfd_frame *cfd = (struct canfd_frame *)skb->data; @@ -745,9 +703,7 @@ static int canfd_rcv(struct sk_buff *skb, struct net_device *dev, return NET_RX_SUCCESS; } -/* - * af_can protocol functions - */ +/* af_can protocol functions */ /** * can_proto_register - register CAN transport protocol @@ -778,8 +734,9 @@ int can_proto_register(const struct can_proto *cp) if (rcu_access_pointer(proto_tab[proto])) { pr_err("can: protocol %d already registered\n", proto); err = -EBUSY; - } else + } else { RCU_INIT_POINTER(proto_tab[proto], cp); + } mutex_unlock(&proto_tab_lock); @@ -809,48 +766,19 @@ void can_proto_unregister(const struct can_proto *cp) } EXPORT_SYMBOL(can_proto_unregister); -/* - * af_can notifier to create/remove CAN netdevice specific structs - */ +/* af_can notifier to create/remove CAN netdevice specific structs */ static int can_notifier(struct notifier_block *nb, unsigned long msg, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); - struct can_dev_rcv_lists *d; if (dev->type != ARPHRD_CAN) return NOTIFY_DONE; switch (msg) { - case NETDEV_REGISTER: - - /* create new dev_rcv_lists for this device */ - d = kzalloc(sizeof(*d), GFP_KERNEL); - if (!d) - return NOTIFY_DONE; - BUG_ON(dev->ml_priv); - dev->ml_priv = d; - - break; - - case NETDEV_UNREGISTER: - spin_lock(&dev_net(dev)->can.can_rcvlists_lock); - - d = dev->ml_priv; - if (d) 
{ - if (d->entries) - d->remove_on_zero_entries = 1; - else { - kfree(d); - dev->ml_priv = NULL; - } - } else - pr_err("can: notifier: receive list not found for dev " - "%s\n", dev->name); - - spin_unlock(&dev_net(dev)->can.can_rcvlists_lock); - + WARN(!dev->ml_priv, + "No CAN mid layer private allocated, please fix your driver and use alloc_candev()!\n"); break; } @@ -859,71 +787,54 @@ static int can_notifier(struct notifier_block *nb, unsigned long msg, static int can_pernet_init(struct net *net) { - spin_lock_init(&net->can.can_rcvlists_lock); - net->can.can_rx_alldev_list = - kzalloc(sizeof(struct can_dev_rcv_lists), GFP_KERNEL); - if (!net->can.can_rx_alldev_list) + spin_lock_init(&net->can.rcvlists_lock); + net->can.rx_alldev_list = + kzalloc(sizeof(*net->can.rx_alldev_list), GFP_KERNEL); + if (!net->can.rx_alldev_list) goto out; - net->can.can_stats = kzalloc(sizeof(struct s_stats), GFP_KERNEL); - if (!net->can.can_stats) - goto out_free_alldev_list; - net->can.can_pstats = kzalloc(sizeof(struct s_pstats), GFP_KERNEL); - if (!net->can.can_pstats) - goto out_free_can_stats; + net->can.pkg_stats = kzalloc(sizeof(*net->can.pkg_stats), GFP_KERNEL); + if (!net->can.pkg_stats) + goto out_free_rx_alldev_list; + net->can.rcv_lists_stats = kzalloc(sizeof(*net->can.rcv_lists_stats), GFP_KERNEL); + if (!net->can.rcv_lists_stats) + goto out_free_pkg_stats; if (IS_ENABLED(CONFIG_PROC_FS)) { /* the statistics are updated every second (timer triggered) */ if (stats_timer) { - timer_setup(&net->can.can_stattimer, can_stat_update, + timer_setup(&net->can.stattimer, can_stat_update, 0); - mod_timer(&net->can.can_stattimer, + mod_timer(&net->can.stattimer, round_jiffies(jiffies + HZ)); } - net->can.can_stats->jiffies_init = jiffies; + net->can.pkg_stats->jiffies_init = jiffies; can_init_proc(net); } return 0; - out_free_can_stats: - kfree(net->can.can_stats); - out_free_alldev_list: - kfree(net->can.can_rx_alldev_list); + out_free_pkg_stats: + kfree(net->can.pkg_stats); + out_free_rx_alldev_list: + kfree(net->can.rx_alldev_list); out: return -ENOMEM; } static void can_pernet_exit(struct net *net) { - struct net_device *dev; - if (IS_ENABLED(CONFIG_PROC_FS)) { can_remove_proc(net); if (stats_timer) - del_timer_sync(&net->can.can_stattimer); + del_timer_sync(&net->can.stattimer); } - /* remove created dev_rcv_lists from still registered CAN devices */ - rcu_read_lock(); - for_each_netdev_rcu(net, dev) { - if (dev->type == ARPHRD_CAN && dev->ml_priv) { - struct can_dev_rcv_lists *d = dev->ml_priv; - - BUG_ON(d->entries); - kfree(d); - dev->ml_priv = NULL; - } - } - rcu_read_unlock(); - - kfree(net->can.can_rx_alldev_list); - kfree(net->can.can_stats); - kfree(net->can.can_pstats); + kfree(net->can.rx_alldev_list); + kfree(net->can.pkg_stats); + kfree(net->can.rcv_lists_stats); } -/* - * af_can module init/exit functions - */ +/* af_can module init/exit functions */ static struct packet_type can_packet __read_mostly = { .type = cpu_to_be16(ETH_P_CAN), diff --git a/net/can/af_can.h b/net/can/af_can.h index 9cb3719632bd..7c2d9161e224 100644 --- a/net/can/af_can.h +++ b/net/can/af_can.h @@ -1,5 +1,5 @@ -/* - * Copyright (c) 2002-2007 Volkswagen Group Electronic Research +/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */ +/* Copyright (c) 2002-2007 Volkswagen Group Electronic Research * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -53,32 +53,17 @@ struct receiver { canid_t can_id; canid_t mask; unsigned long matches; - void (*func)(struct sk_buff *, void *); + void (*func)(struct sk_buff *skb, void *data); void *data; char *ident; struct sock *sk; struct rcu_head rcu; }; -#define CAN_SFF_RCV_ARRAY_SZ (1 << CAN_SFF_ID_BITS) -#define CAN_EFF_RCV_HASH_BITS 10 -#define CAN_EFF_RCV_ARRAY_SZ (1 << CAN_EFF_RCV_HASH_BITS) - -enum { RX_ERR, RX_ALL, RX_FIL, RX_INV, RX_MAX }; - -/* per device receive filters linked at dev->ml_priv */ -struct can_dev_rcv_lists { - struct hlist_head rx[RX_MAX]; - struct hlist_head rx_sff[CAN_SFF_RCV_ARRAY_SZ]; - struct hlist_head rx_eff[CAN_EFF_RCV_ARRAY_SZ]; - int remove_on_zero_entries; - int entries; -}; - /* statistic structures */ /* can be reset e.g. by can_init_stats() */ -struct s_stats { +struct can_pkg_stats { unsigned long jiffies_init; unsigned long rx_frames; @@ -103,7 +88,7 @@ struct s_stats { }; /* persistent statistics */ -struct s_pstats { +struct can_rcv_lists_stats { unsigned long stats_reset; unsigned long user_reset; unsigned long rcv_entries; diff --git a/net/can/bcm.c b/net/can/bcm.c index a34ee52f19ea..c96fa0f33db3 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) /* * bcm.c - Broadcast Manager to filter/send (cyclic) CAN content * @@ -105,7 +106,6 @@ struct bcm_op { unsigned long frames_abs, frames_filtered; struct bcm_timeval ival1, ival2; struct hrtimer timer, thrtimer; - struct tasklet_struct tsklet, thrtsklet; ktime_t rx_stamp, kt_ival1, kt_ival2, kt_lastmsg; int rx_ifindex; int cfsiz; @@ -370,25 +370,34 @@ static void bcm_send_to_user(struct bcm_op *op, struct bcm_msg_head *head, } } -static void bcm_tx_start_timer(struct bcm_op *op) +static bool bcm_tx_set_expiry(struct bcm_op *op, struct hrtimer *hrt) { + ktime_t ival; + if (op->kt_ival1 && op->count) - hrtimer_start(&op->timer, - ktime_add(ktime_get(), op->kt_ival1), - HRTIMER_MODE_ABS); + ival = op->kt_ival1; else if (op->kt_ival2) - hrtimer_start(&op->timer, - ktime_add(ktime_get(), op->kt_ival2), - HRTIMER_MODE_ABS); + ival = op->kt_ival2; + else + return false; + + hrtimer_set_expires(hrt, ktime_add(ktime_get(), ival)); + return true; } -static void bcm_tx_timeout_tsklet(unsigned long data) +static void bcm_tx_start_timer(struct bcm_op *op) { - struct bcm_op *op = (struct bcm_op *)data; + if (bcm_tx_set_expiry(op, &op->timer)) + hrtimer_start_expires(&op->timer, HRTIMER_MODE_ABS_SOFT); +} + +/* bcm_tx_timeout_handler - performs cyclic CAN frame transmissions */ +static enum hrtimer_restart bcm_tx_timeout_handler(struct hrtimer *hrtimer) +{ + struct bcm_op *op = container_of(hrtimer, struct bcm_op, timer); struct bcm_msg_head msg_head; if (op->kt_ival1 && (op->count > 0)) { - op->count--; if (!op->count && (op->flags & TX_COUNTEVT)) { @@ -405,22 +414,12 @@ static void bcm_tx_timeout_tsklet(unsigned long data) } bcm_can_tx(op); - } else if (op->kt_ival2) + } else if (op->kt_ival2) { bcm_can_tx(op); + } - bcm_tx_start_timer(op); -} - -/* - * bcm_tx_timeout_handler - performs cyclic CAN frame transmissions - */ -static enum hrtimer_restart bcm_tx_timeout_handler(struct hrtimer *hrtimer) -{ - struct bcm_op *op = container_of(hrtimer, struct bcm_op, timer); - - tasklet_schedule(&op->tsklet); - - return HRTIMER_NORESTART; + return bcm_tx_set_expiry(op, &op->timer) ? 
+ HRTIMER_RESTART : HRTIMER_NORESTART; } /* @@ -486,7 +485,7 @@ static void bcm_rx_update_and_send(struct bcm_op *op, /* do not send the saved data - only start throttle timer */ hrtimer_start(&op->thrtimer, ktime_add(op->kt_lastmsg, op->kt_ival2), - HRTIMER_MODE_ABS); + HRTIMER_MODE_ABS_SOFT); return; } @@ -545,14 +544,21 @@ static void bcm_rx_starttimer(struct bcm_op *op) return; if (op->kt_ival1) - hrtimer_start(&op->timer, op->kt_ival1, HRTIMER_MODE_REL); + hrtimer_start(&op->timer, op->kt_ival1, HRTIMER_MODE_REL_SOFT); } -static void bcm_rx_timeout_tsklet(unsigned long data) +/* bcm_rx_timeout_handler - when the (cyclic) CAN frame reception timed out */ +static enum hrtimer_restart bcm_rx_timeout_handler(struct hrtimer *hrtimer) { - struct bcm_op *op = (struct bcm_op *)data; + struct bcm_op *op = container_of(hrtimer, struct bcm_op, timer); struct bcm_msg_head msg_head; + /* if user wants to be informed, when cyclic CAN-Messages come back */ + if ((op->flags & RX_ANNOUNCE_RESUME) && op->last_frames) { + /* clear received CAN frames to indicate 'nothing received' */ + memset(op->last_frames, 0, op->nframes * op->cfsiz); + } + /* create notification to user */ msg_head.opcode = RX_TIMEOUT; msg_head.flags = op->flags; @@ -563,25 +569,6 @@ static void bcm_rx_timeout_tsklet(unsigned long data) msg_head.nframes = 0; bcm_send_to_user(op, &msg_head, NULL, 0); -} - -/* - * bcm_rx_timeout_handler - when the (cyclic) CAN frame reception timed out - */ -static enum hrtimer_restart bcm_rx_timeout_handler(struct hrtimer *hrtimer) -{ - struct bcm_op *op = container_of(hrtimer, struct bcm_op, timer); - - /* schedule before NET_RX_SOFTIRQ */ - tasklet_hi_schedule(&op->tsklet); - - /* no restart of the timer is done here! */ - - /* if user wants to be informed, when cyclic CAN-Messages come back */ - if ((op->flags & RX_ANNOUNCE_RESUME) && op->last_frames) { - /* clear received CAN frames to indicate 'nothing received' */ - memset(op->last_frames, 0, op->nframes * op->cfsiz); - } return HRTIMER_NORESTART; } @@ -589,14 +576,12 @@ static enum hrtimer_restart bcm_rx_timeout_handler(struct hrtimer *hrtimer) /* * bcm_rx_do_flush - helper for bcm_rx_thr_flush */ -static inline int bcm_rx_do_flush(struct bcm_op *op, int update, - unsigned int index) +static inline int bcm_rx_do_flush(struct bcm_op *op, unsigned int index) { struct canfd_frame *lcf = op->last_frames + op->cfsiz * index; if ((op->last_frames) && (lcf->flags & RX_THR)) { - if (update) - bcm_rx_changed(op, lcf); + bcm_rx_changed(op, lcf); return 1; } return 0; @@ -604,11 +589,8 @@ static inline int bcm_rx_do_flush(struct bcm_op *op, int update, /* * bcm_rx_thr_flush - Check for throttled data and send it to the userspace - * - * update == 0 : just check if throttled data is available (any irq context) - * update == 1 : check and send throttled data to userspace (soft_irq context) */ -static int bcm_rx_thr_flush(struct bcm_op *op, int update) +static int bcm_rx_thr_flush(struct bcm_op *op) { int updated = 0; @@ -617,24 +599,16 @@ static int bcm_rx_thr_flush(struct bcm_op *op, int update) /* for MUX filter we start at index 1 */ for (i = 1; i < op->nframes; i++) - updated += bcm_rx_do_flush(op, update, i); + updated += bcm_rx_do_flush(op, i); } else { /* for RX_FILTER_ID and simple filter */ - updated += bcm_rx_do_flush(op, update, 0); + updated += bcm_rx_do_flush(op, 0); } return updated; } -static void bcm_rx_thr_tsklet(unsigned long data) -{ - struct bcm_op *op = (struct bcm_op *)data; - - /* push the changed data to the userspace */ - 
bcm_rx_thr_flush(op, 1); -} - /* * bcm_rx_thr_handler - the time for blocked content updates is over now: * Check for throttled data and send it to the userspace @@ -643,9 +617,7 @@ static enum hrtimer_restart bcm_rx_thr_handler(struct hrtimer *hrtimer) { struct bcm_op *op = container_of(hrtimer, struct bcm_op, thrtimer); - tasklet_schedule(&op->thrtsklet); - - if (bcm_rx_thr_flush(op, 0)) { + if (bcm_rx_thr_flush(op)) { hrtimer_forward(hrtimer, ktime_get(), op->kt_ival2); return HRTIMER_RESTART; } else { @@ -741,23 +713,8 @@ static struct bcm_op *bcm_find_op(struct list_head *ops, static void bcm_remove_op(struct bcm_op *op) { - if (op->tsklet.func) { - while (test_bit(TASKLET_STATE_SCHED, &op->tsklet.state) || - test_bit(TASKLET_STATE_RUN, &op->tsklet.state) || - hrtimer_active(&op->timer)) { - hrtimer_cancel(&op->timer); - tasklet_kill(&op->tsklet); - } - } - - if (op->thrtsklet.func) { - while (test_bit(TASKLET_STATE_SCHED, &op->thrtsklet.state) || - test_bit(TASKLET_STATE_RUN, &op->thrtsklet.state) || - hrtimer_active(&op->thrtimer)) { - hrtimer_cancel(&op->thrtimer); - tasklet_kill(&op->thrtsklet); - } - } + hrtimer_cancel(&op->timer); + hrtimer_cancel(&op->thrtimer); if ((op->frames) && (op->frames != &op->sframe)) kfree(op->frames); @@ -990,15 +947,13 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, op->ifindex = ifindex; /* initialize uninitialized (kzalloc) structure */ - hrtimer_init(&op->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + hrtimer_init(&op->timer, CLOCK_MONOTONIC, + HRTIMER_MODE_REL_SOFT); op->timer.function = bcm_tx_timeout_handler; - /* initialize tasklet for tx countevent notification */ - tasklet_init(&op->tsklet, bcm_tx_timeout_tsklet, - (unsigned long) op); - /* currently unused in tx_ops */ - hrtimer_init(&op->thrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + hrtimer_init(&op->thrtimer, CLOCK_MONOTONIC, + HRTIMER_MODE_REL_SOFT); /* add this bcm_op to the list of the tx_ops */ list_add(&op->list, &bo->tx_ops); @@ -1167,20 +1122,14 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, op->rx_ifindex = ifindex; /* initialize uninitialized (kzalloc) structure */ - hrtimer_init(&op->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + hrtimer_init(&op->timer, CLOCK_MONOTONIC, + HRTIMER_MODE_REL_SOFT); op->timer.function = bcm_rx_timeout_handler; - /* initialize tasklet for rx timeout notification */ - tasklet_init(&op->tsklet, bcm_rx_timeout_tsklet, - (unsigned long) op); - - hrtimer_init(&op->thrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + hrtimer_init(&op->thrtimer, CLOCK_MONOTONIC, + HRTIMER_MODE_REL_SOFT); op->thrtimer.function = bcm_rx_thr_handler; - /* initialize tasklet for rx throttle handling */ - tasklet_init(&op->thrtsklet, bcm_rx_thr_tsklet, - (unsigned long) op); - /* add this bcm_op to the list of the rx_ops */ list_add(&op->list, &bo->rx_ops); @@ -1226,12 +1175,12 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, */ op->kt_lastmsg = 0; hrtimer_cancel(&op->thrtimer); - bcm_rx_thr_flush(op, 1); + bcm_rx_thr_flush(op); } if ((op->flags & STARTTIMER) && op->kt_ival1) hrtimer_start(&op->timer, op->kt_ival1, - HRTIMER_MODE_REL); + HRTIMER_MODE_REL_SOFT); } /* now we can register for can_ids, if we added a new bcm_op */ @@ -1345,7 +1294,7 @@ static int bcm_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) /* no bound device as default => check msg_name */ DECLARE_SOCKADDR(struct sockaddr_can *, addr, msg->msg_name); - if (msg->msg_namelen < sizeof(*addr)) + if (msg->msg_namelen 
< CAN_REQUIRED_SIZE(*addr, can_ifindex)) return -EINVAL; if (addr->can_family != AF_CAN) @@ -1587,7 +1536,7 @@ static int bcm_connect(struct socket *sock, struct sockaddr *uaddr, int len, struct net *net = sock_net(sk); int ret = 0; - if (len < sizeof(*addr)) + if (len < CAN_REQUIRED_SIZE(*addr, can_ifindex)) return -EINVAL; lock_sock(sk); @@ -1679,6 +1628,13 @@ static int bcm_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, return size; } +static int bcm_sock_no_ioctlcmd(struct socket *sock, unsigned int cmd, + unsigned long arg) +{ + /* no ioctls for socket layer -> hand it down to NIC layer */ + return -ENOIOCTLCMD; +} + static const struct proto_ops bcm_ops = { .family = PF_CAN, .release = bcm_release, @@ -1688,7 +1644,7 @@ static const struct proto_ops bcm_ops = { .accept = sock_no_accept, .getname = sock_no_getname, .poll = datagram_poll, - .ioctl = can_ioctl, /* use can_ioctl() from af_can.c */ + .ioctl = bcm_sock_no_ioctlcmd, .gettstamp = sock_gettstamp, .listen = sock_no_listen, .shutdown = sock_no_shutdown, diff --git a/net/can/gw.c b/net/can/gw.c index 72711053ebe6..65d60c93af29 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -1,7 +1,7 @@ -/* - * gw.c - CAN frame Gateway/Router/Bridge with netlink interface +// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) +/* gw.c - CAN frame Gateway/Router/Bridge with netlink interface * - * Copyright (c) 2017 Volkswagen Group Electronic Research + * Copyright (c) 2019 Volkswagen Group Electronic Research * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -59,7 +59,7 @@ #include <net/net_namespace.h> #include <net/sock.h> -#define CAN_GW_VERSION "20170425" +#define CAN_GW_VERSION "20190810" #define CAN_GW_NAME "can-gw" MODULE_DESCRIPTION("PF_CAN netlink gateway"); @@ -85,10 +85,10 @@ static struct kmem_cache *cgw_cache __read_mostly; /* structure that contains the (on-the-fly) CAN frame modifications */ struct cf_mod { struct { - struct can_frame and; - struct can_frame or; - struct can_frame xor; - struct can_frame set; + struct canfd_frame and; + struct canfd_frame or; + struct canfd_frame xor; + struct canfd_frame set; } modframe; struct { u8 and; @@ -96,7 +96,7 @@ struct cf_mod { u8 xor; u8 set; } modtype; - void (*modfunc[MAX_MODFUNCTIONS])(struct can_frame *cf, + void (*modfunc[MAX_MODFUNCTIONS])(struct canfd_frame *cf, struct cf_mod *mod); /* CAN frame checksum calculation after CAN frame modifications */ @@ -105,15 +105,15 @@ struct cf_mod { struct cgw_csum_crc8 crc8; } csum; struct { - void (*xor)(struct can_frame *cf, struct cgw_csum_xor *xor); - void (*crc8)(struct can_frame *cf, struct cgw_csum_crc8 *crc8); + void (*xor)(struct canfd_frame *cf, + struct cgw_csum_xor *xor); + void (*crc8)(struct canfd_frame *cf, + struct cgw_csum_crc8 *crc8); } csumfunc; u32 uid; }; - -/* - * So far we just support CAN -> CAN routing and frame modifications. +/* So far we just support CAN -> CAN routing and frame modifications. * * The internal can_can_gw structure contains data and attributes for * a CAN -> CAN gateway job. 
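To make the semantics of these frame modifications concrete, here is a small, self-contained userspace sketch of what one gateway job does to a routed frame. It is illustrative only: the struct and function names are invented for the example, and only the can_id AND and len SET cases are modelled, mirroring the modfunc[] chain that cgw_parse_attr() builds further down in this diff.

/* Simplified model (not the kernel code) of a CAN -> CAN gateway job:
 * the configured modifications are applied in order through a small
 * function-pointer chain, like the modfunc[] array in struct cf_mod.
 * Struct and function names here are made up for illustration.
 */
#include <linux/can.h>
#include <stdio.h>

struct demo_mod {
	struct canfd_frame and, set;
	void (*modfunc[4])(struct canfd_frame *cf, struct demo_mod *mod);
};

static void demo_and_id(struct canfd_frame *cf, struct demo_mod *mod)
{
	cf->can_id &= mod->and.can_id;		/* CGW_MOD_ID with modtype AND */
}

static void demo_set_len(struct canfd_frame *cf, struct demo_mod *mod)
{
	cf->len = mod->set.len;			/* CGW_MOD_LEN with modtype SET */
}

int main(void)
{
	struct canfd_frame cf = { .can_id = 0x123 | CAN_EFF_FLAG, .len = 4 };
	struct demo_mod mod = {
		.and = { .can_id = CAN_SFF_MASK },	/* strip EFF bit and upper id bits */
		.set = { .len = 8 },			/* force the length to 8 */
		.modfunc = { demo_and_id, demo_set_len },
	};
	int i;

	/* the hot path: run the preprocessed modification chain in order */
	for (i = 0; i < 4 && mod.modfunc[i]; i++)
		mod.modfunc[i](&cf, &mod);

	printf("id 0x%x len %u\n", (unsigned int)cf.can_id, (unsigned int)cf.len);
	return 0;
}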
@@ -151,39 +151,88 @@ struct cgw_job { /* modification functions that are invoked in the hot path in can_can_gw_rcv */ -#define MODFUNC(func, op) static void func(struct can_frame *cf, \ +#define MODFUNC(func, op) static void func(struct canfd_frame *cf, \ struct cf_mod *mod) { op ; } MODFUNC(mod_and_id, cf->can_id &= mod->modframe.and.can_id) -MODFUNC(mod_and_dlc, cf->can_dlc &= mod->modframe.and.can_dlc) +MODFUNC(mod_and_len, cf->len &= mod->modframe.and.len) +MODFUNC(mod_and_flags, cf->flags &= mod->modframe.and.flags) MODFUNC(mod_and_data, *(u64 *)cf->data &= *(u64 *)mod->modframe.and.data) MODFUNC(mod_or_id, cf->can_id |= mod->modframe.or.can_id) -MODFUNC(mod_or_dlc, cf->can_dlc |= mod->modframe.or.can_dlc) +MODFUNC(mod_or_len, cf->len |= mod->modframe.or.len) +MODFUNC(mod_or_flags, cf->flags |= mod->modframe.or.flags) MODFUNC(mod_or_data, *(u64 *)cf->data |= *(u64 *)mod->modframe.or.data) MODFUNC(mod_xor_id, cf->can_id ^= mod->modframe.xor.can_id) -MODFUNC(mod_xor_dlc, cf->can_dlc ^= mod->modframe.xor.can_dlc) +MODFUNC(mod_xor_len, cf->len ^= mod->modframe.xor.len) +MODFUNC(mod_xor_flags, cf->flags ^= mod->modframe.xor.flags) MODFUNC(mod_xor_data, *(u64 *)cf->data ^= *(u64 *)mod->modframe.xor.data) MODFUNC(mod_set_id, cf->can_id = mod->modframe.set.can_id) -MODFUNC(mod_set_dlc, cf->can_dlc = mod->modframe.set.can_dlc) +MODFUNC(mod_set_len, cf->len = mod->modframe.set.len) +MODFUNC(mod_set_flags, cf->flags = mod->modframe.set.flags) MODFUNC(mod_set_data, *(u64 *)cf->data = *(u64 *)mod->modframe.set.data) -static inline void canframecpy(struct can_frame *dst, struct can_frame *src) +static void mod_and_fddata(struct canfd_frame *cf, struct cf_mod *mod) +{ + int i; + + for (i = 0; i < CANFD_MAX_DLEN; i += 8) + *(u64 *)(cf->data + i) &= *(u64 *)(mod->modframe.and.data + i); +} + +static void mod_or_fddata(struct canfd_frame *cf, struct cf_mod *mod) +{ + int i; + + for (i = 0; i < CANFD_MAX_DLEN; i += 8) + *(u64 *)(cf->data + i) |= *(u64 *)(mod->modframe.or.data + i); +} + +static void mod_xor_fddata(struct canfd_frame *cf, struct cf_mod *mod) +{ + int i; + + for (i = 0; i < CANFD_MAX_DLEN; i += 8) + *(u64 *)(cf->data + i) ^= *(u64 *)(mod->modframe.xor.data + i); +} + +static void mod_set_fddata(struct canfd_frame *cf, struct cf_mod *mod) +{ + memcpy(cf->data, mod->modframe.set.data, CANFD_MAX_DLEN); +} + +static void canframecpy(struct canfd_frame *dst, struct can_frame *src) { - /* - * Copy the struct members separately to ensure that no uninitialized + /* Copy the struct members separately to ensure that no uninitialized * data are copied in the 3 bytes hole of the struct. This is needed * to make easy compares of the data in the struct cf_mod. */ dst->can_id = src->can_id; - dst->can_dlc = src->can_dlc; + dst->len = src->can_dlc; *(u64 *)dst->data = *(u64 *)src->data; } -static int cgw_chk_csum_parms(s8 fr, s8 to, s8 re) +static void canfdframecpy(struct canfd_frame *dst, struct canfd_frame *src) { - /* - * absolute dlc values 0 .. 7 => 0 .. 7, e.g. data [0] + /* Copy the struct members separately to ensure that no uninitialized + * data are copied in the 2 bytes hole of the struct. This is needed + * to make easy compares of the data in the struct cf_mod. + */ + + dst->can_id = src->can_id; + dst->flags = src->flags; + dst->len = src->len; + memcpy(dst->data, src->data, CANFD_MAX_DLEN); +} + +static int cgw_chk_csum_parms(s8 fr, s8 to, s8 re, struct rtcanmsg *r) +{ + s8 dlen = CAN_MAX_DLEN; + + if (r->flags & CGW_FLAGS_CAN_FD) + dlen = CANFD_MAX_DLEN; + + /* absolute dlc values 0 .. 
7 => 0 .. 7, e.g. data [0] * relative to received dlc -1 .. -8 : * e.g. for received dlc = 8 * -1 => index = 7 (data[7]) @@ -191,27 +240,27 @@ static int cgw_chk_csum_parms(s8 fr, s8 to, s8 re) * -8 => index = 0 (data[0]) */ - if (fr > -9 && fr < 8 && - to > -9 && to < 8 && - re > -9 && re < 8) + if (fr >= -dlen && fr < dlen && + to >= -dlen && to < dlen && + re >= -dlen && re < dlen) return 0; else return -EINVAL; } -static inline int calc_idx(int idx, int rx_dlc) +static inline int calc_idx(int idx, int rx_len) { if (idx < 0) - return rx_dlc + idx; + return rx_len + idx; else return idx; } -static void cgw_csum_xor_rel(struct can_frame *cf, struct cgw_csum_xor *xor) +static void cgw_csum_xor_rel(struct canfd_frame *cf, struct cgw_csum_xor *xor) { - int from = calc_idx(xor->from_idx, cf->can_dlc); - int to = calc_idx(xor->to_idx, cf->can_dlc); - int res = calc_idx(xor->result_idx, cf->can_dlc); + int from = calc_idx(xor->from_idx, cf->len); + int to = calc_idx(xor->to_idx, cf->len); + int res = calc_idx(xor->result_idx, cf->len); u8 val = xor->init_xor_val; int i; @@ -229,7 +278,7 @@ static void cgw_csum_xor_rel(struct can_frame *cf, struct cgw_csum_xor *xor) cf->data[res] = val; } -static void cgw_csum_xor_pos(struct can_frame *cf, struct cgw_csum_xor *xor) +static void cgw_csum_xor_pos(struct canfd_frame *cf, struct cgw_csum_xor *xor) { u8 val = xor->init_xor_val; int i; @@ -240,7 +289,7 @@ static void cgw_csum_xor_pos(struct can_frame *cf, struct cgw_csum_xor *xor) cf->data[xor->result_idx] = val; } -static void cgw_csum_xor_neg(struct can_frame *cf, struct cgw_csum_xor *xor) +static void cgw_csum_xor_neg(struct canfd_frame *cf, struct cgw_csum_xor *xor) { u8 val = xor->init_xor_val; int i; @@ -251,11 +300,12 @@ static void cgw_csum_xor_neg(struct can_frame *cf, struct cgw_csum_xor *xor) cf->data[xor->result_idx] = val; } -static void cgw_csum_crc8_rel(struct can_frame *cf, struct cgw_csum_crc8 *crc8) +static void cgw_csum_crc8_rel(struct canfd_frame *cf, + struct cgw_csum_crc8 *crc8) { - int from = calc_idx(crc8->from_idx, cf->can_dlc); - int to = calc_idx(crc8->to_idx, cf->can_dlc); - int res = calc_idx(crc8->result_idx, cf->can_dlc); + int from = calc_idx(crc8->from_idx, cf->len); + int to = calc_idx(crc8->to_idx, cf->len); + int res = calc_idx(crc8->result_idx, cf->len); u8 crc = crc8->init_crc_val; int i; @@ -264,96 +314,102 @@ static void cgw_csum_crc8_rel(struct can_frame *cf, struct cgw_csum_crc8 *crc8) if (from <= to) { for (i = crc8->from_idx; i <= crc8->to_idx; i++) - crc = crc8->crctab[crc^cf->data[i]]; + crc = crc8->crctab[crc ^ cf->data[i]]; } else { for (i = crc8->from_idx; i >= crc8->to_idx; i--) - crc = crc8->crctab[crc^cf->data[i]]; + crc = crc8->crctab[crc ^ cf->data[i]]; } switch (crc8->profile) { - case CGW_CRC8PRF_1U8: - crc = crc8->crctab[crc^crc8->profile_data[0]]; + crc = crc8->crctab[crc ^ crc8->profile_data[0]]; break; case CGW_CRC8PRF_16U8: - crc = crc8->crctab[crc^crc8->profile_data[cf->data[1] & 0xF]]; + crc = crc8->crctab[crc ^ crc8->profile_data[cf->data[1] & 0xF]]; break; case CGW_CRC8PRF_SFFID_XOR: - crc = crc8->crctab[crc^(cf->can_id & 0xFF)^ + crc = crc8->crctab[crc ^ (cf->can_id & 0xFF) ^ (cf->can_id >> 8 & 0xFF)]; break; - } - cf->data[crc8->result_idx] = crc^crc8->final_xor_val; + cf->data[crc8->result_idx] = crc ^ crc8->final_xor_val; } -static void cgw_csum_crc8_pos(struct can_frame *cf, struct cgw_csum_crc8 *crc8) +static void cgw_csum_crc8_pos(struct canfd_frame *cf, + struct cgw_csum_crc8 *crc8) { u8 crc = crc8->init_crc_val; int i; for (i = 
crc8->from_idx; i <= crc8->to_idx; i++) - crc = crc8->crctab[crc^cf->data[i]]; + crc = crc8->crctab[crc ^ cf->data[i]]; switch (crc8->profile) { - case CGW_CRC8PRF_1U8: - crc = crc8->crctab[crc^crc8->profile_data[0]]; + crc = crc8->crctab[crc ^ crc8->profile_data[0]]; break; case CGW_CRC8PRF_16U8: - crc = crc8->crctab[crc^crc8->profile_data[cf->data[1] & 0xF]]; + crc = crc8->crctab[crc ^ crc8->profile_data[cf->data[1] & 0xF]]; break; case CGW_CRC8PRF_SFFID_XOR: - crc = crc8->crctab[crc^(cf->can_id & 0xFF)^ + crc = crc8->crctab[crc ^ (cf->can_id & 0xFF) ^ (cf->can_id >> 8 & 0xFF)]; break; } - cf->data[crc8->result_idx] = crc^crc8->final_xor_val; + cf->data[crc8->result_idx] = crc ^ crc8->final_xor_val; } -static void cgw_csum_crc8_neg(struct can_frame *cf, struct cgw_csum_crc8 *crc8) +static void cgw_csum_crc8_neg(struct canfd_frame *cf, + struct cgw_csum_crc8 *crc8) { u8 crc = crc8->init_crc_val; int i; for (i = crc8->from_idx; i >= crc8->to_idx; i--) - crc = crc8->crctab[crc^cf->data[i]]; + crc = crc8->crctab[crc ^ cf->data[i]]; switch (crc8->profile) { - case CGW_CRC8PRF_1U8: - crc = crc8->crctab[crc^crc8->profile_data[0]]; + crc = crc8->crctab[crc ^ crc8->profile_data[0]]; break; case CGW_CRC8PRF_16U8: - crc = crc8->crctab[crc^crc8->profile_data[cf->data[1] & 0xF]]; + crc = crc8->crctab[crc ^ crc8->profile_data[cf->data[1] & 0xF]]; break; case CGW_CRC8PRF_SFFID_XOR: - crc = crc8->crctab[crc^(cf->can_id & 0xFF)^ + crc = crc8->crctab[crc ^ (cf->can_id & 0xFF) ^ (cf->can_id >> 8 & 0xFF)]; break; } - cf->data[crc8->result_idx] = crc^crc8->final_xor_val; + cf->data[crc8->result_idx] = crc ^ crc8->final_xor_val; } /* the receive & process & send function */ static void can_can_gw_rcv(struct sk_buff *skb, void *data) { struct cgw_job *gwj = (struct cgw_job *)data; - struct can_frame *cf; + struct canfd_frame *cf; struct sk_buff *nskb; int modidx = 0; - /* - * Do not handle CAN frames routed more than 'max_hops' times. + /* process strictly Classic CAN or CAN FD frames */ + if (gwj->flags & CGW_FLAGS_CAN_FD) { + if (skb->len != CANFD_MTU) + return; + } else { + if (skb->len != CAN_MTU) + return; + } + + /* Do not handle CAN frames routed more than 'max_hops' times. * In general we should never catch this delimiter which is intended * to cover a misconfiguration protection (e.g. circular CAN routes). * @@ -384,8 +440,7 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data) can_skb_prv(skb)->ifindex == gwj->dst.dev->ifindex) return; - /* - * clone the given skb, which has not been done in can_rcv() + /* clone the given skb, which has not been done in can_rcv() * * When there is at least one modification function activated, * we need to copy the skb as we want to modify skb->data. @@ -410,7 +465,7 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data) nskb->dev = gwj->dst.dev; /* pointer to modifiable CAN frame */ - cf = (struct can_frame *)nskb->data; + cf = (struct canfd_frame *)nskb->data; /* perform preprocessed modification functions if there are any */ while (modidx < MAX_MODFUNCTIONS && gwj->mod.modfunc[modidx]) @@ -419,26 +474,22 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data) /* Has the CAN frame been modified? 
*/ if (modidx) { /* get available space for the processed CAN frame type */ - int max_len = nskb->len - offsetof(struct can_frame, data); + int max_len = nskb->len - offsetof(struct canfd_frame, data); /* dlc may have changed, make sure it fits to the CAN frame */ - if (cf->can_dlc > max_len) - goto out_delete; - - /* check for checksum updates in classic CAN length only */ - if (gwj->mod.csumfunc.crc8) { - if (cf->can_dlc > 8) - goto out_delete; - - (*gwj->mod.csumfunc.crc8)(cf, &gwj->mod.csum.crc8); + if (cf->len > max_len) { + /* delete frame due to misconfiguration */ + gwj->deleted_frames++; + kfree_skb(nskb); + return; } - if (gwj->mod.csumfunc.xor) { - if (cf->can_dlc > 8) - goto out_delete; + /* check for checksum updates */ + if (gwj->mod.csumfunc.crc8) + (*gwj->mod.csumfunc.crc8)(cf, &gwj->mod.csum.crc8); + if (gwj->mod.csumfunc.xor) (*gwj->mod.csumfunc.xor)(cf, &gwj->mod.csum.xor); - } } /* clear the skb timestamp if not configured the other way */ @@ -450,14 +501,6 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data) gwj->dropped_frames++; else gwj->handled_frames++; - - return; - - out_delete: - /* delete frame due to misconfiguration */ - gwj->deleted_frames++; - kfree_skb(nskb); - return; } static inline int cgw_register_filter(struct net *net, struct cgw_job *gwj) @@ -483,14 +526,12 @@ static int cgw_notifier(struct notifier_block *nb, return NOTIFY_DONE; if (msg == NETDEV_UNREGISTER) { - struct cgw_job *gwj = NULL; struct hlist_node *nx; ASSERT_RTNL(); hlist_for_each_entry_safe(gwj, nx, &net->can.cgw_list, list) { - if (gwj->src.dev == dev || gwj->dst.dev == dev) { hlist_del(&gwj->list); cgw_unregister_filter(net, gwj); @@ -505,7 +546,6 @@ static int cgw_notifier(struct notifier_block *nb, static int cgw_put_job(struct sk_buff *skb, struct cgw_job *gwj, int type, u32 pid, u32 seq, int flags) { - struct cgw_frame_mod mb; struct rtcanmsg *rtcan; struct nlmsghdr *nlh; @@ -542,32 +582,66 @@ static int cgw_put_job(struct sk_buff *skb, struct cgw_job *gwj, int type, goto cancel; } - if (gwj->mod.modtype.and) { - memcpy(&mb.cf, &gwj->mod.modframe.and, sizeof(mb.cf)); - mb.modtype = gwj->mod.modtype.and; - if (nla_put(skb, CGW_MOD_AND, sizeof(mb), &mb) < 0) - goto cancel; - } + if (gwj->flags & CGW_FLAGS_CAN_FD) { + struct cgw_fdframe_mod mb; - if (gwj->mod.modtype.or) { - memcpy(&mb.cf, &gwj->mod.modframe.or, sizeof(mb.cf)); - mb.modtype = gwj->mod.modtype.or; - if (nla_put(skb, CGW_MOD_OR, sizeof(mb), &mb) < 0) - goto cancel; - } + if (gwj->mod.modtype.and) { + memcpy(&mb.cf, &gwj->mod.modframe.and, sizeof(mb.cf)); + mb.modtype = gwj->mod.modtype.and; + if (nla_put(skb, CGW_FDMOD_AND, sizeof(mb), &mb) < 0) + goto cancel; + } - if (gwj->mod.modtype.xor) { - memcpy(&mb.cf, &gwj->mod.modframe.xor, sizeof(mb.cf)); - mb.modtype = gwj->mod.modtype.xor; - if (nla_put(skb, CGW_MOD_XOR, sizeof(mb), &mb) < 0) - goto cancel; - } + if (gwj->mod.modtype.or) { + memcpy(&mb.cf, &gwj->mod.modframe.or, sizeof(mb.cf)); + mb.modtype = gwj->mod.modtype.or; + if (nla_put(skb, CGW_FDMOD_OR, sizeof(mb), &mb) < 0) + goto cancel; + } - if (gwj->mod.modtype.set) { - memcpy(&mb.cf, &gwj->mod.modframe.set, sizeof(mb.cf)); - mb.modtype = gwj->mod.modtype.set; - if (nla_put(skb, CGW_MOD_SET, sizeof(mb), &mb) < 0) - goto cancel; + if (gwj->mod.modtype.xor) { + memcpy(&mb.cf, &gwj->mod.modframe.xor, sizeof(mb.cf)); + mb.modtype = gwj->mod.modtype.xor; + if (nla_put(skb, CGW_FDMOD_XOR, sizeof(mb), &mb) < 0) + goto cancel; + } + + if (gwj->mod.modtype.set) { + memcpy(&mb.cf, &gwj->mod.modframe.set, 
sizeof(mb.cf)); + mb.modtype = gwj->mod.modtype.set; + if (nla_put(skb, CGW_FDMOD_SET, sizeof(mb), &mb) < 0) + goto cancel; + } + } else { + struct cgw_frame_mod mb; + + if (gwj->mod.modtype.and) { + memcpy(&mb.cf, &gwj->mod.modframe.and, sizeof(mb.cf)); + mb.modtype = gwj->mod.modtype.and; + if (nla_put(skb, CGW_MOD_AND, sizeof(mb), &mb) < 0) + goto cancel; + } + + if (gwj->mod.modtype.or) { + memcpy(&mb.cf, &gwj->mod.modframe.or, sizeof(mb.cf)); + mb.modtype = gwj->mod.modtype.or; + if (nla_put(skb, CGW_MOD_OR, sizeof(mb), &mb) < 0) + goto cancel; + } + + if (gwj->mod.modtype.xor) { + memcpy(&mb.cf, &gwj->mod.modframe.xor, sizeof(mb.cf)); + mb.modtype = gwj->mod.modtype.xor; + if (nla_put(skb, CGW_MOD_XOR, sizeof(mb), &mb) < 0) + goto cancel; + } + + if (gwj->mod.modtype.set) { + memcpy(&mb.cf, &gwj->mod.modframe.set, sizeof(mb.cf)); + mb.modtype = gwj->mod.modtype.set; + if (nla_put(skb, CGW_MOD_SET, sizeof(mb), &mb) < 0) + goto cancel; + } } if (gwj->mod.uid) { @@ -588,7 +662,6 @@ static int cgw_put_job(struct sk_buff *skb, struct cgw_job *gwj, int type, } if (gwj->gwtype == CGW_TYPE_CAN_CAN) { - if (gwj->ccgw.filter.can_id || gwj->ccgw.filter.can_mask) { if (nla_put(skb, CGW_FILTER, sizeof(struct can_filter), &gwj->ccgw.filter) < 0) @@ -623,8 +696,9 @@ static int cgw_dump_jobs(struct sk_buff *skb, struct netlink_callback *cb) if (idx < s_idx) goto cont; - if (cgw_put_job(skb, gwj, RTM_NEWROUTE, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, NLM_F_MULTI) < 0) + if (cgw_put_job(skb, gwj, RTM_NEWROUTE, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, NLM_F_MULTI) < 0) break; cont: idx++; @@ -636,7 +710,7 @@ cont: return skb->len; } -static const struct nla_policy cgw_policy[CGW_MAX+1] = { +static const struct nla_policy cgw_policy[CGW_MAX + 1] = { [CGW_MOD_AND] = { .len = sizeof(struct cgw_frame_mod) }, [CGW_MOD_OR] = { .len = sizeof(struct cgw_frame_mod) }, [CGW_MOD_XOR] = { .len = sizeof(struct cgw_frame_mod) }, @@ -648,14 +722,18 @@ static const struct nla_policy cgw_policy[CGW_MAX+1] = { [CGW_FILTER] = { .len = sizeof(struct can_filter) }, [CGW_LIM_HOPS] = { .type = NLA_U8 }, [CGW_MOD_UID] = { .type = NLA_U32 }, + [CGW_FDMOD_AND] = { .len = sizeof(struct cgw_fdframe_mod) }, + [CGW_FDMOD_OR] = { .len = sizeof(struct cgw_fdframe_mod) }, + [CGW_FDMOD_XOR] = { .len = sizeof(struct cgw_fdframe_mod) }, + [CGW_FDMOD_SET] = { .len = sizeof(struct cgw_fdframe_mod) }, }; /* check for common and gwtype specific attributes */ static int cgw_parse_attr(struct nlmsghdr *nlh, struct cf_mod *mod, u8 gwtype, void *gwtypeattr, u8 *limhops) { - struct nlattr *tb[CGW_MAX+1]; - struct cgw_frame_mod mb; + struct nlattr *tb[CGW_MAX + 1]; + struct rtcanmsg *r = nlmsg_data(nlh); int modidx = 0; int err = 0; @@ -675,87 +753,166 @@ static int cgw_parse_attr(struct nlmsghdr *nlh, struct cf_mod *mod, } /* check for AND/OR/XOR/SET modifications */ + if (r->flags & CGW_FLAGS_CAN_FD) { + struct cgw_fdframe_mod mb; - if (tb[CGW_MOD_AND]) { - nla_memcpy(&mb, tb[CGW_MOD_AND], CGW_MODATTR_LEN); + if (tb[CGW_FDMOD_AND]) { + nla_memcpy(&mb, tb[CGW_FDMOD_AND], CGW_FDMODATTR_LEN); - canframecpy(&mod->modframe.and, &mb.cf); - mod->modtype.and = mb.modtype; + canfdframecpy(&mod->modframe.and, &mb.cf); + mod->modtype.and = mb.modtype; - if (mb.modtype & CGW_MOD_ID) - mod->modfunc[modidx++] = mod_and_id; + if (mb.modtype & CGW_MOD_ID) + mod->modfunc[modidx++] = mod_and_id; - if (mb.modtype & CGW_MOD_DLC) - mod->modfunc[modidx++] = mod_and_dlc; + if (mb.modtype & CGW_MOD_LEN) + mod->modfunc[modidx++] = mod_and_len; - if 
(mb.modtype & CGW_MOD_DATA) - mod->modfunc[modidx++] = mod_and_data; - } + if (mb.modtype & CGW_MOD_FLAGS) + mod->modfunc[modidx++] = mod_and_flags; + + if (mb.modtype & CGW_MOD_DATA) + mod->modfunc[modidx++] = mod_and_fddata; + } - if (tb[CGW_MOD_OR]) { - nla_memcpy(&mb, tb[CGW_MOD_OR], CGW_MODATTR_LEN); + if (tb[CGW_FDMOD_OR]) { + nla_memcpy(&mb, tb[CGW_FDMOD_OR], CGW_FDMODATTR_LEN); - canframecpy(&mod->modframe.or, &mb.cf); - mod->modtype.or = mb.modtype; + canfdframecpy(&mod->modframe.or, &mb.cf); + mod->modtype.or = mb.modtype; - if (mb.modtype & CGW_MOD_ID) - mod->modfunc[modidx++] = mod_or_id; + if (mb.modtype & CGW_MOD_ID) + mod->modfunc[modidx++] = mod_or_id; - if (mb.modtype & CGW_MOD_DLC) - mod->modfunc[modidx++] = mod_or_dlc; + if (mb.modtype & CGW_MOD_LEN) + mod->modfunc[modidx++] = mod_or_len; - if (mb.modtype & CGW_MOD_DATA) - mod->modfunc[modidx++] = mod_or_data; - } + if (mb.modtype & CGW_MOD_FLAGS) + mod->modfunc[modidx++] = mod_or_flags; - if (tb[CGW_MOD_XOR]) { - nla_memcpy(&mb, tb[CGW_MOD_XOR], CGW_MODATTR_LEN); + if (mb.modtype & CGW_MOD_DATA) + mod->modfunc[modidx++] = mod_or_fddata; + } - canframecpy(&mod->modframe.xor, &mb.cf); - mod->modtype.xor = mb.modtype; + if (tb[CGW_FDMOD_XOR]) { + nla_memcpy(&mb, tb[CGW_FDMOD_XOR], CGW_FDMODATTR_LEN); - if (mb.modtype & CGW_MOD_ID) - mod->modfunc[modidx++] = mod_xor_id; + canfdframecpy(&mod->modframe.xor, &mb.cf); + mod->modtype.xor = mb.modtype; - if (mb.modtype & CGW_MOD_DLC) - mod->modfunc[modidx++] = mod_xor_dlc; + if (mb.modtype & CGW_MOD_ID) + mod->modfunc[modidx++] = mod_xor_id; - if (mb.modtype & CGW_MOD_DATA) - mod->modfunc[modidx++] = mod_xor_data; - } + if (mb.modtype & CGW_MOD_LEN) + mod->modfunc[modidx++] = mod_xor_len; - if (tb[CGW_MOD_SET]) { - nla_memcpy(&mb, tb[CGW_MOD_SET], CGW_MODATTR_LEN); + if (mb.modtype & CGW_MOD_FLAGS) + mod->modfunc[modidx++] = mod_xor_flags; + + if (mb.modtype & CGW_MOD_DATA) + mod->modfunc[modidx++] = mod_xor_fddata; + } - canframecpy(&mod->modframe.set, &mb.cf); - mod->modtype.set = mb.modtype; + if (tb[CGW_FDMOD_SET]) { + nla_memcpy(&mb, tb[CGW_FDMOD_SET], CGW_FDMODATTR_LEN); + + canfdframecpy(&mod->modframe.set, &mb.cf); + mod->modtype.set = mb.modtype; + + if (mb.modtype & CGW_MOD_ID) + mod->modfunc[modidx++] = mod_set_id; + + if (mb.modtype & CGW_MOD_LEN) + mod->modfunc[modidx++] = mod_set_len; + + if (mb.modtype & CGW_MOD_FLAGS) + mod->modfunc[modidx++] = mod_set_flags; + + if (mb.modtype & CGW_MOD_DATA) + mod->modfunc[modidx++] = mod_set_fddata; + } + } else { + struct cgw_frame_mod mb; - if (mb.modtype & CGW_MOD_ID) - mod->modfunc[modidx++] = mod_set_id; + if (tb[CGW_MOD_AND]) { + nla_memcpy(&mb, tb[CGW_MOD_AND], CGW_MODATTR_LEN); - if (mb.modtype & CGW_MOD_DLC) - mod->modfunc[modidx++] = mod_set_dlc; + canframecpy(&mod->modframe.and, &mb.cf); + mod->modtype.and = mb.modtype; - if (mb.modtype & CGW_MOD_DATA) - mod->modfunc[modidx++] = mod_set_data; + if (mb.modtype & CGW_MOD_ID) + mod->modfunc[modidx++] = mod_and_id; + + if (mb.modtype & CGW_MOD_LEN) + mod->modfunc[modidx++] = mod_and_len; + + if (mb.modtype & CGW_MOD_DATA) + mod->modfunc[modidx++] = mod_and_data; + } + + if (tb[CGW_MOD_OR]) { + nla_memcpy(&mb, tb[CGW_MOD_OR], CGW_MODATTR_LEN); + + canframecpy(&mod->modframe.or, &mb.cf); + mod->modtype.or = mb.modtype; + + if (mb.modtype & CGW_MOD_ID) + mod->modfunc[modidx++] = mod_or_id; + + if (mb.modtype & CGW_MOD_LEN) + mod->modfunc[modidx++] = mod_or_len; + + if (mb.modtype & CGW_MOD_DATA) + mod->modfunc[modidx++] = mod_or_data; + } + + if (tb[CGW_MOD_XOR]) { + 
nla_memcpy(&mb, tb[CGW_MOD_XOR], CGW_MODATTR_LEN); + + canframecpy(&mod->modframe.xor, &mb.cf); + mod->modtype.xor = mb.modtype; + + if (mb.modtype & CGW_MOD_ID) + mod->modfunc[modidx++] = mod_xor_id; + + if (mb.modtype & CGW_MOD_LEN) + mod->modfunc[modidx++] = mod_xor_len; + + if (mb.modtype & CGW_MOD_DATA) + mod->modfunc[modidx++] = mod_xor_data; + } + + if (tb[CGW_MOD_SET]) { + nla_memcpy(&mb, tb[CGW_MOD_SET], CGW_MODATTR_LEN); + + canframecpy(&mod->modframe.set, &mb.cf); + mod->modtype.set = mb.modtype; + + if (mb.modtype & CGW_MOD_ID) + mod->modfunc[modidx++] = mod_set_id; + + if (mb.modtype & CGW_MOD_LEN) + mod->modfunc[modidx++] = mod_set_len; + + if (mb.modtype & CGW_MOD_DATA) + mod->modfunc[modidx++] = mod_set_data; + } } /* check for checksum operations after CAN frame modifications */ if (modidx) { - if (tb[CGW_CS_CRC8]) { struct cgw_csum_crc8 *c = nla_data(tb[CGW_CS_CRC8]); err = cgw_chk_csum_parms(c->from_idx, c->to_idx, - c->result_idx); + c->result_idx, r); if (err) return err; nla_memcpy(&mod->csum.crc8, tb[CGW_CS_CRC8], CGW_CS_CRC8_LEN); - /* - * select dedicated processing function to reduce + /* select dedicated processing function to reduce * runtime operations in receive hot path. */ if (c->from_idx < 0 || c->to_idx < 0 || @@ -771,15 +928,14 @@ static int cgw_parse_attr(struct nlmsghdr *nlh, struct cf_mod *mod, struct cgw_csum_xor *c = nla_data(tb[CGW_CS_XOR]); err = cgw_chk_csum_parms(c->from_idx, c->to_idx, - c->result_idx); + c->result_idx, r); if (err) return err; nla_memcpy(&mod->csum.xor, tb[CGW_CS_XOR], CGW_CS_XOR_LEN); - /* - * select dedicated processing function to reduce + /* select dedicated processing function to reduce * runtime operations in receive hot path. */ if (c->from_idx < 0 || c->to_idx < 0 || @@ -791,16 +947,14 @@ static int cgw_parse_attr(struct nlmsghdr *nlh, struct cf_mod *mod, mod->csumfunc.xor = cgw_csum_xor_neg; } - if (tb[CGW_MOD_UID]) { + if (tb[CGW_MOD_UID]) nla_memcpy(&mod->uid, tb[CGW_MOD_UID], sizeof(u32)); - } } if (gwtype == CGW_TYPE_CAN_CAN) { - /* check CGW_TYPE_CAN_CAN specific attributes */ - struct can_can_gw *ccgw = (struct can_can_gw *)gwtypeattr; + memset(ccgw, 0, sizeof(*ccgw)); /* check for can_filter in attributes */ @@ -861,12 +1015,10 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh, return err; if (mod.uid) { - ASSERT_RTNL(); /* check for updating an existing job with identical uid */ hlist_for_each_entry(gwj, &net->can.cgw_list, list) { - if (gwj->mod.uid != mod.uid) continue; @@ -987,7 +1139,6 @@ static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh, /* remove only the first matching entry */ hlist_for_each_entry_safe(gwj, nx, &net->can.cgw_list, list) { - if (gwj->flags != r->flags) continue; diff --git a/net/can/j1939/Kconfig b/net/can/j1939/Kconfig new file mode 100644 index 000000000000..2998298b71ec --- /dev/null +++ b/net/can/j1939/Kconfig @@ -0,0 +1,15 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# SAE J1939 network layer core configuration +# + +config CAN_J1939 + tristate "SAE J1939" + depends on CAN + help + SAE J1939 + Say Y to have in-kernel support for j1939 socket type. This + allows communication according to SAE j1939. + The relevant parts in kernel are + SAE j1939-21 (datalink & transport protocol) + & SAE j1939-81 (network management). 
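To show how the new socket type described in the Kconfig help is meant to be used, here is a minimal userspace sketch that opens a CAN_J1939 socket, binds it to a source address and sends one broadcast message. The can_addr.j1939 layout and the J1939_NO_* constants come from the uapi header added by this series; the interface name "can0", the source address 0x20 and the PGN 0x12300 are example values only, not anything mandated by the patch.

/* Hedged userspace sketch, not part of the patch: bind a J1939 socket to
 * a static source address on "can0" and send one 8-byte broadcast.
 */
#include <linux/can.h>
#include <linux/can/j1939.h>
#include <net/if.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	struct sockaddr_can baddr = { 0 }, daddr = { 0 };
	unsigned char data[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
	int sock;

	sock = socket(PF_CAN, SOCK_DGRAM, CAN_J1939);
	if (sock < 0) {
		perror("socket");
		return 1;
	}

	/* bind: which interface we use and which source address we own */
	baddr.can_family = AF_CAN;
	baddr.can_ifindex = if_nametoindex("can0");	/* example interface */
	baddr.can_addr.j1939.name = J1939_NO_NAME;	/* static addressing, no NAME */
	baddr.can_addr.j1939.pgn = J1939_NO_PGN;	/* do not filter on PGN */
	baddr.can_addr.j1939.addr = 0x20;		/* example source address */
	if (bind(sock, (struct sockaddr *)&baddr, sizeof(baddr)) < 0) {
		perror("bind");
		return 1;
	}

	/* sendto: the PGN selects the message, destination is broadcast */
	daddr.can_family = AF_CAN;
	daddr.can_addr.j1939.name = J1939_NO_NAME;
	daddr.can_addr.j1939.pgn = 0x12300;		/* example PGN */
	daddr.can_addr.j1939.addr = J1939_NO_ADDR;	/* broadcast destination */
	if (sendto(sock, data, sizeof(data), 0,
		   (struct sockaddr *)&daddr, sizeof(daddr)) < 0)
		perror("sendto");

	close(sock);
	return 0;
}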
diff --git a/net/can/j1939/Makefile b/net/can/j1939/Makefile new file mode 100644 index 000000000000..19181bdae173 --- /dev/null +++ b/net/can/j1939/Makefile @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: GPL-2.0 + +obj-$(CONFIG_CAN_J1939) += can-j1939.o + +can-j1939-objs := \ + address-claim.o \ + bus.o \ + main.o \ + socket.o \ + transport.o diff --git a/net/can/j1939/address-claim.c b/net/can/j1939/address-claim.c new file mode 100644 index 000000000000..f33c47327927 --- /dev/null +++ b/net/can/j1939/address-claim.c @@ -0,0 +1,230 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2010-2011 EIA Electronics, +// Kurt Van Dijck <kurt.van.dijck@eia.be> +// Copyright (c) 2010-2011 EIA Electronics, +// Pieter Beyens <pieter.beyens@eia.be> +// Copyright (c) 2017-2019 Pengutronix, +// Marc Kleine-Budde <kernel@pengutronix.de> +// Copyright (c) 2017-2019 Pengutronix, +// Oleksij Rempel <kernel@pengutronix.de> + +/* J1939 Address Claiming. + * Address Claiming in the kernel + * - keeps track of the AC states of ECU's, + * - resolves NAME<=>SA taking into account the AC states of ECU's. + * + * All Address Claim msgs (including host-originated msg) are processed + * at the receive path (a sent msg is always received again via CAN echo). + * As such, the processing of AC msgs is done in the order on which msgs + * are sent on the bus. + * + * This module doesn't send msgs itself (e.g. replies on Address Claims), + * this is the responsibility of a user space application or daemon. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/netdevice.h> +#include <linux/skbuff.h> + +#include "j1939-priv.h" + +static inline name_t j1939_skb_to_name(const struct sk_buff *skb) +{ + return le64_to_cpup((__le64 *)skb->data); +} + +static inline bool j1939_ac_msg_is_request(struct sk_buff *skb) +{ + struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb); + int req_pgn; + + if (skb->len < 3 || skcb->addr.pgn != J1939_PGN_REQUEST) + return false; + + req_pgn = skb->data[0] | (skb->data[1] << 8) | (skb->data[2] << 16); + + return req_pgn == J1939_PGN_ADDRESS_CLAIMED; +} + +static int j1939_ac_verify_outgoing(struct j1939_priv *priv, + struct sk_buff *skb) +{ + struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb); + + if (skb->len != 8) { + netdev_notice(priv->ndev, "tx address claim with dlc %i\n", + skb->len); + return -EPROTO; + } + + if (skcb->addr.src_name != j1939_skb_to_name(skb)) { + netdev_notice(priv->ndev, "tx address claim with different name\n"); + return -EPROTO; + } + + if (skcb->addr.sa == J1939_NO_ADDR) { + netdev_notice(priv->ndev, "tx address claim with broadcast sa\n"); + return -EPROTO; + } + + /* ac must always be a broadcast */ + if (skcb->addr.dst_name || skcb->addr.da != J1939_NO_ADDR) { + netdev_notice(priv->ndev, "tx address claim with dest, not broadcast\n"); + return -EPROTO; + } + return 0; +} + +int j1939_ac_fixup(struct j1939_priv *priv, struct sk_buff *skb) +{ + struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb); + int ret; + u8 addr; + + /* network mgmt: address claiming msgs */ + if (skcb->addr.pgn == J1939_PGN_ADDRESS_CLAIMED) { + struct j1939_ecu *ecu; + + ret = j1939_ac_verify_outgoing(priv, skb); + /* return both when failure & when successful */ + if (ret < 0) + return ret; + ecu = j1939_ecu_get_by_name(priv, skcb->addr.src_name); + if (!ecu) + return -ENODEV; + + if (ecu->addr != skcb->addr.sa) + /* hold further traffic for ecu, remove from parent */ + j1939_ecu_unmap(ecu); + j1939_ecu_put(ecu); + } else if (skcb->addr.src_name) { + /* assign source 
address */ + addr = j1939_name_to_addr(priv, skcb->addr.src_name); + if (!j1939_address_is_unicast(addr) && + !j1939_ac_msg_is_request(skb)) { + netdev_notice(priv->ndev, "tx drop: invalid sa for name 0x%016llx\n", + skcb->addr.src_name); + return -EADDRNOTAVAIL; + } + skcb->addr.sa = addr; + } + + /* assign destination address */ + if (skcb->addr.dst_name) { + addr = j1939_name_to_addr(priv, skcb->addr.dst_name); + if (!j1939_address_is_unicast(addr)) { + netdev_notice(priv->ndev, "tx drop: invalid da for name 0x%016llx\n", + skcb->addr.dst_name); + return -EADDRNOTAVAIL; + } + skcb->addr.da = addr; + } + return 0; +} + +static void j1939_ac_process(struct j1939_priv *priv, struct sk_buff *skb) +{ + struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb); + struct j1939_ecu *ecu, *prev; + name_t name; + + if (skb->len != 8) { + netdev_notice(priv->ndev, "rx address claim with wrong dlc %i\n", + skb->len); + return; + } + + name = j1939_skb_to_name(skb); + skcb->addr.src_name = name; + if (!name) { + netdev_notice(priv->ndev, "rx address claim without name\n"); + return; + } + + if (!j1939_address_is_valid(skcb->addr.sa)) { + netdev_notice(priv->ndev, "rx address claim with broadcast sa\n"); + return; + } + + write_lock_bh(&priv->lock); + + /* Few words on the ECU ref counting: + * + * First we get an ECU handle, either with + * j1939_ecu_get_by_name_locked() (increments the ref counter) + * or j1939_ecu_create_locked() (initializes an ECU object + * with a ref counter of 1). + * + * j1939_ecu_unmap_locked() will decrement the ref counter, + * but only if the ECU was mapped before. So "ecu" still + * belongs to us. + * + * j1939_ecu_timer_start() will increment the ref counter + * before it starts the timer, so we can put the ecu when + * leaving this function. 
+ */ + ecu = j1939_ecu_get_by_name_locked(priv, name); + if (!ecu && j1939_address_is_unicast(skcb->addr.sa)) + ecu = j1939_ecu_create_locked(priv, name); + + if (IS_ERR_OR_NULL(ecu)) + goto out_unlock_bh; + + /* cancel pending (previous) address claim */ + j1939_ecu_timer_cancel(ecu); + + if (j1939_address_is_idle(skcb->addr.sa)) { + j1939_ecu_unmap_locked(ecu); + goto out_ecu_put; + } + + /* save new addr */ + if (ecu->addr != skcb->addr.sa) + j1939_ecu_unmap_locked(ecu); + ecu->addr = skcb->addr.sa; + + prev = j1939_ecu_get_by_addr_locked(priv, skcb->addr.sa); + if (prev) { + if (ecu->name > prev->name) { + j1939_ecu_unmap_locked(ecu); + j1939_ecu_put(prev); + goto out_ecu_put; + } else { + /* kick prev if less or equal */ + j1939_ecu_unmap_locked(prev); + j1939_ecu_put(prev); + } + } + + j1939_ecu_timer_start(ecu); + out_ecu_put: + j1939_ecu_put(ecu); + out_unlock_bh: + write_unlock_bh(&priv->lock); +} + +void j1939_ac_recv(struct j1939_priv *priv, struct sk_buff *skb) +{ + struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb); + struct j1939_ecu *ecu; + + /* network mgmt */ + if (skcb->addr.pgn == J1939_PGN_ADDRESS_CLAIMED) { + j1939_ac_process(priv, skb); + } else if (j1939_address_is_unicast(skcb->addr.sa)) { + /* assign source name */ + ecu = j1939_ecu_get_by_addr(priv, skcb->addr.sa); + if (ecu) { + skcb->addr.src_name = ecu->name; + j1939_ecu_put(ecu); + } + } + + /* assign destination name */ + ecu = j1939_ecu_get_by_addr(priv, skcb->addr.da); + if (ecu) { + skcb->addr.dst_name = ecu->name; + j1939_ecu_put(ecu); + } +} diff --git a/net/can/j1939/bus.c b/net/can/j1939/bus.c new file mode 100644 index 000000000000..486687901602 --- /dev/null +++ b/net/can/j1939/bus.c @@ -0,0 +1,333 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2010-2011 EIA Electronics, +// Kurt Van Dijck <kurt.van.dijck@eia.be> +// Copyright (c) 2017-2019 Pengutronix, +// Marc Kleine-Budde <kernel@pengutronix.de> +// Copyright (c) 2017-2019 Pengutronix, +// Oleksij Rempel <kernel@pengutronix.de> + +/* bus for j1939 remote devices + * Since rtnetlink, no real bus is used. + */ + +#include <net/sock.h> + +#include "j1939-priv.h" + +static void __j1939_ecu_release(struct kref *kref) +{ + struct j1939_ecu *ecu = container_of(kref, struct j1939_ecu, kref); + struct j1939_priv *priv = ecu->priv; + + list_del(&ecu->list); + kfree(ecu); + j1939_priv_put(priv); +} + +void j1939_ecu_put(struct j1939_ecu *ecu) +{ + kref_put(&ecu->kref, __j1939_ecu_release); +} + +static void j1939_ecu_get(struct j1939_ecu *ecu) +{ + kref_get(&ecu->kref); +} + +static bool j1939_ecu_is_mapped_locked(struct j1939_ecu *ecu) +{ + struct j1939_priv *priv = ecu->priv; + + lockdep_assert_held(&priv->lock); + + return j1939_ecu_find_by_addr_locked(priv, ecu->addr) == ecu; +} + +/* ECU device interface */ +/* map ECU to a bus address space */ +static void j1939_ecu_map_locked(struct j1939_ecu *ecu) +{ + struct j1939_priv *priv = ecu->priv; + struct j1939_addr_ent *ent; + + lockdep_assert_held(&priv->lock); + + if (!j1939_address_is_unicast(ecu->addr)) + return; + + ent = &priv->ents[ecu->addr]; + + if (ent->ecu) { + netdev_warn(priv->ndev, "Trying to map already mapped ECU, addr: 0x%02x, name: 0x%016llx. 
Skip it.\n", + ecu->addr, ecu->name); + return; + } + + j1939_ecu_get(ecu); + ent->ecu = ecu; + ent->nusers += ecu->nusers; +} + +/* unmap ECU from a bus address space */ +void j1939_ecu_unmap_locked(struct j1939_ecu *ecu) +{ + struct j1939_priv *priv = ecu->priv; + struct j1939_addr_ent *ent; + + lockdep_assert_held(&priv->lock); + + if (!j1939_address_is_unicast(ecu->addr)) + return; + + if (!j1939_ecu_is_mapped_locked(ecu)) + return; + + ent = &priv->ents[ecu->addr]; + ent->ecu = NULL; + ent->nusers -= ecu->nusers; + j1939_ecu_put(ecu); +} + +void j1939_ecu_unmap(struct j1939_ecu *ecu) +{ + write_lock_bh(&ecu->priv->lock); + j1939_ecu_unmap_locked(ecu); + write_unlock_bh(&ecu->priv->lock); +} + +void j1939_ecu_unmap_all(struct j1939_priv *priv) +{ + int i; + + write_lock_bh(&priv->lock); + for (i = 0; i < ARRAY_SIZE(priv->ents); i++) + if (priv->ents[i].ecu) + j1939_ecu_unmap_locked(priv->ents[i].ecu); + write_unlock_bh(&priv->lock); +} + +void j1939_ecu_timer_start(struct j1939_ecu *ecu) +{ + /* The ECU is held here and released in the + * j1939_ecu_timer_handler() or j1939_ecu_timer_cancel(). + */ + j1939_ecu_get(ecu); + + /* Schedule timer in 250 msec to commit address change. */ + hrtimer_start(&ecu->ac_timer, ms_to_ktime(250), + HRTIMER_MODE_REL_SOFT); +} + +void j1939_ecu_timer_cancel(struct j1939_ecu *ecu) +{ + if (hrtimer_cancel(&ecu->ac_timer)) + j1939_ecu_put(ecu); +} + +static enum hrtimer_restart j1939_ecu_timer_handler(struct hrtimer *hrtimer) +{ + struct j1939_ecu *ecu = + container_of(hrtimer, struct j1939_ecu, ac_timer); + struct j1939_priv *priv = ecu->priv; + + write_lock_bh(&priv->lock); + /* TODO: can we test if ecu->addr is unicast before starting + * the timer? + */ + j1939_ecu_map_locked(ecu); + + /* The corresponding j1939_ecu_get() is in + * j1939_ecu_timer_start(). 
+ */ + j1939_ecu_put(ecu); + write_unlock_bh(&priv->lock); + + return HRTIMER_NORESTART; +} + +struct j1939_ecu *j1939_ecu_create_locked(struct j1939_priv *priv, name_t name) +{ + struct j1939_ecu *ecu; + + lockdep_assert_held(&priv->lock); + + ecu = kzalloc(sizeof(*ecu), gfp_any()); + if (!ecu) + return ERR_PTR(-ENOMEM); + kref_init(&ecu->kref); + ecu->addr = J1939_IDLE_ADDR; + ecu->name = name; + + hrtimer_init(&ecu->ac_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT); + ecu->ac_timer.function = j1939_ecu_timer_handler; + INIT_LIST_HEAD(&ecu->list); + + j1939_priv_get(priv); + ecu->priv = priv; + list_add_tail(&ecu->list, &priv->ecus); + + return ecu; +} + +struct j1939_ecu *j1939_ecu_find_by_addr_locked(struct j1939_priv *priv, + u8 addr) +{ + lockdep_assert_held(&priv->lock); + + return priv->ents[addr].ecu; +} + +struct j1939_ecu *j1939_ecu_get_by_addr_locked(struct j1939_priv *priv, u8 addr) +{ + struct j1939_ecu *ecu; + + lockdep_assert_held(&priv->lock); + + if (!j1939_address_is_unicast(addr)) + return NULL; + + ecu = j1939_ecu_find_by_addr_locked(priv, addr); + if (ecu) + j1939_ecu_get(ecu); + + return ecu; +} + +struct j1939_ecu *j1939_ecu_get_by_addr(struct j1939_priv *priv, u8 addr) +{ + struct j1939_ecu *ecu; + + read_lock_bh(&priv->lock); + ecu = j1939_ecu_get_by_addr_locked(priv, addr); + read_unlock_bh(&priv->lock); + + return ecu; +} + +/* get pointer to ecu without increasing ref counter */ +static struct j1939_ecu *j1939_ecu_find_by_name_locked(struct j1939_priv *priv, + name_t name) +{ + struct j1939_ecu *ecu; + + lockdep_assert_held(&priv->lock); + + list_for_each_entry(ecu, &priv->ecus, list) { + if (ecu->name == name) + return ecu; + } + + return NULL; +} + +struct j1939_ecu *j1939_ecu_get_by_name_locked(struct j1939_priv *priv, + name_t name) +{ + struct j1939_ecu *ecu; + + lockdep_assert_held(&priv->lock); + + if (!name) + return NULL; + + ecu = j1939_ecu_find_by_name_locked(priv, name); + if (ecu) + j1939_ecu_get(ecu); + + return ecu; +} + +struct j1939_ecu *j1939_ecu_get_by_name(struct j1939_priv *priv, name_t name) +{ + struct j1939_ecu *ecu; + + read_lock_bh(&priv->lock); + ecu = j1939_ecu_get_by_name_locked(priv, name); + read_unlock_bh(&priv->lock); + + return ecu; +} + +u8 j1939_name_to_addr(struct j1939_priv *priv, name_t name) +{ + struct j1939_ecu *ecu; + int addr = J1939_IDLE_ADDR; + + if (!name) + return J1939_NO_ADDR; + + read_lock_bh(&priv->lock); + ecu = j1939_ecu_find_by_name_locked(priv, name); + if (ecu && j1939_ecu_is_mapped_locked(ecu)) + /* ecu's SA is registered */ + addr = ecu->addr; + + read_unlock_bh(&priv->lock); + + return addr; +} + +/* TX addr/name accounting + * Transport protocol needs to know if a SA is local or not + * These functions originate from userspace manipulating sockets, + * so locking is straigforward + */ + +int j1939_local_ecu_get(struct j1939_priv *priv, name_t name, u8 sa) +{ + struct j1939_ecu *ecu; + int err = 0; + + write_lock_bh(&priv->lock); + + if (j1939_address_is_unicast(sa)) + priv->ents[sa].nusers++; + + if (!name) + goto done; + + ecu = j1939_ecu_get_by_name_locked(priv, name); + if (!ecu) + ecu = j1939_ecu_create_locked(priv, name); + err = PTR_ERR_OR_ZERO(ecu); + if (err) + goto done; + + ecu->nusers++; + /* TODO: do we care if ecu->addr != sa? 
*/ + if (j1939_ecu_is_mapped_locked(ecu)) + /* ecu's sa is active already */ + priv->ents[ecu->addr].nusers++; + + done: + write_unlock_bh(&priv->lock); + + return err; +} + +void j1939_local_ecu_put(struct j1939_priv *priv, name_t name, u8 sa) +{ + struct j1939_ecu *ecu; + + write_lock_bh(&priv->lock); + + if (j1939_address_is_unicast(sa)) + priv->ents[sa].nusers--; + + if (!name) + goto done; + + ecu = j1939_ecu_find_by_name_locked(priv, name); + if (WARN_ON_ONCE(!ecu)) + goto done; + + ecu->nusers--; + /* TODO: do we care if ecu->addr != sa? */ + if (j1939_ecu_is_mapped_locked(ecu)) + /* ecu's sa is active already */ + priv->ents[ecu->addr].nusers--; + j1939_ecu_put(ecu); + + done: + write_unlock_bh(&priv->lock); +} diff --git a/net/can/j1939/j1939-priv.h b/net/can/j1939/j1939-priv.h new file mode 100644 index 000000000000..12369b604ce9 --- /dev/null +++ b/net/can/j1939/j1939-priv.h @@ -0,0 +1,338 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +// Copyright (c) 2010-2011 EIA Electronics, +// Kurt Van Dijck <kurt.van.dijck@eia.be> +// Copyright (c) 2017-2019 Pengutronix, +// Marc Kleine-Budde <kernel@pengutronix.de> +// Copyright (c) 2017-2019 Pengutronix, +// Oleksij Rempel <kernel@pengutronix.de> + +#ifndef _J1939_PRIV_H_ +#define _J1939_PRIV_H_ + +#include <linux/can/j1939.h> +#include <net/sock.h> + +/* Timeout to receive the abort signal over loop back. In case CAN + * bus is open, the timeout should be triggered. + */ +#define J1939_XTP_ABORT_TIMEOUT_MS 500 +#define J1939_SIMPLE_ECHO_TIMEOUT_MS (10 * 1000) + +struct j1939_session; +enum j1939_sk_errqueue_type { + J1939_ERRQUEUE_ACK, + J1939_ERRQUEUE_SCHED, + J1939_ERRQUEUE_ABORT, +}; + +/* j1939 devices */ +struct j1939_ecu { + struct list_head list; + name_t name; + u8 addr; + + /* indicates that this ecu successfully claimed @sa as its address */ + struct hrtimer ac_timer; + struct kref kref; + struct j1939_priv *priv; + + /* count users, to help transport protocol decide for interaction */ + int nusers; +}; + +struct j1939_priv { + struct list_head ecus; + /* local list entry in priv + * These allow irq (& softirq) context lookups on j1939 devices + * This approach (separate lists) is done as the other 2 alternatives + * are not easier or even wrong + * 1) using the pure kobject methods involves mutexes, which are not + * allowed in irq context. + * 2) duplicating data structures would require a lot of synchronization + * code + * usage: + */ + + /* segments need a lock to protect the above list */ + rwlock_t lock; + + struct net_device *ndev; + + /* list of 256 ecu ptrs, that cache the claimed addresses. + * also protected by the above lock + */ + struct j1939_addr_ent { + struct j1939_ecu *ecu; + /* count users, to help transport protocol */ + int nusers; + } ents[256]; + + struct kref kref; + + /* List of active sessions to prevent start of conflicting + * one. + * + * Do not start two sessions of same type, addresses and + * direction. 
+ */ + struct list_head active_session_list; + + /* protects active_session_list */ + spinlock_t active_session_list_lock; + + unsigned int tp_max_packet_size; + + /* lock for j1939_socks list */ + spinlock_t j1939_socks_lock; + struct list_head j1939_socks; + + struct kref rx_kref; +}; + +void j1939_ecu_put(struct j1939_ecu *ecu); + +/* keep the cache of what is local */ +int j1939_local_ecu_get(struct j1939_priv *priv, name_t name, u8 sa); +void j1939_local_ecu_put(struct j1939_priv *priv, name_t name, u8 sa); + +static inline bool j1939_address_is_unicast(u8 addr) +{ + return addr <= J1939_MAX_UNICAST_ADDR; +} + +static inline bool j1939_address_is_idle(u8 addr) +{ + return addr == J1939_IDLE_ADDR; +} + +static inline bool j1939_address_is_valid(u8 addr) +{ + return addr != J1939_NO_ADDR; +} + +static inline bool j1939_pgn_is_pdu1(pgn_t pgn) +{ + /* ignore dp & res bits for this */ + return (pgn & 0xff00) < 0xf000; +} + +/* utility to correctly unmap an ECU */ +void j1939_ecu_unmap_locked(struct j1939_ecu *ecu); +void j1939_ecu_unmap(struct j1939_ecu *ecu); + +u8 j1939_name_to_addr(struct j1939_priv *priv, name_t name); +struct j1939_ecu *j1939_ecu_find_by_addr_locked(struct j1939_priv *priv, + u8 addr); +struct j1939_ecu *j1939_ecu_get_by_addr(struct j1939_priv *priv, u8 addr); +struct j1939_ecu *j1939_ecu_get_by_addr_locked(struct j1939_priv *priv, + u8 addr); +struct j1939_ecu *j1939_ecu_get_by_name(struct j1939_priv *priv, name_t name); +struct j1939_ecu *j1939_ecu_get_by_name_locked(struct j1939_priv *priv, + name_t name); + +enum j1939_transfer_type { + J1939_TP, + J1939_ETP, + J1939_SIMPLE, +}; + +struct j1939_addr { + name_t src_name; + name_t dst_name; + pgn_t pgn; + + u8 sa; + u8 da; + + u8 type; +}; + +/* control buffer of the sk_buff */ +struct j1939_sk_buff_cb { + /* Offset in bytes within one ETP session */ + u32 offset; + + /* for tx, MSG_SYN will be used to sync on sockets */ + u32 msg_flags; + u32 tskey; + + struct j1939_addr addr; + + /* Flags for quick lookups during skb processing. + * These are set in the receive path only. 
+ */ +#define J1939_ECU_LOCAL_SRC BIT(0) +#define J1939_ECU_LOCAL_DST BIT(1) + u8 flags; + + priority_t priority; +}; + +static inline +struct j1939_sk_buff_cb *j1939_skb_to_cb(const struct sk_buff *skb) +{ + BUILD_BUG_ON(sizeof(struct j1939_sk_buff_cb) > sizeof(skb->cb)); + + return (struct j1939_sk_buff_cb *)skb->cb; +} + +int j1939_send_one(struct j1939_priv *priv, struct sk_buff *skb); +void j1939_sk_recv(struct j1939_priv *priv, struct sk_buff *skb); +bool j1939_sk_recv_match(struct j1939_priv *priv, + struct j1939_sk_buff_cb *skcb); +void j1939_sk_send_loop_abort(struct sock *sk, int err); +void j1939_sk_errqueue(struct j1939_session *session, + enum j1939_sk_errqueue_type type); +void j1939_sk_queue_activate_next(struct j1939_session *session); + +/* stack entries */ +struct j1939_session *j1939_tp_send(struct j1939_priv *priv, + struct sk_buff *skb, size_t size); +int j1939_tp_recv(struct j1939_priv *priv, struct sk_buff *skb); +int j1939_ac_fixup(struct j1939_priv *priv, struct sk_buff *skb); +void j1939_ac_recv(struct j1939_priv *priv, struct sk_buff *skb); +void j1939_simple_recv(struct j1939_priv *priv, struct sk_buff *skb); + +/* network management */ +struct j1939_ecu *j1939_ecu_create_locked(struct j1939_priv *priv, name_t name); + +void j1939_ecu_timer_start(struct j1939_ecu *ecu); +void j1939_ecu_timer_cancel(struct j1939_ecu *ecu); +void j1939_ecu_unmap_all(struct j1939_priv *priv); + +struct j1939_priv *j1939_netdev_start(struct net_device *ndev); +void j1939_netdev_stop(struct j1939_priv *priv); + +void j1939_priv_put(struct j1939_priv *priv); +void j1939_priv_get(struct j1939_priv *priv); + +/* notify/alert all j1939 sockets bound to ifindex */ +void j1939_sk_netdev_event_netdown(struct j1939_priv *priv); +int j1939_cancel_active_session(struct j1939_priv *priv, struct sock *sk); +void j1939_tp_init(struct j1939_priv *priv); + +/* decrement pending skb for a j1939 socket */ +void j1939_sock_pending_del(struct sock *sk); + +enum j1939_session_state { + J1939_SESSION_NEW, + J1939_SESSION_ACTIVE, + /* waiting for abort signal on the bus */ + J1939_SESSION_WAITING_ABORT, + J1939_SESSION_ACTIVE_MAX, + J1939_SESSION_DONE, +}; + +struct j1939_session { + struct j1939_priv *priv; + struct list_head active_session_list_entry; + struct list_head sk_session_queue_entry; + struct kref kref; + struct sock *sk; + + /* ifindex, src, dst, pgn define the session block + * the are _never_ modified after insertion in the list + * this decreases locking problems a _lot_ + */ + struct j1939_sk_buff_cb skcb; + struct sk_buff_head skb_queue; + + /* all tx related stuff (last_txcmd, pkt.tx) + * is protected (modified only) with the txtimer hrtimer + * 'total' & 'block' are never changed, + * last_cmd, last & block are protected by ->lock + * this means that the tx may run after cts is received that should + * have stopped tx, but this time discrepancy is never avoided anyhow + */ + u8 last_cmd, last_txcmd; + bool transmission; + bool extd; + /* Total message size, number of bytes */ + unsigned int total_message_size; + /* Total number of bytes queue from socket to the session */ + unsigned int total_queued_size; + unsigned int tx_retry; + + int err; + u32 tskey; + enum j1939_session_state state; + + /* Packets counters for a (extended) transfer session. The packet is + * maximal of 7 bytes. 
+ */ + struct { + /* total - total number of packets for this session */ + unsigned int total; + /* last - last packet of a transfer block after which + * responder should send ETP.CM_CTS and originator + * ETP.CM_DPO + */ + unsigned int last; + /* tx - number of packets send by originator node. + * this counter can be set back if responder node + * didn't received all packets send by originator. + */ + unsigned int tx; + unsigned int tx_acked; + /* rx - number of packets received */ + unsigned int rx; + /* block - amount of packets expected in one block */ + unsigned int block; + /* dpo - ETP.CM_DPO, Data Packet Offset */ + unsigned int dpo; + } pkt; + struct hrtimer txtimer, rxtimer; +}; + +struct j1939_sock { + struct sock sk; /* must be first to skip with memset */ + struct j1939_priv *priv; + struct list_head list; + +#define J1939_SOCK_BOUND BIT(0) +#define J1939_SOCK_CONNECTED BIT(1) +#define J1939_SOCK_PROMISC BIT(2) +#define J1939_SOCK_ERRQUEUE BIT(3) + int state; + + int ifindex; + struct j1939_addr addr; + struct j1939_filter *filters; + int nfilters; + pgn_t pgn_rx_filter; + + /* j1939 may emit equal PGN (!= equal CAN-id's) out of order + * when transport protocol comes in. + * To allow emitting in order, keep a 'pending' nr. of packets + */ + atomic_t skb_pending; + wait_queue_head_t waitq; + + /* lock for the sk_session_queue list */ + spinlock_t sk_session_queue_lock; + struct list_head sk_session_queue; +}; + +static inline struct j1939_sock *j1939_sk(const struct sock *sk) +{ + return container_of(sk, struct j1939_sock, sk); +} + +void j1939_session_get(struct j1939_session *session); +void j1939_session_put(struct j1939_session *session); +void j1939_session_skb_queue(struct j1939_session *session, + struct sk_buff *skb); +int j1939_session_activate(struct j1939_session *session); +void j1939_tp_schedule_txtimer(struct j1939_session *session, int msec); +void j1939_session_timers_cancel(struct j1939_session *session); + +#define J1939_MAX_TP_PACKET_SIZE (7 * 0xff) +#define J1939_MAX_ETP_PACKET_SIZE (7 * 0x00ffffff) + +#define J1939_REGULAR 0 +#define J1939_EXTENDED 1 + +/* CAN protocol */ +extern const struct can_proto j1939_can_proto; + +#endif /* _J1939_PRIV_H_ */ diff --git a/net/can/j1939/main.c b/net/can/j1939/main.c new file mode 100644 index 000000000000..def2f813ffce --- /dev/null +++ b/net/can/j1939/main.c @@ -0,0 +1,403 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2010-2011 EIA Electronics, +// Pieter Beyens <pieter.beyens@eia.be> +// Copyright (c) 2010-2011 EIA Electronics, +// Kurt Van Dijck <kurt.van.dijck@eia.be> +// Copyright (c) 2018 Protonic, +// Robin van der Gracht <robin@protonic.nl> +// Copyright (c) 2017-2019 Pengutronix, +// Marc Kleine-Budde <kernel@pengutronix.de> +// Copyright (c) 2017-2019 Pengutronix, +// Oleksij Rempel <kernel@pengutronix.de> + +/* Core of can-j1939 that links j1939 to CAN. 
*/ + +#include <linux/can/can-ml.h> +#include <linux/can/core.h> +#include <linux/can/skb.h> +#include <linux/if_arp.h> +#include <linux/module.h> + +#include "j1939-priv.h" + +MODULE_DESCRIPTION("PF_CAN SAE J1939"); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("EIA Electronics (Kurt Van Dijck & Pieter Beyens)"); +MODULE_ALIAS("can-proto-" __stringify(CAN_J1939)); + +/* LOWLEVEL CAN interface */ + +/* CAN_HDR: #bytes before can_frame data part */ +#define J1939_CAN_HDR (offsetof(struct can_frame, data)) + +/* CAN_FTR: #bytes beyond data part */ +#define J1939_CAN_FTR (sizeof(struct can_frame) - J1939_CAN_HDR - \ + sizeof(((struct can_frame *)0)->data)) + +/* lowest layer */ +static void j1939_can_recv(struct sk_buff *iskb, void *data) +{ + struct j1939_priv *priv = data; + struct sk_buff *skb; + struct j1939_sk_buff_cb *skcb, *iskcb; + struct can_frame *cf; + + /* create a copy of the skb + * j1939 only delivers the real data bytes, + * the header goes into sockaddr. + * j1939 may not touch the incoming skb in such way + */ + skb = skb_clone(iskb, GFP_ATOMIC); + if (!skb) + return; + + can_skb_set_owner(skb, iskb->sk); + + /* get a pointer to the header of the skb + * the skb payload (pointer) is moved, so that the next skb_data + * returns the actual payload + */ + cf = (void *)skb->data; + skb_pull(skb, J1939_CAN_HDR); + + /* fix length, set to dlc, with 8 maximum */ + skb_trim(skb, min_t(uint8_t, cf->can_dlc, 8)); + + /* set addr */ + skcb = j1939_skb_to_cb(skb); + memset(skcb, 0, sizeof(*skcb)); + + iskcb = j1939_skb_to_cb(iskb); + skcb->tskey = iskcb->tskey; + skcb->priority = (cf->can_id >> 26) & 0x7; + skcb->addr.sa = cf->can_id; + skcb->addr.pgn = (cf->can_id >> 8) & J1939_PGN_MAX; + /* set default message type */ + skcb->addr.type = J1939_TP; + if (j1939_pgn_is_pdu1(skcb->addr.pgn)) { + /* Type 1: with destination address */ + skcb->addr.da = skcb->addr.pgn; + /* normalize pgn: strip dst address */ + skcb->addr.pgn &= 0x3ff00; + } else { + /* set broadcast address */ + skcb->addr.da = J1939_NO_ADDR; + } + + /* update localflags */ + read_lock_bh(&priv->lock); + if (j1939_address_is_unicast(skcb->addr.sa) && + priv->ents[skcb->addr.sa].nusers) + skcb->flags |= J1939_ECU_LOCAL_SRC; + if (j1939_address_is_unicast(skcb->addr.da) && + priv->ents[skcb->addr.da].nusers) + skcb->flags |= J1939_ECU_LOCAL_DST; + read_unlock_bh(&priv->lock); + + /* deliver into the j1939 stack ... 
*/ + j1939_ac_recv(priv, skb); + + if (j1939_tp_recv(priv, skb)) + /* this means the transport layer processed the message */ + goto done; + + j1939_simple_recv(priv, skb); + j1939_sk_recv(priv, skb); + done: + kfree_skb(skb); +} + +/* NETDEV MANAGEMENT */ + +/* values for can_rx_(un)register */ +#define J1939_CAN_ID CAN_EFF_FLAG +#define J1939_CAN_MASK (CAN_EFF_FLAG | CAN_RTR_FLAG) + +static DEFINE_SPINLOCK(j1939_netdev_lock); + +static struct j1939_priv *j1939_priv_create(struct net_device *ndev) +{ + struct j1939_priv *priv; + + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (!priv) + return NULL; + + rwlock_init(&priv->lock); + INIT_LIST_HEAD(&priv->ecus); + priv->ndev = ndev; + kref_init(&priv->kref); + kref_init(&priv->rx_kref); + dev_hold(ndev); + + netdev_dbg(priv->ndev, "%s : 0x%p\n", __func__, priv); + + return priv; +} + +static inline void j1939_priv_set(struct net_device *ndev, + struct j1939_priv *priv) +{ + struct can_ml_priv *can_ml_priv = ndev->ml_priv; + + can_ml_priv->j1939_priv = priv; +} + +static void __j1939_priv_release(struct kref *kref) +{ + struct j1939_priv *priv = container_of(kref, struct j1939_priv, kref); + struct net_device *ndev = priv->ndev; + + netdev_dbg(priv->ndev, "%s: 0x%p\n", __func__, priv); + + dev_put(ndev); + kfree(priv); +} + +void j1939_priv_put(struct j1939_priv *priv) +{ + kref_put(&priv->kref, __j1939_priv_release); +} + +void j1939_priv_get(struct j1939_priv *priv) +{ + kref_get(&priv->kref); +} + +static int j1939_can_rx_register(struct j1939_priv *priv) +{ + struct net_device *ndev = priv->ndev; + int ret; + + j1939_priv_get(priv); + ret = can_rx_register(dev_net(ndev), ndev, J1939_CAN_ID, J1939_CAN_MASK, + j1939_can_recv, priv, "j1939", NULL); + if (ret < 0) { + j1939_priv_put(priv); + return ret; + } + + return 0; +} + +static void j1939_can_rx_unregister(struct j1939_priv *priv) +{ + struct net_device *ndev = priv->ndev; + + can_rx_unregister(dev_net(ndev), ndev, J1939_CAN_ID, J1939_CAN_MASK, + j1939_can_recv, priv); + + j1939_priv_put(priv); +} + +static void __j1939_rx_release(struct kref *kref) + __releases(&j1939_netdev_lock) +{ + struct j1939_priv *priv = container_of(kref, struct j1939_priv, + rx_kref); + + j1939_can_rx_unregister(priv); + j1939_ecu_unmap_all(priv); + j1939_priv_set(priv->ndev, NULL); + spin_unlock(&j1939_netdev_lock); +} + +/* get pointer to priv without increasing ref counter */ +static inline struct j1939_priv *j1939_ndev_to_priv(struct net_device *ndev) +{ + struct can_ml_priv *can_ml_priv = ndev->ml_priv; + + return can_ml_priv->j1939_priv; +} + +static struct j1939_priv *j1939_priv_get_by_ndev_locked(struct net_device *ndev) +{ + struct j1939_priv *priv; + + lockdep_assert_held(&j1939_netdev_lock); + + if (ndev->type != ARPHRD_CAN) + return NULL; + + priv = j1939_ndev_to_priv(ndev); + if (priv) + j1939_priv_get(priv); + + return priv; +} + +static struct j1939_priv *j1939_priv_get_by_ndev(struct net_device *ndev) +{ + struct j1939_priv *priv; + + spin_lock(&j1939_netdev_lock); + priv = j1939_priv_get_by_ndev_locked(ndev); + spin_unlock(&j1939_netdev_lock); + + return priv; +} + +struct j1939_priv *j1939_netdev_start(struct net_device *ndev) +{ + struct j1939_priv *priv, *priv_new; + int ret; + + priv = j1939_priv_get_by_ndev(ndev); + if (priv) { + kref_get(&priv->rx_kref); + return priv; + } + + priv = j1939_priv_create(ndev); + if (!priv) + return ERR_PTR(-ENOMEM); + + j1939_tp_init(priv); + spin_lock_init(&priv->j1939_socks_lock); + INIT_LIST_HEAD(&priv->j1939_socks); + + spin_lock(&j1939_netdev_lock); 
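/*
 * Illustrative sketch, not part of the patch above: how the receive path
 * just shown (j1939_can_recv) splits a 29-bit extended CAN ID into J1939
 * fields.  Only the bit layout (priority:3, PGN:18, source address:8) and
 * the PDU1 rule are taken from that code; the struct and helper names
 * below are hypothetical userspace mirrors.
 */
#include <stdint.h>
#include <stdio.h>

struct j1939_id_fields {
	uint8_t priority;	/* 0 (highest) .. 7 (lowest) */
	uint32_t pgn;		/* normalized PGN, DA stripped for PDU1 */
	uint8_t sa;		/* source address */
	uint8_t da;		/* destination address, 0xff = broadcast */
};

static struct j1939_id_fields decode_j1939_can_id(uint32_t can_id)
{
	struct j1939_id_fields f;

	f.priority = (can_id >> 26) & 0x7;
	f.sa = can_id & 0xff;
	f.pgn = (can_id >> 8) & 0x3ffff;	/* J1939_PGN_MAX */

	if ((f.pgn & 0xff00) < 0xf000) {
		/* PDU1 (PF < 240): the PS byte is the destination address */
		f.da = f.pgn & 0xff;
		f.pgn &= 0x3ff00;	/* strip the DA from the PGN */
	} else {
		/* PDU2 (PF >= 240): always broadcast */
		f.da = 0xff;		/* J1939_NO_ADDR */
	}

	return f;
}

int main(void)
{
	/* 0x18eafff3 decodes to prio 6, PGN 0x0ea00 (Request), DA 0xff, SA 0xf3 */
	struct j1939_id_fields f = decode_j1939_can_id(0x18eafff3);

	printf("prio %u pgn 0x%05x da 0x%02x sa 0x%02x\n",
	       (unsigned int)f.priority, (unsigned int)f.pgn,
	       (unsigned int)f.da, (unsigned int)f.sa);
	return 0;
}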
+ priv_new = j1939_priv_get_by_ndev_locked(ndev); + if (priv_new) { + /* Someone was faster than us, use their priv and roll + * back our's. + */ + spin_unlock(&j1939_netdev_lock); + dev_put(ndev); + kfree(priv); + kref_get(&priv_new->rx_kref); + return priv_new; + } + j1939_priv_set(ndev, priv); + spin_unlock(&j1939_netdev_lock); + + ret = j1939_can_rx_register(priv); + if (ret < 0) + goto out_priv_put; + + return priv; + + out_priv_put: + j1939_priv_set(ndev, NULL); + dev_put(ndev); + kfree(priv); + + return ERR_PTR(ret); +} + +void j1939_netdev_stop(struct j1939_priv *priv) +{ + kref_put_lock(&priv->rx_kref, __j1939_rx_release, &j1939_netdev_lock); + j1939_priv_put(priv); +} + +int j1939_send_one(struct j1939_priv *priv, struct sk_buff *skb) +{ + int ret, dlc; + canid_t canid; + struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb); + struct can_frame *cf; + + /* apply sanity checks */ + if (j1939_pgn_is_pdu1(skcb->addr.pgn)) + skcb->addr.pgn &= J1939_PGN_PDU1_MAX; + else + skcb->addr.pgn &= J1939_PGN_MAX; + + if (skcb->priority > 7) + skcb->priority = 6; + + ret = j1939_ac_fixup(priv, skb); + if (unlikely(ret)) + goto failed; + dlc = skb->len; + + /* re-claim the CAN_HDR from the SKB */ + cf = skb_push(skb, J1939_CAN_HDR); + + /* make it a full can frame again */ + skb_put(skb, J1939_CAN_FTR + (8 - dlc)); + + canid = CAN_EFF_FLAG | + (skcb->priority << 26) | + (skcb->addr.pgn << 8) | + skcb->addr.sa; + if (j1939_pgn_is_pdu1(skcb->addr.pgn)) + canid |= skcb->addr.da << 8; + + cf->can_id = canid; + cf->can_dlc = dlc; + + return can_send(skb, 1); + + failed: + kfree_skb(skb); + return ret; +} + +static int j1939_netdev_notify(struct notifier_block *nb, + unsigned long msg, void *data) +{ + struct net_device *ndev = netdev_notifier_info_to_dev(data); + struct j1939_priv *priv; + + priv = j1939_priv_get_by_ndev(ndev); + if (!priv) + goto notify_done; + + if (ndev->type != ARPHRD_CAN) + goto notify_put; + + switch (msg) { + case NETDEV_DOWN: + j1939_cancel_active_session(priv, NULL); + j1939_sk_netdev_event_netdown(priv); + j1939_ecu_unmap_all(priv); + break; + } + +notify_put: + j1939_priv_put(priv); + +notify_done: + return NOTIFY_DONE; +} + +static struct notifier_block j1939_netdev_notifier = { + .notifier_call = j1939_netdev_notify, +}; + +/* MODULE interface */ +static __init int j1939_module_init(void) +{ + int ret; + + pr_info("can: SAE J1939\n"); + + ret = register_netdevice_notifier(&j1939_netdev_notifier); + if (ret) + goto fail_notifier; + + ret = can_proto_register(&j1939_can_proto); + if (ret < 0) { + pr_err("can: registration of j1939 protocol failed\n"); + goto fail_sk; + } + + return 0; + + fail_sk: + unregister_netdevice_notifier(&j1939_netdev_notifier); + fail_notifier: + return ret; +} + +static __exit void j1939_module_exit(void) +{ + can_proto_unregister(&j1939_can_proto); + + unregister_netdevice_notifier(&j1939_netdev_notifier); +} + +module_init(j1939_module_init); +module_exit(j1939_module_exit); diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c new file mode 100644 index 000000000000..37c1040bcb9c --- /dev/null +++ b/net/can/j1939/socket.c @@ -0,0 +1,1160 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2010-2011 EIA Electronics, +// Pieter Beyens <pieter.beyens@eia.be> +// Copyright (c) 2010-2011 EIA Electronics, +// Kurt Van Dijck <kurt.van.dijck@eia.be> +// Copyright (c) 2018 Protonic, +// Robin van der Gracht <robin@protonic.nl> +// Copyright (c) 2017-2019 Pengutronix, +// Marc Kleine-Budde <kernel@pengutronix.de> +// Copyright (c) 2017-2019 
Pengutronix, +// Oleksij Rempel <kernel@pengutronix.de> + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/can/core.h> +#include <linux/can/skb.h> +#include <linux/errqueue.h> +#include <linux/if_arp.h> + +#include "j1939-priv.h" + +#define J1939_MIN_NAMELEN CAN_REQUIRED_SIZE(struct sockaddr_can, can_addr.j1939) + +/* conversion function between struct sock::sk_priority from linux and + * j1939 priority field + */ +static inline priority_t j1939_prio(u32 sk_priority) +{ + sk_priority = min(sk_priority, 7U); + + return 7 - sk_priority; +} + +static inline u32 j1939_to_sk_priority(priority_t prio) +{ + return 7 - prio; +} + +/* function to see if pgn is to be evaluated */ +static inline bool j1939_pgn_is_valid(pgn_t pgn) +{ + return pgn <= J1939_PGN_MAX; +} + +/* test function to avoid non-zero DA placeholder for pdu1 pgn's */ +static inline bool j1939_pgn_is_clean_pdu(pgn_t pgn) +{ + if (j1939_pgn_is_pdu1(pgn)) + return !(pgn & 0xff); + else + return true; +} + +static inline void j1939_sock_pending_add(struct sock *sk) +{ + struct j1939_sock *jsk = j1939_sk(sk); + + atomic_inc(&jsk->skb_pending); +} + +static int j1939_sock_pending_get(struct sock *sk) +{ + struct j1939_sock *jsk = j1939_sk(sk); + + return atomic_read(&jsk->skb_pending); +} + +void j1939_sock_pending_del(struct sock *sk) +{ + struct j1939_sock *jsk = j1939_sk(sk); + + /* atomic_dec_return returns the new value */ + if (!atomic_dec_return(&jsk->skb_pending)) + wake_up(&jsk->waitq); /* no pending SKB's */ +} + +static void j1939_jsk_add(struct j1939_priv *priv, struct j1939_sock *jsk) +{ + jsk->state |= J1939_SOCK_BOUND; + j1939_priv_get(priv); + jsk->priv = priv; + + spin_lock_bh(&priv->j1939_socks_lock); + list_add_tail(&jsk->list, &priv->j1939_socks); + spin_unlock_bh(&priv->j1939_socks_lock); +} + +static void j1939_jsk_del(struct j1939_priv *priv, struct j1939_sock *jsk) +{ + spin_lock_bh(&priv->j1939_socks_lock); + list_del_init(&jsk->list); + spin_unlock_bh(&priv->j1939_socks_lock); + + jsk->priv = NULL; + j1939_priv_put(priv); + jsk->state &= ~J1939_SOCK_BOUND; +} + +static bool j1939_sk_queue_session(struct j1939_session *session) +{ + struct j1939_sock *jsk = j1939_sk(session->sk); + bool empty; + + spin_lock_bh(&jsk->sk_session_queue_lock); + empty = list_empty(&jsk->sk_session_queue); + j1939_session_get(session); + list_add_tail(&session->sk_session_queue_entry, &jsk->sk_session_queue); + spin_unlock_bh(&jsk->sk_session_queue_lock); + j1939_sock_pending_add(&jsk->sk); + + return empty; +} + +static struct +j1939_session *j1939_sk_get_incomplete_session(struct j1939_sock *jsk) +{ + struct j1939_session *session = NULL; + + spin_lock_bh(&jsk->sk_session_queue_lock); + if (!list_empty(&jsk->sk_session_queue)) { + session = list_last_entry(&jsk->sk_session_queue, + struct j1939_session, + sk_session_queue_entry); + if (session->total_queued_size == session->total_message_size) + session = NULL; + else + j1939_session_get(session); + } + spin_unlock_bh(&jsk->sk_session_queue_lock); + + return session; +} + +static void j1939_sk_queue_drop_all(struct j1939_priv *priv, + struct j1939_sock *jsk, int err) +{ + struct j1939_session *session, *tmp; + + netdev_dbg(priv->ndev, "%s: err: %i\n", __func__, err); + spin_lock_bh(&jsk->sk_session_queue_lock); + list_for_each_entry_safe(session, tmp, &jsk->sk_session_queue, + sk_session_queue_entry) { + list_del_init(&session->sk_session_queue_entry); + session->err = err; + j1939_session_put(session); + } + spin_unlock_bh(&jsk->sk_session_queue_lock); +} + +static 
void j1939_sk_queue_activate_next_locked(struct j1939_session *session) +{ + struct j1939_sock *jsk; + struct j1939_session *first; + int err; + + /* RX-Session don't have a socket (yet) */ + if (!session->sk) + return; + + jsk = j1939_sk(session->sk); + lockdep_assert_held(&jsk->sk_session_queue_lock); + + err = session->err; + + first = list_first_entry_or_null(&jsk->sk_session_queue, + struct j1939_session, + sk_session_queue_entry); + + /* Some else has already activated the next session */ + if (first != session) + return; + +activate_next: + list_del_init(&first->sk_session_queue_entry); + j1939_session_put(first); + first = list_first_entry_or_null(&jsk->sk_session_queue, + struct j1939_session, + sk_session_queue_entry); + if (!first) + return; + + if (WARN_ON_ONCE(j1939_session_activate(first))) { + first->err = -EBUSY; + goto activate_next; + } else { + /* Give receiver some time (arbitrary chosen) to recover */ + int time_ms = 0; + + if (err) + time_ms = 10 + prandom_u32_max(16); + + j1939_tp_schedule_txtimer(first, time_ms); + } +} + +void j1939_sk_queue_activate_next(struct j1939_session *session) +{ + struct j1939_sock *jsk; + + if (!session->sk) + return; + + jsk = j1939_sk(session->sk); + + spin_lock_bh(&jsk->sk_session_queue_lock); + j1939_sk_queue_activate_next_locked(session); + spin_unlock_bh(&jsk->sk_session_queue_lock); +} + +static bool j1939_sk_match_dst(struct j1939_sock *jsk, + const struct j1939_sk_buff_cb *skcb) +{ + if ((jsk->state & J1939_SOCK_PROMISC)) + return true; + + /* Destination address filter */ + if (jsk->addr.src_name && skcb->addr.dst_name) { + if (jsk->addr.src_name != skcb->addr.dst_name) + return false; + } else { + /* receive (all sockets) if + * - all packages that match our bind() address + * - all broadcast on a socket if SO_BROADCAST + * is set + */ + if (j1939_address_is_unicast(skcb->addr.da)) { + if (jsk->addr.sa != skcb->addr.da) + return false; + } else if (!sock_flag(&jsk->sk, SOCK_BROADCAST)) { + /* receiving broadcast without SO_BROADCAST + * flag is not allowed + */ + return false; + } + } + + /* Source address filter */ + if (jsk->state & J1939_SOCK_CONNECTED) { + /* receive (all sockets) if + * - all packages that match our connect() name or address + */ + if (jsk->addr.dst_name && skcb->addr.src_name) { + if (jsk->addr.dst_name != skcb->addr.src_name) + return false; + } else { + if (jsk->addr.da != skcb->addr.sa) + return false; + } + } + + /* PGN filter */ + if (j1939_pgn_is_valid(jsk->pgn_rx_filter) && + jsk->pgn_rx_filter != skcb->addr.pgn) + return false; + + return true; +} + +/* matches skb control buffer (addr) with a j1939 filter */ +static bool j1939_sk_match_filter(struct j1939_sock *jsk, + const struct j1939_sk_buff_cb *skcb) +{ + const struct j1939_filter *f = jsk->filters; + int nfilter = jsk->nfilters; + + if (!nfilter) + /* receive all when no filters are assigned */ + return true; + + for (; nfilter; ++f, --nfilter) { + if ((skcb->addr.pgn & f->pgn_mask) != f->pgn) + continue; + if ((skcb->addr.sa & f->addr_mask) != f->addr) + continue; + if ((skcb->addr.src_name & f->name_mask) != f->name) + continue; + return true; + } + return false; +} + +static bool j1939_sk_recv_match_one(struct j1939_sock *jsk, + const struct j1939_sk_buff_cb *skcb) +{ + if (!(jsk->state & J1939_SOCK_BOUND)) + return false; + + if (!j1939_sk_match_dst(jsk, skcb)) + return false; + + if (!j1939_sk_match_filter(jsk, skcb)) + return false; + + return true; +} + +static void j1939_sk_recv_one(struct j1939_sock *jsk, struct sk_buff *oskb) +{ + 
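/*
 * A userspace sketch of the receive filtering implemented by
 * j1939_sk_match_dst()/j1939_sk_match_filter() above: a packet is queued
 * to a socket if ANY installed filter matches, each filter comparing PGN,
 * source address and source NAME under its mask.  This assumes the
 * SO_J1939_FILTER sockopt and struct j1939_filter from
 * <linux/can/j1939.h>; the addresses and PGNs are arbitrary examples and
 * error handling is omitted.
 */
#include <linux/can.h>
#include <linux/can/j1939.h>
#include <net/if.h>
#include <sys/socket.h>

static int open_filtered_j1939_socket(const char *ifname)
{
	struct sockaddr_can baddr = {
		.can_family = AF_CAN,
		.can_addr.j1939 = {
			.name = J1939_NO_NAME,
			.addr = 0x20,		/* our source address */
			.pgn = J1939_NO_PGN,	/* no bind-time PGN filter */
		},
	};
	struct j1939_filter filt[] = {
		{	/* only PGN 0x0feee (a PDU2, i.e. broadcast, PGN) */
			.pgn = 0x0feee,
			.pgn_mask = J1939_PGN_MAX,
		},
		{	/* ... or anything sent from source address 0x30 */
			.addr = 0x30,
			.addr_mask = 0xff,
		},
	};
	int yes = 1;
	int sock = socket(PF_CAN, SOCK_DGRAM, CAN_J1939);

	baddr.can_ifindex = if_nametoindex(ifname);
	bind(sock, (struct sockaddr *)&baddr, sizeof(baddr));
	setsockopt(sock, SOL_CAN_J1939, SO_J1939_FILTER, filt, sizeof(filt));

	/* j1939_sk_match_dst() only delivers broadcasts (DA 0xff) when
	 * SO_BROADCAST is set, so enable it for the PDU2 PGN above.
	 */
	setsockopt(sock, SOL_SOCKET, SO_BROADCAST, &yes, sizeof(yes));

	return sock;
}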
const struct j1939_sk_buff_cb *oskcb = j1939_skb_to_cb(oskb); + struct j1939_sk_buff_cb *skcb; + struct sk_buff *skb; + + if (oskb->sk == &jsk->sk) + return; + + if (!j1939_sk_recv_match_one(jsk, oskcb)) + return; + + skb = skb_clone(oskb, GFP_ATOMIC); + if (!skb) { + pr_warn("skb clone failed\n"); + return; + } + can_skb_set_owner(skb, oskb->sk); + + skcb = j1939_skb_to_cb(skb); + skcb->msg_flags &= ~(MSG_DONTROUTE); + if (skb->sk) + skcb->msg_flags |= MSG_DONTROUTE; + + if (sock_queue_rcv_skb(&jsk->sk, skb) < 0) + kfree_skb(skb); +} + +bool j1939_sk_recv_match(struct j1939_priv *priv, struct j1939_sk_buff_cb *skcb) +{ + struct j1939_sock *jsk; + bool match = false; + + spin_lock_bh(&priv->j1939_socks_lock); + list_for_each_entry(jsk, &priv->j1939_socks, list) { + match = j1939_sk_recv_match_one(jsk, skcb); + if (match) + break; + } + spin_unlock_bh(&priv->j1939_socks_lock); + + return match; +} + +void j1939_sk_recv(struct j1939_priv *priv, struct sk_buff *skb) +{ + struct j1939_sock *jsk; + + spin_lock_bh(&priv->j1939_socks_lock); + list_for_each_entry(jsk, &priv->j1939_socks, list) { + j1939_sk_recv_one(jsk, skb); + } + spin_unlock_bh(&priv->j1939_socks_lock); +} + +static int j1939_sk_init(struct sock *sk) +{ + struct j1939_sock *jsk = j1939_sk(sk); + + /* Ensure that "sk" is first member in "struct j1939_sock", so that we + * can skip it during memset(). + */ + BUILD_BUG_ON(offsetof(struct j1939_sock, sk) != 0); + memset((void *)jsk + sizeof(jsk->sk), 0x0, + sizeof(*jsk) - sizeof(jsk->sk)); + + INIT_LIST_HEAD(&jsk->list); + init_waitqueue_head(&jsk->waitq); + jsk->sk.sk_priority = j1939_to_sk_priority(6); + jsk->sk.sk_reuse = 1; /* per default */ + jsk->addr.sa = J1939_NO_ADDR; + jsk->addr.da = J1939_NO_ADDR; + jsk->addr.pgn = J1939_NO_PGN; + jsk->pgn_rx_filter = J1939_NO_PGN; + atomic_set(&jsk->skb_pending, 0); + spin_lock_init(&jsk->sk_session_queue_lock); + INIT_LIST_HEAD(&jsk->sk_session_queue); + + return 0; +} + +static int j1939_sk_sanity_check(struct sockaddr_can *addr, int len) +{ + if (!addr) + return -EDESTADDRREQ; + if (len < J1939_MIN_NAMELEN) + return -EINVAL; + if (addr->can_family != AF_CAN) + return -EINVAL; + if (!addr->can_ifindex) + return -ENODEV; + if (j1939_pgn_is_valid(addr->can_addr.j1939.pgn) && + !j1939_pgn_is_clean_pdu(addr->can_addr.j1939.pgn)) + return -EINVAL; + + return 0; +} + +static int j1939_sk_bind(struct socket *sock, struct sockaddr *uaddr, int len) +{ + struct sockaddr_can *addr = (struct sockaddr_can *)uaddr; + struct j1939_sock *jsk = j1939_sk(sock->sk); + struct j1939_priv *priv = jsk->priv; + struct sock *sk = sock->sk; + struct net *net = sock_net(sk); + int ret = 0; + + ret = j1939_sk_sanity_check(addr, len); + if (ret) + return ret; + + lock_sock(sock->sk); + + /* Already bound to an interface? */ + if (jsk->state & J1939_SOCK_BOUND) { + /* A re-bind() to a different interface is not + * supported. 
+ */ + if (jsk->ifindex != addr->can_ifindex) { + ret = -EINVAL; + goto out_release_sock; + } + + /* drop old references */ + j1939_jsk_del(priv, jsk); + j1939_local_ecu_put(priv, jsk->addr.src_name, jsk->addr.sa); + } else { + struct net_device *ndev; + + ndev = dev_get_by_index(net, addr->can_ifindex); + if (!ndev) { + ret = -ENODEV; + goto out_release_sock; + } + + if (ndev->type != ARPHRD_CAN) { + dev_put(ndev); + ret = -ENODEV; + goto out_release_sock; + } + + priv = j1939_netdev_start(ndev); + dev_put(ndev); + if (IS_ERR(priv)) { + ret = PTR_ERR(priv); + goto out_release_sock; + } + + jsk->ifindex = addr->can_ifindex; + } + + /* set default transmit pgn */ + if (j1939_pgn_is_valid(addr->can_addr.j1939.pgn)) + jsk->pgn_rx_filter = addr->can_addr.j1939.pgn; + jsk->addr.src_name = addr->can_addr.j1939.name; + jsk->addr.sa = addr->can_addr.j1939.addr; + + /* get new references */ + ret = j1939_local_ecu_get(priv, jsk->addr.src_name, jsk->addr.sa); + if (ret) { + j1939_netdev_stop(priv); + goto out_release_sock; + } + + j1939_jsk_add(priv, jsk); + + out_release_sock: /* fall through */ + release_sock(sock->sk); + + return ret; +} + +static int j1939_sk_connect(struct socket *sock, struct sockaddr *uaddr, + int len, int flags) +{ + struct sockaddr_can *addr = (struct sockaddr_can *)uaddr; + struct j1939_sock *jsk = j1939_sk(sock->sk); + int ret = 0; + + ret = j1939_sk_sanity_check(addr, len); + if (ret) + return ret; + + lock_sock(sock->sk); + + /* bind() before connect() is mandatory */ + if (!(jsk->state & J1939_SOCK_BOUND)) { + ret = -EINVAL; + goto out_release_sock; + } + + /* A connect() to a different interface is not supported. */ + if (jsk->ifindex != addr->can_ifindex) { + ret = -EINVAL; + goto out_release_sock; + } + + if (!addr->can_addr.j1939.name && + addr->can_addr.j1939.addr == J1939_NO_ADDR && + !sock_flag(&jsk->sk, SOCK_BROADCAST)) { + /* broadcast, but SO_BROADCAST not set */ + ret = -EACCES; + goto out_release_sock; + } + + jsk->addr.dst_name = addr->can_addr.j1939.name; + jsk->addr.da = addr->can_addr.j1939.addr; + + if (j1939_pgn_is_valid(addr->can_addr.j1939.pgn)) + jsk->addr.pgn = addr->can_addr.j1939.pgn; + + jsk->state |= J1939_SOCK_CONNECTED; + + out_release_sock: /* fall through */ + release_sock(sock->sk); + + return ret; +} + +static void j1939_sk_sock2sockaddr_can(struct sockaddr_can *addr, + const struct j1939_sock *jsk, int peer) +{ + addr->can_family = AF_CAN; + addr->can_ifindex = jsk->ifindex; + addr->can_addr.j1939.pgn = jsk->addr.pgn; + if (peer) { + addr->can_addr.j1939.name = jsk->addr.dst_name; + addr->can_addr.j1939.addr = jsk->addr.da; + } else { + addr->can_addr.j1939.name = jsk->addr.src_name; + addr->can_addr.j1939.addr = jsk->addr.sa; + } +} + +static int j1939_sk_getname(struct socket *sock, struct sockaddr *uaddr, + int peer) +{ + struct sockaddr_can *addr = (struct sockaddr_can *)uaddr; + struct sock *sk = sock->sk; + struct j1939_sock *jsk = j1939_sk(sk); + int ret = 0; + + lock_sock(sk); + + if (peer && !(jsk->state & J1939_SOCK_CONNECTED)) { + ret = -EADDRNOTAVAIL; + goto failure; + } + + j1939_sk_sock2sockaddr_can(addr, jsk, peer); + ret = J1939_MIN_NAMELEN; + + failure: + release_sock(sk); + + return ret; +} + +static int j1939_sk_release(struct socket *sock) +{ + struct sock *sk = sock->sk; + struct j1939_sock *jsk; + + if (!sk) + return 0; + + jsk = j1939_sk(sk); + lock_sock(sk); + + if (jsk->state & J1939_SOCK_BOUND) { + struct j1939_priv *priv = jsk->priv; + + if (wait_event_interruptible(jsk->waitq, + 
!j1939_sock_pending_get(&jsk->sk))) { + j1939_cancel_active_session(priv, sk); + j1939_sk_queue_drop_all(priv, jsk, ESHUTDOWN); + } + + j1939_jsk_del(priv, jsk); + + j1939_local_ecu_put(priv, jsk->addr.src_name, + jsk->addr.sa); + + j1939_netdev_stop(priv); + } + + sock_orphan(sk); + sock->sk = NULL; + + release_sock(sk); + sock_put(sk); + + return 0; +} + +static int j1939_sk_setsockopt_flag(struct j1939_sock *jsk, char __user *optval, + unsigned int optlen, int flag) +{ + int tmp; + + if (optlen != sizeof(tmp)) + return -EINVAL; + if (copy_from_user(&tmp, optval, optlen)) + return -EFAULT; + lock_sock(&jsk->sk); + if (tmp) + jsk->state |= flag; + else + jsk->state &= ~flag; + release_sock(&jsk->sk); + return tmp; +} + +static int j1939_sk_setsockopt(struct socket *sock, int level, int optname, + char __user *optval, unsigned int optlen) +{ + struct sock *sk = sock->sk; + struct j1939_sock *jsk = j1939_sk(sk); + int tmp, count = 0, ret = 0; + struct j1939_filter *filters = NULL, *ofilters; + + if (level != SOL_CAN_J1939) + return -EINVAL; + + switch (optname) { + case SO_J1939_FILTER: + if (optval) { + struct j1939_filter *f; + int c; + + if (optlen % sizeof(*filters) != 0) + return -EINVAL; + + if (optlen > J1939_FILTER_MAX * + sizeof(struct j1939_filter)) + return -EINVAL; + + count = optlen / sizeof(*filters); + filters = memdup_user(optval, optlen); + if (IS_ERR(filters)) + return PTR_ERR(filters); + + for (f = filters, c = count; c; f++, c--) { + f->name &= f->name_mask; + f->pgn &= f->pgn_mask; + f->addr &= f->addr_mask; + } + } + + lock_sock(&jsk->sk); + ofilters = jsk->filters; + jsk->filters = filters; + jsk->nfilters = count; + release_sock(&jsk->sk); + kfree(ofilters); + return 0; + case SO_J1939_PROMISC: + return j1939_sk_setsockopt_flag(jsk, optval, optlen, + J1939_SOCK_PROMISC); + case SO_J1939_ERRQUEUE: + ret = j1939_sk_setsockopt_flag(jsk, optval, optlen, + J1939_SOCK_ERRQUEUE); + if (ret < 0) + return ret; + + if (!(jsk->state & J1939_SOCK_ERRQUEUE)) + skb_queue_purge(&sk->sk_error_queue); + return ret; + case SO_J1939_SEND_PRIO: + if (optlen != sizeof(tmp)) + return -EINVAL; + if (copy_from_user(&tmp, optval, optlen)) + return -EFAULT; + if (tmp < 0 || tmp > 7) + return -EDOM; + if (tmp < 2 && !capable(CAP_NET_ADMIN)) + return -EPERM; + lock_sock(&jsk->sk); + jsk->sk.sk_priority = j1939_to_sk_priority(tmp); + release_sock(&jsk->sk); + return 0; + default: + return -ENOPROTOOPT; + } +} + +static int j1939_sk_getsockopt(struct socket *sock, int level, int optname, + char __user *optval, int __user *optlen) +{ + struct sock *sk = sock->sk; + struct j1939_sock *jsk = j1939_sk(sk); + int ret, ulen; + /* set defaults for using 'int' properties */ + int tmp = 0; + int len = sizeof(tmp); + void *val = &tmp; + + if (level != SOL_CAN_J1939) + return -EINVAL; + if (get_user(ulen, optlen)) + return -EFAULT; + if (ulen < 0) + return -EINVAL; + + lock_sock(&jsk->sk); + switch (optname) { + case SO_J1939_PROMISC: + tmp = (jsk->state & J1939_SOCK_PROMISC) ? 1 : 0; + break; + case SO_J1939_ERRQUEUE: + tmp = (jsk->state & J1939_SOCK_ERRQUEUE) ? 
1 : 0; + break; + case SO_J1939_SEND_PRIO: + tmp = j1939_prio(jsk->sk.sk_priority); + break; + default: + ret = -ENOPROTOOPT; + goto no_copy; + } + + /* copy to user, based on 'len' & 'val' + * but most sockopt's are 'int' properties, and have 'len' & 'val' + * left unchanged, but instead modified 'tmp' + */ + if (len > ulen) + ret = -EFAULT; + else if (put_user(len, optlen)) + ret = -EFAULT; + else if (copy_to_user(optval, val, len)) + ret = -EFAULT; + else + ret = 0; + no_copy: + release_sock(&jsk->sk); + return ret; +} + +static int j1939_sk_recvmsg(struct socket *sock, struct msghdr *msg, + size_t size, int flags) +{ + struct sock *sk = sock->sk; + struct sk_buff *skb; + struct j1939_sk_buff_cb *skcb; + int ret = 0; + + if (flags & ~(MSG_DONTWAIT | MSG_ERRQUEUE)) + return -EINVAL; + + if (flags & MSG_ERRQUEUE) + return sock_recv_errqueue(sock->sk, msg, size, SOL_CAN_J1939, + SCM_J1939_ERRQUEUE); + + skb = skb_recv_datagram(sk, flags, 0, &ret); + if (!skb) + return ret; + + if (size < skb->len) + msg->msg_flags |= MSG_TRUNC; + else + size = skb->len; + + ret = memcpy_to_msg(msg, skb->data, size); + if (ret < 0) { + skb_free_datagram(sk, skb); + return ret; + } + + skcb = j1939_skb_to_cb(skb); + if (j1939_address_is_valid(skcb->addr.da)) + put_cmsg(msg, SOL_CAN_J1939, SCM_J1939_DEST_ADDR, + sizeof(skcb->addr.da), &skcb->addr.da); + + if (skcb->addr.dst_name) + put_cmsg(msg, SOL_CAN_J1939, SCM_J1939_DEST_NAME, + sizeof(skcb->addr.dst_name), &skcb->addr.dst_name); + + put_cmsg(msg, SOL_CAN_J1939, SCM_J1939_PRIO, + sizeof(skcb->priority), &skcb->priority); + + if (msg->msg_name) { + struct sockaddr_can *paddr = msg->msg_name; + + msg->msg_namelen = J1939_MIN_NAMELEN; + memset(msg->msg_name, 0, msg->msg_namelen); + paddr->can_family = AF_CAN; + paddr->can_ifindex = skb->skb_iif; + paddr->can_addr.j1939.name = skcb->addr.src_name; + paddr->can_addr.j1939.addr = skcb->addr.sa; + paddr->can_addr.j1939.pgn = skcb->addr.pgn; + } + + sock_recv_ts_and_drops(msg, sk, skb); + msg->msg_flags |= skcb->msg_flags; + skb_free_datagram(sk, skb); + + return size; +} + +static struct sk_buff *j1939_sk_alloc_skb(struct net_device *ndev, + struct sock *sk, + struct msghdr *msg, size_t size, + int *errcode) +{ + struct j1939_sock *jsk = j1939_sk(sk); + struct j1939_sk_buff_cb *skcb; + struct sk_buff *skb; + int ret; + + skb = sock_alloc_send_skb(sk, + size + + sizeof(struct can_frame) - + sizeof(((struct can_frame *)NULL)->data) + + sizeof(struct can_skb_priv), + msg->msg_flags & MSG_DONTWAIT, &ret); + if (!skb) + goto failure; + + can_skb_reserve(skb); + can_skb_prv(skb)->ifindex = ndev->ifindex; + can_skb_prv(skb)->skbcnt = 0; + skb_reserve(skb, offsetof(struct can_frame, data)); + + ret = memcpy_from_msg(skb_put(skb, size), msg, size); + if (ret < 0) + goto free_skb; + + skb->dev = ndev; + + skcb = j1939_skb_to_cb(skb); + memset(skcb, 0, sizeof(*skcb)); + skcb->addr = jsk->addr; + skcb->priority = j1939_prio(sk->sk_priority); + + if (msg->msg_name) { + struct sockaddr_can *addr = msg->msg_name; + + if (addr->can_addr.j1939.name || + addr->can_addr.j1939.addr != J1939_NO_ADDR) { + skcb->addr.dst_name = addr->can_addr.j1939.name; + skcb->addr.da = addr->can_addr.j1939.addr; + } + if (j1939_pgn_is_valid(addr->can_addr.j1939.pgn)) + skcb->addr.pgn = addr->can_addr.j1939.pgn; + } + + *errcode = ret; + return skb; + +free_skb: + kfree_skb(skb); +failure: + *errcode = ret; + return NULL; +} + +static size_t j1939_sk_opt_stats_get_size(void) +{ + return + nla_total_size(sizeof(u32)) + /* J1939_NLA_BYTES_ACKED */ + 
0; +} + +static struct sk_buff * +j1939_sk_get_timestamping_opt_stats(struct j1939_session *session) +{ + struct sk_buff *stats; + u32 size; + + stats = alloc_skb(j1939_sk_opt_stats_get_size(), GFP_ATOMIC); + if (!stats) + return NULL; + + if (session->skcb.addr.type == J1939_SIMPLE) + size = session->total_message_size; + else + size = min(session->pkt.tx_acked * 7, + session->total_message_size); + + nla_put_u32(stats, J1939_NLA_BYTES_ACKED, size); + + return stats; +} + +void j1939_sk_errqueue(struct j1939_session *session, + enum j1939_sk_errqueue_type type) +{ + struct j1939_priv *priv = session->priv; + struct sock *sk = session->sk; + struct j1939_sock *jsk; + struct sock_exterr_skb *serr; + struct sk_buff *skb; + char *state = "UNK"; + int err; + + /* currently we have no sk for the RX session */ + if (!sk) + return; + + jsk = j1939_sk(sk); + + if (!(jsk->state & J1939_SOCK_ERRQUEUE)) + return; + + skb = j1939_sk_get_timestamping_opt_stats(session); + if (!skb) + return; + + skb->tstamp = ktime_get_real(); + + BUILD_BUG_ON(sizeof(struct sock_exterr_skb) > sizeof(skb->cb)); + + serr = SKB_EXT_ERR(skb); + memset(serr, 0, sizeof(*serr)); + switch (type) { + case J1939_ERRQUEUE_ACK: + if (!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_ACK)) + return; + + serr->ee.ee_errno = ENOMSG; + serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING; + serr->ee.ee_info = SCM_TSTAMP_ACK; + state = "ACK"; + break; + case J1939_ERRQUEUE_SCHED: + if (!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_SCHED)) + return; + + serr->ee.ee_errno = ENOMSG; + serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING; + serr->ee.ee_info = SCM_TSTAMP_SCHED; + state = "SCH"; + break; + case J1939_ERRQUEUE_ABORT: + serr->ee.ee_errno = session->err; + serr->ee.ee_origin = SO_EE_ORIGIN_LOCAL; + serr->ee.ee_info = J1939_EE_INFO_TX_ABORT; + state = "ABT"; + break; + default: + netdev_err(priv->ndev, "Unknown errqueue type %i\n", type); + } + + serr->opt_stats = true; + if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) + serr->ee.ee_data = session->tskey; + + netdev_dbg(session->priv->ndev, "%s: 0x%p tskey: %i, state: %s\n", + __func__, session, session->tskey, state); + err = sock_queue_err_skb(sk, skb); + + if (err) + kfree_skb(skb); +}; + +void j1939_sk_send_loop_abort(struct sock *sk, int err) +{ + sk->sk_err = err; + + sk->sk_error_report(sk); +} + +static int j1939_sk_send_loop(struct j1939_priv *priv, struct sock *sk, + struct msghdr *msg, size_t size) + +{ + struct j1939_sock *jsk = j1939_sk(sk); + struct j1939_session *session = j1939_sk_get_incomplete_session(jsk); + struct sk_buff *skb; + size_t segment_size, todo_size; + int ret = 0; + + if (session && + session->total_message_size != session->total_queued_size + size) { + j1939_session_put(session); + return -EIO; + } + + todo_size = size; + + while (todo_size) { + struct j1939_sk_buff_cb *skcb; + + segment_size = min_t(size_t, J1939_MAX_TP_PACKET_SIZE, + todo_size); + + /* Allocate skb for one segment */ + skb = j1939_sk_alloc_skb(priv->ndev, sk, msg, segment_size, + &ret); + if (ret) + break; + + skcb = j1939_skb_to_cb(skb); + + if (!session) { + /* at this point the size should be full size + * of the session + */ + skcb->offset = 0; + session = j1939_tp_send(priv, skb, size); + if (IS_ERR(session)) { + ret = PTR_ERR(session); + goto kfree_skb; + } + if (j1939_sk_queue_session(session)) { + /* try to activate session if we a + * fist in the queue + */ + if (!j1939_session_activate(session)) { + j1939_tp_schedule_txtimer(session, 0); + } else { + ret = -EBUSY; + session->err = ret; + 
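/*
 * A userspace view of the send path above: a plain sendto() is what ends
 * up in j1939_sk_send_loop(); payloads that do not fit a single CAN frame
 * are queued as a (E)TP session and segmented into 7-byte data packets by
 * the transport code in transport.c below.  Socket creation and bind are
 * as in the previous sketch; the destination address and PGN here are
 * arbitrary examples.
 */
#include <linux/can.h>
#include <linux/can/j1939.h>
#include <sys/socket.h>
#include <sys/types.h>

static ssize_t j1939_send_block(int sock, const void *buf, size_t len)
{
	struct sockaddr_can daddr = {
		.can_family = AF_CAN,
		.can_addr.j1939 = {
			.name = J1939_NO_NAME,
			.addr = 0x30,		/* destination address */
			.pgn = 0x12300,		/* a "clean" PDU1 PGN (low byte 0) */
		},
	};

	/* Up to 8 bytes goes out as a single frame (J1939_SIMPLE); larger
	 * payloads use TP (max 7 * 0xff = 1785 bytes) or ETP beyond that,
	 * with RTS/CTS flow control handled entirely in the kernel.
	 */
	return sendto(sock, buf, len, 0,
		      (struct sockaddr *)&daddr, sizeof(daddr));
}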
j1939_sk_queue_drop_all(priv, jsk, + EBUSY); + break; + } + } + } else { + skcb->offset = session->total_queued_size; + j1939_session_skb_queue(session, skb); + } + + todo_size -= segment_size; + session->total_queued_size += segment_size; + } + + switch (ret) { + case 0: /* OK */ + if (todo_size) + netdev_warn(priv->ndev, + "no error found and not completely queued?! %zu\n", + todo_size); + ret = size; + break; + case -ERESTARTSYS: + ret = -EINTR; + /* fall through */ + case -EAGAIN: /* OK */ + if (todo_size != size) + ret = size - todo_size; + break; + default: /* ERROR */ + break; + } + + if (session) + j1939_session_put(session); + + return ret; + + kfree_skb: + kfree_skb(skb); + return ret; +} + +static int j1939_sk_sendmsg(struct socket *sock, struct msghdr *msg, + size_t size) +{ + struct sock *sk = sock->sk; + struct j1939_sock *jsk = j1939_sk(sk); + struct j1939_priv *priv = jsk->priv; + int ifindex; + int ret; + + /* various socket state tests */ + if (!(jsk->state & J1939_SOCK_BOUND)) + return -EBADFD; + + ifindex = jsk->ifindex; + + if (!jsk->addr.src_name && jsk->addr.sa == J1939_NO_ADDR) + /* no source address assigned yet */ + return -EBADFD; + + /* deal with provided destination address info */ + if (msg->msg_name) { + struct sockaddr_can *addr = msg->msg_name; + + if (msg->msg_namelen < J1939_MIN_NAMELEN) + return -EINVAL; + + if (addr->can_family != AF_CAN) + return -EINVAL; + + if (addr->can_ifindex && addr->can_ifindex != ifindex) + return -EBADFD; + + if (j1939_pgn_is_valid(addr->can_addr.j1939.pgn) && + !j1939_pgn_is_clean_pdu(addr->can_addr.j1939.pgn)) + return -EINVAL; + + if (!addr->can_addr.j1939.name && + addr->can_addr.j1939.addr == J1939_NO_ADDR && + !sock_flag(sk, SOCK_BROADCAST)) + /* broadcast, but SO_BROADCAST not set */ + return -EACCES; + } else { + if (!jsk->addr.dst_name && jsk->addr.da == J1939_NO_ADDR && + !sock_flag(sk, SOCK_BROADCAST)) + /* broadcast, but SO_BROADCAST not set */ + return -EACCES; + } + + ret = j1939_sk_send_loop(priv, sk, msg, size); + + return ret; +} + +void j1939_sk_netdev_event_netdown(struct j1939_priv *priv) +{ + struct j1939_sock *jsk; + int error_code = ENETDOWN; + + spin_lock_bh(&priv->j1939_socks_lock); + list_for_each_entry(jsk, &priv->j1939_socks, list) { + jsk->sk.sk_err = error_code; + if (!sock_flag(&jsk->sk, SOCK_DEAD)) + jsk->sk.sk_error_report(&jsk->sk); + + j1939_sk_queue_drop_all(priv, jsk, error_code); + } + spin_unlock_bh(&priv->j1939_socks_lock); +} + +static int j1939_sk_no_ioctlcmd(struct socket *sock, unsigned int cmd, + unsigned long arg) +{ + /* no ioctls for socket layer -> hand it down to NIC layer */ + return -ENOIOCTLCMD; +} + +static const struct proto_ops j1939_ops = { + .family = PF_CAN, + .release = j1939_sk_release, + .bind = j1939_sk_bind, + .connect = j1939_sk_connect, + .socketpair = sock_no_socketpair, + .accept = sock_no_accept, + .getname = j1939_sk_getname, + .poll = datagram_poll, + .ioctl = j1939_sk_no_ioctlcmd, + .listen = sock_no_listen, + .shutdown = sock_no_shutdown, + .setsockopt = j1939_sk_setsockopt, + .getsockopt = j1939_sk_getsockopt, + .sendmsg = j1939_sk_sendmsg, + .recvmsg = j1939_sk_recvmsg, + .mmap = sock_no_mmap, + .sendpage = sock_no_sendpage, +}; + +static struct proto j1939_proto __read_mostly = { + .name = "CAN_J1939", + .owner = THIS_MODULE, + .obj_size = sizeof(struct j1939_sock), + .init = j1939_sk_init, +}; + +const struct can_proto j1939_can_proto = { + .type = SOCK_DGRAM, + .protocol = CAN_J1939, + .ops = &j1939_ops, + .prot = &j1939_proto, +}; diff --git 
a/net/can/j1939/transport.c b/net/can/j1939/transport.c new file mode 100644 index 000000000000..fe000ea757ea --- /dev/null +++ b/net/can/j1939/transport.c @@ -0,0 +1,2027 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2010-2011 EIA Electronics, +// Kurt Van Dijck <kurt.van.dijck@eia.be> +// Copyright (c) 2018 Protonic, +// Robin van der Gracht <robin@protonic.nl> +// Copyright (c) 2017-2019 Pengutronix, +// Marc Kleine-Budde <kernel@pengutronix.de> +// Copyright (c) 2017-2019 Pengutronix, +// Oleksij Rempel <kernel@pengutronix.de> + +#include <linux/can/skb.h> + +#include "j1939-priv.h" + +#define J1939_XTP_TX_RETRY_LIMIT 100 + +#define J1939_ETP_PGN_CTL 0xc800 +#define J1939_ETP_PGN_DAT 0xc700 +#define J1939_TP_PGN_CTL 0xec00 +#define J1939_TP_PGN_DAT 0xeb00 + +#define J1939_TP_CMD_RTS 0x10 +#define J1939_TP_CMD_CTS 0x11 +#define J1939_TP_CMD_EOMA 0x13 +#define J1939_TP_CMD_BAM 0x20 +#define J1939_TP_CMD_ABORT 0xff + +#define J1939_ETP_CMD_RTS 0x14 +#define J1939_ETP_CMD_CTS 0x15 +#define J1939_ETP_CMD_DPO 0x16 +#define J1939_ETP_CMD_EOMA 0x17 +#define J1939_ETP_CMD_ABORT 0xff + +enum j1939_xtp_abort { + J1939_XTP_NO_ABORT = 0, + J1939_XTP_ABORT_BUSY = 1, + /* Already in one or more connection managed sessions and + * cannot support another. + * + * EALREADY: + * Operation already in progress + */ + + J1939_XTP_ABORT_RESOURCE = 2, + /* System resources were needed for another task so this + * connection managed session was terminated. + * + * EMSGSIZE: + * The socket type requires that message be sent atomically, + * and the size of the message to be sent made this + * impossible. + */ + + J1939_XTP_ABORT_TIMEOUT = 3, + /* A timeout occurred and this is the connection abort to + * close the session. + * + * EHOSTUNREACH: + * The destination host cannot be reached (probably because + * the host is down or a remote router cannot reach it). + */ + + J1939_XTP_ABORT_GENERIC = 4, + /* CTS messages received when data transfer is in progress + * + * EBADMSG: + * Not a data message + */ + + J1939_XTP_ABORT_FAULT = 5, + /* Maximal retransmit request limit reached + * + * ENOTRECOVERABLE: + * State not recoverable + */ + + J1939_XTP_ABORT_UNEXPECTED_DATA = 6, + /* Unexpected data transfer packet + * + * ENOTCONN: + * Transport endpoint is not connected + */ + + J1939_XTP_ABORT_BAD_SEQ = 7, + /* Bad sequence number (and software is not able to recover) + * + * EILSEQ: + * Illegal byte sequence + */ + + J1939_XTP_ABORT_DUP_SEQ = 8, + /* Duplicate sequence number (and software is not able to + * recover) + */ + + J1939_XTP_ABORT_EDPO_UNEXPECTED = 9, + /* Unexpected EDPO packet (ETP) or Message size > 1785 bytes + * (TP) + */ + + J1939_XTP_ABORT_BAD_EDPO_PGN = 10, + /* Unexpected EDPO PGN (PGN in EDPO is bad) */ + + J1939_XTP_ABORT_EDPO_OUTOF_CTS = 11, + /* EDPO number of packets is greater than CTS */ + + J1939_XTP_ABORT_BAD_EDPO_OFFSET = 12, + /* Bad EDPO offset */ + + J1939_XTP_ABORT_OTHER_DEPRECATED = 13, + /* Deprecated. 
Use 250 instead (Any other reason) */ + + J1939_XTP_ABORT_ECTS_UNXPECTED_PGN = 14, + /* Unexpected ECTS PGN (PGN in ECTS is bad) */ + + J1939_XTP_ABORT_ECTS_TOO_BIG = 15, + /* ECTS requested packets exceeds message size */ + + J1939_XTP_ABORT_OTHER = 250, + /* Any other reason (if a Connection Abort reason is + * identified that is not listed in the table use code 250) + */ +}; + +static unsigned int j1939_tp_block = 255; +static unsigned int j1939_tp_packet_delay; +static unsigned int j1939_tp_padding = 1; + +/* helpers */ +static const char *j1939_xtp_abort_to_str(enum j1939_xtp_abort abort) +{ + switch (abort) { + case J1939_XTP_ABORT_BUSY: + return "Already in one or more connection managed sessions and cannot support another."; + case J1939_XTP_ABORT_RESOURCE: + return "System resources were needed for another task so this connection managed session was terminated."; + case J1939_XTP_ABORT_TIMEOUT: + return "A timeout occurred and this is the connection abort to close the session."; + case J1939_XTP_ABORT_GENERIC: + return "CTS messages received when data transfer is in progress"; + case J1939_XTP_ABORT_FAULT: + return "Maximal retransmit request limit reached"; + case J1939_XTP_ABORT_UNEXPECTED_DATA: + return "Unexpected data transfer packet"; + case J1939_XTP_ABORT_BAD_SEQ: + return "Bad sequence number (and software is not able to recover)"; + case J1939_XTP_ABORT_DUP_SEQ: + return "Duplicate sequence number (and software is not able to recover)"; + case J1939_XTP_ABORT_EDPO_UNEXPECTED: + return "Unexpected EDPO packet (ETP) or Message size > 1785 bytes (TP)"; + case J1939_XTP_ABORT_BAD_EDPO_PGN: + return "Unexpected EDPO PGN (PGN in EDPO is bad)"; + case J1939_XTP_ABORT_EDPO_OUTOF_CTS: + return "EDPO number of packets is greater than CTS"; + case J1939_XTP_ABORT_BAD_EDPO_OFFSET: + return "Bad EDPO offset"; + case J1939_XTP_ABORT_OTHER_DEPRECATED: + return "Deprecated. 
Use 250 instead (Any other reason)"; + case J1939_XTP_ABORT_ECTS_UNXPECTED_PGN: + return "Unexpected ECTS PGN (PGN in ECTS is bad)"; + case J1939_XTP_ABORT_ECTS_TOO_BIG: + return "ECTS requested packets exceeds message size"; + case J1939_XTP_ABORT_OTHER: + return "Any other reason (if a Connection Abort reason is identified that is not listed in the table use code 250)"; + default: + return "<unknown>"; + } +} + +static int j1939_xtp_abort_to_errno(struct j1939_priv *priv, + enum j1939_xtp_abort abort) +{ + int err; + + switch (abort) { + case J1939_XTP_NO_ABORT: + WARN_ON_ONCE(abort == J1939_XTP_NO_ABORT); + err = 0; + break; + case J1939_XTP_ABORT_BUSY: + err = EALREADY; + break; + case J1939_XTP_ABORT_RESOURCE: + err = EMSGSIZE; + break; + case J1939_XTP_ABORT_TIMEOUT: + err = EHOSTUNREACH; + break; + case J1939_XTP_ABORT_GENERIC: + err = EBADMSG; + break; + case J1939_XTP_ABORT_FAULT: + err = ENOTRECOVERABLE; + break; + case J1939_XTP_ABORT_UNEXPECTED_DATA: + err = ENOTCONN; + break; + case J1939_XTP_ABORT_BAD_SEQ: + err = EILSEQ; + break; + case J1939_XTP_ABORT_DUP_SEQ: + err = EPROTO; + break; + case J1939_XTP_ABORT_EDPO_UNEXPECTED: + err = EPROTO; + break; + case J1939_XTP_ABORT_BAD_EDPO_PGN: + err = EPROTO; + break; + case J1939_XTP_ABORT_EDPO_OUTOF_CTS: + err = EPROTO; + break; + case J1939_XTP_ABORT_BAD_EDPO_OFFSET: + err = EPROTO; + break; + case J1939_XTP_ABORT_OTHER_DEPRECATED: + err = EPROTO; + break; + case J1939_XTP_ABORT_ECTS_UNXPECTED_PGN: + err = EPROTO; + break; + case J1939_XTP_ABORT_ECTS_TOO_BIG: + err = EPROTO; + break; + case J1939_XTP_ABORT_OTHER: + err = EPROTO; + break; + default: + netdev_warn(priv->ndev, "Unknown abort code %i", abort); + err = EPROTO; + } + + return err; +} + +static inline void j1939_session_list_lock(struct j1939_priv *priv) +{ + spin_lock_bh(&priv->active_session_list_lock); +} + +static inline void j1939_session_list_unlock(struct j1939_priv *priv) +{ + spin_unlock_bh(&priv->active_session_list_lock); +} + +void j1939_session_get(struct j1939_session *session) +{ + kref_get(&session->kref); +} + +/* session completion functions */ +static void __j1939_session_drop(struct j1939_session *session) +{ + if (!session->transmission) + return; + + j1939_sock_pending_del(session->sk); +} + +static void j1939_session_destroy(struct j1939_session *session) +{ + if (session->err) + j1939_sk_errqueue(session, J1939_ERRQUEUE_ABORT); + else + j1939_sk_errqueue(session, J1939_ERRQUEUE_ACK); + + netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session); + + skb_queue_purge(&session->skb_queue); + __j1939_session_drop(session); + j1939_priv_put(session->priv); + kfree(session); +} + +static void __j1939_session_release(struct kref *kref) +{ + struct j1939_session *session = container_of(kref, struct j1939_session, + kref); + + j1939_session_destroy(session); +} + +void j1939_session_put(struct j1939_session *session) +{ + kref_put(&session->kref, __j1939_session_release); +} + +static void j1939_session_txtimer_cancel(struct j1939_session *session) +{ + if (hrtimer_cancel(&session->txtimer)) + j1939_session_put(session); +} + +static void j1939_session_rxtimer_cancel(struct j1939_session *session) +{ + if (hrtimer_cancel(&session->rxtimer)) + j1939_session_put(session); +} + +void j1939_session_timers_cancel(struct j1939_session *session) +{ + j1939_session_txtimer_cancel(session); + j1939_session_rxtimer_cancel(session); +} + +static inline bool j1939_cb_is_broadcast(const struct j1939_sk_buff_cb *skcb) +{ + return (!skcb->addr.dst_name && 
(skcb->addr.da == 0xff)); +} + +static void j1939_session_skb_drop_old(struct j1939_session *session) +{ + struct sk_buff *do_skb; + struct j1939_sk_buff_cb *do_skcb; + unsigned int offset_start; + unsigned long flags; + + if (skb_queue_len(&session->skb_queue) < 2) + return; + + offset_start = session->pkt.tx_acked * 7; + + spin_lock_irqsave(&session->skb_queue.lock, flags); + do_skb = skb_peek(&session->skb_queue); + do_skcb = j1939_skb_to_cb(do_skb); + + if ((do_skcb->offset + do_skb->len) < offset_start) { + __skb_unlink(do_skb, &session->skb_queue); + kfree_skb(do_skb); + } + spin_unlock_irqrestore(&session->skb_queue.lock, flags); +} + +void j1939_session_skb_queue(struct j1939_session *session, + struct sk_buff *skb) +{ + struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb); + struct j1939_priv *priv = session->priv; + + j1939_ac_fixup(priv, skb); + + if (j1939_address_is_unicast(skcb->addr.da) && + priv->ents[skcb->addr.da].nusers) + skcb->flags |= J1939_ECU_LOCAL_DST; + + skcb->flags |= J1939_ECU_LOCAL_SRC; + + skb_queue_tail(&session->skb_queue, skb); +} + +static struct sk_buff *j1939_session_skb_find(struct j1939_session *session) +{ + struct j1939_priv *priv = session->priv; + struct sk_buff *skb = NULL; + struct sk_buff *do_skb; + struct j1939_sk_buff_cb *do_skcb; + unsigned int offset_start; + unsigned long flags; + + offset_start = session->pkt.dpo * 7; + + spin_lock_irqsave(&session->skb_queue.lock, flags); + skb_queue_walk(&session->skb_queue, do_skb) { + do_skcb = j1939_skb_to_cb(do_skb); + + if (offset_start >= do_skcb->offset && + offset_start < (do_skcb->offset + do_skb->len)) { + skb = do_skb; + } + } + spin_unlock_irqrestore(&session->skb_queue.lock, flags); + + if (!skb) + netdev_dbg(priv->ndev, "%s: 0x%p: no skb found for start: %i, queue size: %i\n", + __func__, session, offset_start, + skb_queue_len(&session->skb_queue)); + + return skb; +} + +/* see if we are receiver + * returns 0 for broadcasts, although we will receive them + */ +static inline int j1939_tp_im_receiver(const struct j1939_sk_buff_cb *skcb) +{ + return skcb->flags & J1939_ECU_LOCAL_DST; +} + +/* see if we are sender */ +static inline int j1939_tp_im_transmitter(const struct j1939_sk_buff_cb *skcb) +{ + return skcb->flags & J1939_ECU_LOCAL_SRC; +} + +/* see if we are involved as either receiver or transmitter */ +static int j1939_tp_im_involved(const struct j1939_sk_buff_cb *skcb, bool swap) +{ + if (swap) + return j1939_tp_im_receiver(skcb); + else + return j1939_tp_im_transmitter(skcb); +} + +static int j1939_tp_im_involved_anydir(struct j1939_sk_buff_cb *skcb) +{ + return skcb->flags & (J1939_ECU_LOCAL_SRC | J1939_ECU_LOCAL_DST); +} + +/* extract pgn from flow-ctl message */ +static inline pgn_t j1939_xtp_ctl_to_pgn(const u8 *dat) +{ + pgn_t pgn; + + pgn = (dat[7] << 16) | (dat[6] << 8) | (dat[5] << 0); + if (j1939_pgn_is_pdu1(pgn)) + pgn &= 0xffff00; + return pgn; +} + +static inline unsigned int j1939_tp_ctl_to_size(const u8 *dat) +{ + return (dat[2] << 8) + (dat[1] << 0); +} + +static inline unsigned int j1939_etp_ctl_to_packet(const u8 *dat) +{ + return (dat[4] << 16) | (dat[3] << 8) | (dat[2] << 0); +} + +static inline unsigned int j1939_etp_ctl_to_size(const u8 *dat) +{ + return (dat[4] << 24) | (dat[3] << 16) | + (dat[2] << 8) | (dat[1] << 0); +} + +/* find existing session: + * reverse: swap cb's src & dst + * there is no problem with matching broadcasts, since + * broadcasts (no dst, no da) would never call this + * with reverse == true + */ +static bool j1939_session_match(struct 
j1939_addr *se_addr, + struct j1939_addr *sk_addr, bool reverse) +{ + if (se_addr->type != sk_addr->type) + return false; + + if (reverse) { + if (se_addr->src_name) { + if (se_addr->src_name != sk_addr->dst_name) + return false; + } else if (se_addr->sa != sk_addr->da) { + return false; + } + + if (se_addr->dst_name) { + if (se_addr->dst_name != sk_addr->src_name) + return false; + } else if (se_addr->da != sk_addr->sa) { + return false; + } + } else { + if (se_addr->src_name) { + if (se_addr->src_name != sk_addr->src_name) + return false; + } else if (se_addr->sa != sk_addr->sa) { + return false; + } + + if (se_addr->dst_name) { + if (se_addr->dst_name != sk_addr->dst_name) + return false; + } else if (se_addr->da != sk_addr->da) { + return false; + } + } + + return true; +} + +static struct +j1939_session *j1939_session_get_by_addr_locked(struct j1939_priv *priv, + struct list_head *root, + struct j1939_addr *addr, + bool reverse, bool transmitter) +{ + struct j1939_session *session; + + lockdep_assert_held(&priv->active_session_list_lock); + + list_for_each_entry(session, root, active_session_list_entry) { + j1939_session_get(session); + if (j1939_session_match(&session->skcb.addr, addr, reverse) && + session->transmission == transmitter) + return session; + j1939_session_put(session); + } + + return NULL; +} + +static struct +j1939_session *j1939_session_get_simple(struct j1939_priv *priv, + struct sk_buff *skb) +{ + struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb); + struct j1939_session *session; + + lockdep_assert_held(&priv->active_session_list_lock); + + list_for_each_entry(session, &priv->active_session_list, + active_session_list_entry) { + j1939_session_get(session); + if (session->skcb.addr.type == J1939_SIMPLE && + session->tskey == skcb->tskey && session->sk == skb->sk) + return session; + j1939_session_put(session); + } + + return NULL; +} + +static struct +j1939_session *j1939_session_get_by_addr(struct j1939_priv *priv, + struct j1939_addr *addr, + bool reverse, bool transmitter) +{ + struct j1939_session *session; + + j1939_session_list_lock(priv); + session = j1939_session_get_by_addr_locked(priv, + &priv->active_session_list, + addr, reverse, transmitter); + j1939_session_list_unlock(priv); + + return session; +} + +static void j1939_skbcb_swap(struct j1939_sk_buff_cb *skcb) +{ + u8 tmp = 0; + + swap(skcb->addr.dst_name, skcb->addr.src_name); + swap(skcb->addr.da, skcb->addr.sa); + + /* swap SRC and DST flags, leave other untouched */ + if (skcb->flags & J1939_ECU_LOCAL_SRC) + tmp |= J1939_ECU_LOCAL_DST; + if (skcb->flags & J1939_ECU_LOCAL_DST) + tmp |= J1939_ECU_LOCAL_SRC; + skcb->flags &= ~(J1939_ECU_LOCAL_SRC | J1939_ECU_LOCAL_DST); + skcb->flags |= tmp; +} + +static struct +sk_buff *j1939_tp_tx_dat_new(struct j1939_priv *priv, + const struct j1939_sk_buff_cb *re_skcb, + bool ctl, + bool swap_src_dst) +{ + struct sk_buff *skb; + struct j1939_sk_buff_cb *skcb; + + skb = alloc_skb(sizeof(struct can_frame) + sizeof(struct can_skb_priv), + GFP_ATOMIC); + if (unlikely(!skb)) + return ERR_PTR(-ENOMEM); + + skb->dev = priv->ndev; + can_skb_reserve(skb); + can_skb_prv(skb)->ifindex = priv->ndev->ifindex; + /* reserve CAN header */ + skb_reserve(skb, offsetof(struct can_frame, data)); + + memcpy(skb->cb, re_skcb, sizeof(skb->cb)); + skcb = j1939_skb_to_cb(skb); + if (swap_src_dst) + j1939_skbcb_swap(skcb); + + if (ctl) { + if (skcb->addr.type == J1939_ETP) + skcb->addr.pgn = J1939_ETP_PGN_CTL; + else + skcb->addr.pgn = J1939_TP_PGN_CTL; + } else { + if (skcb->addr.type 
== J1939_ETP) + skcb->addr.pgn = J1939_ETP_PGN_DAT; + else + skcb->addr.pgn = J1939_TP_PGN_DAT; + } + + return skb; +} + +/* TP transmit packet functions */ +static int j1939_tp_tx_dat(struct j1939_session *session, + const u8 *dat, int len) +{ + struct j1939_priv *priv = session->priv; + struct sk_buff *skb; + + skb = j1939_tp_tx_dat_new(priv, &session->skcb, + false, false); + if (IS_ERR(skb)) + return PTR_ERR(skb); + + skb_put_data(skb, dat, len); + if (j1939_tp_padding && len < 8) + memset(skb_put(skb, 8 - len), 0xff, 8 - len); + + return j1939_send_one(priv, skb); +} + +static int j1939_xtp_do_tx_ctl(struct j1939_priv *priv, + const struct j1939_sk_buff_cb *re_skcb, + bool swap_src_dst, pgn_t pgn, const u8 *dat) +{ + struct sk_buff *skb; + u8 *skdat; + + if (!j1939_tp_im_involved(re_skcb, swap_src_dst)) + return 0; + + skb = j1939_tp_tx_dat_new(priv, re_skcb, true, swap_src_dst); + if (IS_ERR(skb)) + return PTR_ERR(skb); + + skdat = skb_put(skb, 8); + memcpy(skdat, dat, 5); + skdat[5] = (pgn >> 0); + skdat[6] = (pgn >> 8); + skdat[7] = (pgn >> 16); + + return j1939_send_one(priv, skb); +} + +static inline int j1939_tp_tx_ctl(struct j1939_session *session, + bool swap_src_dst, const u8 *dat) +{ + struct j1939_priv *priv = session->priv; + + return j1939_xtp_do_tx_ctl(priv, &session->skcb, + swap_src_dst, + session->skcb.addr.pgn, dat); +} + +static int j1939_xtp_tx_abort(struct j1939_priv *priv, + const struct j1939_sk_buff_cb *re_skcb, + bool swap_src_dst, + enum j1939_xtp_abort err, + pgn_t pgn) +{ + u8 dat[5]; + + if (!j1939_tp_im_involved(re_skcb, swap_src_dst)) + return 0; + + memset(dat, 0xff, sizeof(dat)); + dat[0] = J1939_TP_CMD_ABORT; + dat[1] = err; + return j1939_xtp_do_tx_ctl(priv, re_skcb, swap_src_dst, pgn, dat); +} + +void j1939_tp_schedule_txtimer(struct j1939_session *session, int msec) +{ + j1939_session_get(session); + hrtimer_start(&session->txtimer, ms_to_ktime(msec), + HRTIMER_MODE_REL_SOFT); +} + +static inline void j1939_tp_set_rxtimeout(struct j1939_session *session, + int msec) +{ + j1939_session_rxtimer_cancel(session); + j1939_session_get(session); + hrtimer_start(&session->rxtimer, ms_to_ktime(msec), + HRTIMER_MODE_REL_SOFT); +} + +static int j1939_session_tx_rts(struct j1939_session *session) +{ + u8 dat[8]; + int ret; + + memset(dat, 0xff, sizeof(dat)); + + dat[1] = (session->total_message_size >> 0); + dat[2] = (session->total_message_size >> 8); + dat[3] = session->pkt.total; + + if (session->skcb.addr.type == J1939_ETP) { + dat[0] = J1939_ETP_CMD_RTS; + dat[1] = (session->total_message_size >> 0); + dat[2] = (session->total_message_size >> 8); + dat[3] = (session->total_message_size >> 16); + dat[4] = (session->total_message_size >> 24); + } else if (j1939_cb_is_broadcast(&session->skcb)) { + dat[0] = J1939_TP_CMD_BAM; + /* fake cts for broadcast */ + session->pkt.tx = 0; + } else { + dat[0] = J1939_TP_CMD_RTS; + dat[4] = dat[3]; + } + + if (dat[0] == session->last_txcmd) + /* done already */ + return 0; + + ret = j1939_tp_tx_ctl(session, false, dat); + if (ret < 0) + return ret; + + session->last_txcmd = dat[0]; + if (dat[0] == J1939_TP_CMD_BAM) + j1939_tp_schedule_txtimer(session, 50); + + j1939_tp_set_rxtimeout(session, 1250); + + netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session); + + return 0; +} + +static int j1939_session_tx_dpo(struct j1939_session *session) +{ + unsigned int pkt; + u8 dat[8]; + int ret; + + memset(dat, 0xff, sizeof(dat)); + + dat[0] = J1939_ETP_CMD_DPO; + session->pkt.dpo = session->pkt.tx_acked; + pkt = 
session->pkt.dpo; + dat[1] = session->pkt.last - session->pkt.tx_acked; + dat[2] = (pkt >> 0); + dat[3] = (pkt >> 8); + dat[4] = (pkt >> 16); + + ret = j1939_tp_tx_ctl(session, false, dat); + if (ret < 0) + return ret; + + session->last_txcmd = dat[0]; + j1939_tp_set_rxtimeout(session, 1250); + session->pkt.tx = session->pkt.tx_acked; + + netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session); + + return 0; +} + +static int j1939_session_tx_dat(struct j1939_session *session) +{ + struct j1939_priv *priv = session->priv; + struct j1939_sk_buff_cb *skcb; + int offset, pkt_done, pkt_end; + unsigned int len, pdelay; + struct sk_buff *se_skb; + const u8 *tpdat; + int ret = 0; + u8 dat[8]; + + se_skb = j1939_session_skb_find(session); + if (!se_skb) + return -ENOBUFS; + + skcb = j1939_skb_to_cb(se_skb); + tpdat = se_skb->data; + ret = 0; + pkt_done = 0; + if (session->skcb.addr.type != J1939_ETP && + j1939_cb_is_broadcast(&session->skcb)) + pkt_end = session->pkt.total; + else + pkt_end = session->pkt.last; + + while (session->pkt.tx < pkt_end) { + dat[0] = session->pkt.tx - session->pkt.dpo + 1; + offset = (session->pkt.tx * 7) - skcb->offset; + len = se_skb->len - offset; + if (len > 7) + len = 7; + + memcpy(&dat[1], &tpdat[offset], len); + ret = j1939_tp_tx_dat(session, dat, len + 1); + if (ret < 0) { + /* ENOBUFS == CAN interface TX queue is full */ + if (ret != -ENOBUFS) + netdev_alert(priv->ndev, + "%s: 0x%p: queue data error: %i\n", + __func__, session, ret); + break; + } + + session->last_txcmd = 0xff; + pkt_done++; + session->pkt.tx++; + pdelay = j1939_cb_is_broadcast(&session->skcb) ? 50 : + j1939_tp_packet_delay; + + if (session->pkt.tx < session->pkt.total && pdelay) { + j1939_tp_schedule_txtimer(session, pdelay); + break; + } + } + + if (pkt_done) + j1939_tp_set_rxtimeout(session, 250); + + return ret; +} + +static int j1939_xtp_txnext_transmiter(struct j1939_session *session) +{ + struct j1939_priv *priv = session->priv; + int ret = 0; + + if (!j1939_tp_im_transmitter(&session->skcb)) { + netdev_alert(priv->ndev, "%s: 0x%p: called by not transmitter!\n", + __func__, session); + return -EINVAL; + } + + switch (session->last_cmd) { + case 0: + ret = j1939_session_tx_rts(session); + break; + + case J1939_ETP_CMD_CTS: + if (session->last_txcmd != J1939_ETP_CMD_DPO) { + ret = j1939_session_tx_dpo(session); + if (ret) + return ret; + } + + /* fall through */ + case J1939_TP_CMD_CTS: + case 0xff: /* did some data */ + case J1939_ETP_CMD_DPO: + case J1939_TP_CMD_BAM: + ret = j1939_session_tx_dat(session); + + break; + default: + netdev_alert(priv->ndev, "%s: 0x%p: unexpected last_cmd: %x\n", + __func__, session, session->last_cmd); + } + + return ret; +} + +static int j1939_session_tx_cts(struct j1939_session *session) +{ + struct j1939_priv *priv = session->priv; + unsigned int pkt, len; + int ret; + u8 dat[8]; + + if (!j1939_sk_recv_match(priv, &session->skcb)) + return -ENOENT; + + len = session->pkt.total - session->pkt.rx; + len = min3(len, session->pkt.block, j1939_tp_block ?: 255); + memset(dat, 0xff, sizeof(dat)); + + if (session->skcb.addr.type == J1939_ETP) { + pkt = session->pkt.rx + 1; + dat[0] = J1939_ETP_CMD_CTS; + dat[1] = len; + dat[2] = (pkt >> 0); + dat[3] = (pkt >> 8); + dat[4] = (pkt >> 16); + } else { + dat[0] = J1939_TP_CMD_CTS; + dat[1] = len; + dat[2] = session->pkt.rx + 1; + } + + if (dat[0] == session->last_txcmd) + /* done already */ + return 0; + + ret = j1939_tp_tx_ctl(session, true, dat); + if (ret < 0) + return ret; + + if (len) + /* only mark cts
done when len is set */ + session->last_txcmd = dat[0]; + j1939_tp_set_rxtimeout(session, 1250); + + netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session); + + return 0; +} + +static int j1939_session_tx_eoma(struct j1939_session *session) +{ + struct j1939_priv *priv = session->priv; + u8 dat[8]; + int ret; + + if (!j1939_sk_recv_match(priv, &session->skcb)) + return -ENOENT; + + memset(dat, 0xff, sizeof(dat)); + + if (session->skcb.addr.type == J1939_ETP) { + dat[0] = J1939_ETP_CMD_EOMA; + dat[1] = session->total_message_size >> 0; + dat[2] = session->total_message_size >> 8; + dat[3] = session->total_message_size >> 16; + dat[4] = session->total_message_size >> 24; + } else { + dat[0] = J1939_TP_CMD_EOMA; + dat[1] = session->total_message_size; + dat[2] = session->total_message_size >> 8; + dat[3] = session->pkt.total; + } + + if (dat[0] == session->last_txcmd) + /* done already */ + return 0; + + ret = j1939_tp_tx_ctl(session, true, dat); + if (ret < 0) + return ret; + + session->last_txcmd = dat[0]; + + /* wait for the EOMA packet to come in */ + j1939_tp_set_rxtimeout(session, 1250); + + netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session); + + return 0; +} + +static int j1939_xtp_txnext_receiver(struct j1939_session *session) +{ + struct j1939_priv *priv = session->priv; + int ret = 0; + + if (!j1939_tp_im_receiver(&session->skcb)) { + netdev_alert(priv->ndev, "%s: 0x%p: called by not receiver!\n", + __func__, session); + return -EINVAL; + } + + switch (session->last_cmd) { + case J1939_TP_CMD_RTS: + case J1939_ETP_CMD_RTS: + ret = j1939_session_tx_cts(session); + break; + + case J1939_ETP_CMD_CTS: + case J1939_TP_CMD_CTS: + case 0xff: /* did some data */ + case J1939_ETP_CMD_DPO: + if ((session->skcb.addr.type == J1939_TP && + j1939_cb_is_broadcast(&session->skcb))) + break; + + if (session->pkt.rx >= session->pkt.total) { + ret = j1939_session_tx_eoma(session); + } else if (session->pkt.rx >= session->pkt.last) { + session->last_txcmd = 0; + ret = j1939_session_tx_cts(session); + } + break; + default: + netdev_alert(priv->ndev, "%s: 0x%p: unexpected last_cmd: %x\n", + __func__, session, session->last_cmd); + } + + return ret; +} + +static int j1939_simple_txnext(struct j1939_session *session) +{ + struct j1939_priv *priv = session->priv; + struct sk_buff *se_skb = j1939_session_skb_find(session); + struct sk_buff *skb; + int ret; + + if (!se_skb) + return 0; + + skb = skb_clone(se_skb, GFP_ATOMIC); + if (!skb) + return -ENOMEM; + + can_skb_set_owner(skb, se_skb->sk); + + j1939_tp_set_rxtimeout(session, J1939_SIMPLE_ECHO_TIMEOUT_MS); + + ret = j1939_send_one(priv, skb); + if (ret) + return ret; + + j1939_sk_errqueue(session, J1939_ERRQUEUE_SCHED); + j1939_sk_queue_activate_next(session); + + return 0; +} + +static bool j1939_session_deactivate_locked(struct j1939_session *session) +{ + bool active = false; + + lockdep_assert_held(&session->priv->active_session_list_lock); + + if (session->state >= J1939_SESSION_ACTIVE && + session->state < J1939_SESSION_ACTIVE_MAX) { + active = true; + + list_del_init(&session->active_session_list_entry); + session->state = J1939_SESSION_DONE; + j1939_session_put(session); + } + + return active; +} + +static bool j1939_session_deactivate(struct j1939_session *session) +{ + bool active; + + j1939_session_list_lock(session->priv); + active = j1939_session_deactivate_locked(session); + j1939_session_list_unlock(session->priv); + + return active; +} + +static void +j1939_session_deactivate_activate_next(struct j1939_session
*session) +{ + if (j1939_session_deactivate(session)) + j1939_sk_queue_activate_next(session); +} + +static void j1939_session_cancel(struct j1939_session *session, + enum j1939_xtp_abort err) +{ + struct j1939_priv *priv = session->priv; + + WARN_ON_ONCE(!err); + + session->err = j1939_xtp_abort_to_errno(priv, err); + /* do not send aborts on incoming broadcasts */ + if (!j1939_cb_is_broadcast(&session->skcb)) { + session->state = J1939_SESSION_WAITING_ABORT; + j1939_xtp_tx_abort(priv, &session->skcb, + !session->transmission, + err, session->skcb.addr.pgn); + } + + if (session->sk) + j1939_sk_send_loop_abort(session->sk, session->err); +} + +static enum hrtimer_restart j1939_tp_txtimer(struct hrtimer *hrtimer) +{ + struct j1939_session *session = + container_of(hrtimer, struct j1939_session, txtimer); + struct j1939_priv *priv = session->priv; + int ret = 0; + + if (session->skcb.addr.type == J1939_SIMPLE) { + ret = j1939_simple_txnext(session); + } else { + if (session->transmission) + ret = j1939_xtp_txnext_transmiter(session); + else + ret = j1939_xtp_txnext_receiver(session); + } + + switch (ret) { + case -ENOBUFS: + /* Retry limit is currently arbitrarily chosen */ + if (session->tx_retry < J1939_XTP_TX_RETRY_LIMIT) { + session->tx_retry++; + j1939_tp_schedule_txtimer(session, + 10 + prandom_u32_max(16)); + } else { + netdev_alert(priv->ndev, "%s: 0x%p: tx retry count reached\n", + __func__, session); + session->err = -ENETUNREACH; + j1939_session_rxtimer_cancel(session); + j1939_session_deactivate_activate_next(session); + } + break; + case -ENETDOWN: + /* In this case we should get a netdev_event(), all active + * sessions will be cleared by + * j1939_cancel_all_active_sessions(). So handle this as an + * error, but let j1939_cancel_all_active_sessions() do the + * cleanup including propagation of the error to user space. + */ + break; + case 0: + session->tx_retry = 0; + break; + default: + netdev_alert(priv->ndev, "%s: 0x%p: tx aborted with unknown reason: %i\n", + __func__, session, ret); + if (session->skcb.addr.type != J1939_SIMPLE) { + j1939_tp_set_rxtimeout(session, + J1939_XTP_ABORT_TIMEOUT_MS); + j1939_session_cancel(session, J1939_XTP_ABORT_OTHER); + } else { + session->err = ret; + j1939_session_rxtimer_cancel(session); + j1939_session_deactivate_activate_next(session); + } + } + + j1939_session_put(session); + + return HRTIMER_NORESTART; +} + +static void j1939_session_completed(struct j1939_session *session) +{ + struct sk_buff *skb; + + if (!session->transmission) { + skb = j1939_session_skb_find(session); + /* distribute among j1939 receivers */ + j1939_sk_recv(session->priv, skb); + } + + j1939_session_deactivate_activate_next(session); +} + +static enum hrtimer_restart j1939_tp_rxtimer(struct hrtimer *hrtimer) +{ + struct j1939_session *session = container_of(hrtimer, + struct j1939_session, + rxtimer); + struct j1939_priv *priv = session->priv; + + if (session->state == J1939_SESSION_WAITING_ABORT) { + netdev_alert(priv->ndev, "%s: 0x%p: abort rx timeout. Force session deactivation\n", + __func__, session); + + j1939_session_deactivate_activate_next(session); + + } else if (session->skcb.addr.type == J1939_SIMPLE) { + netdev_alert(priv->ndev, "%s: 0x%p: Timeout. Failed to send simple message.\n", + __func__, session); + + /* The message is probably stuck in the CAN controller and can + * be sent as soon as CAN bus is in working state again.
+ */ + session->err = -ETIME; + j1939_session_deactivate(session); + } else { + netdev_alert(priv->ndev, "%s: 0x%p: rx timeout, send abort\n", + __func__, session); + + j1939_session_list_lock(session->priv); + if (session->state >= J1939_SESSION_ACTIVE && + session->state < J1939_SESSION_ACTIVE_MAX) { + j1939_session_get(session); + hrtimer_start(&session->rxtimer, + ms_to_ktime(J1939_XTP_ABORT_TIMEOUT_MS), + HRTIMER_MODE_REL_SOFT); + j1939_session_cancel(session, J1939_XTP_ABORT_TIMEOUT); + } + j1939_session_list_unlock(session->priv); + } + + j1939_session_put(session); + + return HRTIMER_NORESTART; +} + +static bool j1939_xtp_rx_cmd_bad_pgn(struct j1939_session *session, + const struct sk_buff *skb) +{ + const struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb); + pgn_t pgn = j1939_xtp_ctl_to_pgn(skb->data); + struct j1939_priv *priv = session->priv; + enum j1939_xtp_abort abort = J1939_XTP_NO_ABORT; + u8 cmd = skb->data[0]; + + if (session->skcb.addr.pgn == pgn) + return false; + + switch (cmd) { + case J1939_TP_CMD_BAM: + abort = J1939_XTP_NO_ABORT; + break; + + case J1939_ETP_CMD_RTS: + case J1939_TP_CMD_RTS: /* fall through */ + abort = J1939_XTP_ABORT_BUSY; + break; + + case J1939_ETP_CMD_CTS: + case J1939_TP_CMD_CTS: /* fall through */ + abort = J1939_XTP_ABORT_ECTS_UNXPECTED_PGN; + break; + + case J1939_ETP_CMD_DPO: + abort = J1939_XTP_ABORT_BAD_EDPO_PGN; + break; + + case J1939_ETP_CMD_EOMA: + case J1939_TP_CMD_EOMA: /* fall through */ + abort = J1939_XTP_ABORT_OTHER; + break; + + case J1939_ETP_CMD_ABORT: /* && J1939_TP_CMD_ABORT */ + abort = J1939_XTP_NO_ABORT; + break; + + default: + WARN_ON_ONCE(1); + break; + } + + netdev_warn(priv->ndev, "%s: 0x%p: CMD 0x%02x with PGN 0x%05x for running session with different PGN 0x%05x.\n", + __func__, session, cmd, pgn, session->skcb.addr.pgn); + if (abort != J1939_XTP_NO_ABORT) + j1939_xtp_tx_abort(priv, skcb, true, abort, pgn); + + return true; +} + +static void j1939_xtp_rx_abort_one(struct j1939_priv *priv, struct sk_buff *skb, + bool reverse, bool transmitter) +{ + struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb); + struct j1939_session *session; + u8 abort = skb->data[1]; + + session = j1939_session_get_by_addr(priv, &skcb->addr, reverse, + transmitter); + if (!session) + return; + + if (j1939_xtp_rx_cmd_bad_pgn(session, skb)) + goto abort_put; + + netdev_info(priv->ndev, "%s: 0x%p: 0x%05x: (%u) %s\n", __func__, + session, j1939_xtp_ctl_to_pgn(skb->data), abort, + j1939_xtp_abort_to_str(abort)); + + j1939_session_timers_cancel(session); + session->err = j1939_xtp_abort_to_errno(priv, abort); + if (session->sk) + j1939_sk_send_loop_abort(session->sk, session->err); + j1939_session_deactivate_activate_next(session); + +abort_put: + j1939_session_put(session); +} + +/* abort packets may come in 2 directions */ +static void +j1939_xtp_rx_abort(struct j1939_priv *priv, struct sk_buff *skb, + bool transmitter) +{ + j1939_xtp_rx_abort_one(priv, skb, false, transmitter); + j1939_xtp_rx_abort_one(priv, skb, true, transmitter); +} + +static void +j1939_xtp_rx_eoma_one(struct j1939_session *session, struct sk_buff *skb) +{ + if (j1939_xtp_rx_cmd_bad_pgn(session, skb)) + return; + + netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session); + + session->pkt.tx_acked = session->pkt.total; + j1939_session_timers_cancel(session); + /* transmitted without problems */ + j1939_session_completed(session); +} + +static void +j1939_xtp_rx_eoma(struct j1939_priv *priv, struct sk_buff *skb, + bool transmitter) +{ + struct j1939_sk_buff_cb 
*skcb = j1939_skb_to_cb(skb); + struct j1939_session *session; + + session = j1939_session_get_by_addr(priv, &skcb->addr, true, + transmitter); + if (!session) + return; + + j1939_xtp_rx_eoma_one(session, skb); + j1939_session_put(session); +} + +static void +j1939_xtp_rx_cts_one(struct j1939_session *session, struct sk_buff *skb) +{ + enum j1939_xtp_abort err = J1939_XTP_ABORT_FAULT; + unsigned int pkt; + const u8 *dat; + + dat = skb->data; + + if (j1939_xtp_rx_cmd_bad_pgn(session, skb)) + return; + + netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session); + + if (session->last_cmd == dat[0]) { + err = J1939_XTP_ABORT_DUP_SEQ; + goto out_session_cancel; + } + + if (session->skcb.addr.type == J1939_ETP) + pkt = j1939_etp_ctl_to_packet(dat); + else + pkt = dat[2]; + + if (!pkt) + goto out_session_cancel; + else if (dat[1] > session->pkt.block /* 0xff for etp */) + goto out_session_cancel; + + /* set packet counters only when not CTS(0) */ + session->pkt.tx_acked = pkt - 1; + j1939_session_skb_drop_old(session); + session->pkt.last = session->pkt.tx_acked + dat[1]; + if (session->pkt.last > session->pkt.total) + /* safety measure */ + session->pkt.last = session->pkt.total; + /* TODO: do not set tx here, do it in txtimer */ + session->pkt.tx = session->pkt.tx_acked; + + session->last_cmd = dat[0]; + if (dat[1]) { + j1939_tp_set_rxtimeout(session, 1250); + if (session->transmission) { + if (session->pkt.tx_acked) + j1939_sk_errqueue(session, + J1939_ERRQUEUE_SCHED); + j1939_session_txtimer_cancel(session); + j1939_tp_schedule_txtimer(session, 0); + } + } else { + /* CTS(0) */ + j1939_tp_set_rxtimeout(session, 550); + } + return; + + out_session_cancel: + j1939_session_timers_cancel(session); + j1939_tp_set_rxtimeout(session, J1939_XTP_ABORT_TIMEOUT_MS); + j1939_session_cancel(session, err); +} + +static void +j1939_xtp_rx_cts(struct j1939_priv *priv, struct sk_buff *skb, bool transmitter) +{ + struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb); + struct j1939_session *session; + + session = j1939_session_get_by_addr(priv, &skcb->addr, true, + transmitter); + if (!session) + return; + j1939_xtp_rx_cts_one(session, skb); + j1939_session_put(session); +} + +static struct j1939_session *j1939_session_new(struct j1939_priv *priv, + struct sk_buff *skb, size_t size) +{ + struct j1939_session *session; + struct j1939_sk_buff_cb *skcb; + + session = kzalloc(sizeof(*session), gfp_any()); + if (!session) + return NULL; + + INIT_LIST_HEAD(&session->active_session_list_entry); + INIT_LIST_HEAD(&session->sk_session_queue_entry); + kref_init(&session->kref); + + j1939_priv_get(priv); + session->priv = priv; + session->total_message_size = size; + session->state = J1939_SESSION_NEW; + + skb_queue_head_init(&session->skb_queue); + skb_queue_tail(&session->skb_queue, skb); + + skcb = j1939_skb_to_cb(skb); + memcpy(&session->skcb, skcb, sizeof(session->skcb)); + + hrtimer_init(&session->txtimer, CLOCK_MONOTONIC, + HRTIMER_MODE_REL_SOFT); + session->txtimer.function = j1939_tp_txtimer; + hrtimer_init(&session->rxtimer, CLOCK_MONOTONIC, + HRTIMER_MODE_REL_SOFT); + session->rxtimer.function = j1939_tp_rxtimer; + + netdev_dbg(priv->ndev, "%s: 0x%p: sa: %02x, da: %02x\n", + __func__, session, skcb->addr.sa, skcb->addr.da); + + return session; +} + +static struct +j1939_session *j1939_session_fresh_new(struct j1939_priv *priv, + int size, + const struct j1939_sk_buff_cb *rel_skcb) +{ + struct sk_buff *skb; + struct j1939_sk_buff_cb *skcb; + struct j1939_session *session; + + skb = alloc_skb(size + 
sizeof(struct can_skb_priv), GFP_ATOMIC); + if (unlikely(!skb)) + return NULL; + + skb->dev = priv->ndev; + can_skb_reserve(skb); + can_skb_prv(skb)->ifindex = priv->ndev->ifindex; + skcb = j1939_skb_to_cb(skb); + memcpy(skcb, rel_skcb, sizeof(*skcb)); + + session = j1939_session_new(priv, skb, skb->len); + if (!session) { + kfree_skb(skb); + return NULL; + } + + /* alloc data area */ + skb_put(skb, size); + /* skb is recounted in j1939_session_new() */ + return session; +} + +int j1939_session_activate(struct j1939_session *session) +{ + struct j1939_priv *priv = session->priv; + struct j1939_session *active = NULL; + int ret = 0; + + j1939_session_list_lock(priv); + if (session->skcb.addr.type != J1939_SIMPLE) + active = j1939_session_get_by_addr_locked(priv, + &priv->active_session_list, + &session->skcb.addr, false, + session->transmission); + if (active) { + j1939_session_put(active); + ret = -EAGAIN; + } else { + WARN_ON_ONCE(session->state != J1939_SESSION_NEW); + list_add_tail(&session->active_session_list_entry, + &priv->active_session_list); + j1939_session_get(session); + session->state = J1939_SESSION_ACTIVE; + + netdev_dbg(session->priv->ndev, "%s: 0x%p\n", + __func__, session); + } + j1939_session_list_unlock(priv); + + return ret; +} + +static struct +j1939_session *j1939_xtp_rx_rts_session_new(struct j1939_priv *priv, + struct sk_buff *skb) +{ + enum j1939_xtp_abort abort = J1939_XTP_NO_ABORT; + struct j1939_sk_buff_cb skcb = *j1939_skb_to_cb(skb); + struct j1939_session *session; + const u8 *dat; + pgn_t pgn; + int len; + + netdev_dbg(priv->ndev, "%s\n", __func__); + + dat = skb->data; + pgn = j1939_xtp_ctl_to_pgn(dat); + skcb.addr.pgn = pgn; + + if (!j1939_sk_recv_match(priv, &skcb)) + return NULL; + + if (skcb.addr.type == J1939_ETP) { + len = j1939_etp_ctl_to_size(dat); + if (len > J1939_MAX_ETP_PACKET_SIZE) + abort = J1939_XTP_ABORT_FAULT; + else if (len > priv->tp_max_packet_size) + abort = J1939_XTP_ABORT_RESOURCE; + else if (len <= J1939_MAX_TP_PACKET_SIZE) + abort = J1939_XTP_ABORT_FAULT; + } else { + len = j1939_tp_ctl_to_size(dat); + if (len > J1939_MAX_TP_PACKET_SIZE) + abort = J1939_XTP_ABORT_FAULT; + else if (len > priv->tp_max_packet_size) + abort = J1939_XTP_ABORT_RESOURCE; + } + + if (abort != J1939_XTP_NO_ABORT) { + j1939_xtp_tx_abort(priv, &skcb, true, abort, pgn); + return NULL; + } + + session = j1939_session_fresh_new(priv, len, &skcb); + if (!session) { + j1939_xtp_tx_abort(priv, &skcb, true, + J1939_XTP_ABORT_RESOURCE, pgn); + return NULL; + } + + /* initialize the control buffer: plain copy */ + session->pkt.total = (len + 6) / 7; + session->pkt.block = 0xff; + if (skcb.addr.type != J1939_ETP) { + if (dat[3] != session->pkt.total) + netdev_alert(priv->ndev, "%s: 0x%p: strange total, %u != %u\n", + __func__, session, session->pkt.total, + dat[3]); + session->pkt.total = dat[3]; + session->pkt.block = min(dat[3], dat[4]); + } + + session->pkt.rx = 0; + session->pkt.tx = 0; + + WARN_ON_ONCE(j1939_session_activate(session)); + + return session; +} + +static int j1939_xtp_rx_rts_session_active(struct j1939_session *session, + struct sk_buff *skb) +{ + struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb); + struct j1939_priv *priv = session->priv; + + if (!session->transmission) { + if (j1939_xtp_rx_cmd_bad_pgn(session, skb)) + return -EBUSY; + + /* RTS on active session */ + j1939_session_timers_cancel(session); + j1939_tp_set_rxtimeout(session, J1939_XTP_ABORT_TIMEOUT_MS); + j1939_session_cancel(session, J1939_XTP_ABORT_BUSY); + } + + if 
(session->last_cmd != 0) { + /* we received a second rts on the same connection */ + netdev_alert(priv->ndev, "%s: 0x%p: connection exists (%02x %02x). last cmd: %x\n", + __func__, session, skcb->addr.sa, skcb->addr.da, + session->last_cmd); + + j1939_session_timers_cancel(session); + j1939_tp_set_rxtimeout(session, J1939_XTP_ABORT_TIMEOUT_MS); + j1939_session_cancel(session, J1939_XTP_ABORT_BUSY); + + return -EBUSY; + } + + if (session->skcb.addr.sa != skcb->addr.sa || + session->skcb.addr.da != skcb->addr.da) + netdev_warn(priv->ndev, "%s: 0x%p: session->skcb.addr.sa=0x%02x skcb->addr.sa=0x%02x session->skcb.addr.da=0x%02x skcb->addr.da=0x%02x\n", + __func__, session, + session->skcb.addr.sa, skcb->addr.sa, + session->skcb.addr.da, skcb->addr.da); + /* make sure 'sa' & 'da' are correct ! + * They may be 'not filled in yet' for sending + * skb's, since they did not pass the Address Claim ever. + */ + session->skcb.addr.sa = skcb->addr.sa; + session->skcb.addr.da = skcb->addr.da; + + netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session); + + return 0; +} + +static void j1939_xtp_rx_rts(struct j1939_priv *priv, struct sk_buff *skb, + bool transmitter) +{ + struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb); + struct j1939_session *session; + u8 cmd = skb->data[0]; + + session = j1939_session_get_by_addr(priv, &skcb->addr, false, + transmitter); + + if (!session) { + if (transmitter) { + /* If we're the transmitter and this function is called, + * we received our own RTS. A session has already been + * created. + * + * For some reason, however, it might have been destroyed + * already. So don't create a new one here (using + * "j1939_xtp_rx_rts_session_new()") as this will be a + * receiver session. + * + * The reasons the session is already destroyed might + * be: + * - user space closed the socket and the session was + * aborted + * - session was aborted due to external abort message + */ + return; + } + session = j1939_xtp_rx_rts_session_new(priv, skb); + if (!session) + return; + } else { + if (j1939_xtp_rx_rts_session_active(session, skb)) { + j1939_session_put(session); + return; + } + } + session->last_cmd = cmd; + + j1939_tp_set_rxtimeout(session, 1250); + + if (cmd != J1939_TP_CMD_BAM && !session->transmission) { + j1939_session_txtimer_cancel(session); + j1939_tp_schedule_txtimer(session, 0); + } + + j1939_session_put(session); +} + +static void j1939_xtp_rx_dpo_one(struct j1939_session *session, + struct sk_buff *skb) +{ + const u8 *dat = skb->data; + + if (j1939_xtp_rx_cmd_bad_pgn(session, skb)) + return; + + netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session); + + /* transmitted without problems */ + session->pkt.dpo = j1939_etp_ctl_to_packet(skb->data); + session->last_cmd = dat[0]; + j1939_tp_set_rxtimeout(session, 750); +} + +static void j1939_xtp_rx_dpo(struct j1939_priv *priv, struct sk_buff *skb, + bool transmitter) +{ + struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb); + struct j1939_session *session; + + session = j1939_session_get_by_addr(priv, &skcb->addr, false, + transmitter); + if (!session) { + netdev_info(priv->ndev, + "%s: no connection found\n", __func__); + return; + } + + j1939_xtp_rx_dpo_one(session, skb); + j1939_session_put(session); +} + +static void j1939_xtp_rx_dat_one(struct j1939_session *session, + struct sk_buff *skb) +{ + struct j1939_priv *priv = session->priv; + struct j1939_sk_buff_cb *skcb; + struct sk_buff *se_skb; + const u8 *dat; + u8 *tpdat; + int offset; + int nbytes; + bool final = false; + bool do_cts_eoma =
false; + int packet; + + skcb = j1939_skb_to_cb(skb); + dat = skb->data; + if (skb->len <= 1) + /* makes no sense */ + goto out_session_cancel; + + switch (session->last_cmd) { + case 0xff: + break; + case J1939_ETP_CMD_DPO: + if (skcb->addr.type == J1939_ETP) + break; + /* fall through */ + case J1939_TP_CMD_BAM: /* fall through */ + case J1939_TP_CMD_CTS: /* fall through */ + if (skcb->addr.type != J1939_ETP) + break; + /* fall through */ + default: + netdev_info(priv->ndev, "%s: 0x%p: last %02x\n", __func__, + session, session->last_cmd); + goto out_session_cancel; + } + + packet = (dat[0] - 1 + session->pkt.dpo); + if (packet > session->pkt.total || + (session->pkt.rx + 1) > session->pkt.total) { + netdev_info(priv->ndev, "%s: 0x%p: should have been completed\n", + __func__, session); + goto out_session_cancel; + } + se_skb = j1939_session_skb_find(session); + if (!se_skb) { + netdev_warn(priv->ndev, "%s: 0x%p: no skb found\n", __func__, + session); + goto out_session_cancel; + } + + skcb = j1939_skb_to_cb(se_skb); + offset = packet * 7 - skcb->offset; + nbytes = se_skb->len - offset; + if (nbytes > 7) + nbytes = 7; + if (nbytes <= 0 || (nbytes + 1) > skb->len) { + netdev_info(priv->ndev, "%s: 0x%p: nbytes %i, len %i\n", + __func__, session, nbytes, skb->len); + goto out_session_cancel; + } + + tpdat = se_skb->data; + memcpy(&tpdat[offset], &dat[1], nbytes); + if (packet == session->pkt.rx) + session->pkt.rx++; + + if (skcb->addr.type != J1939_ETP && + j1939_cb_is_broadcast(&session->skcb)) { + if (session->pkt.rx >= session->pkt.total) + final = true; + } else { + /* never final, an EOMA must follow */ + if (session->pkt.rx >= session->pkt.last) + do_cts_eoma = true; + } + + if (final) { + j1939_session_completed(session); + } else if (do_cts_eoma) { + j1939_tp_set_rxtimeout(session, 1250); + if (!session->transmission) + j1939_tp_schedule_txtimer(session, 0); + } else { + j1939_tp_set_rxtimeout(session, 250); + } + session->last_cmd = 0xff; + j1939_session_put(session); + + return; + + out_session_cancel: + j1939_session_timers_cancel(session); + j1939_tp_set_rxtimeout(session, J1939_XTP_ABORT_TIMEOUT_MS); + j1939_session_cancel(session, J1939_XTP_ABORT_FAULT); + j1939_session_put(session); +} + +static void j1939_xtp_rx_dat(struct j1939_priv *priv, struct sk_buff *skb) +{ + struct j1939_sk_buff_cb *skcb; + struct j1939_session *session; + + skcb = j1939_skb_to_cb(skb); + + if (j1939_tp_im_transmitter(skcb)) { + session = j1939_session_get_by_addr(priv, &skcb->addr, false, + true); + if (!session) + netdev_info(priv->ndev, "%s: no tx connection found\n", + __func__); + else + j1939_xtp_rx_dat_one(session, skb); + } + + if (j1939_tp_im_receiver(skcb)) { + session = j1939_session_get_by_addr(priv, &skcb->addr, false, + false); + if (!session) + netdev_info(priv->ndev, "%s: no rx connection found\n", + __func__); + else + j1939_xtp_rx_dat_one(session, skb); + } +} + +/* j1939 main intf */ +struct j1939_session *j1939_tp_send(struct j1939_priv *priv, + struct sk_buff *skb, size_t size) +{ + struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb); + struct j1939_session *session; + int ret; + + if (skcb->addr.pgn == J1939_TP_PGN_DAT || + skcb->addr.pgn == J1939_TP_PGN_CTL || + skcb->addr.pgn == J1939_ETP_PGN_DAT || + skcb->addr.pgn == J1939_ETP_PGN_CTL) + /* avoid conflict */ + return ERR_PTR(-EDOM); + + if (size > priv->tp_max_packet_size) + return ERR_PTR(-EMSGSIZE); + + if (size <= 8) + skcb->addr.type = J1939_SIMPLE; + else if (size > J1939_MAX_TP_PACKET_SIZE) + skcb->addr.type = 
J1939_ETP; + else + skcb->addr.type = J1939_TP; + + if (skcb->addr.type == J1939_ETP && + j1939_cb_is_broadcast(skcb)) + return ERR_PTR(-EDESTADDRREQ); + + /* fill in addresses from names */ + ret = j1939_ac_fixup(priv, skb); + if (unlikely(ret)) + return ERR_PTR(ret); + + /* fix DST flags, it may be used there soon */ + if (j1939_address_is_unicast(skcb->addr.da) && + priv->ents[skcb->addr.da].nusers) + skcb->flags |= J1939_ECU_LOCAL_DST; + + /* src is always local, I'm sending ... */ + skcb->flags |= J1939_ECU_LOCAL_SRC; + + /* prepare new session */ + session = j1939_session_new(priv, skb, size); + if (!session) + return ERR_PTR(-ENOMEM); + + /* skb is recounted in j1939_session_new() */ + session->sk = skb->sk; + session->transmission = true; + session->pkt.total = (size + 6) / 7; + session->pkt.block = skcb->addr.type == J1939_ETP ? 255 : + min(j1939_tp_block ?: 255, session->pkt.total); + + if (j1939_cb_is_broadcast(&session->skcb)) + /* set the end-packet for broadcast */ + session->pkt.last = session->pkt.total; + + skcb->tskey = session->sk->sk_tskey++; + session->tskey = skcb->tskey; + + return session; +} + +static void j1939_tp_cmd_recv(struct j1939_priv *priv, struct sk_buff *skb) +{ + struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb); + int extd = J1939_TP; + u8 cmd = skb->data[0]; + + switch (cmd) { + case J1939_ETP_CMD_RTS: + extd = J1939_ETP; + /* fall through */ + case J1939_TP_CMD_BAM: /* fall through */ + case J1939_TP_CMD_RTS: /* fall through */ + if (skcb->addr.type != extd) + return; + + if (cmd == J1939_TP_CMD_RTS && j1939_cb_is_broadcast(skcb)) { + netdev_alert(priv->ndev, "%s: rts without destination (%02x)\n", + __func__, skcb->addr.sa); + return; + } + + if (j1939_tp_im_transmitter(skcb)) + j1939_xtp_rx_rts(priv, skb, true); + + if (j1939_tp_im_receiver(skcb)) + j1939_xtp_rx_rts(priv, skb, false); + + break; + + case J1939_ETP_CMD_CTS: + extd = J1939_ETP; + /* fall through */ + case J1939_TP_CMD_CTS: + if (skcb->addr.type != extd) + return; + + if (j1939_tp_im_transmitter(skcb)) + j1939_xtp_rx_cts(priv, skb, false); + + if (j1939_tp_im_receiver(skcb)) + j1939_xtp_rx_cts(priv, skb, true); + + break; + + case J1939_ETP_CMD_DPO: + if (skcb->addr.type != J1939_ETP) + return; + + if (j1939_tp_im_transmitter(skcb)) + j1939_xtp_rx_dpo(priv, skb, true); + + if (j1939_tp_im_receiver(skcb)) + j1939_xtp_rx_dpo(priv, skb, false); + + break; + + case J1939_ETP_CMD_EOMA: + extd = J1939_ETP; + /* fall through */ + case J1939_TP_CMD_EOMA: + if (skcb->addr.type != extd) + return; + + if (j1939_tp_im_transmitter(skcb)) + j1939_xtp_rx_eoma(priv, skb, false); + + if (j1939_tp_im_receiver(skcb)) + j1939_xtp_rx_eoma(priv, skb, true); + + break; + + case J1939_ETP_CMD_ABORT: /* && J1939_TP_CMD_ABORT */ + if (j1939_tp_im_transmitter(skcb)) + j1939_xtp_rx_abort(priv, skb, true); + + if (j1939_tp_im_receiver(skcb)) + j1939_xtp_rx_abort(priv, skb, false); + + break; + default: + return; + } +} + +int j1939_tp_recv(struct j1939_priv *priv, struct sk_buff *skb) +{ + struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb); + + if (!j1939_tp_im_involved_anydir(skcb)) + return 0; + + switch (skcb->addr.pgn) { + case J1939_ETP_PGN_DAT: + skcb->addr.type = J1939_ETP; + /* fall through */ + case J1939_TP_PGN_DAT: + j1939_xtp_rx_dat(priv, skb); + break; + + case J1939_ETP_PGN_CTL: + skcb->addr.type = J1939_ETP; + /* fall through */ + case J1939_TP_PGN_CTL: + if (skb->len < 8) + return 0; /* Don't care. 
Nothing to extract here */ + + j1939_tp_cmd_recv(priv, skb); + break; + default: + return 0; /* no problem */ + } + return 1; /* "I processed the message" */ +} + +void j1939_simple_recv(struct j1939_priv *priv, struct sk_buff *skb) +{ + struct j1939_session *session; + + if (!skb->sk) + return; + + j1939_session_list_lock(priv); + session = j1939_session_get_simple(priv, skb); + j1939_session_list_unlock(priv); + if (!session) { + netdev_warn(priv->ndev, + "%s: Received already invalidated message\n", + __func__); + return; + } + + j1939_session_timers_cancel(session); + j1939_session_deactivate(session); + j1939_session_put(session); +} + +int j1939_cancel_active_session(struct j1939_priv *priv, struct sock *sk) +{ + struct j1939_session *session, *saved; + + netdev_dbg(priv->ndev, "%s, sk: %p\n", __func__, sk); + j1939_session_list_lock(priv); + list_for_each_entry_safe(session, saved, + &priv->active_session_list, + active_session_list_entry) { + if (!sk || sk == session->sk) { + j1939_session_timers_cancel(session); + session->err = ESHUTDOWN; + j1939_session_deactivate_locked(session); + } + } + j1939_session_list_unlock(priv); + return NOTIFY_DONE; +} + +void j1939_tp_init(struct j1939_priv *priv) +{ + spin_lock_init(&priv->active_session_list_lock); + INIT_LIST_HEAD(&priv->active_session_list); + priv->tp_max_packet_size = J1939_MAX_ETP_PACKET_SIZE; +} diff --git a/net/can/proc.c b/net/can/proc.c index 70fea17bb04c..e6881bfc3ed1 100644 --- a/net/can/proc.c +++ b/net/can/proc.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) /* * proc.c - procfs support for Protocol family CAN core module * @@ -44,6 +45,7 @@ #include <linux/list.h> #include <linux/rcupdate.h> #include <linux/if_arp.h> +#include <linux/can/can-ml.h> #include <linux/can/core.h> #include "af_can.h" @@ -77,21 +79,21 @@ static const char rx_list_name[][8] = { static void can_init_stats(struct net *net) { - struct s_stats *can_stats = net->can.can_stats; - struct s_pstats *can_pstats = net->can.can_pstats; + struct can_pkg_stats *pkg_stats = net->can.pkg_stats; + struct can_rcv_lists_stats *rcv_lists_stats = net->can.rcv_lists_stats; /* * This memset function is called from a timer context (when * can_stattimer is active which is the default) OR in a process * context (reading the proc_fs when can_stattimer is disabled). 
*/ - memset(can_stats, 0, sizeof(struct s_stats)); - can_stats->jiffies_init = jiffies; + memset(pkg_stats, 0, sizeof(struct can_pkg_stats)); + pkg_stats->jiffies_init = jiffies; - can_pstats->stats_reset++; + rcv_lists_stats->stats_reset++; if (user_reset) { user_reset = 0; - can_pstats->user_reset++; + rcv_lists_stats->user_reset++; } } @@ -117,8 +119,8 @@ static unsigned long calc_rate(unsigned long oldjif, unsigned long newjif, void can_stat_update(struct timer_list *t) { - struct net *net = from_timer(net, t, can.can_stattimer); - struct s_stats *can_stats = net->can.can_stats; + struct net *net = from_timer(net, t, can.stattimer); + struct can_pkg_stats *pkg_stats = net->can.pkg_stats; unsigned long j = jiffies; /* snapshot */ /* restart counting in timer context on user request */ @@ -126,57 +128,57 @@ void can_stat_update(struct timer_list *t) can_init_stats(net); /* restart counting on jiffies overflow */ - if (j < can_stats->jiffies_init) + if (j < pkg_stats->jiffies_init) can_init_stats(net); /* prevent overflow in calc_rate() */ - if (can_stats->rx_frames > (ULONG_MAX / HZ)) + if (pkg_stats->rx_frames > (ULONG_MAX / HZ)) can_init_stats(net); /* prevent overflow in calc_rate() */ - if (can_stats->tx_frames > (ULONG_MAX / HZ)) + if (pkg_stats->tx_frames > (ULONG_MAX / HZ)) can_init_stats(net); /* matches overflow - very improbable */ - if (can_stats->matches > (ULONG_MAX / 100)) + if (pkg_stats->matches > (ULONG_MAX / 100)) can_init_stats(net); /* calc total values */ - if (can_stats->rx_frames) - can_stats->total_rx_match_ratio = (can_stats->matches * 100) / - can_stats->rx_frames; + if (pkg_stats->rx_frames) + pkg_stats->total_rx_match_ratio = (pkg_stats->matches * 100) / + pkg_stats->rx_frames; - can_stats->total_tx_rate = calc_rate(can_stats->jiffies_init, j, - can_stats->tx_frames); - can_stats->total_rx_rate = calc_rate(can_stats->jiffies_init, j, - can_stats->rx_frames); + pkg_stats->total_tx_rate = calc_rate(pkg_stats->jiffies_init, j, + pkg_stats->tx_frames); + pkg_stats->total_rx_rate = calc_rate(pkg_stats->jiffies_init, j, + pkg_stats->rx_frames); /* calc current values */ - if (can_stats->rx_frames_delta) - can_stats->current_rx_match_ratio = - (can_stats->matches_delta * 100) / - can_stats->rx_frames_delta; + if (pkg_stats->rx_frames_delta) + pkg_stats->current_rx_match_ratio = + (pkg_stats->matches_delta * 100) / + pkg_stats->rx_frames_delta; - can_stats->current_tx_rate = calc_rate(0, HZ, can_stats->tx_frames_delta); - can_stats->current_rx_rate = calc_rate(0, HZ, can_stats->rx_frames_delta); + pkg_stats->current_tx_rate = calc_rate(0, HZ, pkg_stats->tx_frames_delta); + pkg_stats->current_rx_rate = calc_rate(0, HZ, pkg_stats->rx_frames_delta); /* check / update maximum values */ - if (can_stats->max_tx_rate < can_stats->current_tx_rate) - can_stats->max_tx_rate = can_stats->current_tx_rate; + if (pkg_stats->max_tx_rate < pkg_stats->current_tx_rate) + pkg_stats->max_tx_rate = pkg_stats->current_tx_rate; - if (can_stats->max_rx_rate < can_stats->current_rx_rate) - can_stats->max_rx_rate = can_stats->current_rx_rate; + if (pkg_stats->max_rx_rate < pkg_stats->current_rx_rate) + pkg_stats->max_rx_rate = pkg_stats->current_rx_rate; - if (can_stats->max_rx_match_ratio < can_stats->current_rx_match_ratio) - can_stats->max_rx_match_ratio = can_stats->current_rx_match_ratio; + if (pkg_stats->max_rx_match_ratio < pkg_stats->current_rx_match_ratio) + pkg_stats->max_rx_match_ratio = pkg_stats->current_rx_match_ratio; /* clear values for 'current rate' calculation */ - 
can_stats->tx_frames_delta = 0; - can_stats->rx_frames_delta = 0; - can_stats->matches_delta = 0; + pkg_stats->tx_frames_delta = 0; + pkg_stats->rx_frames_delta = 0; + pkg_stats->matches_delta = 0; /* restart timer (one second) */ - mod_timer(&net->can.can_stattimer, round_jiffies(jiffies + HZ)); + mod_timer(&net->can.stattimer, round_jiffies(jiffies + HZ)); } /* @@ -211,60 +213,60 @@ static void can_print_recv_banner(struct seq_file *m) static int can_stats_proc_show(struct seq_file *m, void *v) { struct net *net = m->private; - struct s_stats *can_stats = net->can.can_stats; - struct s_pstats *can_pstats = net->can.can_pstats; + struct can_pkg_stats *pkg_stats = net->can.pkg_stats; + struct can_rcv_lists_stats *rcv_lists_stats = net->can.rcv_lists_stats; seq_putc(m, '\n'); - seq_printf(m, " %8ld transmitted frames (TXF)\n", can_stats->tx_frames); - seq_printf(m, " %8ld received frames (RXF)\n", can_stats->rx_frames); - seq_printf(m, " %8ld matched frames (RXMF)\n", can_stats->matches); + seq_printf(m, " %8ld transmitted frames (TXF)\n", pkg_stats->tx_frames); + seq_printf(m, " %8ld received frames (RXF)\n", pkg_stats->rx_frames); + seq_printf(m, " %8ld matched frames (RXMF)\n", pkg_stats->matches); seq_putc(m, '\n'); - if (net->can.can_stattimer.function == can_stat_update) { + if (net->can.stattimer.function == can_stat_update) { seq_printf(m, " %8ld %% total match ratio (RXMR)\n", - can_stats->total_rx_match_ratio); + pkg_stats->total_rx_match_ratio); seq_printf(m, " %8ld frames/s total tx rate (TXR)\n", - can_stats->total_tx_rate); + pkg_stats->total_tx_rate); seq_printf(m, " %8ld frames/s total rx rate (RXR)\n", - can_stats->total_rx_rate); + pkg_stats->total_rx_rate); seq_putc(m, '\n'); seq_printf(m, " %8ld %% current match ratio (CRXMR)\n", - can_stats->current_rx_match_ratio); + pkg_stats->current_rx_match_ratio); seq_printf(m, " %8ld frames/s current tx rate (CTXR)\n", - can_stats->current_tx_rate); + pkg_stats->current_tx_rate); seq_printf(m, " %8ld frames/s current rx rate (CRXR)\n", - can_stats->current_rx_rate); + pkg_stats->current_rx_rate); seq_putc(m, '\n'); seq_printf(m, " %8ld %% max match ratio (MRXMR)\n", - can_stats->max_rx_match_ratio); + pkg_stats->max_rx_match_ratio); seq_printf(m, " %8ld frames/s max tx rate (MTXR)\n", - can_stats->max_tx_rate); + pkg_stats->max_tx_rate); seq_printf(m, " %8ld frames/s max rx rate (MRXR)\n", - can_stats->max_rx_rate); + pkg_stats->max_rx_rate); seq_putc(m, '\n'); } seq_printf(m, " %8ld current receive list entries (CRCV)\n", - can_pstats->rcv_entries); + rcv_lists_stats->rcv_entries); seq_printf(m, " %8ld maximum receive list entries (MRCV)\n", - can_pstats->rcv_entries_max); + rcv_lists_stats->rcv_entries_max); - if (can_pstats->stats_reset) + if (rcv_lists_stats->stats_reset) seq_printf(m, "\n %8ld statistic resets (STR)\n", - can_pstats->stats_reset); + rcv_lists_stats->stats_reset); - if (can_pstats->user_reset) + if (rcv_lists_stats->user_reset) seq_printf(m, " %8ld user statistic resets (USTR)\n", - can_pstats->user_reset); + rcv_lists_stats->user_reset); seq_putc(m, '\n'); return 0; @@ -273,20 +275,20 @@ static int can_stats_proc_show(struct seq_file *m, void *v) static int can_reset_stats_proc_show(struct seq_file *m, void *v) { struct net *net = m->private; - struct s_pstats *can_pstats = net->can.can_pstats; - struct s_stats *can_stats = net->can.can_stats; + struct can_rcv_lists_stats *rcv_lists_stats = net->can.rcv_lists_stats; + struct can_pkg_stats *pkg_stats = net->can.pkg_stats; user_reset = 1; - if 
(net->can.can_stattimer.function == can_stat_update) { + if (net->can.stattimer.function == can_stat_update) { seq_printf(m, "Scheduled statistic reset #%ld.\n", - can_pstats->stats_reset + 1); + rcv_lists_stats->stats_reset + 1); } else { - if (can_stats->jiffies_init != jiffies) + if (pkg_stats->jiffies_init != jiffies) can_init_stats(net); seq_printf(m, "Performed statistic reset #%ld.\n", - can_pstats->stats_reset); + rcv_lists_stats->stats_reset); } return 0; } @@ -299,11 +301,11 @@ static int can_version_proc_show(struct seq_file *m, void *v) static inline void can_rcvlist_proc_show_one(struct seq_file *m, int idx, struct net_device *dev, - struct can_dev_rcv_lists *d) + struct can_dev_rcv_lists *dev_rcv_lists) { - if (!hlist_empty(&d->rx[idx])) { + if (!hlist_empty(&dev_rcv_lists->rx[idx])) { can_print_recv_banner(m); - can_print_rcvlist(m, &d->rx[idx], dev); + can_print_rcvlist(m, &dev_rcv_lists->rx[idx], dev); } else seq_printf(m, " (%s: no entry)\n", DNAME(dev)); @@ -314,7 +316,7 @@ static int can_rcvlist_proc_show(struct seq_file *m, void *v) /* double cast to prevent GCC warning */ int idx = (int)(long)PDE_DATA(m->file->f_inode); struct net_device *dev; - struct can_dev_rcv_lists *d; + struct can_dev_rcv_lists *dev_rcv_lists; struct net *net = m->private; seq_printf(m, "\nreceive list '%s':\n", rx_list_name[idx]); @@ -322,8 +324,8 @@ static int can_rcvlist_proc_show(struct seq_file *m, void *v) rcu_read_lock(); /* receive list for 'all' CAN devices (dev == NULL) */ - d = net->can.can_rx_alldev_list; - can_rcvlist_proc_show_one(m, idx, NULL, d); + dev_rcv_lists = net->can.rx_alldev_list; + can_rcvlist_proc_show_one(m, idx, NULL, dev_rcv_lists); /* receive list for registered CAN devices */ for_each_netdev_rcu(net, dev) { @@ -365,7 +367,7 @@ static inline void can_rcvlist_proc_show_array(struct seq_file *m, static int can_rcvlist_sff_proc_show(struct seq_file *m, void *v) { struct net_device *dev; - struct can_dev_rcv_lists *d; + struct can_dev_rcv_lists *dev_rcv_lists; struct net *net = m->private; /* RX_SFF */ @@ -374,15 +376,16 @@ static int can_rcvlist_sff_proc_show(struct seq_file *m, void *v) rcu_read_lock(); /* sff receive list for 'all' CAN devices (dev == NULL) */ - d = net->can.can_rx_alldev_list; - can_rcvlist_proc_show_array(m, NULL, d->rx_sff, ARRAY_SIZE(d->rx_sff)); + dev_rcv_lists = net->can.rx_alldev_list; + can_rcvlist_proc_show_array(m, NULL, dev_rcv_lists->rx_sff, + ARRAY_SIZE(dev_rcv_lists->rx_sff)); /* sff receive list for registered CAN devices */ for_each_netdev_rcu(net, dev) { if (dev->type == ARPHRD_CAN && dev->ml_priv) { - d = dev->ml_priv; - can_rcvlist_proc_show_array(m, dev, d->rx_sff, - ARRAY_SIZE(d->rx_sff)); + dev_rcv_lists = dev->ml_priv; + can_rcvlist_proc_show_array(m, dev, dev_rcv_lists->rx_sff, + ARRAY_SIZE(dev_rcv_lists->rx_sff)); } } @@ -395,7 +398,7 @@ static int can_rcvlist_sff_proc_show(struct seq_file *m, void *v) static int can_rcvlist_eff_proc_show(struct seq_file *m, void *v) { struct net_device *dev; - struct can_dev_rcv_lists *d; + struct can_dev_rcv_lists *dev_rcv_lists; struct net *net = m->private; /* RX_EFF */ @@ -404,15 +407,16 @@ static int can_rcvlist_eff_proc_show(struct seq_file *m, void *v) rcu_read_lock(); /* eff receive list for 'all' CAN devices (dev == NULL) */ - d = net->can.can_rx_alldev_list; - can_rcvlist_proc_show_array(m, NULL, d->rx_eff, ARRAY_SIZE(d->rx_eff)); + dev_rcv_lists = net->can.rx_alldev_list; + can_rcvlist_proc_show_array(m, NULL, dev_rcv_lists->rx_eff, + ARRAY_SIZE(dev_rcv_lists->rx_eff)); /* eff 
receive list for registered CAN devices */ for_each_netdev_rcu(net, dev) { if (dev->type == ARPHRD_CAN && dev->ml_priv) { - d = dev->ml_priv; - can_rcvlist_proc_show_array(m, dev, d->rx_eff, - ARRAY_SIZE(d->rx_eff)); + dev_rcv_lists = dev->ml_priv; + can_rcvlist_proc_show_array(m, dev, dev_rcv_lists->rx_eff, + ARRAY_SIZE(dev_rcv_lists->rx_eff)); } } diff --git a/net/can/raw.c b/net/can/raw.c index afcbff063a67..59c039d73c6d 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -1,5 +1,5 @@ -/* - * raw.c - Raw sockets for protocol family CAN +// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) +/* raw.c - Raw sockets for protocol family CAN * * Copyright (c) 2002-2007 Volkswagen Group Electronic Research * All rights reserved. @@ -64,8 +64,7 @@ MODULE_ALIAS("can-proto-1"); #define MASK_ALL 0 -/* - * A raw socket has a list of can_filters attached to it, each receiving +/* A raw socket has a list of can_filters attached to it, each receiving * the CAN frames matching that filter. If the filter list is empty, * no CAN frames will be received by the socket. The default after * opening the socket, is to have one filter which receives all frames. @@ -96,8 +95,7 @@ struct raw_sock { struct uniqframe __percpu *uniq; }; -/* - * Return pointer to store the extra msg flags for raw_recvmsg(). +/* Return pointer to store the extra msg flags for raw_recvmsg(). * We use the space of one unsigned int beyond the 'struct sockaddr_can' * in skb->cb. */ @@ -156,8 +154,7 @@ static void raw_rcv(struct sk_buff *oskb, void *data) if (!skb) return; - /* - * Put the datagram to the queue so that raw_recvmsg() can + /* Put the datagram to the queue so that raw_recvmsg() can * get it from there. We need to pass the interface index to * raw_recvmsg(). We pass a whole struct sockaddr_can in skb->cb * containing the interface index. 
@@ -283,7 +280,6 @@ static int raw_notifier(struct notifier_block *nb, return NOTIFY_DONE; switch (msg) { - case NETDEV_UNREGISTER: lock_sock(sk); /* remove current filters & unregister */ @@ -369,8 +365,9 @@ static int raw_release(struct socket *sock) raw_disable_allfilters(dev_net(dev), dev, sk); dev_put(dev); } - } else + } else { raw_disable_allfilters(sock_net(sk), NULL, sk); + } } if (ro->count > 1) @@ -399,7 +396,7 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len) int err = 0; int notify_enetdown = 0; - if (len < sizeof(*addr)) + if (len < CAN_REQUIRED_SIZE(*addr, can_ifindex)) return -EINVAL; if (addr->can_family != AF_CAN) return -EINVAL; @@ -450,8 +447,9 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len) dev, sk); dev_put(dev); } - } else + } else { raw_disable_allfilters(sock_net(sk), NULL, sk); + } } ro->ifindex = ifindex; ro->bound = 1; @@ -502,7 +500,6 @@ static int raw_setsockopt(struct socket *sock, int level, int optname, return -EINVAL; switch (optname) { - case CAN_RAW_FILTER: if (optlen % sizeof(struct can_filter) != 0) return -EINVAL; @@ -665,17 +662,18 @@ static int raw_getsockopt(struct socket *sock, int level, int optname, return -EINVAL; switch (optname) { - case CAN_RAW_FILTER: lock_sock(sk); if (ro->count > 0) { int fsize = ro->count * sizeof(struct can_filter); + if (len > fsize) len = fsize; if (copy_to_user(optval, ro->filter, len)) err = -EFAULT; - } else + } else { len = 0; + } release_sock(sk); if (!err) @@ -735,15 +733,16 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) if (msg->msg_name) { DECLARE_SOCKADDR(struct sockaddr_can *, addr, msg->msg_name); - if (msg->msg_namelen < sizeof(*addr)) + if (msg->msg_namelen < CAN_REQUIRED_SIZE(*addr, can_ifindex)) return -EINVAL; if (addr->can_family != AF_CAN) return -EINVAL; ifindex = addr->can_ifindex; - } else + } else { ifindex = ro->ifindex; + } dev = dev_get_by_index(sock_net(sk), ifindex); if (!dev) @@ -836,6 +835,13 @@ static int raw_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, return size; } +static int raw_sock_no_ioctlcmd(struct socket *sock, unsigned int cmd, + unsigned long arg) +{ + /* no ioctls for socket layer -> hand it down to NIC layer */ + return -ENOIOCTLCMD; +} + static const struct proto_ops raw_ops = { .family = PF_CAN, .release = raw_release, @@ -845,7 +851,7 @@ static const struct proto_ops raw_ops = { .accept = sock_no_accept, .getname = raw_getname, .poll = datagram_poll, - .ioctl = can_ioctl, /* use can_ioctl() from af_can.c */ + .ioctl = raw_sock_no_ioctlcmd, .gettstamp = sock_gettstamp, .listen = sock_no_listen, .shutdown = sock_no_shutdown, @@ -879,7 +885,7 @@ static __init int raw_module_init(void) err = can_proto_register(&raw_can_proto); if (err < 0) - printk(KERN_ERR "can: registration of raw protocol failed\n"); + pr_err("can: registration of raw protocol failed\n"); return err; } diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c index 94c7f77ecb6b..da5639a5bd3b 100644 --- a/net/core/bpf_sk_storage.c +++ b/net/core/bpf_sk_storage.c @@ -12,6 +12,9 @@ static atomic_t cache_idx; +#define SK_STORAGE_CREATE_FLAG_MASK \ + (BPF_F_NO_PREALLOC | BPF_F_CLONE) + struct bucket { struct hlist_head list; raw_spinlock_t lock; @@ -209,7 +212,6 @@ static void selem_unlink_sk(struct bpf_sk_storage_elem *selem) kfree_rcu(sk_storage, rcu); } -/* sk_storage->lock must be held and sk_storage->list cannot be empty */ static void __selem_link_sk(struct bpf_sk_storage *sk_storage, struct 
bpf_sk_storage_elem *selem) { @@ -509,7 +511,7 @@ static int sk_storage_delete(struct sock *sk, struct bpf_map *map) return 0; } -/* Called by __sk_destruct() */ +/* Called by __sk_destruct() & bpf_sk_storage_clone() */ void bpf_sk_storage_free(struct sock *sk) { struct bpf_sk_storage_elem *selem; @@ -557,6 +559,11 @@ static void bpf_sk_storage_map_free(struct bpf_map *map) smap = (struct bpf_sk_storage_map *)map; + /* Note that this map might be concurrently cloned from + * bpf_sk_storage_clone. Wait for any existing bpf_sk_storage_clone + * RCU read section to finish before proceeding. New RCU + * read sections should be prevented via bpf_map_inc_not_zero. + */ synchronize_rcu(); /* bpf prog and the userspace can no longer access this map @@ -601,7 +608,9 @@ static void bpf_sk_storage_map_free(struct bpf_map *map) static int bpf_sk_storage_map_alloc_check(union bpf_attr *attr) { - if (attr->map_flags != BPF_F_NO_PREALLOC || attr->max_entries || + if (attr->map_flags & ~SK_STORAGE_CREATE_FLAG_MASK || + !(attr->map_flags & BPF_F_NO_PREALLOC) || + attr->max_entries || attr->key_size != sizeof(int) || !attr->value_size || /* Enforce BTF for userspace sk dumping */ !attr->btf_key_type_id || !attr->btf_value_type_id) @@ -739,6 +748,95 @@ static int bpf_fd_sk_storage_delete_elem(struct bpf_map *map, void *key) return err; } +static struct bpf_sk_storage_elem * +bpf_sk_storage_clone_elem(struct sock *newsk, + struct bpf_sk_storage_map *smap, + struct bpf_sk_storage_elem *selem) +{ + struct bpf_sk_storage_elem *copy_selem; + + copy_selem = selem_alloc(smap, newsk, NULL, true); + if (!copy_selem) + return NULL; + + if (map_value_has_spin_lock(&smap->map)) + copy_map_value_locked(&smap->map, SDATA(copy_selem)->data, + SDATA(selem)->data, true); + else + copy_map_value(&smap->map, SDATA(copy_selem)->data, + SDATA(selem)->data); + + return copy_selem; +} + +int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk) +{ + struct bpf_sk_storage *new_sk_storage = NULL; + struct bpf_sk_storage *sk_storage; + struct bpf_sk_storage_elem *selem; + int ret = 0; + + RCU_INIT_POINTER(newsk->sk_bpf_storage, NULL); + + rcu_read_lock(); + sk_storage = rcu_dereference(sk->sk_bpf_storage); + + if (!sk_storage || hlist_empty(&sk_storage->list)) + goto out; + + hlist_for_each_entry_rcu(selem, &sk_storage->list, snode) { + struct bpf_sk_storage_elem *copy_selem; + struct bpf_sk_storage_map *smap; + struct bpf_map *map; + + smap = rcu_dereference(SDATA(selem)->smap); + if (!(smap->map.map_flags & BPF_F_CLONE)) + continue; + + /* Note that for lockless listeners adding new element + * here can race with cleanup in bpf_sk_storage_map_free. + * Try to grab map refcnt to make sure that it's still + * alive and prevent concurrent removal. + */ + map = bpf_map_inc_not_zero(&smap->map, false); + if (IS_ERR(map)) + continue; + + copy_selem = bpf_sk_storage_clone_elem(newsk, smap, selem); + if (!copy_selem) { + ret = -ENOMEM; + bpf_map_put(map); + goto out; + } + + if (new_sk_storage) { + selem_link_map(smap, copy_selem); + __selem_link_sk(new_sk_storage, copy_selem); + } else { + ret = sk_storage_alloc(newsk, smap, copy_selem); + if (ret) { + kfree(copy_selem); + atomic_sub(smap->elem_size, + &newsk->sk_omem_alloc); + bpf_map_put(map); + goto out; + } + + new_sk_storage = rcu_dereference(copy_selem->sk_storage); + } + bpf_map_put(map); + } + +out: + rcu_read_unlock(); + + /* In case of an error, don't free anything explicitly here, the + * caller is responsible to call bpf_sk_storage_free. 
+ */ + + return ret; +} + BPF_CALL_4(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk, void *, value, u64, flags) { diff --git a/net/core/datagram.c b/net/core/datagram.c index 45a162ef5e02..4cc8dc5db2b7 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -442,8 +442,8 @@ static int __skb_datagram_iter(const struct sk_buff *skb, int offset, if (copy > len) copy = len; - n = cb(vaddr + frag->page_offset + - offset - start, copy, data, to); + n = cb(vaddr + skb_frag_off(frag) + offset - start, + copy, data, to); kunmap(page); offset += n; if (n != copy) @@ -573,7 +573,7 @@ int skb_copy_datagram_from_iter(struct sk_buff *skb, int offset, if (copy > len) copy = len; copied = copy_page_from_iter(skb_frag_page(frag), - frag->page_offset + offset - start, + skb_frag_off(frag) + offset - start, copy, from); if (copied != copy) goto fault; diff --git a/net/core/dev.c b/net/core/dev.c index 5156c0edebe8..71b18e80389f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3467,18 +3467,22 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, qdisc_calculate_pkt_len(skb, q); if (q->flags & TCQ_F_NOLOCK) { - if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) { - __qdisc_drop(skb, &to_free); - rc = NET_XMIT_DROP; - } else if ((q->flags & TCQ_F_CAN_BYPASS) && q->empty && - qdisc_run_begin(q)) { + if ((q->flags & TCQ_F_CAN_BYPASS) && q->empty && + qdisc_run_begin(q)) { + if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, + &q->state))) { + __qdisc_drop(skb, &to_free); + rc = NET_XMIT_DROP; + goto end_run; + } qdisc_bstats_cpu_update(q, skb); + rc = NET_XMIT_SUCCESS; if (sch_direct_xmit(skb, q, dev, txq, NULL, true)) __qdisc_run(q); +end_run: qdisc_run_end(q); - rc = NET_XMIT_SUCCESS; } else { rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK; qdisc_run(q); @@ -3963,6 +3967,8 @@ int dev_weight_rx_bias __read_mostly = 1; /* bias for backlog weight */ int dev_weight_tx_bias __read_mostly = 1; /* bias for output_queue quota */ int dev_rx_weight __read_mostly = 64; int dev_tx_weight __read_mostly = 64; +/* Maximum number of GRO_NORMAL skbs to batch up for list-RX */ +int gro_normal_batch __read_mostly = 8; /* Called with irq disabled */ static inline void ____napi_schedule(struct softnet_data *sd, @@ -5486,7 +5492,7 @@ static void gro_pull_from_frag0(struct sk_buff *skb, int grow) skb->data_len -= grow; skb->tail += grow; - pinfo->frags[0].page_offset += grow; + skb_frag_off_add(&pinfo->frags[0], grow); skb_frag_size_sub(&pinfo->frags[0], grow); if (unlikely(!skb_frag_size(&pinfo->frags[0]))) { @@ -5747,6 +5753,26 @@ struct sk_buff *napi_get_frags(struct napi_struct *napi) } EXPORT_SYMBOL(napi_get_frags); +/* Pass the currently batched GRO_NORMAL SKBs up to the stack. */ +static void gro_normal_list(struct napi_struct *napi) +{ + if (!napi->rx_count) + return; + netif_receive_skb_list_internal(&napi->rx_list); + INIT_LIST_HEAD(&napi->rx_list); + napi->rx_count = 0; +} + +/* Queue one GRO_NORMAL SKB up for list processing. If batch size exceeded, + * pass the whole batch up to the stack. 
+ */ +static void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb) +{ + list_add_tail(&skb->list, &napi->rx_list); + if (++napi->rx_count >= gro_normal_batch) + gro_normal_list(napi); +} + static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, gro_result_t ret) @@ -5756,8 +5782,8 @@ static gro_result_t napi_frags_finish(struct napi_struct *napi, case GRO_HELD: __skb_push(skb, ETH_HLEN); skb->protocol = eth_type_trans(skb, skb->dev); - if (ret == GRO_NORMAL && netif_receive_skb_internal(skb)) - ret = GRO_DROP; + if (ret == GRO_NORMAL) + gro_normal_one(napi, skb); break; case GRO_DROP: @@ -6034,6 +6060,8 @@ bool napi_complete_done(struct napi_struct *n, int work_done) NAPIF_STATE_IN_BUSY_POLL))) return false; + gro_normal_list(n); + if (n->gro_bitmask) { unsigned long timeout = 0; @@ -6119,10 +6147,19 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock) * Ideally, a new ndo_busy_poll_stop() could avoid another round. */ rc = napi->poll(napi, BUSY_POLL_BUDGET); + /* We can't gro_normal_list() here, because napi->poll() might have + * rearmed the napi (napi_complete_done()) in which case it could + * already be running on another CPU. + */ trace_napi_poll(napi, rc, BUSY_POLL_BUDGET); netpoll_poll_unlock(have_poll_lock); - if (rc == BUSY_POLL_BUDGET) + if (rc == BUSY_POLL_BUDGET) { + /* As the whole budget was spent, we still own the napi so can + * safely handle the rx_list. + */ + gro_normal_list(napi); __napi_schedule(napi); + } local_bh_enable(); } @@ -6167,6 +6204,7 @@ restart: } work = napi_poll(napi, BUSY_POLL_BUDGET); trace_napi_poll(napi, work, BUSY_POLL_BUDGET); + gro_normal_list(napi); count: if (work > 0) __NET_ADD_STATS(dev_net(napi->dev), @@ -6272,6 +6310,8 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi, napi->timer.function = napi_watchdog; init_gro_hash(napi); napi->skb = NULL; + INIT_LIST_HEAD(&napi->rx_list); + napi->rx_count = 0; napi->poll = poll; if (weight > NAPI_POLL_WEIGHT) netdev_err_once(dev, "%s() called with weight %d\n", __func__, @@ -6368,6 +6408,8 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll) goto out_unlock; } + gro_normal_list(n); + if (n->gro_bitmask) { /* flush too old packets * If HZ < 1000, flush all packets. 
@@ -8088,12 +8130,15 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, bpf_chk = generic_xdp_install; if (fd >= 0) { + u32 prog_id; + if (!offload && __dev_xdp_query(dev, bpf_chk, XDP_QUERY_PROG)) { NL_SET_ERR_MSG(extack, "native and generic XDP can't be active at the same time"); return -EEXIST; } - if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) && - __dev_xdp_query(dev, bpf_op, query)) { + + prog_id = __dev_xdp_query(dev, bpf_op, query); + if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) && prog_id) { NL_SET_ERR_MSG(extack, "XDP program already attached"); return -EBUSY; } @@ -8108,6 +8153,14 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, bpf_prog_put(prog); return -EINVAL; } + + if (prog->aux->id == prog_id) { + bpf_prog_put(prog); + return 0; + } + } else { + if (!__dev_xdp_query(dev, bpf_op, query)) + return 0; } err = dev_xdp_install(dev, bpf_op, extack, flags, prog); diff --git a/net/core/devlink.c b/net/core/devlink.c index 4f40aeace902..e48680efe54a 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -18,6 +18,8 @@ #include <linux/spinlock.h> #include <linux/refcount.h> #include <linux/workqueue.h> +#include <linux/u64_stats_sync.h> +#include <linux/timekeeping.h> #include <rdma/ib_verbs.h> #include <net/netlink.h> #include <net/genetlink.h> @@ -25,6 +27,7 @@ #include <net/net_namespace.h> #include <net/sock.h> #include <net/devlink.h> +#include <net/drop_monitor.h> #define CREATE_TRACE_POINTS #include <trace/events/devlink.h> @@ -133,7 +136,7 @@ static struct devlink *devlink_get_from_info(struct genl_info *info) } static struct devlink_port *devlink_port_get_by_index(struct devlink *devlink, - int port_index) + unsigned int port_index) { struct devlink_port *devlink_port; @@ -144,7 +147,8 @@ static struct devlink_port *devlink_port_get_by_index(struct devlink *devlink, return NULL; } -static bool devlink_port_index_exists(struct devlink *devlink, int port_index) +static bool devlink_port_index_exists(struct devlink *devlink, + unsigned int port_index) { return devlink_port_get_by_index(devlink, port_index); } @@ -342,7 +346,6 @@ struct devlink_snapshot { struct list_head list; struct devlink_region *region; devlink_snapshot_data_dest_t *data_destructor; - u64 data_len; u8 *data; u32 id; }; @@ -371,14 +374,6 @@ devlink_region_snapshot_get_by_id(struct devlink_region *region, u32 id) return NULL; } -static void devlink_region_snapshot_del(struct devlink_snapshot *snapshot) -{ - snapshot->region->cur_snapshots--; - list_del(&snapshot->list); - (*snapshot->data_destructor)(snapshot->data); - kfree(snapshot); -} - #define DEVLINK_NL_FLAG_NEED_DEVLINK BIT(0) #define DEVLINK_NL_FLAG_NEED_PORT BIT(1) #define DEVLINK_NL_FLAG_NEED_SB BIT(2) @@ -476,6 +471,8 @@ static int devlink_nl_fill(struct sk_buff *msg, struct devlink *devlink, if (devlink_nl_put_handle(msg, devlink)) goto nla_put_failure; + if (nla_put_u8(msg, DEVLINK_ATTR_RELOAD_FAILED, devlink->reload_failed)) + goto nla_put_failure; genlmsg_end(msg, hdr); return 0; @@ -515,32 +512,37 @@ static int devlink_nl_port_attrs_put(struct sk_buff *msg, return 0; if (nla_put_u16(msg, DEVLINK_ATTR_PORT_FLAVOUR, attrs->flavour)) return -EMSGSIZE; - if (devlink_port->attrs.flavour == DEVLINK_PORT_FLAVOUR_PCI_PF) { + switch (devlink_port->attrs.flavour) { + case DEVLINK_PORT_FLAVOUR_PCI_PF: if (nla_put_u16(msg, DEVLINK_ATTR_PORT_PCI_PF_NUMBER, attrs->pci_pf.pf)) return -EMSGSIZE; - } else if (devlink_port->attrs.flavour == DEVLINK_PORT_FLAVOUR_PCI_VF) { + break; + case 
DEVLINK_PORT_FLAVOUR_PCI_VF: if (nla_put_u16(msg, DEVLINK_ATTR_PORT_PCI_PF_NUMBER, attrs->pci_vf.pf) || nla_put_u16(msg, DEVLINK_ATTR_PORT_PCI_VF_NUMBER, attrs->pci_vf.vf)) return -EMSGSIZE; + break; + case DEVLINK_PORT_FLAVOUR_PHYSICAL: + case DEVLINK_PORT_FLAVOUR_CPU: + case DEVLINK_PORT_FLAVOUR_DSA: + if (nla_put_u32(msg, DEVLINK_ATTR_PORT_NUMBER, + attrs->phys.port_number)) + return -EMSGSIZE; + if (!attrs->split) + return 0; + if (nla_put_u32(msg, DEVLINK_ATTR_PORT_SPLIT_GROUP, + attrs->phys.port_number)) + return -EMSGSIZE; + if (nla_put_u32(msg, DEVLINK_ATTR_PORT_SPLIT_SUBPORT_NUMBER, + attrs->phys.split_subport_number)) + return -EMSGSIZE; + break; + default: + break; } - if (devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_PHYSICAL && - devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_CPU && - devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_DSA) - return 0; - if (nla_put_u32(msg, DEVLINK_ATTR_PORT_NUMBER, - attrs->phys.port_number)) - return -EMSGSIZE; - if (!attrs->split) - return 0; - if (nla_put_u32(msg, DEVLINK_ATTR_PORT_SPLIT_GROUP, - attrs->phys.port_number)) - return -EMSGSIZE; - if (nla_put_u32(msg, DEVLINK_ATTR_PORT_SPLIT_SUBPORT_NUMBER, - attrs->phys.split_subport_number)) - return -EMSGSIZE; return 0; } @@ -560,7 +562,7 @@ static int devlink_nl_port_fill(struct sk_buff *msg, struct devlink *devlink, if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, devlink_port->index)) goto nla_put_failure; - spin_lock(&devlink_port->type_lock); + spin_lock_bh(&devlink_port->type_lock); if (nla_put_u16(msg, DEVLINK_ATTR_PORT_TYPE, devlink_port->type)) goto nla_put_failure_type_locked; if (devlink_port->desired_type != DEVLINK_PORT_TYPE_NOTSET && @@ -585,7 +587,7 @@ static int devlink_nl_port_fill(struct sk_buff *msg, struct devlink *devlink, ibdev->name)) goto nla_put_failure_type_locked; } - spin_unlock(&devlink_port->type_lock); + spin_unlock_bh(&devlink_port->type_lock); if (devlink_nl_port_attrs_put(msg, devlink_port)) goto nla_put_failure; @@ -593,7 +595,7 @@ static int devlink_nl_port_fill(struct sk_buff *msg, struct devlink *devlink, return 0; nla_put_failure_type_locked: - spin_unlock(&devlink_port->type_lock); + spin_unlock_bh(&devlink_port->type_lock); nla_put_failure: genlmsg_cancel(msg, hdr); return -EMSGSIZE; @@ -2672,12 +2674,32 @@ devlink_resources_validate(struct devlink *devlink, return err; } +static bool devlink_reload_supported(struct devlink *devlink) +{ + return devlink->ops->reload_down && devlink->ops->reload_up; +} + +static void devlink_reload_failed_set(struct devlink *devlink, + bool reload_failed) +{ + if (devlink->reload_failed == reload_failed) + return; + devlink->reload_failed = reload_failed; + devlink_notify(devlink, DEVLINK_CMD_NEW); +} + +bool devlink_is_reload_failed(const struct devlink *devlink) +{ + return devlink->reload_failed; +} +EXPORT_SYMBOL_GPL(devlink_is_reload_failed); + static int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; int err; - if (!devlink->ops->reload) + if (!devlink_reload_supported(devlink)) return -EOPNOTSUPP; err = devlink_resources_validate(devlink, NULL, info); @@ -2685,7 +2707,12 @@ static int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info) NL_SET_ERR_MSG_MOD(info->extack, "resources size validation failed"); return err; } - return devlink->ops->reload(devlink, info->extack); + err = devlink->ops->reload_down(devlink, info->extack); + if (err) + return err; + err = devlink->ops->reload_up(devlink, info->extack); + 
devlink_reload_failed_set(devlink, !!err); + return err; } static int devlink_nl_flash_update_fill(struct sk_buff *msg, @@ -2852,6 +2879,11 @@ static const struct devlink_param devlink_param_generic[] = { .name = DEVLINK_PARAM_GENERIC_FW_LOAD_POLICY_NAME, .type = DEVLINK_PARAM_GENERIC_FW_LOAD_POLICY_TYPE, }, + { + .id = DEVLINK_PARAM_GENERIC_ID_RESET_DEV_ON_DRV_PROBE, + .name = DEVLINK_PARAM_GENERIC_RESET_DEV_ON_DRV_PROBE_NAME, + .type = DEVLINK_PARAM_GENERIC_RESET_DEV_ON_DRV_PROBE_TYPE, + }, }; static int devlink_param_generic_verify(const struct devlink_param *param) @@ -3596,6 +3628,16 @@ out_free_msg: nlmsg_free(msg); } +static void devlink_region_snapshot_del(struct devlink_region *region, + struct devlink_snapshot *snapshot) +{ + devlink_nl_region_notify(region, snapshot, DEVLINK_CMD_REGION_DEL); + region->cur_snapshots--; + list_del(&snapshot->list); + (*snapshot->data_destructor)(snapshot->data); + kfree(snapshot); +} + static int devlink_nl_cmd_region_get_doit(struct sk_buff *skb, struct genl_info *info) { @@ -3691,8 +3733,7 @@ static int devlink_nl_cmd_region_del(struct sk_buff *skb, if (!snapshot) return -EINVAL; - devlink_nl_region_notify(region, snapshot, DEVLINK_CMD_REGION_DEL); - devlink_region_snapshot_del(snapshot); + devlink_region_snapshot_del(region, snapshot); return 0; } @@ -3748,8 +3789,8 @@ static int devlink_nl_region_read_snapshot_fill(struct sk_buff *skb, if (!snapshot) return -EINVAL; - if (end_offset > snapshot->data_len || dump) - end_offset = snapshot->data_len; + if (end_offset > region->size || dump) + end_offset = region->size; while (curr_offset < end_offset) { u32 data_size; @@ -5154,6 +5195,571 @@ devlink_nl_cmd_health_reporter_dump_clear_doit(struct sk_buff *skb, return 0; } +struct devlink_stats { + u64 rx_bytes; + u64 rx_packets; + struct u64_stats_sync syncp; +}; + +/** + * struct devlink_trap_group_item - Packet trap group attributes. + * @group: Immutable packet trap group attributes. + * @refcount: Number of trap items using the group. + * @list: trap_group_list member. + * @stats: Trap group statistics. + * + * Describes packet trap group attributes. Created by devlink during trap + * registration. + */ +struct devlink_trap_group_item { + const struct devlink_trap_group *group; + refcount_t refcount; + struct list_head list; + struct devlink_stats __percpu *stats; +}; + +/** + * struct devlink_trap_item - Packet trap attributes. + * @trap: Immutable packet trap attributes. + * @group_item: Associated group item. + * @list: trap_list member. + * @action: Trap action. + * @stats: Trap statistics. + * @priv: Driver private information. + * + * Describes both mutable and immutable packet trap attributes. Created by + * devlink during trap registration and used for all trap related operations. 
+ */ +struct devlink_trap_item { + const struct devlink_trap *trap; + struct devlink_trap_group_item *group_item; + struct list_head list; + enum devlink_trap_action action; + struct devlink_stats __percpu *stats; + void *priv; +}; + +static struct devlink_trap_item * +devlink_trap_item_lookup(struct devlink *devlink, const char *name) +{ + struct devlink_trap_item *trap_item; + + list_for_each_entry(trap_item, &devlink->trap_list, list) { + if (!strcmp(trap_item->trap->name, name)) + return trap_item; + } + + return NULL; +} + +static struct devlink_trap_item * +devlink_trap_item_get_from_info(struct devlink *devlink, + struct genl_info *info) +{ + struct nlattr *attr; + + if (!info->attrs[DEVLINK_ATTR_TRAP_NAME]) + return NULL; + attr = info->attrs[DEVLINK_ATTR_TRAP_NAME]; + + return devlink_trap_item_lookup(devlink, nla_data(attr)); +} + +static int +devlink_trap_action_get_from_info(struct genl_info *info, + enum devlink_trap_action *p_trap_action) +{ + u8 val; + + val = nla_get_u8(info->attrs[DEVLINK_ATTR_TRAP_ACTION]); + switch (val) { + case DEVLINK_TRAP_ACTION_DROP: /* fall-through */ + case DEVLINK_TRAP_ACTION_TRAP: + *p_trap_action = val; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int devlink_trap_metadata_put(struct sk_buff *msg, + const struct devlink_trap *trap) +{ + struct nlattr *attr; + + attr = nla_nest_start(msg, DEVLINK_ATTR_TRAP_METADATA); + if (!attr) + return -EMSGSIZE; + + if ((trap->metadata_cap & DEVLINK_TRAP_METADATA_TYPE_F_IN_PORT) && + nla_put_flag(msg, DEVLINK_ATTR_TRAP_METADATA_TYPE_IN_PORT)) + goto nla_put_failure; + + nla_nest_end(msg, attr); + + return 0; + +nla_put_failure: + nla_nest_cancel(msg, attr); + return -EMSGSIZE; +} + +static void devlink_trap_stats_read(struct devlink_stats __percpu *trap_stats, + struct devlink_stats *stats) +{ + int i; + + memset(stats, 0, sizeof(*stats)); + for_each_possible_cpu(i) { + struct devlink_stats *cpu_stats; + u64 rx_packets, rx_bytes; + unsigned int start; + + cpu_stats = per_cpu_ptr(trap_stats, i); + do { + start = u64_stats_fetch_begin_irq(&cpu_stats->syncp); + rx_packets = cpu_stats->rx_packets; + rx_bytes = cpu_stats->rx_bytes; + } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start)); + + stats->rx_packets += rx_packets; + stats->rx_bytes += rx_bytes; + } +} + +static int devlink_trap_stats_put(struct sk_buff *msg, + struct devlink_stats __percpu *trap_stats) +{ + struct devlink_stats stats; + struct nlattr *attr; + + devlink_trap_stats_read(trap_stats, &stats); + + attr = nla_nest_start(msg, DEVLINK_ATTR_STATS); + if (!attr) + return -EMSGSIZE; + + if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_PACKETS, + stats.rx_packets, DEVLINK_ATTR_PAD)) + goto nla_put_failure; + + if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_BYTES, + stats.rx_bytes, DEVLINK_ATTR_PAD)) + goto nla_put_failure; + + nla_nest_end(msg, attr); + + return 0; + +nla_put_failure: + nla_nest_cancel(msg, attr); + return -EMSGSIZE; +} + +static int devlink_nl_trap_fill(struct sk_buff *msg, struct devlink *devlink, + const struct devlink_trap_item *trap_item, + enum devlink_command cmd, u32 portid, u32 seq, + int flags) +{ + struct devlink_trap_group_item *group_item = trap_item->group_item; + void *hdr; + int err; + + hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd); + if (!hdr) + return -EMSGSIZE; + + if (devlink_nl_put_handle(msg, devlink)) + goto nla_put_failure; + + if (nla_put_string(msg, DEVLINK_ATTR_TRAP_GROUP_NAME, + group_item->group->name)) + goto nla_put_failure; + + if 
(nla_put_string(msg, DEVLINK_ATTR_TRAP_NAME, trap_item->trap->name)) + goto nla_put_failure; + + if (nla_put_u8(msg, DEVLINK_ATTR_TRAP_TYPE, trap_item->trap->type)) + goto nla_put_failure; + + if (trap_item->trap->generic && + nla_put_flag(msg, DEVLINK_ATTR_TRAP_GENERIC)) + goto nla_put_failure; + + if (nla_put_u8(msg, DEVLINK_ATTR_TRAP_ACTION, trap_item->action)) + goto nla_put_failure; + + err = devlink_trap_metadata_put(msg, trap_item->trap); + if (err) + goto nla_put_failure; + + err = devlink_trap_stats_put(msg, trap_item->stats); + if (err) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +static int devlink_nl_cmd_trap_get_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct netlink_ext_ack *extack = info->extack; + struct devlink *devlink = info->user_ptr[0]; + struct devlink_trap_item *trap_item; + struct sk_buff *msg; + int err; + + if (list_empty(&devlink->trap_list)) + return -EOPNOTSUPP; + + trap_item = devlink_trap_item_get_from_info(devlink, info); + if (!trap_item) { + NL_SET_ERR_MSG_MOD(extack, "Device did not register this trap"); + return -ENOENT; + } + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + err = devlink_nl_trap_fill(msg, devlink, trap_item, + DEVLINK_CMD_TRAP_NEW, info->snd_portid, + info->snd_seq, 0); + if (err) + goto err_trap_fill; + + return genlmsg_reply(msg, info); + +err_trap_fill: + nlmsg_free(msg); + return err; +} + +static int devlink_nl_cmd_trap_get_dumpit(struct sk_buff *msg, + struct netlink_callback *cb) +{ + struct devlink_trap_item *trap_item; + struct devlink *devlink; + int start = cb->args[0]; + int idx = 0; + int err; + + mutex_lock(&devlink_mutex); + list_for_each_entry(devlink, &devlink_list, list) { + if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + continue; + mutex_lock(&devlink->lock); + list_for_each_entry(trap_item, &devlink->trap_list, list) { + if (idx < start) { + idx++; + continue; + } + err = devlink_nl_trap_fill(msg, devlink, trap_item, + DEVLINK_CMD_TRAP_NEW, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NLM_F_MULTI); + if (err) { + mutex_unlock(&devlink->lock); + goto out; + } + idx++; + } + mutex_unlock(&devlink->lock); + } +out: + mutex_unlock(&devlink_mutex); + + cb->args[0] = idx; + return msg->len; +} + +static int __devlink_trap_action_set(struct devlink *devlink, + struct devlink_trap_item *trap_item, + enum devlink_trap_action trap_action, + struct netlink_ext_ack *extack) +{ + int err; + + if (trap_item->action != trap_action && + trap_item->trap->type != DEVLINK_TRAP_TYPE_DROP) { + NL_SET_ERR_MSG_MOD(extack, "Cannot change action of non-drop traps. 
Skipping"); + return 0; + } + + err = devlink->ops->trap_action_set(devlink, trap_item->trap, + trap_action); + if (err) + return err; + + trap_item->action = trap_action; + + return 0; +} + +static int devlink_trap_action_set(struct devlink *devlink, + struct devlink_trap_item *trap_item, + struct genl_info *info) +{ + enum devlink_trap_action trap_action; + int err; + + if (!info->attrs[DEVLINK_ATTR_TRAP_ACTION]) + return 0; + + err = devlink_trap_action_get_from_info(info, &trap_action); + if (err) { + NL_SET_ERR_MSG_MOD(info->extack, "Invalid trap action"); + return -EINVAL; + } + + return __devlink_trap_action_set(devlink, trap_item, trap_action, + info->extack); +} + +static int devlink_nl_cmd_trap_set_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct netlink_ext_ack *extack = info->extack; + struct devlink *devlink = info->user_ptr[0]; + struct devlink_trap_item *trap_item; + int err; + + if (list_empty(&devlink->trap_list)) + return -EOPNOTSUPP; + + trap_item = devlink_trap_item_get_from_info(devlink, info); + if (!trap_item) { + NL_SET_ERR_MSG_MOD(extack, "Device did not register this trap"); + return -ENOENT; + } + + err = devlink_trap_action_set(devlink, trap_item, info); + if (err) + return err; + + return 0; +} + +static struct devlink_trap_group_item * +devlink_trap_group_item_lookup(struct devlink *devlink, const char *name) +{ + struct devlink_trap_group_item *group_item; + + list_for_each_entry(group_item, &devlink->trap_group_list, list) { + if (!strcmp(group_item->group->name, name)) + return group_item; + } + + return NULL; +} + +static struct devlink_trap_group_item * +devlink_trap_group_item_get_from_info(struct devlink *devlink, + struct genl_info *info) +{ + char *name; + + if (!info->attrs[DEVLINK_ATTR_TRAP_GROUP_NAME]) + return NULL; + name = nla_data(info->attrs[DEVLINK_ATTR_TRAP_GROUP_NAME]); + + return devlink_trap_group_item_lookup(devlink, name); +} + +static int +devlink_nl_trap_group_fill(struct sk_buff *msg, struct devlink *devlink, + const struct devlink_trap_group_item *group_item, + enum devlink_command cmd, u32 portid, u32 seq, + int flags) +{ + void *hdr; + int err; + + hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd); + if (!hdr) + return -EMSGSIZE; + + if (devlink_nl_put_handle(msg, devlink)) + goto nla_put_failure; + + if (nla_put_string(msg, DEVLINK_ATTR_TRAP_GROUP_NAME, + group_item->group->name)) + goto nla_put_failure; + + if (group_item->group->generic && + nla_put_flag(msg, DEVLINK_ATTR_TRAP_GENERIC)) + goto nla_put_failure; + + err = devlink_trap_stats_put(msg, group_item->stats); + if (err) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +static int devlink_nl_cmd_trap_group_get_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct netlink_ext_ack *extack = info->extack; + struct devlink *devlink = info->user_ptr[0]; + struct devlink_trap_group_item *group_item; + struct sk_buff *msg; + int err; + + if (list_empty(&devlink->trap_group_list)) + return -EOPNOTSUPP; + + group_item = devlink_trap_group_item_get_from_info(devlink, info); + if (!group_item) { + NL_SET_ERR_MSG_MOD(extack, "Device did not register this trap group"); + return -ENOENT; + } + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + err = devlink_nl_trap_group_fill(msg, devlink, group_item, + DEVLINK_CMD_TRAP_GROUP_NEW, + info->snd_portid, info->snd_seq, 0); + if (err) + goto err_trap_group_fill; + + return 
genlmsg_reply(msg, info); + +err_trap_group_fill: + nlmsg_free(msg); + return err; +} + +static int devlink_nl_cmd_trap_group_get_dumpit(struct sk_buff *msg, + struct netlink_callback *cb) +{ + enum devlink_command cmd = DEVLINK_CMD_TRAP_GROUP_NEW; + struct devlink_trap_group_item *group_item; + u32 portid = NETLINK_CB(cb->skb).portid; + struct devlink *devlink; + int start = cb->args[0]; + int idx = 0; + int err; + + mutex_lock(&devlink_mutex); + list_for_each_entry(devlink, &devlink_list, list) { + if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + continue; + mutex_lock(&devlink->lock); + list_for_each_entry(group_item, &devlink->trap_group_list, + list) { + if (idx < start) { + idx++; + continue; + } + err = devlink_nl_trap_group_fill(msg, devlink, + group_item, cmd, + portid, + cb->nlh->nlmsg_seq, + NLM_F_MULTI); + if (err) { + mutex_unlock(&devlink->lock); + goto out; + } + idx++; + } + mutex_unlock(&devlink->lock); + } +out: + mutex_unlock(&devlink_mutex); + + cb->args[0] = idx; + return msg->len; +} + +static int +__devlink_trap_group_action_set(struct devlink *devlink, + struct devlink_trap_group_item *group_item, + enum devlink_trap_action trap_action, + struct netlink_ext_ack *extack) +{ + const char *group_name = group_item->group->name; + struct devlink_trap_item *trap_item; + int err; + + list_for_each_entry(trap_item, &devlink->trap_list, list) { + if (strcmp(trap_item->trap->group.name, group_name)) + continue; + err = __devlink_trap_action_set(devlink, trap_item, + trap_action, extack); + if (err) + return err; + } + + return 0; +} + +static int +devlink_trap_group_action_set(struct devlink *devlink, + struct devlink_trap_group_item *group_item, + struct genl_info *info) +{ + enum devlink_trap_action trap_action; + int err; + + if (!info->attrs[DEVLINK_ATTR_TRAP_ACTION]) + return 0; + + err = devlink_trap_action_get_from_info(info, &trap_action); + if (err) { + NL_SET_ERR_MSG_MOD(info->extack, "Invalid trap action"); + return -EINVAL; + } + + err = __devlink_trap_group_action_set(devlink, group_item, trap_action, + info->extack); + if (err) + return err; + + return 0; +} + +static int devlink_nl_cmd_trap_group_set_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct netlink_ext_ack *extack = info->extack; + struct devlink *devlink = info->user_ptr[0]; + struct devlink_trap_group_item *group_item; + int err; + + if (list_empty(&devlink->trap_group_list)) + return -EOPNOTSUPP; + + group_item = devlink_trap_group_item_get_from_info(devlink, info); + if (!group_item) { + NL_SET_ERR_MSG_MOD(extack, "Device did not register this trap group"); + return -ENOENT; + } + + err = devlink_trap_group_action_set(devlink, group_item, info); + if (err) + return err; + + return 0; +} + static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING }, @@ -5184,6 +5790,9 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { [DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER] = { .type = NLA_U8 }, [DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME] = { .type = NLA_NUL_STRING }, [DEVLINK_ATTR_FLASH_UPDATE_COMPONENT] = { .type = NLA_NUL_STRING }, + [DEVLINK_ATTR_TRAP_NAME] = { .type = NLA_NUL_STRING }, + [DEVLINK_ATTR_TRAP_ACTION] = { .type = NLA_U8 }, + [DEVLINK_ATTR_TRAP_GROUP_NAME] = { .type = NLA_NUL_STRING }, }; static const struct genl_ops devlink_nl_ops[] = { @@ -5483,6 +6092,32 @@ static const struct genl_ops devlink_nl_ops[] = { .flags = GENL_ADMIN_PERM, 
.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, }, + { + .cmd = DEVLINK_CMD_TRAP_GET, + .doit = devlink_nl_cmd_trap_get_doit, + .dumpit = devlink_nl_cmd_trap_get_dumpit, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, + /* can be retrieved by unprivileged users */ + }, + { + .cmd = DEVLINK_CMD_TRAP_SET, + .doit = devlink_nl_cmd_trap_set_doit, + .flags = GENL_ADMIN_PERM, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, + }, + { + .cmd = DEVLINK_CMD_TRAP_GROUP_GET, + .doit = devlink_nl_cmd_trap_group_get_doit, + .dumpit = devlink_nl_cmd_trap_group_get_dumpit, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, + /* can be retrieved by unprivileged users */ + }, + { + .cmd = DEVLINK_CMD_TRAP_GROUP_SET, + .doit = devlink_nl_cmd_trap_group_set_doit, + .flags = GENL_ADMIN_PERM, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, + }, }; static struct genl_family devlink_nl_family __ro_after_init = { @@ -5528,6 +6163,8 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size) INIT_LIST_HEAD(&devlink->param_list); INIT_LIST_HEAD(&devlink->region_list); INIT_LIST_HEAD(&devlink->reporter_list); + INIT_LIST_HEAD(&devlink->trap_list); + INIT_LIST_HEAD(&devlink->trap_group_list); mutex_init(&devlink->lock); mutex_init(&devlink->reporters_lock); return devlink; @@ -5574,6 +6211,8 @@ void devlink_free(struct devlink *devlink) { mutex_destroy(&devlink->reporters_lock); mutex_destroy(&devlink->lock); + WARN_ON(!list_empty(&devlink->trap_group_list)); + WARN_ON(!list_empty(&devlink->trap_list)); WARN_ON(!list_empty(&devlink->reporter_list)); WARN_ON(!list_empty(&devlink->region_list)); WARN_ON(!list_empty(&devlink->param_list)); @@ -5678,10 +6317,10 @@ static void __devlink_port_type_set(struct devlink_port *devlink_port, if (WARN_ON(!devlink_port->registered)) return; devlink_port_type_warn_cancel(devlink_port); - spin_lock(&devlink_port->type_lock); + spin_lock_bh(&devlink_port->type_lock); devlink_port->type = type; devlink_port->type_dev = type_dev; - spin_unlock(&devlink_port->type_lock); + spin_unlock_bh(&devlink_port->type_lock); devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW); } @@ -6538,7 +7177,7 @@ __devlink_param_driverinit_value_set(struct devlink *devlink, int devlink_param_driverinit_value_get(struct devlink *devlink, u32 param_id, union devlink_param_value *init_val) { - if (!devlink->ops->reload) + if (!devlink_reload_supported(devlink)) return -EOPNOTSUPP; return __devlink_param_driverinit_value_get(&devlink->param_list, @@ -6585,7 +7224,7 @@ int devlink_port_param_driverinit_value_get(struct devlink_port *devlink_port, { struct devlink *devlink = devlink_port->devlink; - if (!devlink->ops->reload) + if (!devlink_reload_supported(devlink)) return -EOPNOTSUPP; return __devlink_param_driverinit_value_get(&devlink_port->param_list, @@ -6744,7 +7383,7 @@ void devlink_region_destroy(struct devlink_region *region) /* Free all snapshots of region */ list_for_each_entry_safe(snapshot, ts, ®ion->snapshot_list, list) - devlink_region_snapshot_del(snapshot); + devlink_region_snapshot_del(region, snapshot); list_del(®ion->list); @@ -6784,12 +7423,11 @@ EXPORT_SYMBOL_GPL(devlink_region_shapshot_id_get); * The @snapshot_id should be obtained using the getter function. 
* * @region: devlink region of the snapshot - * @data_len: size of snapshot data * @data: snapshot data * @snapshot_id: snapshot id to be created * @data_destructor: pointer to destructor function to free data */ -int devlink_region_snapshot_create(struct devlink_region *region, u64 data_len, +int devlink_region_snapshot_create(struct devlink_region *region, u8 *data, u32 snapshot_id, devlink_snapshot_data_dest_t *data_destructor) { @@ -6819,7 +7457,6 @@ int devlink_region_snapshot_create(struct devlink_region *region, u64 data_len, snapshot->id = snapshot_id; snapshot->region = region; snapshot->data = data; - snapshot->data_len = data_len; snapshot->data_destructor = data_destructor; list_add_tail(&snapshot->list, ®ion->snapshot_list); @@ -6836,6 +7473,475 @@ unlock: } EXPORT_SYMBOL_GPL(devlink_region_snapshot_create); +#define DEVLINK_TRAP(_id, _type) \ + { \ + .type = DEVLINK_TRAP_TYPE_##_type, \ + .id = DEVLINK_TRAP_GENERIC_ID_##_id, \ + .name = DEVLINK_TRAP_GENERIC_NAME_##_id, \ + } + +static const struct devlink_trap devlink_trap_generic[] = { + DEVLINK_TRAP(SMAC_MC, DROP), + DEVLINK_TRAP(VLAN_TAG_MISMATCH, DROP), + DEVLINK_TRAP(INGRESS_VLAN_FILTER, DROP), + DEVLINK_TRAP(INGRESS_STP_FILTER, DROP), + DEVLINK_TRAP(EMPTY_TX_LIST, DROP), + DEVLINK_TRAP(PORT_LOOPBACK_FILTER, DROP), + DEVLINK_TRAP(BLACKHOLE_ROUTE, DROP), + DEVLINK_TRAP(TTL_ERROR, EXCEPTION), + DEVLINK_TRAP(TAIL_DROP, DROP), +}; + +#define DEVLINK_TRAP_GROUP(_id) \ + { \ + .id = DEVLINK_TRAP_GROUP_GENERIC_ID_##_id, \ + .name = DEVLINK_TRAP_GROUP_GENERIC_NAME_##_id, \ + } + +static const struct devlink_trap_group devlink_trap_group_generic[] = { + DEVLINK_TRAP_GROUP(L2_DROPS), + DEVLINK_TRAP_GROUP(L3_DROPS), + DEVLINK_TRAP_GROUP(BUFFER_DROPS), +}; + +static int devlink_trap_generic_verify(const struct devlink_trap *trap) +{ + if (trap->id > DEVLINK_TRAP_GENERIC_ID_MAX) + return -EINVAL; + + if (strcmp(trap->name, devlink_trap_generic[trap->id].name)) + return -EINVAL; + + if (trap->type != devlink_trap_generic[trap->id].type) + return -EINVAL; + + return 0; +} + +static int devlink_trap_driver_verify(const struct devlink_trap *trap) +{ + int i; + + if (trap->id <= DEVLINK_TRAP_GENERIC_ID_MAX) + return -EINVAL; + + for (i = 0; i < ARRAY_SIZE(devlink_trap_generic); i++) { + if (!strcmp(trap->name, devlink_trap_generic[i].name)) + return -EEXIST; + } + + return 0; +} + +static int devlink_trap_verify(const struct devlink_trap *trap) +{ + if (!trap || !trap->name || !trap->group.name) + return -EINVAL; + + if (trap->generic) + return devlink_trap_generic_verify(trap); + else + return devlink_trap_driver_verify(trap); +} + +static int +devlink_trap_group_generic_verify(const struct devlink_trap_group *group) +{ + if (group->id > DEVLINK_TRAP_GROUP_GENERIC_ID_MAX) + return -EINVAL; + + if (strcmp(group->name, devlink_trap_group_generic[group->id].name)) + return -EINVAL; + + return 0; +} + +static int +devlink_trap_group_driver_verify(const struct devlink_trap_group *group) +{ + int i; + + if (group->id <= DEVLINK_TRAP_GROUP_GENERIC_ID_MAX) + return -EINVAL; + + for (i = 0; i < ARRAY_SIZE(devlink_trap_group_generic); i++) { + if (!strcmp(group->name, devlink_trap_group_generic[i].name)) + return -EEXIST; + } + + return 0; +} + +static int devlink_trap_group_verify(const struct devlink_trap_group *group) +{ + if (group->generic) + return devlink_trap_group_generic_verify(group); + else + return devlink_trap_group_driver_verify(group); +} + +static void +devlink_trap_group_notify(struct devlink *devlink, + const struct 
devlink_trap_group_item *group_item, + enum devlink_command cmd) +{ + struct sk_buff *msg; + int err; + + WARN_ON_ONCE(cmd != DEVLINK_CMD_TRAP_GROUP_NEW && + cmd != DEVLINK_CMD_TRAP_GROUP_DEL); + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return; + + err = devlink_nl_trap_group_fill(msg, devlink, group_item, cmd, 0, 0, + 0); + if (err) { + nlmsg_free(msg); + return; + } + + genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), + msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL); +} + +static struct devlink_trap_group_item * +devlink_trap_group_item_create(struct devlink *devlink, + const struct devlink_trap_group *group) +{ + struct devlink_trap_group_item *group_item; + int err; + + err = devlink_trap_group_verify(group); + if (err) + return ERR_PTR(err); + + group_item = kzalloc(sizeof(*group_item), GFP_KERNEL); + if (!group_item) + return ERR_PTR(-ENOMEM); + + group_item->stats = netdev_alloc_pcpu_stats(struct devlink_stats); + if (!group_item->stats) { + err = -ENOMEM; + goto err_stats_alloc; + } + + group_item->group = group; + refcount_set(&group_item->refcount, 1); + + if (devlink->ops->trap_group_init) { + err = devlink->ops->trap_group_init(devlink, group); + if (err) + goto err_group_init; + } + + list_add_tail(&group_item->list, &devlink->trap_group_list); + devlink_trap_group_notify(devlink, group_item, + DEVLINK_CMD_TRAP_GROUP_NEW); + + return group_item; + +err_group_init: + free_percpu(group_item->stats); +err_stats_alloc: + kfree(group_item); + return ERR_PTR(err); +} + +static void +devlink_trap_group_item_destroy(struct devlink *devlink, + struct devlink_trap_group_item *group_item) +{ + devlink_trap_group_notify(devlink, group_item, + DEVLINK_CMD_TRAP_GROUP_DEL); + list_del(&group_item->list); + free_percpu(group_item->stats); + kfree(group_item); +} + +static struct devlink_trap_group_item * +devlink_trap_group_item_get(struct devlink *devlink, + const struct devlink_trap_group *group) +{ + struct devlink_trap_group_item *group_item; + + group_item = devlink_trap_group_item_lookup(devlink, group->name); + if (group_item) { + refcount_inc(&group_item->refcount); + return group_item; + } + + return devlink_trap_group_item_create(devlink, group); +} + +static void +devlink_trap_group_item_put(struct devlink *devlink, + struct devlink_trap_group_item *group_item) +{ + if (!refcount_dec_and_test(&group_item->refcount)) + return; + + devlink_trap_group_item_destroy(devlink, group_item); +} + +static int +devlink_trap_item_group_link(struct devlink *devlink, + struct devlink_trap_item *trap_item) +{ + struct devlink_trap_group_item *group_item; + + group_item = devlink_trap_group_item_get(devlink, + &trap_item->trap->group); + if (IS_ERR(group_item)) + return PTR_ERR(group_item); + + trap_item->group_item = group_item; + + return 0; +} + +static void +devlink_trap_item_group_unlink(struct devlink *devlink, + struct devlink_trap_item *trap_item) +{ + devlink_trap_group_item_put(devlink, trap_item->group_item); +} + +static void devlink_trap_notify(struct devlink *devlink, + const struct devlink_trap_item *trap_item, + enum devlink_command cmd) +{ + struct sk_buff *msg; + int err; + + WARN_ON_ONCE(cmd != DEVLINK_CMD_TRAP_NEW && + cmd != DEVLINK_CMD_TRAP_DEL); + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return; + + err = devlink_nl_trap_fill(msg, devlink, trap_item, cmd, 0, 0, 0); + if (err) { + nlmsg_free(msg); + return; + } + + genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), + msg, 0, DEVLINK_MCGRP_CONFIG, 
GFP_KERNEL); +} + +static int +devlink_trap_register(struct devlink *devlink, + const struct devlink_trap *trap, void *priv) +{ + struct devlink_trap_item *trap_item; + int err; + + if (devlink_trap_item_lookup(devlink, trap->name)) + return -EEXIST; + + trap_item = kzalloc(sizeof(*trap_item), GFP_KERNEL); + if (!trap_item) + return -ENOMEM; + + trap_item->stats = netdev_alloc_pcpu_stats(struct devlink_stats); + if (!trap_item->stats) { + err = -ENOMEM; + goto err_stats_alloc; + } + + trap_item->trap = trap; + trap_item->action = trap->init_action; + trap_item->priv = priv; + + err = devlink_trap_item_group_link(devlink, trap_item); + if (err) + goto err_group_link; + + err = devlink->ops->trap_init(devlink, trap, trap_item); + if (err) + goto err_trap_init; + + list_add_tail(&trap_item->list, &devlink->trap_list); + devlink_trap_notify(devlink, trap_item, DEVLINK_CMD_TRAP_NEW); + + return 0; + +err_trap_init: + devlink_trap_item_group_unlink(devlink, trap_item); +err_group_link: + free_percpu(trap_item->stats); +err_stats_alloc: + kfree(trap_item); + return err; +} + +static void devlink_trap_unregister(struct devlink *devlink, + const struct devlink_trap *trap) +{ + struct devlink_trap_item *trap_item; + + trap_item = devlink_trap_item_lookup(devlink, trap->name); + if (WARN_ON_ONCE(!trap_item)) + return; + + devlink_trap_notify(devlink, trap_item, DEVLINK_CMD_TRAP_DEL); + list_del(&trap_item->list); + if (devlink->ops->trap_fini) + devlink->ops->trap_fini(devlink, trap, trap_item); + devlink_trap_item_group_unlink(devlink, trap_item); + free_percpu(trap_item->stats); + kfree(trap_item); +} + +static void devlink_trap_disable(struct devlink *devlink, + const struct devlink_trap *trap) +{ + struct devlink_trap_item *trap_item; + + trap_item = devlink_trap_item_lookup(devlink, trap->name); + if (WARN_ON_ONCE(!trap_item)) + return; + + devlink->ops->trap_action_set(devlink, trap, DEVLINK_TRAP_ACTION_DROP); + trap_item->action = DEVLINK_TRAP_ACTION_DROP; +} + +/** + * devlink_traps_register - Register packet traps with devlink. + * @devlink: devlink. + * @traps: Packet traps. + * @traps_count: Count of provided packet traps. + * @priv: Driver private information. + * + * Return: Non-zero value on failure. + */ +int devlink_traps_register(struct devlink *devlink, + const struct devlink_trap *traps, + size_t traps_count, void *priv) +{ + int i, err; + + if (!devlink->ops->trap_init || !devlink->ops->trap_action_set) + return -EINVAL; + + mutex_lock(&devlink->lock); + for (i = 0; i < traps_count; i++) { + const struct devlink_trap *trap = &traps[i]; + + err = devlink_trap_verify(trap); + if (err) + goto err_trap_verify; + + err = devlink_trap_register(devlink, trap, priv); + if (err) + goto err_trap_register; + } + mutex_unlock(&devlink->lock); + + return 0; + +err_trap_register: +err_trap_verify: + for (i--; i >= 0; i--) + devlink_trap_unregister(devlink, &traps[i]); + mutex_unlock(&devlink->lock); + return err; +} +EXPORT_SYMBOL_GPL(devlink_traps_register); + +/** + * devlink_traps_unregister - Unregister packet traps from devlink. + * @devlink: devlink. + * @traps: Packet traps. + * @traps_count: Count of provided packet traps. + */ +void devlink_traps_unregister(struct devlink *devlink, + const struct devlink_trap *traps, + size_t traps_count) +{ + int i; + + mutex_lock(&devlink->lock); + /* Make sure we do not have any packets in-flight while unregistering + * traps by disabling all of them and waiting for a grace period. 
+ */ + for (i = traps_count - 1; i >= 0; i--) + devlink_trap_disable(devlink, &traps[i]); + synchronize_rcu(); + for (i = traps_count - 1; i >= 0; i--) + devlink_trap_unregister(devlink, &traps[i]); + mutex_unlock(&devlink->lock); +} +EXPORT_SYMBOL_GPL(devlink_traps_unregister); + +static void +devlink_trap_stats_update(struct devlink_stats __percpu *trap_stats, + size_t skb_len) +{ + struct devlink_stats *stats; + + stats = this_cpu_ptr(trap_stats); + u64_stats_update_begin(&stats->syncp); + stats->rx_bytes += skb_len; + stats->rx_packets++; + u64_stats_update_end(&stats->syncp); +} + +static void +devlink_trap_report_metadata_fill(struct net_dm_hw_metadata *hw_metadata, + const struct devlink_trap_item *trap_item, + struct devlink_port *in_devlink_port) +{ + struct devlink_trap_group_item *group_item = trap_item->group_item; + + hw_metadata->trap_group_name = group_item->group->name; + hw_metadata->trap_name = trap_item->trap->name; + + spin_lock(&in_devlink_port->type_lock); + if (in_devlink_port->type == DEVLINK_PORT_TYPE_ETH) + hw_metadata->input_dev = in_devlink_port->type_dev; + spin_unlock(&in_devlink_port->type_lock); +} + +/** + * devlink_trap_report - Report trapped packet to drop monitor. + * @devlink: devlink. + * @skb: Trapped packet. + * @trap_ctx: Trap context. + * @in_devlink_port: Input devlink port. + */ +void devlink_trap_report(struct devlink *devlink, struct sk_buff *skb, + void *trap_ctx, struct devlink_port *in_devlink_port) +{ + struct devlink_trap_item *trap_item = trap_ctx; + struct net_dm_hw_metadata hw_metadata = {}; + + devlink_trap_stats_update(trap_item->stats, skb->len); + devlink_trap_stats_update(trap_item->group_item->stats, skb->len); + + devlink_trap_report_metadata_fill(&hw_metadata, trap_item, + in_devlink_port); + net_dm_hw_report(skb, &hw_metadata); +} +EXPORT_SYMBOL_GPL(devlink_trap_report); + +/** + * devlink_trap_ctx_priv - Trap context to driver private information. + * @trap_ctx: Trap context. + * + * Return: Driver private information passed during registration. + */ +void *devlink_trap_ctx_priv(void *trap_ctx) +{ + struct devlink_trap_item *trap_item = trap_ctx; + + return trap_item->priv; +} +EXPORT_SYMBOL_GPL(devlink_trap_ctx_priv); + static void __devlink_compat_running_version(struct devlink *devlink, char *buf, size_t len) { @@ -6941,11 +8047,10 @@ int devlink_compat_switch_id_get(struct net_device *dev, { struct devlink_port *devlink_port; - /* RTNL mutex is held here which ensures that devlink_port - * instance cannot disappear in the middle. No need to take + /* Caller must hold RTNL mutex or reference to dev, which ensures that + * devlink_port instance cannot disappear in the middle. No need to take * any devlink lock as only permanent values are accessed. */ - ASSERT_RTNL(); devlink_port = netdev_to_devlink_port(dev); if (!devlink_port || !devlink_port->attrs.switch_port) return -EOPNOTSUPP; diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index 4ea4347f5062..536e032d95c8 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -26,6 +26,7 @@ #include <linux/bitops.h> #include <linux/slab.h> #include <linux/module.h> +#include <net/drop_monitor.h> #include <net/genetlink.h> #include <net/netevent.h> @@ -43,13 +44,44 @@ * netlink alerts */ static int trace_state = TRACE_OFF; -static DEFINE_MUTEX(trace_state_mutex); +static bool monitor_hw; + +/* net_dm_mutex + * + * An overall lock guarding every operation coming from userspace. + * It also guards the global 'hw_stats_list' list. 
+ */ +static DEFINE_MUTEX(net_dm_mutex); + +struct net_dm_stats { + u64 dropped; + struct u64_stats_sync syncp; +}; + +#define NET_DM_MAX_HW_TRAP_NAME_LEN 40 + +struct net_dm_hw_entry { + char trap_name[NET_DM_MAX_HW_TRAP_NAME_LEN]; + u32 count; +}; + +struct net_dm_hw_entries { + u32 num_entries; + struct net_dm_hw_entry entries[0]; +}; struct per_cpu_dm_data { - spinlock_t lock; - struct sk_buff *skb; + spinlock_t lock; /* Protects 'skb', 'hw_entries' and + * 'send_timer' + */ + union { + struct sk_buff *skb; + struct net_dm_hw_entries *hw_entries; + }; + struct sk_buff_head drop_queue; struct work_struct dm_alert_work; struct timer_list send_timer; + struct net_dm_stats stats; }; struct dm_hw_stat_delta { @@ -63,12 +95,37 @@ struct dm_hw_stat_delta { static struct genl_family net_drop_monitor_family; static DEFINE_PER_CPU(struct per_cpu_dm_data, dm_cpu_data); +static DEFINE_PER_CPU(struct per_cpu_dm_data, dm_hw_cpu_data); static int dm_hit_limit = 64; static int dm_delay = 1; static unsigned long dm_hw_check_delta = 2*HZ; static LIST_HEAD(hw_stats_list); +static enum net_dm_alert_mode net_dm_alert_mode = NET_DM_ALERT_MODE_SUMMARY; +static u32 net_dm_trunc_len; +static u32 net_dm_queue_len = 1000; + +struct net_dm_alert_ops { + void (*kfree_skb_probe)(void *ignore, struct sk_buff *skb, + void *location); + void (*napi_poll_probe)(void *ignore, struct napi_struct *napi, + int work, int budget); + void (*work_item_func)(struct work_struct *work); + void (*hw_work_item_func)(struct work_struct *work); + void (*hw_probe)(struct sk_buff *skb, + const struct net_dm_hw_metadata *hw_metadata); +}; + +struct net_dm_skb_cb { + union { + struct net_dm_hw_metadata *hw_metadata; + void *pc; + }; +}; + +#define NET_DM_SKB_CB(__skb) ((struct net_dm_skb_cb *)&((__skb)->cb[0])) + static struct sk_buff *reset_per_cpu_data(struct per_cpu_dm_data *data) { size_t al; @@ -235,48 +292,844 @@ static void trace_napi_poll_hit(void *ignore, struct napi_struct *napi, rcu_read_unlock(); } -static int set_all_monitor_traces(int state) +static struct net_dm_hw_entries * +net_dm_hw_reset_per_cpu_data(struct per_cpu_dm_data *hw_data) { - int rc = 0; - struct dm_hw_stat_delta *new_stat = NULL; - struct dm_hw_stat_delta *temp; + struct net_dm_hw_entries *hw_entries; + unsigned long flags; + + hw_entries = kzalloc(struct_size(hw_entries, entries, dm_hit_limit), + GFP_KERNEL); + if (!hw_entries) { + /* If the memory allocation failed, we try to perform another + * allocation in 1/10 second. Otherwise, the probe function + * will constantly bail out. 
+ */ + mod_timer(&hw_data->send_timer, jiffies + HZ / 10); + } - mutex_lock(&trace_state_mutex); + spin_lock_irqsave(&hw_data->lock, flags); + swap(hw_data->hw_entries, hw_entries); + spin_unlock_irqrestore(&hw_data->lock, flags); - if (state == trace_state) { - rc = -EAGAIN; - goto out_unlock; + return hw_entries; +} + +static int net_dm_hw_entry_put(struct sk_buff *msg, + const struct net_dm_hw_entry *hw_entry) +{ + struct nlattr *attr; + + attr = nla_nest_start(msg, NET_DM_ATTR_HW_ENTRY); + if (!attr) + return -EMSGSIZE; + + if (nla_put_string(msg, NET_DM_ATTR_HW_TRAP_NAME, hw_entry->trap_name)) + goto nla_put_failure; + + if (nla_put_u32(msg, NET_DM_ATTR_HW_TRAP_COUNT, hw_entry->count)) + goto nla_put_failure; + + nla_nest_end(msg, attr); + + return 0; + +nla_put_failure: + nla_nest_cancel(msg, attr); + return -EMSGSIZE; +} + +static int net_dm_hw_entries_put(struct sk_buff *msg, + const struct net_dm_hw_entries *hw_entries) +{ + struct nlattr *attr; + int i; + + attr = nla_nest_start(msg, NET_DM_ATTR_HW_ENTRIES); + if (!attr) + return -EMSGSIZE; + + for (i = 0; i < hw_entries->num_entries; i++) { + int rc; + + rc = net_dm_hw_entry_put(msg, &hw_entries->entries[i]); + if (rc) + goto nla_put_failure; } - switch (state) { - case TRACE_ON: - if (!try_module_get(THIS_MODULE)) { - rc = -ENODEV; - break; + nla_nest_end(msg, attr); + + return 0; + +nla_put_failure: + nla_nest_cancel(msg, attr); + return -EMSGSIZE; +} + +static int +net_dm_hw_summary_report_fill(struct sk_buff *msg, + const struct net_dm_hw_entries *hw_entries) +{ + struct net_dm_alert_msg anc_hdr = { 0 }; + void *hdr; + int rc; + + hdr = genlmsg_put(msg, 0, 0, &net_drop_monitor_family, 0, + NET_DM_CMD_ALERT); + if (!hdr) + return -EMSGSIZE; + + /* We need to put the ancillary header in order not to break user + * space. 
+ */ + if (nla_put(msg, NLA_UNSPEC, sizeof(anc_hdr), &anc_hdr)) + goto nla_put_failure; + + rc = net_dm_hw_entries_put(msg, hw_entries); + if (rc) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +static void net_dm_hw_summary_work(struct work_struct *work) +{ + struct net_dm_hw_entries *hw_entries; + struct per_cpu_dm_data *hw_data; + struct sk_buff *msg; + int rc; + + hw_data = container_of(work, struct per_cpu_dm_data, dm_alert_work); + + hw_entries = net_dm_hw_reset_per_cpu_data(hw_data); + if (!hw_entries) + return; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + goto out; + + rc = net_dm_hw_summary_report_fill(msg, hw_entries); + if (rc) { + nlmsg_free(msg); + goto out; + } + + genlmsg_multicast(&net_drop_monitor_family, msg, 0, 0, GFP_KERNEL); + +out: + kfree(hw_entries); +} + +static void +net_dm_hw_summary_probe(struct sk_buff *skb, + const struct net_dm_hw_metadata *hw_metadata) +{ + struct net_dm_hw_entries *hw_entries; + struct net_dm_hw_entry *hw_entry; + struct per_cpu_dm_data *hw_data; + unsigned long flags; + int i; + + hw_data = this_cpu_ptr(&dm_hw_cpu_data); + spin_lock_irqsave(&hw_data->lock, flags); + hw_entries = hw_data->hw_entries; + + if (!hw_entries) + goto out; + + for (i = 0; i < hw_entries->num_entries; i++) { + hw_entry = &hw_entries->entries[i]; + if (!strncmp(hw_entry->trap_name, hw_metadata->trap_name, + NET_DM_MAX_HW_TRAP_NAME_LEN - 1)) { + hw_entry->count++; + goto out; } + } + if (WARN_ON_ONCE(hw_entries->num_entries == dm_hit_limit)) + goto out; - rc |= register_trace_kfree_skb(trace_kfree_skb_hit, NULL); - rc |= register_trace_napi_poll(trace_napi_poll_hit, NULL); - break; + hw_entry = &hw_entries->entries[hw_entries->num_entries]; + strlcpy(hw_entry->trap_name, hw_metadata->trap_name, + NET_DM_MAX_HW_TRAP_NAME_LEN - 1); + hw_entry->count = 1; + hw_entries->num_entries++; - case TRACE_OFF: - rc |= unregister_trace_kfree_skb(trace_kfree_skb_hit, NULL); - rc |= unregister_trace_napi_poll(trace_napi_poll_hit, NULL); + if (!timer_pending(&hw_data->send_timer)) { + hw_data->send_timer.expires = jiffies + dm_delay * HZ; + add_timer(&hw_data->send_timer); + } - tracepoint_synchronize_unregister(); +out: + spin_unlock_irqrestore(&hw_data->lock, flags); +} - /* - * Clean the device list +static const struct net_dm_alert_ops net_dm_alert_summary_ops = { + .kfree_skb_probe = trace_kfree_skb_hit, + .napi_poll_probe = trace_napi_poll_hit, + .work_item_func = send_dm_alert, + .hw_work_item_func = net_dm_hw_summary_work, + .hw_probe = net_dm_hw_summary_probe, +}; + +static void net_dm_packet_trace_kfree_skb_hit(void *ignore, + struct sk_buff *skb, + void *location) +{ + ktime_t tstamp = ktime_get_real(); + struct per_cpu_dm_data *data; + struct sk_buff *nskb; + unsigned long flags; + + if (!skb_mac_header_was_set(skb)) + return; + + nskb = skb_clone(skb, GFP_ATOMIC); + if (!nskb) + return; + + NET_DM_SKB_CB(nskb)->pc = location; + /* Override the timestamp because we care about the time when the + * packet was dropped. 
+ */ + nskb->tstamp = tstamp; + + data = this_cpu_ptr(&dm_cpu_data); + + spin_lock_irqsave(&data->drop_queue.lock, flags); + if (skb_queue_len(&data->drop_queue) < net_dm_queue_len) + __skb_queue_tail(&data->drop_queue, nskb); + else + goto unlock_free; + spin_unlock_irqrestore(&data->drop_queue.lock, flags); + + schedule_work(&data->dm_alert_work); + + return; + +unlock_free: + spin_unlock_irqrestore(&data->drop_queue.lock, flags); + u64_stats_update_begin(&data->stats.syncp); + data->stats.dropped++; + u64_stats_update_end(&data->stats.syncp); + consume_skb(nskb); +} + +static void net_dm_packet_trace_napi_poll_hit(void *ignore, + struct napi_struct *napi, + int work, int budget) +{ +} + +static size_t net_dm_in_port_size(void) +{ + /* NET_DM_ATTR_IN_PORT nest */ + return nla_total_size(0) + + /* NET_DM_ATTR_PORT_NETDEV_IFINDEX */ + nla_total_size(sizeof(u32)) + + /* NET_DM_ATTR_PORT_NETDEV_NAME */ + nla_total_size(IFNAMSIZ + 1); +} + +#define NET_DM_MAX_SYMBOL_LEN 40 + +static size_t net_dm_packet_report_size(size_t payload_len) +{ + size_t size; + + size = nlmsg_msg_size(GENL_HDRLEN + net_drop_monitor_family.hdrsize); + + return NLMSG_ALIGN(size) + + /* NET_DM_ATTR_ORIGIN */ + nla_total_size(sizeof(u16)) + + /* NET_DM_ATTR_PC */ + nla_total_size(sizeof(u64)) + + /* NET_DM_ATTR_SYMBOL */ + nla_total_size(NET_DM_MAX_SYMBOL_LEN + 1) + + /* NET_DM_ATTR_IN_PORT */ + net_dm_in_port_size() + + /* NET_DM_ATTR_TIMESTAMP */ + nla_total_size(sizeof(u64)) + + /* NET_DM_ATTR_ORIG_LEN */ + nla_total_size(sizeof(u32)) + + /* NET_DM_ATTR_PROTO */ + nla_total_size(sizeof(u16)) + + /* NET_DM_ATTR_PAYLOAD */ + nla_total_size(payload_len); +} + +static int net_dm_packet_report_in_port_put(struct sk_buff *msg, int ifindex, + const char *name) +{ + struct nlattr *attr; + + attr = nla_nest_start(msg, NET_DM_ATTR_IN_PORT); + if (!attr) + return -EMSGSIZE; + + if (ifindex && + nla_put_u32(msg, NET_DM_ATTR_PORT_NETDEV_IFINDEX, ifindex)) + goto nla_put_failure; + + if (name && nla_put_string(msg, NET_DM_ATTR_PORT_NETDEV_NAME, name)) + goto nla_put_failure; + + nla_nest_end(msg, attr); + + return 0; + +nla_put_failure: + nla_nest_cancel(msg, attr); + return -EMSGSIZE; +} + +static int net_dm_packet_report_fill(struct sk_buff *msg, struct sk_buff *skb, + size_t payload_len) +{ + u64 pc = (u64)(uintptr_t) NET_DM_SKB_CB(skb)->pc; + char buf[NET_DM_MAX_SYMBOL_LEN]; + struct nlattr *attr; + void *hdr; + int rc; + + hdr = genlmsg_put(msg, 0, 0, &net_drop_monitor_family, 0, + NET_DM_CMD_PACKET_ALERT); + if (!hdr) + return -EMSGSIZE; + + if (nla_put_u16(msg, NET_DM_ATTR_ORIGIN, NET_DM_ORIGIN_SW)) + goto nla_put_failure; + + if (nla_put_u64_64bit(msg, NET_DM_ATTR_PC, pc, NET_DM_ATTR_PAD)) + goto nla_put_failure; + + snprintf(buf, sizeof(buf), "%pS", NET_DM_SKB_CB(skb)->pc); + if (nla_put_string(msg, NET_DM_ATTR_SYMBOL, buf)) + goto nla_put_failure; + + rc = net_dm_packet_report_in_port_put(msg, skb->skb_iif, NULL); + if (rc) + goto nla_put_failure; + + if (nla_put_u64_64bit(msg, NET_DM_ATTR_TIMESTAMP, + ktime_to_ns(skb->tstamp), NET_DM_ATTR_PAD)) + goto nla_put_failure; + + if (nla_put_u32(msg, NET_DM_ATTR_ORIG_LEN, skb->len)) + goto nla_put_failure; + + if (!payload_len) + goto out; + + if (nla_put_u16(msg, NET_DM_ATTR_PROTO, be16_to_cpu(skb->protocol))) + goto nla_put_failure; + + attr = skb_put(msg, nla_total_size(payload_len)); + attr->nla_type = NET_DM_ATTR_PAYLOAD; + attr->nla_len = nla_attr_size(payload_len); + if (skb_copy_bits(skb, 0, nla_data(attr), payload_len)) + goto nla_put_failure; + +out: + 
genlmsg_end(msg, hdr); + + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +#define NET_DM_MAX_PACKET_SIZE (0xffff - NLA_HDRLEN - NLA_ALIGNTO) + +static void net_dm_packet_report(struct sk_buff *skb) +{ + struct sk_buff *msg; + size_t payload_len; + int rc; + + /* Make sure we start copying the packet from the MAC header */ + if (skb->data > skb_mac_header(skb)) + skb_push(skb, skb->data - skb_mac_header(skb)); + else + skb_pull(skb, skb_mac_header(skb) - skb->data); + + /* Ensure packet fits inside a single netlink attribute */ + payload_len = min_t(size_t, skb->len, NET_DM_MAX_PACKET_SIZE); + if (net_dm_trunc_len) + payload_len = min_t(size_t, net_dm_trunc_len, payload_len); + + msg = nlmsg_new(net_dm_packet_report_size(payload_len), GFP_KERNEL); + if (!msg) + goto out; + + rc = net_dm_packet_report_fill(msg, skb, payload_len); + if (rc) { + nlmsg_free(msg); + goto out; + } + + genlmsg_multicast(&net_drop_monitor_family, msg, 0, 0, GFP_KERNEL); + +out: + consume_skb(skb); +} + +static void net_dm_packet_work(struct work_struct *work) +{ + struct per_cpu_dm_data *data; + struct sk_buff_head list; + struct sk_buff *skb; + unsigned long flags; + + data = container_of(work, struct per_cpu_dm_data, dm_alert_work); + + __skb_queue_head_init(&list); + + spin_lock_irqsave(&data->drop_queue.lock, flags); + skb_queue_splice_tail_init(&data->drop_queue, &list); + spin_unlock_irqrestore(&data->drop_queue.lock, flags); + + while ((skb = __skb_dequeue(&list))) + net_dm_packet_report(skb); +} + +static size_t +net_dm_hw_packet_report_size(size_t payload_len, + const struct net_dm_hw_metadata *hw_metadata) +{ + size_t size; + + size = nlmsg_msg_size(GENL_HDRLEN + net_drop_monitor_family.hdrsize); + + return NLMSG_ALIGN(size) + + /* NET_DM_ATTR_ORIGIN */ + nla_total_size(sizeof(u16)) + + /* NET_DM_ATTR_HW_TRAP_GROUP_NAME */ + nla_total_size(strlen(hw_metadata->trap_group_name) + 1) + + /* NET_DM_ATTR_HW_TRAP_NAME */ + nla_total_size(strlen(hw_metadata->trap_name) + 1) + + /* NET_DM_ATTR_IN_PORT */ + net_dm_in_port_size() + + /* NET_DM_ATTR_TIMESTAMP */ + nla_total_size(sizeof(u64)) + + /* NET_DM_ATTR_ORIG_LEN */ + nla_total_size(sizeof(u32)) + + /* NET_DM_ATTR_PROTO */ + nla_total_size(sizeof(u16)) + + /* NET_DM_ATTR_PAYLOAD */ + nla_total_size(payload_len); +} + +static int net_dm_hw_packet_report_fill(struct sk_buff *msg, + struct sk_buff *skb, size_t payload_len) +{ + struct net_dm_hw_metadata *hw_metadata; + struct nlattr *attr; + void *hdr; + + hw_metadata = NET_DM_SKB_CB(skb)->hw_metadata; + + hdr = genlmsg_put(msg, 0, 0, &net_drop_monitor_family, 0, + NET_DM_CMD_PACKET_ALERT); + if (!hdr) + return -EMSGSIZE; + + if (nla_put_u16(msg, NET_DM_ATTR_ORIGIN, NET_DM_ORIGIN_HW)) + goto nla_put_failure; + + if (nla_put_string(msg, NET_DM_ATTR_HW_TRAP_GROUP_NAME, + hw_metadata->trap_group_name)) + goto nla_put_failure; + + if (nla_put_string(msg, NET_DM_ATTR_HW_TRAP_NAME, + hw_metadata->trap_name)) + goto nla_put_failure; + + if (hw_metadata->input_dev) { + struct net_device *dev = hw_metadata->input_dev; + int rc; + + rc = net_dm_packet_report_in_port_put(msg, dev->ifindex, + dev->name); + if (rc) + goto nla_put_failure; + } + + if (nla_put_u64_64bit(msg, NET_DM_ATTR_TIMESTAMP, + ktime_to_ns(skb->tstamp), NET_DM_ATTR_PAD)) + goto nla_put_failure; + + if (nla_put_u32(msg, NET_DM_ATTR_ORIG_LEN, skb->len)) + goto nla_put_failure; + + if (!payload_len) + goto out; + + if (nla_put_u16(msg, NET_DM_ATTR_PROTO, be16_to_cpu(skb->protocol))) + goto nla_put_failure; + + attr = 
skb_put(msg, nla_total_size(payload_len)); + attr->nla_type = NET_DM_ATTR_PAYLOAD; + attr->nla_len = nla_attr_size(payload_len); + if (skb_copy_bits(skb, 0, nla_data(attr), payload_len)) + goto nla_put_failure; + +out: + genlmsg_end(msg, hdr); + + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +static struct net_dm_hw_metadata * +net_dm_hw_metadata_clone(const struct net_dm_hw_metadata *hw_metadata) +{ + struct net_dm_hw_metadata *n_hw_metadata; + const char *trap_group_name; + const char *trap_name; + + n_hw_metadata = kmalloc(sizeof(*hw_metadata), GFP_ATOMIC); + if (!n_hw_metadata) + return NULL; + + trap_group_name = kmemdup(hw_metadata->trap_group_name, + strlen(hw_metadata->trap_group_name) + 1, + GFP_ATOMIC | __GFP_ZERO); + if (!trap_group_name) + goto free_hw_metadata; + n_hw_metadata->trap_group_name = trap_group_name; + + trap_name = kmemdup(hw_metadata->trap_name, + strlen(hw_metadata->trap_name) + 1, + GFP_ATOMIC | __GFP_ZERO); + if (!trap_name) + goto free_trap_group; + n_hw_metadata->trap_name = trap_name; + + n_hw_metadata->input_dev = hw_metadata->input_dev; + if (n_hw_metadata->input_dev) + dev_hold(n_hw_metadata->input_dev); + + return n_hw_metadata; + +free_trap_group: + kfree(trap_group_name); +free_hw_metadata: + kfree(n_hw_metadata); + return NULL; +} + +static void +net_dm_hw_metadata_free(const struct net_dm_hw_metadata *hw_metadata) +{ + if (hw_metadata->input_dev) + dev_put(hw_metadata->input_dev); + kfree(hw_metadata->trap_name); + kfree(hw_metadata->trap_group_name); + kfree(hw_metadata); +} + +static void net_dm_hw_packet_report(struct sk_buff *skb) +{ + struct net_dm_hw_metadata *hw_metadata; + struct sk_buff *msg; + size_t payload_len; + int rc; + + if (skb->data > skb_mac_header(skb)) + skb_push(skb, skb->data - skb_mac_header(skb)); + else + skb_pull(skb, skb_mac_header(skb) - skb->data); + + payload_len = min_t(size_t, skb->len, NET_DM_MAX_PACKET_SIZE); + if (net_dm_trunc_len) + payload_len = min_t(size_t, net_dm_trunc_len, payload_len); + + hw_metadata = NET_DM_SKB_CB(skb)->hw_metadata; + msg = nlmsg_new(net_dm_hw_packet_report_size(payload_len, hw_metadata), + GFP_KERNEL); + if (!msg) + goto out; + + rc = net_dm_hw_packet_report_fill(msg, skb, payload_len); + if (rc) { + nlmsg_free(msg); + goto out; + } + + genlmsg_multicast(&net_drop_monitor_family, msg, 0, 0, GFP_KERNEL); + +out: + net_dm_hw_metadata_free(NET_DM_SKB_CB(skb)->hw_metadata); + consume_skb(skb); +} + +static void net_dm_hw_packet_work(struct work_struct *work) +{ + struct per_cpu_dm_data *hw_data; + struct sk_buff_head list; + struct sk_buff *skb; + unsigned long flags; + + hw_data = container_of(work, struct per_cpu_dm_data, dm_alert_work); + + __skb_queue_head_init(&list); + + spin_lock_irqsave(&hw_data->drop_queue.lock, flags); + skb_queue_splice_tail_init(&hw_data->drop_queue, &list); + spin_unlock_irqrestore(&hw_data->drop_queue.lock, flags); + + while ((skb = __skb_dequeue(&list))) + net_dm_hw_packet_report(skb); +} + +static void +net_dm_hw_packet_probe(struct sk_buff *skb, + const struct net_dm_hw_metadata *hw_metadata) +{ + struct net_dm_hw_metadata *n_hw_metadata; + ktime_t tstamp = ktime_get_real(); + struct per_cpu_dm_data *hw_data; + struct sk_buff *nskb; + unsigned long flags; + + if (!skb_mac_header_was_set(skb)) + return; + + nskb = skb_clone(skb, GFP_ATOMIC); + if (!nskb) + return; + + n_hw_metadata = net_dm_hw_metadata_clone(hw_metadata); + if (!n_hw_metadata) + goto free; + + NET_DM_SKB_CB(nskb)->hw_metadata = n_hw_metadata; + 
nskb->tstamp = tstamp; + + hw_data = this_cpu_ptr(&dm_hw_cpu_data); + + spin_lock_irqsave(&hw_data->drop_queue.lock, flags); + if (skb_queue_len(&hw_data->drop_queue) < net_dm_queue_len) + __skb_queue_tail(&hw_data->drop_queue, nskb); + else + goto unlock_free; + spin_unlock_irqrestore(&hw_data->drop_queue.lock, flags); + + schedule_work(&hw_data->dm_alert_work); + + return; + +unlock_free: + spin_unlock_irqrestore(&hw_data->drop_queue.lock, flags); + u64_stats_update_begin(&hw_data->stats.syncp); + hw_data->stats.dropped++; + u64_stats_update_end(&hw_data->stats.syncp); + net_dm_hw_metadata_free(n_hw_metadata); +free: + consume_skb(nskb); +} + +static const struct net_dm_alert_ops net_dm_alert_packet_ops = { + .kfree_skb_probe = net_dm_packet_trace_kfree_skb_hit, + .napi_poll_probe = net_dm_packet_trace_napi_poll_hit, + .work_item_func = net_dm_packet_work, + .hw_work_item_func = net_dm_hw_packet_work, + .hw_probe = net_dm_hw_packet_probe, +}; + +static const struct net_dm_alert_ops *net_dm_alert_ops_arr[] = { + [NET_DM_ALERT_MODE_SUMMARY] = &net_dm_alert_summary_ops, + [NET_DM_ALERT_MODE_PACKET] = &net_dm_alert_packet_ops, +}; + +void net_dm_hw_report(struct sk_buff *skb, + const struct net_dm_hw_metadata *hw_metadata) +{ + rcu_read_lock(); + + if (!monitor_hw) + goto out; + + net_dm_alert_ops_arr[net_dm_alert_mode]->hw_probe(skb, hw_metadata); + +out: + rcu_read_unlock(); +} +EXPORT_SYMBOL_GPL(net_dm_hw_report); + +static int net_dm_hw_monitor_start(struct netlink_ext_ack *extack) +{ + const struct net_dm_alert_ops *ops; + int cpu; + + if (monitor_hw) { + NL_SET_ERR_MSG_MOD(extack, "Hardware monitoring already enabled"); + return -EAGAIN; + } + + ops = net_dm_alert_ops_arr[net_dm_alert_mode]; + + if (!try_module_get(THIS_MODULE)) { + NL_SET_ERR_MSG_MOD(extack, "Failed to take reference on module"); + return -ENODEV; + } + + for_each_possible_cpu(cpu) { + struct per_cpu_dm_data *hw_data = &per_cpu(dm_hw_cpu_data, cpu); + struct net_dm_hw_entries *hw_entries; + + INIT_WORK(&hw_data->dm_alert_work, ops->hw_work_item_func); + timer_setup(&hw_data->send_timer, sched_send_work, 0); + hw_entries = net_dm_hw_reset_per_cpu_data(hw_data); + kfree(hw_entries); + } + + monitor_hw = true; + + return 0; +} + +static void net_dm_hw_monitor_stop(struct netlink_ext_ack *extack) +{ + int cpu; + + if (!monitor_hw) + NL_SET_ERR_MSG_MOD(extack, "Hardware monitoring already disabled"); + + monitor_hw = false; + + /* After this call returns we are guaranteed that no CPU is processing + * any hardware drops. 
+ */ + synchronize_rcu(); + + for_each_possible_cpu(cpu) { + struct per_cpu_dm_data *hw_data = &per_cpu(dm_hw_cpu_data, cpu); + struct sk_buff *skb; + + del_timer_sync(&hw_data->send_timer); + cancel_work_sync(&hw_data->dm_alert_work); + while ((skb = __skb_dequeue(&hw_data->drop_queue))) { + struct net_dm_hw_metadata *hw_metadata; + + hw_metadata = NET_DM_SKB_CB(skb)->hw_metadata; + net_dm_hw_metadata_free(hw_metadata); + consume_skb(skb); + } + } + + module_put(THIS_MODULE); +} + +static int net_dm_trace_on_set(struct netlink_ext_ack *extack) +{ + const struct net_dm_alert_ops *ops; + int cpu, rc; + + ops = net_dm_alert_ops_arr[net_dm_alert_mode]; + + if (!try_module_get(THIS_MODULE)) { + NL_SET_ERR_MSG_MOD(extack, "Failed to take reference on module"); + return -ENODEV; + } + + for_each_possible_cpu(cpu) { + struct per_cpu_dm_data *data = &per_cpu(dm_cpu_data, cpu); + struct sk_buff *skb; + + INIT_WORK(&data->dm_alert_work, ops->work_item_func); + timer_setup(&data->send_timer, sched_send_work, 0); + /* Allocate a new per-CPU skb for the summary alert message and + * free the old one which might contain stale data from + * previous tracing. */ - list_for_each_entry_safe(new_stat, temp, &hw_stats_list, list) { - if (new_stat->dev == NULL) { - list_del_rcu(&new_stat->list); - kfree_rcu(new_stat, rcu); - } + skb = reset_per_cpu_data(data); + consume_skb(skb); + } + + rc = register_trace_kfree_skb(ops->kfree_skb_probe, NULL); + if (rc) { + NL_SET_ERR_MSG_MOD(extack, "Failed to connect probe to kfree_skb() tracepoint"); + goto err_module_put; + } + + rc = register_trace_napi_poll(ops->napi_poll_probe, NULL); + if (rc) { + NL_SET_ERR_MSG_MOD(extack, "Failed to connect probe to napi_poll() tracepoint"); + goto err_unregister_trace; + } + + return 0; + +err_unregister_trace: + unregister_trace_kfree_skb(ops->kfree_skb_probe, NULL); +err_module_put: + module_put(THIS_MODULE); + return rc; +} + +static void net_dm_trace_off_set(void) +{ + struct dm_hw_stat_delta *new_stat, *temp; + const struct net_dm_alert_ops *ops; + int cpu; + + ops = net_dm_alert_ops_arr[net_dm_alert_mode]; + + unregister_trace_napi_poll(ops->napi_poll_probe, NULL); + unregister_trace_kfree_skb(ops->kfree_skb_probe, NULL); + + tracepoint_synchronize_unregister(); + + /* Make sure we do not send notifications to user space after request + * to stop tracing returns. 
+ */ + for_each_possible_cpu(cpu) { + struct per_cpu_dm_data *data = &per_cpu(dm_cpu_data, cpu); + struct sk_buff *skb; + + del_timer_sync(&data->send_timer); + cancel_work_sync(&data->dm_alert_work); + while ((skb = __skb_dequeue(&data->drop_queue))) + consume_skb(skb); + } + + list_for_each_entry_safe(new_stat, temp, &hw_stats_list, list) { + if (new_stat->dev == NULL) { + list_del_rcu(&new_stat->list); + kfree_rcu(new_stat, rcu); } + } + + module_put(THIS_MODULE); +} + +static int set_all_monitor_traces(int state, struct netlink_ext_ack *extack) +{ + int rc = 0; - module_put(THIS_MODULE); + if (state == trace_state) { + NL_SET_ERR_MSG_MOD(extack, "Trace state already set to requested state"); + return -EAGAIN; + } + switch (state) { + case TRACE_ON: + rc = net_dm_trace_on_set(extack); + break; + case TRACE_OFF: + net_dm_trace_off_set(); break; default: rc = 1; @@ -288,30 +1141,331 @@ static int set_all_monitor_traces(int state) else rc = -EINPROGRESS; -out_unlock: - mutex_unlock(&trace_state_mutex); - return rc; } +static bool net_dm_is_monitoring(void) +{ + return trace_state == TRACE_ON || monitor_hw; +} + +static int net_dm_alert_mode_get_from_info(struct genl_info *info, + enum net_dm_alert_mode *p_alert_mode) +{ + u8 val; + + val = nla_get_u8(info->attrs[NET_DM_ATTR_ALERT_MODE]); + + switch (val) { + case NET_DM_ALERT_MODE_SUMMARY: /* fall-through */ + case NET_DM_ALERT_MODE_PACKET: + *p_alert_mode = val; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int net_dm_alert_mode_set(struct genl_info *info) +{ + struct netlink_ext_ack *extack = info->extack; + enum net_dm_alert_mode alert_mode; + int rc; + + if (!info->attrs[NET_DM_ATTR_ALERT_MODE]) + return 0; + + rc = net_dm_alert_mode_get_from_info(info, &alert_mode); + if (rc) { + NL_SET_ERR_MSG_MOD(extack, "Invalid alert mode"); + return -EINVAL; + } + + net_dm_alert_mode = alert_mode; + + return 0; +} + +static void net_dm_trunc_len_set(struct genl_info *info) +{ + if (!info->attrs[NET_DM_ATTR_TRUNC_LEN]) + return; + + net_dm_trunc_len = nla_get_u32(info->attrs[NET_DM_ATTR_TRUNC_LEN]); +} + +static void net_dm_queue_len_set(struct genl_info *info) +{ + if (!info->attrs[NET_DM_ATTR_QUEUE_LEN]) + return; + + net_dm_queue_len = nla_get_u32(info->attrs[NET_DM_ATTR_QUEUE_LEN]); +} static int net_dm_cmd_config(struct sk_buff *skb, struct genl_info *info) { - return -ENOTSUPP; + struct netlink_ext_ack *extack = info->extack; + int rc; + + if (net_dm_is_monitoring()) { + NL_SET_ERR_MSG_MOD(extack, "Cannot configure drop monitor during monitoring"); + return -EBUSY; + } + + rc = net_dm_alert_mode_set(info); + if (rc) + return rc; + + net_dm_trunc_len_set(info); + + net_dm_queue_len_set(info); + + return 0; +} + +static int net_dm_monitor_start(bool set_sw, bool set_hw, + struct netlink_ext_ack *extack) +{ + bool sw_set = false; + int rc; + + if (set_sw) { + rc = set_all_monitor_traces(TRACE_ON, extack); + if (rc) + return rc; + sw_set = true; + } + + if (set_hw) { + rc = net_dm_hw_monitor_start(extack); + if (rc) + goto err_monitor_hw; + } + + return 0; + +err_monitor_hw: + if (sw_set) + set_all_monitor_traces(TRACE_OFF, extack); + return rc; +} + +static void net_dm_monitor_stop(bool set_sw, bool set_hw, + struct netlink_ext_ack *extack) +{ + if (set_hw) + net_dm_hw_monitor_stop(extack); + if (set_sw) + set_all_monitor_traces(TRACE_OFF, extack); } static int net_dm_cmd_trace(struct sk_buff *skb, struct genl_info *info) { + bool set_sw = !!info->attrs[NET_DM_ATTR_SW_DROPS]; + bool set_hw = 
!!info->attrs[NET_DM_ATTR_HW_DROPS]; + struct netlink_ext_ack *extack = info->extack; + + /* To maintain backward compatibility, we start / stop monitoring of + * software drops if no flag is specified. + */ + if (!set_sw && !set_hw) + set_sw = true; + switch (info->genlhdr->cmd) { case NET_DM_CMD_START: - return set_all_monitor_traces(TRACE_ON); + return net_dm_monitor_start(set_sw, set_hw, extack); case NET_DM_CMD_STOP: - return set_all_monitor_traces(TRACE_OFF); + net_dm_monitor_stop(set_sw, set_hw, extack); + return 0; } - return -ENOTSUPP; + return -EOPNOTSUPP; +} + +static int net_dm_config_fill(struct sk_buff *msg, struct genl_info *info) +{ + void *hdr; + + hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq, + &net_drop_monitor_family, 0, NET_DM_CMD_CONFIG_NEW); + if (!hdr) + return -EMSGSIZE; + + if (nla_put_u8(msg, NET_DM_ATTR_ALERT_MODE, net_dm_alert_mode)) + goto nla_put_failure; + + if (nla_put_u32(msg, NET_DM_ATTR_TRUNC_LEN, net_dm_trunc_len)) + goto nla_put_failure; + + if (nla_put_u32(msg, NET_DM_ATTR_QUEUE_LEN, net_dm_queue_len)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +static int net_dm_cmd_config_get(struct sk_buff *skb, struct genl_info *info) +{ + struct sk_buff *msg; + int rc; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + rc = net_dm_config_fill(msg, info); + if (rc) + goto free_msg; + + return genlmsg_reply(msg, info); + +free_msg: + nlmsg_free(msg); + return rc; +} + +static void net_dm_stats_read(struct net_dm_stats *stats) +{ + int cpu; + + memset(stats, 0, sizeof(*stats)); + for_each_possible_cpu(cpu) { + struct per_cpu_dm_data *data = &per_cpu(dm_cpu_data, cpu); + struct net_dm_stats *cpu_stats = &data->stats; + unsigned int start; + u64 dropped; + + do { + start = u64_stats_fetch_begin_irq(&cpu_stats->syncp); + dropped = cpu_stats->dropped; + } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start)); + + stats->dropped += dropped; + } +} + +static int net_dm_stats_put(struct sk_buff *msg) +{ + struct net_dm_stats stats; + struct nlattr *attr; + + net_dm_stats_read(&stats); + + attr = nla_nest_start(msg, NET_DM_ATTR_STATS); + if (!attr) + return -EMSGSIZE; + + if (nla_put_u64_64bit(msg, NET_DM_ATTR_STATS_DROPPED, + stats.dropped, NET_DM_ATTR_PAD)) + goto nla_put_failure; + + nla_nest_end(msg, attr); + + return 0; + +nla_put_failure: + nla_nest_cancel(msg, attr); + return -EMSGSIZE; +} + +static void net_dm_hw_stats_read(struct net_dm_stats *stats) +{ + int cpu; + + memset(stats, 0, sizeof(*stats)); + for_each_possible_cpu(cpu) { + struct per_cpu_dm_data *hw_data = &per_cpu(dm_hw_cpu_data, cpu); + struct net_dm_stats *cpu_stats = &hw_data->stats; + unsigned int start; + u64 dropped; + + do { + start = u64_stats_fetch_begin_irq(&cpu_stats->syncp); + dropped = cpu_stats->dropped; + } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start)); + + stats->dropped += dropped; + } +} + +static int net_dm_hw_stats_put(struct sk_buff *msg) +{ + struct net_dm_stats stats; + struct nlattr *attr; + + net_dm_hw_stats_read(&stats); + + attr = nla_nest_start(msg, NET_DM_ATTR_HW_STATS); + if (!attr) + return -EMSGSIZE; + + if (nla_put_u64_64bit(msg, NET_DM_ATTR_STATS_DROPPED, + stats.dropped, NET_DM_ATTR_PAD)) + goto nla_put_failure; + + nla_nest_end(msg, attr); + + return 0; + +nla_put_failure: + nla_nest_cancel(msg, attr); + return -EMSGSIZE; +} + +static int net_dm_stats_fill(struct sk_buff *msg, struct genl_info *info) +{ + void 
*hdr; + int rc; + + hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq, + &net_drop_monitor_family, 0, NET_DM_CMD_STATS_NEW); + if (!hdr) + return -EMSGSIZE; + + rc = net_dm_stats_put(msg); + if (rc) + goto nla_put_failure; + + rc = net_dm_hw_stats_put(msg); + if (rc) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +static int net_dm_cmd_stats_get(struct sk_buff *skb, struct genl_info *info) +{ + struct sk_buff *msg; + int rc; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + rc = net_dm_stats_fill(msg, info); + if (rc) + goto free_msg; + + return genlmsg_reply(msg, info); + +free_msg: + nlmsg_free(msg); + return rc; } static int dropmon_net_event(struct notifier_block *ev_block, @@ -330,12 +1484,12 @@ static int dropmon_net_event(struct notifier_block *ev_block, new_stat->dev = dev; new_stat->last_rx = jiffies; - mutex_lock(&trace_state_mutex); + mutex_lock(&net_dm_mutex); list_add_rcu(&new_stat->list, &hw_stats_list); - mutex_unlock(&trace_state_mutex); + mutex_unlock(&net_dm_mutex); break; case NETDEV_UNREGISTER: - mutex_lock(&trace_state_mutex); + mutex_lock(&net_dm_mutex); list_for_each_entry_safe(new_stat, tmp, &hw_stats_list, list) { if (new_stat->dev == dev) { new_stat->dev = NULL; @@ -346,18 +1500,28 @@ static int dropmon_net_event(struct notifier_block *ev_block, } } } - mutex_unlock(&trace_state_mutex); + mutex_unlock(&net_dm_mutex); break; } out: return NOTIFY_DONE; } +static const struct nla_policy net_dm_nl_policy[NET_DM_ATTR_MAX + 1] = { + [NET_DM_ATTR_UNSPEC] = { .strict_start_type = NET_DM_ATTR_UNSPEC + 1 }, + [NET_DM_ATTR_ALERT_MODE] = { .type = NLA_U8 }, + [NET_DM_ATTR_TRUNC_LEN] = { .type = NLA_U32 }, + [NET_DM_ATTR_QUEUE_LEN] = { .type = NLA_U32 }, + [NET_DM_ATTR_SW_DROPS] = {. type = NLA_FLAG }, + [NET_DM_ATTR_HW_DROPS] = {. 
type = NLA_FLAG }, +}; + static const struct genl_ops dropmon_ops[] = { { .cmd = NET_DM_CMD_CONFIG, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = net_dm_cmd_config, + .flags = GENL_ADMIN_PERM, }, { .cmd = NET_DM_CMD_START, @@ -369,12 +1533,38 @@ static const struct genl_ops dropmon_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = net_dm_cmd_trace, }, + { + .cmd = NET_DM_CMD_CONFIG_GET, + .doit = net_dm_cmd_config_get, + }, + { + .cmd = NET_DM_CMD_STATS_GET, + .doit = net_dm_cmd_stats_get, + }, }; +static int net_dm_nl_pre_doit(const struct genl_ops *ops, + struct sk_buff *skb, struct genl_info *info) +{ + mutex_lock(&net_dm_mutex); + + return 0; +} + +static void net_dm_nl_post_doit(const struct genl_ops *ops, + struct sk_buff *skb, struct genl_info *info) +{ + mutex_unlock(&net_dm_mutex); +} + static struct genl_family net_drop_monitor_family __ro_after_init = { .hdrsize = 0, .name = "NET_DM", .version = 2, + .maxattr = NET_DM_ATTR_MAX, + .policy = net_dm_nl_policy, + .pre_doit = net_dm_nl_pre_doit, + .post_doit = net_dm_nl_post_doit, .module = THIS_MODULE, .ops = dropmon_ops, .n_ops = ARRAY_SIZE(dropmon_ops), @@ -386,9 +1576,57 @@ static struct notifier_block dropmon_net_notifier = { .notifier_call = dropmon_net_event }; -static int __init init_net_drop_monitor(void) +static void __net_dm_cpu_data_init(struct per_cpu_dm_data *data) +{ + spin_lock_init(&data->lock); + skb_queue_head_init(&data->drop_queue); + u64_stats_init(&data->stats.syncp); +} + +static void __net_dm_cpu_data_fini(struct per_cpu_dm_data *data) +{ + WARN_ON(!skb_queue_empty(&data->drop_queue)); +} + +static void net_dm_cpu_data_init(int cpu) +{ + struct per_cpu_dm_data *data; + + data = &per_cpu(dm_cpu_data, cpu); + __net_dm_cpu_data_init(data); +} + +static void net_dm_cpu_data_fini(int cpu) { struct per_cpu_dm_data *data; + + data = &per_cpu(dm_cpu_data, cpu); + /* At this point, we should have exclusive access + * to this struct and can free the skb inside it. 
+ */ + consume_skb(data->skb); + __net_dm_cpu_data_fini(data); +} + +static void net_dm_hw_cpu_data_init(int cpu) +{ + struct per_cpu_dm_data *hw_data; + + hw_data = &per_cpu(dm_hw_cpu_data, cpu); + __net_dm_cpu_data_init(hw_data); +} + +static void net_dm_hw_cpu_data_fini(int cpu) +{ + struct per_cpu_dm_data *hw_data; + + hw_data = &per_cpu(dm_hw_cpu_data, cpu); + kfree(hw_data->hw_entries); + __net_dm_cpu_data_fini(hw_data); +} + +static int __init init_net_drop_monitor(void) +{ int cpu, rc; pr_info("Initializing network drop monitor service\n"); @@ -414,14 +1652,10 @@ static int __init init_net_drop_monitor(void) rc = 0; for_each_possible_cpu(cpu) { - data = &per_cpu(dm_cpu_data, cpu); - INIT_WORK(&data->dm_alert_work, send_dm_alert); - timer_setup(&data->send_timer, sched_send_work, 0); - spin_lock_init(&data->lock); - reset_per_cpu_data(data); + net_dm_cpu_data_init(cpu); + net_dm_hw_cpu_data_init(cpu); } - goto out; out_unreg: @@ -432,7 +1666,6 @@ out: static void exit_net_drop_monitor(void) { - struct per_cpu_dm_data *data; int cpu; BUG_ON(unregister_netdevice_notifier(&dropmon_net_notifier)); @@ -440,19 +1673,11 @@ static void exit_net_drop_monitor(void) /* * Because of the module_get/put we do in the trace state change path * we are guarnateed not to have any current users when we get here - * all we need to do is make sure that we don't have any running timers - * or pending schedule calls */ for_each_possible_cpu(cpu) { - data = &per_cpu(dm_cpu_data, cpu); - del_timer_sync(&data->send_timer); - cancel_work_sync(&data->dm_alert_work); - /* - * At this point, we should have exclusive access - * to this struct and can free the skb inside it - */ - kfree_skb(data->skb); + net_dm_hw_cpu_data_fini(cpu); + net_dm_cpu_data_fini(cpu); } BUG_ON(genl_unregister_family(&net_drop_monitor_family)); diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 6288e69e94fc..c763106c73fc 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -133,6 +133,7 @@ phy_tunable_strings[__ETHTOOL_PHY_TUNABLE_COUNT][ETH_GSTRING_LEN] = { [ETHTOOL_ID_UNSPEC] = "Unspec", [ETHTOOL_PHY_DOWNSHIFT] = "phy-downshift", [ETHTOOL_PHY_FAST_LINK_DOWN] = "phy-fast-link-down", + [ETHTOOL_PHY_EDPD] = "phy-energy-detect-power-down", }; static int ethtool_get_features(struct net_device *dev, void __user *useraddr) @@ -2451,6 +2452,11 @@ static int ethtool_phy_tunable_valid(const struct ethtool_tunable *tuna) tuna->type_id != ETHTOOL_TUNABLE_U8) return -EINVAL; break; + case ETHTOOL_PHY_EDPD: + if (tuna->len != sizeof(u16) || + tuna->type_id != ETHTOOL_TUNABLE_U16) + return -EINVAL; + break; default: return -EINVAL; } diff --git a/net/core/fib_notifier.c b/net/core/fib_notifier.c index 13a40b831d6d..470a606d5e8d 100644 --- a/net/core/fib_notifier.c +++ b/net/core/fib_notifier.c @@ -5,8 +5,15 @@ #include <linux/module.h> #include <linux/init.h> #include <net/net_namespace.h> +#include <net/netns/generic.h> #include <net/fib_notifier.h> +static unsigned int fib_notifier_net_id; + +struct fib_notifier_net { + struct list_head fib_notifier_ops; +}; + static ATOMIC_NOTIFIER_HEAD(fib_chain); int call_fib_notifier(struct notifier_block *nb, struct net *net, @@ -34,6 +41,7 @@ EXPORT_SYMBOL(call_fib_notifiers); static unsigned int fib_seq_sum(void) { + struct fib_notifier_net *fn_net; struct fib_notifier_ops *ops; unsigned int fib_seq = 0; struct net *net; @@ -41,8 +49,9 @@ static unsigned int fib_seq_sum(void) rtnl_lock(); down_read(&net_rwsem); for_each_net(net) { + fn_net = net_generic(net, fib_notifier_net_id); 
rcu_read_lock(); - list_for_each_entry_rcu(ops, &net->fib_notifier_ops, list) { + list_for_each_entry_rcu(ops, &fn_net->fib_notifier_ops, list) { if (!try_module_get(ops->owner)) continue; fib_seq += ops->fib_seq_read(net); @@ -58,9 +67,10 @@ static unsigned int fib_seq_sum(void) static int fib_net_dump(struct net *net, struct notifier_block *nb) { + struct fib_notifier_net *fn_net = net_generic(net, fib_notifier_net_id); struct fib_notifier_ops *ops; - list_for_each_entry_rcu(ops, &net->fib_notifier_ops, list) { + list_for_each_entry_rcu(ops, &fn_net->fib_notifier_ops, list) { int err; if (!try_module_get(ops->owner)) @@ -127,12 +137,13 @@ EXPORT_SYMBOL(unregister_fib_notifier); static int __fib_notifier_ops_register(struct fib_notifier_ops *ops, struct net *net) { + struct fib_notifier_net *fn_net = net_generic(net, fib_notifier_net_id); struct fib_notifier_ops *o; - list_for_each_entry(o, &net->fib_notifier_ops, list) + list_for_each_entry(o, &fn_net->fib_notifier_ops, list) if (ops->family == o->family) return -EEXIST; - list_add_tail_rcu(&ops->list, &net->fib_notifier_ops); + list_add_tail_rcu(&ops->list, &fn_net->fib_notifier_ops); return 0; } @@ -167,18 +178,24 @@ EXPORT_SYMBOL(fib_notifier_ops_unregister); static int __net_init fib_notifier_net_init(struct net *net) { - INIT_LIST_HEAD(&net->fib_notifier_ops); + struct fib_notifier_net *fn_net = net_generic(net, fib_notifier_net_id); + + INIT_LIST_HEAD(&fn_net->fib_notifier_ops); return 0; } static void __net_exit fib_notifier_net_exit(struct net *net) { - WARN_ON_ONCE(!list_empty(&net->fib_notifier_ops)); + struct fib_notifier_net *fn_net = net_generic(net, fib_notifier_net_id); + + WARN_ON_ONCE(!list_empty(&fn_net->fib_notifier_ops)); } static struct pernet_operations fib_notifier_net_ops = { .init = fib_notifier_net_init, .exit = fib_notifier_net_exit, + .id = &fib_notifier_net_id, + .size = sizeof(struct fib_notifier_net), }; static int __init fib_notifier_init(void) diff --git a/net/core/filter.c b/net/core/filter.c index 4c6a252d4212..ed6563622ce3 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3517,7 +3517,8 @@ static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd, int err; switch (map->map_type) { - case BPF_MAP_TYPE_DEVMAP: { + case BPF_MAP_TYPE_DEVMAP: + case BPF_MAP_TYPE_DEVMAP_HASH: { struct bpf_dtab_netdev *dst = fwd; err = dev_map_enqueue(dst, xdp, dev_rx); @@ -3554,6 +3555,7 @@ void xdp_do_flush_map(void) if (map) { switch (map->map_type) { case BPF_MAP_TYPE_DEVMAP: + case BPF_MAP_TYPE_DEVMAP_HASH: __dev_map_flush(map); break; case BPF_MAP_TYPE_CPUMAP: @@ -3574,6 +3576,8 @@ static inline void *__xdp_map_lookup_elem(struct bpf_map *map, u32 index) switch (map->map_type) { case BPF_MAP_TYPE_DEVMAP: return __dev_map_lookup_elem(map, index); + case BPF_MAP_TYPE_DEVMAP_HASH: + return __dev_map_hash_lookup_elem(map, index); case BPF_MAP_TYPE_CPUMAP: return __cpu_map_lookup_elem(map, index); case BPF_MAP_TYPE_XSKMAP: @@ -3655,7 +3659,8 @@ static int xdp_do_generic_redirect_map(struct net_device *dev, ri->tgt_value = NULL; WRITE_ONCE(ri->map, NULL); - if (map->map_type == BPF_MAP_TYPE_DEVMAP) { + if (map->map_type == BPF_MAP_TYPE_DEVMAP || + map->map_type == BPF_MAP_TYPE_DEVMAP_HASH) { struct bpf_dtab_netdev *dst = fwd; err = dev_map_generic_redirect(dst, skb, xdp_prog); @@ -5850,6 +5855,75 @@ static const struct bpf_func_proto bpf_tcp_check_syncookie_proto = { .arg5_type = ARG_CONST_SIZE, }; +BPF_CALL_5(bpf_tcp_gen_syncookie, struct sock *, sk, void *, iph, u32, iph_len, + struct tcphdr *, th, u32, 
th_len) +{ +#ifdef CONFIG_SYN_COOKIES + u32 cookie; + u16 mss; + + if (unlikely(th_len < sizeof(*th) || th_len != th->doff * 4)) + return -EINVAL; + + if (sk->sk_protocol != IPPROTO_TCP || sk->sk_state != TCP_LISTEN) + return -EINVAL; + + if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies) + return -ENOENT; + + if (!th->syn || th->ack || th->fin || th->rst) + return -EINVAL; + + if (unlikely(iph_len < sizeof(struct iphdr))) + return -EINVAL; + + /* Both struct iphdr and struct ipv6hdr have the version field at the + * same offset so we can cast to the shorter header (struct iphdr). + */ + switch (((struct iphdr *)iph)->version) { + case 4: + if (sk->sk_family == AF_INET6 && sk->sk_ipv6only) + return -EINVAL; + + mss = tcp_v4_get_syncookie(sk, iph, th, &cookie); + break; + +#if IS_BUILTIN(CONFIG_IPV6) + case 6: + if (unlikely(iph_len < sizeof(struct ipv6hdr))) + return -EINVAL; + + if (sk->sk_family != AF_INET6) + return -EINVAL; + + mss = tcp_v6_get_syncookie(sk, iph, th, &cookie); + break; +#endif /* CONFIG_IPV6 */ + + default: + return -EPROTONOSUPPORT; + } + if (mss == 0) + return -ENOENT; + + return cookie | ((u64)mss << 32); +#else + return -EOPNOTSUPP; +#endif /* CONFIG_SYN_COOKIES */ +} + +static const struct bpf_func_proto bpf_tcp_gen_syncookie_proto = { + .func = bpf_tcp_gen_syncookie, + .gpl_only = true, /* __cookie_v*_init_sequence() is GPL */ + .pkt_access = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_SOCK_COMMON, + .arg2_type = ARG_PTR_TO_MEM, + .arg3_type = ARG_CONST_SIZE, + .arg4_type = ARG_PTR_TO_MEM, + .arg5_type = ARG_CONST_SIZE, +}; + #endif /* CONFIG_INET */ bool bpf_helper_changes_pkt_data(void *func) @@ -5999,6 +6073,8 @@ sk_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_get_socket_cookie_proto; case BPF_FUNC_get_socket_uid: return &bpf_get_socket_uid_proto; + case BPF_FUNC_perf_event_output: + return &bpf_skb_event_output_proto; default: return bpf_base_func_proto(func_id); } @@ -6019,6 +6095,8 @@ cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_sk_storage_get_proto; case BPF_FUNC_sk_storage_delete: return &bpf_sk_storage_delete_proto; + case BPF_FUNC_perf_event_output: + return &bpf_skb_event_output_proto; #ifdef CONFIG_SOCK_CGROUP_DATA case BPF_FUNC_skb_cgroup_id: return &bpf_skb_cgroup_id_proto; @@ -6135,6 +6213,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_tcp_check_syncookie_proto; case BPF_FUNC_skb_ecn_set_ce: return &bpf_skb_ecn_set_ce_proto; + case BPF_FUNC_tcp_gen_syncookie: + return &bpf_tcp_gen_syncookie_proto; #endif default: return bpf_base_func_proto(func_id); @@ -6174,6 +6254,8 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_xdp_skc_lookup_tcp_proto; case BPF_FUNC_tcp_check_syncookie: return &bpf_tcp_check_syncookie_proto; + case BPF_FUNC_tcp_gen_syncookie: + return &bpf_tcp_gen_syncookie_proto; #endif default: return bpf_base_func_proto(func_id); @@ -6267,6 +6349,8 @@ sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_sk_redirect_map_proto; case BPF_FUNC_sk_redirect_hash: return &bpf_sk_redirect_hash_proto; + case BPF_FUNC_perf_event_output: + return &bpf_skb_event_output_proto; #ifdef CONFIG_INET case BPF_FUNC_sk_lookup_tcp: return &bpf_sk_lookup_tcp_proto; diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 2470b4b404e6..7c09d87d3269 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -737,6 +737,7 @@ static 
void __skb_flow_bpf_to_target(const struct bpf_flow_keys *flow_keys, struct flow_dissector_key_basic *key_basic; struct flow_dissector_key_addrs *key_addrs; struct flow_dissector_key_ports *key_ports; + struct flow_dissector_key_tags *key_tags; key_control = skb_flow_dissector_target(flow_dissector, FLOW_DISSECTOR_KEY_CONTROL, @@ -781,10 +782,18 @@ static void __skb_flow_bpf_to_target(const struct bpf_flow_keys *flow_keys, key_ports->src = flow_keys->sport; key_ports->dst = flow_keys->dport; } + + if (dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_FLOW_LABEL)) { + key_tags = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_FLOW_LABEL, + target_container); + key_tags->flow_label = ntohl(flow_keys->flow_label); + } } bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx, - __be16 proto, int nhoff, int hlen) + __be16 proto, int nhoff, int hlen, unsigned int flags) { struct bpf_flow_keys *flow_keys = ctx->flow_keys; u32 result; @@ -795,6 +804,14 @@ bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx, flow_keys->nhoff = nhoff; flow_keys->thoff = flow_keys->nhoff; + BUILD_BUG_ON((int)BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG != + (int)FLOW_DISSECTOR_F_PARSE_1ST_FRAG); + BUILD_BUG_ON((int)BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL != + (int)FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL); + BUILD_BUG_ON((int)BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP != + (int)FLOW_DISSECTOR_F_STOP_AT_ENCAP); + flow_keys->flags = flags; + preempt_disable(); result = BPF_PROG_RUN(prog, ctx); preempt_enable(); @@ -914,7 +931,7 @@ bool __skb_flow_dissect(const struct net *net, } ret = bpf_flow_dissect(attached, &ctx, n_proto, nhoff, - hlen); + hlen, flags); __skb_flow_bpf_to_target(&flow_keys, flow_dissector, target_container); rcu_read_unlock(); diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c index d63b970784dc..cf52d9c422fa 100644 --- a/net/core/flow_offload.c +++ b/net/core/flow_offload.c @@ -2,6 +2,8 @@ #include <linux/kernel.h> #include <linux/slab.h> #include <net/flow_offload.h> +#include <linux/rtnetlink.h> +#include <linux/mutex.h> struct flow_rule *flow_rule_alloc(unsigned int num_actions) { @@ -280,3 +282,242 @@ int flow_block_cb_setup_simple(struct flow_block_offload *f, } } EXPORT_SYMBOL(flow_block_cb_setup_simple); + +static LIST_HEAD(block_ing_cb_list); + +static struct rhashtable indr_setup_block_ht; + +struct flow_indr_block_cb { + struct list_head list; + void *cb_priv; + flow_indr_block_bind_cb_t *cb; + void *cb_ident; +}; + +struct flow_indr_block_dev { + struct rhash_head ht_node; + struct net_device *dev; + unsigned int refcnt; + struct list_head cb_list; +}; + +static const struct rhashtable_params flow_indr_setup_block_ht_params = { + .key_offset = offsetof(struct flow_indr_block_dev, dev), + .head_offset = offsetof(struct flow_indr_block_dev, ht_node), + .key_len = sizeof(struct net_device *), +}; + +static struct flow_indr_block_dev * +flow_indr_block_dev_lookup(struct net_device *dev) +{ + return rhashtable_lookup_fast(&indr_setup_block_ht, &dev, + flow_indr_setup_block_ht_params); +} + +static struct flow_indr_block_dev * +flow_indr_block_dev_get(struct net_device *dev) +{ + struct flow_indr_block_dev *indr_dev; + + indr_dev = flow_indr_block_dev_lookup(dev); + if (indr_dev) + goto inc_ref; + + indr_dev = kzalloc(sizeof(*indr_dev), GFP_KERNEL); + if (!indr_dev) + return NULL; + + INIT_LIST_HEAD(&indr_dev->cb_list); + indr_dev->dev = dev; + if (rhashtable_insert_fast(&indr_setup_block_ht, &indr_dev->ht_node, + 
flow_indr_setup_block_ht_params)) { + kfree(indr_dev); + return NULL; + } + +inc_ref: + indr_dev->refcnt++; + return indr_dev; +} + +static void flow_indr_block_dev_put(struct flow_indr_block_dev *indr_dev) +{ + if (--indr_dev->refcnt) + return; + + rhashtable_remove_fast(&indr_setup_block_ht, &indr_dev->ht_node, + flow_indr_setup_block_ht_params); + kfree(indr_dev); +} + +static struct flow_indr_block_cb * +flow_indr_block_cb_lookup(struct flow_indr_block_dev *indr_dev, + flow_indr_block_bind_cb_t *cb, void *cb_ident) +{ + struct flow_indr_block_cb *indr_block_cb; + + list_for_each_entry(indr_block_cb, &indr_dev->cb_list, list) + if (indr_block_cb->cb == cb && + indr_block_cb->cb_ident == cb_ident) + return indr_block_cb; + return NULL; +} + +static struct flow_indr_block_cb * +flow_indr_block_cb_add(struct flow_indr_block_dev *indr_dev, void *cb_priv, + flow_indr_block_bind_cb_t *cb, void *cb_ident) +{ + struct flow_indr_block_cb *indr_block_cb; + + indr_block_cb = flow_indr_block_cb_lookup(indr_dev, cb, cb_ident); + if (indr_block_cb) + return ERR_PTR(-EEXIST); + + indr_block_cb = kzalloc(sizeof(*indr_block_cb), GFP_KERNEL); + if (!indr_block_cb) + return ERR_PTR(-ENOMEM); + + indr_block_cb->cb_priv = cb_priv; + indr_block_cb->cb = cb; + indr_block_cb->cb_ident = cb_ident; + list_add(&indr_block_cb->list, &indr_dev->cb_list); + + return indr_block_cb; +} + +static void flow_indr_block_cb_del(struct flow_indr_block_cb *indr_block_cb) +{ + list_del(&indr_block_cb->list); + kfree(indr_block_cb); +} + +static DEFINE_MUTEX(flow_indr_block_ing_cb_lock); + +static void flow_block_ing_cmd(struct net_device *dev, + flow_indr_block_bind_cb_t *cb, + void *cb_priv, + enum flow_block_command command) +{ + struct flow_indr_block_ing_entry *entry; + + mutex_lock(&flow_indr_block_ing_cb_lock); + list_for_each_entry(entry, &block_ing_cb_list, list) { + entry->cb(dev, cb, cb_priv, command); + } + mutex_unlock(&flow_indr_block_ing_cb_lock); +} + +int __flow_indr_block_cb_register(struct net_device *dev, void *cb_priv, + flow_indr_block_bind_cb_t *cb, + void *cb_ident) +{ + struct flow_indr_block_cb *indr_block_cb; + struct flow_indr_block_dev *indr_dev; + int err; + + indr_dev = flow_indr_block_dev_get(dev); + if (!indr_dev) + return -ENOMEM; + + indr_block_cb = flow_indr_block_cb_add(indr_dev, cb_priv, cb, cb_ident); + err = PTR_ERR_OR_ZERO(indr_block_cb); + if (err) + goto err_dev_put; + + flow_block_ing_cmd(dev, indr_block_cb->cb, indr_block_cb->cb_priv, + FLOW_BLOCK_BIND); + + return 0; + +err_dev_put: + flow_indr_block_dev_put(indr_dev); + return err; +} +EXPORT_SYMBOL_GPL(__flow_indr_block_cb_register); + +int flow_indr_block_cb_register(struct net_device *dev, void *cb_priv, + flow_indr_block_bind_cb_t *cb, + void *cb_ident) +{ + int err; + + rtnl_lock(); + err = __flow_indr_block_cb_register(dev, cb_priv, cb, cb_ident); + rtnl_unlock(); + + return err; +} +EXPORT_SYMBOL_GPL(flow_indr_block_cb_register); + +void __flow_indr_block_cb_unregister(struct net_device *dev, + flow_indr_block_bind_cb_t *cb, + void *cb_ident) +{ + struct flow_indr_block_cb *indr_block_cb; + struct flow_indr_block_dev *indr_dev; + + indr_dev = flow_indr_block_dev_lookup(dev); + if (!indr_dev) + return; + + indr_block_cb = flow_indr_block_cb_lookup(indr_dev, cb, cb_ident); + if (!indr_block_cb) + return; + + flow_block_ing_cmd(dev, indr_block_cb->cb, indr_block_cb->cb_priv, + FLOW_BLOCK_UNBIND); + + flow_indr_block_cb_del(indr_block_cb); + flow_indr_block_dev_put(indr_dev); +} 
+EXPORT_SYMBOL_GPL(__flow_indr_block_cb_unregister); + +void flow_indr_block_cb_unregister(struct net_device *dev, + flow_indr_block_bind_cb_t *cb, + void *cb_ident) +{ + rtnl_lock(); + __flow_indr_block_cb_unregister(dev, cb, cb_ident); + rtnl_unlock(); +} +EXPORT_SYMBOL_GPL(flow_indr_block_cb_unregister); + +void flow_indr_block_call(struct net_device *dev, + struct flow_block_offload *bo, + enum flow_block_command command) +{ + struct flow_indr_block_cb *indr_block_cb; + struct flow_indr_block_dev *indr_dev; + + indr_dev = flow_indr_block_dev_lookup(dev); + if (!indr_dev) + return; + + list_for_each_entry(indr_block_cb, &indr_dev->cb_list, list) + indr_block_cb->cb(dev, indr_block_cb->cb_priv, TC_SETUP_BLOCK, + bo); +} +EXPORT_SYMBOL_GPL(flow_indr_block_call); + +void flow_indr_add_block_ing_cb(struct flow_indr_block_ing_entry *entry) +{ + mutex_lock(&flow_indr_block_ing_cb_lock); + list_add_tail(&entry->list, &block_ing_cb_list); + mutex_unlock(&flow_indr_block_ing_cb_lock); +} +EXPORT_SYMBOL_GPL(flow_indr_add_block_ing_cb); + +void flow_indr_del_block_ing_cb(struct flow_indr_block_ing_entry *entry) +{ + mutex_lock(&flow_indr_block_ing_cb_lock); + list_del(&entry->list); + mutex_unlock(&flow_indr_block_ing_cb_lock); +} +EXPORT_SYMBOL_GPL(flow_indr_del_block_ing_cb); + +static int __init init_flow_indr_rhashtable(void) +{ + return rhashtable_init(&indr_setup_block_ht, + &flow_indr_setup_block_ht_params); +} +subsys_initcall(init_flow_indr_rhashtable); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index f79e61c570ea..5480edff0c86 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -3033,7 +3033,7 @@ static struct neighbour *neigh_get_first(struct seq_file *seq) struct net *net = seq_file_net(seq); struct neigh_hash_table *nht = state->nht; struct neighbour *n = NULL; - int bucket = state->bucket; + int bucket; state->flags &= ~NEIGH_SEQ_IS_PNEIGH; for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) { diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 3272dc7a8c81..5bc65587f1c4 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -61,7 +61,7 @@ static int page_pool_init(struct page_pool *pool, struct page_pool *page_pool_create(const struct page_pool_params *params) { struct page_pool *pool; - int err = 0; + int err; pool = kzalloc_node(sizeof(*pool), GFP_KERNEL, params->nid); if (!pool) @@ -82,12 +82,9 @@ EXPORT_SYMBOL(page_pool_create); static struct page *__page_pool_get_cached(struct page_pool *pool) { struct ptr_ring *r = &pool->ring; + bool refill = false; struct page *page; - /* Quicker fallback, avoid locks when ring is empty */ - if (__ptr_ring_empty(r)) - return NULL; - /* Test for safe-context, caller should provide this guarantee */ if (likely(in_serving_softirq())) { if (likely(pool->alloc.count)) { @@ -95,27 +92,23 @@ static struct page *__page_pool_get_cached(struct page_pool *pool) page = pool->alloc.cache[--pool->alloc.count]; return page; } - /* Slower-path: Alloc array empty, time to refill - * - * Open-coded bulk ptr_ring consumer. - * - * Discussion: the ring consumer lock is not really - * needed due to the softirq/NAPI protection, but - * later need the ability to reclaim pages on the - * ring. Thus, keeping the locks. 
- */ - spin_lock(&r->consumer_lock); - while ((page = __ptr_ring_consume(r))) { - if (pool->alloc.count == PP_ALLOC_CACHE_REFILL) - break; - pool->alloc.cache[pool->alloc.count++] = page; - } - spin_unlock(&r->consumer_lock); - return page; + refill = true; } - /* Slow-path: Get page from locked ring queue */ - page = ptr_ring_consume(&pool->ring); + /* Quicker fallback, avoid locks when ring is empty */ + if (__ptr_ring_empty(r)) + return NULL; + + /* Slow-path: Get page from locked ring queue, + * refill alloc array if requested. + */ + spin_lock(&r->consumer_lock); + page = __ptr_ring_consume(r); + if (refill) + pool->alloc.count = __ptr_ring_consume_batched(r, + pool->alloc.cache, + PP_ALLOC_CACHE_REFILL); + spin_unlock(&r->consumer_lock); return page; } diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 1d0c1b4886d7..48b1e429857c 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2650,7 +2650,7 @@ static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb, } get_page(pkt_dev->page); skb_frag_set_page(skb, i, pkt_dev->page); - skb_shinfo(skb)->frags[i].page_offset = 0; + skb_frag_off_set(&skb_shinfo(skb)->frags[i], 0); /*last fragment, fill rest of data*/ if (i == (frags - 1)) skb_frag_size_set(&skb_shinfo(skb)->frags[i], diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 982d8d12830e..f12e8a050edb 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -785,7 +785,7 @@ void skb_dump(const char *level, const struct sk_buff *skb, bool full_pkt) struct page *p; u8 *vaddr; - skb_frag_foreach_page(frag, frag->page_offset, + skb_frag_foreach_page(frag, skb_frag_off(frag), skb_frag_size(frag), p, p_off, p_len, copied) { seg_len = min_t(int, p_len, len); @@ -1375,7 +1375,7 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask) struct page *p; u8 *vaddr; - skb_frag_foreach_page(f, f->page_offset, skb_frag_size(f), + skb_frag_foreach_page(f, skb_frag_off(f), skb_frag_size(f), p, p_off, p_len, copied) { u32 copy, done = 0; vaddr = kmap_atomic(p); @@ -2144,10 +2144,12 @@ pull_pages: skb_frag_unref(skb, i); eat -= size; } else { - skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i]; + skb_frag_t *frag = &skb_shinfo(skb)->frags[k]; + + *frag = skb_shinfo(skb)->frags[i]; if (eat) { - skb_shinfo(skb)->frags[k].page_offset += eat; - skb_frag_size_sub(&skb_shinfo(skb)->frags[k], eat); + skb_frag_off_add(frag, eat); + skb_frag_size_sub(frag, eat); if (!i) goto end; eat = 0; @@ -2219,7 +2221,7 @@ int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len) copy = len; skb_frag_foreach_page(f, - f->page_offset + offset - start, + skb_frag_off(f) + offset - start, copy, p, p_off, p_len, copied) { vaddr = kmap_atomic(p); memcpy(to + copied, vaddr + p_off, p_len); @@ -2395,7 +2397,7 @@ static bool __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe, const skb_frag_t *f = &skb_shinfo(skb)->frags[seg]; if (__splice_segment(skb_frag_page(f), - f->page_offset, skb_frag_size(f), + skb_frag_off(f), skb_frag_size(f), offset, len, spd, false, sk, pipe)) return true; } @@ -2485,20 +2487,20 @@ do_frag_list: for (fragidx = 0; fragidx < skb_shinfo(skb)->nr_frags; fragidx++) { skb_frag_t *frag = &skb_shinfo(skb)->frags[fragidx]; - if (offset < frag->size) + if (offset < skb_frag_size(frag)) break; - offset -= frag->size; + offset -= skb_frag_size(frag); } for (; len && fragidx < skb_shinfo(skb)->nr_frags; fragidx++) { skb_frag_t *frag = &skb_shinfo(skb)->frags[fragidx]; - slen = min_t(size_t, len, frag->size - offset); + slen = min_t(size_t, 
len, skb_frag_size(frag) - offset); while (slen) { - ret = kernel_sendpage_locked(sk, frag->page.p, - frag->page_offset + offset, + ret = kernel_sendpage_locked(sk, skb_frag_page(frag), + skb_frag_off(frag) + offset, slen, MSG_DONTWAIT); if (ret <= 0) goto error; @@ -2580,7 +2582,7 @@ int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len) copy = len; skb_frag_foreach_page(frag, - frag->page_offset + offset - start, + skb_frag_off(frag) + offset - start, copy, p, p_off, p_len, copied) { vaddr = kmap_atomic(p); memcpy(vaddr + p_off, from + copied, p_len); @@ -2660,7 +2662,7 @@ __wsum __skb_checksum(const struct sk_buff *skb, int offset, int len, copy = len; skb_frag_foreach_page(frag, - frag->page_offset + offset - start, + skb_frag_off(frag) + offset - start, copy, p, p_off, p_len, copied) { vaddr = kmap_atomic(p); csum2 = INDIRECT_CALL_1(ops->update, @@ -2759,7 +2761,7 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, copy = len; skb_frag_foreach_page(frag, - frag->page_offset + offset - start, + skb_frag_off(frag) + offset - start, copy, p, p_off, p_len, copied) { vaddr = kmap_atomic(p); csum2 = csum_partial_copy_nocheck(vaddr + p_off, @@ -2975,11 +2977,15 @@ skb_zerocopy(struct sk_buff *to, struct sk_buff *from, int len, int hlen) skb_zerocopy_clone(to, from, GFP_ATOMIC); for (i = 0; i < skb_shinfo(from)->nr_frags; i++) { + int size; + if (!len) break; skb_shinfo(to)->frags[j] = skb_shinfo(from)->frags[i]; - skb_shinfo(to)->frags[j].size = min_t(int, skb_shinfo(to)->frags[j].size, len); - len -= skb_shinfo(to)->frags[j].size; + size = min_t(int, skb_frag_size(&skb_shinfo(to)->frags[j]), + len); + skb_frag_size_set(&skb_shinfo(to)->frags[j], size); + len -= size; skb_frag_ref(to, j); j++; } @@ -3230,7 +3236,7 @@ static inline void skb_split_no_header(struct sk_buff *skb, * 2. Split is accurately. We make this. 
*/ skb_frag_ref(skb, i); - skb_shinfo(skb1)->frags[0].page_offset += len - pos; + skb_frag_off_add(&skb_shinfo(skb1)->frags[0], len - pos); skb_frag_size_sub(&skb_shinfo(skb1)->frags[0], len - pos); skb_frag_size_set(&skb_shinfo(skb)->frags[i], len - pos); skb_shinfo(skb)->nr_frags++; @@ -3293,7 +3299,7 @@ static int skb_prepare_for_shift(struct sk_buff *skb) int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen) { int from, to, merge, todo; - struct skb_frag_struct *fragfrom, *fragto; + skb_frag_t *fragfrom, *fragto; BUG_ON(shiftlen > skb->len); @@ -3312,7 +3318,7 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen) */ if (!to || !skb_can_coalesce(tgt, to, skb_frag_page(fragfrom), - fragfrom->page_offset)) { + skb_frag_off(fragfrom))) { merge = -1; } else { merge = to - 1; @@ -3329,7 +3335,7 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen) skb_frag_size_add(fragto, shiftlen); skb_frag_size_sub(fragfrom, shiftlen); - fragfrom->page_offset += shiftlen; + skb_frag_off_add(fragfrom, shiftlen); goto onlymerged; } @@ -3360,11 +3366,11 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen) } else { __skb_frag_ref(fragfrom); - fragto->page = fragfrom->page; - fragto->page_offset = fragfrom->page_offset; + skb_frag_page_copy(fragto, fragfrom); + skb_frag_off_copy(fragto, fragfrom); skb_frag_size_set(fragto, todo); - fragfrom->page_offset += todo; + skb_frag_off_add(fragfrom, todo); skb_frag_size_sub(fragfrom, todo); todo = 0; @@ -3489,7 +3495,7 @@ next_skb: if (!st->frag_data) st->frag_data = kmap_atomic(skb_frag_page(frag)); - *data = (u8 *) st->frag_data + frag->page_offset + + *data = (u8 *) st->frag_data + skb_frag_off(frag) + (abs_offset - st->stepped_offset); return block_limit - abs_offset; @@ -3625,10 +3631,10 @@ static inline skb_frag_t skb_head_frag_to_page_desc(struct sk_buff *frag_skb) struct page *page; page = virt_to_head_page(frag_skb->head); - head_frag.page.p = page; - head_frag.page_offset = frag_skb->data - - (unsigned char *)page_address(page); - head_frag.size = skb_headlen(frag_skb); + __skb_frag_set_page(&head_frag, page); + skb_frag_off_set(&head_frag, frag_skb->data - + (unsigned char *)page_address(page)); + skb_frag_size_set(&head_frag, skb_headlen(frag_skb)); return head_frag; } @@ -3890,7 +3896,7 @@ normal: size = skb_frag_size(nskb_frag); if (pos < offset) { - nskb_frag->page_offset += offset - pos; + skb_frag_off_add(nskb_frag, offset - pos); skb_frag_size_sub(nskb_frag, offset - pos); } @@ -4011,7 +4017,7 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb) *--frag = *--frag2; } while (--i); - frag->page_offset += offset; + skb_frag_off_add(frag, offset); skb_frag_size_sub(frag, offset); /* all fragments truesize : remove (head size + sk_buff) */ @@ -4040,8 +4046,8 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb) pinfo->nr_frags = nr_frags + 1 + skbinfo->nr_frags; - frag->page.p = page; - frag->page_offset = first_offset; + __skb_frag_set_page(frag, page); + skb_frag_off_set(frag, first_offset); skb_frag_size_set(frag, first_size); memcpy(frag + 1, skbinfo->frags, sizeof(*frag) * skbinfo->nr_frags); @@ -4057,7 +4063,7 @@ merge: if (offset > headlen) { unsigned int eat = offset - headlen; - skbinfo->frags[0].page_offset += eat; + skb_frag_off_add(&skbinfo->frags[0], eat); skb_frag_size_sub(&skbinfo->frags[0], eat); skb->data_len -= eat; skb->len -= eat; @@ -4100,6 +4106,9 @@ static const u8 skb_ext_type_len[] = { #ifdef CONFIG_XFRM [SKB_EXT_SEC_PATH] = 
SKB_EXT_CHUNKSIZEOF(struct sec_path), #endif +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) + [TC_SKB_EXT] = SKB_EXT_CHUNKSIZEOF(struct tc_skb_ext), +#endif }; static __always_inline unsigned int skb_ext_total_length(void) @@ -4111,6 +4120,9 @@ static __always_inline unsigned int skb_ext_total_length(void) #ifdef CONFIG_XFRM skb_ext_type_len[SKB_EXT_SEC_PATH] + #endif +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) + skb_ext_type_len[TC_SKB_EXT] + +#endif 0; } @@ -4182,7 +4194,7 @@ __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len, if (copy > len) copy = len; sg_set_page(&sg[elt], skb_frag_page(frag), copy, - frag->page_offset+offset-start); + skb_frag_off(frag) + offset - start); elt++; if (!(len -= copy)) return elt; @@ -5853,7 +5865,7 @@ static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off, * where splitting is expensive. * 2. Split is accurately. We make this. */ - shinfo->frags[0].page_offset += off - pos; + skb_frag_off_add(&shinfo->frags[0], off - pos); skb_frag_size_sub(&shinfo->frags[0], off - pos); } skb_frag_ref(skb, i); diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 6832eeb4b785..cf390e0aa73d 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -190,8 +190,7 @@ static int __sk_msg_free(struct sock *sk, struct sk_msg *msg, u32 i, sk_msg_check_to_free(msg, i, msg->sg.size); sge = sk_msg_elem(msg, i); } - if (msg->skb) - consume_skb(msg->skb); + consume_skb(msg->skb); sk_msg_init(msg); return freed; } diff --git a/net/core/sock.c b/net/core/sock.c index 545fac19a711..07863edbe6fc 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1851,9 +1851,12 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) goto out; } RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL); -#ifdef CONFIG_BPF_SYSCALL - RCU_INIT_POINTER(newsk->sk_bpf_storage, NULL); -#endif + + if (bpf_sk_storage_clone(sk, newsk)) { + sk_free_unlock_clone(newsk); + newsk = NULL; + goto out; + } newsk->sk_err = 0; newsk->sk_err_soft = 0; diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 50916f9bc4f2..eb114ee419b6 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -345,7 +345,7 @@ static int sock_map_update_common(struct bpf_map *map, u32 idx, return -EINVAL; if (unlikely(idx >= map->max_entries)) return -E2BIG; - if (unlikely(icsk->icsk_ulp_data)) + if (unlikely(rcu_access_pointer(icsk->icsk_ulp_data))) return -EINVAL; link = sk_psock_init_link(); diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c index 9408f9264d05..f3ceec93f392 100644 --- a/net/core/sock_reuseport.c +++ b/net/core/sock_reuseport.c @@ -295,8 +295,19 @@ struct sock *reuseport_select_sock(struct sock *sk, select_by_hash: /* no bpf or invalid bpf result: fall back to hash usage */ - if (!sk2) - sk2 = reuse->socks[reciprocal_scale(hash, socks)]; + if (!sk2) { + int i, j; + + i = j = reciprocal_scale(hash, socks); + while (reuse->socks[i]->sk_state == TCP_ESTABLISHED) { + i++; + if (i >= reuse->num_socks) + i = 0; + if (i == j) + goto out; + } + sk2 = reuse->socks[i]; + } } out: diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 8da5b3a54dac..eb29e5adc84d 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -567,6 +567,14 @@ static struct ctl_table net_core_table[] = { .mode = 0644, .proc_handler = proc_do_static_key, }, + { + .procname = "gro_normal_batch", + .data = &gro_normal_batch, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ONE, + }, { } }; diff 
--git a/net/core/tso.c b/net/core/tso.c index 43f4eba61933..d4d5c077ad72 100644 --- a/net/core/tso.c +++ b/net/core/tso.c @@ -55,8 +55,8 @@ void tso_build_data(struct sk_buff *skb, struct tso_t *tso, int size) skb_frag_t *frag = &skb_shinfo(skb)->frags[tso->next_frag_idx]; /* Move to next segment */ - tso->size = frag->size; - tso->data = page_address(frag->page.p) + frag->page_offset; + tso->size = skb_frag_size(frag); + tso->data = skb_frag_address(frag); tso->next_frag_idx++; } } @@ -79,8 +79,8 @@ void tso_start(struct sk_buff *skb, struct tso_t *tso) skb_frag_t *frag = &skb_shinfo(skb)->frags[tso->next_frag_idx]; /* Move to next segment */ - tso->size = frag->size; - tso->data = page_address(frag->page.p) + frag->page_offset; + tso->size = skb_frag_size(frag); + tso->data = skb_frag_address(frag); tso->next_frag_idx++; } } diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig index 6e942dda1bcd..29e2bd5cc5af 100644 --- a/net/dsa/Kconfig +++ b/net/dsa/Kconfig @@ -73,23 +73,11 @@ config NET_DSA_TAG_MTK Say Y or M if you want to enable support for tagging frames for Mediatek switches. -config NET_DSA_TAG_KSZ_COMMON - tristate - default n - config NET_DSA_TAG_KSZ - tristate "Tag driver for Microchip 9893 family of switches" - select NET_DSA_TAG_KSZ_COMMON - help - Say Y if you want to enable support for tagging frames for the - Microchip 9893 family of switches. - -config NET_DSA_TAG_KSZ9477 - tristate "Tag driver for Microchip 9477 family of switches" - select NET_DSA_TAG_KSZ_COMMON + tristate "Tag driver for Microchip 8795/9477/9893 families of switches" help Say Y if you want to enable support for tagging frames for the - Microchip 9477 family of switches. + Microchip 8795/9477/9893 families of switches. config NET_DSA_TAG_QCA tristate "Tag driver for Qualcomm Atheros QCA8K switches" diff --git a/net/dsa/Makefile b/net/dsa/Makefile index c342f54715ba..2c6d286f0511 100644 --- a/net/dsa/Makefile +++ b/net/dsa/Makefile @@ -9,7 +9,7 @@ obj-$(CONFIG_NET_DSA_TAG_BRCM_COMMON) += tag_brcm.o obj-$(CONFIG_NET_DSA_TAG_DSA) += tag_dsa.o obj-$(CONFIG_NET_DSA_TAG_EDSA) += tag_edsa.o obj-$(CONFIG_NET_DSA_TAG_GSWIP) += tag_gswip.o -obj-$(CONFIG_NET_DSA_TAG_KSZ_COMMON) += tag_ksz.o +obj-$(CONFIG_NET_DSA_TAG_KSZ) += tag_ksz.o obj-$(CONFIG_NET_DSA_TAG_LAN9303) += tag_lan9303.o obj-$(CONFIG_NET_DSA_TAG_MTK) += tag_mtk.o obj-$(CONFIG_NET_DSA_TAG_QCA) += tag_qca.o diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 3abd173ebacb..73002022c9d8 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -254,88 +254,109 @@ static void dsa_tree_teardown_default_cpu(struct dsa_switch_tree *dst) static int dsa_port_setup(struct dsa_port *dp) { - enum devlink_port_flavour flavour; struct dsa_switch *ds = dp->ds; struct dsa_switch_tree *dst = ds->dst; + const unsigned char *id = (const unsigned char *)&dst->index; + const unsigned char len = sizeof(dst->index); + struct devlink_port *dlp = &dp->devlink_port; + bool dsa_port_link_registered = false; + bool devlink_port_registered = false; + struct devlink *dl = ds->devlink; + bool dsa_port_enabled = false; int err = 0; - if (dp->type == DSA_PORT_TYPE_UNUSED) - return 0; - - memset(&dp->devlink_port, 0, sizeof(dp->devlink_port)); - dp->mac = of_get_mac_address(dp->dn); - - switch (dp->type) { - case DSA_PORT_TYPE_CPU: - flavour = DEVLINK_PORT_FLAVOUR_CPU; - break; - case DSA_PORT_TYPE_DSA: - flavour = DEVLINK_PORT_FLAVOUR_DSA; - break; - case DSA_PORT_TYPE_USER: /* fall-through */ - default: - flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL; - break; - } - - /* dp->index is used now as 
port_number. However - * CPU and DSA ports should have separate numbering - * independent from front panel port numbers. - */ - devlink_port_attrs_set(&dp->devlink_port, flavour, - dp->index, false, 0, - (const char *) &dst->index, sizeof(dst->index)); - err = devlink_port_register(ds->devlink, &dp->devlink_port, - dp->index); - if (err) - return err; - switch (dp->type) { case DSA_PORT_TYPE_UNUSED: + dsa_port_disable(dp); break; case DSA_PORT_TYPE_CPU: + memset(dlp, 0, sizeof(*dlp)); + devlink_port_attrs_set(dlp, DEVLINK_PORT_FLAVOUR_CPU, + dp->index, false, 0, id, len); + err = devlink_port_register(dl, dlp, dp->index); + if (err) + break; + devlink_port_registered = true; + err = dsa_port_link_register_of(dp); if (err) - dev_err(ds->dev, "failed to setup link for port %d.%d\n", - ds->index, dp->index); + break; + dsa_port_link_registered = true; + + err = dsa_port_enable(dp, NULL); + if (err) + break; + dsa_port_enabled = true; + break; case DSA_PORT_TYPE_DSA: + memset(dlp, 0, sizeof(*dlp)); + devlink_port_attrs_set(dlp, DEVLINK_PORT_FLAVOUR_DSA, + dp->index, false, 0, id, len); + err = devlink_port_register(dl, dlp, dp->index); + if (err) + break; + devlink_port_registered = true; + err = dsa_port_link_register_of(dp); if (err) - dev_err(ds->dev, "failed to setup link for port %d.%d\n", - ds->index, dp->index); + break; + dsa_port_link_registered = true; + + err = dsa_port_enable(dp, NULL); + if (err) + break; + dsa_port_enabled = true; + break; case DSA_PORT_TYPE_USER: + memset(dlp, 0, sizeof(*dlp)); + devlink_port_attrs_set(dlp, DEVLINK_PORT_FLAVOUR_PHYSICAL, + dp->index, false, 0, id, len); + err = devlink_port_register(dl, dlp, dp->index); + if (err) + break; + devlink_port_registered = true; + + dp->mac = of_get_mac_address(dp->dn); err = dsa_slave_create(dp); if (err) - dev_err(ds->dev, "failed to create slave for port %d.%d\n", - ds->index, dp->index); - else - devlink_port_type_eth_set(&dp->devlink_port, dp->slave); + break; + + devlink_port_type_eth_set(dlp, dp->slave); break; } - if (err) - devlink_port_unregister(&dp->devlink_port); + if (err && dsa_port_enabled) + dsa_port_disable(dp); + if (err && dsa_port_link_registered) + dsa_port_link_unregister_of(dp); + if (err && devlink_port_registered) + devlink_port_unregister(dlp); return err; } static void dsa_port_teardown(struct dsa_port *dp) { - if (dp->type != DSA_PORT_TYPE_UNUSED) - devlink_port_unregister(&dp->devlink_port); + struct devlink_port *dlp = &dp->devlink_port; switch (dp->type) { case DSA_PORT_TYPE_UNUSED: break; case DSA_PORT_TYPE_CPU: + dsa_port_disable(dp); dsa_tag_driver_put(dp->tag_ops); - /* fall-through */ + devlink_port_unregister(dlp); + dsa_port_link_unregister_of(dp); + break; case DSA_PORT_TYPE_DSA: + dsa_port_disable(dp); + devlink_port_unregister(dlp); dsa_port_link_unregister_of(dp); break; case DSA_PORT_TYPE_USER: + devlink_port_unregister(dlp); if (dp->slave) { dsa_slave_destroy(dp->slave); dp->slave = NULL; @@ -623,6 +644,8 @@ static int dsa_port_parse_cpu(struct dsa_port *dp, struct net_device *master) tag_protocol = ds->ops->get_tag_protocol(ds, dp->index); tag_ops = dsa_tag_driver_get(tag_protocol); if (IS_ERR(tag_ops)) { + if (PTR_ERR(tag_ops) == -ENOPROTOOPT) + return -EPROBE_DEFER; dev_warn(ds->dev, "No tagger for this switch\n"); return PTR_ERR(tag_ops); } @@ -832,20 +855,6 @@ struct dsa_switch *dsa_switch_alloc(struct device *dev, size_t n) if (!ds) return NULL; - /* We avoid allocating memory outside dsa_switch - * if it is not needed. 
- */ - if (n <= sizeof(ds->_bitmap) * 8) { - ds->bitmap = &ds->_bitmap; - } else { - ds->bitmap = devm_kcalloc(dev, - BITS_TO_LONGS(n), - sizeof(unsigned long), - GFP_KERNEL); - if (unlikely(!ds->bitmap)) - return NULL; - } - ds->dev = dev; ds->num_ports = n; diff --git a/net/dsa/master.c b/net/dsa/master.c index 4b52f8bac5e1..a8e52c9967f4 100644 --- a/net/dsa/master.c +++ b/net/dsa/master.c @@ -8,6 +8,70 @@ #include "dsa_priv.h" +static int dsa_master_get_regs_len(struct net_device *dev) +{ + struct dsa_port *cpu_dp = dev->dsa_ptr; + const struct ethtool_ops *ops = cpu_dp->orig_ethtool_ops; + struct dsa_switch *ds = cpu_dp->ds; + int port = cpu_dp->index; + int ret = 0; + int len; + + if (ops->get_regs_len) { + len = ops->get_regs_len(dev); + if (len < 0) + return len; + ret += len; + } + + ret += sizeof(struct ethtool_drvinfo); + ret += sizeof(struct ethtool_regs); + + if (ds->ops->get_regs_len) { + len = ds->ops->get_regs_len(ds, port); + if (len < 0) + return len; + ret += len; + } + + return ret; +} + +static void dsa_master_get_regs(struct net_device *dev, + struct ethtool_regs *regs, void *data) +{ + struct dsa_port *cpu_dp = dev->dsa_ptr; + const struct ethtool_ops *ops = cpu_dp->orig_ethtool_ops; + struct dsa_switch *ds = cpu_dp->ds; + struct ethtool_drvinfo *cpu_info; + struct ethtool_regs *cpu_regs; + int port = cpu_dp->index; + int len; + + if (ops->get_regs_len && ops->get_regs) { + len = ops->get_regs_len(dev); + if (len < 0) + return; + regs->len = len; + ops->get_regs(dev, regs, data); + data += regs->len; + } + + cpu_info = (struct ethtool_drvinfo *)data; + strlcpy(cpu_info->driver, "dsa", sizeof(cpu_info->driver)); + data += sizeof(*cpu_info); + cpu_regs = (struct ethtool_regs *)data; + data += sizeof(*cpu_regs); + + if (ds->ops->get_regs_len && ds->ops->get_regs) { + len = ds->ops->get_regs_len(ds, port); + if (len < 0) + return; + cpu_regs->len = len; + ds->ops->get_regs(ds, port, cpu_regs, data); + } +} + static void dsa_master_get_ethtool_stats(struct net_device *dev, struct ethtool_stats *stats, uint64_t *data) @@ -147,6 +211,8 @@ static int dsa_master_ethtool_setup(struct net_device *dev) if (cpu_dp->orig_ethtool_ops) memcpy(ops, cpu_dp->orig_ethtool_ops, sizeof(*ops)); + ops->get_regs_len = dsa_master_get_regs_len; + ops->get_regs = dsa_master_get_regs; ops->get_sset_count = dsa_master_get_sset_count; ops->get_ethtool_stats = dsa_master_get_ethtool_stats; ops->get_strings = dsa_master_get_strings; diff --git a/net/dsa/port.c b/net/dsa/port.c index f071acf2842b..9b54e5a76297 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -348,10 +348,7 @@ int dsa_port_vlan_add(struct dsa_port *dp, .vlan = vlan, }; - if (!dp->bridge_dev || br_vlan_enabled(dp->bridge_dev)) - return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_ADD, &info); - - return 0; + return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_ADD, &info); } int dsa_port_vlan_del(struct dsa_port *dp, @@ -363,10 +360,7 @@ int dsa_port_vlan_del(struct dsa_port *dp, .vlan = vlan, }; - if (!dp->bridge_dev || br_vlan_enabled(dp->bridge_dev)) - return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_DEL, &info); - - return 0; + return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_DEL, &info); } int dsa_port_vid_add(struct dsa_port *dp, u16 vid, u16 flags) @@ -382,8 +376,8 @@ int dsa_port_vid_add(struct dsa_port *dp, u16 vid, u16 flags) trans.ph_prepare = true; err = dsa_port_vlan_add(dp, &vlan, &trans); - if (err == -EOPNOTSUPP) - return 0; + if (err) + return err; trans.ph_prepare = false; return dsa_port_vlan_add(dp, &vlan, &trans); @@ -538,10 +532,6 @@ 
static int dsa_port_setup_phy_of(struct dsa_port *dp, bool enable) return PTR_ERR(phydev); if (enable) { - err = genphy_config_init(phydev); - if (err < 0) - goto err_put_dev; - err = genphy_resume(phydev); if (err < 0) goto err_put_dev; @@ -589,7 +579,6 @@ static int dsa_port_fixed_link_register_of(struct dsa_port *dp) mode = PHY_INTERFACE_MODE_NA; phydev->interface = mode; - genphy_config_init(phydev); genphy_read_status(phydev); if (ds->ops->adjust_link) diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 33f41178afcc..75d58229a4bd 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -312,6 +312,39 @@ static int dsa_slave_port_attr_set(struct net_device *dev, return ret; } +static int dsa_slave_vlan_add(struct net_device *dev, + const struct switchdev_obj *obj, + struct switchdev_trans *trans) +{ + struct dsa_port *dp = dsa_slave_to_port(dev); + struct switchdev_obj_port_vlan vlan; + int err; + + if (obj->orig_dev != dev) + return -EOPNOTSUPP; + + if (dp->bridge_dev && !br_vlan_enabled(dp->bridge_dev)) + return 0; + + vlan = *SWITCHDEV_OBJ_PORT_VLAN(obj); + + err = dsa_port_vlan_add(dp, &vlan, trans); + if (err) + return err; + + /* We need the dedicated CPU port to be a member of the VLAN as well. + * Even though drivers often handle CPU membership in special ways, + * it doesn't make sense to program a PVID, so clear this flag. + */ + vlan.flags &= ~BRIDGE_VLAN_INFO_PVID; + + err = dsa_port_vlan_add(dp->cpu_dp, &vlan, trans); + if (err) + return err; + + return 0; +} + static int dsa_slave_port_obj_add(struct net_device *dev, const struct switchdev_obj *obj, struct switchdev_trans *trans, @@ -339,10 +372,7 @@ static int dsa_slave_port_obj_add(struct net_device *dev, trans); break; case SWITCHDEV_OBJ_ID_PORT_VLAN: - if (obj->orig_dev != dev) - return -EOPNOTSUPP; - err = dsa_port_vlan_add(dp, SWITCHDEV_OBJ_PORT_VLAN(obj), - trans); + err = dsa_slave_vlan_add(dev, obj, trans); break; default: err = -EOPNOTSUPP; @@ -352,6 +382,23 @@ static int dsa_slave_port_obj_add(struct net_device *dev, return err; } +static int dsa_slave_vlan_del(struct net_device *dev, + const struct switchdev_obj *obj) +{ + struct dsa_port *dp = dsa_slave_to_port(dev); + + if (obj->orig_dev != dev) + return -EOPNOTSUPP; + + if (dp->bridge_dev && !br_vlan_enabled(dp->bridge_dev)) + return 0; + + /* Do not deprogram the CPU port as it may be shared with other user + * ports which can be members of this VLAN as well. 
+ */ + return dsa_port_vlan_del(dp, SWITCHDEV_OBJ_PORT_VLAN(obj)); +} + static int dsa_slave_port_obj_del(struct net_device *dev, const struct switchdev_obj *obj) { @@ -371,9 +418,7 @@ static int dsa_slave_port_obj_del(struct net_device *dev, err = dsa_port_mdb_del(dp->cpu_dp, SWITCHDEV_OBJ_PORT_MDB(obj)); break; case SWITCHDEV_OBJ_ID_PORT_VLAN: - if (obj->orig_dev != dev) - return -EOPNOTSUPP; - err = dsa_port_vlan_del(dp, SWITCHDEV_OBJ_PORT_VLAN(obj)); + err = dsa_slave_vlan_del(dev, obj); break; default: err = -EOPNOTSUPP; @@ -990,12 +1035,16 @@ static int dsa_slave_setup_tc_block(struct net_device *dev, static int dsa_slave_setup_tc(struct net_device *dev, enum tc_setup_type type, void *type_data) { - switch (type) { - case TC_SETUP_BLOCK: + struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_switch *ds = dp->ds; + + if (type == TC_SETUP_BLOCK) return dsa_slave_setup_tc_block(dev, type_data); - default: + + if (!ds->ops->port_setup_tc) return -EOPNOTSUPP; - } + + return ds->ops->port_setup_tc(ds, dp->index, type, type_data); } static void dsa_slave_get_stats64(struct net_device *dev, @@ -1073,6 +1122,9 @@ static int dsa_slave_vlan_rx_add_vid(struct net_device *dev, __be16 proto, * need to emulate the switchdev prepare + commit phase. */ if (dp->bridge_dev) { + if (!br_vlan_enabled(dp->bridge_dev)) + return 0; + /* br_vlan_get_info() returns -EINVAL or -ENOENT if the * device, respectively the VID is not found, returning * 0 means success, which is a failure for us here. @@ -1082,8 +1134,15 @@ static int dsa_slave_vlan_rx_add_vid(struct net_device *dev, __be16 proto, return -EBUSY; } - /* This API only allows programming tagged, non-PVID VIDs */ - return dsa_port_vid_add(dp, vid, 0); + ret = dsa_port_vid_add(dp, vid, 0); + if (ret) + return ret; + + ret = dsa_port_vid_add(dp->cpu_dp, vid, 0); + if (ret) + return ret; + + return 0; } static int dsa_slave_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, @@ -1097,6 +1156,9 @@ static int dsa_slave_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, * need to emulate the switchdev prepare + commit phase. */ if (dp->bridge_dev) { + if (!br_vlan_enabled(dp->bridge_dev)) + return 0; + /* br_vlan_get_info() returns -EINVAL or -ENOENT if the * device, respectively the VID is not found, returning * 0 means success, which is a failure for us here. @@ -1106,11 +1168,10 @@ static int dsa_slave_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, return -EBUSY; } - ret = dsa_port_vid_del(dp, vid); - if (ret == -EOPNOTSUPP) - ret = 0; - - return ret; + /* Do not deprogram the CPU port as it may be shared with other user + * ports which can be members of this VLAN as well. 
+ */ + return dsa_port_vid_del(dp, vid); } static const struct ethtool_ops dsa_slave_ethtool_ops = { @@ -1357,8 +1418,9 @@ int dsa_slave_create(struct dsa_port *port) if (slave_dev == NULL) return -ENOMEM; - slave_dev->features = master->vlan_features | NETIF_F_HW_TC | - NETIF_F_HW_VLAN_CTAG_FILTER; + slave_dev->features = master->vlan_features | NETIF_F_HW_TC; + if (ds->ops->port_vlan_add && ds->ops->port_vlan_del) + slave_dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; slave_dev->hw_features |= NETIF_F_HW_TC; slave_dev->ethtool_ops = &dsa_slave_ethtool_ops; if (!IS_ERR_OR_NULL(port->mac)) diff --git a/net/dsa/switch.c b/net/dsa/switch.c index 09d9286b27cc..6a9607518823 100644 --- a/net/dsa/switch.c +++ b/net/dsa/switch.c @@ -128,57 +128,51 @@ static int dsa_switch_fdb_del(struct dsa_switch *ds, return ds->ops->port_fdb_del(ds, port, info->addr, info->vid); } -static int -dsa_switch_mdb_prepare_bitmap(struct dsa_switch *ds, - const struct switchdev_obj_port_mdb *mdb, - const unsigned long *bitmap) +static bool dsa_switch_mdb_match(struct dsa_switch *ds, int port, + struct dsa_notifier_mdb_info *info) +{ + if (ds->index == info->sw_index && port == info->port) + return true; + + if (dsa_is_dsa_port(ds, port)) + return true; + + return false; +} + +static int dsa_switch_mdb_prepare(struct dsa_switch *ds, + struct dsa_notifier_mdb_info *info) { int port, err; if (!ds->ops->port_mdb_prepare || !ds->ops->port_mdb_add) return -EOPNOTSUPP; - for_each_set_bit(port, bitmap, ds->num_ports) { - err = ds->ops->port_mdb_prepare(ds, port, mdb); - if (err) - return err; + for (port = 0; port < ds->num_ports; port++) { + if (dsa_switch_mdb_match(ds, port, info)) { + err = ds->ops->port_mdb_prepare(ds, port, info->mdb); + if (err) + return err; + } } return 0; } -static void dsa_switch_mdb_add_bitmap(struct dsa_switch *ds, - const struct switchdev_obj_port_mdb *mdb, - const unsigned long *bitmap) -{ - int port; - - if (!ds->ops->port_mdb_add) - return; - - for_each_set_bit(port, bitmap, ds->num_ports) - ds->ops->port_mdb_add(ds, port, mdb); -} - static int dsa_switch_mdb_add(struct dsa_switch *ds, struct dsa_notifier_mdb_info *info) { - const struct switchdev_obj_port_mdb *mdb = info->mdb; - struct switchdev_trans *trans = info->trans; int port; - /* Build a mask of Multicast group members */ - bitmap_zero(ds->bitmap, ds->num_ports); - if (ds->index == info->sw_index) - set_bit(info->port, ds->bitmap); - for (port = 0; port < ds->num_ports; port++) - if (dsa_is_dsa_port(ds, port)) - set_bit(port, ds->bitmap); + if (switchdev_trans_ph_prepare(info->trans)) + return dsa_switch_mdb_prepare(ds, info); - if (switchdev_trans_ph_prepare(trans)) - return dsa_switch_mdb_prepare_bitmap(ds, mdb, ds->bitmap); + if (!ds->ops->port_mdb_add) + return 0; - dsa_switch_mdb_add_bitmap(ds, mdb, ds->bitmap); + for (port = 0; port < ds->num_ports; port++) + if (dsa_switch_mdb_match(ds, port, info)) + ds->ops->port_mdb_add(ds, port, info->mdb); return 0; } @@ -186,13 +180,11 @@ static int dsa_switch_mdb_add(struct dsa_switch *ds, static int dsa_switch_mdb_del(struct dsa_switch *ds, struct dsa_notifier_mdb_info *info) { - const struct switchdev_obj_port_mdb *mdb = info->mdb; - if (!ds->ops->port_mdb_del) return -EOPNOTSUPP; if (ds->index == info->sw_index) - return ds->ops->port_mdb_del(ds, info->port, mdb); + return ds->ops->port_mdb_del(ds, info->port, info->mdb); return 0; } @@ -234,59 +226,55 @@ static int dsa_port_vlan_check(struct dsa_switch *ds, int port, (void *)vlan); } -static int -dsa_switch_vlan_prepare_bitmap(struct 
dsa_switch *ds, - const struct switchdev_obj_port_vlan *vlan, - const unsigned long *bitmap) +static bool dsa_switch_vlan_match(struct dsa_switch *ds, int port, + struct dsa_notifier_vlan_info *info) +{ + if (ds->index == info->sw_index && port == info->port) + return true; + + if (dsa_is_dsa_port(ds, port)) + return true; + + return false; +} + +static int dsa_switch_vlan_prepare(struct dsa_switch *ds, + struct dsa_notifier_vlan_info *info) { int port, err; if (!ds->ops->port_vlan_prepare || !ds->ops->port_vlan_add) return -EOPNOTSUPP; - for_each_set_bit(port, bitmap, ds->num_ports) { - err = dsa_port_vlan_check(ds, port, vlan); - if (err) - return err; + for (port = 0; port < ds->num_ports; port++) { + if (dsa_switch_vlan_match(ds, port, info)) { + err = dsa_port_vlan_check(ds, port, info->vlan); + if (err) + return err; - err = ds->ops->port_vlan_prepare(ds, port, vlan); - if (err) - return err; + err = ds->ops->port_vlan_prepare(ds, port, info->vlan); + if (err) + return err; + } } return 0; } -static void -dsa_switch_vlan_add_bitmap(struct dsa_switch *ds, - const struct switchdev_obj_port_vlan *vlan, - const unsigned long *bitmap) -{ - int port; - - for_each_set_bit(port, bitmap, ds->num_ports) - ds->ops->port_vlan_add(ds, port, vlan); -} - static int dsa_switch_vlan_add(struct dsa_switch *ds, struct dsa_notifier_vlan_info *info) { - const struct switchdev_obj_port_vlan *vlan = info->vlan; - struct switchdev_trans *trans = info->trans; int port; - /* Build a mask of VLAN members */ - bitmap_zero(ds->bitmap, ds->num_ports); - if (ds->index == info->sw_index) - set_bit(info->port, ds->bitmap); - for (port = 0; port < ds->num_ports; port++) - if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)) - set_bit(port, ds->bitmap); + if (switchdev_trans_ph_prepare(info->trans)) + return dsa_switch_vlan_prepare(ds, info); - if (switchdev_trans_ph_prepare(trans)) - return dsa_switch_vlan_prepare_bitmap(ds, vlan, ds->bitmap); + if (!ds->ops->port_vlan_add) + return 0; - dsa_switch_vlan_add_bitmap(ds, vlan, ds->bitmap); + for (port = 0; port < ds->num_ports; port++) + if (dsa_switch_vlan_match(ds, port, info)) + ds->ops->port_vlan_add(ds, port, info->vlan); return 0; } @@ -294,14 +282,15 @@ static int dsa_switch_vlan_add(struct dsa_switch *ds, static int dsa_switch_vlan_del(struct dsa_switch *ds, struct dsa_notifier_vlan_info *info) { - const struct switchdev_obj_port_vlan *vlan = info->vlan; - if (!ds->ops->port_vlan_del) return -EOPNOTSUPP; if (ds->index == info->sw_index) - return ds->ops->port_vlan_del(ds, info->port, vlan); + return ds->ops->port_vlan_del(ds, info->port, info->vlan); + /* Do not deprogram the DSA links as they may be used as conduit + * for other VLAN members in the fabric. + */ return 0; } diff --git a/net/dsa/tag_8021q.c b/net/dsa/tag_8021q.c index 67a1bc635a7b..9c1cc2482b68 100644 --- a/net/dsa/tag_8021q.c +++ b/net/dsa/tag_8021q.c @@ -93,6 +93,79 @@ int dsa_8021q_rx_source_port(u16 vid) } EXPORT_SYMBOL_GPL(dsa_8021q_rx_source_port); +static int dsa_8021q_restore_pvid(struct dsa_switch *ds, int port) +{ + struct bridge_vlan_info vinfo; + struct net_device *slave; + u16 pvid; + int err; + + if (!dsa_is_user_port(ds, port)) + return 0; + + slave = ds->ports[port].slave; + + err = br_vlan_get_pvid(slave, &pvid); + if (err < 0) + /* There is no pvid on the bridge for this port, which is + * perfectly valid. Nothing to restore, bye-bye! 
+ */ + return 0; + + err = br_vlan_get_info(slave, pvid, &vinfo); + if (err < 0) { + dev_err(ds->dev, "Couldn't determine PVID attributes\n"); + return err; + } + + return dsa_port_vid_add(&ds->ports[port], pvid, vinfo.flags); +} + +/* If @enabled is true, installs @vid with @flags into the switch port's HW + * filter. + * If @enabled is false, deletes @vid (ignores @flags) from the port. Had the + * user explicitly configured this @vid through the bridge core, then the @vid + * is installed again, but this time with the flags from the bridge layer. + */ +static int dsa_8021q_vid_apply(struct dsa_switch *ds, int port, u16 vid, + u16 flags, bool enabled) +{ + struct dsa_port *dp = &ds->ports[port]; + struct bridge_vlan_info vinfo; + int err; + + if (enabled) + return dsa_port_vid_add(dp, vid, flags); + + err = dsa_port_vid_del(dp, vid); + if (err < 0) + return err; + + /* Nothing to restore from the bridge for a non-user port. + * The CPU port VLANs are restored implicitly with the user ports, + * similar to how the bridge does in dsa_slave_vlan_add and + * dsa_slave_vlan_del. + */ + if (!dsa_is_user_port(ds, port)) + return 0; + + err = br_vlan_get_info(dp->slave, vid, &vinfo); + /* Couldn't determine bridge attributes for this vid, + * it means the bridge had not configured it. + */ + if (err < 0) + return 0; + + /* Restore the VID from the bridge */ + err = dsa_port_vid_add(dp, vid, vinfo.flags); + if (err < 0) + return err; + + vinfo.flags &= ~BRIDGE_VLAN_INFO_PVID; + + return dsa_port_vid_add(dp->cpu_dp, vid, vinfo.flags); +} + /* RX VLAN tagging (left) and TX VLAN tagging (right) setup shown for a single * front-panel switch port (here swp0). * @@ -148,8 +221,6 @@ EXPORT_SYMBOL_GPL(dsa_8021q_rx_source_port); int dsa_port_setup_8021q_tagging(struct dsa_switch *ds, int port, bool enabled) { int upstream = dsa_upstream_port(ds, port); - struct dsa_port *dp = &ds->ports[port]; - struct dsa_port *upstream_dp = &ds->ports[upstream]; u16 rx_vid = dsa_8021q_rx_vid(ds, port); u16 tx_vid = dsa_8021q_tx_vid(ds, port); int i, err; @@ -166,7 +237,6 @@ int dsa_port_setup_8021q_tagging(struct dsa_switch *ds, int port, bool enabled) * restrictions, so there are no concerns about leaking traffic. */ for (i = 0; i < ds->num_ports; i++) { - struct dsa_port *other_dp = &ds->ports[i]; u16 flags; if (i == upstream) @@ -179,10 +249,7 @@ int dsa_port_setup_8021q_tagging(struct dsa_switch *ds, int port, bool enabled) /* The RX VID is a regular VLAN on all others */ flags = BRIDGE_VLAN_INFO_UNTAGGED; - if (enabled) - err = dsa_port_vid_add(other_dp, rx_vid, flags); - else - err = dsa_port_vid_del(other_dp, rx_vid); + err = dsa_8021q_vid_apply(ds, i, rx_vid, flags, enabled); if (err) { dev_err(ds->dev, "Failed to apply RX VID %d to port %d: %d\n", rx_vid, port, err); @@ -193,10 +260,7 @@ int dsa_port_setup_8021q_tagging(struct dsa_switch *ds, int port, bool enabled) /* CPU port needs to see this port's RX VID * as tagged egress. 
*/ - if (enabled) - err = dsa_port_vid_add(upstream_dp, rx_vid, 0); - else - err = dsa_port_vid_del(upstream_dp, rx_vid); + err = dsa_8021q_vid_apply(ds, upstream, rx_vid, 0, enabled); if (err) { dev_err(ds->dev, "Failed to apply RX VID %d to port %d: %d\n", rx_vid, port, err); @@ -204,26 +268,24 @@ int dsa_port_setup_8021q_tagging(struct dsa_switch *ds, int port, bool enabled) } /* Finally apply the TX VID on this port and on the CPU port */ - if (enabled) - err = dsa_port_vid_add(dp, tx_vid, BRIDGE_VLAN_INFO_UNTAGGED); - else - err = dsa_port_vid_del(dp, tx_vid); + err = dsa_8021q_vid_apply(ds, port, tx_vid, BRIDGE_VLAN_INFO_UNTAGGED, + enabled); if (err) { dev_err(ds->dev, "Failed to apply TX VID %d on port %d: %d\n", tx_vid, port, err); return err; } - if (enabled) - err = dsa_port_vid_add(upstream_dp, tx_vid, 0); - else - err = dsa_port_vid_del(upstream_dp, tx_vid); + err = dsa_8021q_vid_apply(ds, upstream, tx_vid, 0, enabled); if (err) { dev_err(ds->dev, "Failed to apply TX VID %d on port %d: %d\n", tx_vid, upstream, err); return err; } - return 0; + if (!enabled) + err = dsa_8021q_restore_pvid(ds, port); + + return err; } EXPORT_SYMBOL_GPL(dsa_port_setup_8021q_tagging); diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c index b4872b87d4a6..73605bcbb385 100644 --- a/net/dsa/tag_ksz.c +++ b/net/dsa/tag_ksz.c @@ -70,6 +70,67 @@ static struct sk_buff *ksz_common_rcv(struct sk_buff *skb, } /* + * For Ingress (Host -> KSZ8795), 1 byte is added before FCS. + * --------------------------------------------------------------------------- + * DA(6bytes)|SA(6bytes)|....|Data(nbytes)|tag(1byte)|FCS(4bytes) + * --------------------------------------------------------------------------- + * tag : each bit represents port (eg, 0x01=port1, 0x02=port2, 0x10=port5) + * + * For Egress (KSZ8795 -> Host), 1 byte is added before FCS. + * --------------------------------------------------------------------------- + * DA(6bytes)|SA(6bytes)|....|Data(nbytes)|tag0(1byte)|FCS(4bytes) + * --------------------------------------------------------------------------- + * tag0 : zero-based value represents port + * (eg, 0x00=port1, 0x02=port3, 0x06=port7) + */ + +#define KSZ8795_INGRESS_TAG_LEN 1 + +#define KSZ8795_TAIL_TAG_OVERRIDE BIT(6) +#define KSZ8795_TAIL_TAG_LOOKUP BIT(7) + +static struct sk_buff *ksz8795_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct dsa_port *dp = dsa_slave_to_port(dev); + struct sk_buff *nskb; + u8 *tag; + u8 *addr; + + nskb = ksz_common_xmit(skb, dev, KSZ8795_INGRESS_TAG_LEN); + if (!nskb) + return NULL; + + /* Tag encoding */ + tag = skb_put(nskb, KSZ8795_INGRESS_TAG_LEN); + addr = skb_mac_header(nskb); + + *tag = 1 << dp->index; + if (is_link_local_ether_addr(addr)) + *tag |= KSZ8795_TAIL_TAG_OVERRIDE; + + return nskb; +} + +static struct sk_buff *ksz8795_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt) +{ + u8 *tag = skb_tail_pointer(skb) - KSZ_EGRESS_TAG_LEN; + + return ksz_common_rcv(skb, dev, tag[0] & 7, KSZ_EGRESS_TAG_LEN); +} + +static const struct dsa_device_ops ksz8795_netdev_ops = { + .name = "ksz8795", + .proto = DSA_TAG_PROTO_KSZ8795, + .xmit = ksz8795_xmit, + .rcv = ksz8795_rcv, + .overhead = KSZ8795_INGRESS_TAG_LEN, +}; + +DSA_TAG_DRIVER(ksz8795_netdev_ops); +MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_KSZ8795); + +/* * For Ingress (Host -> KSZ9477), 2 bytes are added before FCS. 
* --------------------------------------------------------------------------- * DA(6bytes)|SA(6bytes)|....|Data(nbytes)|tag0(1byte)|tag1(1byte)|FCS(4bytes) @@ -183,6 +244,7 @@ DSA_TAG_DRIVER(ksz9893_netdev_ops); MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_KSZ9893); static struct dsa_tag_driver *dsa_tag_driver_array[] = { + &DSA_TAG_DRIVER_NAME(ksz8795_netdev_ops), &DSA_TAG_DRIVER_NAME(ksz9477_netdev_ops), &DSA_TAG_DRIVER_NAME(ksz9893_netdev_ops), }; diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c index 47ee88163a9d..9c9aff3e52cf 100644 --- a/net/dsa/tag_sja1105.c +++ b/net/dsa/tag_sja1105.c @@ -89,7 +89,8 @@ static struct sk_buff *sja1105_xmit(struct sk_buff *skb, struct dsa_port *dp = dsa_slave_to_port(netdev); struct dsa_switch *ds = dp->ds; u16 tx_vid = dsa_8021q_tx_vid(ds, dp->index); - u8 pcp = skb->priority; + u16 queue_mapping = skb_get_queue_mapping(skb); + u8 pcp = netdev_txq_to_tc(netdev, queue_mapping); /* Transmitting management traffic does not rely upon switch tagging, * but instead SPI-installed management routes. Part 2 of this diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index ed2301ef872e..70f92aaca411 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1845,13 +1845,8 @@ static __net_init int inet_init_net(struct net *net) return 0; } -static __net_exit void inet_exit_net(struct net *net) -{ -} - static __net_initdata struct pernet_operations af_inet_ops = { .init = inet_init_net, - .exit = inet_exit_net, }; static int __init init_inet_pernet_ops(void) diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c index 7bd29e694603..9a0fe0c2fa02 100644 --- a/net/ipv4/datagram.c +++ b/net/ipv4/datagram.c @@ -15,6 +15,7 @@ #include <net/sock.h> #include <net/route.h> #include <net/tcp_states.h> +#include <net/sock_reuseport.h> int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { @@ -69,6 +70,7 @@ int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len } inet->inet_daddr = fl4->daddr; inet->inet_dport = usin->sin_port; + reuseport_has_conns(sk, true); sk->sk_state = TCP_ESTABLISHED; sk_set_txhash(sk); inet->inet_id = jiffies; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index cc7ef0d05bbd..5eb73775c3f7 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1266,6 +1266,7 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork, cork->length = 0; cork->ttl = ipc->ttl; cork->tos = ipc->tos; + cork->mark = ipc->sockc.mark; cork->priority = ipc->priority; cork->transmit_time = ipc->sockc.transmit_time; cork->tx_flags = 0; @@ -1529,7 +1530,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk, } skb->priority = (cork->tos != -1) ? 
cork->priority: sk->sk_priority; - skb->mark = sk->sk_mark; + skb->mark = cork->mark; skb->tstamp = cork->transmit_time; /* * Steal rt from cork.dst to avoid a pair of atomic_inc/atomic_dec diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index c07bc82cbbe9..313470f6bb14 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1134,8 +1134,8 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, if (!found) { /* Create a new entry if allowable */ - if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 || - (c = ipmr_cache_alloc_unres()) == NULL) { + c = ipmr_cache_alloc_unres(); + if (!c) { spin_unlock_bh(&mfc_unres_lock); kfree_skb(skb); diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 69e76d677f9e..f17b402111ce 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -272,7 +272,7 @@ config IP_NF_TARGET_CLUSTERIP The CLUSTERIP target allows you to build load-balancing clusters of network servers without having a dedicated load-balancing router/server/switch. - + To compile it as a module, choose M here. If unsure, say N. config IP_NF_TARGET_ECN @@ -281,7 +281,7 @@ config IP_NF_TARGET_ECN depends on NETFILTER_ADVANCED ---help--- This option adds a `ECN' target, which can be used in the iptables mangle - table. + table. You can use this target to remove the ECN bits from the IPv4 header of an IP packet. This is particularly useful, if you need to work around @@ -306,7 +306,7 @@ config IP_NF_RAW This option adds a `raw' table to iptables. This table is the very first in the netfilter framework and hooks in at the PREROUTING and OUTPUT chains. - + If you want to compile it as a module, say M here and read <file:Documentation/kbuild/modules.rst>. If unsure, say `N'. @@ -318,7 +318,7 @@ config IP_NF_SECURITY help This option adds a `security' table to iptables, for use with Mandatory Access Control (MAC) policy. - + If unsure, say N. 
endif # IP_NF_IPTABLES diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index c50e0ec095d2..7c497c78105f 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -31,7 +31,7 @@ obj-$(CONFIG_NFT_DUP_IPV4) += nft_dup_ipv4.o # flow table support obj-$(CONFIG_NF_FLOW_TABLE_IPV4) += nf_flow_table_ipv4.o -# generic IP tables +# generic IP tables obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o # the three instances of ip_tables diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c index 0e70f3f65f6f..748dc3ce58d3 100644 --- a/net/ipv4/netfilter/ipt_SYNPROXY.c +++ b/net/ipv4/netfilter/ipt_SYNPROXY.c @@ -36,8 +36,8 @@ synproxy_tg4(struct sk_buff *skb, const struct xt_action_param *par) opts.options |= XT_SYNPROXY_OPT_ECN; opts.options &= info->options; - opts.mss_encode = opts.mss; - opts.mss = info->mss; + opts.mss_encode = opts.mss_option; + opts.mss_option = info->mss; if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP) synproxy_init_timestamp_cookie(info, &opts); else diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index 5fe5a3981d43..fc34fd1668d6 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -1151,7 +1151,7 @@ static int nh_create_ipv4(struct net *net, struct nexthop *nh, .fc_encap_type = cfg->nh_encap_type, }; u32 tb_id = l3mdev_fib_table(cfg->dev); - int err = -EINVAL; + int err; err = fib_nh_init(net, fib_nh, &fib_cfg, 1, extack); if (err) { diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 9d24ef5c5d8f..535427292194 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -781,7 +781,7 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) } else if (!ipc.oif) ipc.oif = inet->uc_index; - flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos, + flowi4_init_output(&fl4, ipc.oif, ipc.sockc.mark, tos, RT_SCOPE_UNIVERSE, sk->sk_protocol, inet_sk_flowi_flags(sk), faddr, saddr, 0, 0, sk->sk_uid); diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 40a6abbc9cf6..80da5a66d5d7 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -375,7 +375,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, skb_reserve(skb, hlen); skb->priority = sk->sk_priority; - skb->mark = sk->sk_mark; + skb->mark = sockc->mark; skb->tstamp = sockc->transmit_time; skb_dst_set(skb, &rt->dst); *rtp = NULL; @@ -623,7 +623,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) } } - flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos, + flowi4_init_output(&fl4, ipc.oif, ipc.sockc.mark, tos, RT_SCOPE_UNIVERSE, hdrincl ? 
IPPROTO_RAW : sk->sk_protocol, inet_sk_flowi_flags(sk) | diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 0b980e841927..59ded25acd04 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -820,6 +820,15 @@ static struct ctl_table ipv4_net_table[] = { .extra2 = &tcp_min_snd_mss_max, }, { + .procname = "tcp_mtu_probe_floor", + .data = &init_net.ipv4.sysctl_tcp_mtu_probe_floor, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &tcp_min_snd_mss_min, + .extra2 = &tcp_min_snd_mss_max, + }, + { .procname = "tcp_probe_threshold", .data = &init_net.ipv4.sysctl_tcp_probe_threshold, .maxlen = sizeof(int), diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 61082065b26a..79c325a07ba5 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1182,7 +1182,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) struct sockcm_cookie sockc; int flags, err, copied = 0; int mss_now = 0, size_goal, copied_syn = 0; - bool process_backlog = false; + int process_backlog = 0; bool zc = false; long timeo; @@ -1274,9 +1274,10 @@ new_segment: if (!sk_stream_memory_free(sk)) goto wait_for_sndbuf; - if (process_backlog && sk_flush_backlog(sk)) { - process_backlog = false; - goto restart; + if (unlikely(process_backlog >= 16)) { + process_backlog = 0; + if (sk_flush_backlog(sk)) + goto restart; } first_skb = tcp_rtx_and_write_queues_empty(sk); skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation, @@ -1284,7 +1285,7 @@ new_segment: if (!skb) goto wait_for_memory; - process_backlog = true; + process_backlog++; skb->ip_summed = CHECKSUM_PARTIAL; skb_entail(sk, skb); @@ -1789,19 +1790,21 @@ static int tcp_zerocopy_receive(struct sock *sk, break; frags = skb_shinfo(skb)->frags; while (offset) { - if (frags->size > offset) + if (skb_frag_size(frags) > offset) goto out; - offset -= frags->size; + offset -= skb_frag_size(frags); frags++; } } - if (frags->size != PAGE_SIZE || frags->page_offset) { + if (skb_frag_size(frags) != PAGE_SIZE || skb_frag_off(frags)) { int remaining = zc->recv_skip_hint; + int size = skb_frag_size(frags); - while (remaining && (frags->size != PAGE_SIZE || - frags->page_offset)) { - remaining -= frags->size; + while (remaining && (size != PAGE_SIZE || + skb_frag_off(frags))) { + remaining -= size; frags++; + size = skb_frag_size(frags); } zc->recv_skip_hint -= remaining; break; @@ -2650,6 +2653,7 @@ int tcp_disconnect(struct sock *sk, int flags) tp->rx_opt.saw_tstamp = 0; tp->rx_opt.dsack = 0; tp->rx_opt.num_sacks = 0; + tp->rcv_ooopack = 0; /* Clean up fastopen related fields */ @@ -3292,6 +3296,8 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_bytes_retrans = tp->bytes_retrans; info->tcpi_dsack_dups = tp->dsack_dups; info->tcpi_reord_seen = tp->reord_seen; + info->tcpi_rcv_ooopack = tp->rcv_ooopack; + info->tcpi_snd_wnd = tp->snd_wnd; unlock_sock_fast(sk, slow); } EXPORT_SYMBOL_GPL(tcp_get_info); @@ -3794,8 +3800,8 @@ int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp, return 1; for (i = 0; i < shi->nr_frags; ++i) { - const struct skb_frag_struct *f = &shi->frags[i]; - unsigned int offset = f->page_offset; + const skb_frag_t *f = &shi->frags[i]; + unsigned int offset = skb_frag_off(f); struct page *page = skb_frag_page(f) + (offset >> PAGE_SHIFT); sg_set_page(&sg, page, skb_frag_size(f), diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index 56be7d27f208..95b59540eee1 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -346,7 +346,7 @@ static void 
bbr_cwnd_event(struct sock *sk, enum tcp_ca_event event) /* Calculate bdp based on min RTT and the estimated bottleneck bandwidth: * - * bdp = bw * min_rtt * gain + * bdp = ceil(bw * min_rtt * gain) * * The key factor, gain, controls the amount of queue. While a small gain * builds a smaller queue, it becomes more vulnerable to noise in RTT @@ -370,7 +370,9 @@ static u32 bbr_bdp(struct sock *sk, u32 bw, int gain) w = (u64)bw * bbr->min_rtt_us; - /* Apply a gain to the given value, then remove the BW_SCALE shift. */ + /* Apply a gain to the given value, remove the BW_SCALE shift, and + * round the value up to avoid a negative feedback loop. + */ bdp = (((w * gain) >> BBR_SCALE) + BW_UNIT - 1) / BW_UNIT; return bdp; diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index a3a386236d93..81a8221d650a 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -81,13 +81,42 @@ static int tcp_diag_put_md5sig(struct sk_buff *skb, } #endif +static int tcp_diag_put_ulp(struct sk_buff *skb, struct sock *sk, + const struct tcp_ulp_ops *ulp_ops) +{ + struct nlattr *nest; + int err; + + nest = nla_nest_start_noflag(skb, INET_DIAG_ULP_INFO); + if (!nest) + return -EMSGSIZE; + + err = nla_put_string(skb, INET_ULP_INFO_NAME, ulp_ops->name); + if (err) + goto nla_failure; + + if (ulp_ops->get_info) + err = ulp_ops->get_info(sk, skb); + if (err) + goto nla_failure; + + nla_nest_end(skb, nest); + return 0; + +nla_failure: + nla_nest_cancel(skb, nest); + return err; +} + static int tcp_diag_get_aux(struct sock *sk, bool net_admin, struct sk_buff *skb) { + struct inet_connection_sock *icsk = inet_csk(sk); + int err = 0; + #ifdef CONFIG_TCP_MD5SIG if (net_admin) { struct tcp_md5sig_info *md5sig; - int err = 0; rcu_read_lock(); md5sig = rcu_dereference(tcp_sk(sk)->md5sig_info); @@ -99,11 +128,21 @@ static int tcp_diag_get_aux(struct sock *sk, bool net_admin, } #endif + if (net_admin) { + const struct tcp_ulp_ops *ulp_ops; + + ulp_ops = icsk->icsk_ulp_ops; + if (ulp_ops) + err = tcp_diag_put_ulp(skb, sk, ulp_ops); + if (err) + return err; + } return 0; } static size_t tcp_diag_get_aux_size(struct sock *sk, bool net_admin) { + struct inet_connection_sock *icsk = inet_csk(sk); size_t size = 0; #ifdef CONFIG_TCP_MD5SIG @@ -124,6 +163,17 @@ static size_t tcp_diag_get_aux_size(struct sock *sk, bool net_admin) } #endif + if (net_admin && sk_fullsock(sk)) { + const struct tcp_ulp_ops *ulp_ops; + + ulp_ops = icsk->icsk_ulp_ops; + if (ulp_ops) { + size += nla_total_size(0) + + nla_total_size(TCP_ULP_NAME_MAX); + if (ulp_ops->get_info_size) + size += ulp_ops->get_info_size(sk); + } + } return size; } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 8a1cd93dbb09..3578357abe30 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3782,6 +3782,49 @@ static void smc_parse_options(const struct tcphdr *th, #endif } +/* Try to parse the MSS option from the TCP header. Return 0 on failure, clamped + * value on success. 
+ */ +static u16 tcp_parse_mss_option(const struct tcphdr *th, u16 user_mss) +{ + const unsigned char *ptr = (const unsigned char *)(th + 1); + int length = (th->doff * 4) - sizeof(struct tcphdr); + u16 mss = 0; + + while (length > 0) { + int opcode = *ptr++; + int opsize; + + switch (opcode) { + case TCPOPT_EOL: + return mss; + case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */ + length--; + continue; + default: + if (length < 2) + return mss; + opsize = *ptr++; + if (opsize < 2) /* "silly options" */ + return mss; + if (opsize > length) + return mss; /* fail on partial options */ + if (opcode == TCPOPT_MSS && opsize == TCPOLEN_MSS) { + u16 in_mss = get_unaligned_be16(ptr); + + if (in_mss) { + if (user_mss && user_mss < in_mss) + in_mss = user_mss; + mss = in_mss; + } + } + ptr += opsize - 2; + length -= opsize; + } + } + return mss; +} + /* Look for tcp options. Normally only called on SYN and SYNACK packets. * But, this can also be called on packets in the established flow when * the fast version below fails. @@ -4512,6 +4555,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) tp->pred_flags = 0; inet_csk_schedule_ack(sk); + tp->rcv_ooopack += max_t(u16, 1, skb_shinfo(skb)->gso_segs); NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOQUEUE); seq = TCP_SKB_CB(skb)->seq; end_seq = TCP_SKB_CB(skb)->end_seq; @@ -6422,9 +6466,7 @@ EXPORT_SYMBOL(inet_reqsk_alloc); /* * Return true if a syncookie should be sent */ -static bool tcp_syn_flood_action(const struct sock *sk, - const struct sk_buff *skb, - const char *proto) +static bool tcp_syn_flood_action(const struct sock *sk, const char *proto) { struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; const char *msg = "Dropping request"; @@ -6444,7 +6486,7 @@ static bool tcp_syn_flood_action(const struct sock *sk, net->ipv4.sysctl_tcp_syncookies != 2 && xchg(&queue->synflood_warned, 1) == 0) net_info_ratelimited("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n", - proto, ntohs(tcp_hdr(skb)->dest), msg); + proto, sk->sk_num, msg); return want_cookie; } @@ -6466,6 +6508,36 @@ static void tcp_reqsk_record_syn(const struct sock *sk, } } +/* If a SYN cookie is required and supported, returns a clamped MSS value to be + * used for SYN cookie generation. 
+ */ +u16 tcp_get_syncookie_mss(struct request_sock_ops *rsk_ops, + const struct tcp_request_sock_ops *af_ops, + struct sock *sk, struct tcphdr *th) +{ + struct tcp_sock *tp = tcp_sk(sk); + u16 mss; + + if (sock_net(sk)->ipv4.sysctl_tcp_syncookies != 2 && + !inet_csk_reqsk_queue_is_full(sk)) + return 0; + + if (!tcp_syn_flood_action(sk, rsk_ops->slab_name)) + return 0; + + if (sk_acceptq_is_full(sk)) { + NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); + return 0; + } + + mss = tcp_parse_mss_option(th, tp->rx_opt.user_mss); + if (!mss) + mss = af_ops->mss_clamp; + + return mss; +} +EXPORT_SYMBOL_GPL(tcp_get_syncookie_mss); + int tcp_conn_request(struct request_sock_ops *rsk_ops, const struct tcp_request_sock_ops *af_ops, struct sock *sk, struct sk_buff *skb) @@ -6487,7 +6559,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, */ if ((net->ipv4.sysctl_tcp_syncookies == 2 || inet_csk_reqsk_queue_is_full(sk)) && !isn) { - want_cookie = tcp_syn_flood_action(sk, skb, rsk_ops->slab_name); + want_cookie = tcp_syn_flood_action(sk, rsk_ops->slab_name); if (!want_cookie) goto drop; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index d57641cb3477..fd394ad179a0 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1515,6 +1515,21 @@ static struct sock *tcp_v4_cookie_check(struct sock *sk, struct sk_buff *skb) return sk; } +u16 tcp_v4_get_syncookie(struct sock *sk, struct iphdr *iph, + struct tcphdr *th, u32 *cookie) +{ + u16 mss = 0; +#ifdef CONFIG_SYN_COOKIES + mss = tcp_get_syncookie_mss(&tcp_request_sock_ops, + &tcp_request_sock_ipv4_ops, sk, th); + if (mss) { + *cookie = __cookie_v4_init_sequence(iph, th, &mss); + tcp_synq_overflow(sk); + } +#endif + return mss; +} + /* The socket must have it's spinlock held when we get * here, unless it is a TCP_LISTEN socket. * @@ -2637,6 +2652,7 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_min_snd_mss = TCP_MIN_SND_MSS; net->ipv4.sysctl_tcp_probe_threshold = TCP_PROBE_THRESHOLD; net->ipv4.sysctl_tcp_probe_interval = TCP_PROBE_INTERVAL; + net->ipv4.sysctl_tcp_mtu_probe_floor = TCP_MIN_SND_MSS; net->ipv4.sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME; net->ipv4.sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 8a645f304e6c..fec6d67bfd14 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1050,11 +1050,22 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, tcb = TCP_SKB_CB(skb); memset(&opts, 0, sizeof(opts)); - if (unlikely(tcb->tcp_flags & TCPHDR_SYN)) + if (unlikely(tcb->tcp_flags & TCPHDR_SYN)) { tcp_options_size = tcp_syn_options(sk, skb, &opts, &md5); - else + } else { tcp_options_size = tcp_established_options(sk, skb, &opts, &md5); + /* Force a PSH flag on all (GSO) packets to expedite GRO flush + * at receiver : This slightly improve GRO performance. + * Note that we do not force the PSH flag for non GSO packets, + * because they might be sent under high congestion events, + * and in this case it is better to delay the delivery of 1-MSS + * packets and thus the corresponding ACK packet that would + * release the following packet. 
+ */ + if (tcp_skb_pcount(skb) > 1) + tcb->tcp_flags |= TCPHDR_PSH; + } tcp_header_size = tcp_options_size + sizeof(struct tcphdr); /* if no packet is in qdisc/device queue, then allow XPS to select @@ -1403,7 +1414,7 @@ static int __pskb_trim_head(struct sk_buff *skb, int len) } else { shinfo->frags[k] = shinfo->frags[i]; if (eat) { - shinfo->frags[k].page_offset += eat; + skb_frag_off_add(&shinfo->frags[k], eat); skb_frag_size_sub(&shinfo->frags[k], eat); eat = 0; } diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index c801cd37cc2a..dbd9d2d0ee63 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -154,7 +154,7 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk) } else { mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1; mss = min(net->ipv4.sysctl_tcp_base_mss, mss); - mss = max(mss, 68 - tcp_sk(sk)->tcp_header_len); + mss = max(mss, net->ipv4.sysctl_tcp_mtu_probe_floor); mss = max(mss, net->ipv4.sysctl_tcp_min_snd_mss); icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss); } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index d88821c794fb..cf755156a684 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -423,12 +423,13 @@ static struct sock *udp4_lib_lookup2(struct net *net, score = compute_score(sk, net, saddr, sport, daddr, hnum, dif, sdif); if (score > badness) { - if (sk->sk_reuseport) { + if (sk->sk_reuseport && + sk->sk_state != TCP_ESTABLISHED) { hash = udp_ehashfn(net, daddr, hnum, saddr, sport); result = reuseport_select_sock(sk, hash, skb, sizeof(struct udphdr)); - if (result) + if (result && !reuseport_has_conns(sk, false)) return result; } badness = score; @@ -1130,7 +1131,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) fl4 = &fl4_stack; - flowi4_init_output(fl4, ipc.oif, sk->sk_mark, tos, + flowi4_init_output(fl4, ipc.oif, ipc.sockc.mark, tos, RT_SCOPE_UNIVERSE, sk->sk_protocol, flow_flags, faddr, saddr, dport, inet->inet_sport, diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 9ab897ded4df..96f939248d2f 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -27,6 +27,7 @@ #include <net/ip6_route.h> #include <net/tcp_states.h> #include <net/dsfield.h> +#include <net/sock_reuseport.h> #include <linux/errqueue.h> #include <linux/uaccess.h> @@ -254,6 +255,7 @@ ipv4_connected: goto out; } + reuseport_has_conns(sk, true); sk->sk_state = TCP_ESTABLISHED; sk_set_txhash(sk); out: diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c index b358f1a4dd08..da46c4284676 100644 --- a/net/ipv6/exthdrs_core.c +++ b/net/ipv6/exthdrs_core.c @@ -197,10 +197,8 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, struct ipv6hdr _ip6, *ip6; ip6 = skb_header_pointer(skb, *offset, sizeof(_ip6), &_ip6); - if (!ip6 || (ip6->version != 6)) { - printk(KERN_ERR "IPv6 header not found\n"); + if (!ip6 || (ip6->version != 6)) return -EBADMSG; - } start = *offset + sizeof(struct ipv6hdr); nexthdr = ip6->nexthdr; } diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index dd2d0b963260..d5779d6a6065 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -968,7 +968,7 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) || ip_tunnel_info_af(tun_info) != AF_INET6)) - return -EINVAL; + goto tx_err; key = &tun_info->key; memset(&fl6, 0, sizeof(fl6)); diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index fa014d5f1732..d432d0011c16 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -80,8 
+80,10 @@ static void ip6_sublist_rcv_finish(struct list_head *head) { struct sk_buff *skb, *next; - list_for_each_entry_safe(skb, next, head, list) + list_for_each_entry_safe(skb, next, head, list) { + skb_list_del_init(skb); dst_input(skb); + } } static void ip6_list_rcv_finish(struct net *net, struct sock *sk, diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 8e49fd62eea9..89a4c7c2e25d 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1294,6 +1294,7 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, cork->base.fragsize = mtu; cork->base.gso_size = ipc6->gso_size; cork->base.tx_flags = 0; + cork->base.mark = ipc6->sockc.mark; sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags); if (dst_allfrag(xfrm_dst_path(&rt->dst))) @@ -1764,7 +1765,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk, hdr->daddr = *final_dst; skb->priority = sk->sk_priority; - skb->mark = sk->sk_mark; + skb->mark = cork->base.mark; skb->tstamp = cork->base.transmit_time; diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index e80d36c5073d..857a89ad4d6c 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1148,8 +1148,8 @@ static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi, * Create a new entry if allowable */ - if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 || - (c = ip6mr_cache_alloc_unres()) == NULL) { + c = ip6mr_cache_alloc_unres(); + if (!c) { spin_unlock_bh(&mfc_unres_lock); kfree_skb(skb); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 083cc1c94cd3..53caf59c591e 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -196,6 +196,7 @@ static inline int ndisc_is_useropt(const struct net_device *dev, { return opt->nd_opt_type == ND_OPT_RDNSS || opt->nd_opt_type == ND_OPT_DNSSL || + opt->nd_opt_type == ND_OPT_CAPTIVE_PORTAL || ndisc_ops_is_useropt(dev, opt->nd_opt_type); } diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 61819ed858b1..a9bff556d3b2 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -113,9 +113,9 @@ int __nf_ip6_route(struct net *net, struct dst_entry **dst, EXPORT_SYMBOL_GPL(__nf_ip6_route); int br_ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, - struct nf_ct_bridge_frag_data *data, + struct nf_bridge_frag_data *data, int (*output)(struct net *, struct sock *sk, - const struct nf_ct_bridge_frag_data *data, + const struct nf_bridge_frag_data *data, struct sk_buff *)) { int frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size; diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c index 5cdb4a69d277..fd1f52a21bf1 100644 --- a/net/ipv6/netfilter/ip6t_SYNPROXY.c +++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c @@ -36,8 +36,8 @@ synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par) opts.options |= XT_SYNPROXY_OPT_ECN; opts.options &= info->options; - opts.mss_encode = opts.mss; - opts.mss = info->mss; + opts.mss_encode = opts.mss_option; + opts.mss_option = info->mss; if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP) synproxy_init_timestamp_cookie(info, &opts); else diff --git a/net/ipv6/netfilter/ip6t_ipv6header.c b/net/ipv6/netfilter/ip6t_ipv6header.c index 0fc6326ef499..c52ff929c93b 100644 --- a/net/ipv6/netfilter/ip6t_ipv6header.c +++ b/net/ipv6/netfilter/ip6t_ipv6header.c @@ -16,7 +16,7 @@ #include <net/ipv6.h> #include <linux/netfilter/x_tables.h> -#include <linux/netfilter_ipv6/ip6_tables.h> +#include <linux/netfilter_ipv6.h> #include <linux/netfilter_ipv6/ip6t_ipv6header.h> MODULE_LICENSE("GPL"); @@ -42,7 +42,7 @@ 
ipv6header_mt6(const struct sk_buff *skb, struct xt_action_param *par) len = skb->len - ptr; temp = 0; - while (ip6t_ext_hdr(nexthdr)) { + while (nf_ip6_ext_hdr(nexthdr)) { const struct ipv6_opt_hdr *hp; struct ipv6_opt_hdr _hdr; int hdrlen; diff --git a/net/ipv6/netfilter/nf_log_ipv6.c b/net/ipv6/netfilter/nf_log_ipv6.c index f53bd8f01219..22b80db6d882 100644 --- a/net/ipv6/netfilter/nf_log_ipv6.c +++ b/net/ipv6/netfilter/nf_log_ipv6.c @@ -18,7 +18,7 @@ #include <net/route.h> #include <linux/netfilter.h> -#include <linux/netfilter_ipv6/ip6_tables.h> +#include <linux/netfilter_ipv6.h> #include <linux/netfilter/xt_LOG.h> #include <net/netfilter/nf_log.h> @@ -70,7 +70,7 @@ static void dump_ipv6_packet(struct net *net, struct nf_log_buf *m, fragment = 0; ptr = ip6hoff + sizeof(struct ipv6hdr); currenthdr = ih->nexthdr; - while (currenthdr != NEXTHDR_NONE && ip6t_ext_hdr(currenthdr)) { + while (currenthdr != NEXTHDR_NONE && nf_ip6_ext_hdr(currenthdr)) { struct ipv6_opt_hdr _hdr; const struct ipv6_opt_hdr *hp; diff --git a/net/ipv6/netfilter/nf_socket_ipv6.c b/net/ipv6/netfilter/nf_socket_ipv6.c index 437d95545c31..b9df879c48d3 100644 --- a/net/ipv6/netfilter/nf_socket_ipv6.c +++ b/net/ipv6/netfilter/nf_socket_ipv6.c @@ -12,7 +12,6 @@ #include <net/sock.h> #include <net/inet_sock.h> #include <net/inet6_hashtables.h> -#include <net/netfilter/ipv6/nf_defrag_ipv6.h> #include <net/netfilter/nf_socket.h> #if IS_ENABLED(CONFIG_NF_CONNTRACK) #include <net/netfilter/nf_conntrack.h> diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 8a6131991e38..6e1888ee4036 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -646,7 +646,7 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, skb->protocol = htons(ETH_P_IPV6); skb->priority = sk->sk_priority; - skb->mark = sk->sk_mark; + skb->mark = sockc->mark; skb->tstamp = sockc->transmit_time; skb_put(skb, length); @@ -810,6 +810,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) ipcm6_init(&ipc6); ipc6.sockc.tsflags = sk->sk_tsflags; + ipc6.sockc.mark = sk->sk_mark; if (sin6) { if (addr_len < SIN6_LEN_RFC2133) @@ -891,6 +892,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) opt = ipv6_fixup_options(&opt_space, opt); fl6.flowi6_proto = proto; + fl6.flowi6_mark = ipc6.sockc.mark; if (!hdrincl) { rfv.msg = msg; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 546088e50815..a63ff85fe141 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -227,7 +227,7 @@ static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr) struct net_device *dev = dst->dev; struct rt6_info *rt = (struct rt6_info *)dst; - daddr = choose_neigh_daddr(&rt->rt6i_gateway, NULL, daddr); + daddr = choose_neigh_daddr(rt6_nexthop(rt, &in6addr_any), NULL, daddr); if (!daddr) return; if (dev->flags & (IFF_NOARP | IFF_LOOPBACK)) @@ -2725,10 +2725,9 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, rcu_read_lock(); res.f6i = rcu_dereference(rt6->from); - if (!res.f6i) { - rcu_read_unlock(); - return; - } + if (!res.f6i) + goto out_unlock; + res.fib6_flags = res.f6i->fib6_flags; res.fib6_type = res.f6i->fib6_type; @@ -2744,10 +2743,8 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, /* fib6_info uses a nexthop that does not have fib6_nh * using the dst->dev + gw. Should be impossible. 
*/ - if (!arg.match) { - rcu_read_unlock(); - return; - } + if (!arg.match) + goto out_unlock; res.nh = arg.match; } else { @@ -2760,6 +2757,7 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, if (rt6_insert_exception(nrt6, &res)) dst_release_immediate(&nrt6->dst); } +out_unlock: rcu_read_unlock(); } } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 5da069e91cac..87f44d3250ee 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1063,6 +1063,21 @@ static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb) return sk; } +u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph, + struct tcphdr *th, u32 *cookie) +{ + u16 mss = 0; +#ifdef CONFIG_SYN_COOKIES + mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops, + &tcp_request_sock_ipv6_ops, sk, th); + if (mss) { + *cookie = __cookie_v6_init_sequence(iph, th, &mss); + tcp_synq_overflow(sk); + } +#endif + return mss; +} + static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) { if (skb->protocol == htons(ETH_P_IP)) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 827fe7385078..aae4938f3dea 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -158,13 +158,14 @@ static struct sock *udp6_lib_lookup2(struct net *net, score = compute_score(sk, net, saddr, sport, daddr, hnum, dif, sdif); if (score > badness) { - if (sk->sk_reuseport) { + if (sk->sk_reuseport && + sk->sk_state != TCP_ESTABLISHED) { hash = udp6_ehashfn(net, daddr, hnum, saddr, sport); result = reuseport_select_sock(sk, hash, skb, sizeof(struct udphdr)); - if (result) + if (result && !reuseport_has_conns(sk, false)) return result; } result = sk; @@ -1230,6 +1231,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) ipcm6_init(&ipc6); ipc6.gso_size = up->gso_size; ipc6.sockc.tsflags = sk->sk_tsflags; + ipc6.sockc.mark = sk->sk_mark; /* destination address check */ if (sin6) { @@ -1352,7 +1354,7 @@ do_udp_sendmsg: if (!fl6.flowi6_oif) fl6.flowi6_oif = np->sticky_pktinfo.ipi6_ifindex; - fl6.flowi6_mark = sk->sk_mark; + fl6.flowi6_mark = ipc6.sockc.mark; fl6.flowi6_uid = sk->sk_uid; if (msg->msg_controllen) { diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index 5dbc0c48f8cb..8f12f5c6ab87 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -379,7 +379,7 @@ static int kcm_parse_func_strparser(struct strparser *strp, struct sk_buff *skb) struct kcm_psock *psock = container_of(strp, struct kcm_psock, strp); struct bpf_prog *prog = psock->bpf_prog; - return (*prog->bpf_func)(skb, prog->insnsi); + return BPF_PROG_RUN(prog, skb); } static int kcm_read_sock_done(struct strparser *strp, int err) @@ -635,15 +635,15 @@ do_frag_list: frag_offset = 0; do_frag: frag = &skb_shinfo(skb)->frags[fragidx]; - if (WARN_ON(!frag->size)) { + if (WARN_ON(!skb_frag_size(frag))) { ret = -EINVAL; goto out; } ret = kernel_sendpage(psock->sk->sk_socket, - frag->page.p, - frag->page_offset + frag_offset, - frag->size - frag_offset, + skb_frag_page(frag), + skb_frag_off(frag) + frag_offset, + skb_frag_size(frag) - frag_offset, MSG_DONTWAIT); if (ret <= 0) { if (ret == -EAGAIN) { @@ -678,7 +678,7 @@ do_frag: sent += ret; frag_offset += ret; KCM_STATS_ADD(psock->stats.tx_bytes, ret); - if (frag_offset < frag->size) { + if (frag_offset < skb_frag_size(frag)) { /* Not finished with this frag */ goto do_frag; } diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index 01b0dad24500..4d1c335e06e5 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -178,17 +178,54 @@ static void 
sta_rx_agg_reorder_timer_expired(struct timer_list *t) rcu_read_unlock(); } -static void ieee80211_send_addba_resp(struct ieee80211_sub_if_data *sdata, u8 *da, u16 tid, +static void ieee80211_add_addbaext(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb, + const struct ieee80211_addba_ext_ie *req) +{ + struct ieee80211_supported_band *sband; + struct ieee80211_addba_ext_ie *resp; + const struct ieee80211_sta_he_cap *he_cap; + u8 frag_level, cap_frag_level; + u8 *pos; + + sband = ieee80211_get_sband(sdata); + if (!sband) + return; + he_cap = ieee80211_get_he_iftype_cap(sband, sdata->vif.type); + if (!he_cap) + return; + + pos = skb_put_zero(skb, 2 + sizeof(struct ieee80211_addba_ext_ie)); + *pos++ = WLAN_EID_ADDBA_EXT; + *pos++ = sizeof(struct ieee80211_addba_ext_ie); + resp = (struct ieee80211_addba_ext_ie *)pos; + resp->data = req->data & IEEE80211_ADDBA_EXT_NO_FRAG; + + frag_level = u32_get_bits(req->data, + IEEE80211_ADDBA_EXT_FRAG_LEVEL_MASK); + cap_frag_level = u32_get_bits(he_cap->he_cap_elem.mac_cap_info[0], + IEEE80211_HE_MAC_CAP0_DYNAMIC_FRAG_MASK); + if (frag_level > cap_frag_level) + frag_level = cap_frag_level; + resp->data |= u8_encode_bits(frag_level, + IEEE80211_ADDBA_EXT_FRAG_LEVEL_MASK); +} + +static void ieee80211_send_addba_resp(struct sta_info *sta, u8 *da, u16 tid, u8 dialog_token, u16 status, u16 policy, - u16 buf_size, u16 timeout) + u16 buf_size, u16 timeout, + const struct ieee80211_addba_ext_ie *addbaext) { + struct ieee80211_sub_if_data *sdata = sta->sdata; struct ieee80211_local *local = sdata->local; struct sk_buff *skb; struct ieee80211_mgmt *mgmt; bool amsdu = ieee80211_hw_check(&local->hw, SUPPORTS_AMSDU_IN_AMPDU); u16 capab; - skb = dev_alloc_skb(sizeof(*mgmt) + local->hw.extra_tx_headroom); + skb = dev_alloc_skb(sizeof(*mgmt) + + 2 + sizeof(struct ieee80211_addba_ext_ie) + + local->hw.extra_tx_headroom); if (!skb) return; @@ -222,13 +259,17 @@ static void ieee80211_send_addba_resp(struct ieee80211_sub_if_data *sdata, u8 *d mgmt->u.action.u.addba_resp.timeout = cpu_to_le16(timeout); mgmt->u.action.u.addba_resp.status = cpu_to_le16(status); + if (sta->sta.he_cap.has_he && addbaext) + ieee80211_add_addbaext(sdata, skb, addbaext); + ieee80211_tx_skb(sdata, skb); } void ___ieee80211_start_rx_ba_session(struct sta_info *sta, u8 dialog_token, u16 timeout, u16 start_seq_num, u16 ba_policy, u16 tid, - u16 buf_size, bool tx, bool auto_seq) + u16 buf_size, bool tx, bool auto_seq, + const struct ieee80211_addba_ext_ie *addbaext) { struct ieee80211_local *local = sta->sdata->local; struct tid_ampdu_rx *tid_agg_rx; @@ -410,21 +451,22 @@ end: } if (tx) - ieee80211_send_addba_resp(sta->sdata, sta->sta.addr, tid, + ieee80211_send_addba_resp(sta, sta->sta.addr, tid, dialog_token, status, 1, buf_size, - timeout); + timeout, addbaext); } static void __ieee80211_start_rx_ba_session(struct sta_info *sta, u8 dialog_token, u16 timeout, u16 start_seq_num, u16 ba_policy, u16 tid, u16 buf_size, bool tx, - bool auto_seq) + bool auto_seq, + const struct ieee80211_addba_ext_ie *addbaext) { mutex_lock(&sta->ampdu_mlme.mtx); ___ieee80211_start_rx_ba_session(sta, dialog_token, timeout, start_seq_num, ba_policy, tid, - buf_size, tx, auto_seq); + buf_size, tx, auto_seq, addbaext); mutex_unlock(&sta->ampdu_mlme.mtx); } @@ -434,7 +476,9 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, size_t len) { u16 capab, tid, timeout, ba_policy, buf_size, start_seq_num; + struct ieee802_11_elems elems = { 0 }; u8 dialog_token; + int ies_len; /* extract session parameters 
from addba request frame */ dialog_token = mgmt->u.action.u.addba_req.dialog_token; @@ -447,9 +491,19 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, tid = (capab & IEEE80211_ADDBA_PARAM_TID_MASK) >> 2; buf_size = (capab & IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK) >> 6; + ies_len = len - offsetof(struct ieee80211_mgmt, + u.action.u.addba_req.variable); + if (ies_len) { + ieee802_11_parse_elems(mgmt->u.action.u.addba_req.variable, + ies_len, true, &elems, mgmt->bssid, NULL); + if (elems.parse_error) + return; + } + __ieee80211_start_rx_ba_session(sta, dialog_token, timeout, start_seq_num, ba_policy, tid, - buf_size, true, false); + buf_size, true, false, + elems.addba_ext_ie); } void ieee80211_manage_rx_ba_offl(struct ieee80211_vif *vif, diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 4105c97c7ba1..70739e746c13 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -980,7 +980,8 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, BSS_CHANGED_SSID | BSS_CHANGED_P2P_PS | BSS_CHANGED_TXPOWER | - BSS_CHANGED_TWT; + BSS_CHANGED_TWT | + BSS_CHANGED_HE_OBSS_PD; int err; int prev_beacon_int; @@ -1051,6 +1052,8 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, sdata->vif.bss_conf.enable_beacon = true; sdata->vif.bss_conf.allow_p2p_go_ps = sdata->vif.p2p; sdata->vif.bss_conf.twt_responder = params->twt_responder; + memcpy(&sdata->vif.bss_conf.he_obss_pd, &params->he_obss_pd, + sizeof(struct ieee80211_he_obss_pd)); sdata->vif.bss_conf.ssid_len = params->ssid_len; if (params->ssid_len) @@ -1542,7 +1545,7 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, if (ether_addr_equal(mac, sdata->vif.addr)) return -EINVAL; - if (is_multicast_ether_addr(mac)) + if (!is_valid_ether_addr(mac)) return -EINVAL; if (params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER) && diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index 2e7f75938c51..568b3b276931 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -271,8 +271,7 @@ static const char *hw_flag_names[] = { FLAG(TX_STATUS_NO_AMPDU_LEN), FLAG(SUPPORTS_MULTI_BSSID), FLAG(SUPPORTS_ONLY_HE_MULTI_BSSID), - FLAG(EXT_KEY_ID_NATIVE), - FLAG(NO_AMPDU_KEYBORDER_SUPPORT), + FLAG(AMPDU_KEYBORDER_SUPPORT), #undef FLAG }; diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index c2d8b5451a5e..2c9b3eb8b652 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -692,14 +692,16 @@ static inline int drv_remain_on_channel(struct ieee80211_local *local, return ret; } -static inline int drv_cancel_remain_on_channel(struct ieee80211_local *local) +static inline int +drv_cancel_remain_on_channel(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata) { int ret; might_sleep(); - trace_drv_cancel_remain_on_channel(local); - ret = local->ops->cancel_remain_on_channel(&local->hw); + trace_drv_cancel_remain_on_channel(local, sdata); + ret = local->ops->cancel_remain_on_channel(&local->hw, &sdata->vif); trace_drv_return_int(local, ret); return ret; diff --git a/net/mac80211/he.c b/net/mac80211/he.c index 219650591c79..736da0035135 100644 --- a/net/mac80211/he.c +++ b/net/mac80211/he.c @@ -50,3 +50,43 @@ ieee80211_he_cap_ie_to_sta_he_cap(struct ieee80211_sub_if_data *sdata, he_cap->has_he = true; } + +void +ieee80211_he_op_ie_to_bss_conf(struct ieee80211_vif *vif, + const struct ieee80211_he_operation *he_op_ie_elem) +{ + struct ieee80211_he_operation *he_operation = + &vif->bss_conf.he_operation; + + if
(!he_op_ie_elem) { + memset(he_operation, 0, sizeof(*he_operation)); + return; + } + + vif->bss_conf.he_operation = *he_op_ie_elem; +} + +void +ieee80211_he_spr_ie_to_bss_conf(struct ieee80211_vif *vif, + const struct ieee80211_he_spr *he_spr_ie_elem) +{ + struct ieee80211_he_obss_pd *he_obss_pd = + &vif->bss_conf.he_obss_pd; + const u8 *data; + + memset(he_obss_pd, 0, sizeof(*he_obss_pd)); + + if (!he_spr_ie_elem) + return; + data = he_spr_ie_elem->optional; + + if (he_spr_ie_elem->he_sr_control & + IEEE80211_HE_SPR_NON_SRG_OFFSET_PRESENT) + data++; + if (he_spr_ie_elem->he_sr_control & + IEEE80211_HE_SPR_SRG_INFORMATION_PRESENT) { + he_obss_pd->max_offset = *data++; + he_obss_pd->min_offset = *data++; + he_obss_pd->enable = true; + } +} diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index d5a500b2a448..a2e4d6b8fd98 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -359,7 +359,7 @@ void ieee80211_ba_session_work(struct work_struct *work) sta->ampdu_mlme.tid_rx_manage_offl)) ___ieee80211_start_rx_ba_session(sta, 0, 0, 0, 1, tid, IEEE80211_MAX_AMPDU_BUF_HT, - false, true); + false, true, NULL); if (test_and_clear_bit(tid + IEEE80211_NUM_TIDS, sta->ampdu_mlme.tid_rx_manage_offl)) diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index f00dca056295..0a6ff01c68a9 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -1252,6 +1252,7 @@ void ieee80211_ibss_rx_no_sta(struct ieee80211_sub_if_data *sdata, static void ieee80211_ibss_sta_expire(struct ieee80211_sub_if_data *sdata) { + struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; struct ieee80211_local *local = sdata->local; struct sta_info *sta, *tmp; unsigned long exp_time = IEEE80211_IBSS_INACTIVITY_LIMIT; @@ -1268,10 +1269,17 @@ static void ieee80211_ibss_sta_expire(struct ieee80211_sub_if_data *sdata) if (time_is_before_jiffies(last_active + exp_time) || (time_is_before_jiffies(last_active + exp_rsn) && sta->sta_state != IEEE80211_STA_AUTHORIZED)) { + u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN]; + sta_dbg(sta->sdata, "expiring inactive %sSTA %pM\n", sta->sta_state != IEEE80211_STA_AUTHORIZED ? 
"not authorized " : "", sta->sta.addr); + ieee80211_send_deauth_disassoc(sdata, sta->sta.addr, + ifibss->bssid, + IEEE80211_STYPE_DEAUTH, + WLAN_REASON_DEAUTH_LEAVING, + true, frame_buf); WARN_ON(__sta_info_destroy(sta)); } } diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 004e2e3adb88..05406e9c05b3 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1480,6 +1480,7 @@ struct ieee802_11_elems { const struct ieee80211_meshconf_ie *mesh_config; const u8 *he_cap; const struct ieee80211_he_operation *he_operation; + const struct ieee80211_he_spr *he_spr; const struct ieee80211_mu_edca_param_set *mu_edca_param_set; const u8 *uora_element; const u8 *mesh_id; @@ -1506,6 +1507,7 @@ struct ieee802_11_elems { u8 max_bssid_indicator; u8 dtim_count; u8 dtim_period; + const struct ieee80211_addba_ext_ie *addba_ext_ie; /* length of them, respectively */ u8 ext_capab_len; @@ -1767,7 +1769,8 @@ ieee80211_build_data_template(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, u32 info_flags); void ieee80211_tx_monitor(struct ieee80211_local *local, struct sk_buff *skb, struct ieee80211_supported_band *sband, - int retry_count, int shift, bool send_to_cooked); + int retry_count, int shift, bool send_to_cooked, + struct ieee80211_tx_status *status); void ieee80211_check_fast_xmit(struct sta_info *sta); void ieee80211_check_fast_xmit_all(struct ieee80211_local *local); @@ -1804,7 +1807,8 @@ void __ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, void ___ieee80211_start_rx_ba_session(struct sta_info *sta, u8 dialog_token, u16 timeout, u16 start_seq_num, u16 ba_policy, u16 tid, - u16 buf_size, bool tx, bool auto_seq); + u16 buf_size, bool tx, bool auto_seq, + const struct ieee80211_addba_ext_ie *addbaext); void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta, enum ieee80211_agg_stop_reason reason); void ieee80211_process_delba(struct ieee80211_sub_if_data *sdata, @@ -1869,6 +1873,13 @@ ieee80211_he_cap_ie_to_sta_he_cap(struct ieee80211_sub_if_data *sdata, struct ieee80211_supported_band *sband, const u8 *he_cap_ie, u8 he_cap_len, struct sta_info *sta); +void +ieee80211_he_spr_ie_to_bss_conf(struct ieee80211_vif *vif, + const struct ieee80211_he_spr *he_spr_ie_elem); + +void +ieee80211_he_op_ie_to_bss_conf(struct ieee80211_vif *vif, + const struct ieee80211_he_operation *he_op_ie_elem); /* Spectrum management */ void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata, @@ -2088,7 +2099,8 @@ void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata, const u8 *da, const u8 *key, u8 key_len, u8 key_idx, u32 tx_flags); void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata, - const u8 *bssid, u16 stype, u16 reason, + const u8 *da, const u8 *bssid, + u16 stype, u16 reason, bool send_frame, u8 *frame_buf); enum { @@ -2133,9 +2145,11 @@ u8 *ieee80211_ie_build_vht_cap(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap, u32 cap); u8 *ieee80211_ie_build_vht_oper(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap, const struct cfg80211_chan_def *chandef); +u8 ieee80211_ie_len_he_cap(struct ieee80211_sub_if_data *sdata, u8 iftype); u8 *ieee80211_ie_build_he_cap(u8 *pos, const struct ieee80211_sta_he_cap *he_cap, u8 *end); +u8 *ieee80211_ie_build_he_oper(u8 *pos); int ieee80211_parse_bitrates(struct cfg80211_chan_def *chandef, const struct ieee80211_supported_band *sband, const u8 *srates, int srates_len, u32 *rates); diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 8dc6580e1787..af8b09214786 100644 --- 
a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1876,7 +1876,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, /* MTU range: 256 - 2304 */ ndev->min_mtu = 256; - ndev->max_mtu = IEEE80211_MAX_DATA_LEN; + ndev->max_mtu = local->hw.max_mtu; ret = register_netdevice(ndev); if (ret) { diff --git a/net/mac80211/key.c b/net/mac80211/key.c index dd60f6428049..0f889b919b06 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -6,6 +6,7 @@ * Copyright 2007-2008 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright 2015-2017 Intel Deutschland GmbH + * Copyright 2018-2019 Intel Corporation */ #include <linux/if_ether.h> @@ -270,7 +271,7 @@ int ieee80211_set_tx_key(struct ieee80211_key *key) sta->ptk_idx = key->conf.keyidx; - if (ieee80211_hw_check(&local->hw, NO_AMPDU_KEYBORDER_SUPPORT)) + if (!ieee80211_hw_check(&local->hw, AMPDU_KEYBORDER_SUPPORT)) clear_sta_flag(sta, WLAN_STA_BLOCK_BA); ieee80211_check_fast_xmit(sta); @@ -290,15 +291,15 @@ static void ieee80211_pairwise_rekey(struct ieee80211_key *old, /* Extended Key ID key install, initial one or rekey */ if (sta->ptk_idx != INVALID_PTK_KEYIDX && - ieee80211_hw_check(&local->hw, - NO_AMPDU_KEYBORDER_SUPPORT)) { + !ieee80211_hw_check(&local->hw, AMPDU_KEYBORDER_SUPPORT)) { /* Aggregation Sessions with Extended Key ID must not * mix MPDUs with different keyIDs within one A-MPDU. - * Tear down any running Tx aggregation and all new - * Rx/Tx aggregation request during rekey if the driver - * asks us to do so. (Blocking Tx only would be - * sufficient but WLAN_STA_BLOCK_BA gets the job done - * for the few ms we need it.) + * Tear down running Tx aggregation sessions and block + * new Rx/Tx aggregation requests during rekey to + * ensure there are no A-MPDUs when the driver is not + * supporting A-MPDU key borders. (Blocking Tx only + * would be sufficient but WLAN_STA_BLOCK_BA gets the + * job done for the few ms we need it.) */ set_sta_flag(sta, WLAN_STA_BLOCK_BA); mutex_lock(&sta->ampdu_mlme.mtx); @@ -781,9 +782,8 @@ int ieee80211_key_link(struct ieee80211_key *key, /* The rekey code assumes that the old and new key are using * the same cipher. Enforce the assumption for pairwise keys. 
*/ - if (key && - ((alt_key && alt_key->conf.cipher != key->conf.cipher) || - (old_key && old_key->conf.cipher != key->conf.cipher))) + if ((alt_key && alt_key->conf.cipher != key->conf.cipher) || + (old_key && old_key->conf.cipher != key->conf.cipher)) goto out; } else if (sta) { old_key = key_mtx_dereference(sdata->local, sta->gtk[idx]); @@ -793,7 +793,7 @@ int ieee80211_key_link(struct ieee80211_key *key, /* Non-pairwise keys must also not switch the cipher on rekey */ if (!pairwise) { - if (key && old_key && old_key->conf.cipher != key->conf.cipher) + if (old_key && old_key->conf.cipher != key->conf.cipher) goto out; } @@ -843,46 +843,30 @@ void ieee80211_key_free(struct ieee80211_key *key, bool delay_tailroom) ieee80211_key_destroy(key, delay_tailroom); } -void ieee80211_enable_keys(struct ieee80211_sub_if_data *sdata) +void ieee80211_reenable_keys(struct ieee80211_sub_if_data *sdata) { struct ieee80211_key *key; struct ieee80211_sub_if_data *vlan; ASSERT_RTNL(); - if (WARN_ON(!ieee80211_sdata_running(sdata))) - return; - - mutex_lock(&sdata->local->key_mtx); - - WARN_ON_ONCE(sdata->crypto_tx_tailroom_needed_cnt || - sdata->crypto_tx_tailroom_pending_dec); - - if (sdata->vif.type == NL80211_IFTYPE_AP) { - list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) - WARN_ON_ONCE(vlan->crypto_tx_tailroom_needed_cnt || - vlan->crypto_tx_tailroom_pending_dec); - } - - list_for_each_entry(key, &sdata->key_list, list) { - increment_tailroom_need_count(sdata); - ieee80211_key_enable_hw_accel(key); - } - - mutex_unlock(&sdata->local->key_mtx); -} - -void ieee80211_reset_crypto_tx_tailroom(struct ieee80211_sub_if_data *sdata) -{ - struct ieee80211_sub_if_data *vlan; - mutex_lock(&sdata->local->key_mtx); sdata->crypto_tx_tailroom_needed_cnt = 0; + sdata->crypto_tx_tailroom_pending_dec = 0; if (sdata->vif.type == NL80211_IFTYPE_AP) { - list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) + list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) { vlan->crypto_tx_tailroom_needed_cnt = 0; + vlan->crypto_tx_tailroom_pending_dec = 0; + } + } + + if (ieee80211_sdata_running(sdata)) { + list_for_each_entry(key, &sdata->key_list, list) { + increment_tailroom_need_count(sdata); + ieee80211_key_enable_hw_accel(key); + } } mutex_unlock(&sdata->local->key_mtx); diff --git a/net/mac80211/key.h b/net/mac80211/key.h index b8b9cd743bf4..d6d6e89cf7dd 100644 --- a/net/mac80211/key.h +++ b/net/mac80211/key.h @@ -2,6 +2,7 @@ /* * Copyright 2002-2004, Instant802 Networks, Inc. * Copyright 2005, Devicescape Software, Inc. 
+ * Copyright (C) 2019 Intel Corporation */ #ifndef IEEE80211_KEY_H @@ -156,8 +157,7 @@ void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata, bool force_synchronize); void ieee80211_free_sta_keys(struct ieee80211_local *local, struct sta_info *sta); -void ieee80211_enable_keys(struct ieee80211_sub_if_data *sdata); -void ieee80211_reset_crypto_tx_tailroom(struct ieee80211_sub_if_data *sdata); +void ieee80211_reenable_keys(struct ieee80211_sub_if_data *sdata); #define key_mtx_dereference(local, ref) \ rcu_dereference_protected(ref, lockdep_is_held(&((local)->key_mtx))) diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 4c2702f128f3..aba094b4ccfc 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -639,6 +639,7 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len, IEEE80211_RADIOTAP_VHT_KNOWN_BANDWIDTH; local->hw.uapsd_queues = IEEE80211_DEFAULT_UAPSD_QUEUES; local->hw.uapsd_max_sp_len = IEEE80211_DEFAULT_MAX_SP_LEN; + local->hw.max_mtu = IEEE80211_MAX_DATA_LEN; local->user_power_level = IEEE80211_UNSET_POWER_LEVEL; wiphy->ht_capa_mod_mask = &mac80211_ht_capa_mod_mask; wiphy->vht_capa_mod_mask = &mac80211_vht_capa_mod_mask; @@ -1048,21 +1049,15 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) } } - /* Enable Extended Key IDs when driver allowed it, or when it - * supports neither HW crypto nor A-MPDUs + /* Mac80211 and therefore all drivers using SW crypto only + * are able to handle PTK rekeys and Extended Key ID. */ - if ((!local->ops->set_key && - !ieee80211_hw_check(hw, AMPDU_AGGREGATION)) || - ieee80211_hw_check(&local->hw, EXT_KEY_ID_NATIVE)) - wiphy_ext_feature_set(local->hw.wiphy, - NL80211_EXT_FEATURE_EXT_KEY_ID); - - /* Mac80211 and therefore all cards only using SW crypto are able to - * handle PTK rekeys correctly - */ - if (!local->ops->set_key) + if (!local->ops->set_key) { wiphy_ext_feature_set(local->hw.wiphy, NL80211_EXT_FEATURE_CAN_REPLACE_PTK0); + wiphy_ext_feature_set(local->hw.wiphy, + NL80211_EXT_FEATURE_EXT_KEY_ID); + } /* * Calculate scan IE length -- we need this to alloc diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 2e7fa743c892..d09b3c789314 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -532,6 +532,61 @@ int mesh_add_vht_oper_ie(struct ieee80211_sub_if_data *sdata, return 0; } +int mesh_add_he_cap_ie(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb, u8 ie_len) +{ + const struct ieee80211_sta_he_cap *he_cap; + struct ieee80211_supported_band *sband; + u8 *pos; + + sband = ieee80211_get_sband(sdata); + if (!sband) + return -EINVAL; + + he_cap = ieee80211_get_he_iftype_cap(sband, NL80211_IFTYPE_MESH_POINT); + + if (!he_cap || + sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_20_NOHT || + sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_5 || + sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_10) + return 0; + + if (skb_tailroom(skb) < ie_len) + return -ENOMEM; + + pos = skb_put(skb, ie_len); + ieee80211_ie_build_he_cap(pos, he_cap, pos + ie_len); + + return 0; +} + +int mesh_add_he_oper_ie(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb) +{ + const struct ieee80211_sta_he_cap *he_cap; + struct ieee80211_supported_band *sband; + u8 *pos; + + sband = ieee80211_get_sband(sdata); + if (!sband) + return -EINVAL; + + he_cap = ieee80211_get_he_iftype_cap(sband, NL80211_IFTYPE_MESH_POINT); + if (!he_cap || + sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_20_NOHT || + sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_5 || + 
sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_10) + return 0; + + if (skb_tailroom(skb) < 2 + 1 + sizeof(struct ieee80211_he_operation)) + return -ENOMEM; + + pos = skb_put(skb, 2 + 1 + sizeof(struct ieee80211_he_operation)); + ieee80211_ie_build_he_oper(pos); + + return 0; +} + static void ieee80211_mesh_path_timer(struct timer_list *t) { struct ieee80211_sub_if_data *sdata = @@ -677,6 +732,7 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh) struct ieee80211_chanctx_conf *chanctx_conf; struct mesh_csa_settings *csa; enum nl80211_band band; + u8 ie_len_he_cap; u8 *pos; struct ieee80211_sub_if_data *sdata; int hdr_len = offsetofend(struct ieee80211_mgmt, u.beacon); @@ -687,6 +743,8 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh) band = chanctx_conf->def.chan->band; rcu_read_unlock(); + ie_len_he_cap = ieee80211_ie_len_he_cap(sdata, + NL80211_IFTYPE_MESH_POINT); head_len = hdr_len + 2 + /* NULL SSID */ /* Channel Switch Announcement */ @@ -706,6 +764,8 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh) 2 + sizeof(__le16) + /* awake window */ 2 + sizeof(struct ieee80211_vht_cap) + 2 + sizeof(struct ieee80211_vht_operation) + + ie_len_he_cap + + 2 + 1 + sizeof(struct ieee80211_he_operation) + ifmsh->ie_len; bcn = kzalloc(sizeof(*bcn) + head_len + tail_len, GFP_KERNEL); @@ -823,6 +883,8 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh) mesh_add_awake_window_ie(sdata, skb) || mesh_add_vht_cap_ie(sdata, skb) || mesh_add_vht_oper_ie(sdata, skb) || + mesh_add_he_cap_ie(sdata, skb, ie_len_he_cap) || + mesh_add_he_oper_ie(sdata, skb) || mesh_add_vendor_ies(sdata, skb)) goto out_free; diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h index 94d57cce70da..953f720754e8 100644 --- a/net/mac80211/mesh.h +++ b/net/mac80211/mesh.h @@ -218,6 +218,10 @@ int mesh_add_vht_cap_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb); int mesh_add_vht_oper_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb); +int mesh_add_he_cap_ie(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb, u8 ie_len); +int mesh_add_he_oper_ie(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb); void mesh_rmc_free(struct ieee80211_sub_if_data *sdata); int mesh_rmc_init(struct ieee80211_sub_if_data *sdata); void ieee80211s_init(void); diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index dd3aefd052a9..737c5f4dbf52 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -218,9 +218,12 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, bool include_plid = false; u16 peering_proto = 0; u8 *pos, ie_len = 4; + u8 ie_len_he_cap; int hdr_len = offsetofend(struct ieee80211_mgmt, u.action.u.self_prot); int err = -ENOMEM; + ie_len_he_cap = ieee80211_ie_len_he_cap(sdata, + NL80211_IFTYPE_MESH_POINT); skb = dev_alloc_skb(local->tx_headroom + hdr_len + 2 + /* capability info */ @@ -233,6 +236,8 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, 2 + sizeof(struct ieee80211_ht_operation) + 2 + sizeof(struct ieee80211_vht_cap) + 2 + sizeof(struct ieee80211_vht_operation) + + ie_len_he_cap + + 2 + 1 + sizeof(struct ieee80211_he_operation) + 2 + 8 + /* peering IE */ sdata->u.mesh.ie_len); if (!skb) @@ -321,7 +326,9 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, if (mesh_add_ht_cap_ie(sdata, skb) || mesh_add_ht_oper_ie(sdata, skb) || mesh_add_vht_cap_ie(sdata, skb) || - mesh_add_vht_oper_ie(sdata, skb)) + mesh_add_vht_oper_ie(sdata, skb) || + 
mesh_add_he_cap_ie(sdata, skb, ie_len_he_cap) || + mesh_add_he_oper_ie(sdata, skb)) goto free; } @@ -433,6 +440,9 @@ static void mesh_sta_info_init(struct ieee80211_sub_if_data *sdata, ieee80211_vht_cap_ie_to_sta_vht_cap(sdata, sband, elems->vht_cap_elem, sta); + ieee80211_he_cap_ie_to_sta_he_cap(sdata, sband, elems->he_cap, + elems->he_cap_len, sta); + if (bw != sta->sta.bandwidth) changed |= IEEE80211_RC_BW_CHANGED; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 4c888dc9bd81..26a2f49208b6 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -158,10 +158,10 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, memcpy(&sta_ht_cap, &sband->ht_cap, sizeof(sta_ht_cap)); ieee80211_apply_htcap_overrides(sdata, &sta_ht_cap); + memset(chandef, 0, sizeof(struct cfg80211_chan_def)); chandef->chan = channel; chandef->width = NL80211_CHAN_WIDTH_20_NOHT; chandef->center_freq1 = channel->center_freq; - chandef->center_freq2 = 0; if (!ht_oper || !sta_ht_cap.ht_supported) { ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT; @@ -2278,8 +2278,9 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, !ifmgd->have_beacon) drv_mgd_prepare_tx(sdata->local, sdata, 0); - ieee80211_send_deauth_disassoc(sdata, ifmgd->bssid, stype, - reason, tx, frame_buf); + ieee80211_send_deauth_disassoc(sdata, ifmgd->bssid, + ifmgd->bssid, stype, reason, + tx, frame_buf); } /* flush out frame - make sure the deauth was actually sent */ @@ -2522,7 +2523,10 @@ static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata) if (ieee80211_hw_check(&sdata->local->hw, REPORTS_TX_ACK_STATUS)) { ifmgd->nullfunc_failed = false; - ieee80211_send_nullfunc(sdata->local, sdata, false); + if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HE)) + ifmgd->probe_send_count--; + else + ieee80211_send_nullfunc(sdata->local, sdata, false); } else { int ssid_len; @@ -3391,6 +3395,8 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, if (elems.uora_element) bss_conf->uora_ocw_range = elems.uora_element[0]; + ieee80211_he_op_ie_to_bss_conf(&sdata->vif, elems.he_operation); + ieee80211_he_spr_ie_to_bss_conf(&sdata->vif, elems.he_spr); /* TODO: OPEN: what happens if BSS color disable is set? */ } @@ -4504,7 +4510,7 @@ void ieee80211_mgd_quiesce(struct ieee80211_sub_if_data *sdata) * cfg80211 won't know and won't actually abort those attempts, * thus we need to do that ourselves. 
*/ - ieee80211_send_deauth_disassoc(sdata, bssid, + ieee80211_send_deauth_disassoc(sdata, bssid, bssid, IEEE80211_STYPE_DEAUTH, WLAN_REASON_DEAUTH_LEAVING, false, frame_buf); @@ -5291,7 +5297,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, ifmgd->flags |= IEEE80211_STA_DISABLE_VHT; ifmgd->flags |= IEEE80211_STA_DISABLE_HE; netdev_info(sdata->dev, - "disabling HE/HT/VHT due to WEP/TKIP use\n"); + "disabling HT/VHT/HE due to WEP/TKIP use\n"); } } @@ -5545,7 +5551,7 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, ieee80211_get_reason_code_string(req->reason_code)); drv_mgd_prepare_tx(sdata->local, sdata, 0); - ieee80211_send_deauth_disassoc(sdata, req->bssid, + ieee80211_send_deauth_disassoc(sdata, req->bssid, req->bssid, IEEE80211_STYPE_DEAUTH, req->reason_code, tx, frame_buf); @@ -5565,7 +5571,7 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, ieee80211_get_reason_code_string(req->reason_code)); drv_mgd_prepare_tx(sdata->local, sdata, 0); - ieee80211_send_deauth_disassoc(sdata, req->bssid, + ieee80211_send_deauth_disassoc(sdata, req->bssid, req->bssid, IEEE80211_STYPE_DEAUTH, req->reason_code, tx, frame_buf); diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index 60ef8972b254..c710504ccf1a 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -8,6 +8,7 @@ * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> * Copyright 2007, Michael Wu <flamingice@sourmilk.net> * Copyright 2009 Johannes Berg <johannes@sipsolutions.net> + * Copyright (C) 2019 Intel Corporation */ #include <linux/export.h> #include <net/mac80211.h> @@ -732,7 +733,7 @@ static int ieee80211_cancel_roc(struct ieee80211_local *local, } if (local->ops->remain_on_channel) { - ret = drv_cancel_remain_on_channel(local); + ret = drv_cancel_remain_on_channel(local, roc->sdata); if (WARN_ON_ONCE(ret)) { mutex_unlock(&local->mtx); return ret; @@ -991,7 +992,7 @@ void ieee80211_roc_purge(struct ieee80211_local *local, if (roc->started) { if (local->ops->remain_on_channel) { /* can race, so ignore return value */ - drv_cancel_remain_on_channel(local); + drv_cancel_remain_on_channel(local, sdata); ieee80211_roc_notify_destroy(roc); } else { roc->abort = true; diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h index 5d5348bc41ec..5397c6dad056 100644 --- a/net/mac80211/rate.h +++ b/net/mac80211/rate.h @@ -60,15 +60,6 @@ static inline void rate_control_add_sta_debugfs(struct sta_info *sta) #endif } -static inline void rate_control_remove_sta_debugfs(struct sta_info *sta) -{ -#ifdef CONFIG_MAC80211_DEBUGFS - struct rate_control_ref *ref = sta->rate_ctrl; - if (ref && ref->ops->remove_sta_debugfs) - ref->ops->remove_sta_debugfs(ref->priv, sta->rate_ctrl_priv); -#endif -} - void ieee80211_check_rate_mask(struct ieee80211_sub_if_data *sdata); /* Get a reference to the rate control algorithm. 
If `name' is NULL, get the diff --git a/net/mac80211/rc80211_minstrel.h b/net/mac80211/rc80211_minstrel.h index 3c96a853adbd..51d8b2c846e7 100644 --- a/net/mac80211/rc80211_minstrel.h +++ b/net/mac80211/rc80211_minstrel.h @@ -95,6 +95,7 @@ struct minstrel_sta_info { struct minstrel_priv { struct ieee80211_hw *hw; bool has_mrr; + u32 sample_switch; unsigned int cw_min; unsigned int cw_max; unsigned int max_retry; diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index 5a882da82f0e..0ef2633349b5 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -18,6 +18,8 @@ #define AVG_AMPDU_SIZE 16 #define AVG_PKT_SIZE 1200 +#define SAMPLE_SWITCH_THR 100 + /* Number of bits for an average sized packet */ #define MCS_NBITS ((AVG_PKT_SIZE * AVG_AMPDU_SIZE) << 3) @@ -58,6 +60,7 @@ [GROUP_IDX(_streams, _sgi, _ht40)] = { \ .streams = _streams, \ .shift = _s, \ + .bw = _ht40, \ .flags = \ IEEE80211_TX_RC_MCS | \ (_sgi ? IEEE80211_TX_RC_SHORT_GI : 0) | \ @@ -94,6 +97,7 @@ [VHT_GROUP_IDX(_streams, _sgi, _bw)] = { \ .streams = _streams, \ .shift = _s, \ + .bw = _bw, \ .flags = \ IEEE80211_TX_RC_VHT_MCS | \ (_sgi ? IEEE80211_TX_RC_SHORT_GI : 0) | \ @@ -486,7 +490,7 @@ minstrel_ht_assign_best_tp_rates(struct minstrel_ht_sta *mi, tmp_prob = mi->groups[tmp_group].rates[tmp_idx].prob_ewma; tmp_mcs_tp = minstrel_ht_get_tp_avg(mi, tmp_group, tmp_idx, tmp_prob); - if (tmp_cck_tp > tmp_mcs_tp) { + if (tmp_cck_tp_rate && tmp_cck_tp > tmp_mcs_tp) { for(i = 0; i < MAX_THR_RATES; i++) { minstrel_ht_sort_best_tp_rates(mi, tmp_cck_tp_rate[i], tmp_mcs_tp_rate); @@ -526,6 +530,133 @@ minstrel_ht_prob_rate_reduce_streams(struct minstrel_ht_sta *mi) } } +static inline int +minstrel_get_duration(int index) +{ + const struct mcs_group *group = &minstrel_mcs_groups[index / MCS_GROUP_RATES]; + unsigned int duration = group->duration[index % MCS_GROUP_RATES]; + return duration << group->shift; +} + +static bool +minstrel_ht_probe_group(struct minstrel_ht_sta *mi, const struct mcs_group *tp_group, + int tp_idx, const struct mcs_group *group) +{ + if (group->bw < tp_group->bw) + return false; + + if (group->streams == tp_group->streams) + return true; + + if (tp_idx < 4 && group->streams == tp_group->streams - 1) + return true; + + return group->streams == tp_group->streams + 1; +} + +static void +minstrel_ht_find_probe_rates(struct minstrel_ht_sta *mi, u16 *rates, int *n_rates, + bool faster_rate) +{ + const struct mcs_group *group, *tp_group; + int i, g, max_dur; + int tp_idx; + + tp_group = &minstrel_mcs_groups[mi->max_tp_rate[0] / MCS_GROUP_RATES]; + tp_idx = mi->max_tp_rate[0] % MCS_GROUP_RATES; + + max_dur = minstrel_get_duration(mi->max_tp_rate[0]); + if (faster_rate) + max_dur -= max_dur / 16; + + for (g = 0; g < MINSTREL_GROUPS_NB; g++) { + u16 supported = mi->supported[g]; + + if (!supported) + continue; + + group = &minstrel_mcs_groups[g]; + if (!minstrel_ht_probe_group(mi, tp_group, tp_idx, group)) + continue; + + for (i = 0; supported; supported >>= 1, i++) { + int idx; + + if (!(supported & 1)) + continue; + + if ((group->duration[i] << group->shift) > max_dur) + continue; + + idx = g * MCS_GROUP_RATES + i; + if (idx == mi->max_tp_rate[0]) + continue; + + rates[(*n_rates)++] = idx; + break; + } + } +} + +static void +minstrel_ht_rate_sample_switch(struct minstrel_priv *mp, + struct minstrel_ht_sta *mi) +{ + struct minstrel_rate_stats *mrs; + u16 rates[MINSTREL_GROUPS_NB]; + int n_rates = 0; + int probe_rate = 0; + bool faster_rate; + int i; + u8 
random; + + /* + * Use rate switching instead of probing packets for devices with + * little control over retry fallback behavior + */ + if (mp->hw->max_rates > 1) + return; + + /* + * If the current EWMA prob is >75%, look for a rate that's 6.25% + * faster than the max tp rate. + * If that fails, look again for a rate that is at least as fast + */ + mrs = minstrel_get_ratestats(mi, mi->max_tp_rate[0]); + faster_rate = mrs->prob_ewma > MINSTREL_FRAC(75, 100); + minstrel_ht_find_probe_rates(mi, rates, &n_rates, faster_rate); + if (!n_rates && faster_rate) + minstrel_ht_find_probe_rates(mi, rates, &n_rates, false); + + /* If no suitable rate was found, try to pick the next one in the group */ + if (!n_rates) { + int g_idx = mi->max_tp_rate[0] / MCS_GROUP_RATES; + u16 supported = mi->supported[g_idx]; + + supported >>= mi->max_tp_rate[0] % MCS_GROUP_RATES; + for (i = 0; supported; supported >>= 1, i++) { + if (!(supported & 1)) + continue; + + probe_rate = mi->max_tp_rate[0] + i; + goto out; + } + + return; + } + + i = 0; + if (n_rates > 1) { + random = prandom_u32(); + i = random % n_rates; + } + probe_rate = rates[i]; + +out: + mi->sample_rate = probe_rate; + mi->sample_mode = MINSTREL_SAMPLE_ACTIVE; +} + /* * Update rate statistics and select new primary rates * @@ -536,7 +667,8 @@ minstrel_ht_prob_rate_reduce_streams(struct minstrel_ht_sta *mi) * higher throughput rates, even if the probablity is a bit lower */ static void -minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) +minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, + bool sample) { struct minstrel_mcs_group_data *mg; struct minstrel_rate_stats *mrs; @@ -544,6 +676,18 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) u16 tmp_mcs_tp_rate[MAX_THR_RATES], tmp_group_tp_rate[MAX_THR_RATES]; u16 tmp_cck_tp_rate[MAX_THR_RATES], index; + mi->sample_mode = MINSTREL_SAMPLE_IDLE; + + if (sample) { + mi->total_packets_cur = mi->total_packets - + mi->total_packets_last; + mi->total_packets_last = mi->total_packets; + } + if (!mp->sample_switch) + sample = false; + if (mi->total_packets_cur < SAMPLE_SWITCH_THR && mp->sample_switch != 1) + sample = false; + if (mi->ampdu_packets > 0) { if (!ieee80211_hw_check(mp->hw, TX_STATUS_NO_AMPDU_LEN)) mi->avg_ampdu_len = minstrel_ewma(mi->avg_ampdu_len, @@ -558,11 +702,19 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) mi->sample_slow = 0; mi->sample_count = 0; - /* Initialize global rate indexes */ - for(j = 0; j < MAX_THR_RATES; j++){ - tmp_mcs_tp_rate[j] = 0; - tmp_cck_tp_rate[j] = 0; - } + memset(tmp_mcs_tp_rate, 0, sizeof(tmp_mcs_tp_rate)); + memset(tmp_cck_tp_rate, 0, sizeof(tmp_cck_tp_rate)); + if (mi->supported[MINSTREL_CCK_GROUP]) + for (j = 0; j < ARRAY_SIZE(tmp_cck_tp_rate); j++) + tmp_cck_tp_rate[j] = MINSTREL_CCK_GROUP * MCS_GROUP_RATES; + + if (mi->supported[MINSTREL_VHT_GROUP_0]) + index = MINSTREL_VHT_GROUP_0 * MCS_GROUP_RATES; + else + index = MINSTREL_HT_GROUP_0 * MCS_GROUP_RATES; + + for (j = 0; j < ARRAY_SIZE(tmp_mcs_tp_rate); j++) + tmp_mcs_tp_rate[j] = index; /* Find best rate sets within all MCS groups*/ for (group = 0; group < ARRAY_SIZE(minstrel_mcs_groups); group++) { @@ -575,7 +727,7 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) /* (re)Initialize group rate indexes */ for(j = 0; j < MAX_THR_RATES; j++) - tmp_group_tp_rate[j] = group; + tmp_group_tp_rate[j] = MCS_GROUP_RATES * group; for (i = 0; i < MCS_GROUP_RATES; i++) { 
if (!(mi->supported[group] & BIT(i))) @@ -622,12 +774,16 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) /* try to sample all available rates during each interval */ mi->sample_count *= 8; + if (sample) + minstrel_ht_rate_sample_switch(mp, mi); + #ifdef CONFIG_MAC80211_DEBUGFS /* use fixed index if set */ if (mp->fixed_rate_idx != -1) { for (i = 0; i < 4; i++) mi->max_tp_rate[i] = mp->fixed_rate_idx; mi->max_prob_rate = mp->fixed_rate_idx; + mi->sample_mode = MINSTREL_SAMPLE_IDLE; } #endif @@ -731,15 +887,17 @@ minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband, struct minstrel_ht_sta_priv *msp = priv_sta; struct minstrel_ht_sta *mi = &msp->ht; struct ieee80211_tx_rate *ar = info->status.rates; - struct minstrel_rate_stats *rate, *rate2; + struct minstrel_rate_stats *rate, *rate2, *rate_sample = NULL; struct minstrel_priv *mp = priv; bool last, update = false; + bool sample_status = false; int i; if (!msp->is_ht) return mac80211_minstrel.tx_status_ext(priv, sband, &msp->legacy, st); + /* This packet was aggregated but doesn't carry status info */ if ((info->flags & IEEE80211_TX_CTL_AMPDU) && !(info->flags & IEEE80211_TX_STAT_AMPDU)) @@ -765,12 +923,17 @@ minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband, if (info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE) mi->sample_packets += info->status.ampdu_len; + if (mi->sample_mode != MINSTREL_SAMPLE_IDLE) + rate_sample = minstrel_get_ratestats(mi, mi->sample_rate); + last = !minstrel_ht_txstat_valid(mp, &ar[0]); for (i = 0; !last; i++) { last = (i == IEEE80211_TX_MAX_RATES - 1) || !minstrel_ht_txstat_valid(mp, &ar[i + 1]); rate = minstrel_ht_get_stats(mp, mi, &ar[i]); + if (rate == rate_sample) + sample_status = true; if (last) rate->success += info->status.ampdu_ack_len; @@ -778,44 +941,60 @@ minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband, rate->attempts += ar[i].count * info->status.ampdu_len; } - /* - * check for sudden death of spatial multiplexing, - * downgrade to a lower number of streams if necessary. - */ - rate = minstrel_get_ratestats(mi, mi->max_tp_rate[0]); - if (rate->attempts > 30 && - MINSTREL_FRAC(rate->success, rate->attempts) < - MINSTREL_FRAC(20, 100)) { - minstrel_downgrade_rate(mi, &mi->max_tp_rate[0], true); + switch (mi->sample_mode) { + case MINSTREL_SAMPLE_IDLE: + break; + + case MINSTREL_SAMPLE_ACTIVE: + if (!sample_status) + break; + + mi->sample_mode = MINSTREL_SAMPLE_PENDING; update = true; - } + break; + + case MINSTREL_SAMPLE_PENDING: + if (sample_status) + break; - rate2 = minstrel_get_ratestats(mi, mi->max_tp_rate[1]); - if (rate2->attempts > 30 && - MINSTREL_FRAC(rate2->success, rate2->attempts) < - MINSTREL_FRAC(20, 100)) { - minstrel_downgrade_rate(mi, &mi->max_tp_rate[1], false); update = true; + minstrel_ht_update_stats(mp, mi, false); + break; + } + + + if (mp->hw->max_rates > 1) { + /* + * check for sudden death of spatial multiplexing, + * downgrade to a lower number of streams if necessary. 
+ */ + rate = minstrel_get_ratestats(mi, mi->max_tp_rate[0]); + if (rate->attempts > 30 && + MINSTREL_FRAC(rate->success, rate->attempts) < + MINSTREL_FRAC(20, 100)) { + minstrel_downgrade_rate(mi, &mi->max_tp_rate[0], true); + update = true; + } + + rate2 = minstrel_get_ratestats(mi, mi->max_tp_rate[1]); + if (rate2->attempts > 30 && + MINSTREL_FRAC(rate2->success, rate2->attempts) < + MINSTREL_FRAC(20, 100)) { + minstrel_downgrade_rate(mi, &mi->max_tp_rate[1], false); + update = true; + } } if (time_after(jiffies, mi->last_stats_update + (mp->update_interval / 2 * HZ) / 1000)) { update = true; - minstrel_ht_update_stats(mp, mi); + minstrel_ht_update_stats(mp, mi, true); } if (update) minstrel_ht_update_rates(mp, mi); } -static inline int -minstrel_get_duration(int index) -{ - const struct mcs_group *group = &minstrel_mcs_groups[index / MCS_GROUP_RATES]; - unsigned int duration = group->duration[index % MCS_GROUP_RATES]; - return duration << group->shift; -} - static void minstrel_calc_retransmit(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, int index) @@ -980,14 +1159,18 @@ static void minstrel_ht_update_rates(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) { struct ieee80211_sta_rates *rates; + u16 first_rate = mi->max_tp_rate[0]; int i = 0; + if (mi->sample_mode == MINSTREL_SAMPLE_ACTIVE) + first_rate = mi->sample_rate; + rates = kzalloc(sizeof(*rates), GFP_ATOMIC); if (!rates) return; /* Start with max_tp_rate[0] */ - minstrel_ht_set_rate(mp, mi, rates, i++, mi->max_tp_rate[0]); + minstrel_ht_set_rate(mp, mi, rates, i++, first_rate); if (mp->hw->max_rates >= 3) { /* At least 3 tx rates supported, use max_tp_rate[1] next */ @@ -1012,6 +1195,11 @@ minstrel_get_sample_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) int tp_rate1, tp_rate2; int sample_idx = 0; + if (mp->hw->max_rates == 1 && mp->sample_switch && + (mi->total_packets_cur >= SAMPLE_SWITCH_THR || + mp->sample_switch == 1)) + return -1; + if (mi->sample_wait > 0) { mi->sample_wait--; return -1; @@ -1059,6 +1247,21 @@ minstrel_get_sample_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) minstrel_get_duration(mi->max_prob_rate) * 3 < sample_dur) return -1; + + /* + * For devices with no configurable multi-rate retry, skip sampling + * below the per-group max throughput rate, and only use one sampling + * attempt per rate + */ + if (mp->hw->max_rates == 1 && + (minstrel_get_duration(mg->max_group_tp_rate[0]) < sample_dur || + mrs->attempts)) + return -1; + + /* Skip already sampled slow rates */ + if (sample_dur >= minstrel_get_duration(tp_rate1) && mrs->attempts) + return -1; + /* * Make sure that lower rates get sampled only occasionally, * if the link is working perfectly. @@ -1318,7 +1521,7 @@ minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband, mi->supported[MINSTREL_CCK_GROUP] |= mi->cck_supported_short << 4; /* create an initial rate table with the lowest supported rates */ - minstrel_ht_update_stats(mp, mi); + minstrel_ht_update_stats(mp, mi, true); minstrel_ht_update_rates(mp, mi); return; @@ -1436,6 +1639,8 @@ minstrel_ht_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir) if (!mp) return NULL; + mp->sample_switch = -1; + /* contention window settings * Just an approximation. 
Using the per-queue values would complicate * the calculations and is probably unnecessary */ @@ -1467,6 +1672,8 @@ minstrel_ht_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir) mp->fixed_rate_idx = (u32) -1; debugfs_create_u32("fixed_rate_idx", S_IRUGO | S_IWUGO, debugfsdir, &mp->fixed_rate_idx); + debugfs_create_u32("sample_switch", S_IRUGO | S_IWUSR, debugfsdir, + &mp->sample_switch); #endif minstrel_ht_init_cck_rates(mp); diff --git a/net/mac80211/rc80211_minstrel_ht.h b/net/mac80211/rc80211_minstrel_ht.h index 80296268c778..f938701e7ab7 100644 --- a/net/mac80211/rc80211_minstrel_ht.h +++ b/net/mac80211/rc80211_minstrel_ht.h @@ -33,6 +33,7 @@ struct mcs_group { u16 flags; u8 streams; u8 shift; + u8 bw; u16 duration[MCS_GROUP_RATES]; }; @@ -50,6 +51,12 @@ struct minstrel_mcs_group_data { struct minstrel_rate_stats rates[MCS_GROUP_RATES]; }; +enum minstrel_sample_mode { + MINSTREL_SAMPLE_IDLE, + MINSTREL_SAMPLE_ACTIVE, + MINSTREL_SAMPLE_PENDING, +}; + struct minstrel_ht_sta { struct ieee80211_sta *sta; @@ -71,6 +78,8 @@ struct minstrel_ht_sta { unsigned int overhead; unsigned int overhead_rtscts; + unsigned int total_packets_last; + unsigned int total_packets_cur; unsigned int total_packets; unsigned int sample_packets; @@ -82,6 +91,9 @@ struct minstrel_ht_sta { u8 sample_count; u8 sample_slow; + enum minstrel_sample_mode sample_mode; + u16 sample_rate; + /* current MCS group to be sampled */ u8 sample_group; diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 5fb368cc2633..bd11fef2139f 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -1065,7 +1065,6 @@ static void __sta_info_destroy_part2(struct sta_info *sta) cfg80211_del_sta_sinfo(sdata->dev, sta->sta.addr, sinfo, GFP_KERNEL); kfree(sinfo); - rate_control_remove_sta_debugfs(sta); ieee80211_sta_debugfs_remove(sta); cleanup_single_sta(sta); @@ -1962,6 +1961,7 @@ int sta_info_move_state(struct sta_info *sta, case IEEE80211_STA_ASSOC: if (sta->sta_state == IEEE80211_STA_AUTH) { set_bit(WLAN_STA_ASSOC, &sta->_flags); + sta->assoc_at = ktime_get_boottime_ns(); ieee80211_recalc_min_chandef(sta->sdata); if (!sta->sta.support_p2p_ps) ieee80211_recalc_p2p_go_ps_allowed(sta->sdata); @@ -2195,6 +2195,7 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo, BIT_ULL(NL80211_STA_INFO_STA_FLAGS) | BIT_ULL(NL80211_STA_INFO_BSS_PARAM) | BIT_ULL(NL80211_STA_INFO_CONNECTED_TIME) | + BIT_ULL(NL80211_STA_INFO_ASSOC_AT_BOOTTIME) | BIT_ULL(NL80211_STA_INFO_RX_DROP_MISC); if (sdata->vif.type == NL80211_IFTYPE_STATION) { @@ -2203,6 +2204,7 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo, } sinfo->connected_time = ktime_get_seconds() - sta->last_connected; + sinfo->assoc_at = sta->assoc_at; sinfo->inactive_time = jiffies_to_msecs(jiffies - ieee80211_sta_last_active(sta)); diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 3260d4234920..369c2dddce52 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -466,6 +466,7 @@ struct ieee80211_sta_rx_stats { * the station when it leaves powersave or polls for frames * @driver_buffered_tids: bitmap of TIDs the driver has data buffered on * @txq_buffered_tids: bitmap of TIDs that mac80211 has txq data buffered on + * @assoc_at: clock boottime (in ns) of last association * @last_connected: time (in seconds) when a station got connected * @last_seq_ctrl: last received seq/frag number from this STA (per TID * plus one for non-QoS frames) @@ -562,6 +563,7 @@ struct sta_info { unsigned long 
driver_buffered_tids; unsigned long txq_buffered_tids; + u64 assoc_at; long last_connected; /* Updated from RX path only, no locking requirements */ diff --git a/net/mac80211/status.c b/net/mac80211/status.c index a88e3bf17e9d..ab8ba5835ca0 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -254,14 +254,22 @@ static void ieee80211_set_bar_pending(struct sta_info *sta, u8 tid, u16 ssn) tid_tx->bar_pending = true; } -static int ieee80211_tx_radiotap_len(struct ieee80211_tx_info *info) +static int ieee80211_tx_radiotap_len(struct ieee80211_tx_info *info, + struct ieee80211_tx_status *status) { int len = sizeof(struct ieee80211_radiotap_header); /* IEEE80211_RADIOTAP_RATE rate */ - if (info->status.rates[0].idx >= 0 && - !(info->status.rates[0].flags & (IEEE80211_TX_RC_MCS | - IEEE80211_TX_RC_VHT_MCS))) + if (status && status->rate && !(status->rate->flags & + (RATE_INFO_FLAGS_MCS | + RATE_INFO_FLAGS_DMG | + RATE_INFO_FLAGS_EDMG | + RATE_INFO_FLAGS_VHT_MCS | + RATE_INFO_FLAGS_HE_MCS))) + len += 2; + else if (info->status.rates[0].idx >= 0 && + !(info->status.rates[0].flags & + (IEEE80211_TX_RC_MCS | IEEE80211_TX_RC_VHT_MCS))) len += 2; /* IEEE80211_RADIOTAP_TX_FLAGS */ @@ -272,7 +280,14 @@ static int ieee80211_tx_radiotap_len(struct ieee80211_tx_info *info) /* IEEE80211_RADIOTAP_MCS * IEEE80211_RADIOTAP_VHT */ - if (info->status.rates[0].idx >= 0) { + if (status && status->rate) { + if (status->rate->flags & RATE_INFO_FLAGS_MCS) + len += 3; + else if (status->rate->flags & RATE_INFO_FLAGS_VHT_MCS) + len = ALIGN(len, 2) + 12; + else if (status->rate->flags & RATE_INFO_FLAGS_HE_MCS) + len = ALIGN(len, 2) + 12; + } else if (info->status.rates[0].idx >= 0) { if (info->status.rates[0].flags & IEEE80211_TX_RC_MCS) len += 3; else if (info->status.rates[0].flags & IEEE80211_TX_RC_VHT_MCS) @@ -286,12 +301,14 @@ static void ieee80211_add_tx_radiotap_header(struct ieee80211_local *local, struct ieee80211_supported_band *sband, struct sk_buff *skb, int retry_count, - int rtap_len, int shift) + int rtap_len, int shift, + struct ieee80211_tx_status *status) { struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; struct ieee80211_radiotap_header *rthdr; unsigned char *pos; + u16 legacy_rate = 0; u16 txflags; rthdr = skb_push(skb, rtap_len); @@ -310,14 +327,23 @@ ieee80211_add_tx_radiotap_header(struct ieee80211_local *local, */ /* IEEE80211_RADIOTAP_RATE */ - if (info->status.rates[0].idx >= 0 && - !(info->status.rates[0].flags & (IEEE80211_TX_RC_MCS | - IEEE80211_TX_RC_VHT_MCS))) { - u16 rate; + if (status && status->rate) { + if (!(status->rate->flags & (RATE_INFO_FLAGS_MCS | + RATE_INFO_FLAGS_DMG | + RATE_INFO_FLAGS_EDMG | + RATE_INFO_FLAGS_VHT_MCS | + RATE_INFO_FLAGS_HE_MCS))) + legacy_rate = status->rate->legacy; + } else if (info->status.rates[0].idx >= 0 && + !(info->status.rates[0].flags & (IEEE80211_TX_RC_MCS | + IEEE80211_TX_RC_VHT_MCS))) + legacy_rate = + sband->bitrates[info->status.rates[0].idx].bitrate; + + if (legacy_rate) { rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_RATE); - rate = sband->bitrates[info->status.rates[0].idx].bitrate; - *pos = DIV_ROUND_UP(rate, 5 * (1 << shift)); + *pos = DIV_ROUND_UP(legacy_rate, 5 * (1 << shift)); /* padding for tx flags */ pos += 2; } @@ -341,7 +367,140 @@ ieee80211_add_tx_radiotap_header(struct ieee80211_local *local, *pos = retry_count; pos++; - if (info->status.rates[0].idx < 0) + if (status && status->rate && + (status->rate->flags & RATE_INFO_FLAGS_MCS)) { + 
rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_MCS); + pos[0] = IEEE80211_RADIOTAP_MCS_HAVE_MCS | + IEEE80211_RADIOTAP_MCS_HAVE_GI | + IEEE80211_RADIOTAP_MCS_HAVE_BW; + if (status->rate->flags & RATE_INFO_FLAGS_SHORT_GI) + pos[1] |= IEEE80211_RADIOTAP_MCS_SGI; + if (status->rate->bw == RATE_INFO_BW_40) + pos[1] |= IEEE80211_RADIOTAP_MCS_BW_40; + pos[2] = status->rate->mcs; + pos += 3; + } else if (status && status->rate && + (status->rate->flags & RATE_INFO_FLAGS_VHT_MCS)) { + u16 known = local->hw.radiotap_vht_details & + (IEEE80211_RADIOTAP_VHT_KNOWN_GI | + IEEE80211_RADIOTAP_VHT_KNOWN_BANDWIDTH); + + rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_VHT); + + /* required alignment from rthdr */ + pos = (u8 *)rthdr + ALIGN(pos - (u8 *)rthdr, 2); + + /* u16 known - IEEE80211_RADIOTAP_VHT_KNOWN_* */ + put_unaligned_le16(known, pos); + pos += 2; + + /* u8 flags - IEEE80211_RADIOTAP_VHT_FLAG_* */ + if (status->rate->flags & RATE_INFO_FLAGS_SHORT_GI) + *pos |= IEEE80211_RADIOTAP_VHT_FLAG_SGI; + pos++; + + /* u8 bandwidth */ + switch (status->rate->bw) { + case RATE_INFO_BW_160: + *pos = 11; + break; + case RATE_INFO_BW_80: + *pos = 4; + break; + case RATE_INFO_BW_40: + *pos = 1; + break; + default: + *pos = 0; + break; + } + pos++; + + /* u8 mcs_nss[4] */ + *pos = (status->rate->mcs << 4) | status->rate->nss; + pos += 4; + + /* u8 coding */ + pos++; + /* u8 group_id */ + pos++; + /* u16 partial_aid */ + pos += 2; + } else if (status && status->rate && + (status->rate->flags & RATE_INFO_FLAGS_HE_MCS)) { + struct ieee80211_radiotap_he *he; + + rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_HE); + + /* required alignment from rthdr */ + pos = (u8 *)rthdr + ALIGN(pos - (u8 *)rthdr, 2); + he = (struct ieee80211_radiotap_he *)pos; + + he->data1 = cpu_to_le16(IEEE80211_RADIOTAP_HE_DATA1_FORMAT_SU | + IEEE80211_RADIOTAP_HE_DATA1_DATA_MCS_KNOWN | + IEEE80211_RADIOTAP_HE_DATA1_DATA_DCM_KNOWN | + IEEE80211_RADIOTAP_HE_DATA1_BW_RU_ALLOC_KNOWN); + + he->data2 = cpu_to_le16(IEEE80211_RADIOTAP_HE_DATA2_GI_KNOWN); + +#define HE_PREP(f, val) le16_encode_bits(val, IEEE80211_RADIOTAP_HE_##f) + + he->data6 |= HE_PREP(DATA6_NSTS, status->rate->nss); + +#define CHECK_GI(s) \ + BUILD_BUG_ON(IEEE80211_RADIOTAP_HE_DATA5_GI_##s != \ + (int)NL80211_RATE_INFO_HE_GI_##s) + + CHECK_GI(0_8); + CHECK_GI(1_6); + CHECK_GI(3_2); + + he->data3 |= HE_PREP(DATA3_DATA_MCS, status->rate->mcs); + he->data3 |= HE_PREP(DATA3_DATA_DCM, status->rate->he_dcm); + + he->data5 |= HE_PREP(DATA5_GI, status->rate->he_gi); + + switch (status->rate->bw) { + case RATE_INFO_BW_20: + he->data5 |= HE_PREP(DATA5_DATA_BW_RU_ALLOC, + IEEE80211_RADIOTAP_HE_DATA5_DATA_BW_RU_ALLOC_20MHZ); + break; + case RATE_INFO_BW_40: + he->data5 |= HE_PREP(DATA5_DATA_BW_RU_ALLOC, + IEEE80211_RADIOTAP_HE_DATA5_DATA_BW_RU_ALLOC_40MHZ); + break; + case RATE_INFO_BW_80: + he->data5 |= HE_PREP(DATA5_DATA_BW_RU_ALLOC, + IEEE80211_RADIOTAP_HE_DATA5_DATA_BW_RU_ALLOC_80MHZ); + break; + case RATE_INFO_BW_160: + he->data5 |= HE_PREP(DATA5_DATA_BW_RU_ALLOC, + IEEE80211_RADIOTAP_HE_DATA5_DATA_BW_RU_ALLOC_160MHZ); + break; + case RATE_INFO_BW_HE_RU: +#define CHECK_RU_ALLOC(s) \ + BUILD_BUG_ON(IEEE80211_RADIOTAP_HE_DATA5_DATA_BW_RU_ALLOC_##s##T != \ + NL80211_RATE_INFO_HE_RU_ALLOC_##s + 4) + + CHECK_RU_ALLOC(26); + CHECK_RU_ALLOC(52); + CHECK_RU_ALLOC(106); + CHECK_RU_ALLOC(242); + CHECK_RU_ALLOC(484); + CHECK_RU_ALLOC(996); + CHECK_RU_ALLOC(2x996); + + he->data5 |= HE_PREP(DATA5_DATA_BW_RU_ALLOC, + status->rate->he_ru_alloc + 4); + break; + default: + 
WARN_ONCE(1, "Invalid SU BW %d\n", status->rate->bw); + } + + pos += sizeof(struct ieee80211_radiotap_he); + } + + if ((status && status->rate) || info->status.rates[0].idx < 0) return; /* IEEE80211_RADIOTAP_MCS @@ -645,7 +804,8 @@ static int ieee80211_tx_get_rates(struct ieee80211_hw *hw, void ieee80211_tx_monitor(struct ieee80211_local *local, struct sk_buff *skb, struct ieee80211_supported_band *sband, - int retry_count, int shift, bool send_to_cooked) + int retry_count, int shift, bool send_to_cooked, + struct ieee80211_tx_status *status) { struct sk_buff *skb2; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); @@ -654,14 +814,14 @@ void ieee80211_tx_monitor(struct ieee80211_local *local, struct sk_buff *skb, int rtap_len; /* send frame to monitor interfaces now */ - rtap_len = ieee80211_tx_radiotap_len(info); + rtap_len = ieee80211_tx_radiotap_len(info, status); if (WARN_ON_ONCE(skb_headroom(skb) < rtap_len)) { pr_err("ieee80211_tx_status: headroom too small\n"); dev_kfree_skb(skb); return; } ieee80211_add_tx_radiotap_header(local, sband, skb, retry_count, - rtap_len, shift); + rtap_len, shift, status); /* XXX: is this sufficient for BPF? */ skb_reset_mac_header(skb); @@ -901,7 +1061,8 @@ static void __ieee80211_tx_status(struct ieee80211_hw *hw, } /* send to monitor interfaces */ - ieee80211_tx_monitor(local, skb, sband, retry_count, shift, send_to_cooked); + ieee80211_tx_monitor(local, skb, sband, retry_count, shift, + send_to_cooked, status); } void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index 3bb4459b52c7..4768322dc202 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -1242,9 +1242,10 @@ TRACE_EVENT(drv_remain_on_channel, ) ); -DEFINE_EVENT(local_only_evt, drv_cancel_remain_on_channel, - TP_PROTO(struct ieee80211_local *local), - TP_ARGS(local) +DEFINE_EVENT(local_sdata_evt, drv_cancel_remain_on_channel, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata), + TP_ARGS(local, sdata) ); TRACE_EVENT(drv_set_ringparam, diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index f13eb2f61ccf..1fa422782905 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -162,6 +162,7 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx, break; } case NL80211_BAND_5GHZ: + case NL80211_BAND_6GHZ: if (r->flags & IEEE80211_RATE_MANDATORY_A) mrate = r->bitrate; break; @@ -3546,6 +3547,8 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw, ieee80211_tx_result r; struct ieee80211_vif *vif = txq->vif; + WARN_ON_ONCE(softirq_count() == 0); + begin: spin_lock_bh(&fq->lock); @@ -4647,7 +4650,8 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, if (!sband) return bcn; - ieee80211_tx_monitor(hw_to_local(hw), copy, sband, 1, shift, false); + ieee80211_tx_monitor(hw_to_local(hw), copy, sband, 1, shift, false, + NULL); return bcn; } diff --git a/net/mac80211/util.c b/net/mac80211/util.c index ad1e58184c4e..051a02ddcb85 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1200,6 +1200,13 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action, elems->cisco_dtpc_elem = pos; break; + case WLAN_EID_ADDBA_EXT: + if (elen != sizeof(struct ieee80211_addba_ext_ie)) { + elem_parse_failed = true; + break; + } + elems->addba_ext_ie = (void *)pos; + break; case WLAN_EID_TIMEOUT_INTERVAL: if (elen >= sizeof(struct ieee80211_timeout_interval_ie)) elems->timeout_int = (void *)pos; @@ -1233,6 +1240,10 @@ 
_ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action, WLAN_EID_EXT_MULTIPLE_BSSID_CONFIGURATION && elen == 3) { elems->mbssid_config_ie = (void *)&pos[1]; + } else if (pos[0] == WLAN_EID_EXT_HE_SPR && + elen >= sizeof(*elems->he_spr) && + elen >= ieee80211_he_spr_size(&pos[1])) { + elems->he_spr = (void *)&pos[1]; } break; default: @@ -1572,7 +1583,8 @@ void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata, } void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata, - const u8 *bssid, u16 stype, u16 reason, + const u8 *da, const u8 *bssid, + u16 stype, u16 reason, bool send_frame, u8 *frame_buf) { struct ieee80211_local *local = sdata->local; @@ -1583,7 +1595,7 @@ void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata, mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | stype); mgmt->duration = 0; /* initialize only */ mgmt->seq_ctrl = 0; /* initialize only */ - memcpy(mgmt->da, bssid, ETH_ALEN); + memcpy(mgmt->da, da, ETH_ALEN); memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN); memcpy(mgmt->bssid, bssid, ETH_ALEN); /* u.deauth.reason_code == u.disassoc.reason_code */ @@ -2409,11 +2421,7 @@ int ieee80211_reconfig(struct ieee80211_local *local) /* add back keys */ list_for_each_entry(sdata, &local->interfaces, list) - ieee80211_reset_crypto_tx_tailroom(sdata); - - list_for_each_entry(sdata, &local->interfaces, list) - if (ieee80211_sdata_running(sdata)) - ieee80211_enable_keys(sdata); + ieee80211_reenable_keys(sdata); /* Reconfigure sched scan if it was interrupted by FW restart */ mutex_lock(&local->mtx); @@ -2702,6 +2710,27 @@ u8 *ieee80211_ie_build_vht_cap(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap, return pos; } +u8 ieee80211_ie_len_he_cap(struct ieee80211_sub_if_data *sdata, u8 iftype) +{ + const struct ieee80211_sta_he_cap *he_cap; + struct ieee80211_supported_band *sband; + u8 n; + + sband = ieee80211_get_sband(sdata); + if (!sband) + return 0; + + he_cap = ieee80211_get_he_iftype_cap(sband, iftype); + if (!he_cap) + return 0; + + n = ieee80211_he_mcs_nss_size(&he_cap->he_cap_elem); + return 2 + 1 + + sizeof(he_cap->he_cap_elem) + n + + ieee80211_he_ppe_size(he_cap->ppe_thres[0], + he_cap->he_cap_elem.phy_cap_info); +} + u8 *ieee80211_ie_build_he_cap(u8 *pos, const struct ieee80211_sta_he_cap *he_cap, u8 *end) @@ -2891,6 +2920,34 @@ u8 *ieee80211_ie_build_vht_oper(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap, return pos + sizeof(struct ieee80211_vht_operation); } +u8 *ieee80211_ie_build_he_oper(u8 *pos) +{ + struct ieee80211_he_operation *he_oper; + u32 he_oper_params; + + *pos++ = WLAN_EID_EXTENSION; + *pos++ = 1 + sizeof(struct ieee80211_he_operation); + *pos++ = WLAN_EID_EXT_HE_OPERATION; + + he_oper_params = 0; + he_oper_params |= u32_encode_bits(1023, /* disabled */ + IEEE80211_HE_OPERATION_RTS_THRESHOLD_MASK); + he_oper_params |= u32_encode_bits(1, + IEEE80211_HE_OPERATION_ER_SU_DISABLE); + he_oper_params |= u32_encode_bits(1, + IEEE80211_HE_OPERATION_BSS_COLOR_DISABLED); + + he_oper = (struct ieee80211_he_operation *)pos; + he_oper->he_oper_params = cpu_to_le32(he_oper_params); + + /* don't require special HE peer rates */ + he_oper->he_mcs_nss_set = cpu_to_le16(0xffff); + + /* TODO add VHT operational and 6GHz operational subelement? 
*/ + + return pos + sizeof(struct ieee80211_vht_operation); +} + bool ieee80211_chandef_ht_oper(const struct ieee80211_ht_operation *ht_oper, struct cfg80211_chan_def *chandef) { diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c index b20ff28d9f30..ccdcb9ad9ac7 100644 --- a/net/mac80211/vht.c +++ b/net/mac80211/vht.c @@ -4,7 +4,7 @@ * * Portions of this file * Copyright(c) 2015 - 2016 Intel Deutschland GmbH - * Copyright (C) 2018 Intel Corporation + * Copyright (C) 2018 - 2019 Intel Corporation */ #include <linux/ieee80211.h> @@ -349,6 +349,14 @@ enum ieee80211_sta_rx_bandwidth ieee80211_sta_cap_rx_bw(struct sta_info *sta) cap_width == IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ) return IEEE80211_STA_RX_BW_160; + /* + * If this is non-zero, then it does support 160 MHz after all, + * in one form or the other. We don't distinguish here (or even + * above) between 160 and 80+80 yet. + */ + if (vht_cap->cap & IEEE80211_VHT_CAP_EXT_NSS_BW_MASK) + return IEEE80211_STA_RX_BW_160; + return IEEE80211_STA_RX_BW_80; } diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index ee72779729e5..91bf32af55e9 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -946,7 +946,8 @@ ieee80211_crypto_aes_cmac_encrypt(struct ieee80211_tx_data *tx) info = IEEE80211_SKB_CB(skb); - if (info->control.hw_key) + if (info->control.hw_key && + !(key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIE)) return TX_CONTINUE; if (WARN_ON(skb_tailroom(skb) < sizeof(*mmie))) @@ -962,6 +963,9 @@ ieee80211_crypto_aes_cmac_encrypt(struct ieee80211_tx_data *tx) bip_ipn_set64(mmie->sequence_number, pn64); + if (info->control.hw_key) + return TX_CONTINUE; + bip_aad(skb, aad); /* diff --git a/net/ncsi/ncsi-pkt.h b/net/ncsi/ncsi-pkt.h index a8e9def593f2..80938b338fee 100644 --- a/net/ncsi/ncsi-pkt.h +++ b/net/ncsi/ncsi-pkt.h @@ -387,6 +387,9 @@ struct ncsi_aen_hncdsc_pkt { #define NCSI_PKT_CMD_OEM 0x50 /* OEM */ #define NCSI_PKT_CMD_PLDM 0x51 /* PLDM request over NCSI over RBT */ #define NCSI_PKT_CMD_GPUUID 0x52 /* Get package UUID */ +#define NCSI_PKT_CMD_QPNPR 0x56 /* Query Pending NC PLDM request */ +#define NCSI_PKT_CMD_SNPR 0x57 /* Send NC PLDM Reply */ + /* NCSI packet responses */ #define NCSI_PKT_RSP_CIS (NCSI_PKT_CMD_CIS + 0x80) @@ -419,6 +422,8 @@ struct ncsi_aen_hncdsc_pkt { #define NCSI_PKT_RSP_OEM (NCSI_PKT_CMD_OEM + 0x80) #define NCSI_PKT_RSP_PLDM (NCSI_PKT_CMD_PLDM + 0x80) #define NCSI_PKT_RSP_GPUUID (NCSI_PKT_CMD_GPUUID + 0x80) +#define NCSI_PKT_RSP_QPNPR (NCSI_PKT_CMD_QPNPR + 0x80) +#define NCSI_PKT_RSP_SNPR (NCSI_PKT_CMD_SNPR + 0x80) /* NCSI response code/reason */ #define NCSI_PKT_RSP_C_COMPLETED 0x0000 /* Command Completed */ diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c index d876bd55f356..d5611f04926d 100644 --- a/net/ncsi/ncsi-rsp.c +++ b/net/ncsi/ncsi-rsp.c @@ -1038,6 +1038,11 @@ static int ncsi_rsp_handler_gpuuid(struct ncsi_request *nr) return 0; } +static int ncsi_rsp_handler_pldm(struct ncsi_request *nr) +{ + return 0; +} + static int ncsi_rsp_handler_netlink(struct ncsi_request *nr) { struct ncsi_dev_priv *ndp = nr->ndp; @@ -1086,13 +1091,15 @@ static struct ncsi_rsp_handler { { NCSI_PKT_RSP_GVI, 40, ncsi_rsp_handler_gvi }, { NCSI_PKT_RSP_GC, 32, ncsi_rsp_handler_gc }, { NCSI_PKT_RSP_GP, -1, ncsi_rsp_handler_gp }, - { NCSI_PKT_RSP_GCPS, 172, ncsi_rsp_handler_gcps }, - { NCSI_PKT_RSP_GNS, 172, ncsi_rsp_handler_gns }, - { NCSI_PKT_RSP_GNPTS, 172, ncsi_rsp_handler_gnpts }, + { NCSI_PKT_RSP_GCPS, 204, ncsi_rsp_handler_gcps }, + { NCSI_PKT_RSP_GNS, 32, ncsi_rsp_handler_gns }, + { 
NCSI_PKT_RSP_GNPTS, 48, ncsi_rsp_handler_gnpts }, { NCSI_PKT_RSP_GPS, 8, ncsi_rsp_handler_gps }, { NCSI_PKT_RSP_OEM, -1, ncsi_rsp_handler_oem }, - { NCSI_PKT_RSP_PLDM, 0, NULL }, - { NCSI_PKT_RSP_GPUUID, 20, ncsi_rsp_handler_gpuuid } + { NCSI_PKT_RSP_PLDM, -1, ncsi_rsp_handler_pldm }, + { NCSI_PKT_RSP_GPUUID, 20, ncsi_rsp_handler_gpuuid }, + { NCSI_PKT_RSP_QPNPR, -1, ncsi_rsp_handler_pldm }, + { NCSI_PKT_RSP_SNPR, -1, ncsi_rsp_handler_pldm } }; int ncsi_rcv_rsp(struct sk_buff *skb, struct net_device *dev, diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 0d65f4d39494..34ec7afec116 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -20,7 +20,7 @@ config NETFILTER_FAMILY_ARP bool config NETFILTER_NETLINK_ACCT -tristate "Netfilter NFACCT over NFNETLINK interface" + tristate "Netfilter NFACCT over NFNETLINK interface" depends on NETFILTER_ADVANCED select NETFILTER_NETLINK help @@ -34,7 +34,7 @@ config NETFILTER_NETLINK_QUEUE help If this option is enabled, the kernel will include support for queueing packets via NFNETLINK. - + config NETFILTER_NETLINK_LOG tristate "Netfilter LOG over NFNETLINK interface" default m if NETFILTER_ADVANCED=n @@ -1502,7 +1502,7 @@ config NETFILTER_XT_MATCH_REALM This option adds a `realm' match, which allows you to use the realm key from the routing subsystem inside iptables. - This match pretty much resembles the CONFIG_NET_CLS_ROUTE4 option + This match pretty much resembles the CONFIG_NET_CLS_ROUTE4 option in tc world. If you want to compile it as a module, say M here and read @@ -1523,7 +1523,7 @@ config NETFILTER_XT_MATCH_SCTP depends on NETFILTER_ADVANCED default IP_SCTP help - With this option enabled, you will be able to use the + With this option enabled, you will be able to use the `sctp' match in order to match on SCTP source/destination ports and SCTP chunk types. 
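The ncsi-rsp.c change earlier in this patch pairs each response type with an expected payload length (-1 meaning variable) and a handler, and routes the new PLDM-related responses (QPNPR, SNPR) to a shared no-op handler. A minimal user-space sketch of that table-driven dispatch pattern follows; the type codes and names such as demo_rsp_handler_pldm are made up for illustration and are not the kernel's.

#include <stdio.h>
#include <stddef.h>

/* Toy response descriptor: type code, expected payload length (-1 = variable)
 * and a handler; this mirrors the shape of the ncsi_rsp_handler table. */
struct rsp_handler {
	unsigned char type;
	int payload;		/* -1 means any length is acceptable */
	int (*handler)(const unsigned char *data, size_t len);
};

/* Stub standing in for ncsi_rsp_handler_pldm(): accept the reply, do nothing. */
static int demo_rsp_handler_pldm(const unsigned char *data, size_t len)
{
	(void)data;
	(void)len;
	return 0;
}

static int demo_rsp_handler_fixed(const unsigned char *data, size_t len)
{
	(void)len;
	printf("fixed-size response, first byte 0x%02x\n", data[0]);
	return 0;
}

/* Hypothetical type codes, not the real NCSI command/response numbers. */
static const struct rsp_handler handlers[] = {
	{ 0x10,  4, demo_rsp_handler_fixed },
	{ 0xd6, -1, demo_rsp_handler_pldm },	/* variable length, like QPNPR */
	{ 0xd7, -1, demo_rsp_handler_pldm },	/* variable length, like SNPR  */
};

static int dispatch(unsigned char type, const unsigned char *data, size_t len)
{
	size_t i;

	for (i = 0; i < sizeof(handlers) / sizeof(handlers[0]); i++) {
		if (handlers[i].type != type)
			continue;
		/* Reject short packets unless the entry accepts any length. */
		if (handlers[i].payload >= 0 && len < (size_t)handlers[i].payload)
			return -1;
		return handlers[i].handler(data, len);
	}
	return -1;	/* unknown response type */
}

int main(void)
{
	unsigned char payload[4] = { 0xaa, 0xbb, 0xcc, 0xdd };

	dispatch(0x10, payload, sizeof(payload));	/* fixed-size path */
	dispatch(0xd6, payload, 2);			/* PLDM-style, any length */
	return 0;
}

Using -1 as the expected length lets variable-size replies such as the PLDM ones pass the sanity check while fixed-size entries still reject short packets, which matches how the table above treats the corrected GCPS/GNS/GNPTS sizes.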
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 9270a7fae484..4fc075b612fe 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -124,7 +124,7 @@ nf_flow_table-objs := nf_flow_table_core.o nf_flow_table_ip.o obj-$(CONFIG_NF_FLOW_TABLE_INET) += nf_flow_table_inet.o -# generic X tables +# generic X tables obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o # combos diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 0feb77fa9edc..d098d87bc331 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -7,7 +7,7 @@ #include <linux/rcupdate.h> #include <linux/jhash.h> #include <linux/types.h> -#include <linux/netfilter/ipset/ip_set_timeout.h> +#include <linux/netfilter/ipset/ip_set.h> #define __ipset_dereference_protected(p, c) rcu_dereference_protected(p, c) #define ipset_dereference_protected(p, set) \ @@ -953,7 +953,7 @@ mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d, mtype_data_netmask(d, NCIDR_GET(h->nets[j].cidr[0])); #endif key = HKEY(d, h->initval, t->htable_bits); - n = rcu_dereference_bh(hbucket(t, key)); + n = rcu_dereference_bh(hbucket(t, key)); if (!n) continue; for (i = 0; i < n->pos; i++) { diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index 6f9ead6319e0..67ac50104e6f 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -288,7 +288,7 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext, if (n && !(SET_WITH_TIMEOUT(set) && ip_set_timeout_expired(ext_timeout(n, set)))) - n = NULL; + n = NULL; e = kzalloc(set->dsize, GFP_ATOMIC); if (!e) diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 46f06f92ab8f..8b80ab794a92 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -617,7 +617,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, unsigned int flags = (svc->flags & IP_VS_SVC_F_ONEPACKET && iph->protocol == IPPROTO_UDP) ? 
IP_VS_CONN_F_ONE_PACKET : 0; - union nf_inet_addr daddr = { .all = { 0, 0, 0, 0 } }; + union nf_inet_addr daddr = { .all = { 0, 0, 0, 0 } }; /* create a new connection entry */ IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__); diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 060565e7d227..8b48e7ce1c2c 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -262,7 +262,7 @@ static inline unsigned int ip_vs_svc_hashkey(struct netns_ipvs *ipvs, int af, unsigned int proto, const union nf_inet_addr *addr, __be16 port) { - register unsigned int porth = ntohs(port); + unsigned int porth = ntohs(port); __be32 addr_fold = addr->ip; __u32 ahash; @@ -493,7 +493,7 @@ static inline unsigned int ip_vs_rs_hashkey(int af, const union nf_inet_addr *addr, __be16 port) { - register unsigned int porth = ntohs(port); + unsigned int porth = ntohs(port); __be32 addr_fold = addr->ip; #ifdef CONFIG_IP_VS_IPV6 @@ -1737,12 +1737,18 @@ proc_do_defense_mode(struct ctl_table *table, int write, int val = *valp; int rc; - rc = proc_dointvec(table, write, buffer, lenp, ppos); + struct ctl_table tmp = { + .data = &val, + .maxlen = sizeof(int), + .mode = table->mode, + }; + + rc = proc_dointvec(&tmp, write, buffer, lenp, ppos); if (write && (*valp != val)) { - if ((*valp < 0) || (*valp > 3)) { - /* Restore the correct value */ - *valp = val; + if (val < 0 || val > 3) { + rc = -EINVAL; } else { + *valp = val; update_defense_level(ipvs); } } @@ -1756,33 +1762,20 @@ proc_do_sync_threshold(struct ctl_table *table, int write, int *valp = table->data; int val[2]; int rc; + struct ctl_table tmp = { + .data = &val, + .maxlen = table->maxlen, + .mode = table->mode, + }; - /* backup the value first */ memcpy(val, valp, sizeof(val)); - - rc = proc_dointvec(table, write, buffer, lenp, ppos); - if (write && (valp[0] < 0 || valp[1] < 0 || - (valp[0] >= valp[1] && valp[1]))) { - /* Restore the correct value */ - memcpy(valp, val, sizeof(val)); - } - return rc; -} - -static int -proc_do_sync_mode(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) -{ - int *valp = table->data; - int val = *valp; - int rc; - - rc = proc_dointvec(table, write, buffer, lenp, ppos); - if (write && (*valp != val)) { - if ((*valp < 0) || (*valp > 1)) { - /* Restore the correct value */ - *valp = val; - } + rc = proc_dointvec(&tmp, write, buffer, lenp, ppos); + if (write) { + if (val[0] < 0 || val[1] < 0 || + (val[0] >= val[1] && val[1])) + rc = -EINVAL; + else + memcpy(valp, val, sizeof(val)); } return rc; } @@ -1795,12 +1788,18 @@ proc_do_sync_ports(struct ctl_table *table, int write, int val = *valp; int rc; - rc = proc_dointvec(table, write, buffer, lenp, ppos); + struct ctl_table tmp = { + .data = &val, + .maxlen = sizeof(int), + .mode = table->mode, + }; + + rc = proc_dointvec(&tmp, write, buffer, lenp, ppos); if (write && (*valp != val)) { - if (*valp < 1 || !is_power_of_2(*valp)) { - /* Restore the correct value */ + if (val < 1 || !is_power_of_2(val)) + rc = -EINVAL; + else *valp = val; - } } return rc; } @@ -1860,7 +1859,9 @@ static struct ctl_table vs_vars[] = { .procname = "sync_version", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_do_sync_mode, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, { .procname = "sync_ports", diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index c8b5a504476c..77c323c36a88 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ 
b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -160,7 +160,7 @@ static void ip_vs_dest_set_eraseall(struct ip_vs_dest_set *set) /* get weighted least-connection node in the destination set */ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set) { - register struct ip_vs_dest_set_elem *e; + struct ip_vs_dest_set_elem *e; struct ip_vs_dest *dest, *least; int loh, doh; @@ -209,7 +209,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set) /* get weighted most-connection node in the destination set */ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set) { - register struct ip_vs_dest_set_elem *e; + struct ip_vs_dest_set_elem *e; struct ip_vs_dest *dest, *most; int moh, doh; diff --git a/net/netfilter/ipvs/ip_vs_mh.c b/net/netfilter/ipvs/ip_vs_mh.c index 94d9d349ebb0..da0280cec506 100644 --- a/net/netfilter/ipvs/ip_vs_mh.c +++ b/net/netfilter/ipvs/ip_vs_mh.c @@ -174,8 +174,8 @@ static int ip_vs_mh_populate(struct ip_vs_mh_state *s, return 0; } - table = kcalloc(BITS_TO_LONGS(IP_VS_MH_TAB_SIZE), - sizeof(unsigned long), GFP_KERNEL); + table = kcalloc(BITS_TO_LONGS(IP_VS_MH_TAB_SIZE), + sizeof(unsigned long), GFP_KERNEL); if (!table) return -ENOMEM; diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c index 000d961b97e4..32b028853a7c 100644 --- a/net/netfilter/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c @@ -710,7 +710,7 @@ static int __ip_vs_tcp_init(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd sizeof(tcp_timeouts)); if (!pd->timeout_table) return -ENOMEM; - pd->tcp_state_table = tcp_states; + pd->tcp_state_table = tcp_states; return 0; } diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 81a8ef42b88d..0c63120b2db2 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -73,8 +73,7 @@ struct conntrack_gc_work { }; static __read_mostly struct kmem_cache *nf_conntrack_cachep; -static __read_mostly spinlock_t nf_conntrack_locks_all_lock; -static __read_mostly DEFINE_SPINLOCK(nf_conntrack_locks_all_lock); +static DEFINE_SPINLOCK(nf_conntrack_locks_all_lock); static __read_mostly bool nf_conntrack_locks_all; /* every gc cycle scans at most 1/GC_MAX_BUCKETS_DIV part of table */ diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index 5e2812ee2149..6fba74b5aaf7 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -24,6 +24,7 @@ #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_core.h> +#include <net/netfilter/nf_conntrack_ecache.h> #include <net/netfilter/nf_conntrack_extend.h> static DEFINE_MUTEX(nf_ct_ecache_mutex); diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 65364de915d1..42557d2b6a90 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -25,8 +25,10 @@ #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_core.h> +#include <net/netfilter/nf_conntrack_ecache.h> #include <net/netfilter/nf_conntrack_expect.h> #include <net/netfilter/nf_conntrack_helper.h> +#include <net/netfilter/nf_conntrack_l4proto.h> #include <net/netfilter/nf_conntrack_tuple.h> #include <net/netfilter/nf_conntrack_zones.h> diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index 8d96738b7dfd..9eca90414bb7 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ 
b/net/netfilter/nf_conntrack_ftp.c @@ -162,7 +162,7 @@ static int try_rfc959(const char *data, size_t dlen, if (length == 0) return 0; - cmd->u3.ip = htonl((array[0] << 24) | (array[1] << 16) | + cmd->u3.ip = htonl((array[0] << 24) | (array[1] << 16) | (array[2] << 8) | array[3]); cmd->u.tcp.port = htons((array[4] << 8) | array[5]); return length; diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 8d729e7c36ff..118f415928ae 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -21,10 +21,11 @@ #include <linux/rtnetlink.h> #include <net/netfilter/nf_conntrack.h> -#include <net/netfilter/nf_conntrack_l4proto.h> -#include <net/netfilter/nf_conntrack_helper.h> #include <net/netfilter/nf_conntrack_core.h> +#include <net/netfilter/nf_conntrack_ecache.h> #include <net/netfilter/nf_conntrack_extend.h> +#include <net/netfilter/nf_conntrack_helper.h> +#include <net/netfilter/nf_conntrack_l4proto.h> #include <net/netfilter/nf_log.h> static DEFINE_MUTEX(nf_ct_helper_mutex); diff --git a/net/netfilter/nf_conntrack_labels.c b/net/netfilter/nf_conntrack_labels.c index 74b8113f7aeb..522792556632 100644 --- a/net/netfilter/nf_conntrack_labels.c +++ b/net/netfilter/nf_conntrack_labels.c @@ -11,7 +11,7 @@ #include <net/netfilter/nf_conntrack_ecache.h> #include <net/netfilter/nf_conntrack_labels.h> -static spinlock_t nf_connlabels_lock; +static DEFINE_SPINLOCK(nf_connlabels_lock); static int replace_u32(u32 *address, u32 mask, u32 new) { @@ -89,7 +89,6 @@ int nf_conntrack_labels_init(void) { BUILD_BUG_ON(NF_CT_LABELS_MAX_SIZE / sizeof(long) >= U8_MAX); - spin_lock_init(&nf_connlabels_lock); return nf_ct_extend_register(&labels_extend); } diff --git a/net/netfilter/nf_conntrack_proto_icmpv6.c b/net/netfilter/nf_conntrack_proto_icmpv6.c index 7e317e6698ba..6f9144e1f1c1 100644 --- a/net/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/netfilter/nf_conntrack_proto_icmpv6.c @@ -22,7 +22,6 @@ #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_conntrack_timeout.h> #include <net/netfilter/nf_conntrack_zones.h> -#include <net/netfilter/ipv6/nf_conntrack_icmpv6.h> #include <net/netfilter/nf_log.h> static const unsigned int nf_ct_icmpv6_timeout = 30*HZ; diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 85c1f8c213b0..1926fd56df56 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -1227,7 +1227,7 @@ static const struct nla_policy tcp_nla_policy[CTA_PROTOINFO_TCP_MAX+1] = { [CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] = { .type = NLA_U8 }, [CTA_PROTOINFO_TCP_WSCALE_REPLY] = { .type = NLA_U8 }, [CTA_PROTOINFO_TCP_FLAGS_ORIGINAL] = { .len = sizeof(struct nf_ct_tcp_flags) }, - [CTA_PROTOINFO_TCP_FLAGS_REPLY] = { .len = sizeof(struct nf_ct_tcp_flags) }, + [CTA_PROTOINFO_TCP_FLAGS_REPLY] = { .len = sizeof(struct nf_ct_tcp_flags) }, }; #define TCP_NLATTR_SIZE ( \ diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 0006503d2da9..410809c669e1 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -511,8 +511,6 @@ static void nf_conntrack_standalone_fini_proc(struct net *net) /* Log invalid packets of a given protocol */ static int log_invalid_proto_min __read_mostly; static int log_invalid_proto_max __read_mostly = 255; -static int zero; -static int one = 1; /* size the user *wants to set */ static unsigned int nf_conntrack_htable_size_user 
__read_mostly; @@ -629,8 +627,8 @@ static struct ctl_table nf_ct_sysctl_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, - .extra2 = &one, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, [NF_SYSCTL_CT_LOG_INVALID] = { .procname = "nf_conntrack_log_invalid", @@ -654,8 +652,8 @@ static struct ctl_table nf_ct_sysctl_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, - .extra2 = &one, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, [NF_SYSCTL_CT_HELPER] = { .procname = "nf_conntrack_helper", @@ -663,8 +661,8 @@ static struct ctl_table nf_ct_sysctl_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, - .extra2 = &one, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, #ifdef CONFIG_NF_CONNTRACK_EVENTS [NF_SYSCTL_CT_EVENTS] = { @@ -673,8 +671,8 @@ static struct ctl_table nf_ct_sysctl_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, - .extra2 = &one, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, #endif #ifdef CONFIG_NF_CONNTRACK_TIMESTAMP @@ -684,8 +682,8 @@ static struct ctl_table nf_ct_sysctl_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, - .extra2 = &one, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, #endif [NF_SYSCTL_CT_PROTO_TIMEOUT_GENERIC] = { @@ -759,16 +757,16 @@ static struct ctl_table nf_ct_sysctl_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, - .extra2 = &one, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, [NF_SYSCTL_CT_PROTO_TCP_LIBERAL] = { .procname = "nf_conntrack_tcp_be_liberal", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, - .extra2 = &one, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, [NF_SYSCTL_CT_PROTO_TCP_MAX_RETRANS] = { .procname = "nf_conntrack_tcp_max_retrans", @@ -904,8 +902,8 @@ static struct ctl_table nf_ct_sysctl_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, - .extra2 = &one, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, #endif #ifdef CONFIG_NF_CT_PROTO_GRE @@ -1169,7 +1167,6 @@ static int __init nf_conntrack_standalone_init(void) if (ret < 0) goto out_start; - BUILD_BUG_ON(SKB_NFCT_PTRMASK != NFCT_PTRMASK); BUILD_BUG_ON(NFCT_INFOMASK <= IP_CT_NUMBER); #ifdef CONFIG_SYSCTL diff --git a/net/netfilter/nf_conntrack_timeout.c b/net/netfilter/nf_conntrack_timeout.c index 13d0f4a92647..14387e0b8008 100644 --- a/net/netfilter/nf_conntrack_timeout.c +++ b/net/netfilter/nf_conntrack_timeout.c @@ -19,6 +19,7 @@ #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_conntrack_extend.h> +#include <net/netfilter/nf_conntrack_l4proto.h> #include <net/netfilter/nf_conntrack_timeout.h> struct nf_ct_timeout * diff --git a/net/netfilter/nf_dup_netdev.c b/net/netfilter/nf_dup_netdev.c index 5a35ef08c3cb..f108a76925dd 100644 --- a/net/netfilter/nf_dup_netdev.c +++ b/net/netfilter/nf_dup_netdev.c @@ -10,6 +10,7 @@ #include <linux/netfilter.h> #include <linux/netfilter/nf_tables.h> #include <net/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables_offload.h> #include <net/netfilter/nf_dup_netdev.h> static void nf_do_netdev_egress(struct sk_buff *skb, struct net_device *dev) @@ -50,5 +51,25 @@ void nf_dup_netdev_egress(const struct nft_pktinfo *pkt, 
int oif) } EXPORT_SYMBOL_GPL(nf_dup_netdev_egress); +int nft_fwd_dup_netdev_offload(struct nft_offload_ctx *ctx, + struct nft_flow_rule *flow, + enum flow_action_id id, int oif) +{ + struct flow_action_entry *entry; + struct net_device *dev; + + /* nft_flow_rule_destroy() releases the reference on this device. */ + dev = dev_get_by_index(ctx->net, oif); + if (!dev) + return -EOPNOTSUPP; + + entry = &flow->rule->action.entries[ctx->num_actions++]; + entry->id = id; + entry->dev = dev; + + return 0; +} +EXPORT_SYMBOL_GPL(nft_fwd_dup_netdev_offload); + MODULE_LICENSE("GPL"); MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index a0b4bf654de2..132f5228b431 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -11,6 +11,7 @@ #include <net/netfilter/nf_flow_table.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_core.h> +#include <net/netfilter/nf_conntrack_l4proto.h> #include <net/netfilter/nf_conntrack_tuple.h> struct flow_offload_entry { diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 3f6023ed4966..bfc555fcbc72 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -18,12 +18,12 @@ #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_core.h> -#include <net/netfilter/nf_nat.h> -#include <net/netfilter/nf_nat_helper.h> #include <net/netfilter/nf_conntrack_helper.h> #include <net/netfilter/nf_conntrack_seqadj.h> #include <net/netfilter/nf_conntrack_zones.h> -#include <linux/netfilter/nf_nat.h> +#include <net/netfilter/nf_nat.h> +#include <net/netfilter/nf_nat_helper.h> +#include <uapi/linux/netfilter/nf_nat.h> #include "nf_internals.h" diff --git a/net/netfilter/nf_nat_proto.c b/net/netfilter/nf_nat_proto.c index 7ac733ebd060..0a59c14b5177 100644 --- a/net/netfilter/nf_nat_proto.c +++ b/net/netfilter/nf_nat_proto.c @@ -722,7 +722,7 @@ nf_nat_ipv4_local_fn(void *priv, struct sk_buff *skb, return ret; } -const struct nf_hook_ops nf_nat_ipv4_ops[] = { +static const struct nf_hook_ops nf_nat_ipv4_ops[] = { /* Before packet filtering, change destination */ { .hook = nf_nat_ipv4_in, @@ -961,7 +961,7 @@ nf_nat_ipv6_local_fn(void *priv, struct sk_buff *skb, return ret; } -const struct nf_hook_ops nf_nat_ipv6_ops[] = { +static const struct nf_hook_ops nf_nat_ipv6_ops[] = { /* Before packet filtering, change destination */ { .hook = nf_nat_ipv6_in, diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c index c769462a839e..b0930d4aba22 100644 --- a/net/netfilter/nf_synproxy_core.c +++ b/net/netfilter/nf_synproxy_core.c @@ -56,7 +56,7 @@ synproxy_parse_options(const struct sk_buff *skb, unsigned int doff, switch (opcode) { case TCPOPT_MSS: if (opsize == TCPOLEN_MSS) { - opts->mss = get_unaligned_be16(ptr); + opts->mss_option = get_unaligned_be16(ptr); opts->options |= NF_SYNPROXY_OPT_MSS; } break; @@ -115,7 +115,7 @@ synproxy_build_options(struct tcphdr *th, const struct synproxy_options *opts) if (options & NF_SYNPROXY_OPT_MSS) *ptr++ = htonl((TCPOPT_MSS << 24) | (TCPOLEN_MSS << 16) | - opts->mss); + opts->mss_option); if (options & NF_SYNPROXY_OPT_TIMESTAMP) { if (options & NF_SYNPROXY_OPT_SACK_PERM) @@ -642,7 +642,7 @@ synproxy_recv_client_ack(struct net *net, } this_cpu_inc(snet->stats->cookie_valid); - opts->mss = mss; + opts->mss_option = mss; opts->options |= NF_SYNPROXY_OPT_MSS; if (opts->options & NF_SYNPROXY_OPT_TIMESTAMP) @@ -1060,7 
+1060,7 @@ synproxy_recv_client_ack_ipv6(struct net *net, } this_cpu_inc(snet->stats->cookie_valid); - opts->mss = mss; + opts->mss_option = mss; opts->options |= NF_SYNPROXY_OPT_MSS; if (opts->options & NF_SYNPROXY_OPT_TIMESTAMP) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index d47469f824a1..e4a68dc42694 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2853,7 +2853,7 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk, return nft_table_validate(net, table); if (chain->flags & NFT_CHAIN_HW_OFFLOAD) { - flow = nft_flow_rule_create(rule); + flow = nft_flow_rule_create(net, rule); if (IS_ERR(flow)) return PTR_ERR(flow); @@ -5131,6 +5131,41 @@ nft_obj_type_get(struct net *net, u32 objtype) return ERR_PTR(-ENOENT); } +static int nf_tables_updobj(const struct nft_ctx *ctx, + const struct nft_object_type *type, + const struct nlattr *attr, + struct nft_object *obj) +{ + struct nft_object *newobj; + struct nft_trans *trans; + int err; + + if (!obj->ops->update) + return -EOPNOTSUPP; + + trans = nft_trans_alloc(ctx, NFT_MSG_NEWOBJ, + sizeof(struct nft_trans_obj)); + if (!trans) + return -ENOMEM; + + newobj = nft_obj_init(ctx, type, attr); + if (IS_ERR(newobj)) { + err = PTR_ERR(newobj); + goto err_free_trans; + } + + nft_trans_obj(trans) = obj; + nft_trans_obj_update(trans) = true; + nft_trans_obj_newobj(trans) = newobj; + list_add_tail(&trans->list, &ctx->net->nft.commit_list); + + return 0; + +err_free_trans: + kfree(trans); + return err; +} + static int nf_tables_newobj(struct net *net, struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[], @@ -5170,7 +5205,13 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk, NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_NAME]); return -EEXIST; } - return 0; + if (nlh->nlmsg_flags & NLM_F_REPLACE) + return -EOPNOTSUPP; + + type = nft_obj_type_get(net, objtype); + nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla); + + return nf_tables_updobj(&ctx, type, nla[NFTA_OBJ_DATA], obj); } nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla); @@ -6431,6 +6472,19 @@ static void nft_chain_commit_update(struct nft_trans *trans) } } +static void nft_obj_commit_update(struct nft_trans *trans) +{ + struct nft_object *newobj; + struct nft_object *obj; + + obj = nft_trans_obj(trans); + newobj = nft_trans_obj_newobj(trans); + + obj->ops->update(obj, newobj); + + kfree(newobj); +} + static void nft_commit_release(struct nft_trans *trans) { switch (trans->msg_type) { @@ -6795,10 +6849,18 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) te->set->ndeact--; break; case NFT_MSG_NEWOBJ: - nft_clear(net, nft_trans_obj(trans)); - nf_tables_obj_notify(&trans->ctx, nft_trans_obj(trans), - NFT_MSG_NEWOBJ); - nft_trans_destroy(trans); + if (nft_trans_obj_update(trans)) { + nft_obj_commit_update(trans); + nf_tables_obj_notify(&trans->ctx, + nft_trans_obj(trans), + NFT_MSG_NEWOBJ); + } else { + nft_clear(net, nft_trans_obj(trans)); + nf_tables_obj_notify(&trans->ctx, + nft_trans_obj(trans), + NFT_MSG_NEWOBJ); + nft_trans_destroy(trans); + } break; case NFT_MSG_DELOBJ: nft_obj_del(nft_trans_obj(trans)); @@ -6945,8 +7007,13 @@ static int __nf_tables_abort(struct net *net) nft_trans_destroy(trans); break; case NFT_MSG_NEWOBJ: - trans->ctx.table->use--; - nft_obj_del(nft_trans_obj(trans)); + if (nft_trans_obj_update(trans)) { + kfree(nft_trans_obj_newobj(trans)); + nft_trans_destroy(trans); + } else { + trans->ctx.table->use--; 
+ nft_obj_del(nft_trans_obj(trans)); + } break; case NFT_MSG_DELOBJ: trans->ctx.table->use++; @@ -7627,13 +7694,20 @@ static int __init nf_tables_module_init(void) if (err < 0) goto err4; + err = nft_offload_init(); + if (err < 0) + goto err5; + /* must be last */ err = nfnetlink_subsys_register(&nf_tables_subsys); if (err < 0) - goto err5; + goto err6; nft_chain_route_init(); + return err; +err6: + nft_offload_exit(); err5: rhltable_destroy(&nft_objname_ht); err4: @@ -7650,6 +7724,7 @@ err1: static void __exit nf_tables_module_exit(void) { nfnetlink_subsys_unregister(&nf_tables_subsys); + nft_offload_exit(); unregister_netdevice_notifier(&nf_tables_flowtable_notifier); nft_chain_filter_fini(); nft_chain_route_fini(); diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c index c0d18c1d77ac..21bb772cb4b7 100644 --- a/net/netfilter/nf_tables_offload.c +++ b/net/netfilter/nf_tables_offload.c @@ -28,13 +28,10 @@ static struct nft_flow_rule *nft_flow_rule_alloc(int num_actions) return flow; } -struct nft_flow_rule *nft_flow_rule_create(const struct nft_rule *rule) +struct nft_flow_rule *nft_flow_rule_create(struct net *net, + const struct nft_rule *rule) { - struct nft_offload_ctx ctx = { - .dep = { - .type = NFT_OFFLOAD_DEP_UNSPEC, - }, - }; + struct nft_offload_ctx *ctx; struct nft_flow_rule *flow; int num_actions = 0, err; struct nft_expr *expr; @@ -52,21 +49,32 @@ struct nft_flow_rule *nft_flow_rule_create(const struct nft_rule *rule) return ERR_PTR(-ENOMEM); expr = nft_expr_first(rule); + + ctx = kzalloc(sizeof(struct nft_offload_ctx), GFP_KERNEL); + if (!ctx) { + err = -ENOMEM; + goto err_out; + } + ctx->net = net; + ctx->dep.type = NFT_OFFLOAD_DEP_UNSPEC; + while (expr->ops && expr != nft_expr_last(rule)) { if (!expr->ops->offload) { err = -EOPNOTSUPP; goto err_out; } - err = expr->ops->offload(&ctx, flow, expr); + err = expr->ops->offload(ctx, flow, expr); if (err < 0) goto err_out; expr = nft_expr_next(expr); } - flow->proto = ctx.dep.l3num; + flow->proto = ctx->dep.l3num; + kfree(ctx); return flow; err_out: + kfree(ctx); nft_flow_rule_destroy(flow); return ERR_PTR(err); @@ -74,6 +82,19 @@ err_out: void nft_flow_rule_destroy(struct nft_flow_rule *flow) { + struct flow_action_entry *entry; + int i; + + flow_action_for_each(i, entry, &flow->rule->action) { + switch (entry->id) { + case FLOW_ACTION_REDIRECT: + case FLOW_ACTION_MIRRED: + dev_put(entry->dev); + break; + default: + break; + } + } kfree(flow->rule); kfree(flow); } @@ -134,20 +155,20 @@ int nft_chain_offload_priority(struct nft_base_chain *basechain) return 0; } -static int nft_flow_offload_rule(struct nft_trans *trans, +static int nft_flow_offload_rule(struct nft_chain *chain, + struct nft_rule *rule, + struct nft_flow_rule *flow, enum flow_cls_command command) { - struct nft_flow_rule *flow = nft_trans_flow_rule(trans); - struct nft_rule *rule = nft_trans_rule(trans); struct flow_cls_offload cls_flow = {}; struct nft_base_chain *basechain; struct netlink_ext_ack extack; __be16 proto = ETH_P_ALL; - if (!nft_is_base_chain(trans->ctx.chain)) + if (!nft_is_base_chain(chain)) return -EOPNOTSUPP; - basechain = nft_base_chain(trans->ctx.chain); + basechain = nft_base_chain(chain); if (flow) proto = flow->proto; @@ -182,58 +203,130 @@ static int nft_flow_offload_unbind(struct flow_block_offload *bo, return 0; } -#define FLOW_SETUP_BLOCK TC_SETUP_BLOCK +static int nft_block_setup(struct nft_base_chain *basechain, + struct flow_block_offload *bo, + enum flow_block_command cmd) +{ + int err; -static int 
nft_flow_offload_chain(struct nft_trans *trans, - enum flow_block_command cmd) + switch (cmd) { + case FLOW_BLOCK_BIND: + err = nft_flow_offload_bind(bo, basechain); + break; + case FLOW_BLOCK_UNBIND: + err = nft_flow_offload_unbind(bo, basechain); + break; + default: + WARN_ON_ONCE(1); + err = -EOPNOTSUPP; + } + + return err; +} + +static int nft_block_offload_cmd(struct nft_base_chain *chain, + struct net_device *dev, + enum flow_block_command cmd) +{ + struct netlink_ext_ack extack = {}; + struct flow_block_offload bo = {}; + int err; + + bo.net = dev_net(dev); + bo.block = &chain->flow_block; + bo.command = cmd; + bo.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS; + bo.extack = &extack; + INIT_LIST_HEAD(&bo.cb_list); + + err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo); + if (err < 0) + return err; + + return nft_block_setup(chain, &bo, cmd); +} + +static void nft_indr_block_ing_cmd(struct net_device *dev, + struct nft_base_chain *chain, + flow_indr_block_bind_cb_t *cb, + void *cb_priv, + enum flow_block_command cmd) { - struct nft_chain *chain = trans->ctx.chain; struct netlink_ext_ack extack = {}; struct flow_block_offload bo = {}; + + if (!chain) + return; + + bo.net = dev_net(dev); + bo.block = &chain->flow_block; + bo.command = cmd; + bo.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS; + bo.extack = &extack; + INIT_LIST_HEAD(&bo.cb_list); + + cb(dev, cb_priv, TC_SETUP_BLOCK, &bo); + + nft_block_setup(chain, &bo, cmd); +} + +static int nft_indr_block_offload_cmd(struct nft_base_chain *chain, + struct net_device *dev, + enum flow_block_command cmd) +{ + struct flow_block_offload bo = {}; + struct netlink_ext_ack extack = {}; + + bo.net = dev_net(dev); + bo.block = &chain->flow_block; + bo.command = cmd; + bo.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS; + bo.extack = &extack; + INIT_LIST_HEAD(&bo.cb_list); + + flow_indr_block_call(dev, &bo, cmd); + + if (list_empty(&bo.cb_list)) + return -EOPNOTSUPP; + + return nft_block_setup(chain, &bo, cmd); +} + +#define FLOW_SETUP_BLOCK TC_SETUP_BLOCK + +static int nft_flow_offload_chain(struct nft_chain *chain, + u8 *ppolicy, + enum flow_block_command cmd) +{ struct nft_base_chain *basechain; struct net_device *dev; - int err; + u8 policy; if (!nft_is_base_chain(chain)) return -EOPNOTSUPP; basechain = nft_base_chain(chain); dev = basechain->ops.dev; - if (!dev || !dev->netdev_ops->ndo_setup_tc) + if (!dev) return -EOPNOTSUPP; + policy = ppolicy ? *ppolicy : basechain->policy; + /* Only default policy to accept is supported for now. 
*/ - if (cmd == FLOW_BLOCK_BIND && - nft_trans_chain_policy(trans) != -1 && - nft_trans_chain_policy(trans) != NF_ACCEPT) + if (cmd == FLOW_BLOCK_BIND && policy != -1 && policy != NF_ACCEPT) return -EOPNOTSUPP; - bo.command = cmd; - bo.block = &basechain->flow_block; - bo.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS; - bo.extack = &extack; - INIT_LIST_HEAD(&bo.cb_list); - - err = dev->netdev_ops->ndo_setup_tc(dev, FLOW_SETUP_BLOCK, &bo); - if (err < 0) - return err; - - switch (cmd) { - case FLOW_BLOCK_BIND: - err = nft_flow_offload_bind(&bo, basechain); - break; - case FLOW_BLOCK_UNBIND: - err = nft_flow_offload_unbind(&bo, basechain); - break; - } - - return err; + if (dev->netdev_ops->ndo_setup_tc) + return nft_block_offload_cmd(basechain, dev, cmd); + else + return nft_indr_block_offload_cmd(basechain, dev, cmd); } int nft_flow_rule_offload_commit(struct net *net) { struct nft_trans *trans; int err = 0; + u8 policy; list_for_each_entry(trans, &net->nft.commit_list, list) { if (trans->ctx.family != NFPROTO_NETDEV) @@ -244,13 +337,17 @@ int nft_flow_rule_offload_commit(struct net *net) if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)) continue; - err = nft_flow_offload_chain(trans, FLOW_BLOCK_BIND); + policy = nft_trans_chain_policy(trans); + err = nft_flow_offload_chain(trans->ctx.chain, &policy, + FLOW_BLOCK_BIND); break; case NFT_MSG_DELCHAIN: if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)) continue; - err = nft_flow_offload_chain(trans, FLOW_BLOCK_UNBIND); + policy = nft_trans_chain_policy(trans); + err = nft_flow_offload_chain(trans->ctx.chain, &policy, + FLOW_BLOCK_BIND); break; case NFT_MSG_NEWRULE: if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)) @@ -260,14 +357,20 @@ int nft_flow_rule_offload_commit(struct net *net) !(trans->ctx.flags & NLM_F_APPEND)) return -EOPNOTSUPP; - err = nft_flow_offload_rule(trans, FLOW_CLS_REPLACE); + err = nft_flow_offload_rule(trans->ctx.chain, + nft_trans_rule(trans), + nft_trans_flow_rule(trans), + FLOW_CLS_REPLACE); nft_flow_rule_destroy(nft_trans_flow_rule(trans)); break; case NFT_MSG_DELRULE: if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)) continue; - err = nft_flow_offload_rule(trans, FLOW_CLS_DESTROY); + err = nft_flow_offload_rule(trans->ctx.chain, + nft_trans_rule(trans), + nft_trans_flow_rule(trans), + FLOW_CLS_DESTROY); break; } @@ -277,3 +380,104 @@ int nft_flow_rule_offload_commit(struct net *net) return err; } + +static struct nft_chain *__nft_offload_get_chain(struct net_device *dev) +{ + struct nft_base_chain *basechain; + struct net *net = dev_net(dev); + const struct nft_table *table; + struct nft_chain *chain; + + list_for_each_entry(table, &net->nft.tables, list) { + if (table->family != NFPROTO_NETDEV) + continue; + + list_for_each_entry(chain, &table->chains, list) { + if (!nft_is_base_chain(chain) || + !(chain->flags & NFT_CHAIN_HW_OFFLOAD)) + continue; + + basechain = nft_base_chain(chain); + if (strncmp(basechain->dev_name, dev->name, IFNAMSIZ)) + continue; + + return chain; + } + } + + return NULL; +} + +static void nft_indr_block_cb(struct net_device *dev, + flow_indr_block_bind_cb_t *cb, void *cb_priv, + enum flow_block_command cmd) +{ + struct net *net = dev_net(dev); + struct nft_chain *chain; + + mutex_lock(&net->nft.commit_mutex); + chain = __nft_offload_get_chain(dev); + if (chain) { + struct nft_base_chain *basechain; + + basechain = nft_base_chain(chain); + nft_indr_block_ing_cmd(dev, basechain, cb, cb_priv, cmd); + } + mutex_unlock(&net->nft.commit_mutex); +} + +static void 
nft_offload_chain_clean(struct nft_chain *chain) +{ + struct nft_rule *rule; + + list_for_each_entry(rule, &chain->rules, list) { + nft_flow_offload_rule(chain, rule, + NULL, FLOW_CLS_DESTROY); + } + + nft_flow_offload_chain(chain, NULL, FLOW_BLOCK_UNBIND); +} + +static int nft_offload_netdev_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct net *net = dev_net(dev); + struct nft_chain *chain; + + mutex_lock(&net->nft.commit_mutex); + chain = __nft_offload_get_chain(dev); + if (chain) + nft_offload_chain_clean(chain); + mutex_unlock(&net->nft.commit_mutex); + + return NOTIFY_DONE; +} + +static struct flow_indr_block_ing_entry block_ing_entry = { + .cb = nft_indr_block_cb, + .list = LIST_HEAD_INIT(block_ing_entry.list), +}; + +static struct notifier_block nft_offload_netdev_notifier = { + .notifier_call = nft_offload_netdev_event, +}; + +int nft_offload_init(void) +{ + int err; + + err = register_netdevice_notifier(&nft_offload_netdev_notifier); + if (err < 0) + return err; + + flow_indr_add_block_ing_cb(&block_ing_entry); + + return 0; +} + +void nft_offload_exit(void) +{ + flow_indr_del_block_ing_cb(&block_ing_entry); + unregister_netdevice_notifier(&nft_offload_netdev_notifier); +} diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 6dee4f9a944c..0ba020ca38e6 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -385,6 +385,57 @@ nfulnl_timer(struct timer_list *t) instance_put(inst); } +static u32 nfulnl_get_bridge_size(const struct sk_buff *skb) +{ + u32 size = 0; + + if (!skb_mac_header_was_set(skb)) + return 0; + + if (skb_vlan_tag_present(skb)) { + size += nla_total_size(0); /* nested */ + size += nla_total_size(sizeof(u16)); /* id */ + size += nla_total_size(sizeof(u16)); /* tag */ + } + + if (skb->network_header > skb->mac_header) + size += nla_total_size(skb->network_header - skb->mac_header); + + return size; +} + +static int nfulnl_put_bridge(struct nfulnl_instance *inst, const struct sk_buff *skb) +{ + if (!skb_mac_header_was_set(skb)) + return 0; + + if (skb_vlan_tag_present(skb)) { + struct nlattr *nest; + + nest = nla_nest_start(inst->skb, NFULA_VLAN); + if (!nest) + goto nla_put_failure; + + if (nla_put_be16(inst->skb, NFULA_VLAN_TCI, htons(skb->vlan_tci)) || + nla_put_be16(inst->skb, NFULA_VLAN_PROTO, skb->vlan_proto)) + goto nla_put_failure; + + nla_nest_end(inst->skb, nest); + } + + if (skb->mac_header < skb->network_header) { + int len = (int)(skb->network_header - skb->mac_header); + + if (nla_put(inst->skb, NFULA_L2HDR, len, skb_mac_header(skb))) + goto nla_put_failure; + } + + return 0; + +nla_put_failure: + return -1; +} + /* This is an inline function, we don't really care about a long * list of arguments */ static inline int @@ -580,6 +631,10 @@ __build_packet_message(struct nfnl_log_net *log, NFULA_CT, NFULA_CT_INFO) < 0) goto nla_put_failure; + if ((pf == NFPROTO_NETDEV || pf == NFPROTO_BRIDGE) && + nfulnl_put_bridge(inst, skb) < 0) + goto nla_put_failure; + if (data_len) { struct nlattr *nla; int size = nla_attr_size(data_len); @@ -651,7 +706,7 @@ nfulnl_log_packet(struct net *net, /* FIXME: do we want to make the size calculation conditional based on * what is actually present? way more branches and checks, but more * memory efficient... 
*/ - size = nlmsg_total_size(sizeof(struct nfgenmsg)) + size = nlmsg_total_size(sizeof(struct nfgenmsg)) + nla_total_size(sizeof(struct nfulnl_msg_packet_hdr)) + nla_total_size(sizeof(u_int32_t)) /* ifindex */ + nla_total_size(sizeof(u_int32_t)) /* ifindex */ @@ -668,7 +723,7 @@ nfulnl_log_packet(struct net *net, + nla_total_size(sizeof(struct nfgenmsg)); /* NLMSG_DONE */ if (in && skb_mac_header_was_set(skb)) { - size += nla_total_size(skb->dev->hard_header_len) + size += nla_total_size(skb->dev->hard_header_len) + nla_total_size(sizeof(u_int16_t)) /* hwtype */ + nla_total_size(sizeof(u_int16_t)); /* hwlen */ } @@ -687,6 +742,8 @@ nfulnl_log_packet(struct net *net, size += nfnl_ct->build_size(ct); } } + if (pf == NFPROTO_NETDEV || pf == NFPROTO_BRIDGE) + size += nfulnl_get_bridge_size(skb); qthreshold = inst->qthreshold; /* per-rule qthreshold overrides per-instance */ diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index b6a7ce622c72..feabdfb22920 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -394,7 +394,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, char *secdata = NULL; u32 seclen = 0; - size = nlmsg_total_size(sizeof(struct nfgenmsg)) + size = nlmsg_total_size(sizeof(struct nfgenmsg)) + nla_total_size(sizeof(struct nfqnl_msg_packet_hdr)) + nla_total_size(sizeof(u_int32_t)) /* ifindex */ + nla_total_size(sizeof(u_int32_t)) /* ifindex */ @@ -453,7 +453,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, } if (queue->flags & NFQA_CFG_F_UID_GID) { - size += (nla_total_size(sizeof(u_int32_t)) /* uid */ + size += (nla_total_size(sizeof(u_int32_t)) /* uid */ + nla_total_size(sizeof(u_int32_t))); /* gid */ } diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c index b310b637b550..974300178fa9 100644 --- a/net/netfilter/nft_bitwise.c +++ b/net/netfilter/nft_bitwise.c @@ -13,6 +13,7 @@ #include <linux/netfilter/nf_tables.h> #include <net/netfilter/nf_tables_core.h> #include <net/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables_offload.h> struct nft_bitwise { enum nft_registers sreg:8; @@ -126,12 +127,30 @@ nla_put_failure: return -1; } +static struct nft_data zero; + +static int nft_bitwise_offload(struct nft_offload_ctx *ctx, + struct nft_flow_rule *flow, + const struct nft_expr *expr) +{ + const struct nft_bitwise *priv = nft_expr_priv(expr); + + if (memcmp(&priv->xor, &zero, sizeof(priv->xor)) || + priv->sreg != priv->dreg) + return -EOPNOTSUPP; + + memcpy(&ctx->regs[priv->dreg].mask, &priv->mask, sizeof(priv->mask)); + + return 0; +} + static const struct nft_expr_ops nft_bitwise_ops = { .type = &nft_bitwise_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_bitwise)), .eval = nft_bitwise_eval, .init = nft_bitwise_init, .dump = nft_bitwise_dump, + .offload = nft_bitwise_offload, }; struct nft_expr_type nft_bitwise_type __read_mostly = { diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c index e06318428ea0..12bed3f7bbc6 100644 --- a/net/netfilter/nft_byteorder.c +++ b/net/netfilter/nft_byteorder.c @@ -43,14 +43,15 @@ void nft_byteorder_eval(const struct nft_expr *expr, switch (priv->op) { case NFT_BYTEORDER_NTOH: for (i = 0; i < priv->len / 8; i++) { - src64 = get_unaligned((u64 *)&src[i]); - put_unaligned_be64(src64, &dst[i]); + src64 = nft_reg_load64(&src[i]); + nft_reg_store64(&dst[i], be64_to_cpu(src64)); } break; case NFT_BYTEORDER_HTON: for (i = 0; i < priv->len / 8; i++) { - src64 = get_unaligned_be64(&src[i]); 
- put_unaligned(src64, (u64 *)&dst[i]); + src64 = (__force __u64) + cpu_to_be64(nft_reg_load64(&src[i])); + nft_reg_store64(&dst[i], src64); } break; } diff --git a/net/netfilter/nft_dup_netdev.c b/net/netfilter/nft_dup_netdev.c index c6052fdd2c40..c2e78c160fd7 100644 --- a/net/netfilter/nft_dup_netdev.c +++ b/net/netfilter/nft_dup_netdev.c @@ -10,6 +10,7 @@ #include <linux/netfilter.h> #include <linux/netfilter/nf_tables.h> #include <net/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables_offload.h> #include <net/netfilter/nf_dup_netdev.h> struct nft_dup_netdev { @@ -56,6 +57,16 @@ nla_put_failure: return -1; } +static int nft_dup_netdev_offload(struct nft_offload_ctx *ctx, + struct nft_flow_rule *flow, + const struct nft_expr *expr) +{ + const struct nft_dup_netdev *priv = nft_expr_priv(expr); + int oif = ctx->regs[priv->sreg_dev].data.data[0]; + + return nft_fwd_dup_netdev_offload(ctx, flow, FLOW_ACTION_MIRRED, oif); +} + static struct nft_expr_type nft_dup_netdev_type; static const struct nft_expr_ops nft_dup_netdev_ops = { .type = &nft_dup_netdev_type, @@ -63,6 +74,7 @@ static const struct nft_expr_ops nft_dup_netdev_ops = { .eval = nft_dup_netdev_eval, .init = nft_dup_netdev_init, .dump = nft_dup_netdev_dump, + .offload = nft_dup_netdev_offload, }; static struct nft_expr_type nft_dup_netdev_type __read_mostly = { diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index 33833a0cb989..8887295414dc 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -84,6 +84,11 @@ void nft_dynset_eval(const struct nft_expr *expr, const struct nft_expr *sexpr; u64 timeout; + if (priv->op == NFT_DYNSET_OP_DELETE) { + set->ops->delete(set, &regs->data[priv->sreg_key]); + return; + } + if (set->ops->update(set, &regs->data[priv->sreg_key], nft_dynset_new, expr, regs, &ext)) { sexpr = NULL; @@ -161,6 +166,7 @@ static int nft_dynset_init(const struct nft_ctx *ctx, priv->op = ntohl(nla_get_be32(tb[NFTA_DYNSET_OP])); switch (priv->op) { case NFT_DYNSET_OP_ADD: + case NFT_DYNSET_OP_DELETE: break; case NFT_DYNSET_OP_UPDATE: if (!(set->flags & NFT_SET_TIMEOUT)) diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c index 01705ad74a9a..22cf236eb5d5 100644 --- a/net/netfilter/nft_flow_offload.c +++ b/net/netfilter/nft_flow_offload.c @@ -6,12 +6,13 @@ #include <linux/netfilter.h> #include <linux/workqueue.h> #include <linux/spinlock.h> +#include <linux/netfilter/nf_conntrack_common.h> #include <linux/netfilter/nf_tables.h> #include <net/ip.h> /* for ipv4 options. 
*/ #include <net/netfilter/nf_tables.h> #include <net/netfilter/nf_tables_core.h> #include <net/netfilter/nf_conntrack_core.h> -#include <linux/netfilter/nf_conntrack_common.h> +#include <net/netfilter/nf_conntrack_extend.h> #include <net/netfilter/nf_flow_table.h> struct nft_flow_offload { diff --git a/net/netfilter/nft_fwd_netdev.c b/net/netfilter/nft_fwd_netdev.c index 61b7f93ac681..aba11c2333f3 100644 --- a/net/netfilter/nft_fwd_netdev.c +++ b/net/netfilter/nft_fwd_netdev.c @@ -12,6 +12,7 @@ #include <linux/ip.h> #include <linux/ipv6.h> #include <net/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables_offload.h> #include <net/netfilter/nf_dup_netdev.h> #include <net/neighbour.h> #include <net/ip.h> @@ -63,6 +64,16 @@ nla_put_failure: return -1; } +static int nft_fwd_netdev_offload(struct nft_offload_ctx *ctx, + struct nft_flow_rule *flow, + const struct nft_expr *expr) +{ + const struct nft_fwd_netdev *priv = nft_expr_priv(expr); + int oif = ctx->regs[priv->sreg_dev].data.data[0]; + + return nft_fwd_dup_netdev_offload(ctx, flow, FLOW_ACTION_REDIRECT, oif); +} + struct nft_fwd_neigh { enum nft_registers sreg_dev:8; enum nft_registers sreg_addr:8; @@ -194,6 +205,7 @@ static const struct nft_expr_ops nft_fwd_netdev_ops = { .eval = nft_fwd_netdev_eval, .init = nft_fwd_netdev_init, .dump = nft_fwd_netdev_dump, + .offload = nft_fwd_netdev_offload, }; static const struct nft_expr_ops * diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c index ca2ae4b95a8d..c7f0ef73d939 100644 --- a/net/netfilter/nft_immediate.c +++ b/net/netfilter/nft_immediate.c @@ -125,17 +125,13 @@ static int nft_immediate_validate(const struct nft_ctx *ctx, return 0; } -static int nft_immediate_offload(struct nft_offload_ctx *ctx, - struct nft_flow_rule *flow, - const struct nft_expr *expr) +static int nft_immediate_offload_verdict(struct nft_offload_ctx *ctx, + struct nft_flow_rule *flow, + const struct nft_immediate_expr *priv) { - const struct nft_immediate_expr *priv = nft_expr_priv(expr); struct flow_action_entry *entry; const struct nft_data *data; - if (priv->dreg != NFT_REG_VERDICT) - return -EOPNOTSUPP; - entry = &flow->rule->action.entries[ctx->num_actions++]; data = &priv->data; @@ -153,6 +149,20 @@ static int nft_immediate_offload(struct nft_offload_ctx *ctx, return 0; } +static int nft_immediate_offload(struct nft_offload_ctx *ctx, + struct nft_flow_rule *flow, + const struct nft_expr *expr) +{ + const struct nft_immediate_expr *priv = nft_expr_priv(expr); + + if (priv->dreg == NFT_REG_VERDICT) + return nft_immediate_offload_verdict(ctx, flow, priv); + + memcpy(&ctx->regs[priv->dreg].data, &priv->data, sizeof(priv->data)); + + return 0; +} + static const struct nft_expr_ops nft_imm_ops = { .type = &nft_imm_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_immediate_expr)), diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index f69afb9ff3cb..317e3a9e8c5b 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -26,8 +26,36 @@ #include <uapi/linux/netfilter_bridge.h> /* NF_BR_PRE_ROUTING */ +#define NFT_META_SECS_PER_MINUTE 60 +#define NFT_META_SECS_PER_HOUR 3600 +#define NFT_META_SECS_PER_DAY 86400 +#define NFT_META_DAYS_PER_WEEK 7 + static DEFINE_PER_CPU(struct rnd_state, nft_prandom_state); +static u8 nft_meta_weekday(unsigned long secs) +{ + unsigned int dse; + u8 wday; + + secs -= NFT_META_SECS_PER_MINUTE * sys_tz.tz_minuteswest; + dse = secs / NFT_META_SECS_PER_DAY; + wday = (4 + dse) % NFT_META_DAYS_PER_WEEK; + + return wday; +} + +static u32 
nft_meta_hour(unsigned long secs) +{ + struct tm tm; + + time64_to_tm(secs, 0, &tm); + + return tm.tm_hour * NFT_META_SECS_PER_HOUR + + tm.tm_min * NFT_META_SECS_PER_MINUTE + + tm.tm_sec; +} + void nft_meta_get_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) @@ -218,6 +246,15 @@ void nft_meta_get_eval(const struct nft_expr *expr, goto err; strncpy((char *)dest, out->rtnl_link_ops->kind, IFNAMSIZ); break; + case NFT_META_TIME_NS: + nft_reg_store64(dest, ktime_get_real_ns()); + break; + case NFT_META_TIME_DAY: + nft_reg_store8(dest, nft_meta_weekday(get_seconds())); + break; + case NFT_META_TIME_HOUR: + *dest = nft_meta_hour(get_seconds()); + break; default: WARN_ON(1); goto err; @@ -330,6 +367,15 @@ int nft_meta_get_init(const struct nft_ctx *ctx, len = sizeof(u8); break; #endif + case NFT_META_TIME_NS: + len = sizeof(u64); + break; + case NFT_META_TIME_DAY: + len = sizeof(u8); + break; + case NFT_META_TIME_HOUR: + len = sizeof(u32); + break; default: return -EOPNOTSUPP; } diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c index c8745d454bf8..4413690591f2 100644 --- a/net/netfilter/nft_quota.c +++ b/net/netfilter/nft_quota.c @@ -13,7 +13,7 @@ #include <net/netfilter/nf_tables.h> struct nft_quota { - u64 quota; + atomic64_t quota; unsigned long flags; atomic64_t consumed; }; @@ -21,7 +21,8 @@ struct nft_quota { static inline bool nft_overquota(struct nft_quota *priv, const struct sk_buff *skb) { - return atomic64_add_return(skb->len, &priv->consumed) >= priv->quota; + return atomic64_add_return(skb->len, &priv->consumed) >= + atomic64_read(&priv->quota); } static inline bool nft_quota_invert(struct nft_quota *priv) @@ -89,7 +90,7 @@ static int nft_quota_do_init(const struct nlattr * const tb[], return -EOPNOTSUPP; } - priv->quota = quota; + atomic64_set(&priv->quota, quota); priv->flags = flags; atomic64_set(&priv->consumed, consumed); @@ -105,10 +106,22 @@ static int nft_quota_obj_init(const struct nft_ctx *ctx, return nft_quota_do_init(tb, priv); } +static void nft_quota_obj_update(struct nft_object *obj, + struct nft_object *newobj) +{ + struct nft_quota *newpriv = nft_obj_data(newobj); + struct nft_quota *priv = nft_obj_data(obj); + u64 newquota; + + newquota = atomic64_read(&newpriv->quota); + atomic64_set(&priv->quota, newquota); + priv->flags = newpriv->flags; +} + static int nft_quota_do_dump(struct sk_buff *skb, struct nft_quota *priv, bool reset) { - u64 consumed, consumed_cap; + u64 consumed, consumed_cap, quota; u32 flags = priv->flags; /* Since we inconditionally increment consumed quota for each packet @@ -116,14 +129,15 @@ static int nft_quota_do_dump(struct sk_buff *skb, struct nft_quota *priv, * userspace. 
*/ consumed = atomic64_read(&priv->consumed); - if (consumed >= priv->quota) { - consumed_cap = priv->quota; + quota = atomic64_read(&priv->quota); + if (consumed >= quota) { + consumed_cap = quota; flags |= NFT_QUOTA_F_DEPLETED; } else { consumed_cap = consumed; } - if (nla_put_be64(skb, NFTA_QUOTA_BYTES, cpu_to_be64(priv->quota), + if (nla_put_be64(skb, NFTA_QUOTA_BYTES, cpu_to_be64(quota), NFTA_QUOTA_PAD) || nla_put_be64(skb, NFTA_QUOTA_CONSUMED, cpu_to_be64(consumed_cap), NFTA_QUOTA_PAD) || @@ -155,6 +169,7 @@ static const struct nft_object_ops nft_quota_obj_ops = { .init = nft_quota_obj_init, .eval = nft_quota_obj_eval, .dump = nft_quota_obj_dump, + .update = nft_quota_obj_update, }; static struct nft_object_type nft_quota_obj_type __read_mostly = { diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c index b5aeccdddb22..087a056e34d1 100644 --- a/net/netfilter/nft_set_bitmap.c +++ b/net/netfilter/nft_set_bitmap.c @@ -10,7 +10,7 @@ #include <linux/netlink.h> #include <linux/netfilter.h> #include <linux/netfilter/nf_tables.h> -#include <net/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables_core.h> struct nft_bitmap_elem { struct list_head head; diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index 6e8d20c03e3d..b331a3c9a3a8 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -16,7 +16,7 @@ #include <linux/rhashtable.h> #include <linux/netfilter.h> #include <linux/netfilter/nf_tables.h> -#include <net/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables_core.h> /* We target a hash table size of 4, element hint is 75% of final size */ #define NFT_RHASH_ELEMENT_HINT 3 @@ -234,6 +234,24 @@ static void nft_rhash_remove(const struct net *net, rhashtable_remove_fast(&priv->ht, &he->node, nft_rhash_params); } +static bool nft_rhash_delete(const struct nft_set *set, + const u32 *key) +{ + struct nft_rhash *priv = nft_set_priv(set); + struct nft_rhash_cmp_arg arg = { + .genmask = NFT_GENMASK_ANY, + .set = set, + .key = key, + }; + struct nft_rhash_elem *he; + + he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params); + if (he == NULL) + return false; + + return rhashtable_remove_fast(&priv->ht, &he->node, nft_rhash_params) == 0; +} + static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_iter *iter) { @@ -662,6 +680,7 @@ struct nft_set_type nft_set_rhash_type __read_mostly = { .remove = nft_rhash_remove, .lookup = nft_rhash_lookup, .update = nft_rhash_update, + .delete = nft_rhash_delete, .walk = nft_rhash_walk, .get = nft_rhash_get, }, diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index 419d58ef802b..57123259452f 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -13,7 +13,7 @@ #include <linux/netlink.h> #include <linux/netfilter.h> #include <linux/netfilter/nf_tables.h> -#include <net/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables_core.h> struct nft_rbtree { struct rb_root root; diff --git a/net/netfilter/nft_synproxy.c b/net/netfilter/nft_synproxy.c index 928e661d1517..e2c1fc608841 100644 --- a/net/netfilter/nft_synproxy.c +++ b/net/netfilter/nft_synproxy.c @@ -24,15 +24,15 @@ static void nft_synproxy_tcp_options(struct synproxy_options *opts, const struct tcphdr *tcp, struct synproxy_net *snet, struct nf_synproxy_info *info, - struct nft_synproxy *priv) + const struct nft_synproxy *priv) { this_cpu_inc(snet->stats->syn_received); if (tcp->ece && tcp->cwr) opts->options |= NF_SYNPROXY_OPT_ECN; 
opts->options &= priv->info.options; - opts->mss_encode = opts->mss; - opts->mss = info->mss; + opts->mss_encode = opts->mss_option; + opts->mss_option = info->mss; if (opts->options & NF_SYNPROXY_OPT_TIMESTAMP) synproxy_init_timestamp_cookie(info, opts); else @@ -41,14 +41,13 @@ static void nft_synproxy_tcp_options(struct synproxy_options *opts, NF_SYNPROXY_OPT_ECN); } -static void nft_synproxy_eval_v4(const struct nft_expr *expr, +static void nft_synproxy_eval_v4(const struct nft_synproxy *priv, struct nft_regs *regs, const struct nft_pktinfo *pkt, const struct tcphdr *tcp, struct tcphdr *_tcph, struct synproxy_options *opts) { - struct nft_synproxy *priv = nft_expr_priv(expr); struct nf_synproxy_info info = priv->info; struct net *net = nft_net(pkt); struct synproxy_net *snet = synproxy_pernet(net); @@ -73,14 +72,13 @@ static void nft_synproxy_eval_v4(const struct nft_expr *expr, } #if IS_ENABLED(CONFIG_NF_TABLES_IPV6) -static void nft_synproxy_eval_v6(const struct nft_expr *expr, +static void nft_synproxy_eval_v6(const struct nft_synproxy *priv, struct nft_regs *regs, const struct nft_pktinfo *pkt, const struct tcphdr *tcp, struct tcphdr *_tcph, struct synproxy_options *opts) { - struct nft_synproxy *priv = nft_expr_priv(expr); struct nf_synproxy_info info = priv->info; struct net *net = nft_net(pkt); struct synproxy_net *snet = synproxy_pernet(net); @@ -105,9 +103,9 @@ static void nft_synproxy_eval_v6(const struct nft_expr *expr, } #endif /* CONFIG_NF_TABLES_IPV6*/ -static void nft_synproxy_eval(const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_pktinfo *pkt) +static void nft_synproxy_do_eval(const struct nft_synproxy *priv, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) { struct synproxy_options opts = {}; struct sk_buff *skb = pkt->skb; @@ -140,23 +138,22 @@ static void nft_synproxy_eval(const struct nft_expr *expr, switch (skb->protocol) { case htons(ETH_P_IP): - nft_synproxy_eval_v4(expr, regs, pkt, tcp, &_tcph, &opts); + nft_synproxy_eval_v4(priv, regs, pkt, tcp, &_tcph, &opts); return; #if IS_ENABLED(CONFIG_NF_TABLES_IPV6) case htons(ETH_P_IPV6): - nft_synproxy_eval_v6(expr, regs, pkt, tcp, &_tcph, &opts); + nft_synproxy_eval_v6(priv, regs, pkt, tcp, &_tcph, &opts); return; #endif } regs->verdict.code = NFT_BREAK; } -static int nft_synproxy_init(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nlattr * const tb[]) +static int nft_synproxy_do_init(const struct nft_ctx *ctx, + const struct nlattr * const tb[], + struct nft_synproxy *priv) { struct synproxy_net *snet = synproxy_pernet(ctx->net); - struct nft_synproxy *priv = nft_expr_priv(expr); u32 flags; int err; @@ -206,8 +203,7 @@ nf_ct_failure: return err; } -static void nft_synproxy_destroy(const struct nft_ctx *ctx, - const struct nft_expr *expr) +static void nft_synproxy_do_destroy(const struct nft_ctx *ctx) { struct synproxy_net *snet = synproxy_pernet(ctx->net); @@ -229,10 +225,8 @@ static void nft_synproxy_destroy(const struct nft_ctx *ctx, nf_ct_netns_put(ctx->net, ctx->family); } -static int nft_synproxy_dump(struct sk_buff *skb, const struct nft_expr *expr) +static int nft_synproxy_do_dump(struct sk_buff *skb, struct nft_synproxy *priv) { - const struct nft_synproxy *priv = nft_expr_priv(expr); - if (nla_put_be16(skb, NFTA_SYNPROXY_MSS, htons(priv->info.mss)) || nla_put_u8(skb, NFTA_SYNPROXY_WSCALE, priv->info.wscale) || nla_put_be32(skb, NFTA_SYNPROXY_FLAGS, htonl(priv->info.options))) @@ -244,6 +238,15 @@ nla_put_failure: return -1; } +static void 
nft_synproxy_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + const struct nft_synproxy *priv = nft_expr_priv(expr); + + nft_synproxy_do_eval(priv, regs, pkt); +} + static int nft_synproxy_validate(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nft_data **data) @@ -252,6 +255,28 @@ static int nft_synproxy_validate(const struct nft_ctx *ctx, (1 << NF_INET_FORWARD)); } +static int nft_synproxy_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_synproxy *priv = nft_expr_priv(expr); + + return nft_synproxy_do_init(ctx, tb, priv); +} + +static void nft_synproxy_destroy(const struct nft_ctx *ctx, + const struct nft_expr *expr) +{ + nft_synproxy_do_destroy(ctx); +} + +static int nft_synproxy_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + struct nft_synproxy *priv = nft_expr_priv(expr); + + return nft_synproxy_do_dump(skb, priv); +} + static struct nft_expr_type nft_synproxy_type; static const struct nft_expr_ops nft_synproxy_ops = { .eval = nft_synproxy_eval, @@ -271,14 +296,89 @@ static struct nft_expr_type nft_synproxy_type __read_mostly = { .maxattr = NFTA_SYNPROXY_MAX, }; +static int nft_synproxy_obj_init(const struct nft_ctx *ctx, + const struct nlattr * const tb[], + struct nft_object *obj) +{ + struct nft_synproxy *priv = nft_obj_data(obj); + + return nft_synproxy_do_init(ctx, tb, priv); +} + +static void nft_synproxy_obj_destroy(const struct nft_ctx *ctx, + struct nft_object *obj) +{ + nft_synproxy_do_destroy(ctx); +} + +static int nft_synproxy_obj_dump(struct sk_buff *skb, + struct nft_object *obj, bool reset) +{ + struct nft_synproxy *priv = nft_obj_data(obj); + + return nft_synproxy_do_dump(skb, priv); +} + +static void nft_synproxy_obj_eval(struct nft_object *obj, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + const struct nft_synproxy *priv = nft_obj_data(obj); + + nft_synproxy_do_eval(priv, regs, pkt); +} + +static void nft_synproxy_obj_update(struct nft_object *obj, + struct nft_object *newobj) +{ + struct nft_synproxy *newpriv = nft_obj_data(newobj); + struct nft_synproxy *priv = nft_obj_data(obj); + + priv->info = newpriv->info; +} + +static struct nft_object_type nft_synproxy_obj_type; +static const struct nft_object_ops nft_synproxy_obj_ops = { + .type = &nft_synproxy_obj_type, + .size = sizeof(struct nft_synproxy), + .init = nft_synproxy_obj_init, + .destroy = nft_synproxy_obj_destroy, + .dump = nft_synproxy_obj_dump, + .eval = nft_synproxy_obj_eval, + .update = nft_synproxy_obj_update, +}; + +static struct nft_object_type nft_synproxy_obj_type __read_mostly = { + .type = NFT_OBJECT_SYNPROXY, + .ops = &nft_synproxy_obj_ops, + .maxattr = NFTA_SYNPROXY_MAX, + .policy = nft_synproxy_policy, + .owner = THIS_MODULE, +}; + static int __init nft_synproxy_module_init(void) { - return nft_register_expr(&nft_synproxy_type); + int err; + + err = nft_register_obj(&nft_synproxy_obj_type); + if (err < 0) + return err; + + err = nft_register_expr(&nft_synproxy_type); + if (err < 0) + goto err; + + return 0; + +err: + nft_unregister_obj(&nft_synproxy_obj_type); + return err; } static void __exit nft_synproxy_module_exit(void) { - return nft_unregister_expr(&nft_synproxy_type); + nft_unregister_expr(&nft_synproxy_type); + nft_unregister_obj(&nft_synproxy_obj_type); } module_init(nft_synproxy_module_init); @@ -287,3 +387,4 @@ module_exit(nft_synproxy_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Fernando Fernandez 
<ffmancera@riseup.net>"); MODULE_ALIAS_NFT_EXPR("synproxy"); +MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_SYNPROXY); diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c index 9cec9eae556a..f56d3ed93e56 100644 --- a/net/netfilter/xt_IDLETIMER.c +++ b/net/netfilter/xt_IDLETIMER.c @@ -283,7 +283,7 @@ static int __init idletimer_tg_init(void) idletimer_tg_kobj = &idletimer_tg_device->kobj; - err = xt_register_target(&idletimer_tg); + err = xt_register_target(&idletimer_tg); if (err < 0) { pr_debug("couldn't register xt target\n"); goto out_dev; diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index bc6c8ab0fa62..46fcac75f726 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -13,6 +13,8 @@ */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/ip.h> +#include <linux/ipv6.h> #include <linux/module.h> #include <linux/skbuff.h> #include <linux/netfilter/x_tables.h> diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 2d2691dd51e0..ced3fc8fad7c 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -34,9 +34,14 @@ #include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv4/ip_tables.h> #include <linux/netfilter_ipv6/ip6_tables.h> -#include <linux/netfilter/xt_hashlimit.h> #include <linux/mutex.h> #include <linux/kernel.h> +#include <uapi/linux/netfilter/xt_hashlimit.h> + +#define XT_HASHLIMIT_ALL (XT_HASHLIMIT_HASH_DIP | XT_HASHLIMIT_HASH_DPT | \ + XT_HASHLIMIT_HASH_SIP | XT_HASHLIMIT_HASH_SPT | \ + XT_HASHLIMIT_INVERT | XT_HASHLIMIT_BYTES |\ + XT_HASHLIMIT_RATE_MATCH) MODULE_LICENSE("GPL"); MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c index b92b22ce8abd..ec6ed6fda96c 100644 --- a/net/netfilter/xt_physdev.c +++ b/net/netfilter/xt_physdev.c @@ -5,12 +5,13 @@ /* (C) 2001-2003 Bart De Schuymer <bdschuym@pandora.be> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/if.h> #include <linux/module.h> #include <linux/skbuff.h> #include <linux/netfilter_bridge.h> -#include <linux/netfilter/xt_physdev.h> #include <linux/netfilter/x_tables.h> -#include <net/netfilter/br_netfilter.h> +#include <uapi/linux/netfilter/xt_physdev.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Bart De Schuymer <bdschuym@pandora.be>"); diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c index ecbfa291fb70..731bc2cafae4 100644 --- a/net/netfilter/xt_set.c +++ b/net/netfilter/xt_set.c @@ -14,7 +14,6 @@ #include <linux/netfilter/x_tables.h> #include <linux/netfilter/ipset/ip_set.h> -#include <linux/netfilter/ipset/ip_set_timeout.h> #include <uapi/linux/netfilter/xt_set.h> MODULE_LICENSE("GPL"); diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c index 2b0ef55cf89e..409a3ae47ce2 100644 --- a/net/netlabel/netlabel_kapi.c +++ b/net/netlabel/netlabel_kapi.c @@ -607,7 +607,7 @@ catmap_getnode_alloc: */ int netlbl_catmap_walk(struct netlbl_lsm_catmap *catmap, u32 offset) { - struct netlbl_lsm_catmap *iter = catmap; + struct netlbl_lsm_catmap *iter; u32 idx; u32 bit; NETLBL_CATMAP_MAPTYPE bitmap; diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index ea64c90b14e8..17e6ca62f1be 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -970,7 +970,8 @@ static int nfc_genl_dep_link_down(struct sk_buff *skb, struct genl_info *info) int rc; u32 idx; - if (!info->attrs[NFC_ATTR_DEVICE_INDEX]) + if (!info->attrs[NFC_ATTR_DEVICE_INDEX] || + !info->attrs[NFC_ATTR_TARGET_INDEX]) return -EINVAL; idx = 
nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); @@ -1018,7 +1019,8 @@ static int nfc_genl_llc_get_params(struct sk_buff *skb, struct genl_info *info) struct sk_buff *msg = NULL; u32 idx; - if (!info->attrs[NFC_ATTR_DEVICE_INDEX]) + if (!info->attrs[NFC_ATTR_DEVICE_INDEX] || + !info->attrs[NFC_ATTR_FIRMWARE_NAME]) return -EINVAL; idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index d01410e52097..dde9d762edee 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -222,6 +222,7 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key) struct dp_stats_percpu *stats; u64 *stats_counter; u32 n_mask_hit; + int error; stats = this_cpu_ptr(dp->stats_percpu); @@ -229,7 +230,6 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key) flow = ovs_flow_tbl_lookup_stats(&dp->table, key, &n_mask_hit); if (unlikely(!flow)) { struct dp_upcall_info upcall; - int error; memset(&upcall, 0, sizeof(upcall)); upcall.cmd = OVS_PACKET_CMD_MISS; @@ -246,7 +246,10 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key) ovs_flow_stats_update(flow, key->tp.flags, skb); sf_acts = rcu_dereference(flow->sf_acts); - ovs_execute_actions(dp, skb, sf_acts, key); + error = ovs_execute_actions(dp, skb, sf_acts, key); + if (unlikely(error)) + net_dbg_ratelimited("ovs: action execution error on datapath %s: %d\n", + ovs_dp_name(dp), error); stats_counter = &stats->n_hit; @@ -1542,10 +1545,34 @@ static void ovs_dp_reset_user_features(struct sk_buff *skb, struct genl_info *in dp->user_features = 0; } -static void ovs_dp_change(struct datapath *dp, struct nlattr *a[]) +DEFINE_STATIC_KEY_FALSE(tc_recirc_sharing_support); + +static int ovs_dp_change(struct datapath *dp, struct nlattr *a[]) { - if (a[OVS_DP_ATTR_USER_FEATURES]) - dp->user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]); + u32 user_features = 0; + + if (a[OVS_DP_ATTR_USER_FEATURES]) { + user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]); + + if (user_features & ~(OVS_DP_F_VPORT_PIDS | + OVS_DP_F_UNALIGNED | + OVS_DP_F_TC_RECIRC_SHARING)) + return -EOPNOTSUPP; + +#if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT) + if (user_features & OVS_DP_F_TC_RECIRC_SHARING) + return -EOPNOTSUPP; +#endif + } + + dp->user_features = user_features; + + if (dp->user_features & OVS_DP_F_TC_RECIRC_SHARING) + static_branch_enable(&tc_recirc_sharing_support); + else + static_branch_disable(&tc_recirc_sharing_support); + + return 0; } static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) @@ -1607,7 +1634,9 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) parms.port_no = OVSP_LOCAL; parms.upcall_portids = a[OVS_DP_ATTR_UPCALL_PID]; - ovs_dp_change(dp, a); + err = ovs_dp_change(dp, a); + if (err) + goto err_destroy_meters; /* So far only local changes have been made, now need the lock. 
*/ ovs_lock(); @@ -1733,7 +1762,9 @@ static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info) if (IS_ERR(dp)) goto err_unlock_free; - ovs_dp_change(dp, info->attrs); + err = ovs_dp_change(dp, info->attrs); + if (err) + goto err_unlock_free; err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid, info->snd_seq, 0, OVS_DP_CMD_SET); diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index 751d34accdf9..81e85dde8217 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -218,6 +218,8 @@ static inline struct datapath *get_dp(struct net *net, int dp_ifindex) extern struct notifier_block ovs_dp_device_notifier; extern struct genl_family dp_vport_genl_family; +DECLARE_STATIC_KEY_FALSE(tc_recirc_sharing_support); + void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key); void ovs_dp_detach_port(struct vport *); int ovs_dp_upcall(struct datapath *, struct sk_buff *, diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 9d81d2c7bf82..38147e6a20f5 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -842,6 +842,9 @@ static int key_extract_mac_proto(struct sk_buff *skb) int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info, struct sk_buff *skb, struct sw_flow_key *key) { +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) + struct tc_skb_ext *tc_ext; +#endif int res, err; /* Extract metadata from packet. */ @@ -874,7 +877,17 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info, if (res < 0) return res; key->mac_proto = res; + +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) + if (static_branch_unlikely(&tc_recirc_sharing_support)) { + tc_ext = skb_ext_find(skb, TC_SKB_EXT); + key->recirc_id = tc_ext ? tc_ext->chain : 0; + } else { + key->recirc_id = 0; + } +#else key->recirc_id = 0; +#endif err = key_extract(skb, key); if (!err) diff --git a/net/psample/psample.c b/net/psample/psample.c index 66e4b61a350d..a6ceb0533b5b 100644 --- a/net/psample/psample.c +++ b/net/psample/psample.c @@ -73,7 +73,7 @@ static int psample_nl_cmd_get_group_dumpit(struct sk_buff *msg, int idx = 0; int err; - spin_lock(&psample_groups_lock); + spin_lock_bh(&psample_groups_lock); list_for_each_entry(group, &psample_groups_list, list) { if (!net_eq(group->net, sock_net(msg->sk))) continue; @@ -89,7 +89,7 @@ static int psample_nl_cmd_get_group_dumpit(struct sk_buff *msg, idx++; } - spin_unlock(&psample_groups_lock); + spin_unlock_bh(&psample_groups_lock); cb->args[0] = idx; return msg->len; } @@ -172,7 +172,7 @@ struct psample_group *psample_group_get(struct net *net, u32 group_num) { struct psample_group *group; - spin_lock(&psample_groups_lock); + spin_lock_bh(&psample_groups_lock); group = psample_group_lookup(net, group_num); if (!group) { @@ -183,19 +183,27 @@ struct psample_group *psample_group_get(struct net *net, u32 group_num) group->refcount++; out: - spin_unlock(&psample_groups_lock); + spin_unlock_bh(&psample_groups_lock); return group; } EXPORT_SYMBOL_GPL(psample_group_get); +void psample_group_take(struct psample_group *group) +{ + spin_lock_bh(&psample_groups_lock); + group->refcount++; + spin_unlock_bh(&psample_groups_lock); +} +EXPORT_SYMBOL_GPL(psample_group_take); + void psample_group_put(struct psample_group *group) { - spin_lock(&psample_groups_lock); + spin_lock_bh(&psample_groups_lock); if (--group->refcount == 0) psample_group_destroy(group); - spin_unlock(&psample_groups_lock); + spin_unlock_bh(&psample_groups_lock); } EXPORT_SYMBOL_GPL(psample_group_put); diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c index 
2b969f99ef13..1a5bf3fa4578 100644 --- a/net/rds/af_rds.c +++ b/net/rds/af_rds.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2006, 2019 Oracle and/or its affiliates. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -559,7 +559,7 @@ static int rds_connect(struct socket *sock, struct sockaddr *uaddr, ret = -EDESTADDRREQ; break; } - if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)) || + if (ipv4_is_multicast(sin->sin_addr.s_addr) || sin->sin_addr.s_addr == htonl(INADDR_BROADCAST)) { ret = -EINVAL; break; @@ -593,7 +593,7 @@ static int rds_connect(struct socket *sock, struct sockaddr *uaddr, addr4 = sin6->sin6_addr.s6_addr32[3]; if (addr4 == htonl(INADDR_ANY) || addr4 == htonl(INADDR_BROADCAST) || - IN_MULTICAST(ntohl(addr4))) { + ipv4_is_multicast(addr4)) { ret = -EPROTOTYPE; break; } @@ -705,7 +705,7 @@ static int rds_create(struct net *net, struct socket *sock, int protocol, if (sock->type != SOCK_SEQPACKET || protocol) return -ESOCKTNOSUPPORT; - sk = sk_alloc(net, AF_RDS, GFP_ATOMIC, &rds_proto, kern); + sk = sk_alloc(net, AF_RDS, GFP_KERNEL, &rds_proto, kern); if (!sk) return -ENOMEM; @@ -741,6 +741,10 @@ static void rds_sock_inc_info(struct socket *sock, unsigned int len, spin_lock_bh(&rds_sock_lock); list_for_each_entry(rs, &rds_sock_list, rs_item) { + /* This option only supports IPv4 sockets. */ + if (!ipv6_addr_v4mapped(&rs->rs_bound_addr)) + continue; + read_lock(&rs->rs_recv_lock); /* XXX too lazy to maintain counts.. */ @@ -762,21 +766,60 @@ static void rds_sock_inc_info(struct socket *sock, unsigned int len, lens->each = sizeof(struct rds_info_message); } +#if IS_ENABLED(CONFIG_IPV6) +static void rds6_sock_inc_info(struct socket *sock, unsigned int len, + struct rds_info_iterator *iter, + struct rds_info_lengths *lens) +{ + struct rds_incoming *inc; + unsigned int total = 0; + struct rds_sock *rs; + + len /= sizeof(struct rds6_info_message); + + spin_lock_bh(&rds_sock_lock); + + list_for_each_entry(rs, &rds_sock_list, rs_item) { + read_lock(&rs->rs_recv_lock); + + list_for_each_entry(inc, &rs->rs_recv_queue, i_item) { + total++; + if (total <= len) + rds6_inc_info_copy(inc, iter, &inc->i_saddr, + &rs->rs_bound_addr, 1); + } + + read_unlock(&rs->rs_recv_lock); + } + + spin_unlock_bh(&rds_sock_lock); + + lens->nr = total; + lens->each = sizeof(struct rds6_info_message); +} +#endif + static void rds_sock_info(struct socket *sock, unsigned int len, struct rds_info_iterator *iter, struct rds_info_lengths *lens) { struct rds_info_socket sinfo; + unsigned int cnt = 0; struct rds_sock *rs; len /= sizeof(struct rds_info_socket); spin_lock_bh(&rds_sock_lock); - if (len < rds_sock_count) + if (len < rds_sock_count) { + cnt = rds_sock_count; goto out; + } list_for_each_entry(rs, &rds_sock_list, rs_item) { + /* This option only supports IPv4 sockets. 
*/ + if (!ipv6_addr_v4mapped(&rs->rs_bound_addr)) + continue; sinfo.sndbuf = rds_sk_sndbuf(rs); sinfo.rcvbuf = rds_sk_rcvbuf(rs); sinfo.bound_addr = rs->rs_bound_addr_v4; @@ -786,15 +829,51 @@ static void rds_sock_info(struct socket *sock, unsigned int len, sinfo.inum = sock_i_ino(rds_rs_to_sk(rs)); rds_info_copy(iter, &sinfo, sizeof(sinfo)); + cnt++; } out: - lens->nr = rds_sock_count; + lens->nr = cnt; lens->each = sizeof(struct rds_info_socket); spin_unlock_bh(&rds_sock_lock); } +#if IS_ENABLED(CONFIG_IPV6) +static void rds6_sock_info(struct socket *sock, unsigned int len, + struct rds_info_iterator *iter, + struct rds_info_lengths *lens) +{ + struct rds6_info_socket sinfo6; + struct rds_sock *rs; + + len /= sizeof(struct rds6_info_socket); + + spin_lock_bh(&rds_sock_lock); + + if (len < rds_sock_count) + goto out; + + list_for_each_entry(rs, &rds_sock_list, rs_item) { + sinfo6.sndbuf = rds_sk_sndbuf(rs); + sinfo6.rcvbuf = rds_sk_rcvbuf(rs); + sinfo6.bound_addr = rs->rs_bound_addr; + sinfo6.connected_addr = rs->rs_conn_addr; + sinfo6.bound_port = rs->rs_bound_port; + sinfo6.connected_port = rs->rs_conn_port; + sinfo6.inum = sock_i_ino(rds_rs_to_sk(rs)); + + rds_info_copy(iter, &sinfo6, sizeof(sinfo6)); + } + + out: + lens->nr = rds_sock_count; + lens->each = sizeof(struct rds6_info_socket); + + spin_unlock_bh(&rds_sock_lock); +} +#endif + static void rds_exit(void) { sock_unregister(rds_family_ops.family); @@ -808,6 +887,10 @@ static void rds_exit(void) rds_bind_lock_destroy(); rds_info_deregister_func(RDS_INFO_SOCKETS, rds_sock_info); rds_info_deregister_func(RDS_INFO_RECV_MESSAGES, rds_sock_inc_info); +#if IS_ENABLED(CONFIG_IPV6) + rds_info_deregister_func(RDS6_INFO_SOCKETS, rds6_sock_info); + rds_info_deregister_func(RDS6_INFO_RECV_MESSAGES, rds6_sock_inc_info); +#endif } module_exit(rds_exit); @@ -845,6 +928,10 @@ static int rds_init(void) rds_info_register_func(RDS_INFO_SOCKETS, rds_sock_info); rds_info_register_func(RDS_INFO_RECV_MESSAGES, rds_sock_inc_info); +#if IS_ENABLED(CONFIG_IPV6) + rds_info_register_func(RDS6_INFO_SOCKETS, rds6_sock_info); + rds_info_register_func(RDS6_INFO_RECV_MESSAGES, rds6_sock_inc_info); +#endif goto out; diff --git a/net/rds/bind.c b/net/rds/bind.c index 05464fd7c17a..20c156a73e73 100644 --- a/net/rds/bind.c +++ b/net/rds/bind.c @@ -181,7 +181,7 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (addr_len < sizeof(struct sockaddr_in) || sin->sin_addr.s_addr == htonl(INADDR_ANY) || sin->sin_addr.s_addr == htonl(INADDR_BROADCAST) || - IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) + ipv4_is_multicast(sin->sin_addr.s_addr)) return -EINVAL; ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, &v6addr); binding_addr = &v6addr; @@ -206,7 +206,7 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) addr4 = sin6->sin6_addr.s6_addr32[3]; if (addr4 == htonl(INADDR_ANY) || addr4 == htonl(INADDR_BROADCAST) || - IN_MULTICAST(ntohl(addr4))) + ipv4_is_multicast(addr4)) return -EINVAL; } /* The scope ID must be specified for link local address. 
*/ diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c index 3cae88cbdaa0..a0f99bbf362c 100644 --- a/net/rds/ib_recv.c +++ b/net/rds/ib_recv.c @@ -385,6 +385,7 @@ void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp) unsigned int posted = 0; int ret = 0; bool can_wait = !!(gfp & __GFP_DIRECT_RECLAIM); + bool must_wake = false; u32 pos; /* the goal here is to just make sure that someone, somewhere @@ -405,6 +406,7 @@ void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp) recv = &ic->i_recvs[pos]; ret = rds_ib_recv_refill_one(conn, recv, gfp); if (ret) { + must_wake = true; break; } @@ -423,6 +425,11 @@ void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp) } posted++; + + if ((posted > 128 && need_resched()) || posted > 8192) { + must_wake = true; + break; + } } /* We're doing flow control - update the window. */ @@ -445,10 +452,13 @@ void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp) * if we should requeue. */ if (rds_conn_up(conn) && - ((can_wait && rds_ib_ring_low(&ic->i_recv_ring)) || + (must_wake || + (can_wait && rds_ib_ring_low(&ic->i_recv_ring)) || rds_ib_ring_empty(&ic->i_recv_ring))) { queue_delayed_work(rds_wq, &conn->c_recv_w, 1); } + if (can_wait) + cond_resched(); } /* @@ -1038,9 +1048,14 @@ int rds_ib_recv_init(void) si_meminfo(&si); rds_ib_sysctl_max_recv_allocation = si.totalram / 3 * PAGE_SIZE / RDS_FRAG_SIZE; - rds_ib_incoming_slab = kmem_cache_create("rds_ib_incoming", - sizeof(struct rds_ib_incoming), - 0, SLAB_HWCACHE_ALIGN, NULL); + rds_ib_incoming_slab = + kmem_cache_create_usercopy("rds_ib_incoming", + sizeof(struct rds_ib_incoming), + 0, SLAB_HWCACHE_ALIGN, + offsetof(struct rds_ib_incoming, + ii_inc.i_usercopy), + sizeof(struct rds_inc_usercopy), + NULL); if (!rds_ib_incoming_slab) goto out; diff --git a/net/rds/ib_stats.c b/net/rds/ib_stats.c index 9252ad126335..ac46d8961b61 100644 --- a/net/rds/ib_stats.c +++ b/net/rds/ib_stats.c @@ -42,7 +42,7 @@ DEFINE_PER_CPU_SHARED_ALIGNED(struct rds_ib_statistics, rds_ib_stats); static const char *const rds_ib_stat_names[] = { "ib_connect_raced", "ib_listen_closed_stale", - "s_ib_evt_handler_call", + "ib_evt_handler_call", "ib_tasklet_call", "ib_tx_cq_event", "ib_tx_ring_full", diff --git a/net/rds/rds.h b/net/rds/rds.h index f0066d168499..53e86911773a 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -271,6 +271,12 @@ struct rds_ext_header_rdma_dest { #define RDS_MSG_RX_END 2 #define RDS_MSG_RX_CMSG 3 +/* The following values are whitelisted for usercopy */ +struct rds_inc_usercopy { + rds_rdma_cookie_t rdma_cookie; + ktime_t rx_tstamp; +}; + struct rds_incoming { refcount_t i_refcount; struct list_head i_item; @@ -280,8 +286,7 @@ struct rds_incoming { unsigned long i_rx_jiffies; struct in6_addr i_saddr; - rds_rdma_cookie_t i_rdma_cookie; - ktime_t i_rx_tstamp; + struct rds_inc_usercopy i_usercopy; u64 i_rx_lat_trace[RDS_RX_MAX_TRACES]; }; @@ -717,7 +722,7 @@ struct rds_statistics { uint64_t s_cong_send_blocked; uint64_t s_recv_bytes_added_to_socket; uint64_t s_recv_bytes_removed_from_socket; - + uint64_t s_send_stuck_rm; }; /* af_rds.c */ diff --git a/net/rds/recv.c b/net/rds/recv.c index a42ba7fa06d5..c8404971d5ab 100644 --- a/net/rds/recv.c +++ b/net/rds/recv.c @@ -47,8 +47,8 @@ void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn, INIT_LIST_HEAD(&inc->i_item); inc->i_conn = conn; inc->i_saddr = *saddr; - inc->i_rdma_cookie = 0; - inc->i_rx_tstamp = ktime_set(0, 0); + inc->i_usercopy.rdma_cookie = 0; + 
inc->i_usercopy.rx_tstamp = ktime_set(0, 0); memset(inc->i_rx_lat_trace, 0, sizeof(inc->i_rx_lat_trace)); } @@ -62,8 +62,8 @@ void rds_inc_path_init(struct rds_incoming *inc, struct rds_conn_path *cp, inc->i_conn = cp->cp_conn; inc->i_conn_path = cp; inc->i_saddr = *saddr; - inc->i_rdma_cookie = 0; - inc->i_rx_tstamp = ktime_set(0, 0); + inc->i_usercopy.rdma_cookie = 0; + inc->i_usercopy.rx_tstamp = ktime_set(0, 0); } EXPORT_SYMBOL_GPL(rds_inc_path_init); @@ -186,7 +186,7 @@ static void rds_recv_incoming_exthdrs(struct rds_incoming *inc, struct rds_sock case RDS_EXTHDR_RDMA_DEST: /* We ignore the size for now. We could stash it * somewhere and use it for error checking. */ - inc->i_rdma_cookie = rds_rdma_make_cookie( + inc->i_usercopy.rdma_cookie = rds_rdma_make_cookie( be32_to_cpu(buffer.rdma_dest.h_rdma_rkey), be32_to_cpu(buffer.rdma_dest.h_rdma_offset)); @@ -380,7 +380,7 @@ void rds_recv_incoming(struct rds_connection *conn, struct in6_addr *saddr, be32_to_cpu(inc->i_hdr.h_len), inc->i_hdr.h_dport); if (sock_flag(sk, SOCK_RCVTSTAMP)) - inc->i_rx_tstamp = ktime_get_real(); + inc->i_usercopy.rx_tstamp = ktime_get_real(); rds_inc_addref(inc); inc->i_rx_lat_trace[RDS_MSG_RX_END] = local_clock(); list_add_tail(&inc->i_item, &rs->rs_recv_queue); @@ -540,16 +540,18 @@ static int rds_cmsg_recv(struct rds_incoming *inc, struct msghdr *msg, { int ret = 0; - if (inc->i_rdma_cookie) { + if (inc->i_usercopy.rdma_cookie) { ret = put_cmsg(msg, SOL_RDS, RDS_CMSG_RDMA_DEST, - sizeof(inc->i_rdma_cookie), &inc->i_rdma_cookie); + sizeof(inc->i_usercopy.rdma_cookie), + &inc->i_usercopy.rdma_cookie); if (ret) goto out; } - if ((inc->i_rx_tstamp != 0) && + if ((inc->i_usercopy.rx_tstamp != 0) && sock_flag(rds_rs_to_sk(rs), SOCK_RCVTSTAMP)) { - struct __kernel_old_timeval tv = ns_to_kernel_old_timeval(inc->i_rx_tstamp); + struct __kernel_old_timeval tv = + ns_to_kernel_old_timeval(inc->i_usercopy.rx_tstamp); if (!sock_flag(rds_rs_to_sk(rs), SOCK_TSTAMP_NEW)) { ret = put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD, diff --git a/net/rds/send.c b/net/rds/send.c index 031b1e97a466..82dcd8b84fe7 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -145,6 +145,7 @@ int rds_send_xmit(struct rds_conn_path *cp) LIST_HEAD(to_be_dropped); int batch_count; unsigned long send_gen = 0; + int same_rm = 0; restart: batch_count = 0; @@ -200,6 +201,17 @@ restart: rm = cp->cp_xmit_rm; + if (!rm) { + same_rm = 0; + } else { + same_rm++; + if (same_rm >= 4096) { + rds_stats_inc(s_send_stuck_rm); + ret = -EAGAIN; + break; + } + } + /* * If between sending messages, we can send a pending congestion * map update. 
@@ -1132,7 +1144,7 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) case AF_INET: if (usin->sin_addr.s_addr == htonl(INADDR_ANY) || usin->sin_addr.s_addr == htonl(INADDR_BROADCAST) || - IN_MULTICAST(ntohl(usin->sin_addr.s_addr))) { + ipv4_is_multicast(usin->sin_addr.s_addr)) { ret = -EINVAL; goto out; } @@ -1163,7 +1175,7 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) addr4 = sin6->sin6_addr.s6_addr32[3]; if (addr4 == htonl(INADDR_ANY) || addr4 == htonl(INADDR_BROADCAST) || - IN_MULTICAST(ntohl(addr4))) { + ipv4_is_multicast(addr4)) { ret = -EINVAL; goto out; } diff --git a/net/rds/stats.c b/net/rds/stats.c index 73be187d389e..9e87da43c004 100644 --- a/net/rds/stats.c +++ b/net/rds/stats.c @@ -76,6 +76,9 @@ static const char *const rds_stat_names[] = { "cong_update_received", "cong_send_error", "cong_send_blocked", + "recv_bytes_added_to_sock", + "recv_bytes_freed_fromsock", + "send_stuck_rm", }; void rds_stats_info_copy(struct rds_info_iterator *iter, diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 8051dfdcf26d..1091bf35a199 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -232,6 +232,9 @@ struct rxrpc_security { int (*verify_packet)(struct rxrpc_call *, struct sk_buff *, unsigned int, unsigned int, rxrpc_seq_t, u16); + /* Free crypto request on a call */ + void (*free_call_crypto)(struct rxrpc_call *); + /* Locate the data in a received packet that has been verified. */ void (*locate_data)(struct rxrpc_call *, struct sk_buff *, unsigned int *, unsigned int *); @@ -564,6 +567,7 @@ struct rxrpc_call { unsigned long expect_term_by; /* When we expect call termination by */ u32 next_rx_timo; /* Timeout for next Rx packet (jif) */ u32 next_req_timo; /* Timeout for next Rx request packet (jif) */ + struct skcipher_request *cipher_req; /* Packet cipher request buffer */ struct timer_list timer; /* Combined event timer */ struct work_struct processor; /* Event processor */ rxrpc_notify_rx_t notify_rx; /* kernel service Rx notification function */ diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 014548c259ce..32d8dc677142 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -488,8 +488,10 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) _debug("RELEASE CALL %p (%d CONN %p)", call, call->debug_id, conn); - if (conn) + if (conn) { rxrpc_disconnect_call(call); + conn->security->free_call_crypto(call); + } rxrpc_cleanup_ring(call); _leave(""); diff --git a/net/rxrpc/insecure.c b/net/rxrpc/insecure.c index a29d26c273b5..f6c59f5fae9d 100644 --- a/net/rxrpc/insecure.c +++ b/net/rxrpc/insecure.c @@ -33,6 +33,10 @@ static int none_verify_packet(struct rxrpc_call *call, struct sk_buff *skb, return 0; } +static void none_free_call_crypto(struct rxrpc_call *call) +{ +} + static void none_locate_data(struct rxrpc_call *call, struct sk_buff *skb, unsigned int *_offset, unsigned int *_len) { @@ -83,6 +87,7 @@ const struct rxrpc_security rxrpc_no_security = { .exit = none_exit, .init_connection_security = none_init_connection_security, .prime_packet_security = none_prime_packet_security, + .free_call_crypto = none_free_call_crypto, .secure_packet = none_secure_packet, .verify_packet = none_verify_packet, .locate_data = none_locate_data, diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index c60c520fde7c..8d8aa3c230b5 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -43,6 +43,7 @@ struct rxkad_level2_hdr { * packets */ static struct 
crypto_sync_skcipher *rxkad_ci; +static struct skcipher_request *rxkad_ci_req; static DEFINE_MUTEX(rxkad_ci_mutex); /* @@ -99,8 +100,8 @@ error: */ static int rxkad_prime_packet_security(struct rxrpc_connection *conn) { + struct skcipher_request *req; struct rxrpc_key_token *token; - SYNC_SKCIPHER_REQUEST_ON_STACK(req, conn->cipher); struct scatterlist sg; struct rxrpc_crypt iv; __be32 *tmpbuf; @@ -115,6 +116,12 @@ static int rxkad_prime_packet_security(struct rxrpc_connection *conn) if (!tmpbuf) return -ENOMEM; + req = skcipher_request_alloc(&conn->cipher->base, GFP_NOFS); + if (!req) { + kfree(tmpbuf); + return -ENOMEM; + } + token = conn->params.key->payload.data[0]; memcpy(&iv, token->kad->session_key, sizeof(iv)); @@ -128,7 +135,7 @@ static int rxkad_prime_packet_security(struct rxrpc_connection *conn) skcipher_request_set_callback(req, 0, NULL, NULL); skcipher_request_set_crypt(req, &sg, &sg, tmpsize, iv.x); crypto_skcipher_encrypt(req); - skcipher_request_zero(req); + skcipher_request_free(req); memcpy(&conn->csum_iv, tmpbuf + 2, sizeof(conn->csum_iv)); kfree(tmpbuf); @@ -137,6 +144,35 @@ static int rxkad_prime_packet_security(struct rxrpc_connection *conn) } /* + * Allocate and prepare the crypto request on a call. For any particular call, + * this is called serially for the packets, so no lock should be necessary. + */ +static struct skcipher_request *rxkad_get_call_crypto(struct rxrpc_call *call) +{ + struct crypto_skcipher *tfm = &call->conn->cipher->base; + struct skcipher_request *cipher_req = call->cipher_req; + + if (!cipher_req) { + cipher_req = skcipher_request_alloc(tfm, GFP_NOFS); + if (!cipher_req) + return NULL; + call->cipher_req = cipher_req; + } + + return cipher_req; +} + +/* + * Clean up the crypto on a call. + */ +static void rxkad_free_call_crypto(struct rxrpc_call *call) +{ + if (call->cipher_req) + skcipher_request_free(call->cipher_req); + call->cipher_req = NULL; +} + +/* * partially encrypt a packet (level 1 security) */ static int rxkad_secure_packet_auth(const struct rxrpc_call *call, @@ -243,7 +279,7 @@ static int rxkad_secure_packet(struct rxrpc_call *call, void *sechdr) { struct rxrpc_skb_priv *sp; - SYNC_SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher); + struct skcipher_request *req; struct rxrpc_crypt iv; struct scatterlist sg; u32 x, y; @@ -262,6 +298,10 @@ static int rxkad_secure_packet(struct rxrpc_call *call, if (ret < 0) return ret; + req = rxkad_get_call_crypto(call); + if (!req) + return -ENOMEM; + /* continue encrypting from where we left off */ memcpy(&iv, call->conn->csum_iv.x, sizeof(iv)); @@ -488,7 +528,7 @@ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb, unsigned int offset, unsigned int len, rxrpc_seq_t seq, u16 expected_cksum) { - SYNC_SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher); + struct skcipher_request *req; struct rxrpc_crypt iv; struct scatterlist sg; bool aborted; @@ -501,6 +541,10 @@ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb, if (!call->conn->cipher) return 0; + req = rxkad_get_call_crypto(call); + if (!req) + return -ENOMEM; + /* continue encrypting from where we left off */ memcpy(&iv, call->conn->csum_iv.x, sizeof(iv)); @@ -733,14 +777,18 @@ static void rxkad_calc_response_checksum(struct rxkad_response *response) /* * encrypt the response packet */ -static void rxkad_encrypt_response(struct rxrpc_connection *conn, - struct rxkad_response *resp, - const struct rxkad_key *s2) +static int rxkad_encrypt_response(struct rxrpc_connection *conn, + struct 
rxkad_response *resp, + const struct rxkad_key *s2) { - SYNC_SKCIPHER_REQUEST_ON_STACK(req, conn->cipher); + struct skcipher_request *req; struct rxrpc_crypt iv; struct scatterlist sg[1]; + req = skcipher_request_alloc(&conn->cipher->base, GFP_NOFS); + if (!req) + return -ENOMEM; + /* continue encrypting from where we left off */ memcpy(&iv, s2->session_key, sizeof(iv)); @@ -750,7 +798,8 @@ static void rxkad_encrypt_response(struct rxrpc_connection *conn, skcipher_request_set_callback(req, 0, NULL, NULL); skcipher_request_set_crypt(req, sg, sg, sizeof(resp->encrypted), iv.x); crypto_skcipher_encrypt(req); - skcipher_request_zero(req); + skcipher_request_free(req); + return 0; } /* @@ -825,8 +874,9 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn, /* calculate the response checksum and then do the encryption */ rxkad_calc_response_checksum(resp); - rxkad_encrypt_response(conn, resp, token->kad); - ret = rxkad_send_response(conn, &sp->hdr, resp, token->kad); + ret = rxkad_encrypt_response(conn, resp, token->kad); + if (ret == 0) + ret = rxkad_send_response(conn, &sp->hdr, resp, token->kad); kfree(resp); return ret; @@ -1003,18 +1053,16 @@ static void rxkad_decrypt_response(struct rxrpc_connection *conn, struct rxkad_response *resp, const struct rxrpc_crypt *session_key) { - SYNC_SKCIPHER_REQUEST_ON_STACK(req, rxkad_ci); + struct skcipher_request *req = rxkad_ci_req; struct scatterlist sg[1]; struct rxrpc_crypt iv; _enter(",,%08x%08x", ntohl(session_key->n[0]), ntohl(session_key->n[1])); - ASSERT(rxkad_ci != NULL); - mutex_lock(&rxkad_ci_mutex); if (crypto_sync_skcipher_setkey(rxkad_ci, session_key->x, - sizeof(*session_key)) < 0) + sizeof(*session_key)) < 0) BUG(); memcpy(&iv, session_key, sizeof(iv)); @@ -1208,10 +1256,26 @@ static void rxkad_clear(struct rxrpc_connection *conn) */ static int rxkad_init(void) { + struct crypto_sync_skcipher *tfm; + struct skcipher_request *req; + /* pin the cipher we need so that the crypto layer doesn't invoke * keventd to go get it */ - rxkad_ci = crypto_alloc_sync_skcipher("pcbc(fcrypt)", 0, 0); - return PTR_ERR_OR_ZERO(rxkad_ci); + tfm = crypto_alloc_sync_skcipher("pcbc(fcrypt)", 0, 0); + if (IS_ERR(tfm)) + return PTR_ERR(tfm); + + req = skcipher_request_alloc(&tfm->base, GFP_KERNEL); + if (!req) + goto nomem_tfm; + + rxkad_ci_req = req; + rxkad_ci = tfm; + return 0; + +nomem_tfm: + crypto_free_sync_skcipher(tfm); + return -ENOMEM; } /* @@ -1219,8 +1283,8 @@ static int rxkad_init(void) */ static void rxkad_exit(void) { - if (rxkad_ci) - crypto_free_sync_skcipher(rxkad_ci); + crypto_free_sync_skcipher(rxkad_ci); + skcipher_request_free(rxkad_ci_req); } /* @@ -1235,6 +1299,7 @@ const struct rxrpc_security rxkad = { .prime_packet_security = rxkad_prime_packet_security, .secure_packet = rxkad_secure_packet, .verify_packet = rxkad_verify_packet, + .free_call_crypto = rxkad_free_call_crypto, .locate_data = rxkad_locate_data, .issue_challenge = rxkad_issue_challenge, .respond_to_challenge = rxkad_respond_to_challenge, diff --git a/net/sched/Kconfig b/net/sched/Kconfig index afd2ba157a13..b3faafeafab9 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -963,6 +963,19 @@ config NET_IFE_SKBTCINDEX tristate "Support to encoding decoding skb tcindex on IFE action" depends on NET_ACT_IFE +config NET_TC_SKB_EXT + bool "TC recirculation support" + depends on NET_CLS_ACT + default y if NET_CLS_ACT + select SKB_EXTENSIONS + + help + Say Y here to allow tc chain misses to continue in OvS datapath in + the correct recirc_id, and hardware chain 
misses to continue in + the correct chain in tc software datapath. + + Say N here if you won't be using tc<->ovs offload or tc chains offload. + endif # NET_SCHED config NET_SCH_FIFO diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index cdd6f3818097..fcc46025e790 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -24,12 +24,12 @@ #include <uapi/linux/tc_act/tc_ct.h> #include <net/tc_act/tc_ct.h> -#include <linux/netfilter/nf_nat.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_conntrack_zones.h> #include <net/netfilter/nf_conntrack_helper.h> #include <net/netfilter/ipv6/nf_defrag_ipv6.h> +#include <uapi/linux/netfilter/nf_nat.h> static struct tc_action_ops act_ct_ops; static unsigned int ct_net_id; diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 9d1bf508075a..9ce073a05414 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -408,25 +408,31 @@ static struct notifier_block mirred_device_notifier = { .notifier_call = mirred_device_event, }; -static struct net_device *tcf_mirred_get_dev(const struct tc_action *a) +static void tcf_mirred_dev_put(void *priv) +{ + struct net_device *dev = priv; + + dev_put(dev); +} + +static struct net_device * +tcf_mirred_get_dev(const struct tc_action *a, + tc_action_priv_destructor *destructor) { struct tcf_mirred *m = to_mirred(a); struct net_device *dev; rcu_read_lock(); dev = rcu_dereference(m->tcfm_dev); - if (dev) + if (dev) { dev_hold(dev); + *destructor = tcf_mirred_dev_put; + } rcu_read_unlock(); return dev; } -static void tcf_mirred_put_dev(struct net_device *dev) -{ - dev_put(dev); -} - static size_t tcf_mirred_get_fill_size(const struct tc_action *act) { return nla_total_size(sizeof(struct tc_mirred)); @@ -446,7 +452,6 @@ static struct tc_action_ops act_mirred_ops = { .get_fill_size = tcf_mirred_get_fill_size, .size = sizeof(struct tcf_mirred), .get_dev = tcf_mirred_get_dev, - .put_dev = tcf_mirred_put_dev, }; static __net_init int mirred_init_net(struct net *net) diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 6315e0f8d26e..89c04c52af3d 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -40,6 +40,8 @@ static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = { [TCA_POLICE_PEAKRATE] = { .len = TC_RTAB_SIZE }, [TCA_POLICE_AVRATE] = { .type = NLA_U32 }, [TCA_POLICE_RESULT] = { .type = NLA_U32 }, + [TCA_POLICE_RATE64] = { .type = NLA_U64 }, + [TCA_POLICE_PEAKRATE64] = { .type = NLA_U64 }, }; static int tcf_police_init(struct net *net, struct nlattr *nla, @@ -58,6 +60,7 @@ static int tcf_police_init(struct net *net, struct nlattr *nla, struct tcf_police_params *new; bool exists = false; u32 index; + u64 rate64, prate64; if (nla == NULL) return -EINVAL; @@ -155,14 +158,18 @@ static int tcf_police_init(struct net *net, struct nlattr *nla, } if (R_tab) { new->rate_present = true; - psched_ratecfg_precompute(&new->rate, &R_tab->rate, 0); + rate64 = tb[TCA_POLICE_RATE64] ? + nla_get_u64(tb[TCA_POLICE_RATE64]) : 0; + psched_ratecfg_precompute(&new->rate, &R_tab->rate, rate64); qdisc_put_rtab(R_tab); } else { new->rate_present = false; } if (P_tab) { new->peak_present = true; - psched_ratecfg_precompute(&new->peak, &P_tab->rate, 0); + prate64 = tb[TCA_POLICE_PEAKRATE64] ? 
+ nla_get_u64(tb[TCA_POLICE_PEAKRATE64]) : 0; + psched_ratecfg_precompute(&new->peak, &P_tab->rate, prate64); qdisc_put_rtab(P_tab); } else { new->peak_present = false; @@ -313,10 +320,22 @@ static int tcf_police_dump(struct sk_buff *skb, struct tc_action *a, lockdep_is_held(&police->tcf_lock)); opt.mtu = p->tcfp_mtu; opt.burst = PSCHED_NS2TICKS(p->tcfp_burst); - if (p->rate_present) + if (p->rate_present) { psched_ratecfg_getrate(&opt.rate, &p->rate); - if (p->peak_present) + if ((police->params->rate.rate_bytes_ps >= (1ULL << 32)) && + nla_put_u64_64bit(skb, TCA_POLICE_RATE64, + police->params->rate.rate_bytes_ps, + TCA_POLICE_PAD)) + goto nla_put_failure; + } + if (p->peak_present) { psched_ratecfg_getrate(&opt.peakrate, &p->peak); + if ((police->params->peak.rate_bytes_ps >= (1ULL << 32)) && + nla_put_u64_64bit(skb, TCA_POLICE_PEAKRATE64, + police->params->peak.rate_bytes_ps, + TCA_POLICE_PAD)) + goto nla_put_failure; + } if (nla_put(skb, TCA_POLICE_TBF, sizeof(opt), &opt)) goto nla_put_failure; if (p->tcfp_result && diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c index 10229124a992..692c4c9040fd 100644 --- a/net/sched/act_sample.c +++ b/net/sched/act_sample.c @@ -252,6 +252,32 @@ static int tcf_sample_search(struct net *net, struct tc_action **a, u32 index) return tcf_idr_search(tn, a, index); } +static void tcf_psample_group_put(void *priv) +{ + struct psample_group *group = priv; + + psample_group_put(group); +} + +static struct psample_group * +tcf_sample_get_group(const struct tc_action *a, + tc_action_priv_destructor *destructor) +{ + struct tcf_sample *s = to_sample(a); + struct psample_group *group; + + spin_lock_bh(&s->tcf_lock); + group = rcu_dereference_protected(s->psample_group, + lockdep_is_held(&s->tcf_lock)); + if (group) { + psample_group_take(group); + *destructor = tcf_psample_group_put; + } + spin_unlock_bh(&s->tcf_lock); + + return group; +} + static struct tc_action_ops act_sample_ops = { .kind = "sample", .id = TCA_ID_SAMPLE, @@ -262,6 +288,7 @@ static struct tc_action_ops act_sample_ops = { .cleanup = tcf_sample_cleanup, .walk = tcf_sample_walker, .lookup = tcf_sample_search, + .get_psample_group = tcf_sample_get_group, .size = sizeof(struct tcf_sample), }; diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index 287a30bf8930..08aaf719a70f 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -301,6 +301,19 @@ static int tcf_vlan_walker(struct net *net, struct sk_buff *skb, return tcf_generic_walker(tn, skb, cb, type, ops, extack); } +static void tcf_vlan_stats_update(struct tc_action *a, u64 bytes, u32 packets, + u64 lastuse, bool hw) +{ + struct tcf_vlan *v = to_vlan(a); + struct tcf_t *tm = &v->tcf_tm; + + _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets); + if (hw) + _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats_hw), + bytes, packets); + tm->lastuse = max_t(u64, tm->lastuse, lastuse); +} + static int tcf_vlan_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, vlan_net_id); @@ -325,6 +338,7 @@ static struct tc_action_ops act_vlan_ops = { .init = tcf_vlan_init, .cleanup = tcf_vlan_cleanup, .walk = tcf_vlan_walker, + .stats_update = tcf_vlan_stats_update, .get_fill_size = tcf_vlan_get_fill_size, .lookup = tcf_vlan_search, .size = sizeof(struct tcf_vlan), diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index efd3cfb80a2a..32577c248968 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -36,6 +36,8 @@ #include <net/tc_act/tc_sample.h> #include 
<net/tc_act/tc_skbedit.h> #include <net/tc_act/tc_ct.h> +#include <net/tc_act/tc_mpls.h> +#include <net/flow_offload.h> extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1]; @@ -544,235 +546,73 @@ static void tcf_chain_flush(struct tcf_chain *chain, bool rtnl_held) } } -static struct tcf_block *tc_dev_ingress_block(struct net_device *dev) -{ - const struct Qdisc_class_ops *cops; - struct Qdisc *qdisc; - - if (!dev_ingress_queue(dev)) - return NULL; - - qdisc = dev_ingress_queue(dev)->qdisc_sleeping; - if (!qdisc) - return NULL; - - cops = qdisc->ops->cl_ops; - if (!cops) - return NULL; - - if (!cops->tcf_block) - return NULL; - - return cops->tcf_block(qdisc, TC_H_MIN_INGRESS, NULL); -} - -static struct rhashtable indr_setup_block_ht; - -struct tc_indr_block_dev { - struct rhash_head ht_node; - struct net_device *dev; - unsigned int refcnt; - struct list_head cb_list; - struct tcf_block *block; -}; - -struct tc_indr_block_cb { - struct list_head list; - void *cb_priv; - tc_indr_block_bind_cb_t *cb; - void *cb_ident; -}; - -static const struct rhashtable_params tc_indr_setup_block_ht_params = { - .key_offset = offsetof(struct tc_indr_block_dev, dev), - .head_offset = offsetof(struct tc_indr_block_dev, ht_node), - .key_len = sizeof(struct net_device *), -}; - -static struct tc_indr_block_dev * -tc_indr_block_dev_lookup(struct net_device *dev) -{ - return rhashtable_lookup_fast(&indr_setup_block_ht, &dev, - tc_indr_setup_block_ht_params); -} - -static struct tc_indr_block_dev *tc_indr_block_dev_get(struct net_device *dev) -{ - struct tc_indr_block_dev *indr_dev; - - indr_dev = tc_indr_block_dev_lookup(dev); - if (indr_dev) - goto inc_ref; - - indr_dev = kzalloc(sizeof(*indr_dev), GFP_KERNEL); - if (!indr_dev) - return NULL; - - INIT_LIST_HEAD(&indr_dev->cb_list); - indr_dev->dev = dev; - indr_dev->block = tc_dev_ingress_block(dev); - if (rhashtable_insert_fast(&indr_setup_block_ht, &indr_dev->ht_node, - tc_indr_setup_block_ht_params)) { - kfree(indr_dev); - return NULL; - } - -inc_ref: - indr_dev->refcnt++; - return indr_dev; -} - -static void tc_indr_block_dev_put(struct tc_indr_block_dev *indr_dev) -{ - if (--indr_dev->refcnt) - return; - - rhashtable_remove_fast(&indr_setup_block_ht, &indr_dev->ht_node, - tc_indr_setup_block_ht_params); - kfree(indr_dev); -} - -static struct tc_indr_block_cb * -tc_indr_block_cb_lookup(struct tc_indr_block_dev *indr_dev, - tc_indr_block_bind_cb_t *cb, void *cb_ident) -{ - struct tc_indr_block_cb *indr_block_cb; - - list_for_each_entry(indr_block_cb, &indr_dev->cb_list, list) - if (indr_block_cb->cb == cb && - indr_block_cb->cb_ident == cb_ident) - return indr_block_cb; - return NULL; -} - -static struct tc_indr_block_cb * -tc_indr_block_cb_add(struct tc_indr_block_dev *indr_dev, void *cb_priv, - tc_indr_block_bind_cb_t *cb, void *cb_ident) -{ - struct tc_indr_block_cb *indr_block_cb; - - indr_block_cb = tc_indr_block_cb_lookup(indr_dev, cb, cb_ident); - if (indr_block_cb) - return ERR_PTR(-EEXIST); - - indr_block_cb = kzalloc(sizeof(*indr_block_cb), GFP_KERNEL); - if (!indr_block_cb) - return ERR_PTR(-ENOMEM); - - indr_block_cb->cb_priv = cb_priv; - indr_block_cb->cb = cb; - indr_block_cb->cb_ident = cb_ident; - list_add(&indr_block_cb->list, &indr_dev->cb_list); - - return indr_block_cb; -} - -static void tc_indr_block_cb_del(struct tc_indr_block_cb *indr_block_cb) -{ - list_del(&indr_block_cb->list); - kfree(indr_block_cb); -} - static int tcf_block_setup(struct tcf_block *block, struct flow_block_offload *bo); -static void 
tc_indr_block_ing_cmd(struct tc_indr_block_dev *indr_dev, - struct tc_indr_block_cb *indr_block_cb, +static void tc_indr_block_ing_cmd(struct net_device *dev, + struct tcf_block *block, + flow_indr_block_bind_cb_t *cb, + void *cb_priv, enum flow_block_command command) { struct flow_block_offload bo = { .command = command, .binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS, - .net = dev_net(indr_dev->dev), - .block_shared = tcf_block_non_null_shared(indr_dev->block), + .net = dev_net(dev), + .block_shared = tcf_block_non_null_shared(block), }; INIT_LIST_HEAD(&bo.cb_list); - if (!indr_dev->block) + if (!block) return; - bo.block = &indr_dev->block->flow_block; - - indr_block_cb->cb(indr_dev->dev, indr_block_cb->cb_priv, TC_SETUP_BLOCK, - &bo); - tcf_block_setup(indr_dev->block, &bo); -} - -int __tc_indr_block_cb_register(struct net_device *dev, void *cb_priv, - tc_indr_block_bind_cb_t *cb, void *cb_ident) -{ - struct tc_indr_block_cb *indr_block_cb; - struct tc_indr_block_dev *indr_dev; - int err; - - indr_dev = tc_indr_block_dev_get(dev); - if (!indr_dev) - return -ENOMEM; - - indr_block_cb = tc_indr_block_cb_add(indr_dev, cb_priv, cb, cb_ident); - err = PTR_ERR_OR_ZERO(indr_block_cb); - if (err) - goto err_dev_put; + bo.block = &block->flow_block; - tc_indr_block_ing_cmd(indr_dev, indr_block_cb, FLOW_BLOCK_BIND); - return 0; + down_write(&block->cb_lock); + cb(dev, cb_priv, TC_SETUP_BLOCK, &bo); -err_dev_put: - tc_indr_block_dev_put(indr_dev); - return err; + tcf_block_setup(block, &bo); + up_write(&block->cb_lock); } -EXPORT_SYMBOL_GPL(__tc_indr_block_cb_register); -int tc_indr_block_cb_register(struct net_device *dev, void *cb_priv, - tc_indr_block_bind_cb_t *cb, void *cb_ident) +static struct tcf_block *tc_dev_ingress_block(struct net_device *dev) { - int err; - - rtnl_lock(); - err = __tc_indr_block_cb_register(dev, cb_priv, cb, cb_ident); - rtnl_unlock(); + const struct Qdisc_class_ops *cops; + struct Qdisc *qdisc; - return err; -} -EXPORT_SYMBOL_GPL(tc_indr_block_cb_register); + if (!dev_ingress_queue(dev)) + return NULL; -void __tc_indr_block_cb_unregister(struct net_device *dev, - tc_indr_block_bind_cb_t *cb, void *cb_ident) -{ - struct tc_indr_block_cb *indr_block_cb; - struct tc_indr_block_dev *indr_dev; + qdisc = dev_ingress_queue(dev)->qdisc_sleeping; + if (!qdisc) + return NULL; - indr_dev = tc_indr_block_dev_lookup(dev); - if (!indr_dev) - return; + cops = qdisc->ops->cl_ops; + if (!cops) + return NULL; - indr_block_cb = tc_indr_block_cb_lookup(indr_dev, cb, cb_ident); - if (!indr_block_cb) - return; + if (!cops->tcf_block) + return NULL; - /* Send unbind message if required to free any block cbs. 
*/ - tc_indr_block_ing_cmd(indr_dev, indr_block_cb, FLOW_BLOCK_UNBIND); - tc_indr_block_cb_del(indr_block_cb); - tc_indr_block_dev_put(indr_dev); + return cops->tcf_block(qdisc, TC_H_MIN_INGRESS, NULL); } -EXPORT_SYMBOL_GPL(__tc_indr_block_cb_unregister); -void tc_indr_block_cb_unregister(struct net_device *dev, - tc_indr_block_bind_cb_t *cb, void *cb_ident) +static void tc_indr_block_get_and_ing_cmd(struct net_device *dev, + flow_indr_block_bind_cb_t *cb, + void *cb_priv, + enum flow_block_command command) { - rtnl_lock(); - __tc_indr_block_cb_unregister(dev, cb, cb_ident); - rtnl_unlock(); + struct tcf_block *block = tc_dev_ingress_block(dev); + + tc_indr_block_ing_cmd(dev, block, cb, cb_priv, command); } -EXPORT_SYMBOL_GPL(tc_indr_block_cb_unregister); -static void tc_indr_block_call(struct tcf_block *block, struct net_device *dev, +static void tc_indr_block_call(struct tcf_block *block, + struct net_device *dev, struct tcf_block_ext_info *ei, enum flow_block_command command, struct netlink_ext_ack *extack) { - struct tc_indr_block_cb *indr_block_cb; - struct tc_indr_block_dev *indr_dev; struct flow_block_offload bo = { .command = command, .binder_type = ei->binder_type, @@ -783,22 +623,13 @@ static void tc_indr_block_call(struct tcf_block *block, struct net_device *dev, }; INIT_LIST_HEAD(&bo.cb_list); - indr_dev = tc_indr_block_dev_lookup(dev); - if (!indr_dev) - return; - - indr_dev->block = command == FLOW_BLOCK_BIND ? block : NULL; - - list_for_each_entry(indr_block_cb, &indr_dev->cb_list, list) - indr_block_cb->cb(dev, indr_block_cb->cb_priv, TC_SETUP_BLOCK, - &bo); - + flow_indr_block_call(dev, &bo, command); tcf_block_setup(block, &bo); } static bool tcf_block_offload_in_use(struct tcf_block *block) { - return block->offloadcnt; + return atomic_read(&block->offloadcnt); } static int tcf_block_offload_cmd(struct tcf_block *block, @@ -832,6 +663,7 @@ static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q, struct net_device *dev = q->dev_queue->dev; int err; + down_write(&block->cb_lock); if (!dev->netdev_ops->ndo_setup_tc) goto no_offload_dev_inc; @@ -840,24 +672,31 @@ static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q, */ if (!tc_can_offload(dev) && tcf_block_offload_in_use(block)) { NL_SET_ERR_MSG(extack, "Bind to offloaded block failed as dev has offload disabled"); - return -EOPNOTSUPP; + err = -EOPNOTSUPP; + goto err_unlock; } err = tcf_block_offload_cmd(block, dev, ei, FLOW_BLOCK_BIND, extack); if (err == -EOPNOTSUPP) goto no_offload_dev_inc; if (err) - return err; + goto err_unlock; tc_indr_block_call(block, dev, ei, FLOW_BLOCK_BIND, extack); + up_write(&block->cb_lock); return 0; no_offload_dev_inc: - if (tcf_block_offload_in_use(block)) - return -EOPNOTSUPP; + if (tcf_block_offload_in_use(block)) { + err = -EOPNOTSUPP; + goto err_unlock; + } + err = 0; block->nooffloaddevcnt++; tc_indr_block_call(block, dev, ei, FLOW_BLOCK_BIND, extack); - return 0; +err_unlock: + up_write(&block->cb_lock); + return err; } static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q, @@ -866,6 +705,7 @@ static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q, struct net_device *dev = q->dev_queue->dev; int err; + down_write(&block->cb_lock); tc_indr_block_call(block, dev, ei, FLOW_BLOCK_UNBIND, NULL); if (!dev->netdev_ops->ndo_setup_tc) @@ -873,10 +713,12 @@ static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q, err = tcf_block_offload_cmd(block, dev, ei, FLOW_BLOCK_UNBIND, NULL); if 
(err == -EOPNOTSUPP) goto no_offload_dev_dec; + up_write(&block->cb_lock); return; no_offload_dev_dec: WARN_ON(block->nooffloaddevcnt-- == 0); + up_write(&block->cb_lock); } static int @@ -991,6 +833,7 @@ static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q, return ERR_PTR(-ENOMEM); } mutex_init(&block->lock); + init_rwsem(&block->cb_lock); flow_block_init(&block->flow_block); INIT_LIST_HEAD(&block->chain_list); INIT_LIST_HEAD(&block->owner_list); @@ -1526,6 +1369,8 @@ tcf_block_playback_offloads(struct tcf_block *block, flow_setup_cb_t *cb, struct tcf_proto *tp, *tp_prev; int err; + lockdep_assert_held(&block->cb_lock); + for (chain = __tcf_get_next_chain(block, NULL); chain; chain_prev = chain, @@ -1564,6 +1409,8 @@ static int tcf_block_bind(struct tcf_block *block, struct flow_block_cb *block_cb, *next; int err, i = 0; + lockdep_assert_held(&block->cb_lock); + list_for_each_entry(block_cb, &bo->cb_list, list) { err = tcf_block_playback_offloads(block, block_cb->cb, block_cb->cb_priv, true, @@ -1571,6 +1418,8 @@ static int tcf_block_bind(struct tcf_block *block, bo->extack); if (err) goto err_unroll; + if (!bo->unlocked_driver_cb) + block->lockeddevcnt++; i++; } @@ -1586,6 +1435,8 @@ err_unroll: block_cb->cb_priv, false, tcf_block_offload_in_use(block), NULL); + if (!bo->unlocked_driver_cb) + block->lockeddevcnt--; } flow_block_cb_free(block_cb); } @@ -1598,6 +1449,8 @@ static void tcf_block_unbind(struct tcf_block *block, { struct flow_block_cb *block_cb, *next; + lockdep_assert_held(&block->cb_lock); + list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) { tcf_block_playback_offloads(block, block_cb->cb, block_cb->cb_priv, false, @@ -1605,6 +1458,8 @@ static void tcf_block_unbind(struct tcf_block *block, NULL); list_del(&block_cb->list); flow_block_cb_free(block_cb); + if (!bo->unlocked_driver_cb) + block->lockeddevcnt--; } } @@ -1659,6 +1514,18 @@ reclassify: goto reset; } else if (unlikely(TC_ACT_EXT_CMP(err, TC_ACT_GOTO_CHAIN))) { first_tp = res->goto_tp; + +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) + { + struct tc_skb_ext *ext; + + ext = skb_ext_add(skb, TC_SKB_EXT); + if (WARN_ON_ONCE(!ext)) + return TC_ACT_SHOT; + + ext->chain = err & TC_ACT_EXT_VAL_MASK; + } +#endif goto reset; } #endif @@ -3151,17 +3018,61 @@ int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts) } EXPORT_SYMBOL(tcf_exts_dump_stats); -int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type, - void *type_data, bool err_stop) +static void tcf_block_offload_inc(struct tcf_block *block, u32 *flags) +{ + if (*flags & TCA_CLS_FLAGS_IN_HW) + return; + *flags |= TCA_CLS_FLAGS_IN_HW; + atomic_inc(&block->offloadcnt); +} + +static void tcf_block_offload_dec(struct tcf_block *block, u32 *flags) +{ + if (!(*flags & TCA_CLS_FLAGS_IN_HW)) + return; + *flags &= ~TCA_CLS_FLAGS_IN_HW; + atomic_dec(&block->offloadcnt); +} + +static void tc_cls_offload_cnt_update(struct tcf_block *block, + struct tcf_proto *tp, u32 *cnt, + u32 *flags, u32 diff, bool add) +{ + lockdep_assert_held(&block->cb_lock); + + spin_lock(&tp->lock); + if (add) { + if (!*cnt) + tcf_block_offload_inc(block, flags); + *cnt += diff; + } else { + *cnt -= diff; + if (!*cnt) + tcf_block_offload_dec(block, flags); + } + spin_unlock(&tp->lock); +} + +static void +tc_cls_offload_cnt_reset(struct tcf_block *block, struct tcf_proto *tp, + u32 *cnt, u32 *flags) +{ + lockdep_assert_held(&block->cb_lock); + + spin_lock(&tp->lock); + tcf_block_offload_dec(block, flags); + *cnt = 0; + spin_unlock(&tp->lock); +} + +static 
int +__tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type, + void *type_data, bool err_stop) { struct flow_block_cb *block_cb; int ok_count = 0; int err; - /* Make sure all netdevs sharing this block are offload-capable. */ - if (block->nooffloaddevcnt && err_stop) - return -EOPNOTSUPP; - list_for_each_entry(block_cb, &block->flow_block.cb_list, list) { err = block_cb->cb(type, type_data, block_cb->cb_priv); if (err) { @@ -3173,17 +3084,261 @@ int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type, } return ok_count; } + +int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type, + void *type_data, bool err_stop, bool rtnl_held) +{ + bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held; + int ok_count; + +retry: + if (take_rtnl) + rtnl_lock(); + down_read(&block->cb_lock); + /* Need to obtain rtnl lock if block is bound to devs that require it. + * In block bind code cb_lock is obtained while holding rtnl, so we must + * obtain the locks in same order here. + */ + if (!rtnl_held && !take_rtnl && block->lockeddevcnt) { + up_read(&block->cb_lock); + take_rtnl = true; + goto retry; + } + + ok_count = __tc_setup_cb_call(block, type, type_data, err_stop); + + up_read(&block->cb_lock); + if (take_rtnl) + rtnl_unlock(); + return ok_count; +} EXPORT_SYMBOL(tc_setup_cb_call); +/* Non-destructive filter add. If filter that wasn't already in hardware is + * successfully offloaded, increment block offloads counter. On failure, + * previously offloaded filter is considered to be intact and offloads counter + * is not decremented. + */ + +int tc_setup_cb_add(struct tcf_block *block, struct tcf_proto *tp, + enum tc_setup_type type, void *type_data, bool err_stop, + u32 *flags, unsigned int *in_hw_count, bool rtnl_held) +{ + bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held; + int ok_count; + +retry: + if (take_rtnl) + rtnl_lock(); + down_read(&block->cb_lock); + /* Need to obtain rtnl lock if block is bound to devs that require it. + * In block bind code cb_lock is obtained while holding rtnl, so we must + * obtain the locks in same order here. + */ + if (!rtnl_held && !take_rtnl && block->lockeddevcnt) { + up_read(&block->cb_lock); + take_rtnl = true; + goto retry; + } + + /* Make sure all netdevs sharing this block are offload-capable. */ + if (block->nooffloaddevcnt && err_stop) { + ok_count = -EOPNOTSUPP; + goto err_unlock; + } + + ok_count = __tc_setup_cb_call(block, type, type_data, err_stop); + if (ok_count < 0) + goto err_unlock; + + if (tp->ops->hw_add) + tp->ops->hw_add(tp, type_data); + if (ok_count > 0) + tc_cls_offload_cnt_update(block, tp, in_hw_count, flags, + ok_count, true); +err_unlock: + up_read(&block->cb_lock); + if (take_rtnl) + rtnl_unlock(); + return ok_count < 0 ? ok_count : 0; +} +EXPORT_SYMBOL(tc_setup_cb_add); + +/* Destructive filter replace. If filter that wasn't already in hardware is + * successfully offloaded, increment block offload counter. On failure, + * previously offloaded filter is considered to be destroyed and offload counter + * is decremented. 
+ */ + +int tc_setup_cb_replace(struct tcf_block *block, struct tcf_proto *tp, + enum tc_setup_type type, void *type_data, bool err_stop, + u32 *old_flags, unsigned int *old_in_hw_count, + u32 *new_flags, unsigned int *new_in_hw_count, + bool rtnl_held) +{ + bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held; + int ok_count; + +retry: + if (take_rtnl) + rtnl_lock(); + down_read(&block->cb_lock); + /* Need to obtain rtnl lock if block is bound to devs that require it. + * In block bind code cb_lock is obtained while holding rtnl, so we must + * obtain the locks in same order here. + */ + if (!rtnl_held && !take_rtnl && block->lockeddevcnt) { + up_read(&block->cb_lock); + take_rtnl = true; + goto retry; + } + + /* Make sure all netdevs sharing this block are offload-capable. */ + if (block->nooffloaddevcnt && err_stop) { + ok_count = -EOPNOTSUPP; + goto err_unlock; + } + + tc_cls_offload_cnt_reset(block, tp, old_in_hw_count, old_flags); + if (tp->ops->hw_del) + tp->ops->hw_del(tp, type_data); + + ok_count = __tc_setup_cb_call(block, type, type_data, err_stop); + if (ok_count < 0) + goto err_unlock; + + if (tp->ops->hw_add) + tp->ops->hw_add(tp, type_data); + if (ok_count > 0) + tc_cls_offload_cnt_update(block, tp, new_in_hw_count, + new_flags, ok_count, true); +err_unlock: + up_read(&block->cb_lock); + if (take_rtnl) + rtnl_unlock(); + return ok_count < 0 ? ok_count : 0; +} +EXPORT_SYMBOL(tc_setup_cb_replace); + +/* Destroy filter and decrement block offload counter, if filter was previously + * offloaded. + */ + +int tc_setup_cb_destroy(struct tcf_block *block, struct tcf_proto *tp, + enum tc_setup_type type, void *type_data, bool err_stop, + u32 *flags, unsigned int *in_hw_count, bool rtnl_held) +{ + bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held; + int ok_count; + +retry: + if (take_rtnl) + rtnl_lock(); + down_read(&block->cb_lock); + /* Need to obtain rtnl lock if block is bound to devs that require it. + * In block bind code cb_lock is obtained while holding rtnl, so we must + * obtain the locks in same order here. + */ + if (!rtnl_held && !take_rtnl && block->lockeddevcnt) { + up_read(&block->cb_lock); + take_rtnl = true; + goto retry; + } + + ok_count = __tc_setup_cb_call(block, type, type_data, err_stop); + + tc_cls_offload_cnt_reset(block, tp, in_hw_count, flags); + if (tp->ops->hw_del) + tp->ops->hw_del(tp, type_data); + + up_read(&block->cb_lock); + if (take_rtnl) + rtnl_unlock(); + return ok_count < 0 ? 
ok_count : 0; +} +EXPORT_SYMBOL(tc_setup_cb_destroy); + +int tc_setup_cb_reoffload(struct tcf_block *block, struct tcf_proto *tp, + bool add, flow_setup_cb_t *cb, + enum tc_setup_type type, void *type_data, + void *cb_priv, u32 *flags, unsigned int *in_hw_count) +{ + int err = cb(type, type_data, cb_priv); + + if (err) { + if (add && tc_skip_sw(*flags)) + return err; + } else { + tc_cls_offload_cnt_update(block, tp, in_hw_count, flags, 1, + add); + } + + return 0; +} +EXPORT_SYMBOL(tc_setup_cb_reoffload); + +void tc_cleanup_flow_action(struct flow_action *flow_action) +{ + struct flow_action_entry *entry; + int i; + + flow_action_for_each(i, entry, flow_action) + if (entry->destructor) + entry->destructor(entry->destructor_priv); +} +EXPORT_SYMBOL(tc_cleanup_flow_action); + +static void tcf_mirred_get_dev(struct flow_action_entry *entry, + const struct tc_action *act) +{ +#ifdef CONFIG_NET_CLS_ACT + entry->dev = act->ops->get_dev(act, &entry->destructor); + if (!entry->dev) + return; + entry->destructor_priv = entry->dev; +#endif +} + +static void tcf_tunnel_encap_put_tunnel(void *priv) +{ + struct ip_tunnel_info *tunnel = priv; + + kfree(tunnel); +} + +static int tcf_tunnel_encap_get_tunnel(struct flow_action_entry *entry, + const struct tc_action *act) +{ + entry->tunnel = tcf_tunnel_info_copy(act); + if (!entry->tunnel) + return -ENOMEM; + entry->destructor = tcf_tunnel_encap_put_tunnel; + entry->destructor_priv = entry->tunnel; + return 0; +} + +static void tcf_sample_get_group(struct flow_action_entry *entry, + const struct tc_action *act) +{ +#ifdef CONFIG_NET_CLS_ACT + entry->sample.psample_group = + act->ops->get_psample_group(act, &entry->destructor); + entry->destructor_priv = entry->sample.psample_group; +#endif +} + int tc_setup_flow_action(struct flow_action *flow_action, - const struct tcf_exts *exts) + const struct tcf_exts *exts, bool rtnl_held) { const struct tc_action *act; - int i, j, k; + int i, j, k, err = 0; if (!exts) return 0; + if (!rtnl_held) + rtnl_lock(); + j = 0; tcf_exts_for_each_action(i, act, exts) { struct flow_action_entry *entry; @@ -3200,10 +3355,16 @@ int tc_setup_flow_action(struct flow_action *flow_action, entry->chain_index = tcf_gact_goto_chain_index(act); } else if (is_tcf_mirred_egress_redirect(act)) { entry->id = FLOW_ACTION_REDIRECT; - entry->dev = tcf_mirred_dev(act); + tcf_mirred_get_dev(entry, act); } else if (is_tcf_mirred_egress_mirror(act)) { entry->id = FLOW_ACTION_MIRRED; - entry->dev = tcf_mirred_dev(act); + tcf_mirred_get_dev(entry, act); + } else if (is_tcf_mirred_ingress_redirect(act)) { + entry->id = FLOW_ACTION_REDIRECT_INGRESS; + tcf_mirred_get_dev(entry, act); + } else if (is_tcf_mirred_ingress_mirror(act)) { + entry->id = FLOW_ACTION_MIRRED_INGRESS; + tcf_mirred_get_dev(entry, act); } else if (is_tcf_vlan(act)) { switch (tcf_vlan_action(act)) { case TCA_VLAN_ACT_PUSH: @@ -3222,11 +3383,14 @@ int tc_setup_flow_action(struct flow_action *flow_action, entry->vlan.prio = tcf_vlan_push_prio(act); break; default: + err = -EOPNOTSUPP; goto err_out; } } else if (is_tcf_tunnel_set(act)) { entry->id = FLOW_ACTION_TUNNEL_ENCAP; - entry->tunnel = tcf_tunnel_info(act); + err = tcf_tunnel_encap_get_tunnel(entry, act); + if (err) + goto err_out; } else if (is_tcf_tunnel_release(act)) { entry->id = FLOW_ACTION_TUNNEL_DECAP; } else if (is_tcf_pedit(act)) { @@ -3239,6 +3403,7 @@ int tc_setup_flow_action(struct flow_action *flow_action, entry->id = FLOW_ACTION_ADD; break; default: + err = -EOPNOTSUPP; goto err_out; } entry->mangle.htype = 
tcf_pedit_htype(act, k); @@ -3255,11 +3420,10 @@ int tc_setup_flow_action(struct flow_action *flow_action, entry->mark = tcf_skbedit_mark(act); } else if (is_tcf_sample(act)) { entry->id = FLOW_ACTION_SAMPLE; - entry->sample.psample_group = - tcf_sample_psample_group(act); entry->sample.trunc_size = tcf_sample_trunc_size(act); entry->sample.truncate = tcf_sample_truncate(act); entry->sample.rate = tcf_sample_rate(act); + tcf_sample_get_group(entry, act); } else if (is_tcf_police(act)) { entry->id = FLOW_ACTION_POLICE; entry->police.burst = tcf_police_tcfp_burst(act); @@ -3269,16 +3433,50 @@ int tc_setup_flow_action(struct flow_action *flow_action, entry->id = FLOW_ACTION_CT; entry->ct.action = tcf_ct_action(act); entry->ct.zone = tcf_ct_zone(act); + } else if (is_tcf_mpls(act)) { + switch (tcf_mpls_action(act)) { + case TCA_MPLS_ACT_PUSH: + entry->id = FLOW_ACTION_MPLS_PUSH; + entry->mpls_push.proto = tcf_mpls_proto(act); + entry->mpls_push.label = tcf_mpls_label(act); + entry->mpls_push.tc = tcf_mpls_tc(act); + entry->mpls_push.bos = tcf_mpls_bos(act); + entry->mpls_push.ttl = tcf_mpls_ttl(act); + break; + case TCA_MPLS_ACT_POP: + entry->id = FLOW_ACTION_MPLS_POP; + entry->mpls_pop.proto = tcf_mpls_proto(act); + break; + case TCA_MPLS_ACT_MODIFY: + entry->id = FLOW_ACTION_MPLS_MANGLE; + entry->mpls_mangle.label = tcf_mpls_label(act); + entry->mpls_mangle.tc = tcf_mpls_tc(act); + entry->mpls_mangle.bos = tcf_mpls_bos(act); + entry->mpls_mangle.ttl = tcf_mpls_ttl(act); + break; + default: + goto err_out; + } + } else if (is_tcf_skbedit_ptype(act)) { + entry->id = FLOW_ACTION_PTYPE; + entry->ptype = tcf_skbedit_ptype(act); } else { + err = -EOPNOTSUPP; goto err_out; } if (!is_tcf_pedit(act)) j++; } - return 0; + err_out: - return -EOPNOTSUPP; + if (!rtnl_held) + rtnl_unlock(); + + if (err) + tc_cleanup_flow_action(flow_action); + + return err; } EXPORT_SYMBOL(tc_setup_flow_action); @@ -3321,6 +3519,11 @@ static struct pernet_operations tcf_net_ops = { .size = sizeof(struct tcf_net), }; +static struct flow_indr_block_ing_entry block_ing_entry = { + .cb = tc_indr_block_get_and_ing_cmd, + .list = LIST_HEAD_INIT(block_ing_entry.list), +}; + static int __init tc_filter_init(void) { int err; @@ -3333,10 +3536,7 @@ static int __init tc_filter_init(void) if (err) goto err_register_pernet_subsys; - err = rhashtable_init(&indr_setup_block_ht, - &tc_indr_setup_block_ht_params); - if (err) - goto err_rhash_setup_block_ht; + flow_indr_add_block_ing_cb(&block_ing_entry); rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_new_tfilter, NULL, RTNL_FLAG_DOIT_UNLOCKED); @@ -3351,8 +3551,6 @@ static int __init tc_filter_init(void) return 0; -err_rhash_setup_block_ht: - unregister_pernet_subsys(&tcf_net_ops); err_register_pernet_subsys: destroy_workqueue(tc_filter_wq); return err; diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 3f7a9c02b70c..bf10bdaf5012 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -163,17 +163,19 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog, cls_bpf.exts_integrated = obj->exts_integrated; if (oldprog) - tcf_block_offload_dec(block, &oldprog->gen_flags); + err = tc_setup_cb_replace(block, tp, TC_SETUP_CLSBPF, &cls_bpf, + skip_sw, &oldprog->gen_flags, + &oldprog->in_hw_count, + &prog->gen_flags, &prog->in_hw_count, + true); + else + err = tc_setup_cb_add(block, tp, TC_SETUP_CLSBPF, &cls_bpf, + skip_sw, &prog->gen_flags, + &prog->in_hw_count, true); - err = tc_setup_cb_call(block, TC_SETUP_CLSBPF, &cls_bpf, skip_sw); - if (prog) { - if 
(err < 0) { - cls_bpf_offload_cmd(tp, oldprog, prog, extack); - return err; - } else if (err > 0) { - prog->in_hw_count = err; - tcf_block_offload_inc(block, &prog->gen_flags); - } + if (prog && err) { + cls_bpf_offload_cmd(tp, oldprog, prog, extack); + return err; } if (prog && skip_sw && !(prog->gen_flags & TCA_CLS_FLAGS_IN_HW)) @@ -230,7 +232,7 @@ static void cls_bpf_offload_update_stats(struct tcf_proto *tp, cls_bpf.name = prog->bpf_name; cls_bpf.exts_integrated = prog->exts_integrated; - tc_setup_cb_call(block, TC_SETUP_CLSBPF, &cls_bpf, false); + tc_setup_cb_call(block, TC_SETUP_CLSBPF, &cls_bpf, false, true); } static int cls_bpf_init(struct tcf_proto *tp) @@ -673,15 +675,11 @@ static int cls_bpf_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb cls_bpf.name = prog->bpf_name; cls_bpf.exts_integrated = prog->exts_integrated; - err = cb(TC_SETUP_CLSBPF, &cls_bpf, cb_priv); - if (err) { - if (add && tc_skip_sw(prog->gen_flags)) - return err; - continue; - } - - tc_cls_offload_cnt_update(block, &prog->in_hw_count, - &prog->gen_flags, add); + err = tc_setup_cb_reoffload(block, tp, add, cb, TC_SETUP_CLSBPF, + &cls_bpf, cb_priv, &prog->gen_flags, + &prog->in_hw_count); + if (err) + return err; } return 0; diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 054123742e32..74221e3351c3 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -412,41 +412,27 @@ static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f, struct tcf_block *block = tp->chain->block; struct flow_cls_offload cls_flower = {}; - if (!rtnl_held) - rtnl_lock(); - tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, extack); cls_flower.command = FLOW_CLS_DESTROY; cls_flower.cookie = (unsigned long) f; - tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false); - spin_lock(&tp->lock); - list_del_init(&f->hw_list); - tcf_block_offload_dec(block, &f->flags); - spin_unlock(&tp->lock); + tc_setup_cb_destroy(block, tp, TC_SETUP_CLSFLOWER, &cls_flower, false, + &f->flags, &f->in_hw_count, rtnl_held); - if (!rtnl_held) - rtnl_unlock(); } static int fl_hw_replace_filter(struct tcf_proto *tp, struct cls_fl_filter *f, bool rtnl_held, struct netlink_ext_ack *extack) { - struct cls_fl_head *head = fl_head_dereference(tp); struct tcf_block *block = tp->chain->block; struct flow_cls_offload cls_flower = {}; bool skip_sw = tc_skip_sw(f->flags); int err = 0; - if (!rtnl_held) - rtnl_lock(); - cls_flower.rule = flow_rule_alloc(tcf_exts_num_actions(&f->exts)); - if (!cls_flower.rule) { - err = -ENOMEM; - goto errout; - } + if (!cls_flower.rule) + return -ENOMEM; tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, extack); cls_flower.command = FLOW_CLS_REPLACE; @@ -456,43 +442,31 @@ static int fl_hw_replace_filter(struct tcf_proto *tp, cls_flower.rule->match.key = &f->mkey; cls_flower.classid = f->res.classid; - err = tc_setup_flow_action(&cls_flower.rule->action, &f->exts); + err = tc_setup_flow_action(&cls_flower.rule->action, &f->exts, + rtnl_held); if (err) { kfree(cls_flower.rule); - if (skip_sw) + if (skip_sw) { NL_SET_ERR_MSG_MOD(extack, "Failed to setup flow action"); - else - err = 0; - goto errout; + return err; + } + return 0; } - err = tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, skip_sw); + err = tc_setup_cb_add(block, tp, TC_SETUP_CLSFLOWER, &cls_flower, + skip_sw, &f->flags, &f->in_hw_count, rtnl_held); + tc_cleanup_flow_action(&cls_flower.rule->action); kfree(cls_flower.rule); - if (err < 0) { - fl_hw_destroy_filter(tp, f, true, 
NULL); - goto errout; - } else if (err > 0) { - f->in_hw_count = err; - err = 0; - spin_lock(&tp->lock); - tcf_block_offload_inc(block, &f->flags); - spin_unlock(&tp->lock); - } - - if (skip_sw && !(f->flags & TCA_CLS_FLAGS_IN_HW)) { - err = -EINVAL; - goto errout; + if (err) { + fl_hw_destroy_filter(tp, f, rtnl_held, NULL); + return err; } - spin_lock(&tp->lock); - list_add(&f->hw_list, &head->hw_filters); - spin_unlock(&tp->lock); -errout: - if (!rtnl_held) - rtnl_unlock(); + if (skip_sw && !(f->flags & TCA_CLS_FLAGS_IN_HW)) + return -EINVAL; - return err; + return 0; } static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f, @@ -501,22 +475,17 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f, struct tcf_block *block = tp->chain->block; struct flow_cls_offload cls_flower = {}; - if (!rtnl_held) - rtnl_lock(); - tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, NULL); cls_flower.command = FLOW_CLS_STATS; cls_flower.cookie = (unsigned long) f; cls_flower.classid = f->res.classid; - tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false); + tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false, + rtnl_held); tcf_exts_stats_update(&f->exts, cls_flower.stats.bytes, cls_flower.stats.pkts, cls_flower.stats.lastused); - - if (!rtnl_held) - rtnl_unlock(); } static void __fl_put(struct cls_fl_filter *f) @@ -1831,7 +1800,8 @@ static int fl_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb, cls_flower.rule->match.mask = &f->mask->key; cls_flower.rule->match.key = &f->mkey; - err = tc_setup_flow_action(&cls_flower.rule->action, &f->exts); + err = tc_setup_flow_action(&cls_flower.rule->action, &f->exts, + true); if (err) { kfree(cls_flower.rule); if (tc_skip_sw(f->flags)) { @@ -1844,21 +1814,17 @@ static int fl_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb, cls_flower.classid = f->res.classid; - err = cb(TC_SETUP_CLSFLOWER, &cls_flower, cb_priv); + err = tc_setup_cb_reoffload(block, tp, add, cb, + TC_SETUP_CLSFLOWER, &cls_flower, + cb_priv, &f->flags, + &f->in_hw_count); + tc_cleanup_flow_action(&cls_flower.rule->action); kfree(cls_flower.rule); if (err) { - if (add && tc_skip_sw(f->flags)) { - __fl_put(f); - return err; - } - goto next_flow; + __fl_put(f); + return err; } - - spin_lock(&tp->lock); - tc_cls_offload_cnt_update(block, &f->in_hw_count, &f->flags, - add); - spin_unlock(&tp->lock); next_flow: __fl_put(f); } @@ -1866,6 +1832,30 @@ next_flow: return 0; } +static void fl_hw_add(struct tcf_proto *tp, void *type_data) +{ + struct flow_cls_offload *cls_flower = type_data; + struct cls_fl_filter *f = + (struct cls_fl_filter *) cls_flower->cookie; + struct cls_fl_head *head = fl_head_dereference(tp); + + spin_lock(&tp->lock); + list_add(&f->hw_list, &head->hw_filters); + spin_unlock(&tp->lock); +} + +static void fl_hw_del(struct tcf_proto *tp, void *type_data) +{ + struct flow_cls_offload *cls_flower = type_data; + struct cls_fl_filter *f = + (struct cls_fl_filter *) cls_flower->cookie; + + spin_lock(&tp->lock); + if (!list_empty(&f->hw_list)) + list_del_init(&f->hw_list); + spin_unlock(&tp->lock); +} + static int fl_hw_create_tmplt(struct tcf_chain *chain, struct fl_flow_tmplt *tmplt) { @@ -1886,7 +1876,7 @@ static int fl_hw_create_tmplt(struct tcf_chain *chain, /* We don't care if driver (any of them) fails to handle this * call. It serves just as a hint for it. 
*/ - tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false); + tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false, true); kfree(cls_flower.rule); return 0; @@ -1902,7 +1892,7 @@ static void fl_hw_destroy_tmplt(struct tcf_chain *chain, cls_flower.command = FLOW_CLS_TMPLT_DESTROY; cls_flower.cookie = (unsigned long) tmplt; - tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false); + tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false, true); } static void *fl_tmplt_create(struct net *net, struct tcf_chain *chain, @@ -2526,6 +2516,8 @@ static struct tcf_proto_ops cls_fl_ops __read_mostly = { .delete = fl_delete, .walk = fl_walk, .reoffload = fl_reoffload, + .hw_add = fl_hw_add, + .hw_del = fl_hw_del, .dump = fl_dump, .bind_class = fl_bind_class, .tmplt_create = fl_tmplt_create, diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index 455ea2793f9b..7fc2eb62aa98 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -75,8 +75,8 @@ static void mall_destroy_hw_filter(struct tcf_proto *tp, cls_mall.command = TC_CLSMATCHALL_DESTROY; cls_mall.cookie = cookie; - tc_setup_cb_call(block, TC_SETUP_CLSMATCHALL, &cls_mall, false); - tcf_block_offload_dec(block, &head->flags); + tc_setup_cb_destroy(block, tp, TC_SETUP_CLSMATCHALL, &cls_mall, false, + &head->flags, &head->in_hw_count, true); } static int mall_replace_hw_filter(struct tcf_proto *tp, @@ -97,7 +97,7 @@ static int mall_replace_hw_filter(struct tcf_proto *tp, cls_mall.command = TC_CLSMATCHALL_REPLACE; cls_mall.cookie = cookie; - err = tc_setup_flow_action(&cls_mall.rule->action, &head->exts); + err = tc_setup_flow_action(&cls_mall.rule->action, &head->exts, true); if (err) { kfree(cls_mall.rule); mall_destroy_hw_filter(tp, head, cookie, NULL); @@ -109,15 +109,14 @@ static int mall_replace_hw_filter(struct tcf_proto *tp, return err; } - err = tc_setup_cb_call(block, TC_SETUP_CLSMATCHALL, &cls_mall, skip_sw); + err = tc_setup_cb_add(block, tp, TC_SETUP_CLSMATCHALL, &cls_mall, + skip_sw, &head->flags, &head->in_hw_count, true); + tc_cleanup_flow_action(&cls_mall.rule->action); kfree(cls_mall.rule); - if (err < 0) { + if (err) { mall_destroy_hw_filter(tp, head, cookie, NULL); return err; - } else if (err > 0) { - head->in_hw_count = err; - tcf_block_offload_inc(block, &head->flags); } if (skip_sw && !(head->flags & TCA_CLS_FLAGS_IN_HW)) @@ -302,7 +301,7 @@ static int mall_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb, TC_CLSMATCHALL_REPLACE : TC_CLSMATCHALL_DESTROY; cls_mall.cookie = (unsigned long)head; - err = tc_setup_flow_action(&cls_mall.rule->action, &head->exts); + err = tc_setup_flow_action(&cls_mall.rule->action, &head->exts, true); if (err) { kfree(cls_mall.rule); if (add && tc_skip_sw(head->flags)) { @@ -312,16 +311,14 @@ static int mall_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb, return 0; } - err = cb(TC_SETUP_CLSMATCHALL, &cls_mall, cb_priv); + err = tc_setup_cb_reoffload(block, tp, add, cb, TC_SETUP_CLSMATCHALL, + &cls_mall, cb_priv, &head->flags, + &head->in_hw_count); + tc_cleanup_flow_action(&cls_mall.rule->action); kfree(cls_mall.rule); - if (err) { - if (add && tc_skip_sw(head->flags)) - return err; - return 0; - } - - tc_cls_offload_cnt_update(block, &head->in_hw_count, &head->flags, add); + if (err) + return err; return 0; } @@ -337,7 +334,7 @@ static void mall_stats_hw_filter(struct tcf_proto *tp, cls_mall.command = TC_CLSMATCHALL_STATS; cls_mall.cookie = cookie; - tc_setup_cb_call(block, TC_SETUP_CLSMATCHALL, &cls_mall, 
false); + tc_setup_cb_call(block, TC_SETUP_CLSMATCHALL, &cls_mall, false, true); tcf_exts_stats_update(&head->exts, cls_mall.stats.bytes, cls_mall.stats.pkts, cls_mall.stats.lastused); diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 8614088edd1b..a0e6fac613de 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -480,7 +480,7 @@ static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h, cls_u32.hnode.handle = h->handle; cls_u32.hnode.prio = h->prio; - tc_setup_cb_call(block, TC_SETUP_CLSU32, &cls_u32, false); + tc_setup_cb_call(block, TC_SETUP_CLSU32, &cls_u32, false, true); } static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h, @@ -498,7 +498,7 @@ static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h, cls_u32.hnode.handle = h->handle; cls_u32.hnode.prio = h->prio; - err = tc_setup_cb_call(block, TC_SETUP_CLSU32, &cls_u32, skip_sw); + err = tc_setup_cb_call(block, TC_SETUP_CLSU32, &cls_u32, skip_sw, true); if (err < 0) { u32_clear_hw_hnode(tp, h, NULL); return err; @@ -522,8 +522,8 @@ static void u32_remove_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n, cls_u32.command = TC_CLSU32_DELETE_KNODE; cls_u32.knode.handle = n->handle; - tc_setup_cb_call(block, TC_SETUP_CLSU32, &cls_u32, false); - tcf_block_offload_dec(block, &n->flags); + tc_setup_cb_destroy(block, tp, TC_SETUP_CLSU32, &cls_u32, false, + &n->flags, &n->in_hw_count, true); } static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n, @@ -552,13 +552,11 @@ static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n, if (n->ht_down) cls_u32.knode.link_handle = ht->handle; - err = tc_setup_cb_call(block, TC_SETUP_CLSU32, &cls_u32, skip_sw); - if (err < 0) { + err = tc_setup_cb_add(block, tp, TC_SETUP_CLSU32, &cls_u32, skip_sw, + &n->flags, &n->in_hw_count, true); + if (err) { u32_remove_hw_knode(tp, n, NULL); return err; - } else if (err > 0) { - n->in_hw_count = err; - tcf_block_offload_inc(block, &n->flags); } if (skip_sw && !(n->flags & TCA_CLS_FLAGS_IN_HW)) @@ -1201,14 +1199,11 @@ static int u32_reoffload_knode(struct tcf_proto *tp, struct tc_u_knode *n, cls_u32.knode.link_handle = ht->handle; } - err = cb(TC_SETUP_CLSU32, &cls_u32, cb_priv); - if (err) { - if (add && tc_skip_sw(n->flags)) - return err; - return 0; - } - - tc_cls_offload_cnt_update(block, &n->in_hw_count, &n->flags, add); + err = tc_setup_cb_reoffload(block, tp, add, cb, TC_SETUP_CLSU32, + &cls_u32, cb_priv, &n->flags, + &n->in_hw_count); + if (err) + return err; return 0; } diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c index 810645b5c086..93b58fde99b7 100644 --- a/net/sched/sch_cbs.c +++ b/net/sched/sch_cbs.c @@ -299,7 +299,7 @@ static void cbs_set_port_rate(struct net_device *dev, struct cbs_sched_data *q) { struct ethtool_link_ksettings ecmd; int speed = SPEED_10; - int port_rate = -1; + int port_rate; int err; err = __ethtool_get_link_ksettings(dev, &ecmd); diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index d59fbcc745d1..c261c0a18868 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -45,7 +45,6 @@ struct fq_codel_flow { struct sk_buff *tail; struct list_head flowchain; int deficit; - u32 dropped; /* number of drops (or ECN marks) on this flow */ struct codel_vars cvars; }; /* please try to keep this structure <= 64 bytes */ @@ -173,7 +172,8 @@ static unsigned int fq_codel_drop(struct Qdisc *sch, unsigned int max_packets, __qdisc_drop(skb, to_free); } while (++i < max_packets && len < 
threshold); - flow->dropped += i; + /* Tell codel to increase its signal strength also */ + flow->cvars.count += i; q->backlogs[idx] -= len; q->memory_usage -= mem; sch->qstats.drops += i; @@ -211,7 +211,6 @@ static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch, list_add_tail(&flow->flowchain, &q->new_flows); q->new_flow_count++; flow->deficit = q->quantum; - flow->dropped = 0; } get_codel_cb(skb)->mem_usage = skb->truesize; q->memory_usage += get_codel_cb(skb)->mem_usage; @@ -286,7 +285,6 @@ static struct sk_buff *fq_codel_dequeue(struct Qdisc *sch) struct sk_buff *skb; struct fq_codel_flow *flow; struct list_head *head; - u32 prev_drop_count, prev_ecn_mark; begin: head = &q->new_flows; @@ -303,16 +301,10 @@ begin: goto begin; } - prev_drop_count = q->cstats.drop_count; - prev_ecn_mark = q->cstats.ecn_mark; - skb = codel_dequeue(sch, &sch->qstats.backlog, &q->cparams, &flow->cvars, &q->cstats, qdisc_pkt_len, codel_get_enqueue_time, drop_func, dequeue_func); - flow->dropped += q->cstats.drop_count - prev_drop_count; - flow->dropped += q->cstats.ecn_mark - prev_ecn_mark; - if (!skb) { /* force a pass through old_flows to prevent starvation */ if ((head == &q->new_flows) && !list_empty(&q->old_flows)) @@ -658,7 +650,7 @@ static int fq_codel_dump_class_stats(struct Qdisc *sch, unsigned long cl, sch_tree_unlock(sch); } qs.backlog = q->backlogs[idx]; - qs.drops = flow->dropped; + qs.drops = 0; } if (gnet_stats_copy_queue(d, NULL, &qs, qs.qlen) < 0) return -1; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index ac28f6a5d70e..17bd8f539bc7 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -985,6 +985,9 @@ static void qdisc_destroy(struct Qdisc *qdisc) void qdisc_put(struct Qdisc *qdisc) { + if (!qdisc) + return; + if (qdisc->flags & TCQ_F_BUILTIN || !refcount_dec_and_test(&qdisc->refcnt)) return; diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index 8d8bc2ec5cd6..2f7b34205c82 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -29,8 +29,8 @@ static DEFINE_SPINLOCK(taprio_list_lock); #define TAPRIO_ALL_GATES_OPEN -1 -#define FLAGS_VALID(flags) (!((flags) & ~TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST)) #define TXTIME_ASSIST_IS_ENABLED(flags) ((flags) & TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST) +#define FULL_OFFLOAD_IS_ENABLED(flags) ((flags) & TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD) struct sched_entry { struct list_head list; @@ -75,9 +75,16 @@ struct taprio_sched { struct sched_gate_list __rcu *admin_sched; struct hrtimer advance_timer; struct list_head taprio_list; + struct sk_buff *(*dequeue)(struct Qdisc *sch); + struct sk_buff *(*peek)(struct Qdisc *sch); u32 txtime_delay; }; +struct __tc_taprio_qopt_offload { + refcount_t users; + struct tc_taprio_qopt_offload offload; +}; + static ktime_t sched_base_time(const struct sched_gate_list *sched) { if (!sched) @@ -268,6 +275,19 @@ static bool is_valid_interval(struct sk_buff *skb, struct Qdisc *sch) return entry; } +static bool taprio_flags_valid(u32 flags) +{ + /* Make sure no other flag bits are set. */ + if (flags & ~(TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST | + TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD)) + return false; + /* txtime-assist and full offload are mutually exclusive */ + if ((flags & TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST) && + (flags & TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD)) + return false; + return true; +} + /* This returns the tstamp value set by TCP in terms of the set clock. 
*/ static ktime_t get_tcp_tstamp(struct taprio_sched *q, struct sk_buff *skb) { @@ -417,7 +437,7 @@ static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch, return qdisc_enqueue(skb, child, to_free); } -static struct sk_buff *taprio_peek(struct Qdisc *sch) +static struct sk_buff *taprio_peek_soft(struct Qdisc *sch) { struct taprio_sched *q = qdisc_priv(sch); struct net_device *dev = qdisc_dev(sch); @@ -461,6 +481,36 @@ static struct sk_buff *taprio_peek(struct Qdisc *sch) return NULL; } +static struct sk_buff *taprio_peek_offload(struct Qdisc *sch) +{ + struct taprio_sched *q = qdisc_priv(sch); + struct net_device *dev = qdisc_dev(sch); + struct sk_buff *skb; + int i; + + for (i = 0; i < dev->num_tx_queues; i++) { + struct Qdisc *child = q->qdiscs[i]; + + if (unlikely(!child)) + continue; + + skb = child->ops->peek(child); + if (!skb) + continue; + + return skb; + } + + return NULL; +} + +static struct sk_buff *taprio_peek(struct Qdisc *sch) +{ + struct taprio_sched *q = qdisc_priv(sch); + + return q->peek(sch); +} + static void taprio_set_budget(struct taprio_sched *q, struct sched_entry *entry) { atomic_set(&entry->budget, @@ -468,7 +518,7 @@ static void taprio_set_budget(struct taprio_sched *q, struct sched_entry *entry) atomic64_read(&q->picos_per_byte))); } -static struct sk_buff *taprio_dequeue(struct Qdisc *sch) +static struct sk_buff *taprio_dequeue_soft(struct Qdisc *sch) { struct taprio_sched *q = qdisc_priv(sch); struct net_device *dev = qdisc_dev(sch); @@ -550,6 +600,40 @@ done: return skb; } +static struct sk_buff *taprio_dequeue_offload(struct Qdisc *sch) +{ + struct taprio_sched *q = qdisc_priv(sch); + struct net_device *dev = qdisc_dev(sch); + struct sk_buff *skb; + int i; + + for (i = 0; i < dev->num_tx_queues; i++) { + struct Qdisc *child = q->qdiscs[i]; + + if (unlikely(!child)) + continue; + + skb = child->ops->dequeue(child); + if (unlikely(!skb)) + continue; + + qdisc_bstats_update(sch, skb); + qdisc_qstats_backlog_dec(sch, skb); + sch->q.qlen--; + + return skb; + } + + return NULL; +} + +static struct sk_buff *taprio_dequeue(struct Qdisc *sch) +{ + struct taprio_sched *q = qdisc_priv(sch); + + return q->dequeue(sch); +} + static bool should_restart_cycle(const struct sched_gate_list *oper, const struct sched_entry *entry) { @@ -672,10 +756,6 @@ static const struct nla_policy entry_policy[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = { [TCA_TAPRIO_SCHED_ENTRY_INTERVAL] = { .type = NLA_U32 }, }; -static const struct nla_policy entry_list_policy[TCA_TAPRIO_SCHED_MAX + 1] = { - [TCA_TAPRIO_SCHED_ENTRY] = { .type = NLA_NESTED }, -}; - static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = { [TCA_TAPRIO_ATTR_PRIOMAP] = { .len = sizeof(struct tc_mqprio_qopt) @@ -936,6 +1016,9 @@ static void taprio_start_sched(struct Qdisc *sch, struct taprio_sched *q = qdisc_priv(sch); ktime_t expires; + if (FULL_OFFLOAD_IS_ENABLED(q->flags)) + return; + expires = hrtimer_get_expires(&q->advance_timer); if (expires == 0) expires = KTIME_MAX; @@ -1015,6 +1098,254 @@ static void setup_txtime(struct taprio_sched *q, } } +static struct tc_taprio_qopt_offload *taprio_offload_alloc(int num_entries) +{ + size_t size = sizeof(struct tc_taprio_sched_entry) * num_entries + + sizeof(struct __tc_taprio_qopt_offload); + struct __tc_taprio_qopt_offload *__offload; + + __offload = kzalloc(size, GFP_KERNEL); + if (!__offload) + return NULL; + + refcount_set(&__offload->users, 1); + + return &__offload->offload; +} + +struct tc_taprio_qopt_offload *taprio_offload_get(struct 
tc_taprio_qopt_offload + *offload) +{ + struct __tc_taprio_qopt_offload *__offload; + + __offload = container_of(offload, struct __tc_taprio_qopt_offload, + offload); + + refcount_inc(&__offload->users); + + return offload; +} +EXPORT_SYMBOL_GPL(taprio_offload_get); + +void taprio_offload_free(struct tc_taprio_qopt_offload *offload) +{ + struct __tc_taprio_qopt_offload *__offload; + + __offload = container_of(offload, struct __tc_taprio_qopt_offload, + offload); + + if (!refcount_dec_and_test(&__offload->users)) + return; + + kfree(__offload); +} +EXPORT_SYMBOL_GPL(taprio_offload_free); + +/* The function will only serve to keep the pointers to the "oper" and "admin" + * schedules valid in relation to their base times, so when calling dump() the + * users looks at the right schedules. + * When using full offload, the admin configuration is promoted to oper at the + * base_time in the PHC time domain. But because the system time is not + * necessarily in sync with that, we can't just trigger a hrtimer to call + * switch_schedules at the right hardware time. + * At the moment we call this by hand right away from taprio, but in the future + * it will be useful to create a mechanism for drivers to notify taprio of the + * offload state (PENDING, ACTIVE, INACTIVE) so it can be visible in dump(). + * This is left as TODO. + */ +void taprio_offload_config_changed(struct taprio_sched *q) +{ + struct sched_gate_list *oper, *admin; + + spin_lock(&q->current_entry_lock); + + oper = rcu_dereference_protected(q->oper_sched, + lockdep_is_held(&q->current_entry_lock)); + admin = rcu_dereference_protected(q->admin_sched, + lockdep_is_held(&q->current_entry_lock)); + + switch_schedules(q, &admin, &oper); + + spin_unlock(&q->current_entry_lock); +} + +static void taprio_sched_to_offload(struct taprio_sched *q, + struct sched_gate_list *sched, + const struct tc_mqprio_qopt *mqprio, + struct tc_taprio_qopt_offload *offload) +{ + struct sched_entry *entry; + int i = 0; + + offload->base_time = sched->base_time; + offload->cycle_time = sched->cycle_time; + offload->cycle_time_extension = sched->cycle_time_extension; + + list_for_each_entry(entry, &sched->entries, list) { + struct tc_taprio_sched_entry *e = &offload->entries[i]; + + e->command = entry->command; + e->interval = entry->interval; + e->gate_mask = entry->gate_mask; + i++; + } + + offload->num_entries = i; +} + +static int taprio_enable_offload(struct net_device *dev, + struct tc_mqprio_qopt *mqprio, + struct taprio_sched *q, + struct sched_gate_list *sched, + struct netlink_ext_ack *extack) +{ + const struct net_device_ops *ops = dev->netdev_ops; + struct tc_taprio_qopt_offload *offload; + int err = 0; + + if (!ops->ndo_setup_tc) { + NL_SET_ERR_MSG(extack, + "Device does not support taprio offload"); + return -EOPNOTSUPP; + } + + offload = taprio_offload_alloc(sched->num_entries); + if (!offload) { + NL_SET_ERR_MSG(extack, + "Not enough memory for enabling offload mode"); + return -ENOMEM; + } + offload->enable = 1; + taprio_sched_to_offload(q, sched, mqprio, offload); + + err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TAPRIO, offload); + if (err < 0) { + NL_SET_ERR_MSG(extack, + "Device failed to setup taprio offload"); + goto done; + } + + taprio_offload_config_changed(q); + +done: + taprio_offload_free(offload); + + return err; +} + +static int taprio_disable_offload(struct net_device *dev, + struct taprio_sched *q, + struct netlink_ext_ack *extack) +{ + const struct net_device_ops *ops = dev->netdev_ops; + struct tc_taprio_qopt_offload *offload; + 
int err; + + if (!FULL_OFFLOAD_IS_ENABLED(q->flags)) + return 0; + + if (!ops->ndo_setup_tc) + return -EOPNOTSUPP; + + offload = taprio_offload_alloc(0); + if (!offload) { + NL_SET_ERR_MSG(extack, + "Not enough memory to disable offload mode"); + return -ENOMEM; + } + offload->enable = 0; + + err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TAPRIO, offload); + if (err < 0) { + NL_SET_ERR_MSG(extack, + "Device failed to disable offload"); + goto out; + } + +out: + taprio_offload_free(offload); + + return err; +} + +/* If full offload is enabled, the only possible clockid is the net device's + * PHC. For that reason, specifying a clockid through netlink is incorrect. + * For txtime-assist, it is implicitly assumed that the device's PHC is kept + * in sync with the specified clockid via a user space daemon such as phc2sys. + * For both software taprio and txtime-assist, the clockid is used for the + * hrtimer that advances the schedule and hence mandatory. + */ +static int taprio_parse_clockid(struct Qdisc *sch, struct nlattr **tb, + struct netlink_ext_ack *extack) +{ + struct taprio_sched *q = qdisc_priv(sch); + struct net_device *dev = qdisc_dev(sch); + int err = -EINVAL; + + if (FULL_OFFLOAD_IS_ENABLED(q->flags)) { + const struct ethtool_ops *ops = dev->ethtool_ops; + struct ethtool_ts_info info = { + .cmd = ETHTOOL_GET_TS_INFO, + .phc_index = -1, + }; + + if (tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) { + NL_SET_ERR_MSG(extack, + "The 'clockid' cannot be specified for full offload"); + goto out; + } + + if (ops && ops->get_ts_info) + err = ops->get_ts_info(dev, &info); + + if (err || info.phc_index < 0) { + NL_SET_ERR_MSG(extack, + "Device does not have a PTP clock"); + err = -ENOTSUPP; + goto out; + } + } else if (tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) { + int clockid = nla_get_s32(tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]); + + /* We only support static clockids and we don't allow + * for it to be modified after the first init. 
+ */ + if (clockid < 0 || + (q->clockid != -1 && q->clockid != clockid)) { + NL_SET_ERR_MSG(extack, + "Changing the 'clockid' of a running schedule is not supported"); + err = -ENOTSUPP; + goto out; + } + + switch (clockid) { + case CLOCK_REALTIME: + q->tk_offset = TK_OFFS_REAL; + break; + case CLOCK_MONOTONIC: + q->tk_offset = TK_OFFS_MAX; + break; + case CLOCK_BOOTTIME: + q->tk_offset = TK_OFFS_BOOT; + break; + case CLOCK_TAI: + q->tk_offset = TK_OFFS_TAI; + break; + default: + NL_SET_ERR_MSG(extack, "Invalid 'clockid'"); + err = -EINVAL; + goto out; + } + + q->clockid = clockid; + } else { + NL_SET_ERR_MSG(extack, "Specifying a 'clockid' is mandatory"); + goto out; + } +out: + return err; +} + static int taprio_change(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { @@ -1024,9 +1355,9 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, struct net_device *dev = qdisc_dev(sch); struct tc_mqprio_qopt *mqprio = NULL; u32 taprio_flags = 0; - int i, err, clockid; unsigned long flags; ktime_t start; + int i, err; err = nla_parse_nested_deprecated(tb, TCA_TAPRIO_ATTR_MAX, opt, taprio_policy, extack); @@ -1042,7 +1373,7 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, if (q->flags != 0 && q->flags != taprio_flags) { NL_SET_ERR_MSG_MOD(extack, "Changing 'flags' of a running schedule is not supported"); return -EOPNOTSUPP; - } else if (!FLAGS_VALID(taprio_flags)) { + } else if (!taprio_flags_valid(taprio_flags)) { NL_SET_ERR_MSG_MOD(extack, "Specified 'flags' are not valid"); return -EINVAL; } @@ -1082,30 +1413,19 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, goto free_sched; } - if (tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) { - clockid = nla_get_s32(tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]); - - /* We only support static clockids and we don't allow - * for it to be modified after the first init. 
- */ - if (clockid < 0 || - (q->clockid != -1 && q->clockid != clockid)) { - NL_SET_ERR_MSG(extack, "Changing the 'clockid' of a running schedule is not supported"); - err = -ENOTSUPP; - goto free_sched; - } - - q->clockid = clockid; - } - - if (q->clockid == -1 && !tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) { - NL_SET_ERR_MSG(extack, "Specifying a 'clockid' is mandatory"); - err = -EINVAL; + err = taprio_parse_clockid(sch, tb, extack); + if (err < 0) goto free_sched; - } taprio_set_picos_per_byte(dev, q); + if (FULL_OFFLOAD_IS_ENABLED(taprio_flags)) + err = taprio_enable_offload(dev, mqprio, q, new_admin, extack); + else + err = taprio_disable_offload(dev, q, extack); + if (err) + goto free_sched; + /* Protects against enqueue()/dequeue() */ spin_lock_bh(qdisc_lock(sch)); @@ -1120,6 +1440,7 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, } if (!TXTIME_ASSIST_IS_ENABLED(taprio_flags) && + !FULL_OFFLOAD_IS_ENABLED(taprio_flags) && !hrtimer_active(&q->advance_timer)) { hrtimer_init(&q->advance_timer, q->clockid, HRTIMER_MODE_ABS); q->advance_timer.function = advance_sched; @@ -1138,23 +1459,15 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, mqprio->prio_tc_map[i]); } - switch (q->clockid) { - case CLOCK_REALTIME: - q->tk_offset = TK_OFFS_REAL; - break; - case CLOCK_MONOTONIC: - q->tk_offset = TK_OFFS_MAX; - break; - case CLOCK_BOOTTIME: - q->tk_offset = TK_OFFS_BOOT; - break; - case CLOCK_TAI: - q->tk_offset = TK_OFFS_TAI; - break; - default: - NL_SET_ERR_MSG(extack, "Invalid 'clockid'"); - err = -EINVAL; - goto unlock; + if (FULL_OFFLOAD_IS_ENABLED(taprio_flags)) { + q->dequeue = taprio_dequeue_offload; + q->peek = taprio_peek_offload; + } else { + /* Be sure to always keep the function pointers + * in a consistent state. + */ + q->dequeue = taprio_dequeue_soft; + q->peek = taprio_peek_soft; } err = taprio_get_start_time(sch, new_admin, &start); @@ -1216,6 +1529,8 @@ static void taprio_destroy(struct Qdisc *sch) hrtimer_cancel(&q->advance_timer); + taprio_disable_offload(dev, q, NULL); + if (q->qdiscs) { for (i = 0; i < dev->num_tx_queues && q->qdiscs[i]; i++) qdisc_put(q->qdiscs[i]); @@ -1245,6 +1560,9 @@ static int taprio_init(struct Qdisc *sch, struct nlattr *opt, hrtimer_init(&q->advance_timer, CLOCK_TAI, HRTIMER_MODE_ABS); q->advance_timer.function = advance_sched; + q->dequeue = taprio_dequeue_soft; + q->peek = taprio_peek_soft; + q->root = sch; /* We only support static clockids. Use an invalid value as default @@ -1427,7 +1745,8 @@ static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb) if (nla_put(skb, TCA_TAPRIO_ATTR_PRIOMAP, sizeof(opt), &opt)) goto options_error; - if (nla_put_s32(skb, TCA_TAPRIO_ATTR_SCHED_CLOCKID, q->clockid)) + if (!FULL_OFFLOAD_IS_ENABLED(q->flags) && + nla_put_s32(skb, TCA_TAPRIO_ATTR_SCHED_CLOCKID, q->clockid)) goto options_error; if (q->flags && nla_put_u32(skb, TCA_TAPRIO_ATTR_FLAGS, q->flags)) diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 5010cce52c93..d2ffc9a0ba3a 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -54,7 +54,6 @@ static struct sctp_association *sctp_association_init( const struct sock *sk, enum sctp_scope scope, gfp_t gfp) { - struct net *net = sock_net(sk); struct sctp_sock *sp; struct sctp_paramhdr *p; int i; @@ -214,14 +213,6 @@ static struct sctp_association *sctp_association_init( asoc->peer.sack_needed = 1; asoc->peer.sack_generation = 1; - /* Assume that the peer will tell us if he recognizes ASCONF - * as part of INIT exchange. 
- * The sctp_addip_noauth option is there for backward compatibility - * and will revert old behavior. - */ - if (net->sctp.addip_noauth) - asoc->peer.asconf_capable = 1; - /* Create an input queue. */ sctp_inq_init(&asoc->base.inqueue); sctp_inq_set_th_handler(&asoc->base.inqueue, sctp_assoc_bh_rcv); diff --git a/net/sctp/auth.c b/net/sctp/auth.c index de4c78d4a21e..4278764d82b8 100644 --- a/net/sctp/auth.c +++ b/net/sctp/auth.c @@ -389,7 +389,7 @@ int sctp_auth_asoc_init_active_key(struct sctp_association *asoc, gfp_t gfp) /* If we don't support AUTH, or peer is not capable * we don't need to do anything. */ - if (!asoc->ep->auth_enable || !asoc->peer.auth_capable) + if (!asoc->peer.auth_capable) return 0; /* If the key_id is non-zero and we couldn't find an @@ -675,7 +675,7 @@ int sctp_auth_send_cid(enum sctp_cid chunk, const struct sctp_association *asoc) if (!asoc) return 0; - if (!asoc->ep->auth_enable || !asoc->peer.auth_capable) + if (!asoc->peer.auth_capable) return 0; return __sctp_auth_cid(chunk, asoc->peer.peer_chunks); @@ -687,7 +687,7 @@ int sctp_auth_recv_cid(enum sctp_cid chunk, const struct sctp_association *asoc) if (!asoc) return 0; - if (!asoc->ep->auth_enable) + if (!asoc->peer.auth_capable) return 0; return __sctp_auth_cid(chunk, @@ -831,10 +831,15 @@ int sctp_auth_set_key(struct sctp_endpoint *ep, /* Try to find the given key id to see if * we are doing a replace, or adding a new key */ - if (asoc) + if (asoc) { + if (!asoc->peer.auth_capable) + return -EACCES; sh_keys = &asoc->endpoint_shared_keys; - else + } else { + if (!ep->auth_enable) + return -EACCES; sh_keys = &ep->endpoint_shared_keys; + } key_for_each(shkey, sh_keys) { if (shkey->key_id == auth_key->sca_keynumber) { @@ -875,10 +880,15 @@ int sctp_auth_set_active_key(struct sctp_endpoint *ep, int found = 0; /* The key identifier MUST correst to an existing key */ - if (asoc) + if (asoc) { + if (!asoc->peer.auth_capable) + return -EACCES; sh_keys = &asoc->endpoint_shared_keys; - else + } else { + if (!ep->auth_enable) + return -EACCES; sh_keys = &ep->endpoint_shared_keys; + } key_for_each(key, sh_keys) { if (key->key_id == key_id) { @@ -911,11 +921,15 @@ int sctp_auth_del_key_id(struct sctp_endpoint *ep, * The key identifier MUST correst to an existing key */ if (asoc) { + if (!asoc->peer.auth_capable) + return -EACCES; if (asoc->active_key_id == key_id) return -EINVAL; sh_keys = &asoc->endpoint_shared_keys; } else { + if (!ep->auth_enable) + return -EACCES; if (ep->active_key_id == key_id) return -EINVAL; @@ -950,11 +964,15 @@ int sctp_auth_deact_key_id(struct sctp_endpoint *ep, * The key identifier MUST correst to an existing key */ if (asoc) { + if (!asoc->peer.auth_capable) + return -EACCES; if (asoc->active_key_id == key_id) return -EINVAL; sh_keys = &asoc->endpoint_shared_keys; } else { + if (!ep->auth_enable) + return -EACCES; if (ep->active_key_id == key_id) return -EINVAL; @@ -989,3 +1007,72 @@ int sctp_auth_deact_key_id(struct sctp_endpoint *ep, return 0; } + +int sctp_auth_init(struct sctp_endpoint *ep, gfp_t gfp) +{ + int err = -ENOMEM; + + /* Allocate space for HMACS and CHUNKS authentication + * variables. There are arrays that we encode directly + * into parameters to make the rest of the operations easier. + */ + if (!ep->auth_hmacs_list) { + struct sctp_hmac_algo_param *auth_hmacs; + + auth_hmacs = kzalloc(struct_size(auth_hmacs, hmac_ids, + SCTP_AUTH_NUM_HMACS), gfp); + if (!auth_hmacs) + goto nomem; + /* Initialize the HMACS parameter. 
+ * SCTP-AUTH: Section 3.3 + * Every endpoint supporting SCTP chunk authentication MUST + * support the HMAC based on the SHA-1 algorithm. + */ + auth_hmacs->param_hdr.type = SCTP_PARAM_HMAC_ALGO; + auth_hmacs->param_hdr.length = + htons(sizeof(struct sctp_paramhdr) + 2); + auth_hmacs->hmac_ids[0] = htons(SCTP_AUTH_HMAC_ID_SHA1); + ep->auth_hmacs_list = auth_hmacs; + } + + if (!ep->auth_chunk_list) { + struct sctp_chunks_param *auth_chunks; + + auth_chunks = kzalloc(sizeof(*auth_chunks) + + SCTP_NUM_CHUNK_TYPES, gfp); + if (!auth_chunks) + goto nomem; + /* Initialize the CHUNKS parameter */ + auth_chunks->param_hdr.type = SCTP_PARAM_CHUNKS; + auth_chunks->param_hdr.length = + htons(sizeof(struct sctp_paramhdr)); + ep->auth_chunk_list = auth_chunks; + } + + /* Allocate and initialize transorms arrays for supported + * HMACs. + */ + err = sctp_auth_init_hmacs(ep, gfp); + if (err) + goto nomem; + + return 0; + +nomem: + /* Free all allocations */ + kfree(ep->auth_hmacs_list); + kfree(ep->auth_chunk_list); + ep->auth_hmacs_list = NULL; + ep->auth_chunk_list = NULL; + return err; +} + +void sctp_auth_free(struct sctp_endpoint *ep) +{ + kfree(ep->auth_hmacs_list); + kfree(ep->auth_chunk_list); + ep->auth_hmacs_list = NULL; + ep->auth_chunk_list = NULL; + sctp_auth_destroy_hmacs(ep->auth_hmacs); + ep->auth_hmacs = NULL; +} diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 69cebb2c998b..ea53049d1db6 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -43,62 +43,21 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, gfp_t gfp) { struct net *net = sock_net(sk); - struct sctp_hmac_algo_param *auth_hmacs = NULL; - struct sctp_chunks_param *auth_chunks = NULL; struct sctp_shared_key *null_key; - int err; ep->digest = kzalloc(SCTP_SIGNATURE_SIZE, gfp); if (!ep->digest) return NULL; + ep->asconf_enable = net->sctp.addip_enable; ep->auth_enable = net->sctp.auth_enable; if (ep->auth_enable) { - /* Allocate space for HMACS and CHUNKS authentication - * variables. There are arrays that we encode directly - * into parameters to make the rest of the operations easier. - */ - auth_hmacs = kzalloc(struct_size(auth_hmacs, hmac_ids, - SCTP_AUTH_NUM_HMACS), gfp); - if (!auth_hmacs) - goto nomem; - - auth_chunks = kzalloc(sizeof(*auth_chunks) + - SCTP_NUM_CHUNK_TYPES, gfp); - if (!auth_chunks) + if (sctp_auth_init(ep, gfp)) goto nomem; - - /* Initialize the HMACS parameter. - * SCTP-AUTH: Section 3.3 - * Every endpoint supporting SCTP chunk authentication MUST - * support the HMAC based on the SHA-1 algorithm. - */ - auth_hmacs->param_hdr.type = SCTP_PARAM_HMAC_ALGO; - auth_hmacs->param_hdr.length = - htons(sizeof(struct sctp_paramhdr) + 2); - auth_hmacs->hmac_ids[0] = htons(SCTP_AUTH_HMAC_ID_SHA1); - - /* Initialize the CHUNKS parameter */ - auth_chunks->param_hdr.type = SCTP_PARAM_CHUNKS; - auth_chunks->param_hdr.length = - htons(sizeof(struct sctp_paramhdr)); - - /* If the Add-IP functionality is enabled, we must - * authenticate, ASCONF and ASCONF-ACK chunks - */ - if (net->sctp.addip_enable) { - auth_chunks->chunks[0] = SCTP_CID_ASCONF; - auth_chunks->chunks[1] = SCTP_CID_ASCONF_ACK; - auth_chunks->param_hdr.length = - htons(sizeof(struct sctp_paramhdr) + 2); + if (ep->asconf_enable) { + sctp_auth_ep_add_chunkid(ep, SCTP_CID_ASCONF); + sctp_auth_ep_add_chunkid(ep, SCTP_CID_ASCONF_ACK); } - - /* Allocate and initialize transorms arrays for supported - * HMACs. 
- */ - err = sctp_auth_init_hmacs(ep, gfp); - if (err) - goto nomem; } /* Initialize the base structure. */ @@ -145,10 +104,9 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, /* Add the null key to the endpoint shared keys list and * set the hmcas and chunks pointers. */ - ep->auth_hmacs_list = auth_hmacs; - ep->auth_chunk_list = auth_chunks; ep->prsctp_enable = net->sctp.prsctp_enable; ep->reconf_enable = net->sctp.reconf_enable; + ep->ecn_enable = net->sctp.ecn_enable; /* Remember who we are attached to. */ ep->base.sk = sk; @@ -157,11 +115,8 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, return ep; nomem_shkey: - sctp_auth_destroy_hmacs(ep->auth_hmacs); + sctp_auth_free(ep); nomem: - /* Free all allocations */ - kfree(auth_hmacs); - kfree(auth_chunks); kfree(ep->digest); return NULL; @@ -244,11 +199,7 @@ static void sctp_endpoint_destroy(struct sctp_endpoint *ep) * chunks and hmacs arrays that were allocated */ sctp_auth_destroy_keys(&ep->endpoint_shared_keys); - kfree(ep->auth_hmacs_list); - kfree(ep->auth_chunk_list); - - /* AUTH - Free any allocated HMAC transform containers */ - sctp_auth_destroy_hmacs(ep->auth_hmacs); + sctp_auth_free(ep); /* Cleanup. */ sctp_inq_free(&ep->base.inqueue); diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 53746ffeeca3..08d14d86ecfb 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1254,6 +1254,9 @@ static int __net_init sctp_defaults_init(struct net *net) /* Disable AUTH by default. */ net->sctp.auth_enable = 0; + /* Enable ECN by default. */ + net->sctp.ecn_enable = 1; + /* Set SCOPE policy to enabled */ net->sctp.scope_policy = SCTP_SCOPE_POLICY_ENABLE; diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 36bd8a6e82df..e41ed2e0ae7d 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -207,7 +207,6 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, const struct sctp_bind_addr *bp, gfp_t gfp, int vparam_len) { - struct net *net = sock_net(asoc->base.sk); struct sctp_supported_ext_param ext_param; struct sctp_adaptation_ind_param aiparam; struct sctp_paramhdr *auth_chunks = NULL; @@ -245,7 +244,9 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, chunksize = sizeof(init) + addrs_len; chunksize += SCTP_PAD4(SCTP_SAT_LEN(num_types)); - chunksize += sizeof(ecap_param); + + if (asoc->ep->ecn_enable) + chunksize += sizeof(ecap_param); if (asoc->ep->prsctp_enable) chunksize += sizeof(prsctp_param); @@ -255,7 +256,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, * the ASCONF,the ASCONF-ACK, and the AUTH chunks in its INIT and * INIT-ACK parameters. */ - if (net->sctp.addip_enable) { + if (asoc->ep->asconf_enable) { extensions[num_ext] = SCTP_CID_ASCONF; extensions[num_ext+1] = SCTP_CID_ASCONF_ACK; num_ext += 2; @@ -336,7 +337,8 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, sctp_addto_chunk(retval, sizeof(sat), &sat); sctp_addto_chunk(retval, num_types * sizeof(__u16), &types); - sctp_addto_chunk(retval, sizeof(ecap_param), &ecap_param); + if (asoc->ep->ecn_enable) + sctp_addto_chunk(retval, sizeof(ecap_param), &ecap_param); /* Add the supported extensions parameter. 
Be nice and add this * fist before addiding the parameters for the extensions themselves @@ -1964,7 +1966,9 @@ static int sctp_process_hn_param(const struct sctp_association *asoc, return 0; } -static int sctp_verify_ext_param(struct net *net, union sctp_params param) +static int sctp_verify_ext_param(struct net *net, + const struct sctp_endpoint *ep, + union sctp_params param) { __u16 num_ext = ntohs(param.p->length) - sizeof(struct sctp_paramhdr); int have_asconf = 0; @@ -1991,7 +1995,7 @@ static int sctp_verify_ext_param(struct net *net, union sctp_params param) if (net->sctp.addip_noauth) return 1; - if (net->sctp.addip_enable && !have_auth && have_asconf) + if (ep->asconf_enable && !have_auth && have_asconf) return 0; return 1; @@ -2001,7 +2005,6 @@ static void sctp_process_ext_param(struct sctp_association *asoc, union sctp_params param) { __u16 num_ext = ntohs(param.p->length) - sizeof(struct sctp_paramhdr); - struct net *net = sock_net(asoc->base.sk); int i; for (i = 0; i < num_ext; i++) { @@ -2023,7 +2026,7 @@ static void sctp_process_ext_param(struct sctp_association *asoc, break; case SCTP_CID_ASCONF: case SCTP_CID_ASCONF_ACK: - if (net->sctp.addip_enable) + if (asoc->ep->asconf_enable) asoc->peer.asconf_capable = 1; break; case SCTP_CID_I_DATA: @@ -2145,12 +2148,12 @@ static enum sctp_ierror sctp_verify_param(struct net *net, break; case SCTP_PARAM_SUPPORTED_EXT: - if (!sctp_verify_ext_param(net, param)) + if (!sctp_verify_ext_param(net, ep, param)) return SCTP_IERROR_ABORT; break; case SCTP_PARAM_SET_PRIMARY: - if (net->sctp.addip_enable) + if (ep->asconf_enable) break; goto fallthrough; @@ -2597,15 +2600,20 @@ do_addr_param: break; case SCTP_PARAM_ECN_CAPABLE: - asoc->peer.ecn_capable = 1; - break; + if (asoc->ep->ecn_enable) { + asoc->peer.ecn_capable = 1; + break; + } + /* Fall Through */ + goto fall_through; + case SCTP_PARAM_ADAPTATION_LAYER_IND: asoc->peer.adaptation_ind = ntohl(param.aind->adaptation_ind); break; case SCTP_PARAM_SET_PRIMARY: - if (!net->sctp.addip_enable) + if (!ep->asconf_enable) goto fall_through; addr_param = param.v + sizeof(struct sctp_addip_param); diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 2c244b29a199..0c21c52fc408 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -3721,7 +3721,8 @@ enum sctp_disposition sctp_sf_do_asconf(struct net *net, * is received unauthenticated it MUST be silently discarded as * described in [I-D.ietf-tsvwg-sctp-auth]. */ - if (!net->sctp.addip_noauth && !chunk->auth) + if (!asoc->peer.asconf_capable || + (!net->sctp.addip_noauth && !chunk->auth)) return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands); @@ -3863,7 +3864,8 @@ enum sctp_disposition sctp_sf_do_asconf_ack(struct net *net, * is received unauthenticated it MUST be silently discarded as * described in [I-D.ietf-tsvwg-sctp-auth]. 
*/ - if (!net->sctp.addip_noauth && !asconf_ack->auth) + if (!asoc->peer.asconf_capable || + (!net->sctp.addip_noauth && !asconf_ack->auth)) return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands); diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c index 61ed9c6e3be3..88ea87f4f0e7 100644 --- a/net/sctp/sm_statetable.c +++ b/net/sctp/sm_statetable.c @@ -976,26 +976,22 @@ static const struct sctp_sm_table_entry *sctp_chunk_event_lookup( if (cid <= SCTP_CID_BASE_MAX) return &chunk_event_table[cid][state]; - if (net->sctp.prsctp_enable) { - if (cid == SCTP_CID_FWD_TSN || cid == SCTP_CID_I_FWD_TSN) - return &prsctp_chunk_event_table[0][state]; - } + switch ((u16)cid) { + case SCTP_CID_FWD_TSN: + case SCTP_CID_I_FWD_TSN: + return &prsctp_chunk_event_table[0][state]; - if (net->sctp.addip_enable) { - if (cid == SCTP_CID_ASCONF) - return &addip_chunk_event_table[0][state]; + case SCTP_CID_ASCONF: + return &addip_chunk_event_table[0][state]; - if (cid == SCTP_CID_ASCONF_ACK) - return &addip_chunk_event_table[1][state]; - } + case SCTP_CID_ASCONF_ACK: + return &addip_chunk_event_table[1][state]; - if (net->sctp.reconf_enable) - if (cid == SCTP_CID_RECONF) - return &reconf_chunk_event_table[0][state]; + case SCTP_CID_RECONF: + return &reconf_chunk_event_table[0][state]; - if (net->sctp.auth_enable) { - if (cid == SCTP_CID_AUTH) - return &auth_chunk_event_table[0][state]; + case SCTP_CID_AUTH: + return &auth_chunk_event_table[0][state]; } return &chunk_event_table_unknown[state]; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index b083d4e66230..939b8d2595bc 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -525,7 +525,6 @@ static int sctp_send_asconf_add_ip(struct sock *sk, struct sockaddr *addrs, int addrcnt) { - struct net *net = sock_net(sk); struct sctp_sock *sp; struct sctp_endpoint *ep; struct sctp_association *asoc; @@ -540,12 +539,12 @@ static int sctp_send_asconf_add_ip(struct sock *sk, int i; int retval = 0; - if (!net->sctp.addip_enable) - return retval; - sp = sctp_sk(sk); ep = sp->ep; + if (!ep->asconf_enable) + return retval; + pr_debug("%s: sk:%p, addrs:%p, addrcnt:%d\n", __func__, sk, addrs, addrcnt); @@ -728,7 +727,6 @@ static int sctp_send_asconf_del_ip(struct sock *sk, struct sockaddr *addrs, int addrcnt) { - struct net *net = sock_net(sk); struct sctp_sock *sp; struct sctp_endpoint *ep; struct sctp_association *asoc; @@ -744,12 +742,12 @@ static int sctp_send_asconf_del_ip(struct sock *sk, int stored = 0; chunk = NULL; - if (!net->sctp.addip_enable) - return retval; - sp = sctp_sk(sk); ep = sp->ep; + if (!ep->asconf_enable) + return retval; + pr_debug("%s: sk:%p, addrs:%p, addrcnt:%d\n", __func__, sk, addrs, addrcnt); @@ -1045,158 +1043,161 @@ out: return err; } -/* __sctp_connect(struct sock* sk, struct sockaddr *kaddrs, int addrs_size) - * - * Common routine for handling connect() and sctp_connectx(). - * Connect will come in with just a single address. 
- */ -static int __sctp_connect(struct sock *sk, - struct sockaddr *kaddrs, - int addrs_size, int flags, - sctp_assoc_t *assoc_id) +static int sctp_connect_new_asoc(struct sctp_endpoint *ep, + const union sctp_addr *daddr, + const struct sctp_initmsg *init, + struct sctp_transport **tp) { + struct sctp_association *asoc; + struct sock *sk = ep->base.sk; struct net *net = sock_net(sk); - struct sctp_sock *sp; - struct sctp_endpoint *ep; - struct sctp_association *asoc = NULL; - struct sctp_association *asoc2; - struct sctp_transport *transport; - union sctp_addr to; enum sctp_scope scope; - long timeo; - int err = 0; - int addrcnt = 0; - int walk_size = 0; - union sctp_addr *sa_addr = NULL; - void *addr_buf; - unsigned short port; + int err; - sp = sctp_sk(sk); - ep = sp->ep; + if (sctp_endpoint_is_peeled_off(ep, daddr)) + return -EADDRNOTAVAIL; - /* connect() cannot be done on a socket that is already in ESTABLISHED - * state - UDP-style peeled off socket or a TCP-style socket that - * is already connected. - * It cannot be done even on a TCP-style listening socket. - */ - if (sctp_sstate(sk, ESTABLISHED) || sctp_sstate(sk, CLOSING) || - (sctp_style(sk, TCP) && sctp_sstate(sk, LISTENING))) { - err = -EISCONN; - goto out_free; + if (!ep->base.bind_addr.port) { + if (sctp_autobind(sk)) + return -EAGAIN; + } else { + if (ep->base.bind_addr.port < inet_prot_sock(net) && + !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE)) + return -EACCES; } - /* Walk through the addrs buffer and count the number of addresses. */ - addr_buf = kaddrs; - while (walk_size < addrs_size) { - struct sctp_af *af; - - if (walk_size + sizeof(sa_family_t) > addrs_size) { - err = -EINVAL; - goto out_free; - } + scope = sctp_scope(daddr); + asoc = sctp_association_new(ep, sk, scope, GFP_KERNEL); + if (!asoc) + return -ENOMEM; - sa_addr = addr_buf; - af = sctp_get_af_specific(sa_addr->sa.sa_family); + err = sctp_assoc_set_bind_addr_from_ep(asoc, scope, GFP_KERNEL); + if (err < 0) + goto free; - /* If the address family is not supported or if this address - * causes the address buffer to overflow return EINVAL. - */ - if (!af || (walk_size + af->sockaddr_len) > addrs_size) { - err = -EINVAL; - goto out_free; - } + *tp = sctp_assoc_add_peer(asoc, daddr, GFP_KERNEL, SCTP_UNKNOWN); + if (!*tp) { + err = -ENOMEM; + goto free; + } - port = ntohs(sa_addr->v4.sin_port); + if (!init) + return 0; - /* Save current address so we can work with it */ - memcpy(&to, sa_addr, af->sockaddr_len); + if (init->sinit_num_ostreams) { + __u16 outcnt = init->sinit_num_ostreams; - err = sctp_verify_addr(sk, &to, af->sockaddr_len); + asoc->c.sinit_num_ostreams = outcnt; + /* outcnt has been changed, need to re-init stream */ + err = sctp_stream_init(&asoc->stream, outcnt, 0, GFP_KERNEL); if (err) - goto out_free; + goto free; + } - /* Make sure the destination port is correctly set - * in all addresses. - */ - if (asoc && asoc->peer.port && asoc->peer.port != port) { - err = -EINVAL; - goto out_free; - } + if (init->sinit_max_instreams) + asoc->c.sinit_max_instreams = init->sinit_max_instreams; - /* Check if there already is a matching association on the - * endpoint (other than the one created here). 
- */ - asoc2 = sctp_endpoint_lookup_assoc(ep, &to, &transport); - if (asoc2 && asoc2 != asoc) { - if (asoc2->state >= SCTP_STATE_ESTABLISHED) - err = -EISCONN; - else - err = -EALREADY; - goto out_free; - } + if (init->sinit_max_attempts) + asoc->max_init_attempts = init->sinit_max_attempts; - /* If we could not find a matching association on the endpoint, - * make sure that there is no peeled-off association matching - * the peer address even on another socket. - */ - if (sctp_endpoint_is_peeled_off(ep, &to)) { - err = -EADDRNOTAVAIL; - goto out_free; - } + if (init->sinit_max_init_timeo) + asoc->max_init_timeo = + msecs_to_jiffies(init->sinit_max_init_timeo); - if (!asoc) { - /* If a bind() or sctp_bindx() is not called prior to - * an sctp_connectx() call, the system picks an - * ephemeral port and will choose an address set - * equivalent to binding with a wildcard address. - */ - if (!ep->base.bind_addr.port) { - if (sctp_autobind(sk)) { - err = -EAGAIN; - goto out_free; - } - } else { - /* - * If an unprivileged user inherits a 1-many - * style socket with open associations on a - * privileged port, it MAY be permitted to - * accept new associations, but it SHOULD NOT - * be permitted to open new associations. - */ - if (ep->base.bind_addr.port < - inet_prot_sock(net) && - !ns_capable(net->user_ns, - CAP_NET_BIND_SERVICE)) { - err = -EACCES; - goto out_free; - } - } + return 0; +free: + sctp_association_free(asoc); + return err; +} - scope = sctp_scope(&to); - asoc = sctp_association_new(ep, sk, scope, GFP_KERNEL); - if (!asoc) { - err = -ENOMEM; - goto out_free; - } +static int sctp_connect_add_peer(struct sctp_association *asoc, + union sctp_addr *daddr, int addr_len) +{ + struct sctp_endpoint *ep = asoc->ep; + struct sctp_association *old; + struct sctp_transport *t; + int err; - err = sctp_assoc_set_bind_addr_from_ep(asoc, scope, - GFP_KERNEL); - if (err < 0) { - goto out_free; - } + err = sctp_verify_addr(ep->base.sk, daddr, addr_len); + if (err) + return err; - } + old = sctp_endpoint_lookup_assoc(ep, daddr, &t); + if (old && old != asoc) + return old->state >= SCTP_STATE_ESTABLISHED ? -EISCONN + : -EALREADY; + + if (sctp_endpoint_is_peeled_off(ep, daddr)) + return -EADDRNOTAVAIL; + + t = sctp_assoc_add_peer(asoc, daddr, GFP_KERNEL, SCTP_UNKNOWN); + if (!t) + return -ENOMEM; - /* Prime the peer's transport structures. */ - transport = sctp_assoc_add_peer(asoc, &to, GFP_KERNEL, - SCTP_UNKNOWN); - if (!transport) { - err = -ENOMEM; + return 0; +} + +/* __sctp_connect(struct sock* sk, struct sockaddr *kaddrs, int addrs_size) + * + * Common routine for handling connect() and sctp_connectx(). + * Connect will come in with just a single address. 
+ */ +static int __sctp_connect(struct sock *sk, struct sockaddr *kaddrs, + int addrs_size, int flags, sctp_assoc_t *assoc_id) +{ + struct sctp_sock *sp = sctp_sk(sk); + struct sctp_endpoint *ep = sp->ep; + struct sctp_transport *transport; + struct sctp_association *asoc; + void *addr_buf = kaddrs; + union sctp_addr *daddr; + struct sctp_af *af; + int walk_size, err; + long timeo; + + if (sctp_sstate(sk, ESTABLISHED) || sctp_sstate(sk, CLOSING) || + (sctp_style(sk, TCP) && sctp_sstate(sk, LISTENING))) + return -EISCONN; + + daddr = addr_buf; + af = sctp_get_af_specific(daddr->sa.sa_family); + if (!af || af->sockaddr_len > addrs_size) + return -EINVAL; + + err = sctp_verify_addr(sk, daddr, af->sockaddr_len); + if (err) + return err; + + asoc = sctp_endpoint_lookup_assoc(ep, daddr, &transport); + if (asoc) + return asoc->state >= SCTP_STATE_ESTABLISHED ? -EISCONN + : -EALREADY; + + err = sctp_connect_new_asoc(ep, daddr, NULL, &transport); + if (err) + return err; + asoc = transport->asoc; + + addr_buf += af->sockaddr_len; + walk_size = af->sockaddr_len; + while (walk_size < addrs_size) { + err = -EINVAL; + if (walk_size + sizeof(sa_family_t) > addrs_size) goto out_free; - } - addrcnt++; - addr_buf += af->sockaddr_len; + daddr = addr_buf; + af = sctp_get_af_specific(daddr->sa.sa_family); + if (!af || af->sockaddr_len + walk_size > addrs_size) + goto out_free; + + if (asoc->peer.port != ntohs(daddr->v4.sin_port)) + goto out_free; + + err = sctp_connect_add_peer(asoc, daddr, af->sockaddr_len); + if (err) + goto out_free; + + addr_buf += af->sockaddr_len; walk_size += af->sockaddr_len; } @@ -1209,40 +1210,25 @@ static int __sctp_connect(struct sock *sk, goto out_free; } - err = sctp_primitive_ASSOCIATE(net, asoc, NULL); - if (err < 0) { + err = sctp_primitive_ASSOCIATE(sock_net(sk), asoc, NULL); + if (err < 0) goto out_free; - } /* Initialize sk's dport and daddr for getpeername() */ inet_sk(sk)->inet_dport = htons(asoc->peer.port); - sp->pf->to_sk_daddr(sa_addr, sk); + sp->pf->to_sk_daddr(daddr, sk); sk->sk_err = 0; - timeo = sock_sndtimeo(sk, flags & O_NONBLOCK); - if (assoc_id) *assoc_id = asoc->assoc_id; - err = sctp_wait_for_connect(asoc, &timeo); - /* Note: the asoc may be freed after the return of - * sctp_wait_for_connect. - */ - - /* Don't free association on exit. 
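
The rewritten __sctp_connect() above walks a flat buffer of packed sockaddrs, checking each entry's family and length before handing it to sctp_connect_add_peer(). For reference, a minimal user-space sketch (not part of this patch) of how such a buffer is typically packed for the sctp_connectx(3) helper from lksctp-tools, which ends up in __sctp_setsockopt_connectx(); the addresses and port are placeholders, link with -lsctp:

#include <arpa/inet.h>
#include <netinet/in.h>
#include <netinet/sctp.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>

int main(void)
{
	/* One IPv4 and one IPv6 address packed back to back, the layout
	 * the kernel walks with walk_size/af->sockaddr_len. */
	char buf[sizeof(struct sockaddr_in) + sizeof(struct sockaddr_in6)];
	struct sockaddr_in *v4 = (struct sockaddr_in *)buf;
	struct sockaddr_in6 *v6 = (struct sockaddr_in6 *)(buf + sizeof(*v4));
	sctp_assoc_t assoc_id;
	int fd;

	memset(buf, 0, sizeof(buf));
	v4->sin_family = AF_INET;
	v4->sin_port = htons(5000);		/* placeholder port */
	inet_pton(AF_INET, "192.0.2.1", &v4->sin_addr);

	v6->sin6_family = AF_INET6;
	v6->sin6_port = htons(5000);		/* must match: peer.port check */
	inet_pton(AF_INET6, "2001:db8::1", &v6->sin6_addr);

	fd = socket(AF_INET6, SOCK_STREAM, IPPROTO_SCTP);
	if (fd < 0) {
		perror("socket");
		return 1;
	}

	if (sctp_connectx(fd, (struct sockaddr *)buf, 2, &assoc_id))
		perror("sctp_connectx");
	return 0;
}
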
*/ - asoc = NULL; + timeo = sock_sndtimeo(sk, flags & O_NONBLOCK); + return sctp_wait_for_connect(asoc, &timeo); out_free: pr_debug("%s: took out_free path with asoc:%p kaddrs:%p err:%d\n", __func__, asoc, kaddrs, err); - - if (asoc) { - /* sctp_primitive_ASSOCIATE may have added this association - * To the hash table, try to unhash it, just in case, its a noop - * if it wasn't hashed so we're safe - */ - sctp_association_free(asoc); - } + sctp_association_free(asoc); return err; } @@ -1312,7 +1298,8 @@ static int __sctp_setsockopt_connectx(struct sock *sk, pr_debug("%s: sk:%p addrs:%p addrs_size:%d\n", __func__, sk, addrs, addrs_size); - if (unlikely(addrs_size <= 0)) + /* make sure the 1st addr's sa_family is accessible later */ + if (unlikely(addrs_size < sizeof(sa_family_t))) return -EINVAL; kaddrs = memdup_user(addrs, addrs_size); @@ -1660,9 +1647,7 @@ static int sctp_sendmsg_new_asoc(struct sock *sk, __u16 sflags, struct sctp_transport **tp) { struct sctp_endpoint *ep = sctp_sk(sk)->ep; - struct net *net = sock_net(sk); struct sctp_association *asoc; - enum sctp_scope scope; struct cmsghdr *cmsg; __be32 flowinfo = 0; struct sctp_af *af; @@ -1677,20 +1662,6 @@ static int sctp_sendmsg_new_asoc(struct sock *sk, __u16 sflags, sctp_sstate(sk, CLOSING))) return -EADDRNOTAVAIL; - if (sctp_endpoint_is_peeled_off(ep, daddr)) - return -EADDRNOTAVAIL; - - if (!ep->base.bind_addr.port) { - if (sctp_autobind(sk)) - return -EAGAIN; - } else { - if (ep->base.bind_addr.port < inet_prot_sock(net) && - !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE)) - return -EACCES; - } - - scope = sctp_scope(daddr); - /* Label connection socket for first association 1-to-many * style for client sequence socket()->sendmsg(). This * needs to be done before sctp_assoc_add_peer() as that will @@ -1706,45 +1677,10 @@ static int sctp_sendmsg_new_asoc(struct sock *sk, __u16 sflags, if (err < 0) return err; - asoc = sctp_association_new(ep, sk, scope, GFP_KERNEL); - if (!asoc) - return -ENOMEM; - - if (sctp_assoc_set_bind_addr_from_ep(asoc, scope, GFP_KERNEL) < 0) { - err = -ENOMEM; - goto free; - } - - if (cmsgs->init) { - struct sctp_initmsg *init = cmsgs->init; - - if (init->sinit_num_ostreams) { - __u16 outcnt = init->sinit_num_ostreams; - - asoc->c.sinit_num_ostreams = outcnt; - /* outcnt has been changed, need to re-init stream */ - err = sctp_stream_init(&asoc->stream, outcnt, 0, - GFP_KERNEL); - if (err) - goto free; - } - - if (init->sinit_max_instreams) - asoc->c.sinit_max_instreams = init->sinit_max_instreams; - - if (init->sinit_max_attempts) - asoc->max_init_attempts = init->sinit_max_attempts; - - if (init->sinit_max_init_timeo) - asoc->max_init_timeo = - msecs_to_jiffies(init->sinit_max_init_timeo); - } - - *tp = sctp_assoc_add_peer(asoc, daddr, GFP_KERNEL, SCTP_UNKNOWN); - if (!*tp) { - err = -ENOMEM; - goto free; - } + err = sctp_connect_new_asoc(ep, daddr, cmsgs->init, tp); + if (err) + return err; + asoc = (*tp)->asoc; if (!cmsgs->addrs_msg) return 0; @@ -1754,8 +1690,6 @@ static int sctp_sendmsg_new_asoc(struct sock *sk, __u16 sflags, /* sendv addr list parse */ for_each_cmsghdr(cmsg, cmsgs->addrs_msg) { - struct sctp_transport *transport; - struct sctp_association *old; union sctp_addr _daddr; int dlen; @@ -1789,30 +1723,10 @@ static int sctp_sendmsg_new_asoc(struct sock *sk, __u16 sflags, daddr->v6.sin6_port = htons(asoc->peer.port); memcpy(&daddr->v6.sin6_addr, CMSG_DATA(cmsg), dlen); } - err = sctp_verify_addr(sk, daddr, sizeof(*daddr)); - if (err) - goto free; - - old = 
sctp_endpoint_lookup_assoc(ep, daddr, &transport); - if (old && old != asoc) { - if (old->state >= SCTP_STATE_ESTABLISHED) - err = -EISCONN; - else - err = -EALREADY; - goto free; - } - if (sctp_endpoint_is_peeled_off(ep, daddr)) { - err = -EADDRNOTAVAIL; - goto free; - } - - transport = sctp_assoc_add_peer(asoc, daddr, GFP_KERNEL, - SCTP_UNKNOWN); - if (!transport) { - err = -ENOMEM; + err = sctp_connect_add_peer(asoc, daddr, sizeof(*daddr)); + if (err) goto free; - } } return 0; @@ -3415,7 +3329,6 @@ static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, unsigned static int sctp_setsockopt_peer_primary_addr(struct sock *sk, char __user *optval, unsigned int optlen) { - struct net *net = sock_net(sk); struct sctp_sock *sp; struct sctp_association *asoc = NULL; struct sctp_setpeerprim prim; @@ -3425,7 +3338,7 @@ static int sctp_setsockopt_peer_primary_addr(struct sock *sk, char __user *optva sp = sctp_sk(sk); - if (!net->sctp.addip_enable) + if (!sp->ep->asconf_enable) return -EPERM; if (optlen != sizeof(struct sctp_setpeerprim)) @@ -3775,9 +3688,6 @@ static int sctp_setsockopt_auth_key(struct sock *sk, struct sctp_association *asoc; int ret = -EINVAL; - if (!ep->auth_enable) - return -EACCES; - if (optlen <= sizeof(struct sctp_authkey)) return -EINVAL; /* authkey->sca_keylength is u16, so optlen can't be bigger than @@ -3844,9 +3754,6 @@ static int sctp_setsockopt_active_key(struct sock *sk, struct sctp_authkeyid val; int ret = 0; - if (!ep->auth_enable) - return -EACCES; - if (optlen != sizeof(struct sctp_authkeyid)) return -EINVAL; if (copy_from_user(&val, optval, optlen)) @@ -3898,9 +3805,6 @@ static int sctp_setsockopt_del_key(struct sock *sk, struct sctp_authkeyid val; int ret = 0; - if (!ep->auth_enable) - return -EACCES; - if (optlen != sizeof(struct sctp_authkeyid)) return -EINVAL; if (copy_from_user(&val, optval, optlen)) @@ -3951,9 +3855,6 @@ static int sctp_setsockopt_deactivate_key(struct sock *sk, char __user *optval, struct sctp_authkeyid val; int ret = 0; - if (!ep->auth_enable) - return -EACCES; - if (optlen != sizeof(struct sctp_authkeyid)) return -EINVAL; if (copy_from_user(&val, optval, optlen)) @@ -4584,6 +4485,110 @@ static int sctp_setsockopt_event(struct sock *sk, char __user *optval, return retval; } +static int sctp_setsockopt_asconf_supported(struct sock *sk, + char __user *optval, + unsigned int optlen) +{ + struct sctp_assoc_value params; + struct sctp_association *asoc; + struct sctp_endpoint *ep; + int retval = -EINVAL; + + if (optlen != sizeof(params)) + goto out; + + if (copy_from_user(¶ms, optval, optlen)) { + retval = -EFAULT; + goto out; + } + + asoc = sctp_id2assoc(sk, params.assoc_id); + if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) + goto out; + + ep = sctp_sk(sk)->ep; + ep->asconf_enable = !!params.assoc_value; + + if (ep->asconf_enable && ep->auth_enable) { + sctp_auth_ep_add_chunkid(ep, SCTP_CID_ASCONF); + sctp_auth_ep_add_chunkid(ep, SCTP_CID_ASCONF_ACK); + } + + retval = 0; + +out: + return retval; +} + +static int sctp_setsockopt_auth_supported(struct sock *sk, + char __user *optval, + unsigned int optlen) +{ + struct sctp_assoc_value params; + struct sctp_association *asoc; + struct sctp_endpoint *ep; + int retval = -EINVAL; + + if (optlen != sizeof(params)) + goto out; + + if (copy_from_user(¶ms, optval, optlen)) { + retval = -EFAULT; + goto out; + } + + asoc = sctp_id2assoc(sk, params.assoc_id); + if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) + goto out; + + ep = 
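
sctp_sendmsg_new_asoc() now routes the user-supplied struct sctp_initmsg through the shared sctp_connect_new_asoc() helper above. A hedged user-space sketch of where those parameters originate on the sendmsg() path: a one-to-many socket carrying SCTP_INIT ancillary data (destination address and payload are placeholders):

#include <arpa/inet.h>
#include <netinet/in.h>
#include <netinet/sctp.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>

int main(void)
{
	struct sockaddr_in dst = { .sin_family = AF_INET,
				   .sin_port = htons(5000) };
	char cbuf[CMSG_SPACE(sizeof(struct sctp_initmsg))];
	struct sctp_initmsg init = { .sinit_num_ostreams = 8,
				     .sinit_max_instreams = 8,
				     .sinit_max_attempts = 4 };
	char payload[] = "hello";
	struct iovec iov = { .iov_base = payload, .iov_len = sizeof(payload) };
	struct msghdr msg = { 0 };
	struct cmsghdr *cmsg;
	int fd;

	inet_pton(AF_INET, "192.0.2.1", &dst.sin_addr);
	memset(cbuf, 0, sizeof(cbuf));

	fd = socket(AF_INET, SOCK_SEQPACKET, IPPROTO_SCTP);
	if (fd < 0) {
		perror("socket");
		return 1;
	}

	msg.msg_name = &dst;
	msg.msg_namelen = sizeof(dst);
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;
	msg.msg_control = cbuf;
	msg.msg_controllen = sizeof(cbuf);

	cmsg = CMSG_FIRSTHDR(&msg);
	cmsg->cmsg_level = IPPROTO_SCTP;
	cmsg->cmsg_type = SCTP_INIT;	/* feeds the init block used above */
	cmsg->cmsg_len = CMSG_LEN(sizeof(init));
	memcpy(CMSG_DATA(cmsg), &init, sizeof(init));

	if (sendmsg(fd, &msg, 0) < 0)
		perror("sendmsg");
	return 0;
}
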
sctp_sk(sk)->ep; + if (params.assoc_value) { + retval = sctp_auth_init(ep, GFP_KERNEL); + if (retval) + goto out; + if (ep->asconf_enable) { + sctp_auth_ep_add_chunkid(ep, SCTP_CID_ASCONF); + sctp_auth_ep_add_chunkid(ep, SCTP_CID_ASCONF_ACK); + } + } + + ep->auth_enable = !!params.assoc_value; + retval = 0; + +out: + return retval; +} + +static int sctp_setsockopt_ecn_supported(struct sock *sk, + char __user *optval, + unsigned int optlen) +{ + struct sctp_assoc_value params; + struct sctp_association *asoc; + int retval = -EINVAL; + + if (optlen != sizeof(params)) + goto out; + + if (copy_from_user(¶ms, optval, optlen)) { + retval = -EFAULT; + goto out; + } + + asoc = sctp_id2assoc(sk, params.assoc_id); + if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) + goto out; + + sctp_sk(sk)->ep->ecn_enable = !!params.assoc_value; + retval = 0; + +out: + return retval; +} + /* API 6.2 setsockopt(), getsockopt() * * Applications use setsockopt() and getsockopt() to set or retrieve @@ -4784,6 +4789,15 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, case SCTP_EVENT: retval = sctp_setsockopt_event(sk, optval, optlen); break; + case SCTP_ASCONF_SUPPORTED: + retval = sctp_setsockopt_asconf_supported(sk, optval, optlen); + break; + case SCTP_AUTH_SUPPORTED: + retval = sctp_setsockopt_auth_supported(sk, optval, optlen); + break; + case SCTP_ECN_SUPPORTED: + retval = sctp_setsockopt_ecn_supported(sk, optval, optlen); + break; default: retval = -ENOPROTOOPT; break; @@ -6921,9 +6935,6 @@ static int sctp_getsockopt_active_key(struct sock *sk, int len, struct sctp_authkeyid val; struct sctp_association *asoc; - if (!ep->auth_enable) - return -EACCES; - if (len < sizeof(struct sctp_authkeyid)) return -EINVAL; @@ -6935,10 +6946,15 @@ static int sctp_getsockopt_active_key(struct sock *sk, int len, if (!asoc && val.scact_assoc_id && sctp_style(sk, UDP)) return -EINVAL; - if (asoc) + if (asoc) { + if (!asoc->peer.auth_capable) + return -EACCES; val.scact_keynumber = asoc->active_key_id; - else + } else { + if (!ep->auth_enable) + return -EACCES; val.scact_keynumber = ep->active_key_id; + } if (put_user(len, optlen)) return -EFAULT; @@ -6951,7 +6967,6 @@ static int sctp_getsockopt_active_key(struct sock *sk, int len, static int sctp_getsockopt_peer_auth_chunks(struct sock *sk, int len, char __user *optval, int __user *optlen) { - struct sctp_endpoint *ep = sctp_sk(sk)->ep; struct sctp_authchunks __user *p = (void __user *)optval; struct sctp_authchunks val; struct sctp_association *asoc; @@ -6959,9 +6974,6 @@ static int sctp_getsockopt_peer_auth_chunks(struct sock *sk, int len, u32 num_chunks = 0; char __user *to; - if (!ep->auth_enable) - return -EACCES; - if (len < sizeof(struct sctp_authchunks)) return -EINVAL; @@ -6973,6 +6985,9 @@ static int sctp_getsockopt_peer_auth_chunks(struct sock *sk, int len, if (!asoc) return -EINVAL; + if (!asoc->peer.auth_capable) + return -EACCES; + ch = asoc->peer.peer_chunks; if (!ch) goto num; @@ -7004,9 +7019,6 @@ static int sctp_getsockopt_local_auth_chunks(struct sock *sk, int len, u32 num_chunks = 0; char __user *to; - if (!ep->auth_enable) - return -EACCES; - if (len < sizeof(struct sctp_authchunks)) return -EINVAL; @@ -7019,8 +7031,15 @@ static int sctp_getsockopt_local_auth_chunks(struct sock *sk, int len, sctp_style(sk, UDP)) return -EINVAL; - ch = asoc ? 
(struct sctp_chunks_param *)asoc->c.auth_chunks - : ep->auth_chunk_list; + if (asoc) { + if (!asoc->peer.auth_capable) + return -EACCES; + ch = (struct sctp_chunks_param *)asoc->c.auth_chunks; + } else { + if (!ep->auth_enable) + return -EACCES; + ch = ep->auth_chunk_list; + } if (!ch) goto num; @@ -7764,6 +7783,123 @@ static int sctp_getsockopt_event(struct sock *sk, int len, char __user *optval, return 0; } +static int sctp_getsockopt_asconf_supported(struct sock *sk, int len, + char __user *optval, + int __user *optlen) +{ + struct sctp_assoc_value params; + struct sctp_association *asoc; + int retval = -EFAULT; + + if (len < sizeof(params)) { + retval = -EINVAL; + goto out; + } + + len = sizeof(params); + if (copy_from_user(¶ms, optval, len)) + goto out; + + asoc = sctp_id2assoc(sk, params.assoc_id); + if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) { + retval = -EINVAL; + goto out; + } + + params.assoc_value = asoc ? asoc->peer.asconf_capable + : sctp_sk(sk)->ep->asconf_enable; + + if (put_user(len, optlen)) + goto out; + + if (copy_to_user(optval, ¶ms, len)) + goto out; + + retval = 0; + +out: + return retval; +} + +static int sctp_getsockopt_auth_supported(struct sock *sk, int len, + char __user *optval, + int __user *optlen) +{ + struct sctp_assoc_value params; + struct sctp_association *asoc; + int retval = -EFAULT; + + if (len < sizeof(params)) { + retval = -EINVAL; + goto out; + } + + len = sizeof(params); + if (copy_from_user(¶ms, optval, len)) + goto out; + + asoc = sctp_id2assoc(sk, params.assoc_id); + if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) { + retval = -EINVAL; + goto out; + } + + params.assoc_value = asoc ? asoc->peer.auth_capable + : sctp_sk(sk)->ep->auth_enable; + + if (put_user(len, optlen)) + goto out; + + if (copy_to_user(optval, ¶ms, len)) + goto out; + + retval = 0; + +out: + return retval; +} + +static int sctp_getsockopt_ecn_supported(struct sock *sk, int len, + char __user *optval, + int __user *optlen) +{ + struct sctp_assoc_value params; + struct sctp_association *asoc; + int retval = -EFAULT; + + if (len < sizeof(params)) { + retval = -EINVAL; + goto out; + } + + len = sizeof(params); + if (copy_from_user(¶ms, optval, len)) + goto out; + + asoc = sctp_id2assoc(sk, params.assoc_id); + if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) { + retval = -EINVAL; + goto out; + } + + params.assoc_value = asoc ? 
asoc->peer.ecn_capable + : sctp_sk(sk)->ep->ecn_enable; + + if (put_user(len, optlen)) + goto out; + + if (copy_to_user(optval, ¶ms, len)) + goto out; + + retval = 0; + +out: + return retval; +} + static int sctp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { @@ -7965,6 +8101,17 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname, case SCTP_EVENT: retval = sctp_getsockopt_event(sk, len, optval, optlen); break; + case SCTP_ASCONF_SUPPORTED: + retval = sctp_getsockopt_asconf_supported(sk, len, optval, + optlen); + break; + case SCTP_AUTH_SUPPORTED: + retval = sctp_getsockopt_auth_supported(sk, len, optval, + optlen); + break; + case SCTP_ECN_SUPPORTED: + retval = sctp_getsockopt_ecn_supported(sk, len, optval, optlen); + break; default: retval = -ENOPROTOOPT; break; diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index 1250751bca1b..238cf1737576 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -278,6 +278,13 @@ static struct ctl_table sctp_net_table[] = { .proc_handler = proc_dointvec, }, { + .procname = "ecn_enable", + .data = &init_net.sctp.ecn_enable, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { .procname = "addr_scope_policy", .data = &init_net.sctp.scope_policy, .maxlen = sizeof(int), diff --git a/net/sctp/transport.c b/net/sctp/transport.c index e2f8e369cd08..7235a6032671 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -43,8 +43,8 @@ static struct sctp_transport *sctp_transport_init(struct net *net, gfp_t gfp) { /* Copy in the address. */ - peer->ipaddr = *addr; peer->af_specific = sctp_get_af_specific(addr->sa.sa_family); + memcpy(&peer->ipaddr, addr, peer->af_specific->sockaddr_len); memset(&peer->saddr, 0, sizeof(union sctp_addr)); peer->sack_generation = 0; diff --git a/net/sunrpc/auth_gss/gss_krb5_keys.c b/net/sunrpc/auth_gss/gss_krb5_keys.c index 550fdf18d3b3..3b7f721c023b 100644 --- a/net/sunrpc/auth_gss/gss_krb5_keys.c +++ b/net/sunrpc/auth_gss/gss_krb5_keys.c @@ -228,14 +228,11 @@ u32 krb5_derive_key(const struct gss_krb5_enctype *gk5e, ret = 0; err_free_raw: - memset(rawkey, 0, keybytes); - kfree(rawkey); + kzfree(rawkey); err_free_out: - memset(outblockdata, 0, blocksize); - kfree(outblockdata); + kzfree(outblockdata); err_free_in: - memset(inblockdata, 0, blocksize); - kfree(inblockdata); + kzfree(inblockdata); err_free_cipher: crypto_free_sync_skcipher(cipher); err_return: diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 1336f3cdad38..6ef1abdd525f 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -185,7 +185,7 @@ static void tipc_bcbase_xmit(struct net *net, struct sk_buff_head *xmitq) } /* We have to transmit across all bearers */ - skb_queue_head_init(&_xmitq); + __skb_queue_head_init(&_xmitq); for (bearer_id = 0; bearer_id < MAX_BEARERS; bearer_id++) { if (!bb->dests[bearer_id]) continue; @@ -256,7 +256,7 @@ static int tipc_bcast_xmit(struct net *net, struct sk_buff_head *pkts, struct sk_buff_head xmitq; int rc = 0; - skb_queue_head_init(&xmitq); + __skb_queue_head_init(&xmitq); tipc_bcast_lock(net); if (tipc_link_bc_peers(l)) rc = tipc_link_xmit(l, pkts, &xmitq); @@ -286,7 +286,7 @@ static int tipc_rcast_xmit(struct net *net, struct sk_buff_head *pkts, u32 dnode, selector; selector = msg_link_selector(buf_msg(skb_peek(pkts))); - skb_queue_head_init(&_pkts); + __skb_queue_head_init(&_pkts); list_for_each_entry_safe(dst, tmp, &dests->list, list) { dnode = dst->node; @@ -344,7 +344,7 @@ static int tipc_mcast_send_sync(struct net 
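
The three endpoint knobs wired up above (SCTP_ASCONF_SUPPORTED, SCTP_AUTH_SUPPORTED, SCTP_ECN_SUPPORTED) all take a struct sctp_assoc_value; with an established association the matching getsockopt reports the negotiated peer capability, otherwise the endpoint default. A minimal sketch of toggling the defaults, assuming UAPI headers that already define the new option names:

#include <netinet/in.h>
#include <netinet/sctp.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>

static int set_ep_flag(int fd, int optname, int on)
{
	struct sctp_assoc_value av;

	memset(&av, 0, sizeof(av));
	av.assoc_id = 0;		/* SCTP_FUTURE_ASSOC: endpoint default */
	av.assoc_value = on;
	return setsockopt(fd, IPPROTO_SCTP, optname, &av, sizeof(av));
}

int main(void)
{
	int fd = socket(AF_INET, SOCK_STREAM, IPPROTO_SCTP);

	if (fd < 0) {
		perror("socket");
		return 1;
	}

	if (set_ep_flag(fd, SCTP_ASCONF_SUPPORTED, 1))
		perror("SCTP_ASCONF_SUPPORTED");
	if (set_ep_flag(fd, SCTP_AUTH_SUPPORTED, 1))
		perror("SCTP_AUTH_SUPPORTED");
	if (set_ep_flag(fd, SCTP_ECN_SUPPORTED, 0))	/* opt out of ECN */
		perror("SCTP_ECN_SUPPORTED");
	return 0;
}
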
*net, struct sk_buff *skb, msg_set_size(_hdr, MCAST_H_SIZE); msg_set_is_rcast(_hdr, !msg_is_rcast(hdr)); - skb_queue_head_init(&tmpq); + __skb_queue_head_init(&tmpq); __skb_queue_tail(&tmpq, _skb); if (method->rcast) tipc_bcast_xmit(net, &tmpq, cong_link_cnt); @@ -378,7 +378,7 @@ int tipc_mcast_xmit(struct net *net, struct sk_buff_head *pkts, int rc = 0; skb_queue_head_init(&inputq); - skb_queue_head_init(&localq); + __skb_queue_head_init(&localq); /* Clone packets before they are consumed by next call */ if (dests->local && !tipc_msg_reassemble(pkts, &localq)) { @@ -406,8 +406,10 @@ int tipc_mcast_xmit(struct net *net, struct sk_buff_head *pkts, rc = tipc_bcast_xmit(net, pkts, cong_link_cnt); } - if (dests->local) + if (dests->local) { + tipc_loopback_trace(net, &localq); tipc_sk_mcast_rcv(net, &localq, &inputq); + } exit: /* This queue should normally be empty by now */ __skb_queue_purge(pkts); diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index a809c0ec8d15..0214aa1c4427 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -389,6 +389,11 @@ int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b, dev_put(dev); return -EINVAL; } + if (dev == net->loopback_dev) { + dev_put(dev); + pr_info("Enabling <%s> not permitted\n", b->name); + return -EINVAL; + } /* Autoconfigure own node identity if needed */ if (!tipc_own_id(net) && hwaddr_len <= NODE_ID_LEN) { @@ -674,6 +679,65 @@ void tipc_bearer_stop(struct net *net) } } +void tipc_clone_to_loopback(struct net *net, struct sk_buff_head *pkts) +{ + struct net_device *dev = net->loopback_dev; + struct sk_buff *skb, *_skb; + int exp; + + skb_queue_walk(pkts, _skb) { + skb = pskb_copy(_skb, GFP_ATOMIC); + if (!skb) + continue; + + exp = SKB_DATA_ALIGN(dev->hard_header_len - skb_headroom(skb)); + if (exp > 0 && pskb_expand_head(skb, exp, 0, GFP_ATOMIC)) { + kfree_skb(skb); + continue; + } + + skb_reset_network_header(skb); + dev_hard_header(skb, dev, ETH_P_TIPC, dev->dev_addr, + dev->dev_addr, skb->len); + skb->dev = dev; + skb->pkt_type = PACKET_HOST; + skb->ip_summed = CHECKSUM_UNNECESSARY; + skb->protocol = eth_type_trans(skb, dev); + netif_rx_ni(skb); + } +} + +static int tipc_loopback_rcv_pkt(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *od) +{ + consume_skb(skb); + return NET_RX_SUCCESS; +} + +int tipc_attach_loopback(struct net *net) +{ + struct net_device *dev = net->loopback_dev; + struct tipc_net *tn = tipc_net(net); + + if (!dev) + return -ENODEV; + + dev_hold(dev); + tn->loopback_pt.dev = dev; + tn->loopback_pt.type = htons(ETH_P_TIPC); + tn->loopback_pt.func = tipc_loopback_rcv_pkt; + dev_add_pack(&tn->loopback_pt); + return 0; +} + +void tipc_detach_loopback(struct net *net) +{ + struct tipc_net *tn = tipc_net(net); + + dev_remove_pack(&tn->loopback_pt); + dev_put(net->loopback_dev); +} + /* Caller should hold rtnl_lock to protect the bearer */ static int __tipc_nl_add_bearer(struct tipc_nl_msg *msg, struct tipc_bearer *bearer, int nlflags) diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index 7f4c569594a5..ea0f3c49cbed 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -232,6 +232,16 @@ void tipc_bearer_xmit(struct net *net, u32 bearer_id, struct tipc_media_addr *dst); void tipc_bearer_bc_xmit(struct net *net, u32 bearer_id, struct sk_buff_head *xmitq); +void tipc_clone_to_loopback(struct net *net, struct sk_buff_head *pkts); +int tipc_attach_loopback(struct net *net); +void tipc_detach_loopback(struct net *net); + +static inline void tipc_loopback_trace(struct 
net *net, + struct sk_buff_head *pkts) +{ + if (unlikely(dev_nit_active(net->loopback_dev))) + tipc_clone_to_loopback(net, pkts); +} /* check if device MTU is too low for tipc headers */ static inline bool tipc_mtu_bad(struct net_device *dev, unsigned int reserve) diff --git a/net/tipc/core.c b/net/tipc/core.c index c8370722f0bb..23cb379a93d6 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -82,6 +82,10 @@ static int __net_init tipc_init_net(struct net *net) if (err) goto out_bclink; + err = tipc_attach_loopback(net); + if (err) + goto out_bclink; + return 0; out_bclink: @@ -94,6 +98,7 @@ out_sk_rht: static void __net_exit tipc_exit_net(struct net *net) { + tipc_detach_loopback(net); tipc_net_stop(net); tipc_bcast_stop(net); tipc_nametbl_stop(net); diff --git a/net/tipc/core.h b/net/tipc/core.h index 7a68e1b6a066..60d829581068 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -125,6 +125,9 @@ struct tipc_net { /* Cluster capabilities */ u16 capabilities; + + /* Tracing of node internal messages */ + struct packet_type loopback_pt; }; static inline struct tipc_net *tipc_net(struct net *net) diff --git a/net/tipc/group.c b/net/tipc/group.c index 5f98d38bcf08..89257e2a980d 100644 --- a/net/tipc/group.c +++ b/net/tipc/group.c @@ -199,7 +199,7 @@ void tipc_group_join(struct net *net, struct tipc_group *grp, int *sk_rcvbuf) struct tipc_member *m, *tmp; struct sk_buff_head xmitq; - skb_queue_head_init(&xmitq); + __skb_queue_head_init(&xmitq); rbtree_postorder_for_each_entry_safe(m, tmp, tree, tree_node) { tipc_group_proto_xmit(grp, m, GRP_JOIN_MSG, &xmitq); tipc_group_update_member(m, 0); @@ -435,7 +435,7 @@ bool tipc_group_cong(struct tipc_group *grp, u32 dnode, u32 dport, return true; if (state == MBR_PENDING && adv == ADV_IDLE) return true; - skb_queue_head_init(&xmitq); + __skb_queue_head_init(&xmitq); tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, &xmitq); tipc_node_distr_xmit(grp->net, &xmitq); return true; diff --git a/net/tipc/link.c b/net/tipc/link.c index c2c5c53cad22..6cc75ffd9e2c 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -176,6 +176,7 @@ struct tipc_link { /* Fragmentation/reassembly */ struct sk_buff *reasm_buf; + struct sk_buff *reasm_tnlmsg; /* Broadcast */ u16 ackers; @@ -849,18 +850,31 @@ static int link_schedule_user(struct tipc_link *l, struct tipc_msg *hdr) */ static void link_prepare_wakeup(struct tipc_link *l) { + struct sk_buff_head *wakeupq = &l->wakeupq; + struct sk_buff_head *inputq = l->inputq; struct sk_buff *skb, *tmp; - int imp, i = 0; + struct sk_buff_head tmpq; + int avail[5] = {0,}; + int imp = 0; + + __skb_queue_head_init(&tmpq); + + for (; imp <= TIPC_SYSTEM_IMPORTANCE; imp++) + avail[imp] = l->backlog[imp].limit - l->backlog[imp].len; - skb_queue_walk_safe(&l->wakeupq, skb, tmp) { + skb_queue_walk_safe(wakeupq, skb, tmp) { imp = TIPC_SKB_CB(skb)->chain_imp; - if (l->backlog[imp].len < l->backlog[imp].limit) { - skb_unlink(skb, &l->wakeupq); - skb_queue_tail(l->inputq, skb); - } else if (i++ > 10) { - break; - } + if (avail[imp] <= 0) + continue; + avail[imp]--; + __skb_unlink(skb, wakeupq); + __skb_queue_tail(&tmpq, skb); } + + spin_lock_bh(&inputq->lock); + skb_queue_splice_tail(&tmpq, inputq); + spin_unlock_bh(&inputq->lock); + } void tipc_link_reset(struct tipc_link *l) @@ -893,8 +907,10 @@ void tipc_link_reset(struct tipc_link *l) l->backlog[TIPC_CRITICAL_IMPORTANCE].len = 0; l->backlog[TIPC_SYSTEM_IMPORTANCE].len = 0; kfree_skb(l->reasm_buf); + kfree_skb(l->reasm_tnlmsg); kfree_skb(l->failover_reasm_skb); l->reasm_buf = NULL; + 
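
With tipc_attach_loopback() and the tipc_loopback_trace() hook above, node-internal TIPC messages are cloned onto the loopback device whenever a packet tap is active, so tcpdump -i lo (or any ETH_P_ALL tap) now sees them. A stand-alone sketch of such a tap in C; it is the presence of the ETH_P_ALL socket that makes dev_nit_active() true and triggers the cloning (requires CAP_NET_RAW):

#include <arpa/inet.h>
#include <linux/if_ether.h>	/* ETH_P_ALL, ETH_P_TIPC, struct ethhdr */
#include <linux/if_packet.h>
#include <net/if.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>

int main(void)
{
	struct sockaddr_ll sll;
	unsigned char buf[2048];
	ssize_t len;
	int fd;

	fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
	if (fd < 0) {
		perror("socket");
		return 1;
	}

	memset(&sll, 0, sizeof(sll));
	sll.sll_family = AF_PACKET;
	sll.sll_protocol = htons(ETH_P_ALL);
	sll.sll_ifindex = if_nametoindex("lo");
	if (bind(fd, (struct sockaddr *)&sll, sizeof(sll))) {
		perror("bind");
		return 1;
	}

	/* Frames cloned by tipc_clone_to_loopback() arrive fully
	 * Ethernet-framed; pick out the TIPC ethertype. */
	while ((len = recv(fd, buf, sizeof(buf), 0)) > 0) {
		struct ethhdr *eth = (struct ethhdr *)buf;

		if (len >= (ssize_t)sizeof(*eth) &&
		    eth->h_proto == htons(ETH_P_TIPC))
			printf("TIPC loopback frame, %zd bytes\n", len);
	}
	return 0;
}
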
l->reasm_tnlmsg = NULL; l->failover_reasm_skb = NULL; l->rcv_unacked = 0; l->snd_nxt = 1; @@ -936,7 +952,10 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list, int rc = 0; if (unlikely(msg_size(hdr) > mtu)) { - skb_queue_purge(list); + pr_warn("Too large msg, purging xmit list %d %d %d %d %d!\n", + skb_queue_len(list), msg_user(hdr), + msg_type(hdr), msg_size(hdr), mtu); + __skb_queue_purge(list); return -EMSGSIZE; } @@ -965,7 +984,7 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list, if (likely(skb_queue_len(transmq) < maxwin)) { _skb = skb_clone(skb, GFP_ATOMIC); if (!_skb) { - skb_queue_purge(list); + __skb_queue_purge(list); return -ENOBUFS; } __skb_dequeue(list); @@ -1238,6 +1257,7 @@ static int tipc_link_tnl_rcv(struct tipc_link *l, struct sk_buff *skb, struct sk_buff_head *inputq) { struct sk_buff **reasm_skb = &l->failover_reasm_skb; + struct sk_buff **reasm_tnlmsg = &l->reasm_tnlmsg; struct sk_buff_head *fdefq = &l->failover_deferdq; struct tipc_msg *hdr = buf_msg(skb); struct sk_buff *iskb; @@ -1245,40 +1265,56 @@ static int tipc_link_tnl_rcv(struct tipc_link *l, struct sk_buff *skb, int rc = 0; u16 seqno; - /* SYNCH_MSG */ - if (msg_type(hdr) == SYNCH_MSG) - goto drop; + if (msg_type(hdr) == SYNCH_MSG) { + kfree_skb(skb); + return 0; + } - /* FAILOVER_MSG */ - if (!tipc_msg_extract(skb, &iskb, &ipos)) { - pr_warn_ratelimited("Cannot extract FAILOVER_MSG, defq: %d\n", - skb_queue_len(fdefq)); - return rc; + /* Not a fragment? */ + if (likely(!msg_nof_fragms(hdr))) { + if (unlikely(!tipc_msg_extract(skb, &iskb, &ipos))) { + pr_warn_ratelimited("Unable to extract msg, defq: %d\n", + skb_queue_len(fdefq)); + return 0; + } + kfree_skb(skb); + } else { + /* Set fragment type for buf_append */ + if (msg_fragm_no(hdr) == 1) + msg_set_type(hdr, FIRST_FRAGMENT); + else if (msg_fragm_no(hdr) < msg_nof_fragms(hdr)) + msg_set_type(hdr, FRAGMENT); + else + msg_set_type(hdr, LAST_FRAGMENT); + + if (!tipc_buf_append(reasm_tnlmsg, &skb)) { + /* Successful but non-complete reassembly? 
*/ + if (*reasm_tnlmsg || link_is_bc_rcvlink(l)) + return 0; + pr_warn_ratelimited("Unable to reassemble tunnel msg\n"); + return tipc_link_fsm_evt(l, LINK_FAILURE_EVT); + } + iskb = skb; } do { seqno = buf_seqno(iskb); - if (unlikely(less(seqno, l->drop_point))) { kfree_skb(iskb); continue; } - if (unlikely(seqno != l->drop_point)) { __tipc_skb_queue_sorted(fdefq, seqno, iskb); continue; } l->drop_point++; - if (!tipc_data_input(l, iskb, inputq)) rc |= tipc_link_input(l, iskb, inputq, reasm_skb); if (unlikely(rc)) break; } while ((iskb = __tipc_skb_dequeue(fdefq, l->drop_point))); -drop: - kfree_skb(skb); return rc; } @@ -1644,7 +1680,7 @@ void tipc_link_create_dummy_tnl_msg(struct tipc_link *l, struct sk_buff *skb; u32 dnode = l->addr; - skb_queue_head_init(&tnlq); + __skb_queue_head_init(&tnlq); skb = tipc_msg_create(TUNNEL_PROTOCOL, FAILOVER_MSG, INT_H_SIZE, BASIC_H_SIZE, dnode, onode, 0, 0, 0); @@ -1675,14 +1711,18 @@ void tipc_link_tnl_prepare(struct tipc_link *l, struct tipc_link *tnl, struct sk_buff *skb, *tnlskb; struct tipc_msg *hdr, tnlhdr; struct sk_buff_head *queue = &l->transmq; - struct sk_buff_head tmpxq, tnlq; + struct sk_buff_head tmpxq, tnlq, frags; u16 pktlen, pktcnt, seqno = l->snd_nxt; + bool pktcnt_need_update = false; + u16 syncpt; + int rc; if (!tnl) return; - skb_queue_head_init(&tnlq); - skb_queue_head_init(&tmpxq); + __skb_queue_head_init(&tnlq); + __skb_queue_head_init(&tmpxq); + __skb_queue_head_init(&frags); /* At least one packet required for safe algorithm => add dummy */ skb = tipc_msg_create(TIPC_LOW_IMPORTANCE, TIPC_DIRECT_MSG, @@ -1692,10 +1732,35 @@ void tipc_link_tnl_prepare(struct tipc_link *l, struct tipc_link *tnl, pr_warn("%sunable to create tunnel packet\n", link_co_err); return; } - skb_queue_tail(&tnlq, skb); + __skb_queue_tail(&tnlq, skb); tipc_link_xmit(l, &tnlq, &tmpxq); __skb_queue_purge(&tmpxq); + /* Link Synching: + * From now on, send only one single ("dummy") SYNCH message + * to peer. The SYNCH message does not contain any data, just + * a header conveying the synch point to the peer. + */ + if (mtyp == SYNCH_MSG && (tnl->peer_caps & TIPC_TUNNEL_ENHANCED)) { + tnlskb = tipc_msg_create(TUNNEL_PROTOCOL, SYNCH_MSG, + INT_H_SIZE, 0, l->addr, + tipc_own_addr(l->net), + 0, 0, 0); + if (!tnlskb) { + pr_warn("%sunable to create dummy SYNCH_MSG\n", + link_co_err); + return; + } + + hdr = buf_msg(tnlskb); + syncpt = l->snd_nxt + skb_queue_len(&l->backlogq) - 1; + msg_set_syncpt(hdr, syncpt); + msg_set_bearer_id(hdr, l->peer_bearer_id); + __skb_queue_tail(&tnlq, tnlskb); + tipc_link_xmit(tnl, &tnlq, xmitq); + return; + } + /* Initialize reusable tunnel packet header */ tipc_msg_init(tipc_own_addr(l->net), &tnlhdr, TUNNEL_PROTOCOL, mtyp, INT_H_SIZE, l->addr); @@ -1713,6 +1778,39 @@ tnl: if (queue == &l->backlogq) msg_set_seqno(hdr, seqno++); pktlen = msg_size(hdr); + + /* Tunnel link MTU is not large enough? 
This could be + * due to: + * 1) Link MTU has just changed or set differently; + * 2) Or FAILOVER on the top of a SYNCH message + * + * The 2nd case should not happen if peer supports + * TIPC_TUNNEL_ENHANCED + */ + if (pktlen > tnl->mtu - INT_H_SIZE) { + if (mtyp == FAILOVER_MSG && + (tnl->peer_caps & TIPC_TUNNEL_ENHANCED)) { + rc = tipc_msg_fragment(skb, &tnlhdr, tnl->mtu, + &frags); + if (rc) { + pr_warn("%sunable to frag msg: rc %d\n", + link_co_err, rc); + return; + } + pktcnt += skb_queue_len(&frags) - 1; + pktcnt_need_update = true; + skb_queue_splice_tail_init(&frags, &tnlq); + continue; + } + /* Unluckily, peer doesn't have TIPC_TUNNEL_ENHANCED + * => Just warn it and return! + */ + pr_warn_ratelimited("%stoo large msg <%d, %d>: %d!\n", + link_co_err, msg_user(hdr), + msg_type(hdr), msg_size(hdr)); + return; + } + msg_set_size(&tnlhdr, pktlen + INT_H_SIZE); tnlskb = tipc_buf_acquire(pktlen + INT_H_SIZE, GFP_ATOMIC); if (!tnlskb) { @@ -1728,6 +1826,12 @@ tnl: goto tnl; } + if (pktcnt_need_update) + skb_queue_walk(&tnlq, skb) { + hdr = buf_msg(skb); + msg_set_msgcnt(hdr, pktcnt); + } + tipc_link_xmit(tnl, &tnlq, xmitq); if (mtyp == FAILOVER_MSG) { diff --git a/net/tipc/msg.c b/net/tipc/msg.c index f48e5857210f..e6d49cdc61b4 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -244,6 +244,65 @@ bool tipc_msg_validate(struct sk_buff **_skb) } /** + * tipc_msg_fragment - build a fragment skb list for TIPC message + * + * @skb: TIPC message skb + * @hdr: internal msg header to be put on the top of the fragments + * @pktmax: max size of a fragment incl. the header + * @frags: returned fragment skb list + * + * Returns 0 if the fragmentation is successful, otherwise: -EINVAL + * or -ENOMEM + */ +int tipc_msg_fragment(struct sk_buff *skb, const struct tipc_msg *hdr, + int pktmax, struct sk_buff_head *frags) +{ + int pktno, nof_fragms, dsz, dmax, eat; + struct tipc_msg *_hdr; + struct sk_buff *_skb; + u8 *data; + + /* Non-linear buffer? 
*/ + if (skb_linearize(skb)) + return -ENOMEM; + + data = (u8 *)skb->data; + dsz = msg_size(buf_msg(skb)); + dmax = pktmax - INT_H_SIZE; + if (dsz <= dmax || !dmax) + return -EINVAL; + + nof_fragms = dsz / dmax + 1; + for (pktno = 1; pktno <= nof_fragms; pktno++) { + if (pktno < nof_fragms) + eat = dmax; + else + eat = dsz % dmax; + /* Allocate a new fragment */ + _skb = tipc_buf_acquire(INT_H_SIZE + eat, GFP_ATOMIC); + if (!_skb) + goto error; + skb_orphan(_skb); + __skb_queue_tail(frags, _skb); + /* Copy header & data to the fragment */ + skb_copy_to_linear_data(_skb, hdr, INT_H_SIZE); + skb_copy_to_linear_data_offset(_skb, INT_H_SIZE, data, eat); + data += eat; + /* Update the fragment's header */ + _hdr = buf_msg(_skb); + msg_set_fragm_no(_hdr, pktno); + msg_set_nof_fragms(_hdr, nof_fragms); + msg_set_size(_hdr, INT_H_SIZE + eat); + } + return 0; + +error: + __skb_queue_purge(frags); + __skb_queue_head_init(frags); + return -ENOMEM; +} + +/** * tipc_msg_build - create buffer chain containing specified header and data * @mhdr: Message header, to be prepended to data * @m: User message diff --git a/net/tipc/msg.h b/net/tipc/msg.h index d7ebc9e955f6..0daa6f04ca81 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -723,12 +723,26 @@ static inline void msg_set_last_bcast(struct tipc_msg *m, u32 n) msg_set_bits(m, 4, 16, 0xffff, n); } +static inline u32 msg_nof_fragms(struct tipc_msg *m) +{ + return msg_bits(m, 4, 0, 0xffff); +} + +static inline void msg_set_nof_fragms(struct tipc_msg *m, u32 n) +{ + msg_set_bits(m, 4, 0, 0xffff, n); +} + +static inline u32 msg_fragm_no(struct tipc_msg *m) +{ + return msg_bits(m, 4, 16, 0xffff); +} + static inline void msg_set_fragm_no(struct tipc_msg *m, u32 n) { msg_set_bits(m, 4, 16, 0xffff, n); } - static inline u16 msg_next_sent(struct tipc_msg *m) { return msg_bits(m, 4, 0, 0xffff); @@ -879,6 +893,16 @@ static inline void msg_set_msgcnt(struct tipc_msg *m, u16 n) msg_set_bits(m, 9, 16, 0xffff, n); } +static inline u16 msg_syncpt(struct tipc_msg *m) +{ + return msg_bits(m, 9, 16, 0xffff); +} + +static inline void msg_set_syncpt(struct tipc_msg *m, u16 n) +{ + msg_set_bits(m, 9, 16, 0xffff, n); +} + static inline u32 msg_conn_ack(struct tipc_msg *m) { return msg_bits(m, 9, 16, 0xffff); @@ -1037,6 +1061,8 @@ bool tipc_msg_bundle(struct sk_buff *skb, struct tipc_msg *msg, u32 mtu); bool tipc_msg_make_bundle(struct sk_buff **skb, struct tipc_msg *msg, u32 mtu, u32 dnode); bool tipc_msg_extract(struct sk_buff *skb, struct sk_buff **iskb, int *pos); +int tipc_msg_fragment(struct sk_buff *skb, const struct tipc_msg *hdr, + int pktmax, struct sk_buff_head *frags); int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset, int dsz, int mtu, struct sk_buff_head *list); bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err); diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index 241ed2274473..836e629e8f4a 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -190,7 +190,7 @@ void tipc_named_node_up(struct net *net, u32 dnode) struct name_table *nt = tipc_name_table(net); struct sk_buff_head head; - skb_queue_head_init(&head); + __skb_queue_head_init(&head); read_lock_bh(&nt->cluster_scope_lock); named_distribute(net, &head, dnode, &nt->cluster_scope); diff --git a/net/tipc/node.c b/net/tipc/node.c index 3a5be1d7e572..c8f6177dd5a2 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1443,13 +1443,15 @@ int tipc_node_xmit(struct net *net, struct sk_buff_head *list, int rc; if (in_own_node(net, dnode)) { + 
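
The accounting in tipc_msg_fragment() above is easiest to check with numbers: with dmax = pktmax - INT_H_SIZE (and dsz required to exceed dmax), a dsz-byte message becomes dsz / dmax + 1 fragments, each carrying dmax bytes except the last, which carries dsz % dmax. A small stand-alone model of that split; HDR merely stands in for INT_H_SIZE, the exact value does not change the arithmetic:

#include <stdio.h>

#define HDR 40	/* stand-in for the TIPC internal header size */

/* Model of the split done by tipc_msg_fragment(): dsz payload bytes,
 * pktmax bytes per fragment including the internal header. */
static void model_fragments(int dsz, int pktmax)
{
	int dmax = pktmax - HDR;
	int nof_fragms = dsz / dmax + 1;
	int pktno, eat;

	for (pktno = 1; pktno <= nof_fragms; pktno++) {
		eat = (pktno < nof_fragms) ? dmax : dsz % dmax;
		printf("fragment %d/%d: %d payload bytes (+%d header)\n",
		       pktno, nof_fragms, eat, HDR);
	}
}

int main(void)
{
	/* e.g. a 3000-byte tunnelled message over a 1440-byte tunnel MTU:
	 * dmax = 1400 -> 3 fragments of 1400, 1400 and 200 bytes. */
	model_fragments(3000, 1440);
	return 0;
}
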
tipc_loopback_trace(net, list); + spin_lock_init(&list->lock); tipc_sk_rcv(net, list); return 0; } n = tipc_node_find(net, dnode); if (unlikely(!n)) { - skb_queue_purge(list); + __skb_queue_purge(list); return -EHOSTUNREACH; } @@ -1458,7 +1460,7 @@ int tipc_node_xmit(struct net *net, struct sk_buff_head *list, if (unlikely(bearer_id == INVALID_BEARER_ID)) { tipc_node_read_unlock(n); tipc_node_put(n); - skb_queue_purge(list); + __skb_queue_purge(list); return -EHOSTUNREACH; } @@ -1490,7 +1492,7 @@ int tipc_node_xmit_skb(struct net *net, struct sk_buff *skb, u32 dnode, { struct sk_buff_head head; - skb_queue_head_init(&head); + __skb_queue_head_init(&head); __skb_queue_tail(&head, skb); tipc_node_xmit(net, &head, dnode, selector); return 0; @@ -1649,7 +1651,6 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb, int usr = msg_user(hdr); int mtyp = msg_type(hdr); u16 oseqno = msg_seqno(hdr); - u16 iseqno = msg_seqno(msg_inner_hdr(hdr)); u16 exp_pkts = msg_msgcnt(hdr); u16 rcv_nxt, syncpt, dlv_nxt, inputq_len; int state = n->state; @@ -1748,7 +1749,10 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb, /* Initiate synch mode if applicable */ if ((usr == TUNNEL_PROTOCOL) && (mtyp == SYNCH_MSG) && (oseqno == 1)) { - syncpt = iseqno + exp_pkts - 1; + if (n->capabilities & TIPC_TUNNEL_ENHANCED) + syncpt = msg_syncpt(hdr); + else + syncpt = msg_seqno(msg_inner_hdr(hdr)) + exp_pkts - 1; if (!tipc_link_is_up(l)) __tipc_node_link_up(n, bearer_id, xmitq); if (n->state == SELF_UP_PEER_UP) { diff --git a/net/tipc/node.h b/net/tipc/node.h index c0bf49ea3de4..291d0ecd4101 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -53,7 +53,8 @@ enum { TIPC_NODE_ID128 = (1 << 5), TIPC_LINK_PROTO_SEQNO = (1 << 6), TIPC_MCAST_RBCTL = (1 << 7), - TIPC_GAP_ACK_BLOCK = (1 << 8) + TIPC_GAP_ACK_BLOCK = (1 << 8), + TIPC_TUNNEL_ENHANCED = (1 << 9) }; #define TIPC_NODE_CAPABILITIES (TIPC_SYN_BIT | \ @@ -64,7 +65,8 @@ enum { TIPC_NODE_ID128 | \ TIPC_LINK_PROTO_SEQNO | \ TIPC_MCAST_RBCTL | \ - TIPC_GAP_ACK_BLOCK) + TIPC_GAP_ACK_BLOCK | \ + TIPC_TUNNEL_ENHANCED) #define INVALID_BEARER_ID -1 void tipc_node_stop(struct net *net); diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 83ae41d7e554..3b9f8cc328f5 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -809,7 +809,7 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq, msg_set_nameupper(hdr, seq->upper); /* Build message as chain of buffers */ - skb_queue_head_init(&pkts); + __skb_queue_head_init(&pkts); rc = tipc_msg_build(hdr, msg, 0, dlen, mtu, &pkts); /* Send message if build was successful */ @@ -853,7 +853,7 @@ static int tipc_send_group_msg(struct net *net, struct tipc_sock *tsk, msg_set_grp_bc_seqno(hdr, bc_snd_nxt); /* Build message as chain of buffers */ - skb_queue_head_init(&pkts); + __skb_queue_head_init(&pkts); mtu = tipc_node_get_mtu(net, dnode, tsk->portid); rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts); if (unlikely(rc != dlen)) @@ -1058,7 +1058,7 @@ static int tipc_send_group_bcast(struct socket *sock, struct msghdr *m, msg_set_grp_bc_ack_req(hdr, ack); /* Build message as chain of buffers */ - skb_queue_head_init(&pkts); + __skb_queue_head_init(&pkts); rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts); if (unlikely(rc != dlen)) return rc; @@ -1387,7 +1387,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen) if (unlikely(rc)) return rc; - skb_queue_head_init(&pkts); + __skb_queue_head_init(&pkts); mtu = tipc_node_get_mtu(net, dnode, 
tsk->portid); rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts); if (unlikely(rc != dlen)) @@ -1445,7 +1445,7 @@ static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dlen) int send, sent = 0; int rc = 0; - skb_queue_head_init(&pkts); + __skb_queue_head_init(&pkts); if (unlikely(dlen > INT_MAX)) return -EMSGSIZE; @@ -1805,7 +1805,7 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, /* Send group flow control advertisement when applicable */ if (tsk->group && msg_in_group(hdr) && !grp_evt) { - skb_queue_head_init(&xmitq); + __skb_queue_head_init(&xmitq); tipc_group_update_rcv_win(tsk->group, tsk_blocks(hlen + dlen), msg_orignode(hdr), msg_origport(hdr), &xmitq); @@ -2674,7 +2674,7 @@ static void tipc_sk_timeout(struct timer_list *t) struct sk_buff_head list; int rc = 0; - skb_queue_head_init(&list); + __skb_queue_head_init(&list); bh_lock_sock(sk); /* Try again later if socket is busy */ diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c index ca8ac96d22a9..3a12fc18239b 100644 --- a/net/tipc/topsrv.c +++ b/net/tipc/topsrv.c @@ -40,6 +40,7 @@ #include "socket.h" #include "addr.h" #include "msg.h" +#include "bearer.h" #include <net/sock.h> #include <linux/module.h> @@ -608,6 +609,7 @@ static void tipc_topsrv_kern_evt(struct net *net, struct tipc_event *evt) memcpy(msg_data(buf_msg(skb)), evt, sizeof(*evt)); skb_queue_head_init(&evtq); __skb_queue_tail(&evtq, skb); + tipc_loopback_trace(net, &evtq); tipc_sk_rcv(net, &evtq); } diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index 43922d86e510..f959487c5cd1 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -61,7 +61,7 @@ static void tls_device_free_ctx(struct tls_context *ctx) if (ctx->rx_conf == TLS_HW) kfree(tls_offload_ctx_rx(ctx)); - tls_ctx_free(ctx); + tls_ctx_free(NULL, ctx); } static void tls_device_gc_task(struct work_struct *work) @@ -122,13 +122,10 @@ static struct net_device *get_netdev_for_sock(struct sock *sk) static void destroy_record(struct tls_record_info *record) { - int nr_frags = record->num_frags; - skb_frag_t *frag; + int i; - while (nr_frags-- > 0) { - frag = &record->frags[nr_frags]; - __skb_frag_unref(frag); - } + for (i = 0; i < record->num_frags; i++) + __skb_frag_unref(&record->frags[i]); kfree(record); } @@ -159,12 +156,8 @@ static void tls_icsk_clean_acked(struct sock *sk, u32 acked_seq) spin_lock_irqsave(&ctx->lock, flags); info = ctx->retransmit_hint; - if (info && !before(acked_seq, info->end_seq)) { + if (info && !before(acked_seq, info->end_seq)) ctx->retransmit_hint = NULL; - list_del(&info->list); - destroy_record(info); - deleted_records++; - } list_for_each_entry_safe(info, temp, &ctx->records_list, list) { if (before(acked_seq, info->end_seq)) @@ -243,14 +236,14 @@ static void tls_append_frag(struct tls_record_info *record, skb_frag_t *frag; frag = &record->frags[record->num_frags - 1]; - if (frag->page.p == pfrag->page && - frag->page_offset + frag->size == pfrag->offset) { - frag->size += size; + if (skb_frag_page(frag) == pfrag->page && + skb_frag_off(frag) + skb_frag_size(frag) == pfrag->offset) { + skb_frag_size_add(frag, size); } else { ++frag; - frag->page.p = pfrag->page; - frag->page_offset = pfrag->offset; - frag->size = size; + __skb_frag_set_page(frag, pfrag->page); + skb_frag_off_set(frag, pfrag->offset); + skb_frag_size_set(frag, size); ++record->num_frags; get_page(pfrag->page); } @@ -263,33 +256,15 @@ static int tls_push_record(struct sock *sk, struct tls_context *ctx, struct tls_offload_context_tx *offload_ctx, struct 
tls_record_info *record, - struct page_frag *pfrag, - int flags, - unsigned char record_type) + int flags) { struct tls_prot_info *prot = &ctx->prot_info; struct tcp_sock *tp = tcp_sk(sk); - struct page_frag dummy_tag_frag; skb_frag_t *frag; int i; - /* fill prepend */ - frag = &record->frags[0]; - tls_fill_prepend(ctx, - skb_frag_address(frag), - record->len - prot->prepend_size, - record_type, - prot->version); - - /* HW doesn't care about the data in the tag, because it fills it. */ - dummy_tag_frag.page = skb_frag_page(frag); - dummy_tag_frag.offset = 0; - - tls_append_frag(record, &dummy_tag_frag, prot->tag_size); record->end_seq = tp->write_seq + record->len; - spin_lock_irq(&offload_ctx->lock); - list_add_tail(&record->list, &offload_ctx->records_list); - spin_unlock_irq(&offload_ctx->lock); + list_add_tail_rcu(&record->list, &offload_ctx->records_list); offload_ctx->open_record = NULL; if (test_bit(TLS_TX_SYNC_SCHED, &ctx->flags)) @@ -301,8 +276,8 @@ static int tls_push_record(struct sock *sk, frag = &record->frags[i]; sg_unmark_end(&offload_ctx->sg_tx_data[i]); sg_set_page(&offload_ctx->sg_tx_data[i], skb_frag_page(frag), - frag->size, frag->page_offset); - sk_mem_charge(sk, frag->size); + skb_frag_size(frag), skb_frag_off(frag)); + sk_mem_charge(sk, skb_frag_size(frag)); get_page(skb_frag_page(frag)); } sg_mark_end(&offload_ctx->sg_tx_data[record->num_frags - 1]); @@ -311,6 +286,38 @@ static int tls_push_record(struct sock *sk, return tls_push_sg(sk, ctx, offload_ctx->sg_tx_data, 0, flags); } +static int tls_device_record_close(struct sock *sk, + struct tls_context *ctx, + struct tls_record_info *record, + struct page_frag *pfrag, + unsigned char record_type) +{ + struct tls_prot_info *prot = &ctx->prot_info; + int ret; + + /* append tag + * device will fill in the tag, we just need to append a placeholder + * use socket memory to improve coalescing (re-using a single buffer + * increases frag count) + * if we can't allocate memory now, steal some back from data + */ + if (likely(skb_page_frag_refill(prot->tag_size, pfrag, + sk->sk_allocation))) { + ret = 0; + tls_append_frag(record, pfrag, prot->tag_size); + } else { + ret = prot->tag_size; + if (record->len <= prot->overhead_size) + return -ENOMEM; + } + + /* fill prepend */ + tls_fill_prepend(ctx, skb_frag_address(&record->frags[0]), + record->len - prot->overhead_size, + record_type, prot->version); + return ret; +} + static int tls_create_new_record(struct tls_offload_context_tx *offload_ctx, struct page_frag *pfrag, size_t prepend_size) @@ -324,7 +331,7 @@ static int tls_create_new_record(struct tls_offload_context_tx *offload_ctx, frag = &record->frags[0]; __skb_frag_set_page(frag, pfrag->page); - frag->page_offset = pfrag->offset; + skb_frag_off_set(frag, pfrag->offset); skb_frag_size_set(frag, prepend_size); get_page(pfrag->page); @@ -365,6 +372,31 @@ static int tls_do_allocation(struct sock *sk, return 0; } +static int tls_device_copy_data(void *addr, size_t bytes, struct iov_iter *i) +{ + size_t pre_copy, nocache; + + pre_copy = ~((unsigned long)addr - 1) & (SMP_CACHE_BYTES - 1); + if (pre_copy) { + pre_copy = min(pre_copy, bytes); + if (copy_from_iter(addr, pre_copy, i) != pre_copy) + return -EFAULT; + bytes -= pre_copy; + addr += pre_copy; + } + + nocache = round_down(bytes, SMP_CACHE_BYTES); + if (copy_from_iter_nocache(addr, nocache, i) != nocache) + return -EFAULT; + bytes -= nocache; + addr += nocache; + + if (bytes && copy_from_iter(addr, bytes, i) != bytes) + return -EFAULT; + + return 0; +} + static int 
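
tls_device_copy_data() above splits every copy into a short head up to the next cache-line boundary, a non-temporal bulk copy of whole cache lines, and a cached tail, so payload headed for DMA does not displace useful cache lines. A stand-alone model of the same split arithmetic; plain memcpy stands in for copy_from_iter()/copy_from_iter_nocache(), and 64 is an assumed SMP_CACHE_BYTES:

#include <stdint.h>
#include <string.h>

#define CACHE_BYTES 64	/* assumed SMP_CACHE_BYTES */

/* Mirror of the head/bulk/tail split in tls_device_copy_data(); in the
 * kernel only the middle chunk bypasses the cache. */
static void copy_split(void *dst, const void *src, size_t bytes)
{
	uintptr_t addr = (uintptr_t)dst;
	/* bytes needed to reach the next cache-line boundary (0 if aligned) */
	size_t head = (~(addr - 1)) & (CACHE_BYTES - 1);
	size_t bulk;

	if (head > bytes)
		head = bytes;
	memcpy(dst, src, head);				/* cached head copy */
	bytes -= head;

	bulk = bytes & ~(size_t)(CACHE_BYTES - 1);	/* whole cache lines */
	memcpy((char *)dst + head, (const char *)src + head, bulk);
	bytes -= bulk;

	memcpy((char *)dst + head + bulk,		/* cached tail copy */
	       (const char *)src + head + bulk, bytes);
}

int main(void)
{
	char src[200], dst[200];

	memset(src, 0xab, sizeof(src));
	copy_split(dst + 3, src, 150);	/* deliberately misaligned dst */
	return memcmp(dst + 3, src, 150) != 0;
}
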
tls_push_data(struct sock *sk, struct iov_iter *msg_iter, size_t size, int flags, @@ -438,12 +470,10 @@ handle_error: copy = min_t(size_t, size, (pfrag->size - pfrag->offset)); copy = min_t(size_t, copy, (max_open_record_len - record->len)); - if (copy_from_iter_nocache(page_address(pfrag->page) + - pfrag->offset, - copy, msg_iter) != copy) { - rc = -EFAULT; + rc = tls_device_copy_data(page_address(pfrag->page) + + pfrag->offset, copy, msg_iter); + if (rc) goto handle_error; - } tls_append_frag(record, pfrag, copy); size -= copy; @@ -461,13 +491,24 @@ last_record: if (done || record->len >= max_open_record_len || (record->num_frags >= MAX_SKB_FRAGS - 1)) { + rc = tls_device_record_close(sk, tls_ctx, record, + pfrag, record_type); + if (rc) { + if (rc > 0) { + size += rc; + } else { + size = orig_size; + destroy_record(record); + ctx->open_record = NULL; + break; + } + } + rc = tls_push_record(sk, tls_ctx, ctx, record, - pfrag, - tls_push_record_flags, - record_type); + tls_push_record_flags); if (rc < 0) break; } @@ -542,12 +583,16 @@ struct tls_record_info *tls_get_record(struct tls_offload_context_tx *context, /* if retransmit_hint is irrelevant start * from the beggining of the list */ - info = list_first_entry(&context->records_list, - struct tls_record_info, list); + info = list_first_entry_or_null(&context->records_list, + struct tls_record_info, list); + if (!info) + return NULL; record_sn = context->unacked_record_sn; } - list_for_each_entry_from(info, &context->records_list, list) { + /* We just need the _rcu for the READ_ONCE() */ + rcu_read_lock(); + list_for_each_entry_from_rcu(info, &context->records_list, list) { if (before(seq, info->end_seq)) { if (!context->retransmit_hint || after(info->end_seq, @@ -556,12 +601,15 @@ struct tls_record_info *tls_get_record(struct tls_offload_context_tx *context, context->retransmit_hint = info; } *p_record_sn = record_sn; - return info; + goto exit_rcu_unlock; } record_sn++; } + info = NULL; - return NULL; +exit_rcu_unlock: + rcu_read_unlock(); + return info; } EXPORT_SYMBOL(tls_get_record); @@ -838,22 +886,18 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx) struct net_device *netdev; char *iv, *rec_seq; struct sk_buff *skb; - int rc = -EINVAL; __be64 rcd_sn; + int rc; if (!ctx) - goto out; + return -EINVAL; - if (ctx->priv_ctx_tx) { - rc = -EEXIST; - goto out; - } + if (ctx->priv_ctx_tx) + return -EEXIST; start_marker_record = kmalloc(sizeof(*start_marker_record), GFP_KERNEL); - if (!start_marker_record) { - rc = -ENOMEM; - goto out; - } + if (!start_marker_record) + return -ENOMEM; offload_ctx = kzalloc(TLS_OFFLOAD_CONTEXT_SIZE_TX, GFP_KERNEL); if (!offload_ctx) { @@ -939,17 +983,11 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx) if (skb) TCP_SKB_CB(skb)->eor = 1; - /* We support starting offload on multiple sockets - * concurrently, so we only need a read lock here. - * This lock must precede get_netdev_for_sock to prevent races between - * NETDEV_DOWN and setsockopt. 
- */ - down_read(&device_offload_lock); netdev = get_netdev_for_sock(sk); if (!netdev) { pr_err_ratelimited("%s: netdev not found\n", __func__); rc = -EINVAL; - goto release_lock; + goto disable_cad; } if (!(netdev->features & NETIF_F_HW_TLS_TX)) { @@ -960,10 +998,15 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx) /* Avoid offloading if the device is down * We don't want to offload new flows after * the NETDEV_DOWN event + * + * device_offload_lock is taken in tls_devices's NETDEV_DOWN + * handler thus protecting from the device going down before + * ctx was added to tls_device_list. */ + down_read(&device_offload_lock); if (!(netdev->flags & IFF_UP)) { rc = -EINVAL; - goto release_netdev; + goto release_lock; } ctx->priv_ctx_tx = offload_ctx; @@ -971,9 +1014,10 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx) &ctx->crypto_send.info, tcp_sk(sk)->write_seq); if (rc) - goto release_netdev; + goto release_lock; tls_device_attach(ctx, sk, netdev); + up_read(&device_offload_lock); /* following this assignment tls_is_sk_tx_device_offloaded * will return true and the context might be accessed @@ -981,13 +1025,14 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx) */ smp_store_release(&sk->sk_validate_xmit_skb, tls_validate_xmit_skb); dev_put(netdev); - up_read(&device_offload_lock); - goto out; -release_netdev: - dev_put(netdev); + return 0; + release_lock: up_read(&device_offload_lock); +release_netdev: + dev_put(netdev); +disable_cad: clean_acked_data_disable(inet_csk(sk)); crypto_free_aead(offload_ctx->aead_send); free_rec_seq: @@ -999,7 +1044,6 @@ free_offload_ctx: ctx->priv_ctx_tx = NULL; free_marker_record: kfree(start_marker_record); -out: return rc; } @@ -1012,17 +1056,10 @@ int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx) if (ctx->crypto_recv.info.version != TLS_1_2_VERSION) return -EOPNOTSUPP; - /* We support starting offload on multiple sockets - * concurrently, so we only need a read lock here. - * This lock must precede get_netdev_for_sock to prevent races between - * NETDEV_DOWN and setsockopt. - */ - down_read(&device_offload_lock); netdev = get_netdev_for_sock(sk); if (!netdev) { pr_err_ratelimited("%s: netdev not found\n", __func__); - rc = -EINVAL; - goto release_lock; + return -EINVAL; } if (!(netdev->features & NETIF_F_HW_TLS_RX)) { @@ -1033,16 +1070,21 @@ int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx) /* Avoid offloading if the device is down * We don't want to offload new flows after * the NETDEV_DOWN event + * + * device_offload_lock is taken in tls_devices's NETDEV_DOWN + * handler thus protecting from the device going down before + * ctx was added to tls_device_list. 
*/ + down_read(&device_offload_lock); if (!(netdev->flags & IFF_UP)) { rc = -EINVAL; - goto release_netdev; + goto release_lock; } context = kzalloc(TLS_OFFLOAD_CONTEXT_SIZE_RX, GFP_KERNEL); if (!context) { rc = -ENOMEM; - goto release_netdev; + goto release_lock; } context->resync_nh_reset = 1; @@ -1058,7 +1100,11 @@ int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx) goto free_sw_resources; tls_device_attach(ctx, sk, netdev); - goto release_netdev; + up_read(&device_offload_lock); + + dev_put(netdev); + + return 0; free_sw_resources: up_read(&device_offload_lock); @@ -1066,10 +1112,10 @@ free_sw_resources: down_read(&device_offload_lock); release_ctx: ctx->priv_ctx_rx = NULL; -release_netdev: - dev_put(netdev); release_lock: up_read(&device_offload_lock); +release_netdev: + dev_put(netdev); return rc; } diff --git a/net/tls/tls_device_fallback.c b/net/tls/tls_device_fallback.c index 9070d68a92a4..28895333701e 100644 --- a/net/tls/tls_device_fallback.c +++ b/net/tls/tls_device_fallback.c @@ -273,7 +273,7 @@ static int fill_sg_in(struct scatterlist *sg_in, __skb_frag_ref(frag); sg_set_page(sg_in + i, skb_frag_page(frag), - skb_frag_size(frag), frag->page_offset); + skb_frag_size(frag), skb_frag_off(frag)); remaining -= skb_frag_size(frag); diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 43252a801c3f..ac88877dcade 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -39,6 +39,7 @@ #include <linux/netdevice.h> #include <linux/sched/signal.h> #include <linux/inetdevice.h> +#include <linux/inet_diag.h> #include <net/tls.h> @@ -251,14 +252,26 @@ static void tls_write_space(struct sock *sk) ctx->sk_write_space(sk); } -void tls_ctx_free(struct tls_context *ctx) +/** + * tls_ctx_free() - free TLS ULP context + * @sk: socket to with @ctx is attached + * @ctx: TLS context structure + * + * Free TLS context. If @sk is %NULL caller guarantees that the socket + * to which @ctx was attached has no outstanding references. 
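tls_ctx_free() below frees the context with kfree_rcu() whenever a socket is still attached; the reason shows up later in this patch, where tls_get_info() dereferences icsk_ulp_data under rcu_read_lock(). A schematic, kernel-style sketch of that lifetime rule (types and names are invented for the example, this is not the patch itself):

#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/types.h>

struct demo_ctx {
	u16 version;
	struct rcu_head rcu;		/* required by kfree_rcu() */
};

static struct demo_ctx __rcu *demo_slot;

/* Reader side: may run concurrently with teardown. */
static u16 demo_read_version(void)
{
	struct demo_ctx *ctx;
	u16 version = 0;

	rcu_read_lock();
	ctx = rcu_dereference(demo_slot);
	if (ctx)
		version = ctx->version;
	rcu_read_unlock();
	return version;
}

/* Writer side: unpublish first, then defer the free until every
 * reader that might still hold the old pointer has left its RCU
 * read-side critical section. */
static void demo_retire(struct demo_ctx *ctx)
{
	rcu_assign_pointer(demo_slot, NULL);
	kfree_rcu(ctx, rcu);
}
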
+ */ +void tls_ctx_free(struct sock *sk, struct tls_context *ctx) { if (!ctx) return; memzero_explicit(&ctx->crypto_send, sizeof(ctx->crypto_send)); memzero_explicit(&ctx->crypto_recv, sizeof(ctx->crypto_recv)); - kfree(ctx); + + if (sk) + kfree_rcu(ctx, rcu); + else + kfree(ctx); } static void tls_sk_proto_cleanup(struct sock *sk, @@ -273,19 +286,14 @@ static void tls_sk_proto_cleanup(struct sock *sk, kfree(ctx->tx.rec_seq); kfree(ctx->tx.iv); tls_sw_release_resources_tx(sk); -#ifdef CONFIG_TLS_DEVICE } else if (ctx->tx_conf == TLS_HW) { tls_device_free_resources_tx(sk); -#endif } if (ctx->rx_conf == TLS_SW) tls_sw_release_resources_rx(sk); - -#ifdef CONFIG_TLS_DEVICE - if (ctx->rx_conf == TLS_HW) + else if (ctx->rx_conf == TLS_HW) tls_device_offload_cleanup_rx(sk); -#endif } static void tls_sk_proto_close(struct sock *sk, long timeout) @@ -306,7 +314,7 @@ static void tls_sk_proto_close(struct sock *sk, long timeout) write_lock_bh(&sk->sk_callback_lock); if (free_ctx) - icsk->icsk_ulp_data = NULL; + rcu_assign_pointer(icsk->icsk_ulp_data, NULL); sk->sk_prot = ctx->sk_proto; if (sk->sk_write_space == tls_write_space) sk->sk_write_space = ctx->sk_write_space; @@ -318,10 +326,10 @@ static void tls_sk_proto_close(struct sock *sk, long timeout) tls_sw_strparser_done(ctx); if (ctx->rx_conf == TLS_SW) tls_sw_free_ctx_rx(ctx); - ctx->sk_proto_close(sk, timeout); + ctx->sk_proto->close(sk, timeout); if (free_ctx) - tls_ctx_free(ctx); + tls_ctx_free(sk, ctx); } static int do_tls_getsockopt_tx(struct sock *sk, char __user *optval, @@ -438,7 +446,8 @@ static int tls_getsockopt(struct sock *sk, int level, int optname, struct tls_context *ctx = tls_get_ctx(sk); if (level != SOL_TLS) - return ctx->getsockopt(sk, level, optname, optval, optlen); + return ctx->sk_proto->getsockopt(sk, level, + optname, optval, optlen); return do_tls_getsockopt(sk, optname, optval, optlen); } @@ -523,26 +532,18 @@ static int do_tls_setsockopt_conf(struct sock *sk, char __user *optval, } if (tx) { -#ifdef CONFIG_TLS_DEVICE rc = tls_set_device_offload(sk, ctx); conf = TLS_HW; if (rc) { -#else - { -#endif rc = tls_set_sw_offload(sk, ctx, 1); if (rc) goto err_crypto_info; conf = TLS_SW; } } else { -#ifdef CONFIG_TLS_DEVICE rc = tls_set_device_offload_rx(sk, ctx); conf = TLS_HW; if (rc) { -#else - { -#endif rc = tls_set_sw_offload(sk, ctx, 0); if (rc) goto err_crypto_info; @@ -596,7 +597,8 @@ static int tls_setsockopt(struct sock *sk, int level, int optname, struct tls_context *ctx = tls_get_ctx(sk); if (level != SOL_TLS) - return ctx->setsockopt(sk, level, optname, optval, optlen); + return ctx->sk_proto->setsockopt(sk, level, optname, optval, + optlen); return do_tls_setsockopt(sk, optname, optval, optlen); } @@ -610,11 +612,8 @@ static struct tls_context *create_ctx(struct sock *sk) if (!ctx) return NULL; - icsk->icsk_ulp_data = ctx; - ctx->setsockopt = sk->sk_prot->setsockopt; - ctx->getsockopt = sk->sk_prot->getsockopt; - ctx->sk_proto_close = sk->sk_prot->close; - ctx->unhash = sk->sk_prot->unhash; + rcu_assign_pointer(icsk->icsk_ulp_data, ctx); + ctx->sk_proto = sk->sk_prot; return ctx; } @@ -651,8 +650,8 @@ static void tls_hw_sk_destruct(struct sock *sk) ctx->sk_destruct(sk); /* Free ctx */ - tls_ctx_free(ctx); - icsk->icsk_ulp_data = NULL; + rcu_assign_pointer(icsk->icsk_ulp_data, NULL); + tls_ctx_free(sk, ctx); } static int tls_hw_prot(struct sock *sk) @@ -670,9 +669,6 @@ static int tls_hw_prot(struct sock *sk) spin_unlock_bh(&device_spinlock); tls_build_proto(sk); - ctx->hash = sk->sk_prot->hash; - ctx->unhash = 
sk->sk_prot->unhash; - ctx->sk_proto_close = sk->sk_prot->close; ctx->sk_destruct = sk->sk_destruct; sk->sk_destruct = tls_hw_sk_destruct; ctx->rx_conf = TLS_HW_RECORD; @@ -704,7 +700,7 @@ static void tls_hw_unhash(struct sock *sk) } } spin_unlock_bh(&device_spinlock); - ctx->unhash(sk); + ctx->sk_proto->unhash(sk); } static int tls_hw_hash(struct sock *sk) @@ -713,7 +709,7 @@ static int tls_hw_hash(struct sock *sk) struct tls_device *dev; int err; - err = ctx->hash(sk); + err = ctx->sk_proto->hash(sk); spin_lock_bh(&device_spinlock); list_for_each_entry(dev, &device_list, dev_list) { if (dev->hash) { @@ -803,7 +799,6 @@ static int tls_init(struct sock *sk) ctx->tx_conf = TLS_BASE; ctx->rx_conf = TLS_BASE; - ctx->sk_proto = sk->sk_prot; update_sk_prot(sk, ctx); out: write_unlock_bh(&sk->sk_callback_lock); @@ -815,12 +810,71 @@ static void tls_update(struct sock *sk, struct proto *p) struct tls_context *ctx; ctx = tls_get_ctx(sk); - if (likely(ctx)) { - ctx->sk_proto_close = p->close; + if (likely(ctx)) ctx->sk_proto = p; - } else { + else sk->sk_prot = p; - } +} + +static int tls_get_info(const struct sock *sk, struct sk_buff *skb) +{ + u16 version, cipher_type; + struct tls_context *ctx; + struct nlattr *start; + int err; + + start = nla_nest_start_noflag(skb, INET_ULP_INFO_TLS); + if (!start) + return -EMSGSIZE; + + rcu_read_lock(); + ctx = rcu_dereference(inet_csk(sk)->icsk_ulp_data); + if (!ctx) { + err = 0; + goto nla_failure; + } + version = ctx->prot_info.version; + if (version) { + err = nla_put_u16(skb, TLS_INFO_VERSION, version); + if (err) + goto nla_failure; + } + cipher_type = ctx->prot_info.cipher_type; + if (cipher_type) { + err = nla_put_u16(skb, TLS_INFO_CIPHER, cipher_type); + if (err) + goto nla_failure; + } + err = nla_put_u16(skb, TLS_INFO_TXCONF, tls_user_config(ctx, true)); + if (err) + goto nla_failure; + + err = nla_put_u16(skb, TLS_INFO_RXCONF, tls_user_config(ctx, false)); + if (err) + goto nla_failure; + + rcu_read_unlock(); + nla_nest_end(skb, start); + return 0; + +nla_failure: + rcu_read_unlock(); + nla_nest_cancel(skb, start); + return err; +} + +static size_t tls_get_info_size(const struct sock *sk) +{ + size_t size = 0; + + size += nla_total_size(0) + /* INET_ULP_INFO_TLS */ + nla_total_size(sizeof(u16)) + /* TLS_INFO_VERSION */ + nla_total_size(sizeof(u16)) + /* TLS_INFO_CIPHER */ + nla_total_size(sizeof(u16)) + /* TLS_INFO_RXCONF */ + nla_total_size(sizeof(u16)) + /* TLS_INFO_TXCONF */ + 0; + + return size; } void tls_register_device(struct tls_device *device) @@ -844,6 +898,8 @@ static struct tcp_ulp_ops tcp_tls_ulp_ops __read_mostly = { .owner = THIS_MODULE, .init = tls_init, .update = tls_update, + .get_info = tls_get_info, + .get_info_size = tls_get_info_size, }; static int __init tls_register(void) @@ -851,9 +907,7 @@ static int __init tls_register(void) tls_sw_proto_ops = inet_stream_ops; tls_sw_proto_ops.splice_read = tls_sw_splice_read; -#ifdef CONFIG_TLS_DEVICE tls_device_init(); -#endif tcp_register_ulp(&tcp_tls_ulp_ops); return 0; @@ -862,9 +916,7 @@ static int __init tls_register(void) static void __exit tls_unregister(void) { tcp_unregister_ulp(&tcp_tls_ulp_ops); -#ifdef CONFIG_TLS_DEVICE tls_device_cleanup(); -#endif } module_init(tls_register); diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 91d21b048a9b..c2b5e0d2ba1a 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1489,13 +1489,12 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb, int pad, err = 0; if (!ctx->decrypted) { -#ifdef CONFIG_TLS_DEVICE if 
(tls_ctx->rx_conf == TLS_HW) { err = tls_device_decrypted(sk, skb); if (err < 0) return err; } -#endif + /* Still not decrypted after tls_device */ if (!ctx->decrypted) { err = decrypt_internal(sk, skb, dest, NULL, chunk, zc, @@ -2014,10 +2013,9 @@ static int tls_read_size(struct strparser *strp, struct sk_buff *skb) ret = -EINVAL; goto read_failure; } -#ifdef CONFIG_TLS_DEVICE + tls_device_rx_resync_new_rec(strp->sk, data_len + TLS_HEADER_SIZE, TCP_SKB_CB(skb)->seq + rxm->offset); -#endif return data_len + TLS_HEADER_SIZE; read_failure: diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c index 9d864ebeb7b3..261521d286d6 100644 --- a/net/vmw_vsock/hyperv_transport.c +++ b/net/vmw_vsock/hyperv_transport.c @@ -77,11 +77,11 @@ struct hvs_send_buf { VMBUS_PKT_TRAILER_SIZE) union hvs_service_id { - uuid_le srv_id; + guid_t srv_id; struct { unsigned int svm_port; - unsigned char b[sizeof(uuid_le) - sizeof(unsigned int)]; + unsigned char b[sizeof(guid_t) - sizeof(unsigned int)]; }; }; @@ -89,8 +89,8 @@ union hvs_service_id { struct hvsock { struct vsock_sock *vsk; - uuid_le vm_srv_id; - uuid_le host_srv_id; + guid_t vm_srv_id; + guid_t host_srv_id; struct vmbus_channel *chan; struct vmpacket_descriptor *recv_desc; @@ -159,21 +159,21 @@ struct hvsock { #define MIN_HOST_EPHEMERAL_PORT (MAX_HOST_LISTEN_PORT + 1) /* 00000000-facb-11e6-bd58-64006a7986d3 */ -static const uuid_le srv_id_template = - UUID_LE(0x00000000, 0xfacb, 0x11e6, 0xbd, 0x58, - 0x64, 0x00, 0x6a, 0x79, 0x86, 0xd3); +static const guid_t srv_id_template = + GUID_INIT(0x00000000, 0xfacb, 0x11e6, 0xbd, 0x58, + 0x64, 0x00, 0x6a, 0x79, 0x86, 0xd3); -static bool is_valid_srv_id(const uuid_le *id) +static bool is_valid_srv_id(const guid_t *id) { - return !memcmp(&id->b[4], &srv_id_template.b[4], sizeof(uuid_le) - 4); + return !memcmp(&id->b[4], &srv_id_template.b[4], sizeof(guid_t) - 4); } -static unsigned int get_port_by_srv_id(const uuid_le *svr_id) +static unsigned int get_port_by_srv_id(const guid_t *svr_id) { return *((unsigned int *)svr_id); } -static void hvs_addr_init(struct sockaddr_vm *addr, const uuid_le *svr_id) +static void hvs_addr_init(struct sockaddr_vm *addr, const guid_t *svr_id) { unsigned int port = get_port_by_srv_id(svr_id); @@ -321,7 +321,7 @@ static void hvs_close_connection(struct vmbus_channel *chan) static void hvs_open_connection(struct vmbus_channel *chan) { - uuid_le *if_instance, *if_type; + guid_t *if_instance, *if_type; unsigned char conn_from_host; struct sockaddr_vm addr; diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index 0815d1357861..082a30936690 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -307,6 +307,7 @@ static void virtio_vsock_rx_fill(struct virtio_vsock *vsock) break; } + pkt->buf_len = buf_len; pkt->len = buf_len; sg_init_one(&hdr, &pkt->hdr, sizeof(pkt->hdr)); diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 6f1a8aff65c5..5bb70c692b1e 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -26,6 +26,9 @@ /* How long to wait for graceful shutdown of a connection */ #define VSOCK_CLOSE_TIMEOUT (8 * HZ) +/* Threshold for detecting small packets to copy */ +#define GOOD_COPY_LEN 128 + static const struct virtio_transport *virtio_transport_get_ops(void) { const struct vsock_transport *t = vsock_core_get_transport(); @@ -64,6 +67,9 @@ virtio_transport_alloc_pkt(struct virtio_vsock_pkt_info *info, 
pkt->buf = kmalloc(len, GFP_KERNEL); if (!pkt->buf) goto out_pkt; + + pkt->buf_len = len; + err = memcpy_from_msg(pkt->buf, info->msg, len); if (err) goto out; @@ -91,8 +97,17 @@ static struct sk_buff *virtio_transport_build_skb(void *opaque) struct virtio_vsock_pkt *pkt = opaque; struct af_vsockmon_hdr *hdr; struct sk_buff *skb; + size_t payload_len; + void *payload_buf; + + /* A packet could be split to fit the RX buffer, so we can retrieve + * the payload length from the header and the buffer pointer taking + * care of the offset in the original packet. + */ + payload_len = le32_to_cpu(pkt->hdr.len); + payload_buf = pkt->buf + pkt->off; - skb = alloc_skb(sizeof(*hdr) + sizeof(pkt->hdr) + pkt->len, + skb = alloc_skb(sizeof(*hdr) + sizeof(pkt->hdr) + payload_len, GFP_ATOMIC); if (!skb) return NULL; @@ -132,8 +147,8 @@ static struct sk_buff *virtio_transport_build_skb(void *opaque) skb_put_data(skb, &pkt->hdr, sizeof(pkt->hdr)); - if (pkt->len) { - skb_put_data(skb, pkt->buf, pkt->len); + if (payload_len) { + skb_put_data(skb, payload_buf, payload_len); } return skb; @@ -166,8 +181,8 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk, vvs = vsk->trans; /* we can send less than pkt_len bytes */ - if (pkt_len > VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE) - pkt_len = VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE; + if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) + pkt_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE; /* virtio_transport_get_credit might return less than pkt_len credit */ pkt_len = virtio_transport_get_credit(vvs, pkt_len); @@ -204,10 +219,11 @@ static void virtio_transport_dec_rx_pkt(struct virtio_vsock_sock *vvs, void virtio_transport_inc_tx_pkt(struct virtio_vsock_sock *vvs, struct virtio_vsock_pkt *pkt) { - spin_lock_bh(&vvs->tx_lock); + spin_lock_bh(&vvs->rx_lock); + vvs->last_fwd_cnt = vvs->fwd_cnt; pkt->hdr.fwd_cnt = cpu_to_le32(vvs->fwd_cnt); pkt->hdr.buf_alloc = cpu_to_le32(vvs->buf_alloc); - spin_unlock_bh(&vvs->tx_lock); + spin_unlock_bh(&vvs->rx_lock); } EXPORT_SYMBOL_GPL(virtio_transport_inc_tx_pkt); @@ -255,6 +271,7 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk, struct virtio_vsock_sock *vvs = vsk->trans; struct virtio_vsock_pkt *pkt; size_t bytes, total = 0; + u32 free_space; int err = -EFAULT; spin_lock_bh(&vvs->rx_lock); @@ -285,11 +302,24 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk, virtio_transport_free_pkt(pkt); } } + + free_space = vvs->buf_alloc - (vvs->fwd_cnt - vvs->last_fwd_cnt); + spin_unlock_bh(&vvs->rx_lock); - /* Send a credit pkt to peer */ - virtio_transport_send_credit_update(vsk, VIRTIO_VSOCK_TYPE_STREAM, - NULL); + /* To reduce the number of credit update messages, + * don't update credits as long as lots of space is available. + * Note: the limit chosen here is arbitrary. Setting the limit + * too high causes extra messages. Too low causes transmitter + * stalls. As stalls are in theory more expensive than extra + * messages, we set the limit to a high value. TODO: experiment + * with different values. 
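The comment above spells out the new policy: advertise consumed credit only when the peer's stale view of the receive buffer is getting tight, instead of after every dequeue. A small userspace illustration of the arithmetic; the buffer size and threshold below are made-up stand-ins for buf_alloc and VIRTIO_VSOCK_MAX_PKT_BUF_SIZE:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define BUF_ALLOC        (256u * 1024u)	/* advertised receive buffer */
#define CREDIT_THRESHOLD  (64u * 1024u)	/* stand-in for the max packet size */

/* fwd_cnt counts bytes consumed locally since the connection started;
 * last_fwd_cnt is the value most recently advertised to the peer.
 * Their difference is how stale the peer's view is, so free_space
 * bounds from above how much credit the peer can still think it has. */
static bool need_credit_update(uint32_t fwd_cnt, uint32_t last_fwd_cnt)
{
	uint32_t free_space = BUF_ALLOC - (fwd_cnt - last_fwd_cnt);

	return free_space < CREDIT_THRESHOLD;
}

int main(void)
{
	/* 16 KiB consumed since the last update: plenty of apparent room,
	 * skip the credit message. */
	printf("%d\n", need_credit_update(16384, 0));
	/* 224 KiB consumed: the peer may soon stall, send an update. */
	printf("%d\n", need_credit_update(229376, 0));
	return 0;
}
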
+ */ + if (free_space < VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) { + virtio_transport_send_credit_update(vsk, + VIRTIO_VSOCK_TYPE_STREAM, + NULL); + } return total; @@ -841,24 +871,60 @@ destroy: return err; } +static void +virtio_transport_recv_enqueue(struct vsock_sock *vsk, + struct virtio_vsock_pkt *pkt) +{ + struct virtio_vsock_sock *vvs = vsk->trans; + bool free_pkt = false; + + pkt->len = le32_to_cpu(pkt->hdr.len); + pkt->off = 0; + + spin_lock_bh(&vvs->rx_lock); + + virtio_transport_inc_rx_pkt(vvs, pkt); + + /* Try to copy small packets into the buffer of last packet queued, + * to avoid wasting memory queueing the entire buffer with a small + * payload. + */ + if (pkt->len <= GOOD_COPY_LEN && !list_empty(&vvs->rx_queue)) { + struct virtio_vsock_pkt *last_pkt; + + last_pkt = list_last_entry(&vvs->rx_queue, + struct virtio_vsock_pkt, list); + + /* If there is space in the last packet queued, we copy the + * new packet in its buffer. + */ + if (pkt->len <= last_pkt->buf_len - last_pkt->len) { + memcpy(last_pkt->buf + last_pkt->len, pkt->buf, + pkt->len); + last_pkt->len += pkt->len; + free_pkt = true; + goto out; + } + } + + list_add_tail(&pkt->list, &vvs->rx_queue); + +out: + spin_unlock_bh(&vvs->rx_lock); + if (free_pkt) + virtio_transport_free_pkt(pkt); +} + static int virtio_transport_recv_connected(struct sock *sk, struct virtio_vsock_pkt *pkt) { struct vsock_sock *vsk = vsock_sk(sk); - struct virtio_vsock_sock *vvs = vsk->trans; int err = 0; switch (le16_to_cpu(pkt->hdr.op)) { case VIRTIO_VSOCK_OP_RW: - pkt->len = le32_to_cpu(pkt->hdr.len); - pkt->off = 0; - - spin_lock_bh(&vvs->rx_lock); - virtio_transport_inc_rx_pkt(vvs, pkt); - list_add_tail(&pkt->list, &vvs->rx_queue); - spin_unlock_bh(&vvs->rx_lock); - + virtio_transport_recv_enqueue(vsk, pkt); sk->sk_data_ready(sk); return err; case VIRTIO_VSOCK_OP_CREDIT_UPDATE: diff --git a/net/wimax/debugfs.c b/net/wimax/debugfs.c index 1af56df30276..3c54bb6b925a 100644 --- a/net/wimax/debugfs.c +++ b/net/wimax/debugfs.c @@ -13,49 +13,23 @@ #define D_SUBMODULE debugfs #include "debug-levels.h" - -#define __debugfs_register(prefix, name, parent) \ -do { \ - result = d_level_register_debugfs(prefix, name, parent); \ - if (result < 0) \ - goto error; \ -} while (0) - - -int wimax_debugfs_add(struct wimax_dev *wimax_dev) +void wimax_debugfs_add(struct wimax_dev *wimax_dev) { - int result; struct net_device *net_dev = wimax_dev->net_dev; - struct device *dev = net_dev->dev.parent; struct dentry *dentry; char buf[128]; snprintf(buf, sizeof(buf), "wimax:%s", net_dev->name); dentry = debugfs_create_dir(buf, NULL); - result = PTR_ERR(dentry); - if (IS_ERR(dentry)) { - if (result == -ENODEV) - result = 0; /* No debugfs support */ - else - dev_err(dev, "Can't create debugfs dentry: %d\n", - result); - goto out; - } wimax_dev->debugfs_dentry = dentry; - __debugfs_register("wimax_dl_", debugfs, dentry); - __debugfs_register("wimax_dl_", id_table, dentry); - __debugfs_register("wimax_dl_", op_msg, dentry); - __debugfs_register("wimax_dl_", op_reset, dentry); - __debugfs_register("wimax_dl_", op_rfkill, dentry); - __debugfs_register("wimax_dl_", op_state_get, dentry); - __debugfs_register("wimax_dl_", stack, dentry); - result = 0; -out: - return result; -error: - debugfs_remove_recursive(wimax_dev->debugfs_dentry); - return result; + d_level_register_debugfs("wimax_dl_", debugfs, dentry); + d_level_register_debugfs("wimax_dl_", id_table, dentry); + d_level_register_debugfs("wimax_dl_", op_msg, dentry); + d_level_register_debugfs("wimax_dl_", op_reset, dentry); 
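A bit further up, virtio_transport_recv_enqueue() stops queueing a full-sized buffer for every tiny payload: packets no bigger than GOOD_COPY_LEN are copied into the tail of the previously queued packet when its buffer still has room. A rough userspace sketch of just that check (struct pkt and the sizes are invented for the example):

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>

#define GOOD_COPY_LEN 128		/* same threshold the patch introduces */

struct pkt {
	size_t buf_len;			/* capacity of buf */
	size_t len;			/* bytes already used */
	char   buf[4096];
};

/* Append a small payload to the previous packet instead of queueing
 * it on its own, provided the previous buffer has space left. */
static bool try_coalesce(struct pkt *last, const void *data, size_t len)
{
	if (len > GOOD_COPY_LEN || len > last->buf_len - last->len)
		return false;

	memcpy(last->buf + last->len, data, len);
	last->len += len;
	return true;
}

int main(void)
{
	struct pkt last = { .buf_len = sizeof(last.buf), .len = 4000 };
	static const char big[200];

	printf("%d\n", try_coalesce(&last, "ping", 4));		/* fits */
	printf("%d\n", try_coalesce(&last, big, sizeof(big)));	/* over GOOD_COPY_LEN */
	return 0;
}
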
+ d_level_register_debugfs("wimax_dl_", op_rfkill, dentry); + d_level_register_debugfs("wimax_dl_", op_state_get, dentry); + d_level_register_debugfs("wimax_dl_", stack, dentry); } void wimax_debugfs_rm(struct wimax_dev *wimax_dev) diff --git a/net/wimax/stack.c b/net/wimax/stack.c index 1ba99d65feca..4b9b1c5e8f3a 100644 --- a/net/wimax/stack.c +++ b/net/wimax/stack.c @@ -481,12 +481,7 @@ int wimax_dev_add(struct wimax_dev *wimax_dev, struct net_device *net_dev) /* Set up user-space interaction */ mutex_lock(&wimax_dev->mutex); wimax_id_table_add(wimax_dev); - result = wimax_debugfs_add(wimax_dev); - if (result < 0) { - dev_err(dev, "cannot initialize debugfs: %d\n", - result); - goto error_debugfs_add; - } + wimax_debugfs_add(wimax_dev); __wimax_state_set(wimax_dev, WIMAX_ST_DOWN); mutex_unlock(&wimax_dev->mutex); @@ -498,10 +493,6 @@ int wimax_dev_add(struct wimax_dev *wimax_dev, struct net_device *net_dev) d_fnend(3, dev, "(wimax_dev %p net_dev %p) = 0\n", wimax_dev, net_dev); return 0; -error_debugfs_add: - wimax_id_table_rm(wimax_dev); - mutex_unlock(&wimax_dev->mutex); - wimax_rfkill_rm(wimax_dev); error_rfkill_add: d_fnend(3, dev, "(wimax_dev %p net_dev %p) = %d\n", wimax_dev, net_dev, result); diff --git a/net/wimax/wimax-internal.h b/net/wimax/wimax-internal.h index e819a09337ee..40751207296c 100644 --- a/net/wimax/wimax-internal.h +++ b/net/wimax/wimax-internal.h @@ -57,13 +57,10 @@ void __wimax_state_set(struct wimax_dev *wimax_dev, enum wimax_st state) void __wimax_state_change(struct wimax_dev *, enum wimax_st); #ifdef CONFIG_DEBUG_FS -int wimax_debugfs_add(struct wimax_dev *); +void wimax_debugfs_add(struct wimax_dev *); void wimax_debugfs_rm(struct wimax_dev *); #else -static inline int wimax_debugfs_add(struct wimax_dev *wimax_dev) -{ - return 0; -} +static inline void wimax_debugfs_add(struct wimax_dev *wimax_dev) {} static inline void wimax_debugfs_rm(struct wimax_dev *wimax_dev) {} #endif diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig index 67f8360dfcee..63cf7131f601 100644 --- a/net/wireless/Kconfig +++ b/net/wireless/Kconfig @@ -217,6 +217,8 @@ config LIB80211_CRYPT_WEP config LIB80211_CRYPT_CCMP tristate + select CRYPTO_AES + select CRYPTO_CCM config LIB80211_CRYPT_TKIP tristate diff --git a/net/wireless/chan.c b/net/wireless/chan.c index 7dc1bbd0888f..e851cafd8e2f 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -14,6 +14,11 @@ #include "core.h" #include "rdev-ops.h" +static bool cfg80211_valid_60g_freq(u32 freq) +{ + return freq >= 58320 && freq <= 70200; +} + void cfg80211_chandef_create(struct cfg80211_chan_def *chandef, struct ieee80211_channel *chan, enum nl80211_channel_type chan_type) @@ -23,6 +28,8 @@ void cfg80211_chandef_create(struct cfg80211_chan_def *chandef, chandef->chan = chan; chandef->center_freq2 = 0; + chandef->edmg.bw_config = 0; + chandef->edmg.channels = 0; switch (chan_type) { case NL80211_CHAN_NO_HT: @@ -47,6 +54,91 @@ void cfg80211_chandef_create(struct cfg80211_chan_def *chandef, } EXPORT_SYMBOL(cfg80211_chandef_create); +static bool cfg80211_edmg_chandef_valid(const struct cfg80211_chan_def *chandef) +{ + int max_contiguous = 0; + int num_of_enabled = 0; + int contiguous = 0; + int i; + + if (!chandef->edmg.channels || !chandef->edmg.bw_config) + return false; + + if (!cfg80211_valid_60g_freq(chandef->chan->center_freq)) + return false; + + for (i = 0; i < 6; i++) { + if (chandef->edmg.channels & BIT(i)) { + contiguous++; + num_of_enabled++; + } else { + contiguous = 0; + } + + max_contiguous = max(contiguous, 
max_contiguous); + } + /* basic verification of edmg configuration according to + * IEEE P802.11ay/D4.0 section 9.4.2.251 + */ + /* check bw_config against contiguous edmg channels */ + switch (chandef->edmg.bw_config) { + case IEEE80211_EDMG_BW_CONFIG_4: + case IEEE80211_EDMG_BW_CONFIG_8: + case IEEE80211_EDMG_BW_CONFIG_12: + if (max_contiguous < 1) + return false; + break; + case IEEE80211_EDMG_BW_CONFIG_5: + case IEEE80211_EDMG_BW_CONFIG_9: + case IEEE80211_EDMG_BW_CONFIG_13: + if (max_contiguous < 2) + return false; + break; + case IEEE80211_EDMG_BW_CONFIG_6: + case IEEE80211_EDMG_BW_CONFIG_10: + case IEEE80211_EDMG_BW_CONFIG_14: + if (max_contiguous < 3) + return false; + break; + case IEEE80211_EDMG_BW_CONFIG_7: + case IEEE80211_EDMG_BW_CONFIG_11: + case IEEE80211_EDMG_BW_CONFIG_15: + if (max_contiguous < 4) + return false; + break; + + default: + return false; + } + + /* check bw_config against aggregated (non contiguous) edmg channels */ + switch (chandef->edmg.bw_config) { + case IEEE80211_EDMG_BW_CONFIG_4: + case IEEE80211_EDMG_BW_CONFIG_5: + case IEEE80211_EDMG_BW_CONFIG_6: + case IEEE80211_EDMG_BW_CONFIG_7: + break; + case IEEE80211_EDMG_BW_CONFIG_8: + case IEEE80211_EDMG_BW_CONFIG_9: + case IEEE80211_EDMG_BW_CONFIG_10: + case IEEE80211_EDMG_BW_CONFIG_11: + if (num_of_enabled < 2) + return false; + break; + case IEEE80211_EDMG_BW_CONFIG_12: + case IEEE80211_EDMG_BW_CONFIG_13: + case IEEE80211_EDMG_BW_CONFIG_14: + case IEEE80211_EDMG_BW_CONFIG_15: + if (num_of_enabled < 4 || max_contiguous < 2) + return false; + break; + default: + return false; + } + + return true; +} + bool cfg80211_chandef_valid(const struct cfg80211_chan_def *chandef) { u32 control_freq; @@ -112,6 +204,10 @@ bool cfg80211_chandef_valid(const struct cfg80211_chan_def *chandef) return false; } + if (cfg80211_chandef_is_edmg(chandef) && + !cfg80211_edmg_chandef_valid(chandef)) + return false; + return true; } EXPORT_SYMBOL(cfg80211_chandef_valid); @@ -721,12 +817,66 @@ static bool cfg80211_secondary_chans_ok(struct wiphy *wiphy, return true; } +/* check if the operating channels are valid and supported */ +static bool cfg80211_edmg_usable(struct wiphy *wiphy, u8 edmg_channels, + enum ieee80211_edmg_bw_config edmg_bw_config, + int primary_channel, + struct ieee80211_edmg *edmg_cap) +{ + struct ieee80211_channel *chan; + int i, freq; + int channels_counter = 0; + + if (!edmg_channels && !edmg_bw_config) + return true; + + if ((!edmg_channels && edmg_bw_config) || + (edmg_channels && !edmg_bw_config)) + return false; + + if (!(edmg_channels & BIT(primary_channel - 1))) + return false; + + /* 60GHz channels 1..6 */ + for (i = 0; i < 6; i++) { + if (!(edmg_channels & BIT(i))) + continue; + + if (!(edmg_cap->channels & BIT(i))) + return false; + + channels_counter++; + + freq = ieee80211_channel_to_frequency(i + 1, + NL80211_BAND_60GHZ); + chan = ieee80211_get_channel(wiphy, freq); + if (!chan || chan->flags & IEEE80211_CHAN_DISABLED) + return false; + } + + /* IEEE802.11 allows max 4 channels */ + if (channels_counter > 4) + return false; + + /* check bw_config is a subset of what driver supports + * (see IEEE P802.11ay/D4.0 section 9.4.2.251, Table 13) + */ + if ((edmg_bw_config % 4) > (edmg_cap->bw_config % 4)) + return false; + + if (edmg_bw_config > edmg_cap->bw_config) + return false; + + return true; +} + bool cfg80211_chandef_usable(struct wiphy *wiphy, const struct cfg80211_chan_def *chandef, u32 prohibited_flags) { struct ieee80211_sta_ht_cap *ht_cap; struct ieee80211_sta_vht_cap *vht_cap; + struct 
ieee80211_edmg *edmg_cap; u32 width, control_freq, cap; if (WARN_ON(!cfg80211_chandef_valid(chandef))) @@ -734,6 +884,15 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy, ht_cap = &wiphy->bands[chandef->chan->band]->ht_cap; vht_cap = &wiphy->bands[chandef->chan->band]->vht_cap; + edmg_cap = &wiphy->bands[chandef->chan->band]->edmg_cap; + + if (edmg_cap->channels && + !cfg80211_edmg_usable(wiphy, + chandef->edmg.channels, + chandef->edmg.bw_config, + chandef->chan->hw_value, + edmg_cap)) + return false; control_freq = chandef->chan->center_freq; @@ -894,7 +1053,8 @@ static bool cfg80211_ir_permissive_chan(struct wiphy *wiphy, if (chan == other_chan) return true; - if (chan->band != NL80211_BAND_5GHZ) + if (chan->band != NL80211_BAND_5GHZ && + chan->band != NL80211_BAND_6GHZ) continue; r1 = cfg80211_get_unii(chan->center_freq); diff --git a/net/wireless/core.c b/net/wireless/core.c index 32b3c719fdfc..350513744575 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -5,7 +5,7 @@ * Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright 2015-2017 Intel Deutschland GmbH - * Copyright (C) 2018 Intel Corporation + * Copyright (C) 2018-2019 Intel Corporation */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -142,12 +142,10 @@ int cfg80211_dev_rename(struct cfg80211_registered_device *rdev, if (result) return result; - if (rdev->wiphy.debugfsdir && - !debugfs_rename(rdev->wiphy.debugfsdir->d_parent, - rdev->wiphy.debugfsdir, - rdev->wiphy.debugfsdir->d_parent, - newname)) - pr_err("failed to rename debugfs dir to %s!\n", newname); + if (rdev->wiphy.debugfsdir) + debugfs_rename(rdev->wiphy.debugfsdir->d_parent, + rdev->wiphy.debugfsdir, + rdev->wiphy.debugfsdir->d_parent, newname); nl80211_notify_wiphy(rdev, NL80211_CMD_NEW_WIPHY); @@ -302,12 +300,13 @@ static int cfg80211_rfkill_set_block(void *data, bool blocked) return 0; } -static void cfg80211_rfkill_sync_work(struct work_struct *work) +static void cfg80211_rfkill_block_work(struct work_struct *work) { struct cfg80211_registered_device *rdev; - rdev = container_of(work, struct cfg80211_registered_device, rfkill_sync); - cfg80211_rfkill_set_block(rdev, rfkill_blocked(rdev->rfkill)); + rdev = container_of(work, struct cfg80211_registered_device, + rfkill_block); + cfg80211_rfkill_set_block(rdev, true); } static void cfg80211_event_work(struct work_struct *work) @@ -518,7 +517,7 @@ use_default_name: return NULL; } - INIT_WORK(&rdev->rfkill_sync, cfg80211_rfkill_sync_work); + INIT_WORK(&rdev->rfkill_block, cfg80211_rfkill_block_work); INIT_WORK(&rdev->conn_work, cfg80211_conn_work); INIT_WORK(&rdev->event_work, cfg80211_event_work); @@ -899,11 +898,8 @@ int wiphy_register(struct wiphy *wiphy) cfg80211_rdev_list_generation++; /* add to debugfs */ - rdev->wiphy.debugfsdir = - debugfs_create_dir(wiphy_name(&rdev->wiphy), - ieee80211_debugfs_dir); - if (IS_ERR(rdev->wiphy.debugfsdir)) - rdev->wiphy.debugfsdir = NULL; + rdev->wiphy.debugfsdir = debugfs_create_dir(wiphy_name(&rdev->wiphy), + ieee80211_debugfs_dir); cfg80211_debugfs_rdev_add(rdev); nl80211_notify_wiphy(rdev, NL80211_CMD_NEW_WIPHY); @@ -1066,7 +1062,7 @@ void wiphy_rfkill_set_hw_state(struct wiphy *wiphy, bool blocked) struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); if (rfkill_set_hw_state(rdev->rfkill, blocked)) - schedule_work(&rdev->rfkill_sync); + schedule_work(&rdev->rfkill_block); } EXPORT_SYMBOL(wiphy_rfkill_set_hw_state); diff --git a/net/wireless/core.h b/net/wireless/core.h index 
ee8388fe4a92..ed487e324571 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -28,7 +28,7 @@ struct cfg80211_registered_device { /* rfkill support */ struct rfkill_ops rfkill_ops; struct rfkill *rfkill; - struct work_struct rfkill_sync; + struct work_struct rfkill_block; /* ISO / IEC 3166 alpha2 for which this device is receiving * country IEs on, this can help disregard country IEs from APs @@ -306,6 +306,8 @@ void ieee80211_set_bitrate_flags(struct wiphy *wiphy); void cfg80211_bss_expire(struct cfg80211_registered_device *rdev); void cfg80211_bss_age(struct cfg80211_registered_device *rdev, unsigned long age_secs); +void cfg80211_update_assoc_bss_entry(struct wireless_dev *wdev, + struct ieee80211_channel *channel); /* IBSS */ int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev, diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c index d1743e6abc34..ae8fe66a9bb8 100644 --- a/net/wireless/ibss.c +++ b/net/wireless/ibss.c @@ -104,13 +104,19 @@ int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev, * use the mandatory rate set for 11b or * 11a for maximum compatibility. */ - struct ieee80211_supported_band *sband = - rdev->wiphy.bands[params->chandef.chan->band]; + struct ieee80211_supported_band *sband; + enum nl80211_band band; + u32 flag; int j; - u32 flag = params->chandef.chan->band == NL80211_BAND_5GHZ ? - IEEE80211_RATE_MANDATORY_A : - IEEE80211_RATE_MANDATORY_B; + band = params->chandef.chan->band; + if (band == NL80211_BAND_5GHZ || + band == NL80211_BAND_6GHZ) + flag = IEEE80211_RATE_MANDATORY_A; + else + flag = IEEE80211_RATE_MANDATORY_B; + + sband = rdev->wiphy.bands[band]; for (j = 0; j < sband->n_bitrates; j++) { if (sband->bitrates[j].flags & flag) params->basic_rates |= BIT(j); diff --git a/net/wireless/lib80211_crypt_ccmp.c b/net/wireless/lib80211_crypt_ccmp.c index 7e8ff9d7dcfa..6a5f08f7491e 100644 --- a/net/wireless/lib80211_crypt_ccmp.c +++ b/net/wireless/lib80211_crypt_ccmp.c @@ -22,6 +22,7 @@ #include <linux/ieee80211.h> #include <linux/crypto.h> +#include <crypto/aead.h> #include <net/lib80211.h> @@ -48,20 +49,13 @@ struct lib80211_ccmp_data { int key_idx; - struct crypto_cipher *tfm; + struct crypto_aead *tfm; /* scratch buffers for virt_to_page() (crypto API) */ - u8 tx_b0[AES_BLOCK_LEN], tx_b[AES_BLOCK_LEN], - tx_e[AES_BLOCK_LEN], tx_s0[AES_BLOCK_LEN]; - u8 rx_b0[AES_BLOCK_LEN], rx_b[AES_BLOCK_LEN], rx_a[AES_BLOCK_LEN]; + u8 tx_aad[2 * AES_BLOCK_LEN]; + u8 rx_aad[2 * AES_BLOCK_LEN]; }; -static inline void lib80211_ccmp_aes_encrypt(struct crypto_cipher *tfm, - const u8 pt[16], u8 ct[16]) -{ - crypto_cipher_encrypt_one(tfm, ct, pt); -} - static void *lib80211_ccmp_init(int key_idx) { struct lib80211_ccmp_data *priv; @@ -71,7 +65,7 @@ static void *lib80211_ccmp_init(int key_idx) goto fail; priv->key_idx = key_idx; - priv->tfm = crypto_alloc_cipher("aes", 0, 0); + priv->tfm = crypto_alloc_aead("ccm(aes)", 0, CRYPTO_ALG_ASYNC); if (IS_ERR(priv->tfm)) { priv->tfm = NULL; goto fail; @@ -82,7 +76,7 @@ static void *lib80211_ccmp_init(int key_idx) fail: if (priv) { if (priv->tfm) - crypto_free_cipher(priv->tfm); + crypto_free_aead(priv->tfm); kfree(priv); } @@ -93,25 +87,16 @@ static void lib80211_ccmp_deinit(void *priv) { struct lib80211_ccmp_data *_priv = priv; if (_priv && _priv->tfm) - crypto_free_cipher(_priv->tfm); + crypto_free_aead(_priv->tfm); kfree(priv); } -static inline void xor_block(u8 * b, u8 * a, size_t len) -{ - int i; - for (i = 0; i < len; i++) - b[i] ^= a[i]; -} - -static void ccmp_init_blocks(struct crypto_cipher 
*tfm, - struct ieee80211_hdr *hdr, - u8 * pn, size_t dlen, u8 * b0, u8 * auth, u8 * s0) +static int ccmp_init_iv_and_aad(const struct ieee80211_hdr *hdr, + const u8 *pn, u8 *iv, u8 *aad) { u8 *pos, qc = 0; size_t aad_len; int a4_included, qc_included; - u8 aad[2 * AES_BLOCK_LEN]; a4_included = ieee80211_has_a4(hdr->frame_control); qc_included = ieee80211_is_data_qos(hdr->frame_control); @@ -127,17 +112,19 @@ static void ccmp_init_blocks(struct crypto_cipher *tfm, aad_len += 2; } - /* CCM Initial Block: - * Flag (Include authentication header, M=3 (8-octet MIC), - * L=1 (2-octet Dlen)) - * Nonce: 0x00 | A2 | PN - * Dlen */ - b0[0] = 0x59; - b0[1] = qc; - memcpy(b0 + 2, hdr->addr2, ETH_ALEN); - memcpy(b0 + 8, pn, CCMP_PN_LEN); - b0[14] = (dlen >> 8) & 0xff; - b0[15] = dlen & 0xff; + /* In CCM, the initial vectors (IV) used for CTR mode encryption and CBC + * mode authentication are not allowed to collide, yet both are derived + * from the same vector. We only set L := 1 here to indicate that the + * data size can be represented in (L+1) bytes. The CCM layer will take + * care of storing the data length in the top (L+1) bytes and setting + * and clearing the other bits as is required to derive the two IVs. + */ + iv[0] = 0x1; + + /* Nonce: QC | A2 | PN */ + iv[1] = qc; + memcpy(iv + 2, hdr->addr2, ETH_ALEN); + memcpy(iv + 8, pn, CCMP_PN_LEN); /* AAD: * FC with bits 4..6 and 11..13 masked to zero; 14 is always one @@ -147,31 +134,20 @@ static void ccmp_init_blocks(struct crypto_cipher *tfm, * QC (if present) */ pos = (u8 *) hdr; - aad[0] = 0; /* aad_len >> 8 */ - aad[1] = aad_len & 0xff; - aad[2] = pos[0] & 0x8f; - aad[3] = pos[1] & 0xc7; - memcpy(aad + 4, hdr->addr1, 3 * ETH_ALEN); + aad[0] = pos[0] & 0x8f; + aad[1] = pos[1] & 0xc7; + memcpy(aad + 2, hdr->addr1, 3 * ETH_ALEN); pos = (u8 *) & hdr->seq_ctrl; - aad[22] = pos[0] & 0x0f; - aad[23] = 0; /* all bits masked */ - memset(aad + 24, 0, 8); + aad[20] = pos[0] & 0x0f; + aad[21] = 0; /* all bits masked */ + memset(aad + 22, 0, 8); if (a4_included) - memcpy(aad + 24, hdr->addr4, ETH_ALEN); + memcpy(aad + 22, hdr->addr4, ETH_ALEN); if (qc_included) { - aad[a4_included ? 30 : 24] = qc; + aad[a4_included ? 
28 : 22] = qc; /* rest of QC masked */ } - - /* Start with the first block and AAD */ - lib80211_ccmp_aes_encrypt(tfm, b0, auth); - xor_block(auth, aad, AES_BLOCK_LEN); - lib80211_ccmp_aes_encrypt(tfm, auth, auth); - xor_block(auth, &aad[AES_BLOCK_LEN], AES_BLOCK_LEN); - lib80211_ccmp_aes_encrypt(tfm, auth, auth); - b0[0] &= 0x07; - b0[14] = b0[15] = 0; - lib80211_ccmp_aes_encrypt(tfm, b0, s0); + return aad_len; } static int lib80211_ccmp_hdr(struct sk_buff *skb, int hdr_len, @@ -214,13 +190,13 @@ static int lib80211_ccmp_hdr(struct sk_buff *skb, int hdr_len, static int lib80211_ccmp_encrypt(struct sk_buff *skb, int hdr_len, void *priv) { struct lib80211_ccmp_data *key = priv; - int data_len, i, blocks, last, len; - u8 *pos, *mic; struct ieee80211_hdr *hdr; - u8 *b0 = key->tx_b0; - u8 *b = key->tx_b; - u8 *e = key->tx_e; - u8 *s0 = key->tx_s0; + struct aead_request *req; + struct scatterlist sg[2]; + u8 *aad = key->tx_aad; + u8 iv[AES_BLOCK_LEN]; + int len, data_len, aad_len; + int ret; if (skb_tailroom(skb) < CCMP_MIC_LEN || skb->len < hdr_len) return -1; @@ -230,31 +206,28 @@ static int lib80211_ccmp_encrypt(struct sk_buff *skb, int hdr_len, void *priv) if (len < 0) return -1; - pos = skb->data + hdr_len + CCMP_HDR_LEN; + req = aead_request_alloc(key->tfm, GFP_ATOMIC); + if (!req) + return -ENOMEM; + hdr = (struct ieee80211_hdr *)skb->data; - ccmp_init_blocks(key->tfm, hdr, key->tx_pn, data_len, b0, b, s0); - - blocks = DIV_ROUND_UP(data_len, AES_BLOCK_LEN); - last = data_len % AES_BLOCK_LEN; - - for (i = 1; i <= blocks; i++) { - len = (i == blocks && last) ? last : AES_BLOCK_LEN; - /* Authentication */ - xor_block(b, pos, len); - lib80211_ccmp_aes_encrypt(key->tfm, b, b); - /* Encryption, with counter */ - b0[14] = (i >> 8) & 0xff; - b0[15] = i & 0xff; - lib80211_ccmp_aes_encrypt(key->tfm, b0, e); - xor_block(pos, e, len); - pos += len; - } + aad_len = ccmp_init_iv_and_aad(hdr, key->tx_pn, iv, aad); - mic = skb_put(skb, CCMP_MIC_LEN); - for (i = 0; i < CCMP_MIC_LEN; i++) - mic[i] = b[i] ^ s0[i]; + skb_put(skb, CCMP_MIC_LEN); - return 0; + sg_init_table(sg, 2); + sg_set_buf(&sg[0], aad, aad_len); + sg_set_buf(&sg[1], skb->data + hdr_len + CCMP_HDR_LEN, + data_len + CCMP_MIC_LEN); + + aead_request_set_callback(req, 0, NULL, NULL); + aead_request_set_ad(req, aad_len); + aead_request_set_crypt(req, sg, sg, data_len, iv); + + ret = crypto_aead_encrypt(req); + aead_request_free(req); + + return ret; } /* @@ -283,13 +256,13 @@ static int lib80211_ccmp_decrypt(struct sk_buff *skb, int hdr_len, void *priv) struct lib80211_ccmp_data *key = priv; u8 keyidx, *pos; struct ieee80211_hdr *hdr; - u8 *b0 = key->rx_b0; - u8 *b = key->rx_b; - u8 *a = key->rx_a; + struct aead_request *req; + struct scatterlist sg[2]; + u8 *aad = key->rx_aad; + u8 iv[AES_BLOCK_LEN]; u8 pn[6]; - int i, blocks, last, len; - size_t data_len = skb->len - hdr_len - CCMP_HDR_LEN - CCMP_MIC_LEN; - u8 *mic = skb->data + skb->len - CCMP_MIC_LEN; + int aad_len, ret; + size_t data_len = skb->len - hdr_len - CCMP_HDR_LEN; if (skb->len < hdr_len + CCMP_HDR_LEN + CCMP_MIC_LEN) { key->dot11RSNAStatsCCMPFormatErrors++; @@ -337,28 +310,26 @@ static int lib80211_ccmp_decrypt(struct sk_buff *skb, int hdr_len, void *priv) return -4; } - ccmp_init_blocks(key->tfm, hdr, pn, data_len, b0, a, b); - xor_block(mic, b, CCMP_MIC_LEN); - - blocks = DIV_ROUND_UP(data_len, AES_BLOCK_LEN); - last = data_len % AES_BLOCK_LEN; - - for (i = 1; i <= blocks; i++) { - len = (i == blocks && last) ? 
last : AES_BLOCK_LEN; - /* Decrypt, with counter */ - b0[14] = (i >> 8) & 0xff; - b0[15] = i & 0xff; - lib80211_ccmp_aes_encrypt(key->tfm, b0, b); - xor_block(pos, b, len); - /* Authentication */ - xor_block(a, pos, len); - lib80211_ccmp_aes_encrypt(key->tfm, a, a); - pos += len; - } + req = aead_request_alloc(key->tfm, GFP_ATOMIC); + if (!req) + return -ENOMEM; - if (memcmp(mic, a, CCMP_MIC_LEN) != 0) { - net_dbg_ratelimited("CCMP: decrypt failed: STA=%pM\n", - hdr->addr2); + aad_len = ccmp_init_iv_and_aad(hdr, pn, iv, aad); + + sg_init_table(sg, 2); + sg_set_buf(&sg[0], aad, aad_len); + sg_set_buf(&sg[1], pos, data_len); + + aead_request_set_callback(req, 0, NULL, NULL); + aead_request_set_ad(req, aad_len); + aead_request_set_crypt(req, sg, sg, data_len, iv); + + ret = crypto_aead_decrypt(req); + aead_request_free(req); + + if (ret) { + net_dbg_ratelimited("CCMP: decrypt failed: STA=%pM (%d)\n", + hdr->addr2, ret); key->dot11RSNAStatsCCMPDecryptErrors++; return -5; } @@ -377,7 +348,7 @@ static int lib80211_ccmp_set_key(void *key, int len, u8 * seq, void *priv) { struct lib80211_ccmp_data *data = priv; int keyidx; - struct crypto_cipher *tfm = data->tfm; + struct crypto_aead *tfm = data->tfm; keyidx = data->key_idx; memset(data, 0, sizeof(*data)); @@ -394,7 +365,9 @@ static int lib80211_ccmp_set_key(void *key, int len, u8 * seq, void *priv) data->rx_pn[4] = seq[1]; data->rx_pn[5] = seq[0]; } - crypto_cipher_setkey(data->tfm, data->key, CCMP_TK_LEN); + if (crypto_aead_setauthsize(data->tfm, CCMP_MIC_LEN) || + crypto_aead_setkey(data->tfm, data->key, CCMP_TK_LEN)) + return -1; } else if (len == 0) data->key_set = 0; else diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index fd05ae1437a9..d21b1581a665 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -281,7 +281,16 @@ nl80211_pmsr_attr_policy[NL80211_PMSR_ATTR_MAX + 1] = { NLA_POLICY_NESTED_ARRAY(nl80211_psmr_peer_attr_policy), }; +static const struct nla_policy +he_obss_pd_policy[NL80211_HE_OBSS_PD_ATTR_MAX + 1] = { + [NL80211_HE_OBSS_PD_ATTR_MIN_OFFSET] = + NLA_POLICY_RANGE(NLA_U8, 1, 20), + [NL80211_HE_OBSS_PD_ATTR_MAX_OFFSET] = + NLA_POLICY_RANGE(NLA_U8, 1, 20), +}; + const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { + [0] = { .strict_start_type = NL80211_ATTR_HE_OBSS_PD }, [NL80211_ATTR_WIPHY] = { .type = NLA_U32 }, [NL80211_ATTR_WIPHY_NAME] = { .type = NLA_NUL_STRING, .len = 20-1 }, @@ -289,6 +298,13 @@ const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_WIPHY_FREQ] = { .type = NLA_U32 }, [NL80211_ATTR_WIPHY_CHANNEL_TYPE] = { .type = NLA_U32 }, + [NL80211_ATTR_WIPHY_EDMG_CHANNELS] = NLA_POLICY_RANGE(NLA_U8, + NL80211_EDMG_CHANNELS_MIN, + NL80211_EDMG_CHANNELS_MAX), + [NL80211_ATTR_WIPHY_EDMG_BW_CONFIG] = NLA_POLICY_RANGE(NLA_U8, + NL80211_EDMG_BW_CONFIG_MIN, + NL80211_EDMG_BW_CONFIG_MAX), + [NL80211_ATTR_CHANNEL_WIDTH] = { .type = NLA_U32 }, [NL80211_ATTR_CENTER_FREQ1] = { .type = NLA_U32 }, [NL80211_ATTR_CENTER_FREQ2] = { .type = NLA_U32 }, @@ -574,6 +590,7 @@ const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_SAE_PASSWORD] = { .type = NLA_BINARY, .len = SAE_PASSWORD_MAX_LEN }, [NL80211_ATTR_TWT_RESPONDER] = { .type = NLA_FLAG }, + [NL80211_ATTR_HE_OBSS_PD] = NLA_POLICY_NESTED(he_obss_pd_policy), }; /* policy for the key attributes */ @@ -667,6 +684,7 @@ static const struct nla_policy nl80211_match_band_rssi_policy[NUM_NL80211_BANDS] = { [NL80211_BAND_2GHZ] = { .type = NLA_S32 }, [NL80211_BAND_5GHZ] = { .type = NLA_S32 }, + [NL80211_BAND_6GHZ] = { 
.type = NLA_S32 }, [NL80211_BAND_60GHZ] = { .type = NLA_S32 }, }; @@ -749,17 +767,25 @@ int nl80211_prepare_wdev_dump(struct netlink_callback *cb, int err; if (!cb->args[0]) { + struct nlattr **attrbuf; + + attrbuf = kcalloc(NUM_NL80211_ATTR, sizeof(*attrbuf), + GFP_KERNEL); + if (!attrbuf) + return -ENOMEM; + err = nlmsg_parse_deprecated(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, - genl_family_attrbuf(&nl80211_fam), - nl80211_fam.maxattr, + attrbuf, nl80211_fam.maxattr, nl80211_policy, NULL); - if (err) + if (err) { + kfree(attrbuf); return err; + } - *wdev = __cfg80211_wdev_from_attrs( - sock_net(cb->skb->sk), - genl_family_attrbuf(&nl80211_fam)); + *wdev = __cfg80211_wdev_from_attrs(sock_net(cb->skb->sk), + attrbuf); + kfree(attrbuf); if (IS_ERR(*wdev)) return PTR_ERR(*wdev); *rdev = wiphy_to_rdev((*wdev)->wiphy); @@ -1555,6 +1581,15 @@ static int nl80211_send_band_rateinfo(struct sk_buff *msg, nla_nest_end(msg, nl_iftype_data); } + /* add EDMG info */ + if (sband->edmg_cap.channels && + (nla_put_u8(msg, NL80211_BAND_ATTR_EDMG_CHANNELS, + sband->edmg_cap.channels) || + nla_put_u8(msg, NL80211_BAND_ATTR_EDMG_BW_CONFIG, + sband->edmg_cap.bw_config))) + + return -ENOBUFS; + /* add bitrates */ nl_rates = nla_nest_start_noflag(msg, NL80211_BAND_ATTR_RATES); if (!nl_rates) @@ -2065,6 +2100,7 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev, CMD(add_tx_ts, ADD_TX_TS); CMD(set_multicast_to_unicast, SET_MULTICAST_TO_UNICAST); CMD(update_connect_params, UPDATE_CONNECT_PARAMS); + CMD(update_ft_ies, UPDATE_FT_IES); } #undef CMD @@ -2172,6 +2208,30 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev, rdev->wiphy.vht_capa_mod_mask)) goto nla_put_failure; + if (nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, + rdev->wiphy.perm_addr)) + goto nla_put_failure; + + if (!is_zero_ether_addr(rdev->wiphy.addr_mask) && + nla_put(msg, NL80211_ATTR_MAC_MASK, ETH_ALEN, + rdev->wiphy.addr_mask)) + goto nla_put_failure; + + if (rdev->wiphy.n_addresses > 1) { + void *attr; + + attr = nla_nest_start(msg, NL80211_ATTR_MAC_ADDRS); + if (!attr) + goto nla_put_failure; + + for (i = 0; i < rdev->wiphy.n_addresses; i++) + if (nla_put(msg, i + 1, ETH_ALEN, + rdev->wiphy.addresses[i].addr)) + goto nla_put_failure; + + nla_nest_end(msg, attr); + } + state->split_start++; break; case 10: @@ -2366,14 +2426,21 @@ static int nl80211_dump_wiphy_parse(struct sk_buff *skb, struct netlink_callback *cb, struct nl80211_dump_wiphy_state *state) { - struct nlattr **tb = genl_family_attrbuf(&nl80211_fam); - int ret = nlmsg_parse_deprecated(cb->nlh, - GENL_HDRLEN + nl80211_fam.hdrsize, - tb, nl80211_fam.maxattr, - nl80211_policy, NULL); + struct nlattr **tb = kcalloc(NUM_NL80211_ATTR, sizeof(*tb), GFP_KERNEL); + int ret; + + if (!tb) + return -ENOMEM; + + ret = nlmsg_parse_deprecated(cb->nlh, + GENL_HDRLEN + nl80211_fam.hdrsize, + tb, nl80211_fam.maxattr, + nl80211_policy, NULL); /* ignore parse errors for backward compatibility */ - if (ret) - return 0; + if (ret) { + ret = 0; + goto out; + } state->split = tb[NL80211_ATTR_SPLIT_WIPHY_DUMP]; if (tb[NL80211_ATTR_WIPHY]) @@ -2386,8 +2453,10 @@ static int nl80211_dump_wiphy_parse(struct sk_buff *skb, int ifidx = nla_get_u32(tb[NL80211_ATTR_IFINDEX]); netdev = __dev_get_by_index(sock_net(skb->sk), ifidx); - if (!netdev) - return -ENODEV; + if (!netdev) { + ret = -ENODEV; + goto out; + } if (netdev->ieee80211_ptr) { rdev = wiphy_to_rdev( netdev->ieee80211_ptr->wiphy); @@ -2395,7 +2464,10 @@ static int nl80211_dump_wiphy_parse(struct sk_buff *skb, } } - 
return 0; + ret = 0; +out: + kfree(tb); + return ret; } static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb) @@ -2622,6 +2694,18 @@ int nl80211_parse_chandef(struct cfg80211_registered_device *rdev, nla_get_u32(attrs[NL80211_ATTR_CENTER_FREQ2]); } + if (info->attrs[NL80211_ATTR_WIPHY_EDMG_CHANNELS]) { + chandef->edmg.channels = + nla_get_u8(info->attrs[NL80211_ATTR_WIPHY_EDMG_CHANNELS]); + + if (info->attrs[NL80211_ATTR_WIPHY_EDMG_BW_CONFIG]) + chandef->edmg.bw_config = + nla_get_u8(info->attrs[NL80211_ATTR_WIPHY_EDMG_BW_CONFIG]); + } else { + chandef->edmg.bw_config = 0; + chandef->edmg.channels = 0; + } + if (!cfg80211_chandef_valid(chandef)) { NL_SET_ERR_MSG(extack, "invalid channel definition"); return -EINVAL; @@ -4359,6 +4443,34 @@ static int nl80211_parse_beacon(struct cfg80211_registered_device *rdev, return 0; } +static int nl80211_parse_he_obss_pd(struct nlattr *attrs, + struct ieee80211_he_obss_pd *he_obss_pd) +{ + struct nlattr *tb[NL80211_HE_OBSS_PD_ATTR_MAX + 1]; + int err; + + err = nla_parse_nested(tb, NL80211_HE_OBSS_PD_ATTR_MAX, attrs, + he_obss_pd_policy, NULL); + if (err) + return err; + + if (!tb[NL80211_HE_OBSS_PD_ATTR_MIN_OFFSET] || + !tb[NL80211_HE_OBSS_PD_ATTR_MAX_OFFSET]) + return -EINVAL; + + he_obss_pd->min_offset = + nla_get_u32(tb[NL80211_HE_OBSS_PD_ATTR_MIN_OFFSET]); + he_obss_pd->max_offset = + nla_get_u32(tb[NL80211_HE_OBSS_PD_ATTR_MAX_OFFSET]); + + if (he_obss_pd->min_offset >= he_obss_pd->max_offset) + return -EINVAL; + + he_obss_pd->enable = true; + + return 0; +} + static void nl80211_check_ap_rate_selectors(struct cfg80211_ap_settings *params, const u8 *rates) { @@ -4643,6 +4755,14 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) params.twt_responder = nla_get_flag(info->attrs[NL80211_ATTR_TWT_RESPONDER]); + if (info->attrs[NL80211_ATTR_HE_OBSS_PD]) { + err = nl80211_parse_he_obss_pd( + info->attrs[NL80211_ATTR_HE_OBSS_PD], + ¶ms.he_obss_pd); + if (err) + return err; + } + nl80211_calculate_ap_params(¶ms); if (info->attrs[NL80211_ATTR_EXTERNAL_AUTH_SUPPORT]) @@ -4941,6 +5061,7 @@ static int nl80211_send_station(struct sk_buff *msg, u32 cmd, u32 portid, PUT_SINFO(CONNECTED_TIME, connected_time, u32); PUT_SINFO(INACTIVE_TIME, inactive_time, u32); + PUT_SINFO_U64(ASSOC_AT_BOOTTIME, assoc_at); if (sinfo->filled & (BIT_ULL(NL80211_STA_INFO_RX_BYTES) | BIT_ULL(NL80211_STA_INFO_RX_BYTES64)) && @@ -8685,6 +8806,10 @@ static int nl80211_send_survey(struct sk_buff *msg, u32 portid, u32 seq, nla_put_u64_64bit(msg, NL80211_SURVEY_INFO_TIME_SCAN, survey->time_scan, NL80211_SURVEY_INFO_PAD)) goto nla_put_failure; + if ((survey->filled & SURVEY_INFO_TIME_BSS_RX) && + nla_put_u64_64bit(msg, NL80211_SURVEY_INFO_TIME_BSS_RX, + survey->time_bss_rx, NL80211_SURVEY_INFO_PAD)) + goto nla_put_failure; nla_nest_end(msg, infoattr); @@ -8698,7 +8823,7 @@ static int nl80211_send_survey(struct sk_buff *msg, u32 portid, u32 seq, static int nl80211_dump_survey(struct sk_buff *skb, struct netlink_callback *cb) { - struct nlattr **attrbuf = genl_family_attrbuf(&nl80211_fam); + struct nlattr **attrbuf; struct survey_info survey; struct cfg80211_registered_device *rdev; struct wireless_dev *wdev; @@ -8706,6 +8831,10 @@ static int nl80211_dump_survey(struct sk_buff *skb, struct netlink_callback *cb) int res; bool radio_stats; + attrbuf = kcalloc(NUM_NL80211_ATTR, sizeof(*attrbuf), GFP_KERNEL); + if (!attrbuf) + return -ENOMEM; + rtnl_lock(); res = nl80211_prepare_wdev_dump(cb, &rdev, &wdev); if (res) @@ -8750,6 +8879,7 @@ static int 
nl80211_dump_survey(struct sk_buff *skb, struct netlink_callback *cb) cb->args[2] = survey_idx; res = skb->len; out_err: + kfree(attrbuf); rtnl_unlock(); return res; } @@ -9609,6 +9739,7 @@ static int nl80211_testmode_dump(struct sk_buff *skb, struct netlink_callback *cb) { struct cfg80211_registered_device *rdev; + struct nlattr **attrbuf = NULL; int err; long phy_idx; void *data = NULL; @@ -9629,7 +9760,12 @@ static int nl80211_testmode_dump(struct sk_buff *skb, goto out_err; } } else { - struct nlattr **attrbuf = genl_family_attrbuf(&nl80211_fam); + attrbuf = kcalloc(NUM_NL80211_ATTR, sizeof(*attrbuf), + GFP_KERNEL); + if (!attrbuf) { + err = -ENOMEM; + goto out_err; + } err = nlmsg_parse_deprecated(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, @@ -9696,6 +9832,7 @@ static int nl80211_testmode_dump(struct sk_buff *skb, /* see above */ cb->args[0] = phy_idx + 1; out_err: + kfree(attrbuf); rtnl_unlock(); return err; } @@ -9790,6 +9927,15 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info) return -EINVAL; } + if (info->attrs[NL80211_ATTR_WIPHY_EDMG_CHANNELS]) { + connect.edmg.channels = + nla_get_u8(info->attrs[NL80211_ATTR_WIPHY_EDMG_CHANNELS]); + + if (info->attrs[NL80211_ATTR_WIPHY_EDMG_BW_CONFIG]) + connect.edmg.bw_config = + nla_get_u8(info->attrs[NL80211_ATTR_WIPHY_EDMG_BW_CONFIG]); + } + if (connect.privacy && info->attrs[NL80211_ATTR_KEYS]) { connkeys = nl80211_parse_connkeys(rdev, info, NULL); if (IS_ERR(connkeys)) @@ -10659,9 +10805,11 @@ static int cfg80211_cqm_rssi_update(struct cfg80211_registered_device *rdev, hyst = wdev->cqm_config->rssi_hyst; n = wdev->cqm_config->n_rssi_thresholds; - for (i = 0; i < n; i++) + for (i = 0; i < n; i++) { + i = array_index_nospec(i, n); if (last < wdev->cqm_config->rssi_thresholds[i]) break; + } low_index = i - 1; if (low_index >= 0) { @@ -12789,7 +12937,7 @@ static int nl80211_prepare_vendor_dump(struct sk_buff *skb, struct cfg80211_registered_device **rdev, struct wireless_dev **wdev) { - struct nlattr **attrbuf = genl_family_attrbuf(&nl80211_fam); + struct nlattr **attrbuf; u32 vid, subcmd; unsigned int i; int vcmd_idx = -1; @@ -12820,24 +12968,32 @@ static int nl80211_prepare_vendor_dump(struct sk_buff *skb, return 0; } + attrbuf = kcalloc(NUM_NL80211_ATTR, sizeof(*attrbuf), GFP_KERNEL); + if (!attrbuf) + return -ENOMEM; + err = nlmsg_parse_deprecated(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, attrbuf, nl80211_fam.maxattr, nl80211_policy, NULL); if (err) - return err; + goto out; if (!attrbuf[NL80211_ATTR_VENDOR_ID] || - !attrbuf[NL80211_ATTR_VENDOR_SUBCMD]) - return -EINVAL; + !attrbuf[NL80211_ATTR_VENDOR_SUBCMD]) { + err = -EINVAL; + goto out; + } *wdev = __cfg80211_wdev_from_attrs(sock_net(skb->sk), attrbuf); if (IS_ERR(*wdev)) *wdev = NULL; *rdev = __cfg80211_rdev_from_attrs(sock_net(skb->sk), attrbuf); - if (IS_ERR(*rdev)) - return PTR_ERR(*rdev); + if (IS_ERR(*rdev)) { + err = PTR_ERR(*rdev); + goto out; + } vid = nla_get_u32(attrbuf[NL80211_ATTR_VENDOR_ID]); subcmd = nla_get_u32(attrbuf[NL80211_ATTR_VENDOR_SUBCMD]); @@ -12850,15 +13006,19 @@ static int nl80211_prepare_vendor_dump(struct sk_buff *skb, if (vcmd->info.vendor_id != vid || vcmd->info.subcmd != subcmd) continue; - if (!vcmd->dumpit) - return -EOPNOTSUPP; + if (!vcmd->dumpit) { + err = -EOPNOTSUPP; + goto out; + } vcmd_idx = i; break; } - if (vcmd_idx < 0) - return -EOPNOTSUPP; + if (vcmd_idx < 0) { + err = -EOPNOTSUPP; + goto out; + } if (attrbuf[NL80211_ATTR_VENDOR_DATA]) { data = nla_data(attrbuf[NL80211_ATTR_VENDOR_DATA]); @@ -12869,7 +13029,7 @@ 
static int nl80211_prepare_vendor_dump(struct sk_buff *skb, attrbuf[NL80211_ATTR_VENDOR_DATA], cb->extack); if (err) - return err; + goto out; } /* 0 is the first index - add 1 to parse only once */ @@ -12881,7 +13041,10 @@ static int nl80211_prepare_vendor_dump(struct sk_buff *skb, cb->args[4] = data_len; /* keep rtnl locked in successful case */ - return 0; + err = 0; +out: + kfree(attrbuf); + return err; } static int nl80211_vendor_cmd_dump(struct sk_buff *skb, @@ -14559,6 +14722,7 @@ static struct genl_family nl80211_fam __ro_after_init = { .n_ops = ARRAY_SIZE(nl80211_ops), .mcgrps = nl80211_mcgrps, .n_mcgrps = ARRAY_SIZE(nl80211_mcgrps), + .parallel_ops = true, }; /* notification functions */ @@ -14835,12 +14999,10 @@ void nl80211_common_reg_change_event(enum nl80211_commands cmd_id, return; hdr = nl80211hdr_put(msg, 0, 0, 0, cmd_id); - if (!hdr) { - nlmsg_free(msg); - return; - } + if (!hdr) + goto nla_put_failure; - if (nl80211_reg_change_event_fill(msg, request) == false) + if (!nl80211_reg_change_event_fill(msg, request)) goto nla_put_failure; genlmsg_end(msg, hdr); @@ -16090,7 +16252,9 @@ void cfg80211_ch_switch_notify(struct net_device *dev, if (wdev->iftype == NL80211_IFTYPE_STATION && !WARN_ON(!wdev->current_bss)) - wdev->current_bss->pub.channel = chandef->chan; + cfg80211_update_assoc_bss_entry(wdev, chandef->chan); + + cfg80211_sched_dfs_chan_update(rdev); nl80211_ch_switch_notify(rdev, dev, chandef, GFP_KERNEL, NL80211_CMD_CH_SWITCH_NOTIFY, 0); diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 327479ce69f5..5311d0ae2454 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -3806,8 +3806,9 @@ void wiphy_regulatory_deregister(struct wiphy *wiphy) } /* - * See http://www.fcc.gov/document/5-ghz-unlicensed-spectrum-unii, for - * UNII band definitions + * See FCC notices for UNII band definitions + * 5GHz: https://www.fcc.gov/document/5-ghz-unlicensed-spectrum-unii + * 6GHz: https://www.fcc.gov/document/fcc-proposes-more-spectrum-unlicensed-use-0 */ int cfg80211_get_unii(int freq) { @@ -3831,6 +3832,22 @@ int cfg80211_get_unii(int freq) if (freq > 5725 && freq <= 5825) return 4; + /* UNII-5 */ + if (freq > 5925 && freq <= 6425) + return 5; + + /* UNII-6 */ + if (freq > 6425 && freq <= 6525) + return 6; + + /* UNII-7 */ + if (freq > 6525 && freq <= 6875) + return 7; + + /* UNII-8 */ + if (freq > 6875 && freq <= 7125) + return 8; + return -EINVAL; } diff --git a/net/wireless/scan.c b/net/wireless/scan.c index d66e6d4b7555..d313c9befa23 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -1091,6 +1091,93 @@ struct cfg80211_non_tx_bss { u8 bssid_index; }; +static bool +cfg80211_update_known_bss(struct cfg80211_registered_device *rdev, + struct cfg80211_internal_bss *known, + struct cfg80211_internal_bss *new, + bool signal_valid) +{ + lockdep_assert_held(&rdev->bss_lock); + + /* Update IEs */ + if (rcu_access_pointer(new->pub.proberesp_ies)) { + const struct cfg80211_bss_ies *old; + + old = rcu_access_pointer(known->pub.proberesp_ies); + + rcu_assign_pointer(known->pub.proberesp_ies, + new->pub.proberesp_ies); + /* Override possible earlier Beacon frame IEs */ + rcu_assign_pointer(known->pub.ies, + new->pub.proberesp_ies); + if (old) + kfree_rcu((struct cfg80211_bss_ies *)old, rcu_head); + } else if (rcu_access_pointer(new->pub.beacon_ies)) { + const struct cfg80211_bss_ies *old; + struct cfg80211_internal_bss *bss; + + if (known->pub.hidden_beacon_bss && + !list_empty(&known->hidden_list)) { + const struct cfg80211_bss_ies *f; + + /* The known BSS struct is 
one of the probe + * response members of a group, but we're + * receiving a beacon (beacon_ies in the new + * bss is used). This can only mean that the + * AP changed its beacon from not having an + * SSID to showing it, which is confusing so + * drop this information. + */ + + f = rcu_access_pointer(new->pub.beacon_ies); + kfree_rcu((struct cfg80211_bss_ies *)f, rcu_head); + return false; + } + + old = rcu_access_pointer(known->pub.beacon_ies); + + rcu_assign_pointer(known->pub.beacon_ies, new->pub.beacon_ies); + + /* Override IEs if they were from a beacon before */ + if (old == rcu_access_pointer(known->pub.ies)) + rcu_assign_pointer(known->pub.ies, new->pub.beacon_ies); + + /* Assign beacon IEs to all sub entries */ + list_for_each_entry(bss, &known->hidden_list, hidden_list) { + const struct cfg80211_bss_ies *ies; + + ies = rcu_access_pointer(bss->pub.beacon_ies); + WARN_ON(ies != old); + + rcu_assign_pointer(bss->pub.beacon_ies, + new->pub.beacon_ies); + } + + if (old) + kfree_rcu((struct cfg80211_bss_ies *)old, rcu_head); + } + + known->pub.beacon_interval = new->pub.beacon_interval; + + /* don't update the signal if beacon was heard on + * adjacent channel. + */ + if (signal_valid) + known->pub.signal = new->pub.signal; + known->pub.capability = new->pub.capability; + known->ts = new->ts; + known->ts_boottime = new->ts_boottime; + known->parent_tsf = new->parent_tsf; + known->pub.chains = new->pub.chains; + memcpy(known->pub.chain_signal, new->pub.chain_signal, + IEEE80211_MAX_CHAINS); + ether_addr_copy(known->parent_bssid, new->parent_bssid); + known->pub.max_bssid_indicator = new->pub.max_bssid_indicator; + known->pub.bssid_index = new->pub.bssid_index; + + return true; +} + /* Returned bss is reference counted and must be cleaned up appropriately. */ struct cfg80211_internal_bss * cfg80211_bss_update(struct cfg80211_registered_device *rdev, @@ -1114,88 +1201,8 @@ cfg80211_bss_update(struct cfg80211_registered_device *rdev, found = rb_find_bss(rdev, tmp, BSS_CMP_REGULAR); if (found) { - /* Update IEs */ - if (rcu_access_pointer(tmp->pub.proberesp_ies)) { - const struct cfg80211_bss_ies *old; - - old = rcu_access_pointer(found->pub.proberesp_ies); - - rcu_assign_pointer(found->pub.proberesp_ies, - tmp->pub.proberesp_ies); - /* Override possible earlier Beacon frame IEs */ - rcu_assign_pointer(found->pub.ies, - tmp->pub.proberesp_ies); - if (old) - kfree_rcu((struct cfg80211_bss_ies *)old, - rcu_head); - } else if (rcu_access_pointer(tmp->pub.beacon_ies)) { - const struct cfg80211_bss_ies *old; - struct cfg80211_internal_bss *bss; - - if (found->pub.hidden_beacon_bss && - !list_empty(&found->hidden_list)) { - const struct cfg80211_bss_ies *f; - - /* - * The found BSS struct is one of the probe - * response members of a group, but we're - * receiving a beacon (beacon_ies in the tmp - * bss is used). This can only mean that the - * AP changed its beacon from not having an - * SSID to showing it, which is confusing so - * drop this information. 
- */ - - f = rcu_access_pointer(tmp->pub.beacon_ies); - kfree_rcu((struct cfg80211_bss_ies *)f, - rcu_head); - goto drop; - } - - old = rcu_access_pointer(found->pub.beacon_ies); - - rcu_assign_pointer(found->pub.beacon_ies, - tmp->pub.beacon_ies); - - /* Override IEs if they were from a beacon before */ - if (old == rcu_access_pointer(found->pub.ies)) - rcu_assign_pointer(found->pub.ies, - tmp->pub.beacon_ies); - - /* Assign beacon IEs to all sub entries */ - list_for_each_entry(bss, &found->hidden_list, - hidden_list) { - const struct cfg80211_bss_ies *ies; - - ies = rcu_access_pointer(bss->pub.beacon_ies); - WARN_ON(ies != old); - - rcu_assign_pointer(bss->pub.beacon_ies, - tmp->pub.beacon_ies); - } - - if (old) - kfree_rcu((struct cfg80211_bss_ies *)old, - rcu_head); - } - - found->pub.beacon_interval = tmp->pub.beacon_interval; - /* - * don't update the signal if beacon was heard on - * adjacent channel. - */ - if (signal_valid) - found->pub.signal = tmp->pub.signal; - found->pub.capability = tmp->pub.capability; - found->ts = tmp->ts; - found->ts_boottime = tmp->ts_boottime; - found->parent_tsf = tmp->parent_tsf; - found->pub.chains = tmp->pub.chains; - memcpy(found->pub.chain_signal, tmp->pub.chain_signal, - IEEE80211_MAX_CHAINS); - ether_addr_copy(found->parent_bssid, tmp->parent_bssid); - found->pub.max_bssid_indicator = tmp->pub.max_bssid_indicator; - found->pub.bssid_index = tmp->pub.bssid_index; + if (!cfg80211_update_known_bss(rdev, found, tmp, signal_valid)) + goto drop; } else { struct cfg80211_internal_bss *new; struct cfg80211_internal_bss *hidden; @@ -1368,6 +1375,7 @@ cfg80211_inform_single_bss_data(struct wiphy *wiphy, struct cfg80211_internal_bss tmp = {}, *res; int bss_type; bool signal_valid; + unsigned long ts; if (WARN_ON(!wiphy)) return NULL; @@ -1390,8 +1398,11 @@ cfg80211_inform_single_bss_data(struct wiphy *wiphy, tmp.ts_boottime = data->boottime_ns; if (non_tx_data) { tmp.pub.transmitted_bss = non_tx_data->tx_bss; + ts = bss_from_pub(non_tx_data->tx_bss)->ts; tmp.pub.bssid_index = non_tx_data->bssid_index; tmp.pub.max_bssid_indicator = non_tx_data->max_bssid_indicator; + } else { + ts = jiffies; } /* @@ -1425,8 +1436,7 @@ cfg80211_inform_single_bss_data(struct wiphy *wiphy, signal_valid = abs(data->chan->center_freq - channel->center_freq) <= wiphy->max_adj_channel_rssi_comp; - res = cfg80211_bss_update(wiphy_to_rdev(wiphy), &tmp, signal_valid, - jiffies); + res = cfg80211_bss_update(wiphy_to_rdev(wiphy), &tmp, signal_valid, ts); if (!res) return NULL; @@ -1440,7 +1450,7 @@ cfg80211_inform_single_bss_data(struct wiphy *wiphy, regulatory_hint_found_beacon(wiphy, channel, gfp); } - if (non_tx_data && non_tx_data->tx_bss) { + if (non_tx_data) { /* this is a nontransmitting bss, we need to add it to * transmitting bss' list if it is not there */ @@ -1659,6 +1669,8 @@ cfg80211_inform_bss_data(struct wiphy *wiphy, res = cfg80211_inform_single_bss_data(wiphy, data, ftype, bssid, tsf, capability, beacon_interval, ie, ielen, NULL, gfp); + if (!res) + return NULL; non_tx_data.tx_bss = res; cfg80211_parse_mbssid_data(wiphy, data, ftype, bssid, tsf, beacon_interval, ie, ielen, &non_tx_data, @@ -1776,7 +1788,6 @@ static struct cfg80211_bss * cfg80211_inform_single_bss_frame_data(struct wiphy *wiphy, struct cfg80211_inform_bss *data, struct ieee80211_mgmt *mgmt, size_t len, - struct cfg80211_non_tx_bss *non_tx_data, gfp_t gfp) { struct cfg80211_internal_bss tmp = {}, *res; @@ -1835,11 +1846,6 @@ cfg80211_inform_single_bss_frame_data(struct wiphy *wiphy, tmp.pub.chains = 
data->chains; memcpy(tmp.pub.chain_signal, data->chain_signal, IEEE80211_MAX_CHAINS); ether_addr_copy(tmp.parent_bssid, data->parent_bssid); - if (non_tx_data) { - tmp.pub.transmitted_bss = non_tx_data->tx_bss; - tmp.pub.bssid_index = non_tx_data->bssid_index; - tmp.pub.max_bssid_indicator = non_tx_data->max_bssid_indicator; - } signal_valid = abs(data->chan->center_freq - channel->center_freq) <= wiphy->max_adj_channel_rssi_comp; @@ -1877,7 +1883,7 @@ cfg80211_inform_bss_frame_data(struct wiphy *wiphy, struct cfg80211_non_tx_bss non_tx_data; res = cfg80211_inform_single_bss_frame_data(wiphy, data, mgmt, - len, NULL, gfp); + len, gfp); if (!res || !wiphy->support_mbssid || !cfg80211_find_ie(WLAN_EID_MULTIPLE_BSSID, ie, ielen)) return res; @@ -1995,6 +2001,85 @@ void cfg80211_bss_iter(struct wiphy *wiphy, } EXPORT_SYMBOL(cfg80211_bss_iter); +void cfg80211_update_assoc_bss_entry(struct wireless_dev *wdev, + struct ieee80211_channel *chan) +{ + struct wiphy *wiphy = wdev->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); + struct cfg80211_internal_bss *cbss = wdev->current_bss; + struct cfg80211_internal_bss *new = NULL; + struct cfg80211_internal_bss *bss; + struct cfg80211_bss *nontrans_bss; + struct cfg80211_bss *tmp; + + spin_lock_bh(&rdev->bss_lock); + + if (WARN_ON(cbss->pub.channel == chan)) + goto done; + + /* use transmitting bss */ + if (cbss->pub.transmitted_bss) + cbss = container_of(cbss->pub.transmitted_bss, + struct cfg80211_internal_bss, + pub); + + cbss->pub.channel = chan; + + list_for_each_entry(bss, &rdev->bss_list, list) { + if (!cfg80211_bss_type_match(bss->pub.capability, + bss->pub.channel->band, + wdev->conn_bss_type)) + continue; + + if (bss == cbss) + continue; + + if (!cmp_bss(&bss->pub, &cbss->pub, BSS_CMP_REGULAR)) { + new = bss; + break; + } + } + + if (new) { + /* to save time, update IEs for transmitting bss only */ + if (cfg80211_update_known_bss(rdev, cbss, new, false)) { + new->pub.proberesp_ies = NULL; + new->pub.beacon_ies = NULL; + } + + list_for_each_entry_safe(nontrans_bss, tmp, + &new->pub.nontrans_list, + nontrans_list) { + bss = container_of(nontrans_bss, + struct cfg80211_internal_bss, pub); + if (__cfg80211_unlink_bss(rdev, bss)) + rdev->bss_generation++; + } + + WARN_ON(atomic_read(&new->hold)); + if (!WARN_ON(!__cfg80211_unlink_bss(rdev, new))) + rdev->bss_generation++; + } + + rb_erase(&cbss->rbn, &rdev->bss_tree); + rb_insert_bss(rdev, cbss); + rdev->bss_generation++; + + list_for_each_entry_safe(nontrans_bss, tmp, + &cbss->pub.nontrans_list, + nontrans_list) { + bss = container_of(nontrans_bss, + struct cfg80211_internal_bss, pub); + bss->pub.channel = chan; + rb_erase(&bss->rbn, &rdev->bss_tree); + rb_insert_bss(rdev, bss); + rdev->bss_generation++; + } + +done: + spin_unlock_bh(&rdev->bss_lock); +} + #ifdef CONFIG_CFG80211_WEXT static struct cfg80211_registered_device * cfg80211_get_dev_from_ifindex(struct net *net, int ifindex) diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 4fbb91a511ae..d98ad2b3143b 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -2446,10 +2446,11 @@ TRACE_EVENT(rdev_set_mcast_rate, sizeof(int) * NUM_NL80211_BANDS); ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " - "mcast_rates [2.4GHz=0x%x, 5.2GHz=0x%x, 60GHz=0x%x]", + "mcast_rates [2.4GHz=0x%x, 5.2GHz=0x%x, 6GHz=0x%x, 60GHz=0x%x]", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->mcast_rate[NL80211_BAND_2GHZ], __entry->mcast_rate[NL80211_BAND_5GHZ], + __entry->mcast_rate[NL80211_BAND_6GHZ], 
__entry->mcast_rate[NL80211_BAND_60GHZ]) ); diff --git a/net/wireless/util.c b/net/wireless/util.c index e74837824cea..419eb12c1e93 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -91,6 +91,11 @@ int ieee80211_channel_to_frequency(int chan, enum nl80211_band band) else return 5000 + chan * 5; break; + case NL80211_BAND_6GHZ: + /* see 802.11ax D4.1 27.3.22.2 */ + if (chan <= 253) + return 5940 + chan * 5; + break; case NL80211_BAND_60GHZ: if (chan < 7) return 56160 + chan * 2160; @@ -111,8 +116,11 @@ int ieee80211_frequency_to_channel(int freq) return (freq - 2407) / 5; else if (freq >= 4910 && freq <= 4980) return (freq - 4000) / 5; - else if (freq <= 45000) /* DMG band lower limit */ + else if (freq < 5945) return (freq - 5000) / 5; + else if (freq <= 45000) /* DMG band lower limit */ + /* see 802.11ax D4.1 27.3.22.2 */ + return (freq - 5940) / 5; else if (freq >= 58320 && freq <= 70200) return (freq - 56160) / 2160; else @@ -148,6 +156,7 @@ static void set_mandatory_flags_band(struct ieee80211_supported_band *sband) switch (sband->band) { case NL80211_BAND_5GHZ: + case NL80211_BAND_6GHZ: want = 3; for (i = 0; i < sband->n_bitrates; i++) { if (sband->bitrates[i].bitrate == 60 || @@ -960,6 +969,7 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, } cfg80211_process_rdev_events(rdev); + cfg80211_mlme_purge_registrations(dev->ieee80211_ptr); } err = rdev_change_virtual_intf(rdev, dev, ntype, params); @@ -1039,7 +1049,7 @@ static u32 cfg80211_calculate_bitrate_ht(struct rate_info *rate) return (bitrate + 50000) / 100000; } -static u32 cfg80211_calculate_bitrate_60g(struct rate_info *rate) +static u32 cfg80211_calculate_bitrate_dmg(struct rate_info *rate) { static const u32 __mcs2bitrate[] = { /* control PHY */ @@ -1086,6 +1096,40 @@ static u32 cfg80211_calculate_bitrate_60g(struct rate_info *rate) return __mcs2bitrate[rate->mcs]; } +static u32 cfg80211_calculate_bitrate_edmg(struct rate_info *rate) +{ + static const u32 __mcs2bitrate[] = { + /* control PHY */ + [0] = 275, + /* SC PHY */ + [1] = 3850, + [2] = 7700, + [3] = 9625, + [4] = 11550, + [5] = 12512, /* 1251.25 mbps */ + [6] = 13475, + [7] = 15400, + [8] = 19250, + [9] = 23100, + [10] = 25025, + [11] = 26950, + [12] = 30800, + [13] = 38500, + [14] = 46200, + [15] = 50050, + [16] = 53900, + [17] = 57750, + [18] = 69300, + [19] = 75075, + [20] = 80850, + }; + + if (WARN_ON_ONCE(rate->mcs >= ARRAY_SIZE(__mcs2bitrate))) + return 0; + + return __mcs2bitrate[rate->mcs] * rate->n_bonded_ch; +} + static u32 cfg80211_calculate_bitrate_vht(struct rate_info *rate) { static const u32 base[4][10] = { @@ -1258,8 +1302,10 @@ u32 cfg80211_calculate_bitrate(struct rate_info *rate) { if (rate->flags & RATE_INFO_FLAGS_MCS) return cfg80211_calculate_bitrate_ht(rate); - if (rate->flags & RATE_INFO_FLAGS_60G) - return cfg80211_calculate_bitrate_60g(rate); + if (rate->flags & RATE_INFO_FLAGS_DMG) + return cfg80211_calculate_bitrate_dmg(rate); + if (rate->flags & RATE_INFO_FLAGS_EDMG) + return cfg80211_calculate_bitrate_edmg(rate); if (rate->flags & RATE_INFO_FLAGS_VHT_MCS) return cfg80211_calculate_bitrate_vht(rate); if (rate->flags & RATE_INFO_FLAGS_HE_MCS) @@ -1471,6 +1517,9 @@ bool ieee80211_operating_class_to_band(u8 operating_class, case 128 ... 130: *band = NL80211_BAND_5GHZ; return true; + case 131 ... 
135: + *band = NL80211_BAND_6GHZ; + return true; case 81: case 82: case 83: diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index 46e4d69db845..7b6529d81c61 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -7,6 +7,7 @@ * we directly assign the wireless handlers of wireless interfaces. * * Copyright 2008-2009 Johannes Berg <johannes@sipsolutions.net> + * Copyright (C) 2019 Intel Corporation */ #include <linux/export.h> @@ -864,8 +865,8 @@ static int cfg80211_wext_siwtxpower(struct net_device *dev, } } } else { - rfkill_set_sw_state(rdev->rfkill, true); - schedule_work(&rdev->rfkill_sync); + if (rfkill_set_sw_state(rdev->rfkill, true)) + schedule_work(&rdev->rfkill_block); return 0; } diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c index 688aac7a6943..947b8ff0227e 100644 --- a/net/xdp/xdp_umem.c +++ b/net/xdp/xdp_umem.c @@ -14,6 +14,7 @@ #include <linux/netdevice.h> #include <linux/rtnetlink.h> #include <linux/idr.h> +#include <linux/vmalloc.h> #include "xdp_umem.h" #include "xsk_queue.h" @@ -105,14 +106,22 @@ int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev, umem->dev = dev; umem->queue_id = queue_id; + if (flags & XDP_USE_NEED_WAKEUP) { + umem->flags |= XDP_UMEM_USES_NEED_WAKEUP; + /* Tx needs to be explicitly woken up the first time. + * Also for supporting drivers that do not implement this + * feature. They will always have to call sendto(). + */ + xsk_set_tx_need_wakeup(umem); + } + dev_hold(dev); if (force_copy) /* For copy-mode, we are done. */ return 0; - if (!dev->netdev_ops->ndo_bpf || - !dev->netdev_ops->ndo_xsk_async_xmit) { + if (!dev->netdev_ops->ndo_bpf || !dev->netdev_ops->ndo_xsk_wakeup) { err = -EOPNOTSUPP; goto err_unreg_umem; } @@ -164,6 +173,37 @@ void xdp_umem_clear_dev(struct xdp_umem *umem) umem->zc = false; } +static void xdp_umem_unmap_pages(struct xdp_umem *umem) +{ + unsigned int i; + + for (i = 0; i < umem->npgs; i++) + if (PageHighMem(umem->pgs[i])) + vunmap(umem->pages[i].addr); +} + +static int xdp_umem_map_pages(struct xdp_umem *umem) +{ + unsigned int i; + void *addr; + + for (i = 0; i < umem->npgs; i++) { + if (PageHighMem(umem->pgs[i])) + addr = vmap(&umem->pgs[i], 1, VM_MAP, PAGE_KERNEL); + else + addr = page_address(umem->pgs[i]); + + if (!addr) { + xdp_umem_unmap_pages(umem); + return -ENOMEM; + } + + umem->pages[i].addr = addr; + } + + return 0; +} + static void xdp_umem_unpin_pages(struct xdp_umem *umem) { unsigned int i; @@ -207,6 +247,7 @@ static void xdp_umem_release(struct xdp_umem *umem) xsk_reuseq_destroy(umem); + xdp_umem_unmap_pages(umem); xdp_umem_unpin_pages(umem); kfree(umem->pages); @@ -299,10 +340,11 @@ static int xdp_umem_account_pages(struct xdp_umem *umem) static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) { + bool unaligned_chunks = mr->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG; u32 chunk_size = mr->chunk_size, headroom = mr->headroom; unsigned int chunks, chunks_per_page; u64 addr = mr->addr, size = mr->len; - int size_chk, err, i; + int size_chk, err; if (chunk_size < XDP_UMEM_MIN_CHUNK_SIZE || chunk_size > PAGE_SIZE) { /* Strictly speaking we could support this, if: @@ -314,7 +356,11 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) return -EINVAL; } - if (!is_power_of_2(chunk_size)) + if (mr->flags & ~(XDP_UMEM_UNALIGNED_CHUNK_FLAG | + XDP_UMEM_USES_NEED_WAKEUP)) + return -EINVAL; + + if (!unaligned_chunks && !is_power_of_2(chunk_size)) return -EINVAL; if (!PAGE_ALIGNED(addr)) { @@ -331,9 +377,11 @@ static int 
xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) if (chunks == 0) return -EINVAL; - chunks_per_page = PAGE_SIZE / chunk_size; - if (chunks < chunks_per_page || chunks % chunks_per_page) - return -EINVAL; + if (!unaligned_chunks) { + chunks_per_page = PAGE_SIZE / chunk_size; + if (chunks < chunks_per_page || chunks % chunks_per_page) + return -EINVAL; + } headroom = ALIGN(headroom, 64); @@ -342,13 +390,15 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) return -EINVAL; umem->address = (unsigned long)addr; - umem->chunk_mask = ~((u64)chunk_size - 1); + umem->chunk_mask = unaligned_chunks ? XSK_UNALIGNED_BUF_ADDR_MASK + : ~((u64)chunk_size - 1); umem->size = size; umem->headroom = headroom; umem->chunk_size_nohr = chunk_size - headroom; umem->npgs = size / PAGE_SIZE; umem->pgs = NULL; umem->user = NULL; + umem->flags = mr->flags; INIT_LIST_HEAD(&umem->xsk_list); spin_lock_init(&umem->xsk_list_lock); @@ -368,10 +418,11 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) goto out_pin; } - for (i = 0; i < umem->npgs; i++) - umem->pages[i].addr = page_address(umem->pgs[i]); + err = xdp_umem_map_pages(umem); + if (!err) + return 0; - return 0; + kfree(umem->pages); out_pin: xdp_umem_unpin_pages(umem); diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 59b57d708697..c2f1af3b6a7c 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -45,7 +45,7 @@ EXPORT_SYMBOL(xsk_umem_has_addrs); u64 *xsk_umem_peek_addr(struct xdp_umem *umem, u64 *addr) { - return xskq_peek_addr(umem->fq, addr); + return xskq_peek_addr(umem->fq, addr, umem); } EXPORT_SYMBOL(xsk_umem_peek_addr); @@ -55,21 +55,103 @@ void xsk_umem_discard_addr(struct xdp_umem *umem) } EXPORT_SYMBOL(xsk_umem_discard_addr); +void xsk_set_rx_need_wakeup(struct xdp_umem *umem) +{ + if (umem->need_wakeup & XDP_WAKEUP_RX) + return; + + umem->fq->ring->flags |= XDP_RING_NEED_WAKEUP; + umem->need_wakeup |= XDP_WAKEUP_RX; +} +EXPORT_SYMBOL(xsk_set_rx_need_wakeup); + +void xsk_set_tx_need_wakeup(struct xdp_umem *umem) +{ + struct xdp_sock *xs; + + if (umem->need_wakeup & XDP_WAKEUP_TX) + return; + + rcu_read_lock(); + list_for_each_entry_rcu(xs, &umem->xsk_list, list) { + xs->tx->ring->flags |= XDP_RING_NEED_WAKEUP; + } + rcu_read_unlock(); + + umem->need_wakeup |= XDP_WAKEUP_TX; +} +EXPORT_SYMBOL(xsk_set_tx_need_wakeup); + +void xsk_clear_rx_need_wakeup(struct xdp_umem *umem) +{ + if (!(umem->need_wakeup & XDP_WAKEUP_RX)) + return; + + umem->fq->ring->flags &= ~XDP_RING_NEED_WAKEUP; + umem->need_wakeup &= ~XDP_WAKEUP_RX; +} +EXPORT_SYMBOL(xsk_clear_rx_need_wakeup); + +void xsk_clear_tx_need_wakeup(struct xdp_umem *umem) +{ + struct xdp_sock *xs; + + if (!(umem->need_wakeup & XDP_WAKEUP_TX)) + return; + + rcu_read_lock(); + list_for_each_entry_rcu(xs, &umem->xsk_list, list) { + xs->tx->ring->flags &= ~XDP_RING_NEED_WAKEUP; + } + rcu_read_unlock(); + + umem->need_wakeup &= ~XDP_WAKEUP_TX; +} +EXPORT_SYMBOL(xsk_clear_tx_need_wakeup); + +bool xsk_umem_uses_need_wakeup(struct xdp_umem *umem) +{ + return umem->flags & XDP_UMEM_USES_NEED_WAKEUP; +} +EXPORT_SYMBOL(xsk_umem_uses_need_wakeup); + +/* If a buffer crosses a page boundary, we need to do 2 memcpy's, one for + * each page. This is only required in copy mode. 
+ */ +static void __xsk_rcv_memcpy(struct xdp_umem *umem, u64 addr, void *from_buf, + u32 len, u32 metalen) +{ + void *to_buf = xdp_umem_get_data(umem, addr); + + addr = xsk_umem_add_offset_to_addr(addr); + if (xskq_crosses_non_contig_pg(umem, addr, len + metalen)) { + void *next_pg_addr = umem->pages[(addr >> PAGE_SHIFT) + 1].addr; + u64 page_start = addr & ~(PAGE_SIZE - 1); + u64 first_len = PAGE_SIZE - (addr - page_start); + + memcpy(to_buf, from_buf, first_len + metalen); + memcpy(next_pg_addr, from_buf + first_len, len - first_len); + + return; + } + + memcpy(to_buf, from_buf, len + metalen); +} + static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len) { - void *to_buf, *from_buf; + u64 offset = xs->umem->headroom; + u64 addr, memcpy_addr; + void *from_buf; u32 metalen; - u64 addr; int err; - if (!xskq_peek_addr(xs->umem->fq, &addr) || + if (!xskq_peek_addr(xs->umem->fq, &addr, xs->umem) || len > xs->umem->chunk_size_nohr - XDP_PACKET_HEADROOM) { xs->rx_dropped++; return -ENOSPC; } - addr += xs->umem->headroom; - if (unlikely(xdp_data_meta_unsupported(xdp))) { from_buf = xdp->data; metalen = 0; @@ -78,9 +160,11 @@ static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len) metalen = xdp->data - xdp->data_meta; } - to_buf = xdp_umem_get_data(xs->umem, addr); - memcpy(to_buf, from_buf, len + metalen); - addr += metalen; + memcpy_addr = xsk_umem_adjust_offset(xs->umem, addr, offset); + __xsk_rcv_memcpy(xs->umem, memcpy_addr, from_buf, len, metalen); + + offset += metalen; + addr = xsk_umem_adjust_offset(xs->umem, addr, offset); err = xskq_produce_batch_desc(xs->rx, addr, len); if (!err) { xskq_discard_addr(xs->umem->fq); @@ -102,10 +186,23 @@ static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len) return err; } +static bool xsk_is_bound(struct xdp_sock *xs) +{ + if (READ_ONCE(xs->state) == XSK_BOUND) { + /* Matches smp_wmb() in bind(). 
*/ + smp_rmb(); + return true; + } + return false; +} + int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp) { u32 len; + if (!xsk_is_bound(xs)) + return -EINVAL; + if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index) return -EINVAL; @@ -125,6 +222,7 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp) { u32 metalen = xdp->data - xdp->data_meta; u32 len = xdp->data_end - xdp->data; + u64 offset = xs->umem->headroom; void *buffer; u64 addr; int err; @@ -136,17 +234,17 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp) goto out_unlock; } - if (!xskq_peek_addr(xs->umem->fq, &addr) || + if (!xskq_peek_addr(xs->umem->fq, &addr, xs->umem) || len > xs->umem->chunk_size_nohr - XDP_PACKET_HEADROOM) { err = -ENOSPC; goto out_drop; } - addr += xs->umem->headroom; - + addr = xsk_umem_adjust_offset(xs->umem, addr, offset); buffer = xdp_umem_get_data(xs->umem, addr); memcpy(buffer, xdp->data_meta, len + metalen); - addr += metalen; + + addr = xsk_umem_adjust_offset(xs->umem, addr, metalen); err = xskq_produce_batch_desc(xs->rx, addr, len); if (err) goto out_drop; @@ -190,7 +288,7 @@ bool xsk_umem_consume_tx(struct xdp_umem *umem, struct xdp_desc *desc) rcu_read_lock(); list_for_each_entry_rcu(xs, &umem->xsk_list, list) { - if (!xskq_peek_desc(xs->tx, desc)) + if (!xskq_peek_desc(xs->tx, desc, umem)) continue; if (xskq_produce_addr_lazy(umem->cq, desc->addr)) @@ -212,7 +310,8 @@ static int xsk_zc_xmit(struct sock *sk) struct xdp_sock *xs = xdp_sk(sk); struct net_device *dev = xs->dev; - return dev->netdev_ops->ndo_xsk_async_xmit(dev, xs->queue_id); + return dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id, + XDP_WAKEUP_TX); } static void xsk_destruct_skb(struct sk_buff *skb) @@ -243,7 +342,7 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m, if (xs->queue_id >= xs->dev->real_num_tx_queues) goto out; - while (xskq_peek_desc(xs->tx, &desc)) { + while (xskq_peek_desc(xs->tx, &desc, xs->umem)) { char *buffer; u64 addr; u32 len; @@ -272,7 +371,7 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m, skb->dev = xs->dev; skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; - skb_shinfo(skb)->destructor_arg = (void *)(long)addr; + skb_shinfo(skb)->destructor_arg = (void *)(long)desc.addr; skb->destructor = xsk_destruct_skb; err = dev_direct_xmit(skb, xs->queue_id); @@ -301,7 +400,7 @@ static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len) struct sock *sk = sock->sk; struct xdp_sock *xs = xdp_sk(sk); - if (unlikely(!xs->dev)) + if (unlikely(!xsk_is_bound(xs))) return -ENXIO; if (unlikely(!(xs->dev->flags & IFF_UP))) return -ENETDOWN; @@ -317,8 +416,19 @@ static unsigned int xsk_poll(struct file *file, struct socket *sock, struct poll_table_struct *wait) { unsigned int mask = datagram_poll(file, sock, wait); - struct sock *sk = sock->sk; - struct xdp_sock *xs = xdp_sk(sk); + struct xdp_sock *xs = xdp_sk(sock->sk); + struct net_device *dev; + struct xdp_umem *umem; + + if (unlikely(!xsk_is_bound(xs))) + return mask; + + dev = xs->dev; + umem = xs->umem; + + if (umem->need_wakeup) + dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id, + umem->need_wakeup); if (xs->rx && !xskq_empty_desc(xs->rx)) mask |= POLLIN | POLLRDNORM; @@ -342,7 +452,7 @@ static int xsk_init_queue(u32 entries, struct xsk_queue **queue, /* Make sure queue is ready before it can be seen by others */ smp_wmb(); - *queue = q; + WRITE_ONCE(*queue, q); return 0; } @@ -350,10 +460,9 @@ static void xsk_unbind_dev(struct xdp_sock *xs) { struct 
net_device *dev = xs->dev; - if (!dev || xs->state != XSK_BOUND) + if (xs->state != XSK_BOUND) return; - - xs->state = XSK_UNBOUND; + WRITE_ONCE(xs->state, XSK_UNBOUND); /* Wait for driver to stop using the xdp socket. */ xdp_del_sk_umem(xs->umem, xs); @@ -362,6 +471,52 @@ static void xsk_unbind_dev(struct xdp_sock *xs) dev_put(dev); } +static struct xsk_map *xsk_get_map_list_entry(struct xdp_sock *xs, + struct xdp_sock ***map_entry) +{ + struct xsk_map *map = NULL; + struct xsk_map_node *node; + + *map_entry = NULL; + + spin_lock_bh(&xs->map_list_lock); + node = list_first_entry_or_null(&xs->map_list, struct xsk_map_node, + node); + if (node) { + WARN_ON(xsk_map_inc(node->map)); + map = node->map; + *map_entry = node->map_entry; + } + spin_unlock_bh(&xs->map_list_lock); + return map; +} + +static void xsk_delete_from_maps(struct xdp_sock *xs) +{ + /* This function removes the current XDP socket from all the + * maps it resides in. We need to take extra care here, due to + * the two locks involved. Each map has a lock synchronizing + * updates to the entries, and each socket has a lock that + * synchronizes access to the list of maps (map_list). For + * deadlock avoidance the locks need to be taken in the order + * "map lock"->"socket map list lock". We start off by + * accessing the socket map list, and take a reference to the + * map to guarantee existence between the + * xsk_get_map_list_entry() and xsk_map_try_sock_delete() + * calls. Then we ask the map to remove the socket, which + * tries to remove the socket from the map. Note that there + * might be updates to the map between + * xsk_get_map_list_entry() and xsk_map_try_sock_delete(). + */ + struct xdp_sock **map_entry = NULL; + struct xsk_map *map; + + while ((map = xsk_get_map_list_entry(xs, &map_entry))) { + xsk_map_try_sock_delete(map, xs, map_entry); + xsk_map_put(map); + } +} + static int xsk_release(struct socket *sock) { struct sock *sk = sock->sk; @@ -381,7 +536,10 @@ static int xsk_release(struct socket *sock) sock_prot_inuse_add(net, sk->sk_prot, -1); local_bh_enable(); + xsk_delete_from_maps(xs); + mutex_lock(&xs->mutex); xsk_unbind_dev(xs); + mutex_unlock(&xs->mutex); xskq_destroy(xs->rx); xskq_destroy(xs->tx); @@ -412,6 +570,24 @@ static struct socket *xsk_lookup_xsk_from_fd(int fd) return sock; } +/* Check if umem pages are contiguous. + * If zero-copy mode, use the DMA address to do the page contiguity check + * For all other modes we use addr (kernel virtual address) + * Store the result in the low bits of addr. + */ +static void xsk_check_page_contiguity(struct xdp_umem *umem, u32 flags) +{ + struct xdp_umem_page *pgs = umem->pages; + int i, is_contig; + + for (i = 0; i < umem->npgs - 1; i++) { + is_contig = (flags & XDP_ZEROCOPY) ? 
+ (pgs[i].dma + PAGE_SIZE == pgs[i + 1].dma) : + (pgs[i].addr + PAGE_SIZE == pgs[i + 1].addr); + pgs[i].addr += is_contig << XSK_NEXT_PG_CONTIG_SHIFT; + } +} + static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len) { struct sockaddr_xdp *sxdp = (struct sockaddr_xdp *)addr; @@ -427,7 +603,8 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len) return -EINVAL; flags = sxdp->sxdp_flags; - if (flags & ~(XDP_SHARED_UMEM | XDP_COPY | XDP_ZEROCOPY)) + if (flags & ~(XDP_SHARED_UMEM | XDP_COPY | XDP_ZEROCOPY | + XDP_USE_NEED_WAKEUP)) return -EINVAL; rtnl_lock(); @@ -454,7 +631,8 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len) struct xdp_sock *umem_xs; struct socket *sock; - if ((flags & XDP_COPY) || (flags & XDP_ZEROCOPY)) { + if ((flags & XDP_COPY) || (flags & XDP_ZEROCOPY) || + (flags & XDP_USE_NEED_WAKEUP)) { /* Cannot specify flags for shared sockets. */ err = -EINVAL; goto out_unlock; @@ -473,19 +651,19 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len) } umem_xs = xdp_sk(sock->sk); - if (!umem_xs->umem) { - /* No umem to inherit. */ + if (!xsk_is_bound(umem_xs)) { err = -EBADF; sockfd_put(sock); goto out_unlock; - } else if (umem_xs->dev != dev || umem_xs->queue_id != qid) { + } + if (umem_xs->dev != dev || umem_xs->queue_id != qid) { err = -EINVAL; sockfd_put(sock); goto out_unlock; } xdp_get_umem(umem_xs->umem); - xs->umem = umem_xs->umem; + WRITE_ONCE(xs->umem, umem_xs->umem); sockfd_put(sock); } else if (!xs->umem || !xdp_umem_validate_queues(xs->umem)) { err = -EINVAL; @@ -500,6 +678,8 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len) err = xdp_umem_assign_dev(xs->umem, dev, qid, flags); if (err) goto out_unlock; + + xsk_check_page_contiguity(xs->umem, flags); } xs->dev = dev; @@ -510,16 +690,28 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len) xdp_add_sk_umem(xs->umem, xs); out_unlock: - if (err) + if (err) { dev_put(dev); - else - xs->state = XSK_BOUND; + } else { + /* Matches smp_rmb() in bind() for shared umem + * sockets, and xsk_is_bound(). + */ + smp_wmb(); + WRITE_ONCE(xs->state, XSK_BOUND); + } out_release: mutex_unlock(&xs->mutex); rtnl_unlock(); return err; } +struct xdp_umem_reg_v1 { + __u64 addr; /* Start of packet data area */ + __u64 len; /* Length of packet data area */ + __u32 chunk_size; + __u32 headroom; +}; + static int xsk_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen) { @@ -549,15 +741,24 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname, } q = (optname == XDP_TX_RING) ? 
&xs->tx : &xs->rx; err = xsk_init_queue(entries, q, false); + if (!err && optname == XDP_TX_RING) + /* Tx needs to be explicitly woken up the first time */ + xs->tx->ring->flags |= XDP_RING_NEED_WAKEUP; mutex_unlock(&xs->mutex); return err; } case XDP_UMEM_REG: { - struct xdp_umem_reg mr; + size_t mr_size = sizeof(struct xdp_umem_reg); + struct xdp_umem_reg mr = {}; struct xdp_umem *umem; - if (copy_from_user(&mr, optval, sizeof(mr))) + if (optlen < sizeof(struct xdp_umem_reg_v1)) + return -EINVAL; + else if (optlen < sizeof(mr)) + mr_size = sizeof(struct xdp_umem_reg_v1); + + if (copy_from_user(&mr, optval, mr_size)) return -EFAULT; mutex_lock(&xs->mutex); @@ -574,7 +775,7 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname, /* Make sure umem is ready before it can be seen by others */ smp_wmb(); - xs->umem = umem; + WRITE_ONCE(xs->umem, umem); mutex_unlock(&xs->mutex); return 0; } @@ -610,6 +811,20 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname, return -ENOPROTOOPT; } +static void xsk_enter_rxtx_offsets(struct xdp_ring_offset_v1 *ring) +{ + ring->producer = offsetof(struct xdp_rxtx_ring, ptrs.producer); + ring->consumer = offsetof(struct xdp_rxtx_ring, ptrs.consumer); + ring->desc = offsetof(struct xdp_rxtx_ring, desc); +} + +static void xsk_enter_umem_offsets(struct xdp_ring_offset_v1 *ring) +{ + ring->producer = offsetof(struct xdp_umem_ring, ptrs.producer); + ring->consumer = offsetof(struct xdp_umem_ring, ptrs.consumer); + ring->desc = offsetof(struct xdp_umem_ring, desc); +} + static int xsk_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) { @@ -649,26 +864,49 @@ static int xsk_getsockopt(struct socket *sock, int level, int optname, case XDP_MMAP_OFFSETS: { struct xdp_mmap_offsets off; + struct xdp_mmap_offsets_v1 off_v1; + bool flags_supported = true; + void *to_copy; - if (len < sizeof(off)) + if (len < sizeof(off_v1)) return -EINVAL; + else if (len < sizeof(off)) + flags_supported = false; + + if (flags_supported) { + /* xdp_ring_offset is identical to xdp_ring_offset_v1 + * except for the flags field added to the end. 
+ */ + xsk_enter_rxtx_offsets((struct xdp_ring_offset_v1 *) + &off.rx); + xsk_enter_rxtx_offsets((struct xdp_ring_offset_v1 *) + &off.tx); + xsk_enter_umem_offsets((struct xdp_ring_offset_v1 *) + &off.fr); + xsk_enter_umem_offsets((struct xdp_ring_offset_v1 *) + &off.cr); + off.rx.flags = offsetof(struct xdp_rxtx_ring, + ptrs.flags); + off.tx.flags = offsetof(struct xdp_rxtx_ring, + ptrs.flags); + off.fr.flags = offsetof(struct xdp_umem_ring, + ptrs.flags); + off.cr.flags = offsetof(struct xdp_umem_ring, + ptrs.flags); + + len = sizeof(off); + to_copy = &off; + } else { + xsk_enter_rxtx_offsets(&off_v1.rx); + xsk_enter_rxtx_offsets(&off_v1.tx); + xsk_enter_umem_offsets(&off_v1.fr); + xsk_enter_umem_offsets(&off_v1.cr); + + len = sizeof(off_v1); + to_copy = &off_v1; + } - off.rx.producer = offsetof(struct xdp_rxtx_ring, ptrs.producer); - off.rx.consumer = offsetof(struct xdp_rxtx_ring, ptrs.consumer); - off.rx.desc = offsetof(struct xdp_rxtx_ring, desc); - off.tx.producer = offsetof(struct xdp_rxtx_ring, ptrs.producer); - off.tx.consumer = offsetof(struct xdp_rxtx_ring, ptrs.consumer); - off.tx.desc = offsetof(struct xdp_rxtx_ring, desc); - - off.fr.producer = offsetof(struct xdp_umem_ring, ptrs.producer); - off.fr.consumer = offsetof(struct xdp_umem_ring, ptrs.consumer); - off.fr.desc = offsetof(struct xdp_umem_ring, desc); - off.cr.producer = offsetof(struct xdp_umem_ring, ptrs.producer); - off.cr.consumer = offsetof(struct xdp_umem_ring, ptrs.consumer); - off.cr.desc = offsetof(struct xdp_umem_ring, desc); - - len = sizeof(off); - if (copy_to_user(optval, &off, len)) + if (copy_to_user(optval, to_copy, len)) return -EFAULT; if (put_user(len, optlen)) return -EFAULT; @@ -713,7 +951,7 @@ static int xsk_mmap(struct file *file, struct socket *sock, unsigned long pfn; struct page *qpg; - if (xs->state != XSK_READY) + if (READ_ONCE(xs->state) != XSK_READY) return -EBUSY; if (offset == XDP_PGOFF_RX_RING) { @@ -855,6 +1093,9 @@ static int xsk_create(struct net *net, struct socket *sock, int protocol, spin_lock_init(&xs->rx_lock); spin_lock_init(&xs->tx_completion_lock); + INIT_LIST_HEAD(&xs->map_list); + spin_lock_init(&xs->map_list_lock); + mutex_lock(&net->xdp.lock); sk_add_node_rcu(sk, &net->xdp.list); mutex_unlock(&net->xdp.lock); diff --git a/net/xdp/xsk.h b/net/xdp/xsk.h index ba8120610426..4cfd106bdb53 100644 --- a/net/xdp/xsk.h +++ b/net/xdp/xsk.h @@ -4,6 +4,19 @@ #ifndef XSK_H_ #define XSK_H_ +struct xdp_ring_offset_v1 { + __u64 producer; + __u64 consumer; + __u64 desc; +}; + +struct xdp_mmap_offsets_v1 { + struct xdp_ring_offset_v1 rx; + struct xdp_ring_offset_v1 tx; + struct xdp_ring_offset_v1 fr; + struct xdp_ring_offset_v1 cr; +}; + static inline struct xdp_sock *xdp_sk(struct sock *sk) { return (struct xdp_sock *)sk; diff --git a/net/xdp/xsk_diag.c b/net/xdp/xsk_diag.c index d5e06c8e0cbf..f59791ba43a0 100644 --- a/net/xdp/xsk_diag.c +++ b/net/xdp/xsk_diag.c @@ -56,7 +56,7 @@ static int xsk_diag_put_umem(const struct xdp_sock *xs, struct sk_buff *nlskb) du.id = umem->id; du.size = umem->size; du.num_pages = umem->npgs; - du.chunk_size = (__u32)(~umem->chunk_mask + 1); + du.chunk_size = umem->chunk_size_nohr + umem->headroom; du.headroom = umem->headroom; du.ifindex = umem->dev ? 
umem->dev->ifindex : 0; du.queue_id = umem->queue_id; @@ -97,6 +97,7 @@ static int xsk_diag_fill(struct sock *sk, struct sk_buff *nlskb, msg->xdiag_ino = sk_ino; sock_diag_save_cookie(sk, msg->xdiag_cookie); + mutex_lock(&xs->mutex); if ((req->xdiag_show & XDP_SHOW_INFO) && xsk_diag_put_info(xs, nlskb)) goto out_nlmsg_trim; @@ -117,10 +118,12 @@ static int xsk_diag_fill(struct sock *sk, struct sk_buff *nlskb, sock_diag_put_meminfo(sk, nlskb, XDP_DIAG_MEMINFO)) goto out_nlmsg_trim; + mutex_unlock(&xs->mutex); nlmsg_end(nlskb, nlh); return 0; out_nlmsg_trim: + mutex_unlock(&xs->mutex); nlmsg_cancel(nlskb, nlh); return -EMSGSIZE; } diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h index 909c5168ed0f..eddae4688862 100644 --- a/net/xdp/xsk_queue.h +++ b/net/xdp/xsk_queue.h @@ -16,6 +16,7 @@ struct xdp_ring { u32 producer ____cacheline_aligned_in_smp; u32 consumer ____cacheline_aligned_in_smp; + u32 flags; }; /* Used for the RX and TX queues for packets */ @@ -133,6 +134,17 @@ static inline bool xskq_has_addrs(struct xsk_queue *q, u32 cnt) /* UMEM queue */ +static inline bool xskq_crosses_non_contig_pg(struct xdp_umem *umem, u64 addr, + u64 length) +{ + bool cross_pg = (addr & (PAGE_SIZE - 1)) + length > PAGE_SIZE; + bool next_pg_contig = + (unsigned long)umem->pages[(addr >> PAGE_SHIFT)].addr & + XSK_NEXT_PG_CONTIG_MASK; + + return cross_pg && !next_pg_contig; +} + static inline bool xskq_is_valid_addr(struct xsk_queue *q, u64 addr) { if (addr >= q->size) { @@ -143,23 +155,51 @@ static inline bool xskq_is_valid_addr(struct xsk_queue *q, u64 addr) return true; } -static inline u64 *xskq_validate_addr(struct xsk_queue *q, u64 *addr) +static inline bool xskq_is_valid_addr_unaligned(struct xsk_queue *q, u64 addr, + u64 length, + struct xdp_umem *umem) +{ + u64 base_addr = xsk_umem_extract_addr(addr); + + addr = xsk_umem_add_offset_to_addr(addr); + if (base_addr >= q->size || addr >= q->size || + xskq_crosses_non_contig_pg(umem, addr, length)) { + q->invalid_descs++; + return false; + } + + return true; +} + +static inline u64 *xskq_validate_addr(struct xsk_queue *q, u64 *addr, + struct xdp_umem *umem) { while (q->cons_tail != q->cons_head) { struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring; unsigned int idx = q->cons_tail & q->ring_mask; *addr = READ_ONCE(ring->desc[idx]) & q->chunk_mask; + + if (umem->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG) { + if (xskq_is_valid_addr_unaligned(q, *addr, + umem->chunk_size_nohr, + umem)) + return addr; + goto out; + } + if (xskq_is_valid_addr(q, *addr)) return addr; +out: q->cons_tail++; } return NULL; } -static inline u64 *xskq_peek_addr(struct xsk_queue *q, u64 *addr) +static inline u64 *xskq_peek_addr(struct xsk_queue *q, u64 *addr, + struct xdp_umem *umem) { if (q->cons_tail == q->cons_head) { smp_mb(); /* D, matches A */ @@ -170,7 +210,7 @@ static inline u64 *xskq_peek_addr(struct xsk_queue *q, u64 *addr) smp_rmb(); } - return xskq_validate_addr(q, addr); + return xskq_validate_addr(q, addr, umem); } static inline void xskq_discard_addr(struct xsk_queue *q) @@ -229,8 +269,21 @@ static inline int xskq_reserve_addr(struct xsk_queue *q) /* Rx/Tx queue */ -static inline bool xskq_is_valid_desc(struct xsk_queue *q, struct xdp_desc *d) +static inline bool xskq_is_valid_desc(struct xsk_queue *q, struct xdp_desc *d, + struct xdp_umem *umem) { + if (umem->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG) { + if (!xskq_is_valid_addr_unaligned(q, d->addr, d->len, umem)) + return false; + + if (d->len > umem->chunk_size_nohr || d->options) { + q->invalid_descs++; + 
return false; + } + + return true; + } + if (!xskq_is_valid_addr(q, d->addr)) return false; @@ -244,14 +297,15 @@ static inline bool xskq_is_valid_desc(struct xsk_queue *q, struct xdp_desc *d) } static inline struct xdp_desc *xskq_validate_desc(struct xsk_queue *q, - struct xdp_desc *desc) + struct xdp_desc *desc, + struct xdp_umem *umem) { while (q->cons_tail != q->cons_head) { struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring; unsigned int idx = q->cons_tail & q->ring_mask; *desc = READ_ONCE(ring->desc[idx]); - if (xskq_is_valid_desc(q, desc)) + if (xskq_is_valid_desc(q, desc, umem)) return desc; q->cons_tail++; @@ -261,7 +315,8 @@ static inline struct xdp_desc *xskq_validate_desc(struct xsk_queue *q, } static inline struct xdp_desc *xskq_peek_desc(struct xsk_queue *q, - struct xdp_desc *desc) + struct xdp_desc *desc, + struct xdp_umem *umem) { if (q->cons_tail == q->cons_head) { smp_mb(); /* D, matches A */ @@ -272,7 +327,7 @@ static inline struct xdp_desc *xskq_peek_desc(struct xsk_queue *q, smp_rmb(); /* C, matches B */ } - return xskq_validate_desc(q, desc); + return xskq_validate_desc(q, desc, umem); } static inline void xskq_discard_desc(struct xsk_queue *q) diff --git a/net/xfrm/xfrm_ipcomp.c b/net/xfrm/xfrm_ipcomp.c index 32c364d3bfb3..4d422447aadc 100644 --- a/net/xfrm/xfrm_ipcomp.c +++ b/net/xfrm/xfrm_ipcomp.c @@ -85,7 +85,7 @@ static int ipcomp_decompress(struct xfrm_state *x, struct sk_buff *skb) if (dlen < len) len = dlen; - frag->page_offset = 0; + skb_frag_off_set(frag, 0); skb_frag_size_set(frag, len); memcpy(skb_frag_address(frag), scratch, len); |
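
A note on the 6 GHz additions in the net/wireless/reg.c and net/wireless/util.c hunks above: channels on the new NL80211_BAND_6GHZ band map to frequencies as 5940 + chan * 5 (following the 802.11ax D4.1 reference cited in the patch), and cfg80211_get_unii() now recognises UNII-5 through UNII-8 between 5925 and 7125 MHz. The stand-alone C sketch below simply restates that arithmetic for illustration; it is not the in-kernel API, and the chan >= 1 guard is an assumption added here.

#include <stdio.h>

/* Mirror of the 6 GHz mappings introduced above (illustrative only).
 * Frequencies are in MHz. */
static int chan6g_to_freq(int chan)
{
	return (chan >= 1 && chan <= 253) ? 5940 + chan * 5 : -1;
}

static int freq_to_unii(int freq)
{
	if (freq > 5925 && freq <= 6425)
		return 5;	/* UNII-5 */
	if (freq > 6425 && freq <= 6525)
		return 6;	/* UNII-6 */
	if (freq > 6525 && freq <= 6875)
		return 7;	/* UNII-7 */
	if (freq > 6875 && freq <= 7125)
		return 8;	/* UNII-8 */
	return -1;
}

int main(void)
{
	int chan = 5;				/* example 6 GHz channel */
	int freq = chan6g_to_freq(chan);	/* 5965 MHz */

	printf("chan %d -> %d MHz -> UNII-%d\n", chan, freq, freq_to_unii(freq));
	return 0;
}

The inverse in ieee80211_frequency_to_channel() uses 5945 MHz as the cut-over, so 6 GHz channel 1 (5945 MHz) round-trips through (freq - 5940) / 5 as expected.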
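The AF_XDP changes above (net/xdp/xdp_umem.c, net/xdp/xsk.c) introduce the XDP_USE_NEED_WAKEUP bind flag: when the kernel needs a kick it sets XDP_RING_NEED_WAKEUP in the new flags word of the Tx and fill rings, and user space is then expected to call sendto() (Tx) or poll() (Rx). A minimal hedged user-space sketch follows; umem registration, binding and ring mapping via XDP_MMAP_OFFSETS are omitted, and tx_flags/fill_flags are assumed to point at the mmap'ed flags words exported through the new off.tx.flags/off.fr.flags offsets.

#include <poll.h>
#include <sys/socket.h>
#include <linux/if_xdp.h>

/* Kick the kernel's Tx path only when it asked to be woken up.
 * An empty sendto() on the AF_XDP socket enters xsk_sendmsg() and,
 * in zero-copy mode, ends up in ndo_xsk_wakeup() via xsk_zc_xmit(). */
static void kick_tx_if_needed(int xsk_fd, const volatile __u32 *tx_flags)
{
	if (*tx_flags & XDP_RING_NEED_WAKEUP)
		sendto(xsk_fd, NULL, 0, MSG_DONTWAIT, NULL, 0);
}

/* For Rx, blocking in poll() is enough: the reworked xsk_poll() above
 * calls ndo_xsk_wakeup() itself when umem->need_wakeup is set. */
static void wait_for_rx(int xsk_fd, const volatile __u32 *fill_flags)
{
	struct pollfd pfd = { .fd = xsk_fd, .events = POLLIN };

	if (*fill_flags & XDP_RING_NEED_WAKEUP)
		poll(&pfd, 1, -1);
}

When the driver is keeping up, the flag stays clear and the application can skip the syscalls entirely, which is the point of the feature; as the xdp_umem_assign_dev() comment notes, Tx still needs one explicit wakeup the first time, and copy-mode drivers always require sendto().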
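The net/xdp/xsk_queue.h hunks above validate unaligned-chunk addresses with xsk_umem_extract_addr() and xsk_umem_add_offset_to_addr(), whose definitions are not part of this diff. As a rough sketch of the encoding they rely on, assuming the offset is carried in the upper 16 bits of the 64-bit ring address (i.e. XSK_UNALIGNED_BUF_OFFSET_SHIFT == 48 and XSK_UNALIGNED_BUF_ADDR_MASK == (1ULL << 48) - 1, both defined outside this diff):

#include <stdint.h>

/* Assumed layout: bits 0-47 hold the chunk base address,
 * bits 48-63 hold the offset into the chunk. */
#define BUF_OFFSET_SHIFT	48
#define BUF_ADDR_MASK		((1ULL << BUF_OFFSET_SHIFT) - 1)

static uint64_t extract_addr(uint64_t addr)
{
	return addr & BUF_ADDR_MASK;		/* like xsk_umem_extract_addr() */
}

static uint64_t add_offset_to_addr(uint64_t addr)
{
	/* like xsk_umem_add_offset_to_addr(): base plus encoded offset */
	return extract_addr(addr) + (addr >> BUF_OFFSET_SHIFT);
}

This is what lets xskq_is_valid_addr_unaligned() check both the base address and the effective address, and why xskq_crosses_non_contig_pg() only has to look at the low bits when deciding whether a buffer spans a non-contiguous page boundary.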