diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2021-02-25 12:06:25 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2021-02-25 12:06:25 -0800 |
commit | 5ad3dbab569ac39e88fae31690401895c37368b6 (patch) | |
tree | 43301ebd31b499939a2c7c955468afada309ea33 | |
parent | 268f77b5250998b871fa54a2a9703871fb44544e (diff) | |
parent | 6cf739131a15e4177e58a1b4f2bede9d5da78552 (diff) |
Merge tag 'net-5.12-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Pull networking fixes from Jakub Kicinski:
"Rather small batch this time.
Current release - regressions:
- bcm63xx_enet: fix sporadic kernel panic due to queue length
mis-accounting
Current release - new code bugs:
- bcm4908_enet: fix RX path possible mem leak
- bcm4908_enet: fix NAPI poll returned value
- stmmac: fix missing spin_lock_init in visconti_eth_dwmac_probe()
- sched: cls_flower: validate ct_state for invalid and reply flags
Previous releases - regressions:
- net: introduce CAN specific pointer in the struct net_device to
prevent mis-interpreting memory
- phy: micrel: set soft_reset callback to genphy_soft_reset for
KSZ8081
- psample: fix netlink skb length with tunnel info
Previous releases - always broken:
- icmp: pass zeroed opts from icmp{,v6}_ndo_send before sending
- wireguard: device: do not generate ICMP for non-IP packets
- mptcp: provide subflow aware release function to avoid a mem leak
- hsr: add support for EntryForgetTime
- r8169: fix jumbo packet handling on RTL8168e
- octeontx2-af: fix an off by one in rvu_dbg_qsize_write()
- i40e: fix flow for IPv6 next header (extension header)
- phy: icplus: call phy_restore_page() when phy_select_page() fails
- dpaa_eth: fix the access method for the dpaa_napi_portal"
* tag 'net-5.12-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net: (55 commits)
r8169: fix jumbo packet handling on RTL8168e
net: phy: micrel: set soft_reset callback to genphy_soft_reset for KSZ8081
net: psample: Fix netlink skb length with tunnel info
net: broadcom: bcm4908_enet: fix NAPI poll returned value
net: broadcom: bcm4908_enet: fix RX path possible mem leak
net: hsr: add support for EntryForgetTime
net: dsa: sja1105: Remove unneeded cast in sja1105_crc32()
ibmvnic: fix a race between open and reset
net: stmmac: Fix missing spin_lock_init in visconti_eth_dwmac_probe()
net: introduce CAN specific pointer in the struct net_device
net: usb: qmi_wwan: support ZTE P685M modem
wireguard: kconfig: use arm chacha even with no neon
wireguard: queueing: get rid of per-peer ring buffers
wireguard: device: do not generate ICMP for non-IP packets
wireguard: peer: put frequently used members above cache lines
wireguard: selftests: test multiple parallel streams
wireguard: socket: remove bogus __be32 annotation
wireguard: avoid double unlikely() notation when using IS_ERR()
net: qrtr: Fix memory leak in qrtr_tun_open
vxlan: move debug check after netdev unregister
...
68 files changed, 734 insertions, 371 deletions
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 45d12b0e9a2f..b09bed554f26 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -88,7 +88,7 @@ config WIREGUARD select CRYPTO_CURVE25519_X86 if X86 && 64BIT select ARM_CRYPTO if ARM select ARM64_CRYPTO if ARM64 - select CRYPTO_CHACHA20_NEON if (ARM || ARM64) && KERNEL_MODE_NEON + select CRYPTO_CHACHA20_NEON if ARM || (ARM64 && KERNEL_MODE_NEON) select CRYPTO_POLY1305_NEON if ARM64 && KERNEL_MODE_NEON select CRYPTO_POLY1305_ARM if ARM select CRYPTO_BLAKE2S_ARM if ARM diff --git a/drivers/net/can/dev/dev.c b/drivers/net/can/dev/dev.c index d9281ae853f8..311d8564d611 100644 --- a/drivers/net/can/dev/dev.c +++ b/drivers/net/can/dev/dev.c @@ -239,6 +239,7 @@ void can_setup(struct net_device *dev) struct net_device *alloc_candev_mqs(int sizeof_priv, unsigned int echo_skb_max, unsigned int txqs, unsigned int rxqs) { + struct can_ml_priv *can_ml; struct net_device *dev; struct can_priv *priv; int size; @@ -270,7 +271,8 @@ struct net_device *alloc_candev_mqs(int sizeof_priv, unsigned int echo_skb_max, priv = netdev_priv(dev); priv->dev = dev; - dev->ml_priv = (void *)priv + ALIGN(sizeof_priv, NETDEV_ALIGN); + can_ml = (void *)priv + ALIGN(sizeof_priv, NETDEV_ALIGN); + can_set_ml_priv(dev, can_ml); if (echo_skb_max) { priv->echo_skb_max = echo_skb_max; diff --git a/drivers/net/can/slcan.c b/drivers/net/can/slcan.c index a1bd1be09548..30c8d53c9745 100644 --- a/drivers/net/can/slcan.c +++ b/drivers/net/can/slcan.c @@ -516,6 +516,7 @@ static struct slcan *slc_alloc(void) int i; char name[IFNAMSIZ]; struct net_device *dev = NULL; + struct can_ml_priv *can_ml; struct slcan *sl; int size; @@ -538,7 +539,8 @@ static struct slcan *slc_alloc(void) dev->base_addr = i; sl = netdev_priv(dev); - dev->ml_priv = (void *)sl + ALIGN(sizeof(*sl), NETDEV_ALIGN); + can_ml = (void *)sl + ALIGN(sizeof(*sl), NETDEV_ALIGN); + can_set_ml_priv(dev, can_ml); /* Initialize channel control data */ sl->magic = SLCAN_MAGIC; diff --git a/drivers/net/can/vcan.c b/drivers/net/can/vcan.c index 39ca14b0585d..067705e2850b 100644 --- a/drivers/net/can/vcan.c +++ b/drivers/net/can/vcan.c @@ -153,7 +153,7 @@ static void vcan_setup(struct net_device *dev) dev->addr_len = 0; dev->tx_queue_len = 0; dev->flags = IFF_NOARP; - dev->ml_priv = netdev_priv(dev); + can_set_ml_priv(dev, netdev_priv(dev)); /* set flags according to driver capabilities */ if (echo) diff --git a/drivers/net/can/vxcan.c b/drivers/net/can/vxcan.c index f9a524c5f6d6..8861a7d875e7 100644 --- a/drivers/net/can/vxcan.c +++ b/drivers/net/can/vxcan.c @@ -141,6 +141,8 @@ static const struct net_device_ops vxcan_netdev_ops = { static void vxcan_setup(struct net_device *dev) { + struct can_ml_priv *can_ml; + dev->type = ARPHRD_CAN; dev->mtu = CANFD_MTU; dev->hard_header_len = 0; @@ -149,7 +151,9 @@ static void vxcan_setup(struct net_device *dev) dev->flags = (IFF_NOARP|IFF_ECHO); dev->netdev_ops = &vxcan_netdev_ops; dev->needs_free_netdev = true; - dev->ml_priv = netdev_priv(dev) + ALIGN(sizeof(struct vxcan_priv), NETDEV_ALIGN); + + can_ml = netdev_priv(dev) + ALIGN(sizeof(struct vxcan_priv), NETDEV_ALIGN); + can_set_ml_priv(dev, can_ml); } /* forward declaration for rtnl_create_link() */ diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index ae86ded1e2a1..a162499bcafc 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -543,6 +543,19 @@ static void b53_port_set_mcast_flood(struct b53_device *dev, int port, b53_write16(dev, B53_CTRL_PAGE, B53_IPMC_FLOOD_MASK, mc); } +static void b53_port_set_learning(struct b53_device *dev, int port, + bool learning) +{ + u16 reg; + + b53_read16(dev, B53_CTRL_PAGE, B53_DIS_LEARNING, ®); + if (learning) + reg &= ~BIT(port); + else + reg |= BIT(port); + b53_write16(dev, B53_CTRL_PAGE, B53_DIS_LEARNING, reg); +} + int b53_enable_port(struct dsa_switch *ds, int port, struct phy_device *phy) { struct b53_device *dev = ds->priv; @@ -557,6 +570,7 @@ int b53_enable_port(struct dsa_switch *ds, int port, struct phy_device *phy) b53_port_set_ucast_flood(dev, port, true); b53_port_set_mcast_flood(dev, port, true); + b53_port_set_learning(dev, port, false); if (dev->ops->irq_enable) ret = dev->ops->irq_enable(dev, port); @@ -691,6 +705,7 @@ static void b53_enable_cpu_port(struct b53_device *dev, int port) b53_port_set_ucast_flood(dev, port, true); b53_port_set_mcast_flood(dev, port, true); + b53_port_set_learning(dev, port, false); } static void b53_enable_mib(struct b53_device *dev) @@ -1953,19 +1968,20 @@ void b53_br_fast_age(struct dsa_switch *ds, int port) } EXPORT_SYMBOL(b53_br_fast_age); -static int b53_br_flags_pre(struct dsa_switch *ds, int port, - struct switchdev_brport_flags flags, - struct netlink_ext_ack *extack) +int b53_br_flags_pre(struct dsa_switch *ds, int port, + struct switchdev_brport_flags flags, + struct netlink_ext_ack *extack) { - if (flags.mask & ~(BR_FLOOD | BR_MCAST_FLOOD)) + if (flags.mask & ~(BR_FLOOD | BR_MCAST_FLOOD | BR_LEARNING)) return -EINVAL; return 0; } +EXPORT_SYMBOL(b53_br_flags_pre); -static int b53_br_flags(struct dsa_switch *ds, int port, - struct switchdev_brport_flags flags, - struct netlink_ext_ack *extack) +int b53_br_flags(struct dsa_switch *ds, int port, + struct switchdev_brport_flags flags, + struct netlink_ext_ack *extack) { if (flags.mask & BR_FLOOD) b53_port_set_ucast_flood(ds->priv, port, @@ -1973,17 +1989,22 @@ static int b53_br_flags(struct dsa_switch *ds, int port, if (flags.mask & BR_MCAST_FLOOD) b53_port_set_mcast_flood(ds->priv, port, !!(flags.val & BR_MCAST_FLOOD)); + if (flags.mask & BR_LEARNING) + b53_port_set_learning(ds->priv, port, + !!(flags.val & BR_LEARNING)); return 0; } +EXPORT_SYMBOL(b53_br_flags); -static int b53_set_mrouter(struct dsa_switch *ds, int port, bool mrouter, - struct netlink_ext_ack *extack) +int b53_set_mrouter(struct dsa_switch *ds, int port, bool mrouter, + struct netlink_ext_ack *extack) { b53_port_set_mcast_flood(ds->priv, port, mrouter); return 0; } +EXPORT_SYMBOL(b53_set_mrouter); static bool b53_possible_cpu_port(struct dsa_switch *ds, int port) { diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h index faf983fbca82..8419bb7f4505 100644 --- a/drivers/net/dsa/b53/b53_priv.h +++ b/drivers/net/dsa/b53/b53_priv.h @@ -326,6 +326,14 @@ int b53_br_join(struct dsa_switch *ds, int port, struct net_device *bridge); void b53_br_leave(struct dsa_switch *ds, int port, struct net_device *bridge); void b53_br_set_stp_state(struct dsa_switch *ds, int port, u8 state); void b53_br_fast_age(struct dsa_switch *ds, int port); +int b53_br_flags_pre(struct dsa_switch *ds, int port, + struct switchdev_brport_flags flags, + struct netlink_ext_ack *extack); +int b53_br_flags(struct dsa_switch *ds, int port, + struct switchdev_brport_flags flags, + struct netlink_ext_ack *extack); +int b53_set_mrouter(struct dsa_switch *ds, int port, bool mrouter, + struct netlink_ext_ack *extack); int b53_setup_devlink_resources(struct dsa_switch *ds); void b53_port_event(struct dsa_switch *ds, int port); void b53_phylink_validate(struct dsa_switch *ds, int port, diff --git a/drivers/net/dsa/b53/b53_regs.h b/drivers/net/dsa/b53/b53_regs.h index c90985c294a2..b2c539a42154 100644 --- a/drivers/net/dsa/b53/b53_regs.h +++ b/drivers/net/dsa/b53/b53_regs.h @@ -115,6 +115,7 @@ #define B53_UC_FLOOD_MASK 0x32 #define B53_MC_FLOOD_MASK 0x34 #define B53_IPMC_FLOOD_MASK 0x36 +#define B53_DIS_LEARNING 0x3c /* * Override Ports 0-7 State on devices with xMII interfaces (8 bit) diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 1857aa9aa84a..5ee8103b8e9c 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -223,23 +223,10 @@ static int bcm_sf2_port_setup(struct dsa_switch *ds, int port, reg &= ~P_TXQ_PSM_VDD(port); core_writel(priv, reg, CORE_MEM_PSM_VDD_CTRL); - /* Enable learning */ - reg = core_readl(priv, CORE_DIS_LEARN); - reg &= ~BIT(port); - core_writel(priv, reg, CORE_DIS_LEARN); - /* Enable Broadcom tags for that port if requested */ - if (priv->brcm_tag_mask & BIT(port)) { + if (priv->brcm_tag_mask & BIT(port)) b53_brcm_hdr_setup(ds, port); - /* Disable learning on ASP port */ - if (port == 7) { - reg = core_readl(priv, CORE_DIS_LEARN); - reg |= BIT(port); - core_writel(priv, reg, CORE_DIS_LEARN); - } - } - /* Configure Traffic Class to QoS mapping, allow each priority to map * to a different queue number */ @@ -1117,7 +1104,10 @@ static const struct dsa_switch_ops bcm_sf2_ops = { .set_mac_eee = b53_set_mac_eee, .port_bridge_join = b53_br_join, .port_bridge_leave = b53_br_leave, + .port_pre_bridge_flags = b53_br_flags_pre, + .port_bridge_flags = b53_br_flags, .port_stp_state_set = b53_br_set_stp_state, + .port_set_mrouter = b53_set_mrouter, .port_fast_age = b53_br_fast_age, .port_vlan_filtering = b53_vlan_filtering, .port_vlan_add = b53_vlan_add, diff --git a/drivers/net/dsa/sja1105/sja1105_static_config.c b/drivers/net/dsa/sja1105/sja1105_static_config.c index 139b7b4fbd0d..a8efb7fac395 100644 --- a/drivers/net/dsa/sja1105/sja1105_static_config.c +++ b/drivers/net/dsa/sja1105/sja1105_static_config.c @@ -85,7 +85,7 @@ u32 sja1105_crc32(const void *buf, size_t len) /* seed */ crc = ~0; for (i = 0; i < len; i += 4) { - sja1105_unpack((void *)buf + i, &word, 31, 0, 4); + sja1105_unpack(buf + i, &word, 31, 0, 4); crc = crc32_le(crc, (u8 *)&word, 4); } return ~crc; diff --git a/drivers/net/ethernet/atheros/ag71xx.c b/drivers/net/ethernet/atheros/ag71xx.c index dd5c8a9038bb..a60ce9030581 100644 --- a/drivers/net/ethernet/atheros/ag71xx.c +++ b/drivers/net/ethernet/atheros/ag71xx.c @@ -223,8 +223,6 @@ #define AG71XX_REG_RX_SM 0x01b0 #define AG71XX_REG_TX_SM 0x01b4 -#define ETH_SWITCH_HEADER_LEN 2 - #define AG71XX_DEFAULT_MSG_ENABLE \ (NETIF_MSG_DRV \ | NETIF_MSG_PROBE \ @@ -933,7 +931,7 @@ static void ag71xx_hw_setup(struct ag71xx *ag) static unsigned int ag71xx_max_frame_len(unsigned int mtu) { - return ETH_SWITCH_HEADER_LEN + ETH_HLEN + VLAN_HLEN + mtu + ETH_FCS_LEN; + return ETH_HLEN + VLAN_HLEN + mtu + ETH_FCS_LEN; } static void ag71xx_hw_set_macaddr(struct ag71xx *ag, unsigned char *mac) diff --git a/drivers/net/ethernet/broadcom/bcm4908_enet.c b/drivers/net/ethernet/broadcom/bcm4908_enet.c index 9be33dc98072..0b70e9e0ddad 100644 --- a/drivers/net/ethernet/broadcom/bcm4908_enet.c +++ b/drivers/net/ethernet/broadcom/bcm4908_enet.c @@ -570,6 +570,7 @@ static int bcm4908_enet_poll(struct napi_struct *napi, int weight) if (len < ETH_ZLEN || (ctl & (DMA_CTL_STATUS_SOP | DMA_CTL_STATUS_EOP)) != (DMA_CTL_STATUS_SOP | DMA_CTL_STATUS_EOP)) { + kfree_skb(slot.skb); enet->netdev->stats.rx_dropped++; break; } @@ -582,6 +583,8 @@ static int bcm4908_enet_poll(struct napi_struct *napi, int weight) enet->netdev->stats.rx_packets++; enet->netdev->stats.rx_bytes += len; + + handled++; } if (handled < weight) { diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c index fd8767213165..977f097fc7bf 100644 --- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c +++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c @@ -1192,7 +1192,6 @@ static int bcm_enet_stop(struct net_device *dev) kdev = &priv->pdev->dev; netif_stop_queue(dev); - netdev_reset_queue(dev); napi_disable(&priv->napi); if (priv->has_phy) phy_stop(dev->phydev); @@ -1231,6 +1230,9 @@ static int bcm_enet_stop(struct net_device *dev) if (priv->has_phy) phy_disconnect(dev->phydev); + /* reset BQL after forced tx reclaim to prevent kernel panic */ + netdev_reset_queue(dev); + return 0; } @@ -2343,7 +2345,6 @@ static int bcm_enetsw_stop(struct net_device *dev) del_timer_sync(&priv->swphy_poll); netif_stop_queue(dev); - netdev_reset_queue(dev); napi_disable(&priv->napi); del_timer_sync(&priv->rx_timeout); @@ -2371,6 +2372,9 @@ static int bcm_enetsw_stop(struct net_device *dev) free_irq(priv->irq_tx, dev); free_irq(priv->irq_rx, dev); + /* reset BQL after forced tx reclaim to prevent kernel panic */ + netdev_reset_queue(dev); + return 0; } diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c index ccfe52a50a66..720dc99bd1fc 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c @@ -2670,7 +2670,6 @@ static enum qman_cb_dqrr_result rx_default_dqrr(struct qman_portal *portal, u32 hash; u64 ns; - np = container_of(&portal, struct dpaa_napi_portal, p); dpaa_fq = container_of(fq, struct dpaa_fq, fq_base); fd_status = be32_to_cpu(fd->status); fd_format = qm_fd_get_format(fd); @@ -2685,6 +2684,7 @@ static enum qman_cb_dqrr_result rx_default_dqrr(struct qman_portal *portal, percpu_priv = this_cpu_ptr(priv->percpu_priv); percpu_stats = &percpu_priv->stats; + np = &percpu_priv->np; if (unlikely(dpaa_eth_napi_schedule(percpu_priv, portal, sched_napi))) return qman_cb_dqrr_stop; diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 1c0e4beb56e7..118a4bd3f877 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -1172,12 +1172,25 @@ static int ibmvnic_open(struct net_device *netdev) struct ibmvnic_adapter *adapter = netdev_priv(netdev); int rc; - /* If device failover is pending, just set device state and return. - * Device operation will be handled by reset routine. + ASSERT_RTNL(); + + /* If device failover is pending or we are about to reset, just set + * device state and return. Device operation will be handled by reset + * routine. + * + * It should be safe to overwrite the adapter->state here. Since + * we hold the rtnl, either the reset has not actually started or + * the rtnl got dropped during the set_link_state() in do_reset(). + * In the former case, no one else is changing the state (again we + * have the rtnl) and in the latter case, do_reset() will detect and + * honor our setting below. */ - if (adapter->failover_pending) { + if (adapter->failover_pending || (test_bit(0, &adapter->resetting))) { + netdev_dbg(netdev, "[S:%d FOP:%d] Resetting, deferring open\n", + adapter->state, adapter->failover_pending); adapter->state = VNIC_OPEN; - return 0; + rc = 0; + goto out; } if (adapter->state != VNIC_CLOSED) { @@ -1196,10 +1209,12 @@ static int ibmvnic_open(struct net_device *netdev) rc = __ibmvnic_open(netdev); out: - /* If open fails due to a pending failover, set device state and - * return. Device operation will be handled by reset routine. + /* If open failed and there is a pending failover or in-progress reset, + * set device state and return. Device operation will be handled by + * reset routine. See also comments above regarding rtnl. */ - if (rc && adapter->failover_pending) { + if (rc && + (adapter->failover_pending || (test_bit(0, &adapter->resetting)))) { adapter->state = VNIC_OPEN; rc = 0; } @@ -1928,6 +1943,14 @@ static int do_reset(struct ibmvnic_adapter *adapter, if (rwi->reset_reason == VNIC_RESET_FAILOVER) adapter->failover_pending = false; + /* read the state and check (again) after getting rtnl */ + reset_state = adapter->state; + + if (reset_state == VNIC_REMOVING || reset_state == VNIC_REMOVED) { + rc = -EBUSY; + goto out; + } + netif_carrier_off(netdev); old_num_rx_queues = adapter->req_rx_queues; @@ -1958,11 +1981,27 @@ static int do_reset(struct ibmvnic_adapter *adapter, if (rc) goto out; + if (adapter->state == VNIC_OPEN) { + /* When we dropped rtnl, ibmvnic_open() got + * it and noticed that we are resetting and + * set the adapter state to OPEN. Update our + * new "target" state, and resume the reset + * from VNIC_CLOSING state. + */ + netdev_dbg(netdev, + "Open changed state from %d, updating.\n", + reset_state); + reset_state = VNIC_OPEN; + adapter->state = VNIC_CLOSING; + } + if (adapter->state != VNIC_CLOSING) { + /* If someone else changed the adapter state + * when we dropped the rtnl, fail the reset + */ rc = -1; goto out; } - adapter->state = VNIC_CLOSED; } } @@ -2106,6 +2145,14 @@ static int do_hard_reset(struct ibmvnic_adapter *adapter, netdev_dbg(adapter->netdev, "Hard resetting driver (%d)\n", rwi->reset_reason); + /* read the state and check (again) after getting rtnl */ + reset_state = adapter->state; + + if (reset_state == VNIC_REMOVING || reset_state == VNIC_REMOVED) { + rc = -EBUSY; + goto out; + } + netif_carrier_off(netdev); adapter->reset_reason = rwi->reset_reason; diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index a8a2b5f683a2..c70dec65a572 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -5083,7 +5083,7 @@ static int i40e_set_priv_flags(struct net_device *dev, u32 flags) enum i40e_admin_queue_err adq_err; struct i40e_vsi *vsi = np->vsi; struct i40e_pf *pf = vsi->back; - bool is_reset_needed; + u32 reset_needed = 0; i40e_status status; u32 i, j; @@ -5128,9 +5128,11 @@ static int i40e_set_priv_flags(struct net_device *dev, u32 flags) flags_complete: changed_flags = orig_flags ^ new_flags; - is_reset_needed = !!(changed_flags & (I40E_FLAG_VEB_STATS_ENABLED | - I40E_FLAG_LEGACY_RX | I40E_FLAG_SOURCE_PRUNING_DISABLED | - I40E_FLAG_DISABLE_FW_LLDP)); + if (changed_flags & I40E_FLAG_DISABLE_FW_LLDP) + reset_needed = I40E_PF_RESET_AND_REBUILD_FLAG; + if (changed_flags & (I40E_FLAG_VEB_STATS_ENABLED | + I40E_FLAG_LEGACY_RX | I40E_FLAG_SOURCE_PRUNING_DISABLED)) + reset_needed = BIT(__I40E_PF_RESET_REQUESTED); /* Before we finalize any flag changes, we need to perform some * checks to ensure that the changes are supported and safe. @@ -5252,7 +5254,7 @@ flags_complete: case I40E_AQ_RC_EEXIST: dev_warn(&pf->pdev->dev, "FW LLDP agent is already running\n"); - is_reset_needed = false; + reset_needed = 0; break; case I40E_AQ_RC_EPERM: dev_warn(&pf->pdev->dev, @@ -5281,8 +5283,8 @@ flags_complete: /* Issue reset to cause things to take effect, as additional bits * are added we will need to create a mask of bits requiring reset */ - if (is_reset_needed) - i40e_do_reset(pf, BIT(__I40E_PF_RESET_REQUESTED), true); + if (reset_needed) + i40e_do_reset(pf, reset_needed, true); return 0; } diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 8bb8eb65add9..353deae139f9 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -2616,7 +2616,7 @@ static void i40e_sync_filters_subtask(struct i40e_pf *pf) return; if (!test_and_clear_bit(__I40E_MACVLAN_SYNC_PENDING, pf->state)) return; - if (test_and_set_bit(__I40E_VF_DISABLE, pf->state)) { + if (test_bit(__I40E_VF_DISABLE, pf->state)) { set_bit(__I40E_MACVLAN_SYNC_PENDING, pf->state); return; } @@ -2634,7 +2634,6 @@ static void i40e_sync_filters_subtask(struct i40e_pf *pf) } } } - clear_bit(__I40E_VF_DISABLE, pf->state); } /** @@ -5937,7 +5936,7 @@ static int i40e_add_channel(struct i40e_pf *pf, u16 uplink_seid, ch->enabled_tc = !i40e_is_channel_macvlan(ch) && enabled_tc; ch->seid = ctxt.seid; ch->vsi_number = ctxt.vsi_number; - ch->stat_counter_idx = cpu_to_le16(ctxt.info.stat_counter_idx); + ch->stat_counter_idx = le16_to_cpu(ctxt.info.stat_counter_idx); /* copy just the sections touched not the entire info * since not all sections are valid as returned by @@ -7977,8 +7976,8 @@ static inline void i40e_set_cld_element(struct i40e_cloud_filter *filter, struct i40e_aqc_cloud_filters_element_data *cld) { - int i, j; u32 ipa; + int i; memset(cld, 0, sizeof(*cld)); ether_addr_copy(cld->outer_mac, filter->dst_mac); @@ -7989,14 +7988,14 @@ i40e_set_cld_element(struct i40e_cloud_filter *filter, if (filter->n_proto == ETH_P_IPV6) { #define IPV6_MAX_INDEX (ARRAY_SIZE(filter->dst_ipv6) - 1) - for (i = 0, j = 0; i < ARRAY_SIZE(filter->dst_ipv6); - i++, j += 2) { + for (i = 0; i < ARRAY_SIZE(filter->dst_ipv6); i++) { ipa = be32_to_cpu(filter->dst_ipv6[IPV6_MAX_INDEX - i]); - ipa = cpu_to_le32(ipa); - memcpy(&cld->ipaddr.raw_v6.data[j], &ipa, sizeof(ipa)); + + *(__le32 *)&cld->ipaddr.raw_v6.data[i * 2] = cpu_to_le32(ipa); } } else { ipa = be32_to_cpu(filter->dst_ipv4); + memcpy(&cld->ipaddr.v4.data, &ipa, sizeof(ipa)); } @@ -8044,6 +8043,8 @@ int i40e_add_del_cloud_filter(struct i40e_vsi *vsi, if (filter->flags >= ARRAY_SIZE(flag_table)) return I40E_ERR_CONFIG; + memset(&cld_filter, 0, sizeof(cld_filter)); + /* copy element needed to add cloud filter from filter */ i40e_set_cld_element(filter, &cld_filter); @@ -8107,10 +8108,13 @@ int i40e_add_del_cloud_filter_big_buf(struct i40e_vsi *vsi, return -EOPNOTSUPP; /* adding filter using src_port/src_ip is not supported at this stage */ - if (filter->src_port || filter->src_ipv4 || + if (filter->src_port || + (filter->src_ipv4 && filter->n_proto != ETH_P_IPV6) || !ipv6_addr_any(&filter->ip.v6.src_ip6)) return -EOPNOTSUPP; + memset(&cld_filter, 0, sizeof(cld_filter)); + /* copy element needed to add cloud filter from filter */ i40e_set_cld_element(filter, &cld_filter.element); @@ -8134,7 +8138,7 @@ int i40e_add_del_cloud_filter_big_buf(struct i40e_vsi *vsi, cpu_to_le16(I40E_AQC_ADD_CLOUD_FILTER_MAC_VLAN_PORT); } - } else if (filter->dst_ipv4 || + } else if ((filter->dst_ipv4 && filter->n_proto != ETH_P_IPV6) || !ipv6_addr_any(&filter->ip.v6.dst_ip6)) { cld_filter.element.flags = cpu_to_le16(I40E_AQC_ADD_CLOUD_FILTER_IP_PORT); @@ -8928,11 +8932,6 @@ void i40e_do_reset(struct i40e_pf *pf, u32 reset_flags, bool lock_acquired) dev_dbg(&pf->pdev->dev, "PFR requested\n"); i40e_handle_reset_warning(pf, lock_acquired); - dev_info(&pf->pdev->dev, - pf->flags & I40E_FLAG_DISABLE_FW_LLDP ? - "FW LLDP is disabled\n" : - "FW LLDP is enabled\n"); - } else if (reset_flags & I40E_PF_RESET_AND_REBUILD_FLAG) { /* Request a PF Reset * @@ -8940,6 +8939,10 @@ void i40e_do_reset(struct i40e_pf *pf, u32 reset_flags, bool lock_acquired) */ i40e_prep_for_reset(pf); i40e_reset_and_rebuild(pf, true, lock_acquired); + dev_info(&pf->pdev->dev, + pf->flags & I40E_FLAG_DISABLE_FW_LLDP ? + "FW LLDP is disabled\n" : + "FW LLDP is enabled\n"); } else if (reset_flags & BIT_ULL(__I40E_REINIT_REQUESTED)) { int v; @@ -10462,7 +10465,6 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) int old_recovery_mode_bit = test_bit(__I40E_RECOVERY_MODE, pf->state); struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi]; struct i40e_hw *hw = &pf->hw; - u8 set_fc_aq_fail = 0; i40e_status ret; u32 val; int v; @@ -10605,13 +10607,6 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) i40e_stat_str(&pf->hw, ret), i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); - /* make sure our flow control settings are restored */ - ret = i40e_set_fc(&pf->hw, &set_fc_aq_fail, true); - if (ret) - dev_dbg(&pf->pdev->dev, "setting flow control: ret = %s last_status = %s\n", - i40e_stat_str(&pf->hw, ret), - i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); - /* Rebuild the VSIs and VEBs that existed before reset. * They are still in our local switch element arrays, so only * need to rebuild the switch model in the HW. @@ -12191,6 +12186,8 @@ i40e_status i40e_set_partition_bw_setting(struct i40e_pf *pf) struct i40e_aqc_configure_partition_bw_data bw_data; i40e_status status; + memset(&bw_data, 0, sizeof(bw_data)); + /* Set the valid bit for this PF */ bw_data.pf_valid_bits = cpu_to_le16(BIT(pf->hw.pf_id)); bw_data.max_bw[pf->hw.pf_id] = pf->max_bw & I40E_ALT_BW_VALUE_MASK; @@ -15198,7 +15195,6 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) int err; u32 val; u32 i; - u8 set_fc_aq_fail; err = pci_enable_device_mem(pdev); if (err) @@ -15537,24 +15533,6 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } INIT_LIST_HEAD(&pf->vsi[pf->lan_vsi]->ch_list); - /* Make sure flow control is set according to current settings */ - err = i40e_set_fc(hw, &set_fc_aq_fail, true); - if (set_fc_aq_fail & I40E_SET_FC_AQ_FAIL_GET) - dev_dbg(&pf->pdev->dev, - "Set fc with err %s aq_err %s on get_phy_cap\n", - i40e_stat_str(hw, err), - i40e_aq_str(hw, hw->aq.asq_last_status)); - if (set_fc_aq_fail & I40E_SET_FC_AQ_FAIL_SET) - dev_dbg(&pf->pdev->dev, - "Set fc with err %s aq_err %s on set_phy_config\n", - i40e_stat_str(hw, err), - i40e_aq_str(hw, hw->aq.asq_last_status)); - if (set_fc_aq_fail & I40E_SET_FC_AQ_FAIL_UPDATE) - dev_dbg(&pf->pdev->dev, - "Set fc with err %s aq_err %s on get_link_info\n", - i40e_stat_str(hw, err), - i40e_aq_str(hw, hw->aq.asq_last_status)); - /* if FDIR VSI was set up, start it now */ for (i = 0; i < pf->num_alloc_vsi; i++) { if (pf->vsi[i] && pf->vsi[i]->type == I40E_VSI_FDIR) { @@ -15611,6 +15589,8 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (err) { dev_info(&pdev->dev, "setup of misc vector failed: %d\n", err); + i40e_cloud_filter_exit(pf); + i40e_fdir_teardown(pf); goto err_vsis; } } diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index f6f1af94cca0..627794b31e33 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -1948,7 +1948,7 @@ void i40e_process_skb_fields(struct i40e_ring *rx_ring, skb_record_rx_queue(skb, rx_ring->queue_index); if (qword & BIT(I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)) { - u16 vlan_tag = rx_desc->wb.qword0.lo_dword.l2tag1; + __le16 vlan_tag = rx_desc->wb.qword0.lo_dword.l2tag1; __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), le16_to_cpu(vlan_tag)); @@ -3223,13 +3223,16 @@ static int i40e_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags, l4_proto = ip.v4->protocol; } else if (*tx_flags & I40E_TX_FLAGS_IPV6) { + int ret; + tunnel |= I40E_TX_CTX_EXT_IP_IPV6; exthdr = ip.hdr + sizeof(*ip.v6); l4_proto = ip.v6->nexthdr; - if (l4.hdr != exthdr) - ipv6_skip_exthdr(skb, exthdr - skb->data, - &l4_proto, &frag_off); + ret = ipv6_skip_exthdr(skb, exthdr - skb->data, + &l4_proto, &frag_off); + if (ret < 0) + return -1; } /* define outer transport */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c index 4f11f7bf75d1..fc32c5019b0f 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c @@ -453,7 +453,7 @@ static void i40e_set_rs_bit(struct i40e_ring *xdp_ring) struct i40e_tx_desc *tx_desc; tx_desc = I40E_TX_DESC(xdp_ring, ntu); - tx_desc->cmd_type_offset_bsz |= (I40E_TX_DESC_CMD_RS << I40E_TXD_QW1_CMD_SHIFT); + tx_desc->cmd_type_offset_bsz |= cpu_to_le64(I40E_TX_DESC_CMD_RS << I40E_TXD_QW1_CMD_SHIFT); } /** diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index dae8280ce17c..357706444dd5 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -454,9 +454,7 @@ struct ice_pf { struct ice_hw_port_stats stats_prev; struct ice_hw hw; u8 stat_prev_loaded:1; /* has previous stats been loaded */ -#ifdef CONFIG_DCB u16 dcbx_cap; -#endif /* CONFIG_DCB */ u32 tx_timeout_count; unsigned long tx_timeout_last_recovery; u32 tx_timeout_recovery_level; diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_nl.c b/drivers/net/ethernet/intel/ice/ice_dcb_nl.c index fcfefad00d1c..468a63f7eff9 100644 --- a/drivers/net/ethernet/intel/ice/ice_dcb_nl.c +++ b/drivers/net/ethernet/intel/ice/ice_dcb_nl.c @@ -134,7 +134,7 @@ ice_dcbnl_getnumtcs(struct net_device *dev, int __always_unused tcid, u8 *num) if (!test_bit(ICE_FLAG_DCB_CAPABLE, pf->flags)) return -EINVAL; - *num = IEEE_8021QAZ_MAX_TCS; + *num = pf->hw.func_caps.common_cap.maxtc; return 0; } @@ -159,6 +159,10 @@ static u8 ice_dcbnl_setdcbx(struct net_device *netdev, u8 mode) struct ice_pf *pf = ice_netdev_to_pf(netdev); struct ice_qos_cfg *qos_cfg; + /* if FW LLDP agent is running, DCBNL not allowed to change mode */ + if (test_bit(ICE_FLAG_FW_LLDP_AGENT, pf->flags)) + return ICE_DCB_NO_HW_CHG; + /* No support for LLD_MANAGED modes or CEE+IEEE */ if ((mode & DCB_CAP_DCBX_LLD_MANAGED) || ((mode & DCB_CAP_DCBX_VER_IEEE) && (mode & DCB_CAP_DCBX_VER_CEE)) || diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index 5636c9b23896..2dcfa685b763 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -8,6 +8,7 @@ #include "ice_fltr.h" #include "ice_lib.h" #include "ice_dcb_lib.h" +#include <net/dcbnl.h> struct ice_stats { char stat_string[ETH_GSTRING_LEN]; @@ -1238,6 +1239,9 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags) status = ice_init_pf_dcb(pf, true); if (status) dev_warn(dev, "Fail to init DCB\n"); + + pf->dcbx_cap &= ~DCB_CAP_DCBX_LLD_MANAGED; + pf->dcbx_cap |= DCB_CAP_DCBX_HOST; } else { enum ice_status status; bool dcbx_agent_status; @@ -1280,6 +1284,9 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags) if (status) dev_dbg(dev, "Fail to enable MIB change events\n"); + pf->dcbx_cap &= ~DCB_CAP_DCBX_HOST; + pf->dcbx_cap |= DCB_CAP_DCBX_LLD_MANAGED; + ice_nway_reset(netdev); } } @@ -3322,6 +3329,18 @@ ice_get_channels(struct net_device *dev, struct ethtool_channels *ch) } /** + * ice_get_valid_rss_size - return valid number of RSS queues + * @hw: pointer to the HW structure + * @new_size: requested RSS queues + */ +static int ice_get_valid_rss_size(struct ice_hw *hw, int new_size) +{ + struct ice_hw_common_caps *caps = &hw->func_caps.common_cap; + + return min_t(int, new_size, BIT(caps->rss_table_entry_width)); +} + +/** * ice_vsi_set_dflt_rss_lut - set default RSS LUT with requested RSS size * @vsi: VSI to reconfigure RSS LUT on * @req_rss_size: requested range of queue numbers for hashing @@ -3348,14 +3367,10 @@ static int ice_vsi_set_dflt_rss_lut(struct ice_vsi *vsi, int req_rss_size) return -ENOMEM; /* set RSS LUT parameters */ - if (!test_bit(ICE_FLAG_RSS_ENA, pf->flags)) { + if (!test_bit(ICE_FLAG_RSS_ENA, pf->flags)) vsi->rss_size = 1; - } else { - struct ice_hw_common_caps *caps = &hw->func_caps.common_cap; - - vsi->rss_size = min_t(int, req_rss_size, - BIT(caps->rss_table_entry_width)); - } + else + vsi->rss_size = ice_get_valid_rss_size(hw, req_rss_size); /* create/set RSS LUT */ ice_fill_rss_lut(lut, vsi->rss_table_size, vsi->rss_size); @@ -3434,9 +3449,12 @@ static int ice_set_channels(struct net_device *dev, struct ethtool_channels *ch) ice_vsi_recfg_qs(vsi, new_rx, new_tx); - if (new_rx && !netif_is_rxfh_configured(dev)) + if (!netif_is_rxfh_configured(dev)) return ice_vsi_set_dflt_rss_lut(vsi, new_rx); + /* Update rss_size due to change in Rx queues */ + vsi->rss_size = ice_get_valid_rss_size(&pf->hw, new_rx); + return 0; } diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c index bf5fd812ea0e..1f38a8d0c525 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c @@ -1919,6 +1919,29 @@ static int ice_vc_get_ver_msg(struct ice_vf *vf, u8 *msg) } /** + * ice_vc_get_max_frame_size - get max frame size allowed for VF + * @vf: VF used to determine max frame size + * + * Max frame size is determined based on the current port's max frame size and + * whether a port VLAN is configured on this VF. The VF is not aware whether + * it's in a port VLAN so the PF needs to account for this in max frame size + * checks and sending the max frame size to the VF. + */ +static u16 ice_vc_get_max_frame_size(struct ice_vf *vf) +{ + struct ice_vsi *vsi = vf->pf->vsi[vf->lan_vsi_idx]; + struct ice_port_info *pi = vsi->port_info; + u16 max_frame_size; + + max_frame_size = pi->phy.link_info.max_frame_size; + + if (vf->port_vlan_info) + max_frame_size -= VLAN_HLEN; + + return max_frame_size; +} + +/** * ice_vc_get_vf_res_msg * @vf: pointer to the VF info * @msg: pointer to the msg buffer @@ -2000,6 +2023,7 @@ static int ice_vc_get_vf_res_msg(struct ice_vf *vf, u8 *msg) vfres->max_vectors = pf->num_msix_per_vf; vfres->rss_key_size = ICE_VSIQF_HKEY_ARRAY_SIZE; vfres->rss_lut_size = ICE_VSIQF_HLUT_ARRAY_SIZE; + vfres->max_mtu = ice_vc_get_max_frame_size(vf); vfres->vsi_res[0].vsi_id = vf->lan_vsi_num; vfres->vsi_res[0].vsi_type = VIRTCHNL_VSI_SRIOV; @@ -2420,7 +2444,7 @@ static int ice_vc_cfg_promiscuous_mode_msg(struct ice_vf *vf, u8 *msg) } if (!test_bit(ICE_FLAG_VF_TRUE_PROMISC_ENA, pf->flags)) { - bool set_dflt_vsi = !!(info->flags & FLAG_VF_UNICAST_PROMISC); + bool set_dflt_vsi = alluni || allmulti; if (set_dflt_vsi && !ice_is_dflt_vsi_in_use(pf->first_sw)) /* only attempt to set the default forwarding VSI if @@ -2998,6 +3022,8 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg) /* copy Rx queue info from VF into VSI */ if (qpi->rxq.ring_len > 0) { + u16 max_frame_size = ice_vc_get_max_frame_size(vf); + num_rxq++; vsi->rx_rings[i]->dma = qpi->rxq.dma_ring_addr; vsi->rx_rings[i]->count = qpi->rxq.ring_len; @@ -3010,7 +3036,7 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg) } vsi->rx_buf_len = qpi->rxq.databuffer_size; vsi->rx_rings[i]->rx_buf_len = vsi->rx_buf_len; - if (qpi->rxq.max_pkt_size >= (16 * 1024) || + if (qpi->rxq.max_pkt_size > max_frame_size || qpi->rxq.max_pkt_size < 64) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; @@ -3018,6 +3044,11 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg) } vsi->max_frame = qpi->rxq.max_pkt_size; + /* add space for the port VLAN since the VF driver is not + * expected to account for it in the MTU calculation + */ + if (vf->port_vlan_info) + vsi->max_frame += VLAN_HLEN; } /* VF can request to configure less than allocated queues or default diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 0507369bb54d..1767c60056c5 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -4699,9 +4699,10 @@ static void mvpp2_irqs_deinit(struct mvpp2_port *port) } } -static bool mvpp22_rss_is_supported(void) +static bool mvpp22_rss_is_supported(struct mvpp2_port *port) { - return queue_mode == MVPP2_QDIST_MULTI_MODE; + return (queue_mode == MVPP2_QDIST_MULTI_MODE) && + !(port->flags & MVPP2_F_LOOPBACK); } static int mvpp2_open(struct net_device *dev) @@ -5513,7 +5514,7 @@ static int mvpp2_ethtool_get_rxnfc(struct net_device *dev, struct mvpp2_port *port = netdev_priv(dev); int ret = 0, i, loc = 0; - if (!mvpp22_rss_is_supported()) + if (!mvpp22_rss_is_supported(port)) return -EOPNOTSUPP; switch (info->cmd) { @@ -5548,7 +5549,7 @@ static int mvpp2_ethtool_set_rxnfc(struct net_device *dev, struct mvpp2_port *port = netdev_priv(dev); int ret = 0; - if (!mvpp22_rss_is_supported()) + if (!mvpp22_rss_is_supported(port)) return -EOPNOTSUPP; switch (info->cmd) { @@ -5569,7 +5570,9 @@ static int mvpp2_ethtool_set_rxnfc(struct net_device *dev, static u32 mvpp2_ethtool_get_rxfh_indir_size(struct net_device *dev) { - return mvpp22_rss_is_supported() ? MVPP22_RSS_TABLE_ENTRIES : 0; + struct mvpp2_port *port = netdev_priv(dev); + + return mvpp22_rss_is_supported(port) ? MVPP22_RSS_TABLE_ENTRIES : 0; } static int mvpp2_ethtool_get_rxfh(struct net_device *dev, u32 *indir, u8 *key, @@ -5578,7 +5581,7 @@ static int mvpp2_ethtool_get_rxfh(struct net_device *dev, u32 *indir, u8 *key, struct mvpp2_port *port = netdev_priv(dev); int ret = 0; - if (!mvpp22_rss_is_supported()) + if (!mvpp22_rss_is_supported(port)) return -EOPNOTSUPP; if (indir) @@ -5596,7 +5599,7 @@ static int mvpp2_ethtool_set_rxfh(struct net_device *dev, const u32 *indir, struct mvpp2_port *port = netdev_priv(dev); int ret = 0; - if (!mvpp22_rss_is_supported()) + if (!mvpp22_rss_is_supported(port)) return -EOPNOTSUPP; if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_CRC32) @@ -5617,7 +5620,7 @@ static int mvpp2_ethtool_get_rxfh_context(struct net_device *dev, u32 *indir, struct mvpp2_port *port = netdev_priv(dev); int ret = 0; - if (!mvpp22_rss_is_supported()) + if (!mvpp22_rss_is_supported(port)) return -EOPNOTSUPP; if (rss_context >= MVPP22_N_RSS_TABLES) return -EINVAL; @@ -5639,7 +5642,7 @@ static int mvpp2_ethtool_set_rxfh_context(struct net_device *dev, struct mvpp2_port *port = netdev_priv(dev); int ret; - if (!mvpp22_rss_is_supported()) + if (!mvpp22_rss_is_supported(port)) return -EOPNOTSUPP; if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_CRC32) @@ -5956,7 +5959,7 @@ static int mvpp2_port_init(struct mvpp2_port *port) mvpp2_cls_oversize_rxq_set(port); mvpp2_cls_port_config(port); - if (mvpp22_rss_is_supported()) + if (mvpp22_rss_is_supported(port)) mvpp22_port_rss_init(port); /* Provide an initial Rx packet size */ @@ -6861,7 +6864,7 @@ static int mvpp2_port_probe(struct platform_device *pdev, dev->hw_features |= features | NETIF_F_RXCSUM | NETIF_F_GRO | NETIF_F_HW_VLAN_CTAG_FILTER; - if (mvpp22_rss_is_supported()) { + if (mvpp22_rss_is_supported(port)) { dev->hw_features |= NETIF_F_RXHASH; dev->features |= NETIF_F_NTUPLE; } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c index 094124b695dc..aa2ca8780b9c 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c @@ -473,7 +473,7 @@ static ssize_t rvu_dbg_qsize_write(struct file *filp, u16 pcifunc; int ret, lf; - cmd_buf = memdup_user(buffer, count); + cmd_buf = memdup_user(buffer, count + 1); if (IS_ERR(cmd_buf)) return -ENOMEM; diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c index ebe1406c6e64..dbec8e187a68 100644 --- a/drivers/net/ethernet/marvell/sky2.c +++ b/drivers/net/ethernet/marvell/sky2.c @@ -4806,12 +4806,11 @@ static struct net_device *sky2_init_netdev(struct sky2_hw *hw, unsigned port, if (!is_valid_ether_addr(dev->dev_addr)) { struct sockaddr sa = { AF_UNSPEC }; - netdev_warn(dev, - "Invalid MAC address, defaulting to random\n"); + dev_warn(&hw->pdev->dev, "Invalid MAC address, defaulting to random\n"); eth_hw_addr_random(dev); memcpy(sa.sa_data, dev->dev_addr, ETH_ALEN); if (sky2_set_mac_address(dev, &sa)) - netdev_warn(dev, "Failed to set MAC address.\n"); + dev_warn(&hw->pdev->dev, "Failed to set MAC address.\n"); } return dev; diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 394f43add85c..a99e71bc7b3c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -4986,6 +4986,7 @@ static int mlx4_do_mirror_rule(struct mlx4_dev *dev, struct res_fs_rule *fs_rule if (!fs_rule->mirr_mbox) { mlx4_err(dev, "rule mirroring mailbox is null\n"); + mlx4_free_cmd_mailbox(dev, mailbox); return -EINVAL; } memcpy(mailbox->buf, fs_rule->mirr_mbox, fs_rule->mirr_mbox_size); diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 0a20dae32184..f704da3f214c 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -2285,14 +2285,14 @@ static void r8168dp_hw_jumbo_disable(struct rtl8169_private *tp) static void r8168e_hw_jumbo_enable(struct rtl8169_private *tp) { - RTL_W8(tp, MaxTxPacketSize, 0x3f); + RTL_W8(tp, MaxTxPacketSize, 0x24); RTL_W8(tp, Config3, RTL_R8(tp, Config3) | Jumbo_En0); RTL_W8(tp, Config4, RTL_R8(tp, Config4) | 0x01); } static void r8168e_hw_jumbo_disable(struct rtl8169_private *tp) { - RTL_W8(tp, MaxTxPacketSize, 0x0c); + RTL_W8(tp, MaxTxPacketSize, 0x3f); RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Jumbo_En0); RTL_W8(tp, Config4, RTL_R8(tp, Config4) & ~0x01); } diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c index b7a0c57dfbfb..d23be45a64e5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c @@ -218,6 +218,7 @@ static int visconti_eth_dwmac_probe(struct platform_device *pdev) goto remove_config; } + spin_lock_init(&dwmac->lock); dwmac->reg = stmmac_res.addr; plat_dat->bsp_priv = dwmac; plat_dat->fix_mac_speed = visconti_eth_fix_mac_speed; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c index 56985542e202..44bb133c3000 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c @@ -316,6 +316,32 @@ static int tc_setup_cbs(struct stmmac_priv *priv, if (!priv->dma_cap.av) return -EOPNOTSUPP; + /* Port Transmit Rate and Speed Divider */ + switch (priv->speed) { + case SPEED_10000: + ptr = 32; + speed_div = 10000000; + break; + case SPEED_5000: + ptr = 32; + speed_div = 5000000; + break; + case SPEED_2500: + ptr = 8; + speed_div = 2500000; + break; + case SPEED_1000: + ptr = 8; + speed_div = 1000000; + break; + case SPEED_100: + ptr = 4; + speed_div = 100000; + break; + default: + return -EOPNOTSUPP; + } + mode_to_use = priv->plat->tx_queues_cfg[queue].mode_to_use; if (mode_to_use == MTL_QUEUE_DCB && qopt->enable) { ret = stmmac_dma_qmode(priv, priv->ioaddr, queue, MTL_QUEUE_AVB); @@ -332,10 +358,6 @@ static int tc_setup_cbs(struct stmmac_priv *priv, priv->plat->tx_queues_cfg[queue].mode_to_use = MTL_QUEUE_DCB; } - /* Port Transmit Rate and Speed Divider */ - ptr = (priv->speed == SPEED_100) ? 4 : 8; - speed_div = (priv->speed == SPEED_100) ? 100000 : 1000000; - /* Final adjustments for HW */ value = div_s64(qopt->idleslope * 1024ll * ptr, speed_div); priv->plat->tx_queues_cfg[queue].idle_slope = value & GENMASK(31, 0); diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index 9a70f05baf6e..39c00f050fbd 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -543,7 +543,6 @@ static int gtp_build_skb_ip4(struct sk_buff *skb, struct net_device *dev, if (!skb_is_gso(skb) && (iph->frag_off & htons(IP_DF)) && mtu < ntohs(iph->tot_len)) { netdev_dbg(dev, "packet too big, fragmentation needed\n"); - memset(IPCB(skb), 0, sizeof(*IPCB(skb))); icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); goto err_rt; diff --git a/drivers/net/phy/icplus.c b/drivers/net/phy/icplus.c index 3e431737c1ba..a00a667454a9 100644 --- a/drivers/net/phy/icplus.c +++ b/drivers/net/phy/icplus.c @@ -239,7 +239,7 @@ static int ip101a_g_config_intr_pin(struct phy_device *phydev) oldpage = phy_select_page(phydev, IP101G_DEFAULT_PAGE); if (oldpage < 0) - return oldpage; + goto out; /* configure the RXER/INTR_32 pin of the 32-pin IP101GR if needed: */ switch (priv->sel_intr32) { @@ -314,7 +314,7 @@ static int ip101a_g_read_status(struct phy_device *phydev) oldpage = phy_select_page(phydev, IP101G_DEFAULT_PAGE); if (oldpage < 0) - return oldpage; + goto out; ret = __phy_read(phydev, IP10XX_SPEC_CTRL_STATUS); if (ret < 0) @@ -349,7 +349,8 @@ out: static int ip101a_g_config_mdix(struct phy_device *phydev) { u16 ctrl = 0, ctrl2 = 0; - int oldpage, ret; + int oldpage; + int ret = 0; switch (phydev->mdix_ctrl) { case ETH_TP_MDI: @@ -367,7 +368,7 @@ static int ip101a_g_config_mdix(struct phy_device *phydev) oldpage = phy_select_page(phydev, IP101G_DEFAULT_PAGE); if (oldpage < 0) - return oldpage; + goto out; ret = __phy_modify(phydev, IP10XX_SPEC_CTRL_STATUS, IP101A_G_AUTO_MDIX_DIS, ctrl); diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 7ec6f70d6a82..a14a00328fa3 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -1303,6 +1303,7 @@ static struct phy_driver ksphy_driver[] = { .driver_data = &ksz8081_type, .probe = kszphy_probe, .config_init = ksz8081_config_init, + .soft_reset = genphy_soft_reset, .config_intr = kszphy_config_intr, .handle_interrupt = kszphy_handle_interrupt, .get_sset_count = kszphy_get_sset_count, diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 6c3d8c2abd38..17a050521b86 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1318,6 +1318,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x19d2, 0x1255, 4)}, {QMI_FIXED_INTF(0x19d2, 0x1256, 4)}, {QMI_FIXED_INTF(0x19d2, 0x1270, 5)}, /* ZTE MF667 */ + {QMI_FIXED_INTF(0x19d2, 0x1275, 3)}, /* ZTE P685M */ {QMI_FIXED_INTF(0x19d2, 0x1401, 2)}, {QMI_FIXED_INTF(0x19d2, 0x1402, 2)}, /* ZTE MF60 */ {QMI_FIXED_INTF(0x19d2, 0x1424, 2)}, diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 2d7cc63bef89..b246817f3405 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -2632,21 +2632,24 @@ static inline u8 rtl8152_get_speed(struct r8152 *tp) return ocp_read_byte(tp, MCU_TYPE_PLA, PLA_PHYSTATUS); } -static void rtl_set_eee_plus(struct r8152 *tp) +static void rtl_eee_plus_en(struct r8152 *tp, bool enable) { u32 ocp_data; - u8 speed; - speed = rtl8152_get_speed(tp); - if (speed & _10bps) { - ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_EEEP_CR); + ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_EEEP_CR); + if (enable) ocp_data |= EEEP_CR_EEEP_TX; - ocp_write_word(tp, MCU_TYPE_PLA, PLA_EEEP_CR, ocp_data); - } else { - ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_EEEP_CR); + else ocp_data &= ~EEEP_CR_EEEP_TX; - ocp_write_word(tp, MCU_TYPE_PLA, PLA_EEEP_CR, ocp_data); - } + ocp_write_word(tp, MCU_TYPE_PLA, PLA_EEEP_CR, ocp_data); +} + +static void rtl_set_eee_plus(struct r8152 *tp) +{ + if (rtl8152_get_speed(tp) & _10bps) + rtl_eee_plus_en(tp, true); + else + rtl_eee_plus_en(tp, false); } static void rxdy_gated_en(struct r8152 *tp, bool enable) @@ -3150,10 +3153,22 @@ static void r8153b_ups_flags(struct r8152 *tp) ocp_write_dword(tp, MCU_TYPE_USB, USB_UPS_FLAGS, ups_flags); } -static void r8153b_green_en(struct r8152 *tp, bool enable) +static void rtl_green_en(struct r8152 *tp, bool enable) { u16 data; + data = sram_read(tp, SRAM_GREEN_CFG); + if (enable) + data |= GREEN_ETH_EN; + else + data &= ~GREEN_ETH_EN; + sram_write(tp, SRAM_GREEN_CFG, data); + + tp->ups_info.green = enable; +} + +static void r8153b_green_en(struct r8152 *tp, bool enable) +{ if (enable) { sram_write(tp, 0x8045, 0); /* 10M abiq&ldvbias */ sram_write(tp, 0x804d, 0x1222); /* 100M short abiq&ldvbias */ @@ -3164,11 +3179,7 @@ static void r8153b_green_en(struct r8152 *tp, bool enable) sram_write(tp, 0x805d, 0x2444); /* 1000M short abiq&ldvbias */ } - data = sram_read(tp, SRAM_GREEN_CFG); - data |= GREEN_ETH_EN; - sram_write(tp, SRAM_GREEN_CFG, data); - - tp->ups_info.green = enable; + rtl_green_en(tp, true); } static u16 r8153_phy_status(struct r8152 *tp, u16 desired) @@ -3360,7 +3371,7 @@ static void rtl8153b_runtime_enable(struct r8152 *tp, bool enable) r8153b_ups_en(tp, false); r8153_queue_wake(tp, false); rtl_runtime_suspend_enable(tp, false); - if (tp->udev->speed != USB_SPEED_HIGH) + if (tp->udev->speed >= USB_SPEED_SUPER) r8153b_u1u2en(tp, true); } } @@ -5056,7 +5067,7 @@ static void rtl8153b_up(struct r8152 *tp) r8153_aldps_en(tp, true); - if (tp->udev->speed != USB_SPEED_HIGH) + if (tp->udev->speed >= USB_SPEED_SUPER) r8153b_u1u2en(tp, true); } @@ -5572,8 +5583,9 @@ static void r8153b_init(struct r8152 *tp) ocp_data |= POLL_LINK_CHG; ocp_write_word(tp, MCU_TYPE_PLA, PLA_EXTRA_STATUS, ocp_data); - if (tp->udev->speed != USB_SPEED_HIGH) + if (tp->udev->speed >= USB_SPEED_SUPER) r8153b_u1u2en(tp, true); + usb_enable_lpm(tp->udev); /* MAC clock speed down */ @@ -5756,6 +5768,9 @@ static int rtl8152_runtime_suspend(struct r8152 *tp) struct net_device *netdev = tp->netdev; int ret = 0; + if (!tp->rtl_ops.autosuspend_en) + return -EBUSY; + set_bit(SELECTIVE_SUSPEND, &tp->flags); smp_mb__after_atomic(); @@ -6155,6 +6170,11 @@ rtl_ethtool_get_eee(struct net_device *net, struct ethtool_eee *edata) struct r8152 *tp = netdev_priv(net); int ret; + if (!tp->rtl_ops.eee_get) { + ret = -EOPNOTSUPP; + goto out; + } + ret = usb_autopm_get_interface(tp->intf); if (ret < 0) goto out; @@ -6177,6 +6197,11 @@ rtl_ethtool_set_eee(struct net_device *net, struct ethtool_eee *edata) struct r8152 *tp = netdev_priv(net); int ret; + if (!tp->rtl_ops.eee_set) { + ret = -EOPNOTSUPP; + goto out; + } + ret = usb_autopm_get_interface(tp->intf); if (ret < 0) goto out; @@ -6576,7 +6601,7 @@ static int rtl_ops_init(struct r8152 *tp) default: ret = -ENODEV; - netif_err(tp, probe, tp->netdev, "Unknown Device\n"); + dev_err(&tp->intf->dev, "Unknown Device\n"); break; } @@ -6833,7 +6858,7 @@ static int rtl8152_probe(struct usb_interface *intf, ret = register_netdev(netdev); if (ret != 0) { - netif_err(tp, probe, netdev, "couldn't register the device\n"); + dev_err(&intf->dev, "couldn't register the device\n"); goto out1; } diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 3929e437382b..666dd201c3d5 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -4721,7 +4721,6 @@ static void vxlan_destroy_tunnels(struct net *net, struct list_head *head) struct vxlan_net *vn = net_generic(net, vxlan_net_id); struct vxlan_dev *vxlan, *next; struct net_device *dev, *aux; - unsigned int h; for_each_netdev_safe(net, dev, aux) if (dev->rtnl_link_ops == &vxlan_link_ops) @@ -4735,14 +4734,13 @@ static void vxlan_destroy_tunnels(struct net *net, struct list_head *head) unregister_netdevice_queue(vxlan->dev, head); } - for (h = 0; h < PORT_HASH_SIZE; ++h) - WARN_ON_ONCE(!hlist_empty(&vn->sock_list[h])); } static void __net_exit vxlan_exit_batch_net(struct list_head *net_list) { struct net *net; LIST_HEAD(list); + unsigned int h; rtnl_lock(); list_for_each_entry(net, net_list, exit_list) { @@ -4755,6 +4753,13 @@ static void __net_exit vxlan_exit_batch_net(struct list_head *net_list) unregister_netdevice_many(&list); rtnl_unlock(); + + list_for_each_entry(net, net_list, exit_list) { + struct vxlan_net *vn = net_generic(net, vxlan_net_id); + + for (h = 0; h < PORT_HASH_SIZE; ++h) + WARN_ON_ONCE(!hlist_empty(&vn->sock_list[h])); + } } static struct pernet_operations vxlan_net_ops = { diff --git a/drivers/net/wireguard/device.c b/drivers/net/wireguard/device.c index a3ed49cd95c3..551ddaaaf540 100644 --- a/drivers/net/wireguard/device.c +++ b/drivers/net/wireguard/device.c @@ -138,7 +138,7 @@ static netdev_tx_t wg_xmit(struct sk_buff *skb, struct net_device *dev) else if (skb->protocol == htons(ETH_P_IPV6)) net_dbg_ratelimited("%s: No peer has allowed IPs matching %pI6\n", dev->name, &ipv6_hdr(skb)->daddr); - goto err; + goto err_icmp; } family = READ_ONCE(peer->endpoint.addr.sa_family); @@ -157,7 +157,7 @@ static netdev_tx_t wg_xmit(struct sk_buff *skb, struct net_device *dev) } else { struct sk_buff *segs = skb_gso_segment(skb, 0); - if (unlikely(IS_ERR(segs))) { + if (IS_ERR(segs)) { ret = PTR_ERR(segs); goto err_peer; } @@ -201,12 +201,13 @@ static netdev_tx_t wg_xmit(struct sk_buff *skb, struct net_device *dev) err_peer: wg_peer_put(peer); -err: - ++dev->stats.tx_errors; +err_icmp: if (skb->protocol == htons(ETH_P_IP)) icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); else if (skb->protocol == htons(ETH_P_IPV6)) icmpv6_ndo_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0); +err: + ++dev->stats.tx_errors; kfree_skb(skb); return ret; } @@ -234,8 +235,8 @@ static void wg_destruct(struct net_device *dev) destroy_workqueue(wg->handshake_receive_wq); destroy_workqueue(wg->handshake_send_wq); destroy_workqueue(wg->packet_crypt_wq); - wg_packet_queue_free(&wg->decrypt_queue, true); - wg_packet_queue_free(&wg->encrypt_queue, true); + wg_packet_queue_free(&wg->decrypt_queue); + wg_packet_queue_free(&wg->encrypt_queue); rcu_barrier(); /* Wait for all the peers to be actually freed. */ wg_ratelimiter_uninit(); memzero_explicit(&wg->static_identity, sizeof(wg->static_identity)); @@ -337,12 +338,12 @@ static int wg_newlink(struct net *src_net, struct net_device *dev, goto err_destroy_handshake_send; ret = wg_packet_queue_init(&wg->encrypt_queue, wg_packet_encrypt_worker, - true, MAX_QUEUED_PACKETS); + MAX_QUEUED_PACKETS); if (ret < 0) goto err_destroy_packet_crypt; ret = wg_packet_queue_init(&wg->decrypt_queue, wg_packet_decrypt_worker, - true, MAX_QUEUED_PACKETS); + MAX_QUEUED_PACKETS); if (ret < 0) goto err_free_encrypt_queue; @@ -367,9 +368,9 @@ static int wg_newlink(struct net *src_net, struct net_device *dev, err_uninit_ratelimiter: wg_ratelimiter_uninit(); err_free_decrypt_queue: - wg_packet_queue_free(&wg->decrypt_queue, true); + wg_packet_queue_free(&wg->decrypt_queue); err_free_encrypt_queue: - wg_packet_queue_free(&wg->encrypt_queue, true); + wg_packet_queue_free(&wg->encrypt_queue); err_destroy_packet_crypt: destroy_workqueue(wg->packet_crypt_wq); err_destroy_handshake_send: diff --git a/drivers/net/wireguard/device.h b/drivers/net/wireguard/device.h index 4d0144e16947..854bc3d97150 100644 --- a/drivers/net/wireguard/device.h +++ b/drivers/net/wireguard/device.h @@ -27,13 +27,14 @@ struct multicore_worker { struct crypt_queue { struct ptr_ring ring; - union { - struct { - struct multicore_worker __percpu *worker; - int last_cpu; - }; - struct work_struct work; - }; + struct multicore_worker __percpu *worker; + int last_cpu; +}; + +struct prev_queue { + struct sk_buff *head, *tail, *peeked; + struct { struct sk_buff *next, *prev; } empty; // Match first 2 members of struct sk_buff. + atomic_t count; }; struct wg_device { diff --git a/drivers/net/wireguard/peer.c b/drivers/net/wireguard/peer.c index b3b6370e6b95..cd5cb0292cb6 100644 --- a/drivers/net/wireguard/peer.c +++ b/drivers/net/wireguard/peer.c @@ -32,27 +32,22 @@ struct wg_peer *wg_peer_create(struct wg_device *wg, peer = kzalloc(sizeof(*peer), GFP_KERNEL); if (unlikely(!peer)) return ERR_PTR(ret); - peer->device = wg; + if (dst_cache_init(&peer->endpoint_cache, GFP_KERNEL)) + goto err; + peer->device = wg; wg_noise_handshake_init(&peer->handshake, &wg->static_identity, public_key, preshared_key, peer); - if (dst_cache_init(&peer->endpoint_cache, GFP_KERNEL)) - goto err_1; - if (wg_packet_queue_init(&peer->tx_queue, wg_packet_tx_worker, false, - MAX_QUEUED_PACKETS)) - goto err_2; - if (wg_packet_queue_init(&peer->rx_queue, NULL, false, - MAX_QUEUED_PACKETS)) - goto err_3; - peer->internal_id = atomic64_inc_return(&peer_counter); peer->serial_work_cpu = nr_cpumask_bits; wg_cookie_init(&peer->latest_cookie); wg_timers_init(peer); wg_cookie_checker_precompute_peer_keys(peer); spin_lock_init(&peer->keypairs.keypair_update_lock); - INIT_WORK(&peer->transmit_handshake_work, - wg_packet_handshake_send_worker); + INIT_WORK(&peer->transmit_handshake_work, wg_packet_handshake_send_worker); + INIT_WORK(&peer->transmit_packet_work, wg_packet_tx_worker); + wg_prev_queue_init(&peer->tx_queue); + wg_prev_queue_init(&peer->rx_queue); rwlock_init(&peer->endpoint_lock); kref_init(&peer->refcount); skb_queue_head_init(&peer->staged_packet_queue); @@ -68,11 +63,7 @@ struct wg_peer *wg_peer_create(struct wg_device *wg, pr_debug("%s: Peer %llu created\n", wg->dev->name, peer->internal_id); return peer; -err_3: - wg_packet_queue_free(&peer->tx_queue, false); -err_2: - dst_cache_destroy(&peer->endpoint_cache); -err_1: +err: kfree(peer); return ERR_PTR(ret); } @@ -197,8 +188,7 @@ static void rcu_release(struct rcu_head *rcu) struct wg_peer *peer = container_of(rcu, struct wg_peer, rcu); dst_cache_destroy(&peer->endpoint_cache); - wg_packet_queue_free(&peer->rx_queue, false); - wg_packet_queue_free(&peer->tx_queue, false); + WARN_ON(wg_prev_queue_peek(&peer->tx_queue) || wg_prev_queue_peek(&peer->rx_queue)); /* The final zeroing takes care of clearing any remaining handshake key * material and other potentially sensitive information. diff --git a/drivers/net/wireguard/peer.h b/drivers/net/wireguard/peer.h index 23af40922997..8d53b687a1d1 100644 --- a/drivers/net/wireguard/peer.h +++ b/drivers/net/wireguard/peer.h @@ -36,16 +36,17 @@ struct endpoint { struct wg_peer { struct wg_device *device; - struct crypt_queue tx_queue, rx_queue; + struct prev_queue tx_queue, rx_queue; struct sk_buff_head staged_packet_queue; int serial_work_cpu; + bool is_dead; struct noise_keypairs keypairs; struct endpoint endpoint; struct dst_cache endpoint_cache; rwlock_t endpoint_lock; struct noise_handshake handshake; atomic64_t last_sent_handshake; - struct work_struct transmit_handshake_work, clear_peer_work; + struct work_struct transmit_handshake_work, clear_peer_work, transmit_packet_work; struct cookie latest_cookie; struct hlist_node pubkey_hash; u64 rx_bytes, tx_bytes; @@ -61,9 +62,8 @@ struct wg_peer { struct rcu_head rcu; struct list_head peer_list; struct list_head allowedips_list; - u64 internal_id; struct napi_struct napi; - bool is_dead; + u64 internal_id; }; struct wg_peer *wg_peer_create(struct wg_device *wg, diff --git a/drivers/net/wireguard/queueing.c b/drivers/net/wireguard/queueing.c index 71b8e80b58e1..48e7b982a307 100644 --- a/drivers/net/wireguard/queueing.c +++ b/drivers/net/wireguard/queueing.c @@ -9,8 +9,7 @@ struct multicore_worker __percpu * wg_packet_percpu_multicore_worker_alloc(work_func_t function, void *ptr) { int cpu; - struct multicore_worker __percpu *worker = - alloc_percpu(struct multicore_worker); + struct multicore_worker __percpu *worker = alloc_percpu(struct multicore_worker); if (!worker) return NULL; @@ -23,7 +22,7 @@ wg_packet_percpu_multicore_worker_alloc(work_func_t function, void *ptr) } int wg_packet_queue_init(struct crypt_queue *queue, work_func_t function, - bool multicore, unsigned int len) + unsigned int len) { int ret; @@ -31,25 +30,78 @@ int wg_packet_queue_init(struct crypt_queue *queue, work_func_t function, ret = ptr_ring_init(&queue->ring, len, GFP_KERNEL); if (ret) return ret; - if (function) { - if (multicore) { - queue->worker = wg_packet_percpu_multicore_worker_alloc( - function, queue); - if (!queue->worker) { - ptr_ring_cleanup(&queue->ring, NULL); - return -ENOMEM; - } - } else { - INIT_WORK(&queue->work, function); - } + queue->worker = wg_packet_percpu_multicore_worker_alloc(function, queue); + if (!queue->worker) { + ptr_ring_cleanup(&queue->ring, NULL); + return -ENOMEM; } return 0; } -void wg_packet_queue_free(struct crypt_queue *queue, bool multicore) +void wg_packet_queue_free(struct crypt_queue *queue) { - if (multicore) - free_percpu(queue->worker); + free_percpu(queue->worker); WARN_ON(!__ptr_ring_empty(&queue->ring)); ptr_ring_cleanup(&queue->ring, NULL); } + +#define NEXT(skb) ((skb)->prev) +#define STUB(queue) ((struct sk_buff *)&queue->empty) + +void wg_prev_queue_init(struct prev_queue *queue) +{ + NEXT(STUB(queue)) = NULL; + queue->head = queue->tail = STUB(queue); + queue->peeked = NULL; + atomic_set(&queue->count, 0); + BUILD_BUG_ON( + offsetof(struct sk_buff, next) != offsetof(struct prev_queue, empty.next) - + offsetof(struct prev_queue, empty) || + offsetof(struct sk_buff, prev) != offsetof(struct prev_queue, empty.prev) - + offsetof(struct prev_queue, empty)); +} + +static void __wg_prev_queue_enqueue(struct prev_queue *queue, struct sk_buff *skb) +{ + WRITE_ONCE(NEXT(skb), NULL); + WRITE_ONCE(NEXT(xchg_release(&queue->head, skb)), skb); +} + +bool wg_prev_queue_enqueue(struct prev_queue *queue, struct sk_buff *skb) +{ + if (!atomic_add_unless(&queue->count, 1, MAX_QUEUED_PACKETS)) + return false; + __wg_prev_queue_enqueue(queue, skb); + return true; +} + +struct sk_buff *wg_prev_queue_dequeue(struct prev_queue *queue) +{ + struct sk_buff *tail = queue->tail, *next = smp_load_acquire(&NEXT(tail)); + + if (tail == STUB(queue)) { + if (!next) + return NULL; + queue->tail = next; + tail = next; + next = smp_load_acquire(&NEXT(next)); + } + if (next) { + queue->tail = next; + atomic_dec(&queue->count); + return tail; + } + if (tail != READ_ONCE(queue->head)) + return NULL; + __wg_prev_queue_enqueue(queue, STUB(queue)); + next = smp_load_acquire(&NEXT(tail)); + if (next) { + queue->tail = next; + atomic_dec(&queue->count); + return tail; + } + return NULL; +} + +#undef NEXT +#undef STUB diff --git a/drivers/net/wireguard/queueing.h b/drivers/net/wireguard/queueing.h index dfb674e03076..4ef2944a68bc 100644 --- a/drivers/net/wireguard/queueing.h +++ b/drivers/net/wireguard/queueing.h @@ -17,12 +17,13 @@ struct wg_device; struct wg_peer; struct multicore_worker; struct crypt_queue; +struct prev_queue; struct sk_buff; /* queueing.c APIs: */ int wg_packet_queue_init(struct crypt_queue *queue, work_func_t function, - bool multicore, unsigned int len); -void wg_packet_queue_free(struct crypt_queue *queue, bool multicore); + unsigned int len); +void wg_packet_queue_free(struct crypt_queue *queue); struct multicore_worker __percpu * wg_packet_percpu_multicore_worker_alloc(work_func_t function, void *ptr); @@ -135,8 +136,31 @@ static inline int wg_cpumask_next_online(int *next) return cpu; } +void wg_prev_queue_init(struct prev_queue *queue); + +/* Multi producer */ +bool wg_prev_queue_enqueue(struct prev_queue *queue, struct sk_buff *skb); + +/* Single consumer */ +struct sk_buff *wg_prev_queue_dequeue(struct prev_queue *queue); + +/* Single consumer */ +static inline struct sk_buff *wg_prev_queue_peek(struct prev_queue *queue) +{ + if (queue->peeked) + return queue->peeked; + queue->peeked = wg_prev_queue_dequeue(queue); + return queue->peeked; +} + +/* Single consumer */ +static inline void wg_prev_queue_drop_peeked(struct prev_queue *queue) +{ + queue->peeked = NULL; +} + static inline int wg_queue_enqueue_per_device_and_peer( - struct crypt_queue *device_queue, struct crypt_queue *peer_queue, + struct crypt_queue *device_queue, struct prev_queue *peer_queue, struct sk_buff *skb, struct workqueue_struct *wq, int *next_cpu) { int cpu; @@ -145,8 +169,9 @@ static inline int wg_queue_enqueue_per_device_and_peer( /* We first queue this up for the peer ingestion, but the consumer * will wait for the state to change to CRYPTED or DEAD before. */ - if (unlikely(ptr_ring_produce_bh(&peer_queue->ring, skb))) + if (unlikely(!wg_prev_queue_enqueue(peer_queue, skb))) return -ENOSPC; + /* Then we queue it up in the device queue, which consumes the * packet as soon as it can. */ @@ -157,9 +182,7 @@ static inline int wg_queue_enqueue_per_device_and_peer( return 0; } -static inline void wg_queue_enqueue_per_peer(struct crypt_queue *queue, - struct sk_buff *skb, - enum packet_state state) +static inline void wg_queue_enqueue_per_peer_tx(struct sk_buff *skb, enum packet_state state) { /* We take a reference, because as soon as we call atomic_set, the * peer can be freed from below us. @@ -167,14 +190,12 @@ static inline void wg_queue_enqueue_per_peer(struct crypt_queue *queue, struct wg_peer *peer = wg_peer_get(PACKET_PEER(skb)); atomic_set_release(&PACKET_CB(skb)->state, state); - queue_work_on(wg_cpumask_choose_online(&peer->serial_work_cpu, - peer->internal_id), - peer->device->packet_crypt_wq, &queue->work); + queue_work_on(wg_cpumask_choose_online(&peer->serial_work_cpu, peer->internal_id), + peer->device->packet_crypt_wq, &peer->transmit_packet_work); wg_peer_put(peer); } -static inline void wg_queue_enqueue_per_peer_napi(struct sk_buff *skb, - enum packet_state state) +static inline void wg_queue_enqueue_per_peer_rx(struct sk_buff *skb, enum packet_state state) { /* We take a reference, because as soon as we call atomic_set, the * peer can be freed from below us. diff --git a/drivers/net/wireguard/receive.c b/drivers/net/wireguard/receive.c index 2c9551ea6dc7..7dc84bcca261 100644 --- a/drivers/net/wireguard/receive.c +++ b/drivers/net/wireguard/receive.c @@ -444,7 +444,6 @@ packet_processed: int wg_packet_rx_poll(struct napi_struct *napi, int budget) { struct wg_peer *peer = container_of(napi, struct wg_peer, napi); - struct crypt_queue *queue = &peer->rx_queue; struct noise_keypair *keypair; struct endpoint endpoint; enum packet_state state; @@ -455,11 +454,10 @@ int wg_packet_rx_poll(struct napi_struct *napi, int budget) if (unlikely(budget <= 0)) return 0; - while ((skb = __ptr_ring_peek(&queue->ring)) != NULL && + while ((skb = wg_prev_queue_peek(&peer->rx_queue)) != NULL && (state = atomic_read_acquire(&PACKET_CB(skb)->state)) != PACKET_STATE_UNCRYPTED) { - __ptr_ring_discard_one(&queue->ring); - peer = PACKET_PEER(skb); + wg_prev_queue_drop_peeked(&peer->rx_queue); keypair = PACKET_CB(skb)->keypair; free = true; @@ -508,7 +506,7 @@ void wg_packet_decrypt_worker(struct work_struct *work) enum packet_state state = likely(decrypt_packet(skb, PACKET_CB(skb)->keypair)) ? PACKET_STATE_CRYPTED : PACKET_STATE_DEAD; - wg_queue_enqueue_per_peer_napi(skb, state); + wg_queue_enqueue_per_peer_rx(skb, state); if (need_resched()) cond_resched(); } @@ -531,12 +529,10 @@ static void wg_packet_consume_data(struct wg_device *wg, struct sk_buff *skb) if (unlikely(READ_ONCE(peer->is_dead))) goto err; - ret = wg_queue_enqueue_per_device_and_peer(&wg->decrypt_queue, - &peer->rx_queue, skb, - wg->packet_crypt_wq, - &wg->decrypt_queue.last_cpu); + ret = wg_queue_enqueue_per_device_and_peer(&wg->decrypt_queue, &peer->rx_queue, skb, + wg->packet_crypt_wq, &wg->decrypt_queue.last_cpu); if (unlikely(ret == -EPIPE)) - wg_queue_enqueue_per_peer_napi(skb, PACKET_STATE_DEAD); + wg_queue_enqueue_per_peer_rx(skb, PACKET_STATE_DEAD); if (likely(!ret || ret == -EPIPE)) { rcu_read_unlock_bh(); return; diff --git a/drivers/net/wireguard/send.c b/drivers/net/wireguard/send.c index f74b9341ab0f..5368f7c35b4b 100644 --- a/drivers/net/wireguard/send.c +++ b/drivers/net/wireguard/send.c @@ -239,8 +239,7 @@ void wg_packet_send_keepalive(struct wg_peer *peer) wg_packet_send_staged_packets(peer); } -static void wg_packet_create_data_done(struct sk_buff *first, - struct wg_peer *peer) +static void wg_packet_create_data_done(struct wg_peer *peer, struct sk_buff *first) { struct sk_buff *skb, *next; bool is_keepalive, data_sent = false; @@ -262,22 +261,19 @@ static void wg_packet_create_data_done(struct sk_buff *first, void wg_packet_tx_worker(struct work_struct *work) { - struct crypt_queue *queue = container_of(work, struct crypt_queue, - work); + struct wg_peer *peer = container_of(work, struct wg_peer, transmit_packet_work); struct noise_keypair *keypair; enum packet_state state; struct sk_buff *first; - struct wg_peer *peer; - while ((first = __ptr_ring_peek(&queue->ring)) != NULL && + while ((first = wg_prev_queue_peek(&peer->tx_queue)) != NULL && (state = atomic_read_acquire(&PACKET_CB(first)->state)) != PACKET_STATE_UNCRYPTED) { - __ptr_ring_discard_one(&queue->ring); - peer = PACKET_PEER(first); + wg_prev_queue_drop_peeked(&peer->tx_queue); keypair = PACKET_CB(first)->keypair; if (likely(state == PACKET_STATE_CRYPTED)) - wg_packet_create_data_done(first, peer); + wg_packet_create_data_done(peer, first); else kfree_skb_list(first); @@ -306,16 +302,14 @@ void wg_packet_encrypt_worker(struct work_struct *work) break; } } - wg_queue_enqueue_per_peer(&PACKET_PEER(first)->tx_queue, first, - state); + wg_queue_enqueue_per_peer_tx(first, state); if (need_resched()) cond_resched(); } } -static void wg_packet_create_data(struct sk_buff *first) +static void wg_packet_create_data(struct wg_peer *peer, struct sk_buff *first) { - struct wg_peer *peer = PACKET_PEER(first); struct wg_device *wg = peer->device; int ret = -EINVAL; @@ -323,13 +317,10 @@ static void wg_packet_create_data(struct sk_buff *first) if (unlikely(READ_ONCE(peer->is_dead))) goto err; - ret = wg_queue_enqueue_per_device_and_peer(&wg->encrypt_queue, - &peer->tx_queue, first, - wg->packet_crypt_wq, - &wg->encrypt_queue.last_cpu); + ret = wg_queue_enqueue_per_device_and_peer(&wg->encrypt_queue, &peer->tx_queue, first, + wg->packet_crypt_wq, &wg->encrypt_queue.last_cpu); if (unlikely(ret == -EPIPE)) - wg_queue_enqueue_per_peer(&peer->tx_queue, first, - PACKET_STATE_DEAD); + wg_queue_enqueue_per_peer_tx(first, PACKET_STATE_DEAD); err: rcu_read_unlock_bh(); if (likely(!ret || ret == -EPIPE)) @@ -393,7 +384,7 @@ void wg_packet_send_staged_packets(struct wg_peer *peer) packets.prev->next = NULL; wg_peer_get(keypair->entry.peer); PACKET_CB(packets.next)->keypair = keypair; - wg_packet_create_data(packets.next); + wg_packet_create_data(peer, packets.next); return; out_invalid: diff --git a/drivers/net/wireguard/socket.c b/drivers/net/wireguard/socket.c index 410b318e57fb..d9ad850daa79 100644 --- a/drivers/net/wireguard/socket.c +++ b/drivers/net/wireguard/socket.c @@ -53,7 +53,7 @@ static int send4(struct wg_device *wg, struct sk_buff *skb, if (unlikely(!inet_confirm_addr(sock_net(sock), NULL, 0, fl.saddr, RT_SCOPE_HOST))) { endpoint->src4.s_addr = 0; - *(__force __be32 *)&endpoint->src_if4 = 0; + endpoint->src_if4 = 0; fl.saddr = 0; if (cache) dst_cache_reset(cache); @@ -63,7 +63,7 @@ static int send4(struct wg_device *wg, struct sk_buff *skb, PTR_ERR(rt) == -EINVAL) || (!IS_ERR(rt) && rt->dst.dev->ifindex != endpoint->src_if4)))) { endpoint->src4.s_addr = 0; - *(__force __be32 *)&endpoint->src_if4 = 0; + endpoint->src_if4 = 0; fl.saddr = 0; if (cache) dst_cache_reset(cache); @@ -71,7 +71,7 @@ static int send4(struct wg_device *wg, struct sk_buff *skb, ip_rt_put(rt); rt = ip_route_output_flow(sock_net(sock), &fl, sock); } - if (unlikely(IS_ERR(rt))) { + if (IS_ERR(rt)) { ret = PTR_ERR(rt); net_dbg_ratelimited("%s: No route to %pISpfsc, error %d\n", wg->dev->name, &endpoint->addr, ret); @@ -138,7 +138,7 @@ static int send6(struct wg_device *wg, struct sk_buff *skb, } dst = ipv6_stub->ipv6_dst_lookup_flow(sock_net(sock), sock, &fl, NULL); - if (unlikely(IS_ERR(dst))) { + if (IS_ERR(dst)) { ret = PTR_ERR(dst); net_dbg_ratelimited("%s: No route to %pISpfsc, error %d\n", wg->dev->name, &endpoint->addr, ret); diff --git a/include/linux/can/can-ml.h b/include/linux/can/can-ml.h index 2f5d731ae251..8afa92d15a66 100644 --- a/include/linux/can/can-ml.h +++ b/include/linux/can/can-ml.h @@ -44,6 +44,7 @@ #include <linux/can.h> #include <linux/list.h> +#include <linux/netdevice.h> #define CAN_SFF_RCV_ARRAY_SZ (1 << CAN_SFF_ID_BITS) #define CAN_EFF_RCV_HASH_BITS 10 @@ -65,4 +66,15 @@ struct can_ml_priv { #endif }; +static inline struct can_ml_priv *can_get_ml_priv(struct net_device *dev) +{ + return netdev_get_ml_priv(dev, ML_PRIV_CAN); +} + +static inline void can_set_ml_priv(struct net_device *dev, + struct can_ml_priv *ml_priv) +{ + netdev_set_ml_priv(dev, ml_priv, ML_PRIV_CAN); +} + #endif /* CAN_ML_H */ diff --git a/include/linux/icmpv6.h b/include/linux/icmpv6.h index 452d8978ffc7..9055cb380ee2 100644 --- a/include/linux/icmpv6.h +++ b/include/linux/icmpv6.h @@ -3,6 +3,7 @@ #define _LINUX_ICMPV6_H #include <linux/skbuff.h> +#include <linux/ipv6.h> #include <uapi/linux/icmpv6.h> static inline struct icmp6hdr *icmp6_hdr(const struct sk_buff *skb) @@ -15,13 +16,16 @@ static inline struct icmp6hdr *icmp6_hdr(const struct sk_buff *skb) #if IS_ENABLED(CONFIG_IPV6) typedef void ip6_icmp_send_t(struct sk_buff *skb, u8 type, u8 code, __u32 info, - const struct in6_addr *force_saddr); + const struct in6_addr *force_saddr, + const struct inet6_skb_parm *parm); void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, - const struct in6_addr *force_saddr); + const struct in6_addr *force_saddr, + const struct inet6_skb_parm *parm); #if IS_BUILTIN(CONFIG_IPV6) -static inline void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) +static inline void __icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, + const struct inet6_skb_parm *parm) { - icmp6_send(skb, type, code, info, NULL); + icmp6_send(skb, type, code, info, NULL, parm); } static inline int inet6_register_icmp_sender(ip6_icmp_send_t *fn) { @@ -34,18 +38,28 @@ static inline int inet6_unregister_icmp_sender(ip6_icmp_send_t *fn) return 0; } #else -extern void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info); +extern void __icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, + const struct inet6_skb_parm *parm); extern int inet6_register_icmp_sender(ip6_icmp_send_t *fn); extern int inet6_unregister_icmp_sender(ip6_icmp_send_t *fn); #endif +static inline void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) +{ + __icmpv6_send(skb, type, code, info, IP6CB(skb)); +} + int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type, unsigned int data_len); #if IS_ENABLED(CONFIG_NF_NAT) void icmpv6_ndo_send(struct sk_buff *skb_in, u8 type, u8 code, __u32 info); #else -#define icmpv6_ndo_send icmpv6_send +static inline void icmpv6_ndo_send(struct sk_buff *skb_in, u8 type, u8 code, __u32 info) +{ + struct inet6_skb_parm parm = { 0 }; + __icmpv6_send(skb_in, type, code, info, &parm); +} #endif #else diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 9d1f29f0c512..70b2ad3b9884 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -85,7 +85,6 @@ struct ipv6_params { __s32 autoconf; }; extern struct ipv6_params ipv6_defaults; -#include <linux/icmpv6.h> #include <linux/tcp.h> #include <linux/udp.h> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ddf4cfc12615..f06fbee8638e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1584,6 +1584,12 @@ enum netdev_priv_flags { #define IFF_L3MDEV_RX_HANDLER IFF_L3MDEV_RX_HANDLER #define IFF_LIVE_RENAME_OK IFF_LIVE_RENAME_OK +/* Specifies the type of the struct net_device::ml_priv pointer */ +enum netdev_ml_priv_type { + ML_PRIV_NONE, + ML_PRIV_CAN, +}; + /** * struct net_device - The DEVICE structure. * @@ -1779,6 +1785,7 @@ enum netdev_priv_flags { * @nd_net: Network namespace this network device is inside * * @ml_priv: Mid-layer private + * @ml_priv_type: Mid-layer private type * @lstats: Loopback statistics * @tstats: Tunnel statistics * @dstats: Dummy statistics @@ -2094,8 +2101,10 @@ struct net_device { possible_net_t nd_net; /* mid-layer private */ + void *ml_priv; + enum netdev_ml_priv_type ml_priv_type; + union { - void *ml_priv; struct pcpu_lstats __percpu *lstats; struct pcpu_sw_netstats __percpu *tstats; struct pcpu_dstats __percpu *dstats; @@ -2286,6 +2295,29 @@ static inline void netdev_reset_rx_headroom(struct net_device *dev) netdev_set_rx_headroom(dev, -1); } +static inline void *netdev_get_ml_priv(struct net_device *dev, + enum netdev_ml_priv_type type) +{ + if (dev->ml_priv_type != type) + return NULL; + + return dev->ml_priv; +} + +static inline void netdev_set_ml_priv(struct net_device *dev, + void *ml_priv, + enum netdev_ml_priv_type type) +{ + WARN(dev->ml_priv_type && dev->ml_priv_type != type, + "Overwriting already set ml_priv_type (%u) with different ml_priv_type (%u)!\n", + dev->ml_priv_type, type); + WARN(!dev->ml_priv_type && dev->ml_priv, + "Overwriting already set ml_priv and ml_priv_type is ML_PRIV_NONE!\n"); + + dev->ml_priv = ml_priv; + dev->ml_priv_type = type; +} + /* * Net namespace inlines */ diff --git a/include/net/icmp.h b/include/net/icmp.h index 9ac2d2672a93..fd84adc47963 100644 --- a/include/net/icmp.h +++ b/include/net/icmp.h @@ -46,7 +46,11 @@ static inline void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 #if IS_ENABLED(CONFIG_NF_NAT) void icmp_ndo_send(struct sk_buff *skb_in, int type, int code, __be32 info); #else -#define icmp_ndo_send icmp_send +static inline void icmp_ndo_send(struct sk_buff *skb_in, int type, int code, __be32 info) +{ + struct ip_options opts = { 0 }; + __icmp_send(skb_in, type, code, info, &opts); +} #endif int icmp_rcv(struct sk_buff *skb); diff --git a/net/can/af_can.c b/net/can/af_can.c index 837bb8af0ec3..cce2af10eb3e 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -304,8 +304,8 @@ static struct can_dev_rcv_lists *can_dev_rcv_lists_find(struct net *net, struct net_device *dev) { if (dev) { - struct can_ml_priv *ml_priv = dev->ml_priv; - return &ml_priv->dev_rcv_lists; + struct can_ml_priv *can_ml = can_get_ml_priv(dev); + return &can_ml->dev_rcv_lists; } else { return net->can.rx_alldev_list; } @@ -790,25 +790,6 @@ void can_proto_unregister(const struct can_proto *cp) } EXPORT_SYMBOL(can_proto_unregister); -/* af_can notifier to create/remove CAN netdevice specific structs */ -static int can_notifier(struct notifier_block *nb, unsigned long msg, - void *ptr) -{ - struct net_device *dev = netdev_notifier_info_to_dev(ptr); - - if (dev->type != ARPHRD_CAN) - return NOTIFY_DONE; - - switch (msg) { - case NETDEV_REGISTER: - WARN(!dev->ml_priv, - "No CAN mid layer private allocated, please fix your driver and use alloc_candev()!\n"); - break; - } - - return NOTIFY_DONE; -} - static int can_pernet_init(struct net *net) { spin_lock_init(&net->can.rcvlists_lock); @@ -876,11 +857,6 @@ static const struct net_proto_family can_family_ops = { .owner = THIS_MODULE, }; -/* notifier block for netdevice event */ -static struct notifier_block can_netdev_notifier __read_mostly = { - .notifier_call = can_notifier, -}; - static struct pernet_operations can_pernet_ops __read_mostly = { .init = can_pernet_init, .exit = can_pernet_exit, @@ -911,17 +887,12 @@ static __init int can_init(void) err = sock_register(&can_family_ops); if (err) goto out_sock; - err = register_netdevice_notifier(&can_netdev_notifier); - if (err) - goto out_notifier; dev_add_pack(&can_packet); dev_add_pack(&canfd_packet); return 0; -out_notifier: - sock_unregister(PF_CAN); out_sock: unregister_pernet_subsys(&can_pernet_ops); out_pernet: @@ -935,7 +906,6 @@ static __exit void can_exit(void) /* protocol unregister */ dev_remove_pack(&canfd_packet); dev_remove_pack(&can_packet); - unregister_netdevice_notifier(&can_netdev_notifier); sock_unregister(PF_CAN); unregister_pernet_subsys(&can_pernet_ops); diff --git a/net/can/j1939/main.c b/net/can/j1939/main.c index bb914d8b4216..da3a7a7bcff2 100644 --- a/net/can/j1939/main.c +++ b/net/can/j1939/main.c @@ -140,9 +140,9 @@ static struct j1939_priv *j1939_priv_create(struct net_device *ndev) static inline void j1939_priv_set(struct net_device *ndev, struct j1939_priv *priv) { - struct can_ml_priv *can_ml_priv = ndev->ml_priv; + struct can_ml_priv *can_ml = can_get_ml_priv(ndev); - can_ml_priv->j1939_priv = priv; + can_ml->j1939_priv = priv; } static void __j1939_priv_release(struct kref *kref) @@ -211,12 +211,9 @@ static void __j1939_rx_release(struct kref *kref) /* get pointer to priv without increasing ref counter */ static inline struct j1939_priv *j1939_ndev_to_priv(struct net_device *ndev) { - struct can_ml_priv *can_ml_priv = ndev->ml_priv; + struct can_ml_priv *can_ml = can_get_ml_priv(ndev); - if (!can_ml_priv) - return NULL; - - return can_ml_priv->j1939_priv; + return can_ml->j1939_priv; } static struct j1939_priv *j1939_priv_get_by_ndev_locked(struct net_device *ndev) @@ -225,9 +222,6 @@ static struct j1939_priv *j1939_priv_get_by_ndev_locked(struct net_device *ndev) lockdep_assert_held(&j1939_netdev_lock); - if (ndev->type != ARPHRD_CAN) - return NULL; - priv = j1939_ndev_to_priv(ndev); if (priv) j1939_priv_get(priv); @@ -348,15 +342,16 @@ static int j1939_netdev_notify(struct notifier_block *nb, unsigned long msg, void *data) { struct net_device *ndev = netdev_notifier_info_to_dev(data); + struct can_ml_priv *can_ml = can_get_ml_priv(ndev); struct j1939_priv *priv; + if (!can_ml) + goto notify_done; + priv = j1939_priv_get_by_ndev(ndev); if (!priv) goto notify_done; - if (ndev->type != ARPHRD_CAN) - goto notify_put; - switch (msg) { case NETDEV_DOWN: j1939_cancel_active_session(priv, NULL); @@ -365,7 +360,6 @@ static int j1939_netdev_notify(struct notifier_block *nb, break; } -notify_put: j1939_priv_put(priv); notify_done: diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c index f23966526a88..56aa66147d5a 100644 --- a/net/can/j1939/socket.c +++ b/net/can/j1939/socket.c @@ -12,6 +12,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/can/can-ml.h> #include <linux/can/core.h> #include <linux/can/skb.h> #include <linux/errqueue.h> @@ -453,6 +454,7 @@ static int j1939_sk_bind(struct socket *sock, struct sockaddr *uaddr, int len) j1939_jsk_del(priv, jsk); j1939_local_ecu_put(priv, jsk->addr.src_name, jsk->addr.sa); } else { + struct can_ml_priv *can_ml; struct net_device *ndev; ndev = dev_get_by_index(net, addr->can_ifindex); @@ -461,15 +463,8 @@ static int j1939_sk_bind(struct socket *sock, struct sockaddr *uaddr, int len) goto out_release_sock; } - if (ndev->type != ARPHRD_CAN) { - dev_put(ndev); - ret = -ENODEV; - goto out_release_sock; - } - - if (!ndev->ml_priv) { - netdev_warn_once(ndev, - "No CAN mid layer private allocated, please fix your driver and use alloc_candev()!\n"); + can_ml = can_get_ml_priv(ndev); + if (!can_ml) { dev_put(ndev); ret = -ENODEV; goto out_release_sock; diff --git a/net/can/proc.c b/net/can/proc.c index 5ea8695f507e..b15760b5c1cc 100644 --- a/net/can/proc.c +++ b/net/can/proc.c @@ -322,8 +322,11 @@ static int can_rcvlist_proc_show(struct seq_file *m, void *v) /* receive list for registered CAN devices */ for_each_netdev_rcu(net, dev) { - if (dev->type == ARPHRD_CAN && dev->ml_priv) - can_rcvlist_proc_show_one(m, idx, dev, dev->ml_priv); + struct can_ml_priv *can_ml = can_get_ml_priv(dev); + + if (can_ml) + can_rcvlist_proc_show_one(m, idx, dev, + &can_ml->dev_rcv_lists); } rcu_read_unlock(); @@ -375,8 +378,10 @@ static int can_rcvlist_sff_proc_show(struct seq_file *m, void *v) /* sff receive list for registered CAN devices */ for_each_netdev_rcu(net, dev) { - if (dev->type == ARPHRD_CAN && dev->ml_priv) { - dev_rcv_lists = dev->ml_priv; + struct can_ml_priv *can_ml = can_get_ml_priv(dev); + + if (can_ml) { + dev_rcv_lists = &can_ml->dev_rcv_lists; can_rcvlist_proc_show_array(m, dev, dev_rcv_lists->rx_sff, ARRAY_SIZE(dev_rcv_lists->rx_sff)); } @@ -406,8 +411,10 @@ static int can_rcvlist_eff_proc_show(struct seq_file *m, void *v) /* eff receive list for registered CAN devices */ for_each_netdev_rcu(net, dev) { - if (dev->type == ARPHRD_CAN && dev->ml_priv) { - dev_rcv_lists = dev->ml_priv; + struct can_ml_priv *can_ml = can_get_ml_priv(dev); + + if (can_ml) { + dev_rcv_lists = &can_ml->dev_rcv_lists; can_rcvlist_proc_show_array(m, dev, dev_rcv_lists->rx_eff, ARRAY_SIZE(dev_rcv_lists->rx_eff)); } diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig index a45572cfb71a..3589224c8da9 100644 --- a/net/dsa/Kconfig +++ b/net/dsa/Kconfig @@ -9,6 +9,7 @@ menuconfig NET_DSA tristate "Distributed Switch Architecture" depends on HAVE_NET_DSA depends on BRIDGE || BRIDGE=n + depends on HSR || HSR=n select GRO_CELLS select NET_SWITCHDEV select PHYLINK diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c index f9a8cc82ae2e..bb1351c38397 100644 --- a/net/hsr/hsr_framereg.c +++ b/net/hsr/hsr_framereg.c @@ -164,8 +164,10 @@ static struct hsr_node *hsr_add_node(struct hsr_priv *hsr, * as initialization. (0 could trigger an spurious ring error warning). */ now = jiffies; - for (i = 0; i < HSR_PT_PORTS; i++) + for (i = 0; i < HSR_PT_PORTS; i++) { new_node->time_in[i] = now; + new_node->time_out[i] = now; + } for (i = 0; i < HSR_PT_PORTS; i++) new_node->seq_out[i] = seq_out; @@ -413,9 +415,12 @@ void hsr_register_frame_in(struct hsr_node *node, struct hsr_port *port, int hsr_register_frame_out(struct hsr_port *port, struct hsr_node *node, u16 sequence_nr) { - if (seq_nr_before_or_eq(sequence_nr, node->seq_out[port->type])) + if (seq_nr_before_or_eq(sequence_nr, node->seq_out[port->type]) && + time_is_after_jiffies(node->time_out[port->type] + + msecs_to_jiffies(HSR_ENTRY_FORGET_TIME))) return 1; + node->time_out[port->type] = jiffies; node->seq_out[port->type] = sequence_nr; return 0; } diff --git a/net/hsr/hsr_framereg.h b/net/hsr/hsr_framereg.h index 86b43f539f2c..d9628e7a5f05 100644 --- a/net/hsr/hsr_framereg.h +++ b/net/hsr/hsr_framereg.h @@ -75,6 +75,7 @@ struct hsr_node { enum hsr_port_type addr_B_port; unsigned long time_in[HSR_PT_PORTS]; bool time_in_stale[HSR_PT_PORTS]; + unsigned long time_out[HSR_PT_PORTS]; /* if the node is a SAN */ bool san_a; bool san_b; diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h index a169808ee78a..8f264672b70b 100644 --- a/net/hsr/hsr_main.h +++ b/net/hsr/hsr_main.h @@ -22,6 +22,7 @@ #define HSR_LIFE_CHECK_INTERVAL 2000 /* ms */ #define HSR_NODE_FORGET_TIME 60000 /* ms */ #define HSR_ANNOUNCE_INTERVAL 100 /* ms */ +#define HSR_ENTRY_FORGET_TIME 400 /* ms */ /* By how much may slave1 and slave2 timestamps of latest received frame from * each node differ before we notify of communication problem? diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 396b492c804f..616e2dc1c8fa 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -775,13 +775,14 @@ EXPORT_SYMBOL(__icmp_send); void icmp_ndo_send(struct sk_buff *skb_in, int type, int code, __be32 info) { struct sk_buff *cloned_skb = NULL; + struct ip_options opts = { 0 }; enum ip_conntrack_info ctinfo; struct nf_conn *ct; __be32 orig_ip; ct = nf_ct_get(skb_in, &ctinfo); if (!ct || !(ct->status & IPS_SRC_NAT)) { - icmp_send(skb_in, type, code, info); + __icmp_send(skb_in, type, code, info, &opts); return; } @@ -796,7 +797,7 @@ void icmp_ndo_send(struct sk_buff *skb_in, int type, int code, __be32 info) orig_ip = ip_hdr(skb_in)->saddr; ip_hdr(skb_in)->saddr = ct->tuplehash[0].tuple.src.u3.ip; - icmp_send(skb_in, type, code, info); + __icmp_send(skb_in, type, code, info, &opts); ip_hdr(skb_in)->saddr = orig_ip; out: consume_skb(cloned_skb); diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index f3d05866692e..fd1f896115c1 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -331,10 +331,9 @@ static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, st } #if IS_ENABLED(CONFIG_IPV6_MIP6) -static void mip6_addr_swap(struct sk_buff *skb) +static void mip6_addr_swap(struct sk_buff *skb, const struct inet6_skb_parm *opt) { struct ipv6hdr *iph = ipv6_hdr(skb); - struct inet6_skb_parm *opt = IP6CB(skb); struct ipv6_destopt_hao *hao; struct in6_addr tmp; int off; @@ -351,7 +350,7 @@ static void mip6_addr_swap(struct sk_buff *skb) } } #else -static inline void mip6_addr_swap(struct sk_buff *skb) {} +static inline void mip6_addr_swap(struct sk_buff *skb, const struct inet6_skb_parm *opt) {} #endif static struct dst_entry *icmpv6_route_lookup(struct net *net, @@ -446,7 +445,8 @@ static int icmp6_iif(const struct sk_buff *skb) * Send an ICMP message in response to a packet in error */ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, - const struct in6_addr *force_saddr) + const struct in6_addr *force_saddr, + const struct inet6_skb_parm *parm) { struct inet6_dev *idev = NULL; struct ipv6hdr *hdr = ipv6_hdr(skb); @@ -542,7 +542,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, if (!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, type)) goto out_bh_enable; - mip6_addr_swap(skb); + mip6_addr_swap(skb, parm); sk = icmpv6_xmit_lock(net); if (!sk) @@ -559,7 +559,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, /* select a more meaningful saddr from input if */ struct net_device *in_netdev; - in_netdev = dev_get_by_index(net, IP6CB(skb)->iif); + in_netdev = dev_get_by_index(net, parm->iif); if (in_netdev) { ipv6_dev_get_saddr(net, in_netdev, &fl6.daddr, inet6_sk(sk)->srcprefs, @@ -640,7 +640,7 @@ EXPORT_SYMBOL(icmp6_send); */ void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos) { - icmp6_send(skb, ICMPV6_PARAMPROB, code, pos, NULL); + icmp6_send(skb, ICMPV6_PARAMPROB, code, pos, NULL, IP6CB(skb)); kfree_skb(skb); } @@ -697,10 +697,10 @@ int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type, } if (type == ICMP_TIME_EXCEEDED) icmp6_send(skb2, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, - info, &temp_saddr); + info, &temp_saddr, IP6CB(skb2)); else icmp6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, - info, &temp_saddr); + info, &temp_saddr, IP6CB(skb2)); if (rt) ip6_rt_put(rt); diff --git a/net/ipv6/ip6_icmp.c b/net/ipv6/ip6_icmp.c index 70c8c2f36c98..9e3574880cb0 100644 --- a/net/ipv6/ip6_icmp.c +++ b/net/ipv6/ip6_icmp.c @@ -33,23 +33,25 @@ int inet6_unregister_icmp_sender(ip6_icmp_send_t *fn) } EXPORT_SYMBOL(inet6_unregister_icmp_sender); -void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) +void __icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, + const struct inet6_skb_parm *parm) { ip6_icmp_send_t *send; rcu_read_lock(); send = rcu_dereference(ip6_icmp_send); if (send) - send(skb, type, code, info, NULL); + send(skb, type, code, info, NULL, parm); rcu_read_unlock(); } -EXPORT_SYMBOL(icmpv6_send); +EXPORT_SYMBOL(__icmpv6_send); #endif #if IS_ENABLED(CONFIG_NF_NAT) #include <net/netfilter/nf_conntrack.h> void icmpv6_ndo_send(struct sk_buff *skb_in, u8 type, u8 code, __u32 info) { + struct inet6_skb_parm parm = { 0 }; struct sk_buff *cloned_skb = NULL; enum ip_conntrack_info ctinfo; struct in6_addr orig_ip; @@ -57,7 +59,7 @@ void icmpv6_ndo_send(struct sk_buff *skb_in, u8 type, u8 code, __u32 info) ct = nf_ct_get(skb_in, &ctinfo); if (!ct || !(ct->status & IPS_SRC_NAT)) { - icmpv6_send(skb_in, type, code, info); + __icmpv6_send(skb_in, type, code, info, &parm); return; } @@ -72,7 +74,7 @@ void icmpv6_ndo_send(struct sk_buff *skb_in, u8 type, u8 code, __u32 info) orig_ip = ipv6_hdr(skb_in)->saddr; ipv6_hdr(skb_in)->saddr = ct->tuplehash[0].tuple.src.u3.in6; - icmpv6_send(skb_in, type, code, info); + __icmpv6_send(skb_in, type, code, info, &parm); ipv6_hdr(skb_in)->saddr = orig_ip; out: consume_skb(cloned_skb); diff --git a/net/mptcp/options.c b/net/mptcp/options.c index b63574d6b812..444a38681e93 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -411,6 +411,7 @@ static void clear_3rdack_retransmission(struct sock *sk) } static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb, + bool snd_data_fin_enable, unsigned int *size, unsigned int remaining, struct mptcp_out_options *opts) @@ -428,9 +429,10 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb, if (!skb) return false; - /* MPC/MPJ needed only on 3rd ack packet */ - if (subflow->fully_established || - subflow->snd_isn != TCP_SKB_CB(skb)->seq) + /* MPC/MPJ needed only on 3rd ack packet, DATA_FIN and TCP shutdown take precedence */ + if (subflow->fully_established || snd_data_fin_enable || + subflow->snd_isn != TCP_SKB_CB(skb)->seq || + sk->sk_state != TCP_ESTABLISHED) return false; if (subflow->mp_capable) { @@ -502,20 +504,20 @@ static void mptcp_write_data_fin(struct mptcp_subflow_context *subflow, } static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb, + bool snd_data_fin_enable, unsigned int *size, unsigned int remaining, struct mptcp_out_options *opts) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); struct mptcp_sock *msk = mptcp_sk(subflow->conn); - u64 snd_data_fin_enable, ack_seq; unsigned int dss_size = 0; struct mptcp_ext *mpext; unsigned int ack_size; bool ret = false; + u64 ack_seq; mpext = skb ? mptcp_get_ext(skb) : NULL; - snd_data_fin_enable = mptcp_data_fin_enabled(msk); if (!skb || (mpext && mpext->use_map) || snd_data_fin_enable) { unsigned int map_size; @@ -717,12 +719,15 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb, unsigned int *size, unsigned int remaining, struct mptcp_out_options *opts) { + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); + struct mptcp_sock *msk = mptcp_sk(subflow->conn); unsigned int opt_size = 0; + bool snd_data_fin; bool ret = false; opts->suboptions = 0; - if (unlikely(mptcp_check_fallback(sk))) + if (unlikely(__mptcp_check_fallback(msk))) return false; /* prevent adding of any MPTCP related options on reset packet @@ -731,10 +736,10 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb, if (unlikely(skb && TCP_SKB_CB(skb)->tcp_flags & TCPHDR_RST)) return false; - if (mptcp_established_options_mp(sk, skb, &opt_size, remaining, opts)) + snd_data_fin = mptcp_data_fin_enabled(msk); + if (mptcp_established_options_mp(sk, skb, snd_data_fin, &opt_size, remaining, opts)) ret = true; - else if (mptcp_established_options_dss(sk, skb, &opt_size, remaining, - opts)) + else if (mptcp_established_options_dss(sk, skb, snd_data_fin, &opt_size, remaining, opts)) ret = true; /* we reserved enough space for the above options, and exceeding the diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index a57f3eab7b6a..c5d5e68940ea 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -11,6 +11,7 @@ #include <linux/netdevice.h> #include <linux/sched/signal.h> #include <linux/atomic.h> +#include <linux/igmp.h> #include <net/sock.h> #include <net/inet_common.h> #include <net/inet_hashtables.h> @@ -19,6 +20,7 @@ #include <net/tcp_states.h> #if IS_ENABLED(CONFIG_MPTCP_IPV6) #include <net/transp_v6.h> +#include <net/addrconf.h> #endif #include <net/mptcp.h> #include <net/xfrm.h> @@ -2264,13 +2266,12 @@ static void mptcp_worker(struct work_struct *work) __mptcp_check_send_data_fin(sk); mptcp_check_data_fin(sk); - /* if the msk data is completely acked, or the socket timedout, - * there is no point in keeping around an orphaned sk + /* There is no point in keeping around an orphaned sk timedout or + * closed, but we need the msk around to reply to incoming DATA_FIN, + * even if it is orphaned and in FIN_WAIT2 state */ if (sock_flag(sk, SOCK_DEAD) && - (mptcp_check_close_timeout(sk) || - (state != sk->sk_state && - ((1 << inet_sk_state_load(sk)) & (TCPF_CLOSE | TCPF_FIN_WAIT2))))) { + (mptcp_check_close_timeout(sk) || sk->sk_state == TCP_CLOSE)) { inet_sk_state_store(sk, TCP_CLOSE); __mptcp_destroy_sock(sk); goto unlock; @@ -3375,10 +3376,34 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock, return mask; } +static int mptcp_release(struct socket *sock) +{ + struct mptcp_subflow_context *subflow; + struct sock *sk = sock->sk; + struct mptcp_sock *msk; + + if (!sk) + return 0; + + lock_sock(sk); + + msk = mptcp_sk(sk); + + mptcp_for_each_subflow(msk, subflow) { + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + + ip_mc_drop_socket(ssk); + } + + release_sock(sk); + + return inet_release(sock); +} + static const struct proto_ops mptcp_stream_ops = { .family = PF_INET, .owner = THIS_MODULE, - .release = inet_release, + .release = mptcp_release, .bind = mptcp_bind, .connect = mptcp_stream_connect, .socketpair = sock_no_socketpair, @@ -3470,10 +3495,35 @@ void __init mptcp_proto_init(void) } #if IS_ENABLED(CONFIG_MPTCP_IPV6) +static int mptcp6_release(struct socket *sock) +{ + struct mptcp_subflow_context *subflow; + struct mptcp_sock *msk; + struct sock *sk = sock->sk; + + if (!sk) + return 0; + + lock_sock(sk); + + msk = mptcp_sk(sk); + + mptcp_for_each_subflow(msk, subflow) { + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + + ip_mc_drop_socket(ssk); + ipv6_sock_mc_close(ssk); + ipv6_sock_ac_close(ssk); + } + + release_sock(sk); + return inet6_release(sock); +} + static const struct proto_ops mptcp_v6_stream_ops = { .family = PF_INET6, .owner = THIS_MODULE, - .release = inet6_release, + .release = mptcp6_release, .bind = mptcp_bind, .connect = mptcp_stream_connect, .socketpair = sock_no_socketpair, diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 06e233410e0e..e1fbcab257e6 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -1096,6 +1096,12 @@ static void subflow_data_ready(struct sock *sk) msk = mptcp_sk(parent); if (state & TCPF_LISTEN) { + /* MPJ subflow are removed from accept queue before reaching here, + * avoid stray wakeups + */ + if (reqsk_queue_empty(&inet_csk(sk)->icsk_accept_queue)) + return; + set_bit(MPTCP_DATA_READY, &msk->flags); parent->sk_data_ready(parent); return; diff --git a/net/psample/psample.c b/net/psample/psample.c index 33e238c965bd..482c07f2766b 100644 --- a/net/psample/psample.c +++ b/net/psample/psample.c @@ -309,10 +309,10 @@ static int psample_tunnel_meta_len(struct ip_tunnel_info *tun_info) unsigned short tun_proto = ip_tunnel_info_af(tun_info); const struct ip_tunnel_key *tun_key = &tun_info->key; int tun_opts_len = tun_info->options_len; - int sum = 0; + int sum = nla_total_size(0); /* PSAMPLE_ATTR_TUNNEL */ if (tun_key->tun_flags & TUNNEL_KEY) - sum += nla_total_size(sizeof(u64)); + sum += nla_total_size_64bit(sizeof(u64)); if (tun_info->mode & IP_TUNNEL_INFO_BRIDGE) sum += nla_total_size(0); diff --git a/net/qrtr/tun.c b/net/qrtr/tun.c index b238c40a9984..304b41fea5ab 100644 --- a/net/qrtr/tun.c +++ b/net/qrtr/tun.c @@ -31,6 +31,7 @@ static int qrtr_tun_send(struct qrtr_endpoint *ep, struct sk_buff *skb) static int qrtr_tun_open(struct inode *inode, struct file *filp) { struct qrtr_tun *tun; + int ret; tun = kzalloc(sizeof(*tun), GFP_KERNEL); if (!tun) @@ -43,7 +44,16 @@ static int qrtr_tun_open(struct inode *inode, struct file *filp) filp->private_data = tun; - return qrtr_endpoint_register(&tun->ep, QRTR_EP_NID_AUTO); + ret = qrtr_endpoint_register(&tun->ep, QRTR_EP_NID_AUTO); + if (ret) + goto out; + + return 0; + +out: + filp->private_data = NULL; + kfree(tun); + return ret; } static ssize_t qrtr_tun_read_iter(struct kiocb *iocb, struct iov_iter *to) diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 2409e522c68f..d097b5c15faa 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -1417,6 +1417,21 @@ static int fl_validate_ct_state(u16 state, struct nlattr *tb, return -EINVAL; } + if (state & TCA_FLOWER_KEY_CT_FLAGS_INVALID && + state & ~(TCA_FLOWER_KEY_CT_FLAGS_TRACKED | + TCA_FLOWER_KEY_CT_FLAGS_INVALID)) { + NL_SET_ERR_MSG_ATTR(extack, tb, + "when inv is set, only trk may be set"); + return -EINVAL; + } + + if (state & TCA_FLOWER_KEY_CT_FLAGS_NEW && + state & TCA_FLOWER_KEY_CT_FLAGS_REPLY) { + NL_SET_ERR_MSG_ATTR(extack, tb, + "new and rpl are mutually exclusive"); + return -EINVAL; + } + return 0; } diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh index 74c69b75f6f5..7ed7cd95e58f 100755 --- a/tools/testing/selftests/wireguard/netns.sh +++ b/tools/testing/selftests/wireguard/netns.sh @@ -39,7 +39,7 @@ ip0() { pretty 0 "ip $*"; ip -n $netns0 "$@"; } ip1() { pretty 1 "ip $*"; ip -n $netns1 "$@"; } ip2() { pretty 2 "ip $*"; ip -n $netns2 "$@"; } sleep() { read -t "$1" -N 1 || true; } -waitiperf() { pretty "${1//*-}" "wait for iperf:5201 pid $2"; while [[ $(ss -N "$1" -tlpH 'sport = 5201') != *\"iperf3\",pid=$2,fd=* ]]; do sleep 0.1; done; } +waitiperf() { pretty "${1//*-}" "wait for iperf:${3:-5201} pid $2"; while [[ $(ss -N "$1" -tlpH "sport = ${3:-5201}") != *\"iperf3\",pid=$2,fd=* ]]; do sleep 0.1; done; } waitncatudp() { pretty "${1//*-}" "wait for udp:1111 pid $2"; while [[ $(ss -N "$1" -ulpH 'sport = 1111') != *\"ncat\",pid=$2,fd=* ]]; do sleep 0.1; done; } waitiface() { pretty "${1//*-}" "wait for $2 to come up"; ip netns exec "$1" bash -c "while [[ \$(< \"/sys/class/net/$2/operstate\") != up ]]; do read -t .1 -N 0 || true; done;"; } @@ -141,6 +141,19 @@ tests() { n2 iperf3 -s -1 -B fd00::2 & waitiperf $netns2 $! n1 iperf3 -Z -t 3 -b 0 -u -c fd00::2 + + # TCP over IPv4, in parallel + for max in 4 5 50; do + local pids=( ) + for ((i=0; i < max; ++i)) do + n2 iperf3 -p $(( 5200 + i )) -s -1 -B 192.168.241.2 & + pids+=( $! ); waitiperf $netns2 $! $(( 5200 + i )) + done + for ((i=0; i < max; ++i)) do + n1 iperf3 -Z -t 3 -p $(( 5200 + i )) -c 192.168.241.2 & + done + wait "${pids[@]}" + done } [[ $(ip1 link show dev wg0) =~ mtu\ ([0-9]+) ]] && orig_mtu="${BASH_REMATCH[1]}" |