diff options
98 files changed, 768 insertions, 331 deletions
diff --git a/Documentation/devicetree/bindings/net/wireless/qca,ath9k.yaml b/Documentation/devicetree/bindings/net/wireless/qca,ath9k.yaml index 8cd0adbf7021..7029cb1f38ff 100644 --- a/Documentation/devicetree/bindings/net/wireless/qca,ath9k.yaml +++ b/Documentation/devicetree/bindings/net/wireless/qca,ath9k.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Atheros ath9k wireless devices Generic Binding maintainers: - - Kalle Valo <kvalo@codeaurora.org> + - Toke Høiland-Jørgensen <toke@toke.dk> description: | This node provides properties for configuring the ath9k wireless device. diff --git a/Documentation/devicetree/bindings/net/wireless/qcom,ath11k.yaml b/Documentation/devicetree/bindings/net/wireless/qcom,ath11k.yaml index 8c01fdba134b..a677b056f112 100644 --- a/Documentation/devicetree/bindings/net/wireless/qcom,ath11k.yaml +++ b/Documentation/devicetree/bindings/net/wireless/qcom,ath11k.yaml @@ -9,7 +9,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Technologies ath11k wireless devices Generic Binding maintainers: - - Kalle Valo <kvalo@codeaurora.org> + - Kalle Valo <kvalo@kernel.org> description: | These are dt entries for Qualcomm Technologies, Inc. IEEE 802.11ax diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index 9f41961d11d5..b3a534ed0e7c 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -1085,7 +1085,7 @@ cipso_cache_enable - BOOLEAN cipso_cache_bucket_size - INTEGER The CIPSO label cache consists of a fixed size hash table with each hash bucket containing a number of cache entries. This variable limits - the number of entries in each hash bucket; the larger the value the + the number of entries in each hash bucket; the larger the value is, the more CIPSO label mappings that can be cached. When the number of entries in a given hash bucket reaches this limit adding new entries causes the oldest entry in the bucket to be removed to make room. @@ -1179,7 +1179,7 @@ ip_autobind_reuse - BOOLEAN option should only be set by experts. Default: 0 -ip_dynaddr - BOOLEAN +ip_dynaddr - INTEGER If set non-zero, enables support for dynamic addresses. If set to a non-zero value larger than 1, a kernel log message will be printed when dynamic address rewriting diff --git a/MAINTAINERS b/MAINTAINERS index f679152bdbad..08b9ef368709 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1038,6 +1038,7 @@ F: arch/arm64/boot/dts/amd/ AMD XGBE DRIVER M: Tom Lendacky <thomas.lendacky@amd.com> +M: "Shyam Sundar S K" <Shyam-sundar.S-k@amd.com> L: netdev@vger.kernel.org S: Supported F: arch/arm64/boot/dts/amd/amd-seattle-xgbe*.dtsi diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c index 831833911a52..8647125d60ae 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c @@ -379,7 +379,7 @@ static void aq_pci_shutdown(struct pci_dev *pdev) } } -static int aq_suspend_common(struct device *dev, bool deep) +static int aq_suspend_common(struct device *dev) { struct aq_nic_s *nic = pci_get_drvdata(to_pci_dev(dev)); @@ -392,17 +392,15 @@ static int aq_suspend_common(struct device *dev, bool deep) if (netif_running(nic->ndev)) aq_nic_stop(nic); - if (deep) { - aq_nic_deinit(nic, !nic->aq_hw->aq_nic_cfg->wol); - aq_nic_set_power(nic); - } + aq_nic_deinit(nic, !nic->aq_hw->aq_nic_cfg->wol); + aq_nic_set_power(nic); rtnl_unlock(); return 0; } -static int atl_resume_common(struct device *dev, bool deep) +static int atl_resume_common(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); struct aq_nic_s *nic; @@ -415,11 +413,6 @@ static int atl_resume_common(struct device *dev, bool deep) pci_set_power_state(pdev, PCI_D0); pci_restore_state(pdev); - if (deep) { - /* Reinitialize Nic/Vecs objects */ - aq_nic_deinit(nic, !nic->aq_hw->aq_nic_cfg->wol); - } - if (netif_running(nic->ndev)) { ret = aq_nic_init(nic); if (ret) @@ -444,22 +437,22 @@ err_exit: static int aq_pm_freeze(struct device *dev) { - return aq_suspend_common(dev, true); + return aq_suspend_common(dev); } static int aq_pm_suspend_poweroff(struct device *dev) { - return aq_suspend_common(dev, true); + return aq_suspend_common(dev); } static int aq_pm_thaw(struct device *dev) { - return atl_resume_common(dev, true); + return atl_resume_common(dev); } static int aq_pm_resume_restore(struct device *dev) { - return atl_resume_common(dev, true); + return atl_resume_common(dev); } static const struct dev_pm_ops aq_pm_ops = { diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 56b46b8206a7..cf9b00576ed3 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -7790,7 +7790,7 @@ hwrm_dbg_qcaps_exit: static int bnxt_hwrm_queue_qportcfg(struct bnxt *bp); -static int bnxt_hwrm_func_qcaps(struct bnxt *bp) +int bnxt_hwrm_func_qcaps(struct bnxt *bp) { int rc; @@ -10065,7 +10065,8 @@ static int bnxt_hwrm_if_change(struct bnxt *bp, bool up) if (flags & FUNC_DRV_IF_CHANGE_RESP_FLAGS_RESC_CHANGE) resc_reinit = true; - if (flags & FUNC_DRV_IF_CHANGE_RESP_FLAGS_HOT_FW_RESET_DONE) + if (flags & FUNC_DRV_IF_CHANGE_RESP_FLAGS_HOT_FW_RESET_DONE || + test_bit(BNXT_STATE_FW_RESET_DET, &bp->state)) fw_reset = true; else bnxt_remap_fw_health_regs(bp); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index a1dca8c58f54..075c6206325c 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -2314,6 +2314,7 @@ int bnxt_cancel_reservations(struct bnxt *bp, bool fw_reset); int bnxt_hwrm_alloc_wol_fltr(struct bnxt *bp); int bnxt_hwrm_free_wol_fltr(struct bnxt *bp); int bnxt_hwrm_func_resc_qcaps(struct bnxt *bp, bool all); +int bnxt_hwrm_func_qcaps(struct bnxt *bp); int bnxt_hwrm_fw_set_time(struct bnxt *); int bnxt_open_nic(struct bnxt *, bool, bool); int bnxt_half_open_nic(struct bnxt *bp); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c index 3528ce9849e6..6b3d4f4c2a75 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c @@ -979,9 +979,11 @@ static int bnxt_dl_info_get(struct devlink *dl, struct devlink_info_req *req, if (rc) return rc; - rc = bnxt_dl_livepatch_info_put(bp, req, BNXT_FW_SRT_PATCH); - if (rc) - return rc; + if (BNXT_CHIP_P5(bp)) { + rc = bnxt_dl_livepatch_info_put(bp, req, BNXT_FW_SRT_PATCH); + if (rc) + return rc; + } return bnxt_dl_livepatch_info_put(bp, req, BNXT_FW_CRT_PATCH); } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c index 562f8f68a47d..7f3c0875b6f5 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c @@ -76,14 +76,23 @@ static int bnxt_refclk_read(struct bnxt *bp, struct ptp_system_timestamp *sts, u64 *ns) { struct bnxt_ptp_cfg *ptp = bp->ptp_cfg; + u32 high_before, high_now, low; if (test_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) return -EIO; + high_before = readl(bp->bar0 + ptp->refclk_mapped_regs[1]); ptp_read_system_prets(sts); - *ns = readl(bp->bar0 + ptp->refclk_mapped_regs[0]); + low = readl(bp->bar0 + ptp->refclk_mapped_regs[0]); ptp_read_system_postts(sts); - *ns |= (u64)readl(bp->bar0 + ptp->refclk_mapped_regs[1]) << 32; + high_now = readl(bp->bar0 + ptp->refclk_mapped_regs[1]); + if (high_now != high_before) { + ptp_read_system_prets(sts); + low = readl(bp->bar0 + ptp->refclk_mapped_regs[0]); + ptp_read_system_postts(sts); + } + *ns = ((u64)high_now << 32) | low; + return 0; } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c index ddf2f3963abe..a1a2c7a64fd5 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c @@ -823,8 +823,10 @@ static int bnxt_sriov_enable(struct bnxt *bp, int *num_vfs) goto err_out2; rc = pci_enable_sriov(bp->pdev, *num_vfs); - if (rc) + if (rc) { + bnxt_ulp_sriov_cfg(bp, 0); goto err_out2; + } return 0; @@ -832,6 +834,9 @@ err_out2: /* Free the resources reserved for various VF's */ bnxt_hwrm_func_vf_resource_free(bp, *num_vfs); + /* Restore the max resources */ + bnxt_hwrm_func_qcaps(bp); + err_out1: bnxt_free_vf_resources(bp); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c index f02fe906dedb..f53387ed0167 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c @@ -28,7 +28,7 @@ struct bnxt_sw_tx_bd *bnxt_xmit_bd(struct bnxt *bp, struct xdp_buff *xdp) { struct skb_shared_info *sinfo; - struct bnxt_sw_tx_bd *tx_buf, *first_buf; + struct bnxt_sw_tx_bd *tx_buf; struct tx_bd *txbd; int num_frags = 0; u32 flags; @@ -43,13 +43,14 @@ struct bnxt_sw_tx_bd *bnxt_xmit_bd(struct bnxt *bp, /* fill up the first buffer */ prod = txr->tx_prod; tx_buf = &txr->tx_buf_ring[prod]; - first_buf = tx_buf; tx_buf->nr_frags = num_frags; if (xdp) tx_buf->page = virt_to_head_page(xdp->data); txbd = &txr->tx_desc_ring[TX_RING(prod)][TX_IDX(prod)]; - flags = ((len) << TX_BD_LEN_SHIFT) | ((num_frags + 1) << TX_BD_FLAGS_BD_CNT_SHIFT); + flags = (len << TX_BD_LEN_SHIFT) | + ((num_frags + 1) << TX_BD_FLAGS_BD_CNT_SHIFT) | + bnxt_lhint_arr[len >> 9]; txbd->tx_bd_len_flags_type = cpu_to_le32(flags); txbd->tx_bd_opaque = prod; txbd->tx_bd_haddr = cpu_to_le64(mapping); @@ -82,7 +83,6 @@ struct bnxt_sw_tx_bd *bnxt_xmit_bd(struct bnxt *bp, flags = frag_len << TX_BD_LEN_SHIFT; txbd->tx_bd_len_flags_type = cpu_to_le32(flags); - txbd->tx_bd_opaque = prod; txbd->tx_bd_haddr = cpu_to_le64(frag_mapping); len = frag_len; @@ -96,7 +96,7 @@ struct bnxt_sw_tx_bd *bnxt_xmit_bd(struct bnxt *bp, prod = NEXT_TX(prod); txr->tx_prod = prod; - return first_buf; + return tx_buf; } static void __bnxt_xmit_xdp(struct bnxt *bp, struct bnxt_tx_ring_info *txr, diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c index 4af5561cbfc5..7c760aa65540 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c @@ -1392,7 +1392,7 @@ static void chtls_pass_accept_request(struct sock *sk, th_ecn = tcph->ece && tcph->cwr; if (th_ecn) { ect = !INET_ECN_is_not_ect(ip_dsfield); - ecn_ok = sock_net(sk)->ipv4.sysctl_tcp_ecn; + ecn_ok = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn); if ((!ect && ecn_ok) || tcp_ca_needs_ecn(sk)) inet_rsk(oreq)->ecn_ok = 1; } diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index 5231818943c6..c03663785a8d 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -1764,6 +1764,19 @@ cleanup_clk: return rc; } +static bool ftgmac100_has_child_node(struct device_node *np, const char *name) +{ + struct device_node *child_np = of_get_child_by_name(np, name); + bool ret = false; + + if (child_np) { + ret = true; + of_node_put(child_np); + } + + return ret; +} + static int ftgmac100_probe(struct platform_device *pdev) { struct resource *res; @@ -1883,7 +1896,7 @@ static int ftgmac100_probe(struct platform_device *pdev) /* Display what we found */ phy_attached_info(phy); - } else if (np && !of_get_child_by_name(np, "mdio")) { + } else if (np && !ftgmac100_has_child_node(np, "mdio")) { /* Support legacy ASPEED devicetree descriptions that decribe a * MAC with an embedded MDIO controller but have no "mdio" * child node. Automatically scan the MDIO bus for available diff --git a/drivers/net/ethernet/intel/ice/ice_devids.h b/drivers/net/ethernet/intel/ice/ice_devids.h index 61dd2f18dee8..b41bc3dc1745 100644 --- a/drivers/net/ethernet/intel/ice/ice_devids.h +++ b/drivers/net/ethernet/intel/ice/ice_devids.h @@ -5,6 +5,7 @@ #define _ICE_DEVIDS_H_ /* Device IDs */ +#define ICE_DEV_ID_E822_SI_DFLT 0x1888 /* Intel(R) Ethernet Connection E823-L for backplane */ #define ICE_DEV_ID_E823L_BACKPLANE 0x124C /* Intel(R) Ethernet Connection E823-L for SFP */ diff --git a/drivers/net/ethernet/intel/ice/ice_devlink.c b/drivers/net/ethernet/intel/ice/ice_devlink.c index 3991d62473bf..3337314a7b35 100644 --- a/drivers/net/ethernet/intel/ice/ice_devlink.c +++ b/drivers/net/ethernet/intel/ice/ice_devlink.c @@ -814,6 +814,8 @@ void ice_devlink_destroy_vf_port(struct ice_vf *vf) devlink_port_unregister(devlink_port); } +#define ICE_DEVLINK_READ_BLK_SIZE (1024 * 1024) + /** * ice_devlink_nvm_snapshot - Capture a snapshot of the NVM flash contents * @devlink: the devlink instance @@ -840,8 +842,9 @@ static int ice_devlink_nvm_snapshot(struct devlink *devlink, struct ice_pf *pf = devlink_priv(devlink); struct device *dev = ice_pf_to_dev(pf); struct ice_hw *hw = &pf->hw; - void *nvm_data; - u32 nvm_size; + u8 *nvm_data, *tmp, i; + u32 nvm_size, left; + s8 num_blks; int status; nvm_size = hw->flash.flash_size; @@ -849,26 +852,44 @@ static int ice_devlink_nvm_snapshot(struct devlink *devlink, if (!nvm_data) return -ENOMEM; - status = ice_acquire_nvm(hw, ICE_RES_READ); - if (status) { - dev_dbg(dev, "ice_acquire_nvm failed, err %d aq_err %d\n", - status, hw->adminq.sq_last_status); - NL_SET_ERR_MSG_MOD(extack, "Failed to acquire NVM semaphore"); - vfree(nvm_data); - return status; - } - status = ice_read_flat_nvm(hw, 0, &nvm_size, nvm_data, false); - if (status) { - dev_dbg(dev, "ice_read_flat_nvm failed after reading %u bytes, err %d aq_err %d\n", - nvm_size, status, hw->adminq.sq_last_status); - NL_SET_ERR_MSG_MOD(extack, "Failed to read NVM contents"); + num_blks = DIV_ROUND_UP(nvm_size, ICE_DEVLINK_READ_BLK_SIZE); + tmp = nvm_data; + left = nvm_size; + + /* Some systems take longer to read the NVM than others which causes the + * FW to reclaim the NVM lock before the entire NVM has been read. Fix + * this by breaking the reads of the NVM into smaller chunks that will + * probably not take as long. This has some overhead since we are + * increasing the number of AQ commands, but it should always work + */ + for (i = 0; i < num_blks; i++) { + u32 read_sz = min_t(u32, ICE_DEVLINK_READ_BLK_SIZE, left); + + status = ice_acquire_nvm(hw, ICE_RES_READ); + if (status) { + dev_dbg(dev, "ice_acquire_nvm failed, err %d aq_err %d\n", + status, hw->adminq.sq_last_status); + NL_SET_ERR_MSG_MOD(extack, "Failed to acquire NVM semaphore"); + vfree(nvm_data); + return -EIO; + } + + status = ice_read_flat_nvm(hw, i * ICE_DEVLINK_READ_BLK_SIZE, + &read_sz, tmp, false); + if (status) { + dev_dbg(dev, "ice_read_flat_nvm failed after reading %u bytes, err %d aq_err %d\n", + read_sz, status, hw->adminq.sq_last_status); + NL_SET_ERR_MSG_MOD(extack, "Failed to read NVM contents"); + ice_release_nvm(hw); + vfree(nvm_data); + return -EIO; + } ice_release_nvm(hw); - vfree(nvm_data); - return status; - } - ice_release_nvm(hw); + tmp += read_sz; + left -= read_sz; + } *data = nvm_data; diff --git a/drivers/net/ethernet/intel/ice/ice_fw_update.c b/drivers/net/ethernet/intel/ice/ice_fw_update.c index 665a344fb9c0..3dc5662d62a6 100644 --- a/drivers/net/ethernet/intel/ice/ice_fw_update.c +++ b/drivers/net/ethernet/intel/ice/ice_fw_update.c @@ -736,7 +736,87 @@ static int ice_finalize_update(struct pldmfw *context) return 0; } -static const struct pldmfw_ops ice_fwu_ops = { +struct ice_pldm_pci_record_id { + u32 vendor; + u32 device; + u32 subsystem_vendor; + u32 subsystem_device; +}; + +/** + * ice_op_pci_match_record - Check if a PCI device matches the record + * @context: PLDM fw update structure + * @record: list of records extracted from the PLDM image + * + * Determine if the PCI device associated with this device matches the record + * data provided. + * + * Searches the descriptor TLVs and extracts the relevant descriptor data into + * a pldm_pci_record_id. This is then compared against the PCI device ID + * information. + * + * Returns: true if the device matches the record, false otherwise. + */ +static bool +ice_op_pci_match_record(struct pldmfw *context, struct pldmfw_record *record) +{ + struct pci_dev *pdev = to_pci_dev(context->dev); + struct ice_pldm_pci_record_id id = { + .vendor = PCI_ANY_ID, + .device = PCI_ANY_ID, + .subsystem_vendor = PCI_ANY_ID, + .subsystem_device = PCI_ANY_ID, + }; + struct pldmfw_desc_tlv *desc; + + list_for_each_entry(desc, &record->descs, entry) { + u16 value; + int *ptr; + + switch (desc->type) { + case PLDM_DESC_ID_PCI_VENDOR_ID: + ptr = &id.vendor; + break; + case PLDM_DESC_ID_PCI_DEVICE_ID: + ptr = &id.device; + break; + case PLDM_DESC_ID_PCI_SUBVENDOR_ID: + ptr = &id.subsystem_vendor; + break; + case PLDM_DESC_ID_PCI_SUBDEV_ID: + ptr = &id.subsystem_device; + break; + default: + /* Skip unrelated TLVs */ + continue; + } + + value = get_unaligned_le16(desc->data); + /* A value of zero for one of the descriptors is sometimes + * used when the record should ignore this field when matching + * device. For example if the record applies to any subsystem + * device or vendor. + */ + if (value) + *ptr = value; + else + *ptr = PCI_ANY_ID; + } + + /* the E822 device can have a generic device ID so check for that */ + if ((id.vendor == PCI_ANY_ID || id.vendor == pdev->vendor) && + (id.device == PCI_ANY_ID || id.device == pdev->device || + id.device == ICE_DEV_ID_E822_SI_DFLT) && + (id.subsystem_vendor == PCI_ANY_ID || + id.subsystem_vendor == pdev->subsystem_vendor) && + (id.subsystem_device == PCI_ANY_ID || + id.subsystem_device == pdev->subsystem_device)) + return true; + + return false; +} + +static const struct pldmfw_ops ice_fwu_ops_e810 = { .match_record = &pldmfw_op_pci_match_record, .send_package_data = &ice_send_package_data, .send_component_table = &ice_send_component_table, @@ -744,6 +824,14 @@ static const struct pldmfw_ops ice_fwu_ops = { .finalize_update = &ice_finalize_update, }; +static const struct pldmfw_ops ice_fwu_ops_e822 = { + .match_record = &ice_op_pci_match_record, + .send_package_data = &ice_send_package_data, + .send_component_table = &ice_send_component_table, + .flash_component = &ice_flash_component, + .finalize_update = &ice_finalize_update, +}; + /** * ice_get_pending_updates - Check if the component has a pending update * @pf: the PF driver structure @@ -921,7 +1009,11 @@ int ice_devlink_flash_update(struct devlink *devlink, memset(&priv, 0, sizeof(priv)); - priv.context.ops = &ice_fwu_ops; + /* the E822 device needs a slightly different ops */ + if (hw->mac_type == ICE_MAC_GENERIC) + priv.context.ops = &ice_fwu_ops_e822; + else + priv.context.ops = &ice_fwu_ops_e810; priv.context.dev = dev; priv.extack = extack; priv.pf = pf; diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index c1ac2f746714..ff2eac2f8c64 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -5413,6 +5413,7 @@ static const struct pci_device_id ice_pci_tbl[] = { { PCI_VDEVICE(INTEL, ICE_DEV_ID_E823L_10G_BASE_T), 0 }, { PCI_VDEVICE(INTEL, ICE_DEV_ID_E823L_1GBE), 0 }, { PCI_VDEVICE(INTEL, ICE_DEV_ID_E823L_QSFP), 0 }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822_SI_DFLT), 0 }, /* required last entry */ { 0, } }; diff --git a/drivers/net/ethernet/marvell/prestera/prestera_router.c b/drivers/net/ethernet/marvell/prestera/prestera_router.c index 3754d8aec76d..3c8116f16b4d 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_router.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_router.c @@ -588,6 +588,7 @@ err_router_lib_init: void prestera_router_fini(struct prestera_switch *sw) { + unregister_fib_notifier(&init_net, &sw->router->fib_nb); unregister_inetaddr_notifier(&sw->router->inetaddr_nb); unregister_inetaddr_validator_notifier(&sw->router->inetaddr_valid_nb); rhashtable_destroy(&sw->router->kern_fib_cache_ht); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c index 25f51f80a9b4..ba171c7f0a67 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -76,6 +76,7 @@ struct mlx5_tc_ct_priv { struct mlx5_ct_fs *fs; struct mlx5_ct_fs_ops *fs_ops; spinlock_t ht_lock; /* protects ft entries */ + struct workqueue_struct *wq; struct mlx5_tc_ct_debugfs debugfs; }; @@ -941,14 +942,11 @@ static void mlx5_tc_ct_entry_del_work(struct work_struct *work) static void __mlx5_tc_ct_entry_put(struct mlx5_ct_entry *entry) { - struct mlx5e_priv *priv; - if (!refcount_dec_and_test(&entry->refcnt)) return; - priv = netdev_priv(entry->ct_priv->netdev); INIT_WORK(&entry->work, mlx5_tc_ct_entry_del_work); - queue_work(priv->wq, &entry->work); + queue_work(entry->ct_priv->wq, &entry->work); } static struct mlx5_ct_counter * @@ -1759,19 +1757,16 @@ mlx5_tc_ct_flush_ft_entry(void *ptr, void *arg) static void mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft) { - struct mlx5e_priv *priv; - if (!refcount_dec_and_test(&ft->refcount)) return; + flush_workqueue(ct_priv->wq); nf_flow_table_offload_del_cb(ft->nf_ft, mlx5_tc_ct_block_flow_offload, ft); rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params); rhashtable_free_and_destroy(&ft->ct_entries_ht, mlx5_tc_ct_flush_ft_entry, ct_priv); - priv = netdev_priv(ct_priv->netdev); - flush_workqueue(priv->wq); mlx5_tc_ct_free_pre_ct_tables(ft); mapping_remove(ct_priv->zone_mapping, ft->zone_restore_id); kfree(ft); @@ -2176,6 +2171,12 @@ mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains, if (rhashtable_init(&ct_priv->ct_tuples_nat_ht, &tuples_nat_ht_params)) goto err_ct_tuples_nat_ht; + ct_priv->wq = alloc_ordered_workqueue("mlx5e_ct_priv_wq", 0); + if (!ct_priv->wq) { + err = -ENOMEM; + goto err_wq; + } + err = mlx5_tc_ct_fs_init(ct_priv); if (err) goto err_init_fs; @@ -2184,6 +2185,8 @@ mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains, return ct_priv; err_init_fs: + destroy_workqueue(ct_priv->wq); +err_wq: rhashtable_destroy(&ct_priv->ct_tuples_nat_ht); err_ct_tuples_nat_ht: rhashtable_destroy(&ct_priv->ct_tuples_ht); @@ -2213,6 +2216,7 @@ mlx5_tc_ct_clean(struct mlx5_tc_ct_priv *ct_priv) if (!ct_priv) return; + destroy_workqueue(ct_priv->wq); mlx5_ct_tc_remove_dbgfs(ct_priv); chains = ct_priv->chains; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c index 0bb0633b7542..27483aa7be8a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c @@ -231,8 +231,7 @@ mlx5e_set_ktls_rx_priv_ctx(struct tls_context *tls_ctx, struct mlx5e_ktls_offload_context_rx **ctx = __tls_driver_ctx(tls_ctx, TLS_OFFLOAD_CTX_DIR_RX); - BUILD_BUG_ON(sizeof(struct mlx5e_ktls_offload_context_rx *) > - TLS_OFFLOAD_CONTEXT_SIZE_RX); + BUILD_BUG_ON(sizeof(priv_rx) > TLS_DRIVER_STATE_SIZE_RX); *ctx = priv_rx; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c index 4b6f0d1ea59a..f239fb2e832f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c @@ -68,8 +68,7 @@ mlx5e_set_ktls_tx_priv_ctx(struct tls_context *tls_ctx, struct mlx5e_ktls_offload_context_tx **ctx = __tls_driver_ctx(tls_ctx, TLS_OFFLOAD_CTX_DIR_TX); - BUILD_BUG_ON(sizeof(struct mlx5e_ktls_offload_context_tx *) > - TLS_OFFLOAD_CONTEXT_SIZE_TX); + BUILD_BUG_ON(sizeof(priv_tx) > TLS_DRIVER_STATE_SIZE_TX); *ctx = priv_tx; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index 57fa0489eeb8..1e87bb2b7541 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -688,7 +688,7 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(vnic_env) u32 in[MLX5_ST_SZ_DW(query_vnic_env_in)] = {}; struct mlx5_core_dev *mdev = priv->mdev; - if (!MLX5_CAP_GEN(priv->mdev, nic_receive_steering_discard)) + if (!mlx5e_stats_grp_vnic_env_num_stats(priv)) return; MLX5_SET(query_vnic_env_in, in, opcode, MLX5_CMD_OP_QUERY_VNIC_ENV); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 3a39a50146dd..9ca2c8763237 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -3793,7 +3793,7 @@ static bool is_lag_dev(struct mlx5e_priv *priv, static bool is_multiport_eligible(struct mlx5e_priv *priv, struct net_device *out_dev) { - if (mlx5e_eswitch_uplink_rep(out_dev) && + if (same_hw_reps(priv, out_dev) && MLX5_CAP_PORT_SELECTION(priv->mdev, port_select_flow_table) && MLX5_CAP_GEN(priv->mdev, create_lag_when_not_master_up)) return true; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 50d14cec4894..9a7250be229f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -341,6 +341,26 @@ static void mlx5e_tx_check_stop(struct mlx5e_txqsq *sq) } } +static void mlx5e_tx_flush(struct mlx5e_txqsq *sq) +{ + struct mlx5e_tx_wqe_info *wi; + struct mlx5e_tx_wqe *wqe; + u16 pi; + + /* Must not be called when a MPWQE session is active but empty. */ + mlx5e_tx_mpwqe_ensure_complete(sq); + + pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc); + wi = &sq->db.wqe_info[pi]; + + *wi = (struct mlx5e_tx_wqe_info) { + .num_wqebbs = 1, + }; + + wqe = mlx5e_post_nop(&sq->wq, sq->sqn, &sq->pc); + mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, &wqe->ctrl); +} + static inline void mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb, const struct mlx5e_tx_attr *attr, @@ -459,6 +479,7 @@ mlx5e_sq_xmit_wqe(struct mlx5e_txqsq *sq, struct sk_buff *skb, err_drop: stats->dropped++; dev_kfree_skb_any(skb); + mlx5e_tx_flush(sq); } static bool mlx5e_tx_skb_supports_mpwqe(struct sk_buff *skb, struct mlx5e_tx_attr *attr) @@ -560,6 +581,13 @@ mlx5e_sq_xmit_mpwqe(struct mlx5e_txqsq *sq, struct sk_buff *skb, struct mlx5_wqe_ctrl_seg *cseg; struct mlx5e_xmit_data txd; + txd.data = skb->data; + txd.len = skb->len; + + txd.dma_addr = dma_map_single(sq->pdev, txd.data, txd.len, DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(sq->pdev, txd.dma_addr))) + goto err_unmap; + if (!mlx5e_tx_mpwqe_session_is_active(sq)) { mlx5e_tx_mpwqe_session_start(sq, eseg); } else if (!mlx5e_tx_mpwqe_same_eseg(sq, eseg)) { @@ -569,18 +597,9 @@ mlx5e_sq_xmit_mpwqe(struct mlx5e_txqsq *sq, struct sk_buff *skb, sq->stats->xmit_more += xmit_more; - txd.data = skb->data; - txd.len = skb->len; - - txd.dma_addr = dma_map_single(sq->pdev, txd.data, txd.len, DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(sq->pdev, txd.dma_addr))) - goto err_unmap; mlx5e_dma_push(sq, txd.dma_addr, txd.len, MLX5E_DMA_MAP_SINGLE); - mlx5e_skb_fifo_push(&sq->db.skb_fifo, skb); - mlx5e_tx_mpwqe_add_dseg(sq, &txd); - mlx5e_tx_skb_update_hwts_flags(skb); if (unlikely(mlx5e_tx_mpwqe_is_full(&sq->mpwqe, sq->max_sq_mpw_wqebbs))) { @@ -602,6 +621,7 @@ err_unmap: mlx5e_dma_unmap_wqe_err(sq, 1); sq->stats->dropped++; dev_kfree_skb_any(skb); + mlx5e_tx_flush(sq); } void mlx5e_tx_mpwqe_ensure_complete(struct mlx5e_txqsq *sq) @@ -1006,5 +1026,6 @@ void mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, err_drop: stats->dropped++; dev_kfree_skb_any(skb); + mlx5e_tx_flush(sq); } #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c index 9d17206d1625..fabe49a35a5c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c @@ -11,6 +11,7 @@ #include "mlx5_core.h" #include "eswitch.h" #include "fs_core.h" +#include "fs_ft_pool.h" #include "esw/qos.h" enum { @@ -95,8 +96,7 @@ static int esw_create_legacy_fdb_table(struct mlx5_eswitch *esw) if (!flow_group_in) return -ENOMEM; - table_size = BIT(MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size)); - ft_attr.max_fte = table_size; + ft_attr.max_fte = POOL_NEXT_SIZE; ft_attr.prio = LEGACY_FDB_PRIO; fdb = mlx5_create_flow_table(root_ns, &ft_attr); if (IS_ERR(fdb)) { @@ -105,6 +105,7 @@ static int esw_create_legacy_fdb_table(struct mlx5_eswitch *esw) goto out; } esw->fdb_table.legacy.fdb = fdb; + table_size = fdb->max_fte; /* Addresses group : Full match unicast/multicast addresses */ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c index 15e41dc84d53..b8feaf0f5c4c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c @@ -44,7 +44,7 @@ static int port_sel_mode_show(struct seq_file *file, void *priv) ldev = dev->priv.lag; mutex_lock(&ldev->lock); if (__mlx5_lag_is_active(ldev)) - mode = mlx5_get_str_port_sel_mode(ldev); + mode = mlx5_get_str_port_sel_mode(ldev->mode, ldev->mode_flags); else ret = -EINVAL; mutex_unlock(&ldev->lock); @@ -72,6 +72,7 @@ static int state_show(struct seq_file *file, void *priv) static int flags_show(struct seq_file *file, void *priv) { struct mlx5_core_dev *dev = file->private; + bool fdb_sel_mode_native; struct mlx5_lag *ldev; bool shared_fdb; bool lag_active; @@ -79,14 +80,21 @@ static int flags_show(struct seq_file *file, void *priv) ldev = dev->priv.lag; mutex_lock(&ldev->lock); lag_active = __mlx5_lag_is_active(ldev); - if (lag_active) - shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags); + if (!lag_active) + goto unlock; + + shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags); + fdb_sel_mode_native = test_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, + &ldev->mode_flags); +unlock: mutex_unlock(&ldev->lock); if (!lag_active) return -EINVAL; seq_printf(file, "%s:%s\n", "shared_fdb", shared_fdb ? "on" : "off"); + seq_printf(file, "%s:%s\n", "fdb_selection_mode", + fdb_sel_mode_native ? "native" : "affinity"); return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c index 2a8fc547eb37..5d41e19378e0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c @@ -68,14 +68,15 @@ static int get_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags) static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 *ports, int mode, unsigned long flags) { - bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags); + bool fdb_sel_mode = test_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, + &flags); int port_sel_mode = get_port_sel_mode(mode, flags); u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {}; void *lag_ctx; lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx); MLX5_SET(create_lag_in, in, opcode, MLX5_CMD_OP_CREATE_LAG); - MLX5_SET(lagc, lag_ctx, fdb_selection_mode, shared_fdb); + MLX5_SET(lagc, lag_ctx, fdb_selection_mode, fdb_sel_mode); if (port_sel_mode == MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY) { MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[0]); MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[1]); @@ -471,8 +472,13 @@ static int mlx5_lag_set_flags(struct mlx5_lag *ldev, enum mlx5_lag_mode mode, bool roce_lag = mode == MLX5_LAG_MODE_ROCE; *flags = 0; - if (shared_fdb) + if (shared_fdb) { set_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, flags); + set_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, flags); + } + + if (mode == MLX5_LAG_MODE_MPESW) + set_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, flags); if (roce_lag) return mlx5_lag_set_port_sel_mode_roce(ldev, flags); @@ -481,9 +487,9 @@ static int mlx5_lag_set_flags(struct mlx5_lag *ldev, enum mlx5_lag_mode mode, return 0; } -char *mlx5_get_str_port_sel_mode(struct mlx5_lag *ldev) +char *mlx5_get_str_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags) { - int port_sel_mode = get_port_sel_mode(ldev->mode, ldev->mode_flags); + int port_sel_mode = get_port_sel_mode(mode, flags); switch (port_sel_mode) { case MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY: return "queue_affinity"; @@ -507,7 +513,7 @@ static int mlx5_create_lag(struct mlx5_lag *ldev, if (tracker) mlx5_lag_print_mapping(dev0, ldev, tracker, flags); mlx5_core_info(dev0, "shared_fdb:%d mode:%s\n", - shared_fdb, mlx5_get_str_port_sel_mode(ldev)); + shared_fdb, mlx5_get_str_port_sel_mode(mode, flags)); err = mlx5_cmd_create_lag(dev0, ldev->v2p_map, mode, flags); if (err) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h index c81b173156d2..ce2ce8ccbd70 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h @@ -24,6 +24,7 @@ enum { enum { MLX5_LAG_MODE_FLAG_HASH_BASED, MLX5_LAG_MODE_FLAG_SHARED_FDB, + MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, }; enum mlx5_lag_mode { @@ -114,7 +115,7 @@ bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev); void mlx5_lag_del_mpesw_rule(struct mlx5_core_dev *dev); int mlx5_lag_add_mpesw_rule(struct mlx5_core_dev *dev); -char *mlx5_get_str_port_sel_mode(struct mlx5_lag *ldev); +char *mlx5_get_str_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags); void mlx5_infer_tx_enabled(struct lag_tracker *tracker, u8 num_ports, u8 *ports, int *num_enabled); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c index ee4b25a50315..f643202b29c6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c @@ -41,7 +41,6 @@ void mlx5_lag_del_mpesw_rule(struct mlx5_core_dev *dev) int mlx5_lag_add_mpesw_rule(struct mlx5_core_dev *dev) { struct mlx5_lag *ldev = dev->priv.lag; - bool shared_fdb; int err = 0; if (!ldev) @@ -55,8 +54,8 @@ int mlx5_lag_add_mpesw_rule(struct mlx5_core_dev *dev) err = -EINVAL; goto out; } - shared_fdb = mlx5_shared_fdb_supported(ldev); - err = mlx5_activate_lag(ldev, NULL, MLX5_LAG_MODE_MPESW, shared_fdb); + + err = mlx5_activate_lag(ldev, NULL, MLX5_LAG_MODE_MPESW, false); if (err) mlx5_core_warn(dev, "Failed to create LAG in MPESW mode (%d)\n", err); diff --git a/drivers/net/ethernet/mscc/ocelot_fdma.c b/drivers/net/ethernet/mscc/ocelot_fdma.c index 083fddd263ec..8e3894cf5f7c 100644 --- a/drivers/net/ethernet/mscc/ocelot_fdma.c +++ b/drivers/net/ethernet/mscc/ocelot_fdma.c @@ -94,19 +94,18 @@ static void ocelot_fdma_activate_chan(struct ocelot *ocelot, dma_addr_t dma, ocelot_fdma_writel(ocelot, MSCC_FDMA_CH_ACTIVATE, BIT(chan)); } +static u32 ocelot_fdma_read_ch_safe(struct ocelot *ocelot) +{ + return ocelot_fdma_readl(ocelot, MSCC_FDMA_CH_SAFE); +} + static int ocelot_fdma_wait_chan_safe(struct ocelot *ocelot, int chan) { - unsigned long timeout; u32 safe; - timeout = jiffies + usecs_to_jiffies(OCELOT_FDMA_CH_SAFE_TIMEOUT_US); - do { - safe = ocelot_fdma_readl(ocelot, MSCC_FDMA_CH_SAFE); - if (safe & BIT(chan)) - return 0; - } while (time_after(jiffies, timeout)); - - return -ETIMEDOUT; + return readx_poll_timeout_atomic(ocelot_fdma_read_ch_safe, ocelot, safe, + safe & BIT(chan), 0, + OCELOT_FDMA_CH_SAFE_TIMEOUT_US); } static void ocelot_fdma_dcb_set_data(struct ocelot_fdma_dcb *dcb, diff --git a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c index 6bf3ec448e7e..97dcf8db7ed2 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c +++ b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c @@ -447,7 +447,8 @@ void nfp_tun_unlink_and_update_nn_entries(struct nfp_app *app, static void nfp_tun_write_neigh(struct net_device *netdev, struct nfp_app *app, - void *flow, struct neighbour *neigh, bool is_ipv6) + void *flow, struct neighbour *neigh, bool is_ipv6, + bool override) { bool neigh_invalid = !(neigh->nud_state & NUD_VALID) || neigh->dead; size_t neigh_size = is_ipv6 ? sizeof(struct nfp_tun_neigh_v6) : @@ -546,6 +547,13 @@ nfp_tun_write_neigh(struct net_device *netdev, struct nfp_app *app, if (nn_entry->flow) list_del(&nn_entry->list_head); kfree(nn_entry); + } else if (nn_entry && !neigh_invalid && override) { + mtype = is_ipv6 ? NFP_FLOWER_CMSG_TYPE_TUN_NEIGH_V6 : + NFP_FLOWER_CMSG_TYPE_TUN_NEIGH; + nfp_tun_link_predt_entries(app, nn_entry); + nfp_flower_xmit_tun_conf(app, mtype, neigh_size, + nn_entry->payload, + GFP_ATOMIC); } spin_unlock_bh(&priv->predt_lock); @@ -610,7 +618,7 @@ nfp_tun_neigh_event_handler(struct notifier_block *nb, unsigned long event, dst_release(dst); } - nfp_tun_write_neigh(n->dev, app, &flow6, n, true); + nfp_tun_write_neigh(n->dev, app, &flow6, n, true, false); #else return NOTIFY_DONE; #endif /* CONFIG_IPV6 */ @@ -633,7 +641,7 @@ nfp_tun_neigh_event_handler(struct notifier_block *nb, unsigned long event, ip_rt_put(rt); } - nfp_tun_write_neigh(n->dev, app, &flow4, n, false); + nfp_tun_write_neigh(n->dev, app, &flow4, n, false, false); } #else return NOTIFY_DONE; @@ -676,7 +684,7 @@ void nfp_tunnel_request_route_v4(struct nfp_app *app, struct sk_buff *skb) ip_rt_put(rt); if (!n) goto fail_rcu_unlock; - nfp_tun_write_neigh(n->dev, app, &flow, n, false); + nfp_tun_write_neigh(n->dev, app, &flow, n, false, true); neigh_release(n); rcu_read_unlock(); return; @@ -718,7 +726,7 @@ void nfp_tunnel_request_route_v6(struct nfp_app *app, struct sk_buff *skb) if (!n) goto fail_rcu_unlock; - nfp_tun_write_neigh(n->dev, app, &flow, n, true); + nfp_tun_write_neigh(n->dev, app, &flow, n, true, true); neigh_release(n); rcu_read_unlock(); return; diff --git a/drivers/net/ethernet/netronome/nfp/nfdk/dp.c b/drivers/net/ethernet/netronome/nfp/nfdk/dp.c index e509d6dcba5c..805071d64a20 100644 --- a/drivers/net/ethernet/netronome/nfp/nfdk/dp.c +++ b/drivers/net/ethernet/netronome/nfp/nfdk/dp.c @@ -125,17 +125,18 @@ nfp_nfdk_tx_csum(struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec, static int nfp_nfdk_tx_maybe_close_block(struct nfp_net_tx_ring *tx_ring, - unsigned int nr_frags, struct sk_buff *skb) + struct sk_buff *skb) { unsigned int n_descs, wr_p, nop_slots; const skb_frag_t *frag, *fend; struct nfp_nfdk_tx_desc *txd; + unsigned int nr_frags; unsigned int wr_idx; int err; recount_descs: n_descs = nfp_nfdk_headlen_to_segs(skb_headlen(skb)); - + nr_frags = skb_shinfo(skb)->nr_frags; frag = skb_shinfo(skb)->frags; fend = frag + nr_frags; for (; frag < fend; frag++) @@ -281,10 +282,13 @@ netdev_tx_t nfp_nfdk_tx(struct sk_buff *skb, struct net_device *netdev) if (unlikely((int)metadata < 0)) goto err_flush; - nr_frags = skb_shinfo(skb)->nr_frags; - if (nfp_nfdk_tx_maybe_close_block(tx_ring, nr_frags, skb)) + if (nfp_nfdk_tx_maybe_close_block(tx_ring, skb)) goto err_flush; + /* nr_frags will change after skb_linearize so we get nr_frags after + * nfp_nfdk_tx_maybe_close_block function + */ + nr_frags = skb_shinfo(skb)->nr_frags; /* DMA map all */ wr_idx = D_IDX(tx_ring, tx_ring->wr_p); txd = &tx_ring->ktxds[wr_idx]; @@ -310,7 +314,16 @@ netdev_tx_t nfp_nfdk_tx(struct sk_buff *skb, struct net_device *netdev) /* FIELD_PREP() implicitly truncates to chunk */ dma_len -= 1; - dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN_HEAD, dma_len) | + + /* We will do our best to pass as much data as we can in descriptor + * and we need to make sure the first descriptor includes whole head + * since there is limitation in firmware side. Sometimes the value of + * dma_len bitwise and NFDK_DESC_TX_DMA_LEN_HEAD will less than + * headlen. + */ + dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN_HEAD, + dma_len > NFDK_DESC_TX_DMA_LEN_HEAD ? + NFDK_DESC_TX_DMA_LEN_HEAD : dma_len) | FIELD_PREP(NFDK_DESC_TX_TYPE_HEAD, type); txd->dma_len_type = cpu_to_le16(dlen_type); @@ -925,7 +938,9 @@ nfp_nfdk_tx_xdp_buf(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring, /* FIELD_PREP() implicitly truncates to chunk */ dma_len -= 1; - dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN_HEAD, dma_len) | + dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN_HEAD, + dma_len > NFDK_DESC_TX_DMA_LEN_HEAD ? + NFDK_DESC_TX_DMA_LEN_HEAD : dma_len) | FIELD_PREP(NFDK_DESC_TX_TYPE_HEAD, type); txd->dma_len_type = cpu_to_le16(dlen_type); @@ -1303,7 +1318,7 @@ nfp_nfdk_ctrl_tx_one(struct nfp_net *nn, struct nfp_net_r_vector *r_vec, skb_push(skb, 4)); } - if (nfp_nfdk_tx_maybe_close_block(tx_ring, 0, skb)) + if (nfp_nfdk_tx_maybe_close_block(tx_ring, skb)) goto err_free; /* DMA map all */ @@ -1328,7 +1343,9 @@ nfp_nfdk_ctrl_tx_one(struct nfp_net *nn, struct nfp_net_r_vector *r_vec, txbuf++; dma_len -= 1; - dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN_HEAD, dma_len) | + dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN_HEAD, + dma_len > NFDK_DESC_TX_DMA_LEN_HEAD ? + NFDK_DESC_TX_DMA_LEN_HEAD : dma_len) | FIELD_PREP(NFDK_DESC_TX_TYPE_HEAD, type); txd->dma_len_type = cpu_to_le16(dlen_type); diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index 186cb28c03bd..8b62ce21aff3 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -1932,7 +1932,10 @@ static int efx_ef10_try_update_nic_stats_vf(struct efx_nic *efx) efx_update_sw_stats(efx, stats); out: + /* releasing a DMA coherent buffer with BH disabled can panic */ + spin_unlock_bh(&efx->stats_lock); efx_nic_free_buffer(efx, &stats_buf); + spin_lock_bh(&efx->stats_lock); return rc; } diff --git a/drivers/net/ethernet/sfc/ef10_sriov.c b/drivers/net/ethernet/sfc/ef10_sriov.c index 7f5aa4a8c451..92550c7e85ce 100644 --- a/drivers/net/ethernet/sfc/ef10_sriov.c +++ b/drivers/net/ethernet/sfc/ef10_sriov.c @@ -408,8 +408,9 @@ fail1: static int efx_ef10_pci_sriov_disable(struct efx_nic *efx, bool force) { struct pci_dev *dev = efx->pci_dev; + struct efx_ef10_nic_data *nic_data = efx->nic_data; unsigned int vfs_assigned = pci_vfs_assigned(dev); - int rc = 0; + int i, rc = 0; if (vfs_assigned && !force) { netif_info(efx, drv, efx->net_dev, "VFs are assigned to guests; " @@ -417,10 +418,13 @@ static int efx_ef10_pci_sriov_disable(struct efx_nic *efx, bool force) return -EBUSY; } - if (!vfs_assigned) + if (!vfs_assigned) { + for (i = 0; i < efx->vf_count; i++) + nic_data->vf[i].pci_dev = NULL; pci_disable_sriov(dev); - else + } else { rc = -EBUSY; + } efx_ef10_sriov_free_vf_vswitching(efx); efx->vf_count = 0; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c index bc91fd867dcd..358fc26f8d1f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c @@ -361,6 +361,7 @@ bypass_clk_reset_gpio: data->fix_mac_speed = tegra_eqos_fix_speed; data->init = tegra_eqos_init; data->bsp_priv = eqos; + data->sph_disable = 1; err = tegra_eqos_init(pdev, eqos); if (err < 0) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c index 9a6d819b84ae..378b4dd826bb 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c @@ -273,7 +273,8 @@ static int ingenic_mac_probe(struct platform_device *pdev) mac->tx_delay = tx_delay_ps * 1000; } else { dev_err(&pdev->dev, "Invalid TX clock delay: %dps\n", tx_delay_ps); - return -EINVAL; + ret = -EINVAL; + goto err_remove_config_dt; } } @@ -283,7 +284,8 @@ static int ingenic_mac_probe(struct platform_device *pdev) mac->rx_delay = rx_delay_ps * 1000; } else { dev_err(&pdev->dev, "Invalid RX clock delay: %dps\n", rx_delay_ps); - return -EINVAL; + ret = -EINVAL; + goto err_remove_config_dt; } } diff --git a/drivers/net/ethernet/sun/sunhme.c b/drivers/net/ethernet/sun/sunhme.c index 77e5dffb558f..8594ee839628 100644 --- a/drivers/net/ethernet/sun/sunhme.c +++ b/drivers/net/ethernet/sun/sunhme.c @@ -545,43 +545,24 @@ static int try_next_permutation(struct happy_meal *hp, void __iomem *tregs) static void display_link_mode(struct happy_meal *hp, void __iomem *tregs) { - printk(KERN_INFO "%s: Link is up using ", hp->dev->name); - if (hp->tcvr_type == external) - printk("external "); - else - printk("internal "); - printk("transceiver at "); hp->sw_lpa = happy_meal_tcvr_read(hp, tregs, MII_LPA); - if (hp->sw_lpa & (LPA_100HALF | LPA_100FULL)) { - if (hp->sw_lpa & LPA_100FULL) - printk("100Mb/s, Full Duplex.\n"); - else - printk("100Mb/s, Half Duplex.\n"); - } else { - if (hp->sw_lpa & LPA_10FULL) - printk("10Mb/s, Full Duplex.\n"); - else - printk("10Mb/s, Half Duplex.\n"); - } + + netdev_info(hp->dev, + "Link is up using %s transceiver at %dMb/s, %s Duplex.\n", + hp->tcvr_type == external ? "external" : "internal", + hp->sw_lpa & (LPA_100HALF | LPA_100FULL) ? 100 : 10, + hp->sw_lpa & (LPA_100FULL | LPA_10FULL) ? "Full" : "Half"); } static void display_forced_link_mode(struct happy_meal *hp, void __iomem *tregs) { - printk(KERN_INFO "%s: Link has been forced up using ", hp->dev->name); - if (hp->tcvr_type == external) - printk("external "); - else - printk("internal "); - printk("transceiver at "); hp->sw_bmcr = happy_meal_tcvr_read(hp, tregs, MII_BMCR); - if (hp->sw_bmcr & BMCR_SPEED100) - printk("100Mb/s, "); - else - printk("10Mb/s, "); - if (hp->sw_bmcr & BMCR_FULLDPLX) - printk("Full Duplex.\n"); - else - printk("Half Duplex.\n"); + + netdev_info(hp->dev, + "Link has been forced up using %s transceiver at %dMb/s, %s Duplex.\n", + hp->tcvr_type == external ? "external" : "internal", + hp->sw_bmcr & BMCR_SPEED100 ? 100 : 10, + hp->sw_bmcr & BMCR_FULLDPLX ? "Full" : "Half"); } static int set_happy_link_modes(struct happy_meal *hp, void __iomem *tregs) diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index fb92d4c1547d..f4a6b590a1e3 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -2467,7 +2467,6 @@ static int am65_cpsw_nuss_register_devlink(struct am65_cpsw_common *common) port->port_id, ret); goto dl_port_unreg; } - devlink_port_type_eth_set(dl_port, port->ndev); } devlink_register(common->devlink); return ret; @@ -2511,6 +2510,7 @@ static void am65_cpsw_unregister_devlink(struct am65_cpsw_common *common) static int am65_cpsw_nuss_register_ndevs(struct am65_cpsw_common *common) { struct device *dev = common->dev; + struct devlink_port *dl_port; struct am65_cpsw_port *port; int ret = 0, i; @@ -2527,6 +2527,10 @@ static int am65_cpsw_nuss_register_ndevs(struct am65_cpsw_common *common) return ret; } + ret = am65_cpsw_nuss_register_devlink(common); + if (ret) + return ret; + for (i = 0; i < common->port_num; i++) { port = &common->ports[i]; @@ -2539,25 +2543,24 @@ static int am65_cpsw_nuss_register_ndevs(struct am65_cpsw_common *common) i, ret); goto err_cleanup_ndev; } + + dl_port = &port->devlink_port; + devlink_port_type_eth_set(dl_port, port->ndev); } ret = am65_cpsw_register_notifiers(common); if (ret) goto err_cleanup_ndev; - ret = am65_cpsw_nuss_register_devlink(common); - if (ret) - goto clean_unregister_notifiers; - /* can't auto unregister ndev using devm_add_action() due to * devres release sequence in DD core for DMA */ return 0; -clean_unregister_notifiers: - am65_cpsw_unregister_notifiers(common); + err_cleanup_ndev: am65_cpsw_nuss_cleanup_ndev(common); + am65_cpsw_unregister_devlink(common); return ret; } diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 7389d6ef8569..b082819509e1 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -2156,7 +2156,7 @@ static inline void rtl_rx_vlan_tag(struct rx_desc *desc, struct sk_buff *skb) } static int r8152_tx_csum(struct r8152 *tp, struct tx_desc *desc, - struct sk_buff *skb, u32 len, u32 transport_offset) + struct sk_buff *skb, u32 len) { u32 mss = skb_shinfo(skb)->gso_size; u32 opts1, opts2 = 0; @@ -2167,6 +2167,8 @@ static int r8152_tx_csum(struct r8152 *tp, struct tx_desc *desc, opts1 = len | TX_FS | TX_LS; if (mss) { + u32 transport_offset = (u32)skb_transport_offset(skb); + if (transport_offset > GTTCPHO_MAX) { netif_warn(tp, tx_err, tp->netdev, "Invalid transport offset 0x%x for TSO\n", @@ -2197,6 +2199,7 @@ static int r8152_tx_csum(struct r8152 *tp, struct tx_desc *desc, opts1 |= transport_offset << GTTCPHO_SHIFT; opts2 |= min(mss, MSS_MAX) << MSS_SHIFT; } else if (skb->ip_summed == CHECKSUM_PARTIAL) { + u32 transport_offset = (u32)skb_transport_offset(skb); u8 ip_protocol; if (transport_offset > TCPHO_MAX) { @@ -2260,7 +2263,6 @@ static int r8152_tx_agg_fill(struct r8152 *tp, struct tx_agg *agg) struct tx_desc *tx_desc; struct sk_buff *skb; unsigned int len; - u32 offset; skb = __skb_dequeue(&skb_head); if (!skb) @@ -2276,9 +2278,7 @@ static int r8152_tx_agg_fill(struct r8152 *tp, struct tx_agg *agg) tx_data = tx_agg_align(tx_data); tx_desc = (struct tx_desc *)tx_data; - offset = (u32)skb_transport_offset(skb); - - if (r8152_tx_csum(tp, tx_desc, skb, skb->len, offset)) { + if (r8152_tx_csum(tp, tx_desc, skb, skb->len)) { r8152_csum_workaround(tp, skb, &skb_head); continue; } @@ -2759,9 +2759,9 @@ rtl8152_features_check(struct sk_buff *skb, struct net_device *dev, { u32 mss = skb_shinfo(skb)->gso_size; int max_offset = mss ? GTTCPHO_MAX : TCPHO_MAX; - int offset = skb_transport_offset(skb); - if ((mss || skb->ip_summed == CHECKSUM_PARTIAL) && offset > max_offset) + if ((mss || skb->ip_summed == CHECKSUM_PARTIAL) && + skb_transport_offset(skb) > max_offset) features &= ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); else if ((skb->len + sizeof(struct tx_desc)) > agg_buf_sz) features &= ~NETIF_F_GSO_MASK; diff --git a/drivers/net/wireless/ath/ath11k/wmi.c b/drivers/net/wireless/ath/ath11k/wmi.c index 84d1c7054013..7b1dc19c565e 100644 --- a/drivers/net/wireless/ath/ath11k/wmi.c +++ b/drivers/net/wireless/ath/ath11k/wmi.c @@ -3822,7 +3822,8 @@ ath11k_wmi_obss_color_collision_event(struct ath11k_base *ab, struct sk_buff *sk switch (ev->evt_type) { case WMI_BSS_COLOR_COLLISION_DETECTION: - ieeee80211_obss_color_collision_notify(arvif->vif, ev->obss_color_bitmap); + ieeee80211_obss_color_collision_notify(arvif->vif, ev->obss_color_bitmap, + GFP_KERNEL); ath11k_dbg(ab, ATH11K_DBG_WMI, "OBSS color collision detected vdev:%d, event:%d, bitmap:%08llx\n", ev->vdev_id, ev->evt_type, ev->obss_color_bitmap); diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 2f746eb64507..6f83af849f2e 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -4912,6 +4912,8 @@ static int hwsim_virtio_probe(struct virtio_device *vdev) if (err) return err; + virtio_device_ready(vdev); + err = fill_vq(hwsim_vqs[HWSIM_VQ_RX]); if (err) goto out_remove; diff --git a/drivers/net/wireless/realtek/rtw88/main.h b/drivers/net/wireless/realtek/rtw88/main.h index c02be4ac159e..7db627fc26be 100644 --- a/drivers/net/wireless/realtek/rtw88/main.h +++ b/drivers/net/wireless/realtek/rtw88/main.h @@ -1233,9 +1233,6 @@ struct rtw_chip_info { const struct wiphy_wowlan_support *wowlan_stub; const u8 max_sched_scan_ssids; - /* for 8821c set channel */ - u32 ch_param[3]; - /* coex paras */ u32 coex_para_ver; u8 bt_desired_ver; @@ -1937,6 +1934,9 @@ struct rtw_hal { enum rtw_sar_bands sar_band; struct rtw_sar sar; + + /* for 8821c set channel */ + u32 ch_param[3]; }; struct rtw_path_div { diff --git a/drivers/net/wireless/realtek/rtw88/rtw8821c.c b/drivers/net/wireless/realtek/rtw88/rtw8821c.c index ffee39ea5df6..488a7ddd507c 100644 --- a/drivers/net/wireless/realtek/rtw88/rtw8821c.c +++ b/drivers/net/wireless/realtek/rtw88/rtw8821c.c @@ -125,6 +125,7 @@ static void rtw8821c_phy_bf_init(struct rtw_dev *rtwdev) static void rtw8821c_phy_set_param(struct rtw_dev *rtwdev) { + struct rtw_hal *hal = &rtwdev->hal; u8 crystal_cap, val; /* power on BB/RF domain */ @@ -159,9 +160,9 @@ static void rtw8821c_phy_set_param(struct rtw_dev *rtwdev) /* post init after header files config */ rtw_write32_set(rtwdev, REG_RXPSEL, BIT_RX_PSEL_RST); - rtwdev->chip->ch_param[0] = rtw_read32_mask(rtwdev, REG_TXSF2, MASKDWORD); - rtwdev->chip->ch_param[1] = rtw_read32_mask(rtwdev, REG_TXSF6, MASKDWORD); - rtwdev->chip->ch_param[2] = rtw_read32_mask(rtwdev, REG_TXFILTER, MASKDWORD); + hal->ch_param[0] = rtw_read32_mask(rtwdev, REG_TXSF2, MASKDWORD); + hal->ch_param[1] = rtw_read32_mask(rtwdev, REG_TXSF6, MASKDWORD); + hal->ch_param[2] = rtw_read32_mask(rtwdev, REG_TXFILTER, MASKDWORD); rtw_phy_init(rtwdev); rtwdev->dm_info.cck_pd_default = rtw_read8(rtwdev, REG_CSRATIO) & 0x1f; @@ -351,6 +352,7 @@ static void rtw8821c_set_channel_rxdfir(struct rtw_dev *rtwdev, u8 bw) static void rtw8821c_set_channel_bb(struct rtw_dev *rtwdev, u8 channel, u8 bw, u8 primary_ch_idx) { + struct rtw_hal *hal = &rtwdev->hal; u32 val32; if (channel <= 14) { @@ -367,11 +369,11 @@ static void rtw8821c_set_channel_bb(struct rtw_dev *rtwdev, u8 channel, u8 bw, rtw_write32_mask(rtwdev, REG_TXFILTER, MASKDWORD, 0x00003667); } else { rtw_write32_mask(rtwdev, REG_TXSF2, MASKDWORD, - rtwdev->chip->ch_param[0]); + hal->ch_param[0]); rtw_write32_mask(rtwdev, REG_TXSF6, MASKLWORD, - rtwdev->chip->ch_param[1] & MASKLWORD); + hal->ch_param[1] & MASKLWORD); rtw_write32_mask(rtwdev, REG_TXFILTER, MASKDWORD, - rtwdev->chip->ch_param[2]); + hal->ch_param[2]); } } else if (channel > 35) { rtw_write32_mask(rtwdev, REG_ENTXCCK, BIT(18), 0x1); diff --git a/drivers/net/xen-netback/rx.c b/drivers/net/xen-netback/rx.c index dbac4c03d21a..a0335407be42 100644 --- a/drivers/net/xen-netback/rx.c +++ b/drivers/net/xen-netback/rx.c @@ -495,6 +495,7 @@ void xenvif_rx_action(struct xenvif_queue *queue) queue->rx_copy.completed = &completed_skbs; while (xenvif_rx_ring_slots_available(queue) && + !skb_queue_empty(&queue->rx_queue) && work_done < RX_BATCH_SIZE) { xenvif_rx_skb(queue); work_done++; diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 6d02e12e4702..80f41446b1f0 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -8462,11 +8462,12 @@ int cfg80211_bss_color_notify(struct net_device *dev, gfp_t gfp, * cfg80211_obss_color_collision_notify - notify about bss color collision * @dev: network device * @color_bitmap: representations of the colors that the local BSS is aware of + * @gfp: allocation flags */ static inline int cfg80211_obss_color_collision_notify(struct net_device *dev, - u64 color_bitmap) + u64 color_bitmap, gfp_t gfp) { - return cfg80211_bss_color_notify(dev, GFP_KERNEL, + return cfg80211_bss_color_notify(dev, gfp, NL80211_CMD_OBSS_COLOR_COLLISION, 0, color_bitmap); } diff --git a/include/net/mac80211.h b/include/net/mac80211.h index ebadb2103968..47642b020706 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -6960,10 +6960,11 @@ ieee80211_get_unsol_bcast_probe_resp_tmpl(struct ieee80211_hw *hw, * @vif: &struct ieee80211_vif pointer from the add_interface callback. * @color_bitmap: a 64 bit bitmap representing the colors that the local BSS is * aware of. + * @gfp: allocation flags */ void ieeee80211_obss_color_collision_notify(struct ieee80211_vif *vif, - u64 color_bitmap); + u64 color_bitmap, gfp_t gfp); /** * ieee80211_is_tx_data - check if frame is a data frame diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 5c4e5a96a984..64cf655c818c 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -657,18 +657,22 @@ static inline void nft_set_ext_prepare(struct nft_set_ext_tmpl *tmpl) tmpl->len = sizeof(struct nft_set_ext); } -static inline void nft_set_ext_add_length(struct nft_set_ext_tmpl *tmpl, u8 id, - unsigned int len) +static inline int nft_set_ext_add_length(struct nft_set_ext_tmpl *tmpl, u8 id, + unsigned int len) { tmpl->len = ALIGN(tmpl->len, nft_set_ext_types[id].align); - BUG_ON(tmpl->len > U8_MAX); + if (tmpl->len > U8_MAX) + return -EINVAL; + tmpl->offset[id] = tmpl->len; tmpl->len += nft_set_ext_types[id].len + len; + + return 0; } -static inline void nft_set_ext_add(struct nft_set_ext_tmpl *tmpl, u8 id) +static inline int nft_set_ext_add(struct nft_set_ext_tmpl *tmpl, u8 id) { - nft_set_ext_add_length(tmpl, id, 0); + return nft_set_ext_add_length(tmpl, id, 0); } static inline void nft_set_ext_init(struct nft_set_ext *ext, diff --git a/include/net/raw.h b/include/net/raw.h index 8ad8df594853..c51a635671a7 100644 --- a/include/net/raw.h +++ b/include/net/raw.h @@ -75,7 +75,7 @@ static inline bool raw_sk_bound_dev_eq(struct net *net, int bound_dev_if, int dif, int sdif) { #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) - return inet_bound_dev_eq(!!net->ipv4.sysctl_raw_l3mdev_accept, + return inet_bound_dev_eq(READ_ONCE(net->ipv4.sysctl_raw_l3mdev_accept), bound_dev_if, dif, sdif); #else return inet_bound_dev_eq(true, bound_dev_if, dif, sdif); diff --git a/include/net/sock.h b/include/net/sock.h index 72ca97ccb460..9fa54762e077 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1529,7 +1529,7 @@ void __sk_mem_reclaim(struct sock *sk, int amount); /* sysctl_mem values are in pages, we convert them in SK_MEM_QUANTUM units */ static inline long sk_prot_mem_limits(const struct sock *sk, int index) { - long val = sk->sk_prot->sysctl_mem[index]; + long val = READ_ONCE(sk->sk_prot->sysctl_mem[index]); #if PAGE_SIZE > SK_MEM_QUANTUM val <<= PAGE_SHIFT - SK_MEM_QUANTUM_SHIFT; diff --git a/include/net/tls.h b/include/net/tls.h index 8017f1703447..8bd938f98bdd 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -704,7 +704,7 @@ int tls_sw_fallback_init(struct sock *sk, struct tls_crypto_info *crypto_info); #ifdef CONFIG_TLS_DEVICE -void tls_device_init(void); +int tls_device_init(void); void tls_device_cleanup(void); void tls_device_sk_destruct(struct sock *sk); int tls_set_device_offload(struct sock *sk, struct tls_context *ctx); @@ -724,7 +724,7 @@ static inline bool tls_is_sk_rx_device_offloaded(struct sock *sk) return tls_get_ctx(sk)->rx_conf == TLS_HW; } #else -static inline void tls_device_init(void) {} +static inline int tls_device_init(void) { return 0; } static inline void tls_device_cleanup(void) {} static inline int diff --git a/include/trace/events/sock.h b/include/trace/events/sock.h index 12c315782766..777ee6cbe933 100644 --- a/include/trace/events/sock.h +++ b/include/trace/events/sock.h @@ -98,7 +98,7 @@ TRACE_EVENT(sock_exceed_buf_limit, TP_STRUCT__entry( __array(char, name, 32) - __field(long *, sysctl_mem) + __array(long, sysctl_mem, 3) __field(long, allocated) __field(int, sysctl_rmem) __field(int, rmem_alloc) @@ -110,7 +110,9 @@ TRACE_EVENT(sock_exceed_buf_limit, TP_fast_assign( strncpy(__entry->name, prot->name, 32); - __entry->sysctl_mem = prot->sysctl_mem; + __entry->sysctl_mem[0] = READ_ONCE(prot->sysctl_mem[0]); + __entry->sysctl_mem[1] = READ_ONCE(prot->sysctl_mem[1]); + __entry->sysctl_mem[2] = READ_ONCE(prot->sysctl_mem[2]); __entry->allocated = allocated; __entry->sysctl_rmem = sk_get_rmem0(sk, prot); __entry->rmem_alloc = atomic_read(&sk->sk_rmem_alloc); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index f4009dbdf62d..ef78e0e1a754 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5222,22 +5222,25 @@ union bpf_attr { * Return * Nothing. Always succeeds. * - * long bpf_dynptr_read(void *dst, u32 len, struct bpf_dynptr *src, u32 offset) + * long bpf_dynptr_read(void *dst, u32 len, struct bpf_dynptr *src, u32 offset, u64 flags) * Description * Read *len* bytes from *src* into *dst*, starting from *offset* * into *src*. + * *flags* is currently unused. * Return * 0 on success, -E2BIG if *offset* + *len* exceeds the length - * of *src*'s data, -EINVAL if *src* is an invalid dynptr. + * of *src*'s data, -EINVAL if *src* is an invalid dynptr or if + * *flags* is not 0. * - * long bpf_dynptr_write(struct bpf_dynptr *dst, u32 offset, void *src, u32 len) + * long bpf_dynptr_write(struct bpf_dynptr *dst, u32 offset, void *src, u32 len, u64 flags) * Description * Write *len* bytes from *src* into *dst*, starting from *offset* * into *dst*. + * *flags* is currently unused. * Return * 0 on success, -E2BIG if *offset* + *len* exceeds the length * of *dst*'s data, -EINVAL if *dst* is an invalid dynptr or if *dst* - * is a read-only dynptr. + * is a read-only dynptr or if *flags* is not 0. * * void *bpf_dynptr_data(struct bpf_dynptr *ptr, u32 offset, u32 len) * Description diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 5f6f3f829b36..e7961508a47d 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -68,11 +68,13 @@ void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, uns { u8 *ptr = NULL; - if (k >= SKF_NET_OFF) + if (k >= SKF_NET_OFF) { ptr = skb_network_header(skb) + k - SKF_NET_OFF; - else if (k >= SKF_LL_OFF) + } else if (k >= SKF_LL_OFF) { + if (unlikely(!skb_mac_header_was_set(skb))) + return NULL; ptr = skb_mac_header(skb) + k - SKF_LL_OFF; - + } if (ptr >= skb->head && ptr + size <= skb_tail_pointer(skb)) return ptr; diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 225806a02efb..bb1254f07667 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1497,11 +1497,12 @@ const struct bpf_func_proto bpf_dynptr_from_mem_proto = { .arg4_type = ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_LOCAL | MEM_UNINIT, }; -BPF_CALL_4(bpf_dynptr_read, void *, dst, u32, len, struct bpf_dynptr_kern *, src, u32, offset) +BPF_CALL_5(bpf_dynptr_read, void *, dst, u32, len, struct bpf_dynptr_kern *, src, + u32, offset, u64, flags) { int err; - if (!src->data) + if (!src->data || flags) return -EINVAL; err = bpf_dynptr_check_off_len(src, offset, len); @@ -1521,13 +1522,15 @@ const struct bpf_func_proto bpf_dynptr_read_proto = { .arg2_type = ARG_CONST_SIZE_OR_ZERO, .arg3_type = ARG_PTR_TO_DYNPTR, .arg4_type = ARG_ANYTHING, + .arg5_type = ARG_ANYTHING, }; -BPF_CALL_4(bpf_dynptr_write, struct bpf_dynptr_kern *, dst, u32, offset, void *, src, u32, len) +BPF_CALL_5(bpf_dynptr_write, struct bpf_dynptr_kern *, dst, u32, offset, void *, src, + u32, len, u64, flags) { int err; - if (!dst->data || bpf_dynptr_is_rdonly(dst)) + if (!dst->data || flags || bpf_dynptr_is_rdonly(dst)) return -EINVAL; err = bpf_dynptr_check_off_len(dst, offset, len); @@ -1547,6 +1550,7 @@ const struct bpf_func_proto bpf_dynptr_write_proto = { .arg2_type = ARG_ANYTHING, .arg3_type = ARG_PTR_TO_MEM | MEM_RDONLY, .arg4_type = ARG_CONST_SIZE_OR_ZERO, + .arg5_type = ARG_ANYTHING, }; BPF_CALL_3(bpf_dynptr_data, struct bpf_dynptr_kern *, ptr, u32, offset, u32, len) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index e52b6e372c60..d99bc3945445 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -446,14 +446,14 @@ static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp, if (*negp) { if (*lvalp > (unsigned long) INT_MAX + 1) return -EINVAL; - *valp = -*lvalp; + WRITE_ONCE(*valp, -*lvalp); } else { if (*lvalp > (unsigned long) INT_MAX) return -EINVAL; - *valp = *lvalp; + WRITE_ONCE(*valp, *lvalp); } } else { - int val = *valp; + int val = READ_ONCE(*valp); if (val < 0) { *negp = true; *lvalp = -(unsigned long)val; @@ -472,9 +472,9 @@ static int do_proc_douintvec_conv(unsigned long *lvalp, if (write) { if (*lvalp > UINT_MAX) return -EINVAL; - *valp = *lvalp; + WRITE_ONCE(*valp, *lvalp); } else { - unsigned int val = *valp; + unsigned int val = READ_ONCE(*valp); *lvalp = (unsigned long)val; } return 0; @@ -857,7 +857,7 @@ static int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp, if ((param->min && *param->min > tmp) || (param->max && *param->max < tmp)) return -EINVAL; - *valp = tmp; + WRITE_ONCE(*valp, tmp); } return 0; @@ -923,7 +923,7 @@ static int do_proc_douintvec_minmax_conv(unsigned long *lvalp, (param->max && *param->max < tmp)) return -ERANGE; - *valp = tmp; + WRITE_ONCE(*valp, tmp); } return 0; @@ -1007,13 +1007,13 @@ int proc_dou8vec_minmax(struct ctl_table *table, int write, tmp.maxlen = sizeof(val); tmp.data = &val; - val = *data; + val = READ_ONCE(*data); res = do_proc_douintvec(&tmp, write, buffer, lenp, ppos, do_proc_douintvec_minmax_conv, ¶m); if (res) return res; if (write) - *data = val; + WRITE_ONCE(*data, val); return 0; } EXPORT_SYMBOL_GPL(proc_dou8vec_minmax); @@ -1090,9 +1090,9 @@ static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, err = -EINVAL; break; } - *i = val; + WRITE_ONCE(*i, val); } else { - val = convdiv * (*i) / convmul; + val = convdiv * READ_ONCE(*i) / convmul; if (!first) proc_put_char(&buffer, &left, '\t'); proc_put_long(&buffer, &left, val, false); @@ -1173,9 +1173,12 @@ static int do_proc_dointvec_jiffies_conv(bool *negp, unsigned long *lvalp, if (write) { if (*lvalp > INT_MAX / HZ) return 1; - *valp = *negp ? -(*lvalp*HZ) : (*lvalp*HZ); + if (*negp) + WRITE_ONCE(*valp, -*lvalp * HZ); + else + WRITE_ONCE(*valp, *lvalp * HZ); } else { - int val = *valp; + int val = READ_ONCE(*valp); unsigned long lval; if (val < 0) { *negp = true; @@ -1221,9 +1224,9 @@ static int do_proc_dointvec_ms_jiffies_conv(bool *negp, unsigned long *lvalp, if (jif > INT_MAX) return 1; - *valp = (int)jif; + WRITE_ONCE(*valp, (int)jif); } else { - int val = *valp; + int val = READ_ONCE(*valp); unsigned long lval; if (val < 0) { *negp = true; @@ -1291,8 +1294,8 @@ int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write, * @ppos: the current position in the file * * Reads/writes up to table->maxlen/sizeof(unsigned int) integer - * values from/to the user buffer, treated as an ASCII string. - * The values read are assumed to be in 1/1000 seconds, and + * values from/to the user buffer, treated as an ASCII string. + * The values read are assumed to be in 1/1000 seconds, and * are converted into jiffies. * * Returns 0 on success. diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c index 53b1955b027f..214532173536 100644 --- a/net/8021q/vlan_netlink.c +++ b/net/8021q/vlan_netlink.c @@ -182,10 +182,14 @@ static int vlan_newlink(struct net *src_net, struct net_device *dev, else if (dev->mtu > max_mtu) return -EINVAL; + /* Note: If this initial vlan_changelink() fails, we need + * to call vlan_dev_free_egress_priority() to free memory. + */ err = vlan_changelink(dev, tb, data, extack); - if (err) - return err; - err = register_vlan_dev(dev, extack); + + if (!err) + err = register_vlan_dev(dev, extack); + if (err) vlan_dev_free_egress_priority(dev); return err; diff --git a/net/core/dev.c b/net/core/dev.c index 8e6f22961206..30a1603a7225 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4863,7 +4863,10 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb, } /* When doing generic XDP we have to bypass the qdisc layer and the - * network taps in order to match in-driver-XDP behavior. + * network taps in order to match in-driver-XDP behavior. This also means + * that XDP packets are able to starve other packets going through a qdisc, + * and DDOS attacks will be more effective. In-driver-XDP use dedicated TX + * queues, so they do not have this starvation issue. */ void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog) { @@ -4875,7 +4878,7 @@ void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog) txq = netdev_core_pick_tx(dev, skb, NULL); cpu = smp_processor_id(); HARD_TX_LOCK(dev, txq, cpu); - if (!netif_xmit_stopped(txq)) { + if (!netif_xmit_frozen_or_drv_stopped(txq)) { rc = netdev_start_xmit(skb, dev, txq, 0); if (dev_xmit_complete(rc)) free_skb = false; @@ -4883,6 +4886,7 @@ void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog) HARD_TX_UNLOCK(dev, txq); if (free_skb) { trace_xdp_exception(dev, xdp_prog, XDP_TX); + dev_core_stats_tx_dropped_inc(dev); kfree_skb(skb); } } diff --git a/net/core/filter.c b/net/core/filter.c index 5d16d66727fc..2a6a0b0ce43e 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -6158,7 +6158,6 @@ static int bpf_push_seg6_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len if (err) return err; - ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); skb_set_transport_header(skb, sizeof(struct ipv6hdr)); return seg6_lookup_nexthop(skb, NULL, 0); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 93da9f783bec..ac67f6b4ec70 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1246,7 +1246,7 @@ static int inet_sk_reselect_saddr(struct sock *sk) if (new_saddr == old_saddr) return 0; - if (sock_net(sk)->ipv4.sysctl_ip_dynaddr > 1) { + if (READ_ONCE(sock_net(sk)->ipv4.sysctl_ip_dynaddr) > 1) { pr_info("%s(): shifting inet->saddr from %pI4 to %pI4\n", __func__, &old_saddr, &new_saddr); } @@ -1301,7 +1301,7 @@ int inet_sk_rebuild_header(struct sock *sk) * Other protocols have to map its equivalent state to TCP_SYN_SENT. * DCCP maps its DCCP_REQUESTING state to TCP_SYN_SENT. -acme */ - if (!sock_net(sk)->ipv4.sysctl_ip_dynaddr || + if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_ip_dynaddr) || sk->sk_state != TCP_SYN_SENT || (sk->sk_userlocks & SOCK_BINDADDR_LOCK) || (err = inet_sk_reselect_saddr(sk)) != 0) diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 62d5f99760aa..6cd3b6c559f0 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -239,7 +239,7 @@ static int cipso_v4_cache_check(const unsigned char *key, struct cipso_v4_map_cache_entry *prev_entry = NULL; u32 hash; - if (!cipso_v4_cache_enabled) + if (!READ_ONCE(cipso_v4_cache_enabled)) return -ENOENT; hash = cipso_v4_map_cache_hash(key, key_len); @@ -296,13 +296,14 @@ static int cipso_v4_cache_check(const unsigned char *key, int cipso_v4_cache_add(const unsigned char *cipso_ptr, const struct netlbl_lsm_secattr *secattr) { + int bkt_size = READ_ONCE(cipso_v4_cache_bucketsize); int ret_val = -EPERM; u32 bkt; struct cipso_v4_map_cache_entry *entry = NULL; struct cipso_v4_map_cache_entry *old_entry = NULL; u32 cipso_ptr_len; - if (!cipso_v4_cache_enabled || cipso_v4_cache_bucketsize <= 0) + if (!READ_ONCE(cipso_v4_cache_enabled) || bkt_size <= 0) return 0; cipso_ptr_len = cipso_ptr[1]; @@ -322,7 +323,7 @@ int cipso_v4_cache_add(const unsigned char *cipso_ptr, bkt = entry->hash & (CIPSO_V4_CACHE_BUCKETS - 1); spin_lock_bh(&cipso_v4_cache[bkt].lock); - if (cipso_v4_cache[bkt].size < cipso_v4_cache_bucketsize) { + if (cipso_v4_cache[bkt].size < bkt_size) { list_add(&entry->list, &cipso_v4_cache[bkt].list); cipso_v4_cache[bkt].size += 1; } else { @@ -1199,7 +1200,8 @@ static int cipso_v4_gentag_rbm(const struct cipso_v4_doi *doi_def, /* This will send packets using the "optimized" format when * possible as specified in section 3.4.2.6 of the * CIPSO draft. */ - if (cipso_v4_rbm_optfmt && ret_val > 0 && ret_val <= 10) + if (READ_ONCE(cipso_v4_rbm_optfmt) && ret_val > 0 && + ret_val <= 10) tag_len = 14; else tag_len = 4 + ret_val; @@ -1603,7 +1605,7 @@ int cipso_v4_validate(const struct sk_buff *skb, unsigned char **option) * all the CIPSO validations here but it doesn't * really specify _exactly_ what we need to validate * ... so, just make it a sysctl tunable. */ - if (cipso_v4_rbm_strictvalid) { + if (READ_ONCE(cipso_v4_rbm_strictvalid)) { if (cipso_v4_map_lvl_valid(doi_def, tag[3]) < 0) { err_offset = opt_iter + 3; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index a57ba23571c9..d9fdcbae16ee 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1230,7 +1230,7 @@ static int fib_check_nh_nongw(struct net *net, struct fib_nh *nh, nh->fib_nh_dev = in_dev->dev; dev_hold_track(nh->fib_nh_dev, &nh->fib_nh_dev_tracker, GFP_ATOMIC); - nh->fib_nh_scope = RT_SCOPE_HOST; + nh->fib_nh_scope = RT_SCOPE_LINK; if (!netif_carrier_ok(nh->fib_nh_dev)) nh->fib_nh_flags |= RTNH_F_LINKDOWN; err = 0; @@ -1811,7 +1811,7 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, goto nla_put_failure; if (nexthop_is_blackhole(fi->nh)) rtm->rtm_type = RTN_BLACKHOLE; - if (!fi->fib_net->ipv4.sysctl_nexthop_compat_mode) + if (!READ_ONCE(fi->fib_net->ipv4.sysctl_nexthop_compat_mode)) goto offload; } diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 2734c3af7e24..46e8a5125853 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -498,7 +498,7 @@ static void tnode_free(struct key_vector *tn) tn = container_of(head, struct tnode, rcu)->kv; } - if (tnode_free_size >= sysctl_fib_sync_mem) { + if (tnode_free_size >= READ_ONCE(sysctl_fib_sync_mem)) { tnode_free_size = 0; synchronize_rcu(); } diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index efea0e796f06..57c4f0d87a7a 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -253,11 +253,12 @@ bool icmp_global_allow(void) spin_lock(&icmp_global.lock); delta = min_t(u32, now - icmp_global.stamp, HZ); if (delta >= HZ / 50) { - incr = sysctl_icmp_msgs_per_sec * delta / HZ ; + incr = READ_ONCE(sysctl_icmp_msgs_per_sec) * delta / HZ; if (incr) WRITE_ONCE(icmp_global.stamp, now); } - credit = min_t(u32, icmp_global.credit + incr, sysctl_icmp_msgs_burst); + credit = min_t(u32, icmp_global.credit + incr, + READ_ONCE(sysctl_icmp_msgs_burst)); if (credit) { /* We want to use a credit of one in average, but need to randomize * it for security reasons. @@ -281,7 +282,7 @@ static bool icmpv4_mask_allow(struct net *net, int type, int code) return true; /* Limit if icmp type is enabled in ratemask. */ - if (!((1 << type) & net->ipv4.sysctl_icmp_ratemask)) + if (!((1 << type) & READ_ONCE(net->ipv4.sysctl_icmp_ratemask))) return true; return false; @@ -319,7 +320,8 @@ static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt, vif = l3mdev_master_ifindex(dst->dev); peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr, vif, 1); - rc = inet_peer_xrlim_allow(peer, net->ipv4.sysctl_icmp_ratelimit); + rc = inet_peer_xrlim_allow(peer, + READ_ONCE(net->ipv4.sysctl_icmp_ratelimit)); if (peer) inet_putpeer(peer); out: @@ -692,7 +694,7 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info, rcu_read_lock(); if (rt_is_input_route(rt) && - net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr) + READ_ONCE(net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr)) dev = dev_get_by_index_rcu(net, inet_iif(skb_in)); if (dev) @@ -932,7 +934,7 @@ static enum skb_drop_reason icmp_unreach(struct sk_buff *skb) * get the other vendor to fix their kit. */ - if (!net->ipv4.sysctl_icmp_ignore_bogus_error_responses && + if (!READ_ONCE(net->ipv4.sysctl_icmp_ignore_bogus_error_responses) && inet_addr_type_dev_table(net, skb->dev, iph->daddr) == RTN_BROADCAST) { net_warn_ratelimited("%pI4 sent an invalid ICMP type %u, code %u error to a broadcast: %pI4 on %s\n", &ip_hdr(skb)->saddr, @@ -992,7 +994,7 @@ static enum skb_drop_reason icmp_echo(struct sk_buff *skb) net = dev_net(skb_dst(skb)->dev); /* should there be an ICMP stat for ignored echos? */ - if (net->ipv4.sysctl_icmp_echo_ignore_all) + if (READ_ONCE(net->ipv4.sysctl_icmp_echo_ignore_all)) return SKB_NOT_DROPPED_YET; icmp_param.data.icmph = *icmp_hdr(skb); @@ -1027,7 +1029,7 @@ bool icmp_build_probe(struct sk_buff *skb, struct icmphdr *icmphdr) u16 ident_len; u8 status; - if (!net->ipv4.sysctl_icmp_echo_enable_probe) + if (!READ_ONCE(net->ipv4.sysctl_icmp_echo_enable_probe)) return false; /* We currently only support probing interfaces on the proxy node @@ -1248,7 +1250,7 @@ int icmp_rcv(struct sk_buff *skb) */ if ((icmph->type == ICMP_ECHO || icmph->type == ICMP_TIMESTAMP) && - net->ipv4.sysctl_icmp_echo_ignore_broadcasts) { + READ_ONCE(net->ipv4.sysctl_icmp_echo_ignore_broadcasts)) { reason = SKB_DROP_REASON_INVALID_PROTO; goto error; } diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 0ec501845cb3..47ccc343c9fb 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -156,7 +156,8 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, { struct inet_timewait_sock *tw; - if (refcount_read(&dr->tw_refcount) - 1 >= dr->sysctl_max_tw_buckets) + if (refcount_read(&dr->tw_refcount) - 1 >= + READ_ONCE(dr->sysctl_max_tw_buckets)) return NULL; tw = kmem_cache_alloc(sk->sk_prot_creator->twsk_prot->twsk_slab, diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index da21dfce24d7..e9fed83e9b3c 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -141,16 +141,20 @@ static void inet_peer_gc(struct inet_peer_base *base, struct inet_peer *gc_stack[], unsigned int gc_cnt) { + int peer_threshold, peer_maxttl, peer_minttl; struct inet_peer *p; __u32 delta, ttl; int i; - if (base->total >= inet_peer_threshold) + peer_threshold = READ_ONCE(inet_peer_threshold); + peer_maxttl = READ_ONCE(inet_peer_maxttl); + peer_minttl = READ_ONCE(inet_peer_minttl); + + if (base->total >= peer_threshold) ttl = 0; /* be aggressive */ else - ttl = inet_peer_maxttl - - (inet_peer_maxttl - inet_peer_minttl) / HZ * - base->total / inet_peer_threshold * HZ; + ttl = peer_maxttl - (peer_maxttl - peer_minttl) / HZ * + base->total / peer_threshold * HZ; for (i = 0; i < gc_cnt; i++) { p = gc_stack[i]; diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index e459a391e607..853a75a8fbaf 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -1858,7 +1858,7 @@ static void __remove_nexthop_fib(struct net *net, struct nexthop *nh) /* __ip6_del_rt does a release, so do a hold here */ fib6_info_hold(f6i); ipv6_stub->ip6_del_rt(net, f6i, - !net->ipv4.sysctl_nexthop_compat_mode); + !READ_ONCE(net->ipv4.sysctl_nexthop_compat_mode)); } } @@ -2361,7 +2361,8 @@ out: if (!rc) { nh_base_seq_inc(net); nexthop_notify(RTM_NEWNEXTHOP, new_nh, &cfg->nlinfo); - if (replace_notify && net->ipv4.sysctl_nexthop_compat_mode) + if (replace_notify && + READ_ONCE(net->ipv4.sysctl_nexthop_compat_mode)) nexthop_replace_notify(net, new_nh, &cfg->nlinfo); } diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index f33c31dd7366..b387c4835155 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -273,7 +273,7 @@ bool cookie_ecn_ok(const struct tcp_options_received *tcp_opt, if (!ecn_ok) return false; - if (net->ipv4.sysctl_tcp_ecn) + if (READ_ONCE(net->ipv4.sysctl_tcp_ecn)) return true; return dst_feature(dst, RTAX_FEATURE_ECN); diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index cd448cdd3b38..108fd86f2718 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -599,6 +599,8 @@ static struct ctl_table ipv4_net_table[] = { .maxlen = sizeof(u8), .mode = 0644, .proc_handler = proc_dou8vec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE }, { .procname = "icmp_echo_enable_probe", @@ -615,6 +617,8 @@ static struct ctl_table ipv4_net_table[] = { .maxlen = sizeof(u8), .mode = 0644, .proc_handler = proc_dou8vec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE }, { .procname = "icmp_ignore_bogus_error_responses", @@ -622,6 +626,8 @@ static struct ctl_table ipv4_net_table[] = { .maxlen = sizeof(u8), .mode = 0644, .proc_handler = proc_dou8vec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE }, { .procname = "icmp_errors_use_inbound_ifaddr", @@ -629,6 +635,8 @@ static struct ctl_table ipv4_net_table[] = { .maxlen = sizeof(u8), .mode = 0644, .proc_handler = proc_dou8vec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE }, { .procname = "icmp_ratelimit", @@ -668,6 +676,8 @@ static struct ctl_table ipv4_net_table[] = { .maxlen = sizeof(u8), .mode = 0644, .proc_handler = proc_dou8vec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_TWO, }, { .procname = "tcp_ecn_fallback", @@ -675,6 +685,8 @@ static struct ctl_table ipv4_net_table[] = { .maxlen = sizeof(u8), .mode = 0644, .proc_handler = proc_dou8vec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, { .procname = "ip_dynaddr", diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 028513d3e2a2..2222dfdde316 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2715,7 +2715,8 @@ static void tcp_orphan_update(struct timer_list *unused) static bool tcp_too_many_orphans(int shift) { - return READ_ONCE(tcp_orphan_cache) << shift > sysctl_tcp_max_orphans; + return READ_ONCE(tcp_orphan_cache) << shift > + READ_ONCE(sysctl_tcp_max_orphans); } bool tcp_check_oom(struct sock *sk, int shift) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 2e2a9ece9af2..3ec4edc37313 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -6729,7 +6729,7 @@ static void tcp_ecn_create_request(struct request_sock *req, ect = !INET_ECN_is_not_ect(TCP_SKB_CB(skb)->ip_dsfield); ecn_ok_dst = dst_feature(dst, DST_FEATURE_ECN_MASK); - ecn_ok = net->ipv4.sysctl_tcp_ecn || ecn_ok_dst; + ecn_ok = READ_ONCE(net->ipv4.sysctl_tcp_ecn) || ecn_ok_dst; if (((!ect || th->res1) && ecn_ok) || tcp_ca_needs_ecn(listen_sk) || (ecn_ok_dst & DST_FEATURE_ECN_CA) || diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 1c054431e358..11aa0ab10bba 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -324,7 +324,7 @@ static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); bool bpf_needs_ecn = tcp_bpf_ca_needs_ecn(sk); - bool use_ecn = sock_net(sk)->ipv4.sysctl_tcp_ecn == 1 || + bool use_ecn = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn) == 1 || tcp_ca_needs_ecn(sk) || bpf_needs_ecn; if (!use_ecn) { @@ -346,7 +346,7 @@ static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb) static void tcp_ecn_clear_syn(struct sock *sk, struct sk_buff *skb) { - if (sock_net(sk)->ipv4.sysctl_tcp_ecn_fallback) + if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn_fallback)) /* tp->ecn_flags are cleared at a later point in time when * SYN ACK is ultimatively being received. */ diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 61770220774e..9d92d51c4757 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -925,7 +925,7 @@ static int icmpv6_rcv(struct sk_buff *skb) break; case ICMPV6_EXT_ECHO_REQUEST: if (!net->ipv6.sysctl.icmpv6_echo_ignore_all && - net->ipv4.sysctl_icmp_echo_enable_probe) + READ_ONCE(net->ipv4.sysctl_icmp_echo_enable_probe)) icmpv6_echo_reply(skb); break; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 828355710c57..916417944ec8 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -5741,7 +5741,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb, if (nexthop_is_blackhole(rt->nh)) rtm->rtm_type = RTN_BLACKHOLE; - if (net->ipv4.sysctl_nexthop_compat_mode && + if (READ_ONCE(net->ipv4.sysctl_nexthop_compat_mode) && rt6_fill_node_nexthop(skb, rt->nh, &nh_flags) < 0) goto nla_put_failure; diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c index d64855010948..e756ba705fd9 100644 --- a/net/ipv6/seg6_iptunnel.c +++ b/net/ipv6/seg6_iptunnel.c @@ -189,6 +189,8 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto) } #endif + hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); + skb_postpush_rcsum(skb, hdr, tot_len); return 0; @@ -241,6 +243,8 @@ int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh) } #endif + hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); + skb_postpush_rcsum(skb, hdr, sizeof(struct ipv6hdr) + hdrlen); return 0; @@ -302,7 +306,6 @@ static int seg6_do_srh(struct sk_buff *skb) break; } - ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); skb_set_transport_header(skb, sizeof(struct ipv6hdr)); nf_reset_ct(skb); diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c index 98a34287439c..2cd4a8d3b30a 100644 --- a/net/ipv6/seg6_local.c +++ b/net/ipv6/seg6_local.c @@ -826,7 +826,6 @@ static int input_action_end_b6(struct sk_buff *skb, struct seg6_local_lwt *slwt) if (err) goto drop; - ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); skb_set_transport_header(skb, sizeof(struct ipv6hdr)); seg6_lookup_nexthop(skb, NULL, 0); @@ -858,7 +857,6 @@ static int input_action_end_b6_encap(struct sk_buff *skb, if (err) goto drop; - ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); skb_set_transport_header(skb, sizeof(struct ipv6hdr)); seg6_lookup_nexthop(skb, NULL, 0); diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index f7896f257e1b..4ddf297f40f2 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -4468,14 +4468,14 @@ EXPORT_SYMBOL_GPL(ieee80211_color_change_finish); void ieeee80211_obss_color_collision_notify(struct ieee80211_vif *vif, - u64 color_bitmap) + u64 color_bitmap, gfp_t gfp) { struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); if (sdata->vif.color_change_active || sdata->vif.csa_active) return; - cfg80211_obss_color_collision_notify(sdata->dev, color_bitmap); + cfg80211_obss_color_collision_notify(sdata->dev, color_bitmap, gfp); } EXPORT_SYMBOL_GPL(ieeee80211_obss_color_collision_notify); diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 41531478437c..15a73b7fdd75 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -377,7 +377,9 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do bool cancel_scan; struct cfg80211_nan_func *func; + spin_lock_bh(&local->fq.lock); clear_bit(SDATA_STATE_RUNNING, &sdata->state); + spin_unlock_bh(&local->fq.lock); cancel_scan = rcu_access_pointer(local->scan_sdata) == sdata; if (cancel_scan) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 3c08ae04ddbc..1675f8cb87f1 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -3217,7 +3217,8 @@ ieee80211_rx_check_bss_color_collision(struct ieee80211_rx_data *rx) IEEE80211_HE_OPERATION_BSS_COLOR_MASK); if (color == bss_conf->he_bss_color.color) ieeee80211_obss_color_collision_notify(&rx->sdata->vif, - BIT_ULL(color)); + BIT_ULL(color), + GFP_ATOMIC); } } diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 0e4efc08c762..c425f4fb7c2e 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -2818,19 +2818,10 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata, /* * If the skb is shared we need to obtain our own copy. */ - if (skb_shared(skb)) { - struct sk_buff *tmp_skb = skb; - - /* can't happen -- skb is a clone if info_id != 0 */ - WARN_ON(info_id); - - skb = skb_clone(skb, GFP_ATOMIC); - kfree_skb(tmp_skb); - - if (!skb) { - ret = -ENOMEM; - goto free; - } + skb = skb_share_check(skb, GFP_ATOMIC); + if (unlikely(!skb)) { + ret = -ENOMEM; + goto free; } hdr.frame_control = fc; @@ -3539,15 +3530,9 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata, /* after this point (skb is modified) we cannot return false */ - if (skb_shared(skb)) { - struct sk_buff *tmp_skb = skb; - - skb = skb_clone(skb, GFP_ATOMIC); - kfree_skb(tmp_skb); - - if (!skb) - return true; - } + skb = skb_share_check(skb, GFP_ATOMIC); + if (unlikely(!skb)) + return true; if ((hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) && ieee80211_amsdu_aggregate(sdata, sta, fast_tx, skb)) @@ -4437,7 +4422,7 @@ static void ieee80211_8023_xmit(struct ieee80211_sub_if_data *sdata, struct net_device *dev, struct sta_info *sta, struct ieee80211_key *key, struct sk_buff *skb) { - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct ieee80211_tx_info *info; struct ieee80211_local *local = sdata->local; struct tid_ampdu_tx *tid_tx; u8 tid; @@ -4452,6 +4437,11 @@ static void ieee80211_8023_xmit(struct ieee80211_sub_if_data *sdata, test_bit(SDATA_STATE_OFFCHANNEL, &sdata->state)) goto out_free; + skb = skb_share_check(skb, GFP_ATOMIC); + if (unlikely(!skb)) + return; + + info = IEEE80211_SKB_CB(skb); memset(info, 0, sizeof(*info)); ieee80211_aggr_check(sdata, sta, skb); diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 1e26b5235add..dad42d42aa84 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -301,6 +301,9 @@ static void __ieee80211_wake_txqs(struct ieee80211_sub_if_data *sdata, int ac) local_bh_disable(); spin_lock(&fq->lock); + if (!test_bit(SDATA_STATE_RUNNING, &sdata->state)) + goto out; + if (sdata->vif.type == NL80211_IFTYPE_AP) ps = &sdata->bss->ps; diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c index 62c6733e0792..d50480b31750 100644 --- a/net/mac80211/wme.c +++ b/net/mac80211/wme.c @@ -147,8 +147,8 @@ u16 __ieee80211_select_queue(struct ieee80211_sub_if_data *sdata, bool qos; /* all mesh/ocb stations are required to support WME */ - if (sdata->vif.type == NL80211_IFTYPE_MESH_POINT || - sdata->vif.type == NL80211_IFTYPE_OCB) + if (sta && (sdata->vif.type == NL80211_IFTYPE_MESH_POINT || + sdata->vif.type == NL80211_IFTYPE_OCB)) qos = true; else if (sta) qos = sta->sta.wme; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index cc21fafd9726..21a3ed64226e 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2919,12 +2919,12 @@ static void mptcp_copy_inaddrs(struct sock *msk, const struct sock *ssk) static int mptcp_disconnect(struct sock *sk, int flags) { - struct mptcp_subflow_context *subflow; + struct mptcp_subflow_context *subflow, *tmp; struct mptcp_sock *msk = mptcp_sk(sk); inet_sk_state_store(sk, TCP_CLOSE); - mptcp_for_each_subflow(msk, subflow) { + list_for_each_entry_safe(subflow, tmp, &msk->conn_list, node) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); __mptcp_close_ssk(sk, ssk, subflow, MPTCP_CF_FASTCLOSE); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 082a2fd8d85b..369aeabb94fe 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -729,6 +729,9 @@ static void nf_ct_gc_expired(struct nf_conn *ct) if (!refcount_inc_not_zero(&ct->ct_general.use)) return; + /* load ->status after refcount increase */ + smp_acquire__after_ctrl_dep(); + if (nf_ct_should_gc(ct)) nf_ct_kill(ct); @@ -795,6 +798,9 @@ __nf_conntrack_find_get(struct net *net, const struct nf_conntrack_zone *zone, */ ct = nf_ct_tuplehash_to_ctrack(h); if (likely(refcount_inc_not_zero(&ct->ct_general.use))) { + /* re-check key after refcount */ + smp_acquire__after_ctrl_dep(); + if (likely(nf_ct_key_equal(h, tuple, zone, net))) goto found; @@ -1387,6 +1393,9 @@ static unsigned int early_drop_list(struct net *net, if (!refcount_inc_not_zero(&tmp->ct_general.use)) continue; + /* load ->ct_net and ->status after refcount increase */ + smp_acquire__after_ctrl_dep(); + /* kill only if still in same netns -- might have moved due to * SLAB_TYPESAFE_BY_RCU rules. * @@ -1536,6 +1545,9 @@ static void gc_worker(struct work_struct *work) if (!refcount_inc_not_zero(&tmp->ct_general.use)) continue; + /* load ->status after refcount increase */ + smp_acquire__after_ctrl_dep(); + if (gc_worker_skip_ct(tmp)) { nf_ct_put(tmp); continue; @@ -1775,6 +1787,16 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, if (!exp) __nf_ct_try_assign_helper(ct, tmpl, GFP_ATOMIC); + /* Other CPU might have obtained a pointer to this object before it was + * released. Because refcount is 0, refcount_inc_not_zero() will fail. + * + * After refcount_set(1) it will succeed; ensure that zeroing of + * ct->status and the correct ct->net pointer are visible; else other + * core might observe CONFIRMED bit which means the entry is valid and + * in the hash table, but its not (anymore). + */ + smp_wmb(); + /* Now it is going to be associated with an sk_buff, set refcount to 1. */ refcount_set(&ct->ct_general.use, 1); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 722af5e309ba..f5905b5201a7 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1203,6 +1203,7 @@ restart: hnnode) { ct = nf_ct_tuplehash_to_ctrack(h); if (nf_ct_is_expired(ct)) { + /* need to defer nf_ct_kill() until lock is released */ if (i < ARRAY_SIZE(nf_ct_evict) && refcount_inc_not_zero(&ct->ct_general.use)) nf_ct_evict[i++] = ct; diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 6ad7bbc90d38..05895878610c 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -306,6 +306,9 @@ static int ct_seq_show(struct seq_file *s, void *v) if (unlikely(!refcount_inc_not_zero(&ct->ct_general.use))) return 0; + /* load ->status after refcount increase */ + smp_acquire__after_ctrl_dep(); + if (nf_ct_should_gc(ct)) { nf_ct_kill(ct); goto release; diff --git a/net/netfilter/nf_log_syslog.c b/net/netfilter/nf_log_syslog.c index 77bcb10fc586..cb894f0d63e9 100644 --- a/net/netfilter/nf_log_syslog.c +++ b/net/netfilter/nf_log_syslog.c @@ -67,7 +67,7 @@ dump_arp_packet(struct nf_log_buf *m, unsigned int logflags; struct arphdr _arph; - ah = skb_header_pointer(skb, 0, sizeof(_arph), &_arph); + ah = skb_header_pointer(skb, nhoff, sizeof(_arph), &_arph); if (!ah) { nf_log_buf_add(m, "TRUNCATED"); return; @@ -96,7 +96,7 @@ dump_arp_packet(struct nf_log_buf *m, ah->ar_pln != sizeof(__be32)) return; - ap = skb_header_pointer(skb, sizeof(_arph), sizeof(_arpp), &_arpp); + ap = skb_header_pointer(skb, nhoff + sizeof(_arph), sizeof(_arpp), &_arpp); if (!ap) { nf_log_buf_add(m, " INCOMPLETE [%zu bytes]", skb->len - sizeof(_arph)); @@ -149,7 +149,7 @@ static void nf_log_arp_packet(struct net *net, u_int8_t pf, nf_log_dump_packet_common(m, pf, hooknum, skb, in, out, loginfo, prefix); - dump_arp_packet(m, loginfo, skb, 0); + dump_arp_packet(m, loginfo, skb, skb_network_offset(skb)); nf_log_buf_close(m); } @@ -850,7 +850,7 @@ static void nf_log_ip_packet(struct net *net, u_int8_t pf, if (in) dump_mac_header(m, loginfo, skb); - dump_ipv4_packet(net, m, loginfo, skb, 0); + dump_ipv4_packet(net, m, loginfo, skb, skb_network_offset(skb)); nf_log_buf_close(m); } diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index d6b59beab3a9..646d5fd53604 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -5833,8 +5833,11 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, if (!nla[NFTA_SET_ELEM_KEY] && !(flags & NFT_SET_ELEM_CATCHALL)) return -EINVAL; - if (flags != 0) - nft_set_ext_add(&tmpl, NFT_SET_EXT_FLAGS); + if (flags != 0) { + err = nft_set_ext_add(&tmpl, NFT_SET_EXT_FLAGS); + if (err < 0) + return err; + } if (set->flags & NFT_SET_MAP) { if (nla[NFTA_SET_ELEM_DATA] == NULL && @@ -5943,7 +5946,9 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, if (err < 0) goto err_set_elem_expr; - nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, set->klen); + err = nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, set->klen); + if (err < 0) + goto err_parse_key; } if (nla[NFTA_SET_ELEM_KEY_END]) { @@ -5952,22 +5957,31 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, if (err < 0) goto err_parse_key; - nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY_END, set->klen); + err = nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY_END, set->klen); + if (err < 0) + goto err_parse_key_end; } if (timeout > 0) { - nft_set_ext_add(&tmpl, NFT_SET_EXT_EXPIRATION); - if (timeout != set->timeout) - nft_set_ext_add(&tmpl, NFT_SET_EXT_TIMEOUT); + err = nft_set_ext_add(&tmpl, NFT_SET_EXT_EXPIRATION); + if (err < 0) + goto err_parse_key_end; + + if (timeout != set->timeout) { + err = nft_set_ext_add(&tmpl, NFT_SET_EXT_TIMEOUT); + if (err < 0) + goto err_parse_key_end; + } } if (num_exprs) { for (i = 0; i < num_exprs; i++) size += expr_array[i]->ops->size; - nft_set_ext_add_length(&tmpl, NFT_SET_EXT_EXPRESSIONS, - sizeof(struct nft_set_elem_expr) + - size); + err = nft_set_ext_add_length(&tmpl, NFT_SET_EXT_EXPRESSIONS, + sizeof(struct nft_set_elem_expr) + size); + if (err < 0) + goto err_parse_key_end; } if (nla[NFTA_SET_ELEM_OBJREF] != NULL) { @@ -5982,7 +5996,9 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, err = PTR_ERR(obj); goto err_parse_key_end; } - nft_set_ext_add(&tmpl, NFT_SET_EXT_OBJREF); + err = nft_set_ext_add(&tmpl, NFT_SET_EXT_OBJREF); + if (err < 0) + goto err_parse_key_end; } if (nla[NFTA_SET_ELEM_DATA] != NULL) { @@ -6016,7 +6032,9 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, NFT_VALIDATE_NEED); } - nft_set_ext_add_length(&tmpl, NFT_SET_EXT_DATA, desc.len); + err = nft_set_ext_add_length(&tmpl, NFT_SET_EXT_DATA, desc.len); + if (err < 0) + goto err_parse_data; } /* The full maximum length of userdata can exceed the maximum @@ -6026,9 +6044,12 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, ulen = 0; if (nla[NFTA_SET_ELEM_USERDATA] != NULL) { ulen = nla_len(nla[NFTA_SET_ELEM_USERDATA]); - if (ulen > 0) - nft_set_ext_add_length(&tmpl, NFT_SET_EXT_USERDATA, - ulen); + if (ulen > 0) { + err = nft_set_ext_add_length(&tmpl, NFT_SET_EXT_USERDATA, + ulen); + if (err < 0) + goto err_parse_data; + } } err = -ENOMEM; @@ -6256,8 +6277,11 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, nft_set_ext_prepare(&tmpl); - if (flags != 0) - nft_set_ext_add(&tmpl, NFT_SET_EXT_FLAGS); + if (flags != 0) { + err = nft_set_ext_add(&tmpl, NFT_SET_EXT_FLAGS); + if (err < 0) + return err; + } if (nla[NFTA_SET_ELEM_KEY]) { err = nft_setelem_parse_key(ctx, set, &elem.key.val, @@ -6265,16 +6289,20 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, if (err < 0) return err; - nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, set->klen); + err = nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, set->klen); + if (err < 0) + goto fail_elem; } if (nla[NFTA_SET_ELEM_KEY_END]) { err = nft_setelem_parse_key(ctx, set, &elem.key_end.val, nla[NFTA_SET_ELEM_KEY_END]); if (err < 0) - return err; + goto fail_elem; - nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY_END, set->klen); + err = nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY_END, set->klen); + if (err < 0) + goto fail_elem_key_end; } err = -ENOMEM; @@ -6282,7 +6310,7 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, elem.key_end.val.data, NULL, 0, 0, GFP_KERNEL_ACCOUNT); if (elem.priv == NULL) - goto fail_elem; + goto fail_elem_key_end; ext = nft_set_elem_ext(set, elem.priv); if (flags) @@ -6306,6 +6334,8 @@ fail_ops: kfree(trans); fail_trans: kfree(elem.priv); +fail_elem_key_end: + nft_data_release(&elem.key_end.val, NFT_DATA_VALUE); fail_elem: nft_data_release(&elem.key.val, NFT_DATA_VALUE); return err; diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index ec6f4b699a2b..ce827e79c66a 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -1419,9 +1419,9 @@ static struct notifier_block tls_dev_notifier = { .notifier_call = tls_dev_event, }; -void __init tls_device_init(void) +int __init tls_device_init(void) { - register_netdevice_notifier(&tls_dev_notifier); + return register_netdevice_notifier(&tls_dev_notifier); } void __exit tls_device_cleanup(void) diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 2ffede463e4a..d80ab3d1764e 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -1048,7 +1048,12 @@ static int __init tls_register(void) if (err) return err; - tls_device_init(); + err = tls_device_init(); + if (err) { + unregister_pernet_subsys(&tls_proc_ops); + return err; + } + tcp_register_ulp(&tcp_tls_ulp_ops); return 0; diff --git a/net/wireless/sme.c b/net/wireless/sme.c index ff4d48fcbfb2..607a68911047 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -1031,7 +1031,8 @@ void __cfg80211_port_authorized(struct wireless_dev *wdev, const u8 *bssid) { ASSERT_WDEV_LOCK(wdev); - if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION)) + if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION && + wdev->iftype != NL80211_IFTYPE_P2P_CLIENT)) return; if (WARN_ON(!wdev->current_bss) || diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index f4009dbdf62d..ef78e0e1a754 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -5222,22 +5222,25 @@ union bpf_attr { * Return * Nothing. Always succeeds. * - * long bpf_dynptr_read(void *dst, u32 len, struct bpf_dynptr *src, u32 offset) + * long bpf_dynptr_read(void *dst, u32 len, struct bpf_dynptr *src, u32 offset, u64 flags) * Description * Read *len* bytes from *src* into *dst*, starting from *offset* * into *src*. + * *flags* is currently unused. * Return * 0 on success, -E2BIG if *offset* + *len* exceeds the length - * of *src*'s data, -EINVAL if *src* is an invalid dynptr. + * of *src*'s data, -EINVAL if *src* is an invalid dynptr or if + * *flags* is not 0. * - * long bpf_dynptr_write(struct bpf_dynptr *dst, u32 offset, void *src, u32 len) + * long bpf_dynptr_write(struct bpf_dynptr *dst, u32 offset, void *src, u32 len, u64 flags) * Description * Write *len* bytes from *src* into *dst*, starting from *offset* * into *dst*. + * *flags* is currently unused. * Return * 0 on success, -E2BIG if *offset* + *len* exceeds the length * of *dst*'s data, -EINVAL if *dst* is an invalid dynptr or if *dst* - * is a read-only dynptr. + * is a read-only dynptr or if *flags* is not 0. * * void *bpf_dynptr_data(struct bpf_dynptr *ptr, u32 offset, u32 len) * Description diff --git a/tools/testing/selftests/bpf/progs/dynptr_fail.c b/tools/testing/selftests/bpf/progs/dynptr_fail.c index d811cff73597..0a26c243e6e9 100644 --- a/tools/testing/selftests/bpf/progs/dynptr_fail.c +++ b/tools/testing/selftests/bpf/progs/dynptr_fail.c @@ -140,12 +140,12 @@ int use_after_invalid(void *ctx) bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(read_data), 0, &ptr); - bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0); + bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0, 0); bpf_ringbuf_submit_dynptr(&ptr, 0); /* this should fail */ - bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0); + bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0, 0); return 0; } @@ -338,7 +338,7 @@ int invalid_helper2(void *ctx) get_map_val_dynptr(&ptr); /* this should fail */ - bpf_dynptr_read(read_data, sizeof(read_data), (void *)&ptr + 8, 0); + bpf_dynptr_read(read_data, sizeof(read_data), (void *)&ptr + 8, 0, 0); return 0; } @@ -377,7 +377,7 @@ int invalid_write2(void *ctx) memcpy((void *)&ptr + 8, &x, sizeof(x)); /* this should fail */ - bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0); + bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0, 0); bpf_ringbuf_submit_dynptr(&ptr, 0); @@ -473,7 +473,7 @@ int invalid_read2(void *ctx) get_map_val_dynptr(&ptr); /* this should fail */ - bpf_dynptr_read(read_data, sizeof(read_data), (void *)&ptr + 1, 0); + bpf_dynptr_read(read_data, sizeof(read_data), (void *)&ptr + 1, 0, 0); return 0; } diff --git a/tools/testing/selftests/bpf/progs/dynptr_success.c b/tools/testing/selftests/bpf/progs/dynptr_success.c index d67be48df4b2..a3a6103c8569 100644 --- a/tools/testing/selftests/bpf/progs/dynptr_success.c +++ b/tools/testing/selftests/bpf/progs/dynptr_success.c @@ -43,10 +43,10 @@ int test_read_write(void *ctx) bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(write_data), 0, &ptr); /* Write data into the dynptr */ - err = err ?: bpf_dynptr_write(&ptr, 0, write_data, sizeof(write_data)); + err = bpf_dynptr_write(&ptr, 0, write_data, sizeof(write_data), 0); /* Read the data that was written into the dynptr */ - err = err ?: bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0); + err = err ?: bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0, 0); /* Ensure the data we read matches the data we wrote */ for (i = 0; i < sizeof(read_data); i++) { diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index a29f79618934..ffc35a22e914 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -36,4 +36,5 @@ test_unix_oob gro ioam6_parser toeplitz +tun cmsg_sender diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index ddad703ace34..db05b3764b77 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -11,7 +11,7 @@ TEST_PROGS += udpgso_bench.sh fib_rule_tests.sh msg_zerocopy.sh psock_snd.sh TEST_PROGS += udpgro_bench.sh udpgro.sh test_vxlan_under_vrf.sh reuseport_addr_any.sh TEST_PROGS += test_vxlan_fdb_changelink.sh so_txtime.sh ipv6_flowlabel.sh TEST_PROGS += tcp_fastopen_backup_key.sh fcnal-test.sh l2tp.sh traceroute.sh -TEST_PROGS += fin_ack_lat.sh fib_nexthop_multiprefix.sh fib_nexthops.sh +TEST_PROGS += fin_ack_lat.sh fib_nexthop_multiprefix.sh fib_nexthops.sh fib_nexthop_nongw.sh TEST_PROGS += altnames.sh icmp.sh icmp_redirect.sh ip6_gre_headroom.sh TEST_PROGS += route_localnet.sh TEST_PROGS += reuseaddr_ports_exhausted.sh diff --git a/tools/testing/selftests/net/fib_nexthop_nongw.sh b/tools/testing/selftests/net/fib_nexthop_nongw.sh new file mode 100755 index 000000000000..b7b928b38ce4 --- /dev/null +++ b/tools/testing/selftests/net/fib_nexthop_nongw.sh @@ -0,0 +1,119 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# ns: h1 | ns: h2 +# 192.168.0.1/24 | +# eth0 | +# | 192.168.1.1/32 +# veth0 <---|---> veth1 +# Validate source address selection for route without gateway + +PAUSE_ON_FAIL=no +VERBOSE=0 +ret=0 + +################################################################################ +# helpers + +log_test() +{ + local rc=$1 + local expected=$2 + local msg="$3" + + if [ ${rc} -eq ${expected} ]; then + printf "TEST: %-60s [ OK ]\n" "${msg}" + nsuccess=$((nsuccess+1)) + else + ret=1 + nfail=$((nfail+1)) + printf "TEST: %-60s [FAIL]\n" "${msg}" + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + fi + + [ "$VERBOSE" = "1" ] && echo +} + +run_cmd() +{ + local cmd="$*" + local out + local rc + + if [ "$VERBOSE" = "1" ]; then + echo "COMMAND: $cmd" + fi + + out=$(eval $cmd 2>&1) + rc=$? + if [ "$VERBOSE" = "1" -a -n "$out" ]; then + echo "$out" + fi + + [ "$VERBOSE" = "1" ] && echo + + return $rc +} + +################################################################################ +# config +setup() +{ + ip netns add h1 + ip -n h1 link set lo up + ip netns add h2 + ip -n h2 link set lo up + + # Add a fake eth0 to support an ip address + ip -n h1 link add name eth0 type dummy + ip -n h1 link set eth0 up + ip -n h1 address add 192.168.0.1/24 dev eth0 + + # Configure veths (same @mac, arp off) + ip -n h1 link add name veth0 type veth peer name veth1 netns h2 + ip -n h1 link set veth0 up + + ip -n h2 link set veth1 up + + # Configure @IP in the peer netns + ip -n h2 address add 192.168.1.1/32 dev veth1 + ip -n h2 route add default dev veth1 + + # Add a nexthop without @gw and use it in a route + ip -n h1 nexthop add id 1 dev veth0 + ip -n h1 route add 192.168.1.1 nhid 1 +} + +cleanup() +{ + ip netns del h1 2>/dev/null + ip netns del h2 2>/dev/null +} + +trap cleanup EXIT + +################################################################################ +# main + +while getopts :pv o +do + case $o in + p) PAUSE_ON_FAIL=yes;; + v) VERBOSE=1;; + esac +done + +cleanup +setup + +run_cmd ip -netns h1 route get 192.168.1.1 +log_test $? 0 "nexthop: get route with nexthop without gw" +run_cmd ip netns exec h1 ping -c1 192.168.1.1 +log_test $? 0 "nexthop: ping through nexthop without gw" + +exit $ret diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile index 8f481218a492..57b84e0c879e 100644 --- a/tools/testing/selftests/net/forwarding/Makefile +++ b/tools/testing/selftests/net/forwarding/Makefile @@ -37,6 +37,7 @@ TEST_PROGS = bridge_igmp.sh \ ipip_hier_gre_key.sh \ ipip_hier_gre_keys.sh \ ipip_hier_gre.sh \ + local_termination.sh \ loopback.sh \ mirror_gre_bound.sh \ mirror_gre_bridge_1d.sh \ @@ -52,6 +53,7 @@ TEST_PROGS = bridge_igmp.sh \ mirror_gre_vlan_bridge_1q.sh \ mirror_gre_vlan.sh \ mirror_vlan.sh \ + no_forwarding.sh \ pedit_dsfield.sh \ pedit_ip.sh \ pedit_l4port.sh \ diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile index f905d5358e68..48a99e1453e1 100644 --- a/tools/testing/selftests/net/mptcp/Makefile +++ b/tools/testing/selftests/net/mptcp/Makefile @@ -6,7 +6,7 @@ KSFT_KHDR_INSTALL := 1 CFLAGS = -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES) TEST_PROGS := mptcp_connect.sh pm_netlink.sh mptcp_join.sh diag.sh \ - simult_flows.sh mptcp_sockopt.sh + simult_flows.sh mptcp_sockopt.sh userspace_pm.sh TEST_GEN_FILES = mptcp_connect pm_nl_ctl mptcp_sockopt mptcp_inq |