diff options
author | David S. Miller <davem@davemloft.net> | 2021-02-16 17:30:20 -0800 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2021-02-16 17:51:13 -0800 |
commit | d489ded1a3690d7eca8633575cba3f7dac8484c7 (patch) | |
tree | 20e739382965ac61d6314e6b0df4cb6acbbbca0e | |
parent | 86dd9868b8788a9063893a97649594af93cd5aa6 (diff) | |
parent | 3af409ca278d4a8d50e91f9f7c4c33b175645cf3 (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
56 files changed, 724 insertions, 336 deletions
diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index 581bfce86dca..c7952ac5bd2f 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -651,16 +651,15 @@ tcp_rmem - vector of 3 INTEGERs: min, default, max default: initial size of receive buffer used by TCP sockets. This value overrides net.core.rmem_default used by other protocols. - Default: 87380 bytes. This value results in window of 65535 with - default setting of tcp_adv_win_scale and tcp_app_win:0 and a bit - less for default tcp_app_win. See below about these variables. + Default: 131072 bytes. + This value results in initial window of 65535. max: maximal size of receive buffer allowed for automatically selected receiver buffers for TCP socket. This value does not override net.core.rmem_max. Calling setsockopt() with SO_RCVBUF disables automatic tuning of that socket's receive buffer size, in which case this value is ignored. - Default: between 87380B and 6MB, depending on RAM size. + Default: between 131072 and 6MB, depending on RAM size. tcp_sack - BOOLEAN Enable select acknowledgments (SACKS). diff --git a/Documentation/networking/netdev-FAQ.rst b/Documentation/networking/netdev-FAQ.rst index ae2ae37cd921..a64c01b52b4c 100644 --- a/Documentation/networking/netdev-FAQ.rst +++ b/Documentation/networking/netdev-FAQ.rst @@ -272,6 +272,22 @@ to the mailing list, e.g.:: Posting as one thread is discouraged because it confuses patchwork (as of patchwork 2.2.2). +Can I reproduce the checks from patchwork on my local machine? +-------------------------------------------------------------- + +Checks in patchwork are mostly simple wrappers around existing kernel +scripts, the sources are available at: + +https://github.com/kuba-moo/nipa/tree/master/tests + +Running all the builds and checks locally is a pain, can I post my patches and have the patchwork bot validate them? +-------------------------------------------------------------------------------------------------------------------- + +No, you must ensure that your patches are ready by testing them locally +before posting to the mailing list. The patchwork build bot instance +gets overloaded very easily and netdev@vger really doesn't need more +traffic if we can help it. + Any other tips to help ensure my net/net-next patch gets OK'd? -------------------------------------------------------------- Attention to detail. Re-read your own work as if you were the diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c index 5f0472c18bcb..0c13cac903de 100644 --- a/drivers/atm/idt77252.c +++ b/drivers/atm/idt77252.c @@ -3743,16 +3743,7 @@ static int __init idt77252_init(void) struct sk_buff *skb; printk("%s: at %p\n", __func__, idt77252_init); - - if (sizeof(skb->cb) < sizeof(struct atm_skb_data) + - sizeof(struct idt77252_skb_prv)) { - printk(KERN_ERR "%s: skb->cb is too small (%lu < %lu)\n", - __func__, (unsigned long) sizeof(skb->cb), - (unsigned long) sizeof(struct atm_skb_data) + - sizeof(struct idt77252_skb_prv)); - return -EIO; - } - + BUILD_BUG_ON(sizeof(skb->cb) < sizeof(struct idt77252_skb_prv) + sizeof(struct atm_skb_data)); return pci_register_driver(&idt77252_driver); } diff --git a/drivers/atm/idt77252.h b/drivers/atm/idt77252.h index 9339197d701c..b059d31364dd 100644 --- a/drivers/atm/idt77252.h +++ b/drivers/atm/idt77252.h @@ -789,7 +789,7 @@ struct idt77252_skb_prv { struct scqe tbd; /* Transmit Buffer Descriptor */ dma_addr_t paddr; /* DMA handle */ u32 pool; /* sb_pool handle */ -}; +} __packed; #define IDT77252_PRV_TBD(skb) \ (((struct idt77252_skb_prv *)(ATM_SKB(skb)+1))->tbd) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-common.h b/drivers/net/ethernet/amd/xgbe/xgbe-common.h index b40d4377cc71..b2cd3bdba9f8 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-common.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe-common.h @@ -1279,10 +1279,18 @@ #define MDIO_PMA_10GBR_FECCTRL 0x00ab #endif +#ifndef MDIO_PMA_RX_CTRL1 +#define MDIO_PMA_RX_CTRL1 0x8051 +#endif + #ifndef MDIO_PCS_DIG_CTRL #define MDIO_PCS_DIG_CTRL 0x8000 #endif +#ifndef MDIO_PCS_DIGITAL_STAT +#define MDIO_PCS_DIGITAL_STAT 0x8010 +#endif + #ifndef MDIO_AN_XNP #define MDIO_AN_XNP 0x0016 #endif @@ -1358,6 +1366,8 @@ #define XGBE_KR_TRAINING_ENABLE BIT(1) #define XGBE_PCS_CL37_BP BIT(12) +#define XGBE_PCS_PSEQ_STATE_MASK 0x1c +#define XGBE_PCS_PSEQ_STATE_POWER_GOOD 0x10 #define XGBE_AN_CL37_INT_CMPLT BIT(0) #define XGBE_AN_CL37_INT_MASK 0x01 @@ -1375,6 +1385,10 @@ #define XGBE_PMA_CDR_TRACK_EN_OFF 0x00 #define XGBE_PMA_CDR_TRACK_EN_ON 0x01 +#define XGBE_PMA_RX_RST_0_MASK BIT(4) +#define XGBE_PMA_RX_RST_0_RESET_ON 0x10 +#define XGBE_PMA_RX_RST_0_RESET_OFF 0x00 + /* Bit setting and getting macros * The get macro will extract the current bit field value from within * the variable diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index 99b6d5a9f1d9..4f714f874c4f 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -1368,6 +1368,7 @@ static void xgbe_stop(struct xgbe_prv_data *pdata) return; netif_tx_stop_all_queues(netdev); + netif_carrier_off(pdata->netdev); xgbe_stop_timers(pdata); flush_workqueue(pdata->dev_workqueue); diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c index 93ef5a30cb8d..4e97b4869522 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c @@ -1345,7 +1345,7 @@ static void xgbe_phy_status(struct xgbe_prv_data *pdata) &an_restart); if (an_restart) { xgbe_phy_config_aneg(pdata); - return; + goto adjust_link; } if (pdata->phy.link) { @@ -1396,7 +1396,6 @@ static void xgbe_phy_stop(struct xgbe_prv_data *pdata) pdata->phy_if.phy_impl.stop(pdata); pdata->phy.link = 0; - netif_carrier_off(pdata->netdev); xgbe_phy_adjust_link(pdata); } diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c index 859ded0c06b0..18e48b3bc402 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c @@ -922,6 +922,9 @@ static bool xgbe_phy_belfuse_phy_quirks(struct xgbe_prv_data *pdata) if ((phy_id & 0xfffffff0) != 0x03625d10) return false; + /* Reset PHY - wait for self-clearing reset bit to clear */ + genphy_soft_reset(phy_data->phydev); + /* Disable RGMII mode */ phy_write(phy_data->phydev, 0x18, 0x7007); reg = phy_read(phy_data->phydev, 0x18); @@ -1953,6 +1956,27 @@ static void xgbe_phy_set_redrv_mode(struct xgbe_prv_data *pdata) xgbe_phy_put_comm_ownership(pdata); } +static void xgbe_phy_rx_reset(struct xgbe_prv_data *pdata) +{ + int reg; + + reg = XMDIO_READ_BITS(pdata, MDIO_MMD_PCS, MDIO_PCS_DIGITAL_STAT, + XGBE_PCS_PSEQ_STATE_MASK); + if (reg == XGBE_PCS_PSEQ_STATE_POWER_GOOD) { + /* Mailbox command timed out, reset of RX block is required. + * This can be done by asseting the reset bit and wait for + * its compeletion. + */ + XMDIO_WRITE_BITS(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_RX_CTRL1, + XGBE_PMA_RX_RST_0_MASK, XGBE_PMA_RX_RST_0_RESET_ON); + ndelay(20); + XMDIO_WRITE_BITS(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_RX_CTRL1, + XGBE_PMA_RX_RST_0_MASK, XGBE_PMA_RX_RST_0_RESET_OFF); + usleep_range(40, 50); + netif_err(pdata, link, pdata->netdev, "firmware mailbox reset performed\n"); + } +} + static void xgbe_phy_perform_ratechange(struct xgbe_prv_data *pdata, unsigned int cmd, unsigned int sub_cmd) { @@ -1960,9 +1984,11 @@ static void xgbe_phy_perform_ratechange(struct xgbe_prv_data *pdata, unsigned int wait; /* Log if a previous command did not complete */ - if (XP_IOREAD_BITS(pdata, XP_DRIVER_INT_RO, STATUS)) + if (XP_IOREAD_BITS(pdata, XP_DRIVER_INT_RO, STATUS)) { netif_dbg(pdata, link, pdata->netdev, "firmware mailbox not ready for command\n"); + xgbe_phy_rx_reset(pdata); + } /* Construct the command */ XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, COMMAND, cmd); @@ -1984,6 +2010,9 @@ static void xgbe_phy_perform_ratechange(struct xgbe_prv_data *pdata, netif_dbg(pdata, link, pdata->netdev, "firmware mailbox command did not complete\n"); + + /* Reset on error */ + xgbe_phy_rx_reset(pdata); } static void xgbe_phy_rrc(struct xgbe_prv_data *pdata) @@ -2584,6 +2613,14 @@ static int xgbe_phy_link_status(struct xgbe_prv_data *pdata, int *an_restart) if (reg & MDIO_STAT1_LSTATUS) return 1; + if (pdata->phy.autoneg == AUTONEG_ENABLE && + phy_data->port_mode == XGBE_PORT_MODE_BACKPLANE) { + if (!test_bit(XGBE_LINK_INIT, &pdata->dev_state)) { + netif_carrier_off(pdata->netdev); + *an_restart = 1; + } + } + /* No link, attempt a receiver reset cycle */ if (phy_data->rrc_count++ > XGBE_RRC_FREQUENCY) { phy_data->rrc_count = 0; diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_main.c b/drivers/net/ethernet/aquantia/atlantic/aq_main.c index 8f70a3909929..4af0cd9530de 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_main.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_main.c @@ -71,8 +71,10 @@ static int aq_ndev_open(struct net_device *ndev) goto err_exit; err = aq_nic_start(aq_nic); - if (err < 0) + if (err < 0) { + aq_nic_stop(aq_nic); goto err_exit; + } err_exit: if (err < 0) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index d0f3f68faa91..a680fd9c68ea 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -8984,9 +8984,10 @@ void bnxt_tx_disable(struct bnxt *bp) txr->dev_state = BNXT_DEV_STATE_CLOSING; } } + /* Drop carrier first to prevent TX timeout */ + netif_carrier_off(bp->dev); /* Stop all TX queues */ netif_tx_disable(bp->dev); - netif_carrier_off(bp->dev); } void bnxt_tx_enable(struct bnxt *bp) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c index 90a31b4a3020..64381be935a8 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c @@ -471,8 +471,8 @@ static int bnxt_dl_info_get(struct devlink *dl, struct devlink_info_req *req, if (BNXT_PF(bp) && !bnxt_hwrm_get_nvm_cfg_ver(bp, &nvm_cfg_ver)) { u32 ver = nvm_cfg_ver.vu32; - sprintf(buf, "%X.%X.%X", (ver >> 16) & 0xF, (ver >> 8) & 0xF, - ver & 0xF); + sprintf(buf, "%d.%d.%d", (ver >> 16) & 0xf, (ver >> 8) & 0xf, + ver & 0xf); rc = bnxt_dl_info_put(bp, req, BNXT_VERSION_STORED, DEVLINK_INFO_VERSION_GENERIC_FW_PSID, buf); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h index 1b49f2fa9b18..34546f5312ee 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h @@ -46,6 +46,9 @@ #define MAX_ULD_QSETS 16 #define MAX_ULD_NPORTS 4 +/* ulp_mem_io + ulptx_idata + payload + padding */ +#define MAX_IMM_ULPTX_WR_LEN (32 + 8 + 256 + 8) + /* CPL message priority levels */ enum { CPL_PRIORITY_DATA = 0, /* data messages */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index 550cc065649f..256fae15e032 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -2846,17 +2846,22 @@ int t4_mgmt_tx(struct adapter *adap, struct sk_buff *skb) * @skb: the packet * * Returns true if a packet can be sent as an offload WR with immediate - * data. We currently use the same limit as for Ethernet packets. + * data. + * FW_OFLD_TX_DATA_WR limits the payload to 255 bytes due to 8-bit field. + * However, FW_ULPTX_WR commands have a 256 byte immediate only + * payload limit. */ static inline int is_ofld_imm(const struct sk_buff *skb) { struct work_request_hdr *req = (struct work_request_hdr *)skb->data; unsigned long opcode = FW_WR_OP_G(ntohl(req->wr_hi)); - if (opcode == FW_CRYPTO_LOOKASIDE_WR) + if (unlikely(opcode == FW_ULPTX_WR)) + return skb->len <= MAX_IMM_ULPTX_WR_LEN; + else if (opcode == FW_CRYPTO_LOOKASIDE_WR) return skb->len <= SGE_MAX_WR_LEN; else - return skb->len <= MAX_IMM_TX_PKT_LEN; + return skb->len <= MAX_IMM_OFLD_TX_DATA_WR_LEN; } /** diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.h b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.h index 47ba81e42f5d..b1161bdeda4d 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.h +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.h @@ -50,9 +50,6 @@ #define MIN_RCV_WND (24 * 1024U) #define LOOPBACK(x) (((x) & htonl(0xff000000)) == htonl(0x7f000000)) -/* ulp_mem_io + ulptx_idata + payload + padding */ -#define MAX_IMM_ULPTX_WR_LEN (32 + 8 + 256 + 8) - /* for TX: a skb must have a headroom of at least TX_HEADER_LEN bytes */ #define TX_HEADER_LEN \ (sizeof(struct fw_ofld_tx_data_wr) + sizeof(struct sge_opaque_hdr)) diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c index 19f74d4cbb4e..492943bb9c48 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c @@ -395,10 +395,20 @@ static u32 dpaa2_eth_run_xdp(struct dpaa2_eth_priv *priv, xdp.frame_sz = DPAA2_ETH_RX_BUF_RAW_SIZE; err = xdp_do_redirect(priv->net_dev, &xdp, xdp_prog); - if (unlikely(err)) + if (unlikely(err)) { + addr = dma_map_page(priv->net_dev->dev.parent, + virt_to_page(vaddr), 0, + priv->rx_buf_size, DMA_BIDIRECTIONAL); + if (unlikely(dma_mapping_error(priv->net_dev->dev.parent, addr))) { + free_pages((unsigned long)vaddr, 0); + } else { + ch->buf_count++; + dpaa2_eth_xdp_release_buf(priv, ch, addr); + } ch->stats.xdp_drop++; - else + } else { ch->stats.xdp_redirect++; + } break; } diff --git a/drivers/net/ethernet/freescale/enetc/Kconfig b/drivers/net/ethernet/freescale/enetc/Kconfig index d99ea0f4e4a6..ab92382c399a 100644 --- a/drivers/net/ethernet/freescale/enetc/Kconfig +++ b/drivers/net/ethernet/freescale/enetc/Kconfig @@ -27,7 +27,7 @@ config FSL_ENETC_VF config FSL_ENETC_MDIO tristate "ENETC MDIO driver" - depends on PCI + depends on PCI && MDIO_DEVRES && MDIO_BUS help This driver supports NXP ENETC Central MDIO controller as a PCIe physical function (PF) device. diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c index 3eb5f1375bd4..515c5b29d7aa 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c @@ -1157,14 +1157,15 @@ static void enetc_pf_remove(struct pci_dev *pdev) struct enetc_ndev_priv *priv; priv = netdev_priv(si->ndev); - enetc_phylink_destroy(priv); - enetc_mdiobus_destroy(pf); if (pf->num_vfs) enetc_sriov_configure(pdev, 0); unregister_netdev(si->ndev); + enetc_phylink_destroy(priv); + enetc_mdiobus_destroy(pf); + enetc_free_msix(priv); enetc_free_si_resources(priv); diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 927d5f36d308..5cf7e5a367f0 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -247,8 +247,13 @@ static void free_long_term_buff(struct ibmvnic_adapter *adapter, if (!ltb->buff) return; + /* VIOS automatically unmaps the long term buffer at remote + * end for the following resets: + * FAILOVER, MOBILITY, TIMEOUT. + */ if (adapter->reset_reason != VNIC_RESET_FAILOVER && - adapter->reset_reason != VNIC_RESET_MOBILITY) + adapter->reset_reason != VNIC_RESET_MOBILITY && + adapter->reset_reason != VNIC_RESET_TIMEOUT) send_request_unmap(adapter, ltb->map_id); dma_free_coherent(dev, ltb->size, ltb->buff, ltb->addr); } @@ -1322,10 +1327,8 @@ static int __ibmvnic_close(struct net_device *netdev) adapter->state = VNIC_CLOSING; rc = set_link_state(adapter, IBMVNIC_LOGICAL_LNK_DN); - if (rc) - return rc; adapter->state = VNIC_CLOSED; - return 0; + return rc; } static int ibmvnic_close(struct net_device *netdev) @@ -1670,6 +1673,9 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) skb_copy_from_linear_data(skb, dst, skb->len); } + /* post changes to long_term_buff *dst before VIOS accessing it */ + dma_wmb(); + tx_pool->consumer_index = (tx_pool->consumer_index + 1) % tx_pool->num_buffers; @@ -2288,7 +2294,8 @@ static int ibmvnic_reset(struct ibmvnic_adapter *adapter, unsigned long flags; int ret; - /* If failover is pending don't schedule any other reset. + /* + * If failover is pending don't schedule any other reset. * Instead let the failover complete. If there is already a * a failover reset scheduled, we will detect and drop the * duplicate reset when walking the ->rwi_list below. @@ -2308,14 +2315,11 @@ static int ibmvnic_reset(struct ibmvnic_adapter *adapter, goto err; } - spin_lock_irqsave(&adapter->rwi_lock, flags); - list_for_each(entry, &adapter->rwi_list) { tmp = list_entry(entry, struct ibmvnic_rwi, list); if (tmp->reset_reason == reason) { netdev_dbg(netdev, "Skipping matching reset, reason=%d\n", reason); - spin_unlock_irqrestore(&adapter->rwi_lock, flags); ret = EBUSY; goto err; } @@ -2323,8 +2327,6 @@ static int ibmvnic_reset(struct ibmvnic_adapter *adapter, rwi = kzalloc(sizeof(*rwi), GFP_ATOMIC); if (!rwi) { - spin_unlock_irqrestore(&adapter->rwi_lock, flags); - ibmvnic_close(netdev); ret = ENOMEM; goto err; } @@ -2337,12 +2339,17 @@ static int ibmvnic_reset(struct ibmvnic_adapter *adapter, } rwi->reset_reason = reason; list_add_tail(&rwi->list, &adapter->rwi_list); - spin_unlock_irqrestore(&adapter->rwi_lock, flags); netdev_dbg(adapter->netdev, "Scheduling reset (reason %d)\n", reason); schedule_work(&adapter->ibmvnic_reset); - return 0; + ret = 0; err: + /* ibmvnic_close() below can block, so drop the lock first */ + spin_unlock_irqrestore(&adapter->rwi_lock, flags); + + if (ret == ENOMEM) + ibmvnic_close(netdev); + return -ret; } @@ -2433,6 +2440,8 @@ restart_poll: offset = be16_to_cpu(next->rx_comp.off_frame_data); flags = next->rx_comp.flags; skb = rx_buff->skb; + /* load long_term_buff before copying to skb */ + dma_rmb(); skb_copy_to_linear_data(skb, rx_buff->data + offset, length); @@ -5346,7 +5355,18 @@ static int ibmvnic_remove(struct vio_dev *dev) unsigned long flags; spin_lock_irqsave(&adapter->state_lock, flags); + + /* If ibmvnic_reset() is scheduling a reset, wait for it to + * finish. Then, set the state to REMOVING to prevent it from + * scheduling any more work and to have reset functions ignore + * any resets that have already been scheduled. Drop the lock + * after setting state, so __ibmvnic_reset() which is called + * from the flush_work() below, can make progress. + */ + spin_lock_irqsave(&adapter->rwi_lock, flags); adapter->state = VNIC_REMOVING; + spin_unlock_irqrestore(&adapter->rwi_lock, flags); + spin_unlock_irqrestore(&adapter->state_lock, flags); flush_work(&adapter->ibmvnic_reset); diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h index 270d1cac86a4..e4dcc63b9710 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.h +++ b/drivers/net/ethernet/ibm/ibmvnic.h @@ -31,7 +31,7 @@ #define IBMVNIC_BUFFS_PER_POOL 100 #define IBMVNIC_MAX_QUEUES 16 #define IBMVNIC_MAX_QUEUE_SZ 4096 -#define IBMVNIC_MAX_IND_DESCS 128 +#define IBMVNIC_MAX_IND_DESCS 16 #define IBMVNIC_IND_ARR_SZ (IBMVNIC_MAX_IND_DESCS * 32) #define IBMVNIC_TSO_BUF_SZ 65536 @@ -1084,6 +1084,8 @@ struct ibmvnic_adapter { /* Used for serializatin of state field */ spinlock_t state_lock; enum ibmvnic_reset_reason reset_reason; + /* when taking both state and rwi locks, take state lock first */ + spinlock_t rwi_lock; struct list_head rwi_list; /* Used for serialization of rwi_list */ spinlock_t rwi_lock; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index aa76a6e0dae8..d7d8a68ef23d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -141,6 +141,11 @@ static int mlx5_devlink_reload_down(struct devlink *devlink, bool netns_change, return -EOPNOTSUPP; } + if (mlx5_lag_is_active(dev)) { + NL_SET_ERR_MSG_MOD(extack, "reload is unsupported in Lag mode\n"); + return -EOPNOTSUPP; + } + switch (action) { case DEVLINK_RELOAD_ACTION_DRIVER_REINIT: mlx5_unload_one(dev, false); @@ -428,6 +433,10 @@ static int mlx5_devlink_enable_roce_validate(struct devlink *devlink, u32 id, NL_SET_ERR_MSG_MOD(extack, "Device doesn't support RoCE"); return -EOPNOTSUPP; } + if (mlx5_core_is_mp_slave(dev) || mlx5_lag_is_active(dev)) { + NL_SET_ERR_MSG_MOD(extack, "Multi port slave/Lag device can't configure RoCE"); + return -EOPNOTSUPP; + } return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c index 0b503ebe59ec..f3f6eb081948 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -12,6 +12,7 @@ #include <net/flow_offload.h> #include <net/netfilter/nf_flow_table.h> #include <linux/workqueue.h> +#include <linux/refcount.h> #include <linux/xarray.h> #include "lib/fs_chains.h" @@ -52,11 +53,11 @@ struct mlx5_tc_ct_priv { struct mlx5_flow_table *ct_nat; struct mlx5_flow_table *post_ct; struct mutex control_lock; /* guards parallel adds/dels */ - struct mutex shared_counter_lock; struct mapping_ctx *zone_mapping; struct mapping_ctx *labels_mapping; enum mlx5_flow_namespace_type ns_type; struct mlx5_fs_chains *chains; + spinlock_t ht_lock; /* protects ft entries */ }; struct mlx5_ct_flow { @@ -125,6 +126,10 @@ struct mlx5_ct_counter { bool is_shared; }; +enum { + MLX5_CT_ENTRY_FLAG_VALID, +}; + struct mlx5_ct_entry { struct rhash_head node; struct rhash_head tuple_node; @@ -135,6 +140,12 @@ struct mlx5_ct_entry { struct mlx5_ct_tuple tuple; struct mlx5_ct_tuple tuple_nat; struct mlx5_ct_zone_rule zone_rules[2]; + + struct mlx5_tc_ct_priv *ct_priv; + struct work_struct work; + + refcount_t refcnt; + unsigned long flags; }; static const struct rhashtable_params cts_ht_params = { @@ -742,6 +753,87 @@ err_attr: return err; } +static bool +mlx5_tc_ct_entry_valid(struct mlx5_ct_entry *entry) +{ + return test_bit(MLX5_CT_ENTRY_FLAG_VALID, &entry->flags); +} + +static struct mlx5_ct_entry * +mlx5_tc_ct_entry_get(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_tuple *tuple) +{ + struct mlx5_ct_entry *entry; + + entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_ht, tuple, + tuples_ht_params); + if (entry && mlx5_tc_ct_entry_valid(entry) && + refcount_inc_not_zero(&entry->refcnt)) { + return entry; + } else if (!entry) { + entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_nat_ht, + tuple, tuples_nat_ht_params); + if (entry && mlx5_tc_ct_entry_valid(entry) && + refcount_inc_not_zero(&entry->refcnt)) + return entry; + } + + return entry ? ERR_PTR(-EINVAL) : NULL; +} + +static void mlx5_tc_ct_entry_remove_from_tuples(struct mlx5_ct_entry *entry) +{ + struct mlx5_tc_ct_priv *ct_priv = entry->ct_priv; + + rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht, + &entry->tuple_nat_node, + tuples_nat_ht_params); + rhashtable_remove_fast(&ct_priv->ct_tuples_ht, &entry->tuple_node, + tuples_ht_params); +} + +static void mlx5_tc_ct_entry_del(struct mlx5_ct_entry *entry) +{ + struct mlx5_tc_ct_priv *ct_priv = entry->ct_priv; + + mlx5_tc_ct_entry_del_rules(ct_priv, entry); + + spin_lock_bh(&ct_priv->ht_lock); + mlx5_tc_ct_entry_remove_from_tuples(entry); + spin_unlock_bh(&ct_priv->ht_lock); + + mlx5_tc_ct_counter_put(ct_priv, entry); + kfree(entry); +} + +static void +mlx5_tc_ct_entry_put(struct mlx5_ct_entry *entry) +{ + if (!refcount_dec_and_test(&entry->refcnt)) + return; + + mlx5_tc_ct_entry_del(entry); +} + +static void mlx5_tc_ct_entry_del_work(struct work_struct *work) +{ + struct mlx5_ct_entry *entry = container_of(work, struct mlx5_ct_entry, work); + + mlx5_tc_ct_entry_del(entry); +} + +static void +__mlx5_tc_ct_entry_put(struct mlx5_ct_entry *entry) +{ + struct mlx5e_priv *priv; + + if (!refcount_dec_and_test(&entry->refcnt)) + return; + + priv = netdev_priv(entry->ct_priv->netdev); + INIT_WORK(&entry->work, mlx5_tc_ct_entry_del_work); + queue_work(priv->wq, &entry->work); +} + static struct mlx5_ct_counter * mlx5_tc_ct_counter_create(struct mlx5_tc_ct_priv *ct_priv) { @@ -793,16 +885,26 @@ mlx5_tc_ct_shared_counter_get(struct mlx5_tc_ct_priv *ct_priv, } /* Use the same counter as the reverse direction */ - mutex_lock(&ct_priv->shared_counter_lock); - rev_entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_ht, &rev_tuple, - tuples_ht_params); - if (rev_entry) { - if (refcount_inc_not_zero(&rev_entry->counter->refcount)) { - mutex_unlock(&ct_priv->shared_counter_lock); - return rev_entry->counter; - } + spin_lock_bh(&ct_priv->ht_lock); + rev_entry = mlx5_tc_ct_entry_get(ct_priv, &rev_tuple); + + if (IS_ERR(rev_entry)) { + spin_unlock_bh(&ct_priv->ht_lock); + goto create_counter; } - mutex_unlock(&ct_priv->shared_counter_lock); + + if (rev_entry && refcount_inc_not_zero(&rev_entry->counter->refcount)) { + ct_dbg("Using shared counter entry=0x%p rev=0x%p\n", entry, rev_entry); + shared_counter = rev_entry->counter; + spin_unlock_bh(&ct_priv->ht_lock); + + mlx5_tc_ct_entry_put(rev_entry); + return shared_counter; + } + + spin_unlock_bh(&ct_priv->ht_lock); + +create_counter: shared_counter = mlx5_tc_ct_counter_create(ct_priv); if (IS_ERR(shared_counter)) @@ -865,10 +967,14 @@ mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft, if (!meta_action) return -EOPNOTSUPP; - entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, - cts_ht_params); - if (entry) - return 0; + spin_lock_bh(&ct_priv->ht_lock); + entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params); + if (entry && refcount_inc_not_zero(&entry->refcnt)) { + spin_unlock_bh(&ct_priv->ht_lock); + mlx5_tc_ct_entry_put(entry); + return -EEXIST; + } + spin_unlock_bh(&ct_priv->ht_lock); entry = kzalloc(sizeof(*entry), GFP_KERNEL); if (!entry) @@ -877,6 +983,8 @@ mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft, entry->tuple.zone = ft->zone; entry->cookie = flow->cookie; entry->restore_cookie = meta_action->ct_metadata.cookie; + refcount_set(&entry->refcnt, 2); + entry->ct_priv = ct_priv; err = mlx5_tc_ct_rule_to_tuple(&entry->tuple, flow_rule); if (err) @@ -887,35 +995,40 @@ mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft, if (err) goto err_set; - err = rhashtable_insert_fast(&ct_priv->ct_tuples_ht, - &entry->tuple_node, - tuples_ht_params); + spin_lock_bh(&ct_priv->ht_lock); + + err = rhashtable_lookup_insert_fast(&ft->ct_entries_ht, &entry->node, + cts_ht_params); + if (err) + goto err_entries; + + err = rhashtable_lookup_insert_fast(&ct_priv->ct_tuples_ht, + &entry->tuple_node, + tuples_ht_params); if (err) goto err_tuple; if (memcmp(&entry->tuple, &entry->tuple_nat, sizeof(entry->tuple))) { - err = rhashtable_insert_fast(&ct_priv->ct_tuples_nat_ht, - &entry->tuple_nat_node, - tuples_nat_ht_params); + err = rhashtable_lookup_insert_fast(&ct_priv->ct_tuples_nat_ht, + &entry->tuple_nat_node, + tuples_nat_ht_params); if (err) goto err_tuple_nat; } + spin_unlock_bh(&ct_priv->ht_lock); err = mlx5_tc_ct_entry_add_rules(ct_priv, flow_rule, entry, ft->zone_restore_id); if (err) goto err_rules; - err = rhashtable_insert_fast(&ft->ct_entries_ht, &entry->node, - cts_ht_params); - if (err) - goto err_insert; + set_bit(MLX5_CT_ENTRY_FLAG_VALID, &entry->flags); + mlx5_tc_ct_entry_put(entry); /* this function reference */ return 0; -err_insert: - mlx5_tc_ct_entry_del_rules(ct_priv, entry); err_rules: + spin_lock_bh(&ct_priv->ht_lock); if (mlx5_tc_ct_entry_has_nat(entry)) rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht, &entry->tuple_nat_node, tuples_nat_ht_params); @@ -924,47 +1037,43 @@ err_tuple_nat: &entry->tuple_node, tuples_ht_params); err_tuple: + rhashtable_remove_fast(&ft->ct_entries_ht, + &entry->node, + cts_ht_params); +err_entries: + spin_unlock_bh(&ct_priv->ht_lock); err_set: kfree(entry); - netdev_warn(ct_priv->netdev, - "Failed to offload ct entry, err: %d\n", err); + if (err != -EEXIST) + netdev_warn(ct_priv->netdev, "Failed to offload ct entry, err: %d\n", err); return err; } -static void -mlx5_tc_ct_del_ft_entry(struct mlx5_tc_ct_priv *ct_priv, - struct mlx5_ct_entry *entry) -{ - mlx5_tc_ct_entry_del_rules(ct_priv, entry); - mutex_lock(&ct_priv->shared_counter_lock); - if (mlx5_tc_ct_entry_has_nat(entry)) - rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht, - &entry->tuple_nat_node, - tuples_nat_ht_params); - rhashtable_remove_fast(&ct_priv->ct_tuples_ht, &entry->tuple_node, - tuples_ht_params); - mutex_unlock(&ct_priv->shared_counter_lock); - mlx5_tc_ct_counter_put(ct_priv, entry); - -} - static int mlx5_tc_ct_block_flow_offload_del(struct mlx5_ct_ft *ft, struct flow_cls_offload *flow) { + struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv; unsigned long cookie = flow->cookie; struct mlx5_ct_entry *entry; - entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, - cts_ht_params); - if (!entry) + spin_lock_bh(&ct_priv->ht_lock); + entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params); + if (!entry) { + spin_unlock_bh(&ct_priv->ht_lock); return -ENOENT; + } - mlx5_tc_ct_del_ft_entry(ft->ct_priv, entry); - WARN_ON(rhashtable_remove_fast(&ft->ct_entries_ht, - &entry->node, - cts_ht_params)); - kfree(entry); + if (!mlx5_tc_ct_entry_valid(entry)) { + spin_unlock_bh(&ct_priv->ht_lock); + return -EINVAL; + } + + rhashtable_remove_fast(&ft->ct_entries_ht, &entry->node, cts_ht_params); + mlx5_tc_ct_entry_remove_from_tuples(entry); + spin_unlock_bh(&ct_priv->ht_lock); + + mlx5_tc_ct_entry_put(entry); return 0; } @@ -973,19 +1082,30 @@ static int mlx5_tc_ct_block_flow_offload_stats(struct mlx5_ct_ft *ft, struct flow_cls_offload *f) { + struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv; unsigned long cookie = f->cookie; struct mlx5_ct_entry *entry; u64 lastuse, packets, bytes; - entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, - cts_ht_params); - if (!entry) + spin_lock_bh(&ct_priv->ht_lock); + entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params); + if (!entry) { + spin_unlock_bh(&ct_priv->ht_lock); return -ENOENT; + } + + if (!mlx5_tc_ct_entry_valid(entry) || !refcount_inc_not_zero(&entry->refcnt)) { + spin_unlock_bh(&ct_priv->ht_lock); + return -EINVAL; + } + + spin_unlock_bh(&ct_priv->ht_lock); mlx5_fc_query_cached(entry->counter->counter, &bytes, &packets, &lastuse); flow_stats_update(&f->stats, bytes, packets, 0, lastuse, FLOW_ACTION_HW_STATS_DELAYED); + mlx5_tc_ct_entry_put(entry); return 0; } @@ -1481,11 +1601,9 @@ err_mapping: static void mlx5_tc_ct_flush_ft_entry(void *ptr, void *arg) { - struct mlx5_tc_ct_priv *ct_priv = arg; struct mlx5_ct_entry *entry = ptr; - mlx5_tc_ct_del_ft_entry(ct_priv, entry); - kfree(entry); + mlx5_tc_ct_entry_put(entry); } static void @@ -1962,6 +2080,7 @@ mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains, goto err_mapping_labels; } + spin_lock_init(&ct_priv->ht_lock); ct_priv->ns_type = ns_type; ct_priv->chains = chains; ct_priv->netdev = priv->netdev; @@ -1996,7 +2115,6 @@ mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains, idr_init(&ct_priv->fte_ids); mutex_init(&ct_priv->control_lock); - mutex_init(&ct_priv->shared_counter_lock); rhashtable_init(&ct_priv->zone_ht, &zone_params); rhashtable_init(&ct_priv->ct_tuples_ht, &tuples_ht_params); rhashtable_init(&ct_priv->ct_tuples_nat_ht, &tuples_nat_ht_params); @@ -2039,7 +2157,6 @@ mlx5_tc_ct_clean(struct mlx5_tc_ct_priv *ct_priv) rhashtable_destroy(&ct_priv->ct_tuples_nat_ht); rhashtable_destroy(&ct_priv->zone_ht); mutex_destroy(&ct_priv->control_lock); - mutex_destroy(&ct_priv->shared_counter_lock); idr_destroy(&ct_priv->fte_ids); kfree(ct_priv); } @@ -2061,14 +2178,22 @@ mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv, if (!mlx5_tc_ct_skb_to_tuple(skb, &tuple, zone)) return false; - entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_ht, &tuple, - tuples_ht_params); - if (!entry) - entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_nat_ht, - &tuple, tuples_nat_ht_params); - if (!entry) + spin_lock(&ct_priv->ht_lock); + + entry = mlx5_tc_ct_entry_get(ct_priv, &tuple); + if (!entry) { + spin_unlock(&ct_priv->ht_lock); + return false; + } + + if (IS_ERR(entry)) { + spin_unlock(&ct_priv->ht_lock); return false; + } + spin_unlock(&ct_priv->ht_lock); tcf_ct_flow_table_restore_skb(skb, entry->restore_cookie); + __mlx5_tc_ct_entry_put(entry); + return true; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h index d487e5e37162..8d991c3b7a50 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h @@ -83,7 +83,7 @@ static inline void mlx5e_xdp_tx_disable(struct mlx5e_priv *priv) clear_bit(MLX5E_STATE_XDP_TX_ENABLED, &priv->state); /* Let other device's napi(s) and XSK wakeups see our new state. */ - synchronize_rcu(); + synchronize_net(); } static inline bool mlx5e_xdp_tx_is_enabled(struct mlx5e_priv *priv) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c index d87c345878d3..f4bce1365639 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c @@ -111,7 +111,7 @@ err_free_cparam: void mlx5e_close_xsk(struct mlx5e_channel *c) { clear_bit(MLX5E_CHANNEL_STATE_XSK, c->state); - synchronize_rcu(); /* Sync with the XSK wakeup and with NAPI. */ + synchronize_net(); /* Sync with the XSK wakeup and with NAPI. */ mlx5e_close_rq(&c->xskrq); mlx5e_close_cq(&c->xskrq.cq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h index 959bb6cd7203..cc0efac7b812 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h @@ -173,7 +173,7 @@ static inline bool mlx5e_accel_tx_eseg(struct mlx5e_priv *priv, #endif #if IS_ENABLED(CONFIG_GENEVE) - if (skb->encapsulation) + if (skb->encapsulation && skb->ip_summed == CHECKSUM_PARTIAL) mlx5e_tx_tunnel_accel(skb, eseg, ihs); #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c index 6a1d82503ef8..d06532d0baa4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c @@ -57,6 +57,20 @@ struct mlx5e_ktls_offload_context_rx { struct mlx5e_ktls_rx_resync_ctx resync; }; +static bool mlx5e_ktls_priv_rx_put(struct mlx5e_ktls_offload_context_rx *priv_rx) +{ + if (!refcount_dec_and_test(&priv_rx->resync.refcnt)) + return false; + + kfree(priv_rx); + return true; +} + +static void mlx5e_ktls_priv_rx_get(struct mlx5e_ktls_offload_context_rx *priv_rx) +{ + refcount_inc(&priv_rx->resync.refcnt); +} + static int mlx5e_ktls_create_tir(struct mlx5_core_dev *mdev, u32 *tirn, u32 rqtn) { int err, inlen; @@ -326,7 +340,7 @@ static void resync_handle_work(struct work_struct *work) priv_rx = container_of(resync, struct mlx5e_ktls_offload_context_rx, resync); if (unlikely(test_bit(MLX5E_PRIV_RX_FLAG_DELETING, priv_rx->flags))) { - refcount_dec(&resync->refcnt); + mlx5e_ktls_priv_rx_put(priv_rx); return; } @@ -334,7 +348,7 @@ static void resync_handle_work(struct work_struct *work) sq = &c->async_icosq; if (resync_post_get_progress_params(sq, priv_rx)) - refcount_dec(&resync->refcnt); + mlx5e_ktls_priv_rx_put(priv_rx); } static void resync_init(struct mlx5e_ktls_rx_resync_ctx *resync, @@ -377,7 +391,11 @@ unlock: return err; } -/* Function is called with elevated refcount, it decreases it. */ +/* Function can be called with the refcount being either elevated or not. + * It decreases the refcount and may free the kTLS priv context. + * Refcount is not elevated only if tls_dev_del has been called, but GET_PSV was + * already in flight. + */ void mlx5e_ktls_handle_get_psv_completion(struct mlx5e_icosq_wqe_info *wi, struct mlx5e_icosq *sq) { @@ -410,7 +428,7 @@ void mlx5e_ktls_handle_get_psv_completion(struct mlx5e_icosq_wqe_info *wi, tls_offload_rx_resync_async_request_end(priv_rx->sk, cpu_to_be32(hw_seq)); priv_rx->stats->tls_resync_req_end++; out: - refcount_dec(&resync->refcnt); + mlx5e_ktls_priv_rx_put(priv_rx); dma_unmap_single(dev, buf->dma_addr, PROGRESS_PARAMS_PADDED_SIZE, DMA_FROM_DEVICE); kfree(buf); } @@ -431,9 +449,9 @@ static bool resync_queue_get_psv(struct sock *sk) return false; resync = &priv_rx->resync; - refcount_inc(&resync->refcnt); + mlx5e_ktls_priv_rx_get(priv_rx); if (unlikely(!queue_work(resync->priv->tls->rx_wq, &resync->work))) - refcount_dec(&resync->refcnt); + mlx5e_ktls_priv_rx_put(priv_rx); return true; } @@ -625,31 +643,6 @@ err_create_key: return err; } -/* Elevated refcount on the resync object means there are - * outstanding operations (uncompleted GET_PSV WQEs) that - * will read the resync / priv_rx objects once completed. - * Wait for them to avoid use-after-free. - */ -static void wait_for_resync(struct net_device *netdev, - struct mlx5e_ktls_rx_resync_ctx *resync) -{ -#define MLX5E_KTLS_RX_RESYNC_TIMEOUT 20000 /* msecs */ - unsigned long exp_time = jiffies + msecs_to_jiffies(MLX5E_KTLS_RX_RESYNC_TIMEOUT); - unsigned int refcnt; - - do { - refcnt = refcount_read(&resync->refcnt); - if (refcnt == 1) - return; - - msleep(20); - } while (time_before(jiffies, exp_time)); - - netdev_warn(netdev, - "Failed waiting for kTLS RX resync refcnt to be released (%u).\n", - refcnt); -} - void mlx5e_ktls_del_rx(struct net_device *netdev, struct tls_context *tls_ctx) { struct mlx5e_ktls_offload_context_rx *priv_rx; @@ -663,7 +656,7 @@ void mlx5e_ktls_del_rx(struct net_device *netdev, struct tls_context *tls_ctx) priv_rx = mlx5e_get_ktls_rx_priv_ctx(tls_ctx); set_bit(MLX5E_PRIV_RX_FLAG_DELETING, priv_rx->flags); mlx5e_set_ktls_rx_priv_ctx(tls_ctx, NULL); - synchronize_rcu(); /* Sync with NAPI */ + synchronize_net(); /* Sync with NAPI */ if (!cancel_work_sync(&priv_rx->rule.work)) /* completion is needed, as the priv_rx in the add flow * is maintained on the wqe info (wi), not on the socket. @@ -671,8 +664,7 @@ void mlx5e_ktls_del_rx(struct net_device *netdev, struct tls_context *tls_ctx) wait_for_completion(&priv_rx->add_ctx); resync = &priv_rx->resync; if (cancel_work_sync(&resync->work)) - refcount_dec(&resync->refcnt); - wait_for_resync(netdev, resync); + mlx5e_ktls_priv_rx_put(priv_rx); priv_rx->stats->tls_del++; if (priv_rx->rule.rule) @@ -680,5 +672,9 @@ void mlx5e_ktls_del_rx(struct net_device *netdev, struct tls_context *tls_ctx) mlx5_core_destroy_tir(mdev, priv_rx->tirn); mlx5_ktls_destroy_key(mdev, priv_rx->key_id); - kfree(priv_rx); + /* priv_rx should normally be freed here, but if there is an outstanding + * GET_PSV, deallocation will be delayed until the CQE for GET_PSV is + * processed. + */ + mlx5e_ktls_priv_rx_put(priv_rx); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 5e9474dba4e5..abdf721bb264 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -536,7 +536,7 @@ static int mlx5e_get_coalesce(struct net_device *netdev, #define MLX5E_MAX_COAL_FRAMES MLX5_MAX_CQ_COUNT static void -mlx5e_set_priv_channels_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesce *coal) +mlx5e_set_priv_channels_tx_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesce *coal) { struct mlx5_core_dev *mdev = priv->mdev; int tc; @@ -551,6 +551,17 @@ mlx5e_set_priv_channels_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesc coal->tx_coalesce_usecs, coal->tx_max_coalesced_frames); } + } +} + +static void +mlx5e_set_priv_channels_rx_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesce *coal) +{ + struct mlx5_core_dev *mdev = priv->mdev; + int i; + + for (i = 0; i < priv->channels.num; ++i) { + struct mlx5e_channel *c = priv->channels.c[i]; mlx5_core_modify_cq_moderation(mdev, &c->rq.cq.mcq, coal->rx_coalesce_usecs, @@ -597,21 +608,9 @@ int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv, tx_moder->pkts = coal->tx_max_coalesced_frames; new_channels.params.tx_dim_enabled = !!coal->use_adaptive_tx_coalesce; - if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { - priv->channels.params = new_channels.params; - goto out; - } - /* we are opened */ - reset_rx = !!coal->use_adaptive_rx_coalesce != priv->channels.params.rx_dim_enabled; reset_tx = !!coal->use_adaptive_tx_coalesce != priv->channels.params.tx_dim_enabled; - if (!reset_rx && !reset_tx) { - mlx5e_set_priv_channels_coalesce(priv, coal); - priv->channels.params = new_channels.params; - goto out; - } - if (reset_rx) { u8 mode = MLX5E_GET_PFLAG(&new_channels.params, MLX5E_PFLAG_RX_CQE_BASED_MODER); @@ -625,6 +624,20 @@ int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv, mlx5e_reset_tx_moderation(&new_channels.params, mode); } + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { + priv->channels.params = new_channels.params; + goto out; + } + + if (!reset_rx && !reset_tx) { + if (!coal->use_adaptive_rx_coalesce) + mlx5e_set_priv_channels_rx_coalesce(priv, coal); + if (!coal->use_adaptive_tx_coalesce) + mlx5e_set_priv_channels_tx_coalesce(priv, coal); + priv->channels.params = new_channels.params; + goto out; + } + err = mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL); out: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index c8866c14b8a3..39acbc83682d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -67,6 +67,7 @@ #include "en/ptp.h" #include "qos.h" #include "en/trap.h" +#include "fpga/ipsec.h" bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev) { @@ -108,7 +109,7 @@ bool mlx5e_striding_rq_possible(struct mlx5_core_dev *mdev, if (!mlx5e_check_fragmented_striding_rq_cap(mdev)) return false; - if (MLX5_IPSEC_DEV(mdev)) + if (mlx5_fpga_is_ipsec_device(mdev)) return false; if (params->xdp_prog) { @@ -947,7 +948,7 @@ void mlx5e_activate_rq(struct mlx5e_rq *rq) void mlx5e_deactivate_rq(struct mlx5e_rq *rq) { clear_bit(MLX5E_RQ_STATE_ENABLED, &rq->state); - synchronize_rcu(); /* Sync with NAPI to prevent mlx5e_post_rx_wqes. */ + synchronize_net(); /* Sync with NAPI to prevent mlx5e_post_rx_wqes. */ } void mlx5e_close_rq(struct mlx5e_rq *rq) @@ -1401,7 +1402,7 @@ void mlx5e_deactivate_txqsq(struct mlx5e_txqsq *sq) struct mlx5_wq_cyc *wq = &sq->wq; clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); - synchronize_rcu(); /* Sync with NAPI to prevent netif_tx_wake_queue. */ + synchronize_net(); /* Sync with NAPI to prevent netif_tx_wake_queue. */ mlx5e_tx_disable_queue(sq->txq); @@ -1476,7 +1477,7 @@ void mlx5e_activate_icosq(struct mlx5e_icosq *icosq) void mlx5e_deactivate_icosq(struct mlx5e_icosq *icosq) { clear_bit(MLX5E_SQ_STATE_ENABLED, &icosq->state); - synchronize_rcu(); /* Sync with NAPI. */ + synchronize_net(); /* Sync with NAPI. */ } void mlx5e_close_icosq(struct mlx5e_icosq *sq) @@ -1555,7 +1556,7 @@ void mlx5e_close_xdpsq(struct mlx5e_xdpsq *sq) struct mlx5e_channel *c = sq->channel; clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); - synchronize_rcu(); /* Sync with NAPI. */ + synchronize_net(); /* Sync with NAPI. */ mlx5e_destroy_sq(c->mdev, sq->sqn); mlx5e_free_xdpsq_descs(sq); @@ -1879,12 +1880,12 @@ static int mlx5e_open_queues(struct mlx5e_channel *c, mlx5e_build_create_cq_param(&ccp, c); - err = mlx5e_open_cq(c->priv, icocq_moder, &cparam->icosq.cqp, &ccp, + err = mlx5e_open_cq(c->priv, icocq_moder, &cparam->async_icosq.cqp, &ccp, &c->async_icosq.cq); if (err) return err; - err = mlx5e_open_cq(c->priv, icocq_moder, &cparam->async_icosq.cqp, &ccp, + err = mlx5e_open_cq(c->priv, icocq_moder, &cparam->icosq.cqp, &ccp, &c->icosq.cq); if (err) goto err_close_async_icosq_cq; @@ -2122,7 +2123,12 @@ static void mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev, u32 buf_size = 0; int i; +<<<<<<< HEAD if (MLX5_IPSEC_DEV(mdev)) +======= +#ifdef CONFIG_MLX5_EN_IPSEC + if (mlx5_fpga_is_ipsec_device(mdev)) +>>>>>>> 3af409ca278d4a8d50e91f9f7c4c33b175645cf3 byte_count += MLX5E_METADATA_ETHER_LEN; if (mlx5e_rx_is_linear_skb(params, xsk)) { @@ -4590,8 +4596,9 @@ static int mlx5e_xdp_allowed(struct mlx5e_priv *priv, struct bpf_prog *prog) return -EINVAL; } - if (MLX5_IPSEC_DEV(priv->mdev)) { - netdev_warn(netdev, "can't set XDP with IPSec offload\n"); + if (mlx5_fpga_is_ipsec_device(priv->mdev)) { + netdev_warn(netdev, + "XDP is not available on Innova cards with IPsec support\n"); return -EINVAL; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index fac96ea819a1..1b6ad94ebb10 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -1783,8 +1783,8 @@ int mlx5e_rq_set_handlers(struct mlx5e_rq *rq, struct mlx5e_params *params, bool rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe; rq->handle_rx_cqe = priv->profile->rx_handlers->handle_rx_cqe_mpwqe; - if (MLX5_IPSEC_DEV(mdev)) { - netdev_err(netdev, "MPWQE RQ with IPSec offload not supported\n"); + if (mlx5_fpga_is_ipsec_device(mdev)) { + netdev_err(netdev, "MPWQE RQ with Innova IPSec offload not supported\n"); return -EINVAL; } if (!rq->handle_rx_cqe) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 9f126054d371..0da69b98f38f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -4445,7 +4445,7 @@ static int apply_police_params(struct mlx5e_priv *priv, u64 rate, */ if (rate) { rate = (rate * BITS_PER_BYTE) + 500000; - rate_mbps = max_t(u32, do_div(rate, 1000000), 1); + rate_mbps = max_t(u64, do_div(rate, 1000000), 1); } err = mlx5_esw_modify_vport_rate(esw, vport_num, rate_mbps); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c index cc67366495b0..22bee4990232 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c @@ -124,7 +124,7 @@ struct mlx5_fpga_ipsec { struct ida halloc; }; -static bool mlx5_fpga_is_ipsec_device(struct mlx5_core_dev *mdev) +bool mlx5_fpga_is_ipsec_device(struct mlx5_core_dev *mdev) { if (!mdev->fpga || !MLX5_CAP_GEN(mdev, fpga)) return false; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h index db88eb4c49e3..8931b5584477 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h @@ -43,6 +43,7 @@ u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev); const struct mlx5_flow_cmds * mlx5_fs_cmd_get_default_ipsec_fpga_cmds(enum fs_flow_table_type type); void mlx5_fpga_ipsec_build_fs_cmds(void); +bool mlx5_fpga_is_ipsec_device(struct mlx5_core_dev *mdev); #else static inline const struct mlx5_accel_ipsec_ops *mlx5_fpga_ipsec_ops(struct mlx5_core_dev *mdev) @@ -55,6 +56,7 @@ mlx5_fs_cmd_get_default_ipsec_fpga_cmds(enum fs_flow_table_type type) } static inline void mlx5_fpga_ipsec_build_fs_cmds(void) {}; +static inline bool mlx5_fpga_is_ipsec_device(struct mlx5_core_dev *mdev) { return false; } #endif /* CONFIG_MLX5_FPGA_IPSEC */ #endif /* __MLX5_FPGA_IPSEC_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 54523bed16cd..0c32c485eb58 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -190,6 +190,16 @@ static bool reset_fw_if_needed(struct mlx5_core_dev *dev) return true; } +static void enter_error_state(struct mlx5_core_dev *dev, bool force) +{ + if (mlx5_health_check_fatal_sensors(dev) || force) { /* protected state setting */ + dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; + mlx5_cmd_flush(dev); + } + + mlx5_notifier_call_chain(dev->priv.events, MLX5_DEV_EVENT_SYS_ERROR, (void *)1); +} + void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force) { bool err_detected = false; @@ -208,12 +218,7 @@ void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force) goto unlock; } - if (mlx5_health_check_fatal_sensors(dev) || force) { /* protected state setting */ - dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; - mlx5_cmd_flush(dev); - } - - mlx5_notifier_call_chain(dev->priv.events, MLX5_DEV_EVENT_SYS_ERROR, (void *)1); + enter_error_state(dev, force); unlock: mutex_unlock(&dev->intf_state_mutex); } @@ -613,7 +618,7 @@ static void mlx5_fw_fatal_reporter_err_work(struct work_struct *work) priv = container_of(health, struct mlx5_priv, health); dev = container_of(priv, struct mlx5_core_dev, priv); - mlx5_enter_error_state(dev, false); + enter_error_state(dev, false); if (IS_ERR_OR_NULL(health->fw_fatal_reporter)) { if (mlx5_health_try_recover(dev)) mlx5_core_err(dev, "health recovery failed\n"); @@ -707,8 +712,9 @@ static void poll_health(struct timer_list *t) mlx5_core_err(dev, "Fatal error %u detected\n", fatal_error); dev->priv.health.fatal_error = fatal_error; print_health_info(dev); + dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; mlx5_trigger_health_work(dev); - goto out; + return; } count = ioread32be(health->health_counter); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index e4c9627485aa..2f2c352f301e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1445,7 +1445,8 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *id) dev_err(&pdev->dev, "mlx5_crdump_enable failed with error code %d\n", err); pci_save_state(pdev); - devlink_reload_enable(devlink); + if (!mlx5_core_is_mp_slave(dev)) + devlink_reload_enable(devlink); return 0; err_load_one: diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index cbc30df4e08a..9ce98e3d3f9f 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -2230,6 +2230,31 @@ static void rtl_prepare_power_down(struct rtl8169_private *tp) phy_speed_down(tp->phydev, false); rtl_wol_enable_rx(tp); } + + switch (tp->mac_version) { + case RTL_GIGA_MAC_VER_25 ... RTL_GIGA_MAC_VER_26: + case RTL_GIGA_MAC_VER_29 ... RTL_GIGA_MAC_VER_30: + case RTL_GIGA_MAC_VER_32 ... RTL_GIGA_MAC_VER_33: + case RTL_GIGA_MAC_VER_37: + case RTL_GIGA_MAC_VER_39: + case RTL_GIGA_MAC_VER_43: + case RTL_GIGA_MAC_VER_44: + case RTL_GIGA_MAC_VER_45: + case RTL_GIGA_MAC_VER_46: + case RTL_GIGA_MAC_VER_47: + case RTL_GIGA_MAC_VER_48: + case RTL_GIGA_MAC_VER_50 ... RTL_GIGA_MAC_VER_63: + RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) & ~0x80); + break; + case RTL_GIGA_MAC_VER_40: + case RTL_GIGA_MAC_VER_41: + case RTL_GIGA_MAC_VER_49: + rtl_eri_clear_bits(tp, 0x1a8, 0xfc000000); + RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) & ~0x80); + break; + default: + break; + } } static void rtl_init_rxcfg(struct rtl8169_private *tp) diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index e8febfb1924d..3a8775e0ca55 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -1862,6 +1862,18 @@ static int axienet_probe(struct platform_device *pdev) lp->options = XAE_OPTION_DEFAULTS; lp->rx_bd_num = RX_BD_NUM_DEFAULT; lp->tx_bd_num = TX_BD_NUM_DEFAULT; + + lp->clk = devm_clk_get_optional(&pdev->dev, NULL); + if (IS_ERR(lp->clk)) { + ret = PTR_ERR(lp->clk); + goto free_netdev; + } + ret = clk_prepare_enable(lp->clk); + if (ret) { + dev_err(&pdev->dev, "Unable to enable clock: %d\n", ret); + goto free_netdev; + } + /* Map device registers */ ethres = platform_get_resource(pdev, IORESOURCE_MEM, 0); lp->regs = devm_ioremap_resource(&pdev->dev, ethres); @@ -2046,20 +2058,6 @@ static int axienet_probe(struct platform_device *pdev) lp->phy_node = of_parse_phandle(pdev->dev.of_node, "phy-handle", 0); if (lp->phy_node) { - lp->clk = devm_clk_get(&pdev->dev, NULL); - if (IS_ERR(lp->clk)) { - dev_warn(&pdev->dev, "Failed to get clock: %ld\n", - PTR_ERR(lp->clk)); - lp->clk = NULL; - } else { - ret = clk_prepare_enable(lp->clk); - if (ret) { - dev_err(&pdev->dev, "Unable to enable clock: %d\n", - ret); - goto free_netdev; - } - } - ret = axienet_mdio_setup(lp); if (ret) dev_warn(&pdev->dev, diff --git a/drivers/net/ipa/ipa_main.c b/drivers/net/ipa/ipa_main.c index c10e7340b031..97c1b55405cb 100644 --- a/drivers/net/ipa/ipa_main.c +++ b/drivers/net/ipa/ipa_main.c @@ -580,10 +580,10 @@ ipa_resource_config(struct ipa *ipa, const struct ipa_resource_data *data) return -EINVAL; for (i = 0; i < data->resource_src_count; i++) - ipa_resource_config_src(ipa, data->resource_src); + ipa_resource_config_src(ipa, &data->resource_src[i]); for (i = 0; i < data->resource_dst_count; i++) - ipa_resource_config_dst(ipa, data->resource_dst); + ipa_resource_config_dst(ipa, &data->resource_dst[i]); return 0; } diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index d6ac3ed38197..ce495473cd5d 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -300,50 +300,22 @@ static int mdio_bus_phy_resume(struct device *dev) phydev->suspended_by_mdio_bus = 0; - ret = phy_resume(phydev); + ret = phy_init_hw(phydev); if (ret < 0) return ret; -no_resume: - if (phydev->attached_dev && phydev->adjust_link) - phy_start_machine(phydev); - - return 0; -} - -static int mdio_bus_phy_restore(struct device *dev) -{ - struct phy_device *phydev = to_phy_device(dev); - struct net_device *netdev = phydev->attached_dev; - int ret; - - if (!netdev) - return 0; - - ret = phy_init_hw(phydev); + ret = phy_resume(phydev); if (ret < 0) return ret; - +no_resume: if (phydev->attached_dev && phydev->adjust_link) phy_start_machine(phydev); return 0; } -static const struct dev_pm_ops mdio_bus_phy_pm_ops = { - .suspend = mdio_bus_phy_suspend, - .resume = mdio_bus_phy_resume, - .freeze = mdio_bus_phy_suspend, - .thaw = mdio_bus_phy_resume, - .restore = mdio_bus_phy_restore, -}; - -#define MDIO_BUS_PHY_PM_OPS (&mdio_bus_phy_pm_ops) - -#else - -#define MDIO_BUS_PHY_PM_OPS NULL - +static SIMPLE_DEV_PM_OPS(mdio_bus_phy_pm_ops, mdio_bus_phy_suspend, + mdio_bus_phy_resume); #endif /* CONFIG_PM */ /** @@ -554,7 +526,7 @@ static const struct device_type mdio_bus_phy_type = { .name = "PHY", .groups = phy_dev_groups, .release = phy_device_release, - .pm = MDIO_BUS_PHY_PM_OPS, + .pm = pm_ptr(&mdio_bus_phy_pm_ops), }; static int phy_request_driver_module(struct phy_device *dev, u32 phy_id) @@ -1144,10 +1116,19 @@ int phy_init_hw(struct phy_device *phydev) if (ret < 0) return ret; - if (phydev->drv->config_init) + if (phydev->drv->config_init) { ret = phydev->drv->config_init(phydev); + if (ret < 0) + return ret; + } - return ret; + if (phydev->drv->config_intr) { + ret = phydev->drv->config_intr(phydev); + if (ret < 0) + return ret; + } + + return 0; } EXPORT_SYMBOL(phy_init_hw); diff --git a/drivers/net/wan/lmc/lmc_main.c b/drivers/net/wan/lmc/lmc_main.c index 93c7e8502845..6c163db52835 100644 --- a/drivers/net/wan/lmc/lmc_main.c +++ b/drivers/net/wan/lmc/lmc_main.c @@ -854,7 +854,7 @@ static int lmc_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) spin_lock_init(&sc->lmc_lock); pci_set_master(pdev); - printk(KERN_INFO "%s: detected at %lx, irq %d\n", dev->name, + printk(KERN_INFO "hdlc: detected at %lx, irq %d\n", dev->base_addr, dev->irq); err = register_hdlc_device(dev); @@ -899,6 +899,8 @@ static int lmc_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) break; default: printk(KERN_WARNING "%s: LMC UNKNOWN CARD!\n", dev->name); + unregister_hdlc_device(dev); + return -EIO; break; } diff --git a/drivers/net/wireless/broadcom/b43/phy_n.c b/drivers/net/wireless/broadcom/b43/phy_n.c index b669dff24b6e..665b737fbb0d 100644 --- a/drivers/net/wireless/broadcom/b43/phy_n.c +++ b/drivers/net/wireless/broadcom/b43/phy_n.c @@ -5311,7 +5311,7 @@ static void b43_nphy_restore_cal(struct b43_wldev *dev) for (i = 0; i < 4; i++) { if (dev->phy.rev >= 3) - table[i] = coef[i]; + coef[i] = table[i]; else coef[i] = 0; } diff --git a/drivers/target/iscsi/cxgbit/cxgbit_target.c b/drivers/target/iscsi/cxgbit/cxgbit_target.c index 9b3eb2e8c92a..b926e1d6c7b8 100644 --- a/drivers/target/iscsi/cxgbit/cxgbit_target.c +++ b/drivers/target/iscsi/cxgbit/cxgbit_target.c @@ -86,8 +86,7 @@ static int cxgbit_is_ofld_imm(const struct sk_buff *skb) if (likely(cxgbit_skcb_flags(skb) & SKCBF_TX_ISO)) length += sizeof(struct cpl_tx_data_iso); -#define MAX_IMM_TX_PKT_LEN 256 - return length <= MAX_IMM_TX_PKT_LEN; + return length <= MAX_IMM_OFLD_TX_DATA_WR_LEN; } /* diff --git a/include/linux/atmdev.h b/include/linux/atmdev.h index d7493016cd46..60cd25c0461b 100644 --- a/include/linux/atmdev.h +++ b/include/linux/atmdev.h @@ -207,7 +207,7 @@ struct atm_skb_data { struct atm_vcc *vcc; /* ATM VCC */ unsigned long atm_options; /* ATM layer options */ unsigned int acct_truesize; /* truesize accounted to vcc */ -}; +} __packed; #define VCC_HTABLE_SIZE 32 diff --git a/include/net/act_api.h b/include/net/act_api.h index 761c0e331915..2bf3092ae7ec 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -166,6 +166,7 @@ int tcf_idr_create_from_flags(struct tc_action_net *tn, u32 index, struct nlattr *est, struct tc_action **a, const struct tc_action_ops *ops, int bind, u32 flags); +void tcf_idr_insert_many(struct tc_action *actions[]); void tcf_idr_cleanup(struct tc_action_net *tn, u32 index); int tcf_idr_check_alloc(struct tc_action_net *tn, u32 *index, struct tc_action **a, int bind); diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index afe6836e44b1..7ea59cfe1fa7 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -593,6 +593,7 @@ enum { TCA_FLOWER_KEY_CT_FLAGS_TRACKED = 1 << 3, /* Conntrack has occurred. */ TCA_FLOWER_KEY_CT_FLAGS_INVALID = 1 << 4, /* Conntrack is invalid. */ TCA_FLOWER_KEY_CT_FLAGS_REPLY = 1 << 5, /* Packet is in the reply direction. */ + __TCA_FLOWER_KEY_CT_FLAGS_MAX, }; enum { diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 36d1e7339ede..1dda9d81f12c 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -11563,7 +11563,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env) bool isdiv = BPF_OP(insn->code) == BPF_DIV; struct bpf_insn *patchlet; struct bpf_insn chk_and_div[] = { - /* Rx div 0 -> 0 */ + /* [R,W]x div 0 -> 0 */ BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) | BPF_JNE | BPF_K, insn->src_reg, 0, 2, 0), @@ -11572,16 +11572,18 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env) *insn, }; struct bpf_insn chk_and_mod[] = { - /* Rx mod 0 -> Rx */ + /* [R,W]x mod 0 -> [R,W]x */ BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) | BPF_JEQ | BPF_K, insn->src_reg, - 0, 1, 0), + 0, 1 + (is64 ? 0 : 1), 0), *insn, + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_MOV32_REG(insn->dst_reg, insn->dst_reg), }; patchlet = isdiv ? chk_and_div : chk_and_mod; cnt = isdiv ? ARRAY_SIZE(chk_and_div) : - ARRAY_SIZE(chk_and_mod); + ARRAY_SIZE(chk_and_mod) - (is64 ? 2 : 0); new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt); if (!new_prog) diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index ca1a0d07a087..ebda397fa95a 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1577,8 +1577,8 @@ static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) struct sk_buff *skb; struct net_device *dev; struct ddpehdr *ddp; - int size; - struct atalk_route *rt; + int size, hard_header_len; + struct atalk_route *rt, *rt_lo = NULL; int err; if (flags & ~(MSG_DONTWAIT|MSG_CMSG_COMPAT)) @@ -1641,7 +1641,22 @@ static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) SOCK_DEBUG(sk, "SK %p: Size needed %d, device %s\n", sk, size, dev->name); - size += dev->hard_header_len; + hard_header_len = dev->hard_header_len; + /* Leave room for loopback hardware header if necessary */ + if (usat->sat_addr.s_node == ATADDR_BCAST && + (dev->flags & IFF_LOOPBACK || !(rt->flags & RTF_GATEWAY))) { + struct atalk_addr at_lo; + + at_lo.s_node = 0; + at_lo.s_net = 0; + + rt_lo = atrtr_find(&at_lo); + + if (rt_lo && rt_lo->dev->hard_header_len > hard_header_len) + hard_header_len = rt_lo->dev->hard_header_len; + } + + size += hard_header_len; release_sock(sk); skb = sock_alloc_send_skb(sk, size, (flags & MSG_DONTWAIT), &err); lock_sock(sk); @@ -1649,7 +1664,7 @@ static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) goto out; skb_reserve(skb, ddp_dl->header_length); - skb_reserve(skb, dev->hard_header_len); + skb_reserve(skb, hard_header_len); skb->dev = dev; SOCK_DEBUG(sk, "SK %p: Begin build.\n", sk); @@ -1700,18 +1715,12 @@ static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) /* loop back */ skb_orphan(skb); if (ddp->deh_dnode == ATADDR_BCAST) { - struct atalk_addr at_lo; - - at_lo.s_node = 0; - at_lo.s_net = 0; - - rt = atrtr_find(&at_lo); - if (!rt) { + if (!rt_lo) { kfree_skb(skb); err = -ENETUNREACH; goto out; } - dev = rt->dev; + dev = rt_lo->dev; skb->dev = dev; } ddp_dl->request(ddp_dl, skb, dev->dev_addr); diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c index 79b6a04d8eb6..fadc7c8a3107 100644 --- a/net/caif/chnl_net.c +++ b/net/caif/chnl_net.c @@ -115,10 +115,7 @@ static int chnl_recv_cb(struct cflayer *layr, struct cfpkt *pkt) else skb->ip_summed = CHECKSUM_NONE; - if (in_interrupt()) - netif_rx(skb); - else - netif_rx_ni(skb); + netif_rx_any_context(skb); /* Update statistics. */ priv->netdev->stats.rx_packets++; diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index c565c7a17091..2ef2224b3bff 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -1056,6 +1056,9 @@ proto_again: key_control->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; } + __skb_flow_dissect_ipv4(skb, flow_dissector, + target_container, data, iph); + if (ip_is_fragment(iph)) { key_control->flags |= FLOW_DIS_IS_FRAGMENT; @@ -1072,9 +1075,6 @@ proto_again: } } - __skb_flow_dissect_ipv4(skb, flow_dissector, - target_container, data, iph); - break; } case htons(ETH_P_IPV6): { diff --git a/net/mptcp/options.c b/net/mptcp/options.c index bb874c5d663a..b63574d6b812 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -508,8 +508,8 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb, { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); struct mptcp_sock *msk = mptcp_sk(subflow->conn); + u64 snd_data_fin_enable, ack_seq; unsigned int dss_size = 0; - u64 snd_data_fin_enable; struct mptcp_ext *mpext; unsigned int ack_size; bool ret = false; @@ -541,13 +541,14 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb, return ret; } + ack_seq = READ_ONCE(msk->ack_seq); if (READ_ONCE(msk->use_64bit_ack)) { ack_size = TCPOLEN_MPTCP_DSS_ACK64; - opts->ext_copy.data_ack = READ_ONCE(msk->ack_seq); + opts->ext_copy.data_ack = ack_seq; opts->ext_copy.ack64 = 1; } else { ack_size = TCPOLEN_MPTCP_DSS_ACK32; - opts->ext_copy.data_ack32 = (uint32_t)READ_ONCE(msk->ack_seq); + opts->ext_copy.data_ack32 = (uint32_t)ack_seq; opts->ext_copy.ack64 = 0; } opts->ext_copy.use_ack = 1; @@ -918,8 +919,7 @@ static void ack_update_msk(struct mptcp_sock *msk, msk->wnd_end = new_wnd_end; /* this assumes mptcp_incoming_options() is invoked after tcp_ack() */ - if (after64(msk->wnd_end, READ_ONCE(msk->snd_nxt)) && - sk_stream_memory_free(ssk)) + if (after64(msk->wnd_end, READ_ONCE(msk->snd_nxt))) __mptcp_check_push(sk, ssk); if (after64(new_snd_una, old_snd_una)) { diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index c2a8392254dc..a57f3eab7b6a 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -363,8 +363,6 @@ static void mptcp_check_data_fin_ack(struct sock *sk) /* Look for an acknowledged DATA_FIN */ if (mptcp_pending_data_fin_ack(sk)) { - mptcp_stop_timer(sk); - WRITE_ONCE(msk->snd_data_fin_enable, 0); switch (sk->sk_state) { @@ -458,7 +456,18 @@ static bool mptcp_subflow_cleanup_rbuf(struct sock *ssk) static void mptcp_cleanup_rbuf(struct mptcp_sock *msk) { struct sock *ack_hint = READ_ONCE(msk->ack_hint); + int old_space = READ_ONCE(msk->old_wspace); struct mptcp_subflow_context *subflow; + struct sock *sk = (struct sock *)msk; + bool cleanup; + + /* this is a simple superset of what tcp_cleanup_rbuf() implements + * so that we don't have to acquire the ssk socket lock most of the time + * to do actually nothing + */ + cleanup = __mptcp_space(sk) - old_space >= max(0, old_space); + if (!cleanup) + return; /* if the hinted ssk is still active, try to use it */ if (likely(ack_hint)) { @@ -1565,6 +1574,9 @@ out: mptcp_set_timeout(sk, ssk); tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle, info.size_goal); + if (!mptcp_timer_pending(sk)) + mptcp_reset_timer(sk); + if (msk->snd_data_fin_enable && msk->snd_nxt + 1 == msk->write_seq) mptcp_schedule_work(sk); @@ -1868,7 +1880,7 @@ static void __mptcp_splice_receive_queue(struct sock *sk) skb_queue_splice_tail_init(&sk->sk_receive_queue, &msk->receive_queue); } -static bool __mptcp_move_skbs(struct mptcp_sock *msk, unsigned int rcv) +static bool __mptcp_move_skbs(struct mptcp_sock *msk) { struct sock *sk = (struct sock *)msk; unsigned int moved = 0; @@ -1888,13 +1900,10 @@ static bool __mptcp_move_skbs(struct mptcp_sock *msk, unsigned int rcv) slowpath = lock_sock_fast(ssk); mptcp_data_lock(sk); + __mptcp_update_rmem(sk); done = __mptcp_move_skbs_from_subflow(msk, ssk, &moved); mptcp_data_unlock(sk); - if (moved && rcv) { - WRITE_ONCE(msk->rmem_pending, min(rcv, moved)); - tcp_cleanup_rbuf(ssk, 1); - WRITE_ONCE(msk->rmem_pending, 0); - } + tcp_cleanup_rbuf(ssk, moved); unlock_sock_fast(ssk, slowpath); } while (!done); @@ -1907,6 +1916,7 @@ static bool __mptcp_move_skbs(struct mptcp_sock *msk, unsigned int rcv) ret |= __mptcp_ofo_queue(msk); __mptcp_splice_receive_queue(sk); mptcp_data_unlock(sk); + mptcp_cleanup_rbuf(msk); } if (ret) mptcp_check_data_fin((struct sock *)msk); @@ -1936,7 +1946,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); while (copied < len) { - int bytes_read, old_space; + int bytes_read; bytes_read = __mptcp_recvmsg_mskq(msk, msg, len - copied); if (unlikely(bytes_read < 0)) { @@ -1947,14 +1957,11 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, copied += bytes_read; - if (skb_queue_empty(&msk->receive_queue) && - __mptcp_move_skbs(msk, len - copied)) - continue; - /* be sure to advertise window change */ - old_space = READ_ONCE(msk->old_wspace); - if ((tcp_space(sk) - old_space) >= old_space) - mptcp_cleanup_rbuf(msk); + mptcp_cleanup_rbuf(msk); + + if (skb_queue_empty(&msk->receive_queue) && __mptcp_move_skbs(msk)) + continue; /* only the master socket status is relevant here. The exit * conditions mirror closely tcp_recvmsg() @@ -1982,7 +1989,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, /* race breaker: the shutdown could be after the * previous receive queue check */ - if (__mptcp_move_skbs(msk, len - copied)) + if (__mptcp_move_skbs(msk)) continue; break; } @@ -2015,7 +2022,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, /* .. race-breaker: ssk might have gotten new data * after last __mptcp_move_skbs() returned false. */ - if (unlikely(__mptcp_move_skbs(msk, 0))) + if (unlikely(__mptcp_move_skbs(msk))) set_bit(MPTCP_DATA_READY, &msk->flags); } else if (unlikely(!test_bit(MPTCP_DATA_READY, &msk->flags))) { /* data to read but mptcp_wait_data() cleared DATA_READY */ @@ -2275,6 +2282,7 @@ static void mptcp_worker(struct work_struct *work) if (!test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags)) goto unlock; + __mptcp_clean_una(sk); dfrag = mptcp_rtx_head(sk); if (!dfrag) goto unlock; @@ -2943,6 +2951,8 @@ static void mptcp_release_cb(struct sock *sk) mptcp_push_pending(sk, 0); spin_lock_bh(&sk->sk_lock.slock); } + if (test_and_clear_bit(MPTCP_ERROR_REPORT, &mptcp_sk(sk)->flags)) + __mptcp_error_report(sk); /* clear any wmem reservation and errors */ __mptcp_update_wmem(sk); @@ -3319,7 +3329,7 @@ static __poll_t mptcp_check_writeable(struct mptcp_sock *msk) struct sock *sk = (struct sock *)msk; if (unlikely(sk->sk_shutdown & SEND_SHUTDOWN)) - return 0; + return EPOLLOUT | EPOLLWRNORM; if (sk_stream_is_writeable(sk)) return EPOLLOUT | EPOLLWRNORM; @@ -3352,9 +3362,16 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock, mask |= mptcp_check_readable(msk); mask |= mptcp_check_writeable(msk); } + if (sk->sk_shutdown == SHUTDOWN_MASK || state == TCP_CLOSE) + mask |= EPOLLHUP; if (sk->sk_shutdown & RCV_SHUTDOWN) mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP; + /* This barrier is coupled with smp_wmb() in tcp_reset() */ + smp_rmb(); + if (sk->sk_err) + mask |= EPOLLERR; + return mask; } diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 1b6ec1773678..91827d949766 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -102,6 +102,7 @@ #define MPTCP_WORK_CLOSE_SUBFLOW 5 #define MPTCP_PUSH_PENDING 6 #define MPTCP_CLEAN_UNA 7 +#define MPTCP_ERROR_REPORT 8 static inline bool before64(__u64 seq1, __u64 seq2) { @@ -237,7 +238,6 @@ struct mptcp_sock { u64 wnd_end; unsigned long timer_ival; u32 token; - int rmem_pending; int rmem_released; unsigned long flags; bool can_ack; @@ -301,7 +301,7 @@ static inline struct mptcp_sock *mptcp_sk(const struct sock *sk) static inline int __mptcp_space(const struct sock *sk) { - return tcp_space(sk) + READ_ONCE(mptcp_sk(sk)->rmem_pending); + return tcp_space(sk) + READ_ONCE(mptcp_sk(sk)->rmem_released); } static inline struct mptcp_data_frag *mptcp_send_head(const struct sock *sk) @@ -334,20 +334,13 @@ static inline struct mptcp_data_frag *mptcp_pending_tail(const struct sock *sk) return list_last_entry(&msk->rtx_queue, struct mptcp_data_frag, list); } -static inline struct mptcp_data_frag *mptcp_rtx_tail(const struct sock *sk) +static inline struct mptcp_data_frag *mptcp_rtx_head(const struct sock *sk) { struct mptcp_sock *msk = mptcp_sk(sk); - if (!before64(msk->snd_nxt, READ_ONCE(msk->snd_una))) + if (msk->snd_una == READ_ONCE(msk->snd_nxt)) return NULL; - return list_last_entry(&msk->rtx_queue, struct mptcp_data_frag, list); -} - -static inline struct mptcp_data_frag *mptcp_rtx_head(const struct sock *sk) -{ - struct mptcp_sock *msk = mptcp_sk(sk); - return list_first_entry_or_null(&msk->rtx_queue, struct mptcp_data_frag, list); } @@ -436,6 +429,7 @@ struct mptcp_subflow_context { void (*tcp_data_ready)(struct sock *sk); void (*tcp_state_change)(struct sock *sk); void (*tcp_write_space)(struct sock *sk); + void (*tcp_error_report)(struct sock *sk); struct rcu_head rcu; }; @@ -560,6 +554,7 @@ static inline void mptcp_subflow_tcp_fallback(struct sock *sk, sk->sk_data_ready = ctx->tcp_data_ready; sk->sk_state_change = ctx->tcp_state_change; sk->sk_write_space = ctx->tcp_write_space; + sk->sk_error_report = ctx->tcp_error_report; inet_csk(sk)->icsk_af_ops = ctx->icsk_af_ops; } @@ -587,6 +582,7 @@ bool mptcp_finish_join(struct sock *sk); bool mptcp_schedule_work(struct sock *sk); void __mptcp_check_push(struct sock *sk, struct sock *ssk); void __mptcp_data_acked(struct sock *sk); +void __mptcp_error_report(struct sock *sk); void mptcp_subflow_eof(struct sock *sk); bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq, bool use_64bit); void __mptcp_flush_join_list(struct mptcp_sock *msk); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index ce2dea2a6e0a..06e233410e0e 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -100,7 +100,7 @@ static struct mptcp_sock *subflow_token_join_request(struct request_sock *req) return msk; } -static int __subflow_init_req(struct request_sock *req, const struct sock *sk_listener) +static void subflow_init_req(struct request_sock *req, const struct sock *sk_listener) { struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req); @@ -108,16 +108,6 @@ static int __subflow_init_req(struct request_sock *req, const struct sock *sk_li subflow_req->mp_join = 0; subflow_req->msk = NULL; mptcp_token_init_request(req); - -#ifdef CONFIG_TCP_MD5SIG - /* no MPTCP if MD5SIG is enabled on this socket or we may run out of - * TCP option space. - */ - if (rcu_access_pointer(tcp_sk(sk_listener)->md5sig_info)) - return -EINVAL; -#endif - - return 0; } static bool subflow_use_different_sport(struct mptcp_sock *msk, const struct sock *sk) @@ -130,20 +120,23 @@ static bool subflow_use_different_sport(struct mptcp_sock *msk, const struct soc * Returns an error code if a JOIN has failed and a TCP reset * should be sent. */ -static int subflow_init_req(struct request_sock *req, - const struct sock *sk_listener, - struct sk_buff *skb) +static int subflow_check_req(struct request_sock *req, + const struct sock *sk_listener, + struct sk_buff *skb) { struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk_listener); struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req); struct mptcp_options_received mp_opt; - int ret; pr_debug("subflow_req=%p, listener=%p", subflow_req, listener); - ret = __subflow_init_req(req, sk_listener); - if (ret) - return 0; +#ifdef CONFIG_TCP_MD5SIG + /* no MPTCP if MD5SIG is enabled on this socket or we may run out of + * TCP option space. + */ + if (rcu_access_pointer(tcp_sk(sk_listener)->md5sig_info)) + return -EINVAL; +#endif mptcp_get_options(skb, &mp_opt); @@ -236,10 +229,7 @@ int mptcp_subflow_init_cookie_req(struct request_sock *req, struct mptcp_options_received mp_opt; int err; - err = __subflow_init_req(req, sk_listener); - if (err) - return err; - + subflow_init_req(req, sk_listener); mptcp_get_options(skb, &mp_opt); if (mp_opt.mp_capable && mp_opt.mp_join) @@ -279,12 +269,13 @@ static struct dst_entry *subflow_v4_route_req(const struct sock *sk, int err; tcp_rsk(req)->is_mptcp = 1; + subflow_init_req(req, sk); dst = tcp_request_sock_ipv4_ops.route_req(sk, skb, fl, req); if (!dst) return NULL; - err = subflow_init_req(req, sk, skb); + err = subflow_check_req(req, sk, skb); if (err == 0) return dst; @@ -304,12 +295,13 @@ static struct dst_entry *subflow_v6_route_req(const struct sock *sk, int err; tcp_rsk(req)->is_mptcp = 1; + subflow_init_req(req, sk); dst = tcp_request_sock_ipv6_ops.route_req(sk, skb, fl, req); if (!dst) return NULL; - err = subflow_init_req(req, sk, skb); + err = subflow_check_req(req, sk, skb); if (err == 0) return dst; @@ -1124,6 +1116,46 @@ static void subflow_write_space(struct sock *ssk) mptcp_write_space(sk); } +void __mptcp_error_report(struct sock *sk) +{ + struct mptcp_subflow_context *subflow; + struct mptcp_sock *msk = mptcp_sk(sk); + + mptcp_for_each_subflow(msk, subflow) { + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + int err = sock_error(ssk); + + if (!err) + continue; + + /* only propagate errors on fallen-back sockets or + * on MPC connect + */ + if (sk->sk_state != TCP_SYN_SENT && !__mptcp_check_fallback(msk)) + continue; + + inet_sk_state_store(sk, inet_sk_state_load(ssk)); + sk->sk_err = -err; + + /* This barrier is coupled with smp_rmb() in mptcp_poll() */ + smp_wmb(); + sk->sk_error_report(sk); + break; + } +} + +static void subflow_error_report(struct sock *ssk) +{ + struct sock *sk = mptcp_subflow_ctx(ssk)->conn; + + mptcp_data_lock(sk); + if (!sock_owned_by_user(sk)) + __mptcp_error_report(sk); + else + set_bit(MPTCP_ERROR_REPORT, &mptcp_sk(sk)->flags); + mptcp_data_unlock(sk); +} + static struct inet_connection_sock_af_ops * subflow_default_af_ops(struct sock *sk) { @@ -1470,9 +1502,11 @@ static int subflow_ulp_init(struct sock *sk) ctx->tcp_data_ready = sk->sk_data_ready; ctx->tcp_state_change = sk->sk_state_change; ctx->tcp_write_space = sk->sk_write_space; + ctx->tcp_error_report = sk->sk_error_report; sk->sk_data_ready = subflow_data_ready; sk->sk_write_space = subflow_write_space; sk->sk_state_change = subflow_state_change; + sk->sk_error_report = subflow_error_report; out: return err; } @@ -1526,6 +1560,7 @@ static void subflow_ulp_clone(const struct request_sock *req, new_ctx->tcp_data_ready = old_ctx->tcp_data_ready; new_ctx->tcp_state_change = old_ctx->tcp_state_change; new_ctx->tcp_write_space = old_ctx->tcp_write_space; + new_ctx->tcp_error_report = old_ctx->tcp_error_report; new_ctx->rel_write_seq = 1; new_ctx->tcp_sock = newsk; diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 4dd235ce9a07..b919826939e0 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -908,7 +908,7 @@ static const struct nla_policy tcf_action_policy[TCA_ACT_MAX + 1] = { [TCA_ACT_HW_STATS] = NLA_POLICY_BITFIELD32(TCA_ACT_HW_STATS_ANY), }; -static void tcf_idr_insert_many(struct tc_action *actions[]) +void tcf_idr_insert_many(struct tc_action *actions[]) { int i; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index a67c66a512a4..e37556cc37ab 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -3060,6 +3060,7 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, act->type = exts->type = TCA_OLD_COMPAT; exts->actions[0] = act; exts->nr_actions = 1; + tcf_idr_insert_many(exts->actions); } else if (exts->action && tb[exts->action]) { int err; diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index caf7643e9c83..2409e522c68f 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -30,6 +30,11 @@ #include <uapi/linux/netfilter/nf_conntrack_common.h> +#define TCA_FLOWER_KEY_CT_FLAGS_MAX \ + ((__TCA_FLOWER_KEY_CT_FLAGS_MAX - 1) << 1) +#define TCA_FLOWER_KEY_CT_FLAGS_MASK \ + (TCA_FLOWER_KEY_CT_FLAGS_MAX - 1) + struct fl_flow_key { struct flow_dissector_key_meta meta; struct flow_dissector_key_control control; @@ -690,8 +695,10 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = { [TCA_FLOWER_KEY_ENC_IP_TTL_MASK] = { .type = NLA_U8 }, [TCA_FLOWER_KEY_ENC_OPTS] = { .type = NLA_NESTED }, [TCA_FLOWER_KEY_ENC_OPTS_MASK] = { .type = NLA_NESTED }, - [TCA_FLOWER_KEY_CT_STATE] = { .type = NLA_U16 }, - [TCA_FLOWER_KEY_CT_STATE_MASK] = { .type = NLA_U16 }, + [TCA_FLOWER_KEY_CT_STATE] = + NLA_POLICY_MASK(NLA_U16, TCA_FLOWER_KEY_CT_FLAGS_MASK), + [TCA_FLOWER_KEY_CT_STATE_MASK] = + NLA_POLICY_MASK(NLA_U16, TCA_FLOWER_KEY_CT_FLAGS_MASK), [TCA_FLOWER_KEY_CT_ZONE] = { .type = NLA_U16 }, [TCA_FLOWER_KEY_CT_ZONE_MASK] = { .type = NLA_U16 }, [TCA_FLOWER_KEY_CT_MARK] = { .type = NLA_U32 }, @@ -1394,12 +1401,33 @@ static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key, return 0; } +static int fl_validate_ct_state(u16 state, struct nlattr *tb, + struct netlink_ext_ack *extack) +{ + if (state && !(state & TCA_FLOWER_KEY_CT_FLAGS_TRACKED)) { + NL_SET_ERR_MSG_ATTR(extack, tb, + "no trk, so no other flag can be set"); + return -EINVAL; + } + + if (state & TCA_FLOWER_KEY_CT_FLAGS_NEW && + state & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED) { + NL_SET_ERR_MSG_ATTR(extack, tb, + "new and est are mutually exclusive"); + return -EINVAL; + } + + return 0; +} + static int fl_set_key_ct(struct nlattr **tb, struct flow_dissector_key_ct *key, struct flow_dissector_key_ct *mask, struct netlink_ext_ack *extack) { if (tb[TCA_FLOWER_KEY_CT_STATE]) { + int err; + if (!IS_ENABLED(CONFIG_NF_CONNTRACK)) { NL_SET_ERR_MSG(extack, "Conntrack isn't enabled"); return -EOPNOTSUPP; @@ -1407,6 +1435,13 @@ static int fl_set_key_ct(struct nlattr **tb, fl_set_key_val(tb, &key->ct_state, TCA_FLOWER_KEY_CT_STATE, &mask->ct_state, TCA_FLOWER_KEY_CT_STATE_MASK, sizeof(key->ct_state)); + + err = fl_validate_ct_state(mask->ct_state, + tb[TCA_FLOWER_KEY_CT_STATE_MASK], + extack); + if (err) + return err; + } if (tb[TCA_FLOWER_KEY_CT_ZONE]) { if (!IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES)) { diff --git a/tools/testing/selftests/bpf/test_xdp_redirect.sh b/tools/testing/selftests/bpf/test_xdp_redirect.sh index dd80f0c84afb..c033850886f4 100755 --- a/tools/testing/selftests/bpf/test_xdp_redirect.sh +++ b/tools/testing/selftests/bpf/test_xdp_redirect.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # Create 2 namespaces with two veth peers, and # forward packets in-between using generic XDP # @@ -57,12 +57,8 @@ test_xdp_redirect() ip link set dev veth1 $xdpmode obj test_xdp_redirect.o sec redirect_to_222 &> /dev/null ip link set dev veth2 $xdpmode obj test_xdp_redirect.o sec redirect_to_111 &> /dev/null - ip netns exec ns1 ping -c 1 10.1.1.22 &> /dev/null - local ret1=$? - ip netns exec ns2 ping -c 1 10.1.1.11 &> /dev/null - local ret2=$? - - if [ $ret1 -eq 0 -a $ret2 -eq 0 ]; then + if ip netns exec ns1 ping -c 1 10.1.1.22 &> /dev/null && + ip netns exec ns2 ping -c 1 10.1.1.11 &> /dev/null; then echo "selftests: test_xdp_redirect $xdpmode [PASS]"; else ret=1 diff --git a/tools/testing/selftests/net/forwarding/tc_flower.sh b/tools/testing/selftests/net/forwarding/tc_flower.sh index a554838666c4..4b58ccae3429 100755 --- a/tools/testing/selftests/net/forwarding/tc_flower.sh +++ b/tools/testing/selftests/net/forwarding/tc_flower.sh @@ -3,7 +3,7 @@ ALL_TESTS="match_dst_mac_test match_src_mac_test match_dst_ip_test \ match_src_ip_test match_ip_flags_test match_pcp_test match_vlan_test \ - match_ip_tos_test match_indev_test match_mpls_label_test \ + match_ip_tos_test match_indev_testmatch_ip_ttl_test match_mpls_label_test \ match_mpls_tc_test match_mpls_bos_test match_mpls_ttl_test \ match_mpls_lse_test" NUM_NETIFS=2 @@ -312,6 +312,42 @@ match_ip_tos_test() log_test "ip_tos match ($tcflags)" } +match_ip_ttl_test() +{ + RET=0 + + tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \ + $tcflags dst_ip 192.0.2.2 ip_ttl 63 action drop + tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \ + $tcflags dst_ip 192.0.2.2 action drop + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip "ttl=63" -q + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip "ttl=63,mf,frag=256" -q + + tc_check_packets "dev $h2 ingress" 102 1 + check_fail $? "Matched on the wrong filter (no check on ttl)" + + tc_check_packets "dev $h2 ingress" 101 2 + check_err $? "Did not match on correct filter (ttl=63)" + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip "ttl=255" -q + + tc_check_packets "dev $h2 ingress" 101 3 + check_fail $? "Matched on a wrong filter (ttl=63)" + + tc_check_packets "dev $h2 ingress" 102 1 + check_err $? "Did not match on correct filter (no check on ttl)" + + tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower + tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower + + log_test "ip_ttl match ($tcflags)" +} + match_indev_test() { RET=0 |