summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHuacai Chen <chenhuacai@loongson.cn>2023-11-01 10:55:00 +0800
committerHuacai Chen <chenhuacai@loongson.cn>2023-11-01 10:55:00 +0800
commita6bdc082ad1c91d389a6ba0c7a1945818f732114 (patch)
treefa630701d5d2a8bc1ab8c4abf759663bbb81aeeb
parentffc253263a1375a65fa6c9f62a893e9767fbebfa (diff)
parent99c9991f4e5d77328187187d0c921a3b62bfa998 (diff)
Merge 'bpf-next 2023-10-16' into loongarch-next
LoongArch architecture changes for 6.7 (BPF CPU v4 support) depend on the bpf changes to fix conflictions in selftests and work, so merge them to create a base.
-rw-r--r--Documentation/admin-guide/sysctl/net.rst1
-rw-r--r--Documentation/bpf/libbpf/program_types.rst10
-rw-r--r--Documentation/bpf/prog_flow_dissector.rst2
-rw-r--r--Documentation/devicetree/bindings/arm/mediatek/mediatek,mt7622-wed.yaml1
-rw-r--r--Documentation/devicetree/bindings/mfd/syscon.yaml2
-rw-r--r--Documentation/devicetree/bindings/net/dsa/microchip,ksz.yaml20
-rw-r--r--Documentation/devicetree/bindings/net/loongson,ls1b-gmac.yaml114
-rw-r--r--Documentation/devicetree/bindings/net/loongson,ls1c-emac.yaml113
-rw-r--r--Documentation/devicetree/bindings/net/snps,dwmac.yaml5
-rw-r--r--Documentation/devicetree/bindings/net/ti,icssg-prueth.yaml8
-rw-r--r--Documentation/devicetree/bindings/soc/mediatek/mediatek,mt7986-wo-ccif.yaml1
-rw-r--r--Documentation/driver-api/dpll.rst497
-rw-r--r--Documentation/driver-api/index.rst1
-rw-r--r--Documentation/netlink/specs/dpll.yaml488
-rw-r--r--Documentation/netlink/specs/netdev.yaml21
-rw-r--r--Documentation/networking/device_drivers/ethernet/index.rst1
-rw-r--r--Documentation/networking/device_drivers/ethernet/intel/idpf.rst160
-rw-r--r--Documentation/networking/filter.rst4
-rw-r--r--Documentation/networking/ip-sysctl.rst7
-rw-r--r--Documentation/networking/xdp-rx-metadata.rst7
-rw-r--r--MAINTAINERS21
-rw-r--r--arch/arm/net/bpf_jit_32.c280
-rw-r--r--arch/arm/net/bpf_jit_32.h4
-rw-r--r--arch/arm64/net/bpf_jit_comp.c2
-rw-r--r--arch/s390/net/bpf_jit_comp.c267
-rw-r--r--arch/x86/net/bpf_jit_comp.c148
-rw-r--r--drivers/Kconfig2
-rw-r--r--drivers/Makefile1
-rw-r--r--drivers/dpll/Kconfig7
-rw-r--r--drivers/dpll/Makefile9
-rw-r--r--drivers/dpll/dpll_core.c798
-rw-r--r--drivers/dpll/dpll_core.h89
-rw-r--r--drivers/dpll/dpll_netlink.c1253
-rw-r--r--drivers/dpll/dpll_netlink.h13
-rw-r--r--drivers/dpll/dpll_nl.c162
-rw-r--r--drivers/dpll/dpll_nl.h51
-rw-r--r--drivers/net/dsa/b53/b53_mmap.c6
-rw-r--r--drivers/net/dsa/b53/b53_srab.c8
-rw-r--r--drivers/net/dsa/bcm_sf2.c8
-rw-r--r--drivers/net/dsa/hirschmann/hellcreek.c8
-rw-r--r--drivers/net/dsa/lantiq_gswip.c8
-rw-r--r--drivers/net/dsa/microchip/Makefile2
-rw-r--r--drivers/net/dsa/microchip/ksz8795_reg.h14
-rw-r--r--drivers/net/dsa/microchip/ksz9477.c2
-rw-r--r--drivers/net/dsa/microchip/ksz9477.h36
-rw-r--r--drivers/net/dsa/microchip/ksz9477_acl.c1436
-rw-r--r--drivers/net/dsa/microchip/ksz9477_reg.h13
-rw-r--r--drivers/net/dsa/microchip/ksz9477_tc_flower.c281
-rw-r--r--drivers/net/dsa/microchip/ksz_common.c372
-rw-r--r--drivers/net/dsa/microchip/ksz_common.h21
-rw-r--r--drivers/net/dsa/mt7530-mmio.c7
-rw-r--r--drivers/net/dsa/mv88e6xxx/pcs-639x.c2
-rw-r--r--drivers/net/dsa/ocelot/ocelot_ext.c8
-rw-r--r--drivers/net/dsa/ocelot/seville_vsc9953.c8
-rw-r--r--drivers/net/dsa/realtek/realtek-smi.c8
-rw-r--r--drivers/net/dsa/realtek/rtl8366rb.c44
-rw-r--r--drivers/net/dsa/rzn1_a5psw.c8
-rw-r--r--drivers/net/dsa/sja1105/sja1105_clocking.c21
-rw-r--r--drivers/net/dsa/vitesse-vsc73xx-platform.c8
-rw-r--r--drivers/net/ethernet/8390/ax88796.c6
-rw-r--r--drivers/net/ethernet/8390/mcf8390.c5
-rw-r--r--drivers/net/ethernet/8390/ne.c5
-rw-r--r--drivers/net/ethernet/actions/owl-emac.c6
-rw-r--r--drivers/net/ethernet/aeroflex/greth.c6
-rw-r--r--drivers/net/ethernet/allwinner/sun4i-emac.c5
-rw-r--r--drivers/net/ethernet/altera/altera_tse_main.c6
-rw-r--r--drivers/net/ethernet/amd/au1000_eth.c6
-rw-r--r--drivers/net/ethernet/amd/pds_core/core.c43
-rw-r--r--drivers/net/ethernet/amd/pds_core/core.h7
-rw-r--r--drivers/net/ethernet/amd/pds_core/dev.c11
-rw-r--r--drivers/net/ethernet/amd/pds_core/main.c50
-rw-r--r--drivers/net/ethernet/amd/sunlance.c6
-rw-r--r--drivers/net/ethernet/amd/xgbe/xgbe-platform.c6
-rw-r--r--drivers/net/ethernet/apm/xgene-v2/main.c6
-rw-r--r--drivers/net/ethernet/apm/xgene/xgene_enet_main.c6
-rw-r--r--drivers/net/ethernet/apple/macmace.c6
-rw-r--r--drivers/net/ethernet/arc/emac_arc.c6
-rw-r--r--drivers/net/ethernet/arc/emac_rockchip.c5
-rw-r--r--drivers/net/ethernet/atheros/ag71xx.c8
-rw-r--r--drivers/net/ethernet/atheros/atl1c/atl1c.h3
-rw-r--r--drivers/net/ethernet/atheros/atl1c/atl1c_main.c67
-rw-r--r--drivers/net/ethernet/broadcom/asp2/bcmasp.c8
-rw-r--r--drivers/net/ethernet/broadcom/bcm4908_enet.c6
-rw-r--r--drivers/net/ethernet/broadcom/bcm63xx_enet.c10
-rw-r--r--drivers/net/ethernet/broadcom/bcmsysport.c6
-rw-r--r--drivers/net/ethernet/broadcom/bgmac-platform.c6
-rw-r--r--drivers/net/ethernet/broadcom/genet/bcmgenet.c6
-rw-r--r--drivers/net/ethernet/broadcom/sb1250-mac.c6
-rw-r--r--drivers/net/ethernet/cadence/macb_main.c6
-rw-r--r--drivers/net/ethernet/calxeda/xgmac.c6
-rw-r--r--drivers/net/ethernet/cavium/octeon/octeon_mgmt.c5
-rw-r--r--drivers/net/ethernet/cirrus/cs89x0.c5
-rw-r--r--drivers/net/ethernet/cirrus/ep93xx_eth.c8
-rw-r--r--drivers/net/ethernet/cirrus/mac89x0.c5
-rw-r--r--drivers/net/ethernet/cortina/gemini.c12
-rw-r--r--drivers/net/ethernet/davicom/dm9000.c6
-rw-r--r--drivers/net/ethernet/dnet.c6
-rw-r--r--drivers/net/ethernet/engleder/tsnep_main.c6
-rw-r--r--drivers/net/ethernet/ethoc.c6
-rw-r--r--drivers/net/ethernet/faraday/ftgmac100.c5
-rw-r--r--drivers/net/ethernet/faraday/ftmac100.c5
-rw-r--r--drivers/net/ethernet/google/gve/gve_main.c2
-rw-r--r--drivers/net/ethernet/hisilicon/hip04_eth.c6
-rw-r--r--drivers/net/ethernet/hisilicon/hisi_femac.c6
-rw-r--r--drivers/net/ethernet/hisilicon/hix5hd2_gmac.c6
-rw-r--r--drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c6
-rw-r--r--drivers/net/ethernet/hisilicon/hns/hns_enet.c5
-rw-r--r--drivers/net/ethernet/hisilicon/hns_mdio.c5
-rw-r--r--drivers/net/ethernet/huawei/hinic/hinic_tx.c8
-rw-r--r--drivers/net/ethernet/i825xx/sni_82596.c5
-rw-r--r--drivers/net/ethernet/ibm/ehea/ehea_main.c8
-rw-r--r--drivers/net/ethernet/ibm/emac/core.c6
-rw-r--r--drivers/net/ethernet/ibm/emac/mal.c6
-rw-r--r--drivers/net/ethernet/ibm/emac/rgmii.c6
-rw-r--r--drivers/net/ethernet/ibm/emac/tah.c6
-rw-r--r--drivers/net/ethernet/ibm/emac/zmii.c6
-rw-r--r--drivers/net/ethernet/intel/Kconfig13
-rw-r--r--drivers/net/ethernet/intel/Makefile1
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_main.c10
-rw-r--r--drivers/net/ethernet/intel/iavf/iavf.h18
-rw-r--r--drivers/net/ethernet/intel/iavf/iavf_main.c102
-rw-r--r--drivers/net/ethernet/intel/iavf/iavf_virtchnl.c79
-rw-r--r--drivers/net/ethernet/intel/ice/Makefile3
-rw-r--r--drivers/net/ethernet/intel/ice/ice.h8
-rw-r--r--drivers/net/ethernet/intel/ice/ice_adminq_cmd.h248
-rw-r--r--drivers/net/ethernet/intel/ice/ice_common.c507
-rw-r--r--drivers/net/ethernet/intel/ice/ice_common.h45
-rw-r--r--drivers/net/ethernet/intel/ice/ice_dpll.c1904
-rw-r--r--drivers/net/ethernet/intel/ice/ice_dpll.h104
-rw-r--r--drivers/net/ethernet/intel/ice/ice_gnss.c3
-rw-r--r--drivers/net/ethernet/intel/ice/ice_lib.c18
-rw-r--r--drivers/net/ethernet/intel/ice/ice_main.c8
-rw-r--r--drivers/net/ethernet/intel/ice/ice_ptp.c101
-rw-r--r--drivers/net/ethernet/intel/ice/ice_ptp_hw.c824
-rw-r--r--drivers/net/ethernet/intel/ice/ice_ptp_hw.h99
-rw-r--r--drivers/net/ethernet/intel/ice/ice_type.h23
-rw-r--r--drivers/net/ethernet/intel/ice/ice_vf_lib.h3
-rw-r--r--drivers/net/ethernet/intel/ice/ice_virtchnl.c69
-rw-r--r--drivers/net/ethernet/intel/idpf/Makefile18
-rw-r--r--drivers/net/ethernet/intel/idpf/idpf.h968
-rw-r--r--drivers/net/ethernet/intel/idpf/idpf_controlq.c621
-rw-r--r--drivers/net/ethernet/intel/idpf/idpf_controlq.h130
-rw-r--r--drivers/net/ethernet/intel/idpf/idpf_controlq_api.h169
-rw-r--r--drivers/net/ethernet/intel/idpf/idpf_controlq_setup.c171
-rw-r--r--drivers/net/ethernet/intel/idpf/idpf_dev.c165
-rw-r--r--drivers/net/ethernet/intel/idpf/idpf_devids.h10
-rw-r--r--drivers/net/ethernet/intel/idpf/idpf_ethtool.c1369
-rw-r--r--drivers/net/ethernet/intel/idpf/idpf_lan_pf_regs.h124
-rw-r--r--drivers/net/ethernet/intel/idpf/idpf_lan_txrx.h293
-rw-r--r--drivers/net/ethernet/intel/idpf/idpf_lan_vf_regs.h128
-rw-r--r--drivers/net/ethernet/intel/idpf/idpf_lib.c2379
-rw-r--r--drivers/net/ethernet/intel/idpf/idpf_main.c279
-rw-r--r--drivers/net/ethernet/intel/idpf/idpf_mem.h20
-rw-r--r--drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c1183
-rw-r--r--drivers/net/ethernet/intel/idpf/idpf_txrx.c4289
-rw-r--r--drivers/net/ethernet/intel/idpf/idpf_txrx.h1023
-rw-r--r--drivers/net/ethernet/intel/idpf/idpf_vf_dev.c163
-rw-r--r--drivers/net/ethernet/intel/idpf/idpf_virtchnl.c3791
-rw-r--r--drivers/net/ethernet/intel/idpf/virtchnl2.h1273
-rw-r--r--drivers/net/ethernet/intel/idpf/virtchnl2_lan_desc.h451
-rw-r--r--drivers/net/ethernet/korina.c6
-rw-r--r--drivers/net/ethernet/lantiq_etop.c6
-rw-r--r--drivers/net/ethernet/lantiq_xrx200.c6
-rw-r--r--drivers/net/ethernet/litex/litex_liteeth.c6
-rw-r--r--drivers/net/ethernet/marvell/mv643xx_eth.c11
-rw-r--r--drivers/net/ethernet/marvell/mvmdio.c6
-rw-r--r--drivers/net/ethernet/marvell/mvneta.c6
-rw-r--r--drivers/net/ethernet/marvell/mvneta_bm.c6
-rw-r--r--drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c8
-rw-r--r--drivers/net/ethernet/marvell/octeon_ep/octep_cn9k_pf.c168
-rw-r--r--drivers/net/ethernet/marvell/octeon_ep/octep_config.h22
-rw-r--r--drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_net.c24
-rw-r--r--drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_net.h18
-rw-r--r--drivers/net/ethernet/marvell/octeon_ep/octep_main.c213
-rw-r--r--drivers/net/ethernet/marvell/octeon_ep/octep_main.h13
-rw-r--r--drivers/net/ethernet/marvell/octeon_ep/octep_regs_cn9k_pf.h4
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/mbox.h5
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/ptp.c86
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.c31
-rw-r--r--drivers/net/ethernet/marvell/pxa168_eth.c5
-rw-r--r--drivers/net/ethernet/mediatek/mtk_eth_soc.c7
-rw-r--r--drivers/net/ethernet/mediatek/mtk_eth_soc.h2
-rw-r--r--drivers/net/ethernet/mediatek/mtk_ppe.c4
-rw-r--r--drivers/net/ethernet/mediatek/mtk_ppe.h19
-rw-r--r--drivers/net/ethernet/mediatek/mtk_ppe_offload.c10
-rw-r--r--drivers/net/ethernet/mediatek/mtk_wed.c1432
-rw-r--r--drivers/net/ethernet/mediatek/mtk_wed.h57
-rw-r--r--drivers/net/ethernet/mediatek/mtk_wed_debugfs.c400
-rw-r--r--drivers/net/ethernet/mediatek/mtk_wed_mcu.c98
-rw-r--r--drivers/net/ethernet/mediatek/mtk_wed_regs.h369
-rw-r--r--drivers/net/ethernet/mediatek/mtk_wed_wo.h3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/Kconfig8
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/Makefile3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/dev.c17
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/devlink.c11
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/dpll.c432
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c8
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/main.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.h6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c26
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c34
-rw-r--r--drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core_linecard_dev.c9
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_bloom_filter.c2
-rw-r--r--drivers/net/ethernet/micrel/ks8842.c5
-rw-r--r--drivers/net/ethernet/micrel/ks8851_par.c6
-rw-r--r--drivers/net/ethernet/microchip/lan743x_main.c6
-rw-r--r--drivers/net/ethernet/microchip/lan966x/lan966x_main.c6
-rw-r--r--drivers/net/ethernet/microchip/sparx5/sparx5_main.c6
-rw-r--r--drivers/net/ethernet/moxa/moxart_ether.c6
-rw-r--r--drivers/net/ethernet/mscc/ocelot_vsc7514.c6
-rw-r--r--drivers/net/ethernet/natsemi/jazzsonic.c6
-rw-r--r--drivers/net/ethernet/natsemi/macsonic.c6
-rw-r--r--drivers/net/ethernet/natsemi/xtsonic.c6
-rw-r--r--drivers/net/ethernet/ni/nixge.c6
-rw-r--r--drivers/net/ethernet/nxp/lpc_eth.c6
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_dev.h3
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_lif.c12
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_txrx.c77
-rw-r--r--drivers/net/ethernet/qualcomm/emac/emac.c6
-rw-r--r--drivers/net/ethernet/renesas/ravb_main.c6
-rw-r--r--drivers/net/ethernet/renesas/rswitch.c6
-rw-r--r--drivers/net/ethernet/renesas/sh_eth.c6
-rw-r--r--drivers/net/ethernet/samsung/sxgbe/sxgbe_platform.c6
-rw-r--r--drivers/net/ethernet/seeq/sgiseeq.c6
-rw-r--r--drivers/net/ethernet/sfc/ptp.c27
-rw-r--r--drivers/net/ethernet/sgi/ioc3-eth.c6
-rw-r--r--drivers/net/ethernet/sgi/meth.c6
-rw-r--r--drivers/net/ethernet/smsc/smc91x.c6
-rw-r--r--drivers/net/ethernet/smsc/smsc911x.c6
-rw-r--r--drivers/net/ethernet/socionext/netsec.c6
-rw-r--r--drivers/net/ethernet/socionext/sni_ave.c6
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/Kconfig11
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/Makefile1
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-anarion.c10
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c15
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c15
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c13
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c33
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c25
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c27
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-loongson1.c209
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c19
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c6
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-meson.c25
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c53
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c14
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c16
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-starfive.c10
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c14
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c17
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c6
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c23
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-tegra.c10
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c18
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_main.c10
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c73
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h5
-rw-r--r--drivers/net/ethernet/sun/niu.c5
-rw-r--r--drivers/net/ethernet/sun/sunbmac.c6
-rw-r--r--drivers/net/ethernet/sun/sunqe.c6
-rw-r--r--drivers/net/ethernet/sunplus/spl2sw_driver.c6
-rw-r--r--drivers/net/ethernet/ti/cpmac.c6
-rw-r--r--drivers/net/ethernet/ti/davinci_emac.c6
-rw-r--r--drivers/net/ethernet/ti/davinci_mdio.c6
-rw-r--r--drivers/net/ethernet/ti/icssg/icssg_config.c14
-rw-r--r--drivers/net/ethernet/ti/icssg/icssg_prueth.c22
-rw-r--r--drivers/net/ethernet/ti/icssg/icssg_prueth.h2
-rw-r--r--drivers/net/ethernet/ti/netcp_core.c5
-rw-r--r--drivers/net/ethernet/toshiba/spider_net.c2
-rw-r--r--drivers/net/ethernet/tundra/tsi108_eth.c6
-rw-r--r--drivers/net/ethernet/via/via-rhine.c6
-rw-r--r--drivers/net/ethernet/via/via-velocity.c6
-rw-r--r--drivers/net/ethernet/wangxun/libwx/wx_hw.c92
-rw-r--r--drivers/net/ethernet/wangxun/libwx/wx_hw.h7
-rw-r--r--drivers/net/ethernet/wangxun/libwx/wx_type.h1
-rw-r--r--drivers/net/ethernet/wangxun/ngbe/ngbe_mdio.c119
-rw-r--r--drivers/net/ethernet/wangxun/ngbe/ngbe_type.h3
-rw-r--r--drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c56
-rw-r--r--drivers/net/ethernet/wiznet/w5100.c6
-rw-r--r--drivers/net/ethernet/wiznet/w5300.c5
-rw-r--r--drivers/net/ethernet/xilinx/ll_temac_main.c5
-rw-r--r--drivers/net/ethernet/xilinx/xilinx_axienet_main.c6
-rw-r--r--drivers/net/ethernet/xilinx/xilinx_emaclite.c6
-rw-r--r--drivers/net/ethernet/xscale/ixp4xx_eth.c5
-rw-r--r--drivers/net/gtp.c4
-rw-r--r--drivers/net/mdio/mdio-aspeed.c6
-rw-r--r--drivers/net/mdio/mdio-bcm-iproc.c6
-rw-r--r--drivers/net/mdio/mdio-bcm-unimac.c6
-rw-r--r--drivers/net/mdio/mdio-gpio.c6
-rw-r--r--drivers/net/mdio/mdio-hisi-femac.c6
-rw-r--r--drivers/net/mdio/mdio-ipq4019.c6
-rw-r--r--drivers/net/mdio/mdio-ipq8064.c7
-rw-r--r--drivers/net/mdio/mdio-moxart.c6
-rw-r--r--drivers/net/mdio/mdio-mscc-miim.c6
-rw-r--r--drivers/net/mdio/mdio-mux-bcm-iproc.c6
-rw-r--r--drivers/net/mdio/mdio-mux-bcm6368.c6
-rw-r--r--drivers/net/mdio/mdio-mux-gpio.c5
-rw-r--r--drivers/net/mdio/mdio-mux-meson-g12a.c6
-rw-r--r--drivers/net/mdio/mdio-mux-meson-gxl.c6
-rw-r--r--drivers/net/mdio/mdio-mux-mmioreg.c6
-rw-r--r--drivers/net/mdio/mdio-mux-multiplexer.c6
-rw-r--r--drivers/net/mdio/mdio-octeon.c5
-rw-r--r--drivers/net/mdio/mdio-sun4i.c6
-rw-r--r--drivers/net/mdio/mdio-xgene.c6
-rw-r--r--drivers/net/phy/ax88796b.c2
-rw-r--r--drivers/net/phy/phy.c207
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt7915/mmio.c2
-rw-r--r--drivers/net/wwan/wwan_core.c5
-rw-r--r--drivers/ptp/Kconfig1
-rw-r--r--drivers/ptp/ptp_ocp.c369
-rw-r--r--drivers/vhost/vsock.c14
-rw-r--r--include/linux/avf/virtchnl.h11
-rw-r--r--include/linux/bpf-cgroup-defs.h5
-rw-r--r--include/linux/bpf-cgroup.h90
-rw-r--r--include/linux/bpf.h60
-rw-r--r--include/linux/bpf_verifier.h9
-rw-r--r--include/linux/ceph/mon_client.h2
-rw-r--r--include/linux/dpll.h152
-rw-r--r--include/linux/filter.h61
-rw-r--r--include/linux/ipv6.h49
-rw-r--r--include/linux/kasan.h2
-rw-r--r--include/linux/mlx5/device.h1
-rw-r--r--include/linux/mlx5/driver.h2
-rw-r--r--include/linux/mlx5/mlx5_ifc.h59
-rw-r--r--include/linux/netdevice.h22
-rw-r--r--include/linux/pds/pds_core_if.h1
-rw-r--r--include/linux/phy.h1
-rw-r--r--include/linux/soc/mediatek/mtk_wed.h76
-rw-r--r--include/linux/stmmac.h1
-rw-r--r--include/linux/tcp.h22
-rw-r--r--include/linux/trace_events.h6
-rw-r--r--include/linux/udp.h66
-rw-r--r--include/linux/virtio_vsock.h10
-rw-r--r--include/net/devlink.h9
-rw-r--r--include/net/dst.h4
-rw-r--r--include/net/inet_sock.h10
-rw-r--r--include/net/ip6_route.h19
-rw-r--r--include/net/ipv6.h36
-rw-r--r--include/net/ipv6_stubs.h5
-rw-r--r--include/net/netns/ipv4.h1
-rw-r--r--include/net/sock.h11
-rw-r--r--include/net/udp_tunnel.h9
-rw-r--r--include/net/udplite.h14
-rw-r--r--include/net/xdp.h19
-rw-r--r--include/net/xdp_sock.h2
-rw-r--r--include/net/xfrm.h2
-rw-r--r--include/trace/events/vsock_virtio_transport_common.h12
-rw-r--r--include/uapi/linux/bpf.h38
-rw-r--r--include/uapi/linux/devlink.h1
-rw-r--r--include/uapi/linux/dpll.h201
-rw-r--r--include/uapi/linux/if_link.h2
-rw-r--r--include/uapi/linux/netdev.h16
-rw-r--r--include/uapi/linux/tcp.h12
-rw-r--r--kernel/bpf/bpf_iter.c2
-rw-r--r--kernel/bpf/bpf_struct_ops.c26
-rw-r--r--kernel/bpf/btf.c35
-rw-r--r--kernel/bpf/cgroup.c28
-rw-r--r--kernel/bpf/core.c37
-rw-r--r--kernel/bpf/helpers.c69
-rw-r--r--kernel/bpf/memalloc.c30
-rw-r--r--kernel/bpf/offload.c18
-rw-r--r--kernel/bpf/stackmap.c2
-rw-r--r--kernel/bpf/syscall.c35
-rw-r--r--kernel/bpf/task_iter.c131
-rw-r--r--kernel/bpf/trampoline.c4
-rw-r--r--kernel/bpf/verifier.c724
-rw-r--r--kernel/trace/bpf_trace.c10
-rw-r--r--kernel/trace/trace_kprobe.c14
-rw-r--r--kernel/trace/trace_syscalls.c4
-rw-r--r--lib/test_bpf.c371
-rw-r--r--mm/kasan/kasan.h1
-rw-r--r--net/ceph/mon_client.c2
-rw-r--r--net/core/dev.c22
-rw-r--r--net/core/dst.c10
-rw-r--r--net/core/filter.c67
-rw-r--r--net/core/netdev-genl.c12
-rw-r--r--net/core/rtnetlink.c36
-rw-r--r--net/core/skbuff.c4
-rw-r--r--net/core/sock.c15
-rw-r--r--net/core/xdp.c4
-rw-r--r--net/dccp/ipv6.c8
-rw-r--r--net/devlink/core.c217
-rw-r--r--net/devlink/dev.c50
-rw-r--r--net/devlink/devl_internal.h34
-rw-r--r--net/devlink/linecard.c80
-rw-r--r--net/devlink/netlink.c26
-rw-r--r--net/devlink/port.c55
-rw-r--r--net/ipv4/af_inet.c9
-rw-r--r--net/ipv4/igmp.c2
-rw-r--r--net/ipv4/ping.c7
-rw-r--r--net/ipv4/route.c6
-rw-r--r--net/ipv4/sysctl_net_ipv4.c9
-rw-r--r--net/ipv4/tcp.c9
-rw-r--r--net/ipv4/tcp_input.c32
-rw-r--r--net/ipv4/tcp_ipv4.c3
-rw-r--r--net/ipv4/tcp_minisocks.c4
-rw-r--r--net/ipv4/tcp_output.c15
-rw-r--r--net/ipv4/tcp_timer.c19
-rw-r--r--net/ipv4/udp.c83
-rw-r--r--net/ipv4/udp_offload.c4
-rw-r--r--net/ipv4/udp_tunnel_core.c2
-rw-r--r--net/ipv4/udplite.c1
-rw-r--r--net/ipv4/xfrm4_input.c4
-rw-r--r--net/ipv6/af_inet6.c19
-rw-r--r--net/ipv6/datagram.c15
-rw-r--r--net/ipv6/icmp.c4
-rw-r--r--net/ipv6/ip6_flowlabel.c8
-rw-r--r--net/ipv6/ip6_output.c44
-rw-r--r--net/ipv6/ipv6_sockglue.c242
-rw-r--r--net/ipv6/mcast.c6
-rw-r--r--net/ipv6/ndisc.c4
-rw-r--r--net/ipv6/ping.c6
-rw-r--r--net/ipv6/raw.c16
-rw-r--r--net/ipv6/route.c6
-rw-r--r--net/ipv6/tcp_ipv6.c23
-rw-r--r--net/ipv6/udp.c52
-rw-r--r--net/ipv6/udplite.c1
-rw-r--r--net/ipv6/xfrm6_input.c4
-rw-r--r--net/l2tp/l2tp_core.c6
-rw-r--r--net/l2tp/l2tp_ip6.c6
-rw-r--r--net/mac80211/tdls.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_sync.c12
-rw-r--r--net/openvswitch/actions.c4
-rw-r--r--net/sched/cls_route.c37
-rw-r--r--net/sched/sch_frag.c4
-rw-r--r--net/sctp/ipv6.c7
-rw-r--r--net/tipc/link.c4
-rw-r--r--net/tls/tls_sw.c2
-rw-r--r--net/unix/af_unix.c35
-rw-r--r--net/vmw_vsock/af_vsock.c3
-rw-r--r--net/vmw_vsock/virtio_transport.c92
-rw-r--r--net/vmw_vsock/virtio_transport_common.c307
-rw-r--r--net/xdp/xsk.c2
-rw-r--r--net/xdp/xsk_buff_pool.c3
-rw-r--r--net/xfrm/xfrm_policy.c2
-rw-r--r--samples/bpf/Makefile7
-rw-r--r--samples/bpf/syscall_tp_user.c45
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-cgroup.rst16
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-prog.rst8
-rw-r--r--tools/bpf/bpftool/bash-completion/bpftool14
-rw-r--r--tools/bpf/bpftool/cgroup.c16
-rw-r--r--tools/bpf/bpftool/gen.c58
-rw-r--r--tools/bpf/bpftool/link.c6
-rw-r--r--tools/bpf/bpftool/prog.c7
-rw-r--r--tools/include/uapi/linux/bpf.h38
-rw-r--r--tools/include/uapi/linux/netdev.h16
-rw-r--r--tools/lib/bpf/bpf_helpers.h1
-rw-r--r--tools/lib/bpf/bpf_tracing.h2
-rw-r--r--tools/lib/bpf/btf.c160
-rw-r--r--tools/lib/bpf/elf.c139
-rw-r--r--tools/lib/bpf/libbpf.c198
-rw-r--r--tools/lib/bpf/libbpf.h73
-rw-r--r--tools/lib/bpf/libbpf.map7
-rw-r--r--tools/lib/bpf/ringbuf.c85
-rw-r--r--tools/net/ynl/generated/netdev-user.c19
-rw-r--r--tools/net/ynl/generated/netdev-user.h3
-rw-r--r--tools/net/ynl/samples/Makefile2
-rw-r--r--tools/net/ynl/samples/netdev.c8
-rw-r--r--tools/testing/selftests/bpf/DENYLIST.aarch642
-rw-r--r--tools/testing/selftests/bpf/DENYLIST.s390x26
-rw-r--r--tools/testing/selftests/bpf/Makefile37
-rw-r--r--tools/testing/selftests/bpf/bpf_experimental.h327
-rw-r--r--tools/testing/selftests/bpf/bpf_kfuncs.h14
-rw-r--r--tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c5
-rw-r--r--tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h2
-rw-r--r--tools/testing/selftests/bpf/cgroup_helpers.c38
-rw-r--r--tools/testing/selftests/bpf/config1
-rw-r--r--tools/testing/selftests/bpf/liburandom_read.map15
-rw-r--r--tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c4
-rw-r--r--tools/testing/selftests/bpf/network_helpers.c34
-rw-r--r--tools/testing/selftests/bpf/network_helpers.h1
-rw-r--r--tools/testing/selftests/bpf/prog_tests/align.c241
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_iter.c26
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/connect_ping.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/exceptions.c409
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fib_lookup.c83
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fill_link_info.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/iters.c58
-rw-r--r--tools/testing/selftests/bpf/prog_tests/kprobe_multi_testmod_test.c20
-rw-r--r--tools/testing/selftests/bpf/prog_tests/libbpf_str.c6
-rw-r--r--tools/testing/selftests/bpf/prog_tests/linked_list.c6
-rw-r--r--tools/testing/selftests/bpf/prog_tests/lwt_helpers.h3
-rw-r--r--tools/testing/selftests/bpf/prog_tests/missed.c138
-rw-r--r--tools/testing/selftests/bpf/prog_tests/module_fentry_shadow.c5
-rw-r--r--tools/testing/selftests/bpf/prog_tests/percpu_alloc.c128
-rw-r--r--tools/testing/selftests/bpf/prog_tests/preempted_bpf_ma_op.c89
-rw-r--r--tools/testing/selftests/bpf/prog_tests/queue_stack_map.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ringbuf.c26
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c15
-rw-r--r--tools/testing/selftests/bpf/prog_tests/section_names.c45
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sock_addr.c612
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_basic.c8
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_listen.c150
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tailcalls.c269
-rw-r--r--tools/testing/selftests/bpf/prog_tests/timer.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/uprobe.c95
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_metadata.c2
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_task_vmas.c (renamed from tools/testing/selftests/bpf/progs/bpf_iter_task_vma.c)0
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_misc.h3
-rw-r--r--tools/testing/selftests/bpf/progs/connect_unix_prog.c40
-rw-r--r--tools/testing/selftests/bpf/progs/exceptions.c368
-rw-r--r--tools/testing/selftests/bpf/progs/exceptions_assert.c135
-rw-r--r--tools/testing/selftests/bpf/progs/exceptions_ext.c72
-rw-r--r--tools/testing/selftests/bpf/progs/exceptions_fail.c347
-rw-r--r--tools/testing/selftests/bpf/progs/getpeername_unix_prog.c39
-rw-r--r--tools/testing/selftests/bpf/progs/getsockname_unix_prog.c39
-rw-r--r--tools/testing/selftests/bpf/progs/iters_task_vma.c43
-rw-r--r--tools/testing/selftests/bpf/progs/missed_kprobe.c30
-rw-r--r--tools/testing/selftests/bpf/progs/missed_kprobe_recursion.c48
-rw-r--r--tools/testing/selftests/bpf/progs/missed_tp_recursion.c41
-rw-r--r--tools/testing/selftests/bpf/progs/percpu_alloc_array.c190
-rw-r--r--tools/testing/selftests/bpf/progs/percpu_alloc_cgrp_local_storage.c109
-rw-r--r--tools/testing/selftests/bpf/progs/percpu_alloc_fail.c164
-rw-r--r--tools/testing/selftests/bpf/progs/preempted_bpf_ma_op.c106
-rw-r--r--tools/testing/selftests/bpf/progs/profiler.inc.h2
-rw-r--r--tools/testing/selftests/bpf/progs/recvmsg_unix_prog.c39
-rw-r--r--tools/testing/selftests/bpf/progs/sendmsg_unix_prog.c40
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_fentry.c18
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_fexit.c18
-rw-r--r--tools/testing/selftests/bpf/progs/test_ldsx_insn.c9
-rw-r--r--tools/testing/selftests/bpf/progs/test_uprobe.c61
-rw-r--r--tools/testing/selftests/bpf/progs/test_vmlinux.c4
-rw-r--r--tools/testing/selftests/bpf/progs/timer.c63
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_bswap.c4
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_gotol.c4
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_ldsx.c152
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_movsx.c4
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_sdiv.c4
-rw-r--r--tools/testing/selftests/bpf/progs/xsk_xdp_progs.c22
-rwxr-xr-xtools/testing/selftests/bpf/test_bpftool_synctypes.py9
-rw-r--r--tools/testing/selftests/bpf/test_loader.c4
-rw-r--r--tools/testing/selftests/bpf/test_progs.c2
-rw-r--r--tools/testing/selftests/bpf/test_progs.h2
-rwxr-xr-xtools/testing/selftests/bpf/test_xsk.sh40
-rw-r--r--tools/testing/selftests/bpf/trace_helpers.c134
-rw-r--r--tools/testing/selftests/bpf/trace_helpers.h8
-rw-r--r--tools/testing/selftests/bpf/urandom_read.c15
-rw-r--r--tools/testing/selftests/bpf/urandom_read_lib1.c22
-rw-r--r--tools/testing/selftests/bpf/xdp_features.c4
-rw-r--r--tools/testing/selftests/bpf/xdp_hw_metadata.c2
-rw-r--r--tools/testing/selftests/bpf/xsk.c3
-rw-r--r--tools/testing/selftests/bpf/xsk.h2
-rwxr-xr-xtools/testing/selftests/bpf/xsk_prereqs.sh10
-rw-r--r--tools/testing/selftests/bpf/xsk_xdp_common.h12
-rw-r--r--tools/testing/selftests/bpf/xsk_xdp_metadata.h5
-rw-r--r--tools/testing/selftests/bpf/xskxceiver.c1020
-rw-r--r--tools/testing/selftests/bpf/xskxceiver.h57
-rwxr-xr-xtools/testing/selftests/net/rtnetlink.sh981
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/filters/fw.json49
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/filters/route.json25
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/filters/u32.json25
-rw-r--r--tools/testing/vsock/util.c124
-rw-r--r--tools/testing/vsock/util.h3
-rw-r--r--tools/testing/vsock/vsock_test.c325
558 files changed, 44893 insertions, 4883 deletions
diff --git a/Documentation/admin-guide/sysctl/net.rst b/Documentation/admin-guide/sysctl/net.rst
index 4877563241f3..c7525942f12c 100644
--- a/Documentation/admin-guide/sysctl/net.rst
+++ b/Documentation/admin-guide/sysctl/net.rst
@@ -71,6 +71,7 @@ two flavors of JITs, the newer eBPF JIT currently supported on:
- s390x
- riscv64
- riscv32
+ - loongarch64
And the older cBPF JIT supported on the following archs:
diff --git a/Documentation/bpf/libbpf/program_types.rst b/Documentation/bpf/libbpf/program_types.rst
index ad4d4d5eecb0..63bb88846e50 100644
--- a/Documentation/bpf/libbpf/program_types.rst
+++ b/Documentation/bpf/libbpf/program_types.rst
@@ -56,6 +56,16 @@ described in more detail in the footnotes.
| | ``BPF_CGROUP_UDP6_RECVMSG`` | ``cgroup/recvmsg6`` | |
+ +----------------------------------------+----------------------------------+-----------+
| | ``BPF_CGROUP_UDP6_SENDMSG`` | ``cgroup/sendmsg6`` | |
+| +----------------------------------------+----------------------------------+-----------+
+| | ``BPF_CGROUP_UNIX_CONNECT`` | ``cgroup/connect_unix`` | |
+| +----------------------------------------+----------------------------------+-----------+
+| | ``BPF_CGROUP_UNIX_SENDMSG`` | ``cgroup/sendmsg_unix`` | |
+| +----------------------------------------+----------------------------------+-----------+
+| | ``BPF_CGROUP_UNIX_RECVMSG`` | ``cgroup/recvmsg_unix`` | |
+| +----------------------------------------+----------------------------------+-----------+
+| | ``BPF_CGROUP_UNIX_GETPEERNAME`` | ``cgroup/getpeername_unix`` | |
+| +----------------------------------------+----------------------------------+-----------+
+| | ``BPF_CGROUP_UNIX_GETSOCKNAME`` | ``cgroup/getsockname_unix`` | |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
| ``BPF_PROG_TYPE_CGROUP_SOCK`` | ``BPF_CGROUP_INET4_POST_BIND`` | ``cgroup/post_bind4`` | |
+ +----------------------------------------+----------------------------------+-----------+
diff --git a/Documentation/bpf/prog_flow_dissector.rst b/Documentation/bpf/prog_flow_dissector.rst
index 4d86780ab0f1..f24270b8b034 100644
--- a/Documentation/bpf/prog_flow_dissector.rst
+++ b/Documentation/bpf/prog_flow_dissector.rst
@@ -113,7 +113,7 @@ Flags
used by ``eth_get_headlen`` to estimate length of all headers for GRO.
* ``BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL`` - tells BPF flow dissector to
stop parsing as soon as it reaches IPv6 flow label; used by
- ``___skb_get_hash`` and ``__skb_get_hash_symmetric`` to get flow hash.
+ ``___skb_get_hash`` to get flow hash.
* ``BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP`` - tells BPF flow dissector to stop
parsing as soon as it reaches encapsulated headers; used by routing
infrastructure.
diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,mt7622-wed.yaml b/Documentation/devicetree/bindings/arm/mediatek/mediatek,mt7622-wed.yaml
index 28ded09d72e3..e7720caf31b3 100644
--- a/Documentation/devicetree/bindings/arm/mediatek/mediatek,mt7622-wed.yaml
+++ b/Documentation/devicetree/bindings/arm/mediatek/mediatek,mt7622-wed.yaml
@@ -22,6 +22,7 @@ properties:
- mediatek,mt7622-wed
- mediatek,mt7981-wed
- mediatek,mt7986-wed
+ - mediatek,mt7988-wed
- const: syscon
reg:
diff --git a/Documentation/devicetree/bindings/mfd/syscon.yaml b/Documentation/devicetree/bindings/mfd/syscon.yaml
index 8103154bbb52..c77d7b155a4c 100644
--- a/Documentation/devicetree/bindings/mfd/syscon.yaml
+++ b/Documentation/devicetree/bindings/mfd/syscon.yaml
@@ -49,6 +49,8 @@ properties:
- hisilicon,peri-subctrl
- hpe,gxp-sysreg
- intel,lgm-syscon
+ - loongson,ls1b-syscon
+ - loongson,ls1c-syscon
- marvell,armada-3700-usb2-host-misc
- mediatek,mt8135-pctl-a-syscfg
- mediatek,mt8135-pctl-b-syscfg
diff --git a/Documentation/devicetree/bindings/net/dsa/microchip,ksz.yaml b/Documentation/devicetree/bindings/net/dsa/microchip,ksz.yaml
index 03b5567be389..41014f5c01c4 100644
--- a/Documentation/devicetree/bindings/net/dsa/microchip,ksz.yaml
+++ b/Documentation/devicetree/bindings/net/dsa/microchip,ksz.yaml
@@ -49,6 +49,26 @@ properties:
Set if the output SYNCLKO clock should be disabled. Do not mix with
microchip,synclko-125.
+ microchip,io-drive-strength-microamp:
+ description:
+ IO Pad Drive Strength
+ enum: [8000, 16000]
+ default: 16000
+
+ microchip,hi-drive-strength-microamp:
+ description:
+ High Speed Drive Strength. Controls drive strength of GMII / RGMII /
+ MII / RMII (except TX_CLK/REFCLKI, COL and CRS) and CLKO_25_125 lines.
+ enum: [2000, 4000, 8000, 12000, 16000, 20000, 24000, 28000]
+ default: 24000
+
+ microchip,lo-drive-strength-microamp:
+ description:
+ Low Speed Drive Strength. Controls drive strength of TX_CLK / REFCLKI,
+ COL, CRS, LEDs, PME_N, NTRP_N, SDO and SDI/SDA/MDIO lines.
+ enum: [2000, 4000, 8000, 12000, 16000, 20000, 24000, 28000]
+ default: 8000
+
interrupts:
maxItems: 1
diff --git a/Documentation/devicetree/bindings/net/loongson,ls1b-gmac.yaml b/Documentation/devicetree/bindings/net/loongson,ls1b-gmac.yaml
new file mode 100644
index 000000000000..c4f3224bad38
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/loongson,ls1b-gmac.yaml
@@ -0,0 +1,114 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/loongson,ls1b-gmac.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Loongson-1B Gigabit Ethernet MAC Controller
+
+maintainers:
+ - Keguang Zhang <keguang.zhang@gmail.com>
+
+description: |
+ Loongson-1B Gigabit Ethernet MAC Controller is based on
+ Synopsys DesignWare MAC (version 3.50a).
+
+ Main features
+ - Dual 10/100/1000Mbps GMAC controllers
+ - Full-duplex operation (IEEE 802.3x flow control automatic transmission)
+ - Half-duplex operation (CSMA/CD Protocol and back-pressure support)
+ - RX Checksum Offload
+ - TX Checksum insertion
+ - MII interface
+ - RGMII interface
+
+select:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - loongson,ls1b-gmac
+ required:
+ - compatible
+
+properties:
+ compatible:
+ items:
+ - enum:
+ - loongson,ls1b-gmac
+ - const: snps,dwmac-3.50a
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ clock-names:
+ items:
+ - const: stmmaceth
+
+ interrupts:
+ maxItems: 1
+
+ interrupt-names:
+ items:
+ - const: macirq
+
+ loongson,ls1-syscon:
+ $ref: /schemas/types.yaml#/definitions/phandle
+ description:
+ Phandle to the syscon containing some extra configurations
+ including PHY interface mode.
+
+ phy-mode:
+ enum:
+ - mii
+ - rgmii-id
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+ - interrupts
+ - interrupt-names
+ - loongson,ls1-syscon
+
+allOf:
+ - $ref: snps,dwmac.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/loongson,ls1x-clk.h>
+ #include <dt-bindings/interrupt-controller/irq.h>
+
+ gmac0: ethernet@1fe10000 {
+ compatible = "loongson,ls1b-gmac", "snps,dwmac-3.50a";
+ reg = <0x1fe10000 0x10000>;
+
+ clocks = <&clkc LS1X_CLKID_AHB>;
+ clock-names = "stmmaceth";
+
+ interrupt-parent = <&intc1>;
+ interrupts = <2 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "macirq";
+
+ loongson,ls1-syscon = <&syscon>;
+
+ phy-handle = <&phy0>;
+ phy-mode = "mii";
+ snps,pbl = <1>;
+
+ mdio {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "snps,dwmac-mdio";
+
+ phy0: ethernet-phy@0 {
+ reg = <0x0>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/net/loongson,ls1c-emac.yaml b/Documentation/devicetree/bindings/net/loongson,ls1c-emac.yaml
new file mode 100644
index 000000000000..99001b940b83
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/loongson,ls1c-emac.yaml
@@ -0,0 +1,113 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/loongson,ls1c-emac.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Loongson-1C Ethernet MAC Controller
+
+maintainers:
+ - Keguang Zhang <keguang.zhang@gmail.com>
+
+description: |
+ Loongson-1C Ethernet MAC Controller is based on
+ Synopsys DesignWare MAC (version 3.50a).
+
+ Main features
+ - 10/100Mbps
+ - Full-duplex operation (IEEE 802.3x flow control automatic transmission)
+ - Half-duplex operation (CSMA/CD Protocol and back-pressure support)
+ - IEEE 802.1Q VLAN tag detection for reception frames
+ - MII interface
+ - RMII interface
+
+select:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - loongson,ls1c-emac
+ required:
+ - compatible
+
+properties:
+ compatible:
+ items:
+ - enum:
+ - loongson,ls1c-emac
+ - const: snps,dwmac-3.50a
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ clock-names:
+ items:
+ - const: stmmaceth
+
+ interrupts:
+ maxItems: 1
+
+ interrupt-names:
+ items:
+ - const: macirq
+
+ loongson,ls1-syscon:
+ $ref: /schemas/types.yaml#/definitions/phandle
+ description:
+ Phandle to the syscon containing some extra configurations
+ including PHY interface mode.
+
+ phy-mode:
+ enum:
+ - mii
+ - rmii
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+ - interrupts
+ - interrupt-names
+ - loongson,ls1-syscon
+
+allOf:
+ - $ref: snps,dwmac.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/loongson,ls1x-clk.h>
+ #include <dt-bindings/interrupt-controller/irq.h>
+
+ emac: ethernet@1fe10000 {
+ compatible = "loongson,ls1c-emac", "snps,dwmac-3.50a";
+ reg = <0x1fe10000 0x10000>;
+
+ clocks = <&clkc LS1X_CLKID_AHB>;
+ clock-names = "stmmaceth";
+
+ interrupt-parent = <&intc1>;
+ interrupts = <2 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "macirq";
+
+ loongson,ls1-syscon = <&syscon>;
+
+ phy-handle = <&phy0>;
+ phy-mode = "mii";
+ snps,pbl = <1>;
+
+ mdio {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "snps,dwmac-mdio";
+
+ phy0: ethernet-phy@13 {
+ reg = <0x13>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/net/snps,dwmac.yaml b/Documentation/devicetree/bindings/net/snps,dwmac.yaml
index ddf9522a5dc2..5c2769dc689a 100644
--- a/Documentation/devicetree/bindings/net/snps,dwmac.yaml
+++ b/Documentation/devicetree/bindings/net/snps,dwmac.yaml
@@ -394,6 +394,11 @@ properties:
When a PFC frame is received with priorities matching the bitmask,
the queue is blocked from transmitting for the pause time specified
in the PFC frame.
+
+ snps,coe-unsupported:
+ type: boolean
+ description: TX checksum offload is unsupported by the TX queue.
+
allOf:
- if:
required:
diff --git a/Documentation/devicetree/bindings/net/ti,icssg-prueth.yaml b/Documentation/devicetree/bindings/net/ti,icssg-prueth.yaml
index 311c570165f9..229c8f32019f 100644
--- a/Documentation/devicetree/bindings/net/ti,icssg-prueth.yaml
+++ b/Documentation/devicetree/bindings/net/ti,icssg-prueth.yaml
@@ -19,6 +19,7 @@ allOf:
properties:
compatible:
enum:
+ - ti,am642-icssg-prueth # for AM64x SoC family
- ti,am654-icssg-prueth # for AM65x SoC family
sram:
@@ -106,6 +107,13 @@ properties:
phandle to system controller node and register offset
to ICSSG control register for RGMII transmit delay
+ ti,half-duplex-capable:
+ type: boolean
+ description:
+ Indicates that the PHY output pin COL is routed to ICSSG GPIO pin
+ (PRGx_PRU0/1_GPIO10) as input so that the ICSSG MII port is
+ capable of half duplex operations.
+
required:
- reg
anyOf:
diff --git a/Documentation/devicetree/bindings/soc/mediatek/mediatek,mt7986-wo-ccif.yaml b/Documentation/devicetree/bindings/soc/mediatek/mediatek,mt7986-wo-ccif.yaml
index f0fa92b04b32..3b212f26abc5 100644
--- a/Documentation/devicetree/bindings/soc/mediatek/mediatek,mt7986-wo-ccif.yaml
+++ b/Documentation/devicetree/bindings/soc/mediatek/mediatek,mt7986-wo-ccif.yaml
@@ -20,6 +20,7 @@ properties:
items:
- enum:
- mediatek,mt7986-wo-ccif
+ - mediatek,mt7988-wo-ccif
- const: syscon
reg:
diff --git a/Documentation/driver-api/dpll.rst b/Documentation/driver-api/dpll.rst
new file mode 100644
index 000000000000..bb52f1b8c0be
--- /dev/null
+++ b/Documentation/driver-api/dpll.rst
@@ -0,0 +1,497 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===============================
+The Linux kernel dpll subsystem
+===============================
+
+DPLL
+====
+
+PLL - Phase Locked Loop is an electronic circuit which syntonizes clock
+signal of a device with an external clock signal. Effectively enabling
+device to run on the same clock signal beat as provided on a PLL input.
+
+DPLL - Digital Phase Locked Loop is an integrated circuit which in
+addition to plain PLL behavior incorporates a digital phase detector
+and may have digital divider in the loop. As a result, the frequency on
+DPLL's input and output may be configurable.
+
+Subsystem
+=========
+
+The main purpose of dpll subsystem is to provide general interface
+to configure devices that use any kind of Digital PLL and could use
+different sources of input signal to synchronize to, as well as
+different types of outputs.
+The main interface is NETLINK_GENERIC based protocol with an event
+monitoring multicast group defined.
+
+Device object
+=============
+
+Single dpll device object means single Digital PLL circuit and bunch of
+connected pins.
+It reports the supported modes of operation and current status to the
+user in response to the `do` request of netlink command
+``DPLL_CMD_DEVICE_GET`` and list of dplls registered in the subsystem
+with `dump` netlink request of the same command.
+Changing the configuration of dpll device is done with `do` request of
+netlink ``DPLL_CMD_DEVICE_SET`` command.
+A device handle is ``DPLL_A_ID``, it shall be provided to get or set
+configuration of particular device in the system. It can be obtained
+with a ``DPLL_CMD_DEVICE_GET`` `dump` request or
+a ``DPLL_CMD_DEVICE_ID_GET`` `do` request, where the one must provide
+attributes that result in single device match.
+
+Pin object
+==========
+
+A pin is amorphic object which represents either input or output, it
+could be internal component of the device, as well as externally
+connected.
+The number of pins per dpll vary, but usually multiple pins shall be
+provided for a single dpll device.
+Pin's properties, capabilities and status is provided to the user in
+response to `do` request of netlink ``DPLL_CMD_PIN_GET`` command.
+It is also possible to list all the pins that were registered in the
+system with `dump` request of ``DPLL_CMD_PIN_GET`` command.
+Configuration of a pin can be changed by `do` request of netlink
+``DPLL_CMD_PIN_SET`` command.
+Pin handle is a ``DPLL_A_PIN_ID``, it shall be provided to get or set
+configuration of particular pin in the system. It can be obtained with
+``DPLL_CMD_PIN_GET`` `dump` request or ``DPLL_CMD_PIN_ID_GET`` `do`
+request, where user provides attributes that result in single pin match.
+
+Pin selection
+=============
+
+In general, selected pin (the one which signal is driving the dpll
+device) can be obtained from ``DPLL_A_PIN_STATE`` attribute, and only
+one pin shall be in ``DPLL_PIN_STATE_CONNECTED`` state for any dpll
+device.
+
+Pin selection can be done either manually or automatically, depending
+on hardware capabilities and active dpll device work mode
+(``DPLL_A_MODE`` attribute). The consequence is that there are
+differences for each mode in terms of available pin states, as well as
+for the states the user can request for a dpll device.
+
+In manual mode (``DPLL_MODE_MANUAL``) the user can request or receive
+one of following pin states:
+
+- ``DPLL_PIN_STATE_CONNECTED`` - the pin is used to drive dpll device
+- ``DPLL_PIN_STATE_DISCONNECTED`` - the pin is not used to drive dpll
+ device
+
+In automatic mode (``DPLL_MODE_AUTOMATIC``) the user can request or
+receive one of following pin states:
+
+- ``DPLL_PIN_STATE_SELECTABLE`` - the pin shall be considered as valid
+ input for automatic selection algorithm
+- ``DPLL_PIN_STATE_DISCONNECTED`` - the pin shall be not considered as
+ a valid input for automatic selection algorithm
+
+In automatic mode (``DPLL_MODE_AUTOMATIC``) the user can only receive
+pin state ``DPLL_PIN_STATE_CONNECTED`` once automatic selection
+algorithm locks a dpll device with one of the inputs.
+
+Shared pins
+===========
+
+A single pin object can be attached to multiple dpll devices.
+Then there are two groups of configuration knobs:
+
+1) Set on a pin - the configuration affects all dpll devices pin is
+ registered to (i.e., ``DPLL_A_PIN_FREQUENCY``),
+2) Set on a pin-dpll tuple - the configuration affects only selected
+ dpll device (i.e., ``DPLL_A_PIN_PRIO``, ``DPLL_A_PIN_STATE``,
+ ``DPLL_A_PIN_DIRECTION``).
+
+MUX-type pins
+=============
+
+A pin can be MUX-type, it aggregates child pins and serves as a pin
+multiplexer. One or more pins are registered with MUX-type instead of
+being directly registered to a dpll device.
+Pins registered with a MUX-type pin provide user with additional nested
+attribute ``DPLL_A_PIN_PARENT_PIN`` for each parent they were registered
+with.
+If a pin was registered with multiple parent pins, they behave like a
+multiple output multiplexer. In this case output of a
+``DPLL_CMD_PIN_GET`` would contain multiple pin-parent nested
+attributes with current state related to each parent, like:
+
+'pin': [{{
+ 'clock-id': 282574471561216,
+ 'module-name': 'ice',
+ 'capabilities': 4,
+ 'id': 13,
+ 'parent-pin': [
+ {'parent-id': 2, 'state': 'connected'},
+ {'parent-id': 3, 'state': 'disconnected'}
+ ],
+ 'type': 'synce-eth-port'
+ }}]
+
+Only one child pin can provide its signal to the parent MUX-type pin at
+a time, the selection is done by requesting change of a child pin state
+on desired parent, with the use of ``DPLL_A_PIN_PARENT`` nested
+attribute. Example of netlink `set state on parent pin` message format:
+
+ ========================== =============================================
+ ``DPLL_A_PIN_ID`` child pin id
+ ``DPLL_A_PIN_PARENT_PIN`` nested attribute for requesting configuration
+ related to parent pin
+ ``DPLL_A_PIN_PARENT_ID`` parent pin id
+ ``DPLL_A_PIN_STATE`` requested pin state on parent
+ ========================== =============================================
+
+Pin priority
+============
+
+Some devices might offer a capability of automatic pin selection mode
+(enum value ``DPLL_MODE_AUTOMATIC`` of ``DPLL_A_MODE`` attribute).
+Usually, automatic selection is performed on the hardware level, which
+means only pins directly connected to the dpll can be used for automatic
+input pin selection.
+In automatic selection mode, the user cannot manually select a input
+pin for the device, instead the user shall provide all directly
+connected pins with a priority ``DPLL_A_PIN_PRIO``, the device would
+pick a highest priority valid signal and use it to control the DPLL
+device. Example of netlink `set priority on parent pin` message format:
+
+ ============================ =============================================
+ ``DPLL_A_PIN_ID`` configured pin id
+ ``DPLL_A_PIN_PARENT_DEVICE`` nested attribute for requesting configuration
+ related to parent dpll device
+ ``DPLL_A_PIN_PARENT_ID`` parent dpll device id
+ ``DPLL_A_PIN_PRIO`` requested pin prio on parent dpll
+ ============================ =============================================
+
+Child pin of MUX-type pin is not capable of automatic input pin selection,
+in order to configure active input of a MUX-type pin, the user needs to
+request desired pin state of the child pin on the parent pin,
+as described in the ``MUX-type pins`` chapter.
+
+Configuration commands group
+============================
+
+Configuration commands are used to get information about registered
+dpll devices (and pins), as well as set configuration of device or pins.
+As dpll devices must be abstracted and reflect real hardware,
+there is no way to add new dpll device via netlink from user space and
+each device should be registered by its driver.
+
+All netlink commands require ``GENL_ADMIN_PERM``. This is to prevent
+any spamming/DoS from unauthorized userspace applications.
+
+List of netlink commands with possible attributes
+=================================================
+
+Constants identifying command types for dpll device uses a
+``DPLL_CMD_`` prefix and suffix according to command purpose.
+The dpll device related attributes use a ``DPLL_A_`` prefix and
+suffix according to attribute purpose.
+
+ ==================================== =================================
+ ``DPLL_CMD_DEVICE_ID_GET`` command to get device ID
+ ``DPLL_A_MODULE_NAME`` attr module name of registerer
+ ``DPLL_A_CLOCK_ID`` attr Unique Clock Identifier
+ (EUI-64), as defined by the
+ IEEE 1588 standard
+ ``DPLL_A_TYPE`` attr type of dpll device
+ ==================================== =================================
+
+ ==================================== =================================
+ ``DPLL_CMD_DEVICE_GET`` command to get device info or
+ dump list of available devices
+ ``DPLL_A_ID`` attr unique dpll device ID
+ ``DPLL_A_MODULE_NAME`` attr module name of registerer
+ ``DPLL_A_CLOCK_ID`` attr Unique Clock Identifier
+ (EUI-64), as defined by the
+ IEEE 1588 standard
+ ``DPLL_A_MODE`` attr selection mode
+ ``DPLL_A_MODE_SUPPORTED`` attr available selection modes
+ ``DPLL_A_LOCK_STATUS`` attr dpll device lock status
+ ``DPLL_A_TEMP`` attr device temperature info
+ ``DPLL_A_TYPE`` attr type of dpll device
+ ==================================== =================================
+
+ ==================================== =================================
+ ``DPLL_CMD_DEVICE_SET`` command to set dpll device config
+ ``DPLL_A_ID`` attr internal dpll device index
+ ``DPLL_A_MODE`` attr selection mode to configure
+ ==================================== =================================
+
+Constants identifying command types for pins uses a
+``DPLL_CMD_PIN_`` prefix and suffix according to command purpose.
+The pin related attributes use a ``DPLL_A_PIN_`` prefix and suffix
+according to attribute purpose.
+
+ ==================================== =================================
+ ``DPLL_CMD_PIN_ID_GET`` command to get pin ID
+ ``DPLL_A_PIN_MODULE_NAME`` attr module name of registerer
+ ``DPLL_A_PIN_CLOCK_ID`` attr Unique Clock Identifier
+ (EUI-64), as defined by the
+ IEEE 1588 standard
+ ``DPLL_A_PIN_BOARD_LABEL`` attr pin board label provided
+ by registerer
+ ``DPLL_A_PIN_PANEL_LABEL`` attr pin panel label provided
+ by registerer
+ ``DPLL_A_PIN_PACKAGE_LABEL`` attr pin package label provided
+ by registerer
+ ``DPLL_A_PIN_TYPE`` attr type of a pin
+ ==================================== =================================
+
+ ==================================== ==================================
+ ``DPLL_CMD_PIN_GET`` command to get pin info or dump
+ list of available pins
+ ``DPLL_A_PIN_ID`` attr unique a pin ID
+ ``DPLL_A_PIN_MODULE_NAME`` attr module name of registerer
+ ``DPLL_A_PIN_CLOCK_ID`` attr Unique Clock Identifier
+ (EUI-64), as defined by the
+ IEEE 1588 standard
+ ``DPLL_A_PIN_BOARD_LABEL`` attr pin board label provided
+ by registerer
+ ``DPLL_A_PIN_PANEL_LABEL`` attr pin panel label provided
+ by registerer
+ ``DPLL_A_PIN_PACKAGE_LABEL`` attr pin package label provided
+ by registerer
+ ``DPLL_A_PIN_TYPE`` attr type of a pin
+ ``DPLL_A_PIN_FREQUENCY`` attr current frequency of a pin
+ ``DPLL_A_PIN_FREQUENCY_SUPPORTED`` nested attr provides supported
+ frequencies
+ ``DPLL_A_PIN_ANY_FREQUENCY_MIN`` attr minimum value of frequency
+ ``DPLL_A_PIN_ANY_FREQUENCY_MAX`` attr maximum value of frequency
+ ``DPLL_A_PIN_PARENT_DEVICE`` nested attr for each parent device
+ the pin is connected with
+ ``DPLL_A_PIN_PARENT_ID`` attr parent dpll device id
+ ``DPLL_A_PIN_PRIO`` attr priority of pin on the
+ dpll device
+ ``DPLL_A_PIN_STATE`` attr state of pin on the parent
+ dpll device
+ ``DPLL_A_PIN_DIRECTION`` attr direction of a pin on the
+ parent dpll device
+ ``DPLL_A_PIN_PARENT_PIN`` nested attr for each parent pin
+ the pin is connected with
+ ``DPLL_A_PIN_PARENT_ID`` attr parent pin id
+ ``DPLL_A_PIN_STATE`` attr state of pin on the parent
+ pin
+ ``DPLL_A_PIN_CAPABILITIES`` attr bitmask of pin capabilities
+ ==================================== ==================================
+
+ ==================================== =================================
+ ``DPLL_CMD_PIN_SET`` command to set pins configuration
+ ``DPLL_A_PIN_ID`` attr unique a pin ID
+ ``DPLL_A_PIN_FREQUENCY`` attr requested frequency of a pin
+ ``DPLL_A_PIN_PARENT_DEVICE`` nested attr for each parent dpll
+ device configuration request
+ ``DPLL_A_PIN_PARENT_ID`` attr parent dpll device id
+ ``DPLL_A_PIN_DIRECTION`` attr requested direction of a pin
+ ``DPLL_A_PIN_PRIO`` attr requested priority of pin on
+ the dpll device
+ ``DPLL_A_PIN_STATE`` attr requested state of pin on
+ the dpll device
+ ``DPLL_A_PIN_PARENT_PIN`` nested attr for each parent pin
+ configuration request
+ ``DPLL_A_PIN_PARENT_ID`` attr parent pin id
+ ``DPLL_A_PIN_STATE`` attr requested state of pin on
+ parent pin
+ ==================================== =================================
+
+Netlink dump requests
+=====================
+
+The ``DPLL_CMD_DEVICE_GET`` and ``DPLL_CMD_PIN_GET`` commands are
+capable of dump type netlink requests, in which case the response is in
+the same format as for their ``do`` request, but every device or pin
+registered in the system is returned.
+
+SET commands format
+===================
+
+``DPLL_CMD_DEVICE_SET`` - to target a dpll device, the user provides
+``DPLL_A_ID``, which is unique identifier of dpll device in the system,
+as well as parameter being configured (``DPLL_A_MODE``).
+
+``DPLL_CMD_PIN_SET`` - to target a pin user must provide a
+``DPLL_A_PIN_ID``, which is unique identifier of a pin in the system.
+Also configured pin parameters must be added.
+If ``DPLL_A_PIN_FREQUENCY`` is configured, this affects all the dpll
+devices that are connected with the pin, that is why frequency attribute
+shall not be enclosed in ``DPLL_A_PIN_PARENT_DEVICE``.
+Other attributes: ``DPLL_A_PIN_PRIO``, ``DPLL_A_PIN_STATE`` or
+``DPLL_A_PIN_DIRECTION`` must be enclosed in
+``DPLL_A_PIN_PARENT_DEVICE`` as their configuration relates to only one
+of parent dplls, targeted by ``DPLL_A_PIN_PARENT_ID`` attribute which is
+also required inside that nest.
+For MUX-type pins the ``DPLL_A_PIN_STATE`` attribute is configured in
+similar way, by enclosing required state in ``DPLL_A_PIN_PARENT_PIN``
+nested attribute and targeted parent pin id in ``DPLL_A_PIN_PARENT_ID``.
+
+In general, it is possible to configure multiple parameters at once, but
+internally each parameter change will be invoked separately, where order
+of configuration is not guaranteed by any means.
+
+Configuration pre-defined enums
+===============================
+
+.. kernel-doc:: include/uapi/linux/dpll.h
+
+Notifications
+=============
+
+dpll device can provide notifications regarding status changes of the
+device, i.e. lock status changes, input/output changes or other alarms.
+There is one multicast group that is used to notify user-space apps via
+netlink socket: ``DPLL_MCGRP_MONITOR``
+
+Notifications messages:
+
+ ============================== =====================================
+ ``DPLL_CMD_DEVICE_CREATE_NTF`` dpll device was created
+ ``DPLL_CMD_DEVICE_DELETE_NTF`` dpll device was deleted
+ ``DPLL_CMD_DEVICE_CHANGE_NTF`` dpll device has changed
+ ``DPLL_CMD_PIN_CREATE_NTF`` dpll pin was created
+ ``DPLL_CMD_PIN_DELETE_NTF`` dpll pin was deleted
+ ``DPLL_CMD_PIN_CHANGE_NTF`` dpll pin has changed
+ ============================== =====================================
+
+Events format is the same as for the corresponding get command.
+Format of ``DPLL_CMD_DEVICE_`` events is the same as response of
+``DPLL_CMD_DEVICE_GET``.
+Format of ``DPLL_CMD_PIN_`` events is same as response of
+``DPLL_CMD_PIN_GET``.
+
+Device driver implementation
+============================
+
+Device is allocated by dpll_device_get() call. Second call with the
+same arguments will not create new object but provides pointer to
+previously created device for given arguments, it also increases
+refcount of that object.
+Device is deallocated by dpll_device_put() call, which first
+decreases the refcount, once refcount is cleared the object is
+destroyed.
+
+Device should implement set of operations and register device via
+dpll_device_register() at which point it becomes available to the
+users. Multiple driver instances can obtain reference to it with
+dpll_device_get(), as well as register dpll device with their own
+ops and priv.
+
+The pins are allocated separately with dpll_pin_get(), it works
+similarly to dpll_device_get(). Function first creates object and then
+for each call with the same arguments only the object refcount
+increases. Also dpll_pin_put() works similarly to dpll_device_put().
+
+A pin can be registered with parent dpll device or parent pin, depending
+on hardware needs. Each registration requires registerer to provide set
+of pin callbacks, and private data pointer for calling them:
+
+- dpll_pin_register() - register pin with a dpll device,
+- dpll_pin_on_pin_register() - register pin with another MUX type pin.
+
+Notifications of adding or removing dpll devices are created within
+subsystem itself.
+Notifications about registering/deregistering pins are also invoked by
+the subsystem.
+Notifications about status changes either of dpll device or a pin are
+invoked in two ways:
+
+- after successful change was requested on dpll subsystem, the subsystem
+ calls corresponding notification,
+- requested by device driver with dpll_device_change_ntf() or
+ dpll_pin_change_ntf() when driver informs about the status change.
+
+The device driver using dpll interface is not required to implement all
+the callback operation. Nevertheless, there are few required to be
+implemented.
+Required dpll device level callback operations:
+
+- ``.mode_get``,
+- ``.lock_status_get``.
+
+Required pin level callback operations:
+
+- ``.state_on_dpll_get`` (pins registered with dpll device),
+- ``.state_on_pin_get`` (pins registered with parent pin),
+- ``.direction_get``.
+
+Every other operation handler is checked for existence and
+``-EOPNOTSUPP`` is returned in case of absence of specific handler.
+
+The simplest implementation is in the OCP TimeCard driver. The ops
+structures are defined like this:
+
+.. code-block:: c
+ static const struct dpll_device_ops dpll_ops = {
+ .lock_status_get = ptp_ocp_dpll_lock_status_get,
+ .mode_get = ptp_ocp_dpll_mode_get,
+ .mode_supported = ptp_ocp_dpll_mode_supported,
+ };
+
+ static const struct dpll_pin_ops dpll_pins_ops = {
+ .frequency_get = ptp_ocp_dpll_frequency_get,
+ .frequency_set = ptp_ocp_dpll_frequency_set,
+ .direction_get = ptp_ocp_dpll_direction_get,
+ .direction_set = ptp_ocp_dpll_direction_set,
+ .state_on_dpll_get = ptp_ocp_dpll_state_get,
+ };
+
+The registration part is then looks like this part:
+
+.. code-block:: c
+ clkid = pci_get_dsn(pdev);
+ bp->dpll = dpll_device_get(clkid, 0, THIS_MODULE);
+ if (IS_ERR(bp->dpll)) {
+ err = PTR_ERR(bp->dpll);
+ dev_err(&pdev->dev, "dpll_device_alloc failed\n");
+ goto out;
+ }
+
+ err = dpll_device_register(bp->dpll, DPLL_TYPE_PPS, &dpll_ops, bp);
+ if (err)
+ goto out;
+
+ for (i = 0; i < OCP_SMA_NUM; i++) {
+ bp->sma[i].dpll_pin = dpll_pin_get(clkid, i, THIS_MODULE, &bp->sma[i].dpll_prop);
+ if (IS_ERR(bp->sma[i].dpll_pin)) {
+ err = PTR_ERR(bp->dpll);
+ goto out_dpll;
+ }
+
+ err = dpll_pin_register(bp->dpll, bp->sma[i].dpll_pin, &dpll_pins_ops,
+ &bp->sma[i]);
+ if (err) {
+ dpll_pin_put(bp->sma[i].dpll_pin);
+ goto out_dpll;
+ }
+ }
+
+In the error path we have to rewind every allocation in the reverse order:
+
+.. code-block:: c
+ while (i) {
+ --i;
+ dpll_pin_unregister(bp->dpll, bp->sma[i].dpll_pin, &dpll_pins_ops, &bp->sma[i]);
+ dpll_pin_put(bp->sma[i].dpll_pin);
+ }
+ dpll_device_put(bp->dpll);
+
+More complex example can be found in Intel's ICE driver or nVidia's mlx5 driver.
+
+SyncE enablement
+================
+For SyncE enablement it is required to allow control over dpll device
+for a software application which monitors and configures the inputs of
+dpll device in response to current state of a dpll device and its
+inputs.
+In such scenario, dpll device input signal shall be also configurable
+to drive dpll with signal recovered from the PHY netdevice.
+This is done by exposing a pin to the netdevice - attaching pin to the
+netdevice itself with
+``netdev_dpll_pin_set(struct net_device *dev, struct dpll_pin *dpll_pin)``.
+Exposed pin id handle ``DPLL_A_PIN_ID`` is then identifiable by the user
+as it is attached to rtnetlink respond to get ``RTM_NEWLINK`` command in
+nested attribute ``IFLA_DPLL_PIN``.
diff --git a/Documentation/driver-api/index.rst b/Documentation/driver-api/index.rst
index 1e16a40da3ba..f549a68951d7 100644
--- a/Documentation/driver-api/index.rst
+++ b/Documentation/driver-api/index.rst
@@ -114,6 +114,7 @@ available subsections can be seen below.
zorro
hte/index
wmi
+ dpll
.. only:: subproject and html
diff --git a/Documentation/netlink/specs/dpll.yaml b/Documentation/netlink/specs/dpll.yaml
new file mode 100644
index 000000000000..8b86b28b47a6
--- /dev/null
+++ b/Documentation/netlink/specs/dpll.yaml
@@ -0,0 +1,488 @@
+# SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
+
+name: dpll
+
+doc: DPLL subsystem.
+
+definitions:
+ -
+ type: enum
+ name: mode
+ doc: |
+ working modes a dpll can support, differentiates if and how dpll selects
+ one of its inputs to syntonize with it, valid values for DPLL_A_MODE
+ attribute
+ entries:
+ -
+ name: manual
+ doc: input can be only selected by sending a request to dpll
+ value: 1
+ -
+ name: automatic
+ doc: highest prio input pin auto selected by dpll
+ render-max: true
+ -
+ type: enum
+ name: lock-status
+ doc: |
+ provides information of dpll device lock status, valid values for
+ DPLL_A_LOCK_STATUS attribute
+ entries:
+ -
+ name: unlocked
+ doc: |
+ dpll was not yet locked to any valid input (or forced by setting
+ DPLL_A_MODE to DPLL_MODE_DETACHED)
+ value: 1
+ -
+ name: locked
+ doc: |
+ dpll is locked to a valid signal, but no holdover available
+ -
+ name: locked-ho-acq
+ doc: |
+ dpll is locked and holdover acquired
+ -
+ name: holdover
+ doc: |
+ dpll is in holdover state - lost a valid lock or was forced
+ by disconnecting all the pins (latter possible only
+ when dpll lock-state was already DPLL_LOCK_STATUS_LOCKED_HO_ACQ,
+ if dpll lock-state was not DPLL_LOCK_STATUS_LOCKED_HO_ACQ, the
+ dpll's lock-state shall remain DPLL_LOCK_STATUS_UNLOCKED)
+ render-max: true
+ -
+ type: const
+ name: temp-divider
+ value: 1000
+ doc: |
+ temperature divider allowing userspace to calculate the
+ temperature as float with three digit decimal precision.
+ Value of (DPLL_A_TEMP / DPLL_TEMP_DIVIDER) is integer part of
+ temperature value.
+ Value of (DPLL_A_TEMP % DPLL_TEMP_DIVIDER) is fractional part of
+ temperature value.
+ -
+ type: enum
+ name: type
+ doc: type of dpll, valid values for DPLL_A_TYPE attribute
+ entries:
+ -
+ name: pps
+ doc: dpll produces Pulse-Per-Second signal
+ value: 1
+ -
+ name: eec
+ doc: dpll drives the Ethernet Equipment Clock
+ render-max: true
+ -
+ type: enum
+ name: pin-type
+ doc: |
+ defines possible types of a pin, valid values for DPLL_A_PIN_TYPE
+ attribute
+ entries:
+ -
+ name: mux
+ doc: aggregates another layer of selectable pins
+ value: 1
+ -
+ name: ext
+ doc: external input
+ -
+ name: synce-eth-port
+ doc: ethernet port PHY's recovered clock
+ -
+ name: int-oscillator
+ doc: device internal oscillator
+ -
+ name: gnss
+ doc: GNSS recovered clock
+ render-max: true
+ -
+ type: enum
+ name: pin-direction
+ doc: |
+ defines possible direction of a pin, valid values for
+ DPLL_A_PIN_DIRECTION attribute
+ entries:
+ -
+ name: input
+ doc: pin used as a input of a signal
+ value: 1
+ -
+ name: output
+ doc: pin used to output the signal
+ render-max: true
+ -
+ type: const
+ name: pin-frequency-1-hz
+ value: 1
+ -
+ type: const
+ name: pin-frequency-10-khz
+ value: 10000
+ -
+ type: const
+ name: pin-frequency-77_5-khz
+ value: 77500
+ -
+ type: const
+ name: pin-frequency-10-mhz
+ value: 10000000
+ -
+ type: enum
+ name: pin-state
+ doc: |
+ defines possible states of a pin, valid values for
+ DPLL_A_PIN_STATE attribute
+ entries:
+ -
+ name: connected
+ doc: pin connected, active input of phase locked loop
+ value: 1
+ -
+ name: disconnected
+ doc: pin disconnected, not considered as a valid input
+ -
+ name: selectable
+ doc: pin enabled for automatic input selection
+ render-max: true
+ -
+ type: flags
+ name: pin-capabilities
+ doc: |
+ defines possible capabilities of a pin, valid flags on
+ DPLL_A_PIN_CAPABILITIES attribute
+ entries:
+ -
+ name: direction-can-change
+ doc: pin direction can be changed
+ -
+ name: priority-can-change
+ doc: pin priority can be changed
+ -
+ name: state-can-change
+ doc: pin state can be changed
+
+attribute-sets:
+ -
+ name: dpll
+ enum-name: dpll_a
+ attributes:
+ -
+ name: id
+ type: u32
+ -
+ name: module-name
+ type: string
+ -
+ name: pad
+ type: pad
+ -
+ name: clock-id
+ type: u64
+ -
+ name: mode
+ type: u32
+ enum: mode
+ -
+ name: mode-supported
+ type: u32
+ enum: mode
+ multi-attr: true
+ -
+ name: lock-status
+ type: u32
+ enum: lock-status
+ -
+ name: temp
+ type: s32
+ -
+ name: type
+ type: u32
+ enum: type
+ -
+ name: pin
+ enum-name: dpll_a_pin
+ attributes:
+ -
+ name: id
+ type: u32
+ -
+ name: parent-id
+ type: u32
+ -
+ name: module-name
+ type: string
+ -
+ name: pad
+ type: pad
+ -
+ name: clock-id
+ type: u64
+ -
+ name: board-label
+ type: string
+ -
+ name: panel-label
+ type: string
+ -
+ name: package-label
+ type: string
+ -
+ name: type
+ type: u32
+ enum: pin-type
+ -
+ name: direction
+ type: u32
+ enum: pin-direction
+ -
+ name: frequency
+ type: u64
+ -
+ name: frequency-supported
+ type: nest
+ multi-attr: true
+ nested-attributes: frequency-range
+ -
+ name: frequency-min
+ type: u64
+ -
+ name: frequency-max
+ type: u64
+ -
+ name: prio
+ type: u32
+ -
+ name: state
+ type: u32
+ enum: pin-state
+ -
+ name: capabilities
+ type: u32
+ -
+ name: parent-device
+ type: nest
+ multi-attr: true
+ nested-attributes: pin-parent-device
+ -
+ name: parent-pin
+ type: nest
+ multi-attr: true
+ nested-attributes: pin-parent-pin
+ -
+ name: pin-parent-device
+ subset-of: pin
+ attributes:
+ -
+ name: parent-id
+ type: u32
+ -
+ name: direction
+ type: u32
+ -
+ name: prio
+ type: u32
+ -
+ name: state
+ type: u32
+ -
+ name: pin-parent-pin
+ subset-of: pin
+ attributes:
+ -
+ name: parent-id
+ type: u32
+ -
+ name: state
+ type: u32
+ -
+ name: frequency-range
+ subset-of: pin
+ attributes:
+ -
+ name: frequency-min
+ type: u64
+ -
+ name: frequency-max
+ type: u64
+
+operations:
+ enum-name: dpll_cmd
+ list:
+ -
+ name: device-id-get
+ doc: |
+ Get id of dpll device that matches given attributes
+ attribute-set: dpll
+ flags: [ admin-perm ]
+
+ do:
+ pre: dpll-lock-doit
+ post: dpll-unlock-doit
+ request:
+ attributes:
+ - module-name
+ - clock-id
+ - type
+ reply:
+ attributes:
+ - id
+
+ -
+ name: device-get
+ doc: |
+ Get list of DPLL devices (dump) or attributes of a single dpll device
+ attribute-set: dpll
+ flags: [ admin-perm ]
+
+ do:
+ pre: dpll-pre-doit
+ post: dpll-post-doit
+ request:
+ attributes:
+ - id
+ reply: &dev-attrs
+ attributes:
+ - id
+ - module-name
+ - mode
+ - mode-supported
+ - lock-status
+ - temp
+ - clock-id
+ - type
+
+ dump:
+ pre: dpll-lock-dumpit
+ post: dpll-unlock-dumpit
+ reply: *dev-attrs
+
+ -
+ name: device-set
+ doc: Set attributes for a DPLL device
+ attribute-set: dpll
+ flags: [ admin-perm ]
+
+ do:
+ pre: dpll-pre-doit
+ post: dpll-post-doit
+ request:
+ attributes:
+ - id
+ -
+ name: device-create-ntf
+ doc: Notification about device appearing
+ notify: device-get
+ mcgrp: monitor
+ -
+ name: device-delete-ntf
+ doc: Notification about device disappearing
+ notify: device-get
+ mcgrp: monitor
+ -
+ name: device-change-ntf
+ doc: Notification about device configuration being changed
+ notify: device-get
+ mcgrp: monitor
+ -
+ name: pin-id-get
+ doc: |
+ Get id of a pin that matches given attributes
+ attribute-set: pin
+ flags: [ admin-perm ]
+
+ do:
+ pre: dpll-lock-doit
+ post: dpll-unlock-doit
+ request:
+ attributes:
+ - module-name
+ - clock-id
+ - board-label
+ - panel-label
+ - package-label
+ - type
+ reply:
+ attributes:
+ - id
+
+ -
+ name: pin-get
+ doc: |
+ Get list of pins and its attributes.
+ - dump request without any attributes given - list all the pins in the
+ system
+ - dump request with target dpll - list all the pins registered with
+ a given dpll device
+ - do request with target dpll and target pin - single pin attributes
+ attribute-set: pin
+ flags: [ admin-perm ]
+
+ do:
+ pre: dpll-pin-pre-doit
+ post: dpll-pin-post-doit
+ request:
+ attributes:
+ - id
+ reply: &pin-attrs
+ attributes:
+ - id
+ - board-label
+ - panel-label
+ - package-label
+ - type
+ - frequency
+ - frequency-supported
+ - capabilities
+ - parent-device
+ - parent-pin
+
+ dump:
+ pre: dpll-lock-dumpit
+ post: dpll-unlock-dumpit
+ request:
+ attributes:
+ - id
+ reply: *pin-attrs
+
+ -
+ name: pin-set
+ doc: Set attributes of a target pin
+ attribute-set: pin
+ flags: [ admin-perm ]
+
+ do:
+ pre: dpll-pin-pre-doit
+ post: dpll-pin-post-doit
+ request:
+ attributes:
+ - id
+ - frequency
+ - direction
+ - prio
+ - state
+ - parent-device
+ - parent-pin
+ -
+ name: pin-create-ntf
+ doc: Notification about pin appearing
+ notify: pin-get
+ mcgrp: monitor
+ -
+ name: pin-delete-ntf
+ doc: Notification about pin disappearing
+ notify: pin-get
+ mcgrp: monitor
+ -
+ name: pin-change-ntf
+ doc: Notification about pin configuration being changed
+ notify: pin-get
+ mcgrp: monitor
+
+mcast-groups:
+ list:
+ -
+ name: monitor
diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml
index 1c7284fd535b..c46fcc78fc04 100644
--- a/Documentation/netlink/specs/netdev.yaml
+++ b/Documentation/netlink/specs/netdev.yaml
@@ -42,6 +42,19 @@ definitions:
doc:
This feature informs if netdev implements non-linear XDP buffer
support in ndo_xdp_xmit callback.
+ -
+ type: flags
+ name: xdp-rx-metadata
+ render-max: true
+ entries:
+ -
+ name: timestamp
+ doc:
+ Device is capable of exposing receive HW timestamp via bpf_xdp_metadata_rx_timestamp().
+ -
+ name: hash
+ doc:
+ Device is capable of exposing receive packet hash via bpf_xdp_metadata_rx_hash().
attribute-sets:
-
@@ -68,6 +81,13 @@ attribute-sets:
type: u32
checks:
min: 1
+ -
+ name: xdp-rx-metadata-features
+ doc: Bitmask of supported XDP receive metadata features.
+ See Documentation/networking/xdp-rx-metadata.rst for more details.
+ type: u64
+ enum: xdp-rx-metadata
+ enum-as-flags: true
operations:
list:
@@ -84,6 +104,7 @@ operations:
- ifindex
- xdp-features
- xdp-zc-max-segs
+ - xdp-rx-metadata-features
dump:
reply: *dev-all
-
diff --git a/Documentation/networking/device_drivers/ethernet/index.rst b/Documentation/networking/device_drivers/ethernet/index.rst
index 9827e816084b..43de285b8a92 100644
--- a/Documentation/networking/device_drivers/ethernet/index.rst
+++ b/Documentation/networking/device_drivers/ethernet/index.rst
@@ -32,6 +32,7 @@ Contents:
intel/e1000
intel/e1000e
intel/fm10k
+ intel/idpf
intel/igb
intel/igbvf
intel/ixgbe
diff --git a/Documentation/networking/device_drivers/ethernet/intel/idpf.rst b/Documentation/networking/device_drivers/ethernet/intel/idpf.rst
new file mode 100644
index 000000000000..adb16e2abd21
--- /dev/null
+++ b/Documentation/networking/device_drivers/ethernet/intel/idpf.rst
@@ -0,0 +1,160 @@
+.. SPDX-License-Identifier: GPL-2.0+
+
+==========================================================================
+idpf Linux* Base Driver for the Intel(R) Infrastructure Data Path Function
+==========================================================================
+
+Intel idpf Linux driver.
+Copyright(C) 2023 Intel Corporation.
+
+.. contents::
+
+The idpf driver serves as both the Physical Function (PF) and Virtual Function
+(VF) driver for the Intel(R) Infrastructure Data Path Function.
+
+Driver information can be obtained using ethtool, lspci, and ip.
+
+For questions related to hardware requirements, refer to the documentation
+supplied with your Intel adapter. All hardware requirements listed apply to use
+with Linux.
+
+
+Identifying Your Adapter
+========================
+For information on how to identify your adapter, and for the latest Intel
+network drivers, refer to the Intel Support website:
+http://www.intel.com/support
+
+
+Additional Features and Configurations
+======================================
+
+ethtool
+-------
+The driver utilizes the ethtool interface for driver configuration and
+diagnostics, as well as displaying statistical information. The latest ethtool
+version is required for this functionality. If you don't have one yet, you can
+obtain it at:
+https://kernel.org/pub/software/network/ethtool/
+
+
+Viewing Link Messages
+---------------------
+Link messages will not be displayed to the console if the distribution is
+restricting system messages. In order to see network driver link messages on
+your console, set dmesg to eight by entering the following::
+
+ # dmesg -n 8
+
+.. note::
+ This setting is not saved across reboots.
+
+
+Jumbo Frames
+------------
+Jumbo Frames support is enabled by changing the Maximum Transmission Unit (MTU)
+to a value larger than the default value of 1500.
+
+Use the ip command to increase the MTU size. For example, enter the following
+where <ethX> is the interface number::
+
+ # ip link set mtu 9000 dev <ethX>
+ # ip link set up dev <ethX>
+
+.. note::
+ The maximum MTU setting for jumbo frames is 9706. This corresponds to the
+ maximum jumbo frame size of 9728 bytes.
+
+.. note::
+ This driver will attempt to use multiple page sized buffers to receive
+ each jumbo packet. This should help to avoid buffer starvation issues when
+ allocating receive packets.
+
+.. note::
+ Packet loss may have a greater impact on throughput when you use jumbo
+ frames. If you observe a drop in performance after enabling jumbo frames,
+ enabling flow control may mitigate the issue.
+
+
+Performance Optimization
+========================
+Driver defaults are meant to fit a wide variety of workloads, but if further
+optimization is required, we recommend experimenting with the following
+settings.
+
+
+Interrupt Rate Limiting
+-----------------------
+This driver supports an adaptive interrupt throttle rate (ITR) mechanism that
+is tuned for general workloads. The user can customize the interrupt rate
+control for specific workloads, via ethtool, adjusting the number of
+microseconds between interrupts.
+
+To set the interrupt rate manually, you must disable adaptive mode::
+
+ # ethtool -C <ethX> adaptive-rx off adaptive-tx off
+
+For lower CPU utilization:
+ - Disable adaptive ITR and lower Rx and Tx interrupts. The examples below
+ affect every queue of the specified interface.
+
+ - Setting rx-usecs and tx-usecs to 80 will limit interrupts to about
+ 12,500 interrupts per second per queue::
+
+ # ethtool -C <ethX> adaptive-rx off adaptive-tx off rx-usecs 80
+ tx-usecs 80
+
+For reduced latency:
+ - Disable adaptive ITR and ITR by setting rx-usecs and tx-usecs to 0
+ using ethtool::
+
+ # ethtool -C <ethX> adaptive-rx off adaptive-tx off rx-usecs 0
+ tx-usecs 0
+
+Per-queue interrupt rate settings:
+ - The following examples are for queues 1 and 3, but you can adjust other
+ queues.
+
+ - To disable Rx adaptive ITR and set static Rx ITR to 10 microseconds or
+ about 100,000 interrupts/second, for queues 1 and 3::
+
+ # ethtool --per-queue <ethX> queue_mask 0xa --coalesce adaptive-rx off
+ rx-usecs 10
+
+ - To show the current coalesce settings for queues 1 and 3::
+
+ # ethtool --per-queue <ethX> queue_mask 0xa --show-coalesce
+
+
+
+Virtualized Environments
+------------------------
+In addition to the other suggestions in this section, the following may be
+helpful to optimize performance in VMs.
+
+ - Using the appropriate mechanism (vcpupin) in the VM, pin the CPUs to
+ individual LCPUs, making sure to use a set of CPUs included in the
+ device's local_cpulist: /sys/class/net/<ethX>/device/local_cpulist.
+
+ - Configure as many Rx/Tx queues in the VM as available. (See the idpf driver
+ documentation for the number of queues supported.) For example::
+
+ # ethtool -L <virt_interface> rx <max> tx <max>
+
+
+Support
+=======
+For general information, go to the Intel support website at:
+http://www.intel.com/support/
+
+If an issue is identified with the released source code on a supported kernel
+with a supported adapter, email the specific information related to the issue
+to intel-wired-lan@lists.osuosl.org.
+
+
+Trademarks
+==========
+Intel is a trademark or registered trademark of Intel Corporation or its
+subsidiaries in the United States and/or other countries.
+
+* Other names and brands may be claimed as the property of others.
diff --git a/Documentation/networking/filter.rst b/Documentation/networking/filter.rst
index f69da5074860..7d8c5380492f 100644
--- a/Documentation/networking/filter.rst
+++ b/Documentation/networking/filter.rst
@@ -650,8 +650,8 @@ before a conversion to the new layout is being done behind the scenes!
Currently, the classic BPF format is being used for JITing on most
32-bit architectures, whereas x86-64, aarch64, s390x, powerpc64,
-sparc64, arm32, riscv64, riscv32 perform JIT compilation from eBPF
-instruction set.
+sparc64, arm32, riscv64, riscv32, loongarch64 perform JIT compilation
+from eBPF instruction set.
Testing
-------
diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst
index a66054d0763a..5bfa1837968c 100644
--- a/Documentation/networking/ip-sysctl.rst
+++ b/Documentation/networking/ip-sysctl.rst
@@ -745,6 +745,13 @@ tcp_comp_sack_nr - INTEGER
Default : 44
+tcp_backlog_ack_defer - BOOLEAN
+ If set, user thread processing socket backlog tries sending
+ one ACK for the whole queue. This helps to avoid potential
+ long latencies at end of a TCP socket syscall.
+
+ Default : true
+
tcp_slow_start_after_idle - BOOLEAN
If set, provide RFC2861 behavior and time out the congestion
window after an idle period. An idle period is defined at
diff --git a/Documentation/networking/xdp-rx-metadata.rst b/Documentation/networking/xdp-rx-metadata.rst
index 25ce72af81c2..205696780b78 100644
--- a/Documentation/networking/xdp-rx-metadata.rst
+++ b/Documentation/networking/xdp-rx-metadata.rst
@@ -105,6 +105,13 @@ bpf_tail_call
Adding programs that access metadata kfuncs to the ``BPF_MAP_TYPE_PROG_ARRAY``
is currently not supported.
+Supported Devices
+=================
+
+It is possible to query which kfunc the particular netdev implements via
+netlink. See ``xdp-rx-metadata-features`` attribute set in
+``Documentation/netlink/specs/netdev.yaml``.
+
Example
=======
diff --git a/MAINTAINERS b/MAINTAINERS
index dd5de540ec0b..80a46642a662 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3614,9 +3614,10 @@ F: Documentation/devicetree/bindings/iio/accel/bosch,bma400.yaml
F: drivers/iio/accel/bma400*
BPF JIT for ARM
-M: Shubham Bansal <illusionist.neo@gmail.com>
+M: Russell King <linux@armlinux.org.uk>
+M: Puranjay Mohan <puranjay12@gmail.com>
L: bpf@vger.kernel.org
-S: Odd Fixes
+S: Maintained
F: arch/arm/net/
BPF JIT for ARM64
@@ -4338,8 +4339,7 @@ F: drivers/net/ethernet/broadcom/bcmsysport.*
F: drivers/net/ethernet/broadcom/unimac.h
BROADCOM TG3 GIGABIT ETHERNET DRIVER
-M: Siva Reddy Kallam <siva.kallam@broadcom.com>
-M: Prashant Sreedharan <prashant@broadcom.com>
+M: Pavan Chebbi <pavan.chebbi@broadcom.com>
M: Michael Chan <mchan@broadcom.com>
L: netdev@vger.kernel.org
S: Supported
@@ -6352,6 +6352,17 @@ F: Documentation/networking/device_drivers/ethernet/freescale/dpaa2/switch-drive
F: drivers/net/ethernet/freescale/dpaa2/dpaa2-switch*
F: drivers/net/ethernet/freescale/dpaa2/dpsw*
+DPLL SUBSYSTEM
+M: Vadim Fedorenko <vadim.fedorenko@linux.dev>
+M: Arkadiusz Kubalewski <arkadiusz.kubalewski@intel.com>
+M: Jiri Pirko <jiri@resnulli.us>
+L: netdev@vger.kernel.org
+S: Supported
+F: Documentation/driver-api/dpll.rst
+F: drivers/dpll/*
+F: include/net/dpll.h
+F: include/uapi/linux/dpll.h
+
DRBD DRIVER
M: Philipp Reisner <philipp.reisner@linbit.com>
M: Lars Ellenberg <lars.ellenberg@linbit.com>
@@ -14347,9 +14358,11 @@ MIPS/LOONGSON1 ARCHITECTURE
M: Keguang Zhang <keguang.zhang@gmail.com>
L: linux-mips@vger.kernel.org
S: Maintained
+F: Documentation/devicetree/bindings/*/loongson,ls1*.yaml
F: arch/mips/include/asm/mach-loongson32/
F: arch/mips/loongson32/
F: drivers/*/*loongson1*
+F: drivers/net/ethernet/stmicro/stmmac/dwmac-loongson1.c
MIPS/LOONGSON2EF ARCHITECTURE
M: Jiaxun Yang <jiaxun.yang@flygoat.com>
diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
index 6a1c9fca5260..1d672457d02f 100644
--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit_32.c
@@ -2,6 +2,7 @@
/*
* Just-In-Time compiler for eBPF filters on 32bit ARM
*
+ * Copyright (c) 2023 Puranjay Mohan <puranjay12@gmail.com>
* Copyright (c) 2017 Shubham Bansal <illusionist.neo@gmail.com>
* Copyright (c) 2011 Mircea Gherzan <mgherzan@gmail.com>
*/
@@ -15,6 +16,7 @@
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/if_vlan.h>
+#include <linux/math64.h>
#include <asm/cacheflush.h>
#include <asm/hwcap.h>
@@ -228,6 +230,44 @@ static u32 jit_mod32(u32 dividend, u32 divisor)
return dividend % divisor;
}
+static s32 jit_sdiv32(s32 dividend, s32 divisor)
+{
+ return dividend / divisor;
+}
+
+static s32 jit_smod32(s32 dividend, s32 divisor)
+{
+ return dividend % divisor;
+}
+
+/* Wrappers for 64-bit div/mod */
+static u64 jit_udiv64(u64 dividend, u64 divisor)
+{
+ return div64_u64(dividend, divisor);
+}
+
+static u64 jit_mod64(u64 dividend, u64 divisor)
+{
+ u64 rem;
+
+ div64_u64_rem(dividend, divisor, &rem);
+ return rem;
+}
+
+static s64 jit_sdiv64(s64 dividend, s64 divisor)
+{
+ return div64_s64(dividend, divisor);
+}
+
+static s64 jit_smod64(s64 dividend, s64 divisor)
+{
+ u64 q;
+
+ q = div64_s64(dividend, divisor);
+
+ return dividend - q * divisor;
+}
+
static inline void _emit(int cond, u32 inst, struct jit_ctx *ctx)
{
inst |= (cond << 28);
@@ -333,6 +373,9 @@ static u32 arm_bpf_ldst_imm8(u32 op, u8 rt, u8 rn, s16 imm8)
#define ARM_LDRD_I(rt, rn, off) arm_bpf_ldst_imm8(ARM_INST_LDRD_I, rt, rn, off)
#define ARM_LDRH_I(rt, rn, off) arm_bpf_ldst_imm8(ARM_INST_LDRH_I, rt, rn, off)
+#define ARM_LDRSH_I(rt, rn, off) arm_bpf_ldst_imm8(ARM_INST_LDRSH_I, rt, rn, off)
+#define ARM_LDRSB_I(rt, rn, off) arm_bpf_ldst_imm8(ARM_INST_LDRSB_I, rt, rn, off)
+
#define ARM_STR_I(rt, rn, off) arm_bpf_ldst_imm12(ARM_INST_STR_I, rt, rn, off)
#define ARM_STRB_I(rt, rn, off) arm_bpf_ldst_imm12(ARM_INST_STRB_I, rt, rn, off)
#define ARM_STRD_I(rt, rn, off) arm_bpf_ldst_imm8(ARM_INST_STRD_I, rt, rn, off)
@@ -474,17 +517,18 @@ static inline int epilogue_offset(const struct jit_ctx *ctx)
return to - from - 2;
}
-static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op)
+static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op, u8 sign)
{
const int exclude_mask = BIT(ARM_R0) | BIT(ARM_R1);
const s8 *tmp = bpf2a32[TMP_REG_1];
+ u32 dst;
#if __LINUX_ARM_ARCH__ == 7
if (elf_hwcap & HWCAP_IDIVA) {
- if (op == BPF_DIV)
- emit(ARM_UDIV(rd, rm, rn), ctx);
- else {
- emit(ARM_UDIV(ARM_IP, rm, rn), ctx);
+ if (op == BPF_DIV) {
+ emit(sign ? ARM_SDIV(rd, rm, rn) : ARM_UDIV(rd, rm, rn), ctx);
+ } else {
+ emit(sign ? ARM_SDIV(ARM_IP, rm, rn) : ARM_UDIV(ARM_IP, rm, rn), ctx);
emit(ARM_MLS(rd, rn, ARM_IP, rm), ctx);
}
return;
@@ -512,8 +556,19 @@ static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op)
emit(ARM_PUSH(CALLER_MASK & ~exclude_mask), ctx);
/* Call appropriate function */
- emit_mov_i(ARM_IP, op == BPF_DIV ?
- (u32)jit_udiv32 : (u32)jit_mod32, ctx);
+ if (sign) {
+ if (op == BPF_DIV)
+ dst = (u32)jit_sdiv32;
+ else
+ dst = (u32)jit_smod32;
+ } else {
+ if (op == BPF_DIV)
+ dst = (u32)jit_udiv32;
+ else
+ dst = (u32)jit_mod32;
+ }
+
+ emit_mov_i(ARM_IP, dst, ctx);
emit_blx_r(ARM_IP, ctx);
/* Restore caller-saved registers from stack */
@@ -530,6 +585,78 @@ static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op)
emit(ARM_MOV_R(ARM_R0, tmp[1]), ctx);
}
+static inline void emit_udivmod64(const s8 *rd, const s8 *rm, const s8 *rn, struct jit_ctx *ctx,
+ u8 op, u8 sign)
+{
+ u32 dst;
+
+ /* Push caller-saved registers on stack */
+ emit(ARM_PUSH(CALLER_MASK), ctx);
+
+ /*
+ * As we are implementing 64-bit div/mod as function calls, We need to put the dividend in
+ * R0-R1 and the divisor in R2-R3. As we have already pushed these registers on the stack,
+ * we can recover them later after returning from the function call.
+ */
+ if (rm[1] != ARM_R0 || rn[1] != ARM_R2) {
+ /*
+ * Move Rm to {R1, R0} if it is not already there.
+ */
+ if (rm[1] != ARM_R0) {
+ if (rn[1] == ARM_R0)
+ emit(ARM_PUSH(BIT(ARM_R0) | BIT(ARM_R1)), ctx);
+ emit(ARM_MOV_R(ARM_R1, rm[0]), ctx);
+ emit(ARM_MOV_R(ARM_R0, rm[1]), ctx);
+ if (rn[1] == ARM_R0) {
+ emit(ARM_POP(BIT(ARM_R2) | BIT(ARM_R3)), ctx);
+ goto cont;
+ }
+ }
+ /*
+ * Move Rn to {R3, R2} if it is not already there.
+ */
+ if (rn[1] != ARM_R2) {
+ emit(ARM_MOV_R(ARM_R3, rn[0]), ctx);
+ emit(ARM_MOV_R(ARM_R2, rn[1]), ctx);
+ }
+ }
+
+cont:
+
+ /* Call appropriate function */
+ if (sign) {
+ if (op == BPF_DIV)
+ dst = (u32)jit_sdiv64;
+ else
+ dst = (u32)jit_smod64;
+ } else {
+ if (op == BPF_DIV)
+ dst = (u32)jit_udiv64;
+ else
+ dst = (u32)jit_mod64;
+ }
+
+ emit_mov_i(ARM_IP, dst, ctx);
+ emit_blx_r(ARM_IP, ctx);
+
+ /* Save return value */
+ if (rd[1] != ARM_R0) {
+ emit(ARM_MOV_R(rd[0], ARM_R1), ctx);
+ emit(ARM_MOV_R(rd[1], ARM_R0), ctx);
+ }
+
+ /* Recover {R3, R2} and {R1, R0} from stack if they are not Rd */
+ if (rd[1] != ARM_R0 && rd[1] != ARM_R2) {
+ emit(ARM_POP(CALLER_MASK), ctx);
+ } else if (rd[1] != ARM_R0) {
+ emit(ARM_POP(BIT(ARM_R0) | BIT(ARM_R1)), ctx);
+ emit(ARM_ADD_I(ARM_SP, ARM_SP, 8), ctx);
+ } else {
+ emit(ARM_ADD_I(ARM_SP, ARM_SP, 8), ctx);
+ emit(ARM_POP(BIT(ARM_R2) | BIT(ARM_R3)), ctx);
+ }
+}
+
/* Is the translated BPF register on stack? */
static bool is_stacked(s8 reg)
{
@@ -744,12 +871,16 @@ static inline void emit_a32_alu_r64(const bool is64, const s8 dst[],
}
/* dst = src (4 bytes)*/
-static inline void emit_a32_mov_r(const s8 dst, const s8 src,
+static inline void emit_a32_mov_r(const s8 dst, const s8 src, const u8 off,
struct jit_ctx *ctx) {
const s8 *tmp = bpf2a32[TMP_REG_1];
s8 rt;
rt = arm_bpf_get_reg32(src, tmp[0], ctx);
+ if (off && off != 32) {
+ emit(ARM_LSL_I(rt, rt, 32 - off), ctx);
+ emit(ARM_ASR_I(rt, rt, 32 - off), ctx);
+ }
arm_bpf_put_reg32(dst, rt, ctx);
}
@@ -758,15 +889,15 @@ static inline void emit_a32_mov_r64(const bool is64, const s8 dst[],
const s8 src[],
struct jit_ctx *ctx) {
if (!is64) {
- emit_a32_mov_r(dst_lo, src_lo, ctx);
+ emit_a32_mov_r(dst_lo, src_lo, 0, ctx);
if (!ctx->prog->aux->verifier_zext)
/* Zero out high 4 bytes */
emit_a32_mov_i(dst_hi, 0, ctx);
} else if (__LINUX_ARM_ARCH__ < 6 &&
ctx->cpu_architecture < CPU_ARCH_ARMv5TE) {
/* complete 8 byte move */
- emit_a32_mov_r(dst_lo, src_lo, ctx);
- emit_a32_mov_r(dst_hi, src_hi, ctx);
+ emit_a32_mov_r(dst_lo, src_lo, 0, ctx);
+ emit_a32_mov_r(dst_hi, src_hi, 0, ctx);
} else if (is_stacked(src_lo) && is_stacked(dst_lo)) {
const u8 *tmp = bpf2a32[TMP_REG_1];
@@ -782,6 +913,24 @@ static inline void emit_a32_mov_r64(const bool is64, const s8 dst[],
}
}
+/* dst = (signed)src */
+static inline void emit_a32_movsx_r64(const bool is64, const u8 off, const s8 dst[], const s8 src[],
+ struct jit_ctx *ctx) {
+ const s8 *tmp = bpf2a32[TMP_REG_1];
+ const s8 *rt;
+
+ rt = arm_bpf_get_reg64(dst, tmp, ctx);
+
+ emit_a32_mov_r(dst_lo, src_lo, off, ctx);
+ if (!is64) {
+ if (!ctx->prog->aux->verifier_zext)
+ /* Zero out high 4 bytes */
+ emit_a32_mov_i(dst_hi, 0, ctx);
+ } else {
+ emit(ARM_ASR_I(rt[0], rt[1], 31), ctx);
+ }
+}
+
/* Shift operations */
static inline void emit_a32_alu_i(const s8 dst, const u32 val,
struct jit_ctx *ctx, const u8 op) {
@@ -1026,6 +1175,24 @@ static bool is_ldst_imm(s16 off, const u8 size)
return -off_max <= off && off <= off_max;
}
+static bool is_ldst_imm8(s16 off, const u8 size)
+{
+ s16 off_max = 0;
+
+ switch (size) {
+ case BPF_B:
+ off_max = 0xff;
+ break;
+ case BPF_W:
+ off_max = 0xfff;
+ break;
+ case BPF_H:
+ off_max = 0xff;
+ break;
+ }
+ return -off_max <= off && off <= off_max;
+}
+
/* *(size *)(dst + off) = src */
static inline void emit_str_r(const s8 dst, const s8 src[],
s16 off, struct jit_ctx *ctx, const u8 sz){
@@ -1105,6 +1272,50 @@ static inline void emit_ldx_r(const s8 dst[], const s8 src,
arm_bpf_put_reg64(dst, rd, ctx);
}
+/* dst = *(signed size*)(src + off) */
+static inline void emit_ldsx_r(const s8 dst[], const s8 src,
+ s16 off, struct jit_ctx *ctx, const u8 sz){
+ const s8 *tmp = bpf2a32[TMP_REG_1];
+ const s8 *rd = is_stacked(dst_lo) ? tmp : dst;
+ s8 rm = src;
+ int add_off;
+
+ if (!is_ldst_imm8(off, sz)) {
+ /*
+ * offset does not fit in the load/store immediate,
+ * construct an ADD instruction to apply the offset.
+ */
+ add_off = imm8m(off);
+ if (add_off > 0) {
+ emit(ARM_ADD_I(tmp[0], src, add_off), ctx);
+ rm = tmp[0];
+ } else {
+ emit_a32_mov_i(tmp[0], off, ctx);
+ emit(ARM_ADD_R(tmp[0], tmp[0], src), ctx);
+ rm = tmp[0];
+ }
+ off = 0;
+ }
+
+ switch (sz) {
+ case BPF_B:
+ /* Load a Byte with sign extension*/
+ emit(ARM_LDRSB_I(rd[1], rm, off), ctx);
+ break;
+ case BPF_H:
+ /* Load a HalfWord with sign extension*/
+ emit(ARM_LDRSH_I(rd[1], rm, off), ctx);
+ break;
+ case BPF_W:
+ /* Load a Word*/
+ emit(ARM_LDR_I(rd[1], rm, off), ctx);
+ break;
+ }
+ /* Carry the sign extension to upper 32 bits */
+ emit(ARM_ASR_I(rd[0], rd[1], 31), ctx);
+ arm_bpf_put_reg64(dst, rd, ctx);
+}
+
/* Arithmatic Operation */
static inline void emit_ar_r(const u8 rd, const u8 rt, const u8 rm,
const u8 rn, struct jit_ctx *ctx, u8 op,
@@ -1385,7 +1596,10 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
emit_a32_mov_i(dst_hi, 0, ctx);
break;
}
- emit_a32_mov_r64(is64, dst, src, ctx);
+ if (insn->off)
+ emit_a32_movsx_r64(is64, insn->off, dst, src, ctx);
+ else
+ emit_a32_mov_r64(is64, dst, src, ctx);
break;
case BPF_K:
/* Sign-extend immediate value to destination reg */
@@ -1461,7 +1675,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
rt = src_lo;
break;
}
- emit_udivmod(rd_lo, rd_lo, rt, ctx, BPF_OP(code));
+ emit_udivmod(rd_lo, rd_lo, rt, ctx, BPF_OP(code), off);
arm_bpf_put_reg32(dst_lo, rd_lo, ctx);
if (!ctx->prog->aux->verifier_zext)
emit_a32_mov_i(dst_hi, 0, ctx);
@@ -1470,7 +1684,19 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
case BPF_ALU64 | BPF_DIV | BPF_X:
case BPF_ALU64 | BPF_MOD | BPF_K:
case BPF_ALU64 | BPF_MOD | BPF_X:
- goto notyet;
+ rd = arm_bpf_get_reg64(dst, tmp2, ctx);
+ switch (BPF_SRC(code)) {
+ case BPF_X:
+ rs = arm_bpf_get_reg64(src, tmp, ctx);
+ break;
+ case BPF_K:
+ rs = tmp;
+ emit_a32_mov_se_i64(is64, rs, imm, ctx);
+ break;
+ }
+ emit_udivmod64(rd, rd, rs, ctx, BPF_OP(code), off);
+ arm_bpf_put_reg64(dst, rd, ctx);
+ break;
/* dst = dst << imm */
/* dst = dst >> imm */
/* dst = dst >> imm (signed) */
@@ -1545,10 +1771,12 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
break;
/* dst = htole(dst) */
/* dst = htobe(dst) */
- case BPF_ALU | BPF_END | BPF_FROM_LE:
- case BPF_ALU | BPF_END | BPF_FROM_BE:
+ case BPF_ALU | BPF_END | BPF_FROM_LE: /* also BPF_TO_LE */
+ case BPF_ALU | BPF_END | BPF_FROM_BE: /* also BPF_TO_BE */
+ /* dst = bswap(dst) */
+ case BPF_ALU64 | BPF_END | BPF_FROM_LE: /* also BPF_TO_LE */
rd = arm_bpf_get_reg64(dst, tmp, ctx);
- if (BPF_SRC(code) == BPF_FROM_LE)
+ if (BPF_SRC(code) == BPF_FROM_LE && BPF_CLASS(code) != BPF_ALU64)
goto emit_bswap_uxt;
switch (imm) {
case 16:
@@ -1603,8 +1831,15 @@ exit:
case BPF_LDX | BPF_MEM | BPF_H:
case BPF_LDX | BPF_MEM | BPF_B:
case BPF_LDX | BPF_MEM | BPF_DW:
+ /* LDSX: dst = *(signed size *)(src + off) */
+ case BPF_LDX | BPF_MEMSX | BPF_B:
+ case BPF_LDX | BPF_MEMSX | BPF_H:
+ case BPF_LDX | BPF_MEMSX | BPF_W:
rn = arm_bpf_get_reg32(src_lo, tmp2[1], ctx);
- emit_ldx_r(dst, rn, off, ctx, BPF_SIZE(code));
+ if (BPF_MODE(insn->code) == BPF_MEMSX)
+ emit_ldsx_r(dst, rn, off, ctx, BPF_SIZE(code));
+ else
+ emit_ldx_r(dst, rn, off, ctx, BPF_SIZE(code));
break;
/* speculation barrier */
case BPF_ST | BPF_NOSPEC:
@@ -1761,10 +1996,15 @@ go_jmp:
break;
/* JMP OFF */
case BPF_JMP | BPF_JA:
+ case BPF_JMP32 | BPF_JA:
{
- if (off == 0)
+ if (BPF_CLASS(code) == BPF_JMP32 && imm != 0)
+ jmp_offset = bpf2a32_offset(i + imm, i, ctx);
+ else if (BPF_CLASS(code) == BPF_JMP && off != 0)
+ jmp_offset = bpf2a32_offset(i + off, i, ctx);
+ else
break;
- jmp_offset = bpf2a32_offset(i+off, i, ctx);
+
check_imm24(jmp_offset);
emit(ARM_B(jmp_offset), ctx);
break;
diff --git a/arch/arm/net/bpf_jit_32.h b/arch/arm/net/bpf_jit_32.h
index e0b593a1498d..438f0e1f91a0 100644
--- a/arch/arm/net/bpf_jit_32.h
+++ b/arch/arm/net/bpf_jit_32.h
@@ -79,9 +79,11 @@
#define ARM_INST_LDST__IMM12 0x00000fff
#define ARM_INST_LDRB_I 0x05500000
#define ARM_INST_LDRB_R 0x07d00000
+#define ARM_INST_LDRSB_I 0x015000d0
#define ARM_INST_LDRD_I 0x014000d0
#define ARM_INST_LDRH_I 0x015000b0
#define ARM_INST_LDRH_R 0x019000b0
+#define ARM_INST_LDRSH_I 0x015000f0
#define ARM_INST_LDR_I 0x05100000
#define ARM_INST_LDR_R 0x07900000
@@ -137,6 +139,7 @@
#define ARM_INST_TST_I 0x03100000
#define ARM_INST_UDIV 0x0730f010
+#define ARM_INST_SDIV 0x0710f010
#define ARM_INST_UMULL 0x00800090
@@ -265,6 +268,7 @@
#define ARM_TST_I(rn, imm) _AL3_I(ARM_INST_TST, 0, rn, imm)
#define ARM_UDIV(rd, rn, rm) (ARM_INST_UDIV | (rd) << 16 | (rn) | (rm) << 8)
+#define ARM_SDIV(rd, rn, rm) (ARM_INST_SDIV | (rd) << 16 | (rn) | (rm) << 8)
#define ARM_UMULL(rd_lo, rd_hi, rn, rm) (ARM_INST_UMULL | (rd_hi) << 16 \
| (rd_lo) << 12 | (rm) << 8 | rn)
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 150d1c6543f7..7d4af64e3982 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -288,7 +288,7 @@ static bool is_lsi_offset(int offset, int scale)
static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf)
{
const struct bpf_prog *prog = ctx->prog;
- const bool is_main_prog = prog->aux->func_idx == 0;
+ const bool is_main_prog = !bpf_is_subprog(prog);
const u8 r6 = bpf2a64[BPF_REG_6];
const u8 r7 = bpf2a64[BPF_REG_7];
const u8 r8 = bpf2a64[BPF_REG_8];
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index e507692e51e7..bf06b7283f0c 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -556,7 +556,7 @@ static void bpf_jit_prologue(struct bpf_jit *jit, struct bpf_prog *fp,
EMIT6_PCREL_RILC(0xc0040000, 0, jit->prologue_plt);
jit->prologue_plt_ret = jit->prg;
- if (fp->aux->func_idx == 0) {
+ if (!bpf_is_subprog(fp)) {
/* Initialize the tail call counter in the main program. */
/* xc STK_OFF_TCCNT(4,%r15),STK_OFF_TCCNT(%r15) */
_EMIT6(0xd703f000 | STK_OFF_TCCNT, 0xf000 | STK_OFF_TCCNT);
@@ -670,15 +670,18 @@ static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth)
static int get_probe_mem_regno(const u8 *insn)
{
/*
- * insn must point to llgc, llgh, llgf or lg, which have destination
- * register at the same position.
+ * insn must point to llgc, llgh, llgf, lg, lgb, lgh or lgf, which have
+ * destination register at the same position.
*/
- if (insn[0] != 0xe3) /* common llgc, llgh, llgf and lg prefix */
+ if (insn[0] != 0xe3) /* common prefix */
return -1;
if (insn[5] != 0x90 && /* llgc */
insn[5] != 0x91 && /* llgh */
insn[5] != 0x16 && /* llgf */
- insn[5] != 0x04) /* lg */
+ insn[5] != 0x04 && /* lg */
+ insn[5] != 0x77 && /* lgb */
+ insn[5] != 0x15 && /* lgh */
+ insn[5] != 0x14) /* lgf */
return -1;
return insn[1] >> 4;
}
@@ -776,6 +779,7 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
int i, bool extra_pass, u32 stack_depth)
{
struct bpf_insn *insn = &fp->insnsi[i];
+ s16 branch_oc_off = insn->off;
u32 dst_reg = insn->dst_reg;
u32 src_reg = insn->src_reg;
int last, insn_count = 1;
@@ -788,22 +792,55 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
int err;
if (BPF_CLASS(insn->code) == BPF_LDX &&
- BPF_MODE(insn->code) == BPF_PROBE_MEM)
+ (BPF_MODE(insn->code) == BPF_PROBE_MEM ||
+ BPF_MODE(insn->code) == BPF_PROBE_MEMSX))
probe_prg = jit->prg;
switch (insn->code) {
/*
* BPF_MOV
*/
- case BPF_ALU | BPF_MOV | BPF_X: /* dst = (u32) src */
- /* llgfr %dst,%src */
- EMIT4(0xb9160000, dst_reg, src_reg);
- if (insn_is_zext(&insn[1]))
- insn_count = 2;
+ case BPF_ALU | BPF_MOV | BPF_X:
+ switch (insn->off) {
+ case 0: /* DST = (u32) SRC */
+ /* llgfr %dst,%src */
+ EMIT4(0xb9160000, dst_reg, src_reg);
+ if (insn_is_zext(&insn[1]))
+ insn_count = 2;
+ break;
+ case 8: /* DST = (u32)(s8) SRC */
+ /* lbr %dst,%src */
+ EMIT4(0xb9260000, dst_reg, src_reg);
+ /* llgfr %dst,%dst */
+ EMIT4(0xb9160000, dst_reg, dst_reg);
+ break;
+ case 16: /* DST = (u32)(s16) SRC */
+ /* lhr %dst,%src */
+ EMIT4(0xb9270000, dst_reg, src_reg);
+ /* llgfr %dst,%dst */
+ EMIT4(0xb9160000, dst_reg, dst_reg);
+ break;
+ }
break;
- case BPF_ALU64 | BPF_MOV | BPF_X: /* dst = src */
- /* lgr %dst,%src */
- EMIT4(0xb9040000, dst_reg, src_reg);
+ case BPF_ALU64 | BPF_MOV | BPF_X:
+ switch (insn->off) {
+ case 0: /* DST = SRC */
+ /* lgr %dst,%src */
+ EMIT4(0xb9040000, dst_reg, src_reg);
+ break;
+ case 8: /* DST = (s8) SRC */
+ /* lgbr %dst,%src */
+ EMIT4(0xb9060000, dst_reg, src_reg);
+ break;
+ case 16: /* DST = (s16) SRC */
+ /* lghr %dst,%src */
+ EMIT4(0xb9070000, dst_reg, src_reg);
+ break;
+ case 32: /* DST = (s32) SRC */
+ /* lgfr %dst,%src */
+ EMIT4(0xb9140000, dst_reg, src_reg);
+ break;
+ }
break;
case BPF_ALU | BPF_MOV | BPF_K: /* dst = (u32) imm */
/* llilf %dst,imm */
@@ -912,66 +949,115 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
/*
* BPF_DIV / BPF_MOD
*/
- case BPF_ALU | BPF_DIV | BPF_X: /* dst = (u32) dst / (u32) src */
- case BPF_ALU | BPF_MOD | BPF_X: /* dst = (u32) dst % (u32) src */
+ case BPF_ALU | BPF_DIV | BPF_X:
+ case BPF_ALU | BPF_MOD | BPF_X:
{
int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0;
- /* lhi %w0,0 */
- EMIT4_IMM(0xa7080000, REG_W0, 0);
- /* lr %w1,%dst */
- EMIT2(0x1800, REG_W1, dst_reg);
- /* dlr %w0,%src */
- EMIT4(0xb9970000, REG_W0, src_reg);
+ switch (off) {
+ case 0: /* dst = (u32) dst {/,%} (u32) src */
+ /* xr %w0,%w0 */
+ EMIT2(0x1700, REG_W0, REG_W0);
+ /* lr %w1,%dst */
+ EMIT2(0x1800, REG_W1, dst_reg);
+ /* dlr %w0,%src */
+ EMIT4(0xb9970000, REG_W0, src_reg);
+ break;
+ case 1: /* dst = (u32) ((s32) dst {/,%} (s32) src) */
+ /* lgfr %r1,%dst */
+ EMIT4(0xb9140000, REG_W1, dst_reg);
+ /* dsgfr %r0,%src */
+ EMIT4(0xb91d0000, REG_W0, src_reg);
+ break;
+ }
/* llgfr %dst,%rc */
EMIT4(0xb9160000, dst_reg, rc_reg);
if (insn_is_zext(&insn[1]))
insn_count = 2;
break;
}
- case BPF_ALU64 | BPF_DIV | BPF_X: /* dst = dst / src */
- case BPF_ALU64 | BPF_MOD | BPF_X: /* dst = dst % src */
+ case BPF_ALU64 | BPF_DIV | BPF_X:
+ case BPF_ALU64 | BPF_MOD | BPF_X:
{
int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0;
- /* lghi %w0,0 */
- EMIT4_IMM(0xa7090000, REG_W0, 0);
- /* lgr %w1,%dst */
- EMIT4(0xb9040000, REG_W1, dst_reg);
- /* dlgr %w0,%dst */
- EMIT4(0xb9870000, REG_W0, src_reg);
+ switch (off) {
+ case 0: /* dst = dst {/,%} src */
+ /* lghi %w0,0 */
+ EMIT4_IMM(0xa7090000, REG_W0, 0);
+ /* lgr %w1,%dst */
+ EMIT4(0xb9040000, REG_W1, dst_reg);
+ /* dlgr %w0,%src */
+ EMIT4(0xb9870000, REG_W0, src_reg);
+ break;
+ case 1: /* dst = (s64) dst {/,%} (s64) src */
+ /* lgr %w1,%dst */
+ EMIT4(0xb9040000, REG_W1, dst_reg);
+ /* dsgr %w0,%src */
+ EMIT4(0xb90d0000, REG_W0, src_reg);
+ break;
+ }
/* lgr %dst,%rc */
EMIT4(0xb9040000, dst_reg, rc_reg);
break;
}
- case BPF_ALU | BPF_DIV | BPF_K: /* dst = (u32) dst / (u32) imm */
- case BPF_ALU | BPF_MOD | BPF_K: /* dst = (u32) dst % (u32) imm */
+ case BPF_ALU | BPF_DIV | BPF_K:
+ case BPF_ALU | BPF_MOD | BPF_K:
{
int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0;
if (imm == 1) {
if (BPF_OP(insn->code) == BPF_MOD)
- /* lhgi %dst,0 */
+ /* lghi %dst,0 */
EMIT4_IMM(0xa7090000, dst_reg, 0);
else
EMIT_ZERO(dst_reg);
break;
}
- /* lhi %w0,0 */
- EMIT4_IMM(0xa7080000, REG_W0, 0);
- /* lr %w1,%dst */
- EMIT2(0x1800, REG_W1, dst_reg);
if (!is_first_pass(jit) && can_use_ldisp_for_lit32(jit)) {
- /* dl %w0,<d(imm)>(%l) */
- EMIT6_DISP_LH(0xe3000000, 0x0097, REG_W0, REG_0, REG_L,
- EMIT_CONST_U32(imm));
+ switch (off) {
+ case 0: /* dst = (u32) dst {/,%} (u32) imm */
+ /* xr %w0,%w0 */
+ EMIT2(0x1700, REG_W0, REG_W0);
+ /* lr %w1,%dst */
+ EMIT2(0x1800, REG_W1, dst_reg);
+ /* dl %w0,<d(imm)>(%l) */
+ EMIT6_DISP_LH(0xe3000000, 0x0097, REG_W0, REG_0,
+ REG_L, EMIT_CONST_U32(imm));
+ break;
+ case 1: /* dst = (s32) dst {/,%} (s32) imm */
+ /* lgfr %r1,%dst */
+ EMIT4(0xb9140000, REG_W1, dst_reg);
+ /* dsgf %r0,<d(imm)>(%l) */
+ EMIT6_DISP_LH(0xe3000000, 0x001d, REG_W0, REG_0,
+ REG_L, EMIT_CONST_U32(imm));
+ break;
+ }
} else {
- /* lgfrl %dst,imm */
- EMIT6_PCREL_RILB(0xc40c0000, dst_reg,
- _EMIT_CONST_U32(imm));
- jit->seen |= SEEN_LITERAL;
- /* dlr %w0,%dst */
- EMIT4(0xb9970000, REG_W0, dst_reg);
+ switch (off) {
+ case 0: /* dst = (u32) dst {/,%} (u32) imm */
+ /* xr %w0,%w0 */
+ EMIT2(0x1700, REG_W0, REG_W0);
+ /* lr %w1,%dst */
+ EMIT2(0x1800, REG_W1, dst_reg);
+ /* lrl %dst,imm */
+ EMIT6_PCREL_RILB(0xc40d0000, dst_reg,
+ _EMIT_CONST_U32(imm));
+ jit->seen |= SEEN_LITERAL;
+ /* dlr %w0,%dst */
+ EMIT4(0xb9970000, REG_W0, dst_reg);
+ break;
+ case 1: /* dst = (s32) dst {/,%} (s32) imm */
+ /* lgfr %w1,%dst */
+ EMIT4(0xb9140000, REG_W1, dst_reg);
+ /* lgfrl %dst,imm */
+ EMIT6_PCREL_RILB(0xc40c0000, dst_reg,
+ _EMIT_CONST_U32(imm));
+ jit->seen |= SEEN_LITERAL;
+ /* dsgr %w0,%dst */
+ EMIT4(0xb90d0000, REG_W0, dst_reg);
+ break;
+ }
}
/* llgfr %dst,%rc */
EMIT4(0xb9160000, dst_reg, rc_reg);
@@ -979,8 +1065,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
insn_count = 2;
break;
}
- case BPF_ALU64 | BPF_DIV | BPF_K: /* dst = dst / imm */
- case BPF_ALU64 | BPF_MOD | BPF_K: /* dst = dst % imm */
+ case BPF_ALU64 | BPF_DIV | BPF_K:
+ case BPF_ALU64 | BPF_MOD | BPF_K:
{
int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0;
@@ -990,21 +1076,50 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
EMIT4_IMM(0xa7090000, dst_reg, 0);
break;
}
- /* lghi %w0,0 */
- EMIT4_IMM(0xa7090000, REG_W0, 0);
- /* lgr %w1,%dst */
- EMIT4(0xb9040000, REG_W1, dst_reg);
if (!is_first_pass(jit) && can_use_ldisp_for_lit64(jit)) {
- /* dlg %w0,<d(imm)>(%l) */
- EMIT6_DISP_LH(0xe3000000, 0x0087, REG_W0, REG_0, REG_L,
- EMIT_CONST_U64(imm));
+ switch (off) {
+ case 0: /* dst = dst {/,%} imm */
+ /* lghi %w0,0 */
+ EMIT4_IMM(0xa7090000, REG_W0, 0);
+ /* lgr %w1,%dst */
+ EMIT4(0xb9040000, REG_W1, dst_reg);
+ /* dlg %w0,<d(imm)>(%l) */
+ EMIT6_DISP_LH(0xe3000000, 0x0087, REG_W0, REG_0,
+ REG_L, EMIT_CONST_U64(imm));
+ break;
+ case 1: /* dst = (s64) dst {/,%} (s64) imm */
+ /* lgr %w1,%dst */
+ EMIT4(0xb9040000, REG_W1, dst_reg);
+ /* dsg %w0,<d(imm)>(%l) */
+ EMIT6_DISP_LH(0xe3000000, 0x000d, REG_W0, REG_0,
+ REG_L, EMIT_CONST_U64(imm));
+ break;
+ }
} else {
- /* lgrl %dst,imm */
- EMIT6_PCREL_RILB(0xc4080000, dst_reg,
- _EMIT_CONST_U64(imm));
- jit->seen |= SEEN_LITERAL;
- /* dlgr %w0,%dst */
- EMIT4(0xb9870000, REG_W0, dst_reg);
+ switch (off) {
+ case 0: /* dst = dst {/,%} imm */
+ /* lghi %w0,0 */
+ EMIT4_IMM(0xa7090000, REG_W0, 0);
+ /* lgr %w1,%dst */
+ EMIT4(0xb9040000, REG_W1, dst_reg);
+ /* lgrl %dst,imm */
+ EMIT6_PCREL_RILB(0xc4080000, dst_reg,
+ _EMIT_CONST_U64(imm));
+ jit->seen |= SEEN_LITERAL;
+ /* dlgr %w0,%dst */
+ EMIT4(0xb9870000, REG_W0, dst_reg);
+ break;
+ case 1: /* dst = (s64) dst {/,%} (s64) imm */
+ /* lgr %w1,%dst */
+ EMIT4(0xb9040000, REG_W1, dst_reg);
+ /* lgrl %dst,imm */
+ EMIT6_PCREL_RILB(0xc4080000, dst_reg,
+ _EMIT_CONST_U64(imm));
+ jit->seen |= SEEN_LITERAL;
+ /* dsgr %w0,%dst */
+ EMIT4(0xb90d0000, REG_W0, dst_reg);
+ break;
+ }
}
/* lgr %dst,%rc */
EMIT4(0xb9040000, dst_reg, rc_reg);
@@ -1217,6 +1332,7 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
}
break;
case BPF_ALU | BPF_END | BPF_FROM_LE:
+ case BPF_ALU64 | BPF_END | BPF_FROM_LE:
switch (imm) {
case 16: /* dst = (u16) cpu_to_le16(dst) */
/* lrvr %dst,%dst */
@@ -1374,6 +1490,12 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
if (insn_is_zext(&insn[1]))
insn_count = 2;
break;
+ case BPF_LDX | BPF_MEMSX | BPF_B: /* dst = *(s8 *)(ul) (src + off) */
+ case BPF_LDX | BPF_PROBE_MEMSX | BPF_B:
+ /* lgb %dst,0(off,%src) */
+ EMIT6_DISP_LH(0xe3000000, 0x0077, dst_reg, src_reg, REG_0, off);
+ jit->seen |= SEEN_MEM;
+ break;
case BPF_LDX | BPF_MEM | BPF_H: /* dst = *(u16 *)(ul) (src + off) */
case BPF_LDX | BPF_PROBE_MEM | BPF_H:
/* llgh %dst,0(off,%src) */
@@ -1382,6 +1504,12 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
if (insn_is_zext(&insn[1]))
insn_count = 2;
break;
+ case BPF_LDX | BPF_MEMSX | BPF_H: /* dst = *(s16 *)(ul) (src + off) */
+ case BPF_LDX | BPF_PROBE_MEMSX | BPF_H:
+ /* lgh %dst,0(off,%src) */
+ EMIT6_DISP_LH(0xe3000000, 0x0015, dst_reg, src_reg, REG_0, off);
+ jit->seen |= SEEN_MEM;
+ break;
case BPF_LDX | BPF_MEM | BPF_W: /* dst = *(u32 *)(ul) (src + off) */
case BPF_LDX | BPF_PROBE_MEM | BPF_W:
/* llgf %dst,off(%src) */
@@ -1390,6 +1518,12 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
if (insn_is_zext(&insn[1]))
insn_count = 2;
break;
+ case BPF_LDX | BPF_MEMSX | BPF_W: /* dst = *(s32 *)(ul) (src + off) */
+ case BPF_LDX | BPF_PROBE_MEMSX | BPF_W:
+ /* lgf %dst,off(%src) */
+ jit->seen |= SEEN_MEM;
+ EMIT6_DISP_LH(0xe3000000, 0x0014, dst_reg, src_reg, REG_0, off);
+ break;
case BPF_LDX | BPF_MEM | BPF_DW: /* dst = *(u64 *)(ul) (src + off) */
case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
/* lg %dst,0(off,%src) */
@@ -1570,6 +1704,9 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
* instruction itself (loop) and for BPF with offset 0 we
* branch to the instruction behind the branch.
*/
+ case BPF_JMP32 | BPF_JA: /* if (true) */
+ branch_oc_off = imm;
+ fallthrough;
case BPF_JMP | BPF_JA: /* if (true) */
mask = 0xf000; /* j */
goto branch_oc;
@@ -1738,14 +1875,16 @@ branch_xu:
break;
branch_oc:
if (!is_first_pass(jit) &&
- can_use_rel(jit, addrs[i + off + 1])) {
+ can_use_rel(jit, addrs[i + branch_oc_off + 1])) {
/* brc mask,off */
EMIT4_PCREL_RIC(0xa7040000,
- mask >> 12, addrs[i + off + 1]);
+ mask >> 12,
+ addrs[i + branch_oc_off + 1]);
} else {
/* brcl mask,off */
EMIT6_PCREL_RILC(0xc0040000,
- mask >> 12, addrs[i + off + 1]);
+ mask >> 12,
+ addrs[i + branch_oc_off + 1]);
}
break;
}
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index a5930042139d..8c10d9abc239 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -16,6 +16,9 @@
#include <asm/set_memory.h>
#include <asm/nospec-branch.h>
#include <asm/text-patching.h>
+#include <asm/unwind.h>
+
+static bool all_callee_regs_used[4] = {true, true, true, true};
static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
{
@@ -255,6 +258,14 @@ struct jit_context {
/* Number of bytes that will be skipped on tailcall */
#define X86_TAIL_CALL_OFFSET (11 + ENDBR_INSN_SIZE)
+static void push_r12(u8 **pprog)
+{
+ u8 *prog = *pprog;
+
+ EMIT2(0x41, 0x54); /* push r12 */
+ *pprog = prog;
+}
+
static void push_callee_regs(u8 **pprog, bool *callee_regs_used)
{
u8 *prog = *pprog;
@@ -270,6 +281,14 @@ static void push_callee_regs(u8 **pprog, bool *callee_regs_used)
*pprog = prog;
}
+static void pop_r12(u8 **pprog)
+{
+ u8 *prog = *pprog;
+
+ EMIT2(0x41, 0x5C); /* pop r12 */
+ *pprog = prog;
+}
+
static void pop_callee_regs(u8 **pprog, bool *callee_regs_used)
{
u8 *prog = *pprog;
@@ -291,7 +310,8 @@ static void pop_callee_regs(u8 **pprog, bool *callee_regs_used)
* while jumping to another program
*/
static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf,
- bool tail_call_reachable, bool is_subprog)
+ bool tail_call_reachable, bool is_subprog,
+ bool is_exception_cb)
{
u8 *prog = *pprog;
@@ -303,12 +323,30 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf,
prog += X86_PATCH_SIZE;
if (!ebpf_from_cbpf) {
if (tail_call_reachable && !is_subprog)
+ /* When it's the entry of the whole tailcall context,
+ * zeroing rax means initialising tail_call_cnt.
+ */
EMIT2(0x31, 0xC0); /* xor eax, eax */
else
+ /* Keep the same instruction layout. */
EMIT2(0x66, 0x90); /* nop2 */
}
- EMIT1(0x55); /* push rbp */
- EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */
+ /* Exception callback receives FP as third parameter */
+ if (is_exception_cb) {
+ EMIT3(0x48, 0x89, 0xF4); /* mov rsp, rsi */
+ EMIT3(0x48, 0x89, 0xD5); /* mov rbp, rdx */
+ /* The main frame must have exception_boundary as true, so we
+ * first restore those callee-saved regs from stack, before
+ * reusing the stack frame.
+ */
+ pop_callee_regs(&prog, all_callee_regs_used);
+ pop_r12(&prog);
+ /* Reset the stack frame. */
+ EMIT3(0x48, 0x89, 0xEC); /* mov rsp, rbp */
+ } else {
+ EMIT1(0x55); /* push rbp */
+ EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */
+ }
/* X86_TAIL_CALL_OFFSET is here */
EMIT_ENDBR();
@@ -467,7 +505,8 @@ static void emit_return(u8 **pprog, u8 *ip)
* goto *(prog->bpf_func + prologue_size);
* out:
*/
-static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used,
+static void emit_bpf_tail_call_indirect(struct bpf_prog *bpf_prog,
+ u8 **pprog, bool *callee_regs_used,
u32 stack_depth, u8 *ip,
struct jit_context *ctx)
{
@@ -517,7 +556,12 @@ static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used,
offset = ctx->tail_call_indirect_label - (prog + 2 - start);
EMIT2(X86_JE, offset); /* je out */
- pop_callee_regs(&prog, callee_regs_used);
+ if (bpf_prog->aux->exception_boundary) {
+ pop_callee_regs(&prog, all_callee_regs_used);
+ pop_r12(&prog);
+ } else {
+ pop_callee_regs(&prog, callee_regs_used);
+ }
EMIT1(0x58); /* pop rax */
if (stack_depth)
@@ -541,7 +585,8 @@ static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used,
*pprog = prog;
}
-static void emit_bpf_tail_call_direct(struct bpf_jit_poke_descriptor *poke,
+static void emit_bpf_tail_call_direct(struct bpf_prog *bpf_prog,
+ struct bpf_jit_poke_descriptor *poke,
u8 **pprog, u8 *ip,
bool *callee_regs_used, u32 stack_depth,
struct jit_context *ctx)
@@ -570,7 +615,13 @@ static void emit_bpf_tail_call_direct(struct bpf_jit_poke_descriptor *poke,
emit_jump(&prog, (u8 *)poke->tailcall_target + X86_PATCH_SIZE,
poke->tailcall_bypass);
- pop_callee_regs(&prog, callee_regs_used);
+ if (bpf_prog->aux->exception_boundary) {
+ pop_callee_regs(&prog, all_callee_regs_used);
+ pop_r12(&prog);
+ } else {
+ pop_callee_regs(&prog, callee_regs_used);
+ }
+
EMIT1(0x58); /* pop rax */
if (stack_depth)
EMIT3_off32(0x48, 0x81, 0xC4, round_up(stack_depth, 8));
@@ -1018,6 +1069,10 @@ static void emit_shiftx(u8 **pprog, u32 dst_reg, u8 src_reg, bool is64, u8 op)
#define INSN_SZ_DIFF (((addrs[i] - addrs[i - 1]) - (prog - temp)))
+/* mov rax, qword ptr [rbp - rounded_stack_depth - 8] */
+#define RESTORE_TAIL_CALL_CNT(stack) \
+ EMIT3_off32(0x48, 0x8B, 0x85, -round_up(stack, 8) - 8)
+
static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image,
int oldproglen, struct jit_context *ctx, bool jmp_padding)
{
@@ -1041,8 +1096,20 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
emit_prologue(&prog, bpf_prog->aux->stack_depth,
bpf_prog_was_classic(bpf_prog), tail_call_reachable,
- bpf_prog->aux->func_idx != 0);
- push_callee_regs(&prog, callee_regs_used);
+ bpf_is_subprog(bpf_prog), bpf_prog->aux->exception_cb);
+ /* Exception callback will clobber callee regs for its own use, and
+ * restore the original callee regs from main prog's stack frame.
+ */
+ if (bpf_prog->aux->exception_boundary) {
+ /* We also need to save r12, which is not mapped to any BPF
+ * register, as we throw after entry into the kernel, which may
+ * overwrite r12.
+ */
+ push_r12(&prog);
+ push_callee_regs(&prog, all_callee_regs_used);
+ } else {
+ push_callee_regs(&prog, callee_regs_used);
+ }
ilen = prog - temp;
if (rw_image)
@@ -1623,9 +1690,7 @@ st: if (is_imm8(insn->off))
func = (u8 *) __bpf_call_base + imm32;
if (tail_call_reachable) {
- /* mov rax, qword ptr [rbp - rounded_stack_depth - 8] */
- EMIT3_off32(0x48, 0x8B, 0x85,
- -round_up(bpf_prog->aux->stack_depth, 8) - 8);
+ RESTORE_TAIL_CALL_CNT(bpf_prog->aux->stack_depth);
if (!imm32)
return -EINVAL;
offs = 7 + x86_call_depth_emit_accounting(&prog, func);
@@ -1641,13 +1706,15 @@ st: if (is_imm8(insn->off))
case BPF_JMP | BPF_TAIL_CALL:
if (imm32)
- emit_bpf_tail_call_direct(&bpf_prog->aux->poke_tab[imm32 - 1],
+ emit_bpf_tail_call_direct(bpf_prog,
+ &bpf_prog->aux->poke_tab[imm32 - 1],
&prog, image + addrs[i - 1],
callee_regs_used,
bpf_prog->aux->stack_depth,
ctx);
else
- emit_bpf_tail_call_indirect(&prog,
+ emit_bpf_tail_call_indirect(bpf_prog,
+ &prog,
callee_regs_used,
bpf_prog->aux->stack_depth,
image + addrs[i - 1],
@@ -1900,7 +1967,12 @@ emit_jmp:
seen_exit = true;
/* Update cleanup_addr */
ctx->cleanup_addr = proglen;
- pop_callee_regs(&prog, callee_regs_used);
+ if (bpf_prog->aux->exception_boundary) {
+ pop_callee_regs(&prog, all_callee_regs_used);
+ pop_r12(&prog);
+ } else {
+ pop_callee_regs(&prog, callee_regs_used);
+ }
EMIT1(0xC9); /* leave */
emit_return(&prog, image + addrs[i - 1] + (prog - temp));
break;
@@ -2400,6 +2472,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
* [ ... ]
* [ stack_arg2 ]
* RBP - arg_stack_off [ stack_arg1 ]
+ * RSP [ tail_call_cnt ] BPF_TRAMP_F_TAIL_CALL_CTX
*/
/* room for return value of orig_call or fentry prog */
@@ -2464,6 +2537,8 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
else
/* sub rsp, stack_size */
EMIT4(0x48, 0x83, 0xEC, stack_size);
+ if (flags & BPF_TRAMP_F_TAIL_CALL_CTX)
+ EMIT1(0x50); /* push rax */
/* mov QWORD PTR [rbp - rbx_off], rbx */
emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_6, -rbx_off);
@@ -2516,9 +2591,15 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
restore_regs(m, &prog, regs_off);
save_args(m, &prog, arg_stack_off, true);
+ if (flags & BPF_TRAMP_F_TAIL_CALL_CTX)
+ /* Before calling the original function, restore the
+ * tail_call_cnt from stack to rax.
+ */
+ RESTORE_TAIL_CALL_CNT(stack_size);
+
if (flags & BPF_TRAMP_F_ORIG_STACK) {
- emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, 8);
- EMIT2(0xff, 0xd0); /* call *rax */
+ emit_ldx(&prog, BPF_DW, BPF_REG_6, BPF_REG_FP, 8);
+ EMIT2(0xff, 0xd3); /* call *rbx */
} else {
/* call original function */
if (emit_rsb_call(&prog, orig_call, prog)) {
@@ -2569,7 +2650,12 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
ret = -EINVAL;
goto cleanup;
}
- }
+ } else if (flags & BPF_TRAMP_F_TAIL_CALL_CTX)
+ /* Before running the original function, restore the
+ * tail_call_cnt from stack to rax.
+ */
+ RESTORE_TAIL_CALL_CNT(stack_size);
+
/* restore return value of orig_call or fentry prog back into RAX */
if (save_ret)
emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, -8);
@@ -2913,3 +2999,29 @@ void bpf_jit_free(struct bpf_prog *prog)
bpf_prog_unlock_free(prog);
}
+
+bool bpf_jit_supports_exceptions(void)
+{
+ /* We unwind through both kernel frames (starting from within bpf_throw
+ * call) and BPF frames. Therefore we require ORC unwinder to be enabled
+ * to walk kernel frames and reach BPF frames in the stack trace.
+ */
+ return IS_ENABLED(CONFIG_UNWINDER_ORC);
+}
+
+void arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp), void *cookie)
+{
+#if defined(CONFIG_UNWINDER_ORC)
+ struct unwind_state state;
+ unsigned long addr;
+
+ for (unwind_start(&state, current, NULL, NULL); !unwind_done(&state);
+ unwind_next_frame(&state)) {
+ addr = unwind_get_return_address(&state);
+ if (!addr || !consume_fn(cookie, (u64)addr, (u64)state.sp, (u64)state.bp))
+ break;
+ }
+ return;
+#endif
+ WARN(1, "verification of programs using bpf_throw should have failed\n");
+}
diff --git a/drivers/Kconfig b/drivers/Kconfig
index efb66e25fa2d..8ba3e8b9ad72 100644
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -243,4 +243,6 @@ source "drivers/hte/Kconfig"
source "drivers/cdx/Kconfig"
+source "drivers/dpll/Kconfig"
+
endmenu
diff --git a/drivers/Makefile b/drivers/Makefile
index 1bec7819a837..722d15be0eb7 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -197,5 +197,6 @@ obj-$(CONFIG_PECI) += peci/
obj-$(CONFIG_HTE) += hte/
obj-$(CONFIG_DRM_ACCEL) += accel/
obj-$(CONFIG_CDX_BUS) += cdx/
+obj-$(CONFIG_DPLL) += dpll/
obj-$(CONFIG_S390) += s390/
diff --git a/drivers/dpll/Kconfig b/drivers/dpll/Kconfig
new file mode 100644
index 000000000000..a4cae73f20d3
--- /dev/null
+++ b/drivers/dpll/Kconfig
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Generic DPLL drivers configuration
+#
+
+config DPLL
+ bool
diff --git a/drivers/dpll/Makefile b/drivers/dpll/Makefile
new file mode 100644
index 000000000000..2e5b27850110
--- /dev/null
+++ b/drivers/dpll/Makefile
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for DPLL drivers.
+#
+
+obj-$(CONFIG_DPLL) += dpll.o
+dpll-y += dpll_core.o
+dpll-y += dpll_netlink.o
+dpll-y += dpll_nl.o
diff --git a/drivers/dpll/dpll_core.c b/drivers/dpll/dpll_core.c
new file mode 100644
index 000000000000..3568149b9562
--- /dev/null
+++ b/drivers/dpll/dpll_core.c
@@ -0,0 +1,798 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * dpll_core.c - DPLL subsystem kernel-space interface implementation.
+ *
+ * Copyright (c) 2023 Meta Platforms, Inc. and affiliates
+ * Copyright (c) 2023 Intel Corporation.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+
+#include "dpll_core.h"
+#include "dpll_netlink.h"
+
+/* Mutex lock to protect DPLL subsystem devices and pins */
+DEFINE_MUTEX(dpll_lock);
+
+DEFINE_XARRAY_FLAGS(dpll_device_xa, XA_FLAGS_ALLOC);
+DEFINE_XARRAY_FLAGS(dpll_pin_xa, XA_FLAGS_ALLOC);
+
+static u32 dpll_xa_id;
+
+#define ASSERT_DPLL_REGISTERED(d) \
+ WARN_ON_ONCE(!xa_get_mark(&dpll_device_xa, (d)->id, DPLL_REGISTERED))
+#define ASSERT_DPLL_NOT_REGISTERED(d) \
+ WARN_ON_ONCE(xa_get_mark(&dpll_device_xa, (d)->id, DPLL_REGISTERED))
+#define ASSERT_PIN_REGISTERED(p) \
+ WARN_ON_ONCE(!xa_get_mark(&dpll_pin_xa, (p)->id, DPLL_REGISTERED))
+
+struct dpll_device_registration {
+ struct list_head list;
+ const struct dpll_device_ops *ops;
+ void *priv;
+};
+
+struct dpll_pin_registration {
+ struct list_head list;
+ const struct dpll_pin_ops *ops;
+ void *priv;
+};
+
+struct dpll_device *dpll_device_get_by_id(int id)
+{
+ if (xa_get_mark(&dpll_device_xa, id, DPLL_REGISTERED))
+ return xa_load(&dpll_device_xa, id);
+
+ return NULL;
+}
+
+static struct dpll_pin_registration *
+dpll_pin_registration_find(struct dpll_pin_ref *ref,
+ const struct dpll_pin_ops *ops, void *priv)
+{
+ struct dpll_pin_registration *reg;
+
+ list_for_each_entry(reg, &ref->registration_list, list) {
+ if (reg->ops == ops && reg->priv == priv)
+ return reg;
+ }
+ return NULL;
+}
+
+static int
+dpll_xa_ref_pin_add(struct xarray *xa_pins, struct dpll_pin *pin,
+ const struct dpll_pin_ops *ops, void *priv)
+{
+ struct dpll_pin_registration *reg;
+ struct dpll_pin_ref *ref;
+ bool ref_exists = false;
+ unsigned long i;
+ int ret;
+
+ xa_for_each(xa_pins, i, ref) {
+ if (ref->pin != pin)
+ continue;
+ reg = dpll_pin_registration_find(ref, ops, priv);
+ if (reg) {
+ refcount_inc(&ref->refcount);
+ return 0;
+ }
+ ref_exists = true;
+ break;
+ }
+
+ if (!ref_exists) {
+ ref = kzalloc(sizeof(*ref), GFP_KERNEL);
+ if (!ref)
+ return -ENOMEM;
+ ref->pin = pin;
+ INIT_LIST_HEAD(&ref->registration_list);
+ ret = xa_insert(xa_pins, pin->pin_idx, ref, GFP_KERNEL);
+ if (ret) {
+ kfree(ref);
+ return ret;
+ }
+ refcount_set(&ref->refcount, 1);
+ }
+
+ reg = kzalloc(sizeof(*reg), GFP_KERNEL);
+ if (!reg) {
+ if (!ref_exists) {
+ xa_erase(xa_pins, pin->pin_idx);
+ kfree(ref);
+ }
+ return -ENOMEM;
+ }
+ reg->ops = ops;
+ reg->priv = priv;
+ if (ref_exists)
+ refcount_inc(&ref->refcount);
+ list_add_tail(&reg->list, &ref->registration_list);
+
+ return 0;
+}
+
+static int dpll_xa_ref_pin_del(struct xarray *xa_pins, struct dpll_pin *pin,
+ const struct dpll_pin_ops *ops, void *priv)
+{
+ struct dpll_pin_registration *reg;
+ struct dpll_pin_ref *ref;
+ unsigned long i;
+
+ xa_for_each(xa_pins, i, ref) {
+ if (ref->pin != pin)
+ continue;
+ reg = dpll_pin_registration_find(ref, ops, priv);
+ if (WARN_ON(!reg))
+ return -EINVAL;
+ if (refcount_dec_and_test(&ref->refcount)) {
+ list_del(&reg->list);
+ kfree(reg);
+ xa_erase(xa_pins, i);
+ WARN_ON(!list_empty(&ref->registration_list));
+ kfree(ref);
+ }
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
+static int
+dpll_xa_ref_dpll_add(struct xarray *xa_dplls, struct dpll_device *dpll,
+ const struct dpll_pin_ops *ops, void *priv)
+{
+ struct dpll_pin_registration *reg;
+ struct dpll_pin_ref *ref;
+ bool ref_exists = false;
+ unsigned long i;
+ int ret;
+
+ xa_for_each(xa_dplls, i, ref) {
+ if (ref->dpll != dpll)
+ continue;
+ reg = dpll_pin_registration_find(ref, ops, priv);
+ if (reg) {
+ refcount_inc(&ref->refcount);
+ return 0;
+ }
+ ref_exists = true;
+ break;
+ }
+
+ if (!ref_exists) {
+ ref = kzalloc(sizeof(*ref), GFP_KERNEL);
+ if (!ref)
+ return -ENOMEM;
+ ref->dpll = dpll;
+ INIT_LIST_HEAD(&ref->registration_list);
+ ret = xa_insert(xa_dplls, dpll->id, ref, GFP_KERNEL);
+ if (ret) {
+ kfree(ref);
+ return ret;
+ }
+ refcount_set(&ref->refcount, 1);
+ }
+
+ reg = kzalloc(sizeof(*reg), GFP_KERNEL);
+ if (!reg) {
+ if (!ref_exists) {
+ xa_erase(xa_dplls, dpll->id);
+ kfree(ref);
+ }
+ return -ENOMEM;
+ }
+ reg->ops = ops;
+ reg->priv = priv;
+ if (ref_exists)
+ refcount_inc(&ref->refcount);
+ list_add_tail(&reg->list, &ref->registration_list);
+
+ return 0;
+}
+
+static void
+dpll_xa_ref_dpll_del(struct xarray *xa_dplls, struct dpll_device *dpll,
+ const struct dpll_pin_ops *ops, void *priv)
+{
+ struct dpll_pin_registration *reg;
+ struct dpll_pin_ref *ref;
+ unsigned long i;
+
+ xa_for_each(xa_dplls, i, ref) {
+ if (ref->dpll != dpll)
+ continue;
+ reg = dpll_pin_registration_find(ref, ops, priv);
+ if (WARN_ON(!reg))
+ return;
+ if (refcount_dec_and_test(&ref->refcount)) {
+ list_del(&reg->list);
+ kfree(reg);
+ xa_erase(xa_dplls, i);
+ WARN_ON(!list_empty(&ref->registration_list));
+ kfree(ref);
+ }
+ return;
+ }
+}
+
+struct dpll_pin_ref *dpll_xa_ref_dpll_first(struct xarray *xa_refs)
+{
+ struct dpll_pin_ref *ref;
+ unsigned long i = 0;
+
+ ref = xa_find(xa_refs, &i, ULONG_MAX, XA_PRESENT);
+ WARN_ON(!ref);
+ return ref;
+}
+
+static struct dpll_device *
+dpll_device_alloc(const u64 clock_id, u32 device_idx, struct module *module)
+{
+ struct dpll_device *dpll;
+ int ret;
+
+ dpll = kzalloc(sizeof(*dpll), GFP_KERNEL);
+ if (!dpll)
+ return ERR_PTR(-ENOMEM);
+ refcount_set(&dpll->refcount, 1);
+ INIT_LIST_HEAD(&dpll->registration_list);
+ dpll->device_idx = device_idx;
+ dpll->clock_id = clock_id;
+ dpll->module = module;
+ ret = xa_alloc_cyclic(&dpll_device_xa, &dpll->id, dpll, xa_limit_32b,
+ &dpll_xa_id, GFP_KERNEL);
+ if (ret < 0) {
+ kfree(dpll);
+ return ERR_PTR(ret);
+ }
+ xa_init_flags(&dpll->pin_refs, XA_FLAGS_ALLOC);
+
+ return dpll;
+}
+
+/**
+ * dpll_device_get - find existing or create new dpll device
+ * @clock_id: clock_id of creator
+ * @device_idx: idx given by device driver
+ * @module: reference to registering module
+ *
+ * Get existing object of a dpll device, unique for given arguments.
+ * Create new if doesn't exist yet.
+ *
+ * Context: Acquires a lock (dpll_lock)
+ * Return:
+ * * valid dpll_device struct pointer if succeeded
+ * * ERR_PTR(X) - error
+ */
+struct dpll_device *
+dpll_device_get(u64 clock_id, u32 device_idx, struct module *module)
+{
+ struct dpll_device *dpll, *ret = NULL;
+ unsigned long index;
+
+ mutex_lock(&dpll_lock);
+ xa_for_each(&dpll_device_xa, index, dpll) {
+ if (dpll->clock_id == clock_id &&
+ dpll->device_idx == device_idx &&
+ dpll->module == module) {
+ ret = dpll;
+ refcount_inc(&ret->refcount);
+ break;
+ }
+ }
+ if (!ret)
+ ret = dpll_device_alloc(clock_id, device_idx, module);
+ mutex_unlock(&dpll_lock);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(dpll_device_get);
+
+/**
+ * dpll_device_put - decrease the refcount and free memory if possible
+ * @dpll: dpll_device struct pointer
+ *
+ * Context: Acquires a lock (dpll_lock)
+ * Drop reference for a dpll device, if all references are gone, delete
+ * dpll device object.
+ */
+void dpll_device_put(struct dpll_device *dpll)
+{
+ mutex_lock(&dpll_lock);
+ if (refcount_dec_and_test(&dpll->refcount)) {
+ ASSERT_DPLL_NOT_REGISTERED(dpll);
+ WARN_ON_ONCE(!xa_empty(&dpll->pin_refs));
+ xa_destroy(&dpll->pin_refs);
+ xa_erase(&dpll_device_xa, dpll->id);
+ WARN_ON(!list_empty(&dpll->registration_list));
+ kfree(dpll);
+ }
+ mutex_unlock(&dpll_lock);
+}
+EXPORT_SYMBOL_GPL(dpll_device_put);
+
+static struct dpll_device_registration *
+dpll_device_registration_find(struct dpll_device *dpll,
+ const struct dpll_device_ops *ops, void *priv)
+{
+ struct dpll_device_registration *reg;
+
+ list_for_each_entry(reg, &dpll->registration_list, list) {
+ if (reg->ops == ops && reg->priv == priv)
+ return reg;
+ }
+ return NULL;
+}
+
+/**
+ * dpll_device_register - register the dpll device in the subsystem
+ * @dpll: pointer to a dpll
+ * @type: type of a dpll
+ * @ops: ops for a dpll device
+ * @priv: pointer to private information of owner
+ *
+ * Make dpll device available for user space.
+ *
+ * Context: Acquires a lock (dpll_lock)
+ * Return:
+ * * 0 on success
+ * * negative - error value
+ */
+int dpll_device_register(struct dpll_device *dpll, enum dpll_type type,
+ const struct dpll_device_ops *ops, void *priv)
+{
+ struct dpll_device_registration *reg;
+ bool first_registration = false;
+
+ if (WARN_ON(!ops))
+ return -EINVAL;
+ if (WARN_ON(!ops->mode_get))
+ return -EINVAL;
+ if (WARN_ON(!ops->lock_status_get))
+ return -EINVAL;
+ if (WARN_ON(type < DPLL_TYPE_PPS || type > DPLL_TYPE_MAX))
+ return -EINVAL;
+
+ mutex_lock(&dpll_lock);
+ reg = dpll_device_registration_find(dpll, ops, priv);
+ if (reg) {
+ mutex_unlock(&dpll_lock);
+ return -EEXIST;
+ }
+
+ reg = kzalloc(sizeof(*reg), GFP_KERNEL);
+ if (!reg) {
+ mutex_unlock(&dpll_lock);
+ return -ENOMEM;
+ }
+ reg->ops = ops;
+ reg->priv = priv;
+ dpll->type = type;
+ first_registration = list_empty(&dpll->registration_list);
+ list_add_tail(&reg->list, &dpll->registration_list);
+ if (!first_registration) {
+ mutex_unlock(&dpll_lock);
+ return 0;
+ }
+
+ xa_set_mark(&dpll_device_xa, dpll->id, DPLL_REGISTERED);
+ dpll_device_create_ntf(dpll);
+ mutex_unlock(&dpll_lock);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(dpll_device_register);
+
+/**
+ * dpll_device_unregister - unregister dpll device
+ * @dpll: registered dpll pointer
+ * @ops: ops for a dpll device
+ * @priv: pointer to private information of owner
+ *
+ * Unregister device, make it unavailable for userspace.
+ * Note: It does not free the memory
+ * Context: Acquires a lock (dpll_lock)
+ */
+void dpll_device_unregister(struct dpll_device *dpll,
+ const struct dpll_device_ops *ops, void *priv)
+{
+ struct dpll_device_registration *reg;
+
+ mutex_lock(&dpll_lock);
+ ASSERT_DPLL_REGISTERED(dpll);
+ dpll_device_delete_ntf(dpll);
+ reg = dpll_device_registration_find(dpll, ops, priv);
+ if (WARN_ON(!reg)) {
+ mutex_unlock(&dpll_lock);
+ return;
+ }
+ list_del(&reg->list);
+ kfree(reg);
+
+ if (!list_empty(&dpll->registration_list)) {
+ mutex_unlock(&dpll_lock);
+ return;
+ }
+ xa_clear_mark(&dpll_device_xa, dpll->id, DPLL_REGISTERED);
+ mutex_unlock(&dpll_lock);
+}
+EXPORT_SYMBOL_GPL(dpll_device_unregister);
+
+static struct dpll_pin *
+dpll_pin_alloc(u64 clock_id, u32 pin_idx, struct module *module,
+ const struct dpll_pin_properties *prop)
+{
+ struct dpll_pin *pin;
+ int ret;
+
+ pin = kzalloc(sizeof(*pin), GFP_KERNEL);
+ if (!pin)
+ return ERR_PTR(-ENOMEM);
+ pin->pin_idx = pin_idx;
+ pin->clock_id = clock_id;
+ pin->module = module;
+ if (WARN_ON(prop->type < DPLL_PIN_TYPE_MUX ||
+ prop->type > DPLL_PIN_TYPE_MAX)) {
+ ret = -EINVAL;
+ goto err;
+ }
+ pin->prop = prop;
+ refcount_set(&pin->refcount, 1);
+ xa_init_flags(&pin->dpll_refs, XA_FLAGS_ALLOC);
+ xa_init_flags(&pin->parent_refs, XA_FLAGS_ALLOC);
+ ret = xa_alloc(&dpll_pin_xa, &pin->id, pin, xa_limit_16b, GFP_KERNEL);
+ if (ret)
+ goto err;
+ return pin;
+err:
+ xa_destroy(&pin->dpll_refs);
+ xa_destroy(&pin->parent_refs);
+ kfree(pin);
+ return ERR_PTR(ret);
+}
+
+/**
+ * dpll_pin_get - find existing or create new dpll pin
+ * @clock_id: clock_id of creator
+ * @pin_idx: idx given by dev driver
+ * @module: reference to registering module
+ * @prop: dpll pin properties
+ *
+ * Get existing object of a pin (unique for given arguments) or create new
+ * if doesn't exist yet.
+ *
+ * Context: Acquires a lock (dpll_lock)
+ * Return:
+ * * valid allocated dpll_pin struct pointer if succeeded
+ * * ERR_PTR(X) - error
+ */
+struct dpll_pin *
+dpll_pin_get(u64 clock_id, u32 pin_idx, struct module *module,
+ const struct dpll_pin_properties *prop)
+{
+ struct dpll_pin *pos, *ret = NULL;
+ unsigned long i;
+
+ mutex_lock(&dpll_lock);
+ xa_for_each(&dpll_pin_xa, i, pos) {
+ if (pos->clock_id == clock_id &&
+ pos->pin_idx == pin_idx &&
+ pos->module == module) {
+ ret = pos;
+ refcount_inc(&ret->refcount);
+ break;
+ }
+ }
+ if (!ret)
+ ret = dpll_pin_alloc(clock_id, pin_idx, module, prop);
+ mutex_unlock(&dpll_lock);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(dpll_pin_get);
+
+/**
+ * dpll_pin_put - decrease the refcount and free memory if possible
+ * @pin: pointer to a pin to be put
+ *
+ * Drop reference for a pin, if all references are gone, delete pin object.
+ *
+ * Context: Acquires a lock (dpll_lock)
+ */
+void dpll_pin_put(struct dpll_pin *pin)
+{
+ mutex_lock(&dpll_lock);
+ if (refcount_dec_and_test(&pin->refcount)) {
+ xa_destroy(&pin->dpll_refs);
+ xa_destroy(&pin->parent_refs);
+ xa_erase(&dpll_pin_xa, pin->id);
+ kfree(pin);
+ }
+ mutex_unlock(&dpll_lock);
+}
+EXPORT_SYMBOL_GPL(dpll_pin_put);
+
+static int
+__dpll_pin_register(struct dpll_device *dpll, struct dpll_pin *pin,
+ const struct dpll_pin_ops *ops, void *priv)
+{
+ int ret;
+
+ ret = dpll_xa_ref_pin_add(&dpll->pin_refs, pin, ops, priv);
+ if (ret)
+ return ret;
+ ret = dpll_xa_ref_dpll_add(&pin->dpll_refs, dpll, ops, priv);
+ if (ret)
+ goto ref_pin_del;
+ xa_set_mark(&dpll_pin_xa, pin->id, DPLL_REGISTERED);
+ dpll_pin_create_ntf(pin);
+
+ return ret;
+
+ref_pin_del:
+ dpll_xa_ref_pin_del(&dpll->pin_refs, pin, ops, priv);
+ return ret;
+}
+
+/**
+ * dpll_pin_register - register the dpll pin in the subsystem
+ * @dpll: pointer to a dpll
+ * @pin: pointer to a dpll pin
+ * @ops: ops for a dpll pin ops
+ * @priv: pointer to private information of owner
+ *
+ * Context: Acquires a lock (dpll_lock)
+ * Return:
+ * * 0 on success
+ * * negative - error value
+ */
+int
+dpll_pin_register(struct dpll_device *dpll, struct dpll_pin *pin,
+ const struct dpll_pin_ops *ops, void *priv)
+{
+ int ret;
+
+ if (WARN_ON(!ops) ||
+ WARN_ON(!ops->state_on_dpll_get) ||
+ WARN_ON(!ops->direction_get))
+ return -EINVAL;
+ if (ASSERT_DPLL_REGISTERED(dpll))
+ return -EINVAL;
+
+ mutex_lock(&dpll_lock);
+ if (WARN_ON(!(dpll->module == pin->module &&
+ dpll->clock_id == pin->clock_id)))
+ ret = -EINVAL;
+ else
+ ret = __dpll_pin_register(dpll, pin, ops, priv);
+ mutex_unlock(&dpll_lock);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(dpll_pin_register);
+
+static void
+__dpll_pin_unregister(struct dpll_device *dpll, struct dpll_pin *pin,
+ const struct dpll_pin_ops *ops, void *priv)
+{
+ dpll_xa_ref_pin_del(&dpll->pin_refs, pin, ops, priv);
+ dpll_xa_ref_dpll_del(&pin->dpll_refs, dpll, ops, priv);
+ if (xa_empty(&pin->dpll_refs))
+ xa_clear_mark(&dpll_pin_xa, pin->id, DPLL_REGISTERED);
+}
+
+/**
+ * dpll_pin_unregister - unregister dpll pin from dpll device
+ * @dpll: registered dpll pointer
+ * @pin: pointer to a pin
+ * @ops: ops for a dpll pin
+ * @priv: pointer to private information of owner
+ *
+ * Note: It does not free the memory
+ * Context: Acquires a lock (dpll_lock)
+ */
+void dpll_pin_unregister(struct dpll_device *dpll, struct dpll_pin *pin,
+ const struct dpll_pin_ops *ops, void *priv)
+{
+ if (WARN_ON(xa_empty(&dpll->pin_refs)))
+ return;
+ if (WARN_ON(!xa_empty(&pin->parent_refs)))
+ return;
+
+ mutex_lock(&dpll_lock);
+ dpll_pin_delete_ntf(pin);
+ __dpll_pin_unregister(dpll, pin, ops, priv);
+ mutex_unlock(&dpll_lock);
+}
+EXPORT_SYMBOL_GPL(dpll_pin_unregister);
+
+/**
+ * dpll_pin_on_pin_register - register a pin with a parent pin
+ * @parent: pointer to a parent pin
+ * @pin: pointer to a pin
+ * @ops: ops for a dpll pin
+ * @priv: pointer to private information of owner
+ *
+ * Register a pin with a parent pin, create references between them and
+ * between newly registered pin and dplls connected with a parent pin.
+ *
+ * Context: Acquires a lock (dpll_lock)
+ * Return:
+ * * 0 on success
+ * * negative - error value
+ */
+int dpll_pin_on_pin_register(struct dpll_pin *parent, struct dpll_pin *pin,
+ const struct dpll_pin_ops *ops, void *priv)
+{
+ struct dpll_pin_ref *ref;
+ unsigned long i, stop;
+ int ret;
+
+ if (WARN_ON(parent->prop->type != DPLL_PIN_TYPE_MUX))
+ return -EINVAL;
+
+ if (WARN_ON(!ops) ||
+ WARN_ON(!ops->state_on_pin_get) ||
+ WARN_ON(!ops->direction_get))
+ return -EINVAL;
+ if (ASSERT_PIN_REGISTERED(parent))
+ return -EINVAL;
+
+ mutex_lock(&dpll_lock);
+ ret = dpll_xa_ref_pin_add(&pin->parent_refs, parent, ops, priv);
+ if (ret)
+ goto unlock;
+ refcount_inc(&pin->refcount);
+ xa_for_each(&parent->dpll_refs, i, ref) {
+ ret = __dpll_pin_register(ref->dpll, pin, ops, priv);
+ if (ret) {
+ stop = i;
+ goto dpll_unregister;
+ }
+ dpll_pin_create_ntf(pin);
+ }
+ mutex_unlock(&dpll_lock);
+
+ return ret;
+
+dpll_unregister:
+ xa_for_each(&parent->dpll_refs, i, ref)
+ if (i < stop) {
+ __dpll_pin_unregister(ref->dpll, pin, ops, priv);
+ dpll_pin_delete_ntf(pin);
+ }
+ refcount_dec(&pin->refcount);
+ dpll_xa_ref_pin_del(&pin->parent_refs, parent, ops, priv);
+unlock:
+ mutex_unlock(&dpll_lock);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(dpll_pin_on_pin_register);
+
+/**
+ * dpll_pin_on_pin_unregister - unregister dpll pin from a parent pin
+ * @parent: pointer to a parent pin
+ * @pin: pointer to a pin
+ * @ops: ops for a dpll pin
+ * @priv: pointer to private information of owner
+ *
+ * Context: Acquires a lock (dpll_lock)
+ * Note: It does not free the memory
+ */
+void dpll_pin_on_pin_unregister(struct dpll_pin *parent, struct dpll_pin *pin,
+ const struct dpll_pin_ops *ops, void *priv)
+{
+ struct dpll_pin_ref *ref;
+ unsigned long i;
+
+ mutex_lock(&dpll_lock);
+ dpll_pin_delete_ntf(pin);
+ dpll_xa_ref_pin_del(&pin->parent_refs, parent, ops, priv);
+ refcount_dec(&pin->refcount);
+ xa_for_each(&pin->dpll_refs, i, ref)
+ __dpll_pin_unregister(ref->dpll, pin, ops, priv);
+ mutex_unlock(&dpll_lock);
+}
+EXPORT_SYMBOL_GPL(dpll_pin_on_pin_unregister);
+
+static struct dpll_device_registration *
+dpll_device_registration_first(struct dpll_device *dpll)
+{
+ struct dpll_device_registration *reg;
+
+ reg = list_first_entry_or_null((struct list_head *)&dpll->registration_list,
+ struct dpll_device_registration, list);
+ WARN_ON(!reg);
+ return reg;
+}
+
+void *dpll_priv(struct dpll_device *dpll)
+{
+ struct dpll_device_registration *reg;
+
+ reg = dpll_device_registration_first(dpll);
+ return reg->priv;
+}
+
+const struct dpll_device_ops *dpll_device_ops(struct dpll_device *dpll)
+{
+ struct dpll_device_registration *reg;
+
+ reg = dpll_device_registration_first(dpll);
+ return reg->ops;
+}
+
+static struct dpll_pin_registration *
+dpll_pin_registration_first(struct dpll_pin_ref *ref)
+{
+ struct dpll_pin_registration *reg;
+
+ reg = list_first_entry_or_null(&ref->registration_list,
+ struct dpll_pin_registration, list);
+ WARN_ON(!reg);
+ return reg;
+}
+
+void *dpll_pin_on_dpll_priv(struct dpll_device *dpll,
+ struct dpll_pin *pin)
+{
+ struct dpll_pin_registration *reg;
+ struct dpll_pin_ref *ref;
+
+ ref = xa_load(&dpll->pin_refs, pin->pin_idx);
+ if (!ref)
+ return NULL;
+ reg = dpll_pin_registration_first(ref);
+ return reg->priv;
+}
+
+void *dpll_pin_on_pin_priv(struct dpll_pin *parent,
+ struct dpll_pin *pin)
+{
+ struct dpll_pin_registration *reg;
+ struct dpll_pin_ref *ref;
+
+ ref = xa_load(&pin->parent_refs, parent->pin_idx);
+ if (!ref)
+ return NULL;
+ reg = dpll_pin_registration_first(ref);
+ return reg->priv;
+}
+
+const struct dpll_pin_ops *dpll_pin_ops(struct dpll_pin_ref *ref)
+{
+ struct dpll_pin_registration *reg;
+
+ reg = dpll_pin_registration_first(ref);
+ return reg->ops;
+}
+
+static int __init dpll_init(void)
+{
+ int ret;
+
+ ret = genl_register_family(&dpll_nl_family);
+ if (ret)
+ goto error;
+
+ return 0;
+
+error:
+ mutex_destroy(&dpll_lock);
+ return ret;
+}
+
+static void __exit dpll_exit(void)
+{
+ genl_unregister_family(&dpll_nl_family);
+ mutex_destroy(&dpll_lock);
+}
+
+subsys_initcall(dpll_init);
+module_exit(dpll_exit);
diff --git a/drivers/dpll/dpll_core.h b/drivers/dpll/dpll_core.h
new file mode 100644
index 000000000000..5585873c5c1b
--- /dev/null
+++ b/drivers/dpll/dpll_core.h
@@ -0,0 +1,89 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2023 Meta Platforms, Inc. and affiliates
+ * Copyright (c) 2023 Intel and affiliates
+ */
+
+#ifndef __DPLL_CORE_H__
+#define __DPLL_CORE_H__
+
+#include <linux/dpll.h>
+#include <linux/list.h>
+#include <linux/refcount.h>
+#include "dpll_nl.h"
+
+#define DPLL_REGISTERED XA_MARK_1
+
+/**
+ * struct dpll_device - stores DPLL device internal data
+ * @id: unique id number for device given by dpll subsystem
+ * @device_idx: id given by dev driver
+ * @clock_id: unique identifier (clock_id) of a dpll
+ * @module: module of creator
+ * @type: type of a dpll
+ * @pin_refs: stores pins registered within a dpll
+ * @refcount: refcount
+ * @registration_list: list of registered ops and priv data of dpll owners
+ **/
+struct dpll_device {
+ u32 id;
+ u32 device_idx;
+ u64 clock_id;
+ struct module *module;
+ enum dpll_type type;
+ struct xarray pin_refs;
+ refcount_t refcount;
+ struct list_head registration_list;
+};
+
+/**
+ * struct dpll_pin - structure for a dpll pin
+ * @id: unique id number for pin given by dpll subsystem
+ * @pin_idx: index of a pin given by dev driver
+ * @clock_id: clock_id of creator
+ * @module: module of creator
+ * @dpll_refs: hold referencees to dplls pin was registered with
+ * @parent_refs: hold references to parent pins pin was registered with
+ * @prop: pointer to pin properties given by registerer
+ * @rclk_dev_name: holds name of device when pin can recover clock from it
+ * @refcount: refcount
+ **/
+struct dpll_pin {
+ u32 id;
+ u32 pin_idx;
+ u64 clock_id;
+ struct module *module;
+ struct xarray dpll_refs;
+ struct xarray parent_refs;
+ const struct dpll_pin_properties *prop;
+ refcount_t refcount;
+};
+
+/**
+ * struct dpll_pin_ref - structure for referencing either dpll or pins
+ * @dpll: pointer to a dpll
+ * @pin: pointer to a pin
+ * @registration_list: list of ops and priv data registered with the ref
+ * @refcount: refcount
+ **/
+struct dpll_pin_ref {
+ union {
+ struct dpll_device *dpll;
+ struct dpll_pin *pin;
+ };
+ struct list_head registration_list;
+ refcount_t refcount;
+};
+
+void *dpll_priv(struct dpll_device *dpll);
+void *dpll_pin_on_dpll_priv(struct dpll_device *dpll, struct dpll_pin *pin);
+void *dpll_pin_on_pin_priv(struct dpll_pin *parent, struct dpll_pin *pin);
+
+const struct dpll_device_ops *dpll_device_ops(struct dpll_device *dpll);
+struct dpll_device *dpll_device_get_by_id(int id);
+const struct dpll_pin_ops *dpll_pin_ops(struct dpll_pin_ref *ref);
+struct dpll_pin_ref *dpll_xa_ref_dpll_first(struct xarray *xa_refs);
+extern struct xarray dpll_device_xa;
+extern struct xarray dpll_pin_xa;
+extern struct mutex dpll_lock;
+#endif
diff --git a/drivers/dpll/dpll_netlink.c b/drivers/dpll/dpll_netlink.c
new file mode 100644
index 000000000000..e20daba6896a
--- /dev/null
+++ b/drivers/dpll/dpll_netlink.c
@@ -0,0 +1,1253 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Generic netlink for DPLL management framework
+ *
+ * Copyright (c) 2023 Meta Platforms, Inc. and affiliates
+ * Copyright (c) 2023 Intel and affiliates
+ *
+ */
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <net/genetlink.h>
+#include "dpll_core.h"
+#include "dpll_netlink.h"
+#include "dpll_nl.h"
+#include <uapi/linux/dpll.h>
+
+#define ASSERT_NOT_NULL(ptr) (WARN_ON(!ptr))
+
+#define xa_for_each_marked_start(xa, index, entry, filter, start) \
+ for (index = start, entry = xa_find(xa, &index, ULONG_MAX, filter); \
+ entry; entry = xa_find_after(xa, &index, ULONG_MAX, filter))
+
+struct dpll_dump_ctx {
+ unsigned long idx;
+};
+
+static struct dpll_dump_ctx *dpll_dump_context(struct netlink_callback *cb)
+{
+ return (struct dpll_dump_ctx *)cb->ctx;
+}
+
+static int
+dpll_msg_add_dev_handle(struct sk_buff *msg, struct dpll_device *dpll)
+{
+ if (nla_put_u32(msg, DPLL_A_ID, dpll->id))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+static int
+dpll_msg_add_dev_parent_handle(struct sk_buff *msg, u32 id)
+{
+ if (nla_put_u32(msg, DPLL_A_PIN_PARENT_ID, id))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+/**
+ * dpll_msg_pin_handle_size - get size of pin handle attribute for given pin
+ * @pin: pin pointer
+ *
+ * Return: byte size of pin handle attribute for given pin.
+ */
+size_t dpll_msg_pin_handle_size(struct dpll_pin *pin)
+{
+ return pin ? nla_total_size(4) : 0; /* DPLL_A_PIN_ID */
+}
+EXPORT_SYMBOL_GPL(dpll_msg_pin_handle_size);
+
+/**
+ * dpll_msg_add_pin_handle - attach pin handle attribute to a given message
+ * @msg: pointer to sk_buff message to attach a pin handle
+ * @pin: pin pointer
+ *
+ * Return:
+ * * 0 - success
+ * * -EMSGSIZE - no space in message to attach pin handle
+ */
+int dpll_msg_add_pin_handle(struct sk_buff *msg, struct dpll_pin *pin)
+{
+ if (!pin)
+ return 0;
+ if (nla_put_u32(msg, DPLL_A_PIN_ID, pin->id))
+ return -EMSGSIZE;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(dpll_msg_add_pin_handle);
+
+static int
+dpll_msg_add_mode(struct sk_buff *msg, struct dpll_device *dpll,
+ struct netlink_ext_ack *extack)
+{
+ const struct dpll_device_ops *ops = dpll_device_ops(dpll);
+ enum dpll_mode mode;
+ int ret;
+
+ ret = ops->mode_get(dpll, dpll_priv(dpll), &mode, extack);
+ if (ret)
+ return ret;
+ if (nla_put_u32(msg, DPLL_A_MODE, mode))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+static int
+dpll_msg_add_mode_supported(struct sk_buff *msg, struct dpll_device *dpll,
+ struct netlink_ext_ack *extack)
+{
+ const struct dpll_device_ops *ops = dpll_device_ops(dpll);
+ enum dpll_mode mode;
+
+ if (!ops->mode_supported)
+ return 0;
+ for (mode = DPLL_MODE_MANUAL; mode <= DPLL_MODE_MAX; mode++)
+ if (ops->mode_supported(dpll, dpll_priv(dpll), mode, extack))
+ if (nla_put_u32(msg, DPLL_A_MODE_SUPPORTED, mode))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+static int
+dpll_msg_add_lock_status(struct sk_buff *msg, struct dpll_device *dpll,
+ struct netlink_ext_ack *extack)
+{
+ const struct dpll_device_ops *ops = dpll_device_ops(dpll);
+ enum dpll_lock_status status;
+ int ret;
+
+ ret = ops->lock_status_get(dpll, dpll_priv(dpll), &status, extack);
+ if (ret)
+ return ret;
+ if (nla_put_u32(msg, DPLL_A_LOCK_STATUS, status))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+static int
+dpll_msg_add_temp(struct sk_buff *msg, struct dpll_device *dpll,
+ struct netlink_ext_ack *extack)
+{
+ const struct dpll_device_ops *ops = dpll_device_ops(dpll);
+ s32 temp;
+ int ret;
+
+ if (!ops->temp_get)
+ return 0;
+ ret = ops->temp_get(dpll, dpll_priv(dpll), &temp, extack);
+ if (ret)
+ return ret;
+ if (nla_put_s32(msg, DPLL_A_TEMP, temp))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+static int
+dpll_msg_add_pin_prio(struct sk_buff *msg, struct dpll_pin *pin,
+ struct dpll_pin_ref *ref,
+ struct netlink_ext_ack *extack)
+{
+ const struct dpll_pin_ops *ops = dpll_pin_ops(ref);
+ struct dpll_device *dpll = ref->dpll;
+ u32 prio;
+ int ret;
+
+ if (!ops->prio_get)
+ return 0;
+ ret = ops->prio_get(pin, dpll_pin_on_dpll_priv(dpll, pin), dpll,
+ dpll_priv(dpll), &prio, extack);
+ if (ret)
+ return ret;
+ if (nla_put_u32(msg, DPLL_A_PIN_PRIO, prio))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+static int
+dpll_msg_add_pin_on_dpll_state(struct sk_buff *msg, struct dpll_pin *pin,
+ struct dpll_pin_ref *ref,
+ struct netlink_ext_ack *extack)
+{
+ const struct dpll_pin_ops *ops = dpll_pin_ops(ref);
+ struct dpll_device *dpll = ref->dpll;
+ enum dpll_pin_state state;
+ int ret;
+
+ if (!ops->state_on_dpll_get)
+ return 0;
+ ret = ops->state_on_dpll_get(pin, dpll_pin_on_dpll_priv(dpll, pin),
+ dpll, dpll_priv(dpll), &state, extack);
+ if (ret)
+ return ret;
+ if (nla_put_u32(msg, DPLL_A_PIN_STATE, state))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+static int
+dpll_msg_add_pin_direction(struct sk_buff *msg, struct dpll_pin *pin,
+ struct dpll_pin_ref *ref,
+ struct netlink_ext_ack *extack)
+{
+ const struct dpll_pin_ops *ops = dpll_pin_ops(ref);
+ struct dpll_device *dpll = ref->dpll;
+ enum dpll_pin_direction direction;
+ int ret;
+
+ ret = ops->direction_get(pin, dpll_pin_on_dpll_priv(dpll, pin), dpll,
+ dpll_priv(dpll), &direction, extack);
+ if (ret)
+ return ret;
+ if (nla_put_u32(msg, DPLL_A_PIN_DIRECTION, direction))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+static int
+dpll_msg_add_pin_freq(struct sk_buff *msg, struct dpll_pin *pin,
+ struct dpll_pin_ref *ref, struct netlink_ext_ack *extack)
+{
+ const struct dpll_pin_ops *ops = dpll_pin_ops(ref);
+ struct dpll_device *dpll = ref->dpll;
+ struct nlattr *nest;
+ int fs, ret;
+ u64 freq;
+
+ if (!ops->frequency_get)
+ return 0;
+ ret = ops->frequency_get(pin, dpll_pin_on_dpll_priv(dpll, pin), dpll,
+ dpll_priv(dpll), &freq, extack);
+ if (ret)
+ return ret;
+ if (nla_put_64bit(msg, DPLL_A_PIN_FREQUENCY, sizeof(freq), &freq,
+ DPLL_A_PIN_PAD))
+ return -EMSGSIZE;
+ for (fs = 0; fs < pin->prop->freq_supported_num; fs++) {
+ nest = nla_nest_start(msg, DPLL_A_PIN_FREQUENCY_SUPPORTED);
+ if (!nest)
+ return -EMSGSIZE;
+ freq = pin->prop->freq_supported[fs].min;
+ if (nla_put_64bit(msg, DPLL_A_PIN_FREQUENCY_MIN, sizeof(freq),
+ &freq, DPLL_A_PIN_PAD)) {
+ nla_nest_cancel(msg, nest);
+ return -EMSGSIZE;
+ }
+ freq = pin->prop->freq_supported[fs].max;
+ if (nla_put_64bit(msg, DPLL_A_PIN_FREQUENCY_MAX, sizeof(freq),
+ &freq, DPLL_A_PIN_PAD)) {
+ nla_nest_cancel(msg, nest);
+ return -EMSGSIZE;
+ }
+ nla_nest_end(msg, nest);
+ }
+
+ return 0;
+}
+
+static bool dpll_pin_is_freq_supported(struct dpll_pin *pin, u32 freq)
+{
+ int fs;
+
+ for (fs = 0; fs < pin->prop->freq_supported_num; fs++)
+ if (freq >= pin->prop->freq_supported[fs].min &&
+ freq <= pin->prop->freq_supported[fs].max)
+ return true;
+ return false;
+}
+
+static int
+dpll_msg_add_pin_parents(struct sk_buff *msg, struct dpll_pin *pin,
+ struct dpll_pin_ref *dpll_ref,
+ struct netlink_ext_ack *extack)
+{
+ enum dpll_pin_state state;
+ struct dpll_pin_ref *ref;
+ struct dpll_pin *ppin;
+ struct nlattr *nest;
+ unsigned long index;
+ int ret;
+
+ xa_for_each(&pin->parent_refs, index, ref) {
+ const struct dpll_pin_ops *ops = dpll_pin_ops(ref);
+ void *parent_priv;
+
+ ppin = ref->pin;
+ parent_priv = dpll_pin_on_dpll_priv(dpll_ref->dpll, ppin);
+ ret = ops->state_on_pin_get(pin,
+ dpll_pin_on_pin_priv(ppin, pin),
+ ppin, parent_priv, &state, extack);
+ if (ret)
+ return ret;
+ nest = nla_nest_start(msg, DPLL_A_PIN_PARENT_PIN);
+ if (!nest)
+ return -EMSGSIZE;
+ ret = dpll_msg_add_dev_parent_handle(msg, ppin->id);
+ if (ret)
+ goto nest_cancel;
+ if (nla_put_u32(msg, DPLL_A_PIN_STATE, state)) {
+ ret = -EMSGSIZE;
+ goto nest_cancel;
+ }
+ nla_nest_end(msg, nest);
+ }
+
+ return 0;
+
+nest_cancel:
+ nla_nest_cancel(msg, nest);
+ return ret;
+}
+
+static int
+dpll_msg_add_pin_dplls(struct sk_buff *msg, struct dpll_pin *pin,
+ struct netlink_ext_ack *extack)
+{
+ struct dpll_pin_ref *ref;
+ struct nlattr *attr;
+ unsigned long index;
+ int ret;
+
+ xa_for_each(&pin->dpll_refs, index, ref) {
+ attr = nla_nest_start(msg, DPLL_A_PIN_PARENT_DEVICE);
+ if (!attr)
+ return -EMSGSIZE;
+ ret = dpll_msg_add_dev_parent_handle(msg, ref->dpll->id);
+ if (ret)
+ goto nest_cancel;
+ ret = dpll_msg_add_pin_on_dpll_state(msg, pin, ref, extack);
+ if (ret)
+ goto nest_cancel;
+ ret = dpll_msg_add_pin_prio(msg, pin, ref, extack);
+ if (ret)
+ goto nest_cancel;
+ ret = dpll_msg_add_pin_direction(msg, pin, ref, extack);
+ if (ret)
+ goto nest_cancel;
+ nla_nest_end(msg, attr);
+ }
+
+ return 0;
+
+nest_cancel:
+ nla_nest_end(msg, attr);
+ return ret;
+}
+
+static int
+dpll_cmd_pin_get_one(struct sk_buff *msg, struct dpll_pin *pin,
+ struct netlink_ext_ack *extack)
+{
+ const struct dpll_pin_properties *prop = pin->prop;
+ struct dpll_pin_ref *ref;
+ int ret;
+
+ ref = dpll_xa_ref_dpll_first(&pin->dpll_refs);
+ ASSERT_NOT_NULL(ref);
+
+ ret = dpll_msg_add_pin_handle(msg, pin);
+ if (ret)
+ return ret;
+ if (nla_put_string(msg, DPLL_A_PIN_MODULE_NAME,
+ module_name(pin->module)))
+ return -EMSGSIZE;
+ if (nla_put_64bit(msg, DPLL_A_PIN_CLOCK_ID, sizeof(pin->clock_id),
+ &pin->clock_id, DPLL_A_PIN_PAD))
+ return -EMSGSIZE;
+ if (prop->board_label &&
+ nla_put_string(msg, DPLL_A_PIN_BOARD_LABEL, prop->board_label))
+ return -EMSGSIZE;
+ if (prop->panel_label &&
+ nla_put_string(msg, DPLL_A_PIN_PANEL_LABEL, prop->panel_label))
+ return -EMSGSIZE;
+ if (prop->package_label &&
+ nla_put_string(msg, DPLL_A_PIN_PACKAGE_LABEL,
+ prop->package_label))
+ return -EMSGSIZE;
+ if (nla_put_u32(msg, DPLL_A_PIN_TYPE, prop->type))
+ return -EMSGSIZE;
+ if (nla_put_u32(msg, DPLL_A_PIN_CAPABILITIES, prop->capabilities))
+ return -EMSGSIZE;
+ ret = dpll_msg_add_pin_freq(msg, pin, ref, extack);
+ if (ret)
+ return ret;
+ if (xa_empty(&pin->parent_refs))
+ ret = dpll_msg_add_pin_dplls(msg, pin, extack);
+ else
+ ret = dpll_msg_add_pin_parents(msg, pin, ref, extack);
+
+ return ret;
+}
+
+static int
+dpll_device_get_one(struct dpll_device *dpll, struct sk_buff *msg,
+ struct netlink_ext_ack *extack)
+{
+ int ret;
+
+ ret = dpll_msg_add_dev_handle(msg, dpll);
+ if (ret)
+ return ret;
+ if (nla_put_string(msg, DPLL_A_MODULE_NAME, module_name(dpll->module)))
+ return -EMSGSIZE;
+ if (nla_put_64bit(msg, DPLL_A_CLOCK_ID, sizeof(dpll->clock_id),
+ &dpll->clock_id, DPLL_A_PAD))
+ return -EMSGSIZE;
+ ret = dpll_msg_add_temp(msg, dpll, extack);
+ if (ret)
+ return ret;
+ ret = dpll_msg_add_lock_status(msg, dpll, extack);
+ if (ret)
+ return ret;
+ ret = dpll_msg_add_mode(msg, dpll, extack);
+ if (ret)
+ return ret;
+ ret = dpll_msg_add_mode_supported(msg, dpll, extack);
+ if (ret)
+ return ret;
+ if (nla_put_u32(msg, DPLL_A_TYPE, dpll->type))
+ return -EMSGSIZE;
+
+ return ret;
+}
+
+static int
+dpll_device_event_send(enum dpll_cmd event, struct dpll_device *dpll)
+{
+ struct sk_buff *msg;
+ int ret = -ENOMEM;
+ void *hdr;
+
+ if (WARN_ON(!xa_get_mark(&dpll_device_xa, dpll->id, DPLL_REGISTERED)))
+ return -ENODEV;
+ msg = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+ hdr = genlmsg_put(msg, 0, 0, &dpll_nl_family, 0, event);
+ if (!hdr)
+ goto err_free_msg;
+ ret = dpll_device_get_one(dpll, msg, NULL);
+ if (ret)
+ goto err_cancel_msg;
+ genlmsg_end(msg, hdr);
+ genlmsg_multicast(&dpll_nl_family, msg, 0, 0, GFP_KERNEL);
+
+ return 0;
+
+err_cancel_msg:
+ genlmsg_cancel(msg, hdr);
+err_free_msg:
+ nlmsg_free(msg);
+
+ return ret;
+}
+
+int dpll_device_create_ntf(struct dpll_device *dpll)
+{
+ return dpll_device_event_send(DPLL_CMD_DEVICE_CREATE_NTF, dpll);
+}
+
+int dpll_device_delete_ntf(struct dpll_device *dpll)
+{
+ return dpll_device_event_send(DPLL_CMD_DEVICE_DELETE_NTF, dpll);
+}
+
+static int
+__dpll_device_change_ntf(struct dpll_device *dpll)
+{
+ return dpll_device_event_send(DPLL_CMD_DEVICE_CHANGE_NTF, dpll);
+}
+
+/**
+ * dpll_device_change_ntf - notify that the dpll device has been changed
+ * @dpll: registered dpll pointer
+ *
+ * Context: acquires and holds a dpll_lock.
+ * Return: 0 if succeeds, error code otherwise.
+ */
+int dpll_device_change_ntf(struct dpll_device *dpll)
+{
+ int ret;
+
+ mutex_lock(&dpll_lock);
+ ret = __dpll_device_change_ntf(dpll);
+ mutex_unlock(&dpll_lock);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(dpll_device_change_ntf);
+
+static int
+dpll_pin_event_send(enum dpll_cmd event, struct dpll_pin *pin)
+{
+ struct sk_buff *msg;
+ int ret = -ENOMEM;
+ void *hdr;
+
+ if (WARN_ON(!xa_get_mark(&dpll_pin_xa, pin->id, DPLL_REGISTERED)))
+ return -ENODEV;
+
+ msg = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ hdr = genlmsg_put(msg, 0, 0, &dpll_nl_family, 0, event);
+ if (!hdr)
+ goto err_free_msg;
+ ret = dpll_cmd_pin_get_one(msg, pin, NULL);
+ if (ret)
+ goto err_cancel_msg;
+ genlmsg_end(msg, hdr);
+ genlmsg_multicast(&dpll_nl_family, msg, 0, 0, GFP_KERNEL);
+
+ return 0;
+
+err_cancel_msg:
+ genlmsg_cancel(msg, hdr);
+err_free_msg:
+ nlmsg_free(msg);
+
+ return ret;
+}
+
+int dpll_pin_create_ntf(struct dpll_pin *pin)
+{
+ return dpll_pin_event_send(DPLL_CMD_PIN_CREATE_NTF, pin);
+}
+
+int dpll_pin_delete_ntf(struct dpll_pin *pin)
+{
+ return dpll_pin_event_send(DPLL_CMD_PIN_DELETE_NTF, pin);
+}
+
+static int __dpll_pin_change_ntf(struct dpll_pin *pin)
+{
+ return dpll_pin_event_send(DPLL_CMD_PIN_CHANGE_NTF, pin);
+}
+
+/**
+ * dpll_pin_change_ntf - notify that the pin has been changed
+ * @pin: registered pin pointer
+ *
+ * Context: acquires and holds a dpll_lock.
+ * Return: 0 if succeeds, error code otherwise.
+ */
+int dpll_pin_change_ntf(struct dpll_pin *pin)
+{
+ int ret;
+
+ mutex_lock(&dpll_lock);
+ ret = __dpll_pin_change_ntf(pin);
+ mutex_unlock(&dpll_lock);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(dpll_pin_change_ntf);
+
+static int
+dpll_pin_freq_set(struct dpll_pin *pin, struct nlattr *a,
+ struct netlink_ext_ack *extack)
+{
+ u64 freq = nla_get_u64(a);
+ struct dpll_pin_ref *ref;
+ unsigned long i;
+ int ret;
+
+ if (!dpll_pin_is_freq_supported(pin, freq)) {
+ NL_SET_ERR_MSG_ATTR(extack, a, "frequency is not supported by the device");
+ return -EINVAL;
+ }
+
+ xa_for_each(&pin->dpll_refs, i, ref) {
+ const struct dpll_pin_ops *ops = dpll_pin_ops(ref);
+ struct dpll_device *dpll = ref->dpll;
+
+ if (!ops->frequency_set)
+ return -EOPNOTSUPP;
+ ret = ops->frequency_set(pin, dpll_pin_on_dpll_priv(dpll, pin),
+ dpll, dpll_priv(dpll), freq, extack);
+ if (ret)
+ return ret;
+ }
+ __dpll_pin_change_ntf(pin);
+
+ return 0;
+}
+
+static int
+dpll_pin_on_pin_state_set(struct dpll_pin *pin, u32 parent_idx,
+ enum dpll_pin_state state,
+ struct netlink_ext_ack *extack)
+{
+ struct dpll_pin_ref *parent_ref;
+ const struct dpll_pin_ops *ops;
+ struct dpll_pin_ref *dpll_ref;
+ void *pin_priv, *parent_priv;
+ struct dpll_pin *parent;
+ unsigned long i;
+ int ret;
+
+ if (!(DPLL_PIN_CAPABILITIES_STATE_CAN_CHANGE &
+ pin->prop->capabilities)) {
+ NL_SET_ERR_MSG(extack, "state changing is not allowed");
+ return -EOPNOTSUPP;
+ }
+ parent = xa_load(&dpll_pin_xa, parent_idx);
+ if (!parent)
+ return -EINVAL;
+ parent_ref = xa_load(&pin->parent_refs, parent->pin_idx);
+ if (!parent_ref)
+ return -EINVAL;
+ xa_for_each(&parent->dpll_refs, i, dpll_ref) {
+ ops = dpll_pin_ops(parent_ref);
+ if (!ops->state_on_pin_set)
+ return -EOPNOTSUPP;
+ pin_priv = dpll_pin_on_pin_priv(parent, pin);
+ parent_priv = dpll_pin_on_dpll_priv(dpll_ref->dpll, parent);
+ ret = ops->state_on_pin_set(pin, pin_priv, parent, parent_priv,
+ state, extack);
+ if (ret)
+ return ret;
+ }
+ __dpll_pin_change_ntf(pin);
+
+ return 0;
+}
+
+static int
+dpll_pin_state_set(struct dpll_device *dpll, struct dpll_pin *pin,
+ enum dpll_pin_state state,
+ struct netlink_ext_ack *extack)
+{
+ const struct dpll_pin_ops *ops;
+ struct dpll_pin_ref *ref;
+ int ret;
+
+ if (!(DPLL_PIN_CAPABILITIES_STATE_CAN_CHANGE &
+ pin->prop->capabilities)) {
+ NL_SET_ERR_MSG(extack, "state changing is not allowed");
+ return -EOPNOTSUPP;
+ }
+ ref = xa_load(&pin->dpll_refs, dpll->id);
+ ASSERT_NOT_NULL(ref);
+ ops = dpll_pin_ops(ref);
+ if (!ops->state_on_dpll_set)
+ return -EOPNOTSUPP;
+ ret = ops->state_on_dpll_set(pin, dpll_pin_on_dpll_priv(dpll, pin),
+ dpll, dpll_priv(dpll), state, extack);
+ if (ret)
+ return ret;
+ __dpll_pin_change_ntf(pin);
+
+ return 0;
+}
+
+static int
+dpll_pin_prio_set(struct dpll_device *dpll, struct dpll_pin *pin,
+ u32 prio, struct netlink_ext_ack *extack)
+{
+ const struct dpll_pin_ops *ops;
+ struct dpll_pin_ref *ref;
+ int ret;
+
+ if (!(DPLL_PIN_CAPABILITIES_PRIORITY_CAN_CHANGE &
+ pin->prop->capabilities)) {
+ NL_SET_ERR_MSG(extack, "prio changing is not allowed");
+ return -EOPNOTSUPP;
+ }
+ ref = xa_load(&pin->dpll_refs, dpll->id);
+ ASSERT_NOT_NULL(ref);
+ ops = dpll_pin_ops(ref);
+ if (!ops->prio_set)
+ return -EOPNOTSUPP;
+ ret = ops->prio_set(pin, dpll_pin_on_dpll_priv(dpll, pin), dpll,
+ dpll_priv(dpll), prio, extack);
+ if (ret)
+ return ret;
+ __dpll_pin_change_ntf(pin);
+
+ return 0;
+}
+
+static int
+dpll_pin_direction_set(struct dpll_pin *pin, struct dpll_device *dpll,
+ enum dpll_pin_direction direction,
+ struct netlink_ext_ack *extack)
+{
+ const struct dpll_pin_ops *ops;
+ struct dpll_pin_ref *ref;
+ int ret;
+
+ if (!(DPLL_PIN_CAPABILITIES_DIRECTION_CAN_CHANGE &
+ pin->prop->capabilities)) {
+ NL_SET_ERR_MSG(extack, "direction changing is not allowed");
+ return -EOPNOTSUPP;
+ }
+ ref = xa_load(&pin->dpll_refs, dpll->id);
+ ASSERT_NOT_NULL(ref);
+ ops = dpll_pin_ops(ref);
+ if (!ops->direction_set)
+ return -EOPNOTSUPP;
+ ret = ops->direction_set(pin, dpll_pin_on_dpll_priv(dpll, pin),
+ dpll, dpll_priv(dpll), direction, extack);
+ if (ret)
+ return ret;
+ __dpll_pin_change_ntf(pin);
+
+ return 0;
+}
+
+static int
+dpll_pin_parent_device_set(struct dpll_pin *pin, struct nlattr *parent_nest,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[DPLL_A_PIN_MAX + 1];
+ enum dpll_pin_direction direction;
+ enum dpll_pin_state state;
+ struct dpll_pin_ref *ref;
+ struct dpll_device *dpll;
+ u32 pdpll_idx, prio;
+ int ret;
+
+ nla_parse_nested(tb, DPLL_A_PIN_MAX, parent_nest,
+ dpll_pin_parent_device_nl_policy, extack);
+ if (!tb[DPLL_A_PIN_PARENT_ID]) {
+ NL_SET_ERR_MSG(extack, "device parent id expected");
+ return -EINVAL;
+ }
+ pdpll_idx = nla_get_u32(tb[DPLL_A_PIN_PARENT_ID]);
+ dpll = xa_load(&dpll_device_xa, pdpll_idx);
+ if (!dpll) {
+ NL_SET_ERR_MSG(extack, "parent device not found");
+ return -EINVAL;
+ }
+ ref = xa_load(&pin->dpll_refs, dpll->id);
+ if (!ref) {
+ NL_SET_ERR_MSG(extack, "pin not connected to given parent device");
+ return -EINVAL;
+ }
+ if (tb[DPLL_A_PIN_STATE]) {
+ state = nla_get_u32(tb[DPLL_A_PIN_STATE]);
+ ret = dpll_pin_state_set(dpll, pin, state, extack);
+ if (ret)
+ return ret;
+ }
+ if (tb[DPLL_A_PIN_PRIO]) {
+ prio = nla_get_u32(tb[DPLL_A_PIN_PRIO]);
+ ret = dpll_pin_prio_set(dpll, pin, prio, extack);
+ if (ret)
+ return ret;
+ }
+ if (tb[DPLL_A_PIN_DIRECTION]) {
+ direction = nla_get_u32(tb[DPLL_A_PIN_DIRECTION]);
+ ret = dpll_pin_direction_set(pin, dpll, direction, extack);
+ if (ret)
+ return ret;
+ }
+ return 0;
+}
+
+static int
+dpll_pin_parent_pin_set(struct dpll_pin *pin, struct nlattr *parent_nest,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[DPLL_A_PIN_MAX + 1];
+ enum dpll_pin_state state;
+ u32 ppin_idx;
+ int ret;
+
+ nla_parse_nested(tb, DPLL_A_PIN_MAX, parent_nest,
+ dpll_pin_parent_pin_nl_policy, extack);
+ if (!tb[DPLL_A_PIN_PARENT_ID]) {
+ NL_SET_ERR_MSG(extack, "device parent id expected");
+ return -EINVAL;
+ }
+ ppin_idx = nla_get_u32(tb[DPLL_A_PIN_PARENT_ID]);
+ state = nla_get_u32(tb[DPLL_A_PIN_STATE]);
+ ret = dpll_pin_on_pin_state_set(pin, ppin_idx, state, extack);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+static int
+dpll_pin_set_from_nlattr(struct dpll_pin *pin, struct genl_info *info)
+{
+ struct nlattr *a;
+ int rem, ret;
+
+ nla_for_each_attr(a, genlmsg_data(info->genlhdr),
+ genlmsg_len(info->genlhdr), rem) {
+ switch (nla_type(a)) {
+ case DPLL_A_PIN_FREQUENCY:
+ ret = dpll_pin_freq_set(pin, a, info->extack);
+ if (ret)
+ return ret;
+ break;
+ case DPLL_A_PIN_PARENT_DEVICE:
+ ret = dpll_pin_parent_device_set(pin, a, info->extack);
+ if (ret)
+ return ret;
+ break;
+ case DPLL_A_PIN_PARENT_PIN:
+ ret = dpll_pin_parent_pin_set(pin, a, info->extack);
+ if (ret)
+ return ret;
+ break;
+ }
+ }
+
+ return 0;
+}
+
+static struct dpll_pin *
+dpll_pin_find(u64 clock_id, struct nlattr *mod_name_attr,
+ enum dpll_pin_type type, struct nlattr *board_label,
+ struct nlattr *panel_label, struct nlattr *package_label,
+ struct netlink_ext_ack *extack)
+{
+ bool board_match, panel_match, package_match;
+ struct dpll_pin *pin_match = NULL, *pin;
+ const struct dpll_pin_properties *prop;
+ bool cid_match, mod_match, type_match;
+ unsigned long i;
+
+ xa_for_each_marked(&dpll_pin_xa, i, pin, DPLL_REGISTERED) {
+ prop = pin->prop;
+ cid_match = clock_id ? pin->clock_id == clock_id : true;
+ mod_match = mod_name_attr && module_name(pin->module) ?
+ !nla_strcmp(mod_name_attr,
+ module_name(pin->module)) : true;
+ type_match = type ? prop->type == type : true;
+ board_match = board_label ? (prop->board_label ?
+ !nla_strcmp(board_label, prop->board_label) : false) :
+ true;
+ panel_match = panel_label ? (prop->panel_label ?
+ !nla_strcmp(panel_label, prop->panel_label) : false) :
+ true;
+ package_match = package_label ? (prop->package_label ?
+ !nla_strcmp(package_label, prop->package_label) :
+ false) : true;
+ if (cid_match && mod_match && type_match && board_match &&
+ panel_match && package_match) {
+ if (pin_match) {
+ NL_SET_ERR_MSG(extack, "multiple matches");
+ return ERR_PTR(-EINVAL);
+ }
+ pin_match = pin;
+ }
+ }
+ if (!pin_match) {
+ NL_SET_ERR_MSG(extack, "not found");
+ return ERR_PTR(-ENODEV);
+ }
+ return pin_match;
+}
+
+static struct dpll_pin *dpll_pin_find_from_nlattr(struct genl_info *info)
+{
+ struct nlattr *attr, *mod_name_attr = NULL, *board_label_attr = NULL,
+ *panel_label_attr = NULL, *package_label_attr = NULL;
+ enum dpll_pin_type type = 0;
+ u64 clock_id = 0;
+ int rem = 0;
+
+ nla_for_each_attr(attr, genlmsg_data(info->genlhdr),
+ genlmsg_len(info->genlhdr), rem) {
+ switch (nla_type(attr)) {
+ case DPLL_A_PIN_CLOCK_ID:
+ if (clock_id)
+ goto duplicated_attr;
+ clock_id = nla_get_u64(attr);
+ break;
+ case DPLL_A_PIN_MODULE_NAME:
+ if (mod_name_attr)
+ goto duplicated_attr;
+ mod_name_attr = attr;
+ break;
+ case DPLL_A_PIN_TYPE:
+ if (type)
+ goto duplicated_attr;
+ type = nla_get_u32(attr);
+ break;
+ case DPLL_A_PIN_BOARD_LABEL:
+ if (board_label_attr)
+ goto duplicated_attr;
+ board_label_attr = attr;
+ break;
+ case DPLL_A_PIN_PANEL_LABEL:
+ if (panel_label_attr)
+ goto duplicated_attr;
+ panel_label_attr = attr;
+ break;
+ case DPLL_A_PIN_PACKAGE_LABEL:
+ if (package_label_attr)
+ goto duplicated_attr;
+ package_label_attr = attr;
+ break;
+ default:
+ break;
+ }
+ }
+ if (!(clock_id || mod_name_attr || board_label_attr ||
+ panel_label_attr || package_label_attr)) {
+ NL_SET_ERR_MSG(info->extack, "missing attributes");
+ return ERR_PTR(-EINVAL);
+ }
+ return dpll_pin_find(clock_id, mod_name_attr, type, board_label_attr,
+ panel_label_attr, package_label_attr,
+ info->extack);
+duplicated_attr:
+ NL_SET_ERR_MSG(info->extack, "duplicated attribute");
+ return ERR_PTR(-EINVAL);
+}
+
+int dpll_nl_pin_id_get_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct dpll_pin *pin;
+ struct sk_buff *msg;
+ struct nlattr *hdr;
+ int ret;
+
+ msg = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+ hdr = genlmsg_put_reply(msg, info, &dpll_nl_family, 0,
+ DPLL_CMD_PIN_ID_GET);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ pin = dpll_pin_find_from_nlattr(info);
+ if (!IS_ERR(pin)) {
+ ret = dpll_msg_add_pin_handle(msg, pin);
+ if (ret) {
+ nlmsg_free(msg);
+ return ret;
+ }
+ }
+ genlmsg_end(msg, hdr);
+
+ return genlmsg_reply(msg, info);
+}
+
+int dpll_nl_pin_get_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct dpll_pin *pin = info->user_ptr[0];
+ struct sk_buff *msg;
+ struct nlattr *hdr;
+ int ret;
+
+ if (!pin)
+ return -ENODEV;
+ msg = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+ hdr = genlmsg_put_reply(msg, info, &dpll_nl_family, 0,
+ DPLL_CMD_PIN_GET);
+ if (!hdr)
+ return -EMSGSIZE;
+ ret = dpll_cmd_pin_get_one(msg, pin, info->extack);
+ if (ret) {
+ nlmsg_free(msg);
+ return ret;
+ }
+ genlmsg_end(msg, hdr);
+
+ return genlmsg_reply(msg, info);
+}
+
+int dpll_nl_pin_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct dpll_dump_ctx *ctx = dpll_dump_context(cb);
+ struct dpll_pin *pin;
+ struct nlattr *hdr;
+ unsigned long i;
+ int ret = 0;
+
+ xa_for_each_marked_start(&dpll_pin_xa, i, pin, DPLL_REGISTERED,
+ ctx->idx) {
+ hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ &dpll_nl_family, NLM_F_MULTI,
+ DPLL_CMD_PIN_GET);
+ if (!hdr) {
+ ret = -EMSGSIZE;
+ break;
+ }
+ ret = dpll_cmd_pin_get_one(skb, pin, cb->extack);
+ if (ret) {
+ genlmsg_cancel(skb, hdr);
+ break;
+ }
+ genlmsg_end(skb, hdr);
+ }
+ if (ret == -EMSGSIZE) {
+ ctx->idx = i;
+ return skb->len;
+ }
+ return ret;
+}
+
+int dpll_nl_pin_set_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct dpll_pin *pin = info->user_ptr[0];
+
+ return dpll_pin_set_from_nlattr(pin, info);
+}
+
+static struct dpll_device *
+dpll_device_find(u64 clock_id, struct nlattr *mod_name_attr,
+ enum dpll_type type, struct netlink_ext_ack *extack)
+{
+ struct dpll_device *dpll_match = NULL, *dpll;
+ bool cid_match, mod_match, type_match;
+ unsigned long i;
+
+ xa_for_each_marked(&dpll_device_xa, i, dpll, DPLL_REGISTERED) {
+ cid_match = clock_id ? dpll->clock_id == clock_id : true;
+ mod_match = mod_name_attr ? (module_name(dpll->module) ?
+ !nla_strcmp(mod_name_attr,
+ module_name(dpll->module)) : false) : true;
+ type_match = type ? dpll->type == type : true;
+ if (cid_match && mod_match && type_match) {
+ if (dpll_match) {
+ NL_SET_ERR_MSG(extack, "multiple matches");
+ return ERR_PTR(-EINVAL);
+ }
+ dpll_match = dpll;
+ }
+ }
+ if (!dpll_match) {
+ NL_SET_ERR_MSG(extack, "not found");
+ return ERR_PTR(-ENODEV);
+ }
+
+ return dpll_match;
+}
+
+static struct dpll_device *
+dpll_device_find_from_nlattr(struct genl_info *info)
+{
+ struct nlattr *attr, *mod_name_attr = NULL;
+ enum dpll_type type = 0;
+ u64 clock_id = 0;
+ int rem = 0;
+
+ nla_for_each_attr(attr, genlmsg_data(info->genlhdr),
+ genlmsg_len(info->genlhdr), rem) {
+ switch (nla_type(attr)) {
+ case DPLL_A_CLOCK_ID:
+ if (clock_id)
+ goto duplicated_attr;
+ clock_id = nla_get_u64(attr);
+ break;
+ case DPLL_A_MODULE_NAME:
+ if (mod_name_attr)
+ goto duplicated_attr;
+ mod_name_attr = attr;
+ break;
+ case DPLL_A_TYPE:
+ if (type)
+ goto duplicated_attr;
+ type = nla_get_u32(attr);
+ break;
+ default:
+ break;
+ }
+ }
+ if (!clock_id && !mod_name_attr && !type) {
+ NL_SET_ERR_MSG(info->extack, "missing attributes");
+ return ERR_PTR(-EINVAL);
+ }
+ return dpll_device_find(clock_id, mod_name_attr, type, info->extack);
+duplicated_attr:
+ NL_SET_ERR_MSG(info->extack, "duplicated attribute");
+ return ERR_PTR(-EINVAL);
+}
+
+int dpll_nl_device_id_get_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct dpll_device *dpll;
+ struct sk_buff *msg;
+ struct nlattr *hdr;
+ int ret;
+
+ msg = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+ hdr = genlmsg_put_reply(msg, info, &dpll_nl_family, 0,
+ DPLL_CMD_DEVICE_ID_GET);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ dpll = dpll_device_find_from_nlattr(info);
+ if (!IS_ERR(dpll)) {
+ ret = dpll_msg_add_dev_handle(msg, dpll);
+ if (ret) {
+ nlmsg_free(msg);
+ return ret;
+ }
+ }
+ genlmsg_end(msg, hdr);
+
+ return genlmsg_reply(msg, info);
+}
+
+int dpll_nl_device_get_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct dpll_device *dpll = info->user_ptr[0];
+ struct sk_buff *msg;
+ struct nlattr *hdr;
+ int ret;
+
+ msg = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+ hdr = genlmsg_put_reply(msg, info, &dpll_nl_family, 0,
+ DPLL_CMD_DEVICE_GET);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ ret = dpll_device_get_one(dpll, msg, info->extack);
+ if (ret) {
+ nlmsg_free(msg);
+ return ret;
+ }
+ genlmsg_end(msg, hdr);
+
+ return genlmsg_reply(msg, info);
+}
+
+int dpll_nl_device_set_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ /* placeholder for set command */
+ return 0;
+}
+
+int dpll_nl_device_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct dpll_dump_ctx *ctx = dpll_dump_context(cb);
+ struct dpll_device *dpll;
+ struct nlattr *hdr;
+ unsigned long i;
+ int ret = 0;
+
+ xa_for_each_marked_start(&dpll_device_xa, i, dpll, DPLL_REGISTERED,
+ ctx->idx) {
+ hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, &dpll_nl_family,
+ NLM_F_MULTI, DPLL_CMD_DEVICE_GET);
+ if (!hdr) {
+ ret = -EMSGSIZE;
+ break;
+ }
+ ret = dpll_device_get_one(dpll, skb, cb->extack);
+ if (ret) {
+ genlmsg_cancel(skb, hdr);
+ break;
+ }
+ genlmsg_end(skb, hdr);
+ }
+ if (ret == -EMSGSIZE) {
+ ctx->idx = i;
+ return skb->len;
+ }
+ return ret;
+}
+
+int dpll_pre_doit(const struct genl_split_ops *ops, struct sk_buff *skb,
+ struct genl_info *info)
+{
+ u32 id;
+
+ if (GENL_REQ_ATTR_CHECK(info, DPLL_A_ID))
+ return -EINVAL;
+
+ mutex_lock(&dpll_lock);
+ id = nla_get_u32(info->attrs[DPLL_A_ID]);
+ info->user_ptr[0] = dpll_device_get_by_id(id);
+ if (!info->user_ptr[0]) {
+ NL_SET_ERR_MSG(info->extack, "device not found");
+ goto unlock;
+ }
+ return 0;
+unlock:
+ mutex_unlock(&dpll_lock);
+ return -ENODEV;
+}
+
+void dpll_post_doit(const struct genl_split_ops *ops, struct sk_buff *skb,
+ struct genl_info *info)
+{
+ mutex_unlock(&dpll_lock);
+}
+
+int
+dpll_lock_doit(const struct genl_split_ops *ops, struct sk_buff *skb,
+ struct genl_info *info)
+{
+ mutex_lock(&dpll_lock);
+
+ return 0;
+}
+
+void
+dpll_unlock_doit(const struct genl_split_ops *ops, struct sk_buff *skb,
+ struct genl_info *info)
+{
+ mutex_unlock(&dpll_lock);
+}
+
+int dpll_lock_dumpit(struct netlink_callback *cb)
+{
+ mutex_lock(&dpll_lock);
+
+ return 0;
+}
+
+int dpll_unlock_dumpit(struct netlink_callback *cb)
+{
+ mutex_unlock(&dpll_lock);
+
+ return 0;
+}
+
+int dpll_pin_pre_doit(const struct genl_split_ops *ops, struct sk_buff *skb,
+ struct genl_info *info)
+{
+ int ret;
+
+ mutex_lock(&dpll_lock);
+ if (GENL_REQ_ATTR_CHECK(info, DPLL_A_PIN_ID)) {
+ ret = -EINVAL;
+ goto unlock_dev;
+ }
+ info->user_ptr[0] = xa_load(&dpll_pin_xa,
+ nla_get_u32(info->attrs[DPLL_A_PIN_ID]));
+ if (!info->user_ptr[0]) {
+ NL_SET_ERR_MSG(info->extack, "pin not found");
+ ret = -ENODEV;
+ goto unlock_dev;
+ }
+
+ return 0;
+
+unlock_dev:
+ mutex_unlock(&dpll_lock);
+ return ret;
+}
+
+void dpll_pin_post_doit(const struct genl_split_ops *ops, struct sk_buff *skb,
+ struct genl_info *info)
+{
+ mutex_unlock(&dpll_lock);
+}
diff --git a/drivers/dpll/dpll_netlink.h b/drivers/dpll/dpll_netlink.h
new file mode 100644
index 000000000000..a9cfd55f57fc
--- /dev/null
+++ b/drivers/dpll/dpll_netlink.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2023 Meta Platforms, Inc. and affiliates
+ * Copyright (c) 2023 Intel and affiliates
+ */
+
+int dpll_device_create_ntf(struct dpll_device *dpll);
+
+int dpll_device_delete_ntf(struct dpll_device *dpll);
+
+int dpll_pin_create_ntf(struct dpll_pin *pin);
+
+int dpll_pin_delete_ntf(struct dpll_pin *pin);
diff --git a/drivers/dpll/dpll_nl.c b/drivers/dpll/dpll_nl.c
new file mode 100644
index 000000000000..14064c8c783b
--- /dev/null
+++ b/drivers/dpll/dpll_nl.c
@@ -0,0 +1,162 @@
+// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
+/* Do not edit directly, auto-generated from: */
+/* Documentation/netlink/specs/dpll.yaml */
+/* YNL-GEN kernel source */
+
+#include <net/netlink.h>
+#include <net/genetlink.h>
+
+#include "dpll_nl.h"
+
+#include <uapi/linux/dpll.h>
+
+/* Common nested types */
+const struct nla_policy dpll_pin_parent_device_nl_policy[DPLL_A_PIN_STATE + 1] = {
+ [DPLL_A_PIN_PARENT_ID] = { .type = NLA_U32, },
+ [DPLL_A_PIN_DIRECTION] = NLA_POLICY_RANGE(NLA_U32, 1, 2),
+ [DPLL_A_PIN_PRIO] = { .type = NLA_U32, },
+ [DPLL_A_PIN_STATE] = NLA_POLICY_RANGE(NLA_U32, 1, 3),
+};
+
+const struct nla_policy dpll_pin_parent_pin_nl_policy[DPLL_A_PIN_STATE + 1] = {
+ [DPLL_A_PIN_PARENT_ID] = { .type = NLA_U32, },
+ [DPLL_A_PIN_STATE] = NLA_POLICY_RANGE(NLA_U32, 1, 3),
+};
+
+/* DPLL_CMD_DEVICE_ID_GET - do */
+static const struct nla_policy dpll_device_id_get_nl_policy[DPLL_A_TYPE + 1] = {
+ [DPLL_A_MODULE_NAME] = { .type = NLA_NUL_STRING, },
+ [DPLL_A_CLOCK_ID] = { .type = NLA_U64, },
+ [DPLL_A_TYPE] = NLA_POLICY_RANGE(NLA_U32, 1, 2),
+};
+
+/* DPLL_CMD_DEVICE_GET - do */
+static const struct nla_policy dpll_device_get_nl_policy[DPLL_A_ID + 1] = {
+ [DPLL_A_ID] = { .type = NLA_U32, },
+};
+
+/* DPLL_CMD_DEVICE_SET - do */
+static const struct nla_policy dpll_device_set_nl_policy[DPLL_A_ID + 1] = {
+ [DPLL_A_ID] = { .type = NLA_U32, },
+};
+
+/* DPLL_CMD_PIN_ID_GET - do */
+static const struct nla_policy dpll_pin_id_get_nl_policy[DPLL_A_PIN_TYPE + 1] = {
+ [DPLL_A_PIN_MODULE_NAME] = { .type = NLA_NUL_STRING, },
+ [DPLL_A_PIN_CLOCK_ID] = { .type = NLA_U64, },
+ [DPLL_A_PIN_BOARD_LABEL] = { .type = NLA_NUL_STRING, },
+ [DPLL_A_PIN_PANEL_LABEL] = { .type = NLA_NUL_STRING, },
+ [DPLL_A_PIN_PACKAGE_LABEL] = { .type = NLA_NUL_STRING, },
+ [DPLL_A_PIN_TYPE] = NLA_POLICY_RANGE(NLA_U32, 1, 5),
+};
+
+/* DPLL_CMD_PIN_GET - do */
+static const struct nla_policy dpll_pin_get_do_nl_policy[DPLL_A_PIN_ID + 1] = {
+ [DPLL_A_PIN_ID] = { .type = NLA_U32, },
+};
+
+/* DPLL_CMD_PIN_GET - dump */
+static const struct nla_policy dpll_pin_get_dump_nl_policy[DPLL_A_PIN_ID + 1] = {
+ [DPLL_A_PIN_ID] = { .type = NLA_U32, },
+};
+
+/* DPLL_CMD_PIN_SET - do */
+static const struct nla_policy dpll_pin_set_nl_policy[DPLL_A_PIN_PARENT_PIN + 1] = {
+ [DPLL_A_PIN_ID] = { .type = NLA_U32, },
+ [DPLL_A_PIN_FREQUENCY] = { .type = NLA_U64, },
+ [DPLL_A_PIN_DIRECTION] = NLA_POLICY_RANGE(NLA_U32, 1, 2),
+ [DPLL_A_PIN_PRIO] = { .type = NLA_U32, },
+ [DPLL_A_PIN_STATE] = NLA_POLICY_RANGE(NLA_U32, 1, 3),
+ [DPLL_A_PIN_PARENT_DEVICE] = NLA_POLICY_NESTED(dpll_pin_parent_device_nl_policy),
+ [DPLL_A_PIN_PARENT_PIN] = NLA_POLICY_NESTED(dpll_pin_parent_pin_nl_policy),
+};
+
+/* Ops table for dpll */
+static const struct genl_split_ops dpll_nl_ops[] = {
+ {
+ .cmd = DPLL_CMD_DEVICE_ID_GET,
+ .pre_doit = dpll_lock_doit,
+ .doit = dpll_nl_device_id_get_doit,
+ .post_doit = dpll_unlock_doit,
+ .policy = dpll_device_id_get_nl_policy,
+ .maxattr = DPLL_A_TYPE,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = DPLL_CMD_DEVICE_GET,
+ .pre_doit = dpll_pre_doit,
+ .doit = dpll_nl_device_get_doit,
+ .post_doit = dpll_post_doit,
+ .policy = dpll_device_get_nl_policy,
+ .maxattr = DPLL_A_ID,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = DPLL_CMD_DEVICE_GET,
+ .start = dpll_lock_dumpit,
+ .dumpit = dpll_nl_device_get_dumpit,
+ .done = dpll_unlock_dumpit,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DUMP,
+ },
+ {
+ .cmd = DPLL_CMD_DEVICE_SET,
+ .pre_doit = dpll_pre_doit,
+ .doit = dpll_nl_device_set_doit,
+ .post_doit = dpll_post_doit,
+ .policy = dpll_device_set_nl_policy,
+ .maxattr = DPLL_A_ID,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = DPLL_CMD_PIN_ID_GET,
+ .pre_doit = dpll_lock_doit,
+ .doit = dpll_nl_pin_id_get_doit,
+ .post_doit = dpll_unlock_doit,
+ .policy = dpll_pin_id_get_nl_policy,
+ .maxattr = DPLL_A_PIN_TYPE,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = DPLL_CMD_PIN_GET,
+ .pre_doit = dpll_pin_pre_doit,
+ .doit = dpll_nl_pin_get_doit,
+ .post_doit = dpll_pin_post_doit,
+ .policy = dpll_pin_get_do_nl_policy,
+ .maxattr = DPLL_A_PIN_ID,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = DPLL_CMD_PIN_GET,
+ .start = dpll_lock_dumpit,
+ .dumpit = dpll_nl_pin_get_dumpit,
+ .done = dpll_unlock_dumpit,
+ .policy = dpll_pin_get_dump_nl_policy,
+ .maxattr = DPLL_A_PIN_ID,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DUMP,
+ },
+ {
+ .cmd = DPLL_CMD_PIN_SET,
+ .pre_doit = dpll_pin_pre_doit,
+ .doit = dpll_nl_pin_set_doit,
+ .post_doit = dpll_pin_post_doit,
+ .policy = dpll_pin_set_nl_policy,
+ .maxattr = DPLL_A_PIN_PARENT_PIN,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+ },
+};
+
+static const struct genl_multicast_group dpll_nl_mcgrps[] = {
+ [DPLL_NLGRP_MONITOR] = { "monitor", },
+};
+
+struct genl_family dpll_nl_family __ro_after_init = {
+ .name = DPLL_FAMILY_NAME,
+ .version = DPLL_FAMILY_VERSION,
+ .netnsok = true,
+ .parallel_ops = true,
+ .module = THIS_MODULE,
+ .split_ops = dpll_nl_ops,
+ .n_split_ops = ARRAY_SIZE(dpll_nl_ops),
+ .mcgrps = dpll_nl_mcgrps,
+ .n_mcgrps = ARRAY_SIZE(dpll_nl_mcgrps),
+};
diff --git a/drivers/dpll/dpll_nl.h b/drivers/dpll/dpll_nl.h
new file mode 100644
index 000000000000..1f67aaed4742
--- /dev/null
+++ b/drivers/dpll/dpll_nl.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
+/* Do not edit directly, auto-generated from: */
+/* Documentation/netlink/specs/dpll.yaml */
+/* YNL-GEN kernel header */
+
+#ifndef _LINUX_DPLL_GEN_H
+#define _LINUX_DPLL_GEN_H
+
+#include <net/netlink.h>
+#include <net/genetlink.h>
+
+#include <uapi/linux/dpll.h>
+
+/* Common nested types */
+extern const struct nla_policy dpll_pin_parent_device_nl_policy[DPLL_A_PIN_STATE + 1];
+extern const struct nla_policy dpll_pin_parent_pin_nl_policy[DPLL_A_PIN_STATE + 1];
+
+int dpll_lock_doit(const struct genl_split_ops *ops, struct sk_buff *skb,
+ struct genl_info *info);
+int dpll_pre_doit(const struct genl_split_ops *ops, struct sk_buff *skb,
+ struct genl_info *info);
+int dpll_pin_pre_doit(const struct genl_split_ops *ops, struct sk_buff *skb,
+ struct genl_info *info);
+void
+dpll_unlock_doit(const struct genl_split_ops *ops, struct sk_buff *skb,
+ struct genl_info *info);
+void
+dpll_post_doit(const struct genl_split_ops *ops, struct sk_buff *skb,
+ struct genl_info *info);
+void
+dpll_pin_post_doit(const struct genl_split_ops *ops, struct sk_buff *skb,
+ struct genl_info *info);
+int dpll_lock_dumpit(struct netlink_callback *cb);
+int dpll_unlock_dumpit(struct netlink_callback *cb);
+
+int dpll_nl_device_id_get_doit(struct sk_buff *skb, struct genl_info *info);
+int dpll_nl_device_get_doit(struct sk_buff *skb, struct genl_info *info);
+int dpll_nl_device_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb);
+int dpll_nl_device_set_doit(struct sk_buff *skb, struct genl_info *info);
+int dpll_nl_pin_id_get_doit(struct sk_buff *skb, struct genl_info *info);
+int dpll_nl_pin_get_doit(struct sk_buff *skb, struct genl_info *info);
+int dpll_nl_pin_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb);
+int dpll_nl_pin_set_doit(struct sk_buff *skb, struct genl_info *info);
+
+enum {
+ DPLL_NLGRP_MONITOR,
+};
+
+extern struct genl_family dpll_nl_family;
+
+#endif /* _LINUX_DPLL_GEN_H */
diff --git a/drivers/net/dsa/b53/b53_mmap.c b/drivers/net/dsa/b53/b53_mmap.c
index 5e39641ea887..3a89349dc918 100644
--- a/drivers/net/dsa/b53/b53_mmap.c
+++ b/drivers/net/dsa/b53/b53_mmap.c
@@ -324,14 +324,12 @@ static int b53_mmap_probe(struct platform_device *pdev)
return b53_switch_register(dev);
}
-static int b53_mmap_remove(struct platform_device *pdev)
+static void b53_mmap_remove(struct platform_device *pdev)
{
struct b53_device *dev = platform_get_drvdata(pdev);
if (dev)
b53_switch_remove(dev);
-
- return 0;
}
static void b53_mmap_shutdown(struct platform_device *pdev)
@@ -372,7 +370,7 @@ MODULE_DEVICE_TABLE(of, b53_mmap_of_table);
static struct platform_driver b53_mmap_driver = {
.probe = b53_mmap_probe,
- .remove = b53_mmap_remove,
+ .remove_new = b53_mmap_remove,
.shutdown = b53_mmap_shutdown,
.driver = {
.name = "b53-switch",
diff --git a/drivers/net/dsa/b53/b53_srab.c b/drivers/net/dsa/b53/b53_srab.c
index bcb44034404d..f3f95332ff17 100644
--- a/drivers/net/dsa/b53/b53_srab.c
+++ b/drivers/net/dsa/b53/b53_srab.c
@@ -657,17 +657,15 @@ static int b53_srab_probe(struct platform_device *pdev)
return b53_switch_register(dev);
}
-static int b53_srab_remove(struct platform_device *pdev)
+static void b53_srab_remove(struct platform_device *pdev)
{
struct b53_device *dev = platform_get_drvdata(pdev);
if (!dev)
- return 0;
+ return;
b53_srab_intr_set(dev->priv, false);
b53_switch_remove(dev);
-
- return 0;
}
static void b53_srab_shutdown(struct platform_device *pdev)
@@ -684,7 +682,7 @@ static void b53_srab_shutdown(struct platform_device *pdev)
static struct platform_driver b53_srab_driver = {
.probe = b53_srab_probe,
- .remove = b53_srab_remove,
+ .remove_new = b53_srab_remove,
.shutdown = b53_srab_shutdown,
.driver = {
.name = "b53-srab-switch",
diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c
index cd1f240c90f3..326937e91f52 100644
--- a/drivers/net/dsa/bcm_sf2.c
+++ b/drivers/net/dsa/bcm_sf2.c
@@ -1543,12 +1543,12 @@ out_clk:
return ret;
}
-static int bcm_sf2_sw_remove(struct platform_device *pdev)
+static void bcm_sf2_sw_remove(struct platform_device *pdev)
{
struct bcm_sf2_priv *priv = platform_get_drvdata(pdev);
if (!priv)
- return 0;
+ return;
priv->wol_ports_mask = 0;
/* Disable interrupts */
@@ -1560,8 +1560,6 @@ static int bcm_sf2_sw_remove(struct platform_device *pdev)
clk_disable_unprepare(priv->clk);
if (priv->type == BCM7278_DEVICE_ID)
reset_control_assert(priv->rcdev);
-
- return 0;
}
static void bcm_sf2_sw_shutdown(struct platform_device *pdev)
@@ -1607,7 +1605,7 @@ static SIMPLE_DEV_PM_OPS(bcm_sf2_pm_ops,
static struct platform_driver bcm_sf2_driver = {
.probe = bcm_sf2_sw_probe,
- .remove = bcm_sf2_sw_remove,
+ .remove_new = bcm_sf2_sw_remove,
.shutdown = bcm_sf2_sw_shutdown,
.driver = {
.name = "brcm-sf2",
diff --git a/drivers/net/dsa/hirschmann/hellcreek.c b/drivers/net/dsa/hirschmann/hellcreek.c
index 11ef1d7ea229..beda1e9d350f 100644
--- a/drivers/net/dsa/hirschmann/hellcreek.c
+++ b/drivers/net/dsa/hirschmann/hellcreek.c
@@ -2060,18 +2060,16 @@ err_ptp_setup:
return ret;
}
-static int hellcreek_remove(struct platform_device *pdev)
+static void hellcreek_remove(struct platform_device *pdev)
{
struct hellcreek *hellcreek = platform_get_drvdata(pdev);
if (!hellcreek)
- return 0;
+ return;
hellcreek_hwtstamp_free(hellcreek);
hellcreek_ptp_free(hellcreek);
dsa_unregister_switch(hellcreek->ds);
-
- return 0;
}
static void hellcreek_shutdown(struct platform_device *pdev)
@@ -2107,7 +2105,7 @@ MODULE_DEVICE_TABLE(of, hellcreek_of_match);
static struct platform_driver hellcreek_driver = {
.probe = hellcreek_probe,
- .remove = hellcreek_remove,
+ .remove_new = hellcreek_remove,
.shutdown = hellcreek_shutdown,
.driver = {
.name = "hellcreek",
diff --git a/drivers/net/dsa/lantiq_gswip.c b/drivers/net/dsa/lantiq_gswip.c
index 3c76a1a14aee..25abf2caf5fb 100644
--- a/drivers/net/dsa/lantiq_gswip.c
+++ b/drivers/net/dsa/lantiq_gswip.c
@@ -2207,13 +2207,13 @@ put_mdio_node:
return err;
}
-static int gswip_remove(struct platform_device *pdev)
+static void gswip_remove(struct platform_device *pdev)
{
struct gswip_priv *priv = platform_get_drvdata(pdev);
int i;
if (!priv)
- return 0;
+ return;
/* disable the switch */
gswip_mdio_mask(priv, GSWIP_MDIO_GLOB_ENABLE, 0, GSWIP_MDIO_GLOB);
@@ -2228,8 +2228,6 @@ static int gswip_remove(struct platform_device *pdev)
for (i = 0; i < priv->num_gphy_fw; i++)
gswip_gphy_fw_remove(priv, &priv->gphy_fw[i]);
-
- return 0;
}
static void gswip_shutdown(struct platform_device *pdev)
@@ -2266,7 +2264,7 @@ MODULE_DEVICE_TABLE(of, gswip_of_match);
static struct platform_driver gswip_driver = {
.probe = gswip_probe,
- .remove = gswip_remove,
+ .remove_new = gswip_remove,
.shutdown = gswip_shutdown,
.driver = {
.name = "gswip",
diff --git a/drivers/net/dsa/microchip/Makefile b/drivers/net/dsa/microchip/Makefile
index 48360cc9fc68..49459a50dbc8 100644
--- a/drivers/net/dsa/microchip/Makefile
+++ b/drivers/net/dsa/microchip/Makefile
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_NET_DSA_MICROCHIP_KSZ_COMMON) += ksz_switch.o
ksz_switch-objs := ksz_common.o
-ksz_switch-objs += ksz9477.o
+ksz_switch-objs += ksz9477.o ksz9477_acl.o ksz9477_tc_flower.o
ksz_switch-objs += ksz8795.o
ksz_switch-objs += lan937x_main.o
diff --git a/drivers/net/dsa/microchip/ksz8795_reg.h b/drivers/net/dsa/microchip/ksz8795_reg.h
index 7a57c6088f80..d33db4f86c64 100644
--- a/drivers/net/dsa/microchip/ksz8795_reg.h
+++ b/drivers/net/dsa/microchip/ksz8795_reg.h
@@ -442,20 +442,6 @@
#define TOS_PRIO_M KS_PRIO_M
#define TOS_PRIO_S KS_PRIO_S
-#define REG_SW_CTRL_20 0xA3
-
-#define SW_GMII_DRIVE_STRENGTH_S 4
-#define SW_DRIVE_STRENGTH_M 0x7
-#define SW_DRIVE_STRENGTH_2MA 0
-#define SW_DRIVE_STRENGTH_4MA 1
-#define SW_DRIVE_STRENGTH_8MA 2
-#define SW_DRIVE_STRENGTH_12MA 3
-#define SW_DRIVE_STRENGTH_16MA 4
-#define SW_DRIVE_STRENGTH_20MA 5
-#define SW_DRIVE_STRENGTH_24MA 6
-#define SW_DRIVE_STRENGTH_28MA 7
-#define SW_MII_DRIVE_STRENGTH_S 0
-
#define REG_SW_CTRL_21 0xA4
#define SW_IPV6_MLD_OPTION BIT(3)
diff --git a/drivers/net/dsa/microchip/ksz9477.c b/drivers/net/dsa/microchip/ksz9477.c
index 83b7f2d5c1ea..9e63ed820c92 100644
--- a/drivers/net/dsa/microchip/ksz9477.c
+++ b/drivers/net/dsa/microchip/ksz9477.c
@@ -1004,6 +1004,8 @@ void ksz9477_port_setup(struct ksz_device *dev, int port, bool cpu_port)
/* clear pending interrupts */
if (dev->info->internal_phy[port])
ksz_pread16(dev, port, REG_PORT_PHY_INT_ENABLE, &data16);
+
+ ksz9477_port_acl_init(dev, port);
}
void ksz9477_config_cpu_port(struct dsa_switch *ds)
diff --git a/drivers/net/dsa/microchip/ksz9477.h b/drivers/net/dsa/microchip/ksz9477.h
index a6f425866a29..d7bbd9bd8893 100644
--- a/drivers/net/dsa/microchip/ksz9477.h
+++ b/drivers/net/dsa/microchip/ksz9477.h
@@ -57,4 +57,40 @@ int ksz9477_switch_init(struct ksz_device *dev);
void ksz9477_switch_exit(struct ksz_device *dev);
void ksz9477_port_queue_split(struct ksz_device *dev, int port);
+int ksz9477_port_acl_init(struct ksz_device *dev, int port);
+void ksz9477_port_acl_free(struct ksz_device *dev, int port);
+int ksz9477_cls_flower_add(struct dsa_switch *ds, int port,
+ struct flow_cls_offload *cls, bool ingress);
+int ksz9477_cls_flower_del(struct dsa_switch *ds, int port,
+ struct flow_cls_offload *cls, bool ingress);
+
+#define KSZ9477_ACL_ENTRY_SIZE 18
+#define KSZ9477_ACL_MAX_ENTRIES 16
+
+struct ksz9477_acl_entry {
+ u8 entry[KSZ9477_ACL_ENTRY_SIZE];
+ unsigned long cookie;
+ u32 prio;
+};
+
+struct ksz9477_acl_entries {
+ struct ksz9477_acl_entry entries[KSZ9477_ACL_MAX_ENTRIES];
+ int entries_count;
+};
+
+struct ksz9477_acl_priv {
+ struct ksz9477_acl_entries acles;
+};
+
+void ksz9477_acl_remove_entries(struct ksz_device *dev, int port,
+ struct ksz9477_acl_entries *acles,
+ unsigned long cookie);
+int ksz9477_acl_write_list(struct ksz_device *dev, int port);
+int ksz9477_sort_acl_entries(struct ksz_device *dev, int port);
+void ksz9477_acl_action_rule_cfg(u8 *entry, bool force_prio, u8 prio_val);
+void ksz9477_acl_processing_rule_set_action(u8 *entry, u8 action_idx);
+void ksz9477_acl_match_process_l2(struct ksz_device *dev, int port,
+ u16 ethtype, u8 *src_mac, u8 *dst_mac,
+ unsigned long cookie, u32 prio);
+
#endif
diff --git a/drivers/net/dsa/microchip/ksz9477_acl.c b/drivers/net/dsa/microchip/ksz9477_acl.c
new file mode 100644
index 000000000000..06d74c19eb94
--- /dev/null
+++ b/drivers/net/dsa/microchip/ksz9477_acl.c
@@ -0,0 +1,1436 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2023 Pengutronix, Oleksij Rempel <kernel@pengutronix.de>
+
+/* Access Control List (ACL) structure:
+ *
+ * There are multiple groups of registers involved in ACL configuration:
+ *
+ * - Matching Rules: These registers define the criteria for matching incoming
+ * packets based on their header information (Layer 2 MAC, Layer 3 IP, or
+ * Layer 4 TCP/UDP). Different register settings are used depending on the
+ * matching rule mode (MD) and the Enable (ENB) settings.
+ *
+ * - Action Rules: These registers define how the ACL should modify the packet's
+ * priority, VLAN tag priority, and forwarding map once a matching rule has
+ * been triggered. The settings vary depending on whether the matching rule is
+ * in Count Mode (MD = 01 and ENB = 00) or not.
+ *
+ * - Processing Rules: These registers control the overall behavior of the ACL,
+ * such as selecting which matching rule to apply first, enabling/disabling
+ * specific rules, or specifying actions for matched packets.
+ *
+ * ACL Structure:
+ * +----------------------+
+ * +----------------------+ | (optional) |
+ * | Matching Rules | | Matching Rules |
+ * | (Layer 2, 3, 4) | | (Layer 2, 3, 4) |
+ * +----------------------+ +----------------------+
+ * | |
+ * \___________________________/
+ * v
+ * +----------------------+
+ * | Processing Rules |
+ * | (action idx, |
+ * | matching rule set) |
+ * +----------------------+
+ * |
+ * v
+ * +----------------------+
+ * | Action Rules |
+ * | (Modify Priority, |
+ * | Forwarding Map, |
+ * | VLAN tag, etc) |
+ * +----------------------+
+ */
+
+#include <linux/bitops.h>
+
+#include "ksz9477.h"
+#include "ksz9477_reg.h"
+#include "ksz_common.h"
+
+#define KSZ9477_PORT_ACL_0 0x600
+
+enum ksz9477_acl_port_access {
+ KSZ9477_ACL_PORT_ACCESS_0 = 0x00,
+ KSZ9477_ACL_PORT_ACCESS_1 = 0x01,
+ KSZ9477_ACL_PORT_ACCESS_2 = 0x02,
+ KSZ9477_ACL_PORT_ACCESS_3 = 0x03,
+ KSZ9477_ACL_PORT_ACCESS_4 = 0x04,
+ KSZ9477_ACL_PORT_ACCESS_5 = 0x05,
+ KSZ9477_ACL_PORT_ACCESS_6 = 0x06,
+ KSZ9477_ACL_PORT_ACCESS_7 = 0x07,
+ KSZ9477_ACL_PORT_ACCESS_8 = 0x08,
+ KSZ9477_ACL_PORT_ACCESS_9 = 0x09,
+ KSZ9477_ACL_PORT_ACCESS_A = 0x0A,
+ KSZ9477_ACL_PORT_ACCESS_B = 0x0B,
+ KSZ9477_ACL_PORT_ACCESS_C = 0x0C,
+ KSZ9477_ACL_PORT_ACCESS_D = 0x0D,
+ KSZ9477_ACL_PORT_ACCESS_E = 0x0E,
+ KSZ9477_ACL_PORT_ACCESS_F = 0x0F,
+ KSZ9477_ACL_PORT_ACCESS_10 = 0x10,
+ KSZ9477_ACL_PORT_ACCESS_11 = 0x11
+};
+
+#define KSZ9477_ACL_MD_MASK GENMASK(5, 4)
+#define KSZ9477_ACL_MD_DISABLE 0
+#define KSZ9477_ACL_MD_L2_MAC 1
+#define KSZ9477_ACL_MD_L3_IP 2
+#define KSZ9477_ACL_MD_L4_TCP_UDP 3
+
+#define KSZ9477_ACL_ENB_MASK GENMASK(3, 2)
+#define KSZ9477_ACL_ENB_L2_COUNTER 0
+#define KSZ9477_ACL_ENB_L2_TYPE 1
+#define KSZ9477_ACL_ENB_L2_MAC 2
+#define KSZ9477_ACL_ENB_L2_MAC_TYPE 3
+
+/* only IPv4 src or dst can be used with mask */
+#define KSZ9477_ACL_ENB_L3_IPV4_ADDR_MASK 1
+/* only IPv4 src and dst can be used without mask */
+#define KSZ9477_ACL_ENB_L3_IPV4_ADDR_SRC_DST 2
+
+#define KSZ9477_ACL_ENB_L4_IP_PROTO 0
+#define KSZ9477_ACL_ENB_L4_TCP_SRC_DST_PORT 1
+#define KSZ9477_ACL_ENB_L4_UDP_SRC_DST_PORT 2
+#define KSZ9477_ACL_ENB_L4_TCP_SEQ_NUMBER 3
+
+#define KSZ9477_ACL_SD_SRC BIT(1)
+#define KSZ9477_ACL_SD_DST 0
+#define KSZ9477_ACL_EQ_EQUAL BIT(0)
+#define KSZ9477_ACL_EQ_NOT_EQUAL 0
+
+#define KSZ9477_ACL_PM_M GENMASK(7, 6)
+#define KSZ9477_ACL_PM_DISABLE 0
+#define KSZ9477_ACL_PM_HIGHER 1
+#define KSZ9477_ACL_PM_LOWER 2
+#define KSZ9477_ACL_PM_REPLACE 3
+#define KSZ9477_ACL_P_M GENMASK(5, 3)
+
+#define KSZ9477_PORT_ACL_CTRL_0 0x0612
+
+#define KSZ9477_ACL_WRITE_DONE BIT(6)
+#define KSZ9477_ACL_READ_DONE BIT(5)
+#define KSZ9477_ACL_WRITE BIT(4)
+#define KSZ9477_ACL_INDEX_M GENMASK(3, 0)
+
+/**
+ * ksz9477_dump_acl_index - Print the ACL entry at the specified index
+ *
+ * @dev: Pointer to the ksz9477 device structure.
+ * @acle: Pointer to the ACL entry array.
+ * @index: The index of the ACL entry to print.
+ *
+ * This function prints the details of an ACL entry, located at a particular
+ * index within the ksz9477 device's ACL table. It omits printing entries that
+ * are empty.
+ *
+ * Return: 1 if the entry is non-empty and printed, 0 otherwise.
+ */
+static int ksz9477_dump_acl_index(struct ksz_device *dev,
+ struct ksz9477_acl_entry *acle, int index)
+{
+ bool empty = true;
+ char buf[64];
+ u8 *entry;
+ int i;
+
+ entry = &acle[index].entry[0];
+ for (i = 0; i <= KSZ9477_ACL_PORT_ACCESS_11; i++) {
+ if (entry[i])
+ empty = false;
+
+ sprintf(buf + (i * 3), "%02x ", entry[i]);
+ }
+
+ /* no need to print empty entries */
+ if (empty)
+ return 0;
+
+ dev_err(dev->dev, " Entry %02d, prio: %02d : %s", index,
+ acle[index].prio, buf);
+
+ return 1;
+}
+
+/**
+ * ksz9477_dump_acl - Print ACL entries
+ *
+ * @dev: Pointer to the device structure.
+ * @acle: Pointer to the ACL entry array.
+ */
+static void ksz9477_dump_acl(struct ksz_device *dev,
+ struct ksz9477_acl_entry *acle)
+{
+ int count = 0;
+ int i;
+
+ for (i = 0; i < KSZ9477_ACL_MAX_ENTRIES; i++)
+ count += ksz9477_dump_acl_index(dev, acle, i);
+
+ if (count != KSZ9477_ACL_MAX_ENTRIES - 1)
+ dev_err(dev->dev, " Empty ACL entries were skipped\n");
+}
+
+/**
+ * ksz9477_acl_is_valid_matching_rule - Check if an ACL entry contains a valid
+ * matching rule.
+ *
+ * @entry: Pointer to ACL entry buffer
+ *
+ * This function checks if the given ACL entry buffer contains a valid
+ * matching rule by inspecting the Mode (MD) and Enable (ENB) fields.
+ *
+ * Returns: True if it's a valid matching rule, false otherwise.
+ */
+static bool ksz9477_acl_is_valid_matching_rule(u8 *entry)
+{
+ u8 val1, md, enb;
+
+ val1 = entry[KSZ9477_ACL_PORT_ACCESS_1];
+
+ md = FIELD_GET(KSZ9477_ACL_MD_MASK, val1);
+ if (md == KSZ9477_ACL_MD_DISABLE)
+ return false;
+
+ if (md == KSZ9477_ACL_MD_L2_MAC) {
+ /* L2 counter is not support, so it is not valid rule for now */
+ enb = FIELD_GET(KSZ9477_ACL_ENB_MASK, val1);
+ if (enb == KSZ9477_ACL_ENB_L2_COUNTER)
+ return false;
+ }
+
+ return true;
+}
+
+/**
+ * ksz9477_acl_get_cont_entr - Get count of contiguous ACL entries and validate
+ * the matching rules.
+ * @dev: Pointer to the KSZ9477 device structure.
+ * @port: Port number.
+ * @index: Index of the starting ACL entry.
+ *
+ * Based on the KSZ9477 switch's Access Control List (ACL) system, the RuleSet
+ * in an ACL entry indicates which entries contain Matching rules linked to it.
+ * This RuleSet is represented by two registers: KSZ9477_ACL_PORT_ACCESS_E and
+ * KSZ9477_ACL_PORT_ACCESS_F. Each bit set in these registers corresponds to
+ * an entry containing a Matching rule for this RuleSet.
+ *
+ * For a single Matching rule linked, only one bit is set. However, when an
+ * entry links multiple Matching rules, forming what's termed a 'complex rule',
+ * multiple bits are set in these registers.
+ *
+ * This function checks that, for complex rules, the entries containing the
+ * linked Matching rules are contiguous in terms of their indices. It calculates
+ * and returns the number of these contiguous entries.
+ *
+ * Returns:
+ * - 0 if the entry is empty and can be safely overwritten
+ * - 1 if the entry represents a simple rule
+ * - The number of contiguous entries if it is the root entry of a complex
+ * rule
+ * - -ENOTEMPTY if the entry is part of a complex rule but not the root
+ * entry
+ * - -EINVAL if the validation fails
+ */
+static int ksz9477_acl_get_cont_entr(struct ksz_device *dev, int port,
+ int index)
+{
+ struct ksz9477_acl_priv *acl = dev->ports[port].acl_priv;
+ struct ksz9477_acl_entries *acles = &acl->acles;
+ int start_idx, end_idx, contiguous_count;
+ unsigned long val;
+ u8 vale, valf;
+ u8 *entry;
+ int i;
+
+ entry = &acles->entries[index].entry[0];
+ vale = entry[KSZ9477_ACL_PORT_ACCESS_E];
+ valf = entry[KSZ9477_ACL_PORT_ACCESS_F];
+
+ val = (vale << 8) | valf;
+
+ /* If no bits are set, return an appropriate value or error */
+ if (!val) {
+ if (ksz9477_acl_is_valid_matching_rule(entry)) {
+ /* Looks like we are about to corrupt some complex rule.
+ * Do not print an error here, as this is a normal case
+ * when we are trying to find a free or starting entry.
+ */
+ dev_dbg(dev->dev, "ACL: entry %d starting with a valid matching rule, but no bits set in RuleSet\n",
+ index);
+ return -ENOTEMPTY;
+ }
+
+ /* This entry does not contain a valid matching rule */
+ return 0;
+ }
+
+ start_idx = find_first_bit((unsigned long *)&val, 16);
+ end_idx = find_last_bit((unsigned long *)&val, 16);
+
+ /* Calculate the contiguous count */
+ contiguous_count = end_idx - start_idx + 1;
+
+ /* Check if the number of bits set in val matches our calculated count */
+ if (contiguous_count != hweight16(val)) {
+ /* Probably we have a fragmented complex rule, which is not
+ * supported by this driver.
+ */
+ dev_err(dev->dev, "ACL: number of bits set in RuleSet does not match calculated count\n");
+ return -EINVAL;
+ }
+
+ /* loop over the contiguous entries and check for valid matching rules */
+ for (i = start_idx; i <= end_idx; i++) {
+ u8 *current_entry = &acles->entries[i].entry[0];
+
+ if (!ksz9477_acl_is_valid_matching_rule(current_entry)) {
+ /* we have something linked without a valid matching
+ * rule. ACL table?
+ */
+ dev_err(dev->dev, "ACL: entry %d does not contain a valid matching rule\n",
+ i);
+ return -EINVAL;
+ }
+
+ if (i > start_idx) {
+ vale = current_entry[KSZ9477_ACL_PORT_ACCESS_E];
+ valf = current_entry[KSZ9477_ACL_PORT_ACCESS_F];
+ /* Following entry should have empty linkage list */
+ if (vale || valf) {
+ dev_err(dev->dev, "ACL: entry %d has non-empty RuleSet linkage\n",
+ i);
+ return -EINVAL;
+ }
+ }
+ }
+
+ return contiguous_count;
+}
+
+/**
+ * ksz9477_acl_update_linkage - Update the RuleSet linkage for an ACL entry
+ * after a move operation.
+ *
+ * @dev: Pointer to the ksz_device.
+ * @entry: Pointer to the ACL entry array.
+ * @old_idx: The original index of the ACL entry before moving.
+ * @new_idx: The new index of the ACL entry after moving.
+ *
+ * This function updates the RuleSet linkage bits for an ACL entry when
+ * it's moved from one position to another in the ACL table. The RuleSet
+ * linkage is represented by two 8-bit registers, which are combined
+ * into a 16-bit value for easier manipulation. The linkage bits are shifted
+ * based on the difference between the old and new index. If any bits are lost
+ * during the shift operation, an error is returned.
+ *
+ * Note: Fragmentation within a RuleSet is not supported. Hence, entries must
+ * be moved as complete blocks, maintaining the integrity of the RuleSet.
+ *
+ * Returns: 0 on success, or -EINVAL if any RuleSet linkage bits are lost
+ * during the move.
+ */
+static int ksz9477_acl_update_linkage(struct ksz_device *dev, u8 *entry,
+ u16 old_idx, u16 new_idx)
+{
+ unsigned int original_bit_count;
+ unsigned long rule_linkage;
+ u8 vale, valf, val0;
+ int shift;
+
+ val0 = entry[KSZ9477_ACL_PORT_ACCESS_0];
+ vale = entry[KSZ9477_ACL_PORT_ACCESS_E];
+ valf = entry[KSZ9477_ACL_PORT_ACCESS_F];
+
+ /* Combine the two u8 values into one u16 for easier manipulation */
+ rule_linkage = (vale << 8) | valf;
+ original_bit_count = hweight16(rule_linkage);
+
+ /* Even if HW is able to handle fragmented RuleSet, we don't support it.
+ * RuleSet is filled only for the first entry of the set.
+ */
+ if (!rule_linkage)
+ return 0;
+
+ if (val0 != old_idx) {
+ dev_err(dev->dev, "ACL: entry %d has unexpected ActionRule linkage: %d\n",
+ old_idx, val0);
+ return -EINVAL;
+ }
+
+ val0 = new_idx;
+
+ /* Calculate the number of positions to shift */
+ shift = new_idx - old_idx;
+
+ /* Shift the RuleSet */
+ if (shift > 0)
+ rule_linkage <<= shift;
+ else
+ rule_linkage >>= -shift;
+
+ /* Check that no bits were lost in the process */
+ if (original_bit_count != hweight16(rule_linkage)) {
+ dev_err(dev->dev, "ACL RuleSet linkage bits lost during move\n");
+ return -EINVAL;
+ }
+
+ entry[KSZ9477_ACL_PORT_ACCESS_0] = val0;
+
+ /* Update the RuleSet bitfields in the entry */
+ entry[KSZ9477_ACL_PORT_ACCESS_E] = (rule_linkage >> 8) & 0xFF;
+ entry[KSZ9477_ACL_PORT_ACCESS_F] = rule_linkage & 0xFF;
+
+ return 0;
+}
+
+/**
+ * ksz9477_validate_and_get_src_count - Validate source and destination indices
+ * and determine the source entry count.
+ * @dev: Pointer to the KSZ device structure.
+ * @port: Port number on the KSZ device where the ACL entries reside.
+ * @src_idx: Index of the starting ACL entry that needs to be validated.
+ * @dst_idx: Index of the destination where the source entries are intended to
+ * be moved.
+ * @src_count: Pointer to the variable that will hold the number of contiguous
+ * source entries if the validation passes.
+ * @dst_count: Pointer to the variable that will hold the number of contiguous
+ * destination entries if the validation passes.
+ *
+ * This function performs validation on the source and destination indices
+ * provided for ACL entries. It checks if the indices are within the valid
+ * range, and if the source entries are contiguous. Additionally, the function
+ * ensures that there's adequate space at the destination for the source entries
+ * and that the destination index isn't in the middle of a RuleSet. If all
+ * validations pass, the function returns the number of contiguous source and
+ * destination entries.
+ *
+ * Return: 0 on success, otherwise returns a negative error code if any
+ * validation check fails.
+ */
+static int ksz9477_validate_and_get_src_count(struct ksz_device *dev, int port,
+ int src_idx, int dst_idx,
+ int *src_count, int *dst_count)
+{
+ int ret;
+
+ if (src_idx >= KSZ9477_ACL_MAX_ENTRIES ||
+ dst_idx >= KSZ9477_ACL_MAX_ENTRIES) {
+ dev_err(dev->dev, "ACL: invalid entry index\n");
+ return -EINVAL;
+ }
+
+ /* Nothing to do */
+ if (src_idx == dst_idx)
+ return 0;
+
+ /* Validate if the source entries are contiguous */
+ ret = ksz9477_acl_get_cont_entr(dev, port, src_idx);
+ if (ret < 0)
+ return ret;
+ *src_count = ret;
+
+ if (!*src_count) {
+ dev_err(dev->dev, "ACL: source entry is empty\n");
+ return -EINVAL;
+ }
+
+ if (dst_idx + *src_count >= KSZ9477_ACL_MAX_ENTRIES) {
+ dev_err(dev->dev, "ACL: Not enough space at the destination. Move operation will fail.\n");
+ return -EINVAL;
+ }
+
+ /* Validate if the destination entry is empty or not in the middle of
+ * a RuleSet.
+ */
+ ret = ksz9477_acl_get_cont_entr(dev, port, dst_idx);
+ if (ret < 0)
+ return ret;
+ *dst_count = ret;
+
+ return 0;
+}
+
+/**
+ * ksz9477_move_entries_downwards - Move a range of ACL entries downwards in
+ * the list.
+ * @dev: Pointer to the KSZ device structure.
+ * @acles: Pointer to the structure encapsulating all the ACL entries.
+ * @start_idx: Starting index of the entries to be relocated.
+ * @num_entries_to_move: Number of consecutive entries to be relocated.
+ * @end_idx: Destination index where the first entry should be situated post
+ * relocation.
+ *
+ * This function is responsible for rearranging a specific block of ACL entries
+ * by shifting them downwards in the list based on the supplied source and
+ * destination indices. It ensures that the linkage between the ACL entries is
+ * maintained accurately after the relocation.
+ *
+ * Return: 0 on successful relocation of entries, otherwise returns a negative
+ * error code.
+ */
+static int ksz9477_move_entries_downwards(struct ksz_device *dev,
+ struct ksz9477_acl_entries *acles,
+ u16 start_idx,
+ u16 num_entries_to_move,
+ u16 end_idx)
+{
+ struct ksz9477_acl_entry *e;
+ int ret, i;
+
+ for (i = start_idx; i < end_idx; i++) {
+ e = &acles->entries[i];
+ *e = acles->entries[i + num_entries_to_move];
+
+ ret = ksz9477_acl_update_linkage(dev, &e->entry[0],
+ i + num_entries_to_move, i);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
+}
+
+/**
+ * ksz9477_move_entries_upwards - Move a range of ACL entries upwards in the
+ * list.
+ * @dev: Pointer to the KSZ device structure.
+ * @acles: Pointer to the structure holding all the ACL entries.
+ * @start_idx: The starting index of the entries to be moved.
+ * @num_entries_to_move: Number of contiguous entries to be moved.
+ * @target_idx: The destination index where the first entry should be placed
+ * after moving.
+ *
+ * This function rearranges a chunk of ACL entries by moving them upwards
+ * in the list based on the given source and destination indices. The reordering
+ * process preserves the linkage between entries by updating it accordingly.
+ *
+ * Return: 0 if the entries were successfully moved, otherwise a negative error
+ * code.
+ */
+static int ksz9477_move_entries_upwards(struct ksz_device *dev,
+ struct ksz9477_acl_entries *acles,
+ u16 start_idx, u16 num_entries_to_move,
+ u16 target_idx)
+{
+ struct ksz9477_acl_entry *e;
+ int ret, i, b;
+
+ for (i = start_idx; i > target_idx; i--) {
+ b = i + num_entries_to_move - 1;
+
+ e = &acles->entries[b];
+ *e = acles->entries[i - 1];
+
+ ret = ksz9477_acl_update_linkage(dev, &e->entry[0], i - 1, b);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
+}
+
+/**
+ * ksz9477_acl_move_entries - Move a block of contiguous ACL entries from a
+ * source to a destination index.
+ * @dev: Pointer to the KSZ9477 device structure.
+ * @port: Port number.
+ * @src_idx: Index of the starting source ACL entry.
+ * @dst_idx: Index of the starting destination ACL entry.
+ *
+ * This function aims to move a block of contiguous ACL entries from the source
+ * index to the destination index while ensuring the integrity and validity of
+ * the ACL table.
+ *
+ * In case of any errors during the adjustments or copying, the function will
+ * restore the ACL entries to their original state from the backup.
+ *
+ * Return: 0 if the move operation is successful. Returns -EINVAL for validation
+ * errors or other error codes based on specific failure conditions.
+ */
+static int ksz9477_acl_move_entries(struct ksz_device *dev, int port,
+ u16 src_idx, u16 dst_idx)
+{
+ struct ksz9477_acl_entry buffer[KSZ9477_ACL_MAX_ENTRIES];
+ struct ksz9477_acl_priv *acl = dev->ports[port].acl_priv;
+ struct ksz9477_acl_entries *acles = &acl->acles;
+ int src_count, ret, dst_count;
+
+ ret = ksz9477_validate_and_get_src_count(dev, port, src_idx, dst_idx,
+ &src_count, &dst_count);
+ if (ret)
+ return ret;
+
+ /* In case dst_index is greater than src_index, we need to adjust the
+ * destination index to account for the entries that will be moved
+ * downwards and the size of the entry located at dst_idx.
+ */
+ if (dst_idx > src_idx)
+ dst_idx = dst_idx + dst_count - src_count;
+
+ /* Copy source block to buffer and update its linkage */
+ for (int i = 0; i < src_count; i++) {
+ buffer[i] = acles->entries[src_idx + i];
+ ret = ksz9477_acl_update_linkage(dev, &buffer[i].entry[0],
+ src_idx + i, dst_idx + i);
+ if (ret < 0)
+ return ret;
+ }
+
+ /* Adjust other entries and their linkage based on destination */
+ if (dst_idx > src_idx) {
+ ret = ksz9477_move_entries_downwards(dev, acles, src_idx,
+ src_count, dst_idx);
+ } else {
+ ret = ksz9477_move_entries_upwards(dev, acles, src_idx,
+ src_count, dst_idx);
+ }
+ if (ret < 0)
+ return ret;
+
+ /* Copy buffer to destination block */
+ for (int i = 0; i < src_count; i++)
+ acles->entries[dst_idx + i] = buffer[i];
+
+ return 0;
+}
+
+/**
+ * ksz9477_get_next_block_start - Identify the starting index of the next ACL
+ * block.
+ * @dev: Pointer to the device structure.
+ * @port: The port number on which the ACL entries are being checked.
+ * @start: The starting index from which the search begins.
+ *
+ * This function looks for the next valid ACL block starting from the provided
+ * 'start' index and returns the beginning index of that block. If the block is
+ * invalid or if it reaches the end of the ACL entries without finding another
+ * block, it returns the maximum ACL entries count.
+ *
+ * Returns:
+ * - The starting index of the next valid ACL block.
+ * - KSZ9477_ACL_MAX_ENTRIES if no other valid blocks are found after 'start'.
+ * - A negative error code if an error occurs while checking.
+ */
+static int ksz9477_get_next_block_start(struct ksz_device *dev, int port,
+ int start)
+{
+ int block_size;
+
+ for (int i = start; i < KSZ9477_ACL_MAX_ENTRIES;) {
+ block_size = ksz9477_acl_get_cont_entr(dev, port, i);
+ if (block_size < 0 && block_size != -ENOTEMPTY)
+ return block_size;
+
+ if (block_size > 0)
+ return i;
+
+ i++;
+ }
+ return KSZ9477_ACL_MAX_ENTRIES;
+}
+
+/**
+ * ksz9477_swap_acl_blocks - Swap two ACL blocks
+ * @dev: Pointer to the device structure.
+ * @port: The port number on which the ACL blocks are to be swapped.
+ * @i: The starting index of the first ACL block.
+ * @j: The starting index of the second ACL block.
+ *
+ * This function is used to swap two ACL blocks present at given indices. The
+ * main purpose is to aid in the sorting and reordering of ACL blocks based on
+ * certain criteria, e.g., priority. It checks the validity of the block at
+ * index 'i', ensuring it's not an empty block, and then proceeds to swap it
+ * with the block at index 'j'.
+ *
+ * Returns:
+ * - 0 on successful swapping of blocks.
+ * - -EINVAL if the block at index 'i' is empty.
+ * - A negative error code if any other error occurs during the swap.
+ */
+static int ksz9477_swap_acl_blocks(struct ksz_device *dev, int port, int i,
+ int j)
+{
+ int ret, current_block_size;
+
+ current_block_size = ksz9477_acl_get_cont_entr(dev, port, i);
+ if (current_block_size < 0)
+ return current_block_size;
+
+ if (!current_block_size) {
+ dev_err(dev->dev, "ACL: swapping empty entry %d\n", i);
+ return -EINVAL;
+ }
+
+ ret = ksz9477_acl_move_entries(dev, port, i, j);
+ if (ret)
+ return ret;
+
+ ret = ksz9477_acl_move_entries(dev, port, j - current_block_size, i);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+/**
+ * ksz9477_sort_acl_entr_no_back - Sort ACL entries for a given port based on
+ * priority without backing up entries.
+ * @dev: Pointer to the device structure.
+ * @port: The port number whose ACL entries need to be sorted.
+ *
+ * This function sorts ACL entries of the specified port using a variant of the
+ * bubble sort algorithm. It operates on blocks of ACL entries rather than
+ * individual entries. Each block's starting point is identified and then
+ * compared with subsequent blocks based on their priority. If the current
+ * block has a lower priority than the subsequent block, the two blocks are
+ * swapped.
+ *
+ * This is done in order to maintain an organized order of ACL entries based on
+ * priority, ensuring efficient and predictable ACL rule application.
+ *
+ * Returns:
+ * - 0 on successful sorting of entries.
+ * - A negative error code if any issue arises during sorting, e.g.,
+ * if the function is unable to get the next block start.
+ */
+static int ksz9477_sort_acl_entr_no_back(struct ksz_device *dev, int port)
+{
+ struct ksz9477_acl_priv *acl = dev->ports[port].acl_priv;
+ struct ksz9477_acl_entries *acles = &acl->acles;
+ struct ksz9477_acl_entry *curr, *next;
+ int i, j, ret;
+
+ /* Bubble sort */
+ for (i = 0; i < KSZ9477_ACL_MAX_ENTRIES;) {
+ curr = &acles->entries[i];
+
+ j = ksz9477_get_next_block_start(dev, port, i + 1);
+ if (j < 0)
+ return j;
+
+ while (j < KSZ9477_ACL_MAX_ENTRIES) {
+ next = &acles->entries[j];
+
+ if (curr->prio > next->prio) {
+ ret = ksz9477_swap_acl_blocks(dev, port, i, j);
+ if (ret)
+ return ret;
+ }
+
+ j = ksz9477_get_next_block_start(dev, port, j + 1);
+ if (j < 0)
+ return j;
+ }
+
+ i = ksz9477_get_next_block_start(dev, port, i + 1);
+ if (i < 0)
+ return i;
+ }
+
+ return 0;
+}
+
+/**
+ * ksz9477_sort_acl_entries - Sort the ACL entries for a given port.
+ * @dev: Pointer to the KSZ device.
+ * @port: Port number.
+ *
+ * This function sorts the Access Control List (ACL) entries for a specified
+ * port. Before sorting, a backup of the original entries is created. If the
+ * sorting process fails, the function will log error messages displaying both
+ * the original and attempted sorted entries, and then restore the original
+ * entries from the backup.
+ *
+ * Return: 0 if the sorting succeeds, otherwise a negative error code.
+ */
+int ksz9477_sort_acl_entries(struct ksz_device *dev, int port)
+{
+ struct ksz9477_acl_entry backup[KSZ9477_ACL_MAX_ENTRIES];
+ struct ksz9477_acl_priv *acl = dev->ports[port].acl_priv;
+ struct ksz9477_acl_entries *acles = &acl->acles;
+ int ret;
+
+ /* create a backup of the ACL entries, if something goes wrong
+ * we can restore the ACL entries.
+ */
+ memcpy(backup, acles->entries, sizeof(backup));
+
+ ret = ksz9477_sort_acl_entr_no_back(dev, port);
+ if (ret) {
+ dev_err(dev->dev, "ACL: failed to sort entries for port %d\n",
+ port);
+ dev_err(dev->dev, "ACL dump before sorting:\n");
+ ksz9477_dump_acl(dev, backup);
+ dev_err(dev->dev, "ACL dump after sorting:\n");
+ ksz9477_dump_acl(dev, acles->entries);
+ /* Restore the original entries */
+ memcpy(acles->entries, backup, sizeof(backup));
+ }
+
+ return ret;
+}
+
+/**
+ * ksz9477_acl_wait_ready - Waits for the ACL operation to complete on a given
+ * port.
+ * @dev: The ksz_device instance.
+ * @port: The port number to wait for.
+ *
+ * This function checks if the ACL write or read operation is completed by
+ * polling the specified register.
+ *
+ * Returns: 0 if the operation is successful, or a negative error code if an
+ * error occurs.
+ */
+static int ksz9477_acl_wait_ready(struct ksz_device *dev, int port)
+{
+ unsigned int wr_mask = KSZ9477_ACL_WRITE_DONE | KSZ9477_ACL_READ_DONE;
+ unsigned int val, reg;
+ int ret;
+
+ reg = dev->dev_ops->get_port_addr(port, KSZ9477_PORT_ACL_CTRL_0);
+
+ ret = regmap_read_poll_timeout(dev->regmap[0], reg, val,
+ (val & wr_mask) == wr_mask, 1000, 10000);
+ if (ret)
+ dev_err(dev->dev, "Failed to read/write ACL table\n");
+
+ return ret;
+}
+
+/**
+ * ksz9477_acl_entry_write - Writes an ACL entry to a given port at the
+ * specified index.
+ * @dev: The ksz_device instance.
+ * @port: The port number to write the ACL entry to.
+ * @entry: A pointer to the ACL entry data.
+ * @idx: The index at which to write the ACL entry.
+ *
+ * This function writes the provided ACL entry to the specified port at the
+ * given index.
+ *
+ * Returns: 0 if the operation is successful, or a negative error code if an
+ * error occurs.
+ */
+static int ksz9477_acl_entry_write(struct ksz_device *dev, int port, u8 *entry,
+ int idx)
+{
+ int ret, i;
+ u8 val;
+
+ for (i = 0; i < KSZ9477_ACL_ENTRY_SIZE; i++) {
+ ret = ksz_pwrite8(dev, port, KSZ9477_PORT_ACL_0 + i, entry[i]);
+ if (ret) {
+ dev_err(dev->dev, "Failed to write ACL entry %d\n", i);
+ return ret;
+ }
+ }
+
+ /* write everything down */
+ val = FIELD_PREP(KSZ9477_ACL_INDEX_M, idx) | KSZ9477_ACL_WRITE;
+ ret = ksz_pwrite8(dev, port, KSZ9477_PORT_ACL_CTRL_0, val);
+ if (ret)
+ return ret;
+
+ /* wait until everything is written */
+ return ksz9477_acl_wait_ready(dev, port);
+}
+
+/**
+ * ksz9477_acl_port_enable - Enables ACL functionality on a given port.
+ * @dev: The ksz_device instance.
+ * @port: The port number on which to enable ACL functionality.
+ *
+ * This function enables ACL functionality on the specified port by configuring
+ * the appropriate control registers. It returns 0 if the operation is
+ * successful, or a negative error code if an error occurs.
+ *
+ * 0xn801 - KSZ9477S 5.2.8.2 Port Priority Control Register
+ * Bit 7 - Highest Priority
+ * Bit 6 - OR'ed Priority
+ * Bit 4 - MAC Address Priority Classification
+ * Bit 3 - VLAN Priority Classification
+ * Bit 2 - 802.1p Priority Classification
+ * Bit 1 - Diffserv Priority Classification
+ * Bit 0 - ACL Priority Classification
+ *
+ * Current driver implementation sets 802.1p priority classification by default.
+ * In this function we add ACL priority classification with OR'ed priority.
+ * According to testing, priority set by ACL will supersede the 802.1p priority.
+ *
+ * 0xn803 - KSZ9477S 5.2.8.4 Port Authentication Control Register
+ * Bit 2 - Access Control List (ACL) Enable
+ * Bits 1:0 - Authentication Mode
+ * 00 = Reserved
+ * 01 = Block Mode. Authentication is enabled. When ACL is
+ * enabled, all traffic that misses the ACL rules is
+ * blocked; otherwise ACL actions apply.
+ * 10 = Pass Mode. Authentication is disabled. When ACL is
+ * enabled, all traffic that misses the ACL rules is
+ * forwarded; otherwise ACL actions apply.
+ * 11 = Trap Mode. Authentication is enabled. All traffic is
+ * forwarded to the host port. When ACL is enabled, all
+ * traffic that misses the ACL rules is blocked; otherwise
+ * ACL actions apply.
+ *
+ * We are using Pass Mode int this function.
+ *
+ * Returns: 0 if the operation is successful, or a negative error code if an
+ * error occurs.
+ */
+static int ksz9477_acl_port_enable(struct ksz_device *dev, int port)
+{
+ int ret;
+
+ ret = ksz_prmw8(dev, port, P_PRIO_CTRL, 0, PORT_ACL_PRIO_ENABLE |
+ PORT_OR_PRIO);
+ if (ret)
+ return ret;
+
+ return ksz_pwrite8(dev, port, REG_PORT_MRI_AUTHEN_CTRL,
+ PORT_ACL_ENABLE |
+ FIELD_PREP(PORT_AUTHEN_MODE, PORT_AUTHEN_PASS));
+}
+
+/**
+ * ksz9477_acl_port_disable - Disables ACL functionality on a given port.
+ * @dev: The ksz_device instance.
+ * @port: The port number on which to disable ACL functionality.
+ *
+ * This function disables ACL functionality on the specified port by writing a
+ * value of 0 to the REG_PORT_MRI_AUTHEN_CTRL control register and remove
+ * PORT_ACL_PRIO_ENABLE bit from P_PRIO_CTRL register.
+ *
+ * Returns: 0 if the operation is successful, or a negative error code if an
+ * error occurs.
+ */
+static int ksz9477_acl_port_disable(struct ksz_device *dev, int port)
+{
+ int ret;
+
+ ret = ksz_prmw8(dev, port, P_PRIO_CTRL, PORT_ACL_PRIO_ENABLE, 0);
+ if (ret)
+ return ret;
+
+ return ksz_pwrite8(dev, port, REG_PORT_MRI_AUTHEN_CTRL, 0);
+}
+
+/**
+ * ksz9477_acl_write_list - Write a list of ACL entries to a given port.
+ * @dev: The ksz_device instance.
+ * @port: The port number on which to write ACL entries.
+ *
+ * This function enables ACL functionality on the specified port, writes a list
+ * of ACL entries to the port, and disables ACL functionality if there are no
+ * entries.
+ *
+ * Returns: 0 if the operation is successful, or a negative error code if an
+ * error occurs.
+ */
+int ksz9477_acl_write_list(struct ksz_device *dev, int port)
+{
+ struct ksz9477_acl_priv *acl = dev->ports[port].acl_priv;
+ struct ksz9477_acl_entries *acles = &acl->acles;
+ int ret, i;
+
+ /* ACL should be enabled before writing entries */
+ ret = ksz9477_acl_port_enable(dev, port);
+ if (ret)
+ return ret;
+
+ /* write all entries */
+ for (i = 0; i < ARRAY_SIZE(acles->entries); i++) {
+ u8 *entry = acles->entries[i].entry;
+
+ /* Check if entry was removed and should be zeroed.
+ * If last fields of the entry are not zero, it means
+ * it is removed locally but currently not synced with the HW.
+ * So, we will write it down to the HW to remove it.
+ */
+ if (i >= acles->entries_count &&
+ entry[KSZ9477_ACL_PORT_ACCESS_10] == 0 &&
+ entry[KSZ9477_ACL_PORT_ACCESS_11] == 0)
+ continue;
+
+ ret = ksz9477_acl_entry_write(dev, port, entry, i);
+ if (ret)
+ return ret;
+
+ /* now removed entry is clean on HW side, so it can
+ * in the cache too
+ */
+ if (i >= acles->entries_count &&
+ entry[KSZ9477_ACL_PORT_ACCESS_10] != 0 &&
+ entry[KSZ9477_ACL_PORT_ACCESS_11] != 0) {
+ entry[KSZ9477_ACL_PORT_ACCESS_10] = 0;
+ entry[KSZ9477_ACL_PORT_ACCESS_11] = 0;
+ }
+ }
+
+ if (!acles->entries_count)
+ return ksz9477_acl_port_disable(dev, port);
+
+ return 0;
+}
+
+/**
+ * ksz9477_acl_remove_entries - Remove ACL entries with a given cookie from a
+ * specified ksz9477_acl_entries structure.
+ * @dev: The ksz_device instance.
+ * @port: The port number on which to remove ACL entries.
+ * @acles: The ksz9477_acl_entries instance.
+ * @cookie: The cookie value to match for entry removal.
+ *
+ * This function iterates through the entries array, removing any entries with
+ * a matching cookie value. The remaining entries are then shifted down to fill
+ * the gap.
+ */
+void ksz9477_acl_remove_entries(struct ksz_device *dev, int port,
+ struct ksz9477_acl_entries *acles,
+ unsigned long cookie)
+{
+ int entries_count = acles->entries_count;
+ int ret, i, src_count;
+ int src_idx = -1;
+
+ if (!entries_count)
+ return;
+
+ /* Search for the first position with the cookie */
+ for (i = 0; i < entries_count; i++) {
+ if (acles->entries[i].cookie == cookie) {
+ src_idx = i;
+ break;
+ }
+ }
+
+ /* No entries with the matching cookie found */
+ if (src_idx == -1)
+ return;
+
+ /* Get the size of the cookie entry. We may have complex entries. */
+ src_count = ksz9477_acl_get_cont_entr(dev, port, src_idx);
+ if (src_count <= 0)
+ return;
+
+ /* Move all entries down to overwrite removed entry with the cookie */
+ ret = ksz9477_move_entries_downwards(dev, acles, src_idx,
+ src_count,
+ entries_count - src_count);
+ if (ret) {
+ dev_err(dev->dev, "Failed to move ACL entries down\n");
+ return;
+ }
+
+ /* Overwrite new empty places at the end of the list with zeros to make
+ * sure not unexpected things will happen or no unexplored quirks will
+ * come out.
+ */
+ for (i = entries_count - src_count; i < entries_count; i++) {
+ struct ksz9477_acl_entry *entry = &acles->entries[i];
+
+ memset(entry, 0, sizeof(*entry));
+
+ /* Set all access bits to be able to write zeroed entry to HW */
+ entry->entry[KSZ9477_ACL_PORT_ACCESS_10] = 0xff;
+ entry->entry[KSZ9477_ACL_PORT_ACCESS_11] = 0xff;
+ }
+
+ /* Adjust the total entries count */
+ acles->entries_count -= src_count;
+}
+
+/**
+ * ksz9477_port_acl_init - Initialize the ACL for a specified port on a ksz
+ * device.
+ * @dev: The ksz_device instance.
+ * @port: The port number to initialize the ACL for.
+ *
+ * This function allocates memory for an acl structure, associates it with the
+ * specified port, and initializes the ACL entries to a default state. The
+ * entries are then written using the ksz9477_acl_write_list function, ensuring
+ * the ACL has a predictable initial hardware state.
+ *
+ * Returns: 0 on success, or an error code on failure.
+ */
+int ksz9477_port_acl_init(struct ksz_device *dev, int port)
+{
+ struct ksz9477_acl_entries *acles;
+ struct ksz9477_acl_priv *acl;
+ int ret, i;
+
+ acl = kzalloc(sizeof(*acl), GFP_KERNEL);
+ if (!acl)
+ return -ENOMEM;
+
+ dev->ports[port].acl_priv = acl;
+
+ acles = &acl->acles;
+ /* write all entries */
+ for (i = 0; i < ARRAY_SIZE(acles->entries); i++) {
+ u8 *entry = acles->entries[i].entry;
+
+ /* Set all access bits to be able to write zeroed
+ * entry
+ */
+ entry[KSZ9477_ACL_PORT_ACCESS_10] = 0xff;
+ entry[KSZ9477_ACL_PORT_ACCESS_11] = 0xff;
+ }
+
+ ret = ksz9477_acl_write_list(dev, port);
+ if (ret)
+ goto free_acl;
+
+ return 0;
+
+free_acl:
+ kfree(dev->ports[port].acl_priv);
+ dev->ports[port].acl_priv = NULL;
+
+ return ret;
+}
+
+/**
+ * ksz9477_port_acl_free - Free the ACL resources for a specified port on a ksz
+ * device.
+ * @dev: The ksz_device instance.
+ * @port: The port number to initialize the ACL for.
+ *
+ * This disables the ACL for the specified port and frees the associated memory,
+ */
+void ksz9477_port_acl_free(struct ksz_device *dev, int port)
+{
+ if (!dev->ports[port].acl_priv)
+ return;
+
+ ksz9477_acl_port_disable(dev, port);
+
+ kfree(dev->ports[port].acl_priv);
+ dev->ports[port].acl_priv = NULL;
+}
+
+/**
+ * ksz9477_acl_set_reg - Set entry[16] and entry[17] depending on the updated
+ * entry[]
+ * @entry: An array containing the entries
+ * @reg: The register of the entry that needs to be updated
+ * @value: The value to be assigned to the updated entry
+ *
+ * This function updates the entry[] array based on the provided register and
+ * value. It also sets entry[0x10] and entry[0x11] according to the ACL byte
+ * enable rules.
+ *
+ * 0x10 - Byte Enable [15:8]
+ *
+ * Each bit enables accessing one of the ACL bytes when a read or write is
+ * initiated by writing to the Port ACL Byte Enable LSB Register.
+ * Bit 0 applies to the Port ACL Access 7 Register
+ * Bit 1 applies to the Port ACL Access 6 Register, etc.
+ * Bit 7 applies to the Port ACL Access 0 Register
+ * 1 = Byte is selected for read/write
+ * 0 = Byte is not selected
+ *
+ * 0x11 - Byte Enable [7:0]
+ *
+ * Each bit enables accessing one of the ACL bytes when a read or write is
+ * initiated by writing to the Port ACL Byte Enable LSB Register.
+ * Bit 0 applies to the Port ACL Access F Register
+ * Bit 1 applies to the Port ACL Access E Register, etc.
+ * Bit 7 applies to the Port ACL Access 8 Register
+ * 1 = Byte is selected for read/write
+ * 0 = Byte is not selected
+ */
+static void ksz9477_acl_set_reg(u8 *entry, enum ksz9477_acl_port_access reg,
+ u8 value)
+{
+ if (reg >= KSZ9477_ACL_PORT_ACCESS_0 &&
+ reg <= KSZ9477_ACL_PORT_ACCESS_7) {
+ entry[KSZ9477_ACL_PORT_ACCESS_10] |=
+ BIT(KSZ9477_ACL_PORT_ACCESS_7 - reg);
+ } else if (reg >= KSZ9477_ACL_PORT_ACCESS_8 &&
+ reg <= KSZ9477_ACL_PORT_ACCESS_F) {
+ entry[KSZ9477_ACL_PORT_ACCESS_11] |=
+ BIT(KSZ9477_ACL_PORT_ACCESS_F - reg);
+ } else {
+ WARN_ON(1);
+ return;
+ }
+
+ entry[reg] = value;
+}
+
+/**
+ * ksz9477_acl_matching_rule_cfg_l2 - Configure an ACL filtering entry to match
+ * L2 types of Ethernet frames
+ * @entry: Pointer to ACL entry buffer
+ * @ethertype: Ethertype value
+ * @eth_addr: Pointer to Ethernet address
+ * @is_src: If true, match the source MAC address; if false, match the
+ * destination MAC address
+ *
+ * This function configures an Access Control List (ACL) filtering
+ * entry to match Layer 2 types of Ethernet frames based on the provided
+ * ethertype and Ethernet address. Additionally, it can match either the source
+ * or destination MAC address depending on the value of the is_src parameter.
+ *
+ * Register Descriptions for MD = 01 and ENB != 00 (Layer 2 MAC header
+ * filtering)
+ *
+ * 0x01 - Mode and Enable
+ * Bits 5:4 - MD (Mode)
+ * 01 = Layer 2 MAC header or counter filtering
+ * Bits 3:2 - ENB (Enable)
+ * 01 = Comparison is performed only on the TYPE value
+ * 10 = Comparison is performed only on the MAC Address value
+ * 11 = Both the MAC Address and TYPE are tested
+ * Bit 1 - S/D (Source / Destination)
+ * 0 = Destination address
+ * 1 = Source address
+ * Bit 0 - EQ (Equal / Not Equal)
+ * 0 = Not Equal produces true result
+ * 1 = Equal produces true result
+ *
+ * 0x02-0x07 - MAC Address
+ * 0x02 - MAC Address [47:40]
+ * 0x03 - MAC Address [39:32]
+ * 0x04 - MAC Address [31:24]
+ * 0x05 - MAC Address [23:16]
+ * 0x06 - MAC Address [15:8]
+ * 0x07 - MAC Address [7:0]
+ *
+ * 0x08-0x09 - EtherType
+ * 0x08 - EtherType [15:8]
+ * 0x09 - EtherType [7:0]
+ */
+static void ksz9477_acl_matching_rule_cfg_l2(u8 *entry, u16 ethertype,
+ u8 *eth_addr, bool is_src)
+{
+ u8 enb = 0;
+ u8 val;
+
+ if (ethertype)
+ enb |= KSZ9477_ACL_ENB_L2_TYPE;
+ if (eth_addr)
+ enb |= KSZ9477_ACL_ENB_L2_MAC;
+
+ val = FIELD_PREP(KSZ9477_ACL_MD_MASK, KSZ9477_ACL_MD_L2_MAC) |
+ FIELD_PREP(KSZ9477_ACL_ENB_MASK, enb) |
+ FIELD_PREP(KSZ9477_ACL_SD_SRC, is_src) | KSZ9477_ACL_EQ_EQUAL;
+ ksz9477_acl_set_reg(entry, KSZ9477_ACL_PORT_ACCESS_1, val);
+
+ if (eth_addr) {
+ int i;
+
+ for (i = 0; i < ETH_ALEN; i++) {
+ ksz9477_acl_set_reg(entry,
+ KSZ9477_ACL_PORT_ACCESS_2 + i,
+ eth_addr[i]);
+ }
+ }
+
+ ksz9477_acl_set_reg(entry, KSZ9477_ACL_PORT_ACCESS_8, ethertype >> 8);
+ ksz9477_acl_set_reg(entry, KSZ9477_ACL_PORT_ACCESS_9, ethertype & 0xff);
+}
+
+/**
+ * ksz9477_acl_action_rule_cfg - Set action for an ACL entry
+ * @entry: Pointer to the ACL entry
+ * @force_prio: If true, force the priority value
+ * @prio_val: Priority value
+ *
+ * This function sets the action for the specified ACL entry. It prepares
+ * the priority mode and traffic class values and updates the entry's
+ * action registers accordingly. Currently, there is no port or VLAN PCP
+ * remapping.
+ *
+ * ACL Action Rule Parameters for Non-Count Modes (MD ≠ 01 or ENB ≠ 00)
+ *
+ * 0x0A - PM, P, RPE, RP[2:1]
+ * Bits 7:6 - PM[1:0] - Priority Mode
+ * 00 = ACL does not specify the packet priority. Priority is
+ * determined by standard QoS functions.
+ * 01 = Change packet priority to P[2:0] if it is greater than QoS
+ * result.
+ * 10 = Change packet priority to P[2:0] if it is smaller than the
+ * QoS result.
+ * 11 = Always change packet priority to P[2:0].
+ * Bits 5:3 - P[2:0] - Priority value
+ * Bit 2 - RPE - Remark Priority Enable
+ * Bits 1:0 - RP[2:1] - Remarked Priority value (bits 2:1)
+ * 0 = Disable priority remarking
+ * 1 = Enable priority remarking. VLAN tag priority (PCP) bits are
+ * replaced by RP[2:0].
+ *
+ * 0x0B - RP[0], MM
+ * Bit 7 - RP[0] - Remarked Priority value (bit 0)
+ * Bits 6:5 - MM[1:0] - Map Mode
+ * 00 = No forwarding remapping
+ * 01 = The forwarding map in FORWARD is OR'ed with the forwarding
+ * map from the Address Lookup Table.
+ * 10 = The forwarding map in FORWARD is AND'ed with the forwarding
+ * map from the Address Lookup Table.
+ * 11 = The forwarding map in FORWARD replaces the forwarding map
+ * from the Address Lookup Table.
+ * 0x0D - FORWARD[n:0]
+ * Bits 7:0 - FORWARD[n:0] - Forwarding map. Bit 0 = port 1,
+ * bit 1 = port 2, etc.
+ * 1 = enable forwarding to this port
+ * 0 = do not forward to this port
+ */
+void ksz9477_acl_action_rule_cfg(u8 *entry, bool force_prio, u8 prio_val)
+{
+ u8 prio_mode, val;
+
+ if (force_prio)
+ prio_mode = KSZ9477_ACL_PM_REPLACE;
+ else
+ prio_mode = KSZ9477_ACL_PM_DISABLE;
+
+ val = FIELD_PREP(KSZ9477_ACL_PM_M, prio_mode) |
+ FIELD_PREP(KSZ9477_ACL_P_M, prio_val);
+ ksz9477_acl_set_reg(entry, KSZ9477_ACL_PORT_ACCESS_A, val);
+
+ /* no port or VLAN PCP remapping for now */
+ ksz9477_acl_set_reg(entry, KSZ9477_ACL_PORT_ACCESS_B, 0);
+ ksz9477_acl_set_reg(entry, KSZ9477_ACL_PORT_ACCESS_D, 0);
+}
+
+/**
+ * ksz9477_acl_processing_rule_set_action - Set the action for the processing
+ * rule set.
+ * @entry: Pointer to the ACL entry
+ * @action_idx: Index of the action to be applied
+ *
+ * This function sets the action for the processing rule set by updating the
+ * appropriate register in the entry. There can be only one action per
+ * processing rule.
+ *
+ * Access Control List (ACL) Processing Rule Registers:
+ *
+ * 0x00 - First Rule Number (FRN)
+ * Bits 3:0 - First Rule Number. Pointer to an Action rule entry.
+ */
+void ksz9477_acl_processing_rule_set_action(u8 *entry, u8 action_idx)
+{
+ ksz9477_acl_set_reg(entry, KSZ9477_ACL_PORT_ACCESS_0, action_idx);
+}
+
+/**
+ * ksz9477_acl_processing_rule_add_match - Add a matching rule to the rule set
+ * @entry: Pointer to the ACL entry
+ * @match_idx: Index of the matching rule to be added
+ *
+ * This function adds a matching rule to the rule set by updating the
+ * appropriate bits in the entry's rule set registers.
+ *
+ * Access Control List (ACL) Processing Rule Registers:
+ *
+ * 0x0E - RuleSet [15:8]
+ * Bits 7:0 - RuleSet [15:8] Specifies a set of one or more Matching rule
+ * entries. RuleSet has one bit for each of the 16 Matching rule entries.
+ * If multiple Matching rules are selected, then all conditions will be
+ * AND'ed to produce a final match result.
+ * 0 = Matching rule not selected
+ * 1 = Matching rule selected
+ *
+ * 0x0F - RuleSet [7:0]
+ * Bits 7:0 - RuleSet [7:0]
+ */
+static void ksz9477_acl_processing_rule_add_match(u8 *entry, u8 match_idx)
+{
+ u8 vale = entry[KSZ9477_ACL_PORT_ACCESS_E];
+ u8 valf = entry[KSZ9477_ACL_PORT_ACCESS_F];
+
+ if (match_idx < 8)
+ valf |= BIT(match_idx);
+ else
+ vale |= BIT(match_idx - 8);
+
+ ksz9477_acl_set_reg(entry, KSZ9477_ACL_PORT_ACCESS_E, vale);
+ ksz9477_acl_set_reg(entry, KSZ9477_ACL_PORT_ACCESS_F, valf);
+}
+
+/**
+ * ksz9477_acl_get_init_entry - Get a new uninitialized entry for a specified
+ * port on a ksz_device.
+ * @dev: The ksz_device instance.
+ * @port: The port number to get the uninitialized entry for.
+ * @cookie: The cookie to associate with the entry.
+ * @prio: The priority to associate with the entry.
+ *
+ * This function retrieves the next available ACL entry for the specified port,
+ * clears all access flags, and associates it with the current cookie.
+ *
+ * Returns: A pointer to the new uninitialized ACL entry.
+ */
+static struct ksz9477_acl_entry *
+ksz9477_acl_get_init_entry(struct ksz_device *dev, int port,
+ unsigned long cookie, u32 prio)
+{
+ struct ksz9477_acl_priv *acl = dev->ports[port].acl_priv;
+ struct ksz9477_acl_entries *acles = &acl->acles;
+ struct ksz9477_acl_entry *entry;
+
+ entry = &acles->entries[acles->entries_count];
+ entry->cookie = cookie;
+ entry->prio = prio;
+
+ /* clear all access flags */
+ entry->entry[KSZ9477_ACL_PORT_ACCESS_10] = 0;
+ entry->entry[KSZ9477_ACL_PORT_ACCESS_11] = 0;
+
+ return entry;
+}
+
+/**
+ * ksz9477_acl_match_process_l2 - Configure Layer 2 ACL matching rules and
+ * processing rules.
+ * @dev: Pointer to the ksz_device.
+ * @port: Port number.
+ * @ethtype: Ethernet type.
+ * @src_mac: Source MAC address.
+ * @dst_mac: Destination MAC address.
+ * @cookie: The cookie to associate with the entry.
+ * @prio: The priority of the entry.
+ *
+ * This function sets up matching and processing rules for Layer 2 ACLs.
+ * It takes into account that only one MAC per entry is supported.
+ */
+void ksz9477_acl_match_process_l2(struct ksz_device *dev, int port,
+ u16 ethtype, u8 *src_mac, u8 *dst_mac,
+ unsigned long cookie, u32 prio)
+{
+ struct ksz9477_acl_priv *acl = dev->ports[port].acl_priv;
+ struct ksz9477_acl_entries *acles = &acl->acles;
+ struct ksz9477_acl_entry *entry;
+
+ entry = ksz9477_acl_get_init_entry(dev, port, cookie, prio);
+
+ /* ACL supports only one MAC per entry */
+ if (src_mac && dst_mac) {
+ ksz9477_acl_matching_rule_cfg_l2(entry->entry, ethtype, src_mac,
+ true);
+
+ /* Add both match entries to first processing rule */
+ ksz9477_acl_processing_rule_add_match(entry->entry,
+ acles->entries_count);
+ acles->entries_count++;
+ ksz9477_acl_processing_rule_add_match(entry->entry,
+ acles->entries_count);
+
+ entry = ksz9477_acl_get_init_entry(dev, port, cookie, prio);
+ ksz9477_acl_matching_rule_cfg_l2(entry->entry, 0, dst_mac,
+ false);
+ acles->entries_count++;
+ } else {
+ u8 *mac = src_mac ? src_mac : dst_mac;
+ bool is_src = src_mac ? true : false;
+
+ ksz9477_acl_matching_rule_cfg_l2(entry->entry, ethtype, mac,
+ is_src);
+ ksz9477_acl_processing_rule_add_match(entry->entry,
+ acles->entries_count);
+ acles->entries_count++;
+ }
+}
diff --git a/drivers/net/dsa/microchip/ksz9477_reg.h b/drivers/net/dsa/microchip/ksz9477_reg.h
index cba3dba58bc3..504e085aab52 100644
--- a/drivers/net/dsa/microchip/ksz9477_reg.h
+++ b/drivers/net/dsa/microchip/ksz9477_reg.h
@@ -112,19 +112,6 @@
#define REG_SW_IBA_SYNC__1 0x010C
-#define REG_SW_IO_STRENGTH__1 0x010D
-#define SW_DRIVE_STRENGTH_M 0x7
-#define SW_DRIVE_STRENGTH_2MA 0
-#define SW_DRIVE_STRENGTH_4MA 1
-#define SW_DRIVE_STRENGTH_8MA 2
-#define SW_DRIVE_STRENGTH_12MA 3
-#define SW_DRIVE_STRENGTH_16MA 4
-#define SW_DRIVE_STRENGTH_20MA 5
-#define SW_DRIVE_STRENGTH_24MA 6
-#define SW_DRIVE_STRENGTH_28MA 7
-#define SW_HI_SPEED_DRIVE_STRENGTH_S 4
-#define SW_LO_SPEED_DRIVE_STRENGTH_S 0
-
#define REG_SW_IBA_STATUS__4 0x0110
#define SW_IBA_REQ BIT(31)
diff --git a/drivers/net/dsa/microchip/ksz9477_tc_flower.c b/drivers/net/dsa/microchip/ksz9477_tc_flower.c
new file mode 100644
index 000000000000..8b2f5be667e0
--- /dev/null
+++ b/drivers/net/dsa/microchip/ksz9477_tc_flower.c
@@ -0,0 +1,281 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2023 Pengutronix, Oleksij Rempel <kernel@pengutronix.de>
+
+#include "ksz9477.h"
+#include "ksz9477_reg.h"
+#include "ksz_common.h"
+
+#define ETHER_TYPE_FULL_MASK cpu_to_be16(~0)
+#define KSZ9477_MAX_TC 7
+
+/**
+ * ksz9477_flower_parse_key_l2 - Parse Layer 2 key from flow rule and configure
+ * ACL entries accordingly.
+ * @dev: Pointer to the ksz_device.
+ * @port: Port number.
+ * @extack: Pointer to the netlink_ext_ack.
+ * @rule: Pointer to the flow_rule.
+ * @cookie: The cookie to associate with the entry.
+ * @prio: The priority of the entry.
+ *
+ * This function parses the Layer 2 key from the flow rule and configures
+ * the corresponding ACL entries. It checks for unsupported offloads and
+ * available entries before proceeding with the configuration.
+ *
+ * Returns: 0 on success or a negative error code on failure.
+ */
+static int ksz9477_flower_parse_key_l2(struct ksz_device *dev, int port,
+ struct netlink_ext_ack *extack,
+ struct flow_rule *rule,
+ unsigned long cookie, u32 prio)
+{
+ struct ksz9477_acl_priv *acl = dev->ports[port].acl_priv;
+ struct flow_match_eth_addrs ematch;
+ struct ksz9477_acl_entries *acles;
+ int required_entries;
+ u8 *src_mac = NULL;
+ u8 *dst_mac = NULL;
+ u16 ethtype = 0;
+
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
+ struct flow_match_basic match;
+
+ flow_rule_match_basic(rule, &match);
+
+ if (match.key->n_proto) {
+ if (match.mask->n_proto != ETHER_TYPE_FULL_MASK) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "ethernet type mask must be a full mask");
+ return -EINVAL;
+ }
+
+ ethtype = be16_to_cpu(match.key->n_proto);
+ }
+ }
+
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
+ flow_rule_match_eth_addrs(rule, &ematch);
+
+ if (!is_zero_ether_addr(ematch.key->src)) {
+ if (!is_broadcast_ether_addr(ematch.mask->src))
+ goto not_full_mask_err;
+
+ src_mac = ematch.key->src;
+ }
+
+ if (!is_zero_ether_addr(ematch.key->dst)) {
+ if (!is_broadcast_ether_addr(ematch.mask->dst))
+ goto not_full_mask_err;
+
+ dst_mac = ematch.key->dst;
+ }
+ }
+
+ acles = &acl->acles;
+ /* ACL supports only one MAC per entry */
+ required_entries = src_mac && dst_mac ? 2 : 1;
+
+ /* Check if there are enough available entries */
+ if (acles->entries_count + required_entries > KSZ9477_ACL_MAX_ENTRIES) {
+ NL_SET_ERR_MSG_MOD(extack, "ACL entry limit reached");
+ return -EOPNOTSUPP;
+ }
+
+ ksz9477_acl_match_process_l2(dev, port, ethtype, src_mac, dst_mac,
+ cookie, prio);
+
+ return 0;
+
+not_full_mask_err:
+ NL_SET_ERR_MSG_MOD(extack, "MAC address mask must be a full mask");
+ return -EOPNOTSUPP;
+}
+
+/**
+ * ksz9477_flower_parse_key - Parse flow rule keys for a specified port on a
+ * ksz_device.
+ * @dev: The ksz_device instance.
+ * @port: The port number to parse the flow rule keys for.
+ * @extack: The netlink extended ACK for reporting errors.
+ * @rule: The flow_rule to parse.
+ * @cookie: The cookie to associate with the entry.
+ * @prio: The priority of the entry.
+ *
+ * This function checks if the used keys in the flow rule are supported by
+ * the device and parses the L2 keys if they match. If unsupported keys are
+ * used, an error message is set in the extended ACK.
+ *
+ * Returns: 0 on success or a negative error code on failure.
+ */
+static int ksz9477_flower_parse_key(struct ksz_device *dev, int port,
+ struct netlink_ext_ack *extack,
+ struct flow_rule *rule,
+ unsigned long cookie, u32 prio)
+{
+ struct flow_dissector *dissector = rule->match.dissector;
+ int ret;
+
+ if (dissector->used_keys &
+ ~(BIT_ULL(FLOW_DISSECTOR_KEY_BASIC) |
+ BIT_ULL(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
+ BIT_ULL(FLOW_DISSECTOR_KEY_CONTROL))) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Unsupported keys used");
+ return -EOPNOTSUPP;
+ }
+
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC) ||
+ flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
+ ret = ksz9477_flower_parse_key_l2(dev, port, extack, rule,
+ cookie, prio);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+/**
+ * ksz9477_flower_parse_action - Parse flow rule actions for a specified port
+ * on a ksz_device.
+ * @dev: The ksz_device instance.
+ * @port: The port number to parse the flow rule actions for.
+ * @extack: The netlink extended ACK for reporting errors.
+ * @cls: The flow_cls_offload instance containing the flow rule.
+ * @entry_idx: The index of the ACL entry to store the action.
+ *
+ * This function checks if the actions in the flow rule are supported by
+ * the device. Currently, only actions that change priorities are supported.
+ * If unsupported actions are encountered, an error message is set in the
+ * extended ACK.
+ *
+ * Returns: 0 on success or a negative error code on failure.
+ */
+static int ksz9477_flower_parse_action(struct ksz_device *dev, int port,
+ struct netlink_ext_ack *extack,
+ struct flow_cls_offload *cls,
+ int entry_idx)
+{
+ struct flow_rule *rule = flow_cls_offload_flow_rule(cls);
+ struct ksz9477_acl_priv *acl = dev->ports[port].acl_priv;
+ const struct flow_action_entry *act;
+ struct ksz9477_acl_entry *entry;
+ bool prio_force = false;
+ u8 prio_val = 0;
+ int i;
+
+ if (TC_H_MIN(cls->classid)) {
+ NL_SET_ERR_MSG_MOD(extack, "hw_tc is not supported. Use: action skbedit prio");
+ return -EOPNOTSUPP;
+ }
+
+ flow_action_for_each(i, act, &rule->action) {
+ switch (act->id) {
+ case FLOW_ACTION_PRIORITY:
+ if (act->priority > KSZ9477_MAX_TC) {
+ NL_SET_ERR_MSG_MOD(extack, "Priority value is too high");
+ return -EOPNOTSUPP;
+ }
+ prio_force = true;
+ prio_val = act->priority;
+ break;
+ default:
+ NL_SET_ERR_MSG_MOD(extack, "action not supported");
+ return -EOPNOTSUPP;
+ }
+ }
+
+ /* pick entry to store action */
+ entry = &acl->acles.entries[entry_idx];
+
+ ksz9477_acl_action_rule_cfg(entry->entry, prio_force, prio_val);
+ ksz9477_acl_processing_rule_set_action(entry->entry, entry_idx);
+
+ return 0;
+}
+
+/**
+ * ksz9477_cls_flower_add - Add a flow classification rule for a specified port
+ * on a ksz_device.
+ * @ds: The DSA switch instance.
+ * @port: The port number to add the flow classification rule to.
+ * @cls: The flow_cls_offload instance containing the flow rule.
+ * @ingress: A flag indicating if the rule is applied on the ingress path.
+ *
+ * This function adds a flow classification rule for a specified port on a
+ * ksz_device. It checks if the ACL offloading is supported and parses the flow
+ * keys and actions. If the ACL is not supported, it returns an error. If there
+ * are unprocessed entries, it parses the action for the rule.
+ *
+ * Returns: 0 on success or a negative error code on failure.
+ */
+int ksz9477_cls_flower_add(struct dsa_switch *ds, int port,
+ struct flow_cls_offload *cls, bool ingress)
+{
+ struct flow_rule *rule = flow_cls_offload_flow_rule(cls);
+ struct netlink_ext_ack *extack = cls->common.extack;
+ struct ksz_device *dev = ds->priv;
+ struct ksz9477_acl_priv *acl;
+ int action_entry_idx;
+ int ret;
+
+ acl = dev->ports[port].acl_priv;
+
+ if (!acl) {
+ NL_SET_ERR_MSG_MOD(extack, "ACL offloading is not supported");
+ return -EOPNOTSUPP;
+ }
+
+ /* A complex rule set can take multiple entries. Use first entry
+ * to store the action.
+ */
+ action_entry_idx = acl->acles.entries_count;
+
+ ret = ksz9477_flower_parse_key(dev, port, extack, rule, cls->cookie,
+ cls->common.prio);
+ if (ret)
+ return ret;
+
+ ret = ksz9477_flower_parse_action(dev, port, extack, cls,
+ action_entry_idx);
+ if (ret)
+ return ret;
+
+ ret = ksz9477_sort_acl_entries(dev, port);
+ if (ret)
+ return ret;
+
+ return ksz9477_acl_write_list(dev, port);
+}
+
+/**
+ * ksz9477_cls_flower_del - Remove a flow classification rule for a specified
+ * port on a ksz_device.
+ * @ds: The DSA switch instance.
+ * @port: The port number to remove the flow classification rule from.
+ * @cls: The flow_cls_offload instance containing the flow rule.
+ * @ingress: A flag indicating if the rule is applied on the ingress path.
+ *
+ * This function removes a flow classification rule for a specified port on a
+ * ksz_device. It checks if the ACL is initialized, and if not, returns an
+ * error. If the ACL is initialized, it removes entries with the specified
+ * cookie and rewrites the ACL list.
+ *
+ * Returns: 0 on success or a negative error code on failure.
+ */
+int ksz9477_cls_flower_del(struct dsa_switch *ds, int port,
+ struct flow_cls_offload *cls, bool ingress)
+{
+ unsigned long cookie = cls->cookie;
+ struct ksz_device *dev = ds->priv;
+ struct ksz9477_acl_priv *acl;
+
+ acl = dev->ports[port].acl_priv;
+
+ if (!acl)
+ return -EOPNOTSUPP;
+
+ ksz9477_acl_remove_entries(dev, port, &acl->acles, cookie);
+
+ return ksz9477_acl_write_list(dev, port);
+}
diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c
index 42db7679c360..173ad8f04671 100644
--- a/drivers/net/dsa/microchip/ksz_common.c
+++ b/drivers/net/dsa/microchip/ksz_common.c
@@ -186,6 +186,72 @@ static const struct ksz_mib_names ksz9477_mib_names[] = {
{ 0x83, "tx_discards" },
};
+struct ksz_driver_strength_prop {
+ const char *name;
+ int offset;
+ int value;
+};
+
+enum ksz_driver_strength_type {
+ KSZ_DRIVER_STRENGTH_HI,
+ KSZ_DRIVER_STRENGTH_LO,
+ KSZ_DRIVER_STRENGTH_IO,
+};
+
+/**
+ * struct ksz_drive_strength - drive strength mapping
+ * @reg_val: register value
+ * @microamp: microamp value
+ */
+struct ksz_drive_strength {
+ u32 reg_val;
+ u32 microamp;
+};
+
+/* ksz9477_drive_strengths - Drive strength mapping for KSZ9477 variants
+ *
+ * This values are not documented in KSZ9477 variants but confirmed by
+ * Microchip that KSZ9477, KSZ9567, KSZ8567, KSZ9897, KSZ9896, KSZ9563, KSZ9893
+ * and KSZ8563 are using same register (drive strength) settings like KSZ8795.
+ *
+ * Documentation in KSZ8795CLX provides more information with some
+ * recommendations:
+ * - for high speed signals
+ * 1. 4 mA or 8 mA is often used for MII, RMII, and SPI interface with using
+ * 2.5V or 3.3V VDDIO.
+ * 2. 12 mA or 16 mA is often used for MII, RMII, and SPI interface with
+ * using 1.8V VDDIO.
+ * 3. 20 mA or 24 mA is often used for GMII/RGMII interface with using 2.5V
+ * or 3.3V VDDIO.
+ * 4. 28 mA is often used for GMII/RGMII interface with using 1.8V VDDIO.
+ * 5. In same interface, the heavy loading should use higher one of the
+ * drive current strength.
+ * - for low speed signals
+ * 1. 3.3V VDDIO, use either 4 mA or 8 mA.
+ * 2. 2.5V VDDIO, use either 8 mA or 12 mA.
+ * 3. 1.8V VDDIO, use either 12 mA or 16 mA.
+ * 4. If it is heavy loading, can use higher drive current strength.
+ */
+static const struct ksz_drive_strength ksz9477_drive_strengths[] = {
+ { SW_DRIVE_STRENGTH_2MA, 2000 },
+ { SW_DRIVE_STRENGTH_4MA, 4000 },
+ { SW_DRIVE_STRENGTH_8MA, 8000 },
+ { SW_DRIVE_STRENGTH_12MA, 12000 },
+ { SW_DRIVE_STRENGTH_16MA, 16000 },
+ { SW_DRIVE_STRENGTH_20MA, 20000 },
+ { SW_DRIVE_STRENGTH_24MA, 24000 },
+ { SW_DRIVE_STRENGTH_28MA, 28000 },
+};
+
+/* ksz8830_drive_strengths - Drive strength mapping for KSZ8830, KSZ8873, ..
+ * variants.
+ * This values are documented in KSZ8873 and KSZ8863 datasheets.
+ */
+static const struct ksz_drive_strength ksz8830_drive_strengths[] = {
+ { 0, 8000 },
+ { KSZ8873_DRIVE_STRENGTH_16MA, 16000 },
+};
+
static const struct ksz_dev_ops ksz8_dev_ops = {
.setup = ksz8_setup,
.get_port_addr = ksz8_get_port_addr,
@@ -2498,8 +2564,7 @@ static int ksz_port_mdb_del(struct dsa_switch *ds, int port,
return dev->dev_ops->mdb_del(dev, port, mdb, db);
}
-static int ksz_enable_port(struct dsa_switch *ds, int port,
- struct phy_device *phy)
+static int ksz_port_setup(struct dsa_switch *ds, int port)
{
struct ksz_device *dev = ds->priv;
@@ -2562,6 +2627,23 @@ void ksz_port_stp_state_set(struct dsa_switch *ds, int port, u8 state)
ksz_update_port_member(dev, port);
}
+static void ksz_port_teardown(struct dsa_switch *ds, int port)
+{
+ struct ksz_device *dev = ds->priv;
+
+ switch (dev->chip_id) {
+ case KSZ8563_CHIP_ID:
+ case KSZ9477_CHIP_ID:
+ case KSZ9563_CHIP_ID:
+ case KSZ9567_CHIP_ID:
+ case KSZ9893_CHIP_ID:
+ case KSZ9896_CHIP_ID:
+ case KSZ9897_CHIP_ID:
+ if (dsa_is_user_port(ds, port))
+ ksz9477_port_acl_free(dev, port);
+ }
+}
+
static int ksz_port_pre_bridge_flags(struct dsa_switch *ds, int port,
struct switchdev_brport_flags flags,
struct netlink_ext_ack *extack)
@@ -3107,6 +3189,44 @@ static int ksz_switch_detect(struct ksz_device *dev)
return 0;
}
+static int ksz_cls_flower_add(struct dsa_switch *ds, int port,
+ struct flow_cls_offload *cls, bool ingress)
+{
+ struct ksz_device *dev = ds->priv;
+
+ switch (dev->chip_id) {
+ case KSZ8563_CHIP_ID:
+ case KSZ9477_CHIP_ID:
+ case KSZ9563_CHIP_ID:
+ case KSZ9567_CHIP_ID:
+ case KSZ9893_CHIP_ID:
+ case KSZ9896_CHIP_ID:
+ case KSZ9897_CHIP_ID:
+ return ksz9477_cls_flower_add(ds, port, cls, ingress);
+ }
+
+ return -EOPNOTSUPP;
+}
+
+static int ksz_cls_flower_del(struct dsa_switch *ds, int port,
+ struct flow_cls_offload *cls, bool ingress)
+{
+ struct ksz_device *dev = ds->priv;
+
+ switch (dev->chip_id) {
+ case KSZ8563_CHIP_ID:
+ case KSZ9477_CHIP_ID:
+ case KSZ9563_CHIP_ID:
+ case KSZ9567_CHIP_ID:
+ case KSZ9893_CHIP_ID:
+ case KSZ9896_CHIP_ID:
+ case KSZ9897_CHIP_ID:
+ return ksz9477_cls_flower_del(ds, port, cls, ingress);
+ }
+
+ return -EOPNOTSUPP;
+}
+
/* Bandwidth is calculated by idle slope/transmission speed. Then the Bandwidth
* is converted to Hex-decimal using the successive multiplication method. On
* every step, integer part is taken and decimal part is carry forwarded.
@@ -3431,7 +3551,7 @@ static const struct dsa_switch_ops ksz_switch_ops = {
.phylink_mac_config = ksz_phylink_mac_config,
.phylink_mac_link_up = ksz_phylink_mac_link_up,
.phylink_mac_link_down = ksz_mac_link_down,
- .port_enable = ksz_enable_port,
+ .port_setup = ksz_port_setup,
.set_ageing_time = ksz_set_ageing_time,
.get_strings = ksz_get_strings,
.get_ethtool_stats = ksz_get_ethtool_stats,
@@ -3439,6 +3559,7 @@ static const struct dsa_switch_ops ksz_switch_ops = {
.port_bridge_join = ksz_port_bridge_join,
.port_bridge_leave = ksz_port_bridge_leave,
.port_stp_state_set = ksz_port_stp_state_set,
+ .port_teardown = ksz_port_teardown,
.port_pre_bridge_flags = ksz_port_pre_bridge_flags,
.port_bridge_flags = ksz_port_bridge_flags,
.port_fast_age = ksz_port_fast_age,
@@ -3461,6 +3582,8 @@ static const struct dsa_switch_ops ksz_switch_ops = {
.port_hwtstamp_set = ksz_hwtstamp_set,
.port_txtstamp = ksz_port_txtstamp,
.port_rxtstamp = ksz_port_rxtstamp,
+ .cls_flower_add = ksz_cls_flower_add,
+ .cls_flower_del = ksz_cls_flower_del,
.port_setup_tc = ksz_setup_tc,
.get_mac_eee = ksz_get_mac_eee,
.set_mac_eee = ksz_set_mac_eee,
@@ -3530,6 +3653,245 @@ static void ksz_parse_rgmii_delay(struct ksz_device *dev, int port_num,
dev->ports[port_num].rgmii_tx_val = tx_delay;
}
+/**
+ * ksz_drive_strength_to_reg() - Convert drive strength value to corresponding
+ * register value.
+ * @array: The array of drive strength values to search.
+ * @array_size: The size of the array.
+ * @microamp: The drive strength value in microamp to be converted.
+ *
+ * This function searches the array of drive strength values for the given
+ * microamp value and returns the corresponding register value for that drive.
+ *
+ * Returns: If found, the corresponding register value for that drive strength
+ * is returned. Otherwise, -EINVAL is returned indicating an invalid value.
+ */
+static int ksz_drive_strength_to_reg(const struct ksz_drive_strength *array,
+ size_t array_size, int microamp)
+{
+ int i;
+
+ for (i = 0; i < array_size; i++) {
+ if (array[i].microamp == microamp)
+ return array[i].reg_val;
+ }
+
+ return -EINVAL;
+}
+
+/**
+ * ksz_drive_strength_error() - Report invalid drive strength value
+ * @dev: ksz device
+ * @array: The array of drive strength values to search.
+ * @array_size: The size of the array.
+ * @microamp: Invalid drive strength value in microamp
+ *
+ * This function logs an error message when an unsupported drive strength value
+ * is detected. It lists out all the supported drive strength values for
+ * reference in the error message.
+ */
+static void ksz_drive_strength_error(struct ksz_device *dev,
+ const struct ksz_drive_strength *array,
+ size_t array_size, int microamp)
+{
+ char supported_values[100];
+ size_t remaining_size;
+ int added_len;
+ char *ptr;
+ int i;
+
+ remaining_size = sizeof(supported_values);
+ ptr = supported_values;
+
+ for (i = 0; i < array_size; i++) {
+ added_len = snprintf(ptr, remaining_size,
+ i == 0 ? "%d" : ", %d", array[i].microamp);
+
+ if (added_len >= remaining_size)
+ break;
+
+ ptr += added_len;
+ remaining_size -= added_len;
+ }
+
+ dev_err(dev->dev, "Invalid drive strength %d, supported values are %s\n",
+ microamp, supported_values);
+}
+
+/**
+ * ksz9477_drive_strength_write() - Set the drive strength for specific KSZ9477
+ * chip variants.
+ * @dev: ksz device
+ * @props: Array of drive strength properties to be applied
+ * @num_props: Number of properties in the array
+ *
+ * This function configures the drive strength for various KSZ9477 chip variants
+ * based on the provided properties. It handles chip-specific nuances and
+ * ensures only valid drive strengths are written to the respective chip.
+ *
+ * Return: 0 on successful configuration, a negative error code on failure.
+ */
+static int ksz9477_drive_strength_write(struct ksz_device *dev,
+ struct ksz_driver_strength_prop *props,
+ int num_props)
+{
+ size_t array_size = ARRAY_SIZE(ksz9477_drive_strengths);
+ int i, ret, reg;
+ u8 mask = 0;
+ u8 val = 0;
+
+ if (props[KSZ_DRIVER_STRENGTH_IO].value != -1)
+ dev_warn(dev->dev, "%s is not supported by this chip variant\n",
+ props[KSZ_DRIVER_STRENGTH_IO].name);
+
+ if (dev->chip_id == KSZ8795_CHIP_ID ||
+ dev->chip_id == KSZ8794_CHIP_ID ||
+ dev->chip_id == KSZ8765_CHIP_ID)
+ reg = KSZ8795_REG_SW_CTRL_20;
+ else
+ reg = KSZ9477_REG_SW_IO_STRENGTH;
+
+ for (i = 0; i < num_props; i++) {
+ if (props[i].value == -1)
+ continue;
+
+ ret = ksz_drive_strength_to_reg(ksz9477_drive_strengths,
+ array_size, props[i].value);
+ if (ret < 0) {
+ ksz_drive_strength_error(dev, ksz9477_drive_strengths,
+ array_size, props[i].value);
+ return ret;
+ }
+
+ mask |= SW_DRIVE_STRENGTH_M << props[i].offset;
+ val |= ret << props[i].offset;
+ }
+
+ return ksz_rmw8(dev, reg, mask, val);
+}
+
+/**
+ * ksz8830_drive_strength_write() - Set the drive strength configuration for
+ * KSZ8830 compatible chip variants.
+ * @dev: ksz device
+ * @props: Array of drive strength properties to be set
+ * @num_props: Number of properties in the array
+ *
+ * This function applies the specified drive strength settings to KSZ8830 chip
+ * variants (KSZ8873, KSZ8863).
+ * It ensures the configurations align with what the chip variant supports and
+ * warns or errors out on unsupported settings.
+ *
+ * Return: 0 on success, error code otherwise
+ */
+static int ksz8830_drive_strength_write(struct ksz_device *dev,
+ struct ksz_driver_strength_prop *props,
+ int num_props)
+{
+ size_t array_size = ARRAY_SIZE(ksz8830_drive_strengths);
+ int microamp;
+ int i, ret;
+
+ for (i = 0; i < num_props; i++) {
+ if (props[i].value == -1 || i == KSZ_DRIVER_STRENGTH_IO)
+ continue;
+
+ dev_warn(dev->dev, "%s is not supported by this chip variant\n",
+ props[i].name);
+ }
+
+ microamp = props[KSZ_DRIVER_STRENGTH_IO].value;
+ ret = ksz_drive_strength_to_reg(ksz8830_drive_strengths, array_size,
+ microamp);
+ if (ret < 0) {
+ ksz_drive_strength_error(dev, ksz8830_drive_strengths,
+ array_size, microamp);
+ return ret;
+ }
+
+ return ksz_rmw8(dev, KSZ8873_REG_GLOBAL_CTRL_12,
+ KSZ8873_DRIVE_STRENGTH_16MA, ret);
+}
+
+/**
+ * ksz_parse_drive_strength() - Extract and apply drive strength configurations
+ * from device tree properties.
+ * @dev: ksz device
+ *
+ * This function reads the specified drive strength properties from the
+ * device tree, validates against the supported chip variants, and sets
+ * them accordingly. An error should be critical here, as the drive strength
+ * settings are crucial for EMI compliance.
+ *
+ * Return: 0 on success, error code otherwise
+ */
+static int ksz_parse_drive_strength(struct ksz_device *dev)
+{
+ struct ksz_driver_strength_prop of_props[] = {
+ [KSZ_DRIVER_STRENGTH_HI] = {
+ .name = "microchip,hi-drive-strength-microamp",
+ .offset = SW_HI_SPEED_DRIVE_STRENGTH_S,
+ .value = -1,
+ },
+ [KSZ_DRIVER_STRENGTH_LO] = {
+ .name = "microchip,lo-drive-strength-microamp",
+ .offset = SW_LO_SPEED_DRIVE_STRENGTH_S,
+ .value = -1,
+ },
+ [KSZ_DRIVER_STRENGTH_IO] = {
+ .name = "microchip,io-drive-strength-microamp",
+ .offset = 0, /* don't care */
+ .value = -1,
+ },
+ };
+ struct device_node *np = dev->dev->of_node;
+ bool have_any_prop = false;
+ int i, ret;
+
+ for (i = 0; i < ARRAY_SIZE(of_props); i++) {
+ ret = of_property_read_u32(np, of_props[i].name,
+ &of_props[i].value);
+ if (ret && ret != -EINVAL)
+ dev_warn(dev->dev, "Failed to read %s\n",
+ of_props[i].name);
+ if (ret)
+ continue;
+
+ have_any_prop = true;
+ }
+
+ if (!have_any_prop)
+ return 0;
+
+ switch (dev->chip_id) {
+ case KSZ8830_CHIP_ID:
+ return ksz8830_drive_strength_write(dev, of_props,
+ ARRAY_SIZE(of_props));
+ case KSZ8795_CHIP_ID:
+ case KSZ8794_CHIP_ID:
+ case KSZ8765_CHIP_ID:
+ case KSZ8563_CHIP_ID:
+ case KSZ9477_CHIP_ID:
+ case KSZ9563_CHIP_ID:
+ case KSZ9567_CHIP_ID:
+ case KSZ9893_CHIP_ID:
+ case KSZ9896_CHIP_ID:
+ case KSZ9897_CHIP_ID:
+ return ksz9477_drive_strength_write(dev, of_props,
+ ARRAY_SIZE(of_props));
+ default:
+ for (i = 0; i < ARRAY_SIZE(of_props); i++) {
+ if (of_props[i].value == -1)
+ continue;
+
+ dev_warn(dev->dev, "%s is not supported by this chip variant\n",
+ of_props[i].name);
+ }
+ }
+
+ return 0;
+}
+
int ksz_switch_register(struct ksz_device *dev)
{
const struct ksz_chip_data *info;
@@ -3612,6 +3974,10 @@ int ksz_switch_register(struct ksz_device *dev)
for (port_num = 0; port_num < dev->info->port_cnt; ++port_num)
dev->ports[port_num].interface = PHY_INTERFACE_MODE_NA;
if (dev->dev->of_node) {
+ ret = ksz_parse_drive_strength(dev);
+ if (ret)
+ return ret;
+
ret = of_get_phy_mode(dev->dev->of_node, &interface);
if (ret == 0)
dev->compat_interface = interface;
diff --git a/drivers/net/dsa/microchip/ksz_common.h b/drivers/net/dsa/microchip/ksz_common.h
index a4de58847dea..d180c8a34e27 100644
--- a/drivers/net/dsa/microchip/ksz_common.h
+++ b/drivers/net/dsa/microchip/ksz_common.h
@@ -117,6 +117,7 @@ struct ksz_port {
u32 rgmii_tx_val;
u32 rgmii_rx_val;
struct ksz_device *ksz_dev;
+ void *acl_priv;
struct ksz_irq pirq;
u8 num;
#if IS_ENABLED(CONFIG_NET_DSA_MICROCHIP_KSZ_PTP)
@@ -689,6 +690,26 @@ static inline int is_lan937x(struct ksz_device *dev)
#define KSZ8_LEGAL_PACKET_SIZE 1518
#define KSZ9477_MAX_FRAME_SIZE 9000
+#define KSZ8873_REG_GLOBAL_CTRL_12 0x0e
+/* Drive Strength of I/O Pad
+ * 0: 8mA, 1: 16mA
+ */
+#define KSZ8873_DRIVE_STRENGTH_16MA BIT(6)
+
+#define KSZ8795_REG_SW_CTRL_20 0xa3
+#define KSZ9477_REG_SW_IO_STRENGTH 0x010d
+#define SW_DRIVE_STRENGTH_M 0x7
+#define SW_DRIVE_STRENGTH_2MA 0
+#define SW_DRIVE_STRENGTH_4MA 1
+#define SW_DRIVE_STRENGTH_8MA 2
+#define SW_DRIVE_STRENGTH_12MA 3
+#define SW_DRIVE_STRENGTH_16MA 4
+#define SW_DRIVE_STRENGTH_20MA 5
+#define SW_DRIVE_STRENGTH_24MA 6
+#define SW_DRIVE_STRENGTH_28MA 7
+#define SW_HI_SPEED_DRIVE_STRENGTH_S 4
+#define SW_LO_SPEED_DRIVE_STRENGTH_S 0
+
#define KSZ9477_REG_PORT_OUT_RATE_0 0x0420
#define KSZ9477_OUT_RATE_NO_LIMIT 0
diff --git a/drivers/net/dsa/mt7530-mmio.c b/drivers/net/dsa/mt7530-mmio.c
index 0a6a2fe34e64..b74a230a3f13 100644
--- a/drivers/net/dsa/mt7530-mmio.c
+++ b/drivers/net/dsa/mt7530-mmio.c
@@ -63,15 +63,12 @@ mt7988_probe(struct platform_device *pdev)
return dsa_register_switch(priv->ds);
}
-static int
-mt7988_remove(struct platform_device *pdev)
+static void mt7988_remove(struct platform_device *pdev)
{
struct mt7530_priv *priv = platform_get_drvdata(pdev);
if (priv)
mt7530_remove_common(priv);
-
- return 0;
}
static void mt7988_shutdown(struct platform_device *pdev)
@@ -88,7 +85,7 @@ static void mt7988_shutdown(struct platform_device *pdev)
static struct platform_driver mt7988_platform_driver = {
.probe = mt7988_probe,
- .remove = mt7988_remove,
+ .remove_new = mt7988_remove,
.shutdown = mt7988_shutdown,
.driver = {
.name = "mt7530-mmio",
diff --git a/drivers/net/dsa/mv88e6xxx/pcs-639x.c b/drivers/net/dsa/mv88e6xxx/pcs-639x.c
index ba373656bfe1..9a8429f5d09c 100644
--- a/drivers/net/dsa/mv88e6xxx/pcs-639x.c
+++ b/drivers/net/dsa/mv88e6xxx/pcs-639x.c
@@ -208,7 +208,7 @@ static void mv88e639x_sgmii_pcs_pre_config(struct phylink_pcs *pcs,
static int mv88e6390_erratum_3_14(struct mv88e639x_pcs *mpcs)
{
- const int lanes[] = { MV88E6390_PORT9_LANE0, MV88E6390_PORT9_LANE1,
+ static const int lanes[] = { MV88E6390_PORT9_LANE0, MV88E6390_PORT9_LANE1,
MV88E6390_PORT9_LANE2, MV88E6390_PORT9_LANE3,
MV88E6390_PORT10_LANE0, MV88E6390_PORT10_LANE1,
MV88E6390_PORT10_LANE2, MV88E6390_PORT10_LANE3 };
diff --git a/drivers/net/dsa/ocelot/ocelot_ext.c b/drivers/net/dsa/ocelot/ocelot_ext.c
index c29bee5a5c48..22187d831c4b 100644
--- a/drivers/net/dsa/ocelot/ocelot_ext.c
+++ b/drivers/net/dsa/ocelot/ocelot_ext.c
@@ -115,19 +115,17 @@ err_free_felix:
return err;
}
-static int ocelot_ext_remove(struct platform_device *pdev)
+static void ocelot_ext_remove(struct platform_device *pdev)
{
struct felix *felix = dev_get_drvdata(&pdev->dev);
if (!felix)
- return 0;
+ return;
dsa_unregister_switch(felix->ds);
kfree(felix->ds);
kfree(felix);
-
- return 0;
}
static void ocelot_ext_shutdown(struct platform_device *pdev)
@@ -154,7 +152,7 @@ static struct platform_driver ocelot_ext_switch_driver = {
.of_match_table = ocelot_ext_switch_of_match,
},
.probe = ocelot_ext_probe,
- .remove = ocelot_ext_remove,
+ .remove_new = ocelot_ext_remove,
.shutdown = ocelot_ext_shutdown,
};
module_platform_driver(ocelot_ext_switch_driver);
diff --git a/drivers/net/dsa/ocelot/seville_vsc9953.c b/drivers/net/dsa/ocelot/seville_vsc9953.c
index 8f912bda120b..049930da0521 100644
--- a/drivers/net/dsa/ocelot/seville_vsc9953.c
+++ b/drivers/net/dsa/ocelot/seville_vsc9953.c
@@ -1029,19 +1029,17 @@ err_alloc_felix:
return err;
}
-static int seville_remove(struct platform_device *pdev)
+static void seville_remove(struct platform_device *pdev)
{
struct felix *felix = platform_get_drvdata(pdev);
if (!felix)
- return 0;
+ return;
dsa_unregister_switch(felix->ds);
kfree(felix->ds);
kfree(felix);
-
- return 0;
}
static void seville_shutdown(struct platform_device *pdev)
@@ -1064,7 +1062,7 @@ MODULE_DEVICE_TABLE(of, seville_of_match);
static struct platform_driver seville_vsc9953_driver = {
.probe = seville_probe,
- .remove = seville_remove,
+ .remove_new = seville_remove,
.shutdown = seville_shutdown,
.driver = {
.name = "mscc_seville",
diff --git a/drivers/net/dsa/realtek/realtek-smi.c b/drivers/net/dsa/realtek/realtek-smi.c
index ff13563059c5..bfd11591faf4 100644
--- a/drivers/net/dsa/realtek/realtek-smi.c
+++ b/drivers/net/dsa/realtek/realtek-smi.c
@@ -506,12 +506,12 @@ static int realtek_smi_probe(struct platform_device *pdev)
return 0;
}
-static int realtek_smi_remove(struct platform_device *pdev)
+static void realtek_smi_remove(struct platform_device *pdev)
{
struct realtek_priv *priv = platform_get_drvdata(pdev);
if (!priv)
- return 0;
+ return;
dsa_unregister_switch(priv->ds);
if (priv->slave_mii_bus)
@@ -520,8 +520,6 @@ static int realtek_smi_remove(struct platform_device *pdev)
/* leave the device reset asserted */
if (priv->reset)
gpiod_set_value(priv->reset, 1);
-
- return 0;
}
static void realtek_smi_shutdown(struct platform_device *pdev)
@@ -559,7 +557,7 @@ static struct platform_driver realtek_smi_driver = {
.of_match_table = realtek_smi_of_match,
},
.probe = realtek_smi_probe,
- .remove = realtek_smi_remove,
+ .remove_new = realtek_smi_remove,
.shutdown = realtek_smi_shutdown,
};
module_platform_driver(realtek_smi_driver);
diff --git a/drivers/net/dsa/realtek/rtl8366rb.c b/drivers/net/dsa/realtek/rtl8366rb.c
index 7868ef237f6c..b39b719a5b8f 100644
--- a/drivers/net/dsa/realtek/rtl8366rb.c
+++ b/drivers/net/dsa/realtek/rtl8366rb.c
@@ -95,12 +95,6 @@
#define RTL8366RB_PAACR_RX_PAUSE BIT(6)
#define RTL8366RB_PAACR_AN BIT(7)
-#define RTL8366RB_PAACR_CPU_PORT (RTL8366RB_PAACR_SPEED_1000M | \
- RTL8366RB_PAACR_FULL_DUPLEX | \
- RTL8366RB_PAACR_LINK_UP | \
- RTL8366RB_PAACR_TX_PAUSE | \
- RTL8366RB_PAACR_RX_PAUSE)
-
/* bits 0..7 = port 0, bits 8..15 = port 1 */
#define RTL8366RB_PSTAT0 0x0014
/* bits 0..7 = port 2, bits 8..15 = port 3 */
@@ -1081,29 +1075,61 @@ rtl8366rb_mac_link_up(struct dsa_switch *ds, int port, unsigned int mode,
int speed, int duplex, bool tx_pause, bool rx_pause)
{
struct realtek_priv *priv = ds->priv;
+ unsigned int val;
int ret;
+ /* Allow forcing the mode on the fixed CPU port, no autonegotiation.
+ * We assume autonegotiation works on the PHY-facing ports.
+ */
if (port != priv->cpu_port)
return;
dev_dbg(priv->dev, "MAC link up on CPU port (%d)\n", port);
- /* Force the fixed CPU port into 1Gbit mode, no autonegotiation */
ret = regmap_update_bits(priv->map, RTL8366RB_MAC_FORCE_CTRL_REG,
BIT(port), BIT(port));
if (ret) {
- dev_err(priv->dev, "failed to force 1Gbit on CPU port\n");
+ dev_err(priv->dev, "failed to force CPU port\n");
return;
}
+ /* Conjure port config */
+ switch (speed) {
+ case SPEED_10:
+ val = RTL8366RB_PAACR_SPEED_10M;
+ break;
+ case SPEED_100:
+ val = RTL8366RB_PAACR_SPEED_100M;
+ break;
+ case SPEED_1000:
+ val = RTL8366RB_PAACR_SPEED_1000M;
+ break;
+ default:
+ val = RTL8366RB_PAACR_SPEED_1000M;
+ break;
+ }
+
+ if (duplex == DUPLEX_FULL)
+ val |= RTL8366RB_PAACR_FULL_DUPLEX;
+
+ if (tx_pause)
+ val |= RTL8366RB_PAACR_TX_PAUSE;
+
+ if (rx_pause)
+ val |= RTL8366RB_PAACR_RX_PAUSE;
+
+ val |= RTL8366RB_PAACR_LINK_UP;
+
ret = regmap_update_bits(priv->map, RTL8366RB_PAACR2,
0xFF00U,
- RTL8366RB_PAACR_CPU_PORT << 8);
+ val << 8);
if (ret) {
dev_err(priv->dev, "failed to set PAACR on CPU port\n");
return;
}
+ dev_dbg(priv->dev, "set PAACR to %04x\n", val);
+
/* Enable the CPU port */
ret = regmap_update_bits(priv->map, RTL8366RB_PECR, BIT(port),
0);
diff --git a/drivers/net/dsa/rzn1_a5psw.c b/drivers/net/dsa/rzn1_a5psw.c
index 2eda10b33f2e..10092ea85e46 100644
--- a/drivers/net/dsa/rzn1_a5psw.c
+++ b/drivers/net/dsa/rzn1_a5psw.c
@@ -1272,19 +1272,17 @@ free_pcs:
return ret;
}
-static int a5psw_remove(struct platform_device *pdev)
+static void a5psw_remove(struct platform_device *pdev)
{
struct a5psw *a5psw = platform_get_drvdata(pdev);
if (!a5psw)
- return 0;
+ return;
dsa_unregister_switch(&a5psw->ds);
a5psw_pcs_free(a5psw);
clk_disable_unprepare(a5psw->hclk);
clk_disable_unprepare(a5psw->clk);
-
- return 0;
}
static void a5psw_shutdown(struct platform_device *pdev)
@@ -1311,7 +1309,7 @@ static struct platform_driver a5psw_driver = {
.of_match_table = a5psw_of_mtable,
},
.probe = a5psw_probe,
- .remove = a5psw_remove,
+ .remove_new = a5psw_remove,
.shutdown = a5psw_shutdown,
};
module_platform_driver(a5psw_driver);
diff --git a/drivers/net/dsa/sja1105/sja1105_clocking.c b/drivers/net/dsa/sja1105/sja1105_clocking.c
index e3699f76f6d7..08a3e7b96254 100644
--- a/drivers/net/dsa/sja1105/sja1105_clocking.c
+++ b/drivers/net/dsa/sja1105/sja1105_clocking.c
@@ -153,14 +153,14 @@ static int sja1105_cgu_mii_tx_clk_config(struct sja1105_private *priv,
{
const struct sja1105_regs *regs = priv->info->regs;
struct sja1105_cgu_mii_ctrl mii_tx_clk;
- const int mac_clk_sources[] = {
+ static const int mac_clk_sources[] = {
CLKSRC_MII0_TX_CLK,
CLKSRC_MII1_TX_CLK,
CLKSRC_MII2_TX_CLK,
CLKSRC_MII3_TX_CLK,
CLKSRC_MII4_TX_CLK,
};
- const int phy_clk_sources[] = {
+ static const int phy_clk_sources[] = {
CLKSRC_IDIV0,
CLKSRC_IDIV1,
CLKSRC_IDIV2,
@@ -194,7 +194,7 @@ sja1105_cgu_mii_rx_clk_config(struct sja1105_private *priv, int port)
const struct sja1105_regs *regs = priv->info->regs;
struct sja1105_cgu_mii_ctrl mii_rx_clk;
u8 packed_buf[SJA1105_SIZE_CGU_CMD] = {0};
- const int clk_sources[] = {
+ static const int clk_sources[] = {
CLKSRC_MII0_RX_CLK,
CLKSRC_MII1_RX_CLK,
CLKSRC_MII2_RX_CLK,
@@ -221,7 +221,7 @@ sja1105_cgu_mii_ext_tx_clk_config(struct sja1105_private *priv, int port)
const struct sja1105_regs *regs = priv->info->regs;
struct sja1105_cgu_mii_ctrl mii_ext_tx_clk;
u8 packed_buf[SJA1105_SIZE_CGU_CMD] = {0};
- const int clk_sources[] = {
+ static const int clk_sources[] = {
CLKSRC_IDIV0,
CLKSRC_IDIV1,
CLKSRC_IDIV2,
@@ -248,7 +248,7 @@ sja1105_cgu_mii_ext_rx_clk_config(struct sja1105_private *priv, int port)
const struct sja1105_regs *regs = priv->info->regs;
struct sja1105_cgu_mii_ctrl mii_ext_rx_clk;
u8 packed_buf[SJA1105_SIZE_CGU_CMD] = {0};
- const int clk_sources[] = {
+ static const int clk_sources[] = {
CLKSRC_IDIV0,
CLKSRC_IDIV1,
CLKSRC_IDIV2,
@@ -349,8 +349,13 @@ static int sja1105_cgu_rgmii_tx_clk_config(struct sja1105_private *priv,
if (speed == priv->info->port_speed[SJA1105_SPEED_1000MBPS]) {
clksrc = CLKSRC_PLL0;
} else {
- int clk_sources[] = {CLKSRC_IDIV0, CLKSRC_IDIV1, CLKSRC_IDIV2,
- CLKSRC_IDIV3, CLKSRC_IDIV4};
+ static const int clk_sources[] = {
+ CLKSRC_IDIV0,
+ CLKSRC_IDIV1,
+ CLKSRC_IDIV2,
+ CLKSRC_IDIV3,
+ CLKSRC_IDIV4,
+ };
clksrc = clk_sources[port];
}
@@ -638,7 +643,7 @@ static int sja1105_cgu_rmii_ref_clk_config(struct sja1105_private *priv,
const struct sja1105_regs *regs = priv->info->regs;
struct sja1105_cgu_mii_ctrl ref_clk;
u8 packed_buf[SJA1105_SIZE_CGU_CMD] = {0};
- const int clk_sources[] = {
+ static const int clk_sources[] = {
CLKSRC_MII0_TX_CLK,
CLKSRC_MII1_TX_CLK,
CLKSRC_MII2_TX_CLK,
diff --git a/drivers/net/dsa/vitesse-vsc73xx-platform.c b/drivers/net/dsa/vitesse-vsc73xx-platform.c
index bd4206e8f9af..755b7895a15a 100644
--- a/drivers/net/dsa/vitesse-vsc73xx-platform.c
+++ b/drivers/net/dsa/vitesse-vsc73xx-platform.c
@@ -112,16 +112,14 @@ static int vsc73xx_platform_probe(struct platform_device *pdev)
return vsc73xx_probe(&vsc_platform->vsc);
}
-static int vsc73xx_platform_remove(struct platform_device *pdev)
+static void vsc73xx_platform_remove(struct platform_device *pdev)
{
struct vsc73xx_platform *vsc_platform = platform_get_drvdata(pdev);
if (!vsc_platform)
- return 0;
+ return;
vsc73xx_remove(&vsc_platform->vsc);
-
- return 0;
}
static void vsc73xx_platform_shutdown(struct platform_device *pdev)
@@ -160,7 +158,7 @@ MODULE_DEVICE_TABLE(of, vsc73xx_of_match);
static struct platform_driver vsc73xx_platform_driver = {
.probe = vsc73xx_platform_probe,
- .remove = vsc73xx_platform_remove,
+ .remove_new = vsc73xx_platform_remove,
.shutdown = vsc73xx_platform_shutdown,
.driver = {
.name = "vsc73xx-platform",
diff --git a/drivers/net/ethernet/8390/ax88796.c b/drivers/net/ethernet/8390/ax88796.c
index af603256b724..2874680ef24d 100644
--- a/drivers/net/ethernet/8390/ax88796.c
+++ b/drivers/net/ethernet/8390/ax88796.c
@@ -811,7 +811,7 @@ static int ax_init_dev(struct net_device *dev)
return ret;
}
-static int ax_remove(struct platform_device *pdev)
+static void ax_remove(struct platform_device *pdev)
{
struct net_device *dev = platform_get_drvdata(pdev);
struct ei_device *ei_local = netdev_priv(dev);
@@ -832,8 +832,6 @@ static int ax_remove(struct platform_device *pdev)
platform_set_drvdata(pdev, NULL);
free_netdev(dev);
-
- return 0;
}
/*
@@ -1011,7 +1009,7 @@ static struct platform_driver axdrv = {
.name = "ax88796",
},
.probe = ax_probe,
- .remove = ax_remove,
+ .remove_new = ax_remove,
.suspend = ax_suspend,
.resume = ax_resume,
};
diff --git a/drivers/net/ethernet/8390/mcf8390.c b/drivers/net/ethernet/8390/mcf8390.c
index 217838b28220..5a0fa995e643 100644
--- a/drivers/net/ethernet/8390/mcf8390.c
+++ b/drivers/net/ethernet/8390/mcf8390.c
@@ -441,7 +441,7 @@ static int mcf8390_probe(struct platform_device *pdev)
return 0;
}
-static int mcf8390_remove(struct platform_device *pdev)
+static void mcf8390_remove(struct platform_device *pdev)
{
struct net_device *dev = platform_get_drvdata(pdev);
struct resource *mem;
@@ -450,7 +450,6 @@ static int mcf8390_remove(struct platform_device *pdev)
mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
release_mem_region(mem->start, resource_size(mem));
free_netdev(dev);
- return 0;
}
static struct platform_driver mcf8390_drv = {
@@ -458,7 +457,7 @@ static struct platform_driver mcf8390_drv = {
.name = "mcf8390",
},
.probe = mcf8390_probe,
- .remove = mcf8390_remove,
+ .remove_new = mcf8390_remove,
};
module_platform_driver(mcf8390_drv);
diff --git a/drivers/net/ethernet/8390/ne.c b/drivers/net/ethernet/8390/ne.c
index 7d89ec1cf273..350683a09d2e 100644
--- a/drivers/net/ethernet/8390/ne.c
+++ b/drivers/net/ethernet/8390/ne.c
@@ -823,7 +823,7 @@ static int __init ne_drv_probe(struct platform_device *pdev)
return 0;
}
-static int ne_drv_remove(struct platform_device *pdev)
+static void ne_drv_remove(struct platform_device *pdev)
{
struct net_device *dev = platform_get_drvdata(pdev);
@@ -842,7 +842,6 @@ static int ne_drv_remove(struct platform_device *pdev)
release_region(dev->base_addr, NE_IO_EXTENT);
free_netdev(dev);
}
- return 0;
}
/* Remove unused devices or all if true. */
@@ -895,7 +894,7 @@ static int ne_drv_resume(struct platform_device *pdev)
#endif
static struct platform_driver ne_driver = {
- .remove = ne_drv_remove,
+ .remove_new = ne_drv_remove,
.suspend = ne_drv_suspend,
.resume = ne_drv_resume,
.driver = {
diff --git a/drivers/net/ethernet/actions/owl-emac.c b/drivers/net/ethernet/actions/owl-emac.c
index c6f8f852bff1..e03193da5874 100644
--- a/drivers/net/ethernet/actions/owl-emac.c
+++ b/drivers/net/ethernet/actions/owl-emac.c
@@ -1582,15 +1582,13 @@ static int owl_emac_probe(struct platform_device *pdev)
return 0;
}
-static int owl_emac_remove(struct platform_device *pdev)
+static void owl_emac_remove(struct platform_device *pdev)
{
struct owl_emac_priv *priv = platform_get_drvdata(pdev);
netif_napi_del(&priv->napi);
phy_disconnect(priv->netdev->phydev);
cancel_work_sync(&priv->mac_reset_task);
-
- return 0;
}
static const struct of_device_id owl_emac_of_match[] = {
@@ -1609,7 +1607,7 @@ static struct platform_driver owl_emac_driver = {
.pm = &owl_emac_pm_ops,
},
.probe = owl_emac_probe,
- .remove = owl_emac_remove,
+ .remove_new = owl_emac_remove,
};
module_platform_driver(owl_emac_driver);
diff --git a/drivers/net/ethernet/aeroflex/greth.c b/drivers/net/ethernet/aeroflex/greth.c
index 597a02c75d52..27af7746d645 100644
--- a/drivers/net/ethernet/aeroflex/greth.c
+++ b/drivers/net/ethernet/aeroflex/greth.c
@@ -1525,7 +1525,7 @@ error1:
return err;
}
-static int greth_of_remove(struct platform_device *of_dev)
+static void greth_of_remove(struct platform_device *of_dev)
{
struct net_device *ndev = platform_get_drvdata(of_dev);
struct greth_private *greth = netdev_priv(ndev);
@@ -1544,8 +1544,6 @@ static int greth_of_remove(struct platform_device *of_dev)
of_iounmap(&of_dev->resource[0], greth->regs, resource_size(&of_dev->resource[0]));
free_netdev(ndev);
-
- return 0;
}
static const struct of_device_id greth_of_match[] = {
@@ -1566,7 +1564,7 @@ static struct platform_driver greth_of_driver = {
.of_match_table = greth_of_match,
},
.probe = greth_of_probe,
- .remove = greth_of_remove,
+ .remove_new = greth_of_remove,
};
module_platform_driver(greth_of_driver);
diff --git a/drivers/net/ethernet/allwinner/sun4i-emac.c b/drivers/net/ethernet/allwinner/sun4i-emac.c
index a94c62956eed..d761c08fe5c1 100644
--- a/drivers/net/ethernet/allwinner/sun4i-emac.c
+++ b/drivers/net/ethernet/allwinner/sun4i-emac.c
@@ -1083,7 +1083,7 @@ out:
return ret;
}
-static int emac_remove(struct platform_device *pdev)
+static void emac_remove(struct platform_device *pdev)
{
struct net_device *ndev = platform_get_drvdata(pdev);
struct emac_board_info *db = netdev_priv(ndev);
@@ -1101,7 +1101,6 @@ static int emac_remove(struct platform_device *pdev)
free_netdev(ndev);
dev_dbg(&pdev->dev, "released and freed device\n");
- return 0;
}
static int emac_suspend(struct platform_device *dev, pm_message_t state)
@@ -1143,7 +1142,7 @@ static struct platform_driver emac_driver = {
.of_match_table = emac_of_match,
},
.probe = emac_probe,
- .remove = emac_remove,
+ .remove_new = emac_remove,
.suspend = emac_suspend,
.resume = emac_resume,
};
diff --git a/drivers/net/ethernet/altera/altera_tse_main.c b/drivers/net/ethernet/altera/altera_tse_main.c
index 2e15800e5310..1b1799985d1d 100644
--- a/drivers/net/ethernet/altera/altera_tse_main.c
+++ b/drivers/net/ethernet/altera/altera_tse_main.c
@@ -1464,7 +1464,7 @@ err_free_netdev:
/* Remove Altera TSE MAC device
*/
-static int altera_tse_remove(struct platform_device *pdev)
+static void altera_tse_remove(struct platform_device *pdev)
{
struct net_device *ndev = platform_get_drvdata(pdev);
struct altera_tse_private *priv = netdev_priv(ndev);
@@ -1476,8 +1476,6 @@ static int altera_tse_remove(struct platform_device *pdev)
lynx_pcs_destroy(priv->pcs);
free_netdev(ndev);
-
- return 0;
}
static const struct altera_dmaops altera_dtype_sgdma = {
@@ -1528,7 +1526,7 @@ MODULE_DEVICE_TABLE(of, altera_tse_ids);
static struct platform_driver altera_tse_driver = {
.probe = altera_tse_probe,
- .remove = altera_tse_remove,
+ .remove_new = altera_tse_remove,
.suspend = NULL,
.resume = NULL,
.driver = {
diff --git a/drivers/net/ethernet/amd/au1000_eth.c b/drivers/net/ethernet/amd/au1000_eth.c
index c5cec4e79489..85c978149bf6 100644
--- a/drivers/net/ethernet/amd/au1000_eth.c
+++ b/drivers/net/ethernet/amd/au1000_eth.c
@@ -1323,7 +1323,7 @@ out:
return err;
}
-static int au1000_remove(struct platform_device *pdev)
+static void au1000_remove(struct platform_device *pdev)
{
struct net_device *dev = platform_get_drvdata(pdev);
struct au1000_private *aup = netdev_priv(dev);
@@ -1359,13 +1359,11 @@ static int au1000_remove(struct platform_device *pdev)
release_mem_region(macen->start, resource_size(macen));
free_netdev(dev);
-
- return 0;
}
static struct platform_driver au1000_eth_driver = {
.probe = au1000_probe,
- .remove = au1000_remove,
+ .remove_new = au1000_remove,
.driver = {
.name = "au1000-eth",
},
diff --git a/drivers/net/ethernet/amd/pds_core/core.c b/drivers/net/ethernet/amd/pds_core/core.c
index 36f9b932b9e2..2a8643e167e1 100644
--- a/drivers/net/ethernet/amd/pds_core/core.c
+++ b/drivers/net/ethernet/amd/pds_core/core.c
@@ -445,12 +445,13 @@ int pdsc_setup(struct pdsc *pdsc, bool init)
goto err_out_teardown;
/* Set up the VIFs */
- err = pdsc_viftypes_init(pdsc);
- if (err)
- goto err_out_teardown;
+ if (init) {
+ err = pdsc_viftypes_init(pdsc);
+ if (err)
+ goto err_out_teardown;
- if (init)
pdsc_debugfs_add_viftype(pdsc);
+ }
clear_bit(PDSC_S_FW_DEAD, &pdsc->state);
return 0;
@@ -469,8 +470,10 @@ void pdsc_teardown(struct pdsc *pdsc, bool removing)
pdsc_qcq_free(pdsc, &pdsc->notifyqcq);
pdsc_qcq_free(pdsc, &pdsc->adminqcq);
- kfree(pdsc->viftype_status);
- pdsc->viftype_status = NULL;
+ if (removing) {
+ kfree(pdsc->viftype_status);
+ pdsc->viftype_status = NULL;
+ }
if (pdsc->intr_info) {
for (i = 0; i < pdsc->nintrs; i++)
@@ -512,7 +515,7 @@ void pdsc_stop(struct pdsc *pdsc)
PDS_CORE_INTR_MASK_SET);
}
-static void pdsc_fw_down(struct pdsc *pdsc)
+void pdsc_fw_down(struct pdsc *pdsc)
{
union pds_core_notifyq_comp reset_event = {
.reset.ecode = cpu_to_le16(PDS_EVENT_RESET),
@@ -520,10 +523,13 @@ static void pdsc_fw_down(struct pdsc *pdsc)
};
if (test_and_set_bit(PDSC_S_FW_DEAD, &pdsc->state)) {
- dev_err(pdsc->dev, "%s: already happening\n", __func__);
+ dev_warn(pdsc->dev, "%s: already happening\n", __func__);
return;
}
+ if (pdsc->pdev->is_virtfn)
+ return;
+
/* Notify clients of fw_down */
if (pdsc->fw_reporter)
devlink_health_report(pdsc->fw_reporter, "FW down reported", pdsc);
@@ -533,7 +539,7 @@ static void pdsc_fw_down(struct pdsc *pdsc)
pdsc_teardown(pdsc, PDSC_TEARDOWN_RECOVERY);
}
-static void pdsc_fw_up(struct pdsc *pdsc)
+void pdsc_fw_up(struct pdsc *pdsc)
{
union pds_core_notifyq_comp reset_event = {
.reset.ecode = cpu_to_le16(PDS_EVENT_RESET),
@@ -546,6 +552,11 @@ static void pdsc_fw_up(struct pdsc *pdsc)
return;
}
+ if (pdsc->pdev->is_virtfn) {
+ clear_bit(PDSC_S_FW_DEAD, &pdsc->state);
+ return;
+ }
+
err = pdsc_setup(pdsc, PDSC_SETUP_RECOVERY);
if (err)
goto err_out;
@@ -567,6 +578,18 @@ err_out:
pdsc_teardown(pdsc, PDSC_TEARDOWN_RECOVERY);
}
+static void pdsc_check_pci_health(struct pdsc *pdsc)
+{
+ u8 fw_status = ioread8(&pdsc->info_regs->fw_status);
+
+ /* is PCI broken? */
+ if (fw_status != PDS_RC_BAD_PCI)
+ return;
+
+ pdsc_reset_prepare(pdsc->pdev);
+ pdsc_reset_done(pdsc->pdev);
+}
+
void pdsc_health_thread(struct work_struct *work)
{
struct pdsc *pdsc = container_of(work, struct pdsc, health_work);
@@ -593,6 +616,8 @@ void pdsc_health_thread(struct work_struct *work)
pdsc_fw_down(pdsc);
}
+ pdsc_check_pci_health(pdsc);
+
pdsc->fw_generation = pdsc->fw_status & PDS_CORE_FW_STS_F_GENERATION;
out_unlock:
diff --git a/drivers/net/ethernet/amd/pds_core/core.h b/drivers/net/ethernet/amd/pds_core/core.h
index e545fafc4819..f3a7deda9972 100644
--- a/drivers/net/ethernet/amd/pds_core/core.h
+++ b/drivers/net/ethernet/amd/pds_core/core.h
@@ -283,6 +283,9 @@ int pdsc_devcmd_reset(struct pdsc *pdsc);
int pdsc_dev_reinit(struct pdsc *pdsc);
int pdsc_dev_init(struct pdsc *pdsc);
+void pdsc_reset_prepare(struct pci_dev *pdev);
+void pdsc_reset_done(struct pci_dev *pdev);
+
int pdsc_intr_alloc(struct pdsc *pdsc, char *name,
irq_handler_t handler, void *data);
void pdsc_intr_free(struct pdsc *pdsc, int index);
@@ -309,4 +312,8 @@ irqreturn_t pdsc_adminq_isr(int irq, void *data);
int pdsc_firmware_update(struct pdsc *pdsc, const struct firmware *fw,
struct netlink_ext_ack *extack);
+
+void pdsc_fw_down(struct pdsc *pdsc);
+void pdsc_fw_up(struct pdsc *pdsc);
+
#endif /* _PDSC_H_ */
diff --git a/drivers/net/ethernet/amd/pds_core/dev.c b/drivers/net/ethernet/amd/pds_core/dev.c
index f77cd9f5a2fd..7c1b965d61a9 100644
--- a/drivers/net/ethernet/amd/pds_core/dev.c
+++ b/drivers/net/ethernet/amd/pds_core/dev.c
@@ -42,6 +42,8 @@ int pdsc_err_to_errno(enum pds_core_status_code code)
return -ERANGE;
case PDS_RC_BAD_ADDR:
return -EFAULT;
+ case PDS_RC_BAD_PCI:
+ return -ENXIO;
case PDS_RC_EOPCODE:
case PDS_RC_EINTR:
case PDS_RC_DEV_CMD:
@@ -62,7 +64,7 @@ bool pdsc_is_fw_running(struct pdsc *pdsc)
/* Firmware is useful only if the running bit is set and
* fw_status != 0xff (bad PCI read)
*/
- return (pdsc->fw_status != 0xff) &&
+ return (pdsc->fw_status != PDS_RC_BAD_PCI) &&
(pdsc->fw_status & PDS_CORE_FW_STS_F_RUNNING);
}
@@ -128,6 +130,7 @@ static int pdsc_devcmd_wait(struct pdsc *pdsc, u8 opcode, int max_seconds)
unsigned long max_wait;
unsigned long duration;
int timeout = 0;
+ bool running;
int done = 0;
int err = 0;
int status;
@@ -136,6 +139,10 @@ static int pdsc_devcmd_wait(struct pdsc *pdsc, u8 opcode, int max_seconds)
max_wait = start_time + (max_seconds * HZ);
while (!done && !timeout) {
+ running = pdsc_is_fw_running(pdsc);
+ if (!running)
+ break;
+
done = pdsc_devcmd_done(pdsc);
if (done)
break;
@@ -152,7 +159,7 @@ static int pdsc_devcmd_wait(struct pdsc *pdsc, u8 opcode, int max_seconds)
dev_dbg(dev, "DEVCMD %d %s after %ld secs\n",
opcode, pdsc_devcmd_str(opcode), duration / HZ);
- if (!done || timeout) {
+ if ((!done || timeout) && running) {
dev_err(dev, "DEVCMD %d %s timeout, done %d timeout %d max_seconds=%d\n",
opcode, pdsc_devcmd_str(opcode), done, timeout,
max_seconds);
diff --git a/drivers/net/ethernet/amd/pds_core/main.c b/drivers/net/ethernet/amd/pds_core/main.c
index 3a45bf474a19..3080898d7b95 100644
--- a/drivers/net/ethernet/amd/pds_core/main.c
+++ b/drivers/net/ethernet/amd/pds_core/main.c
@@ -445,12 +445,62 @@ static void pdsc_remove(struct pci_dev *pdev)
devlink_free(dl);
}
+void pdsc_reset_prepare(struct pci_dev *pdev)
+{
+ struct pdsc *pdsc = pci_get_drvdata(pdev);
+
+ pdsc_fw_down(pdsc);
+
+ pci_free_irq_vectors(pdev);
+ pdsc_unmap_bars(pdsc);
+ pci_release_regions(pdev);
+ pci_disable_device(pdev);
+}
+
+void pdsc_reset_done(struct pci_dev *pdev)
+{
+ struct pdsc *pdsc = pci_get_drvdata(pdev);
+ struct device *dev = pdsc->dev;
+ int err;
+
+ err = pci_enable_device(pdev);
+ if (err) {
+ dev_err(dev, "Cannot enable PCI device: %pe\n", ERR_PTR(err));
+ return;
+ }
+ pci_set_master(pdev);
+
+ if (!pdev->is_virtfn) {
+ pcie_print_link_status(pdsc->pdev);
+
+ err = pci_request_regions(pdsc->pdev, PDS_CORE_DRV_NAME);
+ if (err) {
+ dev_err(pdsc->dev, "Cannot request PCI regions: %pe\n",
+ ERR_PTR(err));
+ return;
+ }
+
+ err = pdsc_map_bars(pdsc);
+ if (err)
+ return;
+ }
+
+ pdsc_fw_up(pdsc);
+}
+
+static const struct pci_error_handlers pdsc_err_handler = {
+ /* FLR handling */
+ .reset_prepare = pdsc_reset_prepare,
+ .reset_done = pdsc_reset_done,
+};
+
static struct pci_driver pdsc_driver = {
.name = PDS_CORE_DRV_NAME,
.id_table = pdsc_id_table,
.probe = pdsc_probe,
.remove = pdsc_remove,
.sriov_configure = pdsc_sriov_configure,
+ .err_handler = &pdsc_err_handler,
};
void *pdsc_get_pf_struct(struct pci_dev *vf_pdev)
diff --git a/drivers/net/ethernet/amd/sunlance.c b/drivers/net/ethernet/amd/sunlance.c
index 33bb539ad70a..c78706d21a6a 100644
--- a/drivers/net/ethernet/amd/sunlance.c
+++ b/drivers/net/ethernet/amd/sunlance.c
@@ -1487,7 +1487,7 @@ static int sunlance_sbus_probe(struct platform_device *op)
return err;
}
-static int sunlance_sbus_remove(struct platform_device *op)
+static void sunlance_sbus_remove(struct platform_device *op)
{
struct lance_private *lp = platform_get_drvdata(op);
struct net_device *net_dev = lp->dev;
@@ -1497,8 +1497,6 @@ static int sunlance_sbus_remove(struct platform_device *op)
lance_free_hwresources(lp);
free_netdev(net_dev);
-
- return 0;
}
static const struct of_device_id sunlance_sbus_match[] = {
@@ -1516,7 +1514,7 @@ static struct platform_driver sunlance_sbus_driver = {
.of_match_table = sunlance_sbus_match,
},
.probe = sunlance_sbus_probe,
- .remove = sunlance_sbus_remove,
+ .remove_new = sunlance_sbus_remove,
};
module_platform_driver(sunlance_sbus_driver);
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-platform.c b/drivers/net/ethernet/amd/xgbe/xgbe-platform.c
index 4d790a89fe77..91842a5e161b 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-platform.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-platform.c
@@ -512,7 +512,7 @@ err_alloc:
return ret;
}
-static int xgbe_platform_remove(struct platform_device *pdev)
+static void xgbe_platform_remove(struct platform_device *pdev)
{
struct xgbe_prv_data *pdata = platform_get_drvdata(pdev);
@@ -521,8 +521,6 @@ static int xgbe_platform_remove(struct platform_device *pdev)
platform_device_put(pdata->phy_platdev);
xgbe_free_pdata(pdata);
-
- return 0;
}
#ifdef CONFIG_PM_SLEEP
@@ -615,7 +613,7 @@ static struct platform_driver xgbe_driver = {
.pm = &xgbe_platform_pm_ops,
},
.probe = xgbe_platform_probe,
- .remove = xgbe_platform_remove,
+ .remove_new = xgbe_platform_remove,
};
int xgbe_platform_init(void)
diff --git a/drivers/net/ethernet/apm/xgene-v2/main.c b/drivers/net/ethernet/apm/xgene-v2/main.c
index 379d19d18dbe..9e90c2381491 100644
--- a/drivers/net/ethernet/apm/xgene-v2/main.c
+++ b/drivers/net/ethernet/apm/xgene-v2/main.c
@@ -690,7 +690,7 @@ err:
return ret;
}
-static int xge_remove(struct platform_device *pdev)
+static void xge_remove(struct platform_device *pdev)
{
struct xge_pdata *pdata;
struct net_device *ndev;
@@ -706,8 +706,6 @@ static int xge_remove(struct platform_device *pdev)
xge_mdio_remove(ndev);
unregister_netdev(ndev);
free_netdev(ndev);
-
- return 0;
}
static void xge_shutdown(struct platform_device *pdev)
@@ -736,7 +734,7 @@ static struct platform_driver xge_driver = {
.acpi_match_table = ACPI_PTR(xge_acpi_match),
},
.probe = xge_probe,
- .remove = xge_remove,
+ .remove_new = xge_remove,
.shutdown = xge_shutdown,
};
module_platform_driver(xge_driver);
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
index f3305c434c95..91b4ab31f233 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
@@ -2127,7 +2127,7 @@ err:
return ret;
}
-static int xgene_enet_remove(struct platform_device *pdev)
+static void xgene_enet_remove(struct platform_device *pdev)
{
struct xgene_enet_pdata *pdata;
struct net_device *ndev;
@@ -2149,8 +2149,6 @@ static int xgene_enet_remove(struct platform_device *pdev)
xgene_enet_delete_desc_rings(pdata);
pdata->port_ops->shutdown(pdata);
free_netdev(ndev);
-
- return 0;
}
static void xgene_enet_shutdown(struct platform_device *pdev)
@@ -2174,7 +2172,7 @@ static struct platform_driver xgene_enet_driver = {
.acpi_match_table = ACPI_PTR(xgene_enet_acpi_match),
},
.probe = xgene_enet_probe,
- .remove = xgene_enet_remove,
+ .remove_new = xgene_enet_remove,
.shutdown = xgene_enet_shutdown,
};
diff --git a/drivers/net/ethernet/apple/macmace.c b/drivers/net/ethernet/apple/macmace.c
index 8775c3234e91..766ab78256fe 100644
--- a/drivers/net/ethernet/apple/macmace.c
+++ b/drivers/net/ethernet/apple/macmace.c
@@ -739,7 +739,7 @@ MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Macintosh MACE ethernet driver");
MODULE_ALIAS("platform:macmace");
-static int mac_mace_device_remove(struct platform_device *pdev)
+static void mac_mace_device_remove(struct platform_device *pdev)
{
struct net_device *dev = platform_get_drvdata(pdev);
struct mace_data *mp = netdev_priv(dev);
@@ -755,13 +755,11 @@ static int mac_mace_device_remove(struct platform_device *pdev)
mp->tx_ring, mp->tx_ring_phys);
free_netdev(dev);
-
- return 0;
}
static struct platform_driver mac_mace_driver = {
.probe = mace_probe,
- .remove = mac_mace_device_remove,
+ .remove_new = mac_mace_device_remove,
.driver = {
.name = mac_mace_string,
},
diff --git a/drivers/net/ethernet/arc/emac_arc.c b/drivers/net/ethernet/arc/emac_arc.c
index ce3147e886a1..a3afddb23ee8 100644
--- a/drivers/net/ethernet/arc/emac_arc.c
+++ b/drivers/net/ethernet/arc/emac_arc.c
@@ -58,14 +58,12 @@ out_netdev:
return err;
}
-static int emac_arc_remove(struct platform_device *pdev)
+static void emac_arc_remove(struct platform_device *pdev)
{
struct net_device *ndev = platform_get_drvdata(pdev);
arc_emac_remove(ndev);
free_netdev(ndev);
-
- return 0;
}
static const struct of_device_id emac_arc_dt_ids[] = {
@@ -76,7 +74,7 @@ MODULE_DEVICE_TABLE(of, emac_arc_dt_ids);
static struct platform_driver emac_arc_driver = {
.probe = emac_arc_probe,
- .remove = emac_arc_remove,
+ .remove_new = emac_arc_remove,
.driver = {
.name = DRV_NAME,
.of_match_table = emac_arc_dt_ids,
diff --git a/drivers/net/ethernet/arc/emac_rockchip.c b/drivers/net/ethernet/arc/emac_rockchip.c
index 509101112279..493d6356c8ca 100644
--- a/drivers/net/ethernet/arc/emac_rockchip.c
+++ b/drivers/net/ethernet/arc/emac_rockchip.c
@@ -244,7 +244,7 @@ out_netdev:
return err;
}
-static int emac_rockchip_remove(struct platform_device *pdev)
+static void emac_rockchip_remove(struct platform_device *pdev)
{
struct net_device *ndev = platform_get_drvdata(pdev);
struct rockchip_priv_data *priv = netdev_priv(ndev);
@@ -260,12 +260,11 @@ static int emac_rockchip_remove(struct platform_device *pdev)
clk_disable_unprepare(priv->macclk);
free_netdev(ndev);
- return 0;
}
static struct platform_driver emac_rockchip_driver = {
.probe = emac_rockchip_probe,
- .remove = emac_rockchip_remove,
+ .remove_new = emac_rockchip_remove,
.driver = {
.name = DRV_NAME,
.of_match_table = emac_rockchip_dt_ids,
diff --git a/drivers/net/ethernet/atheros/ag71xx.c b/drivers/net/ethernet/atheros/ag71xx.c
index 009e0b3066fa..0f2f400b5bc4 100644
--- a/drivers/net/ethernet/atheros/ag71xx.c
+++ b/drivers/net/ethernet/atheros/ag71xx.c
@@ -1968,21 +1968,19 @@ err_put_clk:
return err;
}
-static int ag71xx_remove(struct platform_device *pdev)
+static void ag71xx_remove(struct platform_device *pdev)
{
struct net_device *ndev = platform_get_drvdata(pdev);
struct ag71xx *ag;
if (!ndev)
- return 0;
+ return;
ag = netdev_priv(ndev);
unregister_netdev(ndev);
ag71xx_mdio_remove(ag);
clk_disable_unprepare(ag->clk_eth);
platform_set_drvdata(pdev, NULL);
-
- return 0;
}
static const u32 ar71xx_fifo_ar7100[] = {
@@ -2069,7 +2067,7 @@ static const struct of_device_id ag71xx_match[] = {
static struct platform_driver ag71xx_driver = {
.probe = ag71xx_probe,
- .remove = ag71xx_remove,
+ .remove_new = ag71xx_remove,
.driver = {
.name = "ag71xx",
.of_match_table = ag71xx_match,
diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c.h b/drivers/net/ethernet/atheros/atl1c/atl1c.h
index 43d821fe7a54..63ba64dbb731 100644
--- a/drivers/net/ethernet/atheros/atl1c/atl1c.h
+++ b/drivers/net/ethernet/atheros/atl1c/atl1c.h
@@ -504,15 +504,12 @@ struct atl1c_rrd_ring {
u16 next_to_use;
u16 next_to_clean;
struct napi_struct napi;
- struct page *rx_page;
- unsigned int rx_page_offset;
};
/* board specific private data structure */
struct atl1c_adapter {
struct net_device *netdev;
struct pci_dev *pdev;
- unsigned int rx_frag_size;
struct atl1c_hw hw;
struct atl1c_hw_stats hw_stats;
struct mii_if_info mii; /* MII interface info */
diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
index 940c5d1ff9cf..74b78164cf74 100644
--- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
+++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
@@ -483,15 +483,10 @@ static int atl1c_set_mac_addr(struct net_device *netdev, void *p)
static void atl1c_set_rxbufsize(struct atl1c_adapter *adapter,
struct net_device *dev)
{
- unsigned int head_size;
int mtu = dev->mtu;
adapter->rx_buffer_len = mtu > AT_RX_BUF_SIZE ?
roundup(mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN, 8) : AT_RX_BUF_SIZE;
-
- head_size = SKB_DATA_ALIGN(adapter->rx_buffer_len + NET_SKB_PAD + NET_IP_ALIGN) +
- SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
- adapter->rx_frag_size = roundup_pow_of_two(head_size);
}
static netdev_features_t atl1c_fix_features(struct net_device *netdev,
@@ -964,7 +959,6 @@ static void atl1c_init_ring_ptrs(struct atl1c_adapter *adapter)
static void atl1c_free_ring_resources(struct atl1c_adapter *adapter)
{
struct pci_dev *pdev = adapter->pdev;
- int i;
dma_free_coherent(&pdev->dev, adapter->ring_header.size,
adapter->ring_header.desc, adapter->ring_header.dma);
@@ -977,12 +971,6 @@ static void atl1c_free_ring_resources(struct atl1c_adapter *adapter)
kfree(adapter->tpd_ring[0].buffer_info);
adapter->tpd_ring[0].buffer_info = NULL;
}
- for (i = 0; i < adapter->rx_queue_count; ++i) {
- if (adapter->rrd_ring[i].rx_page) {
- put_page(adapter->rrd_ring[i].rx_page);
- adapter->rrd_ring[i].rx_page = NULL;
- }
- }
}
/**
@@ -1754,48 +1742,11 @@ static inline void atl1c_rx_checksum(struct atl1c_adapter *adapter,
skb_checksum_none_assert(skb);
}
-static struct sk_buff *atl1c_alloc_skb(struct atl1c_adapter *adapter,
- u32 queue, bool napi_mode)
-{
- struct atl1c_rrd_ring *rrd_ring = &adapter->rrd_ring[queue];
- struct sk_buff *skb;
- struct page *page;
-
- if (adapter->rx_frag_size > PAGE_SIZE) {
- if (likely(napi_mode))
- return napi_alloc_skb(&rrd_ring->napi,
- adapter->rx_buffer_len);
- else
- return netdev_alloc_skb_ip_align(adapter->netdev,
- adapter->rx_buffer_len);
- }
-
- page = rrd_ring->rx_page;
- if (!page) {
- page = alloc_page(GFP_ATOMIC);
- if (unlikely(!page))
- return NULL;
- rrd_ring->rx_page = page;
- rrd_ring->rx_page_offset = 0;
- }
-
- skb = build_skb(page_address(page) + rrd_ring->rx_page_offset,
- adapter->rx_frag_size);
- if (likely(skb)) {
- skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
- rrd_ring->rx_page_offset += adapter->rx_frag_size;
- if (rrd_ring->rx_page_offset >= PAGE_SIZE)
- rrd_ring->rx_page = NULL;
- else
- get_page(page);
- }
- return skb;
-}
-
static int atl1c_alloc_rx_buffer(struct atl1c_adapter *adapter, u32 queue,
bool napi_mode)
{
struct atl1c_rfd_ring *rfd_ring = &adapter->rfd_ring[queue];
+ struct atl1c_rrd_ring *rrd_ring = &adapter->rrd_ring[queue];
struct pci_dev *pdev = adapter->pdev;
struct atl1c_buffer *buffer_info, *next_info;
struct sk_buff *skb;
@@ -1814,13 +1765,27 @@ static int atl1c_alloc_rx_buffer(struct atl1c_adapter *adapter, u32 queue,
while (next_info->flags & ATL1C_BUFFER_FREE) {
rfd_desc = ATL1C_RFD_DESC(rfd_ring, rfd_next_to_use);
- skb = atl1c_alloc_skb(adapter, queue, napi_mode);
+ /* When DMA RX address is set to something like
+ * 0x....fc0, it will be very likely to cause DMA
+ * RFD overflow issue.
+ *
+ * To work around it, we apply rx skb with 64 bytes
+ * longer space, and offset the address whenever
+ * 0x....fc0 is detected.
+ */
+ if (likely(napi_mode))
+ skb = napi_alloc_skb(&rrd_ring->napi, adapter->rx_buffer_len + 64);
+ else
+ skb = netdev_alloc_skb(adapter->netdev, adapter->rx_buffer_len + 64);
if (unlikely(!skb)) {
if (netif_msg_rx_err(adapter))
dev_warn(&pdev->dev, "alloc rx buffer failed\n");
break;
}
+ if (((unsigned long)skb->data & 0xfff) == 0xfc0)
+ skb_reserve(skb, 64);
+
/*
* Make buffer alignment 2 beyond a 16 byte boundary
* this will result in a 16 byte aligned IP header after
diff --git a/drivers/net/ethernet/broadcom/asp2/bcmasp.c b/drivers/net/ethernet/broadcom/asp2/bcmasp.c
index 41a6098eb0c2..29b04a274d07 100644
--- a/drivers/net/ethernet/broadcom/asp2/bcmasp.c
+++ b/drivers/net/ethernet/broadcom/asp2/bcmasp.c
@@ -1344,17 +1344,15 @@ of_put_exit:
return ret;
}
-static int bcmasp_remove(struct platform_device *pdev)
+static void bcmasp_remove(struct platform_device *pdev)
{
struct bcmasp_priv *priv = dev_get_drvdata(&pdev->dev);
if (!priv)
- return 0;
+ return;
priv->destroy_wol(priv);
bcmasp_remove_intfs(priv);
-
- return 0;
}
static void bcmasp_shutdown(struct platform_device *pdev)
@@ -1428,7 +1426,7 @@ static SIMPLE_DEV_PM_OPS(bcmasp_pm_ops,
static struct platform_driver bcmasp_driver = {
.probe = bcmasp_probe,
- .remove = bcmasp_remove,
+ .remove_new = bcmasp_remove,
.shutdown = bcmasp_shutdown,
.driver = {
.name = "brcm,asp-v2",
diff --git a/drivers/net/ethernet/broadcom/bcm4908_enet.c b/drivers/net/ethernet/broadcom/bcm4908_enet.c
index 33d86683af50..3e7c8671cd11 100644
--- a/drivers/net/ethernet/broadcom/bcm4908_enet.c
+++ b/drivers/net/ethernet/broadcom/bcm4908_enet.c
@@ -768,7 +768,7 @@ err_dma_free:
return err;
}
-static int bcm4908_enet_remove(struct platform_device *pdev)
+static void bcm4908_enet_remove(struct platform_device *pdev)
{
struct bcm4908_enet *enet = platform_get_drvdata(pdev);
@@ -776,8 +776,6 @@ static int bcm4908_enet_remove(struct platform_device *pdev)
netif_napi_del(&enet->rx_ring.napi);
netif_napi_del(&enet->tx_ring.napi);
bcm4908_enet_dma_free(enet);
-
- return 0;
}
static const struct of_device_id bcm4908_enet_of_match[] = {
@@ -791,7 +789,7 @@ static struct platform_driver bcm4908_enet_driver = {
.of_match_table = bcm4908_enet_of_match,
},
.probe = bcm4908_enet_probe,
- .remove = bcm4908_enet_remove,
+ .remove_new = bcm4908_enet_remove,
};
module_platform_driver(bcm4908_enet_driver);
diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c
index a741070f1f9a..105afde5dbe1 100644
--- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c
+++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c
@@ -1902,7 +1902,7 @@ out:
/*
* exit func, stops hardware and unregisters netdevice
*/
-static int bcm_enet_remove(struct platform_device *pdev)
+static void bcm_enet_remove(struct platform_device *pdev)
{
struct bcm_enet_priv *priv;
struct net_device *dev;
@@ -1932,12 +1932,11 @@ static int bcm_enet_remove(struct platform_device *pdev)
clk_disable_unprepare(priv->mac_clk);
free_netdev(dev);
- return 0;
}
static struct platform_driver bcm63xx_enet_driver = {
.probe = bcm_enet_probe,
- .remove = bcm_enet_remove,
+ .remove_new = bcm_enet_remove,
.driver = {
.name = "bcm63xx_enet",
},
@@ -2739,7 +2738,7 @@ out:
/* exit func, stops hardware and unregisters netdevice */
-static int bcm_enetsw_remove(struct platform_device *pdev)
+static void bcm_enetsw_remove(struct platform_device *pdev)
{
struct bcm_enet_priv *priv;
struct net_device *dev;
@@ -2752,12 +2751,11 @@ static int bcm_enetsw_remove(struct platform_device *pdev)
clk_disable_unprepare(priv->mac_clk);
free_netdev(dev);
- return 0;
}
static struct platform_driver bcm63xx_enetsw_driver = {
.probe = bcm_enetsw_probe,
- .remove = bcm_enetsw_remove,
+ .remove_new = bcm_enetsw_remove,
.driver = {
.name = "bcm63xx_enetsw",
},
diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c
index bf1611cce974..ab096795e805 100644
--- a/drivers/net/ethernet/broadcom/bcmsysport.c
+++ b/drivers/net/ethernet/broadcom/bcmsysport.c
@@ -2648,7 +2648,7 @@ err_free_netdev:
return ret;
}
-static int bcm_sysport_remove(struct platform_device *pdev)
+static void bcm_sysport_remove(struct platform_device *pdev)
{
struct net_device *dev = dev_get_drvdata(&pdev->dev);
struct bcm_sysport_priv *priv = netdev_priv(dev);
@@ -2663,8 +2663,6 @@ static int bcm_sysport_remove(struct platform_device *pdev)
of_phy_deregister_fixed_link(dn);
free_netdev(dev);
dev_set_drvdata(&pdev->dev, NULL);
-
- return 0;
}
static int bcm_sysport_suspend_to_wol(struct bcm_sysport_priv *priv)
@@ -2901,7 +2899,7 @@ static SIMPLE_DEV_PM_OPS(bcm_sysport_pm_ops,
static struct platform_driver bcm_sysport_driver = {
.probe = bcm_sysport_probe,
- .remove = bcm_sysport_remove,
+ .remove_new = bcm_sysport_remove,
.driver = {
.name = "brcm-systemport",
.of_match_table = bcm_sysport_of_match,
diff --git a/drivers/net/ethernet/broadcom/bgmac-platform.c b/drivers/net/ethernet/broadcom/bgmac-platform.c
index b4381cd41979..0b21fd5bd457 100644
--- a/drivers/net/ethernet/broadcom/bgmac-platform.c
+++ b/drivers/net/ethernet/broadcom/bgmac-platform.c
@@ -246,13 +246,11 @@ static int bgmac_probe(struct platform_device *pdev)
return bgmac_enet_probe(bgmac);
}
-static int bgmac_remove(struct platform_device *pdev)
+static void bgmac_remove(struct platform_device *pdev)
{
struct bgmac *bgmac = platform_get_drvdata(pdev);
bgmac_enet_remove(bgmac);
-
- return 0;
}
#ifdef CONFIG_PM
@@ -296,7 +294,7 @@ static struct platform_driver bgmac_enet_driver = {
.pm = BGMAC_PM_OPS
},
.probe = bgmac_probe,
- .remove = bgmac_remove,
+ .remove_new = bgmac_remove,
};
module_platform_driver(bgmac_enet_driver);
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index 24bade875ca6..91f3a7e78d65 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -4164,7 +4164,7 @@ err:
return err;
}
-static int bcmgenet_remove(struct platform_device *pdev)
+static void bcmgenet_remove(struct platform_device *pdev)
{
struct bcmgenet_priv *priv = dev_to_priv(&pdev->dev);
@@ -4172,8 +4172,6 @@ static int bcmgenet_remove(struct platform_device *pdev)
unregister_netdev(priv->dev);
bcmgenet_mii_exit(priv->dev);
free_netdev(priv->dev);
-
- return 0;
}
static void bcmgenet_shutdown(struct platform_device *pdev)
@@ -4352,7 +4350,7 @@ MODULE_DEVICE_TABLE(acpi, genet_acpi_match);
static struct platform_driver bcmgenet_driver = {
.probe = bcmgenet_probe,
- .remove = bcmgenet_remove,
+ .remove_new = bcmgenet_remove,
.shutdown = bcmgenet_shutdown,
.driver = {
.name = "bcmgenet",
diff --git a/drivers/net/ethernet/broadcom/sb1250-mac.c b/drivers/net/ethernet/broadcom/sb1250-mac.c
index 3a6763c5e8b3..fcf8485f3446 100644
--- a/drivers/net/ethernet/broadcom/sb1250-mac.c
+++ b/drivers/net/ethernet/broadcom/sb1250-mac.c
@@ -2593,7 +2593,7 @@ out_out:
return err;
}
-static int sbmac_remove(struct platform_device *pldev)
+static void sbmac_remove(struct platform_device *pldev)
{
struct net_device *dev = platform_get_drvdata(pldev);
struct sbmac_softc *sc = netdev_priv(dev);
@@ -2604,13 +2604,11 @@ static int sbmac_remove(struct platform_device *pldev)
mdiobus_free(sc->mii_bus);
iounmap(sc->sbm_base);
free_netdev(dev);
-
- return 0;
}
static struct platform_driver sbmac_driver = {
.probe = sbmac_probe,
- .remove = sbmac_remove,
+ .remove_new = sbmac_remove,
.driver = {
.name = sbmac_string,
},
diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index b940dcd3ace6..cebae0f418f2 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -5156,7 +5156,7 @@ err_disable_clocks:
return err;
}
-static int macb_remove(struct platform_device *pdev)
+static void macb_remove(struct platform_device *pdev)
{
struct net_device *dev;
struct macb *bp;
@@ -5181,8 +5181,6 @@ static int macb_remove(struct platform_device *pdev)
phylink_destroy(bp->phylink);
free_netdev(dev);
}
-
- return 0;
}
static int __maybe_unused macb_suspend(struct device *dev)
@@ -5398,7 +5396,7 @@ static const struct dev_pm_ops macb_pm_ops = {
static struct platform_driver macb_driver = {
.probe = macb_probe,
- .remove = macb_remove,
+ .remove_new = macb_remove,
.driver = {
.name = "macb",
.of_match_table = of_match_ptr(macb_dt_ids),
diff --git a/drivers/net/ethernet/calxeda/xgmac.c b/drivers/net/ethernet/calxeda/xgmac.c
index f4f87dfa9687..5e97f1e4e38e 100644
--- a/drivers/net/ethernet/calxeda/xgmac.c
+++ b/drivers/net/ethernet/calxeda/xgmac.c
@@ -1820,7 +1820,7 @@ err_alloc:
* changes the link status, releases the DMA descriptor rings,
* unregisters the MDIO bus and unmaps the allocated memory.
*/
-static int xgmac_remove(struct platform_device *pdev)
+static void xgmac_remove(struct platform_device *pdev)
{
struct net_device *ndev = platform_get_drvdata(pdev);
struct xgmac_priv *priv = netdev_priv(ndev);
@@ -1840,8 +1840,6 @@ static int xgmac_remove(struct platform_device *pdev)
release_mem_region(res->start, resource_size(res));
free_netdev(ndev);
-
- return 0;
}
#ifdef CONFIG_PM_SLEEP
@@ -1921,7 +1919,7 @@ static struct platform_driver xgmac_driver = {
.pm = &xgmac_pm_ops,
},
.probe = xgmac_probe,
- .remove = xgmac_remove,
+ .remove_new = xgmac_remove,
};
module_platform_driver(xgmac_driver);
diff --git a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c
index edde0b8fa49c..007d4b06819e 100644
--- a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c
+++ b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c
@@ -1521,7 +1521,7 @@ err:
return result;
}
-static int octeon_mgmt_remove(struct platform_device *pdev)
+static void octeon_mgmt_remove(struct platform_device *pdev)
{
struct net_device *netdev = platform_get_drvdata(pdev);
struct octeon_mgmt *p = netdev_priv(netdev);
@@ -1529,7 +1529,6 @@ static int octeon_mgmt_remove(struct platform_device *pdev)
unregister_netdev(netdev);
of_node_put(p->phy_np);
free_netdev(netdev);
- return 0;
}
static const struct of_device_id octeon_mgmt_match[] = {
@@ -1546,7 +1545,7 @@ static struct platform_driver octeon_mgmt_driver = {
.of_match_table = octeon_mgmt_match,
},
.probe = octeon_mgmt_probe,
- .remove = octeon_mgmt_remove,
+ .remove_new = octeon_mgmt_remove,
};
module_platform_driver(octeon_mgmt_driver);
diff --git a/drivers/net/ethernet/cirrus/cs89x0.c b/drivers/net/ethernet/cirrus/cs89x0.c
index d323c5c23521..0a21a10a791c 100644
--- a/drivers/net/ethernet/cirrus/cs89x0.c
+++ b/drivers/net/ethernet/cirrus/cs89x0.c
@@ -1879,7 +1879,7 @@ free:
return err;
}
-static int cs89x0_platform_remove(struct platform_device *pdev)
+static void cs89x0_platform_remove(struct platform_device *pdev)
{
struct net_device *dev = platform_get_drvdata(pdev);
@@ -1889,7 +1889,6 @@ static int cs89x0_platform_remove(struct platform_device *pdev)
*/
unregister_netdev(dev);
free_netdev(dev);
- return 0;
}
static const struct of_device_id __maybe_unused cs89x0_match[] = {
@@ -1904,7 +1903,7 @@ static struct platform_driver cs89x0_driver = {
.name = DRV_NAME,
.of_match_table = of_match_ptr(cs89x0_match),
},
- .remove = cs89x0_platform_remove,
+ .remove_new = cs89x0_platform_remove,
};
module_platform_driver_probe(cs89x0_driver, cs89x0_platform_probe);
diff --git a/drivers/net/ethernet/cirrus/ep93xx_eth.c b/drivers/net/ethernet/cirrus/ep93xx_eth.c
index 8627ab19d470..1c2a540db13d 100644
--- a/drivers/net/ethernet/cirrus/ep93xx_eth.c
+++ b/drivers/net/ethernet/cirrus/ep93xx_eth.c
@@ -757,7 +757,7 @@ static struct net_device *ep93xx_dev_alloc(struct ep93xx_eth_data *data)
}
-static int ep93xx_eth_remove(struct platform_device *pdev)
+static void ep93xx_eth_remove(struct platform_device *pdev)
{
struct net_device *dev;
struct ep93xx_priv *ep;
@@ -765,7 +765,7 @@ static int ep93xx_eth_remove(struct platform_device *pdev)
dev = platform_get_drvdata(pdev);
if (dev == NULL)
- return 0;
+ return;
ep = netdev_priv(dev);
@@ -782,8 +782,6 @@ static int ep93xx_eth_remove(struct platform_device *pdev)
}
free_netdev(dev);
-
- return 0;
}
static int ep93xx_eth_probe(struct platform_device *pdev)
@@ -862,7 +860,7 @@ err_out:
static struct platform_driver ep93xx_eth_driver = {
.probe = ep93xx_eth_probe,
- .remove = ep93xx_eth_remove,
+ .remove_new = ep93xx_eth_remove,
.driver = {
.name = "ep93xx-eth",
},
diff --git a/drivers/net/ethernet/cirrus/mac89x0.c b/drivers/net/ethernet/cirrus/mac89x0.c
index 21a70b1f0ac5..887876f35f10 100644
--- a/drivers/net/ethernet/cirrus/mac89x0.c
+++ b/drivers/net/ethernet/cirrus/mac89x0.c
@@ -556,19 +556,18 @@ static int set_mac_address(struct net_device *dev, void *addr)
MODULE_LICENSE("GPL");
-static int mac89x0_device_remove(struct platform_device *pdev)
+static void mac89x0_device_remove(struct platform_device *pdev)
{
struct net_device *dev = platform_get_drvdata(pdev);
unregister_netdev(dev);
nubus_writew(0, dev->base_addr + ADD_PORT);
free_netdev(dev);
- return 0;
}
static struct platform_driver mac89x0_platform_driver = {
.probe = mac89x0_device_probe,
- .remove = mac89x0_device_remove,
+ .remove_new = mac89x0_device_remove,
.driver = {
.name = "mac89x0",
},
diff --git a/drivers/net/ethernet/cortina/gemini.c b/drivers/net/ethernet/cortina/gemini.c
index a8b9d1a3e4d5..5423fe26b4ef 100644
--- a/drivers/net/ethernet/cortina/gemini.c
+++ b/drivers/net/ethernet/cortina/gemini.c
@@ -2518,13 +2518,11 @@ unprepare:
return ret;
}
-static int gemini_ethernet_port_remove(struct platform_device *pdev)
+static void gemini_ethernet_port_remove(struct platform_device *pdev)
{
struct gemini_ethernet_port *port = platform_get_drvdata(pdev);
gemini_port_remove(port);
-
- return 0;
}
static const struct of_device_id gemini_ethernet_port_of_match[] = {
@@ -2541,7 +2539,7 @@ static struct platform_driver gemini_ethernet_port_driver = {
.of_match_table = gemini_ethernet_port_of_match,
},
.probe = gemini_ethernet_port_probe,
- .remove = gemini_ethernet_port_remove,
+ .remove_new = gemini_ethernet_port_remove,
};
static int gemini_ethernet_probe(struct platform_device *pdev)
@@ -2583,14 +2581,12 @@ static int gemini_ethernet_probe(struct platform_device *pdev)
return devm_of_platform_populate(dev);
}
-static int gemini_ethernet_remove(struct platform_device *pdev)
+static void gemini_ethernet_remove(struct platform_device *pdev)
{
struct gemini_ethernet *geth = platform_get_drvdata(pdev);
geth_cleanup_freeq(geth);
geth->initialized = false;
-
- return 0;
}
static const struct of_device_id gemini_ethernet_of_match[] = {
@@ -2607,7 +2603,7 @@ static struct platform_driver gemini_ethernet_driver = {
.of_match_table = gemini_ethernet_of_match,
},
.probe = gemini_ethernet_probe,
- .remove = gemini_ethernet_remove,
+ .remove_new = gemini_ethernet_remove,
};
static int __init gemini_ethernet_module_init(void)
diff --git a/drivers/net/ethernet/davicom/dm9000.c b/drivers/net/ethernet/davicom/dm9000.c
index 05a89ab6766c..150cc94ae9f8 100644
--- a/drivers/net/ethernet/davicom/dm9000.c
+++ b/drivers/net/ethernet/davicom/dm9000.c
@@ -1770,8 +1770,7 @@ static const struct dev_pm_ops dm9000_drv_pm_ops = {
.resume = dm9000_drv_resume,
};
-static int
-dm9000_drv_remove(struct platform_device *pdev)
+static void dm9000_drv_remove(struct platform_device *pdev)
{
struct net_device *ndev = platform_get_drvdata(pdev);
struct board_info *dm = to_dm9000_board(ndev);
@@ -1783,7 +1782,6 @@ dm9000_drv_remove(struct platform_device *pdev)
regulator_disable(dm->power_supply);
dev_dbg(&pdev->dev, "released and freed device\n");
- return 0;
}
#ifdef CONFIG_OF
@@ -1801,7 +1799,7 @@ static struct platform_driver dm9000_driver = {
.of_match_table = of_match_ptr(dm9000_of_matches),
},
.probe = dm9000_probe,
- .remove = dm9000_drv_remove,
+ .remove_new = dm9000_drv_remove,
};
module_platform_driver(dm9000_driver);
diff --git a/drivers/net/ethernet/dnet.c b/drivers/net/ethernet/dnet.c
index 151ca9573be9..2a18df3605f1 100644
--- a/drivers/net/ethernet/dnet.c
+++ b/drivers/net/ethernet/dnet.c
@@ -841,7 +841,7 @@ err_out_free_dev:
return err;
}
-static int dnet_remove(struct platform_device *pdev)
+static void dnet_remove(struct platform_device *pdev)
{
struct net_device *dev;
@@ -859,13 +859,11 @@ static int dnet_remove(struct platform_device *pdev)
free_irq(dev->irq, dev);
free_netdev(dev);
}
-
- return 0;
}
static struct platform_driver dnet_driver = {
.probe = dnet_probe,
- .remove = dnet_remove,
+ .remove_new = dnet_remove,
.driver = {
.name = "dnet",
},
diff --git a/drivers/net/ethernet/engleder/tsnep_main.c b/drivers/net/ethernet/engleder/tsnep_main.c
index 8b992dc9bb52..77a3fcb821c8 100644
--- a/drivers/net/ethernet/engleder/tsnep_main.c
+++ b/drivers/net/ethernet/engleder/tsnep_main.c
@@ -2587,7 +2587,7 @@ mdio_init_failed:
return retval;
}
-static int tsnep_remove(struct platform_device *pdev)
+static void tsnep_remove(struct platform_device *pdev)
{
struct tsnep_adapter *adapter = platform_get_drvdata(pdev);
@@ -2603,8 +2603,6 @@ static int tsnep_remove(struct platform_device *pdev)
mdiobus_unregister(adapter->mdiobus);
tsnep_disable_irq(adapter, ECM_INT_ALL);
-
- return 0;
}
static const struct of_device_id tsnep_of_match[] = {
@@ -2619,7 +2617,7 @@ static struct platform_driver tsnep_driver = {
.of_match_table = tsnep_of_match,
},
.probe = tsnep_probe,
- .remove = tsnep_remove,
+ .remove_new = tsnep_remove,
};
module_platform_driver(tsnep_driver);
diff --git a/drivers/net/ethernet/ethoc.c b/drivers/net/ethernet/ethoc.c
index 95cbad198b4b..ad41c9019018 100644
--- a/drivers/net/ethernet/ethoc.c
+++ b/drivers/net/ethernet/ethoc.c
@@ -1254,7 +1254,7 @@ out:
* ethoc_remove - shutdown OpenCores ethernet MAC
* @pdev: platform device
*/
-static int ethoc_remove(struct platform_device *pdev)
+static void ethoc_remove(struct platform_device *pdev)
{
struct net_device *netdev = platform_get_drvdata(pdev);
struct ethoc *priv = netdev_priv(netdev);
@@ -1271,8 +1271,6 @@ static int ethoc_remove(struct platform_device *pdev)
unregister_netdev(netdev);
free_netdev(netdev);
}
-
- return 0;
}
#ifdef CONFIG_PM
@@ -1298,7 +1296,7 @@ MODULE_DEVICE_TABLE(of, ethoc_match);
static struct platform_driver ethoc_driver = {
.probe = ethoc_probe,
- .remove = ethoc_remove,
+ .remove_new = ethoc_remove,
.suspend = ethoc_suspend,
.resume = ethoc_resume,
.driver = {
diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c
index 9135b918dd49..fddfd1dd5070 100644
--- a/drivers/net/ethernet/faraday/ftgmac100.c
+++ b/drivers/net/ethernet/faraday/ftgmac100.c
@@ -2012,7 +2012,7 @@ err_alloc_etherdev:
return err;
}
-static int ftgmac100_remove(struct platform_device *pdev)
+static void ftgmac100_remove(struct platform_device *pdev)
{
struct net_device *netdev;
struct ftgmac100 *priv;
@@ -2040,7 +2040,6 @@ static int ftgmac100_remove(struct platform_device *pdev)
netif_napi_del(&priv->napi);
free_netdev(netdev);
- return 0;
}
static const struct of_device_id ftgmac100_of_match[] = {
@@ -2051,7 +2050,7 @@ MODULE_DEVICE_TABLE(of, ftgmac100_of_match);
static struct platform_driver ftgmac100_driver = {
.probe = ftgmac100_probe,
- .remove = ftgmac100_remove,
+ .remove_new = ftgmac100_remove,
.driver = {
.name = DRV_NAME,
.of_match_table = ftgmac100_of_match,
diff --git a/drivers/net/ethernet/faraday/ftmac100.c b/drivers/net/ethernet/faraday/ftmac100.c
index 183069581bc0..003bc9a45c65 100644
--- a/drivers/net/ethernet/faraday/ftmac100.c
+++ b/drivers/net/ethernet/faraday/ftmac100.c
@@ -1219,7 +1219,7 @@ err_alloc_etherdev:
return err;
}
-static int ftmac100_remove(struct platform_device *pdev)
+static void ftmac100_remove(struct platform_device *pdev)
{
struct net_device *netdev;
struct ftmac100 *priv;
@@ -1234,7 +1234,6 @@ static int ftmac100_remove(struct platform_device *pdev)
netif_napi_del(&priv->napi);
free_netdev(netdev);
- return 0;
}
static const struct of_device_id ftmac100_of_ids[] = {
@@ -1244,7 +1243,7 @@ static const struct of_device_id ftmac100_of_ids[] = {
static struct platform_driver ftmac100_driver = {
.probe = ftmac100_probe,
- .remove = ftmac100_remove,
+ .remove_new = ftmac100_remove,
.driver = {
.name = DRV_NAME,
.of_match_table = ftmac100_of_ids
diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c
index 5704b5f57cd0..83b09dcfafc4 100644
--- a/drivers/net/ethernet/google/gve/gve_main.c
+++ b/drivers/net/ethernet/google/gve/gve_main.c
@@ -190,7 +190,7 @@ static int gve_alloc_stats_report(struct gve_priv *priv)
rx_stats_num = (GVE_RX_STATS_REPORT_NUM + NIC_RX_STATS_REPORT_NUM) *
priv->rx_cfg.num_queues;
priv->stats_report_len = struct_size(priv->stats_report, stats,
- tx_stats_num + rx_stats_num);
+ size_add(tx_stats_num, rx_stats_num));
priv->stats_report =
dma_alloc_coherent(&priv->pdev->dev, priv->stats_report_len,
&priv->stats_report_bus, GFP_KERNEL);
diff --git a/drivers/net/ethernet/hisilicon/hip04_eth.c b/drivers/net/ethernet/hisilicon/hip04_eth.c
index ecf92a5d56bb..b91e7a06b97f 100644
--- a/drivers/net/ethernet/hisilicon/hip04_eth.c
+++ b/drivers/net/ethernet/hisilicon/hip04_eth.c
@@ -1021,7 +1021,7 @@ init_fail:
return ret;
}
-static int hip04_remove(struct platform_device *pdev)
+static void hip04_remove(struct platform_device *pdev)
{
struct net_device *ndev = platform_get_drvdata(pdev);
struct hip04_priv *priv = netdev_priv(ndev);
@@ -1035,8 +1035,6 @@ static int hip04_remove(struct platform_device *pdev)
of_node_put(priv->phy_node);
cancel_work_sync(&priv->tx_timeout_task);
free_netdev(ndev);
-
- return 0;
}
static const struct of_device_id hip04_mac_match[] = {
@@ -1048,7 +1046,7 @@ MODULE_DEVICE_TABLE(of, hip04_mac_match);
static struct platform_driver hip04_mac_driver = {
.probe = hip04_mac_probe,
- .remove = hip04_remove,
+ .remove_new = hip04_remove,
.driver = {
.name = DRV_NAME,
.of_match_table = hip04_mac_match,
diff --git a/drivers/net/ethernet/hisilicon/hisi_femac.c b/drivers/net/ethernet/hisilicon/hisi_femac.c
index cb7b0293fe85..2406263c9dd3 100644
--- a/drivers/net/ethernet/hisilicon/hisi_femac.c
+++ b/drivers/net/ethernet/hisilicon/hisi_femac.c
@@ -893,7 +893,7 @@ out_free_netdev:
return ret;
}
-static int hisi_femac_drv_remove(struct platform_device *pdev)
+static void hisi_femac_drv_remove(struct platform_device *pdev)
{
struct net_device *ndev = platform_get_drvdata(pdev);
struct hisi_femac_priv *priv = netdev_priv(ndev);
@@ -904,8 +904,6 @@ static int hisi_femac_drv_remove(struct platform_device *pdev)
phy_disconnect(ndev->phydev);
clk_disable_unprepare(priv->clk);
free_netdev(ndev);
-
- return 0;
}
#ifdef CONFIG_PM
@@ -961,7 +959,7 @@ static struct platform_driver hisi_femac_driver = {
.of_match_table = hisi_femac_match,
},
.probe = hisi_femac_drv_probe,
- .remove = hisi_femac_drv_remove,
+ .remove_new = hisi_femac_drv_remove,
#ifdef CONFIG_PM
.suspend = hisi_femac_drv_suspend,
.resume = hisi_femac_drv_resume,
diff --git a/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c b/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c
index 26d22bb04b87..506fa3d8bbee 100644
--- a/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c
+++ b/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c
@@ -1282,7 +1282,7 @@ out_free_netdev:
return ret;
}
-static int hix5hd2_dev_remove(struct platform_device *pdev)
+static void hix5hd2_dev_remove(struct platform_device *pdev)
{
struct net_device *ndev = platform_get_drvdata(pdev);
struct hix5hd2_priv *priv = netdev_priv(ndev);
@@ -1298,8 +1298,6 @@ static int hix5hd2_dev_remove(struct platform_device *pdev)
of_node_put(priv->phy_node);
cancel_work_sync(&priv->tx_timeout_task);
free_netdev(ndev);
-
- return 0;
}
static const struct of_device_id hix5hd2_of_match[] = {
@@ -1319,7 +1317,7 @@ static struct platform_driver hix5hd2_dev_driver = {
.of_match_table = hix5hd2_of_match,
},
.probe = hix5hd2_dev_probe,
- .remove = hix5hd2_dev_remove,
+ .remove_new = hix5hd2_dev_remove,
};
module_platform_driver(hix5hd2_dev_driver);
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
index fcaf5132b865..1b67da1f6fa8 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
@@ -3007,7 +3007,7 @@ free_dev:
* hns_dsaf_remove - remove dsaf dev
* @pdev: dasf platform device
*/
-static int hns_dsaf_remove(struct platform_device *pdev)
+static void hns_dsaf_remove(struct platform_device *pdev)
{
struct dsaf_device *dsaf_dev = dev_get_drvdata(&pdev->dev);
@@ -3020,8 +3020,6 @@ static int hns_dsaf_remove(struct platform_device *pdev)
hns_dsaf_free(dsaf_dev);
hns_dsaf_free_dev(dsaf_dev);
-
- return 0;
}
static const struct of_device_id g_dsaf_match[] = {
@@ -3033,7 +3031,7 @@ MODULE_DEVICE_TABLE(of, g_dsaf_match);
static struct platform_driver g_dsaf_driver = {
.probe = hns_dsaf_probe,
- .remove = hns_dsaf_remove,
+ .remove_new = hns_dsaf_remove,
.driver = {
.name = DSAF_DRV_NAME,
.of_match_table = g_dsaf_match,
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
index 7cf10d1e2b31..0900abf5c508 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
@@ -2384,7 +2384,7 @@ out_read_prop_fail:
return ret;
}
-static int hns_nic_dev_remove(struct platform_device *pdev)
+static void hns_nic_dev_remove(struct platform_device *pdev)
{
struct net_device *ndev = platform_get_drvdata(pdev);
struct hns_nic_priv *priv = netdev_priv(ndev);
@@ -2413,7 +2413,6 @@ static int hns_nic_dev_remove(struct platform_device *pdev)
of_node_put(to_of_node(priv->fwnode));
free_netdev(ndev);
- return 0;
}
static const struct of_device_id hns_enet_of_match[] = {
@@ -2431,7 +2430,7 @@ static struct platform_driver hns_nic_dev_driver = {
.acpi_match_table = ACPI_PTR(hns_enet_acpi_match),
},
.probe = hns_nic_dev_probe,
- .remove = hns_nic_dev_remove,
+ .remove_new = hns_nic_dev_remove,
};
module_platform_driver(hns_nic_dev_driver);
diff --git a/drivers/net/ethernet/hisilicon/hns_mdio.c b/drivers/net/ethernet/hisilicon/hns_mdio.c
index 409a89d80220..ed73707176c1 100644
--- a/drivers/net/ethernet/hisilicon/hns_mdio.c
+++ b/drivers/net/ethernet/hisilicon/hns_mdio.c
@@ -610,7 +610,7 @@ static int hns_mdio_probe(struct platform_device *pdev)
*
* Return 0 on success, negative on failure
*/
-static int hns_mdio_remove(struct platform_device *pdev)
+static void hns_mdio_remove(struct platform_device *pdev)
{
struct mii_bus *bus;
@@ -618,7 +618,6 @@ static int hns_mdio_remove(struct platform_device *pdev)
mdiobus_unregister(bus);
platform_set_drvdata(pdev, NULL);
- return 0;
}
static const struct of_device_id hns_mdio_match[] = {
@@ -636,7 +635,7 @@ MODULE_DEVICE_TABLE(acpi, hns_mdio_acpi_match);
static struct platform_driver hns_mdio_driver = {
.probe = hns_mdio_probe,
- .remove = hns_mdio_remove,
+ .remove_new = hns_mdio_remove,
.driver = {
.name = MDIO_DRV_NAME,
.of_match_table = hns_mdio_match,
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_tx.c b/drivers/net/ethernet/huawei/hinic/hinic_tx.c
index ad47ac51a139..9b60966736db 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_tx.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_tx.c
@@ -861,7 +861,7 @@ int hinic_init_txq(struct hinic_txq *txq, struct hinic_sq *sq,
struct hinic_qp *qp = container_of(sq, struct hinic_qp, sq);
struct hinic_dev *nic_dev = netdev_priv(netdev);
struct hinic_hwdev *hwdev = nic_dev->hwdev;
- int err, irqname_len;
+ int err;
txq->netdev = netdev;
txq->sq = sq;
@@ -882,15 +882,13 @@ int hinic_init_txq(struct hinic_txq *txq, struct hinic_sq *sq,
goto err_alloc_free_sges;
}
- irqname_len = snprintf(NULL, 0, "%s_txq%d", netdev->name, qp->q_id) + 1;
- txq->irq_name = devm_kzalloc(&netdev->dev, irqname_len, GFP_KERNEL);
+ txq->irq_name = devm_kasprintf(&netdev->dev, GFP_KERNEL, "%s_txq%d",
+ netdev->name, qp->q_id);
if (!txq->irq_name) {
err = -ENOMEM;
goto err_alloc_irqname;
}
- sprintf(txq->irq_name, "%s_txq%d", netdev->name, qp->q_id);
-
err = hinic_hwdev_hw_ci_addr_set(hwdev, sq, CI_UPDATE_NO_PENDING,
CI_UPDATE_NO_COALESC);
if (err)
diff --git a/drivers/net/ethernet/i825xx/sni_82596.c b/drivers/net/ethernet/i825xx/sni_82596.c
index 54bb4d9a0d1e..813403c2628f 100644
--- a/drivers/net/ethernet/i825xx/sni_82596.c
+++ b/drivers/net/ethernet/i825xx/sni_82596.c
@@ -153,7 +153,7 @@ probe_failed_free_mpu:
return retval;
}
-static int sni_82596_driver_remove(struct platform_device *pdev)
+static void sni_82596_driver_remove(struct platform_device *pdev)
{
struct net_device *dev = platform_get_drvdata(pdev);
struct i596_private *lp = netdev_priv(dev);
@@ -164,12 +164,11 @@ static int sni_82596_driver_remove(struct platform_device *pdev)
iounmap(lp->ca);
iounmap(lp->mpu_port);
free_netdev (dev);
- return 0;
}
static struct platform_driver sni_82596_driver = {
.probe = sni_82596_probe,
- .remove = sni_82596_driver_remove,
+ .remove_new = sni_82596_driver_remove,
.driver = {
.name = sni_82596_string,
},
diff --git a/drivers/net/ethernet/ibm/ehea/ehea_main.c b/drivers/net/ethernet/ibm/ehea/ehea_main.c
index 0a56e9752464..251dedd55cfb 100644
--- a/drivers/net/ethernet/ibm/ehea/ehea_main.c
+++ b/drivers/net/ethernet/ibm/ehea/ehea_main.c
@@ -90,7 +90,7 @@ static struct ehea_bcmc_reg_array ehea_bcmc_regs;
static int ehea_probe_adapter(struct platform_device *dev);
-static int ehea_remove(struct platform_device *dev);
+static void ehea_remove(struct platform_device *dev);
static const struct of_device_id ehea_module_device_table[] = {
{
@@ -121,7 +121,7 @@ static struct platform_driver ehea_driver = {
.of_match_table = ehea_device_table,
},
.probe = ehea_probe_adapter,
- .remove = ehea_remove,
+ .remove_new = ehea_remove,
};
void ehea_dump(void *adr, int len, char *msg)
@@ -3471,7 +3471,7 @@ out:
return ret;
}
-static int ehea_remove(struct platform_device *dev)
+static void ehea_remove(struct platform_device *dev)
{
struct ehea_adapter *adapter = platform_get_drvdata(dev);
int i;
@@ -3492,8 +3492,6 @@ static int ehea_remove(struct platform_device *dev)
list_del(&adapter->list);
ehea_update_firmware_handles();
-
- return 0;
}
static int check_module_parm(void)
diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c
index 0c314bf97480..e6e47b1842ea 100644
--- a/drivers/net/ethernet/ibm/emac/core.c
+++ b/drivers/net/ethernet/ibm/emac/core.c
@@ -3253,7 +3253,7 @@ static int emac_probe(struct platform_device *ofdev)
return err;
}
-static int emac_remove(struct platform_device *ofdev)
+static void emac_remove(struct platform_device *ofdev)
{
struct emac_instance *dev = platform_get_drvdata(ofdev);
@@ -3290,8 +3290,6 @@ static int emac_remove(struct platform_device *ofdev)
irq_dispose_mapping(dev->emac_irq);
free_netdev(dev->ndev);
-
- return 0;
}
/* XXX Features in here should be replaced by properties... */
@@ -3319,7 +3317,7 @@ static struct platform_driver emac_driver = {
.of_match_table = emac_match,
},
.probe = emac_probe,
- .remove = emac_remove,
+ .remove_new = emac_remove,
};
static void __init emac_make_bootlist(void)
diff --git a/drivers/net/ethernet/ibm/emac/mal.c b/drivers/net/ethernet/ibm/emac/mal.c
index c3236b59e7e9..462646d1b817 100644
--- a/drivers/net/ethernet/ibm/emac/mal.c
+++ b/drivers/net/ethernet/ibm/emac/mal.c
@@ -711,7 +711,7 @@ static int mal_probe(struct platform_device *ofdev)
return err;
}
-static int mal_remove(struct platform_device *ofdev)
+static void mal_remove(struct platform_device *ofdev)
{
struct mal_instance *mal = platform_get_drvdata(ofdev);
@@ -740,8 +740,6 @@ static int mal_remove(struct platform_device *ofdev)
NUM_RX_BUFF * mal->num_rx_chans), mal->bd_virt,
mal->bd_dma);
kfree(mal);
-
- return 0;
}
static const struct of_device_id mal_platform_match[] =
@@ -770,7 +768,7 @@ static struct platform_driver mal_of_driver = {
.of_match_table = mal_platform_match,
},
.probe = mal_probe,
- .remove = mal_remove,
+ .remove_new = mal_remove,
};
int __init mal_init(void)
diff --git a/drivers/net/ethernet/ibm/emac/rgmii.c b/drivers/net/ethernet/ibm/emac/rgmii.c
index fd437f986edf..e1712fdc3c31 100644
--- a/drivers/net/ethernet/ibm/emac/rgmii.c
+++ b/drivers/net/ethernet/ibm/emac/rgmii.c
@@ -273,7 +273,7 @@ static int rgmii_probe(struct platform_device *ofdev)
return rc;
}
-static int rgmii_remove(struct platform_device *ofdev)
+static void rgmii_remove(struct platform_device *ofdev)
{
struct rgmii_instance *dev = platform_get_drvdata(ofdev);
@@ -281,8 +281,6 @@ static int rgmii_remove(struct platform_device *ofdev)
iounmap(dev->base);
kfree(dev);
-
- return 0;
}
static const struct of_device_id rgmii_match[] =
@@ -302,7 +300,7 @@ static struct platform_driver rgmii_driver = {
.of_match_table = rgmii_match,
},
.probe = rgmii_probe,
- .remove = rgmii_remove,
+ .remove_new = rgmii_remove,
};
int __init rgmii_init(void)
diff --git a/drivers/net/ethernet/ibm/emac/tah.c b/drivers/net/ethernet/ibm/emac/tah.c
index aae9a88d95d7..fa3488258ca2 100644
--- a/drivers/net/ethernet/ibm/emac/tah.c
+++ b/drivers/net/ethernet/ibm/emac/tah.c
@@ -130,7 +130,7 @@ static int tah_probe(struct platform_device *ofdev)
return rc;
}
-static int tah_remove(struct platform_device *ofdev)
+static void tah_remove(struct platform_device *ofdev)
{
struct tah_instance *dev = platform_get_drvdata(ofdev);
@@ -138,8 +138,6 @@ static int tah_remove(struct platform_device *ofdev)
iounmap(dev->base);
kfree(dev);
-
- return 0;
}
static const struct of_device_id tah_match[] =
@@ -160,7 +158,7 @@ static struct platform_driver tah_driver = {
.of_match_table = tah_match,
},
.probe = tah_probe,
- .remove = tah_remove,
+ .remove_new = tah_remove,
};
int __init tah_init(void)
diff --git a/drivers/net/ethernet/ibm/emac/zmii.c b/drivers/net/ethernet/ibm/emac/zmii.c
index 6337388ee5f4..26e86cdee2f6 100644
--- a/drivers/net/ethernet/ibm/emac/zmii.c
+++ b/drivers/net/ethernet/ibm/emac/zmii.c
@@ -278,7 +278,7 @@ static int zmii_probe(struct platform_device *ofdev)
return rc;
}
-static int zmii_remove(struct platform_device *ofdev)
+static void zmii_remove(struct platform_device *ofdev)
{
struct zmii_instance *dev = platform_get_drvdata(ofdev);
@@ -286,8 +286,6 @@ static int zmii_remove(struct platform_device *ofdev)
iounmap(dev->base);
kfree(dev);
-
- return 0;
}
static const struct of_device_id zmii_match[] =
@@ -308,7 +306,7 @@ static struct platform_driver zmii_driver = {
.of_match_table = zmii_match,
},
.probe = zmii_probe,
- .remove = zmii_remove,
+ .remove_new = zmii_remove,
};
int __init zmii_init(void)
diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig
index 9bc0a9519899..e6684f3cc0ce 100644
--- a/drivers/net/ethernet/intel/Kconfig
+++ b/drivers/net/ethernet/intel/Kconfig
@@ -284,6 +284,7 @@ config ICE
select DIMLIB
select NET_DEVLINK
select PLDMFW
+ select DPLL
help
This driver supports Intel(R) Ethernet Connection E800 Series of
devices. For more information on how to identify your adapter, go
@@ -355,5 +356,17 @@ config IGC
To compile this driver as a module, choose M here. The module
will be called igc.
+config IDPF
+ tristate "Intel(R) Infrastructure Data Path Function Support"
+ depends on PCI_MSI
+ select DIMLIB
+ select PAGE_POOL
+ select PAGE_POOL_STATS
+ help
+ This driver supports Intel(R) Infrastructure Data Path Function
+ devices.
+
+ To compile this driver as a module, choose M here. The module
+ will be called idpf.
endif # NET_VENDOR_INTEL
diff --git a/drivers/net/ethernet/intel/Makefile b/drivers/net/ethernet/intel/Makefile
index d80d04132073..dacb481ee5b1 100644
--- a/drivers/net/ethernet/intel/Makefile
+++ b/drivers/net/ethernet/intel/Makefile
@@ -15,3 +15,4 @@ obj-$(CONFIG_I40E) += i40e/
obj-$(CONFIG_IAVF) += iavf/
obj-$(CONFIG_FM10K) += fm10k/
obj-$(CONFIG_ICE) += ice/
+obj-$(CONFIG_IDPF) += idpf/
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index de7fd43dc11c..00ca2b88165c 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -16320,11 +16320,15 @@ static void i40e_remove(struct pci_dev *pdev)
i40e_switch_branch_release(pf->veb[i]);
}
- /* Now we can shutdown the PF's VSI, just before we kill
+ /* Now we can shutdown the PF's VSIs, just before we kill
* adminq and hmc.
*/
- if (pf->vsi[pf->lan_vsi])
- i40e_vsi_release(pf->vsi[pf->lan_vsi]);
+ for (i = pf->num_alloc_vsi; i--;)
+ if (pf->vsi[i]) {
+ i40e_vsi_close(pf->vsi[i]);
+ i40e_vsi_release(pf->vsi[i]);
+ pf->vsi[i] = NULL;
+ }
i40e_cloud_filter_exit(pf);
diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h
index e110ba346185..44daf335e8c5 100644
--- a/drivers/net/ethernet/intel/iavf/iavf.h
+++ b/drivers/net/ethernet/intel/iavf/iavf.h
@@ -298,8 +298,6 @@ struct iavf_adapter {
#define IAVF_FLAG_CLIENT_NEEDS_OPEN BIT(10)
#define IAVF_FLAG_CLIENT_NEEDS_CLOSE BIT(11)
#define IAVF_FLAG_CLIENT_NEEDS_L2_PARAMS BIT(12)
-#define IAVF_FLAG_PROMISC_ON BIT(13)
-#define IAVF_FLAG_ALLMULTI_ON BIT(14)
#define IAVF_FLAG_LEGACY_RX BIT(15)
#define IAVF_FLAG_REINIT_ITR_NEEDED BIT(16)
#define IAVF_FLAG_QUEUES_DISABLED BIT(17)
@@ -325,10 +323,7 @@ struct iavf_adapter {
#define IAVF_FLAG_AQ_SET_HENA BIT_ULL(12)
#define IAVF_FLAG_AQ_SET_RSS_KEY BIT_ULL(13)
#define IAVF_FLAG_AQ_SET_RSS_LUT BIT_ULL(14)
-#define IAVF_FLAG_AQ_REQUEST_PROMISC BIT_ULL(15)
-#define IAVF_FLAG_AQ_RELEASE_PROMISC BIT_ULL(16)
-#define IAVF_FLAG_AQ_REQUEST_ALLMULTI BIT_ULL(17)
-#define IAVF_FLAG_AQ_RELEASE_ALLMULTI BIT_ULL(18)
+#define IAVF_FLAG_AQ_CONFIGURE_PROMISC_MODE BIT_ULL(15)
#define IAVF_FLAG_AQ_ENABLE_VLAN_STRIPPING BIT_ULL(19)
#define IAVF_FLAG_AQ_DISABLE_VLAN_STRIPPING BIT_ULL(20)
#define IAVF_FLAG_AQ_ENABLE_CHANNELS BIT_ULL(21)
@@ -365,6 +360,12 @@ struct iavf_adapter {
(IAVF_EXTENDED_CAP_SEND_VLAN_V2 | \
IAVF_EXTENDED_CAP_RECV_VLAN_V2)
+ /* Lock to prevent possible clobbering of
+ * current_netdev_promisc_flags
+ */
+ spinlock_t current_netdev_promisc_flags_lock;
+ netdev_features_t current_netdev_promisc_flags;
+
/* OS defined structs */
struct net_device *netdev;
struct pci_dev *pdev;
@@ -405,6 +406,8 @@ struct iavf_adapter {
VIRTCHNL_VF_OFFLOAD_VLAN)
#define VLAN_V2_ALLOWED(_a) ((_a)->vf_res->vf_cap_flags & \
VIRTCHNL_VF_OFFLOAD_VLAN_V2)
+#define CRC_OFFLOAD_ALLOWED(_a) ((_a)->vf_res->vf_cap_flags & \
+ VIRTCHNL_VF_OFFLOAD_CRC)
#define VLAN_V2_FILTERING_ALLOWED(_a) \
(VLAN_V2_ALLOWED((_a)) && \
((_a)->vlan_v2_caps.filtering.filtering_support.outer || \
@@ -551,7 +554,8 @@ void iavf_add_ether_addrs(struct iavf_adapter *adapter);
void iavf_del_ether_addrs(struct iavf_adapter *adapter);
void iavf_add_vlans(struct iavf_adapter *adapter);
void iavf_del_vlans(struct iavf_adapter *adapter);
-void iavf_set_promiscuous(struct iavf_adapter *adapter, int flags);
+void iavf_set_promiscuous(struct iavf_adapter *adapter);
+bool iavf_promiscuous_mode_changed(struct iavf_adapter *adapter);
void iavf_request_stats(struct iavf_adapter *adapter);
int iavf_request_reset(struct iavf_adapter *adapter);
void iavf_get_hena(struct iavf_adapter *adapter);
diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
index b3434dbc90d6..d47eae3a2d32 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_main.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
@@ -1187,6 +1187,16 @@ static int iavf_addr_unsync(struct net_device *netdev, const u8 *addr)
}
/**
+ * iavf_promiscuous_mode_changed - check if promiscuous mode bits changed
+ * @adapter: device specific adapter
+ */
+bool iavf_promiscuous_mode_changed(struct iavf_adapter *adapter)
+{
+ return (adapter->current_netdev_promisc_flags ^ adapter->netdev->flags) &
+ (IFF_PROMISC | IFF_ALLMULTI);
+}
+
+/**
* iavf_set_rx_mode - NDO callback to set the netdev filters
* @netdev: network interface device structure
**/
@@ -1199,19 +1209,10 @@ static void iavf_set_rx_mode(struct net_device *netdev)
__dev_mc_sync(netdev, iavf_addr_sync, iavf_addr_unsync);
spin_unlock_bh(&adapter->mac_vlan_list_lock);
- if (netdev->flags & IFF_PROMISC &&
- !(adapter->flags & IAVF_FLAG_PROMISC_ON))
- adapter->aq_required |= IAVF_FLAG_AQ_REQUEST_PROMISC;
- else if (!(netdev->flags & IFF_PROMISC) &&
- adapter->flags & IAVF_FLAG_PROMISC_ON)
- adapter->aq_required |= IAVF_FLAG_AQ_RELEASE_PROMISC;
-
- if (netdev->flags & IFF_ALLMULTI &&
- !(adapter->flags & IAVF_FLAG_ALLMULTI_ON))
- adapter->aq_required |= IAVF_FLAG_AQ_REQUEST_ALLMULTI;
- else if (!(netdev->flags & IFF_ALLMULTI) &&
- adapter->flags & IAVF_FLAG_ALLMULTI_ON)
- adapter->aq_required |= IAVF_FLAG_AQ_RELEASE_ALLMULTI;
+ spin_lock_bh(&adapter->current_netdev_promisc_flags_lock);
+ if (iavf_promiscuous_mode_changed(adapter))
+ adapter->aq_required |= IAVF_FLAG_AQ_CONFIGURE_PROMISC_MODE;
+ spin_unlock_bh(&adapter->current_netdev_promisc_flags_lock);
}
/**
@@ -2162,19 +2163,8 @@ static int iavf_process_aq_command(struct iavf_adapter *adapter)
return 0;
}
- if (adapter->aq_required & IAVF_FLAG_AQ_REQUEST_PROMISC) {
- iavf_set_promiscuous(adapter, FLAG_VF_UNICAST_PROMISC |
- FLAG_VF_MULTICAST_PROMISC);
- return 0;
- }
-
- if (adapter->aq_required & IAVF_FLAG_AQ_REQUEST_ALLMULTI) {
- iavf_set_promiscuous(adapter, FLAG_VF_MULTICAST_PROMISC);
- return 0;
- }
- if ((adapter->aq_required & IAVF_FLAG_AQ_RELEASE_PROMISC) ||
- (adapter->aq_required & IAVF_FLAG_AQ_RELEASE_ALLMULTI)) {
- iavf_set_promiscuous(adapter, 0);
+ if (adapter->aq_required & IAVF_FLAG_AQ_CONFIGURE_PROMISC_MODE) {
+ iavf_set_promiscuous(adapter);
return 0;
}
@@ -4410,6 +4400,9 @@ static int iavf_set_features(struct net_device *netdev,
(features & NETIF_VLAN_OFFLOAD_FEATURES))
iavf_set_vlan_offload_features(adapter, netdev->features,
features);
+ if (CRC_OFFLOAD_ALLOWED(adapter) &&
+ ((netdev->features & NETIF_F_RXFCS) ^ (features & NETIF_F_RXFCS)))
+ iavf_schedule_reset(adapter, IAVF_FLAG_RESET_NEEDED);
return 0;
}
@@ -4531,6 +4524,9 @@ iavf_get_netdev_vlan_hw_features(struct iavf_adapter *adapter)
}
}
+ if (CRC_OFFLOAD_ALLOWED(adapter))
+ hw_features |= NETIF_F_RXFCS;
+
return hw_features;
}
@@ -4695,6 +4691,55 @@ iavf_fix_netdev_vlan_features(struct iavf_adapter *adapter,
}
/**
+ * iavf_fix_strip_features - fix NETDEV CRC and VLAN strip features
+ * @adapter: board private structure
+ * @requested_features: stack requested NETDEV features
+ *
+ * Returns fixed-up features bits
+ **/
+static netdev_features_t
+iavf_fix_strip_features(struct iavf_adapter *adapter,
+ netdev_features_t requested_features)
+{
+ struct net_device *netdev = adapter->netdev;
+ bool crc_offload_req, is_vlan_strip;
+ netdev_features_t vlan_strip;
+ int num_non_zero_vlan;
+
+ crc_offload_req = CRC_OFFLOAD_ALLOWED(adapter) &&
+ (requested_features & NETIF_F_RXFCS);
+ num_non_zero_vlan = iavf_get_num_vlans_added(adapter);
+ vlan_strip = (NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_STAG_RX);
+ is_vlan_strip = requested_features & vlan_strip;
+
+ if (!crc_offload_req)
+ return requested_features;
+
+ if (!num_non_zero_vlan && (netdev->features & vlan_strip) &&
+ !(netdev->features & NETIF_F_RXFCS) && is_vlan_strip) {
+ requested_features &= ~vlan_strip;
+ netdev_info(netdev, "Disabling VLAN stripping as FCS/CRC stripping is also disabled and there is no VLAN configured\n");
+ return requested_features;
+ }
+
+ if ((netdev->features & NETIF_F_RXFCS) && is_vlan_strip) {
+ requested_features &= ~vlan_strip;
+ if (!(netdev->features & vlan_strip))
+ netdev_info(netdev, "To enable VLAN stripping, first need to enable FCS/CRC stripping");
+
+ return requested_features;
+ }
+
+ if (num_non_zero_vlan && is_vlan_strip &&
+ !(netdev->features & NETIF_F_RXFCS)) {
+ requested_features &= ~NETIF_F_RXFCS;
+ netdev_info(netdev, "To disable FCS/CRC stripping, first need to disable VLAN stripping");
+ }
+
+ return requested_features;
+}
+
+/**
* iavf_fix_features - fix up the netdev feature bits
* @netdev: our net device
* @features: desired feature bits
@@ -4706,7 +4751,9 @@ static netdev_features_t iavf_fix_features(struct net_device *netdev,
{
struct iavf_adapter *adapter = netdev_priv(netdev);
- return iavf_fix_netdev_vlan_features(adapter, features);
+ features = iavf_fix_netdev_vlan_features(adapter, features);
+
+ return iavf_fix_strip_features(adapter, features);
}
static const struct net_device_ops iavf_netdev_ops = {
@@ -4970,6 +5017,7 @@ static int iavf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
spin_lock_init(&adapter->cloud_filter_list_lock);
spin_lock_init(&adapter->fdir_fltr_lock);
spin_lock_init(&adapter->adv_rss_lock);
+ spin_lock_init(&adapter->current_netdev_promisc_flags_lock);
INIT_LIST_HEAD(&adapter->mac_filter_list);
INIT_LIST_HEAD(&adapter->vlan_filter_list);
diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
index f9727e9c3d63..8ce6389b5815 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
@@ -142,6 +142,7 @@ int iavf_send_vf_config_msg(struct iavf_adapter *adapter)
VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2 |
VIRTCHNL_VF_OFFLOAD_ENCAP |
VIRTCHNL_VF_OFFLOAD_VLAN_V2 |
+ VIRTCHNL_VF_OFFLOAD_CRC |
VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM |
VIRTCHNL_VF_OFFLOAD_REQ_QUEUES |
VIRTCHNL_VF_OFFLOAD_ADQ |
@@ -312,6 +313,9 @@ void iavf_configure_queues(struct iavf_adapter *adapter)
vqpi->rxq.databuffer_size =
ALIGN(adapter->rx_rings[i].rx_buf_len,
BIT_ULL(IAVF_RXQ_CTX_DBUFF_SHIFT));
+ if (CRC_OFFLOAD_ALLOWED(adapter))
+ vqpi->rxq.crc_disable = !!(adapter->netdev->features &
+ NETIF_F_RXFCS);
vqpi++;
}
@@ -936,14 +940,14 @@ void iavf_del_vlans(struct iavf_adapter *adapter)
/**
* iavf_set_promiscuous
* @adapter: adapter structure
- * @flags: bitmask to control unicast/multicast promiscuous.
*
* Request that the PF enable promiscuous mode for our VSI.
**/
-void iavf_set_promiscuous(struct iavf_adapter *adapter, int flags)
+void iavf_set_promiscuous(struct iavf_adapter *adapter)
{
+ struct net_device *netdev = adapter->netdev;
struct virtchnl_promisc_info vpi;
- int promisc_all;
+ unsigned int flags;
if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
/* bail because we already have a command pending */
@@ -952,36 +956,57 @@ void iavf_set_promiscuous(struct iavf_adapter *adapter, int flags)
return;
}
- promisc_all = FLAG_VF_UNICAST_PROMISC |
- FLAG_VF_MULTICAST_PROMISC;
- if ((flags & promisc_all) == promisc_all) {
- adapter->flags |= IAVF_FLAG_PROMISC_ON;
- adapter->aq_required &= ~IAVF_FLAG_AQ_REQUEST_PROMISC;
- dev_info(&adapter->pdev->dev, "Entering promiscuous mode\n");
- }
+ /* prevent changes to promiscuous flags */
+ spin_lock_bh(&adapter->current_netdev_promisc_flags_lock);
- if (flags & FLAG_VF_MULTICAST_PROMISC) {
- adapter->flags |= IAVF_FLAG_ALLMULTI_ON;
- adapter->aq_required &= ~IAVF_FLAG_AQ_REQUEST_ALLMULTI;
- dev_info(&adapter->pdev->dev, "%s is entering multicast promiscuous mode\n",
- adapter->netdev->name);
+ /* sanity check to prevent duplicate AQ calls */
+ if (!iavf_promiscuous_mode_changed(adapter)) {
+ adapter->aq_required &= ~IAVF_FLAG_AQ_CONFIGURE_PROMISC_MODE;
+ dev_dbg(&adapter->pdev->dev, "No change in promiscuous mode\n");
+ /* allow changes to promiscuous flags */
+ spin_unlock_bh(&adapter->current_netdev_promisc_flags_lock);
+ return;
}
- if (!flags) {
- if (adapter->flags & IAVF_FLAG_PROMISC_ON) {
- adapter->flags &= ~IAVF_FLAG_PROMISC_ON;
- adapter->aq_required &= ~IAVF_FLAG_AQ_RELEASE_PROMISC;
- dev_info(&adapter->pdev->dev, "Leaving promiscuous mode\n");
- }
+ /* there are 2 bits, but only 3 states */
+ if (!(netdev->flags & IFF_PROMISC) &&
+ netdev->flags & IFF_ALLMULTI) {
+ /* State 1 - only multicast promiscuous mode enabled
+ * - !IFF_PROMISC && IFF_ALLMULTI
+ */
+ flags = FLAG_VF_MULTICAST_PROMISC;
+ adapter->current_netdev_promisc_flags |= IFF_ALLMULTI;
+ adapter->current_netdev_promisc_flags &= ~IFF_PROMISC;
+ dev_info(&adapter->pdev->dev, "Entering multicast promiscuous mode\n");
+ } else if (!(netdev->flags & IFF_PROMISC) &&
+ !(netdev->flags & IFF_ALLMULTI)) {
+ /* State 2 - unicast/multicast promiscuous mode disabled
+ * - !IFF_PROMISC && !IFF_ALLMULTI
+ */
+ flags = 0;
+ adapter->current_netdev_promisc_flags &=
+ ~(IFF_PROMISC | IFF_ALLMULTI);
+ dev_info(&adapter->pdev->dev, "Leaving promiscuous mode\n");
+ } else {
+ /* State 3 - unicast/multicast promiscuous mode enabled
+ * - IFF_PROMISC && IFF_ALLMULTI
+ * - IFF_PROMISC && !IFF_ALLMULTI
+ */
+ flags = FLAG_VF_UNICAST_PROMISC | FLAG_VF_MULTICAST_PROMISC;
+ adapter->current_netdev_promisc_flags |= IFF_PROMISC;
+ if (netdev->flags & IFF_ALLMULTI)
+ adapter->current_netdev_promisc_flags |= IFF_ALLMULTI;
+ else
+ adapter->current_netdev_promisc_flags &= ~IFF_ALLMULTI;
- if (adapter->flags & IAVF_FLAG_ALLMULTI_ON) {
- adapter->flags &= ~IAVF_FLAG_ALLMULTI_ON;
- adapter->aq_required &= ~IAVF_FLAG_AQ_RELEASE_ALLMULTI;
- dev_info(&adapter->pdev->dev, "%s is leaving multicast promiscuous mode\n",
- adapter->netdev->name);
- }
+ dev_info(&adapter->pdev->dev, "Entering promiscuous mode\n");
}
+ adapter->aq_required &= ~IAVF_FLAG_AQ_CONFIGURE_PROMISC_MODE;
+
+ /* allow changes to promiscuous flags */
+ spin_unlock_bh(&adapter->current_netdev_promisc_flags_lock);
+
adapter->current_op = VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE;
vpi.vsi_id = adapter->vsi_res->vsi_id;
vpi.flags = flags;
diff --git a/drivers/net/ethernet/intel/ice/Makefile b/drivers/net/ethernet/intel/ice/Makefile
index 960277d78e09..00806ddf5bf0 100644
--- a/drivers/net/ethernet/intel/ice/Makefile
+++ b/drivers/net/ethernet/intel/ice/Makefile
@@ -34,7 +34,8 @@ ice-y := ice_main.o \
ice_lag.o \
ice_ethtool.o \
ice_repr.o \
- ice_tc_lib.o
+ ice_tc_lib.o \
+ ice_dpll.o
ice-$(CONFIG_PCI_IOV) += \
ice_sriov.o \
ice_virtchnl.o \
diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index 5022b036ca4f..d30ae39c19f0 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -76,6 +76,7 @@
#include "ice_vsi_vlan_ops.h"
#include "ice_gnss.h"
#include "ice_irq.h"
+#include "ice_dpll.h"
#define ICE_BAR0 0
#define ICE_REQ_DESC_MULTIPLE 32
@@ -195,10 +196,13 @@
#define ice_pf_to_dev(pf) (&((pf)->pdev->dev))
+#define ice_pf_src_tmr_owned(pf) ((pf)->hw.func_caps.ts_func_info.src_tmr_owned)
+
enum ice_feature {
ICE_F_DSCP,
- ICE_F_PTP_EXTTS,
+ ICE_F_PHY_RCLK,
ICE_F_SMA_CTRL,
+ ICE_F_CGU,
ICE_F_GNSS,
ICE_F_ROCE_LAG,
ICE_F_SRIOV_LAG,
@@ -508,6 +512,7 @@ enum ice_pf_flags {
ICE_FLAG_UNPLUG_AUX_DEV,
ICE_FLAG_MTU_CHANGED,
ICE_FLAG_GNSS, /* GNSS successfully initialized */
+ ICE_FLAG_DPLL, /* SyncE/PTP dplls initialized */
ICE_PF_FLAGS_NBITS /* must be last */
};
@@ -640,6 +645,7 @@ struct ice_pf {
#define ICE_VF_AGG_NODE_ID_START 65
#define ICE_MAX_VF_AGG_NODES 32
struct ice_agg_node vf_agg_node[ICE_MAX_VF_AGG_NODES];
+ struct ice_dplls dplls;
};
extern struct workqueue_struct *ice_lag_wq;
diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
index 29f7a9852aec..24293f52f2d1 100644
--- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
@@ -1351,6 +1351,30 @@ struct ice_aqc_set_mac_lb {
u8 reserved[15];
};
+/* Set PHY recovered clock output (direct 0x0630) */
+struct ice_aqc_set_phy_rec_clk_out {
+ u8 phy_output;
+ u8 port_num;
+#define ICE_AQC_SET_PHY_REC_CLK_OUT_CURR_PORT 0xFF
+ u8 flags;
+#define ICE_AQC_SET_PHY_REC_CLK_OUT_OUT_EN BIT(0)
+ u8 rsvd;
+ __le32 freq;
+ u8 rsvd2[6];
+ __le16 node_handle;
+};
+
+/* Get PHY recovered clock output (direct 0x0631) */
+struct ice_aqc_get_phy_rec_clk_out {
+ u8 phy_output;
+ u8 port_num;
+#define ICE_AQC_GET_PHY_REC_CLK_OUT_CURR_PORT 0xFF
+ u8 flags;
+#define ICE_AQC_GET_PHY_REC_CLK_OUT_OUT_EN BIT(0)
+ u8 rsvd[11];
+ __le16 node_handle;
+};
+
struct ice_aqc_link_topo_params {
u8 lport_num;
u8 lport_num_valid;
@@ -1367,6 +1391,9 @@ struct ice_aqc_link_topo_params {
#define ICE_AQC_LINK_TOPO_NODE_TYPE_CAGE 6
#define ICE_AQC_LINK_TOPO_NODE_TYPE_MEZZ 7
#define ICE_AQC_LINK_TOPO_NODE_TYPE_ID_EEPROM 8
+#define ICE_AQC_LINK_TOPO_NODE_TYPE_CLK_CTRL 9
+#define ICE_AQC_LINK_TOPO_NODE_TYPE_CLK_MUX 10
+#define ICE_AQC_LINK_TOPO_NODE_TYPE_GPS 11
#define ICE_AQC_LINK_TOPO_NODE_CTX_S 4
#define ICE_AQC_LINK_TOPO_NODE_CTX_M \
(0xF << ICE_AQC_LINK_TOPO_NODE_CTX_S)
@@ -1403,8 +1430,13 @@ struct ice_aqc_link_topo_addr {
struct ice_aqc_get_link_topo {
struct ice_aqc_link_topo_addr addr;
u8 node_part_num;
-#define ICE_AQC_GET_LINK_TOPO_NODE_NR_PCA9575 0x21
-#define ICE_AQC_GET_LINK_TOPO_NODE_NR_C827 0x31
+#define ICE_AQC_GET_LINK_TOPO_NODE_NR_PCA9575 0x21
+#define ICE_AQC_GET_LINK_TOPO_NODE_NR_ZL30632_80032 0x24
+#define ICE_AQC_GET_LINK_TOPO_NODE_NR_SI5383_5384 0x25
+#define ICE_AQC_GET_LINK_TOPO_NODE_NR_E822_PHY 0x30
+#define ICE_AQC_GET_LINK_TOPO_NODE_NR_C827 0x31
+#define ICE_AQC_GET_LINK_TOPO_NODE_NR_GEN_CLK_MUX 0x47
+#define ICE_AQC_GET_LINK_TOPO_NODE_NR_GEN_GPS 0x48
u8 rsvd[9];
};
@@ -2125,6 +2157,193 @@ struct ice_aqc_get_pkg_info_resp {
struct ice_aqc_get_pkg_info pkg_info[];
};
+/* Get CGU abilities command response data structure (indirect 0x0C61) */
+struct ice_aqc_get_cgu_abilities {
+ u8 num_inputs;
+ u8 num_outputs;
+ u8 pps_dpll_idx;
+ u8 eec_dpll_idx;
+ __le32 max_in_freq;
+ __le32 max_in_phase_adj;
+ __le32 max_out_freq;
+ __le32 max_out_phase_adj;
+ u8 cgu_part_num;
+ u8 rsvd[3];
+};
+
+/* Set CGU input config (direct 0x0C62) */
+struct ice_aqc_set_cgu_input_config {
+ u8 input_idx;
+ u8 flags1;
+#define ICE_AQC_SET_CGU_IN_CFG_FLG1_UPDATE_FREQ BIT(6)
+#define ICE_AQC_SET_CGU_IN_CFG_FLG1_UPDATE_DELAY BIT(7)
+ u8 flags2;
+#define ICE_AQC_SET_CGU_IN_CFG_FLG2_INPUT_EN BIT(5)
+#define ICE_AQC_SET_CGU_IN_CFG_FLG2_ESYNC_EN BIT(6)
+ u8 rsvd;
+ __le32 freq;
+ __le32 phase_delay;
+ u8 rsvd2[2];
+ __le16 node_handle;
+};
+
+/* Get CGU input config response descriptor structure (direct 0x0C63) */
+struct ice_aqc_get_cgu_input_config {
+ u8 input_idx;
+ u8 status;
+#define ICE_AQC_GET_CGU_IN_CFG_STATUS_LOS BIT(0)
+#define ICE_AQC_GET_CGU_IN_CFG_STATUS_SCM_FAIL BIT(1)
+#define ICE_AQC_GET_CGU_IN_CFG_STATUS_CFM_FAIL BIT(2)
+#define ICE_AQC_GET_CGU_IN_CFG_STATUS_GST_FAIL BIT(3)
+#define ICE_AQC_GET_CGU_IN_CFG_STATUS_PFM_FAIL BIT(4)
+#define ICE_AQC_GET_CGU_IN_CFG_STATUS_ESYNC_FAIL BIT(6)
+#define ICE_AQC_GET_CGU_IN_CFG_STATUS_ESYNC_CAP BIT(7)
+ u8 type;
+#define ICE_AQC_GET_CGU_IN_CFG_TYPE_READ_ONLY BIT(0)
+#define ICE_AQC_GET_CGU_IN_CFG_TYPE_GPS BIT(4)
+#define ICE_AQC_GET_CGU_IN_CFG_TYPE_EXTERNAL BIT(5)
+#define ICE_AQC_GET_CGU_IN_CFG_TYPE_PHY BIT(6)
+ u8 flags1;
+#define ICE_AQC_GET_CGU_IN_CFG_FLG1_PHASE_DELAY_SUPP BIT(0)
+#define ICE_AQC_GET_CGU_IN_CFG_FLG1_1PPS_SUPP BIT(2)
+#define ICE_AQC_GET_CGU_IN_CFG_FLG1_10MHZ_SUPP BIT(3)
+#define ICE_AQC_GET_CGU_IN_CFG_FLG1_ANYFREQ BIT(7)
+ __le32 freq;
+ __le32 phase_delay;
+ u8 flags2;
+#define ICE_AQC_GET_CGU_IN_CFG_FLG2_INPUT_EN BIT(5)
+#define ICE_AQC_GET_CGU_IN_CFG_FLG2_ESYNC_EN BIT(6)
+ u8 rsvd[1];
+ __le16 node_handle;
+};
+
+/* Set CGU output config (direct 0x0C64) */
+struct ice_aqc_set_cgu_output_config {
+ u8 output_idx;
+ u8 flags;
+#define ICE_AQC_SET_CGU_OUT_CFG_OUT_EN BIT(0)
+#define ICE_AQC_SET_CGU_OUT_CFG_ESYNC_EN BIT(1)
+#define ICE_AQC_SET_CGU_OUT_CFG_UPDATE_FREQ BIT(2)
+#define ICE_AQC_SET_CGU_OUT_CFG_UPDATE_PHASE BIT(3)
+#define ICE_AQC_SET_CGU_OUT_CFG_UPDATE_SRC_SEL BIT(4)
+ u8 src_sel;
+#define ICE_AQC_SET_CGU_OUT_CFG_DPLL_SRC_SEL ICE_M(0x1F, 0)
+ u8 rsvd;
+ __le32 freq;
+ __le32 phase_delay;
+ u8 rsvd2[2];
+ __le16 node_handle;
+};
+
+/* Get CGU output config (direct 0x0C65) */
+struct ice_aqc_get_cgu_output_config {
+ u8 output_idx;
+ u8 flags;
+#define ICE_AQC_GET_CGU_OUT_CFG_OUT_EN BIT(0)
+#define ICE_AQC_GET_CGU_OUT_CFG_ESYNC_EN BIT(1)
+#define ICE_AQC_GET_CGU_OUT_CFG_ESYNC_ABILITY BIT(2)
+ u8 src_sel;
+#define ICE_AQC_GET_CGU_OUT_CFG_DPLL_SRC_SEL_SHIFT 0
+#define ICE_AQC_GET_CGU_OUT_CFG_DPLL_SRC_SEL \
+ ICE_M(0x1F, ICE_AQC_GET_CGU_OUT_CFG_DPLL_SRC_SEL_SHIFT)
+#define ICE_AQC_GET_CGU_OUT_CFG_DPLL_MODE_SHIFT 5
+#define ICE_AQC_GET_CGU_OUT_CFG_DPLL_MODE \
+ ICE_M(0x7, ICE_AQC_GET_CGU_OUT_CFG_DPLL_MODE_SHIFT)
+ u8 rsvd;
+ __le32 freq;
+ __le32 src_freq;
+ u8 rsvd2[2];
+ __le16 node_handle;
+};
+
+/* Get CGU DPLL status (direct 0x0C66) */
+struct ice_aqc_get_cgu_dpll_status {
+ u8 dpll_num;
+ u8 ref_state;
+#define ICE_AQC_GET_CGU_DPLL_STATUS_REF_SW_LOS BIT(0)
+#define ICE_AQC_GET_CGU_DPLL_STATUS_REF_SW_SCM BIT(1)
+#define ICE_AQC_GET_CGU_DPLL_STATUS_REF_SW_CFM BIT(2)
+#define ICE_AQC_GET_CGU_DPLL_STATUS_REF_SW_GST BIT(3)
+#define ICE_AQC_GET_CGU_DPLL_STATUS_REF_SW_PFM BIT(4)
+#define ICE_AQC_GET_CGU_DPLL_STATUS_FAST_LOCK_EN BIT(5)
+#define ICE_AQC_GET_CGU_DPLL_STATUS_REF_SW_ESYNC BIT(6)
+ u8 dpll_state;
+#define ICE_AQC_GET_CGU_DPLL_STATUS_STATE_LOCK BIT(0)
+#define ICE_AQC_GET_CGU_DPLL_STATUS_STATE_HO BIT(1)
+#define ICE_AQC_GET_CGU_DPLL_STATUS_STATE_HO_READY BIT(2)
+#define ICE_AQC_GET_CGU_DPLL_STATUS_STATE_FLHIT BIT(5)
+#define ICE_AQC_GET_CGU_DPLL_STATUS_STATE_PSLHIT BIT(7)
+ u8 config;
+#define ICE_AQC_GET_CGU_DPLL_CONFIG_CLK_REF_SEL ICE_M(0x1F, 0)
+#define ICE_AQC_GET_CGU_DPLL_CONFIG_MODE_SHIFT 5
+#define ICE_AQC_GET_CGU_DPLL_CONFIG_MODE \
+ ICE_M(0x7, ICE_AQC_GET_CGU_DPLL_CONFIG_MODE_SHIFT)
+#define ICE_AQC_GET_CGU_DPLL_CONFIG_MODE_FREERUN 0
+#define ICE_AQC_GET_CGU_DPLL_CONFIG_MODE_AUTOMATIC \
+ ICE_M(0x3, ICE_AQC_GET_CGU_DPLL_CONFIG_MODE_SHIFT)
+ __le32 phase_offset_h;
+ __le32 phase_offset_l;
+ u8 eec_mode;
+#define ICE_AQC_GET_CGU_DPLL_STATUS_EEC_MODE_1 0xA
+#define ICE_AQC_GET_CGU_DPLL_STATUS_EEC_MODE_2 0xB
+#define ICE_AQC_GET_CGU_DPLL_STATUS_EEC_MODE_UNKNOWN 0xF
+ u8 rsvd[1];
+ __le16 node_handle;
+};
+
+/* Set CGU DPLL config (direct 0x0C67) */
+struct ice_aqc_set_cgu_dpll_config {
+ u8 dpll_num;
+ u8 ref_state;
+#define ICE_AQC_SET_CGU_DPLL_CONFIG_REF_SW_LOS BIT(0)
+#define ICE_AQC_SET_CGU_DPLL_CONFIG_REF_SW_SCM BIT(1)
+#define ICE_AQC_SET_CGU_DPLL_CONFIG_REF_SW_CFM BIT(2)
+#define ICE_AQC_SET_CGU_DPLL_CONFIG_REF_SW_GST BIT(3)
+#define ICE_AQC_SET_CGU_DPLL_CONFIG_REF_SW_PFM BIT(4)
+#define ICE_AQC_SET_CGU_DPLL_CONFIG_REF_FLOCK_EN BIT(5)
+#define ICE_AQC_SET_CGU_DPLL_CONFIG_REF_SW_ESYNC BIT(6)
+ u8 rsvd;
+ u8 config;
+#define ICE_AQC_SET_CGU_DPLL_CONFIG_CLK_REF_SEL ICE_M(0x1F, 0)
+#define ICE_AQC_SET_CGU_DPLL_CONFIG_MODE_SHIFT 5
+#define ICE_AQC_SET_CGU_DPLL_CONFIG_MODE \
+ ICE_M(0x7, ICE_AQC_SET_CGU_DPLL_CONFIG_MODE_SHIFT)
+#define ICE_AQC_SET_CGU_DPLL_CONFIG_MODE_FREERUN 0
+#define ICE_AQC_SET_CGU_DPLL_CONFIG_MODE_AUTOMATIC \
+ ICE_M(0x3, ICE_AQC_SET_CGU_DPLL_CONFIG_MODE_SHIFT)
+ u8 rsvd2[8];
+ u8 eec_mode;
+ u8 rsvd3[1];
+ __le16 node_handle;
+};
+
+/* Set CGU reference priority (direct 0x0C68) */
+struct ice_aqc_set_cgu_ref_prio {
+ u8 dpll_num;
+ u8 ref_idx;
+ u8 ref_priority;
+ u8 rsvd[11];
+ __le16 node_handle;
+};
+
+/* Get CGU reference priority (direct 0x0C69) */
+struct ice_aqc_get_cgu_ref_prio {
+ u8 dpll_num;
+ u8 ref_idx;
+ u8 ref_priority; /* Valid only in response */
+ u8 rsvd[13];
+};
+
+/* Get CGU info (direct 0x0C6A) */
+struct ice_aqc_get_cgu_info {
+ __le32 cgu_id;
+ __le32 cgu_cfg_ver;
+ __le32 cgu_fw_ver;
+ u8 node_part_num;
+ u8 dev_rev;
+ __le16 node_handle;
+};
+
/* Driver Shared Parameters (direct, 0x0C90) */
struct ice_aqc_driver_shared_params {
u8 set_or_get_op;
@@ -2194,6 +2413,8 @@ struct ice_aq_desc {
struct ice_aqc_get_phy_caps get_phy;
struct ice_aqc_set_phy_cfg set_phy;
struct ice_aqc_restart_an restart_an;
+ struct ice_aqc_set_phy_rec_clk_out set_phy_rec_clk_out;
+ struct ice_aqc_get_phy_rec_clk_out get_phy_rec_clk_out;
struct ice_aqc_gpio read_write_gpio;
struct ice_aqc_sff_eeprom read_write_sff_param;
struct ice_aqc_set_port_id_led set_port_id_led;
@@ -2234,6 +2455,15 @@ struct ice_aq_desc {
struct ice_aqc_fw_logging fw_logging;
struct ice_aqc_get_clear_fw_log get_clear_fw_log;
struct ice_aqc_download_pkg download_pkg;
+ struct ice_aqc_set_cgu_input_config set_cgu_input_config;
+ struct ice_aqc_get_cgu_input_config get_cgu_input_config;
+ struct ice_aqc_set_cgu_output_config set_cgu_output_config;
+ struct ice_aqc_get_cgu_output_config get_cgu_output_config;
+ struct ice_aqc_get_cgu_dpll_status get_cgu_dpll_status;
+ struct ice_aqc_set_cgu_dpll_config set_cgu_dpll_config;
+ struct ice_aqc_set_cgu_ref_prio set_cgu_ref_prio;
+ struct ice_aqc_get_cgu_ref_prio get_cgu_ref_prio;
+ struct ice_aqc_get_cgu_info get_cgu_info;
struct ice_aqc_driver_shared_params drv_shared_params;
struct ice_aqc_set_mac_lb set_mac_lb;
struct ice_aqc_alloc_free_res_cmd sw_res_ctrl;
@@ -2358,6 +2588,8 @@ enum ice_adminq_opc {
ice_aqc_opc_get_link_status = 0x0607,
ice_aqc_opc_set_event_mask = 0x0613,
ice_aqc_opc_set_mac_lb = 0x0620,
+ ice_aqc_opc_set_phy_rec_clk_out = 0x0630,
+ ice_aqc_opc_get_phy_rec_clk_out = 0x0631,
ice_aqc_opc_get_link_topo = 0x06E0,
ice_aqc_opc_read_i2c = 0x06E2,
ice_aqc_opc_write_i2c = 0x06E3,
@@ -2413,6 +2645,18 @@ enum ice_adminq_opc {
ice_aqc_opc_update_pkg = 0x0C42,
ice_aqc_opc_get_pkg_info_list = 0x0C43,
+ /* 1588/SyncE commands/events */
+ ice_aqc_opc_get_cgu_abilities = 0x0C61,
+ ice_aqc_opc_set_cgu_input_config = 0x0C62,
+ ice_aqc_opc_get_cgu_input_config = 0x0C63,
+ ice_aqc_opc_set_cgu_output_config = 0x0C64,
+ ice_aqc_opc_get_cgu_output_config = 0x0C65,
+ ice_aqc_opc_get_cgu_dpll_status = 0x0C66,
+ ice_aqc_opc_set_cgu_dpll_config = 0x0C67,
+ ice_aqc_opc_set_cgu_ref_prio = 0x0C68,
+ ice_aqc_opc_get_cgu_ref_prio = 0x0C69,
+ ice_aqc_opc_get_cgu_info = 0x0C6A,
+
ice_aqc_opc_driver_shared_params = 0x0C90,
/* Standalone Commands/Events */
diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
index 80deca45ab59..8f31ae449948 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.c
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -8,6 +8,7 @@
#include "ice_ptp_hw.h"
#define ICE_PF_RESET_WAIT_COUNT 300
+#define ICE_MAX_NETLIST_SIZE 10
static const char * const ice_link_mode_str_low[] = {
[0] = "100BASE_TX",
@@ -436,6 +437,81 @@ ice_aq_get_link_topo_handle(struct ice_port_info *pi, u8 node_type,
}
/**
+ * ice_aq_get_netlist_node
+ * @hw: pointer to the hw struct
+ * @cmd: get_link_topo AQ structure
+ * @node_part_number: output node part number if node found
+ * @node_handle: output node handle parameter if node found
+ *
+ * Get netlist node handle.
+ */
+int
+ice_aq_get_netlist_node(struct ice_hw *hw, struct ice_aqc_get_link_topo *cmd,
+ u8 *node_part_number, u16 *node_handle)
+{
+ struct ice_aq_desc desc;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_link_topo);
+ desc.params.get_link_topo = *cmd;
+
+ if (ice_aq_send_cmd(hw, &desc, NULL, 0, NULL))
+ return -EINTR;
+
+ if (node_handle)
+ *node_handle =
+ le16_to_cpu(desc.params.get_link_topo.addr.handle);
+ if (node_part_number)
+ *node_part_number = desc.params.get_link_topo.node_part_num;
+
+ return 0;
+}
+
+/**
+ * ice_find_netlist_node
+ * @hw: pointer to the hw struct
+ * @node_type_ctx: type of netlist node to look for
+ * @node_part_number: node part number to look for
+ * @node_handle: output parameter if node found - optional
+ *
+ * Find and return the node handle for a given node type and part number in the
+ * netlist. When found ICE_SUCCESS is returned, ICE_ERR_DOES_NOT_EXIST
+ * otherwise. If node_handle provided, it would be set to found node handle.
+ */
+int
+ice_find_netlist_node(struct ice_hw *hw, u8 node_type_ctx, u8 node_part_number,
+ u16 *node_handle)
+{
+ struct ice_aqc_get_link_topo cmd;
+ u8 rec_node_part_number;
+ u16 rec_node_handle;
+ u8 idx;
+
+ for (idx = 0; idx < ICE_MAX_NETLIST_SIZE; idx++) {
+ int status;
+
+ memset(&cmd, 0, sizeof(cmd));
+
+ cmd.addr.topo_params.node_type_ctx =
+ (node_type_ctx << ICE_AQC_LINK_TOPO_NODE_TYPE_S);
+ cmd.addr.topo_params.index = idx;
+
+ status = ice_aq_get_netlist_node(hw, &cmd,
+ &rec_node_part_number,
+ &rec_node_handle);
+ if (status)
+ return status;
+
+ if (rec_node_part_number == node_part_number) {
+ if (node_handle)
+ *node_handle = rec_node_handle;
+ return 0;
+ }
+ }
+
+ return -ENOTBLK;
+}
+
+/**
* ice_is_media_cage_present
* @pi: port information structure
*
@@ -2655,33 +2731,6 @@ ice_parse_dev_caps(struct ice_hw *hw, struct ice_hw_dev_caps *dev_p,
}
/**
- * ice_aq_get_netlist_node
- * @hw: pointer to the hw struct
- * @cmd: get_link_topo AQ structure
- * @node_part_number: output node part number if node found
- * @node_handle: output node handle parameter if node found
- */
-static int
-ice_aq_get_netlist_node(struct ice_hw *hw, struct ice_aqc_get_link_topo *cmd,
- u8 *node_part_number, u16 *node_handle)
-{
- struct ice_aq_desc desc;
-
- ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_link_topo);
- desc.params.get_link_topo = *cmd;
-
- if (ice_aq_send_cmd(hw, &desc, NULL, 0, NULL))
- return -EIO;
-
- if (node_handle)
- *node_handle = le16_to_cpu(desc.params.get_link_topo.addr.handle);
- if (node_part_number)
- *node_part_number = desc.params.get_link_topo.node_part_num;
-
- return 0;
-}
-
-/**
* ice_is_pf_c827 - check if pf contains c827 phy
* @hw: pointer to the hw struct
*/
@@ -2716,6 +2765,21 @@ bool ice_is_pf_c827(struct ice_hw *hw)
}
/**
+ * ice_is_gps_in_netlist
+ * @hw: pointer to the hw struct
+ *
+ * Check if the GPS generic device is present in the netlist
+ */
+bool ice_is_gps_in_netlist(struct ice_hw *hw)
+{
+ if (ice_find_netlist_node(hw, ICE_AQC_LINK_TOPO_NODE_TYPE_GPS,
+ ICE_AQC_GET_LINK_TOPO_NODE_NR_GEN_GPS, NULL))
+ return false;
+
+ return true;
+}
+
+/**
* ice_aq_list_caps - query function/device capabilities
* @hw: pointer to the HW struct
* @buf: a buffer to hold the capabilities
@@ -4999,6 +5063,395 @@ ice_dis_vsi_rdma_qset(struct ice_port_info *pi, u16 count, u32 *qset_teid,
}
/**
+ * ice_aq_get_cgu_abilities - get cgu abilities
+ * @hw: pointer to the HW struct
+ * @abilities: CGU abilities
+ *
+ * Get CGU abilities (0x0C61)
+ * Return: 0 on success or negative value on failure.
+ */
+int
+ice_aq_get_cgu_abilities(struct ice_hw *hw,
+ struct ice_aqc_get_cgu_abilities *abilities)
+{
+ struct ice_aq_desc desc;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_cgu_abilities);
+ return ice_aq_send_cmd(hw, &desc, abilities, sizeof(*abilities), NULL);
+}
+
+/**
+ * ice_aq_set_input_pin_cfg - set input pin config
+ * @hw: pointer to the HW struct
+ * @input_idx: Input index
+ * @flags1: Input flags
+ * @flags2: Input flags
+ * @freq: Frequency in Hz
+ * @phase_delay: Delay in ps
+ *
+ * Set CGU input config (0x0C62)
+ * Return: 0 on success or negative value on failure.
+ */
+int
+ice_aq_set_input_pin_cfg(struct ice_hw *hw, u8 input_idx, u8 flags1, u8 flags2,
+ u32 freq, s32 phase_delay)
+{
+ struct ice_aqc_set_cgu_input_config *cmd;
+ struct ice_aq_desc desc;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_cgu_input_config);
+ cmd = &desc.params.set_cgu_input_config;
+ cmd->input_idx = input_idx;
+ cmd->flags1 = flags1;
+ cmd->flags2 = flags2;
+ cmd->freq = cpu_to_le32(freq);
+ cmd->phase_delay = cpu_to_le32(phase_delay);
+
+ return ice_aq_send_cmd(hw, &desc, NULL, 0, NULL);
+}
+
+/**
+ * ice_aq_get_input_pin_cfg - get input pin config
+ * @hw: pointer to the HW struct
+ * @input_idx: Input index
+ * @status: Pin status
+ * @type: Pin type
+ * @flags1: Input flags
+ * @flags2: Input flags
+ * @freq: Frequency in Hz
+ * @phase_delay: Delay in ps
+ *
+ * Get CGU input config (0x0C63)
+ * Return: 0 on success or negative value on failure.
+ */
+int
+ice_aq_get_input_pin_cfg(struct ice_hw *hw, u8 input_idx, u8 *status, u8 *type,
+ u8 *flags1, u8 *flags2, u32 *freq, s32 *phase_delay)
+{
+ struct ice_aqc_get_cgu_input_config *cmd;
+ struct ice_aq_desc desc;
+ int ret;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_cgu_input_config);
+ cmd = &desc.params.get_cgu_input_config;
+ cmd->input_idx = input_idx;
+
+ ret = ice_aq_send_cmd(hw, &desc, NULL, 0, NULL);
+ if (!ret) {
+ if (status)
+ *status = cmd->status;
+ if (type)
+ *type = cmd->type;
+ if (flags1)
+ *flags1 = cmd->flags1;
+ if (flags2)
+ *flags2 = cmd->flags2;
+ if (freq)
+ *freq = le32_to_cpu(cmd->freq);
+ if (phase_delay)
+ *phase_delay = le32_to_cpu(cmd->phase_delay);
+ }
+
+ return ret;
+}
+
+/**
+ * ice_aq_set_output_pin_cfg - set output pin config
+ * @hw: pointer to the HW struct
+ * @output_idx: Output index
+ * @flags: Output flags
+ * @src_sel: Index of DPLL block
+ * @freq: Output frequency
+ * @phase_delay: Output phase compensation
+ *
+ * Set CGU output config (0x0C64)
+ * Return: 0 on success or negative value on failure.
+ */
+int
+ice_aq_set_output_pin_cfg(struct ice_hw *hw, u8 output_idx, u8 flags,
+ u8 src_sel, u32 freq, s32 phase_delay)
+{
+ struct ice_aqc_set_cgu_output_config *cmd;
+ struct ice_aq_desc desc;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_cgu_output_config);
+ cmd = &desc.params.set_cgu_output_config;
+ cmd->output_idx = output_idx;
+ cmd->flags = flags;
+ cmd->src_sel = src_sel;
+ cmd->freq = cpu_to_le32(freq);
+ cmd->phase_delay = cpu_to_le32(phase_delay);
+
+ return ice_aq_send_cmd(hw, &desc, NULL, 0, NULL);
+}
+
+/**
+ * ice_aq_get_output_pin_cfg - get output pin config
+ * @hw: pointer to the HW struct
+ * @output_idx: Output index
+ * @flags: Output flags
+ * @src_sel: Internal DPLL source
+ * @freq: Output frequency
+ * @src_freq: Source frequency
+ *
+ * Get CGU output config (0x0C65)
+ * Return: 0 on success or negative value on failure.
+ */
+int
+ice_aq_get_output_pin_cfg(struct ice_hw *hw, u8 output_idx, u8 *flags,
+ u8 *src_sel, u32 *freq, u32 *src_freq)
+{
+ struct ice_aqc_get_cgu_output_config *cmd;
+ struct ice_aq_desc desc;
+ int ret;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_cgu_output_config);
+ cmd = &desc.params.get_cgu_output_config;
+ cmd->output_idx = output_idx;
+
+ ret = ice_aq_send_cmd(hw, &desc, NULL, 0, NULL);
+ if (!ret) {
+ if (flags)
+ *flags = cmd->flags;
+ if (src_sel)
+ *src_sel = cmd->src_sel;
+ if (freq)
+ *freq = le32_to_cpu(cmd->freq);
+ if (src_freq)
+ *src_freq = le32_to_cpu(cmd->src_freq);
+ }
+
+ return ret;
+}
+
+/**
+ * ice_aq_get_cgu_dpll_status - get dpll status
+ * @hw: pointer to the HW struct
+ * @dpll_num: DPLL index
+ * @ref_state: Reference clock state
+ * @config: current DPLL config
+ * @dpll_state: current DPLL state
+ * @phase_offset: Phase offset in ns
+ * @eec_mode: EEC_mode
+ *
+ * Get CGU DPLL status (0x0C66)
+ * Return: 0 on success or negative value on failure.
+ */
+int
+ice_aq_get_cgu_dpll_status(struct ice_hw *hw, u8 dpll_num, u8 *ref_state,
+ u8 *dpll_state, u8 *config, s64 *phase_offset,
+ u8 *eec_mode)
+{
+ struct ice_aqc_get_cgu_dpll_status *cmd;
+ const s64 nsec_per_psec = 1000LL;
+ struct ice_aq_desc desc;
+ int status;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_cgu_dpll_status);
+ cmd = &desc.params.get_cgu_dpll_status;
+ cmd->dpll_num = dpll_num;
+
+ status = ice_aq_send_cmd(hw, &desc, NULL, 0, NULL);
+ if (!status) {
+ *ref_state = cmd->ref_state;
+ *dpll_state = cmd->dpll_state;
+ *config = cmd->config;
+ *phase_offset = le32_to_cpu(cmd->phase_offset_h);
+ *phase_offset <<= 32;
+ *phase_offset += le32_to_cpu(cmd->phase_offset_l);
+ *phase_offset = div64_s64(sign_extend64(*phase_offset, 47),
+ nsec_per_psec);
+ *eec_mode = cmd->eec_mode;
+ }
+
+ return status;
+}
+
+/**
+ * ice_aq_set_cgu_dpll_config - set dpll config
+ * @hw: pointer to the HW struct
+ * @dpll_num: DPLL index
+ * @ref_state: Reference clock state
+ * @config: DPLL config
+ * @eec_mode: EEC mode
+ *
+ * Set CGU DPLL config (0x0C67)
+ * Return: 0 on success or negative value on failure.
+ */
+int
+ice_aq_set_cgu_dpll_config(struct ice_hw *hw, u8 dpll_num, u8 ref_state,
+ u8 config, u8 eec_mode)
+{
+ struct ice_aqc_set_cgu_dpll_config *cmd;
+ struct ice_aq_desc desc;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_cgu_dpll_config);
+ cmd = &desc.params.set_cgu_dpll_config;
+ cmd->dpll_num = dpll_num;
+ cmd->ref_state = ref_state;
+ cmd->config = config;
+ cmd->eec_mode = eec_mode;
+
+ return ice_aq_send_cmd(hw, &desc, NULL, 0, NULL);
+}
+
+/**
+ * ice_aq_set_cgu_ref_prio - set input reference priority
+ * @hw: pointer to the HW struct
+ * @dpll_num: DPLL index
+ * @ref_idx: Reference pin index
+ * @ref_priority: Reference input priority
+ *
+ * Set CGU reference priority (0x0C68)
+ * Return: 0 on success or negative value on failure.
+ */
+int
+ice_aq_set_cgu_ref_prio(struct ice_hw *hw, u8 dpll_num, u8 ref_idx,
+ u8 ref_priority)
+{
+ struct ice_aqc_set_cgu_ref_prio *cmd;
+ struct ice_aq_desc desc;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_cgu_ref_prio);
+ cmd = &desc.params.set_cgu_ref_prio;
+ cmd->dpll_num = dpll_num;
+ cmd->ref_idx = ref_idx;
+ cmd->ref_priority = ref_priority;
+
+ return ice_aq_send_cmd(hw, &desc, NULL, 0, NULL);
+}
+
+/**
+ * ice_aq_get_cgu_ref_prio - get input reference priority
+ * @hw: pointer to the HW struct
+ * @dpll_num: DPLL index
+ * @ref_idx: Reference pin index
+ * @ref_prio: Reference input priority
+ *
+ * Get CGU reference priority (0x0C69)
+ * Return: 0 on success or negative value on failure.
+ */
+int
+ice_aq_get_cgu_ref_prio(struct ice_hw *hw, u8 dpll_num, u8 ref_idx,
+ u8 *ref_prio)
+{
+ struct ice_aqc_get_cgu_ref_prio *cmd;
+ struct ice_aq_desc desc;
+ int status;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_cgu_ref_prio);
+ cmd = &desc.params.get_cgu_ref_prio;
+ cmd->dpll_num = dpll_num;
+ cmd->ref_idx = ref_idx;
+
+ status = ice_aq_send_cmd(hw, &desc, NULL, 0, NULL);
+ if (!status)
+ *ref_prio = cmd->ref_priority;
+
+ return status;
+}
+
+/**
+ * ice_aq_get_cgu_info - get cgu info
+ * @hw: pointer to the HW struct
+ * @cgu_id: CGU ID
+ * @cgu_cfg_ver: CGU config version
+ * @cgu_fw_ver: CGU firmware version
+ *
+ * Get CGU info (0x0C6A)
+ * Return: 0 on success or negative value on failure.
+ */
+int
+ice_aq_get_cgu_info(struct ice_hw *hw, u32 *cgu_id, u32 *cgu_cfg_ver,
+ u32 *cgu_fw_ver)
+{
+ struct ice_aqc_get_cgu_info *cmd;
+ struct ice_aq_desc desc;
+ int status;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_cgu_info);
+ cmd = &desc.params.get_cgu_info;
+
+ status = ice_aq_send_cmd(hw, &desc, NULL, 0, NULL);
+ if (!status) {
+ *cgu_id = le32_to_cpu(cmd->cgu_id);
+ *cgu_cfg_ver = le32_to_cpu(cmd->cgu_cfg_ver);
+ *cgu_fw_ver = le32_to_cpu(cmd->cgu_fw_ver);
+ }
+
+ return status;
+}
+
+/**
+ * ice_aq_set_phy_rec_clk_out - set RCLK phy out
+ * @hw: pointer to the HW struct
+ * @phy_output: PHY reference clock output pin
+ * @enable: GPIO state to be applied
+ * @freq: PHY output frequency
+ *
+ * Set phy recovered clock as reference (0x0630)
+ * Return: 0 on success or negative value on failure.
+ */
+int
+ice_aq_set_phy_rec_clk_out(struct ice_hw *hw, u8 phy_output, bool enable,
+ u32 *freq)
+{
+ struct ice_aqc_set_phy_rec_clk_out *cmd;
+ struct ice_aq_desc desc;
+ int status;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_phy_rec_clk_out);
+ cmd = &desc.params.set_phy_rec_clk_out;
+ cmd->phy_output = phy_output;
+ cmd->port_num = ICE_AQC_SET_PHY_REC_CLK_OUT_CURR_PORT;
+ cmd->flags = enable & ICE_AQC_SET_PHY_REC_CLK_OUT_OUT_EN;
+ cmd->freq = cpu_to_le32(*freq);
+
+ status = ice_aq_send_cmd(hw, &desc, NULL, 0, NULL);
+ if (!status)
+ *freq = le32_to_cpu(cmd->freq);
+
+ return status;
+}
+
+/**
+ * ice_aq_get_phy_rec_clk_out - get phy recovered signal info
+ * @hw: pointer to the HW struct
+ * @phy_output: PHY reference clock output pin
+ * @port_num: Port number
+ * @flags: PHY flags
+ * @node_handle: PHY output frequency
+ *
+ * Get PHY recovered clock output info (0x0631)
+ * Return: 0 on success or negative value on failure.
+ */
+int
+ice_aq_get_phy_rec_clk_out(struct ice_hw *hw, u8 *phy_output, u8 *port_num,
+ u8 *flags, u16 *node_handle)
+{
+ struct ice_aqc_get_phy_rec_clk_out *cmd;
+ struct ice_aq_desc desc;
+ int status;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_phy_rec_clk_out);
+ cmd = &desc.params.get_phy_rec_clk_out;
+ cmd->phy_output = *phy_output;
+
+ status = ice_aq_send_cmd(hw, &desc, NULL, 0, NULL);
+ if (!status) {
+ *phy_output = cmd->phy_output;
+ if (port_num)
+ *port_num = cmd->port_num;
+ if (flags)
+ *flags = cmd->flags;
+ if (node_handle)
+ *node_handle = le16_to_cpu(cmd->node_handle);
+ }
+
+ return status;
+}
+
+/**
* ice_replay_pre_init - replay pre initialization
* @hw: pointer to the HW struct
*
diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h
index 226b81f97a92..47a75651ca38 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.h
+++ b/drivers/net/ethernet/intel/ice/ice_common.h
@@ -93,6 +93,13 @@ ice_aq_get_phy_caps(struct ice_port_info *pi, bool qual_mods, u8 report_mode,
struct ice_aqc_get_phy_caps_data *caps,
struct ice_sq_cd *cd);
bool ice_is_pf_c827(struct ice_hw *hw);
+bool ice_is_gps_in_netlist(struct ice_hw *hw);
+int
+ice_find_netlist_node(struct ice_hw *hw, u8 node_type_ctx, u8 node_part_number,
+ u16 *node_handle);
+int
+ice_aq_get_netlist_node(struct ice_hw *hw, struct ice_aqc_get_link_topo *cmd,
+ u8 *node_part_number, u16 *node_handle);
int
ice_aq_list_caps(struct ice_hw *hw, void *buf, u16 buf_size, u32 *cap_count,
enum ice_adminq_opc opc, struct ice_sq_cd *cd);
@@ -196,6 +203,44 @@ void ice_output_fw_log(struct ice_hw *hw, struct ice_aq_desc *desc, void *buf);
struct ice_q_ctx *
ice_get_lan_q_ctx(struct ice_hw *hw, u16 vsi_handle, u8 tc, u16 q_handle);
int ice_sbq_rw_reg(struct ice_hw *hw, struct ice_sbq_msg_input *in);
+int
+ice_aq_get_cgu_abilities(struct ice_hw *hw,
+ struct ice_aqc_get_cgu_abilities *abilities);
+int
+ice_aq_set_input_pin_cfg(struct ice_hw *hw, u8 input_idx, u8 flags1, u8 flags2,
+ u32 freq, s32 phase_delay);
+int
+ice_aq_get_input_pin_cfg(struct ice_hw *hw, u8 input_idx, u8 *status, u8 *type,
+ u8 *flags1, u8 *flags2, u32 *freq, s32 *phase_delay);
+int
+ice_aq_set_output_pin_cfg(struct ice_hw *hw, u8 output_idx, u8 flags,
+ u8 src_sel, u32 freq, s32 phase_delay);
+int
+ice_aq_get_output_pin_cfg(struct ice_hw *hw, u8 output_idx, u8 *flags,
+ u8 *src_sel, u32 *freq, u32 *src_freq);
+int
+ice_aq_get_cgu_dpll_status(struct ice_hw *hw, u8 dpll_num, u8 *ref_state,
+ u8 *dpll_state, u8 *config, s64 *phase_offset,
+ u8 *eec_mode);
+int
+ice_aq_set_cgu_dpll_config(struct ice_hw *hw, u8 dpll_num, u8 ref_state,
+ u8 config, u8 eec_mode);
+int
+ice_aq_set_cgu_ref_prio(struct ice_hw *hw, u8 dpll_num, u8 ref_idx,
+ u8 ref_priority);
+int
+ice_aq_get_cgu_ref_prio(struct ice_hw *hw, u8 dpll_num, u8 ref_idx,
+ u8 *ref_prio);
+int
+ice_aq_get_cgu_info(struct ice_hw *hw, u32 *cgu_id, u32 *cgu_cfg_ver,
+ u32 *cgu_fw_ver);
+
+int
+ice_aq_set_phy_rec_clk_out(struct ice_hw *hw, u8 phy_output, bool enable,
+ u32 *freq);
+int
+ice_aq_get_phy_rec_clk_out(struct ice_hw *hw, u8 *phy_output, u8 *port_num,
+ u8 *flags, u16 *node_handle);
void
ice_stat_update40(struct ice_hw *hw, u32 reg, bool prev_stat_loaded,
u64 *prev_stat, u64 *cur_stat);
diff --git a/drivers/net/ethernet/intel/ice/ice_dpll.c b/drivers/net/ethernet/intel/ice/ice_dpll.c
new file mode 100644
index 000000000000..1faee9cb944d
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_dpll.c
@@ -0,0 +1,1904 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2022, Intel Corporation. */
+
+#include "ice.h"
+#include "ice_lib.h"
+#include "ice_trace.h"
+#include <linux/dpll.h>
+
+#define ICE_CGU_STATE_ACQ_ERR_THRESHOLD 50
+#define ICE_DPLL_PIN_IDX_INVALID 0xff
+#define ICE_DPLL_RCLK_NUM_PER_PF 1
+
+/**
+ * enum ice_dpll_pin_type - enumerate ice pin types:
+ * @ICE_DPLL_PIN_INVALID: invalid pin type
+ * @ICE_DPLL_PIN_TYPE_INPUT: input pin
+ * @ICE_DPLL_PIN_TYPE_OUTPUT: output pin
+ * @ICE_DPLL_PIN_TYPE_RCLK_INPUT: recovery clock input pin
+ */
+enum ice_dpll_pin_type {
+ ICE_DPLL_PIN_INVALID,
+ ICE_DPLL_PIN_TYPE_INPUT,
+ ICE_DPLL_PIN_TYPE_OUTPUT,
+ ICE_DPLL_PIN_TYPE_RCLK_INPUT,
+};
+
+static const char * const pin_type_name[] = {
+ [ICE_DPLL_PIN_TYPE_INPUT] = "input",
+ [ICE_DPLL_PIN_TYPE_OUTPUT] = "output",
+ [ICE_DPLL_PIN_TYPE_RCLK_INPUT] = "rclk-input",
+};
+
+/**
+ * ice_dpll_pin_freq_set - set pin's frequency
+ * @pf: private board structure
+ * @pin: pointer to a pin
+ * @pin_type: type of pin being configured
+ * @freq: frequency to be set
+ * @extack: error reporting
+ *
+ * Set requested frequency on a pin.
+ *
+ * Context: Called under pf->dplls.lock
+ * Return:
+ * * 0 - success
+ * * negative - error on AQ or wrong pin type given
+ */
+static int
+ice_dpll_pin_freq_set(struct ice_pf *pf, struct ice_dpll_pin *pin,
+ enum ice_dpll_pin_type pin_type, const u32 freq,
+ struct netlink_ext_ack *extack)
+{
+ u8 flags;
+ int ret;
+
+ switch (pin_type) {
+ case ICE_DPLL_PIN_TYPE_INPUT:
+ flags = ICE_AQC_SET_CGU_IN_CFG_FLG1_UPDATE_FREQ;
+ ret = ice_aq_set_input_pin_cfg(&pf->hw, pin->idx, flags,
+ pin->flags[0], freq, 0);
+ break;
+ case ICE_DPLL_PIN_TYPE_OUTPUT:
+ flags = ICE_AQC_SET_CGU_OUT_CFG_UPDATE_FREQ;
+ ret = ice_aq_set_output_pin_cfg(&pf->hw, pin->idx, flags,
+ 0, freq, 0);
+ break;
+ default:
+ return -EINVAL;
+ }
+ if (ret) {
+ NL_SET_ERR_MSG_FMT(extack,
+ "err:%d %s failed to set pin freq:%u on pin:%u\n",
+ ret,
+ ice_aq_str(pf->hw.adminq.sq_last_status),
+ freq, pin->idx);
+ return ret;
+ }
+ pin->freq = freq;
+
+ return 0;
+}
+
+/**
+ * ice_dpll_frequency_set - wrapper for pin callback for set frequency
+ * @pin: pointer to a pin
+ * @pin_priv: private data pointer passed on pin registration
+ * @dpll: pointer to dpll
+ * @dpll_priv: private data pointer passed on dpll registration
+ * @frequency: frequency to be set
+ * @extack: error reporting
+ * @pin_type: type of pin being configured
+ *
+ * Wraps internal set frequency command on a pin.
+ *
+ * Context: Acquires pf->dplls.lock
+ * Return:
+ * * 0 - success
+ * * negative - error pin not found or couldn't set in hw
+ */
+static int
+ice_dpll_frequency_set(const struct dpll_pin *pin, void *pin_priv,
+ const struct dpll_device *dpll, void *dpll_priv,
+ const u32 frequency,
+ struct netlink_ext_ack *extack,
+ enum ice_dpll_pin_type pin_type)
+{
+ struct ice_dpll_pin *p = pin_priv;
+ struct ice_dpll *d = dpll_priv;
+ struct ice_pf *pf = d->pf;
+ int ret;
+
+ mutex_lock(&pf->dplls.lock);
+ ret = ice_dpll_pin_freq_set(pf, p, pin_type, frequency, extack);
+ mutex_unlock(&pf->dplls.lock);
+
+ return ret;
+}
+
+/**
+ * ice_dpll_input_frequency_set - input pin callback for set frequency
+ * @pin: pointer to a pin
+ * @pin_priv: private data pointer passed on pin registration
+ * @dpll: pointer to dpll
+ * @dpll_priv: private data pointer passed on dpll registration
+ * @frequency: frequency to be set
+ * @extack: error reporting
+ *
+ * Wraps internal set frequency command on a pin.
+ *
+ * Context: Calls a function which acquires pf->dplls.lock
+ * Return:
+ * * 0 - success
+ * * negative - error pin not found or couldn't set in hw
+ */
+static int
+ice_dpll_input_frequency_set(const struct dpll_pin *pin, void *pin_priv,
+ const struct dpll_device *dpll, void *dpll_priv,
+ u64 frequency, struct netlink_ext_ack *extack)
+{
+ return ice_dpll_frequency_set(pin, pin_priv, dpll, dpll_priv, frequency,
+ extack, ICE_DPLL_PIN_TYPE_INPUT);
+}
+
+/**
+ * ice_dpll_output_frequency_set - output pin callback for set frequency
+ * @pin: pointer to a pin
+ * @pin_priv: private data pointer passed on pin registration
+ * @dpll: pointer to dpll
+ * @dpll_priv: private data pointer passed on dpll registration
+ * @frequency: frequency to be set
+ * @extack: error reporting
+ *
+ * Wraps internal set frequency command on a pin.
+ *
+ * Context: Calls a function which acquires pf->dplls.lock
+ * Return:
+ * * 0 - success
+ * * negative - error pin not found or couldn't set in hw
+ */
+static int
+ice_dpll_output_frequency_set(const struct dpll_pin *pin, void *pin_priv,
+ const struct dpll_device *dpll, void *dpll_priv,
+ u64 frequency, struct netlink_ext_ack *extack)
+{
+ return ice_dpll_frequency_set(pin, pin_priv, dpll, dpll_priv, frequency,
+ extack, ICE_DPLL_PIN_TYPE_OUTPUT);
+}
+
+/**
+ * ice_dpll_frequency_get - wrapper for pin callback for get frequency
+ * @pin: pointer to a pin
+ * @pin_priv: private data pointer passed on pin registration
+ * @dpll: pointer to dpll
+ * @dpll_priv: private data pointer passed on dpll registration
+ * @frequency: on success holds pin's frequency
+ * @extack: error reporting
+ * @pin_type: type of pin being configured
+ *
+ * Wraps internal get frequency command of a pin.
+ *
+ * Context: Acquires pf->dplls.lock
+ * Return:
+ * * 0 - success
+ * * negative - error pin not found or couldn't get from hw
+ */
+static int
+ice_dpll_frequency_get(const struct dpll_pin *pin, void *pin_priv,
+ const struct dpll_device *dpll, void *dpll_priv,
+ u64 *frequency, struct netlink_ext_ack *extack,
+ enum ice_dpll_pin_type pin_type)
+{
+ struct ice_dpll_pin *p = pin_priv;
+ struct ice_dpll *d = dpll_priv;
+ struct ice_pf *pf = d->pf;
+
+ mutex_lock(&pf->dplls.lock);
+ *frequency = p->freq;
+ mutex_unlock(&pf->dplls.lock);
+
+ return 0;
+}
+
+/**
+ * ice_dpll_input_frequency_get - input pin callback for get frequency
+ * @pin: pointer to a pin
+ * @pin_priv: private data pointer passed on pin registration
+ * @dpll: pointer to dpll
+ * @dpll_priv: private data pointer passed on dpll registration
+ * @frequency: on success holds pin's frequency
+ * @extack: error reporting
+ *
+ * Wraps internal get frequency command of a input pin.
+ *
+ * Context: Calls a function which acquires pf->dplls.lock
+ * Return:
+ * * 0 - success
+ * * negative - error pin not found or couldn't get from hw
+ */
+static int
+ice_dpll_input_frequency_get(const struct dpll_pin *pin, void *pin_priv,
+ const struct dpll_device *dpll, void *dpll_priv,
+ u64 *frequency, struct netlink_ext_ack *extack)
+{
+ return ice_dpll_frequency_get(pin, pin_priv, dpll, dpll_priv, frequency,
+ extack, ICE_DPLL_PIN_TYPE_INPUT);
+}
+
+/**
+ * ice_dpll_output_frequency_get - output pin callback for get frequency
+ * @pin: pointer to a pin
+ * @pin_priv: private data pointer passed on pin registration
+ * @dpll: pointer to dpll
+ * @dpll_priv: private data pointer passed on dpll registration
+ * @frequency: on success holds pin's frequency
+ * @extack: error reporting
+ *
+ * Wraps internal get frequency command of a pin.
+ *
+ * Context: Calls a function which acquires pf->dplls.lock
+ * Return:
+ * * 0 - success
+ * * negative - error pin not found or couldn't get from hw
+ */
+static int
+ice_dpll_output_frequency_get(const struct dpll_pin *pin, void *pin_priv,
+ const struct dpll_device *dpll, void *dpll_priv,
+ u64 *frequency, struct netlink_ext_ack *extack)
+{
+ return ice_dpll_frequency_get(pin, pin_priv, dpll, dpll_priv, frequency,
+ extack, ICE_DPLL_PIN_TYPE_OUTPUT);
+}
+
+/**
+ * ice_dpll_pin_enable - enable a pin on dplls
+ * @hw: board private hw structure
+ * @pin: pointer to a pin
+ * @pin_type: type of pin being enabled
+ * @extack: error reporting
+ *
+ * Enable a pin on both dplls. Store current state in pin->flags.
+ *
+ * Context: Called under pf->dplls.lock
+ * Return:
+ * * 0 - OK
+ * * negative - error
+ */
+static int
+ice_dpll_pin_enable(struct ice_hw *hw, struct ice_dpll_pin *pin,
+ enum ice_dpll_pin_type pin_type,
+ struct netlink_ext_ack *extack)
+{
+ u8 flags = 0;
+ int ret;
+
+ switch (pin_type) {
+ case ICE_DPLL_PIN_TYPE_INPUT:
+ if (pin->flags[0] & ICE_AQC_GET_CGU_IN_CFG_FLG2_ESYNC_EN)
+ flags |= ICE_AQC_SET_CGU_IN_CFG_FLG2_ESYNC_EN;
+ flags |= ICE_AQC_SET_CGU_IN_CFG_FLG2_INPUT_EN;
+ ret = ice_aq_set_input_pin_cfg(hw, pin->idx, 0, flags, 0, 0);
+ break;
+ case ICE_DPLL_PIN_TYPE_OUTPUT:
+ if (pin->flags[0] & ICE_AQC_GET_CGU_OUT_CFG_ESYNC_EN)
+ flags |= ICE_AQC_SET_CGU_OUT_CFG_ESYNC_EN;
+ flags |= ICE_AQC_SET_CGU_OUT_CFG_OUT_EN;
+ ret = ice_aq_set_output_pin_cfg(hw, pin->idx, flags, 0, 0, 0);
+ break;
+ default:
+ return -EINVAL;
+ }
+ if (ret)
+ NL_SET_ERR_MSG_FMT(extack,
+ "err:%d %s failed to enable %s pin:%u\n",
+ ret, ice_aq_str(hw->adminq.sq_last_status),
+ pin_type_name[pin_type], pin->idx);
+
+ return ret;
+}
+
+/**
+ * ice_dpll_pin_disable - disable a pin on dplls
+ * @hw: board private hw structure
+ * @pin: pointer to a pin
+ * @pin_type: type of pin being disabled
+ * @extack: error reporting
+ *
+ * Disable a pin on both dplls. Store current state in pin->flags.
+ *
+ * Context: Called under pf->dplls.lock
+ * Return:
+ * * 0 - OK
+ * * negative - error
+ */
+static int
+ice_dpll_pin_disable(struct ice_hw *hw, struct ice_dpll_pin *pin,
+ enum ice_dpll_pin_type pin_type,
+ struct netlink_ext_ack *extack)
+{
+ u8 flags = 0;
+ int ret;
+
+ switch (pin_type) {
+ case ICE_DPLL_PIN_TYPE_INPUT:
+ if (pin->flags[0] & ICE_AQC_GET_CGU_IN_CFG_FLG2_ESYNC_EN)
+ flags |= ICE_AQC_SET_CGU_IN_CFG_FLG2_ESYNC_EN;
+ ret = ice_aq_set_input_pin_cfg(hw, pin->idx, 0, flags, 0, 0);
+ break;
+ case ICE_DPLL_PIN_TYPE_OUTPUT:
+ if (pin->flags[0] & ICE_AQC_GET_CGU_OUT_CFG_ESYNC_EN)
+ flags |= ICE_AQC_SET_CGU_OUT_CFG_ESYNC_EN;
+ ret = ice_aq_set_output_pin_cfg(hw, pin->idx, flags, 0, 0, 0);
+ break;
+ default:
+ return -EINVAL;
+ }
+ if (ret)
+ NL_SET_ERR_MSG_FMT(extack,
+ "err:%d %s failed to disable %s pin:%u\n",
+ ret, ice_aq_str(hw->adminq.sq_last_status),
+ pin_type_name[pin_type], pin->idx);
+
+ return ret;
+}
+
+/**
+ * ice_dpll_pin_state_update - update pin's state
+ * @pf: private board struct
+ * @pin: structure with pin attributes to be updated
+ * @pin_type: type of pin being updated
+ * @extack: error reporting
+ *
+ * Determine pin current state and frequency, then update struct
+ * holding the pin info. For input pin states are separated for each
+ * dpll, for rclk pins states are separated for each parent.
+ *
+ * Context: Called under pf->dplls.lock
+ * Return:
+ * * 0 - OK
+ * * negative - error
+ */
+static int
+ice_dpll_pin_state_update(struct ice_pf *pf, struct ice_dpll_pin *pin,
+ enum ice_dpll_pin_type pin_type,
+ struct netlink_ext_ack *extack)
+{
+ u8 parent, port_num = ICE_AQC_SET_PHY_REC_CLK_OUT_CURR_PORT;
+ int ret;
+
+ switch (pin_type) {
+ case ICE_DPLL_PIN_TYPE_INPUT:
+ ret = ice_aq_get_input_pin_cfg(&pf->hw, pin->idx, NULL, NULL,
+ NULL, &pin->flags[0],
+ &pin->freq, NULL);
+ if (ret)
+ goto err;
+ if (ICE_AQC_GET_CGU_IN_CFG_FLG2_INPUT_EN & pin->flags[0]) {
+ if (pin->pin) {
+ pin->state[pf->dplls.eec.dpll_idx] =
+ pin->pin == pf->dplls.eec.active_input ?
+ DPLL_PIN_STATE_CONNECTED :
+ DPLL_PIN_STATE_SELECTABLE;
+ pin->state[pf->dplls.pps.dpll_idx] =
+ pin->pin == pf->dplls.pps.active_input ?
+ DPLL_PIN_STATE_CONNECTED :
+ DPLL_PIN_STATE_SELECTABLE;
+ } else {
+ pin->state[pf->dplls.eec.dpll_idx] =
+ DPLL_PIN_STATE_SELECTABLE;
+ pin->state[pf->dplls.pps.dpll_idx] =
+ DPLL_PIN_STATE_SELECTABLE;
+ }
+ } else {
+ pin->state[pf->dplls.eec.dpll_idx] =
+ DPLL_PIN_STATE_DISCONNECTED;
+ pin->state[pf->dplls.pps.dpll_idx] =
+ DPLL_PIN_STATE_DISCONNECTED;
+ }
+ break;
+ case ICE_DPLL_PIN_TYPE_OUTPUT:
+ ret = ice_aq_get_output_pin_cfg(&pf->hw, pin->idx,
+ &pin->flags[0], NULL,
+ &pin->freq, NULL);
+ if (ret)
+ goto err;
+ if (ICE_AQC_SET_CGU_OUT_CFG_OUT_EN & pin->flags[0])
+ pin->state[0] = DPLL_PIN_STATE_CONNECTED;
+ else
+ pin->state[0] = DPLL_PIN_STATE_DISCONNECTED;
+ break;
+ case ICE_DPLL_PIN_TYPE_RCLK_INPUT:
+ for (parent = 0; parent < pf->dplls.rclk.num_parents;
+ parent++) {
+ u8 p = parent;
+
+ ret = ice_aq_get_phy_rec_clk_out(&pf->hw, &p,
+ &port_num,
+ &pin->flags[parent],
+ NULL);
+ if (ret)
+ goto err;
+ if (ICE_AQC_GET_PHY_REC_CLK_OUT_OUT_EN &
+ pin->flags[parent])
+ pin->state[parent] = DPLL_PIN_STATE_CONNECTED;
+ else
+ pin->state[parent] =
+ DPLL_PIN_STATE_DISCONNECTED;
+ }
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+err:
+ if (extack)
+ NL_SET_ERR_MSG_FMT(extack,
+ "err:%d %s failed to update %s pin:%u\n",
+ ret,
+ ice_aq_str(pf->hw.adminq.sq_last_status),
+ pin_type_name[pin_type], pin->idx);
+ else
+ dev_err_ratelimited(ice_pf_to_dev(pf),
+ "err:%d %s failed to update %s pin:%u\n",
+ ret,
+ ice_aq_str(pf->hw.adminq.sq_last_status),
+ pin_type_name[pin_type], pin->idx);
+ return ret;
+}
+
+/**
+ * ice_dpll_hw_input_prio_set - set input priority value in hardware
+ * @pf: board private structure
+ * @dpll: ice dpll pointer
+ * @pin: ice pin pointer
+ * @prio: priority value being set on a dpll
+ * @extack: error reporting
+ *
+ * Internal wrapper for setting the priority in the hardware.
+ *
+ * Context: Called under pf->dplls.lock
+ * Return:
+ * * 0 - success
+ * * negative - failure
+ */
+static int
+ice_dpll_hw_input_prio_set(struct ice_pf *pf, struct ice_dpll *dpll,
+ struct ice_dpll_pin *pin, const u32 prio,
+ struct netlink_ext_ack *extack)
+{
+ int ret;
+
+ ret = ice_aq_set_cgu_ref_prio(&pf->hw, dpll->dpll_idx, pin->idx,
+ (u8)prio);
+ if (ret)
+ NL_SET_ERR_MSG_FMT(extack,
+ "err:%d %s failed to set pin prio:%u on pin:%u\n",
+ ret,
+ ice_aq_str(pf->hw.adminq.sq_last_status),
+ prio, pin->idx);
+ else
+ dpll->input_prio[pin->idx] = prio;
+
+ return ret;
+}
+
+/**
+ * ice_dpll_lock_status_get - get dpll lock status callback
+ * @dpll: registered dpll pointer
+ * @dpll_priv: private data pointer passed on dpll registration
+ * @status: on success holds dpll's lock status
+ * @extack: error reporting
+ *
+ * Dpll subsystem callback, provides dpll's lock status.
+ *
+ * Context: Acquires pf->dplls.lock
+ * Return:
+ * * 0 - success
+ * * negative - failure
+ */
+static int
+ice_dpll_lock_status_get(const struct dpll_device *dpll, void *dpll_priv,
+ enum dpll_lock_status *status,
+ struct netlink_ext_ack *extack)
+{
+ struct ice_dpll *d = dpll_priv;
+ struct ice_pf *pf = d->pf;
+
+ mutex_lock(&pf->dplls.lock);
+ *status = d->dpll_state;
+ mutex_unlock(&pf->dplls.lock);
+
+ return 0;
+}
+
+/**
+ * ice_dpll_mode_supported - check if dpll's working mode is supported
+ * @dpll: registered dpll pointer
+ * @dpll_priv: private data pointer passed on dpll registration
+ * @mode: mode to be checked for support
+ * @extack: error reporting
+ *
+ * Dpll subsystem callback. Provides information if working mode is supported
+ * by dpll.
+ *
+ * Return:
+ * * true - mode is supported
+ * * false - mode is not supported
+ */
+static bool ice_dpll_mode_supported(const struct dpll_device *dpll,
+ void *dpll_priv,
+ enum dpll_mode mode,
+ struct netlink_ext_ack *extack)
+{
+ if (mode == DPLL_MODE_AUTOMATIC)
+ return true;
+
+ return false;
+}
+
+/**
+ * ice_dpll_mode_get - get dpll's working mode
+ * @dpll: registered dpll pointer
+ * @dpll_priv: private data pointer passed on dpll registration
+ * @mode: on success holds current working mode of dpll
+ * @extack: error reporting
+ *
+ * Dpll subsystem callback. Provides working mode of dpll.
+ *
+ * Context: Acquires pf->dplls.lock
+ * Return:
+ * * 0 - success
+ * * negative - failure
+ */
+static int ice_dpll_mode_get(const struct dpll_device *dpll, void *dpll_priv,
+ enum dpll_mode *mode,
+ struct netlink_ext_ack *extack)
+{
+ struct ice_dpll *d = dpll_priv;
+ struct ice_pf *pf = d->pf;
+
+ mutex_lock(&pf->dplls.lock);
+ *mode = d->mode;
+ mutex_unlock(&pf->dplls.lock);
+
+ return 0;
+}
+
+/**
+ * ice_dpll_pin_state_set - set pin's state on dpll
+ * @pin: pointer to a pin
+ * @pin_priv: private data pointer passed on pin registration
+ * @dpll: registered dpll pointer
+ * @dpll_priv: private data pointer passed on dpll registration
+ * @enable: if pin shalll be enabled
+ * @extack: error reporting
+ * @pin_type: type of a pin
+ *
+ * Set pin state on a pin.
+ *
+ * Context: Acquires pf->dplls.lock
+ * Return:
+ * * 0 - OK or no change required
+ * * negative - error
+ */
+static int
+ice_dpll_pin_state_set(const struct dpll_pin *pin, void *pin_priv,
+ const struct dpll_device *dpll, void *dpll_priv,
+ bool enable, struct netlink_ext_ack *extack,
+ enum ice_dpll_pin_type pin_type)
+{
+ struct ice_dpll_pin *p = pin_priv;
+ struct ice_dpll *d = dpll_priv;
+ struct ice_pf *pf = d->pf;
+ int ret;
+
+ mutex_lock(&pf->dplls.lock);
+ if (enable)
+ ret = ice_dpll_pin_enable(&pf->hw, p, pin_type, extack);
+ else
+ ret = ice_dpll_pin_disable(&pf->hw, p, pin_type, extack);
+ if (!ret)
+ ret = ice_dpll_pin_state_update(pf, p, pin_type, extack);
+ mutex_unlock(&pf->dplls.lock);
+
+ return ret;
+}
+
+/**
+ * ice_dpll_output_state_set - enable/disable output pin on dpll device
+ * @pin: pointer to a pin
+ * @pin_priv: private data pointer passed on pin registration
+ * @dpll: dpll being configured
+ * @dpll_priv: private data pointer passed on dpll registration
+ * @state: state of pin to be set
+ * @extack: error reporting
+ *
+ * Dpll subsystem callback. Set given state on output type pin.
+ *
+ * Context: Calls a function which acquires pf->dplls.lock
+ * Return:
+ * * 0 - successfully enabled mode
+ * * negative - failed to enable mode
+ */
+static int
+ice_dpll_output_state_set(const struct dpll_pin *pin, void *pin_priv,
+ const struct dpll_device *dpll, void *dpll_priv,
+ enum dpll_pin_state state,
+ struct netlink_ext_ack *extack)
+{
+ bool enable = state == DPLL_PIN_STATE_CONNECTED;
+
+ return ice_dpll_pin_state_set(pin, pin_priv, dpll, dpll_priv, enable,
+ extack, ICE_DPLL_PIN_TYPE_OUTPUT);
+}
+
+/**
+ * ice_dpll_input_state_set - enable/disable input pin on dpll levice
+ * @pin: pointer to a pin
+ * @pin_priv: private data pointer passed on pin registration
+ * @dpll: dpll being configured
+ * @dpll_priv: private data pointer passed on dpll registration
+ * @state: state of pin to be set
+ * @extack: error reporting
+ *
+ * Dpll subsystem callback. Enables given mode on input type pin.
+ *
+ * Context: Calls a function which acquires pf->dplls.lock
+ * Return:
+ * * 0 - successfully enabled mode
+ * * negative - failed to enable mode
+ */
+static int
+ice_dpll_input_state_set(const struct dpll_pin *pin, void *pin_priv,
+ const struct dpll_device *dpll, void *dpll_priv,
+ enum dpll_pin_state state,
+ struct netlink_ext_ack *extack)
+{
+ bool enable = state == DPLL_PIN_STATE_SELECTABLE;
+
+ return ice_dpll_pin_state_set(pin, pin_priv, dpll, dpll_priv, enable,
+ extack, ICE_DPLL_PIN_TYPE_INPUT);
+}
+
+/**
+ * ice_dpll_pin_state_get - set pin's state on dpll
+ * @pin: pointer to a pin
+ * @pin_priv: private data pointer passed on pin registration
+ * @dpll: registered dpll pointer
+ * @dpll_priv: private data pointer passed on dpll registration
+ * @state: on success holds state of the pin
+ * @extack: error reporting
+ * @pin_type: type of questioned pin
+ *
+ * Determine pin state set it on a pin.
+ *
+ * Context: Acquires pf->dplls.lock
+ * Return:
+ * * 0 - success
+ * * negative - failed to get state
+ */
+static int
+ice_dpll_pin_state_get(const struct dpll_pin *pin, void *pin_priv,
+ const struct dpll_device *dpll, void *dpll_priv,
+ enum dpll_pin_state *state,
+ struct netlink_ext_ack *extack,
+ enum ice_dpll_pin_type pin_type)
+{
+ struct ice_dpll_pin *p = pin_priv;
+ struct ice_dpll *d = dpll_priv;
+ struct ice_pf *pf = d->pf;
+ int ret;
+
+ mutex_lock(&pf->dplls.lock);
+ ret = ice_dpll_pin_state_update(pf, p, pin_type, extack);
+ if (ret)
+ goto unlock;
+ if (pin_type == ICE_DPLL_PIN_TYPE_INPUT)
+ *state = p->state[d->dpll_idx];
+ else if (pin_type == ICE_DPLL_PIN_TYPE_OUTPUT)
+ *state = p->state[0];
+ ret = 0;
+unlock:
+ mutex_unlock(&pf->dplls.lock);
+
+ return ret;
+}
+
+/**
+ * ice_dpll_output_state_get - get output pin state on dpll device
+ * @pin: pointer to a pin
+ * @pin_priv: private data pointer passed on pin registration
+ * @dpll: registered dpll pointer
+ * @dpll_priv: private data pointer passed on dpll registration
+ * @state: on success holds state of the pin
+ * @extack: error reporting
+ *
+ * Dpll subsystem callback. Check state of a pin.
+ *
+ * Context: Calls a function which acquires pf->dplls.lock
+ * Return:
+ * * 0 - success
+ * * negative - failed to get state
+ */
+static int
+ice_dpll_output_state_get(const struct dpll_pin *pin, void *pin_priv,
+ const struct dpll_device *dpll, void *dpll_priv,
+ enum dpll_pin_state *state,
+ struct netlink_ext_ack *extack)
+{
+ return ice_dpll_pin_state_get(pin, pin_priv, dpll, dpll_priv, state,
+ extack, ICE_DPLL_PIN_TYPE_OUTPUT);
+}
+
+/**
+ * ice_dpll_input_state_get - get input pin state on dpll device
+ * @pin: pointer to a pin
+ * @pin_priv: private data pointer passed on pin registration
+ * @dpll: registered dpll pointer
+ * @dpll_priv: private data pointer passed on dpll registration
+ * @state: on success holds state of the pin
+ * @extack: error reporting
+ *
+ * Dpll subsystem callback. Check state of a input pin.
+ *
+ * Context: Calls a function which acquires pf->dplls.lock
+ * Return:
+ * * 0 - success
+ * * negative - failed to get state
+ */
+static int
+ice_dpll_input_state_get(const struct dpll_pin *pin, void *pin_priv,
+ const struct dpll_device *dpll, void *dpll_priv,
+ enum dpll_pin_state *state,
+ struct netlink_ext_ack *extack)
+{
+ return ice_dpll_pin_state_get(pin, pin_priv, dpll, dpll_priv, state,
+ extack, ICE_DPLL_PIN_TYPE_INPUT);
+}
+
+/**
+ * ice_dpll_input_prio_get - get dpll's input prio
+ * @pin: pointer to a pin
+ * @pin_priv: private data pointer passed on pin registration
+ * @dpll: registered dpll pointer
+ * @dpll_priv: private data pointer passed on dpll registration
+ * @prio: on success - returns input priority on dpll
+ * @extack: error reporting
+ *
+ * Dpll subsystem callback. Handler for getting priority of a input pin.
+ *
+ * Context: Acquires pf->dplls.lock
+ * Return:
+ * * 0 - success
+ * * negative - failure
+ */
+static int
+ice_dpll_input_prio_get(const struct dpll_pin *pin, void *pin_priv,
+ const struct dpll_device *dpll, void *dpll_priv,
+ u32 *prio, struct netlink_ext_ack *extack)
+{
+ struct ice_dpll_pin *p = pin_priv;
+ struct ice_dpll *d = dpll_priv;
+ struct ice_pf *pf = d->pf;
+
+ mutex_lock(&pf->dplls.lock);
+ *prio = d->input_prio[p->idx];
+ mutex_unlock(&pf->dplls.lock);
+
+ return 0;
+}
+
+/**
+ * ice_dpll_input_prio_set - set dpll input prio
+ * @pin: pointer to a pin
+ * @pin_priv: private data pointer passed on pin registration
+ * @dpll: registered dpll pointer
+ * @dpll_priv: private data pointer passed on dpll registration
+ * @prio: input priority to be set on dpll
+ * @extack: error reporting
+ *
+ * Dpll subsystem callback. Handler for setting priority of a input pin.
+ *
+ * Context: Acquires pf->dplls.lock
+ * Return:
+ * * 0 - success
+ * * negative - failure
+ */
+static int
+ice_dpll_input_prio_set(const struct dpll_pin *pin, void *pin_priv,
+ const struct dpll_device *dpll, void *dpll_priv,
+ u32 prio, struct netlink_ext_ack *extack)
+{
+ struct ice_dpll_pin *p = pin_priv;
+ struct ice_dpll *d = dpll_priv;
+ struct ice_pf *pf = d->pf;
+ int ret;
+
+ if (prio > ICE_DPLL_PRIO_MAX) {
+ NL_SET_ERR_MSG_FMT(extack, "prio out of supported range 0-%d",
+ ICE_DPLL_PRIO_MAX);
+ return -EINVAL;
+ }
+
+ mutex_lock(&pf->dplls.lock);
+ ret = ice_dpll_hw_input_prio_set(pf, d, p, prio, extack);
+ mutex_unlock(&pf->dplls.lock);
+
+ return ret;
+}
+
+/**
+ * ice_dpll_input_direction - callback for get input pin direction
+ * @pin: pointer to a pin
+ * @pin_priv: private data pointer passed on pin registration
+ * @dpll: registered dpll pointer
+ * @dpll_priv: private data pointer passed on dpll registration
+ * @direction: holds input pin direction
+ * @extack: error reporting
+ *
+ * Dpll subsystem callback. Handler for getting direction of a input pin.
+ *
+ * Return:
+ * * 0 - success
+ */
+static int
+ice_dpll_input_direction(const struct dpll_pin *pin, void *pin_priv,
+ const struct dpll_device *dpll, void *dpll_priv,
+ enum dpll_pin_direction *direction,
+ struct netlink_ext_ack *extack)
+{
+ *direction = DPLL_PIN_DIRECTION_INPUT;
+
+ return 0;
+}
+
+/**
+ * ice_dpll_output_direction - callback for get output pin direction
+ * @pin: pointer to a pin
+ * @pin_priv: private data pointer passed on pin registration
+ * @dpll: registered dpll pointer
+ * @dpll_priv: private data pointer passed on dpll registration
+ * @direction: holds output pin direction
+ * @extack: error reporting
+ *
+ * Dpll subsystem callback. Handler for getting direction of an output pin.
+ *
+ * Return:
+ * * 0 - success
+ */
+static int
+ice_dpll_output_direction(const struct dpll_pin *pin, void *pin_priv,
+ const struct dpll_device *dpll, void *dpll_priv,
+ enum dpll_pin_direction *direction,
+ struct netlink_ext_ack *extack)
+{
+ *direction = DPLL_PIN_DIRECTION_OUTPUT;
+
+ return 0;
+}
+
+/**
+ * ice_dpll_rclk_state_on_pin_set - set a state on rclk pin
+ * @pin: pointer to a pin
+ * @pin_priv: private data pointer passed on pin registration
+ * @parent_pin: pin parent pointer
+ * @parent_pin_priv: parent private data pointer passed on pin registration
+ * @state: state to be set on pin
+ * @extack: error reporting
+ *
+ * Dpll subsystem callback, set a state of a rclk pin on a parent pin
+ *
+ * Context: Acquires pf->dplls.lock
+ * Return:
+ * * 0 - success
+ * * negative - failure
+ */
+static int
+ice_dpll_rclk_state_on_pin_set(const struct dpll_pin *pin, void *pin_priv,
+ const struct dpll_pin *parent_pin,
+ void *parent_pin_priv,
+ enum dpll_pin_state state,
+ struct netlink_ext_ack *extack)
+{
+ struct ice_dpll_pin *p = pin_priv, *parent = parent_pin_priv;
+ bool enable = state == DPLL_PIN_STATE_CONNECTED;
+ struct ice_pf *pf = p->pf;
+ int ret = -EINVAL;
+ u32 hw_idx;
+
+ mutex_lock(&pf->dplls.lock);
+ hw_idx = parent->idx - pf->dplls.base_rclk_idx;
+ if (hw_idx >= pf->dplls.num_inputs)
+ goto unlock;
+
+ if ((enable && p->state[hw_idx] == DPLL_PIN_STATE_CONNECTED) ||
+ (!enable && p->state[hw_idx] == DPLL_PIN_STATE_DISCONNECTED)) {
+ NL_SET_ERR_MSG_FMT(extack,
+ "pin:%u state:%u on parent:%u already set",
+ p->idx, state, parent->idx);
+ goto unlock;
+ }
+ ret = ice_aq_set_phy_rec_clk_out(&pf->hw, hw_idx, enable,
+ &p->freq);
+ if (ret)
+ NL_SET_ERR_MSG_FMT(extack,
+ "err:%d %s failed to set pin state:%u for pin:%u on parent:%u\n",
+ ret,
+ ice_aq_str(pf->hw.adminq.sq_last_status),
+ state, p->idx, parent->idx);
+unlock:
+ mutex_unlock(&pf->dplls.lock);
+
+ return ret;
+}
+
+/**
+ * ice_dpll_rclk_state_on_pin_get - get a state of rclk pin
+ * @pin: pointer to a pin
+ * @pin_priv: private data pointer passed on pin registration
+ * @parent_pin: pin parent pointer
+ * @parent_pin_priv: pin parent priv data pointer passed on pin registration
+ * @state: on success holds pin state on parent pin
+ * @extack: error reporting
+ *
+ * dpll subsystem callback, get a state of a recovered clock pin.
+ *
+ * Context: Acquires pf->dplls.lock
+ * Return:
+ * * 0 - success
+ * * negative - failure
+ */
+static int
+ice_dpll_rclk_state_on_pin_get(const struct dpll_pin *pin, void *pin_priv,
+ const struct dpll_pin *parent_pin,
+ void *parent_pin_priv,
+ enum dpll_pin_state *state,
+ struct netlink_ext_ack *extack)
+{
+ struct ice_dpll_pin *p = pin_priv, *parent = parent_pin_priv;
+ struct ice_pf *pf = p->pf;
+ int ret = -EINVAL;
+ u32 hw_idx;
+
+ mutex_lock(&pf->dplls.lock);
+ hw_idx = parent->idx - pf->dplls.base_rclk_idx;
+ if (hw_idx >= pf->dplls.num_inputs)
+ goto unlock;
+
+ ret = ice_dpll_pin_state_update(pf, p, ICE_DPLL_PIN_TYPE_RCLK_INPUT,
+ extack);
+ if (ret)
+ goto unlock;
+
+ *state = p->state[hw_idx];
+ ret = 0;
+unlock:
+ mutex_unlock(&pf->dplls.lock);
+
+ return ret;
+}
+
+static const struct dpll_pin_ops ice_dpll_rclk_ops = {
+ .state_on_pin_set = ice_dpll_rclk_state_on_pin_set,
+ .state_on_pin_get = ice_dpll_rclk_state_on_pin_get,
+ .direction_get = ice_dpll_input_direction,
+};
+
+static const struct dpll_pin_ops ice_dpll_input_ops = {
+ .frequency_get = ice_dpll_input_frequency_get,
+ .frequency_set = ice_dpll_input_frequency_set,
+ .state_on_dpll_get = ice_dpll_input_state_get,
+ .state_on_dpll_set = ice_dpll_input_state_set,
+ .prio_get = ice_dpll_input_prio_get,
+ .prio_set = ice_dpll_input_prio_set,
+ .direction_get = ice_dpll_input_direction,
+};
+
+static const struct dpll_pin_ops ice_dpll_output_ops = {
+ .frequency_get = ice_dpll_output_frequency_get,
+ .frequency_set = ice_dpll_output_frequency_set,
+ .state_on_dpll_get = ice_dpll_output_state_get,
+ .state_on_dpll_set = ice_dpll_output_state_set,
+ .direction_get = ice_dpll_output_direction,
+};
+
+static const struct dpll_device_ops ice_dpll_ops = {
+ .lock_status_get = ice_dpll_lock_status_get,
+ .mode_supported = ice_dpll_mode_supported,
+ .mode_get = ice_dpll_mode_get,
+};
+
+/**
+ * ice_generate_clock_id - generates unique clock_id for registering dpll.
+ * @pf: board private structure
+ *
+ * Generates unique (per board) clock_id for allocation and search of dpll
+ * devices in Linux dpll subsystem.
+ *
+ * Return: generated clock id for the board
+ */
+static u64 ice_generate_clock_id(struct ice_pf *pf)
+{
+ return pci_get_dsn(pf->pdev);
+}
+
+/**
+ * ice_dpll_notify_changes - notify dpll subsystem about changes
+ * @d: pointer do dpll
+ *
+ * Once change detected appropriate event is submitted to the dpll subsystem.
+ */
+static void ice_dpll_notify_changes(struct ice_dpll *d)
+{
+ if (d->prev_dpll_state != d->dpll_state) {
+ d->prev_dpll_state = d->dpll_state;
+ dpll_device_change_ntf(d->dpll);
+ }
+ if (d->prev_input != d->active_input) {
+ if (d->prev_input)
+ dpll_pin_change_ntf(d->prev_input);
+ d->prev_input = d->active_input;
+ if (d->active_input)
+ dpll_pin_change_ntf(d->active_input);
+ }
+}
+
+/**
+ * ice_dpll_update_state - update dpll state
+ * @pf: pf private structure
+ * @d: pointer to queried dpll device
+ * @init: if function called on initialization of ice dpll
+ *
+ * Poll current state of dpll from hw and update ice_dpll struct.
+ *
+ * Context: Called by kworker under pf->dplls.lock
+ * Return:
+ * * 0 - success
+ * * negative - AQ failure
+ */
+static int
+ice_dpll_update_state(struct ice_pf *pf, struct ice_dpll *d, bool init)
+{
+ struct ice_dpll_pin *p = NULL;
+ int ret;
+
+ ret = ice_get_cgu_state(&pf->hw, d->dpll_idx, d->prev_dpll_state,
+ &d->input_idx, &d->ref_state, &d->eec_mode,
+ &d->phase_shift, &d->dpll_state);
+
+ dev_dbg(ice_pf_to_dev(pf),
+ "update dpll=%d, prev_src_idx:%u, src_idx:%u, state:%d, prev:%d mode:%d\n",
+ d->dpll_idx, d->prev_input_idx, d->input_idx,
+ d->dpll_state, d->prev_dpll_state, d->mode);
+ if (ret) {
+ dev_err(ice_pf_to_dev(pf),
+ "update dpll=%d state failed, ret=%d %s\n",
+ d->dpll_idx, ret,
+ ice_aq_str(pf->hw.adminq.sq_last_status));
+ return ret;
+ }
+ if (init) {
+ if (d->dpll_state == DPLL_LOCK_STATUS_LOCKED ||
+ d->dpll_state == DPLL_LOCK_STATUS_LOCKED_HO_ACQ)
+ d->active_input = pf->dplls.inputs[d->input_idx].pin;
+ p = &pf->dplls.inputs[d->input_idx];
+ return ice_dpll_pin_state_update(pf, p,
+ ICE_DPLL_PIN_TYPE_INPUT, NULL);
+ }
+ if (d->dpll_state == DPLL_LOCK_STATUS_HOLDOVER ||
+ d->dpll_state == DPLL_LOCK_STATUS_UNLOCKED) {
+ d->active_input = NULL;
+ if (d->input_idx != ICE_DPLL_PIN_IDX_INVALID)
+ p = &pf->dplls.inputs[d->input_idx];
+ d->prev_input_idx = ICE_DPLL_PIN_IDX_INVALID;
+ d->input_idx = ICE_DPLL_PIN_IDX_INVALID;
+ if (!p)
+ return 0;
+ ret = ice_dpll_pin_state_update(pf, p,
+ ICE_DPLL_PIN_TYPE_INPUT, NULL);
+ } else if (d->input_idx != d->prev_input_idx) {
+ if (d->prev_input_idx != ICE_DPLL_PIN_IDX_INVALID) {
+ p = &pf->dplls.inputs[d->prev_input_idx];
+ ice_dpll_pin_state_update(pf, p,
+ ICE_DPLL_PIN_TYPE_INPUT,
+ NULL);
+ }
+ if (d->input_idx != ICE_DPLL_PIN_IDX_INVALID) {
+ p = &pf->dplls.inputs[d->input_idx];
+ d->active_input = p->pin;
+ ice_dpll_pin_state_update(pf, p,
+ ICE_DPLL_PIN_TYPE_INPUT,
+ NULL);
+ }
+ d->prev_input_idx = d->input_idx;
+ }
+
+ return ret;
+}
+
+/**
+ * ice_dpll_periodic_work - DPLLs periodic worker
+ * @work: pointer to kthread_work structure
+ *
+ * DPLLs periodic worker is responsible for polling state of dpll.
+ * Context: Holds pf->dplls.lock
+ */
+static void ice_dpll_periodic_work(struct kthread_work *work)
+{
+ struct ice_dplls *d = container_of(work, struct ice_dplls, work.work);
+ struct ice_pf *pf = container_of(d, struct ice_pf, dplls);
+ struct ice_dpll *de = &pf->dplls.eec;
+ struct ice_dpll *dp = &pf->dplls.pps;
+ int ret;
+
+ mutex_lock(&pf->dplls.lock);
+ ret = ice_dpll_update_state(pf, de, false);
+ if (!ret)
+ ret = ice_dpll_update_state(pf, dp, false);
+ if (ret) {
+ d->cgu_state_acq_err_num++;
+ /* stop rescheduling this worker */
+ if (d->cgu_state_acq_err_num >
+ ICE_CGU_STATE_ACQ_ERR_THRESHOLD) {
+ dev_err(ice_pf_to_dev(pf),
+ "EEC/PPS DPLLs periodic work disabled\n");
+ mutex_unlock(&pf->dplls.lock);
+ return;
+ }
+ }
+ mutex_unlock(&pf->dplls.lock);
+ ice_dpll_notify_changes(de);
+ ice_dpll_notify_changes(dp);
+
+ /* Run twice a second or reschedule if update failed */
+ kthread_queue_delayed_work(d->kworker, &d->work,
+ ret ? msecs_to_jiffies(10) :
+ msecs_to_jiffies(500));
+}
+
+/**
+ * ice_dpll_release_pins - release pins resources from dpll subsystem
+ * @pins: pointer to pins array
+ * @count: number of pins
+ *
+ * Release resources of given pins array in the dpll subsystem.
+ */
+static void ice_dpll_release_pins(struct ice_dpll_pin *pins, int count)
+{
+ int i;
+
+ for (i = 0; i < count; i++)
+ dpll_pin_put(pins[i].pin);
+}
+
+/**
+ * ice_dpll_get_pins - get pins from dpll subsystem
+ * @pf: board private structure
+ * @pins: pointer to pins array
+ * @start_idx: get starts from this pin idx value
+ * @count: number of pins
+ * @clock_id: clock_id of dpll device
+ *
+ * Get pins - allocate - in dpll subsystem, store them in pin field of given
+ * pins array.
+ *
+ * Return:
+ * * 0 - success
+ * * negative - allocation failure reason
+ */
+static int
+ice_dpll_get_pins(struct ice_pf *pf, struct ice_dpll_pin *pins,
+ int start_idx, int count, u64 clock_id)
+{
+ int i, ret;
+
+ for (i = 0; i < count; i++) {
+ pins[i].pin = dpll_pin_get(clock_id, i + start_idx, THIS_MODULE,
+ &pins[i].prop);
+ if (IS_ERR(pins[i].pin)) {
+ ret = PTR_ERR(pins[i].pin);
+ goto release_pins;
+ }
+ }
+
+ return 0;
+
+release_pins:
+ while (--i >= 0)
+ dpll_pin_put(pins[i].pin);
+ return ret;
+}
+
+/**
+ * ice_dpll_unregister_pins - unregister pins from a dpll
+ * @dpll: dpll device pointer
+ * @pins: pointer to pins array
+ * @ops: callback ops registered with the pins
+ * @count: number of pins
+ *
+ * Unregister pins of a given array of pins from given dpll device registered in
+ * dpll subsystem.
+ */
+static void
+ice_dpll_unregister_pins(struct dpll_device *dpll, struct ice_dpll_pin *pins,
+ const struct dpll_pin_ops *ops, int count)
+{
+ int i;
+
+ for (i = 0; i < count; i++)
+ dpll_pin_unregister(dpll, pins[i].pin, ops, &pins[i]);
+}
+
+/**
+ * ice_dpll_register_pins - register pins with a dpll
+ * @dpll: dpll pointer to register pins with
+ * @pins: pointer to pins array
+ * @ops: callback ops registered with the pins
+ * @count: number of pins
+ *
+ * Register pins of a given array with given dpll in dpll subsystem.
+ *
+ * Return:
+ * * 0 - success
+ * * negative - registration failure reason
+ */
+static int
+ice_dpll_register_pins(struct dpll_device *dpll, struct ice_dpll_pin *pins,
+ const struct dpll_pin_ops *ops, int count)
+{
+ int ret, i;
+
+ for (i = 0; i < count; i++) {
+ ret = dpll_pin_register(dpll, pins[i].pin, ops, &pins[i]);
+ if (ret)
+ goto unregister_pins;
+ }
+
+ return 0;
+
+unregister_pins:
+ while (--i >= 0)
+ dpll_pin_unregister(dpll, pins[i].pin, ops, &pins[i]);
+ return ret;
+}
+
+/**
+ * ice_dpll_deinit_direct_pins - deinitialize direct pins
+ * @cgu: if cgu is present and controlled by this NIC
+ * @pins: pointer to pins array
+ * @count: number of pins
+ * @ops: callback ops registered with the pins
+ * @first: dpll device pointer
+ * @second: dpll device pointer
+ *
+ * If cgu is owned unregister pins from given dplls.
+ * Release pins resources to the dpll subsystem.
+ */
+static void
+ice_dpll_deinit_direct_pins(bool cgu, struct ice_dpll_pin *pins, int count,
+ const struct dpll_pin_ops *ops,
+ struct dpll_device *first,
+ struct dpll_device *second)
+{
+ if (cgu) {
+ ice_dpll_unregister_pins(first, pins, ops, count);
+ ice_dpll_unregister_pins(second, pins, ops, count);
+ }
+ ice_dpll_release_pins(pins, count);
+}
+
+/**
+ * ice_dpll_init_direct_pins - initialize direct pins
+ * @pf: board private structure
+ * @cgu: if cgu is present and controlled by this NIC
+ * @pins: pointer to pins array
+ * @start_idx: on which index shall allocation start in dpll subsystem
+ * @count: number of pins
+ * @ops: callback ops registered with the pins
+ * @first: dpll device pointer
+ * @second: dpll device pointer
+ *
+ * Allocate directly connected pins of a given array in dpll subsystem.
+ * If cgu is owned register allocated pins with given dplls.
+ *
+ * Return:
+ * * 0 - success
+ * * negative - registration failure reason
+ */
+static int
+ice_dpll_init_direct_pins(struct ice_pf *pf, bool cgu,
+ struct ice_dpll_pin *pins, int start_idx, int count,
+ const struct dpll_pin_ops *ops,
+ struct dpll_device *first, struct dpll_device *second)
+{
+ int ret;
+
+ ret = ice_dpll_get_pins(pf, pins, start_idx, count, pf->dplls.clock_id);
+ if (ret)
+ return ret;
+ if (cgu) {
+ ret = ice_dpll_register_pins(first, pins, ops, count);
+ if (ret)
+ goto release_pins;
+ ret = ice_dpll_register_pins(second, pins, ops, count);
+ if (ret)
+ goto unregister_first;
+ }
+
+ return 0;
+
+unregister_first:
+ ice_dpll_unregister_pins(first, pins, ops, count);
+release_pins:
+ ice_dpll_release_pins(pins, count);
+ return ret;
+}
+
+/**
+ * ice_dpll_deinit_rclk_pin - release rclk pin resources
+ * @pf: board private structure
+ *
+ * Deregister rclk pin from parent pins and release resources in dpll subsystem.
+ */
+static void ice_dpll_deinit_rclk_pin(struct ice_pf *pf)
+{
+ struct ice_dpll_pin *rclk = &pf->dplls.rclk;
+ struct ice_vsi *vsi = ice_get_main_vsi(pf);
+ struct dpll_pin *parent;
+ int i;
+
+ for (i = 0; i < rclk->num_parents; i++) {
+ parent = pf->dplls.inputs[rclk->parent_idx[i]].pin;
+ if (!parent)
+ continue;
+ dpll_pin_on_pin_unregister(parent, rclk->pin,
+ &ice_dpll_rclk_ops, rclk);
+ }
+ if (WARN_ON_ONCE(!vsi || !vsi->netdev))
+ return;
+ netdev_dpll_pin_clear(vsi->netdev);
+ dpll_pin_put(rclk->pin);
+}
+
+/**
+ * ice_dpll_init_rclk_pins - initialize recovered clock pin
+ * @pf: board private structure
+ * @pin: pin to register
+ * @start_idx: on which index shall allocation start in dpll subsystem
+ * @ops: callback ops registered with the pins
+ *
+ * Allocate resource for recovered clock pin in dpll subsystem. Register the
+ * pin with the parents it has in the info. Register pin with the pf's main vsi
+ * netdev.
+ *
+ * Return:
+ * * 0 - success
+ * * negative - registration failure reason
+ */
+static int
+ice_dpll_init_rclk_pins(struct ice_pf *pf, struct ice_dpll_pin *pin,
+ int start_idx, const struct dpll_pin_ops *ops)
+{
+ struct ice_vsi *vsi = ice_get_main_vsi(pf);
+ struct dpll_pin *parent;
+ int ret, i;
+
+ ret = ice_dpll_get_pins(pf, pin, start_idx, ICE_DPLL_RCLK_NUM_PER_PF,
+ pf->dplls.clock_id);
+ if (ret)
+ return ret;
+ for (i = 0; i < pf->dplls.rclk.num_parents; i++) {
+ parent = pf->dplls.inputs[pf->dplls.rclk.parent_idx[i]].pin;
+ if (!parent) {
+ ret = -ENODEV;
+ goto unregister_pins;
+ }
+ ret = dpll_pin_on_pin_register(parent, pf->dplls.rclk.pin,
+ ops, &pf->dplls.rclk);
+ if (ret)
+ goto unregister_pins;
+ }
+ if (WARN_ON((!vsi || !vsi->netdev)))
+ return -EINVAL;
+ netdev_dpll_pin_set(vsi->netdev, pf->dplls.rclk.pin);
+
+ return 0;
+
+unregister_pins:
+ while (i) {
+ parent = pf->dplls.inputs[pf->dplls.rclk.parent_idx[--i]].pin;
+ dpll_pin_on_pin_unregister(parent, pf->dplls.rclk.pin,
+ &ice_dpll_rclk_ops, &pf->dplls.rclk);
+ }
+ ice_dpll_release_pins(pin, ICE_DPLL_RCLK_NUM_PER_PF);
+ return ret;
+}
+
+/**
+ * ice_dpll_deinit_pins - deinitialize direct pins
+ * @pf: board private structure
+ * @cgu: if cgu is controlled by this pf
+ *
+ * If cgu is owned unregister directly connected pins from the dplls.
+ * Release resources of directly connected pins from the dpll subsystem.
+ */
+static void ice_dpll_deinit_pins(struct ice_pf *pf, bool cgu)
+{
+ struct ice_dpll_pin *outputs = pf->dplls.outputs;
+ struct ice_dpll_pin *inputs = pf->dplls.inputs;
+ int num_outputs = pf->dplls.num_outputs;
+ int num_inputs = pf->dplls.num_inputs;
+ struct ice_dplls *d = &pf->dplls;
+ struct ice_dpll *de = &d->eec;
+ struct ice_dpll *dp = &d->pps;
+
+ ice_dpll_deinit_rclk_pin(pf);
+ if (cgu) {
+ ice_dpll_unregister_pins(dp->dpll, inputs, &ice_dpll_input_ops,
+ num_inputs);
+ ice_dpll_unregister_pins(de->dpll, inputs, &ice_dpll_input_ops,
+ num_inputs);
+ }
+ ice_dpll_release_pins(inputs, num_inputs);
+ if (cgu) {
+ ice_dpll_unregister_pins(dp->dpll, outputs,
+ &ice_dpll_output_ops, num_outputs);
+ ice_dpll_unregister_pins(de->dpll, outputs,
+ &ice_dpll_output_ops, num_outputs);
+ ice_dpll_release_pins(outputs, num_outputs);
+ }
+}
+
+/**
+ * ice_dpll_init_pins - init pins and register pins with a dplls
+ * @pf: board private structure
+ * @cgu: if cgu is present and controlled by this NIC
+ *
+ * Initialize directly connected pf's pins within pf's dplls in a Linux dpll
+ * subsystem.
+ *
+ * Return:
+ * * 0 - success
+ * * negative - initialization failure reason
+ */
+static int ice_dpll_init_pins(struct ice_pf *pf, bool cgu)
+{
+ u32 rclk_idx;
+ int ret;
+
+ ret = ice_dpll_init_direct_pins(pf, cgu, pf->dplls.inputs, 0,
+ pf->dplls.num_inputs,
+ &ice_dpll_input_ops,
+ pf->dplls.eec.dpll, pf->dplls.pps.dpll);
+ if (ret)
+ return ret;
+ if (cgu) {
+ ret = ice_dpll_init_direct_pins(pf, cgu, pf->dplls.outputs,
+ pf->dplls.num_inputs,
+ pf->dplls.num_outputs,
+ &ice_dpll_output_ops,
+ pf->dplls.eec.dpll,
+ pf->dplls.pps.dpll);
+ if (ret)
+ goto deinit_inputs;
+ }
+ rclk_idx = pf->dplls.num_inputs + pf->dplls.num_outputs + pf->hw.pf_id;
+ ret = ice_dpll_init_rclk_pins(pf, &pf->dplls.rclk, rclk_idx,
+ &ice_dpll_rclk_ops);
+ if (ret)
+ goto deinit_outputs;
+
+ return 0;
+deinit_outputs:
+ ice_dpll_deinit_direct_pins(cgu, pf->dplls.outputs,
+ pf->dplls.num_outputs,
+ &ice_dpll_output_ops, pf->dplls.pps.dpll,
+ pf->dplls.eec.dpll);
+deinit_inputs:
+ ice_dpll_deinit_direct_pins(cgu, pf->dplls.inputs, pf->dplls.num_inputs,
+ &ice_dpll_input_ops, pf->dplls.pps.dpll,
+ pf->dplls.eec.dpll);
+ return ret;
+}
+
+/**
+ * ice_dpll_deinit_dpll - deinitialize dpll device
+ * @pf: board private structure
+ * @d: pointer to ice_dpll
+ * @cgu: if cgu is present and controlled by this NIC
+ *
+ * If cgu is owned unregister the dpll from dpll subsystem.
+ * Release resources of dpll device from dpll subsystem.
+ */
+static void
+ice_dpll_deinit_dpll(struct ice_pf *pf, struct ice_dpll *d, bool cgu)
+{
+ if (cgu)
+ dpll_device_unregister(d->dpll, &ice_dpll_ops, d);
+ dpll_device_put(d->dpll);
+}
+
+/**
+ * ice_dpll_init_dpll - initialize dpll device in dpll subsystem
+ * @pf: board private structure
+ * @d: dpll to be initialized
+ * @cgu: if cgu is present and controlled by this NIC
+ * @type: type of dpll being initialized
+ *
+ * Allocate dpll instance for this board in dpll subsystem, if cgu is controlled
+ * by this NIC, register dpll with the callback ops.
+ *
+ * Return:
+ * * 0 - success
+ * * negative - initialization failure reason
+ */
+static int
+ice_dpll_init_dpll(struct ice_pf *pf, struct ice_dpll *d, bool cgu,
+ enum dpll_type type)
+{
+ u64 clock_id = pf->dplls.clock_id;
+ int ret;
+
+ d->dpll = dpll_device_get(clock_id, d->dpll_idx, THIS_MODULE);
+ if (IS_ERR(d->dpll)) {
+ ret = PTR_ERR(d->dpll);
+ dev_err(ice_pf_to_dev(pf),
+ "dpll_device_get failed (%p) err=%d\n", d, ret);
+ return ret;
+ }
+ d->pf = pf;
+ if (cgu) {
+ ret = dpll_device_register(d->dpll, type, &ice_dpll_ops, d);
+ if (ret) {
+ dpll_device_put(d->dpll);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * ice_dpll_deinit_worker - deinitialize dpll kworker
+ * @pf: board private structure
+ *
+ * Stop dpll's kworker, release it's resources.
+ */
+static void ice_dpll_deinit_worker(struct ice_pf *pf)
+{
+ struct ice_dplls *d = &pf->dplls;
+
+ kthread_cancel_delayed_work_sync(&d->work);
+ kthread_destroy_worker(d->kworker);
+}
+
+/**
+ * ice_dpll_init_worker - Initialize DPLLs periodic worker
+ * @pf: board private structure
+ *
+ * Create and start DPLLs periodic worker.
+ *
+ * Context: Shall be called after pf->dplls.lock is initialized.
+ * Return:
+ * * 0 - success
+ * * negative - create worker failure
+ */
+static int ice_dpll_init_worker(struct ice_pf *pf)
+{
+ struct ice_dplls *d = &pf->dplls;
+ struct kthread_worker *kworker;
+
+ ice_dpll_update_state(pf, &d->eec, true);
+ ice_dpll_update_state(pf, &d->pps, true);
+ kthread_init_delayed_work(&d->work, ice_dpll_periodic_work);
+ kworker = kthread_create_worker(0, "ice-dplls-%s",
+ dev_name(ice_pf_to_dev(pf)));
+ if (IS_ERR(kworker))
+ return PTR_ERR(kworker);
+ d->kworker = kworker;
+ d->cgu_state_acq_err_num = 0;
+ kthread_queue_delayed_work(d->kworker, &d->work, 0);
+
+ return 0;
+}
+
+/**
+ * ice_dpll_init_info_direct_pins - initializes direct pins info
+ * @pf: board private structure
+ * @pin_type: type of pins being initialized
+ *
+ * Init information for directly connected pins, cache them in pf's pins
+ * structures.
+ *
+ * Return:
+ * * 0 - success
+ * * negative - init failure reason
+ */
+static int
+ice_dpll_init_info_direct_pins(struct ice_pf *pf,
+ enum ice_dpll_pin_type pin_type)
+{
+ struct ice_dpll *de = &pf->dplls.eec, *dp = &pf->dplls.pps;
+ int num_pins, i, ret = -EINVAL;
+ struct ice_hw *hw = &pf->hw;
+ struct ice_dpll_pin *pins;
+ u8 freq_supp_num;
+ bool input;
+
+ switch (pin_type) {
+ case ICE_DPLL_PIN_TYPE_INPUT:
+ pins = pf->dplls.inputs;
+ num_pins = pf->dplls.num_inputs;
+ input = true;
+ break;
+ case ICE_DPLL_PIN_TYPE_OUTPUT:
+ pins = pf->dplls.outputs;
+ num_pins = pf->dplls.num_outputs;
+ input = false;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ for (i = 0; i < num_pins; i++) {
+ pins[i].idx = i;
+ pins[i].prop.board_label = ice_cgu_get_pin_name(hw, i, input);
+ pins[i].prop.type = ice_cgu_get_pin_type(hw, i, input);
+ if (input) {
+ ret = ice_aq_get_cgu_ref_prio(hw, de->dpll_idx, i,
+ &de->input_prio[i]);
+ if (ret)
+ return ret;
+ ret = ice_aq_get_cgu_ref_prio(hw, dp->dpll_idx, i,
+ &dp->input_prio[i]);
+ if (ret)
+ return ret;
+ pins[i].prop.capabilities |=
+ DPLL_PIN_CAPABILITIES_PRIORITY_CAN_CHANGE;
+ }
+ pins[i].prop.capabilities |=
+ DPLL_PIN_CAPABILITIES_STATE_CAN_CHANGE;
+ ret = ice_dpll_pin_state_update(pf, &pins[i], pin_type, NULL);
+ if (ret)
+ return ret;
+ pins[i].prop.freq_supported =
+ ice_cgu_get_pin_freq_supp(hw, i, input, &freq_supp_num);
+ pins[i].prop.freq_supported_num = freq_supp_num;
+ pins[i].pf = pf;
+ }
+
+ return ret;
+}
+
+/**
+ * ice_dpll_init_info_rclk_pin - initializes rclk pin information
+ * @pf: board private structure
+ *
+ * Init information for rclk pin, cache them in pf->dplls.rclk.
+ *
+ * Return:
+ * * 0 - success
+ * * negative - init failure reason
+ */
+static int ice_dpll_init_info_rclk_pin(struct ice_pf *pf)
+{
+ struct ice_dpll_pin *pin = &pf->dplls.rclk;
+
+ pin->prop.type = DPLL_PIN_TYPE_SYNCE_ETH_PORT;
+ pin->prop.capabilities |= DPLL_PIN_CAPABILITIES_STATE_CAN_CHANGE;
+ pin->pf = pf;
+
+ return ice_dpll_pin_state_update(pf, pin,
+ ICE_DPLL_PIN_TYPE_RCLK_INPUT, NULL);
+}
+
+/**
+ * ice_dpll_init_pins_info - init pins info wrapper
+ * @pf: board private structure
+ * @pin_type: type of pins being initialized
+ *
+ * Wraps functions for pin initialization.
+ *
+ * Return:
+ * * 0 - success
+ * * negative - init failure reason
+ */
+static int
+ice_dpll_init_pins_info(struct ice_pf *pf, enum ice_dpll_pin_type pin_type)
+{
+ switch (pin_type) {
+ case ICE_DPLL_PIN_TYPE_INPUT:
+ case ICE_DPLL_PIN_TYPE_OUTPUT:
+ return ice_dpll_init_info_direct_pins(pf, pin_type);
+ case ICE_DPLL_PIN_TYPE_RCLK_INPUT:
+ return ice_dpll_init_info_rclk_pin(pf);
+ default:
+ return -EINVAL;
+ }
+}
+
+/**
+ * ice_dpll_deinit_info - release memory allocated for pins info
+ * @pf: board private structure
+ *
+ * Release memory allocated for pins by ice_dpll_init_info function.
+ */
+static void ice_dpll_deinit_info(struct ice_pf *pf)
+{
+ kfree(pf->dplls.inputs);
+ kfree(pf->dplls.outputs);
+ kfree(pf->dplls.eec.input_prio);
+ kfree(pf->dplls.pps.input_prio);
+}
+
+/**
+ * ice_dpll_init_info - prepare pf's dpll information structure
+ * @pf: board private structure
+ * @cgu: if cgu is present and controlled by this NIC
+ *
+ * Acquire (from HW) and set basic dpll information (on pf->dplls struct).
+ *
+ * Return:
+ * * 0 - success
+ * * negative - init failure reason
+ */
+static int ice_dpll_init_info(struct ice_pf *pf, bool cgu)
+{
+ struct ice_aqc_get_cgu_abilities abilities;
+ struct ice_dpll *de = &pf->dplls.eec;
+ struct ice_dpll *dp = &pf->dplls.pps;
+ struct ice_dplls *d = &pf->dplls;
+ struct ice_hw *hw = &pf->hw;
+ int ret, alloc_size, i;
+
+ d->clock_id = ice_generate_clock_id(pf);
+ ret = ice_aq_get_cgu_abilities(hw, &abilities);
+ if (ret) {
+ dev_err(ice_pf_to_dev(pf),
+ "err:%d %s failed to read cgu abilities\n",
+ ret, ice_aq_str(hw->adminq.sq_last_status));
+ return ret;
+ }
+
+ de->dpll_idx = abilities.eec_dpll_idx;
+ dp->dpll_idx = abilities.pps_dpll_idx;
+ d->num_inputs = abilities.num_inputs;
+ d->num_outputs = abilities.num_outputs;
+ d->input_phase_adj_max = le32_to_cpu(abilities.max_in_phase_adj);
+ d->output_phase_adj_max = le32_to_cpu(abilities.max_out_phase_adj);
+
+ alloc_size = sizeof(*d->inputs) * d->num_inputs;
+ d->inputs = kzalloc(alloc_size, GFP_KERNEL);
+ if (!d->inputs)
+ return -ENOMEM;
+
+ alloc_size = sizeof(*de->input_prio) * d->num_inputs;
+ de->input_prio = kzalloc(alloc_size, GFP_KERNEL);
+ if (!de->input_prio)
+ return -ENOMEM;
+
+ dp->input_prio = kzalloc(alloc_size, GFP_KERNEL);
+ if (!dp->input_prio)
+ return -ENOMEM;
+
+ ret = ice_dpll_init_pins_info(pf, ICE_DPLL_PIN_TYPE_INPUT);
+ if (ret)
+ goto deinit_info;
+
+ if (cgu) {
+ alloc_size = sizeof(*d->outputs) * d->num_outputs;
+ d->outputs = kzalloc(alloc_size, GFP_KERNEL);
+ if (!d->outputs) {
+ ret = -ENOMEM;
+ goto deinit_info;
+ }
+
+ ret = ice_dpll_init_pins_info(pf, ICE_DPLL_PIN_TYPE_OUTPUT);
+ if (ret)
+ goto deinit_info;
+ }
+
+ ret = ice_get_cgu_rclk_pin_info(&pf->hw, &d->base_rclk_idx,
+ &pf->dplls.rclk.num_parents);
+ if (ret)
+ return ret;
+ for (i = 0; i < pf->dplls.rclk.num_parents; i++)
+ pf->dplls.rclk.parent_idx[i] = d->base_rclk_idx + i;
+ ret = ice_dpll_init_pins_info(pf, ICE_DPLL_PIN_TYPE_RCLK_INPUT);
+ if (ret)
+ return ret;
+ de->mode = DPLL_MODE_AUTOMATIC;
+ dp->mode = DPLL_MODE_AUTOMATIC;
+
+ dev_dbg(ice_pf_to_dev(pf),
+ "%s - success, inputs:%u, outputs:%u rclk-parents:%u\n",
+ __func__, d->num_inputs, d->num_outputs, d->rclk.num_parents);
+
+ return 0;
+
+deinit_info:
+ dev_err(ice_pf_to_dev(pf),
+ "%s - fail: d->inputs:%p, de->input_prio:%p, dp->input_prio:%p, d->outputs:%p\n",
+ __func__, d->inputs, de->input_prio,
+ dp->input_prio, d->outputs);
+ ice_dpll_deinit_info(pf);
+ return ret;
+}
+
+/**
+ * ice_dpll_deinit - Disable the driver/HW support for dpll subsystem
+ * the dpll device.
+ * @pf: board private structure
+ *
+ * Handles the cleanup work required after dpll initialization, freeing
+ * resources and unregistering the dpll, pin and all resources used for
+ * handling them.
+ *
+ * Context: Destroys pf->dplls.lock mutex. Call only if ICE_FLAG_DPLL was set.
+ */
+void ice_dpll_deinit(struct ice_pf *pf)
+{
+ bool cgu = ice_is_feature_supported(pf, ICE_F_CGU);
+
+ clear_bit(ICE_FLAG_DPLL, pf->flags);
+ if (cgu)
+ ice_dpll_deinit_worker(pf);
+
+ ice_dpll_deinit_pins(pf, cgu);
+ ice_dpll_deinit_dpll(pf, &pf->dplls.pps, cgu);
+ ice_dpll_deinit_dpll(pf, &pf->dplls.eec, cgu);
+ ice_dpll_deinit_info(pf);
+ mutex_destroy(&pf->dplls.lock);
+}
+
+/**
+ * ice_dpll_init - initialize support for dpll subsystem
+ * @pf: board private structure
+ *
+ * Set up the device dplls, register them and pins connected within Linux dpll
+ * subsystem. Allow userspace to obtain state of DPLL and handling of DPLL
+ * configuration requests.
+ *
+ * Context: Initializes pf->dplls.lock mutex.
+ */
+void ice_dpll_init(struct ice_pf *pf)
+{
+ bool cgu = ice_is_feature_supported(pf, ICE_F_CGU);
+ struct ice_dplls *d = &pf->dplls;
+ int err = 0;
+
+ err = ice_dpll_init_info(pf, cgu);
+ if (err)
+ goto err_exit;
+ err = ice_dpll_init_dpll(pf, &pf->dplls.eec, cgu, DPLL_TYPE_EEC);
+ if (err)
+ goto deinit_info;
+ err = ice_dpll_init_dpll(pf, &pf->dplls.pps, cgu, DPLL_TYPE_PPS);
+ if (err)
+ goto deinit_eec;
+ err = ice_dpll_init_pins(pf, cgu);
+ if (err)
+ goto deinit_pps;
+ mutex_init(&d->lock);
+ if (cgu) {
+ err = ice_dpll_init_worker(pf);
+ if (err)
+ goto deinit_pins;
+ }
+ set_bit(ICE_FLAG_DPLL, pf->flags);
+
+ return;
+
+deinit_pins:
+ ice_dpll_deinit_pins(pf, cgu);
+deinit_pps:
+ ice_dpll_deinit_dpll(pf, &pf->dplls.pps, cgu);
+deinit_eec:
+ ice_dpll_deinit_dpll(pf, &pf->dplls.eec, cgu);
+deinit_info:
+ ice_dpll_deinit_info(pf);
+err_exit:
+ mutex_destroy(&d->lock);
+ dev_warn(ice_pf_to_dev(pf), "DPLLs init failure err:%d\n", err);
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_dpll.h b/drivers/net/ethernet/intel/ice/ice_dpll.h
new file mode 100644
index 000000000000..9c524c4bdfd7
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_dpll.h
@@ -0,0 +1,104 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2022, Intel Corporation. */
+
+#ifndef _ICE_DPLL_H_
+#define _ICE_DPLL_H_
+
+#include "ice.h"
+
+#define ICE_DPLL_PRIO_MAX 0xF
+#define ICE_DPLL_RCLK_NUM_MAX 4
+
+/** ice_dpll_pin - store info about pins
+ * @pin: dpll pin structure
+ * @pf: pointer to pf, which has registered the dpll_pin
+ * @idx: ice pin private idx
+ * @num_parents: hols number of parent pins
+ * @parent_idx: hold indexes of parent pins
+ * @flags: pin flags returned from HW
+ * @state: state of a pin
+ * @prop: pin properties
+ * @freq: current frequency of a pin
+ */
+struct ice_dpll_pin {
+ struct dpll_pin *pin;
+ struct ice_pf *pf;
+ u8 idx;
+ u8 num_parents;
+ u8 parent_idx[ICE_DPLL_RCLK_NUM_MAX];
+ u8 flags[ICE_DPLL_RCLK_NUM_MAX];
+ u8 state[ICE_DPLL_RCLK_NUM_MAX];
+ struct dpll_pin_properties prop;
+ u32 freq;
+};
+
+/** ice_dpll - store info required for DPLL control
+ * @dpll: pointer to dpll dev
+ * @pf: pointer to pf, which has registered the dpll_device
+ * @dpll_idx: index of dpll on the NIC
+ * @input_idx: currently selected input index
+ * @prev_input_idx: previously selected input index
+ * @ref_state: state of dpll reference signals
+ * @eec_mode: eec_mode dpll is configured for
+ * @phase_shift: phase shift delay of a dpll
+ * @input_prio: priorities of each input
+ * @dpll_state: current dpll sync state
+ * @prev_dpll_state: last dpll sync state
+ * @active_input: pointer to active input pin
+ * @prev_input: pointer to previous active input pin
+ */
+struct ice_dpll {
+ struct dpll_device *dpll;
+ struct ice_pf *pf;
+ u8 dpll_idx;
+ u8 input_idx;
+ u8 prev_input_idx;
+ u8 ref_state;
+ u8 eec_mode;
+ s64 phase_shift;
+ u8 *input_prio;
+ enum dpll_lock_status dpll_state;
+ enum dpll_lock_status prev_dpll_state;
+ enum dpll_mode mode;
+ struct dpll_pin *active_input;
+ struct dpll_pin *prev_input;
+};
+
+/** ice_dplls - store info required for CCU (clock controlling unit)
+ * @kworker: periodic worker
+ * @work: periodic work
+ * @lock: locks access to configuration of a dpll
+ * @eec: pointer to EEC dpll dev
+ * @pps: pointer to PPS dpll dev
+ * @inputs: input pins pointer
+ * @outputs: output pins pointer
+ * @rclk: recovered pins pointer
+ * @num_inputs: number of input pins available on dpll
+ * @num_outputs: number of output pins available on dpll
+ * @cgu_state_acq_err_num: number of errors returned during periodic work
+ * @base_rclk_idx: idx of first pin used for clock revocery pins
+ * @clock_id: clock_id of dplls
+ */
+struct ice_dplls {
+ struct kthread_worker *kworker;
+ struct kthread_delayed_work work;
+ struct mutex lock;
+ struct ice_dpll eec;
+ struct ice_dpll pps;
+ struct ice_dpll_pin *inputs;
+ struct ice_dpll_pin *outputs;
+ struct ice_dpll_pin rclk;
+ u8 num_inputs;
+ u8 num_outputs;
+ int cgu_state_acq_err_num;
+ u8 base_rclk_idx;
+ u64 clock_id;
+ s32 input_phase_adj_max;
+ s32 output_phase_adj_max;
+};
+
+void ice_dpll_init(struct ice_pf *pf);
+
+void ice_dpll_deinit(struct ice_pf *pf);
+
+#endif
diff --git a/drivers/net/ethernet/intel/ice/ice_gnss.c b/drivers/net/ethernet/intel/ice/ice_gnss.c
index 75c9de675f20..c8ea1af51ad3 100644
--- a/drivers/net/ethernet/intel/ice/ice_gnss.c
+++ b/drivers/net/ethernet/intel/ice/ice_gnss.c
@@ -389,6 +389,9 @@ bool ice_gnss_is_gps_present(struct ice_hw *hw)
if (!hw->func_caps.ts_func_info.src_tmr_owned)
return false;
+ if (!ice_is_gps_in_netlist(hw))
+ return false;
+
#if IS_ENABLED(CONFIG_PTP_1588_CLOCK)
if (ice_is_e810t(hw)) {
int err;
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index 73bbf06a76db..031abe311ea3 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -3990,13 +3990,21 @@ void ice_init_feature_support(struct ice_pf *pf)
case ICE_DEV_ID_E810C_BACKPLANE:
case ICE_DEV_ID_E810C_QSFP:
case ICE_DEV_ID_E810C_SFP:
+ case ICE_DEV_ID_E810_XXV_BACKPLANE:
+ case ICE_DEV_ID_E810_XXV_QSFP:
+ case ICE_DEV_ID_E810_XXV_SFP:
ice_set_feature_support(pf, ICE_F_DSCP);
- ice_set_feature_support(pf, ICE_F_PTP_EXTTS);
- if (ice_is_e810t(&pf->hw)) {
+ if (ice_is_phy_rclk_present(&pf->hw))
+ ice_set_feature_support(pf, ICE_F_PHY_RCLK);
+ /* If we don't own the timer - don't enable other caps */
+ if (!ice_pf_src_tmr_owned(pf))
+ break;
+ if (ice_is_cgu_present(&pf->hw))
+ ice_set_feature_support(pf, ICE_F_CGU);
+ if (ice_is_clock_mux_present_e810t(&pf->hw))
ice_set_feature_support(pf, ICE_F_SMA_CTRL);
- if (ice_gnss_is_gps_present(&pf->hw))
- ice_set_feature_support(pf, ICE_F_GNSS);
- }
+ if (ice_gnss_is_gps_present(&pf->hw))
+ ice_set_feature_support(pf, ICE_F_GNSS);
break;
default:
break;
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 7784135160fd..4fd245f55b2d 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -3160,7 +3160,7 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data)
ena_mask &= ~PFINT_OICR_TSYN_EVNT_M;
- if (hw->func_caps.ts_func_info.src_tmr_owned) {
+ if (ice_pf_src_tmr_owned(pf)) {
/* Save EVENTs from GLTSYN register */
pf->ptp.ext_ts_irq |= gltsyn_stat &
(GLTSYN_STAT_EVENT0_M |
@@ -4666,6 +4666,10 @@ static void ice_init_features(struct ice_pf *pf)
if (ice_is_feature_supported(pf, ICE_F_GNSS))
ice_gnss_init(pf);
+ if (ice_is_feature_supported(pf, ICE_F_CGU) ||
+ ice_is_feature_supported(pf, ICE_F_PHY_RCLK))
+ ice_dpll_init(pf);
+
/* Note: Flow director init failure is non-fatal to load */
if (ice_init_fdir(pf))
dev_err(dev, "could not initialize flow director\n");
@@ -4695,6 +4699,8 @@ static void ice_deinit_features(struct ice_pf *pf)
ice_gnss_exit(pf);
if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags))
ice_ptp_release(pf);
+ if (test_bit(ICE_FLAG_DPLL, pf->flags))
+ ice_dpll_deinit(pf);
}
static void ice_init_wakeup(struct ice_pf *pf)
diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c
index 81d96a40d5a7..05f922d3b316 100644
--- a/drivers/net/ethernet/intel/ice/ice_ptp.c
+++ b/drivers/net/ethernet/intel/ice/ice_ptp.c
@@ -436,7 +436,7 @@ static void ice_clear_ptp_clock_index(struct ice_pf *pf)
int err;
/* Do not clear the index if we don't own the timer */
- if (!hw->func_caps.ts_func_info.src_tmr_owned)
+ if (!ice_pf_src_tmr_owned(pf))
return;
tmr_idx = hw->func_caps.ts_func_info.tmr_index_assoc;
@@ -1366,6 +1366,7 @@ out_unlock:
void ice_ptp_link_change(struct ice_pf *pf, u8 port, bool linkup)
{
struct ice_ptp_port *ptp_port;
+ struct ice_hw *hw = &pf->hw;
if (!test_bit(ICE_FLAG_PTP, pf->flags))
return;
@@ -1380,11 +1381,16 @@ void ice_ptp_link_change(struct ice_pf *pf, u8 port, bool linkup)
/* Update cached link status for this port immediately */
ptp_port->link_up = linkup;
- /* E810 devices do not need to reconfigure the PHY */
- if (ice_is_e810(&pf->hw))
+ switch (hw->phy_model) {
+ case ICE_PHY_E810:
+ /* Do not reconfigure E810 PHY */
return;
-
- ice_ptp_port_phy_restart(ptp_port);
+ case ICE_PHY_E822:
+ ice_ptp_port_phy_restart(ptp_port);
+ return;
+ default:
+ dev_warn(ice_pf_to_dev(pf), "%s: Unknown PHY type\n", __func__);
+ }
}
/**
@@ -1976,21 +1982,32 @@ ice_ptp_get_syncdevicetime(ktime_t *device,
u32 hh_lock, hh_art_ctl;
int i;
- /* Get the HW lock */
- hh_lock = rd32(hw, PFHH_SEM + (PFTSYN_SEM_BYTES * hw->pf_id));
+#define MAX_HH_HW_LOCK_TRIES 5
+#define MAX_HH_CTL_LOCK_TRIES 100
+
+ for (i = 0; i < MAX_HH_HW_LOCK_TRIES; i++) {
+ /* Get the HW lock */
+ hh_lock = rd32(hw, PFHH_SEM + (PFTSYN_SEM_BYTES * hw->pf_id));
+ if (hh_lock & PFHH_SEM_BUSY_M) {
+ usleep_range(10000, 15000);
+ continue;
+ }
+ break;
+ }
if (hh_lock & PFHH_SEM_BUSY_M) {
dev_err(ice_pf_to_dev(pf), "PTP failed to get hh lock\n");
- return -EFAULT;
+ return -EBUSY;
}
+ /* Program cmd to master timer */
+ ice_ptp_src_cmd(hw, ICE_PTP_READ_TIME);
+
/* Start the ART and device clock sync sequence */
hh_art_ctl = rd32(hw, GLHH_ART_CTL);
hh_art_ctl = hh_art_ctl | GLHH_ART_CTL_ACTIVE_M;
wr32(hw, GLHH_ART_CTL, hh_art_ctl);
-#define MAX_HH_LOCK_TRIES 100
-
- for (i = 0; i < MAX_HH_LOCK_TRIES; i++) {
+ for (i = 0; i < MAX_HH_CTL_LOCK_TRIES; i++) {
/* Wait for sync to complete */
hh_art_ctl = rd32(hw, GLHH_ART_CTL);
if (hh_art_ctl & GLHH_ART_CTL_ACTIVE_M) {
@@ -2014,19 +2031,23 @@ ice_ptp_get_syncdevicetime(ktime_t *device,
break;
}
}
+
+ /* Clear the master timer */
+ ice_ptp_src_cmd(hw, ICE_PTP_NOP);
+
/* Release HW lock */
hh_lock = rd32(hw, PFHH_SEM + (PFTSYN_SEM_BYTES * hw->pf_id));
hh_lock = hh_lock & ~PFHH_SEM_BUSY_M;
wr32(hw, PFHH_SEM + (PFTSYN_SEM_BYTES * hw->pf_id), hh_lock);
- if (i == MAX_HH_LOCK_TRIES)
+ if (i == MAX_HH_CTL_LOCK_TRIES)
return -ETIMEDOUT;
return 0;
}
/**
- * ice_ptp_getcrosststamp_e822 - Capture a device cross timestamp
+ * ice_ptp_getcrosststamp_e82x - Capture a device cross timestamp
* @info: the driver's PTP info structure
* @cts: The memory to fill the cross timestamp info
*
@@ -2034,14 +2055,14 @@ ice_ptp_get_syncdevicetime(ktime_t *device,
* clock. Fill the cross timestamp information and report it back to the
* caller.
*
- * This is only valid for E822 devices which have support for generating the
- * cross timestamp via PCIe PTM.
+ * This is only valid for E822 and E823 devices which have support for
+ * generating the cross timestamp via PCIe PTM.
*
* In order to correctly correlate the ART timestamp back to the TSC time, the
* CPU must have X86_FEATURE_TSC_KNOWN_FREQ.
*/
static int
-ice_ptp_getcrosststamp_e822(struct ptp_clock_info *info,
+ice_ptp_getcrosststamp_e82x(struct ptp_clock_info *info,
struct system_device_crosststamp *cts)
{
struct ice_pf *pf = ptp_info_to_pf(info);
@@ -2246,18 +2267,20 @@ ice_ptp_setup_sma_pins_e810t(struct ice_pf *pf, struct ptp_clock_info *info)
static void
ice_ptp_setup_pins_e810(struct ice_pf *pf, struct ptp_clock_info *info)
{
- info->n_per_out = N_PER_OUT_E810;
-
- if (ice_is_feature_supported(pf, ICE_F_PTP_EXTTS))
- info->n_ext_ts = N_EXT_TS_E810;
-
if (ice_is_feature_supported(pf, ICE_F_SMA_CTRL)) {
info->n_ext_ts = N_EXT_TS_E810;
+ info->n_per_out = N_PER_OUT_E810T;
info->n_pins = NUM_PTP_PINS_E810T;
info->verify = ice_verify_pin_e810t;
/* Complete setup of the SMA pins */
ice_ptp_setup_sma_pins_e810t(pf, info);
+ } else if (ice_is_e810t(&pf->hw)) {
+ info->n_ext_ts = N_EXT_TS_NO_SMA_E810T;
+ info->n_per_out = N_PER_OUT_NO_SMA_E810T;
+ } else {
+ info->n_per_out = N_PER_OUT_E810;
+ info->n_ext_ts = N_EXT_TS_E810;
}
}
@@ -2275,22 +2298,22 @@ ice_ptp_setup_pins_e823(struct ice_pf *pf, struct ptp_clock_info *info)
}
/**
- * ice_ptp_set_funcs_e822 - Set specialized functions for E822 support
+ * ice_ptp_set_funcs_e82x - Set specialized functions for E82x support
* @pf: Board private structure
* @info: PTP info to fill
*
- * Assign functions to the PTP capabiltiies structure for E822 devices.
+ * Assign functions to the PTP capabiltiies structure for E82x devices.
* Functions which operate across all device families should be set directly
- * in ice_ptp_set_caps. Only add functions here which are distinct for E822
+ * in ice_ptp_set_caps. Only add functions here which are distinct for E82x
* devices.
*/
static void
-ice_ptp_set_funcs_e822(struct ice_pf *pf, struct ptp_clock_info *info)
+ice_ptp_set_funcs_e82x(struct ice_pf *pf, struct ptp_clock_info *info)
{
#ifdef CONFIG_ICE_HWTS
if (boot_cpu_has(X86_FEATURE_ART) &&
boot_cpu_has(X86_FEATURE_TSC_KNOWN_FREQ))
- info->getcrosststamp = ice_ptp_getcrosststamp_e822;
+ info->getcrosststamp = ice_ptp_getcrosststamp_e82x;
#endif /* CONFIG_ICE_HWTS */
}
@@ -2324,6 +2347,8 @@ ice_ptp_set_funcs_e810(struct ice_pf *pf, struct ptp_clock_info *info)
static void
ice_ptp_set_funcs_e823(struct ice_pf *pf, struct ptp_clock_info *info)
{
+ ice_ptp_set_funcs_e82x(pf, info);
+
info->enable = ice_ptp_gpio_enable_e823;
ice_ptp_setup_pins_e823(pf, info);
}
@@ -2351,7 +2376,7 @@ static void ice_ptp_set_caps(struct ice_pf *pf)
else if (ice_is_e823(&pf->hw))
ice_ptp_set_funcs_e823(pf, info);
else
- ice_ptp_set_funcs_e822(pf, info);
+ ice_ptp_set_funcs_e82x(pf, info);
}
/**
@@ -2474,7 +2499,7 @@ void ice_ptp_reset(struct ice_pf *pf)
if (test_bit(ICE_PFR_REQ, pf->state))
goto pfr;
- if (!hw->func_caps.ts_func_info.src_tmr_owned)
+ if (!ice_pf_src_tmr_owned(pf))
goto reset_ts;
err = ice_ptp_init_phc(hw);
@@ -2685,14 +2710,22 @@ static int ice_ptp_init_work(struct ice_pf *pf, struct ice_ptp *ptp)
*/
static int ice_ptp_init_port(struct ice_pf *pf, struct ice_ptp_port *ptp_port)
{
+ struct ice_hw *hw = &pf->hw;
+
mutex_init(&ptp_port->ps_lock);
- if (ice_is_e810(&pf->hw))
+ switch (hw->phy_model) {
+ case ICE_PHY_E810:
return ice_ptp_init_tx_e810(pf, &ptp_port->tx);
+ case ICE_PHY_E822:
+ kthread_init_delayed_work(&ptp_port->ov_work,
+ ice_ptp_wait_for_offsets);
- kthread_init_delayed_work(&ptp_port->ov_work,
- ice_ptp_wait_for_offsets);
- return ice_ptp_init_tx_e822(pf, &ptp_port->tx, ptp_port->port_num);
+ return ice_ptp_init_tx_e822(pf, &ptp_port->tx,
+ ptp_port->port_num);
+ default:
+ return -ENODEV;
+ }
}
/**
@@ -2713,10 +2746,12 @@ void ice_ptp_init(struct ice_pf *pf)
struct ice_hw *hw = &pf->hw;
int err;
+ ice_ptp_init_phy_model(hw);
+
/* If this function owns the clock hardware, it must allocate and
* configure the PTP clock device to represent it.
*/
- if (hw->func_caps.ts_func_info.src_tmr_owned) {
+ if (ice_pf_src_tmr_owned(pf)) {
err = ice_ptp_init_owner(pf);
if (err)
goto err;
diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c
index f818dd215c05..f839f186797d 100644
--- a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c
+++ b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c
@@ -7,6 +7,132 @@
#include "ice_ptp_consts.h"
#include "ice_cgu_regs.h"
+static struct dpll_pin_frequency ice_cgu_pin_freq_common[] = {
+ DPLL_PIN_FREQUENCY_1PPS,
+ DPLL_PIN_FREQUENCY_10MHZ,
+};
+
+static struct dpll_pin_frequency ice_cgu_pin_freq_1_hz[] = {
+ DPLL_PIN_FREQUENCY_1PPS,
+};
+
+static struct dpll_pin_frequency ice_cgu_pin_freq_10_mhz[] = {
+ DPLL_PIN_FREQUENCY_10MHZ,
+};
+
+static const struct ice_cgu_pin_desc ice_e810t_sfp_cgu_inputs[] = {
+ { "CVL-SDP22", ZL_REF0P, DPLL_PIN_TYPE_INT_OSCILLATOR,
+ ARRAY_SIZE(ice_cgu_pin_freq_common), ice_cgu_pin_freq_common },
+ { "CVL-SDP20", ZL_REF0N, DPLL_PIN_TYPE_INT_OSCILLATOR,
+ ARRAY_SIZE(ice_cgu_pin_freq_common), ice_cgu_pin_freq_common },
+ { "C827_0-RCLKA", ZL_REF1P, DPLL_PIN_TYPE_MUX, 0, },
+ { "C827_0-RCLKB", ZL_REF1N, DPLL_PIN_TYPE_MUX, 0, },
+ { "SMA1", ZL_REF3P, DPLL_PIN_TYPE_EXT,
+ ARRAY_SIZE(ice_cgu_pin_freq_common), ice_cgu_pin_freq_common },
+ { "SMA2/U.FL2", ZL_REF3N, DPLL_PIN_TYPE_EXT,
+ ARRAY_SIZE(ice_cgu_pin_freq_common), ice_cgu_pin_freq_common },
+ { "GNSS-1PPS", ZL_REF4P, DPLL_PIN_TYPE_GNSS,
+ ARRAY_SIZE(ice_cgu_pin_freq_1_hz), ice_cgu_pin_freq_1_hz },
+ { "OCXO", ZL_REF4N, DPLL_PIN_TYPE_INT_OSCILLATOR, 0, },
+};
+
+static const struct ice_cgu_pin_desc ice_e810t_qsfp_cgu_inputs[] = {
+ { "CVL-SDP22", ZL_REF0P, DPLL_PIN_TYPE_INT_OSCILLATOR,
+ ARRAY_SIZE(ice_cgu_pin_freq_common), ice_cgu_pin_freq_common },
+ { "CVL-SDP20", ZL_REF0N, DPLL_PIN_TYPE_INT_OSCILLATOR,
+ ARRAY_SIZE(ice_cgu_pin_freq_common), ice_cgu_pin_freq_common },
+ { "C827_0-RCLKA", ZL_REF1P, DPLL_PIN_TYPE_MUX, },
+ { "C827_0-RCLKB", ZL_REF1N, DPLL_PIN_TYPE_MUX, },
+ { "C827_1-RCLKA", ZL_REF2P, DPLL_PIN_TYPE_MUX, },
+ { "C827_1-RCLKB", ZL_REF2N, DPLL_PIN_TYPE_MUX, },
+ { "SMA1", ZL_REF3P, DPLL_PIN_TYPE_EXT,
+ ARRAY_SIZE(ice_cgu_pin_freq_common), ice_cgu_pin_freq_common },
+ { "SMA2/U.FL2", ZL_REF3N, DPLL_PIN_TYPE_EXT,
+ ARRAY_SIZE(ice_cgu_pin_freq_common), ice_cgu_pin_freq_common },
+ { "GNSS-1PPS", ZL_REF4P, DPLL_PIN_TYPE_GNSS,
+ ARRAY_SIZE(ice_cgu_pin_freq_1_hz), ice_cgu_pin_freq_1_hz },
+ { "OCXO", ZL_REF4N, DPLL_PIN_TYPE_INT_OSCILLATOR, },
+};
+
+static const struct ice_cgu_pin_desc ice_e810t_sfp_cgu_outputs[] = {
+ { "REF-SMA1", ZL_OUT0, DPLL_PIN_TYPE_EXT,
+ ARRAY_SIZE(ice_cgu_pin_freq_common), ice_cgu_pin_freq_common },
+ { "REF-SMA2/U.FL2", ZL_OUT1, DPLL_PIN_TYPE_EXT,
+ ARRAY_SIZE(ice_cgu_pin_freq_common), ice_cgu_pin_freq_common },
+ { "PHY-CLK", ZL_OUT2, DPLL_PIN_TYPE_SYNCE_ETH_PORT, },
+ { "MAC-CLK", ZL_OUT3, DPLL_PIN_TYPE_SYNCE_ETH_PORT, },
+ { "CVL-SDP21", ZL_OUT4, DPLL_PIN_TYPE_EXT,
+ ARRAY_SIZE(ice_cgu_pin_freq_1_hz), ice_cgu_pin_freq_1_hz },
+ { "CVL-SDP23", ZL_OUT5, DPLL_PIN_TYPE_EXT,
+ ARRAY_SIZE(ice_cgu_pin_freq_1_hz), ice_cgu_pin_freq_1_hz },
+};
+
+static const struct ice_cgu_pin_desc ice_e810t_qsfp_cgu_outputs[] = {
+ { "REF-SMA1", ZL_OUT0, DPLL_PIN_TYPE_EXT,
+ ARRAY_SIZE(ice_cgu_pin_freq_common), ice_cgu_pin_freq_common },
+ { "REF-SMA2/U.FL2", ZL_OUT1, DPLL_PIN_TYPE_EXT,
+ ARRAY_SIZE(ice_cgu_pin_freq_common), ice_cgu_pin_freq_common },
+ { "PHY-CLK", ZL_OUT2, DPLL_PIN_TYPE_SYNCE_ETH_PORT, 0 },
+ { "PHY2-CLK", ZL_OUT3, DPLL_PIN_TYPE_SYNCE_ETH_PORT, 0 },
+ { "MAC-CLK", ZL_OUT4, DPLL_PIN_TYPE_SYNCE_ETH_PORT, 0 },
+ { "CVL-SDP21", ZL_OUT5, DPLL_PIN_TYPE_EXT,
+ ARRAY_SIZE(ice_cgu_pin_freq_1_hz), ice_cgu_pin_freq_1_hz },
+ { "CVL-SDP23", ZL_OUT6, DPLL_PIN_TYPE_EXT,
+ ARRAY_SIZE(ice_cgu_pin_freq_1_hz), ice_cgu_pin_freq_1_hz },
+};
+
+static const struct ice_cgu_pin_desc ice_e823_si_cgu_inputs[] = {
+ { "NONE", SI_REF0P, 0, 0 },
+ { "NONE", SI_REF0N, 0, 0 },
+ { "SYNCE0_DP", SI_REF1P, DPLL_PIN_TYPE_MUX, 0 },
+ { "SYNCE0_DN", SI_REF1N, DPLL_PIN_TYPE_MUX, 0 },
+ { "EXT_CLK_SYNC", SI_REF2P, DPLL_PIN_TYPE_EXT,
+ ARRAY_SIZE(ice_cgu_pin_freq_common), ice_cgu_pin_freq_common },
+ { "NONE", SI_REF2N, 0, 0 },
+ { "EXT_PPS_OUT", SI_REF3, DPLL_PIN_TYPE_EXT,
+ ARRAY_SIZE(ice_cgu_pin_freq_common), ice_cgu_pin_freq_common },
+ { "INT_PPS_OUT", SI_REF4, DPLL_PIN_TYPE_EXT,
+ ARRAY_SIZE(ice_cgu_pin_freq_common), ice_cgu_pin_freq_common },
+};
+
+static const struct ice_cgu_pin_desc ice_e823_si_cgu_outputs[] = {
+ { "1588-TIME_SYNC", SI_OUT0, DPLL_PIN_TYPE_EXT,
+ ARRAY_SIZE(ice_cgu_pin_freq_common), ice_cgu_pin_freq_common },
+ { "PHY-CLK", SI_OUT1, DPLL_PIN_TYPE_SYNCE_ETH_PORT, 0 },
+ { "10MHZ-SMA2", SI_OUT2, DPLL_PIN_TYPE_EXT,
+ ARRAY_SIZE(ice_cgu_pin_freq_10_mhz), ice_cgu_pin_freq_10_mhz },
+ { "PPS-SMA1", SI_OUT3, DPLL_PIN_TYPE_EXT,
+ ARRAY_SIZE(ice_cgu_pin_freq_common), ice_cgu_pin_freq_common },
+};
+
+static const struct ice_cgu_pin_desc ice_e823_zl_cgu_inputs[] = {
+ { "NONE", ZL_REF0P, 0, 0 },
+ { "INT_PPS_OUT", ZL_REF0N, DPLL_PIN_TYPE_EXT,
+ ARRAY_SIZE(ice_cgu_pin_freq_1_hz), ice_cgu_pin_freq_1_hz },
+ { "SYNCE0_DP", ZL_REF1P, DPLL_PIN_TYPE_MUX, 0 },
+ { "SYNCE0_DN", ZL_REF1N, DPLL_PIN_TYPE_MUX, 0 },
+ { "NONE", ZL_REF2P, 0, 0 },
+ { "NONE", ZL_REF2N, 0, 0 },
+ { "EXT_CLK_SYNC", ZL_REF3P, DPLL_PIN_TYPE_EXT,
+ ARRAY_SIZE(ice_cgu_pin_freq_common), ice_cgu_pin_freq_common },
+ { "NONE", ZL_REF3N, 0, 0 },
+ { "EXT_PPS_OUT", ZL_REF4P, DPLL_PIN_TYPE_EXT,
+ ARRAY_SIZE(ice_cgu_pin_freq_1_hz), ice_cgu_pin_freq_1_hz },
+ { "OCXO", ZL_REF4N, DPLL_PIN_TYPE_INT_OSCILLATOR, 0 },
+};
+
+static const struct ice_cgu_pin_desc ice_e823_zl_cgu_outputs[] = {
+ { "PPS-SMA1", ZL_OUT0, DPLL_PIN_TYPE_EXT,
+ ARRAY_SIZE(ice_cgu_pin_freq_1_hz), ice_cgu_pin_freq_1_hz },
+ { "10MHZ-SMA2", ZL_OUT1, DPLL_PIN_TYPE_EXT,
+ ARRAY_SIZE(ice_cgu_pin_freq_10_mhz), ice_cgu_pin_freq_10_mhz },
+ { "PHY-CLK", ZL_OUT2, DPLL_PIN_TYPE_SYNCE_ETH_PORT, 0 },
+ { "1588-TIME_REF", ZL_OUT3, DPLL_PIN_TYPE_SYNCE_ETH_PORT, 0 },
+ { "CPK-TIME_SYNC", ZL_OUT4, DPLL_PIN_TYPE_EXT,
+ ARRAY_SIZE(ice_cgu_pin_freq_common), ice_cgu_pin_freq_common },
+ { "NONE", ZL_OUT5, 0, 0 },
+};
+
/* Low level functions for interacting with and managing the device clock used
* for the Precision Time Protocol.
*
@@ -107,7 +233,7 @@ static u64 ice_ptp_read_src_incval(struct ice_hw *hw)
*
* Prepare the source timer for an upcoming timer sync command.
*/
-static void ice_ptp_src_cmd(struct ice_hw *hw, enum ice_ptp_tmr_cmd cmd)
+void ice_ptp_src_cmd(struct ice_hw *hw, enum ice_ptp_tmr_cmd cmd)
{
u32 cmd_val;
u8 tmr_idx;
@@ -116,19 +242,19 @@ static void ice_ptp_src_cmd(struct ice_hw *hw, enum ice_ptp_tmr_cmd cmd)
cmd_val = tmr_idx << SEL_CPK_SRC;
switch (cmd) {
- case INIT_TIME:
+ case ICE_PTP_INIT_TIME:
cmd_val |= GLTSYN_CMD_INIT_TIME;
break;
- case INIT_INCVAL:
+ case ICE_PTP_INIT_INCVAL:
cmd_val |= GLTSYN_CMD_INIT_INCVAL;
break;
- case ADJ_TIME:
+ case ICE_PTP_ADJ_TIME:
cmd_val |= GLTSYN_CMD_ADJ_TIME;
break;
- case ADJ_TIME_AT_TIME:
+ case ICE_PTP_ADJ_TIME_AT_TIME:
cmd_val |= GLTSYN_CMD_ADJ_INIT_TIME;
break;
- case READ_TIME:
+ case ICE_PTP_READ_TIME:
cmd_val |= GLTSYN_CMD_READ_TIME;
break;
case ICE_PTP_NOP:
@@ -168,9 +294,9 @@ ice_fill_phy_msg_e822(struct ice_sbq_msg_input *msg, u8 port, u16 offset)
{
int phy_port, phy, quadtype;
- phy_port = port % ICE_PORTS_PER_PHY;
- phy = port / ICE_PORTS_PER_PHY;
- quadtype = (port / ICE_PORTS_PER_QUAD) % ICE_NUM_QUAD_TYPE;
+ phy_port = port % ICE_PORTS_PER_PHY_E822;
+ phy = port / ICE_PORTS_PER_PHY_E822;
+ quadtype = (port / ICE_PORTS_PER_QUAD) % ICE_QUADS_PER_PHY_E822;
if (quadtype == 0) {
msg->msg_addr_low = P_Q0_L(P_0_BASE + offset, phy_port);
@@ -495,20 +621,25 @@ ice_write_64b_phy_reg_e822(struct ice_hw *hw, u8 port, u16 low_addr, u64 val)
* Fill a message buffer for accessing a register in a quad shared between
* multiple PHYs.
*/
-static void
+static int
ice_fill_quad_msg_e822(struct ice_sbq_msg_input *msg, u8 quad, u16 offset)
{
u32 addr;
+ if (quad >= ICE_MAX_QUAD)
+ return -EINVAL;
+
msg->dest_dev = rmn_0;
- if ((quad % ICE_NUM_QUAD_TYPE) == 0)
+ if ((quad % ICE_QUADS_PER_PHY_E822) == 0)
addr = Q_0_BASE + offset;
else
addr = Q_1_BASE + offset;
msg->msg_addr_low = lower_16_bits(addr);
msg->msg_addr_high = upper_16_bits(addr);
+
+ return 0;
}
/**
@@ -527,10 +658,10 @@ ice_read_quad_reg_e822(struct ice_hw *hw, u8 quad, u16 offset, u32 *val)
struct ice_sbq_msg_input msg = {0};
int err;
- if (quad >= ICE_MAX_QUAD)
- return -EINVAL;
+ err = ice_fill_quad_msg_e822(&msg, quad, offset);
+ if (err)
+ return err;
- ice_fill_quad_msg_e822(&msg, quad, offset);
msg.opcode = ice_sbq_msg_rd;
err = ice_sbq_rw_reg(hw, &msg);
@@ -561,10 +692,10 @@ ice_write_quad_reg_e822(struct ice_hw *hw, u8 quad, u16 offset, u32 val)
struct ice_sbq_msg_input msg = {0};
int err;
- if (quad >= ICE_MAX_QUAD)
- return -EINVAL;
+ err = ice_fill_quad_msg_e822(&msg, quad, offset);
+ if (err)
+ return err;
- ice_fill_quad_msg_e822(&msg, quad, offset);
msg.opcode = ice_sbq_msg_wr;
msg.data = val;
@@ -628,29 +759,32 @@ ice_read_phy_tstamp_e822(struct ice_hw *hw, u8 quad, u8 idx, u64 *tstamp)
* @quad: the quad to read from
* @idx: the timestamp index to reset
*
- * Clear a timestamp, resetting its valid bit, from the PHY quad block that is
- * shared between the internal PHYs on the E822 devices.
+ * Read the timestamp out of the quad to clear its timestamp status bit from
+ * the PHY quad block that is shared between the internal PHYs of the E822
+ * devices.
+ *
+ * Note that unlike E810, software cannot directly write to the quad memory
+ * bank registers. E822 relies on the ice_get_phy_tx_tstamp_ready() function
+ * to determine which timestamps are valid. Reading a timestamp auto-clears
+ * the valid bit.
+ *
+ * To directly clear the contents of the timestamp block entirely, discarding
+ * all timestamp data at once, software should instead use
+ * ice_ptp_reset_ts_memory_quad_e822().
+ *
+ * This function should only be called on an idx whose bit is set according to
+ * ice_get_phy_tx_tstamp_ready().
*/
static int
ice_clear_phy_tstamp_e822(struct ice_hw *hw, u8 quad, u8 idx)
{
- u16 lo_addr, hi_addr;
+ u64 unused_tstamp;
int err;
- lo_addr = (u16)TS_L(Q_REG_TX_MEMORY_BANK_START, idx);
- hi_addr = (u16)TS_H(Q_REG_TX_MEMORY_BANK_START, idx);
-
- err = ice_write_quad_reg_e822(hw, quad, lo_addr, 0);
+ err = ice_read_phy_tstamp_e822(hw, quad, idx, &unused_tstamp);
if (err) {
- ice_debug(hw, ICE_DBG_PTP, "Failed to clear low PTP timestamp register, err %d\n",
- err);
- return err;
- }
-
- err = ice_write_quad_reg_e822(hw, quad, hi_addr, 0);
- if (err) {
- ice_debug(hw, ICE_DBG_PTP, "Failed to clear high PTP timestamp register, err %d\n",
- err);
+ ice_debug(hw, ICE_DBG_PTP, "Failed to read the timestamp register for quad %u, idx %u, err %d\n",
+ quad, idx, err);
return err;
}
@@ -1025,7 +1159,7 @@ static int ice_ptp_init_phc_e822(struct ice_hw *hw)
* @time: Time to initialize the PHY port clocks to
*
* Program the PHY port registers with a new initial time value. The port
- * clock will be initialized once the driver issues an INIT_TIME sync
+ * clock will be initialized once the driver issues an ICE_PTP_INIT_TIME sync
* command. The time value is the upper 32 bits of the PHY timer, usually in
* units of nominal nanoseconds.
*/
@@ -1074,7 +1208,7 @@ exit_err:
*
* Program the port for an atomic adjustment by writing the Tx and Rx timer
* registers. The atomic adjustment won't be completed until the driver issues
- * an ADJ_TIME command.
+ * an ICE_PTP_ADJ_TIME command.
*
* Note that time is not in units of nanoseconds. It is in clock time
* including the lower sub-nanosecond portion of the port timer.
@@ -1127,7 +1261,7 @@ exit_err:
*
* Prepare the PHY ports for an atomic time adjustment by programming the PHY
* Tx and Rx port registers. The actual adjustment is completed by issuing an
- * ADJ_TIME or ADJ_TIME_AT_TIME sync command.
+ * ICE_PTP_ADJ_TIME or ICE_PTP_ADJ_TIME_AT_TIME sync command.
*/
static int
ice_ptp_prep_phy_adj_e822(struct ice_hw *hw, s32 adj)
@@ -1162,7 +1296,7 @@ ice_ptp_prep_phy_adj_e822(struct ice_hw *hw, s32 adj)
*
* Prepare each of the PHY ports for a new increment value by programming the
* port's TIMETUS registers. The new increment value will be updated after
- * issuing an INIT_INCVAL command.
+ * issuing an ICE_PTP_INIT_INCVAL command.
*/
static int
ice_ptp_prep_phy_incval_e822(struct ice_hw *hw, u64 incval)
@@ -1248,19 +1382,19 @@ ice_ptp_write_port_cmd_e822(struct ice_hw *hw, u8 port, enum ice_ptp_tmr_cmd cmd
tmr_idx = ice_get_ptp_src_clock_index(hw);
cmd_val = tmr_idx << SEL_PHY_SRC;
switch (cmd) {
- case INIT_TIME:
+ case ICE_PTP_INIT_TIME:
cmd_val |= PHY_CMD_INIT_TIME;
break;
- case INIT_INCVAL:
+ case ICE_PTP_INIT_INCVAL:
cmd_val |= PHY_CMD_INIT_INCVAL;
break;
- case ADJ_TIME:
+ case ICE_PTP_ADJ_TIME:
cmd_val |= PHY_CMD_ADJ_TIME;
break;
- case READ_TIME:
+ case ICE_PTP_READ_TIME:
cmd_val |= PHY_CMD_READ_TIME;
break;
- case ADJ_TIME_AT_TIME:
+ case ICE_PTP_ADJ_TIME_AT_TIME:
cmd_val |= PHY_CMD_ADJ_TIME_AT_TIME;
break;
case ICE_PTP_NOP:
@@ -2196,8 +2330,8 @@ int ice_phy_cfg_rx_offset_e822(struct ice_hw *hw, u8 port)
* @phy_time: on return, the 64bit PHY timer value
* @phc_time: on return, the lower 64bits of PHC time
*
- * Issue a READ_TIME timer command to simultaneously capture the PHY and PHC
- * timer values.
+ * Issue a ICE_PTP_READ_TIME timer command to simultaneously capture the PHY
+ * and PHC timer values.
*/
static int
ice_read_phy_and_phc_time_e822(struct ice_hw *hw, u8 port, u64 *phy_time,
@@ -2210,15 +2344,15 @@ ice_read_phy_and_phc_time_e822(struct ice_hw *hw, u8 port, u64 *phy_time,
tmr_idx = ice_get_ptp_src_clock_index(hw);
- /* Prepare the PHC timer for a READ_TIME capture command */
- ice_ptp_src_cmd(hw, READ_TIME);
+ /* Prepare the PHC timer for a ICE_PTP_READ_TIME capture command */
+ ice_ptp_src_cmd(hw, ICE_PTP_READ_TIME);
- /* Prepare the PHY timer for a READ_TIME capture command */
- err = ice_ptp_one_port_cmd(hw, port, READ_TIME);
+ /* Prepare the PHY timer for a ICE_PTP_READ_TIME capture command */
+ err = ice_ptp_one_port_cmd(hw, port, ICE_PTP_READ_TIME);
if (err)
return err;
- /* Issue the sync to start the READ_TIME capture */
+ /* Issue the sync to start the ICE_PTP_READ_TIME capture */
ice_ptp_exec_tmr_cmd(hw);
/* Read the captured PHC time from the shadow time registers */
@@ -2252,10 +2386,11 @@ ice_read_phy_and_phc_time_e822(struct ice_hw *hw, u8 port, u64 *phy_time,
* @port: the PHY port to synchronize
*
* Perform an adjustment to ensure that the PHY and PHC timers are in sync.
- * This is done by issuing a READ_TIME command which triggers a simultaneous
- * read of the PHY timer and PHC timer. Then we use the difference to
- * calculate an appropriate 2s complement addition to add to the PHY timer in
- * order to ensure it reads the same value as the primary PHC timer.
+ * This is done by issuing a ICE_PTP_READ_TIME command which triggers a
+ * simultaneous read of the PHY timer and PHC timer. Then we use the
+ * difference to calculate an appropriate 2s complement addition to add
+ * to the PHY timer in order to ensure it reads the same value as the
+ * primary PHC timer.
*/
static int ice_sync_phy_timer_e822(struct ice_hw *hw, u8 port)
{
@@ -2285,7 +2420,7 @@ static int ice_sync_phy_timer_e822(struct ice_hw *hw, u8 port)
if (err)
goto err_unlock;
- err = ice_ptp_one_port_cmd(hw, port, ADJ_TIME);
+ err = ice_ptp_one_port_cmd(hw, port, ICE_PTP_ADJ_TIME);
if (err)
goto err_unlock;
@@ -2408,7 +2543,7 @@ int ice_start_phy_timer_e822(struct ice_hw *hw, u8 port)
if (err)
return err;
- err = ice_ptp_one_port_cmd(hw, port, INIT_INCVAL);
+ err = ice_ptp_one_port_cmd(hw, port, ICE_PTP_INIT_INCVAL);
if (err)
return err;
@@ -2436,7 +2571,7 @@ int ice_start_phy_timer_e822(struct ice_hw *hw, u8 port)
if (err)
return err;
- err = ice_ptp_one_port_cmd(hw, port, INIT_INCVAL);
+ err = ice_ptp_one_port_cmd(hw, port, ICE_PTP_INIT_INCVAL);
if (err)
return err;
@@ -2685,28 +2820,39 @@ ice_read_phy_tstamp_e810(struct ice_hw *hw, u8 lport, u8 idx, u64 *tstamp)
* @lport: the lport to read from
* @idx: the timestamp index to reset
*
- * Clear a timestamp, resetting its valid bit, from the timestamp block of the
- * external PHY on the E810 device.
+ * Read the timestamp and then forcibly overwrite its value to clear the valid
+ * bit from the timestamp block of the external PHY on the E810 device.
+ *
+ * This function should only be called on an idx whose bit is set according to
+ * ice_get_phy_tx_tstamp_ready().
*/
static int ice_clear_phy_tstamp_e810(struct ice_hw *hw, u8 lport, u8 idx)
{
u32 lo_addr, hi_addr;
+ u64 unused_tstamp;
int err;
+ err = ice_read_phy_tstamp_e810(hw, lport, idx, &unused_tstamp);
+ if (err) {
+ ice_debug(hw, ICE_DBG_PTP, "Failed to read the timestamp register for lport %u, idx %u, err %d\n",
+ lport, idx, err);
+ return err;
+ }
+
lo_addr = TS_EXT(LOW_TX_MEMORY_BANK_START, lport, idx);
hi_addr = TS_EXT(HIGH_TX_MEMORY_BANK_START, lport, idx);
err = ice_write_phy_reg_e810(hw, lo_addr, 0);
if (err) {
- ice_debug(hw, ICE_DBG_PTP, "Failed to clear low PTP timestamp register, err %d\n",
- err);
+ ice_debug(hw, ICE_DBG_PTP, "Failed to clear low PTP timestamp register for lport %u, idx %u, err %d\n",
+ lport, idx, err);
return err;
}
err = ice_write_phy_reg_e810(hw, hi_addr, 0);
if (err) {
- ice_debug(hw, ICE_DBG_PTP, "Failed to clear high PTP timestamp register, err %d\n",
- err);
+ ice_debug(hw, ICE_DBG_PTP, "Failed to clear high PTP timestamp register for lport %u, idx %u, err %d\n",
+ lport, idx, err);
return err;
}
@@ -2757,7 +2903,7 @@ static int ice_ptp_init_phc_e810(struct ice_hw *hw)
*
* Program the PHY port ETH_GLTSYN_SHTIME registers in preparation setting the
* initial clock time. The time will not actually be programmed until the
- * driver issues an INIT_TIME command.
+ * driver issues an ICE_PTP_INIT_TIME command.
*
* The time value is the upper 32 bits of the PHY timer, usually in units of
* nominal nanoseconds.
@@ -2792,7 +2938,7 @@ static int ice_ptp_prep_phy_time_e810(struct ice_hw *hw, u32 time)
*
* Prepare the PHY port for an atomic adjustment by programming the PHY
* ETH_GLTSYN_SHADJ_L and ETH_GLTSYN_SHADJ_H registers. The actual adjustment
- * is completed by issuing an ADJ_TIME sync command.
+ * is completed by issuing an ICE_PTP_ADJ_TIME sync command.
*
* The adjustment value only contains the portion used for the upper 32bits of
* the PHY timer, usually in units of nominal nanoseconds. Negative
@@ -2832,7 +2978,7 @@ static int ice_ptp_prep_phy_adj_e810(struct ice_hw *hw, s32 adj)
*
* Prepare the PHY port for a new increment value by programming the PHY
* ETH_GLTSYN_SHADJ_L and ETH_GLTSYN_SHADJ_H registers. The actual change is
- * completed by issuing an INIT_INCVAL command.
+ * completed by issuing an ICE_PTP_INIT_INCVAL command.
*/
static int ice_ptp_prep_phy_incval_e810(struct ice_hw *hw, u64 incval)
{
@@ -2875,19 +3021,19 @@ static int ice_ptp_port_cmd_e810(struct ice_hw *hw, enum ice_ptp_tmr_cmd cmd)
int err;
switch (cmd) {
- case INIT_TIME:
+ case ICE_PTP_INIT_TIME:
cmd_val = GLTSYN_CMD_INIT_TIME;
break;
- case INIT_INCVAL:
+ case ICE_PTP_INIT_INCVAL:
cmd_val = GLTSYN_CMD_INIT_INCVAL;
break;
- case ADJ_TIME:
+ case ICE_PTP_ADJ_TIME:
cmd_val = GLTSYN_CMD_ADJ_TIME;
break;
- case READ_TIME:
+ case ICE_PTP_READ_TIME:
cmd_val = GLTSYN_CMD_READ_TIME;
break;
- case ADJ_TIME_AT_TIME:
+ case ICE_PTP_ADJ_TIME_AT_TIME:
cmd_val = GLTSYN_CMD_ADJ_INIT_TIME;
break;
case ICE_PTP_NOP:
@@ -3150,6 +3296,21 @@ void ice_ptp_unlock(struct ice_hw *hw)
}
/**
+ * ice_ptp_init_phy_model - Initialize hw->phy_model based on device type
+ * @hw: pointer to the HW structure
+ *
+ * Determine the PHY model for the device, and initialize hw->phy_model
+ * for use by other functions.
+ */
+void ice_ptp_init_phy_model(struct ice_hw *hw)
+{
+ if (ice_is_e810(hw))
+ hw->phy_model = ICE_PHY_E810;
+ else
+ hw->phy_model = ICE_PHY_E822;
+}
+
+/**
* ice_ptp_tmr_cmd - Prepare and trigger a timer sync command
* @hw: pointer to HW struct
* @cmd: the command to issue
@@ -3167,10 +3328,17 @@ static int ice_ptp_tmr_cmd(struct ice_hw *hw, enum ice_ptp_tmr_cmd cmd)
ice_ptp_src_cmd(hw, cmd);
/* Next, prepare the ports */
- if (ice_is_e810(hw))
+ switch (hw->phy_model) {
+ case ICE_PHY_E810:
err = ice_ptp_port_cmd_e810(hw, cmd);
- else
+ break;
+ case ICE_PHY_E822:
err = ice_ptp_port_cmd_e822(hw, cmd);
+ break;
+ default:
+ err = -EOPNOTSUPP;
+ }
+
if (err) {
ice_debug(hw, ICE_DBG_PTP, "Failed to prepare PHY ports for timer command %u, err %d\n",
cmd, err);
@@ -3212,14 +3380,21 @@ int ice_ptp_init_time(struct ice_hw *hw, u64 time)
/* PHY timers */
/* Fill Rx and Tx ports and send msg to PHY */
- if (ice_is_e810(hw))
+ switch (hw->phy_model) {
+ case ICE_PHY_E810:
err = ice_ptp_prep_phy_time_e810(hw, time & 0xFFFFFFFF);
- else
+ break;
+ case ICE_PHY_E822:
err = ice_ptp_prep_phy_time_e822(hw, time & 0xFFFFFFFF);
+ break;
+ default:
+ err = -EOPNOTSUPP;
+ }
+
if (err)
return err;
- return ice_ptp_tmr_cmd(hw, INIT_TIME);
+ return ice_ptp_tmr_cmd(hw, ICE_PTP_INIT_TIME);
}
/**
@@ -3232,8 +3407,8 @@ int ice_ptp_init_time(struct ice_hw *hw, u64 time)
*
* 1) Write the increment value to the source timer shadow registers
* 2) Write the increment value to the PHY timer shadow registers
- * 3) Issue an INIT_INCVAL timer command to synchronously switch both the
- * source and port timers to the new increment value at the next clock
+ * 3) Issue an ICE_PTP_INIT_INCVAL timer command to synchronously switch both
+ * the source and port timers to the new increment value at the next clock
* cycle.
*/
int ice_ptp_write_incval(struct ice_hw *hw, u64 incval)
@@ -3247,14 +3422,21 @@ int ice_ptp_write_incval(struct ice_hw *hw, u64 incval)
wr32(hw, GLTSYN_SHADJ_L(tmr_idx), lower_32_bits(incval));
wr32(hw, GLTSYN_SHADJ_H(tmr_idx), upper_32_bits(incval));
- if (ice_is_e810(hw))
+ switch (hw->phy_model) {
+ case ICE_PHY_E810:
err = ice_ptp_prep_phy_incval_e810(hw, incval);
- else
+ break;
+ case ICE_PHY_E822:
err = ice_ptp_prep_phy_incval_e822(hw, incval);
+ break;
+ default:
+ err = -EOPNOTSUPP;
+ }
+
if (err)
return err;
- return ice_ptp_tmr_cmd(hw, INIT_INCVAL);
+ return ice_ptp_tmr_cmd(hw, ICE_PTP_INIT_INCVAL);
}
/**
@@ -3288,8 +3470,8 @@ int ice_ptp_write_incval_locked(struct ice_hw *hw, u64 incval)
*
* 1) Write the adjustment to the source timer shadow registers
* 2) Write the adjustment to the PHY timer shadow registers
- * 3) Issue an ADJ_TIME timer command to synchronously apply the adjustment to
- * both the source and port timers at the next clock cycle.
+ * 3) Issue an ICE_PTP_ADJ_TIME timer command to synchronously apply the
+ * adjustment to both the source and port timers at the next clock cycle.
*/
int ice_ptp_adj_clock(struct ice_hw *hw, s32 adj)
{
@@ -3299,21 +3481,28 @@ int ice_ptp_adj_clock(struct ice_hw *hw, s32 adj)
tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned;
/* Write the desired clock adjustment into the GLTSYN_SHADJ register.
- * For an ADJ_TIME command, this set of registers represents the value
- * to add to the clock time. It supports subtraction by interpreting
- * the value as a 2's complement integer.
+ * For an ICE_PTP_ADJ_TIME command, this set of registers represents
+ * the value to add to the clock time. It supports subtraction by
+ * interpreting the value as a 2's complement integer.
*/
wr32(hw, GLTSYN_SHADJ_L(tmr_idx), 0);
wr32(hw, GLTSYN_SHADJ_H(tmr_idx), adj);
- if (ice_is_e810(hw))
+ switch (hw->phy_model) {
+ case ICE_PHY_E810:
err = ice_ptp_prep_phy_adj_e810(hw, adj);
- else
+ break;
+ case ICE_PHY_E822:
err = ice_ptp_prep_phy_adj_e822(hw, adj);
+ break;
+ default:
+ err = -EOPNOTSUPP;
+ }
+
if (err)
return err;
- return ice_ptp_tmr_cmd(hw, ADJ_TIME);
+ return ice_ptp_tmr_cmd(hw, ICE_PTP_ADJ_TIME);
}
/**
@@ -3329,10 +3518,14 @@ int ice_ptp_adj_clock(struct ice_hw *hw, s32 adj)
*/
int ice_read_phy_tstamp(struct ice_hw *hw, u8 block, u8 idx, u64 *tstamp)
{
- if (ice_is_e810(hw))
+ switch (hw->phy_model) {
+ case ICE_PHY_E810:
return ice_read_phy_tstamp_e810(hw, block, idx, tstamp);
- else
+ case ICE_PHY_E822:
return ice_read_phy_tstamp_e822(hw, block, idx, tstamp);
+ default:
+ return -EOPNOTSUPP;
+ }
}
/**
@@ -3341,16 +3534,110 @@ int ice_read_phy_tstamp(struct ice_hw *hw, u8 block, u8 idx, u64 *tstamp)
* @block: the block to read from
* @idx: the timestamp index to reset
*
- * Clear a timestamp, resetting its valid bit, from the timestamp block. For
- * E822 devices, the block is the quad to clear from. For E810 devices, the
- * block is the logical port to clear from.
+ * Clear a timestamp from the timestamp block, discarding its value without
+ * returning it. This resets the memory status bit for the timestamp index
+ * allowing it to be reused for another timestamp in the future.
+ *
+ * For E822 devices, the block number is the PHY quad to clear from. For E810
+ * devices, the block number is the logical port to clear from.
+ *
+ * This function must only be called on a timestamp index whose valid bit is
+ * set according to ice_get_phy_tx_tstamp_ready().
*/
int ice_clear_phy_tstamp(struct ice_hw *hw, u8 block, u8 idx)
{
- if (ice_is_e810(hw))
+ switch (hw->phy_model) {
+ case ICE_PHY_E810:
return ice_clear_phy_tstamp_e810(hw, block, idx);
- else
+ case ICE_PHY_E822:
return ice_clear_phy_tstamp_e822(hw, block, idx);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+/**
+ * ice_is_phy_rclk_present - check recovered clk presence
+ * @hw: pointer to the hw struct
+ *
+ * Check if the PHY Recovered Clock device is present in the netlist
+ * Return:
+ * * true - device found in netlist
+ * * false - device not found
+ */
+bool ice_is_phy_rclk_present(struct ice_hw *hw)
+{
+ if (ice_find_netlist_node(hw, ICE_AQC_LINK_TOPO_NODE_TYPE_CLK_CTRL,
+ ICE_AQC_GET_LINK_TOPO_NODE_NR_C827, NULL) &&
+ ice_find_netlist_node(hw, ICE_AQC_LINK_TOPO_NODE_TYPE_CLK_CTRL,
+ ICE_AQC_GET_LINK_TOPO_NODE_NR_E822_PHY, NULL))
+ return false;
+
+ return true;
+}
+
+/**
+ * ice_is_clock_mux_present_e810t
+ * @hw: pointer to the hw struct
+ *
+ * Check if the Clock Multiplexer device is present in the netlist
+ * Return:
+ * * true - device found in netlist
+ * * false - device not found
+ */
+bool ice_is_clock_mux_present_e810t(struct ice_hw *hw)
+{
+ if (ice_find_netlist_node(hw, ICE_AQC_LINK_TOPO_NODE_TYPE_CLK_MUX,
+ ICE_AQC_GET_LINK_TOPO_NODE_NR_GEN_CLK_MUX,
+ NULL))
+ return false;
+
+ return true;
+}
+
+/**
+ * ice_get_pf_c827_idx - find and return the C827 index for the current pf
+ * @hw: pointer to the hw struct
+ * @idx: index of the found C827 PHY
+ * Return:
+ * * 0 - success
+ * * negative - failure
+ */
+int ice_get_pf_c827_idx(struct ice_hw *hw, u8 *idx)
+{
+ struct ice_aqc_get_link_topo cmd;
+ u8 node_part_number;
+ u16 node_handle;
+ int status;
+ u8 ctx;
+
+ if (hw->mac_type != ICE_MAC_E810)
+ return -ENODEV;
+
+ if (hw->device_id != ICE_DEV_ID_E810C_QSFP) {
+ *idx = C827_0;
+ return 0;
+ }
+
+ memset(&cmd, 0, sizeof(cmd));
+
+ ctx = ICE_AQC_LINK_TOPO_NODE_TYPE_PHY << ICE_AQC_LINK_TOPO_NODE_TYPE_S;
+ ctx |= ICE_AQC_LINK_TOPO_NODE_CTX_PORT << ICE_AQC_LINK_TOPO_NODE_CTX_S;
+ cmd.addr.topo_params.node_type_ctx = ctx;
+
+ status = ice_aq_get_netlist_node(hw, &cmd, &node_part_number,
+ &node_handle);
+ if (status || node_part_number != ICE_AQC_GET_LINK_TOPO_NODE_NR_C827)
+ return -ENOENT;
+
+ if (node_handle == E810C_QSFP_C827_0_HANDLE)
+ *idx = C827_0;
+ else if (node_handle == E810C_QSFP_C827_1_HANDLE)
+ *idx = C827_1;
+ else
+ return -EIO;
+
+ return 0;
}
/**
@@ -3359,10 +3646,14 @@ int ice_clear_phy_tstamp(struct ice_hw *hw, u8 block, u8 idx)
*/
void ice_ptp_reset_ts_memory(struct ice_hw *hw)
{
- if (ice_is_e810(hw))
+ switch (hw->phy_model) {
+ case ICE_PHY_E822:
+ ice_ptp_reset_ts_memory_e822(hw);
+ break;
+ case ICE_PHY_E810:
+ default:
return;
-
- ice_ptp_reset_ts_memory_e822(hw);
+ }
}
/**
@@ -3381,10 +3672,14 @@ int ice_ptp_init_phc(struct ice_hw *hw)
/* Clear event err indications for auxiliary pins */
(void)rd32(hw, GLTSYN_STAT(src_idx));
- if (ice_is_e810(hw))
+ switch (hw->phy_model) {
+ case ICE_PHY_E810:
return ice_ptp_init_phc_e810(hw);
- else
+ case ICE_PHY_E822:
return ice_ptp_init_phc_e822(hw);
+ default:
+ return -EOPNOTSUPP;
+ }
}
/**
@@ -3400,10 +3695,335 @@ int ice_ptp_init_phc(struct ice_hw *hw)
*/
int ice_get_phy_tx_tstamp_ready(struct ice_hw *hw, u8 block, u64 *tstamp_ready)
{
- if (ice_is_e810(hw))
+ switch (hw->phy_model) {
+ case ICE_PHY_E810:
return ice_get_phy_tx_tstamp_ready_e810(hw, block,
tstamp_ready);
- else
+ case ICE_PHY_E822:
return ice_get_phy_tx_tstamp_ready_e822(hw, block,
tstamp_ready);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+/**
+ * ice_is_cgu_present - check for CGU presence
+ * @hw: pointer to the hw struct
+ *
+ * Check if the Clock Generation Unit (CGU) device is present in the netlist
+ * Return:
+ * * true - cgu is present
+ * * false - cgu is not present
+ */
+bool ice_is_cgu_present(struct ice_hw *hw)
+{
+ if (!ice_find_netlist_node(hw, ICE_AQC_LINK_TOPO_NODE_TYPE_CLK_CTRL,
+ ICE_AQC_GET_LINK_TOPO_NODE_NR_ZL30632_80032,
+ NULL)) {
+ hw->cgu_part_number = ICE_AQC_GET_LINK_TOPO_NODE_NR_ZL30632_80032;
+ return true;
+ } else if (!ice_find_netlist_node(hw,
+ ICE_AQC_LINK_TOPO_NODE_TYPE_CLK_CTRL,
+ ICE_AQC_GET_LINK_TOPO_NODE_NR_SI5383_5384,
+ NULL)) {
+ hw->cgu_part_number = ICE_AQC_GET_LINK_TOPO_NODE_NR_SI5383_5384;
+ return true;
+ }
+
+ return false;
+}
+
+/**
+ * ice_cgu_get_pin_desc_e823 - get pin description array
+ * @hw: pointer to the hw struct
+ * @input: if request is done against input or output pin
+ * @size: number of inputs/outputs
+ *
+ * Return: pointer to pin description array associated to given hw.
+ */
+static const struct ice_cgu_pin_desc *
+ice_cgu_get_pin_desc_e823(struct ice_hw *hw, bool input, int *size)
+{
+ static const struct ice_cgu_pin_desc *t;
+
+ if (hw->cgu_part_number ==
+ ICE_AQC_GET_LINK_TOPO_NODE_NR_ZL30632_80032) {
+ if (input) {
+ t = ice_e823_zl_cgu_inputs;
+ *size = ARRAY_SIZE(ice_e823_zl_cgu_inputs);
+ } else {
+ t = ice_e823_zl_cgu_outputs;
+ *size = ARRAY_SIZE(ice_e823_zl_cgu_outputs);
+ }
+ } else if (hw->cgu_part_number ==
+ ICE_AQC_GET_LINK_TOPO_NODE_NR_SI5383_5384) {
+ if (input) {
+ t = ice_e823_si_cgu_inputs;
+ *size = ARRAY_SIZE(ice_e823_si_cgu_inputs);
+ } else {
+ t = ice_e823_si_cgu_outputs;
+ *size = ARRAY_SIZE(ice_e823_si_cgu_outputs);
+ }
+ } else {
+ t = NULL;
+ *size = 0;
+ }
+
+ return t;
+}
+
+/**
+ * ice_cgu_get_pin_desc - get pin description array
+ * @hw: pointer to the hw struct
+ * @input: if request is done against input or output pins
+ * @size: size of array returned by function
+ *
+ * Return: pointer to pin description array associated to given hw.
+ */
+static const struct ice_cgu_pin_desc *
+ice_cgu_get_pin_desc(struct ice_hw *hw, bool input, int *size)
+{
+ const struct ice_cgu_pin_desc *t = NULL;
+
+ switch (hw->device_id) {
+ case ICE_DEV_ID_E810C_SFP:
+ if (input) {
+ t = ice_e810t_sfp_cgu_inputs;
+ *size = ARRAY_SIZE(ice_e810t_sfp_cgu_inputs);
+ } else {
+ t = ice_e810t_sfp_cgu_outputs;
+ *size = ARRAY_SIZE(ice_e810t_sfp_cgu_outputs);
+ }
+ break;
+ case ICE_DEV_ID_E810C_QSFP:
+ if (input) {
+ t = ice_e810t_qsfp_cgu_inputs;
+ *size = ARRAY_SIZE(ice_e810t_qsfp_cgu_inputs);
+ } else {
+ t = ice_e810t_qsfp_cgu_outputs;
+ *size = ARRAY_SIZE(ice_e810t_qsfp_cgu_outputs);
+ }
+ break;
+ case ICE_DEV_ID_E823L_10G_BASE_T:
+ case ICE_DEV_ID_E823L_1GBE:
+ case ICE_DEV_ID_E823L_BACKPLANE:
+ case ICE_DEV_ID_E823L_QSFP:
+ case ICE_DEV_ID_E823L_SFP:
+ case ICE_DEV_ID_E823C_10G_BASE_T:
+ case ICE_DEV_ID_E823C_BACKPLANE:
+ case ICE_DEV_ID_E823C_QSFP:
+ case ICE_DEV_ID_E823C_SFP:
+ case ICE_DEV_ID_E823C_SGMII:
+ t = ice_cgu_get_pin_desc_e823(hw, input, size);
+ break;
+ default:
+ break;
+ }
+
+ return t;
+}
+
+/**
+ * ice_cgu_get_pin_type - get pin's type
+ * @hw: pointer to the hw struct
+ * @pin: pin index
+ * @input: if request is done against input or output pin
+ *
+ * Return: type of a pin.
+ */
+enum dpll_pin_type ice_cgu_get_pin_type(struct ice_hw *hw, u8 pin, bool input)
+{
+ const struct ice_cgu_pin_desc *t;
+ int t_size;
+
+ t = ice_cgu_get_pin_desc(hw, input, &t_size);
+
+ if (!t)
+ return 0;
+
+ if (pin >= t_size)
+ return 0;
+
+ return t[pin].type;
+}
+
+/**
+ * ice_cgu_get_pin_freq_supp - get pin's supported frequency
+ * @hw: pointer to the hw struct
+ * @pin: pin index
+ * @input: if request is done against input or output pin
+ * @num: output number of supported frequencies
+ *
+ * Get frequency supported number and array of supported frequencies.
+ *
+ * Return: array of supported frequencies for given pin.
+ */
+struct dpll_pin_frequency *
+ice_cgu_get_pin_freq_supp(struct ice_hw *hw, u8 pin, bool input, u8 *num)
+{
+ const struct ice_cgu_pin_desc *t;
+ int t_size;
+
+ *num = 0;
+ t = ice_cgu_get_pin_desc(hw, input, &t_size);
+ if (!t)
+ return NULL;
+ if (pin >= t_size)
+ return NULL;
+ *num = t[pin].freq_supp_num;
+
+ return t[pin].freq_supp;
+}
+
+/**
+ * ice_cgu_get_pin_name - get pin's name
+ * @hw: pointer to the hw struct
+ * @pin: pin index
+ * @input: if request is done against input or output pin
+ *
+ * Return:
+ * * null terminated char array with name
+ * * NULL in case of failure
+ */
+const char *ice_cgu_get_pin_name(struct ice_hw *hw, u8 pin, bool input)
+{
+ const struct ice_cgu_pin_desc *t;
+ int t_size;
+
+ t = ice_cgu_get_pin_desc(hw, input, &t_size);
+
+ if (!t)
+ return NULL;
+
+ if (pin >= t_size)
+ return NULL;
+
+ return t[pin].name;
+}
+
+/**
+ * ice_get_cgu_state - get the state of the DPLL
+ * @hw: pointer to the hw struct
+ * @dpll_idx: Index of internal DPLL unit
+ * @last_dpll_state: last known state of DPLL
+ * @pin: pointer to a buffer for returning currently active pin
+ * @ref_state: reference clock state
+ * @eec_mode: eec mode of the DPLL
+ * @phase_offset: pointer to a buffer for returning phase offset
+ * @dpll_state: state of the DPLL (output)
+ *
+ * This function will read the state of the DPLL(dpll_idx). Non-null
+ * 'pin', 'ref_state', 'eec_mode' and 'phase_offset' parameters are used to
+ * retrieve currently active pin, state, mode and phase_offset respectively.
+ *
+ * Return: state of the DPLL
+ */
+int ice_get_cgu_state(struct ice_hw *hw, u8 dpll_idx,
+ enum dpll_lock_status last_dpll_state, u8 *pin,
+ u8 *ref_state, u8 *eec_mode, s64 *phase_offset,
+ enum dpll_lock_status *dpll_state)
+{
+ u8 hw_ref_state, hw_dpll_state, hw_eec_mode, hw_config;
+ s64 hw_phase_offset;
+ int status;
+
+ status = ice_aq_get_cgu_dpll_status(hw, dpll_idx, &hw_ref_state,
+ &hw_dpll_state, &hw_config,
+ &hw_phase_offset, &hw_eec_mode);
+ if (status)
+ return status;
+
+ if (pin)
+ /* current ref pin in dpll_state_refsel_status_X register */
+ *pin = hw_config & ICE_AQC_GET_CGU_DPLL_CONFIG_CLK_REF_SEL;
+ if (phase_offset)
+ *phase_offset = hw_phase_offset;
+ if (ref_state)
+ *ref_state = hw_ref_state;
+ if (eec_mode)
+ *eec_mode = hw_eec_mode;
+ if (!dpll_state)
+ return 0;
+
+ /* According to ZL DPLL documentation, once state reach LOCKED_HO_ACQ
+ * it would never return to FREERUN. This aligns to ITU-T G.781
+ * Recommendation. We cannot report HOLDOVER as HO memory is cleared
+ * while switching to another reference.
+ * Only for situations where previous state was either: "LOCKED without
+ * HO_ACQ" or "HOLDOVER" we actually back to FREERUN.
+ */
+ if (hw_dpll_state & ICE_AQC_GET_CGU_DPLL_STATUS_STATE_LOCK) {
+ if (hw_dpll_state & ICE_AQC_GET_CGU_DPLL_STATUS_STATE_HO_READY)
+ *dpll_state = DPLL_LOCK_STATUS_LOCKED_HO_ACQ;
+ else
+ *dpll_state = DPLL_LOCK_STATUS_LOCKED;
+ } else if (last_dpll_state == DPLL_LOCK_STATUS_LOCKED_HO_ACQ ||
+ last_dpll_state == DPLL_LOCK_STATUS_HOLDOVER) {
+ *dpll_state = DPLL_LOCK_STATUS_HOLDOVER;
+ } else {
+ *dpll_state = DPLL_LOCK_STATUS_UNLOCKED;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_get_cgu_rclk_pin_info - get info on available recovered clock pins
+ * @hw: pointer to the hw struct
+ * @base_idx: returns index of first recovered clock pin on device
+ * @pin_num: returns number of recovered clock pins available on device
+ *
+ * Based on hw provide caller info about recovery clock pins available on the
+ * board.
+ *
+ * Return:
+ * * 0 - success, information is valid
+ * * negative - failure, information is not valid
+ */
+int ice_get_cgu_rclk_pin_info(struct ice_hw *hw, u8 *base_idx, u8 *pin_num)
+{
+ u8 phy_idx;
+ int ret;
+
+ switch (hw->device_id) {
+ case ICE_DEV_ID_E810C_SFP:
+ case ICE_DEV_ID_E810C_QSFP:
+
+ ret = ice_get_pf_c827_idx(hw, &phy_idx);
+ if (ret)
+ return ret;
+ *base_idx = E810T_CGU_INPUT_C827(phy_idx, ICE_RCLKA_PIN);
+ *pin_num = ICE_E810_RCLK_PINS_NUM;
+ ret = 0;
+ break;
+ case ICE_DEV_ID_E823L_10G_BASE_T:
+ case ICE_DEV_ID_E823L_1GBE:
+ case ICE_DEV_ID_E823L_BACKPLANE:
+ case ICE_DEV_ID_E823L_QSFP:
+ case ICE_DEV_ID_E823L_SFP:
+ case ICE_DEV_ID_E823C_10G_BASE_T:
+ case ICE_DEV_ID_E823C_BACKPLANE:
+ case ICE_DEV_ID_E823C_QSFP:
+ case ICE_DEV_ID_E823C_SFP:
+ case ICE_DEV_ID_E823C_SGMII:
+ *pin_num = ICE_E822_RCLK_PINS_NUM;
+ ret = 0;
+ if (hw->cgu_part_number ==
+ ICE_AQC_GET_LINK_TOPO_NODE_NR_ZL30632_80032)
+ *base_idx = ZL_REF1P;
+ else if (hw->cgu_part_number ==
+ ICE_AQC_GET_LINK_TOPO_NODE_NR_SI5383_5384)
+ *base_idx = SI_REF1P;
+ else
+ ret = -ENODEV;
+
+ break;
+ default:
+ ret = -ENODEV;
+ break;
+ }
+
+ return ret;
}
diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_hw.h b/drivers/net/ethernet/intel/ice/ice_ptp_hw.h
index 9aa10b0426fd..6f277e7b06b9 100644
--- a/drivers/net/ethernet/intel/ice/ice_ptp_hw.h
+++ b/drivers/net/ethernet/intel/ice/ice_ptp_hw.h
@@ -3,13 +3,14 @@
#ifndef _ICE_PTP_HW_H_
#define _ICE_PTP_HW_H_
+#include <linux/dpll.h>
enum ice_ptp_tmr_cmd {
- INIT_TIME,
- INIT_INCVAL,
- ADJ_TIME,
- ADJ_TIME_AT_TIME,
- READ_TIME,
+ ICE_PTP_INIT_TIME,
+ ICE_PTP_INIT_INCVAL,
+ ICE_PTP_ADJ_TIME,
+ ICE_PTP_ADJ_TIME_AT_TIME,
+ ICE_PTP_READ_TIME,
ICE_PTP_NOP,
};
@@ -110,6 +111,77 @@ struct ice_cgu_pll_params_e822 {
u32 post_pll_div;
};
+#define E810C_QSFP_C827_0_HANDLE 2
+#define E810C_QSFP_C827_1_HANDLE 3
+enum ice_e810_c827_idx {
+ C827_0,
+ C827_1
+};
+
+enum ice_phy_rclk_pins {
+ ICE_RCLKA_PIN = 0, /* SCL pin */
+ ICE_RCLKB_PIN, /* SDA pin */
+};
+
+#define ICE_E810_RCLK_PINS_NUM (ICE_RCLKB_PIN + 1)
+#define ICE_E822_RCLK_PINS_NUM (ICE_RCLKA_PIN + 1)
+#define E810T_CGU_INPUT_C827(_phy, _pin) ((_phy) * ICE_E810_RCLK_PINS_NUM + \
+ (_pin) + ZL_REF1P)
+
+enum ice_zl_cgu_in_pins {
+ ZL_REF0P = 0,
+ ZL_REF0N,
+ ZL_REF1P,
+ ZL_REF1N,
+ ZL_REF2P,
+ ZL_REF2N,
+ ZL_REF3P,
+ ZL_REF3N,
+ ZL_REF4P,
+ ZL_REF4N,
+ NUM_ZL_CGU_INPUT_PINS
+};
+
+enum ice_zl_cgu_out_pins {
+ ZL_OUT0 = 0,
+ ZL_OUT1,
+ ZL_OUT2,
+ ZL_OUT3,
+ ZL_OUT4,
+ ZL_OUT5,
+ ZL_OUT6,
+ NUM_ZL_CGU_OUTPUT_PINS
+};
+
+enum ice_si_cgu_in_pins {
+ SI_REF0P = 0,
+ SI_REF0N,
+ SI_REF1P,
+ SI_REF1N,
+ SI_REF2P,
+ SI_REF2N,
+ SI_REF3,
+ SI_REF4,
+ NUM_SI_CGU_INPUT_PINS
+};
+
+enum ice_si_cgu_out_pins {
+ SI_OUT0 = 0,
+ SI_OUT1,
+ SI_OUT2,
+ SI_OUT3,
+ SI_OUT4,
+ NUM_SI_CGU_OUTPUT_PINS
+};
+
+struct ice_cgu_pin_desc {
+ char *name;
+ u8 index;
+ enum dpll_pin_type type;
+ u32 freq_supp_num;
+ struct dpll_pin_frequency *freq_supp;
+};
+
extern const struct
ice_cgu_pll_params_e822 e822_cgu_params[NUM_ICE_TIME_REF_FREQ];
@@ -131,6 +203,7 @@ extern const struct ice_vernier_info_e822 e822_vernier[NUM_ICE_PTP_LNK_SPD];
u8 ice_get_ptp_src_clock_index(struct ice_hw *hw);
bool ice_ptp_lock(struct ice_hw *hw);
void ice_ptp_unlock(struct ice_hw *hw);
+void ice_ptp_src_cmd(struct ice_hw *hw, enum ice_ptp_tmr_cmd cmd);
int ice_ptp_init_time(struct ice_hw *hw, u64 time);
int ice_ptp_write_incval(struct ice_hw *hw, u64 incval);
int ice_ptp_write_incval_locked(struct ice_hw *hw, u64 incval);
@@ -197,6 +270,22 @@ int ice_ptp_init_phy_e810(struct ice_hw *hw);
int ice_read_sma_ctrl_e810t(struct ice_hw *hw, u8 *data);
int ice_write_sma_ctrl_e810t(struct ice_hw *hw, u8 data);
int ice_read_pca9575_reg_e810t(struct ice_hw *hw, u8 offset, u8 *data);
+bool ice_is_pca9575_present(struct ice_hw *hw);
+bool ice_is_phy_rclk_present(struct ice_hw *hw);
+bool ice_is_clock_mux_present_e810t(struct ice_hw *hw);
+int ice_get_pf_c827_idx(struct ice_hw *hw, u8 *idx);
+bool ice_is_cgu_present(struct ice_hw *hw);
+enum dpll_pin_type ice_cgu_get_pin_type(struct ice_hw *hw, u8 pin, bool input);
+struct dpll_pin_frequency *
+ice_cgu_get_pin_freq_supp(struct ice_hw *hw, u8 pin, bool input, u8 *num);
+const char *ice_cgu_get_pin_name(struct ice_hw *hw, u8 pin, bool input);
+int ice_get_cgu_state(struct ice_hw *hw, u8 dpll_idx,
+ enum dpll_lock_status last_dpll_state, u8 *pin,
+ u8 *ref_state, u8 *eec_mode, s64 *phase_offset,
+ enum dpll_lock_status *dpll_state);
+int ice_get_cgu_rclk_pin_info(struct ice_hw *hw, u8 *base_idx, u8 *pin_num);
+
+void ice_ptp_init_phy_model(struct ice_hw *hw);
#define PFTSYN_SEM_BYTES 4
diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h
index 5e353b0cbe6f..bb5d8b681bc2 100644
--- a/drivers/net/ethernet/intel/ice/ice_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_type.h
@@ -822,6 +822,13 @@ struct ice_mbx_data {
u16 async_watermark_val;
};
+/* PHY model */
+enum ice_phy_model {
+ ICE_PHY_UNSUP = -1,
+ ICE_PHY_E810 = 1,
+ ICE_PHY_E822,
+};
+
/* Port hardware description */
struct ice_hw {
u8 __iomem *hw_addr;
@@ -843,6 +850,7 @@ struct ice_hw {
u8 revision_id;
u8 pf_id; /* device profile info */
+ enum ice_phy_model phy_model;
u16 max_burst_size; /* driver sets this value */
@@ -901,13 +909,13 @@ struct ice_hw {
/* INTRL granularity in 1 us */
u8 intrl_gran;
-#define ICE_PHY_PER_NAC 1
-#define ICE_MAX_QUAD 2
-#define ICE_NUM_QUAD_TYPE 2
-#define ICE_PORTS_PER_QUAD 4
-#define ICE_PHY_0_LAST_QUAD 1
-#define ICE_PORTS_PER_PHY 8
-#define ICE_NUM_EXTERNAL_PORTS ICE_PORTS_PER_PHY
+#define ICE_PHY_PER_NAC_E822 1
+#define ICE_MAX_QUAD 2
+#define ICE_QUADS_PER_PHY_E822 2
+#define ICE_PORTS_PER_PHY_E822 8
+#define ICE_PORTS_PER_QUAD 4
+#define ICE_PORTS_PER_PHY_E810 4
+#define ICE_NUM_EXTERNAL_PORTS (ICE_MAX_QUAD * ICE_PORTS_PER_QUAD)
/* Active package version (currently active) */
struct ice_pkg_ver active_pkg_ver;
@@ -965,6 +973,7 @@ struct ice_hw {
DECLARE_BITMAP(hw_ptype, ICE_FLOW_PTYPE_MAX);
u8 dvm_ena;
u16 io_expander_handle;
+ u8 cgu_part_number;
};
/* Statistics collected by each port, VSI, VEB, and S-channel */
diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.h b/drivers/net/ethernet/intel/ice/ice_vf_lib.h
index 48fea6fa0362..31a082e8a827 100644
--- a/drivers/net/ethernet/intel/ice/ice_vf_lib.h
+++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.h
@@ -123,6 +123,9 @@ struct ice_vf {
u8 num_req_qs; /* num of queue pairs requested by VF */
u16 num_mac;
u16 num_vf_qs; /* num of queue configured per VF */
+ u8 vlan_strip_ena; /* Outer and Inner VLAN strip enable */
+#define ICE_INNER_VLAN_STRIP_ENA BIT(0)
+#define ICE_OUTER_VLAN_STRIP_ENA BIT(1)
struct ice_mdd_vf_events mdd_rx_events;
struct ice_mdd_vf_events mdd_tx_events;
DECLARE_BITMAP(opcodes_allowlist, VIRTCHNL_OP_MAX);
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.c b/drivers/net/ethernet/intel/ice/ice_virtchnl.c
index db97353efd06..01e88b6e43a1 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.c
@@ -486,6 +486,9 @@ static int ice_vc_get_vf_res_msg(struct ice_vf *vf, u8 *msg)
if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_REQ_QUEUES)
vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_REQ_QUEUES;
+ if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_CRC)
+ vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_CRC;
+
if (vf->driver_caps & VIRTCHNL_VF_CAP_ADV_LINK_SPEED)
vfres->vf_cap_flags |= VIRTCHNL_VF_CAP_ADV_LINK_SPEED;
@@ -1621,6 +1624,15 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg)
}
for (i = 0; i < qci->num_queue_pairs; i++) {
+ if (!qci->qpair[i].rxq.crc_disable)
+ continue;
+
+ if (!(vf->driver_caps & VIRTCHNL_VF_OFFLOAD_CRC) ||
+ vf->vlan_strip_ena)
+ goto error_param;
+ }
+
+ for (i = 0; i < qci->num_queue_pairs; i++) {
qpi = &qci->qpair[i];
if (qpi->txq.vsi_id != qci->vsi_id ||
qpi->rxq.vsi_id != qci->vsi_id ||
@@ -1666,6 +1678,13 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg)
vsi->rx_rings[i]->dma = qpi->rxq.dma_ring_addr;
vsi->rx_rings[i]->count = qpi->rxq.ring_len;
+ if (qpi->rxq.crc_disable)
+ vsi->rx_rings[q_idx]->flags |=
+ ICE_RX_FLAGS_CRC_STRIP_DIS;
+ else
+ vsi->rx_rings[q_idx]->flags &=
+ ~ICE_RX_FLAGS_CRC_STRIP_DIS;
+
if (qpi->rxq.databuffer_size != 0 &&
(qpi->rxq.databuffer_size > ((16 * 1024) - 128) ||
qpi->rxq.databuffer_size < 1024))
@@ -2411,6 +2430,21 @@ static int ice_vc_remove_vlan_msg(struct ice_vf *vf, u8 *msg)
}
/**
+ * ice_vsi_is_rxq_crc_strip_dis - check if Rx queue CRC strip is disabled or not
+ * @vsi: pointer to the VF VSI info
+ */
+static bool ice_vsi_is_rxq_crc_strip_dis(struct ice_vsi *vsi)
+{
+ unsigned int i;
+
+ ice_for_each_alloc_rxq(vsi, i)
+ if (vsi->rx_rings[i]->flags & ICE_RX_FLAGS_CRC_STRIP_DIS)
+ return true;
+
+ return false;
+}
+
+/**
* ice_vc_ena_vlan_stripping
* @vf: pointer to the VF info
*
@@ -2439,6 +2473,8 @@ static int ice_vc_ena_vlan_stripping(struct ice_vf *vf)
if (vsi->inner_vlan_ops.ena_stripping(vsi, ETH_P_8021Q))
v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ else
+ vf->vlan_strip_ena |= ICE_INNER_VLAN_STRIP_ENA;
error_param:
return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_ENABLE_VLAN_STRIPPING,
@@ -2474,6 +2510,8 @@ static int ice_vc_dis_vlan_stripping(struct ice_vf *vf)
if (vsi->inner_vlan_ops.dis_stripping(vsi))
v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ else
+ vf->vlan_strip_ena &= ~ICE_INNER_VLAN_STRIP_ENA;
error_param:
return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_DISABLE_VLAN_STRIPPING,
@@ -2651,6 +2689,8 @@ static int ice_vf_init_vlan_stripping(struct ice_vf *vf)
{
struct ice_vsi *vsi = ice_get_vf_vsi(vf);
+ vf->vlan_strip_ena = 0;
+
if (!vsi)
return -EINVAL;
@@ -2660,10 +2700,16 @@ static int ice_vf_init_vlan_stripping(struct ice_vf *vf)
if (ice_vf_is_port_vlan_ena(vf) && !ice_is_dvm_ena(&vsi->back->hw))
return 0;
- if (ice_vf_vlan_offload_ena(vf->driver_caps))
- return vsi->inner_vlan_ops.ena_stripping(vsi, ETH_P_8021Q);
- else
- return vsi->inner_vlan_ops.dis_stripping(vsi);
+ if (ice_vf_vlan_offload_ena(vf->driver_caps)) {
+ int err;
+
+ err = vsi->inner_vlan_ops.ena_stripping(vsi, ETH_P_8021Q);
+ if (!err)
+ vf->vlan_strip_ena |= ICE_INNER_VLAN_STRIP_ENA;
+ return err;
+ }
+
+ return vsi->inner_vlan_ops.dis_stripping(vsi);
}
static u16 ice_vc_get_max_vlan_fltrs(struct ice_vf *vf)
@@ -3437,6 +3483,11 @@ static int ice_vc_ena_vlan_stripping_v2_msg(struct ice_vf *vf, u8 *msg)
goto out;
}
+ if (ice_vsi_is_rxq_crc_strip_dis(vsi)) {
+ v_ret = VIRTCHNL_STATUS_ERR_NOT_SUPPORTED;
+ goto out;
+ }
+
ethertype_setting = strip_msg->outer_ethertype_setting;
if (ethertype_setting) {
if (ice_vc_ena_vlan_offload(vsi,
@@ -3457,6 +3508,8 @@ static int ice_vc_ena_vlan_stripping_v2_msg(struct ice_vf *vf, u8 *msg)
* enabled, is extracted in L2TAG1.
*/
ice_vsi_update_l2tsel(vsi, l2tsel);
+
+ vf->vlan_strip_ena |= ICE_OUTER_VLAN_STRIP_ENA;
}
}
@@ -3468,6 +3521,9 @@ static int ice_vc_ena_vlan_stripping_v2_msg(struct ice_vf *vf, u8 *msg)
goto out;
}
+ if (ethertype_setting)
+ vf->vlan_strip_ena |= ICE_INNER_VLAN_STRIP_ENA;
+
out:
return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_ENABLE_VLAN_STRIPPING_V2,
v_ret, NULL, 0);
@@ -3529,6 +3585,8 @@ static int ice_vc_dis_vlan_stripping_v2_msg(struct ice_vf *vf, u8 *msg)
* in L2TAG1.
*/
ice_vsi_update_l2tsel(vsi, l2tsel);
+
+ vf->vlan_strip_ena &= ~ICE_OUTER_VLAN_STRIP_ENA;
}
}
@@ -3538,6 +3596,9 @@ static int ice_vc_dis_vlan_stripping_v2_msg(struct ice_vf *vf, u8 *msg)
goto out;
}
+ if (ethertype_setting)
+ vf->vlan_strip_ena &= ~ICE_INNER_VLAN_STRIP_ENA;
+
out:
return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_DISABLE_VLAN_STRIPPING_V2,
v_ret, NULL, 0);
diff --git a/drivers/net/ethernet/intel/idpf/Makefile b/drivers/net/ethernet/intel/idpf/Makefile
new file mode 100644
index 000000000000..6844ead2f3ac
--- /dev/null
+++ b/drivers/net/ethernet/intel/idpf/Makefile
@@ -0,0 +1,18 @@
+# SPDX-License-Identifier: GPL-2.0-only
+# Copyright (C) 2023 Intel Corporation
+
+# Makefile for Intel(R) Infrastructure Data Path Function Linux Driver
+
+obj-$(CONFIG_IDPF) += idpf.o
+
+idpf-y := \
+ idpf_controlq.o \
+ idpf_controlq_setup.o \
+ idpf_dev.o \
+ idpf_ethtool.o \
+ idpf_lib.o \
+ idpf_main.o \
+ idpf_singleq_txrx.o \
+ idpf_txrx.o \
+ idpf_virtchnl.o \
+ idpf_vf_dev.o
diff --git a/drivers/net/ethernet/intel/idpf/idpf.h b/drivers/net/ethernet/intel/idpf/idpf.h
new file mode 100644
index 000000000000..bee73353b56a
--- /dev/null
+++ b/drivers/net/ethernet/intel/idpf/idpf.h
@@ -0,0 +1,968 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright (C) 2023 Intel Corporation */
+
+#ifndef _IDPF_H_
+#define _IDPF_H_
+
+/* Forward declaration */
+struct idpf_adapter;
+struct idpf_vport;
+struct idpf_vport_max_q;
+
+#include <net/pkt_sched.h>
+#include <linux/aer.h>
+#include <linux/etherdevice.h>
+#include <linux/pci.h>
+#include <linux/bitfield.h>
+#include <linux/sctp.h>
+#include <linux/ethtool.h>
+#include <net/gro.h>
+#include <linux/dim.h>
+
+#include "virtchnl2.h"
+#include "idpf_lan_txrx.h"
+#include "idpf_txrx.h"
+#include "idpf_controlq.h"
+
+#define GETMAXVAL(num_bits) GENMASK((num_bits) - 1, 0)
+
+#define IDPF_NO_FREE_SLOT 0xffff
+
+/* Default Mailbox settings */
+#define IDPF_NUM_FILTERS_PER_MSG 20
+#define IDPF_NUM_DFLT_MBX_Q 2 /* includes both TX and RX */
+#define IDPF_DFLT_MBX_Q_LEN 64
+#define IDPF_DFLT_MBX_ID -1
+/* maximum number of times to try before resetting mailbox */
+#define IDPF_MB_MAX_ERR 20
+#define IDPF_NUM_CHUNKS_PER_MSG(struct_sz, chunk_sz) \
+ ((IDPF_CTLQ_MAX_BUF_LEN - (struct_sz)) / (chunk_sz))
+#define IDPF_WAIT_FOR_EVENT_TIMEO_MIN 2000
+#define IDPF_WAIT_FOR_EVENT_TIMEO 60000
+
+#define IDPF_MAX_WAIT 500
+
+/* available message levels */
+#define IDPF_AVAIL_NETIF_M (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK)
+
+#define IDPF_DIM_PROFILE_SLOTS 5
+
+#define IDPF_VIRTCHNL_VERSION_MAJOR VIRTCHNL2_VERSION_MAJOR_2
+#define IDPF_VIRTCHNL_VERSION_MINOR VIRTCHNL2_VERSION_MINOR_0
+
+/**
+ * struct idpf_mac_filter
+ * @list: list member field
+ * @macaddr: MAC address
+ * @remove: filter should be removed (virtchnl)
+ * @add: filter should be added (virtchnl)
+ */
+struct idpf_mac_filter {
+ struct list_head list;
+ u8 macaddr[ETH_ALEN];
+ bool remove;
+ bool add;
+};
+
+/**
+ * enum idpf_state - State machine to handle bring up
+ * @__IDPF_STARTUP: Start the state machine
+ * @__IDPF_VER_CHECK: Negotiate virtchnl version
+ * @__IDPF_GET_CAPS: Negotiate capabilities
+ * @__IDPF_INIT_SW: Init based on given capabilities
+ * @__IDPF_STATE_LAST: Must be last, used to determine size
+ */
+enum idpf_state {
+ __IDPF_STARTUP,
+ __IDPF_VER_CHECK,
+ __IDPF_GET_CAPS,
+ __IDPF_INIT_SW,
+ __IDPF_STATE_LAST,
+};
+
+/**
+ * enum idpf_flags - Hard reset causes.
+ * @IDPF_HR_FUNC_RESET: Hard reset when TxRx timeout
+ * @IDPF_HR_DRV_LOAD: Set on driver load for a clean HW
+ * @IDPF_HR_RESET_IN_PROG: Reset in progress
+ * @IDPF_REMOVE_IN_PROG: Driver remove in progress
+ * @IDPF_MB_INTR_MODE: Mailbox in interrupt mode
+ * @IDPF_FLAGS_NBITS: Must be last
+ */
+enum idpf_flags {
+ IDPF_HR_FUNC_RESET,
+ IDPF_HR_DRV_LOAD,
+ IDPF_HR_RESET_IN_PROG,
+ IDPF_REMOVE_IN_PROG,
+ IDPF_MB_INTR_MODE,
+ IDPF_FLAGS_NBITS,
+};
+
+/**
+ * enum idpf_cap_field - Offsets into capabilities struct for specific caps
+ * @IDPF_BASE_CAPS: generic base capabilities
+ * @IDPF_CSUM_CAPS: checksum offload capabilities
+ * @IDPF_SEG_CAPS: segmentation offload capabilities
+ * @IDPF_RSS_CAPS: RSS offload capabilities
+ * @IDPF_HSPLIT_CAPS: Header split capabilities
+ * @IDPF_RSC_CAPS: RSC offload capabilities
+ * @IDPF_OTHER_CAPS: miscellaneous offloads
+ *
+ * Used when checking for a specific capability flag since different capability
+ * sets are not mutually exclusive numerically, the caller must specify which
+ * type of capability they are checking for.
+ */
+enum idpf_cap_field {
+ IDPF_BASE_CAPS = -1,
+ IDPF_CSUM_CAPS = offsetof(struct virtchnl2_get_capabilities,
+ csum_caps),
+ IDPF_SEG_CAPS = offsetof(struct virtchnl2_get_capabilities,
+ seg_caps),
+ IDPF_RSS_CAPS = offsetof(struct virtchnl2_get_capabilities,
+ rss_caps),
+ IDPF_HSPLIT_CAPS = offsetof(struct virtchnl2_get_capabilities,
+ hsplit_caps),
+ IDPF_RSC_CAPS = offsetof(struct virtchnl2_get_capabilities,
+ rsc_caps),
+ IDPF_OTHER_CAPS = offsetof(struct virtchnl2_get_capabilities,
+ other_caps),
+};
+
+/**
+ * enum idpf_vport_state - Current vport state
+ * @__IDPF_VPORT_DOWN: Vport is down
+ * @__IDPF_VPORT_UP: Vport is up
+ * @__IDPF_VPORT_STATE_LAST: Must be last, number of states
+ */
+enum idpf_vport_state {
+ __IDPF_VPORT_DOWN,
+ __IDPF_VPORT_UP,
+ __IDPF_VPORT_STATE_LAST,
+};
+
+/**
+ * struct idpf_netdev_priv - Struct to store vport back pointer
+ * @adapter: Adapter back pointer
+ * @vport: Vport back pointer
+ * @vport_id: Vport identifier
+ * @vport_idx: Relative vport index
+ * @state: See enum idpf_vport_state
+ * @netstats: Packet and byte stats
+ * @stats_lock: Lock to protect stats update
+ */
+struct idpf_netdev_priv {
+ struct idpf_adapter *adapter;
+ struct idpf_vport *vport;
+ u32 vport_id;
+ u16 vport_idx;
+ enum idpf_vport_state state;
+ struct rtnl_link_stats64 netstats;
+ spinlock_t stats_lock;
+};
+
+/**
+ * struct idpf_reset_reg - Reset register offsets/masks
+ * @rstat: Reset status register
+ * @rstat_m: Reset status mask
+ */
+struct idpf_reset_reg {
+ void __iomem *rstat;
+ u32 rstat_m;
+};
+
+/**
+ * struct idpf_vport_max_q - Queue limits
+ * @max_rxq: Maximum number of RX queues supported
+ * @max_txq: Maixmum number of TX queues supported
+ * @max_bufq: In splitq, maximum number of buffer queues supported
+ * @max_complq: In splitq, maximum number of completion queues supported
+ */
+struct idpf_vport_max_q {
+ u16 max_rxq;
+ u16 max_txq;
+ u16 max_bufq;
+ u16 max_complq;
+};
+
+/**
+ * struct idpf_reg_ops - Device specific register operation function pointers
+ * @ctlq_reg_init: Mailbox control queue register initialization
+ * @intr_reg_init: Traffic interrupt register initialization
+ * @mb_intr_reg_init: Mailbox interrupt register initialization
+ * @reset_reg_init: Reset register initialization
+ * @trigger_reset: Trigger a reset to occur
+ */
+struct idpf_reg_ops {
+ void (*ctlq_reg_init)(struct idpf_ctlq_create_info *cq);
+ int (*intr_reg_init)(struct idpf_vport *vport);
+ void (*mb_intr_reg_init)(struct idpf_adapter *adapter);
+ void (*reset_reg_init)(struct idpf_adapter *adapter);
+ void (*trigger_reset)(struct idpf_adapter *adapter,
+ enum idpf_flags trig_cause);
+};
+
+/**
+ * struct idpf_dev_ops - Device specific operations
+ * @reg_ops: Register operations
+ */
+struct idpf_dev_ops {
+ struct idpf_reg_ops reg_ops;
+};
+
+/* These macros allow us to generate an enum and a matching char * array of
+ * stringified enums that are always in sync. Checkpatch issues a bogus warning
+ * about this being a complex macro; but it's wrong, these are never used as a
+ * statement and instead only used to define the enum and array.
+ */
+#define IDPF_FOREACH_VPORT_VC_STATE(STATE) \
+ STATE(IDPF_VC_CREATE_VPORT) \
+ STATE(IDPF_VC_CREATE_VPORT_ERR) \
+ STATE(IDPF_VC_ENA_VPORT) \
+ STATE(IDPF_VC_ENA_VPORT_ERR) \
+ STATE(IDPF_VC_DIS_VPORT) \
+ STATE(IDPF_VC_DIS_VPORT_ERR) \
+ STATE(IDPF_VC_DESTROY_VPORT) \
+ STATE(IDPF_VC_DESTROY_VPORT_ERR) \
+ STATE(IDPF_VC_CONFIG_TXQ) \
+ STATE(IDPF_VC_CONFIG_TXQ_ERR) \
+ STATE(IDPF_VC_CONFIG_RXQ) \
+ STATE(IDPF_VC_CONFIG_RXQ_ERR) \
+ STATE(IDPF_VC_ENA_QUEUES) \
+ STATE(IDPF_VC_ENA_QUEUES_ERR) \
+ STATE(IDPF_VC_DIS_QUEUES) \
+ STATE(IDPF_VC_DIS_QUEUES_ERR) \
+ STATE(IDPF_VC_MAP_IRQ) \
+ STATE(IDPF_VC_MAP_IRQ_ERR) \
+ STATE(IDPF_VC_UNMAP_IRQ) \
+ STATE(IDPF_VC_UNMAP_IRQ_ERR) \
+ STATE(IDPF_VC_ADD_QUEUES) \
+ STATE(IDPF_VC_ADD_QUEUES_ERR) \
+ STATE(IDPF_VC_DEL_QUEUES) \
+ STATE(IDPF_VC_DEL_QUEUES_ERR) \
+ STATE(IDPF_VC_ALLOC_VECTORS) \
+ STATE(IDPF_VC_ALLOC_VECTORS_ERR) \
+ STATE(IDPF_VC_DEALLOC_VECTORS) \
+ STATE(IDPF_VC_DEALLOC_VECTORS_ERR) \
+ STATE(IDPF_VC_SET_SRIOV_VFS) \
+ STATE(IDPF_VC_SET_SRIOV_VFS_ERR) \
+ STATE(IDPF_VC_GET_RSS_LUT) \
+ STATE(IDPF_VC_GET_RSS_LUT_ERR) \
+ STATE(IDPF_VC_SET_RSS_LUT) \
+ STATE(IDPF_VC_SET_RSS_LUT_ERR) \
+ STATE(IDPF_VC_GET_RSS_KEY) \
+ STATE(IDPF_VC_GET_RSS_KEY_ERR) \
+ STATE(IDPF_VC_SET_RSS_KEY) \
+ STATE(IDPF_VC_SET_RSS_KEY_ERR) \
+ STATE(IDPF_VC_GET_STATS) \
+ STATE(IDPF_VC_GET_STATS_ERR) \
+ STATE(IDPF_VC_ADD_MAC_ADDR) \
+ STATE(IDPF_VC_ADD_MAC_ADDR_ERR) \
+ STATE(IDPF_VC_DEL_MAC_ADDR) \
+ STATE(IDPF_VC_DEL_MAC_ADDR_ERR) \
+ STATE(IDPF_VC_GET_PTYPE_INFO) \
+ STATE(IDPF_VC_GET_PTYPE_INFO_ERR) \
+ STATE(IDPF_VC_LOOPBACK_STATE) \
+ STATE(IDPF_VC_LOOPBACK_STATE_ERR) \
+ STATE(IDPF_VC_NBITS)
+
+#define IDPF_GEN_ENUM(ENUM) ENUM,
+#define IDPF_GEN_STRING(STRING) #STRING,
+
+enum idpf_vport_vc_state {
+ IDPF_FOREACH_VPORT_VC_STATE(IDPF_GEN_ENUM)
+};
+
+extern const char * const idpf_vport_vc_state_str[];
+
+/**
+ * enum idpf_vport_reset_cause - Vport soft reset causes
+ * @IDPF_SR_Q_CHANGE: Soft reset queue change
+ * @IDPF_SR_Q_DESC_CHANGE: Soft reset descriptor change
+ * @IDPF_SR_MTU_CHANGE: Soft reset MTU change
+ * @IDPF_SR_RSC_CHANGE: Soft reset RSC change
+ */
+enum idpf_vport_reset_cause {
+ IDPF_SR_Q_CHANGE,
+ IDPF_SR_Q_DESC_CHANGE,
+ IDPF_SR_MTU_CHANGE,
+ IDPF_SR_RSC_CHANGE,
+};
+
+/**
+ * enum idpf_vport_flags - Vport flags
+ * @IDPF_VPORT_DEL_QUEUES: To send delete queues message
+ * @IDPF_VPORT_SW_MARKER: Indicate TX pipe drain software marker packets
+ * processing is done
+ * @IDPF_VPORT_FLAGS_NBITS: Must be last
+ */
+enum idpf_vport_flags {
+ IDPF_VPORT_DEL_QUEUES,
+ IDPF_VPORT_SW_MARKER,
+ IDPF_VPORT_FLAGS_NBITS,
+};
+
+struct idpf_port_stats {
+ struct u64_stats_sync stats_sync;
+ u64_stats_t rx_hw_csum_err;
+ u64_stats_t rx_hsplit;
+ u64_stats_t rx_hsplit_hbo;
+ u64_stats_t rx_bad_descs;
+ u64_stats_t tx_linearize;
+ u64_stats_t tx_busy;
+ u64_stats_t tx_drops;
+ u64_stats_t tx_dma_map_errs;
+ struct virtchnl2_vport_stats vport_stats;
+};
+
+/**
+ * struct idpf_vport - Handle for netdevices and queue resources
+ * @num_txq: Number of allocated TX queues
+ * @num_complq: Number of allocated completion queues
+ * @txq_desc_count: TX queue descriptor count
+ * @complq_desc_count: Completion queue descriptor count
+ * @compln_clean_budget: Work budget for completion clean
+ * @num_txq_grp: Number of TX queue groups
+ * @txq_grps: Array of TX queue groups
+ * @txq_model: Split queue or single queue queuing model
+ * @txqs: Used only in hotpath to get to the right queue very fast
+ * @crc_enable: Enable CRC insertion offload
+ * @num_rxq: Number of allocated RX queues
+ * @num_bufq: Number of allocated buffer queues
+ * @rxq_desc_count: RX queue descriptor count. *MUST* have enough descriptors
+ * to complete all buffer descriptors for all buffer queues in
+ * the worst case.
+ * @num_bufqs_per_qgrp: Buffer queues per RX queue in a given grouping
+ * @bufq_desc_count: Buffer queue descriptor count
+ * @bufq_size: Size of buffers in ring (e.g. 2K, 4K, etc)
+ * @num_rxq_grp: Number of RX queues in a group
+ * @rxq_grps: Total number of RX groups. Number of groups * number of RX per
+ * group will yield total number of RX queues.
+ * @rxq_model: Splitq queue or single queue queuing model
+ * @rx_ptype_lkup: Lookup table for ptypes on RX
+ * @adapter: back pointer to associated adapter
+ * @netdev: Associated net_device. Each vport should have one and only one
+ * associated netdev.
+ * @flags: See enum idpf_vport_flags
+ * @vport_type: Default SRIOV, SIOV, etc.
+ * @vport_id: Device given vport identifier
+ * @idx: Software index in adapter vports struct
+ * @default_vport: Use this vport if one isn't specified
+ * @base_rxd: True if the driver should use base descriptors instead of flex
+ * @num_q_vectors: Number of IRQ vectors allocated
+ * @q_vectors: Array of queue vectors
+ * @q_vector_idxs: Starting index of queue vectors
+ * @max_mtu: device given max possible MTU
+ * @default_mac_addr: device will give a default MAC to use
+ * @rx_itr_profile: RX profiles for Dynamic Interrupt Moderation
+ * @tx_itr_profile: TX profiles for Dynamic Interrupt Moderation
+ * @port_stats: per port csum, header split, and other offload stats
+ * @link_up: True if link is up
+ * @link_speed_mbps: Link speed in mbps
+ * @vc_msg: Virtchnl message buffer
+ * @vc_state: Virtchnl message state
+ * @vchnl_wq: Wait queue for virtchnl messages
+ * @sw_marker_wq: workqueue for marker packets
+ * @vc_buf_lock: Lock to protect virtchnl buffer
+ */
+struct idpf_vport {
+ u16 num_txq;
+ u16 num_complq;
+ u32 txq_desc_count;
+ u32 complq_desc_count;
+ u32 compln_clean_budget;
+ u16 num_txq_grp;
+ struct idpf_txq_group *txq_grps;
+ u32 txq_model;
+ struct idpf_queue **txqs;
+ bool crc_enable;
+
+ u16 num_rxq;
+ u16 num_bufq;
+ u32 rxq_desc_count;
+ u8 num_bufqs_per_qgrp;
+ u32 bufq_desc_count[IDPF_MAX_BUFQS_PER_RXQ_GRP];
+ u32 bufq_size[IDPF_MAX_BUFQS_PER_RXQ_GRP];
+ u16 num_rxq_grp;
+ struct idpf_rxq_group *rxq_grps;
+ u32 rxq_model;
+ struct idpf_rx_ptype_decoded rx_ptype_lkup[IDPF_RX_MAX_PTYPE];
+
+ struct idpf_adapter *adapter;
+ struct net_device *netdev;
+ DECLARE_BITMAP(flags, IDPF_VPORT_FLAGS_NBITS);
+ u16 vport_type;
+ u32 vport_id;
+ u16 idx;
+ bool default_vport;
+ bool base_rxd;
+
+ u16 num_q_vectors;
+ struct idpf_q_vector *q_vectors;
+ u16 *q_vector_idxs;
+ u16 max_mtu;
+ u8 default_mac_addr[ETH_ALEN];
+ u16 rx_itr_profile[IDPF_DIM_PROFILE_SLOTS];
+ u16 tx_itr_profile[IDPF_DIM_PROFILE_SLOTS];
+ struct idpf_port_stats port_stats;
+
+ bool link_up;
+ u32 link_speed_mbps;
+
+ char vc_msg[IDPF_CTLQ_MAX_BUF_LEN];
+ DECLARE_BITMAP(vc_state, IDPF_VC_NBITS);
+
+ wait_queue_head_t vchnl_wq;
+ wait_queue_head_t sw_marker_wq;
+ struct mutex vc_buf_lock;
+};
+
+/**
+ * enum idpf_user_flags
+ * @__IDPF_PROMISC_UC: Unicast promiscuous mode
+ * @__IDPF_PROMISC_MC: Multicast promiscuous mode
+ * @__IDPF_USER_FLAGS_NBITS: Must be last
+ */
+enum idpf_user_flags {
+ __IDPF_PROMISC_UC = 32,
+ __IDPF_PROMISC_MC,
+
+ __IDPF_USER_FLAGS_NBITS,
+};
+
+/**
+ * struct idpf_rss_data - Associated RSS data
+ * @rss_key_size: Size of RSS hash key
+ * @rss_key: RSS hash key
+ * @rss_lut_size: Size of RSS lookup table
+ * @rss_lut: RSS lookup table
+ * @cached_lut: Used to restore previously init RSS lut
+ */
+struct idpf_rss_data {
+ u16 rss_key_size;
+ u8 *rss_key;
+ u16 rss_lut_size;
+ u32 *rss_lut;
+ u32 *cached_lut;
+};
+
+/**
+ * struct idpf_vport_user_config_data - User defined configuration values for
+ * each vport.
+ * @rss_data: See struct idpf_rss_data
+ * @num_req_tx_qs: Number of user requested TX queues through ethtool
+ * @num_req_rx_qs: Number of user requested RX queues through ethtool
+ * @num_req_txq_desc: Number of user requested TX queue descriptors through
+ * ethtool
+ * @num_req_rxq_desc: Number of user requested RX queue descriptors through
+ * ethtool
+ * @user_flags: User toggled config flags
+ * @mac_filter_list: List of MAC filters
+ *
+ * Used to restore configuration after a reset as the vport will get wiped.
+ */
+struct idpf_vport_user_config_data {
+ struct idpf_rss_data rss_data;
+ u16 num_req_tx_qs;
+ u16 num_req_rx_qs;
+ u32 num_req_txq_desc;
+ u32 num_req_rxq_desc;
+ DECLARE_BITMAP(user_flags, __IDPF_USER_FLAGS_NBITS);
+ struct list_head mac_filter_list;
+};
+
+/**
+ * enum idpf_vport_config_flags - Vport config flags
+ * @IDPF_VPORT_REG_NETDEV: Register netdev
+ * @IDPF_VPORT_UP_REQUESTED: Set if interface up is requested on core reset
+ * @IDPF_VPORT_ADD_MAC_REQ: Asynchronous add ether address in flight
+ * @IDPF_VPORT_DEL_MAC_REQ: Asynchronous delete ether address in flight
+ * @IDPF_VPORT_CONFIG_FLAGS_NBITS: Must be last
+ */
+enum idpf_vport_config_flags {
+ IDPF_VPORT_REG_NETDEV,
+ IDPF_VPORT_UP_REQUESTED,
+ IDPF_VPORT_ADD_MAC_REQ,
+ IDPF_VPORT_DEL_MAC_REQ,
+ IDPF_VPORT_CONFIG_FLAGS_NBITS,
+};
+
+/**
+ * struct idpf_avail_queue_info
+ * @avail_rxq: Available RX queues
+ * @avail_txq: Available TX queues
+ * @avail_bufq: Available buffer queues
+ * @avail_complq: Available completion queues
+ *
+ * Maintain total queues available after allocating max queues to each vport.
+ */
+struct idpf_avail_queue_info {
+ u16 avail_rxq;
+ u16 avail_txq;
+ u16 avail_bufq;
+ u16 avail_complq;
+};
+
+/**
+ * struct idpf_vector_info - Utility structure to pass function arguments as a
+ * structure
+ * @num_req_vecs: Vectors required based on the number of queues updated by the
+ * user via ethtool
+ * @num_curr_vecs: Current number of vectors, must be >= @num_req_vecs
+ * @index: Relative starting index for vectors
+ * @default_vport: Vectors are for default vport
+ */
+struct idpf_vector_info {
+ u16 num_req_vecs;
+ u16 num_curr_vecs;
+ u16 index;
+ bool default_vport;
+};
+
+/**
+ * struct idpf_vector_lifo - Stack to maintain vector indexes used for vector
+ * distribution algorithm
+ * @top: Points to stack top i.e. next available vector index
+ * @base: Always points to start of the free pool
+ * @size: Total size of the vector stack
+ * @vec_idx: Array to store all the vector indexes
+ *
+ * Vector stack maintains all the relative vector indexes at the *adapter*
+ * level. This stack is divided into 2 parts, first one is called as 'default
+ * pool' and other one is called 'free pool'. Vector distribution algorithm
+ * gives priority to default vports in a way that at least IDPF_MIN_Q_VEC
+ * vectors are allocated per default vport and the relative vector indexes for
+ * those are maintained in default pool. Free pool contains all the unallocated
+ * vector indexes which can be allocated on-demand basis. Mailbox vector index
+ * is maintained in the default pool of the stack.
+ */
+struct idpf_vector_lifo {
+ u16 top;
+ u16 base;
+ u16 size;
+ u16 *vec_idx;
+};
+
+/**
+ * struct idpf_vport_config - Vport configuration data
+ * @user_config: see struct idpf_vport_user_config_data
+ * @max_q: Maximum possible queues
+ * @req_qs_chunks: Queue chunk data for requested queues
+ * @mac_filter_list_lock: Lock to protect mac filters
+ * @flags: See enum idpf_vport_config_flags
+ */
+struct idpf_vport_config {
+ struct idpf_vport_user_config_data user_config;
+ struct idpf_vport_max_q max_q;
+ void *req_qs_chunks;
+ spinlock_t mac_filter_list_lock;
+ DECLARE_BITMAP(flags, IDPF_VPORT_CONFIG_FLAGS_NBITS);
+};
+
+/**
+ * struct idpf_adapter - Device data struct generated on probe
+ * @pdev: PCI device struct given on probe
+ * @virt_ver_maj: Virtchnl version major
+ * @virt_ver_min: Virtchnl version minor
+ * @msg_enable: Debug message level enabled
+ * @mb_wait_count: Number of times mailbox was attempted initialization
+ * @state: Init state machine
+ * @flags: See enum idpf_flags
+ * @reset_reg: See struct idpf_reset_reg
+ * @hw: Device access data
+ * @num_req_msix: Requested number of MSIX vectors
+ * @num_avail_msix: Available number of MSIX vectors
+ * @num_msix_entries: Number of entries in MSIX table
+ * @msix_entries: MSIX table
+ * @req_vec_chunks: Requested vector chunk data
+ * @mb_vector: Mailbox vector data
+ * @vector_stack: Stack to store the msix vector indexes
+ * @irq_mb_handler: Handler for hard interrupt for mailbox
+ * @tx_timeout_count: Number of TX timeouts that have occurred
+ * @avail_queues: Device given queue limits
+ * @vports: Array to store vports created by the driver
+ * @netdevs: Associated Vport netdevs
+ * @vport_params_reqd: Vport params requested
+ * @vport_params_recvd: Vport params received
+ * @vport_ids: Array of device given vport identifiers
+ * @vport_config: Vport config parameters
+ * @max_vports: Maximum vports that can be allocated
+ * @num_alloc_vports: Current number of vports allocated
+ * @next_vport: Next free slot in pf->vport[] - 0-based!
+ * @init_task: Initialization task
+ * @init_wq: Workqueue for initialization task
+ * @serv_task: Periodically recurring maintenance task
+ * @serv_wq: Workqueue for service task
+ * @mbx_task: Task to handle mailbox interrupts
+ * @mbx_wq: Workqueue for mailbox responses
+ * @vc_event_task: Task to handle out of band virtchnl event notifications
+ * @vc_event_wq: Workqueue for virtchnl events
+ * @stats_task: Periodic statistics retrieval task
+ * @stats_wq: Workqueue for statistics task
+ * @caps: Negotiated capabilities with device
+ * @vchnl_wq: Wait queue for virtchnl messages
+ * @vc_state: Virtchnl message state
+ * @vc_msg: Virtchnl message buffer
+ * @dev_ops: See idpf_dev_ops
+ * @num_vfs: Number of allocated VFs through sysfs. PF does not directly talk
+ * to VFs but is used to initialize them
+ * @crc_enable: Enable CRC insertion offload
+ * @req_tx_splitq: TX split or single queue model to request
+ * @req_rx_splitq: RX split or single queue model to request
+ * @vport_ctrl_lock: Lock to protect the vport control flow
+ * @vector_lock: Lock to protect vector distribution
+ * @queue_lock: Lock to protect queue distribution
+ * @vc_buf_lock: Lock to protect virtchnl buffer
+ */
+struct idpf_adapter {
+ struct pci_dev *pdev;
+ u32 virt_ver_maj;
+ u32 virt_ver_min;
+
+ u32 msg_enable;
+ u32 mb_wait_count;
+ enum idpf_state state;
+ DECLARE_BITMAP(flags, IDPF_FLAGS_NBITS);
+ struct idpf_reset_reg reset_reg;
+ struct idpf_hw hw;
+ u16 num_req_msix;
+ u16 num_avail_msix;
+ u16 num_msix_entries;
+ struct msix_entry *msix_entries;
+ struct virtchnl2_alloc_vectors *req_vec_chunks;
+ struct idpf_q_vector mb_vector;
+ struct idpf_vector_lifo vector_stack;
+ irqreturn_t (*irq_mb_handler)(int irq, void *data);
+
+ u32 tx_timeout_count;
+ struct idpf_avail_queue_info avail_queues;
+ struct idpf_vport **vports;
+ struct net_device **netdevs;
+ struct virtchnl2_create_vport **vport_params_reqd;
+ struct virtchnl2_create_vport **vport_params_recvd;
+ u32 *vport_ids;
+
+ struct idpf_vport_config **vport_config;
+ u16 max_vports;
+ u16 num_alloc_vports;
+ u16 next_vport;
+
+ struct delayed_work init_task;
+ struct workqueue_struct *init_wq;
+ struct delayed_work serv_task;
+ struct workqueue_struct *serv_wq;
+ struct delayed_work mbx_task;
+ struct workqueue_struct *mbx_wq;
+ struct delayed_work vc_event_task;
+ struct workqueue_struct *vc_event_wq;
+ struct delayed_work stats_task;
+ struct workqueue_struct *stats_wq;
+ struct virtchnl2_get_capabilities caps;
+
+ wait_queue_head_t vchnl_wq;
+ DECLARE_BITMAP(vc_state, IDPF_VC_NBITS);
+ char vc_msg[IDPF_CTLQ_MAX_BUF_LEN];
+ struct idpf_dev_ops dev_ops;
+ int num_vfs;
+ bool crc_enable;
+ bool req_tx_splitq;
+ bool req_rx_splitq;
+
+ struct mutex vport_ctrl_lock;
+ struct mutex vector_lock;
+ struct mutex queue_lock;
+ struct mutex vc_buf_lock;
+};
+
+/**
+ * idpf_is_queue_model_split - check if queue model is split
+ * @q_model: queue model single or split
+ *
+ * Returns true if queue model is split else false
+ */
+static inline int idpf_is_queue_model_split(u16 q_model)
+{
+ return q_model == VIRTCHNL2_QUEUE_MODEL_SPLIT;
+}
+
+#define idpf_is_cap_ena(adapter, field, flag) \
+ idpf_is_capability_ena(adapter, false, field, flag)
+#define idpf_is_cap_ena_all(adapter, field, flag) \
+ idpf_is_capability_ena(adapter, true, field, flag)
+
+bool idpf_is_capability_ena(struct idpf_adapter *adapter, bool all,
+ enum idpf_cap_field field, u64 flag);
+
+#define IDPF_CAP_RSS (\
+ VIRTCHNL2_CAP_RSS_IPV4_TCP |\
+ VIRTCHNL2_CAP_RSS_IPV4_TCP |\
+ VIRTCHNL2_CAP_RSS_IPV4_UDP |\
+ VIRTCHNL2_CAP_RSS_IPV4_SCTP |\
+ VIRTCHNL2_CAP_RSS_IPV4_OTHER |\
+ VIRTCHNL2_CAP_RSS_IPV6_TCP |\
+ VIRTCHNL2_CAP_RSS_IPV6_TCP |\
+ VIRTCHNL2_CAP_RSS_IPV6_UDP |\
+ VIRTCHNL2_CAP_RSS_IPV6_SCTP |\
+ VIRTCHNL2_CAP_RSS_IPV6_OTHER)
+
+#define IDPF_CAP_RSC (\
+ VIRTCHNL2_CAP_RSC_IPV4_TCP |\
+ VIRTCHNL2_CAP_RSC_IPV6_TCP)
+
+#define IDPF_CAP_HSPLIT (\
+ VIRTCHNL2_CAP_RX_HSPLIT_AT_L4V4 |\
+ VIRTCHNL2_CAP_RX_HSPLIT_AT_L4V6)
+
+#define IDPF_CAP_RX_CSUM_L4V4 (\
+ VIRTCHNL2_CAP_RX_CSUM_L4_IPV4_TCP |\
+ VIRTCHNL2_CAP_RX_CSUM_L4_IPV4_UDP)
+
+#define IDPF_CAP_RX_CSUM_L4V6 (\
+ VIRTCHNL2_CAP_RX_CSUM_L4_IPV6_TCP |\
+ VIRTCHNL2_CAP_RX_CSUM_L4_IPV6_UDP)
+
+#define IDPF_CAP_RX_CSUM (\
+ VIRTCHNL2_CAP_RX_CSUM_L3_IPV4 |\
+ VIRTCHNL2_CAP_RX_CSUM_L4_IPV4_TCP |\
+ VIRTCHNL2_CAP_RX_CSUM_L4_IPV4_UDP |\
+ VIRTCHNL2_CAP_RX_CSUM_L4_IPV6_TCP |\
+ VIRTCHNL2_CAP_RX_CSUM_L4_IPV6_UDP)
+
+#define IDPF_CAP_SCTP_CSUM (\
+ VIRTCHNL2_CAP_TX_CSUM_L4_IPV4_SCTP |\
+ VIRTCHNL2_CAP_TX_CSUM_L4_IPV6_SCTP |\
+ VIRTCHNL2_CAP_RX_CSUM_L4_IPV4_SCTP |\
+ VIRTCHNL2_CAP_RX_CSUM_L4_IPV6_SCTP)
+
+#define IDPF_CAP_TUNNEL_TX_CSUM (\
+ VIRTCHNL2_CAP_TX_CSUM_L3_SINGLE_TUNNEL |\
+ VIRTCHNL2_CAP_TX_CSUM_L4_SINGLE_TUNNEL)
+
+/**
+ * idpf_get_reserved_vecs - Get reserved vectors
+ * @adapter: private data struct
+ */
+static inline u16 idpf_get_reserved_vecs(struct idpf_adapter *adapter)
+{
+ return le16_to_cpu(adapter->caps.num_allocated_vectors);
+}
+
+/**
+ * idpf_get_default_vports - Get default number of vports
+ * @adapter: private data struct
+ */
+static inline u16 idpf_get_default_vports(struct idpf_adapter *adapter)
+{
+ return le16_to_cpu(adapter->caps.default_num_vports);
+}
+
+/**
+ * idpf_get_max_vports - Get max number of vports
+ * @adapter: private data struct
+ */
+static inline u16 idpf_get_max_vports(struct idpf_adapter *adapter)
+{
+ return le16_to_cpu(adapter->caps.max_vports);
+}
+
+/**
+ * idpf_get_max_tx_bufs - Get max scatter-gather buffers supported by the device
+ * @adapter: private data struct
+ */
+static inline unsigned int idpf_get_max_tx_bufs(struct idpf_adapter *adapter)
+{
+ return adapter->caps.max_sg_bufs_per_tx_pkt;
+}
+
+/**
+ * idpf_get_min_tx_pkt_len - Get min packet length supported by the device
+ * @adapter: private data struct
+ */
+static inline u8 idpf_get_min_tx_pkt_len(struct idpf_adapter *adapter)
+{
+ u8 pkt_len = adapter->caps.min_sso_packet_len;
+
+ return pkt_len ? pkt_len : IDPF_TX_MIN_PKT_LEN;
+}
+
+/**
+ * idpf_get_reg_addr - Get BAR0 register address
+ * @adapter: private data struct
+ * @reg_offset: register offset value
+ *
+ * Based on the register offset, return the actual BAR0 register address
+ */
+static inline void __iomem *idpf_get_reg_addr(struct idpf_adapter *adapter,
+ resource_size_t reg_offset)
+{
+ return (void __iomem *)(adapter->hw.hw_addr + reg_offset);
+}
+
+/**
+ * idpf_is_reset_detected - check if we were reset at some point
+ * @adapter: driver specific private structure
+ *
+ * Returns true if we are either in reset currently or were previously reset.
+ */
+static inline bool idpf_is_reset_detected(struct idpf_adapter *adapter)
+{
+ if (!adapter->hw.arq)
+ return true;
+
+ return !(readl(idpf_get_reg_addr(adapter, adapter->hw.arq->reg.len)) &
+ adapter->hw.arq->reg.len_mask);
+}
+
+/**
+ * idpf_is_reset_in_prog - check if reset is in progress
+ * @adapter: driver specific private structure
+ *
+ * Returns true if hard reset is in progress, false otherwise
+ */
+static inline bool idpf_is_reset_in_prog(struct idpf_adapter *adapter)
+{
+ return (test_bit(IDPF_HR_RESET_IN_PROG, adapter->flags) ||
+ test_bit(IDPF_HR_FUNC_RESET, adapter->flags) ||
+ test_bit(IDPF_HR_DRV_LOAD, adapter->flags));
+}
+
+/**
+ * idpf_netdev_to_vport - get a vport handle from a netdev
+ * @netdev: network interface device structure
+ */
+static inline struct idpf_vport *idpf_netdev_to_vport(struct net_device *netdev)
+{
+ struct idpf_netdev_priv *np = netdev_priv(netdev);
+
+ return np->vport;
+}
+
+/**
+ * idpf_netdev_to_adapter - Get adapter handle from a netdev
+ * @netdev: Network interface device structure
+ */
+static inline struct idpf_adapter *idpf_netdev_to_adapter(struct net_device *netdev)
+{
+ struct idpf_netdev_priv *np = netdev_priv(netdev);
+
+ return np->adapter;
+}
+
+/**
+ * idpf_is_feature_ena - Determine if a particular feature is enabled
+ * @vport: Vport to check
+ * @feature: Netdev flag to check
+ *
+ * Returns true or false if a particular feature is enabled.
+ */
+static inline bool idpf_is_feature_ena(const struct idpf_vport *vport,
+ netdev_features_t feature)
+{
+ return vport->netdev->features & feature;
+}
+
+/**
+ * idpf_get_max_tx_hdr_size -- get the size of tx header
+ * @adapter: Driver specific private structure
+ */
+static inline u16 idpf_get_max_tx_hdr_size(struct idpf_adapter *adapter)
+{
+ return le16_to_cpu(adapter->caps.max_tx_hdr_size);
+}
+
+/**
+ * idpf_vport_ctrl_lock - Acquire the vport control lock
+ * @netdev: Network interface device structure
+ *
+ * This lock should be used by non-datapath code to protect against vport
+ * destruction.
+ */
+static inline void idpf_vport_ctrl_lock(struct net_device *netdev)
+{
+ struct idpf_netdev_priv *np = netdev_priv(netdev);
+
+ mutex_lock(&np->adapter->vport_ctrl_lock);
+}
+
+/**
+ * idpf_vport_ctrl_unlock - Release the vport control lock
+ * @netdev: Network interface device structure
+ */
+static inline void idpf_vport_ctrl_unlock(struct net_device *netdev)
+{
+ struct idpf_netdev_priv *np = netdev_priv(netdev);
+
+ mutex_unlock(&np->adapter->vport_ctrl_lock);
+}
+
+void idpf_statistics_task(struct work_struct *work);
+void idpf_init_task(struct work_struct *work);
+void idpf_service_task(struct work_struct *work);
+void idpf_mbx_task(struct work_struct *work);
+void idpf_vc_event_task(struct work_struct *work);
+void idpf_dev_ops_init(struct idpf_adapter *adapter);
+void idpf_vf_dev_ops_init(struct idpf_adapter *adapter);
+int idpf_vport_adjust_qs(struct idpf_vport *vport);
+int idpf_init_dflt_mbx(struct idpf_adapter *adapter);
+void idpf_deinit_dflt_mbx(struct idpf_adapter *adapter);
+int idpf_vc_core_init(struct idpf_adapter *adapter);
+void idpf_vc_core_deinit(struct idpf_adapter *adapter);
+int idpf_intr_req(struct idpf_adapter *adapter);
+void idpf_intr_rel(struct idpf_adapter *adapter);
+int idpf_get_reg_intr_vecs(struct idpf_vport *vport,
+ struct idpf_vec_regs *reg_vals);
+u16 idpf_get_max_tx_hdr_size(struct idpf_adapter *adapter);
+int idpf_send_delete_queues_msg(struct idpf_vport *vport);
+int idpf_send_add_queues_msg(const struct idpf_vport *vport, u16 num_tx_q,
+ u16 num_complq, u16 num_rx_q, u16 num_rx_bufq);
+int idpf_initiate_soft_reset(struct idpf_vport *vport,
+ enum idpf_vport_reset_cause reset_cause);
+int idpf_send_enable_vport_msg(struct idpf_vport *vport);
+int idpf_send_disable_vport_msg(struct idpf_vport *vport);
+int idpf_send_destroy_vport_msg(struct idpf_vport *vport);
+int idpf_send_get_rx_ptype_msg(struct idpf_vport *vport);
+int idpf_send_ena_dis_loopback_msg(struct idpf_vport *vport);
+int idpf_send_get_set_rss_key_msg(struct idpf_vport *vport, bool get);
+int idpf_send_get_set_rss_lut_msg(struct idpf_vport *vport, bool get);
+int idpf_send_dealloc_vectors_msg(struct idpf_adapter *adapter);
+int idpf_send_alloc_vectors_msg(struct idpf_adapter *adapter, u16 num_vectors);
+void idpf_deinit_task(struct idpf_adapter *adapter);
+int idpf_req_rel_vector_indexes(struct idpf_adapter *adapter,
+ u16 *q_vector_idxs,
+ struct idpf_vector_info *vec_info);
+int idpf_vport_alloc_vec_indexes(struct idpf_vport *vport);
+int idpf_send_get_stats_msg(struct idpf_vport *vport);
+int idpf_get_vec_ids(struct idpf_adapter *adapter,
+ u16 *vecids, int num_vecids,
+ struct virtchnl2_vector_chunks *chunks);
+int idpf_recv_mb_msg(struct idpf_adapter *adapter, u32 op,
+ void *msg, int msg_size);
+int idpf_send_mb_msg(struct idpf_adapter *adapter, u32 op,
+ u16 msg_size, u8 *msg);
+void idpf_set_ethtool_ops(struct net_device *netdev);
+int idpf_vport_alloc_max_qs(struct idpf_adapter *adapter,
+ struct idpf_vport_max_q *max_q);
+void idpf_vport_dealloc_max_qs(struct idpf_adapter *adapter,
+ struct idpf_vport_max_q *max_q);
+int idpf_add_del_mac_filters(struct idpf_vport *vport,
+ struct idpf_netdev_priv *np,
+ bool add, bool async);
+int idpf_set_promiscuous(struct idpf_adapter *adapter,
+ struct idpf_vport_user_config_data *config_data,
+ u32 vport_id);
+int idpf_send_disable_queues_msg(struct idpf_vport *vport);
+void idpf_vport_init(struct idpf_vport *vport, struct idpf_vport_max_q *max_q);
+u32 idpf_get_vport_id(struct idpf_vport *vport);
+int idpf_vport_queue_ids_init(struct idpf_vport *vport);
+int idpf_queue_reg_init(struct idpf_vport *vport);
+int idpf_send_config_queues_msg(struct idpf_vport *vport);
+int idpf_send_enable_queues_msg(struct idpf_vport *vport);
+int idpf_send_create_vport_msg(struct idpf_adapter *adapter,
+ struct idpf_vport_max_q *max_q);
+int idpf_check_supported_desc_ids(struct idpf_vport *vport);
+void idpf_vport_intr_write_itr(struct idpf_q_vector *q_vector,
+ u16 itr, bool tx);
+int idpf_send_map_unmap_queue_vector_msg(struct idpf_vport *vport, bool map);
+int idpf_send_set_sriov_vfs_msg(struct idpf_adapter *adapter, u16 num_vfs);
+int idpf_sriov_configure(struct pci_dev *pdev, int num_vfs);
+
+#endif /* !_IDPF_H_ */
diff --git a/drivers/net/ethernet/intel/idpf/idpf_controlq.c b/drivers/net/ethernet/intel/idpf/idpf_controlq.c
new file mode 100644
index 000000000000..c7f43d2fcd13
--- /dev/null
+++ b/drivers/net/ethernet/intel/idpf/idpf_controlq.c
@@ -0,0 +1,621 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2023 Intel Corporation */
+
+#include "idpf_controlq.h"
+
+/**
+ * idpf_ctlq_setup_regs - initialize control queue registers
+ * @cq: pointer to the specific control queue
+ * @q_create_info: structs containing info for each queue to be initialized
+ */
+static void idpf_ctlq_setup_regs(struct idpf_ctlq_info *cq,
+ struct idpf_ctlq_create_info *q_create_info)
+{
+ /* set control queue registers in our local struct */
+ cq->reg.head = q_create_info->reg.head;
+ cq->reg.tail = q_create_info->reg.tail;
+ cq->reg.len = q_create_info->reg.len;
+ cq->reg.bah = q_create_info->reg.bah;
+ cq->reg.bal = q_create_info->reg.bal;
+ cq->reg.len_mask = q_create_info->reg.len_mask;
+ cq->reg.len_ena_mask = q_create_info->reg.len_ena_mask;
+ cq->reg.head_mask = q_create_info->reg.head_mask;
+}
+
+/**
+ * idpf_ctlq_init_regs - Initialize control queue registers
+ * @hw: pointer to hw struct
+ * @cq: pointer to the specific Control queue
+ * @is_rxq: true if receive control queue, false otherwise
+ *
+ * Initialize registers. The caller is expected to have already initialized the
+ * descriptor ring memory and buffer memory
+ */
+static void idpf_ctlq_init_regs(struct idpf_hw *hw, struct idpf_ctlq_info *cq,
+ bool is_rxq)
+{
+ /* Update tail to post pre-allocated buffers for rx queues */
+ if (is_rxq)
+ wr32(hw, cq->reg.tail, (u32)(cq->ring_size - 1));
+
+ /* For non-Mailbox control queues only TAIL need to be set */
+ if (cq->q_id != -1)
+ return;
+
+ /* Clear Head for both send or receive */
+ wr32(hw, cq->reg.head, 0);
+
+ /* set starting point */
+ wr32(hw, cq->reg.bal, lower_32_bits(cq->desc_ring.pa));
+ wr32(hw, cq->reg.bah, upper_32_bits(cq->desc_ring.pa));
+ wr32(hw, cq->reg.len, (cq->ring_size | cq->reg.len_ena_mask));
+}
+
+/**
+ * idpf_ctlq_init_rxq_bufs - populate receive queue descriptors with buf
+ * @cq: pointer to the specific Control queue
+ *
+ * Record the address of the receive queue DMA buffers in the descriptors.
+ * The buffers must have been previously allocated.
+ */
+static void idpf_ctlq_init_rxq_bufs(struct idpf_ctlq_info *cq)
+{
+ int i;
+
+ for (i = 0; i < cq->ring_size; i++) {
+ struct idpf_ctlq_desc *desc = IDPF_CTLQ_DESC(cq, i);
+ struct idpf_dma_mem *bi = cq->bi.rx_buff[i];
+
+ /* No buffer to post to descriptor, continue */
+ if (!bi)
+ continue;
+
+ desc->flags =
+ cpu_to_le16(IDPF_CTLQ_FLAG_BUF | IDPF_CTLQ_FLAG_RD);
+ desc->opcode = 0;
+ desc->datalen = cpu_to_le16(bi->size);
+ desc->ret_val = 0;
+ desc->v_opcode_dtype = 0;
+ desc->v_retval = 0;
+ desc->params.indirect.addr_high =
+ cpu_to_le32(upper_32_bits(bi->pa));
+ desc->params.indirect.addr_low =
+ cpu_to_le32(lower_32_bits(bi->pa));
+ desc->params.indirect.param0 = 0;
+ desc->params.indirect.sw_cookie = 0;
+ desc->params.indirect.v_flags = 0;
+ }
+}
+
+/**
+ * idpf_ctlq_shutdown - shutdown the CQ
+ * @hw: pointer to hw struct
+ * @cq: pointer to the specific Control queue
+ *
+ * The main shutdown routine for any controq queue
+ */
+static void idpf_ctlq_shutdown(struct idpf_hw *hw, struct idpf_ctlq_info *cq)
+{
+ mutex_lock(&cq->cq_lock);
+
+ /* free ring buffers and the ring itself */
+ idpf_ctlq_dealloc_ring_res(hw, cq);
+
+ /* Set ring_size to 0 to indicate uninitialized queue */
+ cq->ring_size = 0;
+
+ mutex_unlock(&cq->cq_lock);
+ mutex_destroy(&cq->cq_lock);
+}
+
+/**
+ * idpf_ctlq_add - add one control queue
+ * @hw: pointer to hardware struct
+ * @qinfo: info for queue to be created
+ * @cq_out: (output) double pointer to control queue to be created
+ *
+ * Allocate and initialize a control queue and add it to the control queue list.
+ * The cq parameter will be allocated/initialized and passed back to the caller
+ * if no errors occur.
+ *
+ * Note: idpf_ctlq_init must be called prior to any calls to idpf_ctlq_add
+ */
+int idpf_ctlq_add(struct idpf_hw *hw,
+ struct idpf_ctlq_create_info *qinfo,
+ struct idpf_ctlq_info **cq_out)
+{
+ struct idpf_ctlq_info *cq;
+ bool is_rxq = false;
+ int err;
+
+ cq = kzalloc(sizeof(*cq), GFP_KERNEL);
+ if (!cq)
+ return -ENOMEM;
+
+ cq->cq_type = qinfo->type;
+ cq->q_id = qinfo->id;
+ cq->buf_size = qinfo->buf_size;
+ cq->ring_size = qinfo->len;
+
+ cq->next_to_use = 0;
+ cq->next_to_clean = 0;
+ cq->next_to_post = cq->ring_size - 1;
+
+ switch (qinfo->type) {
+ case IDPF_CTLQ_TYPE_MAILBOX_RX:
+ is_rxq = true;
+ fallthrough;
+ case IDPF_CTLQ_TYPE_MAILBOX_TX:
+ err = idpf_ctlq_alloc_ring_res(hw, cq);
+ break;
+ default:
+ err = -EBADR;
+ break;
+ }
+
+ if (err)
+ goto init_free_q;
+
+ if (is_rxq) {
+ idpf_ctlq_init_rxq_bufs(cq);
+ } else {
+ /* Allocate the array of msg pointers for TX queues */
+ cq->bi.tx_msg = kcalloc(qinfo->len,
+ sizeof(struct idpf_ctlq_msg *),
+ GFP_KERNEL);
+ if (!cq->bi.tx_msg) {
+ err = -ENOMEM;
+ goto init_dealloc_q_mem;
+ }
+ }
+
+ idpf_ctlq_setup_regs(cq, qinfo);
+
+ idpf_ctlq_init_regs(hw, cq, is_rxq);
+
+ mutex_init(&cq->cq_lock);
+
+ list_add(&cq->cq_list, &hw->cq_list_head);
+
+ *cq_out = cq;
+
+ return 0;
+
+init_dealloc_q_mem:
+ /* free ring buffers and the ring itself */
+ idpf_ctlq_dealloc_ring_res(hw, cq);
+init_free_q:
+ kfree(cq);
+
+ return err;
+}
+
+/**
+ * idpf_ctlq_remove - deallocate and remove specified control queue
+ * @hw: pointer to hardware struct
+ * @cq: pointer to control queue to be removed
+ */
+void idpf_ctlq_remove(struct idpf_hw *hw,
+ struct idpf_ctlq_info *cq)
+{
+ list_del(&cq->cq_list);
+ idpf_ctlq_shutdown(hw, cq);
+ kfree(cq);
+}
+
+/**
+ * idpf_ctlq_init - main initialization routine for all control queues
+ * @hw: pointer to hardware struct
+ * @num_q: number of queues to initialize
+ * @q_info: array of structs containing info for each queue to be initialized
+ *
+ * This initializes any number and any type of control queues. This is an all
+ * or nothing routine; if one fails, all previously allocated queues will be
+ * destroyed. This must be called prior to using the individual add/remove
+ * APIs.
+ */
+int idpf_ctlq_init(struct idpf_hw *hw, u8 num_q,
+ struct idpf_ctlq_create_info *q_info)
+{
+ struct idpf_ctlq_info *cq, *tmp;
+ int err;
+ int i;
+
+ INIT_LIST_HEAD(&hw->cq_list_head);
+
+ for (i = 0; i < num_q; i++) {
+ struct idpf_ctlq_create_info *qinfo = q_info + i;
+
+ err = idpf_ctlq_add(hw, qinfo, &cq);
+ if (err)
+ goto init_destroy_qs;
+ }
+
+ return 0;
+
+init_destroy_qs:
+ list_for_each_entry_safe(cq, tmp, &hw->cq_list_head, cq_list)
+ idpf_ctlq_remove(hw, cq);
+
+ return err;
+}
+
+/**
+ * idpf_ctlq_deinit - destroy all control queues
+ * @hw: pointer to hw struct
+ */
+void idpf_ctlq_deinit(struct idpf_hw *hw)
+{
+ struct idpf_ctlq_info *cq, *tmp;
+
+ list_for_each_entry_safe(cq, tmp, &hw->cq_list_head, cq_list)
+ idpf_ctlq_remove(hw, cq);
+}
+
+/**
+ * idpf_ctlq_send - send command to Control Queue (CTQ)
+ * @hw: pointer to hw struct
+ * @cq: handle to control queue struct to send on
+ * @num_q_msg: number of messages to send on control queue
+ * @q_msg: pointer to array of queue messages to be sent
+ *
+ * The caller is expected to allocate DMAable buffers and pass them to the
+ * send routine via the q_msg struct / control queue specific data struct.
+ * The control queue will hold a reference to each send message until
+ * the completion for that message has been cleaned.
+ */
+int idpf_ctlq_send(struct idpf_hw *hw, struct idpf_ctlq_info *cq,
+ u16 num_q_msg, struct idpf_ctlq_msg q_msg[])
+{
+ struct idpf_ctlq_desc *desc;
+ int num_desc_avail;
+ int err = 0;
+ int i;
+
+ mutex_lock(&cq->cq_lock);
+
+ /* Ensure there are enough descriptors to send all messages */
+ num_desc_avail = IDPF_CTLQ_DESC_UNUSED(cq);
+ if (num_desc_avail == 0 || num_desc_avail < num_q_msg) {
+ err = -ENOSPC;
+ goto err_unlock;
+ }
+
+ for (i = 0; i < num_q_msg; i++) {
+ struct idpf_ctlq_msg *msg = &q_msg[i];
+
+ desc = IDPF_CTLQ_DESC(cq, cq->next_to_use);
+
+ desc->opcode = cpu_to_le16(msg->opcode);
+ desc->pfid_vfid = cpu_to_le16(msg->func_id);
+
+ desc->v_opcode_dtype = cpu_to_le32(msg->cookie.mbx.chnl_opcode);
+ desc->v_retval = cpu_to_le32(msg->cookie.mbx.chnl_retval);
+
+ desc->flags = cpu_to_le16((msg->host_id & IDPF_HOST_ID_MASK) <<
+ IDPF_CTLQ_FLAG_HOST_ID_S);
+ if (msg->data_len) {
+ struct idpf_dma_mem *buff = msg->ctx.indirect.payload;
+
+ desc->datalen |= cpu_to_le16(msg->data_len);
+ desc->flags |= cpu_to_le16(IDPF_CTLQ_FLAG_BUF);
+ desc->flags |= cpu_to_le16(IDPF_CTLQ_FLAG_RD);
+
+ /* Update the address values in the desc with the pa
+ * value for respective buffer
+ */
+ desc->params.indirect.addr_high =
+ cpu_to_le32(upper_32_bits(buff->pa));
+ desc->params.indirect.addr_low =
+ cpu_to_le32(lower_32_bits(buff->pa));
+
+ memcpy(&desc->params, msg->ctx.indirect.context,
+ IDPF_INDIRECT_CTX_SIZE);
+ } else {
+ memcpy(&desc->params, msg->ctx.direct,
+ IDPF_DIRECT_CTX_SIZE);
+ }
+
+ /* Store buffer info */
+ cq->bi.tx_msg[cq->next_to_use] = msg;
+
+ (cq->next_to_use)++;
+ if (cq->next_to_use == cq->ring_size)
+ cq->next_to_use = 0;
+ }
+
+ /* Force memory write to complete before letting hardware
+ * know that there are new descriptors to fetch.
+ */
+ dma_wmb();
+
+ wr32(hw, cq->reg.tail, cq->next_to_use);
+
+err_unlock:
+ mutex_unlock(&cq->cq_lock);
+
+ return err;
+}
+
+/**
+ * idpf_ctlq_clean_sq - reclaim send descriptors on HW write back for the
+ * requested queue
+ * @cq: pointer to the specific Control queue
+ * @clean_count: (input|output) number of descriptors to clean as input, and
+ * number of descriptors actually cleaned as output
+ * @msg_status: (output) pointer to msg pointer array to be populated; needs
+ * to be allocated by caller
+ *
+ * Returns an array of message pointers associated with the cleaned
+ * descriptors. The pointers are to the original ctlq_msgs sent on the cleaned
+ * descriptors. The status will be returned for each; any messages that failed
+ * to send will have a non-zero status. The caller is expected to free original
+ * ctlq_msgs and free or reuse the DMA buffers.
+ */
+int idpf_ctlq_clean_sq(struct idpf_ctlq_info *cq, u16 *clean_count,
+ struct idpf_ctlq_msg *msg_status[])
+{
+ struct idpf_ctlq_desc *desc;
+ u16 i, num_to_clean;
+ u16 ntc, desc_err;
+
+ if (*clean_count == 0)
+ return 0;
+ if (*clean_count > cq->ring_size)
+ return -EBADR;
+
+ mutex_lock(&cq->cq_lock);
+
+ ntc = cq->next_to_clean;
+
+ num_to_clean = *clean_count;
+
+ for (i = 0; i < num_to_clean; i++) {
+ /* Fetch next descriptor and check if marked as done */
+ desc = IDPF_CTLQ_DESC(cq, ntc);
+ if (!(le16_to_cpu(desc->flags) & IDPF_CTLQ_FLAG_DD))
+ break;
+
+ /* strip off FW internal code */
+ desc_err = le16_to_cpu(desc->ret_val) & 0xff;
+
+ msg_status[i] = cq->bi.tx_msg[ntc];
+ msg_status[i]->status = desc_err;
+
+ cq->bi.tx_msg[ntc] = NULL;
+
+ /* Zero out any stale data */
+ memset(desc, 0, sizeof(*desc));
+
+ ntc++;
+ if (ntc == cq->ring_size)
+ ntc = 0;
+ }
+
+ cq->next_to_clean = ntc;
+
+ mutex_unlock(&cq->cq_lock);
+
+ /* Return number of descriptors actually cleaned */
+ *clean_count = i;
+
+ return 0;
+}
+
+/**
+ * idpf_ctlq_post_rx_buffs - post buffers to descriptor ring
+ * @hw: pointer to hw struct
+ * @cq: pointer to control queue handle
+ * @buff_count: (input|output) input is number of buffers caller is trying to
+ * return; output is number of buffers that were not posted
+ * @buffs: array of pointers to dma mem structs to be given to hardware
+ *
+ * Caller uses this function to return DMA buffers to the descriptor ring after
+ * consuming them; buff_count will be the number of buffers.
+ *
+ * Note: this function needs to be called after a receive call even
+ * if there are no DMA buffers to be returned, i.e. buff_count = 0,
+ * buffs = NULL to support direct commands
+ */
+int idpf_ctlq_post_rx_buffs(struct idpf_hw *hw, struct idpf_ctlq_info *cq,
+ u16 *buff_count, struct idpf_dma_mem **buffs)
+{
+ struct idpf_ctlq_desc *desc;
+ u16 ntp = cq->next_to_post;
+ bool buffs_avail = false;
+ u16 tbp = ntp + 1;
+ int i = 0;
+
+ if (*buff_count > cq->ring_size)
+ return -EBADR;
+
+ if (*buff_count > 0)
+ buffs_avail = true;
+
+ mutex_lock(&cq->cq_lock);
+
+ if (tbp >= cq->ring_size)
+ tbp = 0;
+
+ if (tbp == cq->next_to_clean)
+ /* Nothing to do */
+ goto post_buffs_out;
+
+ /* Post buffers for as many as provided or up until the last one used */
+ while (ntp != cq->next_to_clean) {
+ desc = IDPF_CTLQ_DESC(cq, ntp);
+
+ if (cq->bi.rx_buff[ntp])
+ goto fill_desc;
+ if (!buffs_avail) {
+ /* If the caller hasn't given us any buffers or
+ * there are none left, search the ring itself
+ * for an available buffer to move to this
+ * entry starting at the next entry in the ring
+ */
+ tbp = ntp + 1;
+
+ /* Wrap ring if necessary */
+ if (tbp >= cq->ring_size)
+ tbp = 0;
+
+ while (tbp != cq->next_to_clean) {
+ if (cq->bi.rx_buff[tbp]) {
+ cq->bi.rx_buff[ntp] =
+ cq->bi.rx_buff[tbp];
+ cq->bi.rx_buff[tbp] = NULL;
+
+ /* Found a buffer, no need to
+ * search anymore
+ */
+ break;
+ }
+
+ /* Wrap ring if necessary */
+ tbp++;
+ if (tbp >= cq->ring_size)
+ tbp = 0;
+ }
+
+ if (tbp == cq->next_to_clean)
+ goto post_buffs_out;
+ } else {
+ /* Give back pointer to DMA buffer */
+ cq->bi.rx_buff[ntp] = buffs[i];
+ i++;
+
+ if (i >= *buff_count)
+ buffs_avail = false;
+ }
+
+fill_desc:
+ desc->flags =
+ cpu_to_le16(IDPF_CTLQ_FLAG_BUF | IDPF_CTLQ_FLAG_RD);
+
+ /* Post buffers to descriptor */
+ desc->datalen = cpu_to_le16(cq->bi.rx_buff[ntp]->size);
+ desc->params.indirect.addr_high =
+ cpu_to_le32(upper_32_bits(cq->bi.rx_buff[ntp]->pa));
+ desc->params.indirect.addr_low =
+ cpu_to_le32(lower_32_bits(cq->bi.rx_buff[ntp]->pa));
+
+ ntp++;
+ if (ntp == cq->ring_size)
+ ntp = 0;
+ }
+
+post_buffs_out:
+ /* Only update tail if buffers were actually posted */
+ if (cq->next_to_post != ntp) {
+ if (ntp)
+ /* Update next_to_post to ntp - 1 since current ntp
+ * will not have a buffer
+ */
+ cq->next_to_post = ntp - 1;
+ else
+ /* Wrap to end of end ring since current ntp is 0 */
+ cq->next_to_post = cq->ring_size - 1;
+
+ wr32(hw, cq->reg.tail, cq->next_to_post);
+ }
+
+ mutex_unlock(&cq->cq_lock);
+
+ /* return the number of buffers that were not posted */
+ *buff_count = *buff_count - i;
+
+ return 0;
+}
+
+/**
+ * idpf_ctlq_recv - receive control queue message call back
+ * @cq: pointer to control queue handle to receive on
+ * @num_q_msg: (input|output) input number of messages that should be received;
+ * output number of messages actually received
+ * @q_msg: (output) array of received control queue messages on this q;
+ * needs to be pre-allocated by caller for as many messages as requested
+ *
+ * Called by interrupt handler or polling mechanism. Caller is expected
+ * to free buffers
+ */
+int idpf_ctlq_recv(struct idpf_ctlq_info *cq, u16 *num_q_msg,
+ struct idpf_ctlq_msg *q_msg)
+{
+ u16 num_to_clean, ntc, flags;
+ struct idpf_ctlq_desc *desc;
+ int err = 0;
+ u16 i;
+
+ if (*num_q_msg == 0)
+ return 0;
+ else if (*num_q_msg > cq->ring_size)
+ return -EBADR;
+
+ /* take the lock before we start messing with the ring */
+ mutex_lock(&cq->cq_lock);
+
+ ntc = cq->next_to_clean;
+
+ num_to_clean = *num_q_msg;
+
+ for (i = 0; i < num_to_clean; i++) {
+ /* Fetch next descriptor and check if marked as done */
+ desc = IDPF_CTLQ_DESC(cq, ntc);
+ flags = le16_to_cpu(desc->flags);
+
+ if (!(flags & IDPF_CTLQ_FLAG_DD))
+ break;
+
+ q_msg[i].vmvf_type = (flags &
+ (IDPF_CTLQ_FLAG_FTYPE_VM |
+ IDPF_CTLQ_FLAG_FTYPE_PF)) >>
+ IDPF_CTLQ_FLAG_FTYPE_S;
+
+ if (flags & IDPF_CTLQ_FLAG_ERR)
+ err = -EBADMSG;
+
+ q_msg[i].cookie.mbx.chnl_opcode =
+ le32_to_cpu(desc->v_opcode_dtype);
+ q_msg[i].cookie.mbx.chnl_retval =
+ le32_to_cpu(desc->v_retval);
+
+ q_msg[i].opcode = le16_to_cpu(desc->opcode);
+ q_msg[i].data_len = le16_to_cpu(desc->datalen);
+ q_msg[i].status = le16_to_cpu(desc->ret_val);
+
+ if (desc->datalen) {
+ memcpy(q_msg[i].ctx.indirect.context,
+ &desc->params.indirect, IDPF_INDIRECT_CTX_SIZE);
+
+ /* Assign pointer to dma buffer to ctlq_msg array
+ * to be given to upper layer
+ */
+ q_msg[i].ctx.indirect.payload = cq->bi.rx_buff[ntc];
+
+ /* Zero out pointer to DMA buffer info;
+ * will be repopulated by post buffers API
+ */
+ cq->bi.rx_buff[ntc] = NULL;
+ } else {
+ memcpy(q_msg[i].ctx.direct, desc->params.raw,
+ IDPF_DIRECT_CTX_SIZE);
+ }
+
+ /* Zero out stale data in descriptor */
+ memset(desc, 0, sizeof(struct idpf_ctlq_desc));
+
+ ntc++;
+ if (ntc == cq->ring_size)
+ ntc = 0;
+ }
+
+ cq->next_to_clean = ntc;
+
+ mutex_unlock(&cq->cq_lock);
+
+ *num_q_msg = i;
+ if (*num_q_msg == 0)
+ err = -ENOMSG;
+
+ return err;
+}
diff --git a/drivers/net/ethernet/intel/idpf/idpf_controlq.h b/drivers/net/ethernet/intel/idpf/idpf_controlq.h
new file mode 100644
index 000000000000..c1aba09e9856
--- /dev/null
+++ b/drivers/net/ethernet/intel/idpf/idpf_controlq.h
@@ -0,0 +1,130 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright (C) 2023 Intel Corporation */
+
+#ifndef _IDPF_CONTROLQ_H_
+#define _IDPF_CONTROLQ_H_
+
+#include <linux/slab.h>
+
+#include "idpf_controlq_api.h"
+
+/* Maximum buffer length for all control queue types */
+#define IDPF_CTLQ_MAX_BUF_LEN 4096
+
+#define IDPF_CTLQ_DESC(R, i) \
+ (&(((struct idpf_ctlq_desc *)((R)->desc_ring.va))[i]))
+
+#define IDPF_CTLQ_DESC_UNUSED(R) \
+ ((u16)((((R)->next_to_clean > (R)->next_to_use) ? 0 : (R)->ring_size) + \
+ (R)->next_to_clean - (R)->next_to_use - 1))
+
+/* Control Queue default settings */
+#define IDPF_CTRL_SQ_CMD_TIMEOUT 250 /* msecs */
+
+struct idpf_ctlq_desc {
+ /* Control queue descriptor flags */
+ __le16 flags;
+ /* Control queue message opcode */
+ __le16 opcode;
+ __le16 datalen; /* 0 for direct commands */
+ union {
+ __le16 ret_val;
+ __le16 pfid_vfid;
+#define IDPF_CTLQ_DESC_VF_ID_S 0
+#define IDPF_CTLQ_DESC_VF_ID_M (0x7FF << IDPF_CTLQ_DESC_VF_ID_S)
+#define IDPF_CTLQ_DESC_PF_ID_S 11
+#define IDPF_CTLQ_DESC_PF_ID_M (0x1F << IDPF_CTLQ_DESC_PF_ID_S)
+ };
+
+ /* Virtchnl message opcode and virtchnl descriptor type
+ * v_opcode=[27:0], v_dtype=[31:28]
+ */
+ __le32 v_opcode_dtype;
+ /* Virtchnl return value */
+ __le32 v_retval;
+ union {
+ struct {
+ __le32 param0;
+ __le32 param1;
+ __le32 param2;
+ __le32 param3;
+ } direct;
+ struct {
+ __le32 param0;
+ __le16 sw_cookie;
+ /* Virtchnl flags */
+ __le16 v_flags;
+ __le32 addr_high;
+ __le32 addr_low;
+ } indirect;
+ u8 raw[16];
+ } params;
+};
+
+/* Flags sub-structure
+ * |0 |1 |2 |3 |4 |5 |6 |7 |8 |9 |10 |11 |12 |13 |14 |15 |
+ * |DD |CMP|ERR| * RSV * |FTYPE | *RSV* |RD |VFC|BUF| HOST_ID |
+ */
+/* command flags and offsets */
+#define IDPF_CTLQ_FLAG_DD_S 0
+#define IDPF_CTLQ_FLAG_CMP_S 1
+#define IDPF_CTLQ_FLAG_ERR_S 2
+#define IDPF_CTLQ_FLAG_FTYPE_S 6
+#define IDPF_CTLQ_FLAG_RD_S 10
+#define IDPF_CTLQ_FLAG_VFC_S 11
+#define IDPF_CTLQ_FLAG_BUF_S 12
+#define IDPF_CTLQ_FLAG_HOST_ID_S 13
+
+#define IDPF_CTLQ_FLAG_DD BIT(IDPF_CTLQ_FLAG_DD_S) /* 0x1 */
+#define IDPF_CTLQ_FLAG_CMP BIT(IDPF_CTLQ_FLAG_CMP_S) /* 0x2 */
+#define IDPF_CTLQ_FLAG_ERR BIT(IDPF_CTLQ_FLAG_ERR_S) /* 0x4 */
+#define IDPF_CTLQ_FLAG_FTYPE_VM BIT(IDPF_CTLQ_FLAG_FTYPE_S) /* 0x40 */
+#define IDPF_CTLQ_FLAG_FTYPE_PF BIT(IDPF_CTLQ_FLAG_FTYPE_S + 1) /* 0x80 */
+#define IDPF_CTLQ_FLAG_RD BIT(IDPF_CTLQ_FLAG_RD_S) /* 0x400 */
+#define IDPF_CTLQ_FLAG_VFC BIT(IDPF_CTLQ_FLAG_VFC_S) /* 0x800 */
+#define IDPF_CTLQ_FLAG_BUF BIT(IDPF_CTLQ_FLAG_BUF_S) /* 0x1000 */
+
+/* Host ID is a special field that has 3b and not a 1b flag */
+#define IDPF_CTLQ_FLAG_HOST_ID_M MAKE_MASK(0x7000UL, IDPF_CTLQ_FLAG_HOST_ID_S)
+
+struct idpf_mbxq_desc {
+ u8 pad[8]; /* CTLQ flags/opcode/len/retval fields */
+ u32 chnl_opcode; /* avoid confusion with desc->opcode */
+ u32 chnl_retval; /* ditto for desc->retval */
+ u32 pf_vf_id; /* used by CP when sending to PF */
+};
+
+/* Define the driver hardware struct to replace other control structs as needed
+ * Align to ctlq_hw_info
+ */
+struct idpf_hw {
+ void __iomem *hw_addr;
+ resource_size_t hw_addr_len;
+
+ struct idpf_adapter *back;
+
+ /* control queue - send and receive */
+ struct idpf_ctlq_info *asq;
+ struct idpf_ctlq_info *arq;
+
+ /* pci info */
+ u16 device_id;
+ u16 vendor_id;
+ u16 subsystem_device_id;
+ u16 subsystem_vendor_id;
+ u8 revision_id;
+ bool adapter_stopped;
+
+ struct list_head cq_list_head;
+};
+
+int idpf_ctlq_alloc_ring_res(struct idpf_hw *hw,
+ struct idpf_ctlq_info *cq);
+
+void idpf_ctlq_dealloc_ring_res(struct idpf_hw *hw, struct idpf_ctlq_info *cq);
+
+/* prototype for functions used for dynamic memory allocation */
+void *idpf_alloc_dma_mem(struct idpf_hw *hw, struct idpf_dma_mem *mem,
+ u64 size);
+void idpf_free_dma_mem(struct idpf_hw *hw, struct idpf_dma_mem *mem);
+#endif /* _IDPF_CONTROLQ_H_ */
diff --git a/drivers/net/ethernet/intel/idpf/idpf_controlq_api.h b/drivers/net/ethernet/intel/idpf/idpf_controlq_api.h
new file mode 100644
index 000000000000..8dee098bbfb0
--- /dev/null
+++ b/drivers/net/ethernet/intel/idpf/idpf_controlq_api.h
@@ -0,0 +1,169 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright (C) 2023 Intel Corporation */
+
+#ifndef _IDPF_CONTROLQ_API_H_
+#define _IDPF_CONTROLQ_API_H_
+
+#include "idpf_mem.h"
+
+struct idpf_hw;
+
+/* Used for queue init, response and events */
+enum idpf_ctlq_type {
+ IDPF_CTLQ_TYPE_MAILBOX_TX = 0,
+ IDPF_CTLQ_TYPE_MAILBOX_RX = 1,
+ IDPF_CTLQ_TYPE_CONFIG_TX = 2,
+ IDPF_CTLQ_TYPE_CONFIG_RX = 3,
+ IDPF_CTLQ_TYPE_EVENT_RX = 4,
+ IDPF_CTLQ_TYPE_RDMA_TX = 5,
+ IDPF_CTLQ_TYPE_RDMA_RX = 6,
+ IDPF_CTLQ_TYPE_RDMA_COMPL = 7
+};
+
+/* Generic Control Queue Structures */
+struct idpf_ctlq_reg {
+ /* used for queue tracking */
+ u32 head;
+ u32 tail;
+ /* Below applies only to default mb (if present) */
+ u32 len;
+ u32 bah;
+ u32 bal;
+ u32 len_mask;
+ u32 len_ena_mask;
+ u32 head_mask;
+};
+
+/* Generic queue msg structure */
+struct idpf_ctlq_msg {
+ u8 vmvf_type; /* represents the source of the message on recv */
+#define IDPF_VMVF_TYPE_VF 0
+#define IDPF_VMVF_TYPE_VM 1
+#define IDPF_VMVF_TYPE_PF 2
+ u8 host_id;
+ /* 3b field used only when sending a message to CP - to be used in
+ * combination with target func_id to route the message
+ */
+#define IDPF_HOST_ID_MASK 0x7
+
+ u16 opcode;
+ u16 data_len; /* data_len = 0 when no payload is attached */
+ union {
+ u16 func_id; /* when sending a message */
+ u16 status; /* when receiving a message */
+ };
+ union {
+ struct {
+ u32 chnl_opcode;
+ u32 chnl_retval;
+ } mbx;
+ } cookie;
+ union {
+#define IDPF_DIRECT_CTX_SIZE 16
+#define IDPF_INDIRECT_CTX_SIZE 8
+ /* 16 bytes of context can be provided or 8 bytes of context
+ * plus the address of a DMA buffer
+ */
+ u8 direct[IDPF_DIRECT_CTX_SIZE];
+ struct {
+ u8 context[IDPF_INDIRECT_CTX_SIZE];
+ struct idpf_dma_mem *payload;
+ } indirect;
+ } ctx;
+};
+
+/* Generic queue info structures */
+/* MB, CONFIG and EVENT q do not have extended info */
+struct idpf_ctlq_create_info {
+ enum idpf_ctlq_type type;
+ int id; /* absolute queue offset passed as input
+ * -1 for default mailbox if present
+ */
+ u16 len; /* Queue length passed as input */
+ u16 buf_size; /* buffer size passed as input */
+ u64 base_address; /* output, HPA of the Queue start */
+ struct idpf_ctlq_reg reg; /* registers accessed by ctlqs */
+
+ int ext_info_size;
+ void *ext_info; /* Specific to q type */
+};
+
+/* Control Queue information */
+struct idpf_ctlq_info {
+ struct list_head cq_list;
+
+ enum idpf_ctlq_type cq_type;
+ int q_id;
+ struct mutex cq_lock; /* control queue lock */
+ /* used for interrupt processing */
+ u16 next_to_use;
+ u16 next_to_clean;
+ u16 next_to_post; /* starting descriptor to post buffers
+ * to after recev
+ */
+
+ struct idpf_dma_mem desc_ring; /* descriptor ring memory
+ * idpf_dma_mem is defined in OSdep.h
+ */
+ union {
+ struct idpf_dma_mem **rx_buff;
+ struct idpf_ctlq_msg **tx_msg;
+ } bi;
+
+ u16 buf_size; /* queue buffer size */
+ u16 ring_size; /* Number of descriptors */
+ struct idpf_ctlq_reg reg; /* registers accessed by ctlqs */
+};
+
+/**
+ * enum idpf_mbx_opc - PF/VF mailbox commands
+ * @idpf_mbq_opc_send_msg_to_cp: used by PF or VF to send a message to its CP
+ */
+enum idpf_mbx_opc {
+ idpf_mbq_opc_send_msg_to_cp = 0x0801,
+};
+
+/* API supported for control queue management */
+/* Will init all required q including default mb. "q_info" is an array of
+ * create_info structs equal to the number of control queues to be created.
+ */
+int idpf_ctlq_init(struct idpf_hw *hw, u8 num_q,
+ struct idpf_ctlq_create_info *q_info);
+
+/* Allocate and initialize a single control queue, which will be added to the
+ * control queue list; returns a handle to the created control queue
+ */
+int idpf_ctlq_add(struct idpf_hw *hw,
+ struct idpf_ctlq_create_info *qinfo,
+ struct idpf_ctlq_info **cq);
+
+/* Deinitialize and deallocate a single control queue */
+void idpf_ctlq_remove(struct idpf_hw *hw,
+ struct idpf_ctlq_info *cq);
+
+/* Sends messages to HW and will also free the buffer*/
+int idpf_ctlq_send(struct idpf_hw *hw,
+ struct idpf_ctlq_info *cq,
+ u16 num_q_msg,
+ struct idpf_ctlq_msg q_msg[]);
+
+/* Receives messages and called by interrupt handler/polling
+ * initiated by app/process. Also caller is supposed to free the buffers
+ */
+int idpf_ctlq_recv(struct idpf_ctlq_info *cq, u16 *num_q_msg,
+ struct idpf_ctlq_msg *q_msg);
+
+/* Reclaims send descriptors on HW write back */
+int idpf_ctlq_clean_sq(struct idpf_ctlq_info *cq, u16 *clean_count,
+ struct idpf_ctlq_msg *msg_status[]);
+
+/* Indicate RX buffers are done being processed */
+int idpf_ctlq_post_rx_buffs(struct idpf_hw *hw,
+ struct idpf_ctlq_info *cq,
+ u16 *buff_count,
+ struct idpf_dma_mem **buffs);
+
+/* Will destroy all q including the default mb */
+void idpf_ctlq_deinit(struct idpf_hw *hw);
+
+#endif /* _IDPF_CONTROLQ_API_H_ */
diff --git a/drivers/net/ethernet/intel/idpf/idpf_controlq_setup.c b/drivers/net/ethernet/intel/idpf/idpf_controlq_setup.c
new file mode 100644
index 000000000000..a942a6385d06
--- /dev/null
+++ b/drivers/net/ethernet/intel/idpf/idpf_controlq_setup.c
@@ -0,0 +1,171 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2023 Intel Corporation */
+
+#include "idpf_controlq.h"
+
+/**
+ * idpf_ctlq_alloc_desc_ring - Allocate Control Queue (CQ) rings
+ * @hw: pointer to hw struct
+ * @cq: pointer to the specific Control queue
+ */
+static int idpf_ctlq_alloc_desc_ring(struct idpf_hw *hw,
+ struct idpf_ctlq_info *cq)
+{
+ size_t size = cq->ring_size * sizeof(struct idpf_ctlq_desc);
+
+ cq->desc_ring.va = idpf_alloc_dma_mem(hw, &cq->desc_ring, size);
+ if (!cq->desc_ring.va)
+ return -ENOMEM;
+
+ return 0;
+}
+
+/**
+ * idpf_ctlq_alloc_bufs - Allocate Control Queue (CQ) buffers
+ * @hw: pointer to hw struct
+ * @cq: pointer to the specific Control queue
+ *
+ * Allocate the buffer head for all control queues, and if it's a receive
+ * queue, allocate DMA buffers
+ */
+static int idpf_ctlq_alloc_bufs(struct idpf_hw *hw,
+ struct idpf_ctlq_info *cq)
+{
+ int i;
+
+ /* Do not allocate DMA buffers for transmit queues */
+ if (cq->cq_type == IDPF_CTLQ_TYPE_MAILBOX_TX)
+ return 0;
+
+ /* We'll be allocating the buffer info memory first, then we can
+ * allocate the mapped buffers for the event processing
+ */
+ cq->bi.rx_buff = kcalloc(cq->ring_size, sizeof(struct idpf_dma_mem *),
+ GFP_KERNEL);
+ if (!cq->bi.rx_buff)
+ return -ENOMEM;
+
+ /* allocate the mapped buffers (except for the last one) */
+ for (i = 0; i < cq->ring_size - 1; i++) {
+ struct idpf_dma_mem *bi;
+ int num = 1; /* number of idpf_dma_mem to be allocated */
+
+ cq->bi.rx_buff[i] = kcalloc(num, sizeof(struct idpf_dma_mem),
+ GFP_KERNEL);
+ if (!cq->bi.rx_buff[i])
+ goto unwind_alloc_cq_bufs;
+
+ bi = cq->bi.rx_buff[i];
+
+ bi->va = idpf_alloc_dma_mem(hw, bi, cq->buf_size);
+ if (!bi->va) {
+ /* unwind will not free the failed entry */
+ kfree(cq->bi.rx_buff[i]);
+ goto unwind_alloc_cq_bufs;
+ }
+ }
+
+ return 0;
+
+unwind_alloc_cq_bufs:
+ /* don't try to free the one that failed... */
+ i--;
+ for (; i >= 0; i--) {
+ idpf_free_dma_mem(hw, cq->bi.rx_buff[i]);
+ kfree(cq->bi.rx_buff[i]);
+ }
+ kfree(cq->bi.rx_buff);
+
+ return -ENOMEM;
+}
+
+/**
+ * idpf_ctlq_free_desc_ring - Free Control Queue (CQ) rings
+ * @hw: pointer to hw struct
+ * @cq: pointer to the specific Control queue
+ *
+ * This assumes the posted send buffers have already been cleaned
+ * and de-allocated
+ */
+static void idpf_ctlq_free_desc_ring(struct idpf_hw *hw,
+ struct idpf_ctlq_info *cq)
+{
+ idpf_free_dma_mem(hw, &cq->desc_ring);
+}
+
+/**
+ * idpf_ctlq_free_bufs - Free CQ buffer info elements
+ * @hw: pointer to hw struct
+ * @cq: pointer to the specific Control queue
+ *
+ * Free the DMA buffers for RX queues, and DMA buffer header for both RX and TX
+ * queues. The upper layers are expected to manage freeing of TX DMA buffers
+ */
+static void idpf_ctlq_free_bufs(struct idpf_hw *hw, struct idpf_ctlq_info *cq)
+{
+ void *bi;
+
+ if (cq->cq_type == IDPF_CTLQ_TYPE_MAILBOX_RX) {
+ int i;
+
+ /* free DMA buffers for rx queues*/
+ for (i = 0; i < cq->ring_size; i++) {
+ if (cq->bi.rx_buff[i]) {
+ idpf_free_dma_mem(hw, cq->bi.rx_buff[i]);
+ kfree(cq->bi.rx_buff[i]);
+ }
+ }
+
+ bi = (void *)cq->bi.rx_buff;
+ } else {
+ bi = (void *)cq->bi.tx_msg;
+ }
+
+ /* free the buffer header */
+ kfree(bi);
+}
+
+/**
+ * idpf_ctlq_dealloc_ring_res - Free memory allocated for control queue
+ * @hw: pointer to hw struct
+ * @cq: pointer to the specific Control queue
+ *
+ * Free the memory used by the ring, buffers and other related structures
+ */
+void idpf_ctlq_dealloc_ring_res(struct idpf_hw *hw, struct idpf_ctlq_info *cq)
+{
+ /* free ring buffers and the ring itself */
+ idpf_ctlq_free_bufs(hw, cq);
+ idpf_ctlq_free_desc_ring(hw, cq);
+}
+
+/**
+ * idpf_ctlq_alloc_ring_res - allocate memory for descriptor ring and bufs
+ * @hw: pointer to hw struct
+ * @cq: pointer to control queue struct
+ *
+ * Do *NOT* hold cq_lock when calling this as the memory allocation routines
+ * called are not going to be atomic context safe
+ */
+int idpf_ctlq_alloc_ring_res(struct idpf_hw *hw, struct idpf_ctlq_info *cq)
+{
+ int err;
+
+ /* allocate the ring memory */
+ err = idpf_ctlq_alloc_desc_ring(hw, cq);
+ if (err)
+ return err;
+
+ /* allocate buffers in the rings */
+ err = idpf_ctlq_alloc_bufs(hw, cq);
+ if (err)
+ goto idpf_init_cq_free_ring;
+
+ /* success! */
+ return 0;
+
+idpf_init_cq_free_ring:
+ idpf_free_dma_mem(hw, &cq->desc_ring);
+
+ return err;
+}
diff --git a/drivers/net/ethernet/intel/idpf/idpf_dev.c b/drivers/net/ethernet/intel/idpf/idpf_dev.c
new file mode 100644
index 000000000000..34ad1ac46b78
--- /dev/null
+++ b/drivers/net/ethernet/intel/idpf/idpf_dev.c
@@ -0,0 +1,165 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2023 Intel Corporation */
+
+#include "idpf.h"
+#include "idpf_lan_pf_regs.h"
+
+#define IDPF_PF_ITR_IDX_SPACING 0x4
+
+/**
+ * idpf_ctlq_reg_init - initialize default mailbox registers
+ * @cq: pointer to the array of create control queues
+ */
+static void idpf_ctlq_reg_init(struct idpf_ctlq_create_info *cq)
+{
+ int i;
+
+ for (i = 0; i < IDPF_NUM_DFLT_MBX_Q; i++) {
+ struct idpf_ctlq_create_info *ccq = cq + i;
+
+ switch (ccq->type) {
+ case IDPF_CTLQ_TYPE_MAILBOX_TX:
+ /* set head and tail registers in our local struct */
+ ccq->reg.head = PF_FW_ATQH;
+ ccq->reg.tail = PF_FW_ATQT;
+ ccq->reg.len = PF_FW_ATQLEN;
+ ccq->reg.bah = PF_FW_ATQBAH;
+ ccq->reg.bal = PF_FW_ATQBAL;
+ ccq->reg.len_mask = PF_FW_ATQLEN_ATQLEN_M;
+ ccq->reg.len_ena_mask = PF_FW_ATQLEN_ATQENABLE_M;
+ ccq->reg.head_mask = PF_FW_ATQH_ATQH_M;
+ break;
+ case IDPF_CTLQ_TYPE_MAILBOX_RX:
+ /* set head and tail registers in our local struct */
+ ccq->reg.head = PF_FW_ARQH;
+ ccq->reg.tail = PF_FW_ARQT;
+ ccq->reg.len = PF_FW_ARQLEN;
+ ccq->reg.bah = PF_FW_ARQBAH;
+ ccq->reg.bal = PF_FW_ARQBAL;
+ ccq->reg.len_mask = PF_FW_ARQLEN_ARQLEN_M;
+ ccq->reg.len_ena_mask = PF_FW_ARQLEN_ARQENABLE_M;
+ ccq->reg.head_mask = PF_FW_ARQH_ARQH_M;
+ break;
+ default:
+ break;
+ }
+ }
+}
+
+/**
+ * idpf_mb_intr_reg_init - Initialize mailbox interrupt register
+ * @adapter: adapter structure
+ */
+static void idpf_mb_intr_reg_init(struct idpf_adapter *adapter)
+{
+ struct idpf_intr_reg *intr = &adapter->mb_vector.intr_reg;
+ u32 dyn_ctl = le32_to_cpu(adapter->caps.mailbox_dyn_ctl);
+
+ intr->dyn_ctl = idpf_get_reg_addr(adapter, dyn_ctl);
+ intr->dyn_ctl_intena_m = PF_GLINT_DYN_CTL_INTENA_M;
+ intr->dyn_ctl_itridx_m = PF_GLINT_DYN_CTL_ITR_INDX_M;
+ intr->icr_ena = idpf_get_reg_addr(adapter, PF_INT_DIR_OICR_ENA);
+ intr->icr_ena_ctlq_m = PF_INT_DIR_OICR_ENA_M;
+}
+
+/**
+ * idpf_intr_reg_init - Initialize interrupt registers
+ * @vport: virtual port structure
+ */
+static int idpf_intr_reg_init(struct idpf_vport *vport)
+{
+ struct idpf_adapter *adapter = vport->adapter;
+ int num_vecs = vport->num_q_vectors;
+ struct idpf_vec_regs *reg_vals;
+ int num_regs, i, err = 0;
+ u32 rx_itr, tx_itr;
+ u16 total_vecs;
+
+ total_vecs = idpf_get_reserved_vecs(vport->adapter);
+ reg_vals = kcalloc(total_vecs, sizeof(struct idpf_vec_regs),
+ GFP_KERNEL);
+ if (!reg_vals)
+ return -ENOMEM;
+
+ num_regs = idpf_get_reg_intr_vecs(vport, reg_vals);
+ if (num_regs < num_vecs) {
+ err = -EINVAL;
+ goto free_reg_vals;
+ }
+
+ for (i = 0; i < num_vecs; i++) {
+ struct idpf_q_vector *q_vector = &vport->q_vectors[i];
+ u16 vec_id = vport->q_vector_idxs[i] - IDPF_MBX_Q_VEC;
+ struct idpf_intr_reg *intr = &q_vector->intr_reg;
+ u32 spacing;
+
+ intr->dyn_ctl = idpf_get_reg_addr(adapter,
+ reg_vals[vec_id].dyn_ctl_reg);
+ intr->dyn_ctl_intena_m = PF_GLINT_DYN_CTL_INTENA_M;
+ intr->dyn_ctl_itridx_s = PF_GLINT_DYN_CTL_ITR_INDX_S;
+ intr->dyn_ctl_intrvl_s = PF_GLINT_DYN_CTL_INTERVAL_S;
+
+ spacing = IDPF_ITR_IDX_SPACING(reg_vals[vec_id].itrn_index_spacing,
+ IDPF_PF_ITR_IDX_SPACING);
+ rx_itr = PF_GLINT_ITR_ADDR(VIRTCHNL2_ITR_IDX_0,
+ reg_vals[vec_id].itrn_reg,
+ spacing);
+ tx_itr = PF_GLINT_ITR_ADDR(VIRTCHNL2_ITR_IDX_1,
+ reg_vals[vec_id].itrn_reg,
+ spacing);
+ intr->rx_itr = idpf_get_reg_addr(adapter, rx_itr);
+ intr->tx_itr = idpf_get_reg_addr(adapter, tx_itr);
+ }
+
+free_reg_vals:
+ kfree(reg_vals);
+
+ return err;
+}
+
+/**
+ * idpf_reset_reg_init - Initialize reset registers
+ * @adapter: Driver specific private structure
+ */
+static void idpf_reset_reg_init(struct idpf_adapter *adapter)
+{
+ adapter->reset_reg.rstat = idpf_get_reg_addr(adapter, PFGEN_RSTAT);
+ adapter->reset_reg.rstat_m = PFGEN_RSTAT_PFR_STATE_M;
+}
+
+/**
+ * idpf_trigger_reset - trigger reset
+ * @adapter: Driver specific private structure
+ * @trig_cause: Reason to trigger a reset
+ */
+static void idpf_trigger_reset(struct idpf_adapter *adapter,
+ enum idpf_flags __always_unused trig_cause)
+{
+ u32 reset_reg;
+
+ reset_reg = readl(idpf_get_reg_addr(adapter, PFGEN_CTRL));
+ writel(reset_reg | PFGEN_CTRL_PFSWR,
+ idpf_get_reg_addr(adapter, PFGEN_CTRL));
+}
+
+/**
+ * idpf_reg_ops_init - Initialize register API function pointers
+ * @adapter: Driver specific private structure
+ */
+static void idpf_reg_ops_init(struct idpf_adapter *adapter)
+{
+ adapter->dev_ops.reg_ops.ctlq_reg_init = idpf_ctlq_reg_init;
+ adapter->dev_ops.reg_ops.intr_reg_init = idpf_intr_reg_init;
+ adapter->dev_ops.reg_ops.mb_intr_reg_init = idpf_mb_intr_reg_init;
+ adapter->dev_ops.reg_ops.reset_reg_init = idpf_reset_reg_init;
+ adapter->dev_ops.reg_ops.trigger_reset = idpf_trigger_reset;
+}
+
+/**
+ * idpf_dev_ops_init - Initialize device API function pointers
+ * @adapter: Driver specific private structure
+ */
+void idpf_dev_ops_init(struct idpf_adapter *adapter)
+{
+ idpf_reg_ops_init(adapter);
+}
diff --git a/drivers/net/ethernet/intel/idpf/idpf_devids.h b/drivers/net/ethernet/intel/idpf/idpf_devids.h
new file mode 100644
index 000000000000..5154a52ae61c
--- /dev/null
+++ b/drivers/net/ethernet/intel/idpf/idpf_devids.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright (C) 2023 Intel Corporation */
+
+#ifndef _IDPF_DEVIDS_H_
+#define _IDPF_DEVIDS_H_
+
+#define IDPF_DEV_ID_PF 0x1452
+#define IDPF_DEV_ID_VF 0x145C
+
+#endif /* _IDPF_DEVIDS_H_ */
diff --git a/drivers/net/ethernet/intel/idpf/idpf_ethtool.c b/drivers/net/ethernet/intel/idpf/idpf_ethtool.c
new file mode 100644
index 000000000000..52ea38669f85
--- /dev/null
+++ b/drivers/net/ethernet/intel/idpf/idpf_ethtool.c
@@ -0,0 +1,1369 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2023 Intel Corporation */
+
+#include "idpf.h"
+
+/**
+ * idpf_get_rxnfc - command to get RX flow classification rules
+ * @netdev: network interface device structure
+ * @cmd: ethtool rxnfc command
+ * @rule_locs: pointer to store rule locations
+ *
+ * Returns Success if the command is supported.
+ */
+static int idpf_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd,
+ u32 __always_unused *rule_locs)
+{
+ struct idpf_vport *vport;
+
+ idpf_vport_ctrl_lock(netdev);
+ vport = idpf_netdev_to_vport(netdev);
+
+ switch (cmd->cmd) {
+ case ETHTOOL_GRXRINGS:
+ cmd->data = vport->num_rxq;
+ idpf_vport_ctrl_unlock(netdev);
+
+ return 0;
+ default:
+ break;
+ }
+
+ idpf_vport_ctrl_unlock(netdev);
+
+ return -EOPNOTSUPP;
+}
+
+/**
+ * idpf_get_rxfh_key_size - get the RSS hash key size
+ * @netdev: network interface device structure
+ *
+ * Returns the key size on success, error value on failure.
+ */
+static u32 idpf_get_rxfh_key_size(struct net_device *netdev)
+{
+ struct idpf_netdev_priv *np = netdev_priv(netdev);
+ struct idpf_vport_user_config_data *user_config;
+
+ if (!idpf_is_cap_ena_all(np->adapter, IDPF_RSS_CAPS, IDPF_CAP_RSS))
+ return -EOPNOTSUPP;
+
+ user_config = &np->adapter->vport_config[np->vport_idx]->user_config;
+
+ return user_config->rss_data.rss_key_size;
+}
+
+/**
+ * idpf_get_rxfh_indir_size - get the rx flow hash indirection table size
+ * @netdev: network interface device structure
+ *
+ * Returns the table size on success, error value on failure.
+ */
+static u32 idpf_get_rxfh_indir_size(struct net_device *netdev)
+{
+ struct idpf_netdev_priv *np = netdev_priv(netdev);
+ struct idpf_vport_user_config_data *user_config;
+
+ if (!idpf_is_cap_ena_all(np->adapter, IDPF_RSS_CAPS, IDPF_CAP_RSS))
+ return -EOPNOTSUPP;
+
+ user_config = &np->adapter->vport_config[np->vport_idx]->user_config;
+
+ return user_config->rss_data.rss_lut_size;
+}
+
+/**
+ * idpf_get_rxfh - get the rx flow hash indirection table
+ * @netdev: network interface device structure
+ * @indir: indirection table
+ * @key: hash key
+ * @hfunc: hash function in use
+ *
+ * Reads the indirection table directly from the hardware. Always returns 0.
+ */
+static int idpf_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key,
+ u8 *hfunc)
+{
+ struct idpf_netdev_priv *np = netdev_priv(netdev);
+ struct idpf_rss_data *rss_data;
+ struct idpf_adapter *adapter;
+ int err = 0;
+ u16 i;
+
+ idpf_vport_ctrl_lock(netdev);
+
+ adapter = np->adapter;
+
+ if (!idpf_is_cap_ena_all(adapter, IDPF_RSS_CAPS, IDPF_CAP_RSS)) {
+ err = -EOPNOTSUPP;
+ goto unlock_mutex;
+ }
+
+ rss_data = &adapter->vport_config[np->vport_idx]->user_config.rss_data;
+ if (np->state != __IDPF_VPORT_UP)
+ goto unlock_mutex;
+
+ if (hfunc)
+ *hfunc = ETH_RSS_HASH_TOP;
+
+ if (key)
+ memcpy(key, rss_data->rss_key, rss_data->rss_key_size);
+
+ if (indir) {
+ for (i = 0; i < rss_data->rss_lut_size; i++)
+ indir[i] = rss_data->rss_lut[i];
+ }
+
+unlock_mutex:
+ idpf_vport_ctrl_unlock(netdev);
+
+ return err;
+}
+
+/**
+ * idpf_set_rxfh - set the rx flow hash indirection table
+ * @netdev: network interface device structure
+ * @indir: indirection table
+ * @key: hash key
+ * @hfunc: hash function to use
+ *
+ * Returns -EINVAL if the table specifies an invalid queue id, otherwise
+ * returns 0 after programming the table.
+ */
+static int idpf_set_rxfh(struct net_device *netdev, const u32 *indir,
+ const u8 *key, const u8 hfunc)
+{
+ struct idpf_netdev_priv *np = netdev_priv(netdev);
+ struct idpf_rss_data *rss_data;
+ struct idpf_adapter *adapter;
+ struct idpf_vport *vport;
+ int err = 0;
+ u16 lut;
+
+ idpf_vport_ctrl_lock(netdev);
+ vport = idpf_netdev_to_vport(netdev);
+
+ adapter = vport->adapter;
+
+ if (!idpf_is_cap_ena_all(adapter, IDPF_RSS_CAPS, IDPF_CAP_RSS)) {
+ err = -EOPNOTSUPP;
+ goto unlock_mutex;
+ }
+
+ rss_data = &adapter->vport_config[vport->idx]->user_config.rss_data;
+ if (np->state != __IDPF_VPORT_UP)
+ goto unlock_mutex;
+
+ if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP) {
+ err = -EOPNOTSUPP;
+ goto unlock_mutex;
+ }
+
+ if (key)
+ memcpy(rss_data->rss_key, key, rss_data->rss_key_size);
+
+ if (indir) {
+ for (lut = 0; lut < rss_data->rss_lut_size; lut++)
+ rss_data->rss_lut[lut] = indir[lut];
+ }
+
+ err = idpf_config_rss(vport);
+
+unlock_mutex:
+ idpf_vport_ctrl_unlock(netdev);
+
+ return err;
+}
+
+/**
+ * idpf_get_channels: get the number of channels supported by the device
+ * @netdev: network interface device structure
+ * @ch: channel information structure
+ *
+ * Report maximum of TX and RX. Report one extra channel to match our MailBox
+ * Queue.
+ */
+static void idpf_get_channels(struct net_device *netdev,
+ struct ethtool_channels *ch)
+{
+ struct idpf_netdev_priv *np = netdev_priv(netdev);
+ struct idpf_vport_config *vport_config;
+ u16 num_txq, num_rxq;
+ u16 combined;
+
+ vport_config = np->adapter->vport_config[np->vport_idx];
+
+ num_txq = vport_config->user_config.num_req_tx_qs;
+ num_rxq = vport_config->user_config.num_req_rx_qs;
+
+ combined = min(num_txq, num_rxq);
+
+ /* Report maximum channels */
+ ch->max_combined = min_t(u16, vport_config->max_q.max_txq,
+ vport_config->max_q.max_rxq);
+ ch->max_rx = vport_config->max_q.max_rxq;
+ ch->max_tx = vport_config->max_q.max_txq;
+
+ ch->max_other = IDPF_MAX_MBXQ;
+ ch->other_count = IDPF_MAX_MBXQ;
+
+ ch->combined_count = combined;
+ ch->rx_count = num_rxq - combined;
+ ch->tx_count = num_txq - combined;
+}
+
+/**
+ * idpf_set_channels: set the new channel count
+ * @netdev: network interface device structure
+ * @ch: channel information structure
+ *
+ * Negotiate a new number of channels with CP. Returns 0 on success, negative
+ * on failure.
+ */
+static int idpf_set_channels(struct net_device *netdev,
+ struct ethtool_channels *ch)
+{
+ struct idpf_vport_config *vport_config;
+ u16 combined, num_txq, num_rxq;
+ unsigned int num_req_tx_q;
+ unsigned int num_req_rx_q;
+ struct idpf_vport *vport;
+ struct device *dev;
+ int err = 0;
+ u16 idx;
+
+ idpf_vport_ctrl_lock(netdev);
+ vport = idpf_netdev_to_vport(netdev);
+
+ idx = vport->idx;
+ vport_config = vport->adapter->vport_config[idx];
+
+ num_txq = vport_config->user_config.num_req_tx_qs;
+ num_rxq = vport_config->user_config.num_req_rx_qs;
+
+ combined = min(num_txq, num_rxq);
+
+ /* these checks are for cases where user didn't specify a particular
+ * value on cmd line but we get non-zero value anyway via
+ * get_channels(); look at ethtool.c in ethtool repository (the user
+ * space part), particularly, do_schannels() routine
+ */
+ if (ch->combined_count == combined)
+ ch->combined_count = 0;
+ if (ch->combined_count && ch->rx_count == num_rxq - combined)
+ ch->rx_count = 0;
+ if (ch->combined_count && ch->tx_count == num_txq - combined)
+ ch->tx_count = 0;
+
+ num_req_tx_q = ch->combined_count + ch->tx_count;
+ num_req_rx_q = ch->combined_count + ch->rx_count;
+
+ dev = &vport->adapter->pdev->dev;
+ /* It's possible to specify number of queues that exceeds max.
+ * Stack checks max combined_count and max [tx|rx]_count but not the
+ * max combined_count + [tx|rx]_count. These checks should catch that.
+ */
+ if (num_req_tx_q > vport_config->max_q.max_txq) {
+ dev_info(dev, "Maximum TX queues is %d\n",
+ vport_config->max_q.max_txq);
+ err = -EINVAL;
+ goto unlock_mutex;
+ }
+ if (num_req_rx_q > vport_config->max_q.max_rxq) {
+ dev_info(dev, "Maximum RX queues is %d\n",
+ vport_config->max_q.max_rxq);
+ err = -EINVAL;
+ goto unlock_mutex;
+ }
+
+ if (num_req_tx_q == num_txq && num_req_rx_q == num_rxq)
+ goto unlock_mutex;
+
+ vport_config->user_config.num_req_tx_qs = num_req_tx_q;
+ vport_config->user_config.num_req_rx_qs = num_req_rx_q;
+
+ err = idpf_initiate_soft_reset(vport, IDPF_SR_Q_CHANGE);
+ if (err) {
+ /* roll back queue change */
+ vport_config->user_config.num_req_tx_qs = num_txq;
+ vport_config->user_config.num_req_rx_qs = num_rxq;
+ }
+
+unlock_mutex:
+ idpf_vport_ctrl_unlock(netdev);
+
+ return err;
+}
+
+/**
+ * idpf_get_ringparam - Get ring parameters
+ * @netdev: network interface device structure
+ * @ring: ethtool ringparam structure
+ * @kring: unused
+ * @ext_ack: unused
+ *
+ * Returns current ring parameters. TX and RX rings are reported separately,
+ * but the number of rings is not reported.
+ */
+static void idpf_get_ringparam(struct net_device *netdev,
+ struct ethtool_ringparam *ring,
+ struct kernel_ethtool_ringparam *kring,
+ struct netlink_ext_ack *ext_ack)
+{
+ struct idpf_vport *vport;
+
+ idpf_vport_ctrl_lock(netdev);
+ vport = idpf_netdev_to_vport(netdev);
+
+ ring->rx_max_pending = IDPF_MAX_RXQ_DESC;
+ ring->tx_max_pending = IDPF_MAX_TXQ_DESC;
+ ring->rx_pending = vport->rxq_desc_count;
+ ring->tx_pending = vport->txq_desc_count;
+
+ idpf_vport_ctrl_unlock(netdev);
+}
+
+/**
+ * idpf_set_ringparam - Set ring parameters
+ * @netdev: network interface device structure
+ * @ring: ethtool ringparam structure
+ * @kring: unused
+ * @ext_ack: unused
+ *
+ * Sets ring parameters. TX and RX rings are controlled separately, but the
+ * number of rings is not specified, so all rings get the same settings.
+ */
+static int idpf_set_ringparam(struct net_device *netdev,
+ struct ethtool_ringparam *ring,
+ struct kernel_ethtool_ringparam *kring,
+ struct netlink_ext_ack *ext_ack)
+{
+ struct idpf_vport_user_config_data *config_data;
+ u32 new_rx_count, new_tx_count;
+ struct idpf_vport *vport;
+ int i, err = 0;
+ u16 idx;
+
+ idpf_vport_ctrl_lock(netdev);
+ vport = idpf_netdev_to_vport(netdev);
+
+ idx = vport->idx;
+
+ if (ring->tx_pending < IDPF_MIN_TXQ_DESC) {
+ netdev_err(netdev, "Descriptors requested (Tx: %u) is less than min supported (%u)\n",
+ ring->tx_pending,
+ IDPF_MIN_TXQ_DESC);
+ err = -EINVAL;
+ goto unlock_mutex;
+ }
+
+ if (ring->rx_pending < IDPF_MIN_RXQ_DESC) {
+ netdev_err(netdev, "Descriptors requested (Rx: %u) is less than min supported (%u)\n",
+ ring->rx_pending,
+ IDPF_MIN_RXQ_DESC);
+ err = -EINVAL;
+ goto unlock_mutex;
+ }
+
+ new_rx_count = ALIGN(ring->rx_pending, IDPF_REQ_RXQ_DESC_MULTIPLE);
+ if (new_rx_count != ring->rx_pending)
+ netdev_info(netdev, "Requested Rx descriptor count rounded up to %u\n",
+ new_rx_count);
+
+ new_tx_count = ALIGN(ring->tx_pending, IDPF_REQ_DESC_MULTIPLE);
+ if (new_tx_count != ring->tx_pending)
+ netdev_info(netdev, "Requested Tx descriptor count rounded up to %u\n",
+ new_tx_count);
+
+ if (new_tx_count == vport->txq_desc_count &&
+ new_rx_count == vport->rxq_desc_count)
+ goto unlock_mutex;
+
+ config_data = &vport->adapter->vport_config[idx]->user_config;
+ config_data->num_req_txq_desc = new_tx_count;
+ config_data->num_req_rxq_desc = new_rx_count;
+
+ /* Since we adjusted the RX completion queue count, the RX buffer queue
+ * descriptor count needs to be adjusted as well
+ */
+ for (i = 0; i < vport->num_bufqs_per_qgrp; i++)
+ vport->bufq_desc_count[i] =
+ IDPF_RX_BUFQ_DESC_COUNT(new_rx_count,
+ vport->num_bufqs_per_qgrp);
+
+ err = idpf_initiate_soft_reset(vport, IDPF_SR_Q_DESC_CHANGE);
+
+unlock_mutex:
+ idpf_vport_ctrl_unlock(netdev);
+
+ return err;
+}
+
+/**
+ * struct idpf_stats - definition for an ethtool statistic
+ * @stat_string: statistic name to display in ethtool -S output
+ * @sizeof_stat: the sizeof() the stat, must be no greater than sizeof(u64)
+ * @stat_offset: offsetof() the stat from a base pointer
+ *
+ * This structure defines a statistic to be added to the ethtool stats buffer.
+ * It defines a statistic as offset from a common base pointer. Stats should
+ * be defined in constant arrays using the IDPF_STAT macro, with every element
+ * of the array using the same _type for calculating the sizeof_stat and
+ * stat_offset.
+ *
+ * The @sizeof_stat is expected to be sizeof(u8), sizeof(u16), sizeof(u32) or
+ * sizeof(u64). Other sizes are not expected and will produce a WARN_ONCE from
+ * the idpf_add_ethtool_stat() helper function.
+ *
+ * The @stat_string is interpreted as a format string, allowing formatted
+ * values to be inserted while looping over multiple structures for a given
+ * statistics array. Thus, every statistic string in an array should have the
+ * same type and number of format specifiers, to be formatted by variadic
+ * arguments to the idpf_add_stat_string() helper function.
+ */
+struct idpf_stats {
+ char stat_string[ETH_GSTRING_LEN];
+ int sizeof_stat;
+ int stat_offset;
+};
+
+/* Helper macro to define an idpf_stat structure with proper size and type.
+ * Use this when defining constant statistics arrays. Note that @_type expects
+ * only a type name and is used multiple times.
+ */
+#define IDPF_STAT(_type, _name, _stat) { \
+ .stat_string = _name, \
+ .sizeof_stat = sizeof_field(_type, _stat), \
+ .stat_offset = offsetof(_type, _stat) \
+}
+
+/* Helper macro for defining some statistics related to queues */
+#define IDPF_QUEUE_STAT(_name, _stat) \
+ IDPF_STAT(struct idpf_queue, _name, _stat)
+
+/* Stats associated with a Tx queue */
+static const struct idpf_stats idpf_gstrings_tx_queue_stats[] = {
+ IDPF_QUEUE_STAT("pkts", q_stats.tx.packets),
+ IDPF_QUEUE_STAT("bytes", q_stats.tx.bytes),
+ IDPF_QUEUE_STAT("lso_pkts", q_stats.tx.lso_pkts),
+};
+
+/* Stats associated with an Rx queue */
+static const struct idpf_stats idpf_gstrings_rx_queue_stats[] = {
+ IDPF_QUEUE_STAT("pkts", q_stats.rx.packets),
+ IDPF_QUEUE_STAT("bytes", q_stats.rx.bytes),
+ IDPF_QUEUE_STAT("rx_gro_hw_pkts", q_stats.rx.rsc_pkts),
+};
+
+#define IDPF_TX_QUEUE_STATS_LEN ARRAY_SIZE(idpf_gstrings_tx_queue_stats)
+#define IDPF_RX_QUEUE_STATS_LEN ARRAY_SIZE(idpf_gstrings_rx_queue_stats)
+
+#define IDPF_PORT_STAT(_name, _stat) \
+ IDPF_STAT(struct idpf_vport, _name, _stat)
+
+static const struct idpf_stats idpf_gstrings_port_stats[] = {
+ IDPF_PORT_STAT("rx-csum_errors", port_stats.rx_hw_csum_err),
+ IDPF_PORT_STAT("rx-hsplit", port_stats.rx_hsplit),
+ IDPF_PORT_STAT("rx-hsplit_hbo", port_stats.rx_hsplit_hbo),
+ IDPF_PORT_STAT("rx-bad_descs", port_stats.rx_bad_descs),
+ IDPF_PORT_STAT("tx-skb_drops", port_stats.tx_drops),
+ IDPF_PORT_STAT("tx-dma_map_errs", port_stats.tx_dma_map_errs),
+ IDPF_PORT_STAT("tx-linearized_pkts", port_stats.tx_linearize),
+ IDPF_PORT_STAT("tx-busy_events", port_stats.tx_busy),
+ IDPF_PORT_STAT("rx-unicast_pkts", port_stats.vport_stats.rx_unicast),
+ IDPF_PORT_STAT("rx-multicast_pkts", port_stats.vport_stats.rx_multicast),
+ IDPF_PORT_STAT("rx-broadcast_pkts", port_stats.vport_stats.rx_broadcast),
+ IDPF_PORT_STAT("rx-unknown_protocol", port_stats.vport_stats.rx_unknown_protocol),
+ IDPF_PORT_STAT("tx-unicast_pkts", port_stats.vport_stats.tx_unicast),
+ IDPF_PORT_STAT("tx-multicast_pkts", port_stats.vport_stats.tx_multicast),
+ IDPF_PORT_STAT("tx-broadcast_pkts", port_stats.vport_stats.tx_broadcast),
+};
+
+#define IDPF_PORT_STATS_LEN ARRAY_SIZE(idpf_gstrings_port_stats)
+
+/**
+ * __idpf_add_qstat_strings - copy stat strings into ethtool buffer
+ * @p: ethtool supplied buffer
+ * @stats: stat definitions array
+ * @size: size of the stats array
+ * @type: stat type
+ * @idx: stat index
+ *
+ * Format and copy the strings described by stats into the buffer pointed at
+ * by p.
+ */
+static void __idpf_add_qstat_strings(u8 **p, const struct idpf_stats *stats,
+ const unsigned int size, const char *type,
+ unsigned int idx)
+{
+ unsigned int i;
+
+ for (i = 0; i < size; i++)
+ ethtool_sprintf(p, "%s_q-%u_%s",
+ type, idx, stats[i].stat_string);
+}
+
+/**
+ * idpf_add_qstat_strings - Copy queue stat strings into ethtool buffer
+ * @p: ethtool supplied buffer
+ * @stats: stat definitions array
+ * @type: stat type
+ * @idx: stat idx
+ *
+ * Format and copy the strings described by the const static stats value into
+ * the buffer pointed at by p.
+ *
+ * The parameter @stats is evaluated twice, so parameters with side effects
+ * should be avoided. Additionally, stats must be an array such that
+ * ARRAY_SIZE can be called on it.
+ */
+#define idpf_add_qstat_strings(p, stats, type, idx) \
+ __idpf_add_qstat_strings(p, stats, ARRAY_SIZE(stats), type, idx)
+
+/**
+ * idpf_add_stat_strings - Copy port stat strings into ethtool buffer
+ * @p: ethtool buffer
+ * @stats: struct to copy from
+ * @size: size of stats array to copy from
+ */
+static void idpf_add_stat_strings(u8 **p, const struct idpf_stats *stats,
+ const unsigned int size)
+{
+ unsigned int i;
+
+ for (i = 0; i < size; i++)
+ ethtool_sprintf(p, "%s", stats[i].stat_string);
+}
+
+/**
+ * idpf_get_stat_strings - Get stat strings
+ * @netdev: network interface device structure
+ * @data: buffer for string data
+ *
+ * Builds the statistics string table
+ */
+static void idpf_get_stat_strings(struct net_device *netdev, u8 *data)
+{
+ struct idpf_netdev_priv *np = netdev_priv(netdev);
+ struct idpf_vport_config *vport_config;
+ unsigned int i;
+
+ idpf_add_stat_strings(&data, idpf_gstrings_port_stats,
+ IDPF_PORT_STATS_LEN);
+
+ vport_config = np->adapter->vport_config[np->vport_idx];
+ /* It's critical that we always report a constant number of strings and
+ * that the strings are reported in the same order regardless of how
+ * many queues are actually in use.
+ */
+ for (i = 0; i < vport_config->max_q.max_txq; i++)
+ idpf_add_qstat_strings(&data, idpf_gstrings_tx_queue_stats,
+ "tx", i);
+
+ for (i = 0; i < vport_config->max_q.max_rxq; i++)
+ idpf_add_qstat_strings(&data, idpf_gstrings_rx_queue_stats,
+ "rx", i);
+
+ page_pool_ethtool_stats_get_strings(data);
+}
+
+/**
+ * idpf_get_strings - Get string set
+ * @netdev: network interface device structure
+ * @sset: id of string set
+ * @data: buffer for string data
+ *
+ * Builds string tables for various string sets
+ */
+static void idpf_get_strings(struct net_device *netdev, u32 sset, u8 *data)
+{
+ switch (sset) {
+ case ETH_SS_STATS:
+ idpf_get_stat_strings(netdev, data);
+ break;
+ default:
+ break;
+ }
+}
+
+/**
+ * idpf_get_sset_count - Get length of string set
+ * @netdev: network interface device structure
+ * @sset: id of string set
+ *
+ * Reports size of various string tables.
+ */
+static int idpf_get_sset_count(struct net_device *netdev, int sset)
+{
+ struct idpf_netdev_priv *np = netdev_priv(netdev);
+ struct idpf_vport_config *vport_config;
+ u16 max_txq, max_rxq;
+ unsigned int size;
+
+ if (sset != ETH_SS_STATS)
+ return -EINVAL;
+
+ vport_config = np->adapter->vport_config[np->vport_idx];
+ /* This size reported back here *must* be constant throughout the
+ * lifecycle of the netdevice, i.e. we must report the maximum length
+ * even for queues that don't technically exist. This is due to the
+ * fact that this userspace API uses three separate ioctl calls to get
+ * stats data but has no way to communicate back to userspace when that
+ * size has changed, which can typically happen as a result of changing
+ * number of queues. If the number/order of stats change in the middle
+ * of this call chain it will lead to userspace crashing/accessing bad
+ * data through buffer under/overflow.
+ */
+ max_txq = vport_config->max_q.max_txq;
+ max_rxq = vport_config->max_q.max_rxq;
+
+ size = IDPF_PORT_STATS_LEN + (IDPF_TX_QUEUE_STATS_LEN * max_txq) +
+ (IDPF_RX_QUEUE_STATS_LEN * max_rxq);
+ size += page_pool_ethtool_stats_get_count();
+
+ return size;
+}
+
+/**
+ * idpf_add_one_ethtool_stat - copy the stat into the supplied buffer
+ * @data: location to store the stat value
+ * @pstat: old stat pointer to copy from
+ * @stat: the stat definition
+ *
+ * Copies the stat data defined by the pointer and stat structure pair into
+ * the memory supplied as data. If the pointer is null, data will be zero'd.
+ */
+static void idpf_add_one_ethtool_stat(u64 *data, void *pstat,
+ const struct idpf_stats *stat)
+{
+ char *p;
+
+ if (!pstat) {
+ /* Ensure that the ethtool data buffer is zero'd for any stats
+ * which don't have a valid pointer.
+ */
+ *data = 0;
+ return;
+ }
+
+ p = (char *)pstat + stat->stat_offset;
+ switch (stat->sizeof_stat) {
+ case sizeof(u64):
+ *data = *((u64 *)p);
+ break;
+ case sizeof(u32):
+ *data = *((u32 *)p);
+ break;
+ case sizeof(u16):
+ *data = *((u16 *)p);
+ break;
+ case sizeof(u8):
+ *data = *((u8 *)p);
+ break;
+ default:
+ WARN_ONCE(1, "unexpected stat size for %s",
+ stat->stat_string);
+ *data = 0;
+ }
+}
+
+/**
+ * idpf_add_queue_stats - copy queue statistics into supplied buffer
+ * @data: ethtool stats buffer
+ * @q: the queue to copy
+ *
+ * Queue statistics must be copied while protected by u64_stats_fetch_begin,
+ * so we can't directly use idpf_add_ethtool_stats. Assumes that queue stats
+ * are defined in idpf_gstrings_queue_stats. If the queue pointer is null,
+ * zero out the queue stat values and update the data pointer. Otherwise
+ * safely copy the stats from the queue into the supplied buffer and update
+ * the data pointer when finished.
+ *
+ * This function expects to be called while under rcu_read_lock().
+ */
+static void idpf_add_queue_stats(u64 **data, struct idpf_queue *q)
+{
+ const struct idpf_stats *stats;
+ unsigned int start;
+ unsigned int size;
+ unsigned int i;
+
+ if (q->q_type == VIRTCHNL2_QUEUE_TYPE_RX) {
+ size = IDPF_RX_QUEUE_STATS_LEN;
+ stats = idpf_gstrings_rx_queue_stats;
+ } else {
+ size = IDPF_TX_QUEUE_STATS_LEN;
+ stats = idpf_gstrings_tx_queue_stats;
+ }
+
+ /* To avoid invalid statistics values, ensure that we keep retrying
+ * the copy until we get a consistent value according to
+ * u64_stats_fetch_retry.
+ */
+ do {
+ start = u64_stats_fetch_begin(&q->stats_sync);
+ for (i = 0; i < size; i++)
+ idpf_add_one_ethtool_stat(&(*data)[i], q, &stats[i]);
+ } while (u64_stats_fetch_retry(&q->stats_sync, start));
+
+ /* Once we successfully copy the stats in, update the data pointer */
+ *data += size;
+}
+
+/**
+ * idpf_add_empty_queue_stats - Add stats for a non-existent queue
+ * @data: pointer to data buffer
+ * @qtype: type of data queue
+ *
+ * We must report a constant length of stats back to userspace regardless of
+ * how many queues are actually in use because stats collection happens over
+ * three separate ioctls and there's no way to notify userspace the size
+ * changed between those calls. This adds empty to data to the stats since we
+ * don't have a real queue to refer to for this stats slot.
+ */
+static void idpf_add_empty_queue_stats(u64 **data, u16 qtype)
+{
+ unsigned int i;
+ int stats_len;
+
+ if (qtype == VIRTCHNL2_QUEUE_TYPE_RX)
+ stats_len = IDPF_RX_QUEUE_STATS_LEN;
+ else
+ stats_len = IDPF_TX_QUEUE_STATS_LEN;
+
+ for (i = 0; i < stats_len; i++)
+ (*data)[i] = 0;
+ *data += stats_len;
+}
+
+/**
+ * idpf_add_port_stats - Copy port stats into ethtool buffer
+ * @vport: virtual port struct
+ * @data: ethtool buffer to copy into
+ */
+static void idpf_add_port_stats(struct idpf_vport *vport, u64 **data)
+{
+ unsigned int size = IDPF_PORT_STATS_LEN;
+ unsigned int start;
+ unsigned int i;
+
+ do {
+ start = u64_stats_fetch_begin(&vport->port_stats.stats_sync);
+ for (i = 0; i < size; i++)
+ idpf_add_one_ethtool_stat(&(*data)[i], vport,
+ &idpf_gstrings_port_stats[i]);
+ } while (u64_stats_fetch_retry(&vport->port_stats.stats_sync, start));
+
+ *data += size;
+}
+
+/**
+ * idpf_collect_queue_stats - accumulate various per queue stats
+ * into port level stats
+ * @vport: pointer to vport struct
+ **/
+static void idpf_collect_queue_stats(struct idpf_vport *vport)
+{
+ struct idpf_port_stats *pstats = &vport->port_stats;
+ int i, j;
+
+ /* zero out port stats since they're actually tracked in per
+ * queue stats; this is only for reporting
+ */
+ u64_stats_update_begin(&pstats->stats_sync);
+ u64_stats_set(&pstats->rx_hw_csum_err, 0);
+ u64_stats_set(&pstats->rx_hsplit, 0);
+ u64_stats_set(&pstats->rx_hsplit_hbo, 0);
+ u64_stats_set(&pstats->rx_bad_descs, 0);
+ u64_stats_set(&pstats->tx_linearize, 0);
+ u64_stats_set(&pstats->tx_busy, 0);
+ u64_stats_set(&pstats->tx_drops, 0);
+ u64_stats_set(&pstats->tx_dma_map_errs, 0);
+ u64_stats_update_end(&pstats->stats_sync);
+
+ for (i = 0; i < vport->num_rxq_grp; i++) {
+ struct idpf_rxq_group *rxq_grp = &vport->rxq_grps[i];
+ u16 num_rxq;
+
+ if (idpf_is_queue_model_split(vport->rxq_model))
+ num_rxq = rxq_grp->splitq.num_rxq_sets;
+ else
+ num_rxq = rxq_grp->singleq.num_rxq;
+
+ for (j = 0; j < num_rxq; j++) {
+ u64 hw_csum_err, hsplit, hsplit_hbo, bad_descs;
+ struct idpf_rx_queue_stats *stats;
+ struct idpf_queue *rxq;
+ unsigned int start;
+
+ if (idpf_is_queue_model_split(vport->rxq_model))
+ rxq = &rxq_grp->splitq.rxq_sets[j]->rxq;
+ else
+ rxq = rxq_grp->singleq.rxqs[j];
+
+ if (!rxq)
+ continue;
+
+ do {
+ start = u64_stats_fetch_begin(&rxq->stats_sync);
+
+ stats = &rxq->q_stats.rx;
+ hw_csum_err = u64_stats_read(&stats->hw_csum_err);
+ hsplit = u64_stats_read(&stats->hsplit_pkts);
+ hsplit_hbo = u64_stats_read(&stats->hsplit_buf_ovf);
+ bad_descs = u64_stats_read(&stats->bad_descs);
+ } while (u64_stats_fetch_retry(&rxq->stats_sync, start));
+
+ u64_stats_update_begin(&pstats->stats_sync);
+ u64_stats_add(&pstats->rx_hw_csum_err, hw_csum_err);
+ u64_stats_add(&pstats->rx_hsplit, hsplit);
+ u64_stats_add(&pstats->rx_hsplit_hbo, hsplit_hbo);
+ u64_stats_add(&pstats->rx_bad_descs, bad_descs);
+ u64_stats_update_end(&pstats->stats_sync);
+ }
+ }
+
+ for (i = 0; i < vport->num_txq_grp; i++) {
+ struct idpf_txq_group *txq_grp = &vport->txq_grps[i];
+
+ for (j = 0; j < txq_grp->num_txq; j++) {
+ u64 linearize, qbusy, skb_drops, dma_map_errs;
+ struct idpf_queue *txq = txq_grp->txqs[j];
+ struct idpf_tx_queue_stats *stats;
+ unsigned int start;
+
+ if (!txq)
+ continue;
+
+ do {
+ start = u64_stats_fetch_begin(&txq->stats_sync);
+
+ stats = &txq->q_stats.tx;
+ linearize = u64_stats_read(&stats->linearize);
+ qbusy = u64_stats_read(&stats->q_busy);
+ skb_drops = u64_stats_read(&stats->skb_drops);
+ dma_map_errs = u64_stats_read(&stats->dma_map_errs);
+ } while (u64_stats_fetch_retry(&txq->stats_sync, start));
+
+ u64_stats_update_begin(&pstats->stats_sync);
+ u64_stats_add(&pstats->tx_linearize, linearize);
+ u64_stats_add(&pstats->tx_busy, qbusy);
+ u64_stats_add(&pstats->tx_drops, skb_drops);
+ u64_stats_add(&pstats->tx_dma_map_errs, dma_map_errs);
+ u64_stats_update_end(&pstats->stats_sync);
+ }
+ }
+}
+
+/**
+ * idpf_get_ethtool_stats - report device statistics
+ * @netdev: network interface device structure
+ * @stats: ethtool statistics structure
+ * @data: pointer to data buffer
+ *
+ * All statistics are added to the data buffer as an array of u64.
+ */
+static void idpf_get_ethtool_stats(struct net_device *netdev,
+ struct ethtool_stats __always_unused *stats,
+ u64 *data)
+{
+ struct idpf_netdev_priv *np = netdev_priv(netdev);
+ struct idpf_vport_config *vport_config;
+ struct page_pool_stats pp_stats = { };
+ struct idpf_vport *vport;
+ unsigned int total = 0;
+ unsigned int i, j;
+ bool is_splitq;
+ u16 qtype;
+
+ idpf_vport_ctrl_lock(netdev);
+ vport = idpf_netdev_to_vport(netdev);
+
+ if (np->state != __IDPF_VPORT_UP) {
+ idpf_vport_ctrl_unlock(netdev);
+
+ return;
+ }
+
+ rcu_read_lock();
+
+ idpf_collect_queue_stats(vport);
+ idpf_add_port_stats(vport, &data);
+
+ for (i = 0; i < vport->num_txq_grp; i++) {
+ struct idpf_txq_group *txq_grp = &vport->txq_grps[i];
+
+ qtype = VIRTCHNL2_QUEUE_TYPE_TX;
+
+ for (j = 0; j < txq_grp->num_txq; j++, total++) {
+ struct idpf_queue *txq = txq_grp->txqs[j];
+
+ if (!txq)
+ idpf_add_empty_queue_stats(&data, qtype);
+ else
+ idpf_add_queue_stats(&data, txq);
+ }
+ }
+
+ vport_config = vport->adapter->vport_config[vport->idx];
+ /* It is critical we provide a constant number of stats back to
+ * userspace regardless of how many queues are actually in use because
+ * there is no way to inform userspace the size has changed between
+ * ioctl calls. This will fill in any missing stats with zero.
+ */
+ for (; total < vport_config->max_q.max_txq; total++)
+ idpf_add_empty_queue_stats(&data, VIRTCHNL2_QUEUE_TYPE_TX);
+ total = 0;
+
+ is_splitq = idpf_is_queue_model_split(vport->rxq_model);
+
+ for (i = 0; i < vport->num_rxq_grp; i++) {
+ struct idpf_rxq_group *rxq_grp = &vport->rxq_grps[i];
+ u16 num_rxq;
+
+ qtype = VIRTCHNL2_QUEUE_TYPE_RX;
+
+ if (is_splitq)
+ num_rxq = rxq_grp->splitq.num_rxq_sets;
+ else
+ num_rxq = rxq_grp->singleq.num_rxq;
+
+ for (j = 0; j < num_rxq; j++, total++) {
+ struct idpf_queue *rxq;
+
+ if (is_splitq)
+ rxq = &rxq_grp->splitq.rxq_sets[j]->rxq;
+ else
+ rxq = rxq_grp->singleq.rxqs[j];
+ if (!rxq)
+ idpf_add_empty_queue_stats(&data, qtype);
+ else
+ idpf_add_queue_stats(&data, rxq);
+
+ /* In splitq mode, don't get page pool stats here since
+ * the pools are attached to the buffer queues
+ */
+ if (is_splitq)
+ continue;
+
+ if (rxq)
+ page_pool_get_stats(rxq->pp, &pp_stats);
+ }
+ }
+
+ for (i = 0; i < vport->num_rxq_grp; i++) {
+ for (j = 0; j < vport->num_bufqs_per_qgrp; j++) {
+ struct idpf_queue *rxbufq =
+ &vport->rxq_grps[i].splitq.bufq_sets[j].bufq;
+
+ page_pool_get_stats(rxbufq->pp, &pp_stats);
+ }
+ }
+
+ for (; total < vport_config->max_q.max_rxq; total++)
+ idpf_add_empty_queue_stats(&data, VIRTCHNL2_QUEUE_TYPE_RX);
+
+ page_pool_ethtool_stats_get(data, &pp_stats);
+
+ rcu_read_unlock();
+
+ idpf_vport_ctrl_unlock(netdev);
+}
+
+/**
+ * idpf_find_rxq - find rxq from q index
+ * @vport: virtual port associated to queue
+ * @q_num: q index used to find queue
+ *
+ * returns pointer to rx queue
+ */
+static struct idpf_queue *idpf_find_rxq(struct idpf_vport *vport, int q_num)
+{
+ int q_grp, q_idx;
+
+ if (!idpf_is_queue_model_split(vport->rxq_model))
+ return vport->rxq_grps->singleq.rxqs[q_num];
+
+ q_grp = q_num / IDPF_DFLT_SPLITQ_RXQ_PER_GROUP;
+ q_idx = q_num % IDPF_DFLT_SPLITQ_RXQ_PER_GROUP;
+
+ return &vport->rxq_grps[q_grp].splitq.rxq_sets[q_idx]->rxq;
+}
+
+/**
+ * idpf_find_txq - find txq from q index
+ * @vport: virtual port associated to queue
+ * @q_num: q index used to find queue
+ *
+ * returns pointer to tx queue
+ */
+static struct idpf_queue *idpf_find_txq(struct idpf_vport *vport, int q_num)
+{
+ int q_grp;
+
+ if (!idpf_is_queue_model_split(vport->txq_model))
+ return vport->txqs[q_num];
+
+ q_grp = q_num / IDPF_DFLT_SPLITQ_TXQ_PER_GROUP;
+
+ return vport->txq_grps[q_grp].complq;
+}
+
+/**
+ * __idpf_get_q_coalesce - get ITR values for specific queue
+ * @ec: ethtool structure to fill with driver's coalesce settings
+ * @q: quuee of Rx or Tx
+ */
+static void __idpf_get_q_coalesce(struct ethtool_coalesce *ec,
+ struct idpf_queue *q)
+{
+ if (q->q_type == VIRTCHNL2_QUEUE_TYPE_RX) {
+ ec->use_adaptive_rx_coalesce =
+ IDPF_ITR_IS_DYNAMIC(q->q_vector->rx_intr_mode);
+ ec->rx_coalesce_usecs = q->q_vector->rx_itr_value;
+ } else {
+ ec->use_adaptive_tx_coalesce =
+ IDPF_ITR_IS_DYNAMIC(q->q_vector->tx_intr_mode);
+ ec->tx_coalesce_usecs = q->q_vector->tx_itr_value;
+ }
+}
+
+/**
+ * idpf_get_q_coalesce - get ITR values for specific queue
+ * @netdev: pointer to the netdev associated with this query
+ * @ec: coalesce settings to program the device with
+ * @q_num: update ITR/INTRL (coalesce) settings for this queue number/index
+ *
+ * Return 0 on success, and negative on failure
+ */
+static int idpf_get_q_coalesce(struct net_device *netdev,
+ struct ethtool_coalesce *ec,
+ u32 q_num)
+{
+ struct idpf_netdev_priv *np = netdev_priv(netdev);
+ struct idpf_vport *vport;
+ int err = 0;
+
+ idpf_vport_ctrl_lock(netdev);
+ vport = idpf_netdev_to_vport(netdev);
+
+ if (np->state != __IDPF_VPORT_UP)
+ goto unlock_mutex;
+
+ if (q_num >= vport->num_rxq && q_num >= vport->num_txq) {
+ err = -EINVAL;
+ goto unlock_mutex;
+ }
+
+ if (q_num < vport->num_rxq)
+ __idpf_get_q_coalesce(ec, idpf_find_rxq(vport, q_num));
+
+ if (q_num < vport->num_txq)
+ __idpf_get_q_coalesce(ec, idpf_find_txq(vport, q_num));
+
+unlock_mutex:
+ idpf_vport_ctrl_unlock(netdev);
+
+ return err;
+}
+
+/**
+ * idpf_get_coalesce - get ITR values as requested by user
+ * @netdev: pointer to the netdev associated with this query
+ * @ec: coalesce settings to be filled
+ * @kec: unused
+ * @extack: unused
+ *
+ * Return 0 on success, and negative on failure
+ */
+static int idpf_get_coalesce(struct net_device *netdev,
+ struct ethtool_coalesce *ec,
+ struct kernel_ethtool_coalesce *kec,
+ struct netlink_ext_ack *extack)
+{
+ /* Return coalesce based on queue number zero */
+ return idpf_get_q_coalesce(netdev, ec, 0);
+}
+
+/**
+ * idpf_get_per_q_coalesce - get ITR values as requested by user
+ * @netdev: pointer to the netdev associated with this query
+ * @q_num: queue for which the itr values has to retrieved
+ * @ec: coalesce settings to be filled
+ *
+ * Return 0 on success, and negative on failure
+ */
+
+static int idpf_get_per_q_coalesce(struct net_device *netdev, u32 q_num,
+ struct ethtool_coalesce *ec)
+{
+ return idpf_get_q_coalesce(netdev, ec, q_num);
+}
+
+/**
+ * __idpf_set_q_coalesce - set ITR values for specific queue
+ * @ec: ethtool structure from user to update ITR settings
+ * @q: queue for which itr values has to be set
+ * @is_rxq: is queue type rx
+ *
+ * Returns 0 on success, negative otherwise.
+ */
+static int __idpf_set_q_coalesce(struct ethtool_coalesce *ec,
+ struct idpf_queue *q, bool is_rxq)
+{
+ u32 use_adaptive_coalesce, coalesce_usecs;
+ struct idpf_q_vector *qv = q->q_vector;
+ bool is_dim_ena = false;
+ u16 itr_val;
+
+ if (is_rxq) {
+ is_dim_ena = IDPF_ITR_IS_DYNAMIC(qv->rx_intr_mode);
+ use_adaptive_coalesce = ec->use_adaptive_rx_coalesce;
+ coalesce_usecs = ec->rx_coalesce_usecs;
+ itr_val = qv->rx_itr_value;
+ } else {
+ is_dim_ena = IDPF_ITR_IS_DYNAMIC(qv->tx_intr_mode);
+ use_adaptive_coalesce = ec->use_adaptive_tx_coalesce;
+ coalesce_usecs = ec->tx_coalesce_usecs;
+ itr_val = qv->tx_itr_value;
+ }
+ if (coalesce_usecs != itr_val && use_adaptive_coalesce) {
+ netdev_err(q->vport->netdev, "Cannot set coalesce usecs if adaptive enabled\n");
+
+ return -EINVAL;
+ }
+
+ if (is_dim_ena && use_adaptive_coalesce)
+ return 0;
+
+ if (coalesce_usecs > IDPF_ITR_MAX) {
+ netdev_err(q->vport->netdev,
+ "Invalid value, %d-usecs range is 0-%d\n",
+ coalesce_usecs, IDPF_ITR_MAX);
+
+ return -EINVAL;
+ }
+
+ if (coalesce_usecs % 2) {
+ coalesce_usecs--;
+ netdev_info(q->vport->netdev,
+ "HW only supports even ITR values, ITR rounded to %d\n",
+ coalesce_usecs);
+ }
+
+ if (is_rxq) {
+ qv->rx_itr_value = coalesce_usecs;
+ if (use_adaptive_coalesce) {
+ qv->rx_intr_mode = IDPF_ITR_DYNAMIC;
+ } else {
+ qv->rx_intr_mode = !IDPF_ITR_DYNAMIC;
+ idpf_vport_intr_write_itr(qv, qv->rx_itr_value,
+ false);
+ }
+ } else {
+ qv->tx_itr_value = coalesce_usecs;
+ if (use_adaptive_coalesce) {
+ qv->tx_intr_mode = IDPF_ITR_DYNAMIC;
+ } else {
+ qv->tx_intr_mode = !IDPF_ITR_DYNAMIC;
+ idpf_vport_intr_write_itr(qv, qv->tx_itr_value, true);
+ }
+ }
+
+ /* Update of static/dynamic itr will be taken care when interrupt is
+ * fired
+ */
+ return 0;
+}
+
+/**
+ * idpf_set_q_coalesce - set ITR values for specific queue
+ * @vport: vport associated to the queue that need updating
+ * @ec: coalesce settings to program the device with
+ * @q_num: update ITR/INTRL (coalesce) settings for this queue number/index
+ * @is_rxq: is queue type rx
+ *
+ * Return 0 on success, and negative on failure
+ */
+static int idpf_set_q_coalesce(struct idpf_vport *vport,
+ struct ethtool_coalesce *ec,
+ int q_num, bool is_rxq)
+{
+ struct idpf_queue *q;
+
+ q = is_rxq ? idpf_find_rxq(vport, q_num) : idpf_find_txq(vport, q_num);
+
+ if (q && __idpf_set_q_coalesce(ec, q, is_rxq))
+ return -EINVAL;
+
+ return 0;
+}
+
+/**
+ * idpf_set_coalesce - set ITR values as requested by user
+ * @netdev: pointer to the netdev associated with this query
+ * @ec: coalesce settings to program the device with
+ * @kec: unused
+ * @extack: unused
+ *
+ * Return 0 on success, and negative on failure
+ */
+static int idpf_set_coalesce(struct net_device *netdev,
+ struct ethtool_coalesce *ec,
+ struct kernel_ethtool_coalesce *kec,
+ struct netlink_ext_ack *extack)
+{
+ struct idpf_netdev_priv *np = netdev_priv(netdev);
+ struct idpf_vport *vport;
+ int i, err = 0;
+
+ idpf_vport_ctrl_lock(netdev);
+ vport = idpf_netdev_to_vport(netdev);
+
+ if (np->state != __IDPF_VPORT_UP)
+ goto unlock_mutex;
+
+ for (i = 0; i < vport->num_txq; i++) {
+ err = idpf_set_q_coalesce(vport, ec, i, false);
+ if (err)
+ goto unlock_mutex;
+ }
+
+ for (i = 0; i < vport->num_rxq; i++) {
+ err = idpf_set_q_coalesce(vport, ec, i, true);
+ if (err)
+ goto unlock_mutex;
+ }
+
+unlock_mutex:
+ idpf_vport_ctrl_unlock(netdev);
+
+ return err;
+}
+
+/**
+ * idpf_set_per_q_coalesce - set ITR values as requested by user
+ * @netdev: pointer to the netdev associated with this query
+ * @q_num: queue for which the itr values has to be set
+ * @ec: coalesce settings to program the device with
+ *
+ * Return 0 on success, and negative on failure
+ */
+static int idpf_set_per_q_coalesce(struct net_device *netdev, u32 q_num,
+ struct ethtool_coalesce *ec)
+{
+ struct idpf_vport *vport;
+ int err;
+
+ idpf_vport_ctrl_lock(netdev);
+ vport = idpf_netdev_to_vport(netdev);
+
+ err = idpf_set_q_coalesce(vport, ec, q_num, false);
+ if (err) {
+ idpf_vport_ctrl_unlock(netdev);
+
+ return err;
+ }
+
+ err = idpf_set_q_coalesce(vport, ec, q_num, true);
+
+ idpf_vport_ctrl_unlock(netdev);
+
+ return err;
+}
+
+/**
+ * idpf_get_msglevel - Get debug message level
+ * @netdev: network interface device structure
+ *
+ * Returns current debug message level.
+ */
+static u32 idpf_get_msglevel(struct net_device *netdev)
+{
+ struct idpf_adapter *adapter = idpf_netdev_to_adapter(netdev);
+
+ return adapter->msg_enable;
+}
+
+/**
+ * idpf_set_msglevel - Set debug message level
+ * @netdev: network interface device structure
+ * @data: message level
+ *
+ * Set current debug message level. Higher values cause the driver to
+ * be noisier.
+ */
+static void idpf_set_msglevel(struct net_device *netdev, u32 data)
+{
+ struct idpf_adapter *adapter = idpf_netdev_to_adapter(netdev);
+
+ adapter->msg_enable = data;
+}
+
+/**
+ * idpf_get_link_ksettings - Get Link Speed and Duplex settings
+ * @netdev: network interface device structure
+ * @cmd: ethtool command
+ *
+ * Reports speed/duplex settings.
+ **/
+static int idpf_get_link_ksettings(struct net_device *netdev,
+ struct ethtool_link_ksettings *cmd)
+{
+ struct idpf_vport *vport;
+
+ idpf_vport_ctrl_lock(netdev);
+ vport = idpf_netdev_to_vport(netdev);
+
+ ethtool_link_ksettings_zero_link_mode(cmd, supported);
+ cmd->base.autoneg = AUTONEG_DISABLE;
+ cmd->base.port = PORT_NONE;
+ if (vport->link_up) {
+ cmd->base.duplex = DUPLEX_FULL;
+ cmd->base.speed = vport->link_speed_mbps;
+ } else {
+ cmd->base.duplex = DUPLEX_UNKNOWN;
+ cmd->base.speed = SPEED_UNKNOWN;
+ }
+
+ idpf_vport_ctrl_unlock(netdev);
+
+ return 0;
+}
+
+static const struct ethtool_ops idpf_ethtool_ops = {
+ .supported_coalesce_params = ETHTOOL_COALESCE_USECS |
+ ETHTOOL_COALESCE_USE_ADAPTIVE,
+ .get_msglevel = idpf_get_msglevel,
+ .set_msglevel = idpf_set_msglevel,
+ .get_link = ethtool_op_get_link,
+ .get_coalesce = idpf_get_coalesce,
+ .set_coalesce = idpf_set_coalesce,
+ .get_per_queue_coalesce = idpf_get_per_q_coalesce,
+ .set_per_queue_coalesce = idpf_set_per_q_coalesce,
+ .get_ethtool_stats = idpf_get_ethtool_stats,
+ .get_strings = idpf_get_strings,
+ .get_sset_count = idpf_get_sset_count,
+ .get_channels = idpf_get_channels,
+ .get_rxnfc = idpf_get_rxnfc,
+ .get_rxfh_key_size = idpf_get_rxfh_key_size,
+ .get_rxfh_indir_size = idpf_get_rxfh_indir_size,
+ .get_rxfh = idpf_get_rxfh,
+ .set_rxfh = idpf_set_rxfh,
+ .set_channels = idpf_set_channels,
+ .get_ringparam = idpf_get_ringparam,
+ .set_ringparam = idpf_set_ringparam,
+ .get_link_ksettings = idpf_get_link_ksettings,
+};
+
+/**
+ * idpf_set_ethtool_ops - Initialize ethtool ops struct
+ * @netdev: network interface device structure
+ *
+ * Sets ethtool ops struct in our netdev so that ethtool can call
+ * our functions.
+ */
+void idpf_set_ethtool_ops(struct net_device *netdev)
+{
+ netdev->ethtool_ops = &idpf_ethtool_ops;
+}
diff --git a/drivers/net/ethernet/intel/idpf/idpf_lan_pf_regs.h b/drivers/net/ethernet/intel/idpf/idpf_lan_pf_regs.h
new file mode 100644
index 000000000000..24edb8a6ec2e
--- /dev/null
+++ b/drivers/net/ethernet/intel/idpf/idpf_lan_pf_regs.h
@@ -0,0 +1,124 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright (C) 2023 Intel Corporation */
+
+#ifndef _IDPF_LAN_PF_REGS_H_
+#define _IDPF_LAN_PF_REGS_H_
+
+/* Receive queues */
+#define PF_QRX_BASE 0x00000000
+#define PF_QRX_TAIL(_QRX) (PF_QRX_BASE + (((_QRX) * 0x1000)))
+#define PF_QRX_BUFFQ_BASE 0x03000000
+#define PF_QRX_BUFFQ_TAIL(_QRX) (PF_QRX_BUFFQ_BASE + (((_QRX) * 0x1000)))
+
+/* Transmit queues */
+#define PF_QTX_BASE 0x05000000
+#define PF_QTX_COMM_DBELL(_DBQM) (PF_QTX_BASE + ((_DBQM) * 0x1000))
+
+/* Control(PF Mailbox) Queue */
+#define PF_FW_BASE 0x08400000
+
+#define PF_FW_ARQBAL (PF_FW_BASE)
+#define PF_FW_ARQBAH (PF_FW_BASE + 0x4)
+#define PF_FW_ARQLEN (PF_FW_BASE + 0x8)
+#define PF_FW_ARQLEN_ARQLEN_S 0
+#define PF_FW_ARQLEN_ARQLEN_M GENMASK(12, 0)
+#define PF_FW_ARQLEN_ARQVFE_S 28
+#define PF_FW_ARQLEN_ARQVFE_M BIT(PF_FW_ARQLEN_ARQVFE_S)
+#define PF_FW_ARQLEN_ARQOVFL_S 29
+#define PF_FW_ARQLEN_ARQOVFL_M BIT(PF_FW_ARQLEN_ARQOVFL_S)
+#define PF_FW_ARQLEN_ARQCRIT_S 30
+#define PF_FW_ARQLEN_ARQCRIT_M BIT(PF_FW_ARQLEN_ARQCRIT_S)
+#define PF_FW_ARQLEN_ARQENABLE_S 31
+#define PF_FW_ARQLEN_ARQENABLE_M BIT(PF_FW_ARQLEN_ARQENABLE_S)
+#define PF_FW_ARQH (PF_FW_BASE + 0xC)
+#define PF_FW_ARQH_ARQH_S 0
+#define PF_FW_ARQH_ARQH_M GENMASK(12, 0)
+#define PF_FW_ARQT (PF_FW_BASE + 0x10)
+
+#define PF_FW_ATQBAL (PF_FW_BASE + 0x14)
+#define PF_FW_ATQBAH (PF_FW_BASE + 0x18)
+#define PF_FW_ATQLEN (PF_FW_BASE + 0x1C)
+#define PF_FW_ATQLEN_ATQLEN_S 0
+#define PF_FW_ATQLEN_ATQLEN_M GENMASK(9, 0)
+#define PF_FW_ATQLEN_ATQVFE_S 28
+#define PF_FW_ATQLEN_ATQVFE_M BIT(PF_FW_ATQLEN_ATQVFE_S)
+#define PF_FW_ATQLEN_ATQOVFL_S 29
+#define PF_FW_ATQLEN_ATQOVFL_M BIT(PF_FW_ATQLEN_ATQOVFL_S)
+#define PF_FW_ATQLEN_ATQCRIT_S 30
+#define PF_FW_ATQLEN_ATQCRIT_M BIT(PF_FW_ATQLEN_ATQCRIT_S)
+#define PF_FW_ATQLEN_ATQENABLE_S 31
+#define PF_FW_ATQLEN_ATQENABLE_M BIT(PF_FW_ATQLEN_ATQENABLE_S)
+#define PF_FW_ATQH (PF_FW_BASE + 0x20)
+#define PF_FW_ATQH_ATQH_S 0
+#define PF_FW_ATQH_ATQH_M GENMASK(9, 0)
+#define PF_FW_ATQT (PF_FW_BASE + 0x24)
+
+/* Interrupts */
+#define PF_GLINT_BASE 0x08900000
+#define PF_GLINT_DYN_CTL(_INT) (PF_GLINT_BASE + ((_INT) * 0x1000))
+#define PF_GLINT_DYN_CTL_INTENA_S 0
+#define PF_GLINT_DYN_CTL_INTENA_M BIT(PF_GLINT_DYN_CTL_INTENA_S)
+#define PF_GLINT_DYN_CTL_CLEARPBA_S 1
+#define PF_GLINT_DYN_CTL_CLEARPBA_M BIT(PF_GLINT_DYN_CTL_CLEARPBA_S)
+#define PF_GLINT_DYN_CTL_SWINT_TRIG_S 2
+#define PF_GLINT_DYN_CTL_SWINT_TRIG_M BIT(PF_GLINT_DYN_CTL_SWINT_TRIG_S)
+#define PF_GLINT_DYN_CTL_ITR_INDX_S 3
+#define PF_GLINT_DYN_CTL_ITR_INDX_M GENMASK(4, 3)
+#define PF_GLINT_DYN_CTL_INTERVAL_S 5
+#define PF_GLINT_DYN_CTL_INTERVAL_M BIT(PF_GLINT_DYN_CTL_INTERVAL_S)
+#define PF_GLINT_DYN_CTL_SW_ITR_INDX_ENA_S 24
+#define PF_GLINT_DYN_CTL_SW_ITR_INDX_ENA_M BIT(PF_GLINT_DYN_CTL_SW_ITR_INDX_ENA_S)
+#define PF_GLINT_DYN_CTL_SW_ITR_INDX_S 25
+#define PF_GLINT_DYN_CTL_SW_ITR_INDX_M BIT(PF_GLINT_DYN_CTL_SW_ITR_INDX_S)
+#define PF_GLINT_DYN_CTL_WB_ON_ITR_S 30
+#define PF_GLINT_DYN_CTL_WB_ON_ITR_M BIT(PF_GLINT_DYN_CTL_WB_ON_ITR_S)
+#define PF_GLINT_DYN_CTL_INTENA_MSK_S 31
+#define PF_GLINT_DYN_CTL_INTENA_MSK_M BIT(PF_GLINT_DYN_CTL_INTENA_MSK_S)
+/* _ITR is ITR index, _INT is interrupt index, _itrn_indx_spacing is
+ * spacing b/w itrn registers of the same vector.
+ */
+#define PF_GLINT_ITR_ADDR(_ITR, _reg_start, _itrn_indx_spacing) \
+ ((_reg_start) + ((_ITR) * (_itrn_indx_spacing)))
+/* For PF, itrn_indx_spacing is 4 and itrn_reg_spacing is 0x1000 */
+#define PF_GLINT_ITR(_ITR, _INT) \
+ (PF_GLINT_BASE + (((_ITR) + 1) * 4) + ((_INT) * 0x1000))
+#define PF_GLINT_ITR_MAX_INDEX 2
+#define PF_GLINT_ITR_INTERVAL_S 0
+#define PF_GLINT_ITR_INTERVAL_M GENMASK(11, 0)
+
+/* Generic registers */
+#define PF_INT_DIR_OICR_ENA 0x08406000
+#define PF_INT_DIR_OICR_ENA_S 0
+#define PF_INT_DIR_OICR_ENA_M GENMASK(31, 0)
+#define PF_INT_DIR_OICR 0x08406004
+#define PF_INT_DIR_OICR_TSYN_EVNT 0
+#define PF_INT_DIR_OICR_PHY_TS_0 BIT(1)
+#define PF_INT_DIR_OICR_PHY_TS_1 BIT(2)
+#define PF_INT_DIR_OICR_CAUSE 0x08406008
+#define PF_INT_DIR_OICR_CAUSE_CAUSE_S 0
+#define PF_INT_DIR_OICR_CAUSE_CAUSE_M GENMASK(31, 0)
+#define PF_INT_PBA_CLEAR 0x0840600C
+
+#define PF_FUNC_RID 0x08406010
+#define PF_FUNC_RID_FUNCTION_NUMBER_S 0
+#define PF_FUNC_RID_FUNCTION_NUMBER_M GENMASK(2, 0)
+#define PF_FUNC_RID_DEVICE_NUMBER_S 3
+#define PF_FUNC_RID_DEVICE_NUMBER_M GENMASK(7, 3)
+#define PF_FUNC_RID_BUS_NUMBER_S 8
+#define PF_FUNC_RID_BUS_NUMBER_M GENMASK(15, 8)
+
+/* Reset registers */
+#define PFGEN_RTRIG 0x08407000
+#define PFGEN_RTRIG_CORER_S 0
+#define PFGEN_RTRIG_CORER_M BIT(0)
+#define PFGEN_RTRIG_LINKR_S 1
+#define PFGEN_RTRIG_LINKR_M BIT(1)
+#define PFGEN_RTRIG_IMCR_S 2
+#define PFGEN_RTRIG_IMCR_M BIT(2)
+#define PFGEN_RSTAT 0x08407008 /* PFR Status */
+#define PFGEN_RSTAT_PFR_STATE_S 0
+#define PFGEN_RSTAT_PFR_STATE_M GENMASK(1, 0)
+#define PFGEN_CTRL 0x0840700C
+#define PFGEN_CTRL_PFSWR BIT(0)
+
+#endif
diff --git a/drivers/net/ethernet/intel/idpf/idpf_lan_txrx.h b/drivers/net/ethernet/intel/idpf/idpf_lan_txrx.h
new file mode 100644
index 000000000000..a5752dcab888
--- /dev/null
+++ b/drivers/net/ethernet/intel/idpf/idpf_lan_txrx.h
@@ -0,0 +1,293 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright (C) 2023 Intel Corporation */
+
+#ifndef _IDPF_LAN_TXRX_H_
+#define _IDPF_LAN_TXRX_H_
+
+enum idpf_rss_hash {
+ IDPF_HASH_INVALID = 0,
+ /* Values 1 - 28 are reserved for future use */
+ IDPF_HASH_NONF_UNICAST_IPV4_UDP = 29,
+ IDPF_HASH_NONF_MULTICAST_IPV4_UDP,
+ IDPF_HASH_NONF_IPV4_UDP,
+ IDPF_HASH_NONF_IPV4_TCP_SYN_NO_ACK,
+ IDPF_HASH_NONF_IPV4_TCP,
+ IDPF_HASH_NONF_IPV4_SCTP,
+ IDPF_HASH_NONF_IPV4_OTHER,
+ IDPF_HASH_FRAG_IPV4,
+ /* Values 37-38 are reserved */
+ IDPF_HASH_NONF_UNICAST_IPV6_UDP = 39,
+ IDPF_HASH_NONF_MULTICAST_IPV6_UDP,
+ IDPF_HASH_NONF_IPV6_UDP,
+ IDPF_HASH_NONF_IPV6_TCP_SYN_NO_ACK,
+ IDPF_HASH_NONF_IPV6_TCP,
+ IDPF_HASH_NONF_IPV6_SCTP,
+ IDPF_HASH_NONF_IPV6_OTHER,
+ IDPF_HASH_FRAG_IPV6,
+ IDPF_HASH_NONF_RSVD47,
+ IDPF_HASH_NONF_FCOE_OX,
+ IDPF_HASH_NONF_FCOE_RX,
+ IDPF_HASH_NONF_FCOE_OTHER,
+ /* Values 51-62 are reserved */
+ IDPF_HASH_L2_PAYLOAD = 63,
+
+ IDPF_HASH_MAX
+};
+
+/* Supported RSS offloads */
+#define IDPF_DEFAULT_RSS_HASH \
+ (BIT_ULL(IDPF_HASH_NONF_IPV4_UDP) | \
+ BIT_ULL(IDPF_HASH_NONF_IPV4_SCTP) | \
+ BIT_ULL(IDPF_HASH_NONF_IPV4_TCP) | \
+ BIT_ULL(IDPF_HASH_NONF_IPV4_OTHER) | \
+ BIT_ULL(IDPF_HASH_FRAG_IPV4) | \
+ BIT_ULL(IDPF_HASH_NONF_IPV6_UDP) | \
+ BIT_ULL(IDPF_HASH_NONF_IPV6_TCP) | \
+ BIT_ULL(IDPF_HASH_NONF_IPV6_SCTP) | \
+ BIT_ULL(IDPF_HASH_NONF_IPV6_OTHER) | \
+ BIT_ULL(IDPF_HASH_FRAG_IPV6) | \
+ BIT_ULL(IDPF_HASH_L2_PAYLOAD))
+
+#define IDPF_DEFAULT_RSS_HASH_EXPANDED (IDPF_DEFAULT_RSS_HASH | \
+ BIT_ULL(IDPF_HASH_NONF_IPV4_TCP_SYN_NO_ACK) | \
+ BIT_ULL(IDPF_HASH_NONF_UNICAST_IPV4_UDP) | \
+ BIT_ULL(IDPF_HASH_NONF_MULTICAST_IPV4_UDP) | \
+ BIT_ULL(IDPF_HASH_NONF_IPV6_TCP_SYN_NO_ACK) | \
+ BIT_ULL(IDPF_HASH_NONF_UNICAST_IPV6_UDP) | \
+ BIT_ULL(IDPF_HASH_NONF_MULTICAST_IPV6_UDP))
+
+/* For idpf_splitq_base_tx_compl_desc */
+#define IDPF_TXD_COMPLQ_GEN_S 15
+#define IDPF_TXD_COMPLQ_GEN_M BIT_ULL(IDPF_TXD_COMPLQ_GEN_S)
+#define IDPF_TXD_COMPLQ_COMPL_TYPE_S 11
+#define IDPF_TXD_COMPLQ_COMPL_TYPE_M GENMASK_ULL(13, 11)
+#define IDPF_TXD_COMPLQ_QID_S 0
+#define IDPF_TXD_COMPLQ_QID_M GENMASK_ULL(9, 0)
+
+/* For base mode TX descriptors */
+
+#define IDPF_TXD_CTX_QW0_TUNN_L4T_CS_S 23
+#define IDPF_TXD_CTX_QW0_TUNN_L4T_CS_M BIT_ULL(IDPF_TXD_CTX_QW0_TUNN_L4T_CS_S)
+#define IDPF_TXD_CTX_QW0_TUNN_DECTTL_S 19
+#define IDPF_TXD_CTX_QW0_TUNN_DECTTL_M \
+ (0xFULL << IDPF_TXD_CTX_QW0_TUNN_DECTTL_S)
+#define IDPF_TXD_CTX_QW0_TUNN_NATLEN_S 12
+#define IDPF_TXD_CTX_QW0_TUNN_NATLEN_M \
+ (0X7FULL << IDPF_TXD_CTX_QW0_TUNN_NATLEN_S)
+#define IDPF_TXD_CTX_QW0_TUNN_EIP_NOINC_S 11
+#define IDPF_TXD_CTX_QW0_TUNN_EIP_NOINC_M \
+ BIT_ULL(IDPF_TXD_CTX_QW0_TUNN_EIP_NOINC_S)
+#define IDPF_TXD_CTX_EIP_NOINC_IPID_CONST \
+ IDPF_TXD_CTX_QW0_TUNN_EIP_NOINC_M
+#define IDPF_TXD_CTX_QW0_TUNN_NATT_S 9
+#define IDPF_TXD_CTX_QW0_TUNN_NATT_M (0x3ULL << IDPF_TXD_CTX_QW0_TUNN_NATT_S)
+#define IDPF_TXD_CTX_UDP_TUNNELING BIT_ULL(IDPF_TXD_CTX_QW0_TUNN_NATT_S)
+#define IDPF_TXD_CTX_GRE_TUNNELING (0x2ULL << IDPF_TXD_CTX_QW0_TUNN_NATT_S)
+#define IDPF_TXD_CTX_QW0_TUNN_EXT_IPLEN_S 2
+#define IDPF_TXD_CTX_QW0_TUNN_EXT_IPLEN_M \
+ (0x3FULL << IDPF_TXD_CTX_QW0_TUNN_EXT_IPLEN_S)
+#define IDPF_TXD_CTX_QW0_TUNN_EXT_IP_S 0
+#define IDPF_TXD_CTX_QW0_TUNN_EXT_IP_M \
+ (0x3ULL << IDPF_TXD_CTX_QW0_TUNN_EXT_IP_S)
+
+#define IDPF_TXD_CTX_QW1_MSS_S 50
+#define IDPF_TXD_CTX_QW1_MSS_M GENMASK_ULL(63, 50)
+#define IDPF_TXD_CTX_QW1_TSO_LEN_S 30
+#define IDPF_TXD_CTX_QW1_TSO_LEN_M GENMASK_ULL(47, 30)
+#define IDPF_TXD_CTX_QW1_CMD_S 4
+#define IDPF_TXD_CTX_QW1_CMD_M GENMASK_ULL(15, 4)
+#define IDPF_TXD_CTX_QW1_DTYPE_S 0
+#define IDPF_TXD_CTX_QW1_DTYPE_M GENMASK_ULL(3, 0)
+#define IDPF_TXD_QW1_L2TAG1_S 48
+#define IDPF_TXD_QW1_L2TAG1_M GENMASK_ULL(63, 48)
+#define IDPF_TXD_QW1_TX_BUF_SZ_S 34
+#define IDPF_TXD_QW1_TX_BUF_SZ_M GENMASK_ULL(47, 34)
+#define IDPF_TXD_QW1_OFFSET_S 16
+#define IDPF_TXD_QW1_OFFSET_M GENMASK_ULL(33, 16)
+#define IDPF_TXD_QW1_CMD_S 4
+#define IDPF_TXD_QW1_CMD_M GENMASK_ULL(15, 4)
+#define IDPF_TXD_QW1_DTYPE_S 0
+#define IDPF_TXD_QW1_DTYPE_M GENMASK_ULL(3, 0)
+
+/* TX Completion Descriptor Completion Types */
+#define IDPF_TXD_COMPLT_ITR_FLUSH 0
+/* Descriptor completion type 1 is reserved */
+#define IDPF_TXD_COMPLT_RS 2
+/* Descriptor completion type 3 is reserved */
+#define IDPF_TXD_COMPLT_RE 4
+#define IDPF_TXD_COMPLT_SW_MARKER 5
+
+enum idpf_tx_desc_dtype_value {
+ IDPF_TX_DESC_DTYPE_DATA = 0,
+ IDPF_TX_DESC_DTYPE_CTX = 1,
+ /* DTYPE 2 is reserved
+ * DTYPE 3 is free for future use
+ * DTYPE 4 is reserved
+ */
+ IDPF_TX_DESC_DTYPE_FLEX_TSO_CTX = 5,
+ /* DTYPE 6 is reserved */
+ IDPF_TX_DESC_DTYPE_FLEX_L2TAG1_L2TAG2 = 7,
+ /* DTYPE 8, 9 are free for future use
+ * DTYPE 10 is reserved
+ * DTYPE 11 is free for future use
+ */
+ IDPF_TX_DESC_DTYPE_FLEX_FLOW_SCHE = 12,
+ /* DTYPE 13, 14 are free for future use */
+
+ /* DESC_DONE - HW has completed write-back of descriptor */
+ IDPF_TX_DESC_DTYPE_DESC_DONE = 15,
+};
+
+enum idpf_tx_ctx_desc_cmd_bits {
+ IDPF_TX_CTX_DESC_TSO = 0x01,
+ IDPF_TX_CTX_DESC_TSYN = 0x02,
+ IDPF_TX_CTX_DESC_IL2TAG2 = 0x04,
+ IDPF_TX_CTX_DESC_RSVD = 0x08,
+ IDPF_TX_CTX_DESC_SWTCH_NOTAG = 0x00,
+ IDPF_TX_CTX_DESC_SWTCH_UPLINK = 0x10,
+ IDPF_TX_CTX_DESC_SWTCH_LOCAL = 0x20,
+ IDPF_TX_CTX_DESC_SWTCH_VSI = 0x30,
+ IDPF_TX_CTX_DESC_FILT_AU_EN = 0x40,
+ IDPF_TX_CTX_DESC_FILT_AU_EVICT = 0x80,
+ IDPF_TX_CTX_DESC_RSVD1 = 0xF00
+};
+
+enum idpf_tx_desc_len_fields {
+ /* Note: These are predefined bit offsets */
+ IDPF_TX_DESC_LEN_MACLEN_S = 0, /* 7 BITS */
+ IDPF_TX_DESC_LEN_IPLEN_S = 7, /* 7 BITS */
+ IDPF_TX_DESC_LEN_L4_LEN_S = 14 /* 4 BITS */
+};
+
+enum idpf_tx_base_desc_cmd_bits {
+ IDPF_TX_DESC_CMD_EOP = BIT(0),
+ IDPF_TX_DESC_CMD_RS = BIT(1),
+ /* only on VFs else RSVD */
+ IDPF_TX_DESC_CMD_ICRC = BIT(2),
+ IDPF_TX_DESC_CMD_IL2TAG1 = BIT(3),
+ IDPF_TX_DESC_CMD_RSVD1 = BIT(4),
+ IDPF_TX_DESC_CMD_IIPT_IPV6 = BIT(5),
+ IDPF_TX_DESC_CMD_IIPT_IPV4 = BIT(6),
+ IDPF_TX_DESC_CMD_IIPT_IPV4_CSUM = GENMASK(6, 5),
+ IDPF_TX_DESC_CMD_RSVD2 = BIT(7),
+ IDPF_TX_DESC_CMD_L4T_EOFT_TCP = BIT(8),
+ IDPF_TX_DESC_CMD_L4T_EOFT_SCTP = BIT(9),
+ IDPF_TX_DESC_CMD_L4T_EOFT_UDP = GENMASK(9, 8),
+ IDPF_TX_DESC_CMD_RSVD3 = BIT(10),
+ IDPF_TX_DESC_CMD_RSVD4 = BIT(11),
+};
+
+/* Transmit descriptors */
+/* splitq tx buf, singleq tx buf and singleq compl desc */
+struct idpf_base_tx_desc {
+ __le64 buf_addr; /* Address of descriptor's data buf */
+ __le64 qw1; /* type_cmd_offset_bsz_l2tag1 */
+}; /* read used with buffer queues */
+
+struct idpf_splitq_tx_compl_desc {
+ /* qid=[10:0] comptype=[13:11] rsvd=[14] gen=[15] */
+ __le16 qid_comptype_gen;
+ union {
+ __le16 q_head; /* Queue head */
+ __le16 compl_tag; /* Completion tag */
+ } q_head_compl_tag;
+ u8 ts[3];
+ u8 rsvd; /* Reserved */
+}; /* writeback used with completion queues */
+
+/* Context descriptors */
+struct idpf_base_tx_ctx_desc {
+ struct {
+ __le32 tunneling_params;
+ __le16 l2tag2;
+ __le16 rsvd1;
+ } qw0;
+ __le64 qw1; /* type_cmd_tlen_mss/rt_hint */
+};
+
+/* Common cmd field defines for all desc except Flex Flow Scheduler (0x0C) */
+enum idpf_tx_flex_desc_cmd_bits {
+ IDPF_TX_FLEX_DESC_CMD_EOP = BIT(0),
+ IDPF_TX_FLEX_DESC_CMD_RS = BIT(1),
+ IDPF_TX_FLEX_DESC_CMD_RE = BIT(2),
+ IDPF_TX_FLEX_DESC_CMD_IL2TAG1 = BIT(3),
+ IDPF_TX_FLEX_DESC_CMD_DUMMY = BIT(4),
+ IDPF_TX_FLEX_DESC_CMD_CS_EN = BIT(5),
+ IDPF_TX_FLEX_DESC_CMD_FILT_AU_EN = BIT(6),
+ IDPF_TX_FLEX_DESC_CMD_FILT_AU_EVICT = BIT(7),
+};
+
+struct idpf_flex_tx_desc {
+ __le64 buf_addr; /* Packet buffer address */
+ struct {
+#define IDPF_FLEX_TXD_QW1_DTYPE_S 0
+#define IDPF_FLEX_TXD_QW1_DTYPE_M GENMASK(4, 0)
+#define IDPF_FLEX_TXD_QW1_CMD_S 5
+#define IDPF_FLEX_TXD_QW1_CMD_M GENMASK(15, 5)
+ __le16 cmd_dtype;
+ /* DTYPE=IDPF_TX_DESC_DTYPE_FLEX_L2TAG1_L2TAG2 (0x07) */
+ struct {
+ __le16 l2tag1;
+ __le16 l2tag2;
+ } l2tags;
+ __le16 buf_size;
+ } qw1;
+};
+
+struct idpf_flex_tx_sched_desc {
+ __le64 buf_addr; /* Packet buffer address */
+
+ /* DTYPE = IDPF_TX_DESC_DTYPE_FLEX_FLOW_SCHE_16B (0x0C) */
+ struct {
+ u8 cmd_dtype;
+#define IDPF_TXD_FLEX_FLOW_DTYPE_M GENMASK(4, 0)
+#define IDPF_TXD_FLEX_FLOW_CMD_EOP BIT(5)
+#define IDPF_TXD_FLEX_FLOW_CMD_CS_EN BIT(6)
+#define IDPF_TXD_FLEX_FLOW_CMD_RE BIT(7)
+
+ /* [23:23] Horizon Overflow bit, [22:0] timestamp */
+ u8 ts[3];
+#define IDPF_TXD_FLOW_SCH_HORIZON_OVERFLOW_M BIT(7)
+
+ __le16 compl_tag;
+ __le16 rxr_bufsize;
+#define IDPF_TXD_FLEX_FLOW_RXR BIT(14)
+#define IDPF_TXD_FLEX_FLOW_BUFSIZE_M GENMASK(13, 0)
+ } qw1;
+};
+
+/* Common cmd fields for all flex context descriptors
+ * Note: these defines already account for the 5 bit dtype in the cmd_dtype
+ * field
+ */
+enum idpf_tx_flex_ctx_desc_cmd_bits {
+ IDPF_TX_FLEX_CTX_DESC_CMD_TSO = BIT(5),
+ IDPF_TX_FLEX_CTX_DESC_CMD_TSYN_EN = BIT(6),
+ IDPF_TX_FLEX_CTX_DESC_CMD_L2TAG2 = BIT(7),
+ IDPF_TX_FLEX_CTX_DESC_CMD_SWTCH_UPLNK = BIT(9),
+ IDPF_TX_FLEX_CTX_DESC_CMD_SWTCH_LOCAL = BIT(10),
+ IDPF_TX_FLEX_CTX_DESC_CMD_SWTCH_TARGETVSI = GENMASK(10, 9),
+};
+
+/* Standard flex descriptor TSO context quad word */
+struct idpf_flex_tx_tso_ctx_qw {
+ __le32 flex_tlen;
+#define IDPF_TXD_FLEX_CTX_TLEN_M GENMASK(17, 0)
+#define IDPF_TXD_FLEX_TSO_CTX_FLEX_S 24
+ __le16 mss_rt;
+#define IDPF_TXD_FLEX_CTX_MSS_RT_M GENMASK(13, 0)
+ u8 hdr_len;
+ u8 flex;
+};
+
+struct idpf_flex_tx_ctx_desc {
+ /* DTYPE = IDPF_TX_DESC_DTYPE_FLEX_TSO_CTX (0x05) */
+ struct {
+ struct idpf_flex_tx_tso_ctx_qw qw0;
+ struct {
+ __le16 cmd_dtype;
+ u8 flex[6];
+ } qw1;
+ } tso;
+};
+#endif /* _IDPF_LAN_TXRX_H_ */
diff --git a/drivers/net/ethernet/intel/idpf/idpf_lan_vf_regs.h b/drivers/net/ethernet/intel/idpf/idpf_lan_vf_regs.h
new file mode 100644
index 000000000000..3d73b6c76863
--- /dev/null
+++ b/drivers/net/ethernet/intel/idpf/idpf_lan_vf_regs.h
@@ -0,0 +1,128 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright (C) 2023 Intel Corporation */
+
+#ifndef _IDPF_LAN_VF_REGS_H_
+#define _IDPF_LAN_VF_REGS_H_
+
+/* Reset */
+#define VFGEN_RSTAT 0x00008800
+#define VFGEN_RSTAT_VFR_STATE_S 0
+#define VFGEN_RSTAT_VFR_STATE_M GENMASK(1, 0)
+
+/* Control(VF Mailbox) Queue */
+#define VF_BASE 0x00006000
+
+#define VF_ATQBAL (VF_BASE + 0x1C00)
+#define VF_ATQBAH (VF_BASE + 0x1800)
+#define VF_ATQLEN (VF_BASE + 0x0800)
+#define VF_ATQLEN_ATQLEN_S 0
+#define VF_ATQLEN_ATQLEN_M GENMASK(9, 0)
+#define VF_ATQLEN_ATQVFE_S 28
+#define VF_ATQLEN_ATQVFE_M BIT(VF_ATQLEN_ATQVFE_S)
+#define VF_ATQLEN_ATQOVFL_S 29
+#define VF_ATQLEN_ATQOVFL_M BIT(VF_ATQLEN_ATQOVFL_S)
+#define VF_ATQLEN_ATQCRIT_S 30
+#define VF_ATQLEN_ATQCRIT_M BIT(VF_ATQLEN_ATQCRIT_S)
+#define VF_ATQLEN_ATQENABLE_S 31
+#define VF_ATQLEN_ATQENABLE_M BIT(VF_ATQLEN_ATQENABLE_S)
+#define VF_ATQH (VF_BASE + 0x0400)
+#define VF_ATQH_ATQH_S 0
+#define VF_ATQH_ATQH_M GENMASK(9, 0)
+#define VF_ATQT (VF_BASE + 0x2400)
+
+#define VF_ARQBAL (VF_BASE + 0x0C00)
+#define VF_ARQBAH (VF_BASE)
+#define VF_ARQLEN (VF_BASE + 0x2000)
+#define VF_ARQLEN_ARQLEN_S 0
+#define VF_ARQLEN_ARQLEN_M GENMASK(9, 0)
+#define VF_ARQLEN_ARQVFE_S 28
+#define VF_ARQLEN_ARQVFE_M BIT(VF_ARQLEN_ARQVFE_S)
+#define VF_ARQLEN_ARQOVFL_S 29
+#define VF_ARQLEN_ARQOVFL_M BIT(VF_ARQLEN_ARQOVFL_S)
+#define VF_ARQLEN_ARQCRIT_S 30
+#define VF_ARQLEN_ARQCRIT_M BIT(VF_ARQLEN_ARQCRIT_S)
+#define VF_ARQLEN_ARQENABLE_S 31
+#define VF_ARQLEN_ARQENABLE_M BIT(VF_ARQLEN_ARQENABLE_S)
+#define VF_ARQH (VF_BASE + 0x1400)
+#define VF_ARQH_ARQH_S 0
+#define VF_ARQH_ARQH_M GENMASK(12, 0)
+#define VF_ARQT (VF_BASE + 0x1000)
+
+/* Transmit queues */
+#define VF_QTX_TAIL_BASE 0x00000000
+#define VF_QTX_TAIL(_QTX) (VF_QTX_TAIL_BASE + (_QTX) * 0x4)
+#define VF_QTX_TAIL_EXT_BASE 0x00040000
+#define VF_QTX_TAIL_EXT(_QTX) (VF_QTX_TAIL_EXT_BASE + ((_QTX) * 4))
+
+/* Receive queues */
+#define VF_QRX_TAIL_BASE 0x00002000
+#define VF_QRX_TAIL(_QRX) (VF_QRX_TAIL_BASE + ((_QRX) * 4))
+#define VF_QRX_TAIL_EXT_BASE 0x00050000
+#define VF_QRX_TAIL_EXT(_QRX) (VF_QRX_TAIL_EXT_BASE + ((_QRX) * 4))
+#define VF_QRXB_TAIL_BASE 0x00060000
+#define VF_QRXB_TAIL(_QRX) (VF_QRXB_TAIL_BASE + ((_QRX) * 4))
+
+/* Interrupts */
+#define VF_INT_DYN_CTL0 0x00005C00
+#define VF_INT_DYN_CTL0_INTENA_S 0
+#define VF_INT_DYN_CTL0_INTENA_M BIT(VF_INT_DYN_CTL0_INTENA_S)
+#define VF_INT_DYN_CTL0_ITR_INDX_S 3
+#define VF_INT_DYN_CTL0_ITR_INDX_M GENMASK(4, 3)
+#define VF_INT_DYN_CTLN(_INT) (0x00003800 + ((_INT) * 4))
+#define VF_INT_DYN_CTLN_EXT(_INT) (0x00070000 + ((_INT) * 4))
+#define VF_INT_DYN_CTLN_INTENA_S 0
+#define VF_INT_DYN_CTLN_INTENA_M BIT(VF_INT_DYN_CTLN_INTENA_S)
+#define VF_INT_DYN_CTLN_CLEARPBA_S 1
+#define VF_INT_DYN_CTLN_CLEARPBA_M BIT(VF_INT_DYN_CTLN_CLEARPBA_S)
+#define VF_INT_DYN_CTLN_SWINT_TRIG_S 2
+#define VF_INT_DYN_CTLN_SWINT_TRIG_M BIT(VF_INT_DYN_CTLN_SWINT_TRIG_S)
+#define VF_INT_DYN_CTLN_ITR_INDX_S 3
+#define VF_INT_DYN_CTLN_ITR_INDX_M GENMASK(4, 3)
+#define VF_INT_DYN_CTLN_INTERVAL_S 5
+#define VF_INT_DYN_CTLN_INTERVAL_M BIT(VF_INT_DYN_CTLN_INTERVAL_S)
+#define VF_INT_DYN_CTLN_SW_ITR_INDX_ENA_S 24
+#define VF_INT_DYN_CTLN_SW_ITR_INDX_ENA_M BIT(VF_INT_DYN_CTLN_SW_ITR_INDX_ENA_S)
+#define VF_INT_DYN_CTLN_SW_ITR_INDX_S 25
+#define VF_INT_DYN_CTLN_SW_ITR_INDX_M BIT(VF_INT_DYN_CTLN_SW_ITR_INDX_S)
+#define VF_INT_DYN_CTLN_WB_ON_ITR_S 30
+#define VF_INT_DYN_CTLN_WB_ON_ITR_M BIT(VF_INT_DYN_CTLN_WB_ON_ITR_S)
+#define VF_INT_DYN_CTLN_INTENA_MSK_S 31
+#define VF_INT_DYN_CTLN_INTENA_MSK_M BIT(VF_INT_DYN_CTLN_INTENA_MSK_S)
+/* _ITR is ITR index, _INT is interrupt index, _itrn_indx_spacing is spacing
+ * b/w itrn registers of the same vector
+ */
+#define VF_INT_ITR0(_ITR) (0x00004C00 + ((_ITR) * 4))
+#define VF_INT_ITRN_ADDR(_ITR, _reg_start, _itrn_indx_spacing) \
+ ((_reg_start) + ((_ITR) * (_itrn_indx_spacing)))
+/* For VF with 16 vector support, itrn_reg_spacing is 0x4, itrn_indx_spacing
+ * is 0x40 and base register offset is 0x00002800
+ */
+#define VF_INT_ITRN(_INT, _ITR) \
+ (0x00002800 + ((_INT) * 4) + ((_ITR) * 0x40))
+/* For VF with 64 vector support, itrn_reg_spacing is 0x4, itrn_indx_spacing
+ * is 0x100 and base register offset is 0x00002C00
+ */
+#define VF_INT_ITRN_64(_INT, _ITR) \
+ (0x00002C00 + ((_INT) * 4) + ((_ITR) * 0x100))
+/* For VF with 2k vector support, itrn_reg_spacing is 0x4, itrn_indx_spacing
+ * is 0x2000 and base register offset is 0x00072000
+ */
+#define VF_INT_ITRN_2K(_INT, _ITR) \
+ (0x00072000 + ((_INT) * 4) + ((_ITR) * 0x2000))
+#define VF_INT_ITRN_MAX_INDEX 2
+#define VF_INT_ITRN_INTERVAL_S 0
+#define VF_INT_ITRN_INTERVAL_M GENMASK(11, 0)
+#define VF_INT_PBA_CLEAR 0x00008900
+
+#define VF_INT_ICR0_ENA1 0x00005000
+#define VF_INT_ICR0_ENA1_ADMINQ_S 30
+#define VF_INT_ICR0_ENA1_ADMINQ_M BIT(VF_INT_ICR0_ENA1_ADMINQ_S)
+#define VF_INT_ICR0_ENA1_RSVD_S 31
+#define VF_INT_ICR01 0x00004800
+#define VF_QF_HENA(_i) (0x0000C400 + ((_i) * 4))
+#define VF_QF_HENA_MAX_INDX 1
+#define VF_QF_HKEY(_i) (0x0000CC00 + ((_i) * 4))
+#define VF_QF_HKEY_MAX_INDX 12
+#define VF_QF_HLUT(_i) (0x0000D000 + ((_i) * 4))
+#define VF_QF_HLUT_MAX_INDX 15
+#endif
diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c
new file mode 100644
index 000000000000..19809b0ddcd9
--- /dev/null
+++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c
@@ -0,0 +1,2379 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2023 Intel Corporation */
+
+#include "idpf.h"
+
+static const struct net_device_ops idpf_netdev_ops_splitq;
+static const struct net_device_ops idpf_netdev_ops_singleq;
+
+const char * const idpf_vport_vc_state_str[] = {
+ IDPF_FOREACH_VPORT_VC_STATE(IDPF_GEN_STRING)
+};
+
+/**
+ * idpf_init_vector_stack - Fill the MSIX vector stack with vector index
+ * @adapter: private data struct
+ *
+ * Return 0 on success, error on failure
+ */
+static int idpf_init_vector_stack(struct idpf_adapter *adapter)
+{
+ struct idpf_vector_lifo *stack;
+ u16 min_vec;
+ u32 i;
+
+ mutex_lock(&adapter->vector_lock);
+ min_vec = adapter->num_msix_entries - adapter->num_avail_msix;
+ stack = &adapter->vector_stack;
+ stack->size = adapter->num_msix_entries;
+ /* set the base and top to point at start of the 'free pool' to
+ * distribute the unused vectors on-demand basis
+ */
+ stack->base = min_vec;
+ stack->top = min_vec;
+
+ stack->vec_idx = kcalloc(stack->size, sizeof(u16), GFP_KERNEL);
+ if (!stack->vec_idx) {
+ mutex_unlock(&adapter->vector_lock);
+
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < stack->size; i++)
+ stack->vec_idx[i] = i;
+
+ mutex_unlock(&adapter->vector_lock);
+
+ return 0;
+}
+
+/**
+ * idpf_deinit_vector_stack - zero out the MSIX vector stack
+ * @adapter: private data struct
+ */
+static void idpf_deinit_vector_stack(struct idpf_adapter *adapter)
+{
+ struct idpf_vector_lifo *stack;
+
+ mutex_lock(&adapter->vector_lock);
+ stack = &adapter->vector_stack;
+ kfree(stack->vec_idx);
+ stack->vec_idx = NULL;
+ mutex_unlock(&adapter->vector_lock);
+}
+
+/**
+ * idpf_mb_intr_rel_irq - Free the IRQ association with the OS
+ * @adapter: adapter structure
+ *
+ * This will also disable interrupt mode and queue up mailbox task. Mailbox
+ * task will reschedule itself if not in interrupt mode.
+ */
+static void idpf_mb_intr_rel_irq(struct idpf_adapter *adapter)
+{
+ clear_bit(IDPF_MB_INTR_MODE, adapter->flags);
+ free_irq(adapter->msix_entries[0].vector, adapter);
+ queue_delayed_work(adapter->mbx_wq, &adapter->mbx_task, 0);
+}
+
+/**
+ * idpf_intr_rel - Release interrupt capabilities and free memory
+ * @adapter: adapter to disable interrupts on
+ */
+void idpf_intr_rel(struct idpf_adapter *adapter)
+{
+ int err;
+
+ if (!adapter->msix_entries)
+ return;
+
+ idpf_mb_intr_rel_irq(adapter);
+ pci_free_irq_vectors(adapter->pdev);
+
+ err = idpf_send_dealloc_vectors_msg(adapter);
+ if (err)
+ dev_err(&adapter->pdev->dev,
+ "Failed to deallocate vectors: %d\n", err);
+
+ idpf_deinit_vector_stack(adapter);
+ kfree(adapter->msix_entries);
+ adapter->msix_entries = NULL;
+}
+
+/**
+ * idpf_mb_intr_clean - Interrupt handler for the mailbox
+ * @irq: interrupt number
+ * @data: pointer to the adapter structure
+ */
+static irqreturn_t idpf_mb_intr_clean(int __always_unused irq, void *data)
+{
+ struct idpf_adapter *adapter = (struct idpf_adapter *)data;
+
+ queue_delayed_work(adapter->mbx_wq, &adapter->mbx_task, 0);
+
+ return IRQ_HANDLED;
+}
+
+/**
+ * idpf_mb_irq_enable - Enable MSIX interrupt for the mailbox
+ * @adapter: adapter to get the hardware address for register write
+ */
+static void idpf_mb_irq_enable(struct idpf_adapter *adapter)
+{
+ struct idpf_intr_reg *intr = &adapter->mb_vector.intr_reg;
+ u32 val;
+
+ val = intr->dyn_ctl_intena_m | intr->dyn_ctl_itridx_m;
+ writel(val, intr->dyn_ctl);
+ writel(intr->icr_ena_ctlq_m, intr->icr_ena);
+}
+
+/**
+ * idpf_mb_intr_req_irq - Request irq for the mailbox interrupt
+ * @adapter: adapter structure to pass to the mailbox irq handler
+ */
+static int idpf_mb_intr_req_irq(struct idpf_adapter *adapter)
+{
+ struct idpf_q_vector *mb_vector = &adapter->mb_vector;
+ int irq_num, mb_vidx = 0, err;
+
+ irq_num = adapter->msix_entries[mb_vidx].vector;
+ mb_vector->name = kasprintf(GFP_KERNEL, "%s-%s-%d",
+ dev_driver_string(&adapter->pdev->dev),
+ "Mailbox", mb_vidx);
+ err = request_irq(irq_num, adapter->irq_mb_handler, 0,
+ mb_vector->name, adapter);
+ if (err) {
+ dev_err(&adapter->pdev->dev,
+ "IRQ request for mailbox failed, error: %d\n", err);
+
+ return err;
+ }
+
+ set_bit(IDPF_MB_INTR_MODE, adapter->flags);
+
+ return 0;
+}
+
+/**
+ * idpf_set_mb_vec_id - Set vector index for mailbox
+ * @adapter: adapter structure to access the vector chunks
+ *
+ * The first vector id in the requested vector chunks from the CP is for
+ * the mailbox
+ */
+static void idpf_set_mb_vec_id(struct idpf_adapter *adapter)
+{
+ if (adapter->req_vec_chunks)
+ adapter->mb_vector.v_idx =
+ le16_to_cpu(adapter->caps.mailbox_vector_id);
+ else
+ adapter->mb_vector.v_idx = 0;
+}
+
+/**
+ * idpf_mb_intr_init - Initialize the mailbox interrupt
+ * @adapter: adapter structure to store the mailbox vector
+ */
+static int idpf_mb_intr_init(struct idpf_adapter *adapter)
+{
+ adapter->dev_ops.reg_ops.mb_intr_reg_init(adapter);
+ adapter->irq_mb_handler = idpf_mb_intr_clean;
+
+ return idpf_mb_intr_req_irq(adapter);
+}
+
+/**
+ * idpf_vector_lifo_push - push MSIX vector index onto stack
+ * @adapter: private data struct
+ * @vec_idx: vector index to store
+ */
+static int idpf_vector_lifo_push(struct idpf_adapter *adapter, u16 vec_idx)
+{
+ struct idpf_vector_lifo *stack = &adapter->vector_stack;
+
+ lockdep_assert_held(&adapter->vector_lock);
+
+ if (stack->top == stack->base) {
+ dev_err(&adapter->pdev->dev, "Exceeded the vector stack limit: %d\n",
+ stack->top);
+ return -EINVAL;
+ }
+
+ stack->vec_idx[--stack->top] = vec_idx;
+
+ return 0;
+}
+
+/**
+ * idpf_vector_lifo_pop - pop MSIX vector index from stack
+ * @adapter: private data struct
+ */
+static int idpf_vector_lifo_pop(struct idpf_adapter *adapter)
+{
+ struct idpf_vector_lifo *stack = &adapter->vector_stack;
+
+ lockdep_assert_held(&adapter->vector_lock);
+
+ if (stack->top == stack->size) {
+ dev_err(&adapter->pdev->dev, "No interrupt vectors are available to distribute!\n");
+
+ return -EINVAL;
+ }
+
+ return stack->vec_idx[stack->top++];
+}
+
+/**
+ * idpf_vector_stash - Store the vector indexes onto the stack
+ * @adapter: private data struct
+ * @q_vector_idxs: vector index array
+ * @vec_info: info related to the number of vectors
+ *
+ * This function is a no-op if there are no vectors indexes to be stashed
+ */
+static void idpf_vector_stash(struct idpf_adapter *adapter, u16 *q_vector_idxs,
+ struct idpf_vector_info *vec_info)
+{
+ int i, base = 0;
+ u16 vec_idx;
+
+ lockdep_assert_held(&adapter->vector_lock);
+
+ if (!vec_info->num_curr_vecs)
+ return;
+
+ /* For default vports, no need to stash vector allocated from the
+ * default pool onto the stack
+ */
+ if (vec_info->default_vport)
+ base = IDPF_MIN_Q_VEC;
+
+ for (i = vec_info->num_curr_vecs - 1; i >= base ; i--) {
+ vec_idx = q_vector_idxs[i];
+ idpf_vector_lifo_push(adapter, vec_idx);
+ adapter->num_avail_msix++;
+ }
+}
+
+/**
+ * idpf_req_rel_vector_indexes - Request or release MSIX vector indexes
+ * @adapter: driver specific private structure
+ * @q_vector_idxs: vector index array
+ * @vec_info: info related to the number of vectors
+ *
+ * This is the core function to distribute the MSIX vectors acquired from the
+ * OS. It expects the caller to pass the number of vectors required and
+ * also previously allocated. First, it stashes previously allocated vector
+ * indexes on to the stack and then figures out if it can allocate requested
+ * vectors. It can wait on acquiring the mutex lock. If the caller passes 0 as
+ * requested vectors, then this function just stashes the already allocated
+ * vectors and returns 0.
+ *
+ * Returns actual number of vectors allocated on success, error value on failure
+ * If 0 is returned, implies the stack has no vectors to allocate which is also
+ * a failure case for the caller
+ */
+int idpf_req_rel_vector_indexes(struct idpf_adapter *adapter,
+ u16 *q_vector_idxs,
+ struct idpf_vector_info *vec_info)
+{
+ u16 num_req_vecs, num_alloc_vecs = 0, max_vecs;
+ struct idpf_vector_lifo *stack;
+ int i, j, vecid;
+
+ mutex_lock(&adapter->vector_lock);
+ stack = &adapter->vector_stack;
+ num_req_vecs = vec_info->num_req_vecs;
+
+ /* Stash interrupt vector indexes onto the stack if required */
+ idpf_vector_stash(adapter, q_vector_idxs, vec_info);
+
+ if (!num_req_vecs)
+ goto rel_lock;
+
+ if (vec_info->default_vport) {
+ /* As IDPF_MIN_Q_VEC per default vport is put aside in the
+ * default pool of the stack, use them for default vports
+ */
+ j = vec_info->index * IDPF_MIN_Q_VEC + IDPF_MBX_Q_VEC;
+ for (i = 0; i < IDPF_MIN_Q_VEC; i++) {
+ q_vector_idxs[num_alloc_vecs++] = stack->vec_idx[j++];
+ num_req_vecs--;
+ }
+ }
+
+ /* Find if stack has enough vector to allocate */
+ max_vecs = min(adapter->num_avail_msix, num_req_vecs);
+
+ for (j = 0; j < max_vecs; j++) {
+ vecid = idpf_vector_lifo_pop(adapter);
+ q_vector_idxs[num_alloc_vecs++] = vecid;
+ }
+ adapter->num_avail_msix -= max_vecs;
+
+rel_lock:
+ mutex_unlock(&adapter->vector_lock);
+
+ return num_alloc_vecs;
+}
+
+/**
+ * idpf_intr_req - Request interrupt capabilities
+ * @adapter: adapter to enable interrupts on
+ *
+ * Returns 0 on success, negative on failure
+ */
+int idpf_intr_req(struct idpf_adapter *adapter)
+{
+ u16 default_vports = idpf_get_default_vports(adapter);
+ int num_q_vecs, total_vecs, num_vec_ids;
+ int min_vectors, v_actual, err;
+ unsigned int vector;
+ u16 *vecids;
+
+ total_vecs = idpf_get_reserved_vecs(adapter);
+ num_q_vecs = total_vecs - IDPF_MBX_Q_VEC;
+
+ err = idpf_send_alloc_vectors_msg(adapter, num_q_vecs);
+ if (err) {
+ dev_err(&adapter->pdev->dev,
+ "Failed to allocate %d vectors: %d\n", num_q_vecs, err);
+
+ return -EAGAIN;
+ }
+
+ min_vectors = IDPF_MBX_Q_VEC + IDPF_MIN_Q_VEC * default_vports;
+ v_actual = pci_alloc_irq_vectors(adapter->pdev, min_vectors,
+ total_vecs, PCI_IRQ_MSIX);
+ if (v_actual < min_vectors) {
+ dev_err(&adapter->pdev->dev, "Failed to allocate MSIX vectors: %d\n",
+ v_actual);
+ err = -EAGAIN;
+ goto send_dealloc_vecs;
+ }
+
+ adapter->msix_entries = kcalloc(v_actual, sizeof(struct msix_entry),
+ GFP_KERNEL);
+
+ if (!adapter->msix_entries) {
+ err = -ENOMEM;
+ goto free_irq;
+ }
+
+ idpf_set_mb_vec_id(adapter);
+
+ vecids = kcalloc(total_vecs, sizeof(u16), GFP_KERNEL);
+ if (!vecids) {
+ err = -ENOMEM;
+ goto free_msix;
+ }
+
+ if (adapter->req_vec_chunks) {
+ struct virtchnl2_vector_chunks *vchunks;
+ struct virtchnl2_alloc_vectors *ac;
+
+ ac = adapter->req_vec_chunks;
+ vchunks = &ac->vchunks;
+
+ num_vec_ids = idpf_get_vec_ids(adapter, vecids, total_vecs,
+ vchunks);
+ if (num_vec_ids < v_actual) {
+ err = -EINVAL;
+ goto free_vecids;
+ }
+ } else {
+ int i;
+
+ for (i = 0; i < v_actual; i++)
+ vecids[i] = i;
+ }
+
+ for (vector = 0; vector < v_actual; vector++) {
+ adapter->msix_entries[vector].entry = vecids[vector];
+ adapter->msix_entries[vector].vector =
+ pci_irq_vector(adapter->pdev, vector);
+ }
+
+ adapter->num_req_msix = total_vecs;
+ adapter->num_msix_entries = v_actual;
+ /* 'num_avail_msix' is used to distribute excess vectors to the vports
+ * after considering the minimum vectors required per each default
+ * vport
+ */
+ adapter->num_avail_msix = v_actual - min_vectors;
+
+ /* Fill MSIX vector lifo stack with vector indexes */
+ err = idpf_init_vector_stack(adapter);
+ if (err)
+ goto free_vecids;
+
+ err = idpf_mb_intr_init(adapter);
+ if (err)
+ goto deinit_vec_stack;
+ idpf_mb_irq_enable(adapter);
+ kfree(vecids);
+
+ return 0;
+
+deinit_vec_stack:
+ idpf_deinit_vector_stack(adapter);
+free_vecids:
+ kfree(vecids);
+free_msix:
+ kfree(adapter->msix_entries);
+ adapter->msix_entries = NULL;
+free_irq:
+ pci_free_irq_vectors(adapter->pdev);
+send_dealloc_vecs:
+ idpf_send_dealloc_vectors_msg(adapter);
+
+ return err;
+}
+
+/**
+ * idpf_find_mac_filter - Search filter list for specific mac filter
+ * @vconfig: Vport config structure
+ * @macaddr: The MAC address
+ *
+ * Returns ptr to the filter object or NULL. Must be called while holding the
+ * mac_filter_list_lock.
+ **/
+static struct idpf_mac_filter *idpf_find_mac_filter(struct idpf_vport_config *vconfig,
+ const u8 *macaddr)
+{
+ struct idpf_mac_filter *f;
+
+ if (!macaddr)
+ return NULL;
+
+ list_for_each_entry(f, &vconfig->user_config.mac_filter_list, list) {
+ if (ether_addr_equal(macaddr, f->macaddr))
+ return f;
+ }
+
+ return NULL;
+}
+
+/**
+ * __idpf_del_mac_filter - Delete a MAC filter from the filter list
+ * @vport_config: Vport config structure
+ * @macaddr: The MAC address
+ *
+ * Returns 0 on success, error value on failure
+ **/
+static int __idpf_del_mac_filter(struct idpf_vport_config *vport_config,
+ const u8 *macaddr)
+{
+ struct idpf_mac_filter *f;
+
+ spin_lock_bh(&vport_config->mac_filter_list_lock);
+ f = idpf_find_mac_filter(vport_config, macaddr);
+ if (f) {
+ list_del(&f->list);
+ kfree(f);
+ }
+ spin_unlock_bh(&vport_config->mac_filter_list_lock);
+
+ return 0;
+}
+
+/**
+ * idpf_del_mac_filter - Delete a MAC filter from the filter list
+ * @vport: Main vport structure
+ * @np: Netdev private structure
+ * @macaddr: The MAC address
+ * @async: Don't wait for return message
+ *
+ * Removes filter from list and if interface is up, tells hardware about the
+ * removed filter.
+ **/
+static int idpf_del_mac_filter(struct idpf_vport *vport,
+ struct idpf_netdev_priv *np,
+ const u8 *macaddr, bool async)
+{
+ struct idpf_vport_config *vport_config;
+ struct idpf_mac_filter *f;
+
+ vport_config = np->adapter->vport_config[np->vport_idx];
+
+ spin_lock_bh(&vport_config->mac_filter_list_lock);
+ f = idpf_find_mac_filter(vport_config, macaddr);
+ if (f) {
+ f->remove = true;
+ } else {
+ spin_unlock_bh(&vport_config->mac_filter_list_lock);
+
+ return -EINVAL;
+ }
+ spin_unlock_bh(&vport_config->mac_filter_list_lock);
+
+ if (np->state == __IDPF_VPORT_UP) {
+ int err;
+
+ err = idpf_add_del_mac_filters(vport, np, false, async);
+ if (err)
+ return err;
+ }
+
+ return __idpf_del_mac_filter(vport_config, macaddr);
+}
+
+/**
+ * __idpf_add_mac_filter - Add mac filter helper function
+ * @vport_config: Vport config structure
+ * @macaddr: Address to add
+ *
+ * Takes mac_filter_list_lock spinlock to add new filter to list.
+ */
+static int __idpf_add_mac_filter(struct idpf_vport_config *vport_config,
+ const u8 *macaddr)
+{
+ struct idpf_mac_filter *f;
+
+ spin_lock_bh(&vport_config->mac_filter_list_lock);
+
+ f = idpf_find_mac_filter(vport_config, macaddr);
+ if (f) {
+ f->remove = false;
+ spin_unlock_bh(&vport_config->mac_filter_list_lock);
+
+ return 0;
+ }
+
+ f = kzalloc(sizeof(*f), GFP_ATOMIC);
+ if (!f) {
+ spin_unlock_bh(&vport_config->mac_filter_list_lock);
+
+ return -ENOMEM;
+ }
+
+ ether_addr_copy(f->macaddr, macaddr);
+ list_add_tail(&f->list, &vport_config->user_config.mac_filter_list);
+ f->add = true;
+
+ spin_unlock_bh(&vport_config->mac_filter_list_lock);
+
+ return 0;
+}
+
+/**
+ * idpf_add_mac_filter - Add a mac filter to the filter list
+ * @vport: Main vport structure
+ * @np: Netdev private structure
+ * @macaddr: The MAC address
+ * @async: Don't wait for return message
+ *
+ * Returns 0 on success or error on failure. If interface is up, we'll also
+ * send the virtchnl message to tell hardware about the filter.
+ **/
+static int idpf_add_mac_filter(struct idpf_vport *vport,
+ struct idpf_netdev_priv *np,
+ const u8 *macaddr, bool async)
+{
+ struct idpf_vport_config *vport_config;
+ int err;
+
+ vport_config = np->adapter->vport_config[np->vport_idx];
+ err = __idpf_add_mac_filter(vport_config, macaddr);
+ if (err)
+ return err;
+
+ if (np->state == __IDPF_VPORT_UP)
+ err = idpf_add_del_mac_filters(vport, np, true, async);
+
+ return err;
+}
+
+/**
+ * idpf_del_all_mac_filters - Delete all MAC filters in list
+ * @vport: main vport struct
+ *
+ * Takes mac_filter_list_lock spinlock. Deletes all filters
+ */
+static void idpf_del_all_mac_filters(struct idpf_vport *vport)
+{
+ struct idpf_vport_config *vport_config;
+ struct idpf_mac_filter *f, *ftmp;
+
+ vport_config = vport->adapter->vport_config[vport->idx];
+ spin_lock_bh(&vport_config->mac_filter_list_lock);
+
+ list_for_each_entry_safe(f, ftmp, &vport_config->user_config.mac_filter_list,
+ list) {
+ list_del(&f->list);
+ kfree(f);
+ }
+
+ spin_unlock_bh(&vport_config->mac_filter_list_lock);
+}
+
+/**
+ * idpf_restore_mac_filters - Re-add all MAC filters in list
+ * @vport: main vport struct
+ *
+ * Takes mac_filter_list_lock spinlock. Sets add field to true for filters to
+ * resync filters back to HW.
+ */
+static void idpf_restore_mac_filters(struct idpf_vport *vport)
+{
+ struct idpf_vport_config *vport_config;
+ struct idpf_mac_filter *f;
+
+ vport_config = vport->adapter->vport_config[vport->idx];
+ spin_lock_bh(&vport_config->mac_filter_list_lock);
+
+ list_for_each_entry(f, &vport_config->user_config.mac_filter_list, list)
+ f->add = true;
+
+ spin_unlock_bh(&vport_config->mac_filter_list_lock);
+
+ idpf_add_del_mac_filters(vport, netdev_priv(vport->netdev),
+ true, false);
+}
+
+/**
+ * idpf_remove_mac_filters - Remove all MAC filters in list
+ * @vport: main vport struct
+ *
+ * Takes mac_filter_list_lock spinlock. Sets remove field to true for filters
+ * to remove filters in HW.
+ */
+static void idpf_remove_mac_filters(struct idpf_vport *vport)
+{
+ struct idpf_vport_config *vport_config;
+ struct idpf_mac_filter *f;
+
+ vport_config = vport->adapter->vport_config[vport->idx];
+ spin_lock_bh(&vport_config->mac_filter_list_lock);
+
+ list_for_each_entry(f, &vport_config->user_config.mac_filter_list, list)
+ f->remove = true;
+
+ spin_unlock_bh(&vport_config->mac_filter_list_lock);
+
+ idpf_add_del_mac_filters(vport, netdev_priv(vport->netdev),
+ false, false);
+}
+
+/**
+ * idpf_deinit_mac_addr - deinitialize mac address for vport
+ * @vport: main vport structure
+ */
+static void idpf_deinit_mac_addr(struct idpf_vport *vport)
+{
+ struct idpf_vport_config *vport_config;
+ struct idpf_mac_filter *f;
+
+ vport_config = vport->adapter->vport_config[vport->idx];
+
+ spin_lock_bh(&vport_config->mac_filter_list_lock);
+
+ f = idpf_find_mac_filter(vport_config, vport->default_mac_addr);
+ if (f) {
+ list_del(&f->list);
+ kfree(f);
+ }
+
+ spin_unlock_bh(&vport_config->mac_filter_list_lock);
+}
+
+/**
+ * idpf_init_mac_addr - initialize mac address for vport
+ * @vport: main vport structure
+ * @netdev: pointer to netdev struct associated with this vport
+ */
+static int idpf_init_mac_addr(struct idpf_vport *vport,
+ struct net_device *netdev)
+{
+ struct idpf_netdev_priv *np = netdev_priv(netdev);
+ struct idpf_adapter *adapter = vport->adapter;
+ int err;
+
+ if (is_valid_ether_addr(vport->default_mac_addr)) {
+ eth_hw_addr_set(netdev, vport->default_mac_addr);
+ ether_addr_copy(netdev->perm_addr, vport->default_mac_addr);
+
+ return idpf_add_mac_filter(vport, np, vport->default_mac_addr,
+ false);
+ }
+
+ if (!idpf_is_cap_ena(adapter, IDPF_OTHER_CAPS,
+ VIRTCHNL2_CAP_MACFILTER)) {
+ dev_err(&adapter->pdev->dev,
+ "MAC address is not provided and capability is not set\n");
+
+ return -EINVAL;
+ }
+
+ eth_hw_addr_random(netdev);
+ err = idpf_add_mac_filter(vport, np, netdev->dev_addr, false);
+ if (err)
+ return err;
+
+ dev_info(&adapter->pdev->dev, "Invalid MAC address %pM, using random %pM\n",
+ vport->default_mac_addr, netdev->dev_addr);
+ ether_addr_copy(vport->default_mac_addr, netdev->dev_addr);
+
+ return 0;
+}
+
+/**
+ * idpf_cfg_netdev - Allocate, configure and register a netdev
+ * @vport: main vport structure
+ *
+ * Returns 0 on success, negative value on failure.
+ */
+static int idpf_cfg_netdev(struct idpf_vport *vport)
+{
+ struct idpf_adapter *adapter = vport->adapter;
+ struct idpf_vport_config *vport_config;
+ netdev_features_t dflt_features;
+ netdev_features_t offloads = 0;
+ struct idpf_netdev_priv *np;
+ struct net_device *netdev;
+ u16 idx = vport->idx;
+ int err;
+
+ vport_config = adapter->vport_config[idx];
+
+ /* It's possible we already have a netdev allocated and registered for
+ * this vport
+ */
+ if (test_bit(IDPF_VPORT_REG_NETDEV, vport_config->flags)) {
+ netdev = adapter->netdevs[idx];
+ np = netdev_priv(netdev);
+ np->vport = vport;
+ np->vport_idx = vport->idx;
+ np->vport_id = vport->vport_id;
+ vport->netdev = netdev;
+
+ return idpf_init_mac_addr(vport, netdev);
+ }
+
+ netdev = alloc_etherdev_mqs(sizeof(struct idpf_netdev_priv),
+ vport_config->max_q.max_txq,
+ vport_config->max_q.max_rxq);
+ if (!netdev)
+ return -ENOMEM;
+
+ vport->netdev = netdev;
+ np = netdev_priv(netdev);
+ np->vport = vport;
+ np->adapter = adapter;
+ np->vport_idx = vport->idx;
+ np->vport_id = vport->vport_id;
+
+ spin_lock_init(&np->stats_lock);
+
+ err = idpf_init_mac_addr(vport, netdev);
+ if (err) {
+ free_netdev(vport->netdev);
+ vport->netdev = NULL;
+
+ return err;
+ }
+
+ /* assign netdev_ops */
+ if (idpf_is_queue_model_split(vport->txq_model))
+ netdev->netdev_ops = &idpf_netdev_ops_splitq;
+ else
+ netdev->netdev_ops = &idpf_netdev_ops_singleq;
+
+ /* setup watchdog timeout value to be 5 second */
+ netdev->watchdog_timeo = 5 * HZ;
+
+ /* configure default MTU size */
+ netdev->min_mtu = ETH_MIN_MTU;
+ netdev->max_mtu = vport->max_mtu;
+
+ dflt_features = NETIF_F_SG |
+ NETIF_F_HIGHDMA;
+
+ if (idpf_is_cap_ena_all(adapter, IDPF_RSS_CAPS, IDPF_CAP_RSS))
+ dflt_features |= NETIF_F_RXHASH;
+ if (idpf_is_cap_ena_all(adapter, IDPF_CSUM_CAPS, IDPF_CAP_RX_CSUM_L4V4))
+ dflt_features |= NETIF_F_IP_CSUM;
+ if (idpf_is_cap_ena_all(adapter, IDPF_CSUM_CAPS, IDPF_CAP_RX_CSUM_L4V6))
+ dflt_features |= NETIF_F_IPV6_CSUM;
+ if (idpf_is_cap_ena(adapter, IDPF_CSUM_CAPS, IDPF_CAP_RX_CSUM))
+ dflt_features |= NETIF_F_RXCSUM;
+ if (idpf_is_cap_ena_all(adapter, IDPF_CSUM_CAPS, IDPF_CAP_SCTP_CSUM))
+ dflt_features |= NETIF_F_SCTP_CRC;
+
+ if (idpf_is_cap_ena(adapter, IDPF_SEG_CAPS, VIRTCHNL2_CAP_SEG_IPV4_TCP))
+ dflt_features |= NETIF_F_TSO;
+ if (idpf_is_cap_ena(adapter, IDPF_SEG_CAPS, VIRTCHNL2_CAP_SEG_IPV6_TCP))
+ dflt_features |= NETIF_F_TSO6;
+ if (idpf_is_cap_ena_all(adapter, IDPF_SEG_CAPS,
+ VIRTCHNL2_CAP_SEG_IPV4_UDP |
+ VIRTCHNL2_CAP_SEG_IPV6_UDP))
+ dflt_features |= NETIF_F_GSO_UDP_L4;
+ if (idpf_is_cap_ena_all(adapter, IDPF_RSC_CAPS, IDPF_CAP_RSC))
+ offloads |= NETIF_F_GRO_HW;
+ /* advertise to stack only if offloads for encapsulated packets is
+ * supported
+ */
+ if (idpf_is_cap_ena(vport->adapter, IDPF_SEG_CAPS,
+ VIRTCHNL2_CAP_SEG_TX_SINGLE_TUNNEL)) {
+ offloads |= NETIF_F_GSO_UDP_TUNNEL |
+ NETIF_F_GSO_GRE |
+ NETIF_F_GSO_GRE_CSUM |
+ NETIF_F_GSO_PARTIAL |
+ NETIF_F_GSO_UDP_TUNNEL_CSUM |
+ NETIF_F_GSO_IPXIP4 |
+ NETIF_F_GSO_IPXIP6 |
+ 0;
+
+ if (!idpf_is_cap_ena_all(vport->adapter, IDPF_CSUM_CAPS,
+ IDPF_CAP_TUNNEL_TX_CSUM))
+ netdev->gso_partial_features |=
+ NETIF_F_GSO_UDP_TUNNEL_CSUM;
+
+ netdev->gso_partial_features |= NETIF_F_GSO_GRE_CSUM;
+ offloads |= NETIF_F_TSO_MANGLEID;
+ }
+ if (idpf_is_cap_ena(adapter, IDPF_OTHER_CAPS, VIRTCHNL2_CAP_LOOPBACK))
+ offloads |= NETIF_F_LOOPBACK;
+
+ netdev->features |= dflt_features;
+ netdev->hw_features |= dflt_features | offloads;
+ netdev->hw_enc_features |= dflt_features | offloads;
+ idpf_set_ethtool_ops(netdev);
+ SET_NETDEV_DEV(netdev, &adapter->pdev->dev);
+
+ /* carrier off on init to avoid Tx hangs */
+ netif_carrier_off(netdev);
+
+ /* make sure transmit queues start off as stopped */
+ netif_tx_stop_all_queues(netdev);
+
+ /* The vport can be arbitrarily released so we need to also track
+ * netdevs in the adapter struct
+ */
+ adapter->netdevs[idx] = netdev;
+
+ return 0;
+}
+
+/**
+ * idpf_get_free_slot - get the next non-NULL location index in array
+ * @adapter: adapter in which to look for a free vport slot
+ */
+static int idpf_get_free_slot(struct idpf_adapter *adapter)
+{
+ unsigned int i;
+
+ for (i = 0; i < adapter->max_vports; i++) {
+ if (!adapter->vports[i])
+ return i;
+ }
+
+ return IDPF_NO_FREE_SLOT;
+}
+
+/**
+ * idpf_remove_features - Turn off feature configs
+ * @vport: virtual port structure
+ */
+static void idpf_remove_features(struct idpf_vport *vport)
+{
+ struct idpf_adapter *adapter = vport->adapter;
+
+ if (idpf_is_cap_ena(adapter, IDPF_OTHER_CAPS, VIRTCHNL2_CAP_MACFILTER))
+ idpf_remove_mac_filters(vport);
+}
+
+/**
+ * idpf_vport_stop - Disable a vport
+ * @vport: vport to disable
+ */
+static void idpf_vport_stop(struct idpf_vport *vport)
+{
+ struct idpf_netdev_priv *np = netdev_priv(vport->netdev);
+
+ if (np->state <= __IDPF_VPORT_DOWN)
+ return;
+
+ netif_carrier_off(vport->netdev);
+ netif_tx_disable(vport->netdev);
+
+ idpf_send_disable_vport_msg(vport);
+ idpf_send_disable_queues_msg(vport);
+ idpf_send_map_unmap_queue_vector_msg(vport, false);
+ /* Normally we ask for queues in create_vport, but if the number of
+ * initially requested queues have changed, for example via ethtool
+ * set channels, we do delete queues and then add the queues back
+ * instead of deleting and reallocating the vport.
+ */
+ if (test_and_clear_bit(IDPF_VPORT_DEL_QUEUES, vport->flags))
+ idpf_send_delete_queues_msg(vport);
+
+ idpf_remove_features(vport);
+
+ vport->link_up = false;
+ idpf_vport_intr_deinit(vport);
+ idpf_vport_intr_rel(vport);
+ idpf_vport_queues_rel(vport);
+ np->state = __IDPF_VPORT_DOWN;
+}
+
+/**
+ * idpf_stop - Disables a network interface
+ * @netdev: network interface device structure
+ *
+ * The stop entry point is called when an interface is de-activated by the OS,
+ * and the netdevice enters the DOWN state. The hardware is still under the
+ * driver's control, but the netdev interface is disabled.
+ *
+ * Returns success only - not allowed to fail
+ */
+static int idpf_stop(struct net_device *netdev)
+{
+ struct idpf_netdev_priv *np = netdev_priv(netdev);
+ struct idpf_vport *vport;
+
+ if (test_bit(IDPF_REMOVE_IN_PROG, np->adapter->flags))
+ return 0;
+
+ idpf_vport_ctrl_lock(netdev);
+ vport = idpf_netdev_to_vport(netdev);
+
+ idpf_vport_stop(vport);
+
+ idpf_vport_ctrl_unlock(netdev);
+
+ return 0;
+}
+
+/**
+ * idpf_decfg_netdev - Unregister the netdev
+ * @vport: vport for which netdev to be unregistered
+ */
+static void idpf_decfg_netdev(struct idpf_vport *vport)
+{
+ struct idpf_adapter *adapter = vport->adapter;
+
+ unregister_netdev(vport->netdev);
+ free_netdev(vport->netdev);
+ vport->netdev = NULL;
+
+ adapter->netdevs[vport->idx] = NULL;
+}
+
+/**
+ * idpf_vport_rel - Delete a vport and free its resources
+ * @vport: the vport being removed
+ */
+static void idpf_vport_rel(struct idpf_vport *vport)
+{
+ struct idpf_adapter *adapter = vport->adapter;
+ struct idpf_vport_config *vport_config;
+ struct idpf_vector_info vec_info;
+ struct idpf_rss_data *rss_data;
+ struct idpf_vport_max_q max_q;
+ u16 idx = vport->idx;
+ int i;
+
+ vport_config = adapter->vport_config[vport->idx];
+ idpf_deinit_rss(vport);
+ rss_data = &vport_config->user_config.rss_data;
+ kfree(rss_data->rss_key);
+ rss_data->rss_key = NULL;
+
+ idpf_send_destroy_vport_msg(vport);
+
+ /* Set all bits as we dont know on which vc_state the vport vhnl_wq
+ * is waiting on and wakeup the virtchnl workqueue even if it is
+ * waiting for the response as we are going down
+ */
+ for (i = 0; i < IDPF_VC_NBITS; i++)
+ set_bit(i, vport->vc_state);
+ wake_up(&vport->vchnl_wq);
+
+ mutex_destroy(&vport->vc_buf_lock);
+
+ /* Clear all the bits */
+ for (i = 0; i < IDPF_VC_NBITS; i++)
+ clear_bit(i, vport->vc_state);
+
+ /* Release all max queues allocated to the adapter's pool */
+ max_q.max_rxq = vport_config->max_q.max_rxq;
+ max_q.max_txq = vport_config->max_q.max_txq;
+ max_q.max_bufq = vport_config->max_q.max_bufq;
+ max_q.max_complq = vport_config->max_q.max_complq;
+ idpf_vport_dealloc_max_qs(adapter, &max_q);
+
+ /* Release all the allocated vectors on the stack */
+ vec_info.num_req_vecs = 0;
+ vec_info.num_curr_vecs = vport->num_q_vectors;
+ vec_info.default_vport = vport->default_vport;
+
+ idpf_req_rel_vector_indexes(adapter, vport->q_vector_idxs, &vec_info);
+
+ kfree(vport->q_vector_idxs);
+ vport->q_vector_idxs = NULL;
+
+ kfree(adapter->vport_params_recvd[idx]);
+ adapter->vport_params_recvd[idx] = NULL;
+ kfree(adapter->vport_params_reqd[idx]);
+ adapter->vport_params_reqd[idx] = NULL;
+ if (adapter->vport_config[idx]) {
+ kfree(adapter->vport_config[idx]->req_qs_chunks);
+ adapter->vport_config[idx]->req_qs_chunks = NULL;
+ }
+ kfree(vport);
+ adapter->num_alloc_vports--;
+}
+
+/**
+ * idpf_vport_dealloc - cleanup and release a given vport
+ * @vport: pointer to idpf vport structure
+ *
+ * returns nothing
+ */
+static void idpf_vport_dealloc(struct idpf_vport *vport)
+{
+ struct idpf_adapter *adapter = vport->adapter;
+ unsigned int i = vport->idx;
+
+ idpf_deinit_mac_addr(vport);
+ idpf_vport_stop(vport);
+
+ if (!test_bit(IDPF_HR_RESET_IN_PROG, adapter->flags))
+ idpf_decfg_netdev(vport);
+ if (test_bit(IDPF_REMOVE_IN_PROG, adapter->flags))
+ idpf_del_all_mac_filters(vport);
+
+ if (adapter->netdevs[i]) {
+ struct idpf_netdev_priv *np = netdev_priv(adapter->netdevs[i]);
+
+ np->vport = NULL;
+ }
+
+ idpf_vport_rel(vport);
+
+ adapter->vports[i] = NULL;
+ adapter->next_vport = idpf_get_free_slot(adapter);
+}
+
+/**
+ * idpf_vport_alloc - Allocates the next available struct vport in the adapter
+ * @adapter: board private structure
+ * @max_q: vport max queue info
+ *
+ * returns a pointer to a vport on success, NULL on failure.
+ */
+static struct idpf_vport *idpf_vport_alloc(struct idpf_adapter *adapter,
+ struct idpf_vport_max_q *max_q)
+{
+ struct idpf_rss_data *rss_data;
+ u16 idx = adapter->next_vport;
+ struct idpf_vport *vport;
+ u16 num_max_q;
+
+ if (idx == IDPF_NO_FREE_SLOT)
+ return NULL;
+
+ vport = kzalloc(sizeof(*vport), GFP_KERNEL);
+ if (!vport)
+ return vport;
+
+ if (!adapter->vport_config[idx]) {
+ struct idpf_vport_config *vport_config;
+
+ vport_config = kzalloc(sizeof(*vport_config), GFP_KERNEL);
+ if (!vport_config) {
+ kfree(vport);
+
+ return NULL;
+ }
+
+ adapter->vport_config[idx] = vport_config;
+ }
+
+ vport->idx = idx;
+ vport->adapter = adapter;
+ vport->compln_clean_budget = IDPF_TX_COMPLQ_CLEAN_BUDGET;
+ vport->default_vport = adapter->num_alloc_vports <
+ idpf_get_default_vports(adapter);
+
+ num_max_q = max(max_q->max_txq, max_q->max_rxq);
+ vport->q_vector_idxs = kcalloc(num_max_q, sizeof(u16), GFP_KERNEL);
+ if (!vport->q_vector_idxs) {
+ kfree(vport);
+
+ return NULL;
+ }
+ idpf_vport_init(vport, max_q);
+
+ /* This alloc is done separate from the LUT because it's not strictly
+ * dependent on how many queues we have. If we change number of queues
+ * and soft reset we'll need a new LUT but the key can remain the same
+ * for as long as the vport exists.
+ */
+ rss_data = &adapter->vport_config[idx]->user_config.rss_data;
+ rss_data->rss_key = kzalloc(rss_data->rss_key_size, GFP_KERNEL);
+ if (!rss_data->rss_key) {
+ kfree(vport);
+
+ return NULL;
+ }
+ /* Initialize default rss key */
+ netdev_rss_key_fill((void *)rss_data->rss_key, rss_data->rss_key_size);
+
+ /* fill vport slot in the adapter struct */
+ adapter->vports[idx] = vport;
+ adapter->vport_ids[idx] = idpf_get_vport_id(vport);
+
+ adapter->num_alloc_vports++;
+ /* prepare adapter->next_vport for next use */
+ adapter->next_vport = idpf_get_free_slot(adapter);
+
+ return vport;
+}
+
+/**
+ * idpf_get_stats64 - get statistics for network device structure
+ * @netdev: network interface device structure
+ * @stats: main device statistics structure
+ */
+static void idpf_get_stats64(struct net_device *netdev,
+ struct rtnl_link_stats64 *stats)
+{
+ struct idpf_netdev_priv *np = netdev_priv(netdev);
+
+ spin_lock_bh(&np->stats_lock);
+ *stats = np->netstats;
+ spin_unlock_bh(&np->stats_lock);
+}
+
+/**
+ * idpf_statistics_task - Delayed task to get statistics over mailbox
+ * @work: work_struct handle to our data
+ */
+void idpf_statistics_task(struct work_struct *work)
+{
+ struct idpf_adapter *adapter;
+ int i;
+
+ adapter = container_of(work, struct idpf_adapter, stats_task.work);
+
+ for (i = 0; i < adapter->max_vports; i++) {
+ struct idpf_vport *vport = adapter->vports[i];
+
+ if (vport && !test_bit(IDPF_HR_RESET_IN_PROG, adapter->flags))
+ idpf_send_get_stats_msg(vport);
+ }
+
+ queue_delayed_work(adapter->stats_wq, &adapter->stats_task,
+ msecs_to_jiffies(10000));
+}
+
+/**
+ * idpf_mbx_task - Delayed task to handle mailbox responses
+ * @work: work_struct handle
+ */
+void idpf_mbx_task(struct work_struct *work)
+{
+ struct idpf_adapter *adapter;
+
+ adapter = container_of(work, struct idpf_adapter, mbx_task.work);
+
+ if (test_bit(IDPF_MB_INTR_MODE, adapter->flags))
+ idpf_mb_irq_enable(adapter);
+ else
+ queue_delayed_work(adapter->mbx_wq, &adapter->mbx_task,
+ msecs_to_jiffies(300));
+
+ idpf_recv_mb_msg(adapter, VIRTCHNL2_OP_UNKNOWN, NULL, 0);
+}
+
+/**
+ * idpf_service_task - Delayed task for handling mailbox responses
+ * @work: work_struct handle to our data
+ *
+ */
+void idpf_service_task(struct work_struct *work)
+{
+ struct idpf_adapter *adapter;
+
+ adapter = container_of(work, struct idpf_adapter, serv_task.work);
+
+ if (idpf_is_reset_detected(adapter) &&
+ !idpf_is_reset_in_prog(adapter) &&
+ !test_bit(IDPF_REMOVE_IN_PROG, adapter->flags)) {
+ dev_info(&adapter->pdev->dev, "HW reset detected\n");
+ set_bit(IDPF_HR_FUNC_RESET, adapter->flags);
+ queue_delayed_work(adapter->vc_event_wq,
+ &adapter->vc_event_task,
+ msecs_to_jiffies(10));
+ }
+
+ queue_delayed_work(adapter->serv_wq, &adapter->serv_task,
+ msecs_to_jiffies(300));
+}
+
+/**
+ * idpf_restore_features - Restore feature configs
+ * @vport: virtual port structure
+ */
+static void idpf_restore_features(struct idpf_vport *vport)
+{
+ struct idpf_adapter *adapter = vport->adapter;
+
+ if (idpf_is_cap_ena(adapter, IDPF_OTHER_CAPS, VIRTCHNL2_CAP_MACFILTER))
+ idpf_restore_mac_filters(vport);
+}
+
+/**
+ * idpf_set_real_num_queues - set number of queues for netdev
+ * @vport: virtual port structure
+ *
+ * Returns 0 on success, negative on failure.
+ */
+static int idpf_set_real_num_queues(struct idpf_vport *vport)
+{
+ int err;
+
+ err = netif_set_real_num_rx_queues(vport->netdev, vport->num_rxq);
+ if (err)
+ return err;
+
+ return netif_set_real_num_tx_queues(vport->netdev, vport->num_txq);
+}
+
+/**
+ * idpf_up_complete - Complete interface up sequence
+ * @vport: virtual port structure
+ *
+ * Returns 0 on success, negative on failure.
+ */
+static int idpf_up_complete(struct idpf_vport *vport)
+{
+ struct idpf_netdev_priv *np = netdev_priv(vport->netdev);
+
+ if (vport->link_up && !netif_carrier_ok(vport->netdev)) {
+ netif_carrier_on(vport->netdev);
+ netif_tx_start_all_queues(vport->netdev);
+ }
+
+ np->state = __IDPF_VPORT_UP;
+
+ return 0;
+}
+
+/**
+ * idpf_rx_init_buf_tail - Write initial buffer ring tail value
+ * @vport: virtual port struct
+ */
+static void idpf_rx_init_buf_tail(struct idpf_vport *vport)
+{
+ int i, j;
+
+ for (i = 0; i < vport->num_rxq_grp; i++) {
+ struct idpf_rxq_group *grp = &vport->rxq_grps[i];
+
+ if (idpf_is_queue_model_split(vport->rxq_model)) {
+ for (j = 0; j < vport->num_bufqs_per_qgrp; j++) {
+ struct idpf_queue *q =
+ &grp->splitq.bufq_sets[j].bufq;
+
+ writel(q->next_to_alloc, q->tail);
+ }
+ } else {
+ for (j = 0; j < grp->singleq.num_rxq; j++) {
+ struct idpf_queue *q =
+ grp->singleq.rxqs[j];
+
+ writel(q->next_to_alloc, q->tail);
+ }
+ }
+ }
+}
+
+/**
+ * idpf_vport_open - Bring up a vport
+ * @vport: vport to bring up
+ * @alloc_res: allocate queue resources
+ */
+static int idpf_vport_open(struct idpf_vport *vport, bool alloc_res)
+{
+ struct idpf_netdev_priv *np = netdev_priv(vport->netdev);
+ struct idpf_adapter *adapter = vport->adapter;
+ struct idpf_vport_config *vport_config;
+ int err;
+
+ if (np->state != __IDPF_VPORT_DOWN)
+ return -EBUSY;
+
+ /* we do not allow interface up just yet */
+ netif_carrier_off(vport->netdev);
+
+ if (alloc_res) {
+ err = idpf_vport_queues_alloc(vport);
+ if (err)
+ return err;
+ }
+
+ err = idpf_vport_intr_alloc(vport);
+ if (err) {
+ dev_err(&adapter->pdev->dev, "Failed to allocate interrupts for vport %u: %d\n",
+ vport->vport_id, err);
+ goto queues_rel;
+ }
+
+ err = idpf_vport_queue_ids_init(vport);
+ if (err) {
+ dev_err(&adapter->pdev->dev, "Failed to initialize queue ids for vport %u: %d\n",
+ vport->vport_id, err);
+ goto intr_rel;
+ }
+
+ err = idpf_vport_intr_init(vport);
+ if (err) {
+ dev_err(&adapter->pdev->dev, "Failed to initialize interrupts for vport %u: %d\n",
+ vport->vport_id, err);
+ goto intr_rel;
+ }
+
+ err = idpf_rx_bufs_init_all(vport);
+ if (err) {
+ dev_err(&adapter->pdev->dev, "Failed to initialize RX buffers for vport %u: %d\n",
+ vport->vport_id, err);
+ goto intr_rel;
+ }
+
+ err = idpf_queue_reg_init(vport);
+ if (err) {
+ dev_err(&adapter->pdev->dev, "Failed to initialize queue registers for vport %u: %d\n",
+ vport->vport_id, err);
+ goto intr_rel;
+ }
+
+ idpf_rx_init_buf_tail(vport);
+
+ err = idpf_send_config_queues_msg(vport);
+ if (err) {
+ dev_err(&adapter->pdev->dev, "Failed to configure queues for vport %u, %d\n",
+ vport->vport_id, err);
+ goto intr_deinit;
+ }
+
+ err = idpf_send_map_unmap_queue_vector_msg(vport, true);
+ if (err) {
+ dev_err(&adapter->pdev->dev, "Failed to map queue vectors for vport %u: %d\n",
+ vport->vport_id, err);
+ goto intr_deinit;
+ }
+
+ err = idpf_send_enable_queues_msg(vport);
+ if (err) {
+ dev_err(&adapter->pdev->dev, "Failed to enable queues for vport %u: %d\n",
+ vport->vport_id, err);
+ goto unmap_queue_vectors;
+ }
+
+ err = idpf_send_enable_vport_msg(vport);
+ if (err) {
+ dev_err(&adapter->pdev->dev, "Failed to enable vport %u: %d\n",
+ vport->vport_id, err);
+ err = -EAGAIN;
+ goto disable_queues;
+ }
+
+ idpf_restore_features(vport);
+
+ vport_config = adapter->vport_config[vport->idx];
+ if (vport_config->user_config.rss_data.rss_lut)
+ err = idpf_config_rss(vport);
+ else
+ err = idpf_init_rss(vport);
+ if (err) {
+ dev_err(&adapter->pdev->dev, "Failed to initialize RSS for vport %u: %d\n",
+ vport->vport_id, err);
+ goto disable_vport;
+ }
+
+ err = idpf_up_complete(vport);
+ if (err) {
+ dev_err(&adapter->pdev->dev, "Failed to complete interface up for vport %u: %d\n",
+ vport->vport_id, err);
+ goto deinit_rss;
+ }
+
+ return 0;
+
+deinit_rss:
+ idpf_deinit_rss(vport);
+disable_vport:
+ idpf_send_disable_vport_msg(vport);
+disable_queues:
+ idpf_send_disable_queues_msg(vport);
+unmap_queue_vectors:
+ idpf_send_map_unmap_queue_vector_msg(vport, false);
+intr_deinit:
+ idpf_vport_intr_deinit(vport);
+intr_rel:
+ idpf_vport_intr_rel(vport);
+queues_rel:
+ idpf_vport_queues_rel(vport);
+
+ return err;
+}
+
+/**
+ * idpf_init_task - Delayed initialization task
+ * @work: work_struct handle to our data
+ *
+ * Init task finishes up pending work started in probe. Due to the asynchronous
+ * nature in which the device communicates with hardware, we may have to wait
+ * several milliseconds to get a response. Instead of busy polling in probe,
+ * pulling it out into a delayed work task prevents us from bogging down the
+ * whole system waiting for a response from hardware.
+ */
+void idpf_init_task(struct work_struct *work)
+{
+ struct idpf_vport_config *vport_config;
+ struct idpf_vport_max_q max_q;
+ struct idpf_adapter *adapter;
+ struct idpf_netdev_priv *np;
+ struct idpf_vport *vport;
+ u16 num_default_vports;
+ struct pci_dev *pdev;
+ bool default_vport;
+ int index, err;
+
+ adapter = container_of(work, struct idpf_adapter, init_task.work);
+
+ num_default_vports = idpf_get_default_vports(adapter);
+ if (adapter->num_alloc_vports < num_default_vports)
+ default_vport = true;
+ else
+ default_vport = false;
+
+ err = idpf_vport_alloc_max_qs(adapter, &max_q);
+ if (err)
+ goto unwind_vports;
+
+ err = idpf_send_create_vport_msg(adapter, &max_q);
+ if (err) {
+ idpf_vport_dealloc_max_qs(adapter, &max_q);
+ goto unwind_vports;
+ }
+
+ pdev = adapter->pdev;
+ vport = idpf_vport_alloc(adapter, &max_q);
+ if (!vport) {
+ err = -EFAULT;
+ dev_err(&pdev->dev, "failed to allocate vport: %d\n",
+ err);
+ idpf_vport_dealloc_max_qs(adapter, &max_q);
+ goto unwind_vports;
+ }
+
+ index = vport->idx;
+ vport_config = adapter->vport_config[index];
+
+ init_waitqueue_head(&vport->sw_marker_wq);
+ init_waitqueue_head(&vport->vchnl_wq);
+
+ mutex_init(&vport->vc_buf_lock);
+ spin_lock_init(&vport_config->mac_filter_list_lock);
+
+ INIT_LIST_HEAD(&vport_config->user_config.mac_filter_list);
+
+ err = idpf_check_supported_desc_ids(vport);
+ if (err) {
+ dev_err(&pdev->dev, "failed to get required descriptor ids\n");
+ goto cfg_netdev_err;
+ }
+
+ if (idpf_cfg_netdev(vport))
+ goto cfg_netdev_err;
+
+ err = idpf_send_get_rx_ptype_msg(vport);
+ if (err)
+ goto handle_err;
+
+ /* Once state is put into DOWN, driver is ready for dev_open */
+ np = netdev_priv(vport->netdev);
+ np->state = __IDPF_VPORT_DOWN;
+ if (test_and_clear_bit(IDPF_VPORT_UP_REQUESTED, vport_config->flags))
+ idpf_vport_open(vport, true);
+
+ /* Spawn and return 'idpf_init_task' work queue until all the
+ * default vports are created
+ */
+ if (adapter->num_alloc_vports < num_default_vports) {
+ queue_delayed_work(adapter->init_wq, &adapter->init_task,
+ msecs_to_jiffies(5 * (adapter->pdev->devfn & 0x07)));
+
+ return;
+ }
+
+ for (index = 0; index < adapter->max_vports; index++) {
+ if (adapter->netdevs[index] &&
+ !test_bit(IDPF_VPORT_REG_NETDEV,
+ adapter->vport_config[index]->flags)) {
+ register_netdev(adapter->netdevs[index]);
+ set_bit(IDPF_VPORT_REG_NETDEV,
+ adapter->vport_config[index]->flags);
+ }
+ }
+
+ /* As all the required vports are created, clear the reset flag
+ * unconditionally here in case we were in reset and the link was down.
+ */
+ clear_bit(IDPF_HR_RESET_IN_PROG, adapter->flags);
+ /* Start the statistics task now */
+ queue_delayed_work(adapter->stats_wq, &adapter->stats_task,
+ msecs_to_jiffies(10 * (pdev->devfn & 0x07)));
+
+ return;
+
+handle_err:
+ idpf_decfg_netdev(vport);
+cfg_netdev_err:
+ idpf_vport_rel(vport);
+ adapter->vports[index] = NULL;
+unwind_vports:
+ if (default_vport) {
+ for (index = 0; index < adapter->max_vports; index++) {
+ if (adapter->vports[index])
+ idpf_vport_dealloc(adapter->vports[index]);
+ }
+ }
+ clear_bit(IDPF_HR_RESET_IN_PROG, adapter->flags);
+}
+
+/**
+ * idpf_sriov_ena - Enable or change number of VFs
+ * @adapter: private data struct
+ * @num_vfs: number of VFs to allocate
+ */
+static int idpf_sriov_ena(struct idpf_adapter *adapter, int num_vfs)
+{
+ struct device *dev = &adapter->pdev->dev;
+ int err;
+
+ err = idpf_send_set_sriov_vfs_msg(adapter, num_vfs);
+ if (err) {
+ dev_err(dev, "Failed to allocate VFs: %d\n", err);
+
+ return err;
+ }
+
+ err = pci_enable_sriov(adapter->pdev, num_vfs);
+ if (err) {
+ idpf_send_set_sriov_vfs_msg(adapter, 0);
+ dev_err(dev, "Failed to enable SR-IOV: %d\n", err);
+
+ return err;
+ }
+
+ adapter->num_vfs = num_vfs;
+
+ return num_vfs;
+}
+
+/**
+ * idpf_sriov_configure - Configure the requested VFs
+ * @pdev: pointer to a pci_dev structure
+ * @num_vfs: number of vfs to allocate
+ *
+ * Enable or change the number of VFs. Called when the user updates the number
+ * of VFs in sysfs.
+ **/
+int idpf_sriov_configure(struct pci_dev *pdev, int num_vfs)
+{
+ struct idpf_adapter *adapter = pci_get_drvdata(pdev);
+
+ if (!idpf_is_cap_ena(adapter, IDPF_OTHER_CAPS, VIRTCHNL2_CAP_SRIOV)) {
+ dev_info(&pdev->dev, "SR-IOV is not supported on this device\n");
+
+ return -EOPNOTSUPP;
+ }
+
+ if (num_vfs)
+ return idpf_sriov_ena(adapter, num_vfs);
+
+ if (pci_vfs_assigned(pdev)) {
+ dev_warn(&pdev->dev, "Unable to free VFs because some are assigned to VMs\n");
+
+ return -EBUSY;
+ }
+
+ pci_disable_sriov(adapter->pdev);
+ idpf_send_set_sriov_vfs_msg(adapter, 0);
+ adapter->num_vfs = 0;
+
+ return 0;
+}
+
+/**
+ * idpf_deinit_task - Device deinit routine
+ * @adapter: Driver specific private structure
+ *
+ * Extended remove logic which will be used for
+ * hard reset as well
+ */
+void idpf_deinit_task(struct idpf_adapter *adapter)
+{
+ unsigned int i;
+
+ /* Wait until the init_task is done else this thread might release
+ * the resources first and the other thread might end up in a bad state
+ */
+ cancel_delayed_work_sync(&adapter->init_task);
+
+ if (!adapter->vports)
+ return;
+
+ cancel_delayed_work_sync(&adapter->stats_task);
+
+ for (i = 0; i < adapter->max_vports; i++) {
+ if (adapter->vports[i])
+ idpf_vport_dealloc(adapter->vports[i]);
+ }
+}
+
+/**
+ * idpf_check_reset_complete - check that reset is complete
+ * @hw: pointer to hw struct
+ * @reset_reg: struct with reset registers
+ *
+ * Returns 0 if device is ready to use, or -EBUSY if it's in reset.
+ **/
+static int idpf_check_reset_complete(struct idpf_hw *hw,
+ struct idpf_reset_reg *reset_reg)
+{
+ struct idpf_adapter *adapter = hw->back;
+ int i;
+
+ for (i = 0; i < 2000; i++) {
+ u32 reg_val = readl(reset_reg->rstat);
+
+ /* 0xFFFFFFFF might be read if other side hasn't cleared the
+ * register for us yet and 0xFFFFFFFF is not a valid value for
+ * the register, so treat that as invalid.
+ */
+ if (reg_val != 0xFFFFFFFF && (reg_val & reset_reg->rstat_m))
+ return 0;
+
+ usleep_range(5000, 10000);
+ }
+
+ dev_warn(&adapter->pdev->dev, "Device reset timeout!\n");
+ /* Clear the reset flag unconditionally here since the reset
+ * technically isn't in progress anymore from the driver's perspective
+ */
+ clear_bit(IDPF_HR_RESET_IN_PROG, adapter->flags);
+
+ return -EBUSY;
+}
+
+/**
+ * idpf_set_vport_state - Set the vport state to be after the reset
+ * @adapter: Driver specific private structure
+ */
+static void idpf_set_vport_state(struct idpf_adapter *adapter)
+{
+ u16 i;
+
+ for (i = 0; i < adapter->max_vports; i++) {
+ struct idpf_netdev_priv *np;
+
+ if (!adapter->netdevs[i])
+ continue;
+
+ np = netdev_priv(adapter->netdevs[i]);
+ if (np->state == __IDPF_VPORT_UP)
+ set_bit(IDPF_VPORT_UP_REQUESTED,
+ adapter->vport_config[i]->flags);
+ }
+}
+
+/**
+ * idpf_init_hard_reset - Initiate a hardware reset
+ * @adapter: Driver specific private structure
+ *
+ * Deallocate the vports and all the resources associated with them and
+ * reallocate. Also reinitialize the mailbox. Return 0 on success,
+ * negative on failure.
+ */
+static int idpf_init_hard_reset(struct idpf_adapter *adapter)
+{
+ struct idpf_reg_ops *reg_ops = &adapter->dev_ops.reg_ops;
+ struct device *dev = &adapter->pdev->dev;
+ struct net_device *netdev;
+ int err;
+ u16 i;
+
+ mutex_lock(&adapter->vport_ctrl_lock);
+
+ dev_info(dev, "Device HW Reset initiated\n");
+
+ /* Avoid TX hangs on reset */
+ for (i = 0; i < adapter->max_vports; i++) {
+ netdev = adapter->netdevs[i];
+ if (!netdev)
+ continue;
+
+ netif_carrier_off(netdev);
+ netif_tx_disable(netdev);
+ }
+
+ /* Prepare for reset */
+ if (test_and_clear_bit(IDPF_HR_DRV_LOAD, adapter->flags)) {
+ reg_ops->trigger_reset(adapter, IDPF_HR_DRV_LOAD);
+ } else if (test_and_clear_bit(IDPF_HR_FUNC_RESET, adapter->flags)) {
+ bool is_reset = idpf_is_reset_detected(adapter);
+
+ idpf_set_vport_state(adapter);
+ idpf_vc_core_deinit(adapter);
+ if (!is_reset)
+ reg_ops->trigger_reset(adapter, IDPF_HR_FUNC_RESET);
+ idpf_deinit_dflt_mbx(adapter);
+ } else {
+ dev_err(dev, "Unhandled hard reset cause\n");
+ err = -EBADRQC;
+ goto unlock_mutex;
+ }
+
+ /* Wait for reset to complete */
+ err = idpf_check_reset_complete(&adapter->hw, &adapter->reset_reg);
+ if (err) {
+ dev_err(dev, "The driver was unable to contact the device's firmware. Check that the FW is running. Driver state= 0x%x\n",
+ adapter->state);
+ goto unlock_mutex;
+ }
+
+ /* Reset is complete and so start building the driver resources again */
+ err = idpf_init_dflt_mbx(adapter);
+ if (err) {
+ dev_err(dev, "Failed to initialize default mailbox: %d\n", err);
+ goto unlock_mutex;
+ }
+
+ /* Initialize the state machine, also allocate memory and request
+ * resources
+ */
+ err = idpf_vc_core_init(adapter);
+ if (err) {
+ idpf_deinit_dflt_mbx(adapter);
+ goto unlock_mutex;
+ }
+
+ /* Wait till all the vports are initialized to release the reset lock,
+ * else user space callbacks may access uninitialized vports
+ */
+ while (test_bit(IDPF_HR_RESET_IN_PROG, adapter->flags))
+ msleep(100);
+
+unlock_mutex:
+ mutex_unlock(&adapter->vport_ctrl_lock);
+
+ return err;
+}
+
+/**
+ * idpf_vc_event_task - Handle virtchannel event logic
+ * @work: work queue struct
+ */
+void idpf_vc_event_task(struct work_struct *work)
+{
+ struct idpf_adapter *adapter;
+
+ adapter = container_of(work, struct idpf_adapter, vc_event_task.work);
+
+ if (test_bit(IDPF_REMOVE_IN_PROG, adapter->flags))
+ return;
+
+ if (test_bit(IDPF_HR_FUNC_RESET, adapter->flags) ||
+ test_bit(IDPF_HR_DRV_LOAD, adapter->flags)) {
+ set_bit(IDPF_HR_RESET_IN_PROG, adapter->flags);
+ idpf_init_hard_reset(adapter);
+ }
+}
+
+/**
+ * idpf_initiate_soft_reset - Initiate a software reset
+ * @vport: virtual port data struct
+ * @reset_cause: reason for the soft reset
+ *
+ * Soft reset only reallocs vport queue resources. Returns 0 on success,
+ * negative on failure.
+ */
+int idpf_initiate_soft_reset(struct idpf_vport *vport,
+ enum idpf_vport_reset_cause reset_cause)
+{
+ struct idpf_netdev_priv *np = netdev_priv(vport->netdev);
+ enum idpf_vport_state current_state = np->state;
+ struct idpf_adapter *adapter = vport->adapter;
+ struct idpf_vport *new_vport;
+ int err, i;
+
+ /* If the system is low on memory, we can end up in bad state if we
+ * free all the memory for queue resources and try to allocate them
+ * again. Instead, we can pre-allocate the new resources before doing
+ * anything and bailing if the alloc fails.
+ *
+ * Make a clone of the existing vport to mimic its current
+ * configuration, then modify the new structure with any requested
+ * changes. Once the allocation of the new resources is done, stop the
+ * existing vport and copy the configuration to the main vport. If an
+ * error occurred, the existing vport will be untouched.
+ *
+ */
+ new_vport = kzalloc(sizeof(*vport), GFP_KERNEL);
+ if (!new_vport)
+ return -ENOMEM;
+
+ /* This purposely avoids copying the end of the struct because it
+ * contains wait_queues and mutexes and other stuff we don't want to
+ * mess with. Nothing below should use those variables from new_vport
+ * and should instead always refer to them in vport if they need to.
+ */
+ memcpy(new_vport, vport, offsetof(struct idpf_vport, vc_state));
+
+ /* Adjust resource parameters prior to reallocating resources */
+ switch (reset_cause) {
+ case IDPF_SR_Q_CHANGE:
+ err = idpf_vport_adjust_qs(new_vport);
+ if (err)
+ goto free_vport;
+ break;
+ case IDPF_SR_Q_DESC_CHANGE:
+ /* Update queue parameters before allocating resources */
+ idpf_vport_calc_num_q_desc(new_vport);
+ break;
+ case IDPF_SR_MTU_CHANGE:
+ case IDPF_SR_RSC_CHANGE:
+ break;
+ default:
+ dev_err(&adapter->pdev->dev, "Unhandled soft reset cause\n");
+ err = -EINVAL;
+ goto free_vport;
+ }
+
+ err = idpf_vport_queues_alloc(new_vport);
+ if (err)
+ goto free_vport;
+ if (current_state <= __IDPF_VPORT_DOWN) {
+ idpf_send_delete_queues_msg(vport);
+ } else {
+ set_bit(IDPF_VPORT_DEL_QUEUES, vport->flags);
+ idpf_vport_stop(vport);
+ }
+
+ idpf_deinit_rss(vport);
+ /* We're passing in vport here because we need its wait_queue
+ * to send a message and it should be getting all the vport
+ * config data out of the adapter but we need to be careful not
+ * to add code to add_queues to change the vport config within
+ * vport itself as it will be wiped with a memcpy later.
+ */
+ err = idpf_send_add_queues_msg(vport, new_vport->num_txq,
+ new_vport->num_complq,
+ new_vport->num_rxq,
+ new_vport->num_bufq);
+ if (err)
+ goto err_reset;
+
+ /* Same comment as above regarding avoiding copying the wait_queues and
+ * mutexes applies here. We do not want to mess with those if possible.
+ */
+ memcpy(vport, new_vport, offsetof(struct idpf_vport, vc_state));
+
+ /* Since idpf_vport_queues_alloc was called with new_port, the queue
+ * back pointers are currently pointing to the local new_vport. Reset
+ * the backpointers to the original vport here
+ */
+ for (i = 0; i < vport->num_txq_grp; i++) {
+ struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i];
+ int j;
+
+ tx_qgrp->vport = vport;
+ for (j = 0; j < tx_qgrp->num_txq; j++)
+ tx_qgrp->txqs[j]->vport = vport;
+
+ if (idpf_is_queue_model_split(vport->txq_model))
+ tx_qgrp->complq->vport = vport;
+ }
+
+ for (i = 0; i < vport->num_rxq_grp; i++) {
+ struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i];
+ struct idpf_queue *q;
+ u16 num_rxq;
+ int j;
+
+ rx_qgrp->vport = vport;
+ for (j = 0; j < vport->num_bufqs_per_qgrp; j++)
+ rx_qgrp->splitq.bufq_sets[j].bufq.vport = vport;
+
+ if (idpf_is_queue_model_split(vport->rxq_model))
+ num_rxq = rx_qgrp->splitq.num_rxq_sets;
+ else
+ num_rxq = rx_qgrp->singleq.num_rxq;
+
+ for (j = 0; j < num_rxq; j++) {
+ if (idpf_is_queue_model_split(vport->rxq_model))
+ q = &rx_qgrp->splitq.rxq_sets[j]->rxq;
+ else
+ q = rx_qgrp->singleq.rxqs[j];
+ q->vport = vport;
+ }
+ }
+
+ if (reset_cause == IDPF_SR_Q_CHANGE)
+ idpf_vport_alloc_vec_indexes(vport);
+
+ err = idpf_set_real_num_queues(vport);
+ if (err)
+ goto err_reset;
+
+ if (current_state == __IDPF_VPORT_UP)
+ err = idpf_vport_open(vport, false);
+
+ kfree(new_vport);
+
+ return err;
+
+err_reset:
+ idpf_vport_queues_rel(new_vport);
+free_vport:
+ kfree(new_vport);
+
+ return err;
+}
+
+/**
+ * idpf_addr_sync - Callback for dev_(mc|uc)_sync to add address
+ * @netdev: the netdevice
+ * @addr: address to add
+ *
+ * Called by __dev_(mc|uc)_sync when an address needs to be added. We call
+ * __dev_(uc|mc)_sync from .set_rx_mode. Kernel takes addr_list_lock spinlock
+ * meaning we cannot sleep in this context. Due to this, we have to add the
+ * filter and send the virtchnl message asynchronously without waiting for the
+ * response from the other side. We won't know whether or not the operation
+ * actually succeeded until we get the message back. Returns 0 on success,
+ * negative on failure.
+ */
+static int idpf_addr_sync(struct net_device *netdev, const u8 *addr)
+{
+ struct idpf_netdev_priv *np = netdev_priv(netdev);
+
+ return idpf_add_mac_filter(np->vport, np, addr, true);
+}
+
+/**
+ * idpf_addr_unsync - Callback for dev_(mc|uc)_sync to remove address
+ * @netdev: the netdevice
+ * @addr: address to add
+ *
+ * Called by __dev_(mc|uc)_sync when an address needs to be added. We call
+ * __dev_(uc|mc)_sync from .set_rx_mode. Kernel takes addr_list_lock spinlock
+ * meaning we cannot sleep in this context. Due to this we have to delete the
+ * filter and send the virtchnl message asynchronously without waiting for the
+ * return from the other side. We won't know whether or not the operation
+ * actually succeeded until we get the message back. Returns 0 on success,
+ * negative on failure.
+ */
+static int idpf_addr_unsync(struct net_device *netdev, const u8 *addr)
+{
+ struct idpf_netdev_priv *np = netdev_priv(netdev);
+
+ /* Under some circumstances, we might receive a request to delete
+ * our own device address from our uc list. Because we store the
+ * device address in the VSI's MAC filter list, we need to ignore
+ * such requests and not delete our device address from this list.
+ */
+ if (ether_addr_equal(addr, netdev->dev_addr))
+ return 0;
+
+ idpf_del_mac_filter(np->vport, np, addr, true);
+
+ return 0;
+}
+
+/**
+ * idpf_set_rx_mode - NDO callback to set the netdev filters
+ * @netdev: network interface device structure
+ *
+ * Stack takes addr_list_lock spinlock before calling our .set_rx_mode. We
+ * cannot sleep in this context.
+ */
+static void idpf_set_rx_mode(struct net_device *netdev)
+{
+ struct idpf_netdev_priv *np = netdev_priv(netdev);
+ struct idpf_vport_user_config_data *config_data;
+ struct idpf_adapter *adapter;
+ bool changed = false;
+ struct device *dev;
+ int err;
+
+ adapter = np->adapter;
+ dev = &adapter->pdev->dev;
+
+ if (idpf_is_cap_ena(adapter, IDPF_OTHER_CAPS, VIRTCHNL2_CAP_MACFILTER)) {
+ __dev_uc_sync(netdev, idpf_addr_sync, idpf_addr_unsync);
+ __dev_mc_sync(netdev, idpf_addr_sync, idpf_addr_unsync);
+ }
+
+ if (!idpf_is_cap_ena(adapter, IDPF_OTHER_CAPS, VIRTCHNL2_CAP_PROMISC))
+ return;
+
+ config_data = &adapter->vport_config[np->vport_idx]->user_config;
+ /* IFF_PROMISC enables both unicast and multicast promiscuous,
+ * while IFF_ALLMULTI only enables multicast such that:
+ *
+ * promisc + allmulti = unicast | multicast
+ * promisc + !allmulti = unicast | multicast
+ * !promisc + allmulti = multicast
+ */
+ if ((netdev->flags & IFF_PROMISC) &&
+ !test_and_set_bit(__IDPF_PROMISC_UC, config_data->user_flags)) {
+ changed = true;
+ dev_info(&adapter->pdev->dev, "Entering promiscuous mode\n");
+ if (!test_and_set_bit(__IDPF_PROMISC_MC, adapter->flags))
+ dev_info(dev, "Entering multicast promiscuous mode\n");
+ }
+
+ if (!(netdev->flags & IFF_PROMISC) &&
+ test_and_clear_bit(__IDPF_PROMISC_UC, config_data->user_flags)) {
+ changed = true;
+ dev_info(dev, "Leaving promiscuous mode\n");
+ }
+
+ if (netdev->flags & IFF_ALLMULTI &&
+ !test_and_set_bit(__IDPF_PROMISC_MC, config_data->user_flags)) {
+ changed = true;
+ dev_info(dev, "Entering multicast promiscuous mode\n");
+ }
+
+ if (!(netdev->flags & (IFF_ALLMULTI | IFF_PROMISC)) &&
+ test_and_clear_bit(__IDPF_PROMISC_MC, config_data->user_flags)) {
+ changed = true;
+ dev_info(dev, "Leaving multicast promiscuous mode\n");
+ }
+
+ if (!changed)
+ return;
+
+ err = idpf_set_promiscuous(adapter, config_data, np->vport_id);
+ if (err)
+ dev_err(dev, "Failed to set promiscuous mode: %d\n", err);
+}
+
+/**
+ * idpf_vport_manage_rss_lut - disable/enable RSS
+ * @vport: the vport being changed
+ *
+ * In the event of disable request for RSS, this function will zero out RSS
+ * LUT, while in the event of enable request for RSS, it will reconfigure RSS
+ * LUT with the default LUT configuration.
+ */
+static int idpf_vport_manage_rss_lut(struct idpf_vport *vport)
+{
+ bool ena = idpf_is_feature_ena(vport, NETIF_F_RXHASH);
+ struct idpf_rss_data *rss_data;
+ u16 idx = vport->idx;
+ int lut_size;
+
+ rss_data = &vport->adapter->vport_config[idx]->user_config.rss_data;
+ lut_size = rss_data->rss_lut_size * sizeof(u32);
+
+ if (ena) {
+ /* This will contain the default or user configured LUT */
+ memcpy(rss_data->rss_lut, rss_data->cached_lut, lut_size);
+ } else {
+ /* Save a copy of the current LUT to be restored later if
+ * requested.
+ */
+ memcpy(rss_data->cached_lut, rss_data->rss_lut, lut_size);
+
+ /* Zero out the current LUT to disable */
+ memset(rss_data->rss_lut, 0, lut_size);
+ }
+
+ return idpf_config_rss(vport);
+}
+
+/**
+ * idpf_set_features - set the netdev feature flags
+ * @netdev: ptr to the netdev being adjusted
+ * @features: the feature set that the stack is suggesting
+ */
+static int idpf_set_features(struct net_device *netdev,
+ netdev_features_t features)
+{
+ netdev_features_t changed = netdev->features ^ features;
+ struct idpf_adapter *adapter;
+ struct idpf_vport *vport;
+ int err = 0;
+
+ idpf_vport_ctrl_lock(netdev);
+ vport = idpf_netdev_to_vport(netdev);
+
+ adapter = vport->adapter;
+
+ if (idpf_is_reset_in_prog(adapter)) {
+ dev_err(&adapter->pdev->dev, "Device is resetting, changing netdev features temporarily unavailable.\n");
+ err = -EBUSY;
+ goto unlock_mutex;
+ }
+
+ if (changed & NETIF_F_RXHASH) {
+ netdev->features ^= NETIF_F_RXHASH;
+ err = idpf_vport_manage_rss_lut(vport);
+ if (err)
+ goto unlock_mutex;
+ }
+
+ if (changed & NETIF_F_GRO_HW) {
+ netdev->features ^= NETIF_F_GRO_HW;
+ err = idpf_initiate_soft_reset(vport, IDPF_SR_RSC_CHANGE);
+ if (err)
+ goto unlock_mutex;
+ }
+
+ if (changed & NETIF_F_LOOPBACK) {
+ netdev->features ^= NETIF_F_LOOPBACK;
+ err = idpf_send_ena_dis_loopback_msg(vport);
+ }
+
+unlock_mutex:
+ idpf_vport_ctrl_unlock(netdev);
+
+ return err;
+}
+
+/**
+ * idpf_open - Called when a network interface becomes active
+ * @netdev: network interface device structure
+ *
+ * The open entry point is called when a network interface is made
+ * active by the system (IFF_UP). At this point all resources needed
+ * for transmit and receive operations are allocated, the interrupt
+ * handler is registered with the OS, the netdev watchdog is enabled,
+ * and the stack is notified that the interface is ready.
+ *
+ * Returns 0 on success, negative value on failure
+ */
+static int idpf_open(struct net_device *netdev)
+{
+ struct idpf_vport *vport;
+ int err;
+
+ idpf_vport_ctrl_lock(netdev);
+ vport = idpf_netdev_to_vport(netdev);
+
+ err = idpf_vport_open(vport, true);
+
+ idpf_vport_ctrl_unlock(netdev);
+
+ return err;
+}
+
+/**
+ * idpf_change_mtu - NDO callback to change the MTU
+ * @netdev: network interface device structure
+ * @new_mtu: new value for maximum frame size
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int idpf_change_mtu(struct net_device *netdev, int new_mtu)
+{
+ struct idpf_vport *vport;
+ int err;
+
+ idpf_vport_ctrl_lock(netdev);
+ vport = idpf_netdev_to_vport(netdev);
+
+ netdev->mtu = new_mtu;
+
+ err = idpf_initiate_soft_reset(vport, IDPF_SR_MTU_CHANGE);
+
+ idpf_vport_ctrl_unlock(netdev);
+
+ return err;
+}
+
+/**
+ * idpf_features_check - Validate packet conforms to limits
+ * @skb: skb buffer
+ * @netdev: This port's netdev
+ * @features: Offload features that the stack believes apply
+ */
+static netdev_features_t idpf_features_check(struct sk_buff *skb,
+ struct net_device *netdev,
+ netdev_features_t features)
+{
+ struct idpf_vport *vport = idpf_netdev_to_vport(netdev);
+ struct idpf_adapter *adapter = vport->adapter;
+ size_t len;
+
+ /* No point in doing any of this if neither checksum nor GSO are
+ * being requested for this frame. We can rule out both by just
+ * checking for CHECKSUM_PARTIAL
+ */
+ if (skb->ip_summed != CHECKSUM_PARTIAL)
+ return features;
+
+ /* We cannot support GSO if the MSS is going to be less than
+ * 88 bytes. If it is then we need to drop support for GSO.
+ */
+ if (skb_is_gso(skb) &&
+ (skb_shinfo(skb)->gso_size < IDPF_TX_TSO_MIN_MSS))
+ features &= ~NETIF_F_GSO_MASK;
+
+ /* Ensure MACLEN is <= 126 bytes (63 words) and not an odd size */
+ len = skb_network_offset(skb);
+ if (unlikely(len & ~(126)))
+ goto unsupported;
+
+ len = skb_network_header_len(skb);
+ if (unlikely(len > idpf_get_max_tx_hdr_size(adapter)))
+ goto unsupported;
+
+ if (!skb->encapsulation)
+ return features;
+
+ /* L4TUNLEN can support 127 words */
+ len = skb_inner_network_header(skb) - skb_transport_header(skb);
+ if (unlikely(len & ~(127 * 2)))
+ goto unsupported;
+
+ /* IPLEN can support at most 127 dwords */
+ len = skb_inner_network_header_len(skb);
+ if (unlikely(len > idpf_get_max_tx_hdr_size(adapter)))
+ goto unsupported;
+
+ /* No need to validate L4LEN as TCP is the only protocol with a
+ * a flexible value and we support all possible values supported
+ * by TCP, which is at most 15 dwords
+ */
+
+ return features;
+
+unsupported:
+ return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
+}
+
+/**
+ * idpf_set_mac - NDO callback to set port mac address
+ * @netdev: network interface device structure
+ * @p: pointer to an address structure
+ *
+ * Returns 0 on success, negative on failure
+ **/
+static int idpf_set_mac(struct net_device *netdev, void *p)
+{
+ struct idpf_netdev_priv *np = netdev_priv(netdev);
+ struct idpf_vport_config *vport_config;
+ struct sockaddr *addr = p;
+ struct idpf_vport *vport;
+ int err = 0;
+
+ idpf_vport_ctrl_lock(netdev);
+ vport = idpf_netdev_to_vport(netdev);
+
+ if (!idpf_is_cap_ena(vport->adapter, IDPF_OTHER_CAPS,
+ VIRTCHNL2_CAP_MACFILTER)) {
+ dev_info(&vport->adapter->pdev->dev, "Setting MAC address is not supported\n");
+ err = -EOPNOTSUPP;
+ goto unlock_mutex;
+ }
+
+ if (!is_valid_ether_addr(addr->sa_data)) {
+ dev_info(&vport->adapter->pdev->dev, "Invalid MAC address: %pM\n",
+ addr->sa_data);
+ err = -EADDRNOTAVAIL;
+ goto unlock_mutex;
+ }
+
+ if (ether_addr_equal(netdev->dev_addr, addr->sa_data))
+ goto unlock_mutex;
+
+ vport_config = vport->adapter->vport_config[vport->idx];
+ err = idpf_add_mac_filter(vport, np, addr->sa_data, false);
+ if (err) {
+ __idpf_del_mac_filter(vport_config, addr->sa_data);
+ goto unlock_mutex;
+ }
+
+ if (is_valid_ether_addr(vport->default_mac_addr))
+ idpf_del_mac_filter(vport, np, vport->default_mac_addr, false);
+
+ ether_addr_copy(vport->default_mac_addr, addr->sa_data);
+ eth_hw_addr_set(netdev, addr->sa_data);
+
+unlock_mutex:
+ idpf_vport_ctrl_unlock(netdev);
+
+ return err;
+}
+
+/**
+ * idpf_alloc_dma_mem - Allocate dma memory
+ * @hw: pointer to hw struct
+ * @mem: pointer to dma_mem struct
+ * @size: size of the memory to allocate
+ */
+void *idpf_alloc_dma_mem(struct idpf_hw *hw, struct idpf_dma_mem *mem, u64 size)
+{
+ struct idpf_adapter *adapter = hw->back;
+ size_t sz = ALIGN(size, 4096);
+
+ mem->va = dma_alloc_coherent(&adapter->pdev->dev, sz,
+ &mem->pa, GFP_KERNEL);
+ mem->size = sz;
+
+ return mem->va;
+}
+
+/**
+ * idpf_free_dma_mem - Free the allocated dma memory
+ * @hw: pointer to hw struct
+ * @mem: pointer to dma_mem struct
+ */
+void idpf_free_dma_mem(struct idpf_hw *hw, struct idpf_dma_mem *mem)
+{
+ struct idpf_adapter *adapter = hw->back;
+
+ dma_free_coherent(&adapter->pdev->dev, mem->size,
+ mem->va, mem->pa);
+ mem->size = 0;
+ mem->va = NULL;
+ mem->pa = 0;
+}
+
+static const struct net_device_ops idpf_netdev_ops_splitq = {
+ .ndo_open = idpf_open,
+ .ndo_stop = idpf_stop,
+ .ndo_start_xmit = idpf_tx_splitq_start,
+ .ndo_features_check = idpf_features_check,
+ .ndo_set_rx_mode = idpf_set_rx_mode,
+ .ndo_validate_addr = eth_validate_addr,
+ .ndo_set_mac_address = idpf_set_mac,
+ .ndo_change_mtu = idpf_change_mtu,
+ .ndo_get_stats64 = idpf_get_stats64,
+ .ndo_set_features = idpf_set_features,
+ .ndo_tx_timeout = idpf_tx_timeout,
+};
+
+static const struct net_device_ops idpf_netdev_ops_singleq = {
+ .ndo_open = idpf_open,
+ .ndo_stop = idpf_stop,
+ .ndo_start_xmit = idpf_tx_singleq_start,
+ .ndo_features_check = idpf_features_check,
+ .ndo_set_rx_mode = idpf_set_rx_mode,
+ .ndo_validate_addr = eth_validate_addr,
+ .ndo_set_mac_address = idpf_set_mac,
+ .ndo_change_mtu = idpf_change_mtu,
+ .ndo_get_stats64 = idpf_get_stats64,
+ .ndo_set_features = idpf_set_features,
+ .ndo_tx_timeout = idpf_tx_timeout,
+};
diff --git a/drivers/net/ethernet/intel/idpf/idpf_main.c b/drivers/net/ethernet/intel/idpf/idpf_main.c
new file mode 100644
index 000000000000..e1febc74cefd
--- /dev/null
+++ b/drivers/net/ethernet/intel/idpf/idpf_main.c
@@ -0,0 +1,279 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2023 Intel Corporation */
+
+#include "idpf.h"
+#include "idpf_devids.h"
+
+#define DRV_SUMMARY "Intel(R) Infrastructure Data Path Function Linux Driver"
+
+MODULE_DESCRIPTION(DRV_SUMMARY);
+MODULE_LICENSE("GPL");
+
+/**
+ * idpf_remove - Device removal routine
+ * @pdev: PCI device information struct
+ */
+static void idpf_remove(struct pci_dev *pdev)
+{
+ struct idpf_adapter *adapter = pci_get_drvdata(pdev);
+ int i;
+
+ set_bit(IDPF_REMOVE_IN_PROG, adapter->flags);
+
+ /* Wait until vc_event_task is done to consider if any hard reset is
+ * in progress else we may go ahead and release the resources but the
+ * thread doing the hard reset might continue the init path and
+ * end up in bad state.
+ */
+ cancel_delayed_work_sync(&adapter->vc_event_task);
+ if (adapter->num_vfs)
+ idpf_sriov_configure(pdev, 0);
+
+ idpf_vc_core_deinit(adapter);
+ /* Be a good citizen and leave the device clean on exit */
+ adapter->dev_ops.reg_ops.trigger_reset(adapter, IDPF_HR_FUNC_RESET);
+ idpf_deinit_dflt_mbx(adapter);
+
+ if (!adapter->netdevs)
+ goto destroy_wqs;
+
+ /* There are some cases where it's possible to still have netdevs
+ * registered with the stack at this point, e.g. if the driver detected
+ * a HW reset and rmmod is called before it fully recovers. Unregister
+ * any stale netdevs here.
+ */
+ for (i = 0; i < adapter->max_vports; i++) {
+ if (!adapter->netdevs[i])
+ continue;
+ if (adapter->netdevs[i]->reg_state != NETREG_UNINITIALIZED)
+ unregister_netdev(adapter->netdevs[i]);
+ free_netdev(adapter->netdevs[i]);
+ adapter->netdevs[i] = NULL;
+ }
+
+destroy_wqs:
+ destroy_workqueue(adapter->init_wq);
+ destroy_workqueue(adapter->serv_wq);
+ destroy_workqueue(adapter->mbx_wq);
+ destroy_workqueue(adapter->stats_wq);
+ destroy_workqueue(adapter->vc_event_wq);
+
+ for (i = 0; i < adapter->max_vports; i++) {
+ kfree(adapter->vport_config[i]);
+ adapter->vport_config[i] = NULL;
+ }
+ kfree(adapter->vport_config);
+ adapter->vport_config = NULL;
+ kfree(adapter->netdevs);
+ adapter->netdevs = NULL;
+
+ mutex_destroy(&adapter->vport_ctrl_lock);
+ mutex_destroy(&adapter->vector_lock);
+ mutex_destroy(&adapter->queue_lock);
+ mutex_destroy(&adapter->vc_buf_lock);
+
+ pci_set_drvdata(pdev, NULL);
+ kfree(adapter);
+}
+
+/**
+ * idpf_shutdown - PCI callback for shutting down device
+ * @pdev: PCI device information struct
+ */
+static void idpf_shutdown(struct pci_dev *pdev)
+{
+ idpf_remove(pdev);
+
+ if (system_state == SYSTEM_POWER_OFF)
+ pci_set_power_state(pdev, PCI_D3hot);
+}
+
+/**
+ * idpf_cfg_hw - Initialize HW struct
+ * @adapter: adapter to setup hw struct for
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int idpf_cfg_hw(struct idpf_adapter *adapter)
+{
+ struct pci_dev *pdev = adapter->pdev;
+ struct idpf_hw *hw = &adapter->hw;
+
+ hw->hw_addr = pcim_iomap_table(pdev)[0];
+ if (!hw->hw_addr) {
+ pci_err(pdev, "failed to allocate PCI iomap table\n");
+
+ return -ENOMEM;
+ }
+
+ hw->back = adapter;
+
+ return 0;
+}
+
+/**
+ * idpf_probe - Device initialization routine
+ * @pdev: PCI device information struct
+ * @ent: entry in idpf_pci_tbl
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int idpf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+{
+ struct device *dev = &pdev->dev;
+ struct idpf_adapter *adapter;
+ int err;
+
+ adapter = kzalloc(sizeof(*adapter), GFP_KERNEL);
+ if (!adapter)
+ return -ENOMEM;
+
+ adapter->req_tx_splitq = true;
+ adapter->req_rx_splitq = true;
+
+ switch (ent->device) {
+ case IDPF_DEV_ID_PF:
+ idpf_dev_ops_init(adapter);
+ break;
+ case IDPF_DEV_ID_VF:
+ idpf_vf_dev_ops_init(adapter);
+ adapter->crc_enable = true;
+ break;
+ default:
+ err = -ENODEV;
+ dev_err(&pdev->dev, "Unexpected dev ID 0x%x in idpf probe\n",
+ ent->device);
+ goto err_free;
+ }
+
+ adapter->pdev = pdev;
+ err = pcim_enable_device(pdev);
+ if (err)
+ goto err_free;
+
+ err = pcim_iomap_regions(pdev, BIT(0), pci_name(pdev));
+ if (err) {
+ pci_err(pdev, "pcim_iomap_regions failed %pe\n", ERR_PTR(err));
+
+ goto err_free;
+ }
+
+ /* set up for high or low dma */
+ err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));
+ if (err) {
+ pci_err(pdev, "DMA configuration failed: %pe\n", ERR_PTR(err));
+
+ goto err_free;
+ }
+
+ pci_set_master(pdev);
+ pci_set_drvdata(pdev, adapter);
+
+ adapter->init_wq = alloc_workqueue("%s-%s-init", 0, 0,
+ dev_driver_string(dev),
+ dev_name(dev));
+ if (!adapter->init_wq) {
+ dev_err(dev, "Failed to allocate init workqueue\n");
+ err = -ENOMEM;
+ goto err_free;
+ }
+
+ adapter->serv_wq = alloc_workqueue("%s-%s-service", 0, 0,
+ dev_driver_string(dev),
+ dev_name(dev));
+ if (!adapter->serv_wq) {
+ dev_err(dev, "Failed to allocate service workqueue\n");
+ err = -ENOMEM;
+ goto err_serv_wq_alloc;
+ }
+
+ adapter->mbx_wq = alloc_workqueue("%s-%s-mbx", 0, 0,
+ dev_driver_string(dev),
+ dev_name(dev));
+ if (!adapter->mbx_wq) {
+ dev_err(dev, "Failed to allocate mailbox workqueue\n");
+ err = -ENOMEM;
+ goto err_mbx_wq_alloc;
+ }
+
+ adapter->stats_wq = alloc_workqueue("%s-%s-stats", 0, 0,
+ dev_driver_string(dev),
+ dev_name(dev));
+ if (!adapter->stats_wq) {
+ dev_err(dev, "Failed to allocate workqueue\n");
+ err = -ENOMEM;
+ goto err_stats_wq_alloc;
+ }
+
+ adapter->vc_event_wq = alloc_workqueue("%s-%s-vc_event", 0, 0,
+ dev_driver_string(dev),
+ dev_name(dev));
+ if (!adapter->vc_event_wq) {
+ dev_err(dev, "Failed to allocate virtchnl event workqueue\n");
+ err = -ENOMEM;
+ goto err_vc_event_wq_alloc;
+ }
+
+ /* setup msglvl */
+ adapter->msg_enable = netif_msg_init(-1, IDPF_AVAIL_NETIF_M);
+
+ err = idpf_cfg_hw(adapter);
+ if (err) {
+ dev_err(dev, "Failed to configure HW structure for adapter: %d\n",
+ err);
+ goto err_cfg_hw;
+ }
+
+ mutex_init(&adapter->vport_ctrl_lock);
+ mutex_init(&adapter->vector_lock);
+ mutex_init(&adapter->queue_lock);
+ mutex_init(&adapter->vc_buf_lock);
+
+ init_waitqueue_head(&adapter->vchnl_wq);
+
+ INIT_DELAYED_WORK(&adapter->init_task, idpf_init_task);
+ INIT_DELAYED_WORK(&adapter->serv_task, idpf_service_task);
+ INIT_DELAYED_WORK(&adapter->mbx_task, idpf_mbx_task);
+ INIT_DELAYED_WORK(&adapter->stats_task, idpf_statistics_task);
+ INIT_DELAYED_WORK(&adapter->vc_event_task, idpf_vc_event_task);
+
+ adapter->dev_ops.reg_ops.reset_reg_init(adapter);
+ set_bit(IDPF_HR_DRV_LOAD, adapter->flags);
+ queue_delayed_work(adapter->vc_event_wq, &adapter->vc_event_task,
+ msecs_to_jiffies(10 * (pdev->devfn & 0x07)));
+
+ return 0;
+
+err_cfg_hw:
+ destroy_workqueue(adapter->vc_event_wq);
+err_vc_event_wq_alloc:
+ destroy_workqueue(adapter->stats_wq);
+err_stats_wq_alloc:
+ destroy_workqueue(adapter->mbx_wq);
+err_mbx_wq_alloc:
+ destroy_workqueue(adapter->serv_wq);
+err_serv_wq_alloc:
+ destroy_workqueue(adapter->init_wq);
+err_free:
+ kfree(adapter);
+ return err;
+}
+
+/* idpf_pci_tbl - PCI Dev idpf ID Table
+ */
+static const struct pci_device_id idpf_pci_tbl[] = {
+ { PCI_VDEVICE(INTEL, IDPF_DEV_ID_PF)},
+ { PCI_VDEVICE(INTEL, IDPF_DEV_ID_VF)},
+ { /* Sentinel */ }
+};
+MODULE_DEVICE_TABLE(pci, idpf_pci_tbl);
+
+static struct pci_driver idpf_driver = {
+ .name = KBUILD_MODNAME,
+ .id_table = idpf_pci_tbl,
+ .probe = idpf_probe,
+ .sriov_configure = idpf_sriov_configure,
+ .remove = idpf_remove,
+ .shutdown = idpf_shutdown,
+};
+module_pci_driver(idpf_driver);
diff --git a/drivers/net/ethernet/intel/idpf/idpf_mem.h b/drivers/net/ethernet/intel/idpf/idpf_mem.h
new file mode 100644
index 000000000000..b21a04fccf0f
--- /dev/null
+++ b/drivers/net/ethernet/intel/idpf/idpf_mem.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright (C) 2023 Intel Corporation */
+
+#ifndef _IDPF_MEM_H_
+#define _IDPF_MEM_H_
+
+#include <linux/io.h>
+
+struct idpf_dma_mem {
+ void *va;
+ dma_addr_t pa;
+ size_t size;
+};
+
+#define wr32(a, reg, value) writel((value), ((a)->hw_addr + (reg)))
+#define rd32(a, reg) readl((a)->hw_addr + (reg))
+#define wr64(a, reg, value) writeq((value), ((a)->hw_addr + (reg)))
+#define rd64(a, reg) readq((a)->hw_addr + (reg))
+
+#endif /* _IDPF_MEM_H_ */
diff --git a/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c
new file mode 100644
index 000000000000..81288a17da2a
--- /dev/null
+++ b/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c
@@ -0,0 +1,1183 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2023 Intel Corporation */
+
+#include "idpf.h"
+
+/**
+ * idpf_tx_singleq_csum - Enable tx checksum offloads
+ * @skb: pointer to skb
+ * @off: pointer to struct that holds offload parameters
+ *
+ * Returns 0 or error (negative) if checksum offload cannot be executed, 1
+ * otherwise.
+ */
+static int idpf_tx_singleq_csum(struct sk_buff *skb,
+ struct idpf_tx_offload_params *off)
+{
+ u32 l4_len, l3_len, l2_len;
+ union {
+ struct iphdr *v4;
+ struct ipv6hdr *v6;
+ unsigned char *hdr;
+ } ip;
+ union {
+ struct tcphdr *tcp;
+ unsigned char *hdr;
+ } l4;
+ u32 offset, cmd = 0;
+ u8 l4_proto = 0;
+ __be16 frag_off;
+ bool is_tso;
+
+ if (skb->ip_summed != CHECKSUM_PARTIAL)
+ return 0;
+
+ ip.hdr = skb_network_header(skb);
+ l4.hdr = skb_transport_header(skb);
+
+ /* compute outer L2 header size */
+ l2_len = ip.hdr - skb->data;
+ offset = FIELD_PREP(0x3F << IDPF_TX_DESC_LEN_MACLEN_S, l2_len / 2);
+ is_tso = !!(off->tx_flags & IDPF_TX_FLAGS_TSO);
+ if (skb->encapsulation) {
+ u32 tunnel = 0;
+
+ /* define outer network header type */
+ if (off->tx_flags & IDPF_TX_FLAGS_IPV4) {
+ /* The stack computes the IP header already, the only
+ * time we need the hardware to recompute it is in the
+ * case of TSO.
+ */
+ tunnel |= is_tso ?
+ IDPF_TX_CTX_EXT_IP_IPV4 :
+ IDPF_TX_CTX_EXT_IP_IPV4_NO_CSUM;
+
+ l4_proto = ip.v4->protocol;
+ } else if (off->tx_flags & IDPF_TX_FLAGS_IPV6) {
+ tunnel |= IDPF_TX_CTX_EXT_IP_IPV6;
+
+ l4_proto = ip.v6->nexthdr;
+ if (ipv6_ext_hdr(l4_proto))
+ ipv6_skip_exthdr(skb, skb_network_offset(skb) +
+ sizeof(*ip.v6),
+ &l4_proto, &frag_off);
+ }
+
+ /* define outer transport */
+ switch (l4_proto) {
+ case IPPROTO_UDP:
+ tunnel |= IDPF_TXD_CTX_UDP_TUNNELING;
+ break;
+ case IPPROTO_GRE:
+ tunnel |= IDPF_TXD_CTX_GRE_TUNNELING;
+ break;
+ case IPPROTO_IPIP:
+ case IPPROTO_IPV6:
+ l4.hdr = skb_inner_network_header(skb);
+ break;
+ default:
+ if (is_tso)
+ return -1;
+
+ skb_checksum_help(skb);
+
+ return 0;
+ }
+ off->tx_flags |= IDPF_TX_FLAGS_TUNNEL;
+
+ /* compute outer L3 header size */
+ tunnel |= FIELD_PREP(IDPF_TXD_CTX_QW0_TUNN_EXT_IPLEN_M,
+ (l4.hdr - ip.hdr) / 4);
+
+ /* switch IP header pointer from outer to inner header */
+ ip.hdr = skb_inner_network_header(skb);
+
+ /* compute tunnel header size */
+ tunnel |= FIELD_PREP(IDPF_TXD_CTX_QW0_TUNN_NATLEN_M,
+ (ip.hdr - l4.hdr) / 2);
+
+ /* indicate if we need to offload outer UDP header */
+ if (is_tso &&
+ !(skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL) &&
+ (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM))
+ tunnel |= IDPF_TXD_CTX_QW0_TUNN_L4T_CS_M;
+
+ /* record tunnel offload values */
+ off->cd_tunneling |= tunnel;
+
+ /* switch L4 header pointer from outer to inner */
+ l4.hdr = skb_inner_transport_header(skb);
+ l4_proto = 0;
+
+ /* reset type as we transition from outer to inner headers */
+ off->tx_flags &= ~(IDPF_TX_FLAGS_IPV4 | IDPF_TX_FLAGS_IPV6);
+ if (ip.v4->version == 4)
+ off->tx_flags |= IDPF_TX_FLAGS_IPV4;
+ if (ip.v6->version == 6)
+ off->tx_flags |= IDPF_TX_FLAGS_IPV6;
+ }
+
+ /* Enable IP checksum offloads */
+ if (off->tx_flags & IDPF_TX_FLAGS_IPV4) {
+ l4_proto = ip.v4->protocol;
+ /* See comment above regarding need for HW to recompute IP
+ * header checksum in the case of TSO.
+ */
+ if (is_tso)
+ cmd |= IDPF_TX_DESC_CMD_IIPT_IPV4_CSUM;
+ else
+ cmd |= IDPF_TX_DESC_CMD_IIPT_IPV4;
+
+ } else if (off->tx_flags & IDPF_TX_FLAGS_IPV6) {
+ cmd |= IDPF_TX_DESC_CMD_IIPT_IPV6;
+ l4_proto = ip.v6->nexthdr;
+ if (ipv6_ext_hdr(l4_proto))
+ ipv6_skip_exthdr(skb, skb_network_offset(skb) +
+ sizeof(*ip.v6), &l4_proto,
+ &frag_off);
+ } else {
+ return -1;
+ }
+
+ /* compute inner L3 header size */
+ l3_len = l4.hdr - ip.hdr;
+ offset |= (l3_len / 4) << IDPF_TX_DESC_LEN_IPLEN_S;
+
+ /* Enable L4 checksum offloads */
+ switch (l4_proto) {
+ case IPPROTO_TCP:
+ /* enable checksum offloads */
+ cmd |= IDPF_TX_DESC_CMD_L4T_EOFT_TCP;
+ l4_len = l4.tcp->doff;
+ break;
+ case IPPROTO_UDP:
+ /* enable UDP checksum offload */
+ cmd |= IDPF_TX_DESC_CMD_L4T_EOFT_UDP;
+ l4_len = sizeof(struct udphdr) >> 2;
+ break;
+ case IPPROTO_SCTP:
+ /* enable SCTP checksum offload */
+ cmd |= IDPF_TX_DESC_CMD_L4T_EOFT_SCTP;
+ l4_len = sizeof(struct sctphdr) >> 2;
+ break;
+ default:
+ if (is_tso)
+ return -1;
+
+ skb_checksum_help(skb);
+
+ return 0;
+ }
+
+ offset |= l4_len << IDPF_TX_DESC_LEN_L4_LEN_S;
+ off->td_cmd |= cmd;
+ off->hdr_offsets |= offset;
+
+ return 1;
+}
+
+/**
+ * idpf_tx_singleq_map - Build the Tx base descriptor
+ * @tx_q: queue to send buffer on
+ * @first: first buffer info buffer to use
+ * @offloads: pointer to struct that holds offload parameters
+ *
+ * This function loops over the skb data pointed to by *first
+ * and gets a physical address for each memory location and programs
+ * it and the length into the transmit base mode descriptor.
+ */
+static void idpf_tx_singleq_map(struct idpf_queue *tx_q,
+ struct idpf_tx_buf *first,
+ struct idpf_tx_offload_params *offloads)
+{
+ u32 offsets = offloads->hdr_offsets;
+ struct idpf_tx_buf *tx_buf = first;
+ struct idpf_base_tx_desc *tx_desc;
+ struct sk_buff *skb = first->skb;
+ u64 td_cmd = offloads->td_cmd;
+ unsigned int data_len, size;
+ u16 i = tx_q->next_to_use;
+ struct netdev_queue *nq;
+ skb_frag_t *frag;
+ dma_addr_t dma;
+ u64 td_tag = 0;
+
+ data_len = skb->data_len;
+ size = skb_headlen(skb);
+
+ tx_desc = IDPF_BASE_TX_DESC(tx_q, i);
+
+ dma = dma_map_single(tx_q->dev, skb->data, size, DMA_TO_DEVICE);
+
+ /* write each descriptor with CRC bit */
+ if (tx_q->vport->crc_enable)
+ td_cmd |= IDPF_TX_DESC_CMD_ICRC;
+
+ for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
+ unsigned int max_data = IDPF_TX_MAX_DESC_DATA_ALIGNED;
+
+ if (dma_mapping_error(tx_q->dev, dma))
+ return idpf_tx_dma_map_error(tx_q, skb, first, i);
+
+ /* record length, and DMA address */
+ dma_unmap_len_set(tx_buf, len, size);
+ dma_unmap_addr_set(tx_buf, dma, dma);
+
+ /* align size to end of page */
+ max_data += -dma & (IDPF_TX_MAX_READ_REQ_SIZE - 1);
+ tx_desc->buf_addr = cpu_to_le64(dma);
+
+ /* account for data chunks larger than the hardware
+ * can handle
+ */
+ while (unlikely(size > IDPF_TX_MAX_DESC_DATA)) {
+ tx_desc->qw1 = idpf_tx_singleq_build_ctob(td_cmd,
+ offsets,
+ max_data,
+ td_tag);
+ tx_desc++;
+ i++;
+
+ if (i == tx_q->desc_count) {
+ tx_desc = IDPF_BASE_TX_DESC(tx_q, 0);
+ i = 0;
+ }
+
+ dma += max_data;
+ size -= max_data;
+
+ max_data = IDPF_TX_MAX_DESC_DATA_ALIGNED;
+ tx_desc->buf_addr = cpu_to_le64(dma);
+ }
+
+ if (!data_len)
+ break;
+
+ tx_desc->qw1 = idpf_tx_singleq_build_ctob(td_cmd, offsets,
+ size, td_tag);
+ tx_desc++;
+ i++;
+
+ if (i == tx_q->desc_count) {
+ tx_desc = IDPF_BASE_TX_DESC(tx_q, 0);
+ i = 0;
+ }
+
+ size = skb_frag_size(frag);
+ data_len -= size;
+
+ dma = skb_frag_dma_map(tx_q->dev, frag, 0, size,
+ DMA_TO_DEVICE);
+
+ tx_buf = &tx_q->tx_buf[i];
+ }
+
+ skb_tx_timestamp(first->skb);
+
+ /* write last descriptor with RS and EOP bits */
+ td_cmd |= (u64)(IDPF_TX_DESC_CMD_EOP | IDPF_TX_DESC_CMD_RS);
+
+ tx_desc->qw1 = idpf_tx_singleq_build_ctob(td_cmd, offsets,
+ size, td_tag);
+
+ IDPF_SINGLEQ_BUMP_RING_IDX(tx_q, i);
+
+ /* set next_to_watch value indicating a packet is present */
+ first->next_to_watch = tx_desc;
+
+ nq = netdev_get_tx_queue(tx_q->vport->netdev, tx_q->idx);
+ netdev_tx_sent_queue(nq, first->bytecount);
+
+ idpf_tx_buf_hw_update(tx_q, i, netdev_xmit_more());
+}
+
+/**
+ * idpf_tx_singleq_get_ctx_desc - grab next desc and update buffer ring
+ * @txq: queue to put context descriptor on
+ *
+ * Since the TX buffer rings mimics the descriptor ring, update the tx buffer
+ * ring entry to reflect that this index is a context descriptor
+ */
+static struct idpf_base_tx_ctx_desc *
+idpf_tx_singleq_get_ctx_desc(struct idpf_queue *txq)
+{
+ struct idpf_base_tx_ctx_desc *ctx_desc;
+ int ntu = txq->next_to_use;
+
+ memset(&txq->tx_buf[ntu], 0, sizeof(struct idpf_tx_buf));
+ txq->tx_buf[ntu].ctx_entry = true;
+
+ ctx_desc = IDPF_BASE_TX_CTX_DESC(txq, ntu);
+
+ IDPF_SINGLEQ_BUMP_RING_IDX(txq, ntu);
+ txq->next_to_use = ntu;
+
+ return ctx_desc;
+}
+
+/**
+ * idpf_tx_singleq_build_ctx_desc - populate context descriptor
+ * @txq: queue to send buffer on
+ * @offload: offload parameter structure
+ **/
+static void idpf_tx_singleq_build_ctx_desc(struct idpf_queue *txq,
+ struct idpf_tx_offload_params *offload)
+{
+ struct idpf_base_tx_ctx_desc *desc = idpf_tx_singleq_get_ctx_desc(txq);
+ u64 qw1 = (u64)IDPF_TX_DESC_DTYPE_CTX;
+
+ if (offload->tso_segs) {
+ qw1 |= IDPF_TX_CTX_DESC_TSO << IDPF_TXD_CTX_QW1_CMD_S;
+ qw1 |= ((u64)offload->tso_len << IDPF_TXD_CTX_QW1_TSO_LEN_S) &
+ IDPF_TXD_CTX_QW1_TSO_LEN_M;
+ qw1 |= ((u64)offload->mss << IDPF_TXD_CTX_QW1_MSS_S) &
+ IDPF_TXD_CTX_QW1_MSS_M;
+
+ u64_stats_update_begin(&txq->stats_sync);
+ u64_stats_inc(&txq->q_stats.tx.lso_pkts);
+ u64_stats_update_end(&txq->stats_sync);
+ }
+
+ desc->qw0.tunneling_params = cpu_to_le32(offload->cd_tunneling);
+
+ desc->qw0.l2tag2 = 0;
+ desc->qw0.rsvd1 = 0;
+ desc->qw1 = cpu_to_le64(qw1);
+}
+
+/**
+ * idpf_tx_singleq_frame - Sends buffer on Tx ring using base descriptors
+ * @skb: send buffer
+ * @tx_q: queue to send buffer on
+ *
+ * Returns NETDEV_TX_OK if sent, else an error code
+ */
+static netdev_tx_t idpf_tx_singleq_frame(struct sk_buff *skb,
+ struct idpf_queue *tx_q)
+{
+ struct idpf_tx_offload_params offload = { };
+ struct idpf_tx_buf *first;
+ unsigned int count;
+ __be16 protocol;
+ int csum, tso;
+
+ count = idpf_tx_desc_count_required(tx_q, skb);
+ if (unlikely(!count))
+ return idpf_tx_drop_skb(tx_q, skb);
+
+ if (idpf_tx_maybe_stop_common(tx_q,
+ count + IDPF_TX_DESCS_PER_CACHE_LINE +
+ IDPF_TX_DESCS_FOR_CTX)) {
+ idpf_tx_buf_hw_update(tx_q, tx_q->next_to_use, false);
+
+ return NETDEV_TX_BUSY;
+ }
+
+ protocol = vlan_get_protocol(skb);
+ if (protocol == htons(ETH_P_IP))
+ offload.tx_flags |= IDPF_TX_FLAGS_IPV4;
+ else if (protocol == htons(ETH_P_IPV6))
+ offload.tx_flags |= IDPF_TX_FLAGS_IPV6;
+
+ tso = idpf_tso(skb, &offload);
+ if (tso < 0)
+ goto out_drop;
+
+ csum = idpf_tx_singleq_csum(skb, &offload);
+ if (csum < 0)
+ goto out_drop;
+
+ if (tso || offload.cd_tunneling)
+ idpf_tx_singleq_build_ctx_desc(tx_q, &offload);
+
+ /* record the location of the first descriptor for this packet */
+ first = &tx_q->tx_buf[tx_q->next_to_use];
+ first->skb = skb;
+
+ if (tso) {
+ first->gso_segs = offload.tso_segs;
+ first->bytecount = skb->len + ((first->gso_segs - 1) * offload.tso_hdr_len);
+ } else {
+ first->bytecount = max_t(unsigned int, skb->len, ETH_ZLEN);
+ first->gso_segs = 1;
+ }
+ idpf_tx_singleq_map(tx_q, first, &offload);
+
+ return NETDEV_TX_OK;
+
+out_drop:
+ return idpf_tx_drop_skb(tx_q, skb);
+}
+
+/**
+ * idpf_tx_singleq_start - Selects the right Tx queue to send buffer
+ * @skb: send buffer
+ * @netdev: network interface device structure
+ *
+ * Returns NETDEV_TX_OK if sent, else an error code
+ */
+netdev_tx_t idpf_tx_singleq_start(struct sk_buff *skb,
+ struct net_device *netdev)
+{
+ struct idpf_vport *vport = idpf_netdev_to_vport(netdev);
+ struct idpf_queue *tx_q;
+
+ tx_q = vport->txqs[skb_get_queue_mapping(skb)];
+
+ /* hardware can't handle really short frames, hardware padding works
+ * beyond this point
+ */
+ if (skb_put_padto(skb, IDPF_TX_MIN_PKT_LEN)) {
+ idpf_tx_buf_hw_update(tx_q, tx_q->next_to_use, false);
+
+ return NETDEV_TX_OK;
+ }
+
+ return idpf_tx_singleq_frame(skb, tx_q);
+}
+
+/**
+ * idpf_tx_singleq_clean - Reclaim resources from queue
+ * @tx_q: Tx queue to clean
+ * @napi_budget: Used to determine if we are in netpoll
+ * @cleaned: returns number of packets cleaned
+ *
+ */
+static bool idpf_tx_singleq_clean(struct idpf_queue *tx_q, int napi_budget,
+ int *cleaned)
+{
+ unsigned int budget = tx_q->vport->compln_clean_budget;
+ unsigned int total_bytes = 0, total_pkts = 0;
+ struct idpf_base_tx_desc *tx_desc;
+ s16 ntc = tx_q->next_to_clean;
+ struct idpf_netdev_priv *np;
+ struct idpf_tx_buf *tx_buf;
+ struct idpf_vport *vport;
+ struct netdev_queue *nq;
+ bool dont_wake;
+
+ tx_desc = IDPF_BASE_TX_DESC(tx_q, ntc);
+ tx_buf = &tx_q->tx_buf[ntc];
+ ntc -= tx_q->desc_count;
+
+ do {
+ struct idpf_base_tx_desc *eop_desc;
+
+ /* If this entry in the ring was used as a context descriptor,
+ * it's corresponding entry in the buffer ring will indicate as
+ * such. We can skip this descriptor since there is no buffer
+ * to clean.
+ */
+ if (tx_buf->ctx_entry) {
+ /* Clear this flag here to avoid stale flag values when
+ * this buffer is used for actual data in the future.
+ * There are cases where the tx_buf struct / the flags
+ * field will not be cleared before being reused.
+ */
+ tx_buf->ctx_entry = false;
+ goto fetch_next_txq_desc;
+ }
+
+ /* if next_to_watch is not set then no work pending */
+ eop_desc = (struct idpf_base_tx_desc *)tx_buf->next_to_watch;
+ if (!eop_desc)
+ break;
+
+ /* prevent any other reads prior to eop_desc */
+ smp_rmb();
+
+ /* if the descriptor isn't done, no work yet to do */
+ if (!(eop_desc->qw1 &
+ cpu_to_le64(IDPF_TX_DESC_DTYPE_DESC_DONE)))
+ break;
+
+ /* clear next_to_watch to prevent false hangs */
+ tx_buf->next_to_watch = NULL;
+
+ /* update the statistics for this packet */
+ total_bytes += tx_buf->bytecount;
+ total_pkts += tx_buf->gso_segs;
+
+ napi_consume_skb(tx_buf->skb, napi_budget);
+
+ /* unmap skb header data */
+ dma_unmap_single(tx_q->dev,
+ dma_unmap_addr(tx_buf, dma),
+ dma_unmap_len(tx_buf, len),
+ DMA_TO_DEVICE);
+
+ /* clear tx_buf data */
+ tx_buf->skb = NULL;
+ dma_unmap_len_set(tx_buf, len, 0);
+
+ /* unmap remaining buffers */
+ while (tx_desc != eop_desc) {
+ tx_buf++;
+ tx_desc++;
+ ntc++;
+ if (unlikely(!ntc)) {
+ ntc -= tx_q->desc_count;
+ tx_buf = tx_q->tx_buf;
+ tx_desc = IDPF_BASE_TX_DESC(tx_q, 0);
+ }
+
+ /* unmap any remaining paged data */
+ if (dma_unmap_len(tx_buf, len)) {
+ dma_unmap_page(tx_q->dev,
+ dma_unmap_addr(tx_buf, dma),
+ dma_unmap_len(tx_buf, len),
+ DMA_TO_DEVICE);
+ dma_unmap_len_set(tx_buf, len, 0);
+ }
+ }
+
+ /* update budget only if we did something */
+ budget--;
+
+fetch_next_txq_desc:
+ tx_buf++;
+ tx_desc++;
+ ntc++;
+ if (unlikely(!ntc)) {
+ ntc -= tx_q->desc_count;
+ tx_buf = tx_q->tx_buf;
+ tx_desc = IDPF_BASE_TX_DESC(tx_q, 0);
+ }
+ } while (likely(budget));
+
+ ntc += tx_q->desc_count;
+ tx_q->next_to_clean = ntc;
+
+ *cleaned += total_pkts;
+
+ u64_stats_update_begin(&tx_q->stats_sync);
+ u64_stats_add(&tx_q->q_stats.tx.packets, total_pkts);
+ u64_stats_add(&tx_q->q_stats.tx.bytes, total_bytes);
+ u64_stats_update_end(&tx_q->stats_sync);
+
+ vport = tx_q->vport;
+ np = netdev_priv(vport->netdev);
+ nq = netdev_get_tx_queue(vport->netdev, tx_q->idx);
+
+ dont_wake = np->state != __IDPF_VPORT_UP ||
+ !netif_carrier_ok(vport->netdev);
+ __netif_txq_completed_wake(nq, total_pkts, total_bytes,
+ IDPF_DESC_UNUSED(tx_q), IDPF_TX_WAKE_THRESH,
+ dont_wake);
+
+ return !!budget;
+}
+
+/**
+ * idpf_tx_singleq_clean_all - Clean all Tx queues
+ * @q_vec: queue vector
+ * @budget: Used to determine if we are in netpoll
+ * @cleaned: returns number of packets cleaned
+ *
+ * Returns false if clean is not complete else returns true
+ */
+static bool idpf_tx_singleq_clean_all(struct idpf_q_vector *q_vec, int budget,
+ int *cleaned)
+{
+ u16 num_txq = q_vec->num_txq;
+ bool clean_complete = true;
+ int i, budget_per_q;
+
+ budget_per_q = num_txq ? max(budget / num_txq, 1) : 0;
+ for (i = 0; i < num_txq; i++) {
+ struct idpf_queue *q;
+
+ q = q_vec->tx[i];
+ clean_complete &= idpf_tx_singleq_clean(q, budget_per_q,
+ cleaned);
+ }
+
+ return clean_complete;
+}
+
+/**
+ * idpf_rx_singleq_test_staterr - tests bits in Rx descriptor
+ * status and error fields
+ * @rx_desc: pointer to receive descriptor (in le64 format)
+ * @stat_err_bits: value to mask
+ *
+ * This function does some fast chicanery in order to return the
+ * value of the mask which is really only used for boolean tests.
+ * The status_error_ptype_len doesn't need to be shifted because it begins
+ * at offset zero.
+ */
+static bool idpf_rx_singleq_test_staterr(const union virtchnl2_rx_desc *rx_desc,
+ const u64 stat_err_bits)
+{
+ return !!(rx_desc->base_wb.qword1.status_error_ptype_len &
+ cpu_to_le64(stat_err_bits));
+}
+
+/**
+ * idpf_rx_singleq_is_non_eop - process handling of non-EOP buffers
+ * @rxq: Rx ring being processed
+ * @rx_desc: Rx descriptor for current buffer
+ * @skb: Current socket buffer containing buffer in progress
+ * @ntc: next to clean
+ */
+static bool idpf_rx_singleq_is_non_eop(struct idpf_queue *rxq,
+ union virtchnl2_rx_desc *rx_desc,
+ struct sk_buff *skb, u16 ntc)
+{
+ /* if we are the last buffer then there is nothing else to do */
+ if (likely(idpf_rx_singleq_test_staterr(rx_desc, IDPF_RXD_EOF_SINGLEQ)))
+ return false;
+
+ return true;
+}
+
+/**
+ * idpf_rx_singleq_csum - Indicate in skb if checksum is good
+ * @rxq: Rx ring being processed
+ * @skb: skb currently being received and modified
+ * @csum_bits: checksum bits from descriptor
+ * @ptype: the packet type decoded by hardware
+ *
+ * skb->protocol must be set before this function is called
+ */
+static void idpf_rx_singleq_csum(struct idpf_queue *rxq, struct sk_buff *skb,
+ struct idpf_rx_csum_decoded *csum_bits,
+ u16 ptype)
+{
+ struct idpf_rx_ptype_decoded decoded;
+ bool ipv4, ipv6;
+
+ /* check if Rx checksum is enabled */
+ if (unlikely(!(rxq->vport->netdev->features & NETIF_F_RXCSUM)))
+ return;
+
+ /* check if HW has decoded the packet and checksum */
+ if (unlikely(!(csum_bits->l3l4p)))
+ return;
+
+ decoded = rxq->vport->rx_ptype_lkup[ptype];
+ if (unlikely(!(decoded.known && decoded.outer_ip)))
+ return;
+
+ ipv4 = IDPF_RX_PTYPE_TO_IPV(&decoded, IDPF_RX_PTYPE_OUTER_IPV4);
+ ipv6 = IDPF_RX_PTYPE_TO_IPV(&decoded, IDPF_RX_PTYPE_OUTER_IPV6);
+
+ /* Check if there were any checksum errors */
+ if (unlikely(ipv4 && (csum_bits->ipe || csum_bits->eipe)))
+ goto checksum_fail;
+
+ /* Device could not do any checksum offload for certain extension
+ * headers as indicated by setting IPV6EXADD bit
+ */
+ if (unlikely(ipv6 && csum_bits->ipv6exadd))
+ return;
+
+ /* check for L4 errors and handle packets that were not able to be
+ * checksummed due to arrival speed
+ */
+ if (unlikely(csum_bits->l4e))
+ goto checksum_fail;
+
+ if (unlikely(csum_bits->nat && csum_bits->eudpe))
+ goto checksum_fail;
+
+ /* Handle packets that were not able to be checksummed due to arrival
+ * speed, in this case the stack can compute the csum.
+ */
+ if (unlikely(csum_bits->pprs))
+ return;
+
+ /* If there is an outer header present that might contain a checksum
+ * we need to bump the checksum level by 1 to reflect the fact that
+ * we are indicating we validated the inner checksum.
+ */
+ if (decoded.tunnel_type >= IDPF_RX_PTYPE_TUNNEL_IP_GRENAT)
+ skb->csum_level = 1;
+
+ /* Only report checksum unnecessary for ICMP, TCP, UDP, or SCTP */
+ switch (decoded.inner_prot) {
+ case IDPF_RX_PTYPE_INNER_PROT_ICMP:
+ case IDPF_RX_PTYPE_INNER_PROT_TCP:
+ case IDPF_RX_PTYPE_INNER_PROT_UDP:
+ case IDPF_RX_PTYPE_INNER_PROT_SCTP:
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ return;
+ default:
+ return;
+ }
+
+checksum_fail:
+ u64_stats_update_begin(&rxq->stats_sync);
+ u64_stats_inc(&rxq->q_stats.rx.hw_csum_err);
+ u64_stats_update_end(&rxq->stats_sync);
+}
+
+/**
+ * idpf_rx_singleq_base_csum - Indicate in skb if hw indicated a good cksum
+ * @rx_q: Rx completion queue
+ * @skb: skb currently being received and modified
+ * @rx_desc: the receive descriptor
+ * @ptype: Rx packet type
+ *
+ * This function only operates on the VIRTCHNL2_RXDID_1_32B_BASE_M base 32byte
+ * descriptor writeback format.
+ **/
+static void idpf_rx_singleq_base_csum(struct idpf_queue *rx_q,
+ struct sk_buff *skb,
+ union virtchnl2_rx_desc *rx_desc,
+ u16 ptype)
+{
+ struct idpf_rx_csum_decoded csum_bits;
+ u32 rx_error, rx_status;
+ u64 qword;
+
+ qword = le64_to_cpu(rx_desc->base_wb.qword1.status_error_ptype_len);
+
+ rx_status = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_QW1_STATUS_M, qword);
+ rx_error = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_QW1_ERROR_M, qword);
+
+ csum_bits.ipe = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_ERROR_IPE_M, rx_error);
+ csum_bits.eipe = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_ERROR_EIPE_M,
+ rx_error);
+ csum_bits.l4e = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_ERROR_L4E_M, rx_error);
+ csum_bits.pprs = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_ERROR_PPRS_M,
+ rx_error);
+ csum_bits.l3l4p = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_STATUS_L3L4P_M,
+ rx_status);
+ csum_bits.ipv6exadd = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_STATUS_IPV6EXADD_M,
+ rx_status);
+ csum_bits.nat = 0;
+ csum_bits.eudpe = 0;
+
+ idpf_rx_singleq_csum(rx_q, skb, &csum_bits, ptype);
+}
+
+/**
+ * idpf_rx_singleq_flex_csum - Indicate in skb if hw indicated a good cksum
+ * @rx_q: Rx completion queue
+ * @skb: skb currently being received and modified
+ * @rx_desc: the receive descriptor
+ * @ptype: Rx packet type
+ *
+ * This function only operates on the VIRTCHNL2_RXDID_2_FLEX_SQ_NIC flexible
+ * descriptor writeback format.
+ **/
+static void idpf_rx_singleq_flex_csum(struct idpf_queue *rx_q,
+ struct sk_buff *skb,
+ union virtchnl2_rx_desc *rx_desc,
+ u16 ptype)
+{
+ struct idpf_rx_csum_decoded csum_bits;
+ u16 rx_status0, rx_status1;
+
+ rx_status0 = le16_to_cpu(rx_desc->flex_nic_wb.status_error0);
+ rx_status1 = le16_to_cpu(rx_desc->flex_nic_wb.status_error1);
+
+ csum_bits.ipe = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_IPE_M,
+ rx_status0);
+ csum_bits.eipe = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_EIPE_M,
+ rx_status0);
+ csum_bits.l4e = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_L4E_M,
+ rx_status0);
+ csum_bits.eudpe = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_M,
+ rx_status0);
+ csum_bits.l3l4p = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_L3L4P_M,
+ rx_status0);
+ csum_bits.ipv6exadd = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_IPV6EXADD_M,
+ rx_status0);
+ csum_bits.nat = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS1_NAT_M,
+ rx_status1);
+ csum_bits.pprs = 0;
+
+ idpf_rx_singleq_csum(rx_q, skb, &csum_bits, ptype);
+}
+
+/**
+ * idpf_rx_singleq_base_hash - set the hash value in the skb
+ * @rx_q: Rx completion queue
+ * @skb: skb currently being received and modified
+ * @rx_desc: specific descriptor
+ * @decoded: Decoded Rx packet type related fields
+ *
+ * This function only operates on the VIRTCHNL2_RXDID_1_32B_BASE_M base 32byte
+ * descriptor writeback format.
+ **/
+static void idpf_rx_singleq_base_hash(struct idpf_queue *rx_q,
+ struct sk_buff *skb,
+ union virtchnl2_rx_desc *rx_desc,
+ struct idpf_rx_ptype_decoded *decoded)
+{
+ u64 mask, qw1;
+
+ if (unlikely(!(rx_q->vport->netdev->features & NETIF_F_RXHASH)))
+ return;
+
+ mask = VIRTCHNL2_RX_BASE_DESC_FLTSTAT_RSS_HASH_M;
+ qw1 = le64_to_cpu(rx_desc->base_wb.qword1.status_error_ptype_len);
+
+ if (FIELD_GET(mask, qw1) == mask) {
+ u32 hash = le32_to_cpu(rx_desc->base_wb.qword0.hi_dword.rss);
+
+ skb_set_hash(skb, hash, idpf_ptype_to_htype(decoded));
+ }
+}
+
+/**
+ * idpf_rx_singleq_flex_hash - set the hash value in the skb
+ * @rx_q: Rx completion queue
+ * @skb: skb currently being received and modified
+ * @rx_desc: specific descriptor
+ * @decoded: Decoded Rx packet type related fields
+ *
+ * This function only operates on the VIRTCHNL2_RXDID_2_FLEX_SQ_NIC flexible
+ * descriptor writeback format.
+ **/
+static void idpf_rx_singleq_flex_hash(struct idpf_queue *rx_q,
+ struct sk_buff *skb,
+ union virtchnl2_rx_desc *rx_desc,
+ struct idpf_rx_ptype_decoded *decoded)
+{
+ if (unlikely(!(rx_q->vport->netdev->features & NETIF_F_RXHASH)))
+ return;
+
+ if (FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_RSS_VALID_M,
+ le16_to_cpu(rx_desc->flex_nic_wb.status_error0)))
+ skb_set_hash(skb, le32_to_cpu(rx_desc->flex_nic_wb.rss_hash),
+ idpf_ptype_to_htype(decoded));
+}
+
+/**
+ * idpf_rx_singleq_process_skb_fields - Populate skb header fields from Rx
+ * descriptor
+ * @rx_q: Rx ring being processed
+ * @skb: pointer to current skb being populated
+ * @rx_desc: descriptor for skb
+ * @ptype: packet type
+ *
+ * This function checks the ring, descriptor, and packet information in
+ * order to populate the hash, checksum, VLAN, protocol, and
+ * other fields within the skb.
+ */
+static void idpf_rx_singleq_process_skb_fields(struct idpf_queue *rx_q,
+ struct sk_buff *skb,
+ union virtchnl2_rx_desc *rx_desc,
+ u16 ptype)
+{
+ struct idpf_rx_ptype_decoded decoded =
+ rx_q->vport->rx_ptype_lkup[ptype];
+
+ /* modifies the skb - consumes the enet header */
+ skb->protocol = eth_type_trans(skb, rx_q->vport->netdev);
+
+ /* Check if we're using base mode descriptor IDs */
+ if (rx_q->rxdids == VIRTCHNL2_RXDID_1_32B_BASE_M) {
+ idpf_rx_singleq_base_hash(rx_q, skb, rx_desc, &decoded);
+ idpf_rx_singleq_base_csum(rx_q, skb, rx_desc, ptype);
+ } else {
+ idpf_rx_singleq_flex_hash(rx_q, skb, rx_desc, &decoded);
+ idpf_rx_singleq_flex_csum(rx_q, skb, rx_desc, ptype);
+ }
+}
+
+/**
+ * idpf_rx_singleq_buf_hw_alloc_all - Replace used receive buffers
+ * @rx_q: queue for which the hw buffers are allocated
+ * @cleaned_count: number of buffers to replace
+ *
+ * Returns false if all allocations were successful, true if any fail
+ */
+bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_queue *rx_q,
+ u16 cleaned_count)
+{
+ struct virtchnl2_singleq_rx_buf_desc *desc;
+ u16 nta = rx_q->next_to_alloc;
+ struct idpf_rx_buf *buf;
+
+ if (!cleaned_count)
+ return false;
+
+ desc = IDPF_SINGLEQ_RX_BUF_DESC(rx_q, nta);
+ buf = &rx_q->rx_buf.buf[nta];
+
+ do {
+ dma_addr_t addr;
+
+ addr = idpf_alloc_page(rx_q->pp, buf, rx_q->rx_buf_size);
+ if (unlikely(addr == DMA_MAPPING_ERROR))
+ break;
+
+ /* Refresh the desc even if buffer_addrs didn't change
+ * because each write-back erases this info.
+ */
+ desc->pkt_addr = cpu_to_le64(addr);
+ desc->hdr_addr = 0;
+ desc++;
+
+ buf++;
+ nta++;
+ if (unlikely(nta == rx_q->desc_count)) {
+ desc = IDPF_SINGLEQ_RX_BUF_DESC(rx_q, 0);
+ buf = rx_q->rx_buf.buf;
+ nta = 0;
+ }
+
+ cleaned_count--;
+ } while (cleaned_count);
+
+ if (rx_q->next_to_alloc != nta) {
+ idpf_rx_buf_hw_update(rx_q, nta);
+ rx_q->next_to_alloc = nta;
+ }
+
+ return !!cleaned_count;
+}
+
+/**
+ * idpf_rx_singleq_extract_base_fields - Extract fields from the Rx descriptor
+ * @rx_q: Rx descriptor queue
+ * @rx_desc: the descriptor to process
+ * @fields: storage for extracted values
+ *
+ * Decode the Rx descriptor and extract relevant information including the
+ * size and Rx packet type.
+ *
+ * This function only operates on the VIRTCHNL2_RXDID_1_32B_BASE_M base 32byte
+ * descriptor writeback format.
+ */
+static void idpf_rx_singleq_extract_base_fields(struct idpf_queue *rx_q,
+ union virtchnl2_rx_desc *rx_desc,
+ struct idpf_rx_extracted *fields)
+{
+ u64 qword;
+
+ qword = le64_to_cpu(rx_desc->base_wb.qword1.status_error_ptype_len);
+
+ fields->size = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_QW1_LEN_PBUF_M, qword);
+ fields->rx_ptype = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_QW1_PTYPE_M, qword);
+}
+
+/**
+ * idpf_rx_singleq_extract_flex_fields - Extract fields from the Rx descriptor
+ * @rx_q: Rx descriptor queue
+ * @rx_desc: the descriptor to process
+ * @fields: storage for extracted values
+ *
+ * Decode the Rx descriptor and extract relevant information including the
+ * size and Rx packet type.
+ *
+ * This function only operates on the VIRTCHNL2_RXDID_2_FLEX_SQ_NIC flexible
+ * descriptor writeback format.
+ */
+static void idpf_rx_singleq_extract_flex_fields(struct idpf_queue *rx_q,
+ union virtchnl2_rx_desc *rx_desc,
+ struct idpf_rx_extracted *fields)
+{
+ fields->size = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_PKT_LEN_M,
+ le16_to_cpu(rx_desc->flex_nic_wb.pkt_len));
+ fields->rx_ptype = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_PTYPE_M,
+ le16_to_cpu(rx_desc->flex_nic_wb.ptype_flex_flags0));
+}
+
+/**
+ * idpf_rx_singleq_extract_fields - Extract fields from the Rx descriptor
+ * @rx_q: Rx descriptor queue
+ * @rx_desc: the descriptor to process
+ * @fields: storage for extracted values
+ *
+ */
+static void idpf_rx_singleq_extract_fields(struct idpf_queue *rx_q,
+ union virtchnl2_rx_desc *rx_desc,
+ struct idpf_rx_extracted *fields)
+{
+ if (rx_q->rxdids == VIRTCHNL2_RXDID_1_32B_BASE_M)
+ idpf_rx_singleq_extract_base_fields(rx_q, rx_desc, fields);
+ else
+ idpf_rx_singleq_extract_flex_fields(rx_q, rx_desc, fields);
+}
+
+/**
+ * idpf_rx_singleq_clean - Reclaim resources after receive completes
+ * @rx_q: rx queue to clean
+ * @budget: Total limit on number of packets to process
+ *
+ * Returns true if there's any budget left (e.g. the clean is finished)
+ */
+static int idpf_rx_singleq_clean(struct idpf_queue *rx_q, int budget)
+{
+ unsigned int total_rx_bytes = 0, total_rx_pkts = 0;
+ struct sk_buff *skb = rx_q->skb;
+ u16 ntc = rx_q->next_to_clean;
+ u16 cleaned_count = 0;
+ bool failure = false;
+
+ /* Process Rx packets bounded by budget */
+ while (likely(total_rx_pkts < (unsigned int)budget)) {
+ struct idpf_rx_extracted fields = { };
+ union virtchnl2_rx_desc *rx_desc;
+ struct idpf_rx_buf *rx_buf;
+
+ /* get the Rx desc from Rx queue based on 'next_to_clean' */
+ rx_desc = IDPF_RX_DESC(rx_q, ntc);
+
+ /* status_error_ptype_len will always be zero for unused
+ * descriptors because it's cleared in cleanup, and overlaps
+ * with hdr_addr which is always zero because packet split
+ * isn't used, if the hardware wrote DD then the length will be
+ * non-zero
+ */
+#define IDPF_RXD_DD VIRTCHNL2_RX_BASE_DESC_STATUS_DD_M
+ if (!idpf_rx_singleq_test_staterr(rx_desc,
+ IDPF_RXD_DD))
+ break;
+
+ /* This memory barrier is needed to keep us from reading
+ * any other fields out of the rx_desc
+ */
+ dma_rmb();
+
+ idpf_rx_singleq_extract_fields(rx_q, rx_desc, &fields);
+
+ rx_buf = &rx_q->rx_buf.buf[ntc];
+ if (!fields.size) {
+ idpf_rx_put_page(rx_buf);
+ goto skip_data;
+ }
+
+ idpf_rx_sync_for_cpu(rx_buf, fields.size);
+ skb = rx_q->skb;
+ if (skb)
+ idpf_rx_add_frag(rx_buf, skb, fields.size);
+ else
+ skb = idpf_rx_construct_skb(rx_q, rx_buf, fields.size);
+
+ /* exit if we failed to retrieve a buffer */
+ if (!skb)
+ break;
+
+skip_data:
+ IDPF_SINGLEQ_BUMP_RING_IDX(rx_q, ntc);
+
+ cleaned_count++;
+
+ /* skip if it is non EOP desc */
+ if (idpf_rx_singleq_is_non_eop(rx_q, rx_desc, skb, ntc))
+ continue;
+
+#define IDPF_RXD_ERR_S FIELD_PREP(VIRTCHNL2_RX_BASE_DESC_QW1_ERROR_M, \
+ VIRTCHNL2_RX_BASE_DESC_ERROR_RXE_M)
+ if (unlikely(idpf_rx_singleq_test_staterr(rx_desc,
+ IDPF_RXD_ERR_S))) {
+ dev_kfree_skb_any(skb);
+ skb = NULL;
+ continue;
+ }
+
+ /* pad skb if needed (to make valid ethernet frame) */
+ if (eth_skb_pad(skb)) {
+ skb = NULL;
+ continue;
+ }
+
+ /* probably a little skewed due to removing CRC */
+ total_rx_bytes += skb->len;
+
+ /* protocol */
+ idpf_rx_singleq_process_skb_fields(rx_q, skb,
+ rx_desc, fields.rx_ptype);
+
+ /* send completed skb up the stack */
+ napi_gro_receive(&rx_q->q_vector->napi, skb);
+ skb = NULL;
+
+ /* update budget accounting */
+ total_rx_pkts++;
+ }
+
+ rx_q->skb = skb;
+
+ rx_q->next_to_clean = ntc;
+
+ if (cleaned_count)
+ failure = idpf_rx_singleq_buf_hw_alloc_all(rx_q, cleaned_count);
+
+ u64_stats_update_begin(&rx_q->stats_sync);
+ u64_stats_add(&rx_q->q_stats.rx.packets, total_rx_pkts);
+ u64_stats_add(&rx_q->q_stats.rx.bytes, total_rx_bytes);
+ u64_stats_update_end(&rx_q->stats_sync);
+
+ /* guarantee a trip back through this routine if there was a failure */
+ return failure ? budget : (int)total_rx_pkts;
+}
+
+/**
+ * idpf_rx_singleq_clean_all - Clean all Rx queues
+ * @q_vec: queue vector
+ * @budget: Used to determine if we are in netpoll
+ * @cleaned: returns number of packets cleaned
+ *
+ * Returns false if clean is not complete else returns true
+ */
+static bool idpf_rx_singleq_clean_all(struct idpf_q_vector *q_vec, int budget,
+ int *cleaned)
+{
+ u16 num_rxq = q_vec->num_rxq;
+ bool clean_complete = true;
+ int budget_per_q, i;
+
+ /* We attempt to distribute budget to each Rx queue fairly, but don't
+ * allow the budget to go below 1 because that would exit polling early.
+ */
+ budget_per_q = num_rxq ? max(budget / num_rxq, 1) : 0;
+ for (i = 0; i < num_rxq; i++) {
+ struct idpf_queue *rxq = q_vec->rx[i];
+ int pkts_cleaned_per_q;
+
+ pkts_cleaned_per_q = idpf_rx_singleq_clean(rxq, budget_per_q);
+
+ /* if we clean as many as budgeted, we must not be done */
+ if (pkts_cleaned_per_q >= budget_per_q)
+ clean_complete = false;
+ *cleaned += pkts_cleaned_per_q;
+ }
+
+ return clean_complete;
+}
+
+/**
+ * idpf_vport_singleq_napi_poll - NAPI handler
+ * @napi: struct from which you get q_vector
+ * @budget: budget provided by stack
+ */
+int idpf_vport_singleq_napi_poll(struct napi_struct *napi, int budget)
+{
+ struct idpf_q_vector *q_vector =
+ container_of(napi, struct idpf_q_vector, napi);
+ bool clean_complete;
+ int work_done = 0;
+
+ /* Handle case where we are called by netpoll with a budget of 0 */
+ if (budget <= 0) {
+ idpf_tx_singleq_clean_all(q_vector, budget, &work_done);
+
+ return budget;
+ }
+
+ clean_complete = idpf_rx_singleq_clean_all(q_vector, budget,
+ &work_done);
+ clean_complete &= idpf_tx_singleq_clean_all(q_vector, budget,
+ &work_done);
+
+ /* If work not completed, return budget and polling will return */
+ if (!clean_complete)
+ return budget;
+
+ work_done = min_t(int, work_done, budget - 1);
+
+ /* Exit the polling mode, but don't re-enable interrupts if stack might
+ * poll us due to busy-polling
+ */
+ if (likely(napi_complete_done(napi, work_done)))
+ idpf_vport_intr_update_itr_ena_irq(q_vector);
+
+ return work_done;
+}
diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c
new file mode 100644
index 000000000000..6fa79898c42c
--- /dev/null
+++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c
@@ -0,0 +1,4289 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2023 Intel Corporation */
+
+#include "idpf.h"
+
+/**
+ * idpf_buf_lifo_push - push a buffer pointer onto stack
+ * @stack: pointer to stack struct
+ * @buf: pointer to buf to push
+ *
+ * Returns 0 on success, negative on failure
+ **/
+static int idpf_buf_lifo_push(struct idpf_buf_lifo *stack,
+ struct idpf_tx_stash *buf)
+{
+ if (unlikely(stack->top == stack->size))
+ return -ENOSPC;
+
+ stack->bufs[stack->top++] = buf;
+
+ return 0;
+}
+
+/**
+ * idpf_buf_lifo_pop - pop a buffer pointer from stack
+ * @stack: pointer to stack struct
+ **/
+static struct idpf_tx_stash *idpf_buf_lifo_pop(struct idpf_buf_lifo *stack)
+{
+ if (unlikely(!stack->top))
+ return NULL;
+
+ return stack->bufs[--stack->top];
+}
+
+/**
+ * idpf_tx_timeout - Respond to a Tx Hang
+ * @netdev: network interface device structure
+ * @txqueue: TX queue
+ */
+void idpf_tx_timeout(struct net_device *netdev, unsigned int txqueue)
+{
+ struct idpf_adapter *adapter = idpf_netdev_to_adapter(netdev);
+
+ adapter->tx_timeout_count++;
+
+ netdev_err(netdev, "Detected Tx timeout: Count %d, Queue %d\n",
+ adapter->tx_timeout_count, txqueue);
+ if (!idpf_is_reset_in_prog(adapter)) {
+ set_bit(IDPF_HR_FUNC_RESET, adapter->flags);
+ queue_delayed_work(adapter->vc_event_wq,
+ &adapter->vc_event_task,
+ msecs_to_jiffies(10));
+ }
+}
+
+/**
+ * idpf_tx_buf_rel - Release a Tx buffer
+ * @tx_q: the queue that owns the buffer
+ * @tx_buf: the buffer to free
+ */
+static void idpf_tx_buf_rel(struct idpf_queue *tx_q, struct idpf_tx_buf *tx_buf)
+{
+ if (tx_buf->skb) {
+ if (dma_unmap_len(tx_buf, len))
+ dma_unmap_single(tx_q->dev,
+ dma_unmap_addr(tx_buf, dma),
+ dma_unmap_len(tx_buf, len),
+ DMA_TO_DEVICE);
+ dev_kfree_skb_any(tx_buf->skb);
+ } else if (dma_unmap_len(tx_buf, len)) {
+ dma_unmap_page(tx_q->dev,
+ dma_unmap_addr(tx_buf, dma),
+ dma_unmap_len(tx_buf, len),
+ DMA_TO_DEVICE);
+ }
+
+ tx_buf->next_to_watch = NULL;
+ tx_buf->skb = NULL;
+ tx_buf->compl_tag = IDPF_SPLITQ_TX_INVAL_COMPL_TAG;
+ dma_unmap_len_set(tx_buf, len, 0);
+}
+
+/**
+ * idpf_tx_buf_rel_all - Free any empty Tx buffers
+ * @txq: queue to be cleaned
+ */
+static void idpf_tx_buf_rel_all(struct idpf_queue *txq)
+{
+ u16 i;
+
+ /* Buffers already cleared, nothing to do */
+ if (!txq->tx_buf)
+ return;
+
+ /* Free all the Tx buffer sk_buffs */
+ for (i = 0; i < txq->desc_count; i++)
+ idpf_tx_buf_rel(txq, &txq->tx_buf[i]);
+
+ kfree(txq->tx_buf);
+ txq->tx_buf = NULL;
+
+ if (!txq->buf_stack.bufs)
+ return;
+
+ for (i = 0; i < txq->buf_stack.size; i++)
+ kfree(txq->buf_stack.bufs[i]);
+
+ kfree(txq->buf_stack.bufs);
+ txq->buf_stack.bufs = NULL;
+}
+
+/**
+ * idpf_tx_desc_rel - Free Tx resources per queue
+ * @txq: Tx descriptor ring for a specific queue
+ * @bufq: buffer q or completion q
+ *
+ * Free all transmit software resources
+ */
+static void idpf_tx_desc_rel(struct idpf_queue *txq, bool bufq)
+{
+ if (bufq)
+ idpf_tx_buf_rel_all(txq);
+
+ if (!txq->desc_ring)
+ return;
+
+ dmam_free_coherent(txq->dev, txq->size, txq->desc_ring, txq->dma);
+ txq->desc_ring = NULL;
+ txq->next_to_alloc = 0;
+ txq->next_to_use = 0;
+ txq->next_to_clean = 0;
+}
+
+/**
+ * idpf_tx_desc_rel_all - Free Tx Resources for All Queues
+ * @vport: virtual port structure
+ *
+ * Free all transmit software resources
+ */
+static void idpf_tx_desc_rel_all(struct idpf_vport *vport)
+{
+ int i, j;
+
+ if (!vport->txq_grps)
+ return;
+
+ for (i = 0; i < vport->num_txq_grp; i++) {
+ struct idpf_txq_group *txq_grp = &vport->txq_grps[i];
+
+ for (j = 0; j < txq_grp->num_txq; j++)
+ idpf_tx_desc_rel(txq_grp->txqs[j], true);
+
+ if (idpf_is_queue_model_split(vport->txq_model))
+ idpf_tx_desc_rel(txq_grp->complq, false);
+ }
+}
+
+/**
+ * idpf_tx_buf_alloc_all - Allocate memory for all buffer resources
+ * @tx_q: queue for which the buffers are allocated
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int idpf_tx_buf_alloc_all(struct idpf_queue *tx_q)
+{
+ int buf_size;
+ int i;
+
+ /* Allocate book keeping buffers only. Buffers to be supplied to HW
+ * are allocated by kernel network stack and received as part of skb
+ */
+ buf_size = sizeof(struct idpf_tx_buf) * tx_q->desc_count;
+ tx_q->tx_buf = kzalloc(buf_size, GFP_KERNEL);
+ if (!tx_q->tx_buf)
+ return -ENOMEM;
+
+ /* Initialize tx_bufs with invalid completion tags */
+ for (i = 0; i < tx_q->desc_count; i++)
+ tx_q->tx_buf[i].compl_tag = IDPF_SPLITQ_TX_INVAL_COMPL_TAG;
+
+ /* Initialize tx buf stack for out-of-order completions if
+ * flow scheduling offload is enabled
+ */
+ tx_q->buf_stack.bufs =
+ kcalloc(tx_q->desc_count, sizeof(struct idpf_tx_stash *),
+ GFP_KERNEL);
+ if (!tx_q->buf_stack.bufs)
+ return -ENOMEM;
+
+ tx_q->buf_stack.size = tx_q->desc_count;
+ tx_q->buf_stack.top = tx_q->desc_count;
+
+ for (i = 0; i < tx_q->desc_count; i++) {
+ tx_q->buf_stack.bufs[i] = kzalloc(sizeof(*tx_q->buf_stack.bufs[i]),
+ GFP_KERNEL);
+ if (!tx_q->buf_stack.bufs[i])
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+/**
+ * idpf_tx_desc_alloc - Allocate the Tx descriptors
+ * @tx_q: the tx ring to set up
+ * @bufq: buffer or completion queue
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int idpf_tx_desc_alloc(struct idpf_queue *tx_q, bool bufq)
+{
+ struct device *dev = tx_q->dev;
+ u32 desc_sz;
+ int err;
+
+ if (bufq) {
+ err = idpf_tx_buf_alloc_all(tx_q);
+ if (err)
+ goto err_alloc;
+
+ desc_sz = sizeof(struct idpf_base_tx_desc);
+ } else {
+ desc_sz = sizeof(struct idpf_splitq_tx_compl_desc);
+ }
+
+ tx_q->size = tx_q->desc_count * desc_sz;
+
+ /* Allocate descriptors also round up to nearest 4K */
+ tx_q->size = ALIGN(tx_q->size, 4096);
+ tx_q->desc_ring = dmam_alloc_coherent(dev, tx_q->size, &tx_q->dma,
+ GFP_KERNEL);
+ if (!tx_q->desc_ring) {
+ dev_err(dev, "Unable to allocate memory for the Tx descriptor ring, size=%d\n",
+ tx_q->size);
+ err = -ENOMEM;
+ goto err_alloc;
+ }
+
+ tx_q->next_to_alloc = 0;
+ tx_q->next_to_use = 0;
+ tx_q->next_to_clean = 0;
+ set_bit(__IDPF_Q_GEN_CHK, tx_q->flags);
+
+ return 0;
+
+err_alloc:
+ idpf_tx_desc_rel(tx_q, bufq);
+
+ return err;
+}
+
+/**
+ * idpf_tx_desc_alloc_all - allocate all queues Tx resources
+ * @vport: virtual port private structure
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int idpf_tx_desc_alloc_all(struct idpf_vport *vport)
+{
+ struct device *dev = &vport->adapter->pdev->dev;
+ int err = 0;
+ int i, j;
+
+ /* Setup buffer queues. In single queue model buffer queues and
+ * completion queues will be same
+ */
+ for (i = 0; i < vport->num_txq_grp; i++) {
+ for (j = 0; j < vport->txq_grps[i].num_txq; j++) {
+ struct idpf_queue *txq = vport->txq_grps[i].txqs[j];
+ u8 gen_bits = 0;
+ u16 bufidx_mask;
+
+ err = idpf_tx_desc_alloc(txq, true);
+ if (err) {
+ dev_err(dev, "Allocation for Tx Queue %u failed\n",
+ i);
+ goto err_out;
+ }
+
+ if (!idpf_is_queue_model_split(vport->txq_model))
+ continue;
+
+ txq->compl_tag_cur_gen = 0;
+
+ /* Determine the number of bits in the bufid
+ * mask and add one to get the start of the
+ * generation bits
+ */
+ bufidx_mask = txq->desc_count - 1;
+ while (bufidx_mask >> 1) {
+ txq->compl_tag_gen_s++;
+ bufidx_mask = bufidx_mask >> 1;
+ }
+ txq->compl_tag_gen_s++;
+
+ gen_bits = IDPF_TX_SPLITQ_COMPL_TAG_WIDTH -
+ txq->compl_tag_gen_s;
+ txq->compl_tag_gen_max = GETMAXVAL(gen_bits);
+
+ /* Set bufid mask based on location of first
+ * gen bit; it cannot simply be the descriptor
+ * ring size-1 since we can have size values
+ * where not all of those bits are set.
+ */
+ txq->compl_tag_bufid_m =
+ GETMAXVAL(txq->compl_tag_gen_s);
+ }
+
+ if (!idpf_is_queue_model_split(vport->txq_model))
+ continue;
+
+ /* Setup completion queues */
+ err = idpf_tx_desc_alloc(vport->txq_grps[i].complq, false);
+ if (err) {
+ dev_err(dev, "Allocation for Tx Completion Queue %u failed\n",
+ i);
+ goto err_out;
+ }
+ }
+
+err_out:
+ if (err)
+ idpf_tx_desc_rel_all(vport);
+
+ return err;
+}
+
+/**
+ * idpf_rx_page_rel - Release an rx buffer page
+ * @rxq: the queue that owns the buffer
+ * @rx_buf: the buffer to free
+ */
+static void idpf_rx_page_rel(struct idpf_queue *rxq, struct idpf_rx_buf *rx_buf)
+{
+ if (unlikely(!rx_buf->page))
+ return;
+
+ page_pool_put_full_page(rxq->pp, rx_buf->page, false);
+
+ rx_buf->page = NULL;
+ rx_buf->page_offset = 0;
+}
+
+/**
+ * idpf_rx_hdr_buf_rel_all - Release header buffer memory
+ * @rxq: queue to use
+ */
+static void idpf_rx_hdr_buf_rel_all(struct idpf_queue *rxq)
+{
+ struct idpf_adapter *adapter = rxq->vport->adapter;
+
+ dma_free_coherent(&adapter->pdev->dev,
+ rxq->desc_count * IDPF_HDR_BUF_SIZE,
+ rxq->rx_buf.hdr_buf_va,
+ rxq->rx_buf.hdr_buf_pa);
+ rxq->rx_buf.hdr_buf_va = NULL;
+}
+
+/**
+ * idpf_rx_buf_rel_all - Free all Rx buffer resources for a queue
+ * @rxq: queue to be cleaned
+ */
+static void idpf_rx_buf_rel_all(struct idpf_queue *rxq)
+{
+ u16 i;
+
+ /* queue already cleared, nothing to do */
+ if (!rxq->rx_buf.buf)
+ return;
+
+ /* Free all the bufs allocated and given to hw on Rx queue */
+ for (i = 0; i < rxq->desc_count; i++)
+ idpf_rx_page_rel(rxq, &rxq->rx_buf.buf[i]);
+
+ if (rxq->rx_hsplit_en)
+ idpf_rx_hdr_buf_rel_all(rxq);
+
+ page_pool_destroy(rxq->pp);
+ rxq->pp = NULL;
+
+ kfree(rxq->rx_buf.buf);
+ rxq->rx_buf.buf = NULL;
+}
+
+/**
+ * idpf_rx_desc_rel - Free a specific Rx q resources
+ * @rxq: queue to clean the resources from
+ * @bufq: buffer q or completion q
+ * @q_model: single or split q model
+ *
+ * Free a specific rx queue resources
+ */
+static void idpf_rx_desc_rel(struct idpf_queue *rxq, bool bufq, s32 q_model)
+{
+ if (!rxq)
+ return;
+
+ if (!bufq && idpf_is_queue_model_split(q_model) && rxq->skb) {
+ dev_kfree_skb_any(rxq->skb);
+ rxq->skb = NULL;
+ }
+
+ if (bufq || !idpf_is_queue_model_split(q_model))
+ idpf_rx_buf_rel_all(rxq);
+
+ rxq->next_to_alloc = 0;
+ rxq->next_to_clean = 0;
+ rxq->next_to_use = 0;
+ if (!rxq->desc_ring)
+ return;
+
+ dmam_free_coherent(rxq->dev, rxq->size, rxq->desc_ring, rxq->dma);
+ rxq->desc_ring = NULL;
+}
+
+/**
+ * idpf_rx_desc_rel_all - Free Rx Resources for All Queues
+ * @vport: virtual port structure
+ *
+ * Free all rx queues resources
+ */
+static void idpf_rx_desc_rel_all(struct idpf_vport *vport)
+{
+ struct idpf_rxq_group *rx_qgrp;
+ u16 num_rxq;
+ int i, j;
+
+ if (!vport->rxq_grps)
+ return;
+
+ for (i = 0; i < vport->num_rxq_grp; i++) {
+ rx_qgrp = &vport->rxq_grps[i];
+
+ if (!idpf_is_queue_model_split(vport->rxq_model)) {
+ for (j = 0; j < rx_qgrp->singleq.num_rxq; j++)
+ idpf_rx_desc_rel(rx_qgrp->singleq.rxqs[j],
+ false, vport->rxq_model);
+ continue;
+ }
+
+ num_rxq = rx_qgrp->splitq.num_rxq_sets;
+ for (j = 0; j < num_rxq; j++)
+ idpf_rx_desc_rel(&rx_qgrp->splitq.rxq_sets[j]->rxq,
+ false, vport->rxq_model);
+
+ if (!rx_qgrp->splitq.bufq_sets)
+ continue;
+
+ for (j = 0; j < vport->num_bufqs_per_qgrp; j++) {
+ struct idpf_bufq_set *bufq_set =
+ &rx_qgrp->splitq.bufq_sets[j];
+
+ idpf_rx_desc_rel(&bufq_set->bufq, true,
+ vport->rxq_model);
+ }
+ }
+}
+
+/**
+ * idpf_rx_buf_hw_update - Store the new tail and head values
+ * @rxq: queue to bump
+ * @val: new head index
+ */
+void idpf_rx_buf_hw_update(struct idpf_queue *rxq, u32 val)
+{
+ rxq->next_to_use = val;
+
+ if (unlikely(!rxq->tail))
+ return;
+
+ /* writel has an implicit memory barrier */
+ writel(val, rxq->tail);
+}
+
+/**
+ * idpf_rx_hdr_buf_alloc_all - Allocate memory for header buffers
+ * @rxq: ring to use
+ *
+ * Returns 0 on success, negative on failure.
+ */
+static int idpf_rx_hdr_buf_alloc_all(struct idpf_queue *rxq)
+{
+ struct idpf_adapter *adapter = rxq->vport->adapter;
+
+ rxq->rx_buf.hdr_buf_va =
+ dma_alloc_coherent(&adapter->pdev->dev,
+ IDPF_HDR_BUF_SIZE * rxq->desc_count,
+ &rxq->rx_buf.hdr_buf_pa,
+ GFP_KERNEL);
+ if (!rxq->rx_buf.hdr_buf_va)
+ return -ENOMEM;
+
+ return 0;
+}
+
+/**
+ * idpf_rx_post_buf_refill - Post buffer id to refill queue
+ * @refillq: refill queue to post to
+ * @buf_id: buffer id to post
+ */
+static void idpf_rx_post_buf_refill(struct idpf_sw_queue *refillq, u16 buf_id)
+{
+ u16 nta = refillq->next_to_alloc;
+
+ /* store the buffer ID and the SW maintained GEN bit to the refillq */
+ refillq->ring[nta] =
+ ((buf_id << IDPF_RX_BI_BUFID_S) & IDPF_RX_BI_BUFID_M) |
+ (!!(test_bit(__IDPF_Q_GEN_CHK, refillq->flags)) <<
+ IDPF_RX_BI_GEN_S);
+
+ if (unlikely(++nta == refillq->desc_count)) {
+ nta = 0;
+ change_bit(__IDPF_Q_GEN_CHK, refillq->flags);
+ }
+ refillq->next_to_alloc = nta;
+}
+
+/**
+ * idpf_rx_post_buf_desc - Post buffer to bufq descriptor ring
+ * @bufq: buffer queue to post to
+ * @buf_id: buffer id to post
+ *
+ * Returns false if buffer could not be allocated, true otherwise.
+ */
+static bool idpf_rx_post_buf_desc(struct idpf_queue *bufq, u16 buf_id)
+{
+ struct virtchnl2_splitq_rx_buf_desc *splitq_rx_desc = NULL;
+ u16 nta = bufq->next_to_alloc;
+ struct idpf_rx_buf *buf;
+ dma_addr_t addr;
+
+ splitq_rx_desc = IDPF_SPLITQ_RX_BUF_DESC(bufq, nta);
+ buf = &bufq->rx_buf.buf[buf_id];
+
+ if (bufq->rx_hsplit_en) {
+ splitq_rx_desc->hdr_addr =
+ cpu_to_le64(bufq->rx_buf.hdr_buf_pa +
+ (u32)buf_id * IDPF_HDR_BUF_SIZE);
+ }
+
+ addr = idpf_alloc_page(bufq->pp, buf, bufq->rx_buf_size);
+ if (unlikely(addr == DMA_MAPPING_ERROR))
+ return false;
+
+ splitq_rx_desc->pkt_addr = cpu_to_le64(addr);
+ splitq_rx_desc->qword0.buf_id = cpu_to_le16(buf_id);
+
+ nta++;
+ if (unlikely(nta == bufq->desc_count))
+ nta = 0;
+ bufq->next_to_alloc = nta;
+
+ return true;
+}
+
+/**
+ * idpf_rx_post_init_bufs - Post initial buffers to bufq
+ * @bufq: buffer queue to post working set to
+ * @working_set: number of buffers to put in working set
+ *
+ * Returns true if @working_set bufs were posted successfully, false otherwise.
+ */
+static bool idpf_rx_post_init_bufs(struct idpf_queue *bufq, u16 working_set)
+{
+ int i;
+
+ for (i = 0; i < working_set; i++) {
+ if (!idpf_rx_post_buf_desc(bufq, i))
+ return false;
+ }
+
+ idpf_rx_buf_hw_update(bufq,
+ bufq->next_to_alloc & ~(bufq->rx_buf_stride - 1));
+
+ return true;
+}
+
+/**
+ * idpf_rx_create_page_pool - Create a page pool
+ * @rxbufq: RX queue to create page pool for
+ *
+ * Returns &page_pool on success, casted -errno on failure
+ */
+static struct page_pool *idpf_rx_create_page_pool(struct idpf_queue *rxbufq)
+{
+ struct page_pool_params pp = {
+ .flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV,
+ .order = 0,
+ .pool_size = rxbufq->desc_count,
+ .nid = NUMA_NO_NODE,
+ .dev = rxbufq->vport->netdev->dev.parent,
+ .max_len = PAGE_SIZE,
+ .dma_dir = DMA_FROM_DEVICE,
+ .offset = 0,
+ };
+
+ if (rxbufq->rx_buf_size == IDPF_RX_BUF_2048)
+ pp.flags |= PP_FLAG_PAGE_FRAG;
+
+ return page_pool_create(&pp);
+}
+
+/**
+ * idpf_rx_buf_alloc_all - Allocate memory for all buffer resources
+ * @rxbufq: queue for which the buffers are allocated; equivalent to
+ * rxq when operating in singleq mode
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int idpf_rx_buf_alloc_all(struct idpf_queue *rxbufq)
+{
+ int err = 0;
+
+ /* Allocate book keeping buffers */
+ rxbufq->rx_buf.buf = kcalloc(rxbufq->desc_count,
+ sizeof(struct idpf_rx_buf), GFP_KERNEL);
+ if (!rxbufq->rx_buf.buf) {
+ err = -ENOMEM;
+ goto rx_buf_alloc_all_out;
+ }
+
+ if (rxbufq->rx_hsplit_en) {
+ err = idpf_rx_hdr_buf_alloc_all(rxbufq);
+ if (err)
+ goto rx_buf_alloc_all_out;
+ }
+
+ /* Allocate buffers to be given to HW. */
+ if (idpf_is_queue_model_split(rxbufq->vport->rxq_model)) {
+ int working_set = IDPF_RX_BUFQ_WORKING_SET(rxbufq);
+
+ if (!idpf_rx_post_init_bufs(rxbufq, working_set))
+ err = -ENOMEM;
+ } else {
+ if (idpf_rx_singleq_buf_hw_alloc_all(rxbufq,
+ rxbufq->desc_count - 1))
+ err = -ENOMEM;
+ }
+
+rx_buf_alloc_all_out:
+ if (err)
+ idpf_rx_buf_rel_all(rxbufq);
+
+ return err;
+}
+
+/**
+ * idpf_rx_bufs_init - Initialize page pool, allocate rx bufs, and post to HW
+ * @rxbufq: RX queue to create page pool for
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int idpf_rx_bufs_init(struct idpf_queue *rxbufq)
+{
+ struct page_pool *pool;
+
+ pool = idpf_rx_create_page_pool(rxbufq);
+ if (IS_ERR(pool))
+ return PTR_ERR(pool);
+
+ rxbufq->pp = pool;
+
+ return idpf_rx_buf_alloc_all(rxbufq);
+}
+
+/**
+ * idpf_rx_bufs_init_all - Initialize all RX bufs
+ * @vport: virtual port struct
+ *
+ * Returns 0 on success, negative on failure
+ */
+int idpf_rx_bufs_init_all(struct idpf_vport *vport)
+{
+ struct idpf_rxq_group *rx_qgrp;
+ struct idpf_queue *q;
+ int i, j, err;
+
+ for (i = 0; i < vport->num_rxq_grp; i++) {
+ rx_qgrp = &vport->rxq_grps[i];
+
+ /* Allocate bufs for the rxq itself in singleq */
+ if (!idpf_is_queue_model_split(vport->rxq_model)) {
+ int num_rxq = rx_qgrp->singleq.num_rxq;
+
+ for (j = 0; j < num_rxq; j++) {
+ q = rx_qgrp->singleq.rxqs[j];
+ err = idpf_rx_bufs_init(q);
+ if (err)
+ return err;
+ }
+
+ continue;
+ }
+
+ /* Otherwise, allocate bufs for the buffer queues */
+ for (j = 0; j < vport->num_bufqs_per_qgrp; j++) {
+ q = &rx_qgrp->splitq.bufq_sets[j].bufq;
+ err = idpf_rx_bufs_init(q);
+ if (err)
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * idpf_rx_desc_alloc - Allocate queue Rx resources
+ * @rxq: Rx queue for which the resources are setup
+ * @bufq: buffer or completion queue
+ * @q_model: single or split queue model
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int idpf_rx_desc_alloc(struct idpf_queue *rxq, bool bufq, s32 q_model)
+{
+ struct device *dev = rxq->dev;
+
+ if (bufq)
+ rxq->size = rxq->desc_count *
+ sizeof(struct virtchnl2_splitq_rx_buf_desc);
+ else
+ rxq->size = rxq->desc_count *
+ sizeof(union virtchnl2_rx_desc);
+
+ /* Allocate descriptors and also round up to nearest 4K */
+ rxq->size = ALIGN(rxq->size, 4096);
+ rxq->desc_ring = dmam_alloc_coherent(dev, rxq->size,
+ &rxq->dma, GFP_KERNEL);
+ if (!rxq->desc_ring) {
+ dev_err(dev, "Unable to allocate memory for the Rx descriptor ring, size=%d\n",
+ rxq->size);
+ return -ENOMEM;
+ }
+
+ rxq->next_to_alloc = 0;
+ rxq->next_to_clean = 0;
+ rxq->next_to_use = 0;
+ set_bit(__IDPF_Q_GEN_CHK, rxq->flags);
+
+ return 0;
+}
+
+/**
+ * idpf_rx_desc_alloc_all - allocate all RX queues resources
+ * @vport: virtual port structure
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int idpf_rx_desc_alloc_all(struct idpf_vport *vport)
+{
+ struct device *dev = &vport->adapter->pdev->dev;
+ struct idpf_rxq_group *rx_qgrp;
+ struct idpf_queue *q;
+ int i, j, err;
+ u16 num_rxq;
+
+ for (i = 0; i < vport->num_rxq_grp; i++) {
+ rx_qgrp = &vport->rxq_grps[i];
+ if (idpf_is_queue_model_split(vport->rxq_model))
+ num_rxq = rx_qgrp->splitq.num_rxq_sets;
+ else
+ num_rxq = rx_qgrp->singleq.num_rxq;
+
+ for (j = 0; j < num_rxq; j++) {
+ if (idpf_is_queue_model_split(vport->rxq_model))
+ q = &rx_qgrp->splitq.rxq_sets[j]->rxq;
+ else
+ q = rx_qgrp->singleq.rxqs[j];
+ err = idpf_rx_desc_alloc(q, false, vport->rxq_model);
+ if (err) {
+ dev_err(dev, "Memory allocation for Rx Queue %u failed\n",
+ i);
+ goto err_out;
+ }
+ }
+
+ if (!idpf_is_queue_model_split(vport->rxq_model))
+ continue;
+
+ for (j = 0; j < vport->num_bufqs_per_qgrp; j++) {
+ q = &rx_qgrp->splitq.bufq_sets[j].bufq;
+ err = idpf_rx_desc_alloc(q, true, vport->rxq_model);
+ if (err) {
+ dev_err(dev, "Memory allocation for Rx Buffer Queue %u failed\n",
+ i);
+ goto err_out;
+ }
+ }
+ }
+
+ return 0;
+
+err_out:
+ idpf_rx_desc_rel_all(vport);
+
+ return err;
+}
+
+/**
+ * idpf_txq_group_rel - Release all resources for txq groups
+ * @vport: vport to release txq groups on
+ */
+static void idpf_txq_group_rel(struct idpf_vport *vport)
+{
+ int i, j;
+
+ if (!vport->txq_grps)
+ return;
+
+ for (i = 0; i < vport->num_txq_grp; i++) {
+ struct idpf_txq_group *txq_grp = &vport->txq_grps[i];
+
+ for (j = 0; j < txq_grp->num_txq; j++) {
+ kfree(txq_grp->txqs[j]);
+ txq_grp->txqs[j] = NULL;
+ }
+ kfree(txq_grp->complq);
+ txq_grp->complq = NULL;
+ }
+ kfree(vport->txq_grps);
+ vport->txq_grps = NULL;
+}
+
+/**
+ * idpf_rxq_sw_queue_rel - Release software queue resources
+ * @rx_qgrp: rx queue group with software queues
+ */
+static void idpf_rxq_sw_queue_rel(struct idpf_rxq_group *rx_qgrp)
+{
+ int i, j;
+
+ for (i = 0; i < rx_qgrp->vport->num_bufqs_per_qgrp; i++) {
+ struct idpf_bufq_set *bufq_set = &rx_qgrp->splitq.bufq_sets[i];
+
+ for (j = 0; j < bufq_set->num_refillqs; j++) {
+ kfree(bufq_set->refillqs[j].ring);
+ bufq_set->refillqs[j].ring = NULL;
+ }
+ kfree(bufq_set->refillqs);
+ bufq_set->refillqs = NULL;
+ }
+}
+
+/**
+ * idpf_rxq_group_rel - Release all resources for rxq groups
+ * @vport: vport to release rxq groups on
+ */
+static void idpf_rxq_group_rel(struct idpf_vport *vport)
+{
+ int i;
+
+ if (!vport->rxq_grps)
+ return;
+
+ for (i = 0; i < vport->num_rxq_grp; i++) {
+ struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i];
+ u16 num_rxq;
+ int j;
+
+ if (idpf_is_queue_model_split(vport->rxq_model)) {
+ num_rxq = rx_qgrp->splitq.num_rxq_sets;
+ for (j = 0; j < num_rxq; j++) {
+ kfree(rx_qgrp->splitq.rxq_sets[j]);
+ rx_qgrp->splitq.rxq_sets[j] = NULL;
+ }
+
+ idpf_rxq_sw_queue_rel(rx_qgrp);
+ kfree(rx_qgrp->splitq.bufq_sets);
+ rx_qgrp->splitq.bufq_sets = NULL;
+ } else {
+ num_rxq = rx_qgrp->singleq.num_rxq;
+ for (j = 0; j < num_rxq; j++) {
+ kfree(rx_qgrp->singleq.rxqs[j]);
+ rx_qgrp->singleq.rxqs[j] = NULL;
+ }
+ }
+ }
+ kfree(vport->rxq_grps);
+ vport->rxq_grps = NULL;
+}
+
+/**
+ * idpf_vport_queue_grp_rel_all - Release all queue groups
+ * @vport: vport to release queue groups for
+ */
+static void idpf_vport_queue_grp_rel_all(struct idpf_vport *vport)
+{
+ idpf_txq_group_rel(vport);
+ idpf_rxq_group_rel(vport);
+}
+
+/**
+ * idpf_vport_queues_rel - Free memory for all queues
+ * @vport: virtual port
+ *
+ * Free the memory allocated for queues associated to a vport
+ */
+void idpf_vport_queues_rel(struct idpf_vport *vport)
+{
+ idpf_tx_desc_rel_all(vport);
+ idpf_rx_desc_rel_all(vport);
+ idpf_vport_queue_grp_rel_all(vport);
+
+ kfree(vport->txqs);
+ vport->txqs = NULL;
+}
+
+/**
+ * idpf_vport_init_fast_path_txqs - Initialize fast path txq array
+ * @vport: vport to init txqs on
+ *
+ * We get a queue index from skb->queue_mapping and we need a fast way to
+ * dereference the queue from queue groups. This allows us to quickly pull a
+ * txq based on a queue index.
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int idpf_vport_init_fast_path_txqs(struct idpf_vport *vport)
+{
+ int i, j, k = 0;
+
+ vport->txqs = kcalloc(vport->num_txq, sizeof(struct idpf_queue *),
+ GFP_KERNEL);
+
+ if (!vport->txqs)
+ return -ENOMEM;
+
+ for (i = 0; i < vport->num_txq_grp; i++) {
+ struct idpf_txq_group *tx_grp = &vport->txq_grps[i];
+
+ for (j = 0; j < tx_grp->num_txq; j++, k++) {
+ vport->txqs[k] = tx_grp->txqs[j];
+ vport->txqs[k]->idx = k;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * idpf_vport_init_num_qs - Initialize number of queues
+ * @vport: vport to initialize queues
+ * @vport_msg: data to be filled into vport
+ */
+void idpf_vport_init_num_qs(struct idpf_vport *vport,
+ struct virtchnl2_create_vport *vport_msg)
+{
+ struct idpf_vport_user_config_data *config_data;
+ u16 idx = vport->idx;
+
+ config_data = &vport->adapter->vport_config[idx]->user_config;
+ vport->num_txq = le16_to_cpu(vport_msg->num_tx_q);
+ vport->num_rxq = le16_to_cpu(vport_msg->num_rx_q);
+ /* number of txqs and rxqs in config data will be zeros only in the
+ * driver load path and we dont update them there after
+ */
+ if (!config_data->num_req_tx_qs && !config_data->num_req_rx_qs) {
+ config_data->num_req_tx_qs = le16_to_cpu(vport_msg->num_tx_q);
+ config_data->num_req_rx_qs = le16_to_cpu(vport_msg->num_rx_q);
+ }
+
+ if (idpf_is_queue_model_split(vport->txq_model))
+ vport->num_complq = le16_to_cpu(vport_msg->num_tx_complq);
+ if (idpf_is_queue_model_split(vport->rxq_model))
+ vport->num_bufq = le16_to_cpu(vport_msg->num_rx_bufq);
+
+ /* Adjust number of buffer queues per Rx queue group. */
+ if (!idpf_is_queue_model_split(vport->rxq_model)) {
+ vport->num_bufqs_per_qgrp = 0;
+ vport->bufq_size[0] = IDPF_RX_BUF_2048;
+
+ return;
+ }
+
+ vport->num_bufqs_per_qgrp = IDPF_MAX_BUFQS_PER_RXQ_GRP;
+ /* Bufq[0] default buffer size is 4K
+ * Bufq[1] default buffer size is 2K
+ */
+ vport->bufq_size[0] = IDPF_RX_BUF_4096;
+ vport->bufq_size[1] = IDPF_RX_BUF_2048;
+}
+
+/**
+ * idpf_vport_calc_num_q_desc - Calculate number of queue groups
+ * @vport: vport to calculate q groups for
+ */
+void idpf_vport_calc_num_q_desc(struct idpf_vport *vport)
+{
+ struct idpf_vport_user_config_data *config_data;
+ int num_bufqs = vport->num_bufqs_per_qgrp;
+ u32 num_req_txq_desc, num_req_rxq_desc;
+ u16 idx = vport->idx;
+ int i;
+
+ config_data = &vport->adapter->vport_config[idx]->user_config;
+ num_req_txq_desc = config_data->num_req_txq_desc;
+ num_req_rxq_desc = config_data->num_req_rxq_desc;
+
+ vport->complq_desc_count = 0;
+ if (num_req_txq_desc) {
+ vport->txq_desc_count = num_req_txq_desc;
+ if (idpf_is_queue_model_split(vport->txq_model)) {
+ vport->complq_desc_count = num_req_txq_desc;
+ if (vport->complq_desc_count < IDPF_MIN_TXQ_COMPLQ_DESC)
+ vport->complq_desc_count =
+ IDPF_MIN_TXQ_COMPLQ_DESC;
+ }
+ } else {
+ vport->txq_desc_count = IDPF_DFLT_TX_Q_DESC_COUNT;
+ if (idpf_is_queue_model_split(vport->txq_model))
+ vport->complq_desc_count =
+ IDPF_DFLT_TX_COMPLQ_DESC_COUNT;
+ }
+
+ if (num_req_rxq_desc)
+ vport->rxq_desc_count = num_req_rxq_desc;
+ else
+ vport->rxq_desc_count = IDPF_DFLT_RX_Q_DESC_COUNT;
+
+ for (i = 0; i < num_bufqs; i++) {
+ if (!vport->bufq_desc_count[i])
+ vport->bufq_desc_count[i] =
+ IDPF_RX_BUFQ_DESC_COUNT(vport->rxq_desc_count,
+ num_bufqs);
+ }
+}
+
+/**
+ * idpf_vport_calc_total_qs - Calculate total number of queues
+ * @adapter: private data struct
+ * @vport_idx: vport idx to retrieve vport pointer
+ * @vport_msg: message to fill with data
+ * @max_q: vport max queue info
+ *
+ * Return 0 on success, error value on failure.
+ */
+int idpf_vport_calc_total_qs(struct idpf_adapter *adapter, u16 vport_idx,
+ struct virtchnl2_create_vport *vport_msg,
+ struct idpf_vport_max_q *max_q)
+{
+ int dflt_splitq_txq_grps = 0, dflt_singleq_txqs = 0;
+ int dflt_splitq_rxq_grps = 0, dflt_singleq_rxqs = 0;
+ u16 num_req_tx_qs = 0, num_req_rx_qs = 0;
+ struct idpf_vport_config *vport_config;
+ u16 num_txq_grps, num_rxq_grps;
+ u32 num_qs;
+
+ vport_config = adapter->vport_config[vport_idx];
+ if (vport_config) {
+ num_req_tx_qs = vport_config->user_config.num_req_tx_qs;
+ num_req_rx_qs = vport_config->user_config.num_req_rx_qs;
+ } else {
+ int num_cpus;
+
+ /* Restrict num of queues to cpus online as a default
+ * configuration to give best performance. User can always
+ * override to a max number of queues via ethtool.
+ */
+ num_cpus = num_online_cpus();
+
+ dflt_splitq_txq_grps = min_t(int, max_q->max_txq, num_cpus);
+ dflt_singleq_txqs = min_t(int, max_q->max_txq, num_cpus);
+ dflt_splitq_rxq_grps = min_t(int, max_q->max_rxq, num_cpus);
+ dflt_singleq_rxqs = min_t(int, max_q->max_rxq, num_cpus);
+ }
+
+ if (idpf_is_queue_model_split(le16_to_cpu(vport_msg->txq_model))) {
+ num_txq_grps = num_req_tx_qs ? num_req_tx_qs : dflt_splitq_txq_grps;
+ vport_msg->num_tx_complq = cpu_to_le16(num_txq_grps *
+ IDPF_COMPLQ_PER_GROUP);
+ vport_msg->num_tx_q = cpu_to_le16(num_txq_grps *
+ IDPF_DFLT_SPLITQ_TXQ_PER_GROUP);
+ } else {
+ num_txq_grps = IDPF_DFLT_SINGLEQ_TX_Q_GROUPS;
+ num_qs = num_txq_grps * (num_req_tx_qs ? num_req_tx_qs :
+ dflt_singleq_txqs);
+ vport_msg->num_tx_q = cpu_to_le16(num_qs);
+ vport_msg->num_tx_complq = 0;
+ }
+ if (idpf_is_queue_model_split(le16_to_cpu(vport_msg->rxq_model))) {
+ num_rxq_grps = num_req_rx_qs ? num_req_rx_qs : dflt_splitq_rxq_grps;
+ vport_msg->num_rx_bufq = cpu_to_le16(num_rxq_grps *
+ IDPF_MAX_BUFQS_PER_RXQ_GRP);
+ vport_msg->num_rx_q = cpu_to_le16(num_rxq_grps *
+ IDPF_DFLT_SPLITQ_RXQ_PER_GROUP);
+ } else {
+ num_rxq_grps = IDPF_DFLT_SINGLEQ_RX_Q_GROUPS;
+ num_qs = num_rxq_grps * (num_req_rx_qs ? num_req_rx_qs :
+ dflt_singleq_rxqs);
+ vport_msg->num_rx_q = cpu_to_le16(num_qs);
+ vport_msg->num_rx_bufq = 0;
+ }
+
+ return 0;
+}
+
+/**
+ * idpf_vport_calc_num_q_groups - Calculate number of queue groups
+ * @vport: vport to calculate q groups for
+ */
+void idpf_vport_calc_num_q_groups(struct idpf_vport *vport)
+{
+ if (idpf_is_queue_model_split(vport->txq_model))
+ vport->num_txq_grp = vport->num_txq;
+ else
+ vport->num_txq_grp = IDPF_DFLT_SINGLEQ_TX_Q_GROUPS;
+
+ if (idpf_is_queue_model_split(vport->rxq_model))
+ vport->num_rxq_grp = vport->num_rxq;
+ else
+ vport->num_rxq_grp = IDPF_DFLT_SINGLEQ_RX_Q_GROUPS;
+}
+
+/**
+ * idpf_vport_calc_numq_per_grp - Calculate number of queues per group
+ * @vport: vport to calculate queues for
+ * @num_txq: return parameter for number of TX queues
+ * @num_rxq: return parameter for number of RX queues
+ */
+static void idpf_vport_calc_numq_per_grp(struct idpf_vport *vport,
+ u16 *num_txq, u16 *num_rxq)
+{
+ if (idpf_is_queue_model_split(vport->txq_model))
+ *num_txq = IDPF_DFLT_SPLITQ_TXQ_PER_GROUP;
+ else
+ *num_txq = vport->num_txq;
+
+ if (idpf_is_queue_model_split(vport->rxq_model))
+ *num_rxq = IDPF_DFLT_SPLITQ_RXQ_PER_GROUP;
+ else
+ *num_rxq = vport->num_rxq;
+}
+
+/**
+ * idpf_rxq_set_descids - set the descids supported by this queue
+ * @vport: virtual port data structure
+ * @q: rx queue for which descids are set
+ *
+ */
+static void idpf_rxq_set_descids(struct idpf_vport *vport, struct idpf_queue *q)
+{
+ if (vport->rxq_model == VIRTCHNL2_QUEUE_MODEL_SPLIT) {
+ q->rxdids = VIRTCHNL2_RXDID_2_FLEX_SPLITQ_M;
+ } else {
+ if (vport->base_rxd)
+ q->rxdids = VIRTCHNL2_RXDID_1_32B_BASE_M;
+ else
+ q->rxdids = VIRTCHNL2_RXDID_2_FLEX_SQ_NIC_M;
+ }
+}
+
+/**
+ * idpf_txq_group_alloc - Allocate all txq group resources
+ * @vport: vport to allocate txq groups for
+ * @num_txq: number of txqs to allocate for each group
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int idpf_txq_group_alloc(struct idpf_vport *vport, u16 num_txq)
+{
+ int err, i;
+
+ vport->txq_grps = kcalloc(vport->num_txq_grp,
+ sizeof(*vport->txq_grps), GFP_KERNEL);
+ if (!vport->txq_grps)
+ return -ENOMEM;
+
+ for (i = 0; i < vport->num_txq_grp; i++) {
+ struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i];
+ struct idpf_adapter *adapter = vport->adapter;
+ int j;
+
+ tx_qgrp->vport = vport;
+ tx_qgrp->num_txq = num_txq;
+
+ for (j = 0; j < tx_qgrp->num_txq; j++) {
+ tx_qgrp->txqs[j] = kzalloc(sizeof(*tx_qgrp->txqs[j]),
+ GFP_KERNEL);
+ if (!tx_qgrp->txqs[j]) {
+ err = -ENOMEM;
+ goto err_alloc;
+ }
+ }
+
+ for (j = 0; j < tx_qgrp->num_txq; j++) {
+ struct idpf_queue *q = tx_qgrp->txqs[j];
+
+ q->dev = &adapter->pdev->dev;
+ q->desc_count = vport->txq_desc_count;
+ q->tx_max_bufs = idpf_get_max_tx_bufs(adapter);
+ q->tx_min_pkt_len = idpf_get_min_tx_pkt_len(adapter);
+ q->vport = vport;
+ q->txq_grp = tx_qgrp;
+ hash_init(q->sched_buf_hash);
+
+ if (!idpf_is_cap_ena(adapter, IDPF_OTHER_CAPS,
+ VIRTCHNL2_CAP_SPLITQ_QSCHED))
+ set_bit(__IDPF_Q_FLOW_SCH_EN, q->flags);
+ }
+
+ if (!idpf_is_queue_model_split(vport->txq_model))
+ continue;
+
+ tx_qgrp->complq = kcalloc(IDPF_COMPLQ_PER_GROUP,
+ sizeof(*tx_qgrp->complq),
+ GFP_KERNEL);
+ if (!tx_qgrp->complq) {
+ err = -ENOMEM;
+ goto err_alloc;
+ }
+
+ tx_qgrp->complq->dev = &adapter->pdev->dev;
+ tx_qgrp->complq->desc_count = vport->complq_desc_count;
+ tx_qgrp->complq->vport = vport;
+ tx_qgrp->complq->txq_grp = tx_qgrp;
+ }
+
+ return 0;
+
+err_alloc:
+ idpf_txq_group_rel(vport);
+
+ return err;
+}
+
+/**
+ * idpf_rxq_group_alloc - Allocate all rxq group resources
+ * @vport: vport to allocate rxq groups for
+ * @num_rxq: number of rxqs to allocate for each group
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int idpf_rxq_group_alloc(struct idpf_vport *vport, u16 num_rxq)
+{
+ struct idpf_adapter *adapter = vport->adapter;
+ struct idpf_queue *q;
+ int i, k, err = 0;
+
+ vport->rxq_grps = kcalloc(vport->num_rxq_grp,
+ sizeof(struct idpf_rxq_group), GFP_KERNEL);
+ if (!vport->rxq_grps)
+ return -ENOMEM;
+
+ for (i = 0; i < vport->num_rxq_grp; i++) {
+ struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i];
+ int j;
+
+ rx_qgrp->vport = vport;
+ if (!idpf_is_queue_model_split(vport->rxq_model)) {
+ rx_qgrp->singleq.num_rxq = num_rxq;
+ for (j = 0; j < num_rxq; j++) {
+ rx_qgrp->singleq.rxqs[j] =
+ kzalloc(sizeof(*rx_qgrp->singleq.rxqs[j]),
+ GFP_KERNEL);
+ if (!rx_qgrp->singleq.rxqs[j]) {
+ err = -ENOMEM;
+ goto err_alloc;
+ }
+ }
+ goto skip_splitq_rx_init;
+ }
+ rx_qgrp->splitq.num_rxq_sets = num_rxq;
+
+ for (j = 0; j < num_rxq; j++) {
+ rx_qgrp->splitq.rxq_sets[j] =
+ kzalloc(sizeof(struct idpf_rxq_set),
+ GFP_KERNEL);
+ if (!rx_qgrp->splitq.rxq_sets[j]) {
+ err = -ENOMEM;
+ goto err_alloc;
+ }
+ }
+
+ rx_qgrp->splitq.bufq_sets = kcalloc(vport->num_bufqs_per_qgrp,
+ sizeof(struct idpf_bufq_set),
+ GFP_KERNEL);
+ if (!rx_qgrp->splitq.bufq_sets) {
+ err = -ENOMEM;
+ goto err_alloc;
+ }
+
+ for (j = 0; j < vport->num_bufqs_per_qgrp; j++) {
+ struct idpf_bufq_set *bufq_set =
+ &rx_qgrp->splitq.bufq_sets[j];
+ int swq_size = sizeof(struct idpf_sw_queue);
+
+ q = &rx_qgrp->splitq.bufq_sets[j].bufq;
+ q->dev = &adapter->pdev->dev;
+ q->desc_count = vport->bufq_desc_count[j];
+ q->vport = vport;
+ q->rxq_grp = rx_qgrp;
+ q->idx = j;
+ q->rx_buf_size = vport->bufq_size[j];
+ q->rx_buffer_low_watermark = IDPF_LOW_WATERMARK;
+ q->rx_buf_stride = IDPF_RX_BUF_STRIDE;
+ if (idpf_is_cap_ena_all(adapter, IDPF_HSPLIT_CAPS,
+ IDPF_CAP_HSPLIT) &&
+ idpf_is_queue_model_split(vport->rxq_model)) {
+ q->rx_hsplit_en = true;
+ q->rx_hbuf_size = IDPF_HDR_BUF_SIZE;
+ }
+
+ bufq_set->num_refillqs = num_rxq;
+ bufq_set->refillqs = kcalloc(num_rxq, swq_size,
+ GFP_KERNEL);
+ if (!bufq_set->refillqs) {
+ err = -ENOMEM;
+ goto err_alloc;
+ }
+ for (k = 0; k < bufq_set->num_refillqs; k++) {
+ struct idpf_sw_queue *refillq =
+ &bufq_set->refillqs[k];
+
+ refillq->dev = &vport->adapter->pdev->dev;
+ refillq->desc_count =
+ vport->bufq_desc_count[j];
+ set_bit(__IDPF_Q_GEN_CHK, refillq->flags);
+ set_bit(__IDPF_RFLQ_GEN_CHK, refillq->flags);
+ refillq->ring = kcalloc(refillq->desc_count,
+ sizeof(u16),
+ GFP_KERNEL);
+ if (!refillq->ring) {
+ err = -ENOMEM;
+ goto err_alloc;
+ }
+ }
+ }
+
+skip_splitq_rx_init:
+ for (j = 0; j < num_rxq; j++) {
+ if (!idpf_is_queue_model_split(vport->rxq_model)) {
+ q = rx_qgrp->singleq.rxqs[j];
+ goto setup_rxq;
+ }
+ q = &rx_qgrp->splitq.rxq_sets[j]->rxq;
+ rx_qgrp->splitq.rxq_sets[j]->refillq0 =
+ &rx_qgrp->splitq.bufq_sets[0].refillqs[j];
+ if (vport->num_bufqs_per_qgrp > IDPF_SINGLE_BUFQ_PER_RXQ_GRP)
+ rx_qgrp->splitq.rxq_sets[j]->refillq1 =
+ &rx_qgrp->splitq.bufq_sets[1].refillqs[j];
+
+ if (idpf_is_cap_ena_all(adapter, IDPF_HSPLIT_CAPS,
+ IDPF_CAP_HSPLIT) &&
+ idpf_is_queue_model_split(vport->rxq_model)) {
+ q->rx_hsplit_en = true;
+ q->rx_hbuf_size = IDPF_HDR_BUF_SIZE;
+ }
+
+setup_rxq:
+ q->dev = &adapter->pdev->dev;
+ q->desc_count = vport->rxq_desc_count;
+ q->vport = vport;
+ q->rxq_grp = rx_qgrp;
+ q->idx = (i * num_rxq) + j;
+ /* In splitq mode, RXQ buffer size should be
+ * set to that of the first buffer queue
+ * associated with this RXQ
+ */
+ q->rx_buf_size = vport->bufq_size[0];
+ q->rx_buffer_low_watermark = IDPF_LOW_WATERMARK;
+ q->rx_max_pkt_size = vport->netdev->mtu +
+ IDPF_PACKET_HDR_PAD;
+ idpf_rxq_set_descids(vport, q);
+ }
+ }
+
+err_alloc:
+ if (err)
+ idpf_rxq_group_rel(vport);
+
+ return err;
+}
+
+/**
+ * idpf_vport_queue_grp_alloc_all - Allocate all queue groups/resources
+ * @vport: vport with qgrps to allocate
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int idpf_vport_queue_grp_alloc_all(struct idpf_vport *vport)
+{
+ u16 num_txq, num_rxq;
+ int err;
+
+ idpf_vport_calc_numq_per_grp(vport, &num_txq, &num_rxq);
+
+ err = idpf_txq_group_alloc(vport, num_txq);
+ if (err)
+ goto err_out;
+
+ err = idpf_rxq_group_alloc(vport, num_rxq);
+ if (err)
+ goto err_out;
+
+ return 0;
+
+err_out:
+ idpf_vport_queue_grp_rel_all(vport);
+
+ return err;
+}
+
+/**
+ * idpf_vport_queues_alloc - Allocate memory for all queues
+ * @vport: virtual port
+ *
+ * Allocate memory for queues associated with a vport. Returns 0 on success,
+ * negative on failure.
+ */
+int idpf_vport_queues_alloc(struct idpf_vport *vport)
+{
+ int err;
+
+ err = idpf_vport_queue_grp_alloc_all(vport);
+ if (err)
+ goto err_out;
+
+ err = idpf_tx_desc_alloc_all(vport);
+ if (err)
+ goto err_out;
+
+ err = idpf_rx_desc_alloc_all(vport);
+ if (err)
+ goto err_out;
+
+ err = idpf_vport_init_fast_path_txqs(vport);
+ if (err)
+ goto err_out;
+
+ return 0;
+
+err_out:
+ idpf_vport_queues_rel(vport);
+
+ return err;
+}
+
+/**
+ * idpf_tx_handle_sw_marker - Handle queue marker packet
+ * @tx_q: tx queue to handle software marker
+ */
+static void idpf_tx_handle_sw_marker(struct idpf_queue *tx_q)
+{
+ struct idpf_vport *vport = tx_q->vport;
+ int i;
+
+ clear_bit(__IDPF_Q_SW_MARKER, tx_q->flags);
+ /* Hardware must write marker packets to all queues associated with
+ * completion queues. So check if all queues received marker packets
+ */
+ for (i = 0; i < vport->num_txq; i++)
+ /* If we're still waiting on any other TXQ marker completions,
+ * just return now since we cannot wake up the marker_wq yet.
+ */
+ if (test_bit(__IDPF_Q_SW_MARKER, vport->txqs[i]->flags))
+ return;
+
+ /* Drain complete */
+ set_bit(IDPF_VPORT_SW_MARKER, vport->flags);
+ wake_up(&vport->sw_marker_wq);
+}
+
+/**
+ * idpf_tx_splitq_clean_hdr - Clean TX buffer resources for header portion of
+ * packet
+ * @tx_q: tx queue to clean buffer from
+ * @tx_buf: buffer to be cleaned
+ * @cleaned: pointer to stats struct to track cleaned packets/bytes
+ * @napi_budget: Used to determine if we are in netpoll
+ */
+static void idpf_tx_splitq_clean_hdr(struct idpf_queue *tx_q,
+ struct idpf_tx_buf *tx_buf,
+ struct idpf_cleaned_stats *cleaned,
+ int napi_budget)
+{
+ napi_consume_skb(tx_buf->skb, napi_budget);
+
+ if (dma_unmap_len(tx_buf, len)) {
+ dma_unmap_single(tx_q->dev,
+ dma_unmap_addr(tx_buf, dma),
+ dma_unmap_len(tx_buf, len),
+ DMA_TO_DEVICE);
+
+ dma_unmap_len_set(tx_buf, len, 0);
+ }
+
+ /* clear tx_buf data */
+ tx_buf->skb = NULL;
+
+ cleaned->bytes += tx_buf->bytecount;
+ cleaned->packets += tx_buf->gso_segs;
+}
+
+/**
+ * idpf_tx_clean_stashed_bufs - clean bufs that were stored for
+ * out of order completions
+ * @txq: queue to clean
+ * @compl_tag: completion tag of packet to clean (from completion descriptor)
+ * @cleaned: pointer to stats struct to track cleaned packets/bytes
+ * @budget: Used to determine if we are in netpoll
+ */
+static void idpf_tx_clean_stashed_bufs(struct idpf_queue *txq, u16 compl_tag,
+ struct idpf_cleaned_stats *cleaned,
+ int budget)
+{
+ struct idpf_tx_stash *stash;
+ struct hlist_node *tmp_buf;
+
+ /* Buffer completion */
+ hash_for_each_possible_safe(txq->sched_buf_hash, stash, tmp_buf,
+ hlist, compl_tag) {
+ if (unlikely(stash->buf.compl_tag != (int)compl_tag))
+ continue;
+
+ if (stash->buf.skb) {
+ idpf_tx_splitq_clean_hdr(txq, &stash->buf, cleaned,
+ budget);
+ } else if (dma_unmap_len(&stash->buf, len)) {
+ dma_unmap_page(txq->dev,
+ dma_unmap_addr(&stash->buf, dma),
+ dma_unmap_len(&stash->buf, len),
+ DMA_TO_DEVICE);
+ dma_unmap_len_set(&stash->buf, len, 0);
+ }
+
+ /* Push shadow buf back onto stack */
+ idpf_buf_lifo_push(&txq->buf_stack, stash);
+
+ hash_del(&stash->hlist);
+ }
+}
+
+/**
+ * idpf_stash_flow_sch_buffers - store buffer parameters info to be freed at a
+ * later time (only relevant for flow scheduling mode)
+ * @txq: Tx queue to clean
+ * @tx_buf: buffer to store
+ */
+static int idpf_stash_flow_sch_buffers(struct idpf_queue *txq,
+ struct idpf_tx_buf *tx_buf)
+{
+ struct idpf_tx_stash *stash;
+
+ if (unlikely(!dma_unmap_addr(tx_buf, dma) &&
+ !dma_unmap_len(tx_buf, len)))
+ return 0;
+
+ stash = idpf_buf_lifo_pop(&txq->buf_stack);
+ if (unlikely(!stash)) {
+ net_err_ratelimited("%s: No out-of-order TX buffers left!\n",
+ txq->vport->netdev->name);
+
+ return -ENOMEM;
+ }
+
+ /* Store buffer params in shadow buffer */
+ stash->buf.skb = tx_buf->skb;
+ stash->buf.bytecount = tx_buf->bytecount;
+ stash->buf.gso_segs = tx_buf->gso_segs;
+ dma_unmap_addr_set(&stash->buf, dma, dma_unmap_addr(tx_buf, dma));
+ dma_unmap_len_set(&stash->buf, len, dma_unmap_len(tx_buf, len));
+ stash->buf.compl_tag = tx_buf->compl_tag;
+
+ /* Add buffer to buf_hash table to be freed later */
+ hash_add(txq->sched_buf_hash, &stash->hlist, stash->buf.compl_tag);
+
+ memset(tx_buf, 0, sizeof(struct idpf_tx_buf));
+
+ /* Reinitialize buf_id portion of tag */
+ tx_buf->compl_tag = IDPF_SPLITQ_TX_INVAL_COMPL_TAG;
+
+ return 0;
+}
+
+#define idpf_tx_splitq_clean_bump_ntc(txq, ntc, desc, buf) \
+do { \
+ (ntc)++; \
+ if (unlikely(!(ntc))) { \
+ ntc -= (txq)->desc_count; \
+ buf = (txq)->tx_buf; \
+ desc = IDPF_FLEX_TX_DESC(txq, 0); \
+ } else { \
+ (buf)++; \
+ (desc)++; \
+ } \
+} while (0)
+
+/**
+ * idpf_tx_splitq_clean - Reclaim resources from buffer queue
+ * @tx_q: Tx queue to clean
+ * @end: queue index until which it should be cleaned
+ * @napi_budget: Used to determine if we are in netpoll
+ * @cleaned: pointer to stats struct to track cleaned packets/bytes
+ * @descs_only: true if queue is using flow-based scheduling and should
+ * not clean buffers at this time
+ *
+ * Cleans the queue descriptor ring. If the queue is using queue-based
+ * scheduling, the buffers will be cleaned as well. If the queue is using
+ * flow-based scheduling, only the descriptors are cleaned at this time.
+ * Separate packet completion events will be reported on the completion queue,
+ * and the buffers will be cleaned separately. The stats are not updated from
+ * this function when using flow-based scheduling.
+ */
+static void idpf_tx_splitq_clean(struct idpf_queue *tx_q, u16 end,
+ int napi_budget,
+ struct idpf_cleaned_stats *cleaned,
+ bool descs_only)
+{
+ union idpf_tx_flex_desc *next_pending_desc = NULL;
+ union idpf_tx_flex_desc *tx_desc;
+ s16 ntc = tx_q->next_to_clean;
+ struct idpf_tx_buf *tx_buf;
+
+ tx_desc = IDPF_FLEX_TX_DESC(tx_q, ntc);
+ next_pending_desc = IDPF_FLEX_TX_DESC(tx_q, end);
+ tx_buf = &tx_q->tx_buf[ntc];
+ ntc -= tx_q->desc_count;
+
+ while (tx_desc != next_pending_desc) {
+ union idpf_tx_flex_desc *eop_desc;
+
+ /* If this entry in the ring was used as a context descriptor,
+ * it's corresponding entry in the buffer ring will have an
+ * invalid completion tag since no buffer was used. We can
+ * skip this descriptor since there is no buffer to clean.
+ */
+ if (unlikely(tx_buf->compl_tag == IDPF_SPLITQ_TX_INVAL_COMPL_TAG))
+ goto fetch_next_txq_desc;
+
+ eop_desc = (union idpf_tx_flex_desc *)tx_buf->next_to_watch;
+
+ /* clear next_to_watch to prevent false hangs */
+ tx_buf->next_to_watch = NULL;
+
+ if (descs_only) {
+ if (idpf_stash_flow_sch_buffers(tx_q, tx_buf))
+ goto tx_splitq_clean_out;
+
+ while (tx_desc != eop_desc) {
+ idpf_tx_splitq_clean_bump_ntc(tx_q, ntc,
+ tx_desc, tx_buf);
+
+ if (dma_unmap_len(tx_buf, len)) {
+ if (idpf_stash_flow_sch_buffers(tx_q,
+ tx_buf))
+ goto tx_splitq_clean_out;
+ }
+ }
+ } else {
+ idpf_tx_splitq_clean_hdr(tx_q, tx_buf, cleaned,
+ napi_budget);
+
+ /* unmap remaining buffers */
+ while (tx_desc != eop_desc) {
+ idpf_tx_splitq_clean_bump_ntc(tx_q, ntc,
+ tx_desc, tx_buf);
+
+ /* unmap any remaining paged data */
+ if (dma_unmap_len(tx_buf, len)) {
+ dma_unmap_page(tx_q->dev,
+ dma_unmap_addr(tx_buf, dma),
+ dma_unmap_len(tx_buf, len),
+ DMA_TO_DEVICE);
+ dma_unmap_len_set(tx_buf, len, 0);
+ }
+ }
+ }
+
+fetch_next_txq_desc:
+ idpf_tx_splitq_clean_bump_ntc(tx_q, ntc, tx_desc, tx_buf);
+ }
+
+tx_splitq_clean_out:
+ ntc += tx_q->desc_count;
+ tx_q->next_to_clean = ntc;
+}
+
+#define idpf_tx_clean_buf_ring_bump_ntc(txq, ntc, buf) \
+do { \
+ (buf)++; \
+ (ntc)++; \
+ if (unlikely((ntc) == (txq)->desc_count)) { \
+ buf = (txq)->tx_buf; \
+ ntc = 0; \
+ } \
+} while (0)
+
+/**
+ * idpf_tx_clean_buf_ring - clean flow scheduling TX queue buffers
+ * @txq: queue to clean
+ * @compl_tag: completion tag of packet to clean (from completion descriptor)
+ * @cleaned: pointer to stats struct to track cleaned packets/bytes
+ * @budget: Used to determine if we are in netpoll
+ *
+ * Cleans all buffers associated with the input completion tag either from the
+ * TX buffer ring or from the hash table if the buffers were previously
+ * stashed. Returns the byte/segment count for the cleaned packet associated
+ * this completion tag.
+ */
+static bool idpf_tx_clean_buf_ring(struct idpf_queue *txq, u16 compl_tag,
+ struct idpf_cleaned_stats *cleaned,
+ int budget)
+{
+ u16 idx = compl_tag & txq->compl_tag_bufid_m;
+ struct idpf_tx_buf *tx_buf = NULL;
+ u16 ntc = txq->next_to_clean;
+ u16 num_descs_cleaned = 0;
+ u16 orig_idx = idx;
+
+ tx_buf = &txq->tx_buf[idx];
+
+ while (tx_buf->compl_tag == (int)compl_tag) {
+ if (tx_buf->skb) {
+ idpf_tx_splitq_clean_hdr(txq, tx_buf, cleaned, budget);
+ } else if (dma_unmap_len(tx_buf, len)) {
+ dma_unmap_page(txq->dev,
+ dma_unmap_addr(tx_buf, dma),
+ dma_unmap_len(tx_buf, len),
+ DMA_TO_DEVICE);
+ dma_unmap_len_set(tx_buf, len, 0);
+ }
+
+ memset(tx_buf, 0, sizeof(struct idpf_tx_buf));
+ tx_buf->compl_tag = IDPF_SPLITQ_TX_INVAL_COMPL_TAG;
+
+ num_descs_cleaned++;
+ idpf_tx_clean_buf_ring_bump_ntc(txq, idx, tx_buf);
+ }
+
+ /* If we didn't clean anything on the ring for this completion, there's
+ * nothing more to do.
+ */
+ if (unlikely(!num_descs_cleaned))
+ return false;
+
+ /* Otherwise, if we did clean a packet on the ring directly, it's safe
+ * to assume that the descriptors starting from the original
+ * next_to_clean up until the previously cleaned packet can be reused.
+ * Therefore, we will go back in the ring and stash any buffers still
+ * in the ring into the hash table to be cleaned later.
+ */
+ tx_buf = &txq->tx_buf[ntc];
+ while (tx_buf != &txq->tx_buf[orig_idx]) {
+ idpf_stash_flow_sch_buffers(txq, tx_buf);
+ idpf_tx_clean_buf_ring_bump_ntc(txq, ntc, tx_buf);
+ }
+
+ /* Finally, update next_to_clean to reflect the work that was just done
+ * on the ring, if any. If the packet was only cleaned from the hash
+ * table, the ring will not be impacted, therefore we should not touch
+ * next_to_clean. The updated idx is used here
+ */
+ txq->next_to_clean = idx;
+
+ return true;
+}
+
+/**
+ * idpf_tx_handle_rs_completion - clean a single packet and all of its buffers
+ * whether on the buffer ring or in the hash table
+ * @txq: Tx ring to clean
+ * @desc: pointer to completion queue descriptor to extract completion
+ * information from
+ * @cleaned: pointer to stats struct to track cleaned packets/bytes
+ * @budget: Used to determine if we are in netpoll
+ *
+ * Returns bytes/packets cleaned
+ */
+static void idpf_tx_handle_rs_completion(struct idpf_queue *txq,
+ struct idpf_splitq_tx_compl_desc *desc,
+ struct idpf_cleaned_stats *cleaned,
+ int budget)
+{
+ u16 compl_tag;
+
+ if (!test_bit(__IDPF_Q_FLOW_SCH_EN, txq->flags)) {
+ u16 head = le16_to_cpu(desc->q_head_compl_tag.q_head);
+
+ return idpf_tx_splitq_clean(txq, head, budget, cleaned, false);
+ }
+
+ compl_tag = le16_to_cpu(desc->q_head_compl_tag.compl_tag);
+
+ /* If we didn't clean anything on the ring, this packet must be
+ * in the hash table. Go clean it there.
+ */
+ if (!idpf_tx_clean_buf_ring(txq, compl_tag, cleaned, budget))
+ idpf_tx_clean_stashed_bufs(txq, compl_tag, cleaned, budget);
+}
+
+/**
+ * idpf_tx_clean_complq - Reclaim resources on completion queue
+ * @complq: Tx ring to clean
+ * @budget: Used to determine if we are in netpoll
+ * @cleaned: returns number of packets cleaned
+ *
+ * Returns true if there's any budget left (e.g. the clean is finished)
+ */
+static bool idpf_tx_clean_complq(struct idpf_queue *complq, int budget,
+ int *cleaned)
+{
+ struct idpf_splitq_tx_compl_desc *tx_desc;
+ struct idpf_vport *vport = complq->vport;
+ s16 ntc = complq->next_to_clean;
+ struct idpf_netdev_priv *np;
+ unsigned int complq_budget;
+ bool complq_ok = true;
+ int i;
+
+ complq_budget = vport->compln_clean_budget;
+ tx_desc = IDPF_SPLITQ_TX_COMPLQ_DESC(complq, ntc);
+ ntc -= complq->desc_count;
+
+ do {
+ struct idpf_cleaned_stats cleaned_stats = { };
+ struct idpf_queue *tx_q;
+ int rel_tx_qid;
+ u16 hw_head;
+ u8 ctype; /* completion type */
+ u16 gen;
+
+ /* if the descriptor isn't done, no work yet to do */
+ gen = (le16_to_cpu(tx_desc->qid_comptype_gen) &
+ IDPF_TXD_COMPLQ_GEN_M) >> IDPF_TXD_COMPLQ_GEN_S;
+ if (test_bit(__IDPF_Q_GEN_CHK, complq->flags) != gen)
+ break;
+
+ /* Find necessary info of TX queue to clean buffers */
+ rel_tx_qid = (le16_to_cpu(tx_desc->qid_comptype_gen) &
+ IDPF_TXD_COMPLQ_QID_M) >> IDPF_TXD_COMPLQ_QID_S;
+ if (rel_tx_qid >= complq->txq_grp->num_txq ||
+ !complq->txq_grp->txqs[rel_tx_qid]) {
+ dev_err(&complq->vport->adapter->pdev->dev,
+ "TxQ not found\n");
+ goto fetch_next_desc;
+ }
+ tx_q = complq->txq_grp->txqs[rel_tx_qid];
+
+ /* Determine completion type */
+ ctype = (le16_to_cpu(tx_desc->qid_comptype_gen) &
+ IDPF_TXD_COMPLQ_COMPL_TYPE_M) >>
+ IDPF_TXD_COMPLQ_COMPL_TYPE_S;
+ switch (ctype) {
+ case IDPF_TXD_COMPLT_RE:
+ hw_head = le16_to_cpu(tx_desc->q_head_compl_tag.q_head);
+
+ idpf_tx_splitq_clean(tx_q, hw_head, budget,
+ &cleaned_stats, true);
+ break;
+ case IDPF_TXD_COMPLT_RS:
+ idpf_tx_handle_rs_completion(tx_q, tx_desc,
+ &cleaned_stats, budget);
+ break;
+ case IDPF_TXD_COMPLT_SW_MARKER:
+ idpf_tx_handle_sw_marker(tx_q);
+ break;
+ default:
+ dev_err(&tx_q->vport->adapter->pdev->dev,
+ "Unknown TX completion type: %d\n",
+ ctype);
+ goto fetch_next_desc;
+ }
+
+ u64_stats_update_begin(&tx_q->stats_sync);
+ u64_stats_add(&tx_q->q_stats.tx.packets, cleaned_stats.packets);
+ u64_stats_add(&tx_q->q_stats.tx.bytes, cleaned_stats.bytes);
+ tx_q->cleaned_pkts += cleaned_stats.packets;
+ tx_q->cleaned_bytes += cleaned_stats.bytes;
+ complq->num_completions++;
+ u64_stats_update_end(&tx_q->stats_sync);
+
+fetch_next_desc:
+ tx_desc++;
+ ntc++;
+ if (unlikely(!ntc)) {
+ ntc -= complq->desc_count;
+ tx_desc = IDPF_SPLITQ_TX_COMPLQ_DESC(complq, 0);
+ change_bit(__IDPF_Q_GEN_CHK, complq->flags);
+ }
+
+ prefetch(tx_desc);
+
+ /* update budget accounting */
+ complq_budget--;
+ } while (likely(complq_budget));
+
+ /* Store the state of the complq to be used later in deciding if a
+ * TXQ can be started again
+ */
+ if (unlikely(IDPF_TX_COMPLQ_PENDING(complq->txq_grp) >
+ IDPF_TX_COMPLQ_OVERFLOW_THRESH(complq)))
+ complq_ok = false;
+
+ np = netdev_priv(complq->vport->netdev);
+ for (i = 0; i < complq->txq_grp->num_txq; ++i) {
+ struct idpf_queue *tx_q = complq->txq_grp->txqs[i];
+ struct netdev_queue *nq;
+ bool dont_wake;
+
+ /* We didn't clean anything on this queue, move along */
+ if (!tx_q->cleaned_bytes)
+ continue;
+
+ *cleaned += tx_q->cleaned_pkts;
+
+ /* Update BQL */
+ nq = netdev_get_tx_queue(tx_q->vport->netdev, tx_q->idx);
+
+ dont_wake = !complq_ok || IDPF_TX_BUF_RSV_LOW(tx_q) ||
+ np->state != __IDPF_VPORT_UP ||
+ !netif_carrier_ok(tx_q->vport->netdev);
+ /* Check if the TXQ needs to and can be restarted */
+ __netif_txq_completed_wake(nq, tx_q->cleaned_pkts, tx_q->cleaned_bytes,
+ IDPF_DESC_UNUSED(tx_q), IDPF_TX_WAKE_THRESH,
+ dont_wake);
+
+ /* Reset cleaned stats for the next time this queue is
+ * cleaned
+ */
+ tx_q->cleaned_bytes = 0;
+ tx_q->cleaned_pkts = 0;
+ }
+
+ ntc += complq->desc_count;
+ complq->next_to_clean = ntc;
+
+ return !!complq_budget;
+}
+
+/**
+ * idpf_tx_splitq_build_ctb - populate command tag and size for queue
+ * based scheduling descriptors
+ * @desc: descriptor to populate
+ * @params: pointer to tx params struct
+ * @td_cmd: command to be filled in desc
+ * @size: size of buffer
+ */
+void idpf_tx_splitq_build_ctb(union idpf_tx_flex_desc *desc,
+ struct idpf_tx_splitq_params *params,
+ u16 td_cmd, u16 size)
+{
+ desc->q.qw1.cmd_dtype =
+ cpu_to_le16(params->dtype & IDPF_FLEX_TXD_QW1_DTYPE_M);
+ desc->q.qw1.cmd_dtype |=
+ cpu_to_le16((td_cmd << IDPF_FLEX_TXD_QW1_CMD_S) &
+ IDPF_FLEX_TXD_QW1_CMD_M);
+ desc->q.qw1.buf_size = cpu_to_le16((u16)size);
+ desc->q.qw1.l2tags.l2tag1 = cpu_to_le16(params->td_tag);
+}
+
+/**
+ * idpf_tx_splitq_build_flow_desc - populate command tag and size for flow
+ * scheduling descriptors
+ * @desc: descriptor to populate
+ * @params: pointer to tx params struct
+ * @td_cmd: command to be filled in desc
+ * @size: size of buffer
+ */
+void idpf_tx_splitq_build_flow_desc(union idpf_tx_flex_desc *desc,
+ struct idpf_tx_splitq_params *params,
+ u16 td_cmd, u16 size)
+{
+ desc->flow.qw1.cmd_dtype = (u16)params->dtype | td_cmd;
+ desc->flow.qw1.rxr_bufsize = cpu_to_le16((u16)size);
+ desc->flow.qw1.compl_tag = cpu_to_le16(params->compl_tag);
+}
+
+/**
+ * idpf_tx_maybe_stop_common - 1st level check for common Tx stop conditions
+ * @tx_q: the queue to be checked
+ * @size: number of descriptors we want to assure is available
+ *
+ * Returns 0 if stop is not needed
+ */
+int idpf_tx_maybe_stop_common(struct idpf_queue *tx_q, unsigned int size)
+{
+ struct netdev_queue *nq;
+
+ if (likely(IDPF_DESC_UNUSED(tx_q) >= size))
+ return 0;
+
+ u64_stats_update_begin(&tx_q->stats_sync);
+ u64_stats_inc(&tx_q->q_stats.tx.q_busy);
+ u64_stats_update_end(&tx_q->stats_sync);
+
+ nq = netdev_get_tx_queue(tx_q->vport->netdev, tx_q->idx);
+
+ return netif_txq_maybe_stop(nq, IDPF_DESC_UNUSED(tx_q), size, size);
+}
+
+/**
+ * idpf_tx_maybe_stop_splitq - 1st level check for Tx splitq stop conditions
+ * @tx_q: the queue to be checked
+ * @descs_needed: number of descriptors required for this packet
+ *
+ * Returns 0 if stop is not needed
+ */
+static int idpf_tx_maybe_stop_splitq(struct idpf_queue *tx_q,
+ unsigned int descs_needed)
+{
+ if (idpf_tx_maybe_stop_common(tx_q, descs_needed))
+ goto splitq_stop;
+
+ /* If there are too many outstanding completions expected on the
+ * completion queue, stop the TX queue to give the device some time to
+ * catch up
+ */
+ if (unlikely(IDPF_TX_COMPLQ_PENDING(tx_q->txq_grp) >
+ IDPF_TX_COMPLQ_OVERFLOW_THRESH(tx_q->txq_grp->complq)))
+ goto splitq_stop;
+
+ /* Also check for available book keeping buffers; if we are low, stop
+ * the queue to wait for more completions
+ */
+ if (unlikely(IDPF_TX_BUF_RSV_LOW(tx_q)))
+ goto splitq_stop;
+
+ return 0;
+
+splitq_stop:
+ u64_stats_update_begin(&tx_q->stats_sync);
+ u64_stats_inc(&tx_q->q_stats.tx.q_busy);
+ u64_stats_update_end(&tx_q->stats_sync);
+ netif_stop_subqueue(tx_q->vport->netdev, tx_q->idx);
+
+ return -EBUSY;
+}
+
+/**
+ * idpf_tx_buf_hw_update - Store the new tail value
+ * @tx_q: queue to bump
+ * @val: new tail index
+ * @xmit_more: more skb's pending
+ *
+ * The naming here is special in that 'hw' signals that this function is about
+ * to do a register write to update our queue status. We know this can only
+ * mean tail here as HW should be owning head for TX.
+ */
+void idpf_tx_buf_hw_update(struct idpf_queue *tx_q, u32 val,
+ bool xmit_more)
+{
+ struct netdev_queue *nq;
+
+ nq = netdev_get_tx_queue(tx_q->vport->netdev, tx_q->idx);
+ tx_q->next_to_use = val;
+
+ idpf_tx_maybe_stop_common(tx_q, IDPF_TX_DESC_NEEDED);
+
+ /* Force memory writes to complete before letting h/w
+ * know there are new descriptors to fetch. (Only
+ * applicable for weak-ordered memory model archs,
+ * such as IA-64).
+ */
+ wmb();
+
+ /* notify HW of packet */
+ if (netif_xmit_stopped(nq) || !xmit_more)
+ writel(val, tx_q->tail);
+}
+
+/**
+ * idpf_tx_desc_count_required - calculate number of Tx descriptors needed
+ * @txq: queue to send buffer on
+ * @skb: send buffer
+ *
+ * Returns number of data descriptors needed for this skb.
+ */
+unsigned int idpf_tx_desc_count_required(struct idpf_queue *txq,
+ struct sk_buff *skb)
+{
+ const struct skb_shared_info *shinfo;
+ unsigned int count = 0, i;
+
+ count += !!skb_headlen(skb);
+
+ if (!skb_is_nonlinear(skb))
+ return count;
+
+ shinfo = skb_shinfo(skb);
+ for (i = 0; i < shinfo->nr_frags; i++) {
+ unsigned int size;
+
+ size = skb_frag_size(&shinfo->frags[i]);
+
+ /* We only need to use the idpf_size_to_txd_count check if the
+ * fragment is going to span multiple descriptors,
+ * i.e. size >= 16K.
+ */
+ if (size >= SZ_16K)
+ count += idpf_size_to_txd_count(size);
+ else
+ count++;
+ }
+
+ if (idpf_chk_linearize(skb, txq->tx_max_bufs, count)) {
+ if (__skb_linearize(skb))
+ return 0;
+
+ count = idpf_size_to_txd_count(skb->len);
+ u64_stats_update_begin(&txq->stats_sync);
+ u64_stats_inc(&txq->q_stats.tx.linearize);
+ u64_stats_update_end(&txq->stats_sync);
+ }
+
+ return count;
+}
+
+/**
+ * idpf_tx_dma_map_error - handle TX DMA map errors
+ * @txq: queue to send buffer on
+ * @skb: send buffer
+ * @first: original first buffer info buffer for packet
+ * @idx: starting point on ring to unwind
+ */
+void idpf_tx_dma_map_error(struct idpf_queue *txq, struct sk_buff *skb,
+ struct idpf_tx_buf *first, u16 idx)
+{
+ u64_stats_update_begin(&txq->stats_sync);
+ u64_stats_inc(&txq->q_stats.tx.dma_map_errs);
+ u64_stats_update_end(&txq->stats_sync);
+
+ /* clear dma mappings for failed tx_buf map */
+ for (;;) {
+ struct idpf_tx_buf *tx_buf;
+
+ tx_buf = &txq->tx_buf[idx];
+ idpf_tx_buf_rel(txq, tx_buf);
+ if (tx_buf == first)
+ break;
+ if (idx == 0)
+ idx = txq->desc_count;
+ idx--;
+ }
+
+ if (skb_is_gso(skb)) {
+ union idpf_tx_flex_desc *tx_desc;
+
+ /* If we failed a DMA mapping for a TSO packet, we will have
+ * used one additional descriptor for a context
+ * descriptor. Reset that here.
+ */
+ tx_desc = IDPF_FLEX_TX_DESC(txq, idx);
+ memset(tx_desc, 0, sizeof(struct idpf_flex_tx_ctx_desc));
+ if (idx == 0)
+ idx = txq->desc_count;
+ idx--;
+ }
+
+ /* Update tail in case netdev_xmit_more was previously true */
+ idpf_tx_buf_hw_update(txq, idx, false);
+}
+
+/**
+ * idpf_tx_splitq_bump_ntu - adjust NTU and generation
+ * @txq: the tx ring to wrap
+ * @ntu: ring index to bump
+ */
+static unsigned int idpf_tx_splitq_bump_ntu(struct idpf_queue *txq, u16 ntu)
+{
+ ntu++;
+
+ if (ntu == txq->desc_count) {
+ ntu = 0;
+ txq->compl_tag_cur_gen = IDPF_TX_ADJ_COMPL_TAG_GEN(txq);
+ }
+
+ return ntu;
+}
+
+/**
+ * idpf_tx_splitq_map - Build the Tx flex descriptor
+ * @tx_q: queue to send buffer on
+ * @params: pointer to splitq params struct
+ * @first: first buffer info buffer to use
+ *
+ * This function loops over the skb data pointed to by *first
+ * and gets a physical address for each memory location and programs
+ * it and the length into the transmit flex descriptor.
+ */
+static void idpf_tx_splitq_map(struct idpf_queue *tx_q,
+ struct idpf_tx_splitq_params *params,
+ struct idpf_tx_buf *first)
+{
+ union idpf_tx_flex_desc *tx_desc;
+ unsigned int data_len, size;
+ struct idpf_tx_buf *tx_buf;
+ u16 i = tx_q->next_to_use;
+ struct netdev_queue *nq;
+ struct sk_buff *skb;
+ skb_frag_t *frag;
+ u16 td_cmd = 0;
+ dma_addr_t dma;
+
+ skb = first->skb;
+
+ td_cmd = params->offload.td_cmd;
+
+ data_len = skb->data_len;
+ size = skb_headlen(skb);
+
+ tx_desc = IDPF_FLEX_TX_DESC(tx_q, i);
+
+ dma = dma_map_single(tx_q->dev, skb->data, size, DMA_TO_DEVICE);
+
+ tx_buf = first;
+
+ params->compl_tag =
+ (tx_q->compl_tag_cur_gen << tx_q->compl_tag_gen_s) | i;
+
+ for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
+ unsigned int max_data = IDPF_TX_MAX_DESC_DATA_ALIGNED;
+
+ if (dma_mapping_error(tx_q->dev, dma))
+ return idpf_tx_dma_map_error(tx_q, skb, first, i);
+
+ tx_buf->compl_tag = params->compl_tag;
+
+ /* record length, and DMA address */
+ dma_unmap_len_set(tx_buf, len, size);
+ dma_unmap_addr_set(tx_buf, dma, dma);
+
+ /* buf_addr is in same location for both desc types */
+ tx_desc->q.buf_addr = cpu_to_le64(dma);
+
+ /* The stack can send us fragments that are too large for a
+ * single descriptor i.e. frag size > 16K-1. We will need to
+ * split the fragment across multiple descriptors in this case.
+ * To adhere to HW alignment restrictions, the fragment needs
+ * to be split such that the first chunk ends on a 4K boundary
+ * and all subsequent chunks start on a 4K boundary. We still
+ * want to send as much data as possible though, so our
+ * intermediate descriptor chunk size will be 12K.
+ *
+ * For example, consider a 32K fragment mapped to DMA addr 2600.
+ * ------------------------------------------------------------
+ * | frag_size = 32K |
+ * ------------------------------------------------------------
+ * |2600 |16384 |28672
+ *
+ * 3 descriptors will be used for this fragment. The HW expects
+ * the descriptors to contain the following:
+ * ------------------------------------------------------------
+ * | size = 13784 | size = 12K | size = 6696 |
+ * | dma = 2600 | dma = 16384 | dma = 28672 |
+ * ------------------------------------------------------------
+ *
+ * We need to first adjust the max_data for the first chunk so
+ * that it ends on a 4K boundary. By negating the value of the
+ * DMA address and taking only the low order bits, we're
+ * effectively calculating
+ * 4K - (DMA addr lower order bits) =
+ * bytes to next boundary.
+ *
+ * Add that to our base aligned max_data (12K) and we have
+ * our first chunk size. In the example above,
+ * 13784 = 12K + (4096-2600)
+ *
+ * After guaranteeing the first chunk ends on a 4K boundary, we
+ * will give the intermediate descriptors 12K chunks and
+ * whatever is left to the final descriptor. This ensures that
+ * all descriptors used for the remaining chunks of the
+ * fragment start on a 4K boundary and we use as few
+ * descriptors as possible.
+ */
+ max_data += -dma & (IDPF_TX_MAX_READ_REQ_SIZE - 1);
+ while (unlikely(size > IDPF_TX_MAX_DESC_DATA)) {
+ idpf_tx_splitq_build_desc(tx_desc, params, td_cmd,
+ max_data);
+
+ tx_desc++;
+ i++;
+
+ if (i == tx_q->desc_count) {
+ tx_desc = IDPF_FLEX_TX_DESC(tx_q, 0);
+ i = 0;
+ tx_q->compl_tag_cur_gen =
+ IDPF_TX_ADJ_COMPL_TAG_GEN(tx_q);
+ }
+
+ /* Since this packet has a buffer that is going to span
+ * multiple descriptors, it's going to leave holes in
+ * to the TX buffer ring. To ensure these holes do not
+ * cause issues in the cleaning routines, we will clear
+ * them of any stale data and assign them the same
+ * completion tag as the current packet. Then when the
+ * packet is being cleaned, the cleaning routines will
+ * simply pass over these holes and finish cleaning the
+ * rest of the packet.
+ */
+ memset(&tx_q->tx_buf[i], 0, sizeof(struct idpf_tx_buf));
+ tx_q->tx_buf[i].compl_tag = params->compl_tag;
+
+ /* Adjust the DMA offset and the remaining size of the
+ * fragment. On the first iteration of this loop,
+ * max_data will be >= 12K and <= 16K-1. On any
+ * subsequent iteration of this loop, max_data will
+ * always be 12K.
+ */
+ dma += max_data;
+ size -= max_data;
+
+ /* Reset max_data since remaining chunks will be 12K
+ * at most
+ */
+ max_data = IDPF_TX_MAX_DESC_DATA_ALIGNED;
+
+ /* buf_addr is in same location for both desc types */
+ tx_desc->q.buf_addr = cpu_to_le64(dma);
+ }
+
+ if (!data_len)
+ break;
+
+ idpf_tx_splitq_build_desc(tx_desc, params, td_cmd, size);
+ tx_desc++;
+ i++;
+
+ if (i == tx_q->desc_count) {
+ tx_desc = IDPF_FLEX_TX_DESC(tx_q, 0);
+ i = 0;
+ tx_q->compl_tag_cur_gen = IDPF_TX_ADJ_COMPL_TAG_GEN(tx_q);
+ }
+
+ size = skb_frag_size(frag);
+ data_len -= size;
+
+ dma = skb_frag_dma_map(tx_q->dev, frag, 0, size,
+ DMA_TO_DEVICE);
+
+ tx_buf = &tx_q->tx_buf[i];
+ }
+
+ /* record SW timestamp if HW timestamp is not available */
+ skb_tx_timestamp(skb);
+
+ /* write last descriptor with RS and EOP bits */
+ td_cmd |= params->eop_cmd;
+ idpf_tx_splitq_build_desc(tx_desc, params, td_cmd, size);
+ i = idpf_tx_splitq_bump_ntu(tx_q, i);
+
+ /* set next_to_watch value indicating a packet is present */
+ first->next_to_watch = tx_desc;
+
+ tx_q->txq_grp->num_completions_pending++;
+
+ /* record bytecount for BQL */
+ nq = netdev_get_tx_queue(tx_q->vport->netdev, tx_q->idx);
+ netdev_tx_sent_queue(nq, first->bytecount);
+
+ idpf_tx_buf_hw_update(tx_q, i, netdev_xmit_more());
+}
+
+/**
+ * idpf_tso - computes mss and TSO length to prepare for TSO
+ * @skb: pointer to skb
+ * @off: pointer to struct that holds offload parameters
+ *
+ * Returns error (negative) if TSO was requested but cannot be applied to the
+ * given skb, 0 if TSO does not apply to the given skb, or 1 otherwise.
+ */
+int idpf_tso(struct sk_buff *skb, struct idpf_tx_offload_params *off)
+{
+ const struct skb_shared_info *shinfo = skb_shinfo(skb);
+ union {
+ struct iphdr *v4;
+ struct ipv6hdr *v6;
+ unsigned char *hdr;
+ } ip;
+ union {
+ struct tcphdr *tcp;
+ struct udphdr *udp;
+ unsigned char *hdr;
+ } l4;
+ u32 paylen, l4_start;
+ int err;
+
+ if (!shinfo->gso_size)
+ return 0;
+
+ err = skb_cow_head(skb, 0);
+ if (err < 0)
+ return err;
+
+ ip.hdr = skb_network_header(skb);
+ l4.hdr = skb_transport_header(skb);
+
+ /* initialize outer IP header fields */
+ if (ip.v4->version == 4) {
+ ip.v4->tot_len = 0;
+ ip.v4->check = 0;
+ } else if (ip.v6->version == 6) {
+ ip.v6->payload_len = 0;
+ }
+
+ l4_start = skb_transport_offset(skb);
+
+ /* remove payload length from checksum */
+ paylen = skb->len - l4_start;
+
+ switch (shinfo->gso_type & ~SKB_GSO_DODGY) {
+ case SKB_GSO_TCPV4:
+ case SKB_GSO_TCPV6:
+ csum_replace_by_diff(&l4.tcp->check,
+ (__force __wsum)htonl(paylen));
+ off->tso_hdr_len = __tcp_hdrlen(l4.tcp) + l4_start;
+ break;
+ case SKB_GSO_UDP_L4:
+ csum_replace_by_diff(&l4.udp->check,
+ (__force __wsum)htonl(paylen));
+ /* compute length of segmentation header */
+ off->tso_hdr_len = sizeof(struct udphdr) + l4_start;
+ l4.udp->len = htons(shinfo->gso_size + sizeof(struct udphdr));
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ off->tso_len = skb->len - off->tso_hdr_len;
+ off->mss = shinfo->gso_size;
+ off->tso_segs = shinfo->gso_segs;
+
+ off->tx_flags |= IDPF_TX_FLAGS_TSO;
+
+ return 1;
+}
+
+/**
+ * __idpf_chk_linearize - Check skb is not using too many buffers
+ * @skb: send buffer
+ * @max_bufs: maximum number of buffers
+ *
+ * For TSO we need to count the TSO header and segment payload separately. As
+ * such we need to check cases where we have max_bufs-1 fragments or more as we
+ * can potentially require max_bufs+1 DMA transactions, 1 for the TSO header, 1
+ * for the segment payload in the first descriptor, and another max_buf-1 for
+ * the fragments.
+ */
+static bool __idpf_chk_linearize(struct sk_buff *skb, unsigned int max_bufs)
+{
+ const struct skb_shared_info *shinfo = skb_shinfo(skb);
+ const skb_frag_t *frag, *stale;
+ int nr_frags, sum;
+
+ /* no need to check if number of frags is less than max_bufs - 1 */
+ nr_frags = shinfo->nr_frags;
+ if (nr_frags < (max_bufs - 1))
+ return false;
+
+ /* We need to walk through the list and validate that each group
+ * of max_bufs-2 fragments totals at least gso_size.
+ */
+ nr_frags -= max_bufs - 2;
+ frag = &shinfo->frags[0];
+
+ /* Initialize size to the negative value of gso_size minus 1. We use
+ * this as the worst case scenario in which the frag ahead of us only
+ * provides one byte which is why we are limited to max_bufs-2
+ * descriptors for a single transmit as the header and previous
+ * fragment are already consuming 2 descriptors.
+ */
+ sum = 1 - shinfo->gso_size;
+
+ /* Add size of frags 0 through 4 to create our initial sum */
+ sum += skb_frag_size(frag++);
+ sum += skb_frag_size(frag++);
+ sum += skb_frag_size(frag++);
+ sum += skb_frag_size(frag++);
+ sum += skb_frag_size(frag++);
+
+ /* Walk through fragments adding latest fragment, testing it, and
+ * then removing stale fragments from the sum.
+ */
+ for (stale = &shinfo->frags[0];; stale++) {
+ int stale_size = skb_frag_size(stale);
+
+ sum += skb_frag_size(frag++);
+
+ /* The stale fragment may present us with a smaller
+ * descriptor than the actual fragment size. To account
+ * for that we need to remove all the data on the front and
+ * figure out what the remainder would be in the last
+ * descriptor associated with the fragment.
+ */
+ if (stale_size > IDPF_TX_MAX_DESC_DATA) {
+ int align_pad = -(skb_frag_off(stale)) &
+ (IDPF_TX_MAX_READ_REQ_SIZE - 1);
+
+ sum -= align_pad;
+ stale_size -= align_pad;
+
+ do {
+ sum -= IDPF_TX_MAX_DESC_DATA_ALIGNED;
+ stale_size -= IDPF_TX_MAX_DESC_DATA_ALIGNED;
+ } while (stale_size > IDPF_TX_MAX_DESC_DATA);
+ }
+
+ /* if sum is negative we failed to make sufficient progress */
+ if (sum < 0)
+ return true;
+
+ if (!nr_frags--)
+ break;
+
+ sum -= stale_size;
+ }
+
+ return false;
+}
+
+/**
+ * idpf_chk_linearize - Check if skb exceeds max descriptors per packet
+ * @skb: send buffer
+ * @max_bufs: maximum scatter gather buffers for single packet
+ * @count: number of buffers this packet needs
+ *
+ * Make sure we don't exceed maximum scatter gather buffers for a single
+ * packet. We have to do some special checking around the boundary (max_bufs-1)
+ * if TSO is on since we need count the TSO header and payload separately.
+ * E.g.: a packet with 7 fragments can require 9 DMA transactions; 1 for TSO
+ * header, 1 for segment payload, and then 7 for the fragments.
+ */
+bool idpf_chk_linearize(struct sk_buff *skb, unsigned int max_bufs,
+ unsigned int count)
+{
+ if (likely(count < max_bufs))
+ return false;
+ if (skb_is_gso(skb))
+ return __idpf_chk_linearize(skb, max_bufs);
+
+ return count > max_bufs;
+}
+
+/**
+ * idpf_tx_splitq_get_ctx_desc - grab next desc and update buffer ring
+ * @txq: queue to put context descriptor on
+ *
+ * Since the TX buffer rings mimics the descriptor ring, update the tx buffer
+ * ring entry to reflect that this index is a context descriptor
+ */
+static struct idpf_flex_tx_ctx_desc *
+idpf_tx_splitq_get_ctx_desc(struct idpf_queue *txq)
+{
+ struct idpf_flex_tx_ctx_desc *desc;
+ int i = txq->next_to_use;
+
+ memset(&txq->tx_buf[i], 0, sizeof(struct idpf_tx_buf));
+ txq->tx_buf[i].compl_tag = IDPF_SPLITQ_TX_INVAL_COMPL_TAG;
+
+ /* grab the next descriptor */
+ desc = IDPF_FLEX_TX_CTX_DESC(txq, i);
+ txq->next_to_use = idpf_tx_splitq_bump_ntu(txq, i);
+
+ return desc;
+}
+
+/**
+ * idpf_tx_drop_skb - free the SKB and bump tail if necessary
+ * @tx_q: queue to send buffer on
+ * @skb: pointer to skb
+ */
+netdev_tx_t idpf_tx_drop_skb(struct idpf_queue *tx_q, struct sk_buff *skb)
+{
+ u64_stats_update_begin(&tx_q->stats_sync);
+ u64_stats_inc(&tx_q->q_stats.tx.skb_drops);
+ u64_stats_update_end(&tx_q->stats_sync);
+
+ idpf_tx_buf_hw_update(tx_q, tx_q->next_to_use, false);
+
+ dev_kfree_skb(skb);
+
+ return NETDEV_TX_OK;
+}
+
+/**
+ * idpf_tx_splitq_frame - Sends buffer on Tx ring using flex descriptors
+ * @skb: send buffer
+ * @tx_q: queue to send buffer on
+ *
+ * Returns NETDEV_TX_OK if sent, else an error code
+ */
+static netdev_tx_t idpf_tx_splitq_frame(struct sk_buff *skb,
+ struct idpf_queue *tx_q)
+{
+ struct idpf_tx_splitq_params tx_params = { };
+ struct idpf_tx_buf *first;
+ unsigned int count;
+ int tso;
+
+ count = idpf_tx_desc_count_required(tx_q, skb);
+ if (unlikely(!count))
+ return idpf_tx_drop_skb(tx_q, skb);
+
+ tso = idpf_tso(skb, &tx_params.offload);
+ if (unlikely(tso < 0))
+ return idpf_tx_drop_skb(tx_q, skb);
+
+ /* Check for splitq specific TX resources */
+ count += (IDPF_TX_DESCS_PER_CACHE_LINE + tso);
+ if (idpf_tx_maybe_stop_splitq(tx_q, count)) {
+ idpf_tx_buf_hw_update(tx_q, tx_q->next_to_use, false);
+
+ return NETDEV_TX_BUSY;
+ }
+
+ if (tso) {
+ /* If tso is needed, set up context desc */
+ struct idpf_flex_tx_ctx_desc *ctx_desc =
+ idpf_tx_splitq_get_ctx_desc(tx_q);
+
+ ctx_desc->tso.qw1.cmd_dtype =
+ cpu_to_le16(IDPF_TX_DESC_DTYPE_FLEX_TSO_CTX |
+ IDPF_TX_FLEX_CTX_DESC_CMD_TSO);
+ ctx_desc->tso.qw0.flex_tlen =
+ cpu_to_le32(tx_params.offload.tso_len &
+ IDPF_TXD_FLEX_CTX_TLEN_M);
+ ctx_desc->tso.qw0.mss_rt =
+ cpu_to_le16(tx_params.offload.mss &
+ IDPF_TXD_FLEX_CTX_MSS_RT_M);
+ ctx_desc->tso.qw0.hdr_len = tx_params.offload.tso_hdr_len;
+
+ u64_stats_update_begin(&tx_q->stats_sync);
+ u64_stats_inc(&tx_q->q_stats.tx.lso_pkts);
+ u64_stats_update_end(&tx_q->stats_sync);
+ }
+
+ /* record the location of the first descriptor for this packet */
+ first = &tx_q->tx_buf[tx_q->next_to_use];
+ first->skb = skb;
+
+ if (tso) {
+ first->gso_segs = tx_params.offload.tso_segs;
+ first->bytecount = skb->len +
+ ((first->gso_segs - 1) * tx_params.offload.tso_hdr_len);
+ } else {
+ first->gso_segs = 1;
+ first->bytecount = max_t(unsigned int, skb->len, ETH_ZLEN);
+ }
+
+ if (test_bit(__IDPF_Q_FLOW_SCH_EN, tx_q->flags)) {
+ tx_params.dtype = IDPF_TX_DESC_DTYPE_FLEX_FLOW_SCHE;
+ tx_params.eop_cmd = IDPF_TXD_FLEX_FLOW_CMD_EOP;
+ /* Set the RE bit to catch any packets that may have not been
+ * stashed during RS completion cleaning. MIN_GAP is set to
+ * MIN_RING size to ensure it will be set at least once each
+ * time around the ring.
+ */
+ if (!(tx_q->next_to_use % IDPF_TX_SPLITQ_RE_MIN_GAP)) {
+ tx_params.eop_cmd |= IDPF_TXD_FLEX_FLOW_CMD_RE;
+ tx_q->txq_grp->num_completions_pending++;
+ }
+
+ if (skb->ip_summed == CHECKSUM_PARTIAL)
+ tx_params.offload.td_cmd |= IDPF_TXD_FLEX_FLOW_CMD_CS_EN;
+
+ } else {
+ tx_params.dtype = IDPF_TX_DESC_DTYPE_FLEX_L2TAG1_L2TAG2;
+ tx_params.eop_cmd = IDPF_TXD_LAST_DESC_CMD;
+
+ if (skb->ip_summed == CHECKSUM_PARTIAL)
+ tx_params.offload.td_cmd |= IDPF_TX_FLEX_DESC_CMD_CS_EN;
+ }
+
+ idpf_tx_splitq_map(tx_q, &tx_params, first);
+
+ return NETDEV_TX_OK;
+}
+
+/**
+ * idpf_tx_splitq_start - Selects the right Tx queue to send buffer
+ * @skb: send buffer
+ * @netdev: network interface device structure
+ *
+ * Returns NETDEV_TX_OK if sent, else an error code
+ */
+netdev_tx_t idpf_tx_splitq_start(struct sk_buff *skb,
+ struct net_device *netdev)
+{
+ struct idpf_vport *vport = idpf_netdev_to_vport(netdev);
+ struct idpf_queue *tx_q;
+
+ if (unlikely(skb_get_queue_mapping(skb) >= vport->num_txq)) {
+ dev_kfree_skb_any(skb);
+
+ return NETDEV_TX_OK;
+ }
+
+ tx_q = vport->txqs[skb_get_queue_mapping(skb)];
+
+ /* hardware can't handle really short frames, hardware padding works
+ * beyond this point
+ */
+ if (skb_put_padto(skb, tx_q->tx_min_pkt_len)) {
+ idpf_tx_buf_hw_update(tx_q, tx_q->next_to_use, false);
+
+ return NETDEV_TX_OK;
+ }
+
+ return idpf_tx_splitq_frame(skb, tx_q);
+}
+
+/**
+ * idpf_ptype_to_htype - get a hash type
+ * @decoded: Decoded Rx packet type related fields
+ *
+ * Returns appropriate hash type (such as PKT_HASH_TYPE_L2/L3/L4) to be used by
+ * skb_set_hash based on PTYPE as parsed by HW Rx pipeline and is part of
+ * Rx desc.
+ */
+enum pkt_hash_types idpf_ptype_to_htype(const struct idpf_rx_ptype_decoded *decoded)
+{
+ if (!decoded->known)
+ return PKT_HASH_TYPE_NONE;
+ if (decoded->payload_layer == IDPF_RX_PTYPE_PAYLOAD_LAYER_PAY2 &&
+ decoded->inner_prot)
+ return PKT_HASH_TYPE_L4;
+ if (decoded->payload_layer == IDPF_RX_PTYPE_PAYLOAD_LAYER_PAY2 &&
+ decoded->outer_ip)
+ return PKT_HASH_TYPE_L3;
+ if (decoded->outer_ip == IDPF_RX_PTYPE_OUTER_L2)
+ return PKT_HASH_TYPE_L2;
+
+ return PKT_HASH_TYPE_NONE;
+}
+
+/**
+ * idpf_rx_hash - set the hash value in the skb
+ * @rxq: Rx descriptor ring packet is being transacted on
+ * @skb: pointer to current skb being populated
+ * @rx_desc: Receive descriptor
+ * @decoded: Decoded Rx packet type related fields
+ */
+static void idpf_rx_hash(struct idpf_queue *rxq, struct sk_buff *skb,
+ struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc,
+ struct idpf_rx_ptype_decoded *decoded)
+{
+ u32 hash;
+
+ if (unlikely(!idpf_is_feature_ena(rxq->vport, NETIF_F_RXHASH)))
+ return;
+
+ hash = le16_to_cpu(rx_desc->hash1) |
+ (rx_desc->ff2_mirrid_hash2.hash2 << 16) |
+ (rx_desc->hash3 << 24);
+
+ skb_set_hash(skb, hash, idpf_ptype_to_htype(decoded));
+}
+
+/**
+ * idpf_rx_csum - Indicate in skb if checksum is good
+ * @rxq: Rx descriptor ring packet is being transacted on
+ * @skb: pointer to current skb being populated
+ * @csum_bits: checksum fields extracted from the descriptor
+ * @decoded: Decoded Rx packet type related fields
+ *
+ * skb->protocol must be set before this function is called
+ */
+static void idpf_rx_csum(struct idpf_queue *rxq, struct sk_buff *skb,
+ struct idpf_rx_csum_decoded *csum_bits,
+ struct idpf_rx_ptype_decoded *decoded)
+{
+ bool ipv4, ipv6;
+
+ /* check if Rx checksum is enabled */
+ if (unlikely(!idpf_is_feature_ena(rxq->vport, NETIF_F_RXCSUM)))
+ return;
+
+ /* check if HW has decoded the packet and checksum */
+ if (!(csum_bits->l3l4p))
+ return;
+
+ ipv4 = IDPF_RX_PTYPE_TO_IPV(decoded, IDPF_RX_PTYPE_OUTER_IPV4);
+ ipv6 = IDPF_RX_PTYPE_TO_IPV(decoded, IDPF_RX_PTYPE_OUTER_IPV6);
+
+ if (ipv4 && (csum_bits->ipe || csum_bits->eipe))
+ goto checksum_fail;
+
+ if (ipv6 && csum_bits->ipv6exadd)
+ return;
+
+ /* check for L4 errors and handle packets that were not able to be
+ * checksummed
+ */
+ if (csum_bits->l4e)
+ goto checksum_fail;
+
+ /* Only report checksum unnecessary for ICMP, TCP, UDP, or SCTP */
+ switch (decoded->inner_prot) {
+ case IDPF_RX_PTYPE_INNER_PROT_ICMP:
+ case IDPF_RX_PTYPE_INNER_PROT_TCP:
+ case IDPF_RX_PTYPE_INNER_PROT_UDP:
+ if (!csum_bits->raw_csum_inv) {
+ u16 csum = csum_bits->raw_csum;
+
+ skb->csum = csum_unfold((__force __sum16)~swab16(csum));
+ skb->ip_summed = CHECKSUM_COMPLETE;
+ } else {
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ }
+ break;
+ case IDPF_RX_PTYPE_INNER_PROT_SCTP:
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ break;
+ default:
+ break;
+ }
+
+ return;
+
+checksum_fail:
+ u64_stats_update_begin(&rxq->stats_sync);
+ u64_stats_inc(&rxq->q_stats.rx.hw_csum_err);
+ u64_stats_update_end(&rxq->stats_sync);
+}
+
+/**
+ * idpf_rx_splitq_extract_csum_bits - Extract checksum bits from descriptor
+ * @rx_desc: receive descriptor
+ * @csum: structure to extract checksum fields
+ *
+ **/
+static void idpf_rx_splitq_extract_csum_bits(struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc,
+ struct idpf_rx_csum_decoded *csum)
+{
+ u8 qword0, qword1;
+
+ qword0 = rx_desc->status_err0_qw0;
+ qword1 = rx_desc->status_err0_qw1;
+
+ csum->ipe = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_IPE_M,
+ qword1);
+ csum->eipe = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_EIPE_M,
+ qword1);
+ csum->l4e = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_L4E_M,
+ qword1);
+ csum->l3l4p = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_L3L4P_M,
+ qword1);
+ csum->ipv6exadd = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_IPV6EXADD_M,
+ qword0);
+ csum->raw_csum_inv = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_ADV_RAW_CSUM_INV_M,
+ le16_to_cpu(rx_desc->ptype_err_fflags0));
+ csum->raw_csum = le16_to_cpu(rx_desc->misc.raw_cs);
+}
+
+/**
+ * idpf_rx_rsc - Set the RSC fields in the skb
+ * @rxq : Rx descriptor ring packet is being transacted on
+ * @skb : pointer to current skb being populated
+ * @rx_desc: Receive descriptor
+ * @decoded: Decoded Rx packet type related fields
+ *
+ * Return 0 on success and error code on failure
+ *
+ * Populate the skb fields with the total number of RSC segments, RSC payload
+ * length and packet type.
+ */
+static int idpf_rx_rsc(struct idpf_queue *rxq, struct sk_buff *skb,
+ struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc,
+ struct idpf_rx_ptype_decoded *decoded)
+{
+ u16 rsc_segments, rsc_seg_len;
+ bool ipv4, ipv6;
+ int len;
+
+ if (unlikely(!decoded->outer_ip))
+ return -EINVAL;
+
+ rsc_seg_len = le16_to_cpu(rx_desc->misc.rscseglen);
+ if (unlikely(!rsc_seg_len))
+ return -EINVAL;
+
+ ipv4 = IDPF_RX_PTYPE_TO_IPV(decoded, IDPF_RX_PTYPE_OUTER_IPV4);
+ ipv6 = IDPF_RX_PTYPE_TO_IPV(decoded, IDPF_RX_PTYPE_OUTER_IPV6);
+
+ if (unlikely(!(ipv4 ^ ipv6)))
+ return -EINVAL;
+
+ rsc_segments = DIV_ROUND_UP(skb->data_len, rsc_seg_len);
+ if (unlikely(rsc_segments == 1))
+ return 0;
+
+ NAPI_GRO_CB(skb)->count = rsc_segments;
+ skb_shinfo(skb)->gso_size = rsc_seg_len;
+
+ skb_reset_network_header(skb);
+ len = skb->len - skb_transport_offset(skb);
+
+ if (ipv4) {
+ struct iphdr *ipv4h = ip_hdr(skb);
+
+ skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
+
+ /* Reset and set transport header offset in skb */
+ skb_set_transport_header(skb, sizeof(struct iphdr));
+
+ /* Compute the TCP pseudo header checksum*/
+ tcp_hdr(skb)->check =
+ ~tcp_v4_check(len, ipv4h->saddr, ipv4h->daddr, 0);
+ } else {
+ struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+
+ skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
+ skb_set_transport_header(skb, sizeof(struct ipv6hdr));
+ tcp_hdr(skb)->check =
+ ~tcp_v6_check(len, &ipv6h->saddr, &ipv6h->daddr, 0);
+ }
+
+ tcp_gro_complete(skb);
+
+ u64_stats_update_begin(&rxq->stats_sync);
+ u64_stats_inc(&rxq->q_stats.rx.rsc_pkts);
+ u64_stats_update_end(&rxq->stats_sync);
+
+ return 0;
+}
+
+/**
+ * idpf_rx_process_skb_fields - Populate skb header fields from Rx descriptor
+ * @rxq: Rx descriptor ring packet is being transacted on
+ * @skb: pointer to current skb being populated
+ * @rx_desc: Receive descriptor
+ *
+ * This function checks the ring, descriptor, and packet information in
+ * order to populate the hash, checksum, protocol, and
+ * other fields within the skb.
+ */
+static int idpf_rx_process_skb_fields(struct idpf_queue *rxq,
+ struct sk_buff *skb,
+ struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc)
+{
+ struct idpf_rx_csum_decoded csum_bits = { };
+ struct idpf_rx_ptype_decoded decoded;
+ u16 rx_ptype;
+
+ rx_ptype = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_ADV_PTYPE_M,
+ le16_to_cpu(rx_desc->ptype_err_fflags0));
+
+ decoded = rxq->vport->rx_ptype_lkup[rx_ptype];
+ /* If we don't know the ptype we can't do anything else with it. Just
+ * pass it up the stack as-is.
+ */
+ if (!decoded.known)
+ return 0;
+
+ /* process RSS/hash */
+ idpf_rx_hash(rxq, skb, rx_desc, &decoded);
+
+ skb->protocol = eth_type_trans(skb, rxq->vport->netdev);
+
+ if (FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_ADV_RSC_M,
+ le16_to_cpu(rx_desc->hdrlen_flags)))
+ return idpf_rx_rsc(rxq, skb, rx_desc, &decoded);
+
+ idpf_rx_splitq_extract_csum_bits(rx_desc, &csum_bits);
+ idpf_rx_csum(rxq, skb, &csum_bits, &decoded);
+
+ return 0;
+}
+
+/**
+ * idpf_rx_add_frag - Add contents of Rx buffer to sk_buff as a frag
+ * @rx_buf: buffer containing page to add
+ * @skb: sk_buff to place the data into
+ * @size: packet length from rx_desc
+ *
+ * This function will add the data contained in rx_buf->page to the skb.
+ * It will just attach the page as a frag to the skb.
+ * The function will then update the page offset.
+ */
+void idpf_rx_add_frag(struct idpf_rx_buf *rx_buf, struct sk_buff *skb,
+ unsigned int size)
+{
+ skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buf->page,
+ rx_buf->page_offset, size, rx_buf->truesize);
+
+ rx_buf->page = NULL;
+}
+
+/**
+ * idpf_rx_construct_skb - Allocate skb and populate it
+ * @rxq: Rx descriptor queue
+ * @rx_buf: Rx buffer to pull data from
+ * @size: the length of the packet
+ *
+ * This function allocates an skb. It then populates it with the page
+ * data from the current receive descriptor, taking care to set up the
+ * skb correctly.
+ */
+struct sk_buff *idpf_rx_construct_skb(struct idpf_queue *rxq,
+ struct idpf_rx_buf *rx_buf,
+ unsigned int size)
+{
+ unsigned int headlen;
+ struct sk_buff *skb;
+ void *va;
+
+ va = page_address(rx_buf->page) + rx_buf->page_offset;
+
+ /* prefetch first cache line of first page */
+ net_prefetch(va);
+ /* allocate a skb to store the frags */
+ skb = __napi_alloc_skb(&rxq->q_vector->napi, IDPF_RX_HDR_SIZE,
+ GFP_ATOMIC);
+ if (unlikely(!skb)) {
+ idpf_rx_put_page(rx_buf);
+
+ return NULL;
+ }
+
+ skb_record_rx_queue(skb, rxq->idx);
+ skb_mark_for_recycle(skb);
+
+ /* Determine available headroom for copy */
+ headlen = size;
+ if (headlen > IDPF_RX_HDR_SIZE)
+ headlen = eth_get_headlen(skb->dev, va, IDPF_RX_HDR_SIZE);
+
+ /* align pull length to size of long to optimize memcpy performance */
+ memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long)));
+
+ /* if we exhaust the linear part then add what is left as a frag */
+ size -= headlen;
+ if (!size) {
+ idpf_rx_put_page(rx_buf);
+
+ return skb;
+ }
+
+ skb_add_rx_frag(skb, 0, rx_buf->page, rx_buf->page_offset + headlen,
+ size, rx_buf->truesize);
+
+ /* Since we're giving the page to the stack, clear our reference to it.
+ * We'll get a new one during buffer posting.
+ */
+ rx_buf->page = NULL;
+
+ return skb;
+}
+
+/**
+ * idpf_rx_hdr_construct_skb - Allocate skb and populate it from header buffer
+ * @rxq: Rx descriptor queue
+ * @va: Rx buffer to pull data from
+ * @size: the length of the packet
+ *
+ * This function allocates an skb. It then populates it with the page data from
+ * the current receive descriptor, taking care to set up the skb correctly.
+ * This specifically uses a header buffer to start building the skb.
+ */
+static struct sk_buff *idpf_rx_hdr_construct_skb(struct idpf_queue *rxq,
+ const void *va,
+ unsigned int size)
+{
+ struct sk_buff *skb;
+
+ /* allocate a skb to store the frags */
+ skb = __napi_alloc_skb(&rxq->q_vector->napi, size, GFP_ATOMIC);
+ if (unlikely(!skb))
+ return NULL;
+
+ skb_record_rx_queue(skb, rxq->idx);
+
+ memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long)));
+
+ /* More than likely, a payload fragment, which will use a page from
+ * page_pool will be added to the SKB so mark it for recycle
+ * preemptively. And if not, it's inconsequential.
+ */
+ skb_mark_for_recycle(skb);
+
+ return skb;
+}
+
+/**
+ * idpf_rx_splitq_test_staterr - tests bits in Rx descriptor
+ * status and error fields
+ * @stat_err_field: field from descriptor to test bits in
+ * @stat_err_bits: value to mask
+ *
+ */
+static bool idpf_rx_splitq_test_staterr(const u8 stat_err_field,
+ const u8 stat_err_bits)
+{
+ return !!(stat_err_field & stat_err_bits);
+}
+
+/**
+ * idpf_rx_splitq_is_eop - process handling of EOP buffers
+ * @rx_desc: Rx descriptor for current buffer
+ *
+ * If the buffer is an EOP buffer, this function exits returning true,
+ * otherwise return false indicating that this is in fact a non-EOP buffer.
+ */
+static bool idpf_rx_splitq_is_eop(struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc)
+{
+ /* if we are the last buffer then there is nothing else to do */
+ return likely(idpf_rx_splitq_test_staterr(rx_desc->status_err0_qw1,
+ IDPF_RXD_EOF_SPLITQ));
+}
+
+/**
+ * idpf_rx_splitq_clean - Clean completed descriptors from Rx queue
+ * @rxq: Rx descriptor queue to retrieve receive buffer queue
+ * @budget: Total limit on number of packets to process
+ *
+ * This function provides a "bounce buffer" approach to Rx interrupt
+ * processing. The advantage to this is that on systems that have
+ * expensive overhead for IOMMU access this provides a means of avoiding
+ * it by maintaining the mapping of the page to the system.
+ *
+ * Returns amount of work completed
+ */
+static int idpf_rx_splitq_clean(struct idpf_queue *rxq, int budget)
+{
+ int total_rx_bytes = 0, total_rx_pkts = 0;
+ struct idpf_queue *rx_bufq = NULL;
+ struct sk_buff *skb = rxq->skb;
+ u16 ntc = rxq->next_to_clean;
+
+ /* Process Rx packets bounded by budget */
+ while (likely(total_rx_pkts < budget)) {
+ struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc;
+ struct idpf_sw_queue *refillq = NULL;
+ struct idpf_rxq_set *rxq_set = NULL;
+ struct idpf_rx_buf *rx_buf = NULL;
+ union virtchnl2_rx_desc *desc;
+ unsigned int pkt_len = 0;
+ unsigned int hdr_len = 0;
+ u16 gen_id, buf_id = 0;
+ /* Header buffer overflow only valid for header split */
+ bool hbo = false;
+ int bufq_id;
+ u8 rxdid;
+
+ /* get the Rx desc from Rx queue based on 'next_to_clean' */
+ desc = IDPF_RX_DESC(rxq, ntc);
+ rx_desc = (struct virtchnl2_rx_flex_desc_adv_nic_3 *)desc;
+
+ /* This memory barrier is needed to keep us from reading
+ * any other fields out of the rx_desc
+ */
+ dma_rmb();
+
+ /* if the descriptor isn't done, no work yet to do */
+ gen_id = le16_to_cpu(rx_desc->pktlen_gen_bufq_id);
+ gen_id = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_ADV_GEN_M, gen_id);
+
+ if (test_bit(__IDPF_Q_GEN_CHK, rxq->flags) != gen_id)
+ break;
+
+ rxdid = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_ADV_RXDID_M,
+ rx_desc->rxdid_ucast);
+ if (rxdid != VIRTCHNL2_RXDID_2_FLEX_SPLITQ) {
+ IDPF_RX_BUMP_NTC(rxq, ntc);
+ u64_stats_update_begin(&rxq->stats_sync);
+ u64_stats_inc(&rxq->q_stats.rx.bad_descs);
+ u64_stats_update_end(&rxq->stats_sync);
+ continue;
+ }
+
+ pkt_len = le16_to_cpu(rx_desc->pktlen_gen_bufq_id);
+ pkt_len = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_ADV_LEN_PBUF_M,
+ pkt_len);
+
+ hbo = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_HBO_M,
+ rx_desc->status_err0_qw1);
+
+ if (unlikely(hbo)) {
+ /* If a header buffer overflow, occurs, i.e. header is
+ * too large to fit in the header split buffer, HW will
+ * put the entire packet, including headers, in the
+ * data/payload buffer.
+ */
+ u64_stats_update_begin(&rxq->stats_sync);
+ u64_stats_inc(&rxq->q_stats.rx.hsplit_buf_ovf);
+ u64_stats_update_end(&rxq->stats_sync);
+ goto bypass_hsplit;
+ }
+
+ hdr_len = le16_to_cpu(rx_desc->hdrlen_flags);
+ hdr_len = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_ADV_LEN_HDR_M,
+ hdr_len);
+
+bypass_hsplit:
+ bufq_id = le16_to_cpu(rx_desc->pktlen_gen_bufq_id);
+ bufq_id = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_ADV_BUFQ_ID_M,
+ bufq_id);
+
+ rxq_set = container_of(rxq, struct idpf_rxq_set, rxq);
+ if (!bufq_id)
+ refillq = rxq_set->refillq0;
+ else
+ refillq = rxq_set->refillq1;
+
+ /* retrieve buffer from the rxq */
+ rx_bufq = &rxq->rxq_grp->splitq.bufq_sets[bufq_id].bufq;
+
+ buf_id = le16_to_cpu(rx_desc->buf_id);
+
+ rx_buf = &rx_bufq->rx_buf.buf[buf_id];
+
+ if (hdr_len) {
+ const void *va = (u8 *)rx_bufq->rx_buf.hdr_buf_va +
+ (u32)buf_id * IDPF_HDR_BUF_SIZE;
+
+ skb = idpf_rx_hdr_construct_skb(rxq, va, hdr_len);
+ u64_stats_update_begin(&rxq->stats_sync);
+ u64_stats_inc(&rxq->q_stats.rx.hsplit_pkts);
+ u64_stats_update_end(&rxq->stats_sync);
+ }
+
+ if (pkt_len) {
+ idpf_rx_sync_for_cpu(rx_buf, pkt_len);
+ if (skb)
+ idpf_rx_add_frag(rx_buf, skb, pkt_len);
+ else
+ skb = idpf_rx_construct_skb(rxq, rx_buf,
+ pkt_len);
+ } else {
+ idpf_rx_put_page(rx_buf);
+ }
+
+ /* exit if we failed to retrieve a buffer */
+ if (!skb)
+ break;
+
+ idpf_rx_post_buf_refill(refillq, buf_id);
+
+ IDPF_RX_BUMP_NTC(rxq, ntc);
+ /* skip if it is non EOP desc */
+ if (!idpf_rx_splitq_is_eop(rx_desc))
+ continue;
+
+ /* pad skb if needed (to make valid ethernet frame) */
+ if (eth_skb_pad(skb)) {
+ skb = NULL;
+ continue;
+ }
+
+ /* probably a little skewed due to removing CRC */
+ total_rx_bytes += skb->len;
+
+ /* protocol */
+ if (unlikely(idpf_rx_process_skb_fields(rxq, skb, rx_desc))) {
+ dev_kfree_skb_any(skb);
+ skb = NULL;
+ continue;
+ }
+
+ /* send completed skb up the stack */
+ napi_gro_receive(&rxq->q_vector->napi, skb);
+ skb = NULL;
+
+ /* update budget accounting */
+ total_rx_pkts++;
+ }
+
+ rxq->next_to_clean = ntc;
+
+ rxq->skb = skb;
+ u64_stats_update_begin(&rxq->stats_sync);
+ u64_stats_add(&rxq->q_stats.rx.packets, total_rx_pkts);
+ u64_stats_add(&rxq->q_stats.rx.bytes, total_rx_bytes);
+ u64_stats_update_end(&rxq->stats_sync);
+
+ /* guarantee a trip back through this routine if there was a failure */
+ return total_rx_pkts;
+}
+
+/**
+ * idpf_rx_update_bufq_desc - Update buffer queue descriptor
+ * @bufq: Pointer to the buffer queue
+ * @refill_desc: SW Refill queue descriptor containing buffer ID
+ * @buf_desc: Buffer queue descriptor
+ *
+ * Return 0 on success and negative on failure.
+ */
+static int idpf_rx_update_bufq_desc(struct idpf_queue *bufq, u16 refill_desc,
+ struct virtchnl2_splitq_rx_buf_desc *buf_desc)
+{
+ struct idpf_rx_buf *buf;
+ dma_addr_t addr;
+ u16 buf_id;
+
+ buf_id = FIELD_GET(IDPF_RX_BI_BUFID_M, refill_desc);
+
+ buf = &bufq->rx_buf.buf[buf_id];
+
+ addr = idpf_alloc_page(bufq->pp, buf, bufq->rx_buf_size);
+ if (unlikely(addr == DMA_MAPPING_ERROR))
+ return -ENOMEM;
+
+ buf_desc->pkt_addr = cpu_to_le64(addr);
+ buf_desc->qword0.buf_id = cpu_to_le16(buf_id);
+
+ if (!bufq->rx_hsplit_en)
+ return 0;
+
+ buf_desc->hdr_addr = cpu_to_le64(bufq->rx_buf.hdr_buf_pa +
+ (u32)buf_id * IDPF_HDR_BUF_SIZE);
+
+ return 0;
+}
+
+/**
+ * idpf_rx_clean_refillq - Clean refill queue buffers
+ * @bufq: buffer queue to post buffers back to
+ * @refillq: refill queue to clean
+ *
+ * This function takes care of the buffer refill management
+ */
+static void idpf_rx_clean_refillq(struct idpf_queue *bufq,
+ struct idpf_sw_queue *refillq)
+{
+ struct virtchnl2_splitq_rx_buf_desc *buf_desc;
+ u16 bufq_nta = bufq->next_to_alloc;
+ u16 ntc = refillq->next_to_clean;
+ int cleaned = 0;
+ u16 gen;
+
+ buf_desc = IDPF_SPLITQ_RX_BUF_DESC(bufq, bufq_nta);
+
+ /* make sure we stop at ring wrap in the unlikely case ring is full */
+ while (likely(cleaned < refillq->desc_count)) {
+ u16 refill_desc = IDPF_SPLITQ_RX_BI_DESC(refillq, ntc);
+ bool failure;
+
+ gen = FIELD_GET(IDPF_RX_BI_GEN_M, refill_desc);
+ if (test_bit(__IDPF_RFLQ_GEN_CHK, refillq->flags) != gen)
+ break;
+
+ failure = idpf_rx_update_bufq_desc(bufq, refill_desc,
+ buf_desc);
+ if (failure)
+ break;
+
+ if (unlikely(++ntc == refillq->desc_count)) {
+ change_bit(__IDPF_RFLQ_GEN_CHK, refillq->flags);
+ ntc = 0;
+ }
+
+ if (unlikely(++bufq_nta == bufq->desc_count)) {
+ buf_desc = IDPF_SPLITQ_RX_BUF_DESC(bufq, 0);
+ bufq_nta = 0;
+ } else {
+ buf_desc++;
+ }
+
+ cleaned++;
+ }
+
+ if (!cleaned)
+ return;
+
+ /* We want to limit how many transactions on the bus we trigger with
+ * tail writes so we only do it in strides. It's also important we
+ * align the write to a multiple of 8 as required by HW.
+ */
+ if (((bufq->next_to_use <= bufq_nta ? 0 : bufq->desc_count) +
+ bufq_nta - bufq->next_to_use) >= IDPF_RX_BUF_POST_STRIDE)
+ idpf_rx_buf_hw_update(bufq, ALIGN_DOWN(bufq_nta,
+ IDPF_RX_BUF_POST_STRIDE));
+
+ /* update next to alloc since we have filled the ring */
+ refillq->next_to_clean = ntc;
+ bufq->next_to_alloc = bufq_nta;
+}
+
+/**
+ * idpf_rx_clean_refillq_all - Clean all refill queues
+ * @bufq: buffer queue with refill queues
+ *
+ * Iterates through all refill queues assigned to the buffer queue assigned to
+ * this vector. Returns true if clean is complete within budget, false
+ * otherwise.
+ */
+static void idpf_rx_clean_refillq_all(struct idpf_queue *bufq)
+{
+ struct idpf_bufq_set *bufq_set;
+ int i;
+
+ bufq_set = container_of(bufq, struct idpf_bufq_set, bufq);
+ for (i = 0; i < bufq_set->num_refillqs; i++)
+ idpf_rx_clean_refillq(bufq, &bufq_set->refillqs[i]);
+}
+
+/**
+ * idpf_vport_intr_clean_queues - MSIX mode Interrupt Handler
+ * @irq: interrupt number
+ * @data: pointer to a q_vector
+ *
+ */
+static irqreturn_t idpf_vport_intr_clean_queues(int __always_unused irq,
+ void *data)
+{
+ struct idpf_q_vector *q_vector = (struct idpf_q_vector *)data;
+
+ q_vector->total_events++;
+ napi_schedule(&q_vector->napi);
+
+ return IRQ_HANDLED;
+}
+
+/**
+ * idpf_vport_intr_napi_del_all - Unregister napi for all q_vectors in vport
+ * @vport: virtual port structure
+ *
+ */
+static void idpf_vport_intr_napi_del_all(struct idpf_vport *vport)
+{
+ u16 v_idx;
+
+ for (v_idx = 0; v_idx < vport->num_q_vectors; v_idx++)
+ netif_napi_del(&vport->q_vectors[v_idx].napi);
+}
+
+/**
+ * idpf_vport_intr_napi_dis_all - Disable NAPI for all q_vectors in the vport
+ * @vport: main vport structure
+ */
+static void idpf_vport_intr_napi_dis_all(struct idpf_vport *vport)
+{
+ int v_idx;
+
+ for (v_idx = 0; v_idx < vport->num_q_vectors; v_idx++)
+ napi_disable(&vport->q_vectors[v_idx].napi);
+}
+
+/**
+ * idpf_vport_intr_rel - Free memory allocated for interrupt vectors
+ * @vport: virtual port
+ *
+ * Free the memory allocated for interrupt vectors associated to a vport
+ */
+void idpf_vport_intr_rel(struct idpf_vport *vport)
+{
+ int i, j, v_idx;
+
+ for (v_idx = 0; v_idx < vport->num_q_vectors; v_idx++) {
+ struct idpf_q_vector *q_vector = &vport->q_vectors[v_idx];
+
+ kfree(q_vector->bufq);
+ q_vector->bufq = NULL;
+ kfree(q_vector->tx);
+ q_vector->tx = NULL;
+ kfree(q_vector->rx);
+ q_vector->rx = NULL;
+ }
+
+ /* Clean up the mapping of queues to vectors */
+ for (i = 0; i < vport->num_rxq_grp; i++) {
+ struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i];
+
+ if (idpf_is_queue_model_split(vport->rxq_model))
+ for (j = 0; j < rx_qgrp->splitq.num_rxq_sets; j++)
+ rx_qgrp->splitq.rxq_sets[j]->rxq.q_vector = NULL;
+ else
+ for (j = 0; j < rx_qgrp->singleq.num_rxq; j++)
+ rx_qgrp->singleq.rxqs[j]->q_vector = NULL;
+ }
+
+ if (idpf_is_queue_model_split(vport->txq_model))
+ for (i = 0; i < vport->num_txq_grp; i++)
+ vport->txq_grps[i].complq->q_vector = NULL;
+ else
+ for (i = 0; i < vport->num_txq_grp; i++)
+ for (j = 0; j < vport->txq_grps[i].num_txq; j++)
+ vport->txq_grps[i].txqs[j]->q_vector = NULL;
+
+ kfree(vport->q_vectors);
+ vport->q_vectors = NULL;
+}
+
+/**
+ * idpf_vport_intr_rel_irq - Free the IRQ association with the OS
+ * @vport: main vport structure
+ */
+static void idpf_vport_intr_rel_irq(struct idpf_vport *vport)
+{
+ struct idpf_adapter *adapter = vport->adapter;
+ int vector;
+
+ for (vector = 0; vector < vport->num_q_vectors; vector++) {
+ struct idpf_q_vector *q_vector = &vport->q_vectors[vector];
+ int irq_num, vidx;
+
+ /* free only the irqs that were actually requested */
+ if (!q_vector)
+ continue;
+
+ vidx = vport->q_vector_idxs[vector];
+ irq_num = adapter->msix_entries[vidx].vector;
+
+ /* clear the affinity_mask in the IRQ descriptor */
+ irq_set_affinity_hint(irq_num, NULL);
+ free_irq(irq_num, q_vector);
+ }
+}
+
+/**
+ * idpf_vport_intr_dis_irq_all - Disable all interrupt
+ * @vport: main vport structure
+ */
+static void idpf_vport_intr_dis_irq_all(struct idpf_vport *vport)
+{
+ struct idpf_q_vector *q_vector = vport->q_vectors;
+ int q_idx;
+
+ for (q_idx = 0; q_idx < vport->num_q_vectors; q_idx++)
+ writel(0, q_vector[q_idx].intr_reg.dyn_ctl);
+}
+
+/**
+ * idpf_vport_intr_buildreg_itr - Enable default interrupt generation settings
+ * @q_vector: pointer to q_vector
+ * @type: itr index
+ * @itr: itr value
+ */
+static u32 idpf_vport_intr_buildreg_itr(struct idpf_q_vector *q_vector,
+ const int type, u16 itr)
+{
+ u32 itr_val;
+
+ itr &= IDPF_ITR_MASK;
+ /* Don't clear PBA because that can cause lost interrupts that
+ * came in while we were cleaning/polling
+ */
+ itr_val = q_vector->intr_reg.dyn_ctl_intena_m |
+ (type << q_vector->intr_reg.dyn_ctl_itridx_s) |
+ (itr << (q_vector->intr_reg.dyn_ctl_intrvl_s - 1));
+
+ return itr_val;
+}
+
+/**
+ * idpf_update_dim_sample - Update dim sample with packets and bytes
+ * @q_vector: the vector associated with the interrupt
+ * @dim_sample: dim sample to update
+ * @dim: dim instance structure
+ * @packets: total packets
+ * @bytes: total bytes
+ *
+ * Update the dim sample with the packets and bytes which are passed to this
+ * function. Set the dim state appropriately if the dim settings gets stale.
+ */
+static void idpf_update_dim_sample(struct idpf_q_vector *q_vector,
+ struct dim_sample *dim_sample,
+ struct dim *dim, u64 packets, u64 bytes)
+{
+ dim_update_sample(q_vector->total_events, packets, bytes, dim_sample);
+ dim_sample->comp_ctr = 0;
+
+ /* if dim settings get stale, like when not updated for 1 second or
+ * longer, force it to start again. This addresses the frequent case
+ * of an idle queue being switched to by the scheduler.
+ */
+ if (ktime_ms_delta(dim_sample->time, dim->start_sample.time) >= HZ)
+ dim->state = DIM_START_MEASURE;
+}
+
+/**
+ * idpf_net_dim - Update net DIM algorithm
+ * @q_vector: the vector associated with the interrupt
+ *
+ * Create a DIM sample and notify net_dim() so that it can possibly decide
+ * a new ITR value based on incoming packets, bytes, and interrupts.
+ *
+ * This function is a no-op if the queue is not configured to dynamic ITR.
+ */
+static void idpf_net_dim(struct idpf_q_vector *q_vector)
+{
+ struct dim_sample dim_sample = { };
+ u64 packets, bytes;
+ u32 i;
+
+ if (!IDPF_ITR_IS_DYNAMIC(q_vector->tx_intr_mode))
+ goto check_rx_itr;
+
+ for (i = 0, packets = 0, bytes = 0; i < q_vector->num_txq; i++) {
+ struct idpf_queue *txq = q_vector->tx[i];
+ unsigned int start;
+
+ do {
+ start = u64_stats_fetch_begin(&txq->stats_sync);
+ packets += u64_stats_read(&txq->q_stats.tx.packets);
+ bytes += u64_stats_read(&txq->q_stats.tx.bytes);
+ } while (u64_stats_fetch_retry(&txq->stats_sync, start));
+ }
+
+ idpf_update_dim_sample(q_vector, &dim_sample, &q_vector->tx_dim,
+ packets, bytes);
+ net_dim(&q_vector->tx_dim, dim_sample);
+
+check_rx_itr:
+ if (!IDPF_ITR_IS_DYNAMIC(q_vector->rx_intr_mode))
+ return;
+
+ for (i = 0, packets = 0, bytes = 0; i < q_vector->num_rxq; i++) {
+ struct idpf_queue *rxq = q_vector->rx[i];
+ unsigned int start;
+
+ do {
+ start = u64_stats_fetch_begin(&rxq->stats_sync);
+ packets += u64_stats_read(&rxq->q_stats.rx.packets);
+ bytes += u64_stats_read(&rxq->q_stats.rx.bytes);
+ } while (u64_stats_fetch_retry(&rxq->stats_sync, start));
+ }
+
+ idpf_update_dim_sample(q_vector, &dim_sample, &q_vector->rx_dim,
+ packets, bytes);
+ net_dim(&q_vector->rx_dim, dim_sample);
+}
+
+/**
+ * idpf_vport_intr_update_itr_ena_irq - Update itr and re-enable MSIX interrupt
+ * @q_vector: q_vector for which itr is being updated and interrupt enabled
+ *
+ * Update the net_dim() algorithm and re-enable the interrupt associated with
+ * this vector.
+ */
+void idpf_vport_intr_update_itr_ena_irq(struct idpf_q_vector *q_vector)
+{
+ u32 intval;
+
+ /* net_dim() updates ITR out-of-band using a work item */
+ idpf_net_dim(q_vector);
+
+ intval = idpf_vport_intr_buildreg_itr(q_vector,
+ IDPF_NO_ITR_UPDATE_IDX, 0);
+
+ writel(intval, q_vector->intr_reg.dyn_ctl);
+}
+
+/**
+ * idpf_vport_intr_req_irq - get MSI-X vectors from the OS for the vport
+ * @vport: main vport structure
+ * @basename: name for the vector
+ */
+static int idpf_vport_intr_req_irq(struct idpf_vport *vport, char *basename)
+{
+ struct idpf_adapter *adapter = vport->adapter;
+ int vector, err, irq_num, vidx;
+ const char *vec_name;
+
+ for (vector = 0; vector < vport->num_q_vectors; vector++) {
+ struct idpf_q_vector *q_vector = &vport->q_vectors[vector];
+
+ vidx = vport->q_vector_idxs[vector];
+ irq_num = adapter->msix_entries[vidx].vector;
+
+ if (q_vector->num_rxq && q_vector->num_txq)
+ vec_name = "TxRx";
+ else if (q_vector->num_rxq)
+ vec_name = "Rx";
+ else if (q_vector->num_txq)
+ vec_name = "Tx";
+ else
+ continue;
+
+ q_vector->name = kasprintf(GFP_KERNEL, "%s-%s-%d",
+ basename, vec_name, vidx);
+
+ err = request_irq(irq_num, idpf_vport_intr_clean_queues, 0,
+ q_vector->name, q_vector);
+ if (err) {
+ netdev_err(vport->netdev,
+ "Request_irq failed, error: %d\n", err);
+ goto free_q_irqs;
+ }
+ /* assign the mask for this irq */
+ irq_set_affinity_hint(irq_num, &q_vector->affinity_mask);
+ }
+
+ return 0;
+
+free_q_irqs:
+ while (--vector >= 0) {
+ vidx = vport->q_vector_idxs[vector];
+ irq_num = adapter->msix_entries[vidx].vector;
+ free_irq(irq_num, &vport->q_vectors[vector]);
+ }
+
+ return err;
+}
+
+/**
+ * idpf_vport_intr_write_itr - Write ITR value to the ITR register
+ * @q_vector: q_vector structure
+ * @itr: Interrupt throttling rate
+ * @tx: Tx or Rx ITR
+ */
+void idpf_vport_intr_write_itr(struct idpf_q_vector *q_vector, u16 itr, bool tx)
+{
+ struct idpf_intr_reg *intr_reg;
+
+ if (tx && !q_vector->tx)
+ return;
+ else if (!tx && !q_vector->rx)
+ return;
+
+ intr_reg = &q_vector->intr_reg;
+ writel(ITR_REG_ALIGN(itr) >> IDPF_ITR_GRAN_S,
+ tx ? intr_reg->tx_itr : intr_reg->rx_itr);
+}
+
+/**
+ * idpf_vport_intr_ena_irq_all - Enable IRQ for the given vport
+ * @vport: main vport structure
+ */
+static void idpf_vport_intr_ena_irq_all(struct idpf_vport *vport)
+{
+ bool dynamic;
+ int q_idx;
+ u16 itr;
+
+ for (q_idx = 0; q_idx < vport->num_q_vectors; q_idx++) {
+ struct idpf_q_vector *qv = &vport->q_vectors[q_idx];
+
+ /* Set the initial ITR values */
+ if (qv->num_txq) {
+ dynamic = IDPF_ITR_IS_DYNAMIC(qv->tx_intr_mode);
+ itr = vport->tx_itr_profile[qv->tx_dim.profile_ix];
+ idpf_vport_intr_write_itr(qv, dynamic ?
+ itr : qv->tx_itr_value,
+ true);
+ }
+
+ if (qv->num_rxq) {
+ dynamic = IDPF_ITR_IS_DYNAMIC(qv->rx_intr_mode);
+ itr = vport->rx_itr_profile[qv->rx_dim.profile_ix];
+ idpf_vport_intr_write_itr(qv, dynamic ?
+ itr : qv->rx_itr_value,
+ false);
+ }
+
+ if (qv->num_txq || qv->num_rxq)
+ idpf_vport_intr_update_itr_ena_irq(qv);
+ }
+}
+
+/**
+ * idpf_vport_intr_deinit - Release all vector associations for the vport
+ * @vport: main vport structure
+ */
+void idpf_vport_intr_deinit(struct idpf_vport *vport)
+{
+ idpf_vport_intr_napi_dis_all(vport);
+ idpf_vport_intr_napi_del_all(vport);
+ idpf_vport_intr_dis_irq_all(vport);
+ idpf_vport_intr_rel_irq(vport);
+}
+
+/**
+ * idpf_tx_dim_work - Call back from the stack
+ * @work: work queue structure
+ */
+static void idpf_tx_dim_work(struct work_struct *work)
+{
+ struct idpf_q_vector *q_vector;
+ struct idpf_vport *vport;
+ struct dim *dim;
+ u16 itr;
+
+ dim = container_of(work, struct dim, work);
+ q_vector = container_of(dim, struct idpf_q_vector, tx_dim);
+ vport = q_vector->vport;
+
+ if (dim->profile_ix >= ARRAY_SIZE(vport->tx_itr_profile))
+ dim->profile_ix = ARRAY_SIZE(vport->tx_itr_profile) - 1;
+
+ /* look up the values in our local table */
+ itr = vport->tx_itr_profile[dim->profile_ix];
+
+ idpf_vport_intr_write_itr(q_vector, itr, true);
+
+ dim->state = DIM_START_MEASURE;
+}
+
+/**
+ * idpf_rx_dim_work - Call back from the stack
+ * @work: work queue structure
+ */
+static void idpf_rx_dim_work(struct work_struct *work)
+{
+ struct idpf_q_vector *q_vector;
+ struct idpf_vport *vport;
+ struct dim *dim;
+ u16 itr;
+
+ dim = container_of(work, struct dim, work);
+ q_vector = container_of(dim, struct idpf_q_vector, rx_dim);
+ vport = q_vector->vport;
+
+ if (dim->profile_ix >= ARRAY_SIZE(vport->rx_itr_profile))
+ dim->profile_ix = ARRAY_SIZE(vport->rx_itr_profile) - 1;
+
+ /* look up the values in our local table */
+ itr = vport->rx_itr_profile[dim->profile_ix];
+
+ idpf_vport_intr_write_itr(q_vector, itr, false);
+
+ dim->state = DIM_START_MEASURE;
+}
+
+/**
+ * idpf_init_dim - Set up dynamic interrupt moderation
+ * @qv: q_vector structure
+ */
+static void idpf_init_dim(struct idpf_q_vector *qv)
+{
+ INIT_WORK(&qv->tx_dim.work, idpf_tx_dim_work);
+ qv->tx_dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
+ qv->tx_dim.profile_ix = IDPF_DIM_DEFAULT_PROFILE_IX;
+
+ INIT_WORK(&qv->rx_dim.work, idpf_rx_dim_work);
+ qv->rx_dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
+ qv->rx_dim.profile_ix = IDPF_DIM_DEFAULT_PROFILE_IX;
+}
+
+/**
+ * idpf_vport_intr_napi_ena_all - Enable NAPI for all q_vectors in the vport
+ * @vport: main vport structure
+ */
+static void idpf_vport_intr_napi_ena_all(struct idpf_vport *vport)
+{
+ int q_idx;
+
+ for (q_idx = 0; q_idx < vport->num_q_vectors; q_idx++) {
+ struct idpf_q_vector *q_vector = &vport->q_vectors[q_idx];
+
+ idpf_init_dim(q_vector);
+ napi_enable(&q_vector->napi);
+ }
+}
+
+/**
+ * idpf_tx_splitq_clean_all- Clean completion queues
+ * @q_vec: queue vector
+ * @budget: Used to determine if we are in netpoll
+ * @cleaned: returns number of packets cleaned
+ *
+ * Returns false if clean is not complete else returns true
+ */
+static bool idpf_tx_splitq_clean_all(struct idpf_q_vector *q_vec,
+ int budget, int *cleaned)
+{
+ u16 num_txq = q_vec->num_txq;
+ bool clean_complete = true;
+ int i, budget_per_q;
+
+ if (unlikely(!num_txq))
+ return true;
+
+ budget_per_q = DIV_ROUND_UP(budget, num_txq);
+ for (i = 0; i < num_txq; i++)
+ clean_complete &= idpf_tx_clean_complq(q_vec->tx[i],
+ budget_per_q, cleaned);
+
+ return clean_complete;
+}
+
+/**
+ * idpf_rx_splitq_clean_all- Clean completion queues
+ * @q_vec: queue vector
+ * @budget: Used to determine if we are in netpoll
+ * @cleaned: returns number of packets cleaned
+ *
+ * Returns false if clean is not complete else returns true
+ */
+static bool idpf_rx_splitq_clean_all(struct idpf_q_vector *q_vec, int budget,
+ int *cleaned)
+{
+ u16 num_rxq = q_vec->num_rxq;
+ bool clean_complete = true;
+ int pkts_cleaned = 0;
+ int i, budget_per_q;
+
+ /* We attempt to distribute budget to each Rx queue fairly, but don't
+ * allow the budget to go below 1 because that would exit polling early.
+ */
+ budget_per_q = num_rxq ? max(budget / num_rxq, 1) : 0;
+ for (i = 0; i < num_rxq; i++) {
+ struct idpf_queue *rxq = q_vec->rx[i];
+ int pkts_cleaned_per_q;
+
+ pkts_cleaned_per_q = idpf_rx_splitq_clean(rxq, budget_per_q);
+ /* if we clean as many as budgeted, we must not be done */
+ if (pkts_cleaned_per_q >= budget_per_q)
+ clean_complete = false;
+ pkts_cleaned += pkts_cleaned_per_q;
+ }
+ *cleaned = pkts_cleaned;
+
+ for (i = 0; i < q_vec->num_bufq; i++)
+ idpf_rx_clean_refillq_all(q_vec->bufq[i]);
+
+ return clean_complete;
+}
+
+/**
+ * idpf_vport_splitq_napi_poll - NAPI handler
+ * @napi: struct from which you get q_vector
+ * @budget: budget provided by stack
+ */
+static int idpf_vport_splitq_napi_poll(struct napi_struct *napi, int budget)
+{
+ struct idpf_q_vector *q_vector =
+ container_of(napi, struct idpf_q_vector, napi);
+ bool clean_complete;
+ int work_done = 0;
+
+ /* Handle case where we are called by netpoll with a budget of 0 */
+ if (unlikely(!budget)) {
+ idpf_tx_splitq_clean_all(q_vector, budget, &work_done);
+
+ return 0;
+ }
+
+ clean_complete = idpf_rx_splitq_clean_all(q_vector, budget, &work_done);
+ clean_complete &= idpf_tx_splitq_clean_all(q_vector, budget, &work_done);
+
+ /* If work not completed, return budget and polling will return */
+ if (!clean_complete)
+ return budget;
+
+ work_done = min_t(int, work_done, budget - 1);
+
+ /* Exit the polling mode, but don't re-enable interrupts if stack might
+ * poll us due to busy-polling
+ */
+ if (likely(napi_complete_done(napi, work_done)))
+ idpf_vport_intr_update_itr_ena_irq(q_vector);
+
+ /* Switch to poll mode in the tear-down path after sending disable
+ * queues virtchnl message, as the interrupts will be disabled after
+ * that
+ */
+ if (unlikely(q_vector->num_txq && test_bit(__IDPF_Q_POLL_MODE,
+ q_vector->tx[0]->flags)))
+ return budget;
+ else
+ return work_done;
+}
+
+/**
+ * idpf_vport_intr_map_vector_to_qs - Map vectors to queues
+ * @vport: virtual port
+ *
+ * Mapping for vectors to queues
+ */
+static void idpf_vport_intr_map_vector_to_qs(struct idpf_vport *vport)
+{
+ u16 num_txq_grp = vport->num_txq_grp;
+ int i, j, qv_idx, bufq_vidx = 0;
+ struct idpf_rxq_group *rx_qgrp;
+ struct idpf_txq_group *tx_qgrp;
+ struct idpf_queue *q, *bufq;
+ u16 q_index;
+
+ for (i = 0, qv_idx = 0; i < vport->num_rxq_grp; i++) {
+ u16 num_rxq;
+
+ rx_qgrp = &vport->rxq_grps[i];
+ if (idpf_is_queue_model_split(vport->rxq_model))
+ num_rxq = rx_qgrp->splitq.num_rxq_sets;
+ else
+ num_rxq = rx_qgrp->singleq.num_rxq;
+
+ for (j = 0; j < num_rxq; j++) {
+ if (qv_idx >= vport->num_q_vectors)
+ qv_idx = 0;
+
+ if (idpf_is_queue_model_split(vport->rxq_model))
+ q = &rx_qgrp->splitq.rxq_sets[j]->rxq;
+ else
+ q = rx_qgrp->singleq.rxqs[j];
+ q->q_vector = &vport->q_vectors[qv_idx];
+ q_index = q->q_vector->num_rxq;
+ q->q_vector->rx[q_index] = q;
+ q->q_vector->num_rxq++;
+ qv_idx++;
+ }
+
+ if (idpf_is_queue_model_split(vport->rxq_model)) {
+ for (j = 0; j < vport->num_bufqs_per_qgrp; j++) {
+ bufq = &rx_qgrp->splitq.bufq_sets[j].bufq;
+ bufq->q_vector = &vport->q_vectors[bufq_vidx];
+ q_index = bufq->q_vector->num_bufq;
+ bufq->q_vector->bufq[q_index] = bufq;
+ bufq->q_vector->num_bufq++;
+ }
+ if (++bufq_vidx >= vport->num_q_vectors)
+ bufq_vidx = 0;
+ }
+ }
+
+ for (i = 0, qv_idx = 0; i < num_txq_grp; i++) {
+ u16 num_txq;
+
+ tx_qgrp = &vport->txq_grps[i];
+ num_txq = tx_qgrp->num_txq;
+
+ if (idpf_is_queue_model_split(vport->txq_model)) {
+ if (qv_idx >= vport->num_q_vectors)
+ qv_idx = 0;
+
+ q = tx_qgrp->complq;
+ q->q_vector = &vport->q_vectors[qv_idx];
+ q_index = q->q_vector->num_txq;
+ q->q_vector->tx[q_index] = q;
+ q->q_vector->num_txq++;
+ qv_idx++;
+ } else {
+ for (j = 0; j < num_txq; j++) {
+ if (qv_idx >= vport->num_q_vectors)
+ qv_idx = 0;
+
+ q = tx_qgrp->txqs[j];
+ q->q_vector = &vport->q_vectors[qv_idx];
+ q_index = q->q_vector->num_txq;
+ q->q_vector->tx[q_index] = q;
+ q->q_vector->num_txq++;
+
+ qv_idx++;
+ }
+ }
+ }
+}
+
+/**
+ * idpf_vport_intr_init_vec_idx - Initialize the vector indexes
+ * @vport: virtual port
+ *
+ * Initialize vector indexes with values returened over mailbox
+ */
+static int idpf_vport_intr_init_vec_idx(struct idpf_vport *vport)
+{
+ struct idpf_adapter *adapter = vport->adapter;
+ struct virtchnl2_alloc_vectors *ac;
+ u16 *vecids, total_vecs;
+ int i;
+
+ ac = adapter->req_vec_chunks;
+ if (!ac) {
+ for (i = 0; i < vport->num_q_vectors; i++)
+ vport->q_vectors[i].v_idx = vport->q_vector_idxs[i];
+
+ return 0;
+ }
+
+ total_vecs = idpf_get_reserved_vecs(adapter);
+ vecids = kcalloc(total_vecs, sizeof(u16), GFP_KERNEL);
+ if (!vecids)
+ return -ENOMEM;
+
+ idpf_get_vec_ids(adapter, vecids, total_vecs, &ac->vchunks);
+
+ for (i = 0; i < vport->num_q_vectors; i++)
+ vport->q_vectors[i].v_idx = vecids[vport->q_vector_idxs[i]];
+
+ kfree(vecids);
+
+ return 0;
+}
+
+/**
+ * idpf_vport_intr_napi_add_all- Register napi handler for all qvectors
+ * @vport: virtual port structure
+ */
+static void idpf_vport_intr_napi_add_all(struct idpf_vport *vport)
+{
+ int (*napi_poll)(struct napi_struct *napi, int budget);
+ u16 v_idx;
+
+ if (idpf_is_queue_model_split(vport->txq_model))
+ napi_poll = idpf_vport_splitq_napi_poll;
+ else
+ napi_poll = idpf_vport_singleq_napi_poll;
+
+ for (v_idx = 0; v_idx < vport->num_q_vectors; v_idx++) {
+ struct idpf_q_vector *q_vector = &vport->q_vectors[v_idx];
+
+ netif_napi_add(vport->netdev, &q_vector->napi, napi_poll);
+
+ /* only set affinity_mask if the CPU is online */
+ if (cpu_online(v_idx))
+ cpumask_set_cpu(v_idx, &q_vector->affinity_mask);
+ }
+}
+
+/**
+ * idpf_vport_intr_alloc - Allocate memory for interrupt vectors
+ * @vport: virtual port
+ *
+ * We allocate one q_vector per queue interrupt. If allocation fails we
+ * return -ENOMEM.
+ */
+int idpf_vport_intr_alloc(struct idpf_vport *vport)
+{
+ u16 txqs_per_vector, rxqs_per_vector, bufqs_per_vector;
+ struct idpf_q_vector *q_vector;
+ int v_idx, err;
+
+ vport->q_vectors = kcalloc(vport->num_q_vectors,
+ sizeof(struct idpf_q_vector), GFP_KERNEL);
+ if (!vport->q_vectors)
+ return -ENOMEM;
+
+ txqs_per_vector = DIV_ROUND_UP(vport->num_txq, vport->num_q_vectors);
+ rxqs_per_vector = DIV_ROUND_UP(vport->num_rxq, vport->num_q_vectors);
+ bufqs_per_vector = vport->num_bufqs_per_qgrp *
+ DIV_ROUND_UP(vport->num_rxq_grp,
+ vport->num_q_vectors);
+
+ for (v_idx = 0; v_idx < vport->num_q_vectors; v_idx++) {
+ q_vector = &vport->q_vectors[v_idx];
+ q_vector->vport = vport;
+
+ q_vector->tx_itr_value = IDPF_ITR_TX_DEF;
+ q_vector->tx_intr_mode = IDPF_ITR_DYNAMIC;
+ q_vector->tx_itr_idx = VIRTCHNL2_ITR_IDX_1;
+
+ q_vector->rx_itr_value = IDPF_ITR_RX_DEF;
+ q_vector->rx_intr_mode = IDPF_ITR_DYNAMIC;
+ q_vector->rx_itr_idx = VIRTCHNL2_ITR_IDX_0;
+
+ q_vector->tx = kcalloc(txqs_per_vector,
+ sizeof(struct idpf_queue *),
+ GFP_KERNEL);
+ if (!q_vector->tx) {
+ err = -ENOMEM;
+ goto error;
+ }
+
+ q_vector->rx = kcalloc(rxqs_per_vector,
+ sizeof(struct idpf_queue *),
+ GFP_KERNEL);
+ if (!q_vector->rx) {
+ err = -ENOMEM;
+ goto error;
+ }
+
+ if (!idpf_is_queue_model_split(vport->rxq_model))
+ continue;
+
+ q_vector->bufq = kcalloc(bufqs_per_vector,
+ sizeof(struct idpf_queue *),
+ GFP_KERNEL);
+ if (!q_vector->bufq) {
+ err = -ENOMEM;
+ goto error;
+ }
+ }
+
+ return 0;
+
+error:
+ idpf_vport_intr_rel(vport);
+
+ return err;
+}
+
+/**
+ * idpf_vport_intr_init - Setup all vectors for the given vport
+ * @vport: virtual port
+ *
+ * Returns 0 on success or negative on failure
+ */
+int idpf_vport_intr_init(struct idpf_vport *vport)
+{
+ char *int_name;
+ int err;
+
+ err = idpf_vport_intr_init_vec_idx(vport);
+ if (err)
+ return err;
+
+ idpf_vport_intr_map_vector_to_qs(vport);
+ idpf_vport_intr_napi_add_all(vport);
+ idpf_vport_intr_napi_ena_all(vport);
+
+ err = vport->adapter->dev_ops.reg_ops.intr_reg_init(vport);
+ if (err)
+ goto unroll_vectors_alloc;
+
+ int_name = kasprintf(GFP_KERNEL, "%s-%s",
+ dev_driver_string(&vport->adapter->pdev->dev),
+ vport->netdev->name);
+
+ err = idpf_vport_intr_req_irq(vport, int_name);
+ if (err)
+ goto unroll_vectors_alloc;
+
+ idpf_vport_intr_ena_irq_all(vport);
+
+ return 0;
+
+unroll_vectors_alloc:
+ idpf_vport_intr_napi_dis_all(vport);
+ idpf_vport_intr_napi_del_all(vport);
+
+ return err;
+}
+
+/**
+ * idpf_config_rss - Send virtchnl messages to configure RSS
+ * @vport: virtual port
+ *
+ * Return 0 on success, negative on failure
+ */
+int idpf_config_rss(struct idpf_vport *vport)
+{
+ int err;
+
+ err = idpf_send_get_set_rss_key_msg(vport, false);
+ if (err)
+ return err;
+
+ return idpf_send_get_set_rss_lut_msg(vport, false);
+}
+
+/**
+ * idpf_fill_dflt_rss_lut - Fill the indirection table with the default values
+ * @vport: virtual port structure
+ */
+static void idpf_fill_dflt_rss_lut(struct idpf_vport *vport)
+{
+ struct idpf_adapter *adapter = vport->adapter;
+ u16 num_active_rxq = vport->num_rxq;
+ struct idpf_rss_data *rss_data;
+ int i;
+
+ rss_data = &adapter->vport_config[vport->idx]->user_config.rss_data;
+
+ for (i = 0; i < rss_data->rss_lut_size; i++) {
+ rss_data->rss_lut[i] = i % num_active_rxq;
+ rss_data->cached_lut[i] = rss_data->rss_lut[i];
+ }
+}
+
+/**
+ * idpf_init_rss - Allocate and initialize RSS resources
+ * @vport: virtual port
+ *
+ * Return 0 on success, negative on failure
+ */
+int idpf_init_rss(struct idpf_vport *vport)
+{
+ struct idpf_adapter *adapter = vport->adapter;
+ struct idpf_rss_data *rss_data;
+ u32 lut_size;
+
+ rss_data = &adapter->vport_config[vport->idx]->user_config.rss_data;
+
+ lut_size = rss_data->rss_lut_size * sizeof(u32);
+ rss_data->rss_lut = kzalloc(lut_size, GFP_KERNEL);
+ if (!rss_data->rss_lut)
+ return -ENOMEM;
+
+ rss_data->cached_lut = kzalloc(lut_size, GFP_KERNEL);
+ if (!rss_data->cached_lut) {
+ kfree(rss_data->rss_lut);
+ rss_data->rss_lut = NULL;
+
+ return -ENOMEM;
+ }
+
+ /* Fill the default RSS lut values */
+ idpf_fill_dflt_rss_lut(vport);
+
+ return idpf_config_rss(vport);
+}
+
+/**
+ * idpf_deinit_rss - Release RSS resources
+ * @vport: virtual port
+ */
+void idpf_deinit_rss(struct idpf_vport *vport)
+{
+ struct idpf_adapter *adapter = vport->adapter;
+ struct idpf_rss_data *rss_data;
+
+ rss_data = &adapter->vport_config[vport->idx]->user_config.rss_data;
+ kfree(rss_data->cached_lut);
+ rss_data->cached_lut = NULL;
+ kfree(rss_data->rss_lut);
+ rss_data->rss_lut = NULL;
+}
diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.h b/drivers/net/ethernet/intel/idpf/idpf_txrx.h
new file mode 100644
index 000000000000..df76493faa75
--- /dev/null
+++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.h
@@ -0,0 +1,1023 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright (C) 2023 Intel Corporation */
+
+#ifndef _IDPF_TXRX_H_
+#define _IDPF_TXRX_H_
+
+#include <net/page_pool/helpers.h>
+#include <net/tcp.h>
+#include <net/netdev_queues.h>
+
+#define IDPF_LARGE_MAX_Q 256
+#define IDPF_MAX_Q 16
+#define IDPF_MIN_Q 2
+/* Mailbox Queue */
+#define IDPF_MAX_MBXQ 1
+
+#define IDPF_MIN_TXQ_DESC 64
+#define IDPF_MIN_RXQ_DESC 64
+#define IDPF_MIN_TXQ_COMPLQ_DESC 256
+#define IDPF_MAX_QIDS 256
+
+/* Number of descriptors in a queue should be a multiple of 32. RX queue
+ * descriptors alone should be a multiple of IDPF_REQ_RXQ_DESC_MULTIPLE
+ * to achieve BufQ descriptors aligned to 32
+ */
+#define IDPF_REQ_DESC_MULTIPLE 32
+#define IDPF_REQ_RXQ_DESC_MULTIPLE (IDPF_MAX_BUFQS_PER_RXQ_GRP * 32)
+#define IDPF_MIN_TX_DESC_NEEDED (MAX_SKB_FRAGS + 6)
+#define IDPF_TX_WAKE_THRESH ((u16)IDPF_MIN_TX_DESC_NEEDED * 2)
+
+#define IDPF_MAX_DESCS 8160
+#define IDPF_MAX_TXQ_DESC ALIGN_DOWN(IDPF_MAX_DESCS, IDPF_REQ_DESC_MULTIPLE)
+#define IDPF_MAX_RXQ_DESC ALIGN_DOWN(IDPF_MAX_DESCS, IDPF_REQ_RXQ_DESC_MULTIPLE)
+#define MIN_SUPPORT_TXDID (\
+ VIRTCHNL2_TXDID_FLEX_FLOW_SCHED |\
+ VIRTCHNL2_TXDID_FLEX_TSO_CTX)
+
+#define IDPF_DFLT_SINGLEQ_TX_Q_GROUPS 1
+#define IDPF_DFLT_SINGLEQ_RX_Q_GROUPS 1
+#define IDPF_DFLT_SINGLEQ_TXQ_PER_GROUP 4
+#define IDPF_DFLT_SINGLEQ_RXQ_PER_GROUP 4
+
+#define IDPF_COMPLQ_PER_GROUP 1
+#define IDPF_SINGLE_BUFQ_PER_RXQ_GRP 1
+#define IDPF_MAX_BUFQS_PER_RXQ_GRP 2
+#define IDPF_BUFQ2_ENA 1
+#define IDPF_NUMQ_PER_CHUNK 1
+
+#define IDPF_DFLT_SPLITQ_TXQ_PER_GROUP 1
+#define IDPF_DFLT_SPLITQ_RXQ_PER_GROUP 1
+
+/* Default vector sharing */
+#define IDPF_MBX_Q_VEC 1
+#define IDPF_MIN_Q_VEC 1
+
+#define IDPF_DFLT_TX_Q_DESC_COUNT 512
+#define IDPF_DFLT_TX_COMPLQ_DESC_COUNT 512
+#define IDPF_DFLT_RX_Q_DESC_COUNT 512
+
+/* IMPORTANT: We absolutely _cannot_ have more buffers in the system than a
+ * given RX completion queue has descriptors. This includes _ALL_ buffer
+ * queues. E.g.: If you have two buffer queues of 512 descriptors and buffers,
+ * you have a total of 1024 buffers so your RX queue _must_ have at least that
+ * many descriptors. This macro divides a given number of RX descriptors by
+ * number of buffer queues to calculate how many descriptors each buffer queue
+ * can have without overrunning the RX queue.
+ *
+ * If you give hardware more buffers than completion descriptors what will
+ * happen is that if hardware gets a chance to post more than ring wrap of
+ * descriptors before SW gets an interrupt and overwrites SW head, the gen bit
+ * in the descriptor will be wrong. Any overwritten descriptors' buffers will
+ * be gone forever and SW has no reasonable way to tell that this has happened.
+ * From SW perspective, when we finally get an interrupt, it looks like we're
+ * still waiting for descriptor to be done, stalling forever.
+ */
+#define IDPF_RX_BUFQ_DESC_COUNT(RXD, NUM_BUFQ) ((RXD) / (NUM_BUFQ))
+
+#define IDPF_RX_BUFQ_WORKING_SET(rxq) ((rxq)->desc_count - 1)
+
+#define IDPF_RX_BUMP_NTC(rxq, ntc) \
+do { \
+ if (unlikely(++(ntc) == (rxq)->desc_count)) { \
+ ntc = 0; \
+ change_bit(__IDPF_Q_GEN_CHK, (rxq)->flags); \
+ } \
+} while (0)
+
+#define IDPF_SINGLEQ_BUMP_RING_IDX(q, idx) \
+do { \
+ if (unlikely(++(idx) == (q)->desc_count)) \
+ idx = 0; \
+} while (0)
+
+#define IDPF_RX_HDR_SIZE 256
+#define IDPF_RX_BUF_2048 2048
+#define IDPF_RX_BUF_4096 4096
+#define IDPF_RX_BUF_STRIDE 32
+#define IDPF_RX_BUF_POST_STRIDE 16
+#define IDPF_LOW_WATERMARK 64
+/* Size of header buffer specifically for header split */
+#define IDPF_HDR_BUF_SIZE 256
+#define IDPF_PACKET_HDR_PAD \
+ (ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN * 2)
+#define IDPF_TX_TSO_MIN_MSS 88
+
+/* Minimum number of descriptors between 2 descriptors with the RE bit set;
+ * only relevant in flow scheduling mode
+ */
+#define IDPF_TX_SPLITQ_RE_MIN_GAP 64
+
+#define IDPF_RX_BI_BUFID_S 0
+#define IDPF_RX_BI_BUFID_M GENMASK(14, 0)
+#define IDPF_RX_BI_GEN_S 15
+#define IDPF_RX_BI_GEN_M BIT(IDPF_RX_BI_GEN_S)
+#define IDPF_RXD_EOF_SPLITQ VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_EOF_M
+#define IDPF_RXD_EOF_SINGLEQ VIRTCHNL2_RX_BASE_DESC_STATUS_EOF_M
+
+#define IDPF_SINGLEQ_RX_BUF_DESC(rxq, i) \
+ (&(((struct virtchnl2_singleq_rx_buf_desc *)((rxq)->desc_ring))[i]))
+#define IDPF_SPLITQ_RX_BUF_DESC(rxq, i) \
+ (&(((struct virtchnl2_splitq_rx_buf_desc *)((rxq)->desc_ring))[i]))
+#define IDPF_SPLITQ_RX_BI_DESC(rxq, i) ((((rxq)->ring))[i])
+
+#define IDPF_BASE_TX_DESC(txq, i) \
+ (&(((struct idpf_base_tx_desc *)((txq)->desc_ring))[i]))
+#define IDPF_BASE_TX_CTX_DESC(txq, i) \
+ (&(((struct idpf_base_tx_ctx_desc *)((txq)->desc_ring))[i]))
+#define IDPF_SPLITQ_TX_COMPLQ_DESC(txcq, i) \
+ (&(((struct idpf_splitq_tx_compl_desc *)((txcq)->desc_ring))[i]))
+
+#define IDPF_FLEX_TX_DESC(txq, i) \
+ (&(((union idpf_tx_flex_desc *)((txq)->desc_ring))[i]))
+#define IDPF_FLEX_TX_CTX_DESC(txq, i) \
+ (&(((struct idpf_flex_tx_ctx_desc *)((txq)->desc_ring))[i]))
+
+#define IDPF_DESC_UNUSED(txq) \
+ ((((txq)->next_to_clean > (txq)->next_to_use) ? 0 : (txq)->desc_count) + \
+ (txq)->next_to_clean - (txq)->next_to_use - 1)
+
+#define IDPF_TX_BUF_RSV_UNUSED(txq) ((txq)->buf_stack.top)
+#define IDPF_TX_BUF_RSV_LOW(txq) (IDPF_TX_BUF_RSV_UNUSED(txq) < \
+ (txq)->desc_count >> 2)
+
+#define IDPF_TX_COMPLQ_OVERFLOW_THRESH(txcq) ((txcq)->desc_count >> 1)
+/* Determine the absolute number of completions pending, i.e. the number of
+ * completions that are expected to arrive on the TX completion queue.
+ */
+#define IDPF_TX_COMPLQ_PENDING(txq) \
+ (((txq)->num_completions_pending >= (txq)->complq->num_completions ? \
+ 0 : U64_MAX) + \
+ (txq)->num_completions_pending - (txq)->complq->num_completions)
+
+#define IDPF_TX_SPLITQ_COMPL_TAG_WIDTH 16
+#define IDPF_SPLITQ_TX_INVAL_COMPL_TAG -1
+/* Adjust the generation for the completion tag and wrap if necessary */
+#define IDPF_TX_ADJ_COMPL_TAG_GEN(txq) \
+ ((++(txq)->compl_tag_cur_gen) >= (txq)->compl_tag_gen_max ? \
+ 0 : (txq)->compl_tag_cur_gen)
+
+#define IDPF_TXD_LAST_DESC_CMD (IDPF_TX_DESC_CMD_EOP | IDPF_TX_DESC_CMD_RS)
+
+#define IDPF_TX_FLAGS_TSO BIT(0)
+#define IDPF_TX_FLAGS_IPV4 BIT(1)
+#define IDPF_TX_FLAGS_IPV6 BIT(2)
+#define IDPF_TX_FLAGS_TUNNEL BIT(3)
+
+union idpf_tx_flex_desc {
+ struct idpf_flex_tx_desc q; /* queue based scheduling */
+ struct idpf_flex_tx_sched_desc flow; /* flow based scheduling */
+};
+
+/**
+ * struct idpf_tx_buf
+ * @next_to_watch: Next descriptor to clean
+ * @skb: Pointer to the skb
+ * @dma: DMA address
+ * @len: DMA length
+ * @bytecount: Number of bytes
+ * @gso_segs: Number of GSO segments
+ * @compl_tag: Splitq only, unique identifier for a buffer. Used to compare
+ * with completion tag returned in buffer completion event.
+ * Because the completion tag is expected to be the same in all
+ * data descriptors for a given packet, and a single packet can
+ * span multiple buffers, we need this field to track all
+ * buffers associated with this completion tag independently of
+ * the buf_id. The tag consists of a N bit buf_id and M upper
+ * order "generation bits". See compl_tag_bufid_m and
+ * compl_tag_gen_s in struct idpf_queue. We'll use a value of -1
+ * to indicate the tag is not valid.
+ * @ctx_entry: Singleq only. Used to indicate the corresponding entry
+ * in the descriptor ring was used for a context descriptor and
+ * this buffer entry should be skipped.
+ */
+struct idpf_tx_buf {
+ void *next_to_watch;
+ struct sk_buff *skb;
+ DEFINE_DMA_UNMAP_ADDR(dma);
+ DEFINE_DMA_UNMAP_LEN(len);
+ unsigned int bytecount;
+ unsigned short gso_segs;
+
+ union {
+ int compl_tag;
+
+ bool ctx_entry;
+ };
+};
+
+struct idpf_tx_stash {
+ struct hlist_node hlist;
+ struct idpf_tx_buf buf;
+};
+
+/**
+ * struct idpf_buf_lifo - LIFO for managing OOO completions
+ * @top: Used to know how many buffers are left
+ * @size: Total size of LIFO
+ * @bufs: Backing array
+ */
+struct idpf_buf_lifo {
+ u16 top;
+ u16 size;
+ struct idpf_tx_stash **bufs;
+};
+
+/**
+ * struct idpf_tx_offload_params - Offload parameters for a given packet
+ * @tx_flags: Feature flags enabled for this packet
+ * @hdr_offsets: Offset parameter for single queue model
+ * @cd_tunneling: Type of tunneling enabled for single queue model
+ * @tso_len: Total length of payload to segment
+ * @mss: Segment size
+ * @tso_segs: Number of segments to be sent
+ * @tso_hdr_len: Length of headers to be duplicated
+ * @td_cmd: Command field to be inserted into descriptor
+ */
+struct idpf_tx_offload_params {
+ u32 tx_flags;
+
+ u32 hdr_offsets;
+ u32 cd_tunneling;
+
+ u32 tso_len;
+ u16 mss;
+ u16 tso_segs;
+ u16 tso_hdr_len;
+
+ u16 td_cmd;
+};
+
+/**
+ * struct idpf_tx_splitq_params
+ * @dtype: General descriptor info
+ * @eop_cmd: Type of EOP
+ * @compl_tag: Associated tag for completion
+ * @td_tag: Descriptor tunneling tag
+ * @offload: Offload parameters
+ */
+struct idpf_tx_splitq_params {
+ enum idpf_tx_desc_dtype_value dtype;
+ u16 eop_cmd;
+ union {
+ u16 compl_tag;
+ u16 td_tag;
+ };
+
+ struct idpf_tx_offload_params offload;
+};
+
+enum idpf_tx_ctx_desc_eipt_offload {
+ IDPF_TX_CTX_EXT_IP_NONE = 0x0,
+ IDPF_TX_CTX_EXT_IP_IPV6 = 0x1,
+ IDPF_TX_CTX_EXT_IP_IPV4_NO_CSUM = 0x2,
+ IDPF_TX_CTX_EXT_IP_IPV4 = 0x3
+};
+
+/* Checksum offload bits decoded from the receive descriptor. */
+struct idpf_rx_csum_decoded {
+ u32 l3l4p : 1;
+ u32 ipe : 1;
+ u32 eipe : 1;
+ u32 eudpe : 1;
+ u32 ipv6exadd : 1;
+ u32 l4e : 1;
+ u32 pprs : 1;
+ u32 nat : 1;
+ u32 raw_csum_inv : 1;
+ u32 raw_csum : 16;
+};
+
+struct idpf_rx_extracted {
+ unsigned int size;
+ u16 rx_ptype;
+};
+
+#define IDPF_TX_COMPLQ_CLEAN_BUDGET 256
+#define IDPF_TX_MIN_PKT_LEN 17
+#define IDPF_TX_DESCS_FOR_SKB_DATA_PTR 1
+#define IDPF_TX_DESCS_PER_CACHE_LINE (L1_CACHE_BYTES / \
+ sizeof(struct idpf_flex_tx_desc))
+#define IDPF_TX_DESCS_FOR_CTX 1
+/* TX descriptors needed, worst case */
+#define IDPF_TX_DESC_NEEDED (MAX_SKB_FRAGS + IDPF_TX_DESCS_FOR_CTX + \
+ IDPF_TX_DESCS_PER_CACHE_LINE + \
+ IDPF_TX_DESCS_FOR_SKB_DATA_PTR)
+
+/* The size limit for a transmit buffer in a descriptor is (16K - 1).
+ * In order to align with the read requests we will align the value to
+ * the nearest 4K which represents our maximum read request size.
+ */
+#define IDPF_TX_MAX_READ_REQ_SIZE SZ_4K
+#define IDPF_TX_MAX_DESC_DATA (SZ_16K - 1)
+#define IDPF_TX_MAX_DESC_DATA_ALIGNED \
+ ALIGN_DOWN(IDPF_TX_MAX_DESC_DATA, IDPF_TX_MAX_READ_REQ_SIZE)
+
+#define IDPF_RX_DMA_ATTR \
+ (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING)
+#define IDPF_RX_DESC(rxq, i) \
+ (&(((union virtchnl2_rx_desc *)((rxq)->desc_ring))[i]))
+
+struct idpf_rx_buf {
+ struct page *page;
+ unsigned int page_offset;
+ u16 truesize;
+};
+
+#define IDPF_RX_MAX_PTYPE_PROTO_IDS 32
+#define IDPF_RX_MAX_PTYPE_SZ (sizeof(struct virtchnl2_ptype) + \
+ (sizeof(u16) * IDPF_RX_MAX_PTYPE_PROTO_IDS))
+#define IDPF_RX_PTYPE_HDR_SZ sizeof(struct virtchnl2_get_ptype_info)
+#define IDPF_RX_MAX_PTYPES_PER_BUF \
+ DIV_ROUND_DOWN_ULL((IDPF_CTLQ_MAX_BUF_LEN - IDPF_RX_PTYPE_HDR_SZ), \
+ IDPF_RX_MAX_PTYPE_SZ)
+
+#define IDPF_GET_PTYPE_SIZE(p) struct_size((p), proto_id, (p)->proto_id_count)
+
+#define IDPF_TUN_IP_GRE (\
+ IDPF_PTYPE_TUNNEL_IP |\
+ IDPF_PTYPE_TUNNEL_IP_GRENAT)
+
+#define IDPF_TUN_IP_GRE_MAC (\
+ IDPF_TUN_IP_GRE |\
+ IDPF_PTYPE_TUNNEL_IP_GRENAT_MAC)
+
+#define IDPF_RX_MAX_PTYPE 1024
+#define IDPF_RX_MAX_BASE_PTYPE 256
+#define IDPF_INVALID_PTYPE_ID 0xFFFF
+
+/* Packet type non-ip values */
+enum idpf_rx_ptype_l2 {
+ IDPF_RX_PTYPE_L2_RESERVED = 0,
+ IDPF_RX_PTYPE_L2_MAC_PAY2 = 1,
+ IDPF_RX_PTYPE_L2_TIMESYNC_PAY2 = 2,
+ IDPF_RX_PTYPE_L2_FIP_PAY2 = 3,
+ IDPF_RX_PTYPE_L2_OUI_PAY2 = 4,
+ IDPF_RX_PTYPE_L2_MACCNTRL_PAY2 = 5,
+ IDPF_RX_PTYPE_L2_LLDP_PAY2 = 6,
+ IDPF_RX_PTYPE_L2_ECP_PAY2 = 7,
+ IDPF_RX_PTYPE_L2_EVB_PAY2 = 8,
+ IDPF_RX_PTYPE_L2_QCN_PAY2 = 9,
+ IDPF_RX_PTYPE_L2_EAPOL_PAY2 = 10,
+ IDPF_RX_PTYPE_L2_ARP = 11,
+};
+
+enum idpf_rx_ptype_outer_ip {
+ IDPF_RX_PTYPE_OUTER_L2 = 0,
+ IDPF_RX_PTYPE_OUTER_IP = 1,
+};
+
+#define IDPF_RX_PTYPE_TO_IPV(ptype, ipv) \
+ (((ptype)->outer_ip == IDPF_RX_PTYPE_OUTER_IP) && \
+ ((ptype)->outer_ip_ver == (ipv)))
+
+enum idpf_rx_ptype_outer_ip_ver {
+ IDPF_RX_PTYPE_OUTER_NONE = 0,
+ IDPF_RX_PTYPE_OUTER_IPV4 = 1,
+ IDPF_RX_PTYPE_OUTER_IPV6 = 2,
+};
+
+enum idpf_rx_ptype_outer_fragmented {
+ IDPF_RX_PTYPE_NOT_FRAG = 0,
+ IDPF_RX_PTYPE_FRAG = 1,
+};
+
+enum idpf_rx_ptype_tunnel_type {
+ IDPF_RX_PTYPE_TUNNEL_NONE = 0,
+ IDPF_RX_PTYPE_TUNNEL_IP_IP = 1,
+ IDPF_RX_PTYPE_TUNNEL_IP_GRENAT = 2,
+ IDPF_RX_PTYPE_TUNNEL_IP_GRENAT_MAC = 3,
+ IDPF_RX_PTYPE_TUNNEL_IP_GRENAT_MAC_VLAN = 4,
+};
+
+enum idpf_rx_ptype_tunnel_end_prot {
+ IDPF_RX_PTYPE_TUNNEL_END_NONE = 0,
+ IDPF_RX_PTYPE_TUNNEL_END_IPV4 = 1,
+ IDPF_RX_PTYPE_TUNNEL_END_IPV6 = 2,
+};
+
+enum idpf_rx_ptype_inner_prot {
+ IDPF_RX_PTYPE_INNER_PROT_NONE = 0,
+ IDPF_RX_PTYPE_INNER_PROT_UDP = 1,
+ IDPF_RX_PTYPE_INNER_PROT_TCP = 2,
+ IDPF_RX_PTYPE_INNER_PROT_SCTP = 3,
+ IDPF_RX_PTYPE_INNER_PROT_ICMP = 4,
+ IDPF_RX_PTYPE_INNER_PROT_TIMESYNC = 5,
+};
+
+enum idpf_rx_ptype_payload_layer {
+ IDPF_RX_PTYPE_PAYLOAD_LAYER_NONE = 0,
+ IDPF_RX_PTYPE_PAYLOAD_LAYER_PAY2 = 1,
+ IDPF_RX_PTYPE_PAYLOAD_LAYER_PAY3 = 2,
+ IDPF_RX_PTYPE_PAYLOAD_LAYER_PAY4 = 3,
+};
+
+enum idpf_tunnel_state {
+ IDPF_PTYPE_TUNNEL_IP = BIT(0),
+ IDPF_PTYPE_TUNNEL_IP_GRENAT = BIT(1),
+ IDPF_PTYPE_TUNNEL_IP_GRENAT_MAC = BIT(2),
+};
+
+struct idpf_ptype_state {
+ bool outer_ip;
+ bool outer_frag;
+ u8 tunnel_state;
+};
+
+struct idpf_rx_ptype_decoded {
+ u32 ptype:10;
+ u32 known:1;
+ u32 outer_ip:1;
+ u32 outer_ip_ver:2;
+ u32 outer_frag:1;
+ u32 tunnel_type:3;
+ u32 tunnel_end_prot:2;
+ u32 tunnel_end_frag:1;
+ u32 inner_prot:4;
+ u32 payload_layer:3;
+};
+
+/**
+ * enum idpf_queue_flags_t
+ * @__IDPF_Q_GEN_CHK: Queues operating in splitq mode use a generation bit to
+ * identify new descriptor writebacks on the ring. HW sets
+ * the gen bit to 1 on the first writeback of any given
+ * descriptor. After the ring wraps, HW sets the gen bit of
+ * those descriptors to 0, and continues flipping
+ * 0->1 or 1->0 on each ring wrap. SW maintains its own
+ * gen bit to know what value will indicate writebacks on
+ * the next pass around the ring. E.g. it is initialized
+ * to 1 and knows that reading a gen bit of 1 in any
+ * descriptor on the initial pass of the ring indicates a
+ * writeback. It also flips on every ring wrap.
+ * @__IDPF_RFLQ_GEN_CHK: Refill queues are SW only, so Q_GEN acts as the HW bit
+ * and RFLGQ_GEN is the SW bit.
+ * @__IDPF_Q_FLOW_SCH_EN: Enable flow scheduling
+ * @__IDPF_Q_SW_MARKER: Used to indicate TX queue marker completions
+ * @__IDPF_Q_POLL_MODE: Enable poll mode
+ * @__IDPF_Q_FLAGS_NBITS: Must be last
+ */
+enum idpf_queue_flags_t {
+ __IDPF_Q_GEN_CHK,
+ __IDPF_RFLQ_GEN_CHK,
+ __IDPF_Q_FLOW_SCH_EN,
+ __IDPF_Q_SW_MARKER,
+ __IDPF_Q_POLL_MODE,
+
+ __IDPF_Q_FLAGS_NBITS,
+};
+
+/**
+ * struct idpf_vec_regs
+ * @dyn_ctl_reg: Dynamic control interrupt register offset
+ * @itrn_reg: Interrupt Throttling Rate register offset
+ * @itrn_index_spacing: Register spacing between ITR registers of the same
+ * vector
+ */
+struct idpf_vec_regs {
+ u32 dyn_ctl_reg;
+ u32 itrn_reg;
+ u32 itrn_index_spacing;
+};
+
+/**
+ * struct idpf_intr_reg
+ * @dyn_ctl: Dynamic control interrupt register
+ * @dyn_ctl_intena_m: Mask for dyn_ctl interrupt enable
+ * @dyn_ctl_itridx_s: Register bit offset for ITR index
+ * @dyn_ctl_itridx_m: Mask for ITR index
+ * @dyn_ctl_intrvl_s: Register bit offset for ITR interval
+ * @rx_itr: RX ITR register
+ * @tx_itr: TX ITR register
+ * @icr_ena: Interrupt cause register offset
+ * @icr_ena_ctlq_m: Mask for ICR
+ */
+struct idpf_intr_reg {
+ void __iomem *dyn_ctl;
+ u32 dyn_ctl_intena_m;
+ u32 dyn_ctl_itridx_s;
+ u32 dyn_ctl_itridx_m;
+ u32 dyn_ctl_intrvl_s;
+ void __iomem *rx_itr;
+ void __iomem *tx_itr;
+ void __iomem *icr_ena;
+ u32 icr_ena_ctlq_m;
+};
+
+/**
+ * struct idpf_q_vector
+ * @vport: Vport back pointer
+ * @affinity_mask: CPU affinity mask
+ * @napi: napi handler
+ * @v_idx: Vector index
+ * @intr_reg: See struct idpf_intr_reg
+ * @num_txq: Number of TX queues
+ * @tx: Array of TX queues to service
+ * @tx_dim: Data for TX net_dim algorithm
+ * @tx_itr_value: TX interrupt throttling rate
+ * @tx_intr_mode: Dynamic ITR or not
+ * @tx_itr_idx: TX ITR index
+ * @num_rxq: Number of RX queues
+ * @rx: Array of RX queues to service
+ * @rx_dim: Data for RX net_dim algorithm
+ * @rx_itr_value: RX interrupt throttling rate
+ * @rx_intr_mode: Dynamic ITR or not
+ * @rx_itr_idx: RX ITR index
+ * @num_bufq: Number of buffer queues
+ * @bufq: Array of buffer queues to service
+ * @total_events: Number of interrupts processed
+ * @name: Queue vector name
+ */
+struct idpf_q_vector {
+ struct idpf_vport *vport;
+ cpumask_t affinity_mask;
+ struct napi_struct napi;
+ u16 v_idx;
+ struct idpf_intr_reg intr_reg;
+
+ u16 num_txq;
+ struct idpf_queue **tx;
+ struct dim tx_dim;
+ u16 tx_itr_value;
+ bool tx_intr_mode;
+ u32 tx_itr_idx;
+
+ u16 num_rxq;
+ struct idpf_queue **rx;
+ struct dim rx_dim;
+ u16 rx_itr_value;
+ bool rx_intr_mode;
+ u32 rx_itr_idx;
+
+ u16 num_bufq;
+ struct idpf_queue **bufq;
+
+ u16 total_events;
+ char *name;
+};
+
+struct idpf_rx_queue_stats {
+ u64_stats_t packets;
+ u64_stats_t bytes;
+ u64_stats_t rsc_pkts;
+ u64_stats_t hw_csum_err;
+ u64_stats_t hsplit_pkts;
+ u64_stats_t hsplit_buf_ovf;
+ u64_stats_t bad_descs;
+};
+
+struct idpf_tx_queue_stats {
+ u64_stats_t packets;
+ u64_stats_t bytes;
+ u64_stats_t lso_pkts;
+ u64_stats_t linearize;
+ u64_stats_t q_busy;
+ u64_stats_t skb_drops;
+ u64_stats_t dma_map_errs;
+};
+
+struct idpf_cleaned_stats {
+ u32 packets;
+ u32 bytes;
+};
+
+union idpf_queue_stats {
+ struct idpf_rx_queue_stats rx;
+ struct idpf_tx_queue_stats tx;
+};
+
+#define IDPF_ITR_DYNAMIC 1
+#define IDPF_ITR_MAX 0x1FE0
+#define IDPF_ITR_20K 0x0032
+#define IDPF_ITR_GRAN_S 1 /* Assume ITR granularity is 2us */
+#define IDPF_ITR_MASK 0x1FFE /* ITR register value alignment mask */
+#define ITR_REG_ALIGN(setting) ((setting) & IDPF_ITR_MASK)
+#define IDPF_ITR_IS_DYNAMIC(itr_mode) (itr_mode)
+#define IDPF_ITR_TX_DEF IDPF_ITR_20K
+#define IDPF_ITR_RX_DEF IDPF_ITR_20K
+/* Index used for 'No ITR' update in DYN_CTL register */
+#define IDPF_NO_ITR_UPDATE_IDX 3
+#define IDPF_ITR_IDX_SPACING(spacing, dflt) (spacing ? spacing : dflt)
+#define IDPF_DIM_DEFAULT_PROFILE_IX 1
+
+/**
+ * struct idpf_queue
+ * @dev: Device back pointer for DMA mapping
+ * @vport: Back pointer to associated vport
+ * @txq_grp: See struct idpf_txq_group
+ * @rxq_grp: See struct idpf_rxq_group
+ * @idx: For buffer queue, it is used as group id, either 0 or 1. On clean,
+ * buffer queue uses this index to determine which group of refill queues
+ * to clean.
+ * For TX queue, it is used as index to map between TX queue group and
+ * hot path TX pointers stored in vport. Used in both singleq/splitq.
+ * For RX queue, it is used to index to total RX queue across groups and
+ * used for skb reporting.
+ * @tail: Tail offset. Used for both queue models single and split. In splitq
+ * model relevant only for TX queue and RX queue.
+ * @tx_buf: See struct idpf_tx_buf
+ * @rx_buf: Struct with RX buffer related members
+ * @rx_buf.buf: See struct idpf_rx_buf
+ * @rx_buf.hdr_buf_pa: DMA handle
+ * @rx_buf.hdr_buf_va: Virtual address
+ * @pp: Page pool pointer
+ * @skb: Pointer to the skb
+ * @q_type: Queue type (TX, RX, TX completion, RX buffer)
+ * @q_id: Queue id
+ * @desc_count: Number of descriptors
+ * @next_to_use: Next descriptor to use. Relevant in both split & single txq
+ * and bufq.
+ * @next_to_clean: Next descriptor to clean. In split queue model, only
+ * relevant to TX completion queue and RX queue.
+ * @next_to_alloc: RX buffer to allocate at. Used only for RX. In splitq model
+ * only relevant to RX queue.
+ * @flags: See enum idpf_queue_flags_t
+ * @q_stats: See union idpf_queue_stats
+ * @stats_sync: See struct u64_stats_sync
+ * @cleaned_bytes: Splitq only, TXQ only: When a TX completion is received on
+ * the TX completion queue, it can be for any TXQ associated
+ * with that completion queue. This means we can clean up to
+ * N TXQs during a single call to clean the completion queue.
+ * cleaned_bytes|pkts tracks the clean stats per TXQ during
+ * that single call to clean the completion queue. By doing so,
+ * we can update BQL with aggregate cleaned stats for each TXQ
+ * only once at the end of the cleaning routine.
+ * @cleaned_pkts: Number of packets cleaned for the above said case
+ * @rx_hsplit_en: RX headsplit enable
+ * @rx_hbuf_size: Header buffer size
+ * @rx_buf_size: Buffer size
+ * @rx_max_pkt_size: RX max packet size
+ * @rx_buf_stride: RX buffer stride
+ * @rx_buffer_low_watermark: RX buffer low watermark
+ * @rxdids: Supported RX descriptor ids
+ * @q_vector: Backreference to associated vector
+ * @size: Length of descriptor ring in bytes
+ * @dma: Physical address of ring
+ * @desc_ring: Descriptor ring memory
+ * @tx_max_bufs: Max buffers that can be transmitted with scatter-gather
+ * @tx_min_pkt_len: Min supported packet length
+ * @num_completions: Only relevant for TX completion queue. It tracks the
+ * number of completions received to compare against the
+ * number of completions pending, as accumulated by the
+ * TX queues.
+ * @buf_stack: Stack of empty buffers to store buffer info for out of order
+ * buffer completions. See struct idpf_buf_lifo.
+ * @compl_tag_bufid_m: Completion tag buffer id mask
+ * @compl_tag_gen_s: Completion tag generation bit
+ * The format of the completion tag will change based on the TXQ
+ * descriptor ring size so that we can maintain roughly the same level
+ * of "uniqueness" across all descriptor sizes. For example, if the
+ * TXQ descriptor ring size is 64 (the minimum size supported), the
+ * completion tag will be formatted as below:
+ * 15 6 5 0
+ * --------------------------------
+ * | GEN=0-1023 |IDX = 0-63|
+ * --------------------------------
+ *
+ * This gives us 64*1024 = 65536 possible unique values. Similarly, if
+ * the TXQ descriptor ring size is 8160 (the maximum size supported),
+ * the completion tag will be formatted as below:
+ * 15 13 12 0
+ * --------------------------------
+ * |GEN | IDX = 0-8159 |
+ * --------------------------------
+ *
+ * This gives us 8*8160 = 65280 possible unique values.
+ * @compl_tag_cur_gen: Used to keep track of current completion tag generation
+ * @compl_tag_gen_max: To determine when compl_tag_cur_gen should be reset
+ * @sched_buf_hash: Hash table to stores buffers
+ */
+struct idpf_queue {
+ struct device *dev;
+ struct idpf_vport *vport;
+ union {
+ struct idpf_txq_group *txq_grp;
+ struct idpf_rxq_group *rxq_grp;
+ };
+ u16 idx;
+ void __iomem *tail;
+ union {
+ struct idpf_tx_buf *tx_buf;
+ struct {
+ struct idpf_rx_buf *buf;
+ dma_addr_t hdr_buf_pa;
+ void *hdr_buf_va;
+ } rx_buf;
+ };
+ struct page_pool *pp;
+ struct sk_buff *skb;
+ u16 q_type;
+ u32 q_id;
+ u16 desc_count;
+
+ u16 next_to_use;
+ u16 next_to_clean;
+ u16 next_to_alloc;
+ DECLARE_BITMAP(flags, __IDPF_Q_FLAGS_NBITS);
+
+ union idpf_queue_stats q_stats;
+ struct u64_stats_sync stats_sync;
+
+ u32 cleaned_bytes;
+ u16 cleaned_pkts;
+
+ bool rx_hsplit_en;
+ u16 rx_hbuf_size;
+ u16 rx_buf_size;
+ u16 rx_max_pkt_size;
+ u16 rx_buf_stride;
+ u8 rx_buffer_low_watermark;
+ u64 rxdids;
+ struct idpf_q_vector *q_vector;
+ unsigned int size;
+ dma_addr_t dma;
+ void *desc_ring;
+
+ u16 tx_max_bufs;
+ u8 tx_min_pkt_len;
+
+ u32 num_completions;
+
+ struct idpf_buf_lifo buf_stack;
+
+ u16 compl_tag_bufid_m;
+ u16 compl_tag_gen_s;
+
+ u16 compl_tag_cur_gen;
+ u16 compl_tag_gen_max;
+
+ DECLARE_HASHTABLE(sched_buf_hash, 12);
+} ____cacheline_internodealigned_in_smp;
+
+/**
+ * struct idpf_sw_queue
+ * @next_to_clean: Next descriptor to clean
+ * @next_to_alloc: Buffer to allocate at
+ * @flags: See enum idpf_queue_flags_t
+ * @ring: Pointer to the ring
+ * @desc_count: Descriptor count
+ * @dev: Device back pointer for DMA mapping
+ *
+ * Software queues are used in splitq mode to manage buffers between rxq
+ * producer and the bufq consumer. These are required in order to maintain a
+ * lockless buffer management system and are strictly software only constructs.
+ */
+struct idpf_sw_queue {
+ u16 next_to_clean;
+ u16 next_to_alloc;
+ DECLARE_BITMAP(flags, __IDPF_Q_FLAGS_NBITS);
+ u16 *ring;
+ u16 desc_count;
+ struct device *dev;
+} ____cacheline_internodealigned_in_smp;
+
+/**
+ * struct idpf_rxq_set
+ * @rxq: RX queue
+ * @refillq0: Pointer to refill queue 0
+ * @refillq1: Pointer to refill queue 1
+ *
+ * Splitq only. idpf_rxq_set associates an rxq with at an array of refillqs.
+ * Each rxq needs a refillq to return used buffers back to the respective bufq.
+ * Bufqs then clean these refillqs for buffers to give to hardware.
+ */
+struct idpf_rxq_set {
+ struct idpf_queue rxq;
+ struct idpf_sw_queue *refillq0;
+ struct idpf_sw_queue *refillq1;
+};
+
+/**
+ * struct idpf_bufq_set
+ * @bufq: Buffer queue
+ * @num_refillqs: Number of refill queues. This is always equal to num_rxq_sets
+ * in idpf_rxq_group.
+ * @refillqs: Pointer to refill queues array.
+ *
+ * Splitq only. idpf_bufq_set associates a bufq to an array of refillqs.
+ * In this bufq_set, there will be one refillq for each rxq in this rxq_group.
+ * Used buffers received by rxqs will be put on refillqs which bufqs will
+ * clean to return new buffers back to hardware.
+ *
+ * Buffers needed by some number of rxqs associated in this rxq_group are
+ * managed by at most two bufqs (depending on performance configuration).
+ */
+struct idpf_bufq_set {
+ struct idpf_queue bufq;
+ int num_refillqs;
+ struct idpf_sw_queue *refillqs;
+};
+
+/**
+ * struct idpf_rxq_group
+ * @vport: Vport back pointer
+ * @singleq: Struct with single queue related members
+ * @singleq.num_rxq: Number of RX queues associated
+ * @singleq.rxqs: Array of RX queue pointers
+ * @splitq: Struct with split queue related members
+ * @splitq.num_rxq_sets: Number of RX queue sets
+ * @splitq.rxq_sets: Array of RX queue sets
+ * @splitq.bufq_sets: Buffer queue set pointer
+ *
+ * In singleq mode, an rxq_group is simply an array of rxqs. In splitq, a
+ * rxq_group contains all the rxqs, bufqs and refillqs needed to
+ * manage buffers in splitq mode.
+ */
+struct idpf_rxq_group {
+ struct idpf_vport *vport;
+
+ union {
+ struct {
+ u16 num_rxq;
+ struct idpf_queue *rxqs[IDPF_LARGE_MAX_Q];
+ } singleq;
+ struct {
+ u16 num_rxq_sets;
+ struct idpf_rxq_set *rxq_sets[IDPF_LARGE_MAX_Q];
+ struct idpf_bufq_set *bufq_sets;
+ } splitq;
+ };
+};
+
+/**
+ * struct idpf_txq_group
+ * @vport: Vport back pointer
+ * @num_txq: Number of TX queues associated
+ * @txqs: Array of TX queue pointers
+ * @complq: Associated completion queue pointer, split queue only
+ * @num_completions_pending: Total number of completions pending for the
+ * completion queue, acculumated for all TX queues
+ * associated with that completion queue.
+ *
+ * Between singleq and splitq, a txq_group is largely the same except for the
+ * complq. In splitq a single complq is responsible for handling completions
+ * for some number of txqs associated in this txq_group.
+ */
+struct idpf_txq_group {
+ struct idpf_vport *vport;
+
+ u16 num_txq;
+ struct idpf_queue *txqs[IDPF_LARGE_MAX_Q];
+
+ struct idpf_queue *complq;
+
+ u32 num_completions_pending;
+};
+
+/**
+ * idpf_size_to_txd_count - Get number of descriptors needed for large Tx frag
+ * @size: transmit request size in bytes
+ *
+ * In the case where a large frag (>= 16K) needs to be split across multiple
+ * descriptors, we need to assume that we can have no more than 12K of data
+ * per descriptor due to hardware alignment restrictions (4K alignment).
+ */
+static inline u32 idpf_size_to_txd_count(unsigned int size)
+{
+ return DIV_ROUND_UP(size, IDPF_TX_MAX_DESC_DATA_ALIGNED);
+}
+
+/**
+ * idpf_tx_singleq_build_ctob - populate command tag offset and size
+ * @td_cmd: Command to be filled in desc
+ * @td_offset: Offset to be filled in desc
+ * @size: Size of the buffer
+ * @td_tag: td tag to be filled
+ *
+ * Returns the 64 bit value populated with the input parameters
+ */
+static inline __le64 idpf_tx_singleq_build_ctob(u64 td_cmd, u64 td_offset,
+ unsigned int size, u64 td_tag)
+{
+ return cpu_to_le64(IDPF_TX_DESC_DTYPE_DATA |
+ (td_cmd << IDPF_TXD_QW1_CMD_S) |
+ (td_offset << IDPF_TXD_QW1_OFFSET_S) |
+ ((u64)size << IDPF_TXD_QW1_TX_BUF_SZ_S) |
+ (td_tag << IDPF_TXD_QW1_L2TAG1_S));
+}
+
+void idpf_tx_splitq_build_ctb(union idpf_tx_flex_desc *desc,
+ struct idpf_tx_splitq_params *params,
+ u16 td_cmd, u16 size);
+void idpf_tx_splitq_build_flow_desc(union idpf_tx_flex_desc *desc,
+ struct idpf_tx_splitq_params *params,
+ u16 td_cmd, u16 size);
+/**
+ * idpf_tx_splitq_build_desc - determine which type of data descriptor to build
+ * @desc: descriptor to populate
+ * @params: pointer to tx params struct
+ * @td_cmd: command to be filled in desc
+ * @size: size of buffer
+ */
+static inline void idpf_tx_splitq_build_desc(union idpf_tx_flex_desc *desc,
+ struct idpf_tx_splitq_params *params,
+ u16 td_cmd, u16 size)
+{
+ if (params->dtype == IDPF_TX_DESC_DTYPE_FLEX_L2TAG1_L2TAG2)
+ idpf_tx_splitq_build_ctb(desc, params, td_cmd, size);
+ else
+ idpf_tx_splitq_build_flow_desc(desc, params, td_cmd, size);
+}
+
+/**
+ * idpf_alloc_page - Allocate a new RX buffer from the page pool
+ * @pool: page_pool to allocate from
+ * @buf: metadata struct to populate with page info
+ * @buf_size: 2K or 4K
+ *
+ * Returns &dma_addr_t to be passed to HW for Rx, %DMA_MAPPING_ERROR otherwise.
+ */
+static inline dma_addr_t idpf_alloc_page(struct page_pool *pool,
+ struct idpf_rx_buf *buf,
+ unsigned int buf_size)
+{
+ if (buf_size == IDPF_RX_BUF_2048)
+ buf->page = page_pool_dev_alloc_frag(pool, &buf->page_offset,
+ buf_size);
+ else
+ buf->page = page_pool_dev_alloc_pages(pool);
+
+ if (!buf->page)
+ return DMA_MAPPING_ERROR;
+
+ buf->truesize = buf_size;
+
+ return page_pool_get_dma_addr(buf->page) + buf->page_offset +
+ pool->p.offset;
+}
+
+/**
+ * idpf_rx_put_page - Return RX buffer page to pool
+ * @rx_buf: RX buffer metadata struct
+ */
+static inline void idpf_rx_put_page(struct idpf_rx_buf *rx_buf)
+{
+ page_pool_put_page(rx_buf->page->pp, rx_buf->page,
+ rx_buf->truesize, true);
+ rx_buf->page = NULL;
+}
+
+/**
+ * idpf_rx_sync_for_cpu - Synchronize DMA buffer
+ * @rx_buf: RX buffer metadata struct
+ * @len: frame length from descriptor
+ */
+static inline void idpf_rx_sync_for_cpu(struct idpf_rx_buf *rx_buf, u32 len)
+{
+ struct page *page = rx_buf->page;
+ struct page_pool *pp = page->pp;
+
+ dma_sync_single_range_for_cpu(pp->p.dev,
+ page_pool_get_dma_addr(page),
+ rx_buf->page_offset + pp->p.offset, len,
+ page_pool_get_dma_dir(pp));
+}
+
+int idpf_vport_singleq_napi_poll(struct napi_struct *napi, int budget);
+void idpf_vport_init_num_qs(struct idpf_vport *vport,
+ struct virtchnl2_create_vport *vport_msg);
+void idpf_vport_calc_num_q_desc(struct idpf_vport *vport);
+int idpf_vport_calc_total_qs(struct idpf_adapter *adapter, u16 vport_index,
+ struct virtchnl2_create_vport *vport_msg,
+ struct idpf_vport_max_q *max_q);
+void idpf_vport_calc_num_q_groups(struct idpf_vport *vport);
+int idpf_vport_queues_alloc(struct idpf_vport *vport);
+void idpf_vport_queues_rel(struct idpf_vport *vport);
+void idpf_vport_intr_rel(struct idpf_vport *vport);
+int idpf_vport_intr_alloc(struct idpf_vport *vport);
+void idpf_vport_intr_update_itr_ena_irq(struct idpf_q_vector *q_vector);
+void idpf_vport_intr_deinit(struct idpf_vport *vport);
+int idpf_vport_intr_init(struct idpf_vport *vport);
+enum pkt_hash_types idpf_ptype_to_htype(const struct idpf_rx_ptype_decoded *decoded);
+int idpf_config_rss(struct idpf_vport *vport);
+int idpf_init_rss(struct idpf_vport *vport);
+void idpf_deinit_rss(struct idpf_vport *vport);
+int idpf_rx_bufs_init_all(struct idpf_vport *vport);
+void idpf_rx_add_frag(struct idpf_rx_buf *rx_buf, struct sk_buff *skb,
+ unsigned int size);
+struct sk_buff *idpf_rx_construct_skb(struct idpf_queue *rxq,
+ struct idpf_rx_buf *rx_buf,
+ unsigned int size);
+bool idpf_init_rx_buf_hw_alloc(struct idpf_queue *rxq, struct idpf_rx_buf *buf);
+void idpf_rx_buf_hw_update(struct idpf_queue *rxq, u32 val);
+void idpf_tx_buf_hw_update(struct idpf_queue *tx_q, u32 val,
+ bool xmit_more);
+unsigned int idpf_size_to_txd_count(unsigned int size);
+netdev_tx_t idpf_tx_drop_skb(struct idpf_queue *tx_q, struct sk_buff *skb);
+void idpf_tx_dma_map_error(struct idpf_queue *txq, struct sk_buff *skb,
+ struct idpf_tx_buf *first, u16 ring_idx);
+unsigned int idpf_tx_desc_count_required(struct idpf_queue *txq,
+ struct sk_buff *skb);
+bool idpf_chk_linearize(struct sk_buff *skb, unsigned int max_bufs,
+ unsigned int count);
+int idpf_tx_maybe_stop_common(struct idpf_queue *tx_q, unsigned int size);
+void idpf_tx_timeout(struct net_device *netdev, unsigned int txqueue);
+netdev_tx_t idpf_tx_splitq_start(struct sk_buff *skb,
+ struct net_device *netdev);
+netdev_tx_t idpf_tx_singleq_start(struct sk_buff *skb,
+ struct net_device *netdev);
+bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_queue *rxq,
+ u16 cleaned_count);
+int idpf_tso(struct sk_buff *skb, struct idpf_tx_offload_params *off);
+
+#endif /* !_IDPF_TXRX_H_ */
diff --git a/drivers/net/ethernet/intel/idpf/idpf_vf_dev.c b/drivers/net/ethernet/intel/idpf/idpf_vf_dev.c
new file mode 100644
index 000000000000..8ade4e3a9fe1
--- /dev/null
+++ b/drivers/net/ethernet/intel/idpf/idpf_vf_dev.c
@@ -0,0 +1,163 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2023 Intel Corporation */
+
+#include "idpf.h"
+#include "idpf_lan_vf_regs.h"
+
+#define IDPF_VF_ITR_IDX_SPACING 0x40
+
+/**
+ * idpf_vf_ctlq_reg_init - initialize default mailbox registers
+ * @cq: pointer to the array of create control queues
+ */
+static void idpf_vf_ctlq_reg_init(struct idpf_ctlq_create_info *cq)
+{
+ int i;
+
+ for (i = 0; i < IDPF_NUM_DFLT_MBX_Q; i++) {
+ struct idpf_ctlq_create_info *ccq = cq + i;
+
+ switch (ccq->type) {
+ case IDPF_CTLQ_TYPE_MAILBOX_TX:
+ /* set head and tail registers in our local struct */
+ ccq->reg.head = VF_ATQH;
+ ccq->reg.tail = VF_ATQT;
+ ccq->reg.len = VF_ATQLEN;
+ ccq->reg.bah = VF_ATQBAH;
+ ccq->reg.bal = VF_ATQBAL;
+ ccq->reg.len_mask = VF_ATQLEN_ATQLEN_M;
+ ccq->reg.len_ena_mask = VF_ATQLEN_ATQENABLE_M;
+ ccq->reg.head_mask = VF_ATQH_ATQH_M;
+ break;
+ case IDPF_CTLQ_TYPE_MAILBOX_RX:
+ /* set head and tail registers in our local struct */
+ ccq->reg.head = VF_ARQH;
+ ccq->reg.tail = VF_ARQT;
+ ccq->reg.len = VF_ARQLEN;
+ ccq->reg.bah = VF_ARQBAH;
+ ccq->reg.bal = VF_ARQBAL;
+ ccq->reg.len_mask = VF_ARQLEN_ARQLEN_M;
+ ccq->reg.len_ena_mask = VF_ARQLEN_ARQENABLE_M;
+ ccq->reg.head_mask = VF_ARQH_ARQH_M;
+ break;
+ default:
+ break;
+ }
+ }
+}
+
+/**
+ * idpf_vf_mb_intr_reg_init - Initialize the mailbox register
+ * @adapter: adapter structure
+ */
+static void idpf_vf_mb_intr_reg_init(struct idpf_adapter *adapter)
+{
+ struct idpf_intr_reg *intr = &adapter->mb_vector.intr_reg;
+ u32 dyn_ctl = le32_to_cpu(adapter->caps.mailbox_dyn_ctl);
+
+ intr->dyn_ctl = idpf_get_reg_addr(adapter, dyn_ctl);
+ intr->dyn_ctl_intena_m = VF_INT_DYN_CTL0_INTENA_M;
+ intr->dyn_ctl_itridx_m = VF_INT_DYN_CTL0_ITR_INDX_M;
+ intr->icr_ena = idpf_get_reg_addr(adapter, VF_INT_ICR0_ENA1);
+ intr->icr_ena_ctlq_m = VF_INT_ICR0_ENA1_ADMINQ_M;
+}
+
+/**
+ * idpf_vf_intr_reg_init - Initialize interrupt registers
+ * @vport: virtual port structure
+ */
+static int idpf_vf_intr_reg_init(struct idpf_vport *vport)
+{
+ struct idpf_adapter *adapter = vport->adapter;
+ int num_vecs = vport->num_q_vectors;
+ struct idpf_vec_regs *reg_vals;
+ int num_regs, i, err = 0;
+ u32 rx_itr, tx_itr;
+ u16 total_vecs;
+
+ total_vecs = idpf_get_reserved_vecs(vport->adapter);
+ reg_vals = kcalloc(total_vecs, sizeof(struct idpf_vec_regs),
+ GFP_KERNEL);
+ if (!reg_vals)
+ return -ENOMEM;
+
+ num_regs = idpf_get_reg_intr_vecs(vport, reg_vals);
+ if (num_regs < num_vecs) {
+ err = -EINVAL;
+ goto free_reg_vals;
+ }
+
+ for (i = 0; i < num_vecs; i++) {
+ struct idpf_q_vector *q_vector = &vport->q_vectors[i];
+ u16 vec_id = vport->q_vector_idxs[i] - IDPF_MBX_Q_VEC;
+ struct idpf_intr_reg *intr = &q_vector->intr_reg;
+ u32 spacing;
+
+ intr->dyn_ctl = idpf_get_reg_addr(adapter,
+ reg_vals[vec_id].dyn_ctl_reg);
+ intr->dyn_ctl_intena_m = VF_INT_DYN_CTLN_INTENA_M;
+ intr->dyn_ctl_itridx_s = VF_INT_DYN_CTLN_ITR_INDX_S;
+
+ spacing = IDPF_ITR_IDX_SPACING(reg_vals[vec_id].itrn_index_spacing,
+ IDPF_VF_ITR_IDX_SPACING);
+ rx_itr = VF_INT_ITRN_ADDR(VIRTCHNL2_ITR_IDX_0,
+ reg_vals[vec_id].itrn_reg,
+ spacing);
+ tx_itr = VF_INT_ITRN_ADDR(VIRTCHNL2_ITR_IDX_1,
+ reg_vals[vec_id].itrn_reg,
+ spacing);
+ intr->rx_itr = idpf_get_reg_addr(adapter, rx_itr);
+ intr->tx_itr = idpf_get_reg_addr(adapter, tx_itr);
+ }
+
+free_reg_vals:
+ kfree(reg_vals);
+
+ return err;
+}
+
+/**
+ * idpf_vf_reset_reg_init - Initialize reset registers
+ * @adapter: Driver specific private structure
+ */
+static void idpf_vf_reset_reg_init(struct idpf_adapter *adapter)
+{
+ adapter->reset_reg.rstat = idpf_get_reg_addr(adapter, VFGEN_RSTAT);
+ adapter->reset_reg.rstat_m = VFGEN_RSTAT_VFR_STATE_M;
+}
+
+/**
+ * idpf_vf_trigger_reset - trigger reset
+ * @adapter: Driver specific private structure
+ * @trig_cause: Reason to trigger a reset
+ */
+static void idpf_vf_trigger_reset(struct idpf_adapter *adapter,
+ enum idpf_flags trig_cause)
+{
+ /* Do not send VIRTCHNL2_OP_RESET_VF message on driver unload */
+ if (trig_cause == IDPF_HR_FUNC_RESET &&
+ !test_bit(IDPF_REMOVE_IN_PROG, adapter->flags))
+ idpf_send_mb_msg(adapter, VIRTCHNL2_OP_RESET_VF, 0, NULL);
+}
+
+/**
+ * idpf_vf_reg_ops_init - Initialize register API function pointers
+ * @adapter: Driver specific private structure
+ */
+static void idpf_vf_reg_ops_init(struct idpf_adapter *adapter)
+{
+ adapter->dev_ops.reg_ops.ctlq_reg_init = idpf_vf_ctlq_reg_init;
+ adapter->dev_ops.reg_ops.intr_reg_init = idpf_vf_intr_reg_init;
+ adapter->dev_ops.reg_ops.mb_intr_reg_init = idpf_vf_mb_intr_reg_init;
+ adapter->dev_ops.reg_ops.reset_reg_init = idpf_vf_reset_reg_init;
+ adapter->dev_ops.reg_ops.trigger_reset = idpf_vf_trigger_reset;
+}
+
+/**
+ * idpf_vf_dev_ops_init - Initialize device API function pointers
+ * @adapter: Driver specific private structure
+ */
+void idpf_vf_dev_ops_init(struct idpf_adapter *adapter)
+{
+ idpf_vf_reg_ops_init(adapter);
+}
diff --git a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c
new file mode 100644
index 000000000000..9bc85b2f1709
--- /dev/null
+++ b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c
@@ -0,0 +1,3791 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2023 Intel Corporation */
+
+#include "idpf.h"
+
+/**
+ * idpf_recv_event_msg - Receive virtchnl event message
+ * @vport: virtual port structure
+ * @ctlq_msg: message to copy from
+ *
+ * Receive virtchnl event message
+ */
+static void idpf_recv_event_msg(struct idpf_vport *vport,
+ struct idpf_ctlq_msg *ctlq_msg)
+{
+ struct idpf_netdev_priv *np = netdev_priv(vport->netdev);
+ struct virtchnl2_event *v2e;
+ bool link_status;
+ u32 event;
+
+ v2e = (struct virtchnl2_event *)ctlq_msg->ctx.indirect.payload->va;
+ event = le32_to_cpu(v2e->event);
+
+ switch (event) {
+ case VIRTCHNL2_EVENT_LINK_CHANGE:
+ vport->link_speed_mbps = le32_to_cpu(v2e->link_speed);
+ link_status = v2e->link_status;
+
+ if (vport->link_up == link_status)
+ break;
+
+ vport->link_up = link_status;
+ if (np->state == __IDPF_VPORT_UP) {
+ if (vport->link_up) {
+ netif_carrier_on(vport->netdev);
+ netif_tx_start_all_queues(vport->netdev);
+ } else {
+ netif_tx_stop_all_queues(vport->netdev);
+ netif_carrier_off(vport->netdev);
+ }
+ }
+ break;
+ default:
+ dev_err(&vport->adapter->pdev->dev,
+ "Unknown event %d from PF\n", event);
+ break;
+ }
+}
+
+/**
+ * idpf_mb_clean - Reclaim the send mailbox queue entries
+ * @adapter: Driver specific private structure
+ *
+ * Reclaim the send mailbox queue entries to be used to send further messages
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int idpf_mb_clean(struct idpf_adapter *adapter)
+{
+ u16 i, num_q_msg = IDPF_DFLT_MBX_Q_LEN;
+ struct idpf_ctlq_msg **q_msg;
+ struct idpf_dma_mem *dma_mem;
+ int err;
+
+ q_msg = kcalloc(num_q_msg, sizeof(struct idpf_ctlq_msg *), GFP_ATOMIC);
+ if (!q_msg)
+ return -ENOMEM;
+
+ err = idpf_ctlq_clean_sq(adapter->hw.asq, &num_q_msg, q_msg);
+ if (err)
+ goto err_kfree;
+
+ for (i = 0; i < num_q_msg; i++) {
+ if (!q_msg[i])
+ continue;
+ dma_mem = q_msg[i]->ctx.indirect.payload;
+ if (dma_mem)
+ dma_free_coherent(&adapter->pdev->dev, dma_mem->size,
+ dma_mem->va, dma_mem->pa);
+ kfree(q_msg[i]);
+ kfree(dma_mem);
+ }
+
+err_kfree:
+ kfree(q_msg);
+
+ return err;
+}
+
+/**
+ * idpf_send_mb_msg - Send message over mailbox
+ * @adapter: Driver specific private structure
+ * @op: virtchnl opcode
+ * @msg_size: size of the payload
+ * @msg: pointer to buffer holding the payload
+ *
+ * Will prepare the control queue message and initiates the send api
+ *
+ * Returns 0 on success, negative on failure
+ */
+int idpf_send_mb_msg(struct idpf_adapter *adapter, u32 op,
+ u16 msg_size, u8 *msg)
+{
+ struct idpf_ctlq_msg *ctlq_msg;
+ struct idpf_dma_mem *dma_mem;
+ int err;
+
+ /* If we are here and a reset is detected nothing much can be
+ * done. This thread should silently abort and expected to
+ * be corrected with a new run either by user or driver
+ * flows after reset
+ */
+ if (idpf_is_reset_detected(adapter))
+ return 0;
+
+ err = idpf_mb_clean(adapter);
+ if (err)
+ return err;
+
+ ctlq_msg = kzalloc(sizeof(*ctlq_msg), GFP_ATOMIC);
+ if (!ctlq_msg)
+ return -ENOMEM;
+
+ dma_mem = kzalloc(sizeof(*dma_mem), GFP_ATOMIC);
+ if (!dma_mem) {
+ err = -ENOMEM;
+ goto dma_mem_error;
+ }
+
+ ctlq_msg->opcode = idpf_mbq_opc_send_msg_to_cp;
+ ctlq_msg->func_id = 0;
+ ctlq_msg->data_len = msg_size;
+ ctlq_msg->cookie.mbx.chnl_opcode = op;
+ ctlq_msg->cookie.mbx.chnl_retval = 0;
+ dma_mem->size = IDPF_CTLQ_MAX_BUF_LEN;
+ dma_mem->va = dma_alloc_coherent(&adapter->pdev->dev, dma_mem->size,
+ &dma_mem->pa, GFP_ATOMIC);
+ if (!dma_mem->va) {
+ err = -ENOMEM;
+ goto dma_alloc_error;
+ }
+ memcpy(dma_mem->va, msg, msg_size);
+ ctlq_msg->ctx.indirect.payload = dma_mem;
+
+ err = idpf_ctlq_send(&adapter->hw, adapter->hw.asq, 1, ctlq_msg);
+ if (err)
+ goto send_error;
+
+ return 0;
+
+send_error:
+ dma_free_coherent(&adapter->pdev->dev, dma_mem->size, dma_mem->va,
+ dma_mem->pa);
+dma_alloc_error:
+ kfree(dma_mem);
+dma_mem_error:
+ kfree(ctlq_msg);
+
+ return err;
+}
+
+/**
+ * idpf_find_vport - Find vport pointer from control queue message
+ * @adapter: driver specific private structure
+ * @vport: address of vport pointer to copy the vport from adapters vport list
+ * @ctlq_msg: control queue message
+ *
+ * Return 0 on success, error value on failure. Also this function does check
+ * for the opcodes which expect to receive payload and return error value if
+ * it is not the case.
+ */
+static int idpf_find_vport(struct idpf_adapter *adapter,
+ struct idpf_vport **vport,
+ struct idpf_ctlq_msg *ctlq_msg)
+{
+ bool no_op = false, vid_found = false;
+ int i, err = 0;
+ char *vc_msg;
+ u32 v_id;
+
+ vc_msg = kcalloc(IDPF_CTLQ_MAX_BUF_LEN, sizeof(char), GFP_KERNEL);
+ if (!vc_msg)
+ return -ENOMEM;
+
+ if (ctlq_msg->data_len) {
+ size_t payload_size = ctlq_msg->ctx.indirect.payload->size;
+
+ if (!payload_size) {
+ dev_err(&adapter->pdev->dev, "Failed to receive payload buffer\n");
+ kfree(vc_msg);
+
+ return -EINVAL;
+ }
+
+ memcpy(vc_msg, ctlq_msg->ctx.indirect.payload->va,
+ min_t(size_t, payload_size, IDPF_CTLQ_MAX_BUF_LEN));
+ }
+
+ switch (ctlq_msg->cookie.mbx.chnl_opcode) {
+ case VIRTCHNL2_OP_VERSION:
+ case VIRTCHNL2_OP_GET_CAPS:
+ case VIRTCHNL2_OP_CREATE_VPORT:
+ case VIRTCHNL2_OP_SET_SRIOV_VFS:
+ case VIRTCHNL2_OP_ALLOC_VECTORS:
+ case VIRTCHNL2_OP_DEALLOC_VECTORS:
+ case VIRTCHNL2_OP_GET_PTYPE_INFO:
+ goto free_vc_msg;
+ case VIRTCHNL2_OP_ENABLE_VPORT:
+ case VIRTCHNL2_OP_DISABLE_VPORT:
+ case VIRTCHNL2_OP_DESTROY_VPORT:
+ v_id = le32_to_cpu(((struct virtchnl2_vport *)vc_msg)->vport_id);
+ break;
+ case VIRTCHNL2_OP_CONFIG_TX_QUEUES:
+ v_id = le32_to_cpu(((struct virtchnl2_config_tx_queues *)vc_msg)->vport_id);
+ break;
+ case VIRTCHNL2_OP_CONFIG_RX_QUEUES:
+ v_id = le32_to_cpu(((struct virtchnl2_config_rx_queues *)vc_msg)->vport_id);
+ break;
+ case VIRTCHNL2_OP_ENABLE_QUEUES:
+ case VIRTCHNL2_OP_DISABLE_QUEUES:
+ case VIRTCHNL2_OP_DEL_QUEUES:
+ v_id = le32_to_cpu(((struct virtchnl2_del_ena_dis_queues *)vc_msg)->vport_id);
+ break;
+ case VIRTCHNL2_OP_ADD_QUEUES:
+ v_id = le32_to_cpu(((struct virtchnl2_add_queues *)vc_msg)->vport_id);
+ break;
+ case VIRTCHNL2_OP_MAP_QUEUE_VECTOR:
+ case VIRTCHNL2_OP_UNMAP_QUEUE_VECTOR:
+ v_id = le32_to_cpu(((struct virtchnl2_queue_vector_maps *)vc_msg)->vport_id);
+ break;
+ case VIRTCHNL2_OP_GET_STATS:
+ v_id = le32_to_cpu(((struct virtchnl2_vport_stats *)vc_msg)->vport_id);
+ break;
+ case VIRTCHNL2_OP_GET_RSS_LUT:
+ case VIRTCHNL2_OP_SET_RSS_LUT:
+ v_id = le32_to_cpu(((struct virtchnl2_rss_lut *)vc_msg)->vport_id);
+ break;
+ case VIRTCHNL2_OP_GET_RSS_KEY:
+ case VIRTCHNL2_OP_SET_RSS_KEY:
+ v_id = le32_to_cpu(((struct virtchnl2_rss_key *)vc_msg)->vport_id);
+ break;
+ case VIRTCHNL2_OP_EVENT:
+ v_id = le32_to_cpu(((struct virtchnl2_event *)vc_msg)->vport_id);
+ break;
+ case VIRTCHNL2_OP_LOOPBACK:
+ v_id = le32_to_cpu(((struct virtchnl2_loopback *)vc_msg)->vport_id);
+ break;
+ case VIRTCHNL2_OP_CONFIG_PROMISCUOUS_MODE:
+ v_id = le32_to_cpu(((struct virtchnl2_promisc_info *)vc_msg)->vport_id);
+ break;
+ case VIRTCHNL2_OP_ADD_MAC_ADDR:
+ case VIRTCHNL2_OP_DEL_MAC_ADDR:
+ v_id = le32_to_cpu(((struct virtchnl2_mac_addr_list *)vc_msg)->vport_id);
+ break;
+ default:
+ no_op = true;
+ break;
+ }
+
+ if (no_op)
+ goto free_vc_msg;
+
+ for (i = 0; i < idpf_get_max_vports(adapter); i++) {
+ if (adapter->vport_ids[i] == v_id) {
+ vid_found = true;
+ break;
+ }
+ }
+
+ if (vid_found)
+ *vport = adapter->vports[i];
+ else
+ err = -EINVAL;
+
+free_vc_msg:
+ kfree(vc_msg);
+
+ return err;
+}
+
+/**
+ * idpf_copy_data_to_vc_buf - Copy the virtchnl response data into the buffer.
+ * @adapter: driver specific private structure
+ * @vport: virtual port structure
+ * @ctlq_msg: msg to copy from
+ * @err_enum: err bit to set on error
+ *
+ * Copies the payload from ctlq_msg into virtchnl buffer. Returns 0 on success,
+ * negative on failure.
+ */
+static int idpf_copy_data_to_vc_buf(struct idpf_adapter *adapter,
+ struct idpf_vport *vport,
+ struct idpf_ctlq_msg *ctlq_msg,
+ enum idpf_vport_vc_state err_enum)
+{
+ if (ctlq_msg->cookie.mbx.chnl_retval) {
+ if (vport)
+ set_bit(err_enum, vport->vc_state);
+ else
+ set_bit(err_enum, adapter->vc_state);
+
+ return -EINVAL;
+ }
+
+ if (vport)
+ memcpy(vport->vc_msg, ctlq_msg->ctx.indirect.payload->va,
+ min_t(int, ctlq_msg->ctx.indirect.payload->size,
+ IDPF_CTLQ_MAX_BUF_LEN));
+ else
+ memcpy(adapter->vc_msg, ctlq_msg->ctx.indirect.payload->va,
+ min_t(int, ctlq_msg->ctx.indirect.payload->size,
+ IDPF_CTLQ_MAX_BUF_LEN));
+
+ return 0;
+}
+
+/**
+ * idpf_recv_vchnl_op - helper function with common logic when handling the
+ * reception of VIRTCHNL OPs.
+ * @adapter: driver specific private structure
+ * @vport: virtual port structure
+ * @ctlq_msg: msg to copy from
+ * @state: state bit used on timeout check
+ * @err_state: err bit to set on error
+ */
+static void idpf_recv_vchnl_op(struct idpf_adapter *adapter,
+ struct idpf_vport *vport,
+ struct idpf_ctlq_msg *ctlq_msg,
+ enum idpf_vport_vc_state state,
+ enum idpf_vport_vc_state err_state)
+{
+ wait_queue_head_t *vchnl_wq;
+ int err;
+
+ if (vport)
+ vchnl_wq = &vport->vchnl_wq;
+ else
+ vchnl_wq = &adapter->vchnl_wq;
+
+ err = idpf_copy_data_to_vc_buf(adapter, vport, ctlq_msg, err_state);
+ if (wq_has_sleeper(vchnl_wq)) {
+ if (vport)
+ set_bit(state, vport->vc_state);
+ else
+ set_bit(state, adapter->vc_state);
+
+ wake_up(vchnl_wq);
+ } else {
+ if (!err) {
+ dev_warn(&adapter->pdev->dev, "opcode %d received without waiting thread\n",
+ ctlq_msg->cookie.mbx.chnl_opcode);
+ } else {
+ /* Clear the errors since there is no sleeper to pass
+ * them on
+ */
+ if (vport)
+ clear_bit(err_state, vport->vc_state);
+ else
+ clear_bit(err_state, adapter->vc_state);
+ }
+ }
+}
+
+/**
+ * idpf_recv_mb_msg - Receive message over mailbox
+ * @adapter: Driver specific private structure
+ * @op: virtchannel operation code
+ * @msg: Received message holding buffer
+ * @msg_size: message size
+ *
+ * Will receive control queue message and posts the receive buffer. Returns 0
+ * on success and negative on failure.
+ */
+int idpf_recv_mb_msg(struct idpf_adapter *adapter, u32 op,
+ void *msg, int msg_size)
+{
+ struct idpf_vport *vport = NULL;
+ struct idpf_ctlq_msg ctlq_msg;
+ struct idpf_dma_mem *dma_mem;
+ bool work_done = false;
+ int num_retry = 2000;
+ u16 num_q_msg;
+ int err;
+
+ while (1) {
+ struct idpf_vport_config *vport_config;
+ int payload_size = 0;
+
+ /* Try to get one message */
+ num_q_msg = 1;
+ dma_mem = NULL;
+ err = idpf_ctlq_recv(adapter->hw.arq, &num_q_msg, &ctlq_msg);
+ /* If no message then decide if we have to retry based on
+ * opcode
+ */
+ if (err || !num_q_msg) {
+ /* Increasing num_retry to consider the delayed
+ * responses because of large number of VF's mailbox
+ * messages. If the mailbox message is received from
+ * the other side, we come out of the sleep cycle
+ * immediately else we wait for more time.
+ */
+ if (!op || !num_retry--)
+ break;
+ if (test_bit(IDPF_REMOVE_IN_PROG, adapter->flags)) {
+ err = -EIO;
+ break;
+ }
+ msleep(20);
+ continue;
+ }
+
+ /* If we are here a message is received. Check if we are looking
+ * for a specific message based on opcode. If it is different
+ * ignore and post buffers
+ */
+ if (op && ctlq_msg.cookie.mbx.chnl_opcode != op)
+ goto post_buffs;
+
+ err = idpf_find_vport(adapter, &vport, &ctlq_msg);
+ if (err)
+ goto post_buffs;
+
+ if (ctlq_msg.data_len)
+ payload_size = ctlq_msg.ctx.indirect.payload->size;
+
+ /* All conditions are met. Either a message requested is
+ * received or we received a message to be processed
+ */
+ switch (ctlq_msg.cookie.mbx.chnl_opcode) {
+ case VIRTCHNL2_OP_VERSION:
+ case VIRTCHNL2_OP_GET_CAPS:
+ if (ctlq_msg.cookie.mbx.chnl_retval) {
+ dev_err(&adapter->pdev->dev, "Failure initializing, vc op: %u retval: %u\n",
+ ctlq_msg.cookie.mbx.chnl_opcode,
+ ctlq_msg.cookie.mbx.chnl_retval);
+ err = -EBADMSG;
+ } else if (msg) {
+ memcpy(msg, ctlq_msg.ctx.indirect.payload->va,
+ min_t(int, payload_size, msg_size));
+ }
+ work_done = true;
+ break;
+ case VIRTCHNL2_OP_CREATE_VPORT:
+ idpf_recv_vchnl_op(adapter, NULL, &ctlq_msg,
+ IDPF_VC_CREATE_VPORT,
+ IDPF_VC_CREATE_VPORT_ERR);
+ break;
+ case VIRTCHNL2_OP_ENABLE_VPORT:
+ idpf_recv_vchnl_op(adapter, vport, &ctlq_msg,
+ IDPF_VC_ENA_VPORT,
+ IDPF_VC_ENA_VPORT_ERR);
+ break;
+ case VIRTCHNL2_OP_DISABLE_VPORT:
+ idpf_recv_vchnl_op(adapter, vport, &ctlq_msg,
+ IDPF_VC_DIS_VPORT,
+ IDPF_VC_DIS_VPORT_ERR);
+ break;
+ case VIRTCHNL2_OP_DESTROY_VPORT:
+ idpf_recv_vchnl_op(adapter, vport, &ctlq_msg,
+ IDPF_VC_DESTROY_VPORT,
+ IDPF_VC_DESTROY_VPORT_ERR);
+ break;
+ case VIRTCHNL2_OP_CONFIG_TX_QUEUES:
+ idpf_recv_vchnl_op(adapter, vport, &ctlq_msg,
+ IDPF_VC_CONFIG_TXQ,
+ IDPF_VC_CONFIG_TXQ_ERR);
+ break;
+ case VIRTCHNL2_OP_CONFIG_RX_QUEUES:
+ idpf_recv_vchnl_op(adapter, vport, &ctlq_msg,
+ IDPF_VC_CONFIG_RXQ,
+ IDPF_VC_CONFIG_RXQ_ERR);
+ break;
+ case VIRTCHNL2_OP_ENABLE_QUEUES:
+ idpf_recv_vchnl_op(adapter, vport, &ctlq_msg,
+ IDPF_VC_ENA_QUEUES,
+ IDPF_VC_ENA_QUEUES_ERR);
+ break;
+ case VIRTCHNL2_OP_DISABLE_QUEUES:
+ idpf_recv_vchnl_op(adapter, vport, &ctlq_msg,
+ IDPF_VC_DIS_QUEUES,
+ IDPF_VC_DIS_QUEUES_ERR);
+ break;
+ case VIRTCHNL2_OP_ADD_QUEUES:
+ idpf_recv_vchnl_op(adapter, vport, &ctlq_msg,
+ IDPF_VC_ADD_QUEUES,
+ IDPF_VC_ADD_QUEUES_ERR);
+ break;
+ case VIRTCHNL2_OP_DEL_QUEUES:
+ idpf_recv_vchnl_op(adapter, vport, &ctlq_msg,
+ IDPF_VC_DEL_QUEUES,
+ IDPF_VC_DEL_QUEUES_ERR);
+ break;
+ case VIRTCHNL2_OP_MAP_QUEUE_VECTOR:
+ idpf_recv_vchnl_op(adapter, vport, &ctlq_msg,
+ IDPF_VC_MAP_IRQ,
+ IDPF_VC_MAP_IRQ_ERR);
+ break;
+ case VIRTCHNL2_OP_UNMAP_QUEUE_VECTOR:
+ idpf_recv_vchnl_op(adapter, vport, &ctlq_msg,
+ IDPF_VC_UNMAP_IRQ,
+ IDPF_VC_UNMAP_IRQ_ERR);
+ break;
+ case VIRTCHNL2_OP_GET_STATS:
+ idpf_recv_vchnl_op(adapter, vport, &ctlq_msg,
+ IDPF_VC_GET_STATS,
+ IDPF_VC_GET_STATS_ERR);
+ break;
+ case VIRTCHNL2_OP_GET_RSS_LUT:
+ idpf_recv_vchnl_op(adapter, vport, &ctlq_msg,
+ IDPF_VC_GET_RSS_LUT,
+ IDPF_VC_GET_RSS_LUT_ERR);
+ break;
+ case VIRTCHNL2_OP_SET_RSS_LUT:
+ idpf_recv_vchnl_op(adapter, vport, &ctlq_msg,
+ IDPF_VC_SET_RSS_LUT,
+ IDPF_VC_SET_RSS_LUT_ERR);
+ break;
+ case VIRTCHNL2_OP_GET_RSS_KEY:
+ idpf_recv_vchnl_op(adapter, vport, &ctlq_msg,
+ IDPF_VC_GET_RSS_KEY,
+ IDPF_VC_GET_RSS_KEY_ERR);
+ break;
+ case VIRTCHNL2_OP_SET_RSS_KEY:
+ idpf_recv_vchnl_op(adapter, vport, &ctlq_msg,
+ IDPF_VC_SET_RSS_KEY,
+ IDPF_VC_SET_RSS_KEY_ERR);
+ break;
+ case VIRTCHNL2_OP_SET_SRIOV_VFS:
+ idpf_recv_vchnl_op(adapter, NULL, &ctlq_msg,
+ IDPF_VC_SET_SRIOV_VFS,
+ IDPF_VC_SET_SRIOV_VFS_ERR);
+ break;
+ case VIRTCHNL2_OP_ALLOC_VECTORS:
+ idpf_recv_vchnl_op(adapter, NULL, &ctlq_msg,
+ IDPF_VC_ALLOC_VECTORS,
+ IDPF_VC_ALLOC_VECTORS_ERR);
+ break;
+ case VIRTCHNL2_OP_DEALLOC_VECTORS:
+ idpf_recv_vchnl_op(adapter, NULL, &ctlq_msg,
+ IDPF_VC_DEALLOC_VECTORS,
+ IDPF_VC_DEALLOC_VECTORS_ERR);
+ break;
+ case VIRTCHNL2_OP_GET_PTYPE_INFO:
+ idpf_recv_vchnl_op(adapter, NULL, &ctlq_msg,
+ IDPF_VC_GET_PTYPE_INFO,
+ IDPF_VC_GET_PTYPE_INFO_ERR);
+ break;
+ case VIRTCHNL2_OP_LOOPBACK:
+ idpf_recv_vchnl_op(adapter, vport, &ctlq_msg,
+ IDPF_VC_LOOPBACK_STATE,
+ IDPF_VC_LOOPBACK_STATE_ERR);
+ break;
+ case VIRTCHNL2_OP_CONFIG_PROMISCUOUS_MODE:
+ /* This message can only be sent asynchronously. As
+ * such we'll have lost the context in which it was
+ * called and thus can only really report if it looks
+ * like an error occurred. Don't bother setting ERR bit
+ * or waking chnl_wq since no work queue will be waiting
+ * to read the message.
+ */
+ if (ctlq_msg.cookie.mbx.chnl_retval) {
+ dev_err(&adapter->pdev->dev, "Failed to set promiscuous mode: %d\n",
+ ctlq_msg.cookie.mbx.chnl_retval);
+ }
+ break;
+ case VIRTCHNL2_OP_ADD_MAC_ADDR:
+ vport_config = adapter->vport_config[vport->idx];
+ if (test_and_clear_bit(IDPF_VPORT_ADD_MAC_REQ,
+ vport_config->flags)) {
+ /* Message was sent asynchronously. We don't
+ * normally print errors here, instead
+ * prefer to handle errors in the function
+ * calling wait_for_event. However, if
+ * asynchronous, the context in which the
+ * message was sent is lost. We can't really do
+ * anything about at it this point, but we
+ * should at a minimum indicate that it looks
+ * like something went wrong. Also don't bother
+ * setting ERR bit or waking vchnl_wq since no
+ * one will be waiting to read the async
+ * message.
+ */
+ if (ctlq_msg.cookie.mbx.chnl_retval)
+ dev_err(&adapter->pdev->dev, "Failed to add MAC address: %d\n",
+ ctlq_msg.cookie.mbx.chnl_retval);
+ break;
+ }
+ idpf_recv_vchnl_op(adapter, vport, &ctlq_msg,
+ IDPF_VC_ADD_MAC_ADDR,
+ IDPF_VC_ADD_MAC_ADDR_ERR);
+ break;
+ case VIRTCHNL2_OP_DEL_MAC_ADDR:
+ vport_config = adapter->vport_config[vport->idx];
+ if (test_and_clear_bit(IDPF_VPORT_DEL_MAC_REQ,
+ vport_config->flags)) {
+ /* Message was sent asynchronously like the
+ * VIRTCHNL2_OP_ADD_MAC_ADDR
+ */
+ if (ctlq_msg.cookie.mbx.chnl_retval)
+ dev_err(&adapter->pdev->dev, "Failed to delete MAC address: %d\n",
+ ctlq_msg.cookie.mbx.chnl_retval);
+ break;
+ }
+ idpf_recv_vchnl_op(adapter, vport, &ctlq_msg,
+ IDPF_VC_DEL_MAC_ADDR,
+ IDPF_VC_DEL_MAC_ADDR_ERR);
+ break;
+ case VIRTCHNL2_OP_EVENT:
+ idpf_recv_event_msg(vport, &ctlq_msg);
+ break;
+ default:
+ dev_warn(&adapter->pdev->dev,
+ "Unhandled virtchnl response %d\n",
+ ctlq_msg.cookie.mbx.chnl_opcode);
+ break;
+ }
+
+post_buffs:
+ if (ctlq_msg.data_len)
+ dma_mem = ctlq_msg.ctx.indirect.payload;
+ else
+ num_q_msg = 0;
+
+ err = idpf_ctlq_post_rx_buffs(&adapter->hw, adapter->hw.arq,
+ &num_q_msg, &dma_mem);
+ /* If post failed clear the only buffer we supplied */
+ if (err && dma_mem)
+ dma_free_coherent(&adapter->pdev->dev, dma_mem->size,
+ dma_mem->va, dma_mem->pa);
+
+ /* Applies only if we are looking for a specific opcode */
+ if (work_done)
+ break;
+ }
+
+ return err;
+}
+
+/**
+ * __idpf_wait_for_event - wrapper function for wait on virtchannel response
+ * @adapter: Driver private data structure
+ * @vport: virtual port structure
+ * @state: check on state upon timeout
+ * @err_check: check if this specific error bit is set
+ * @timeout: Max time to wait
+ *
+ * Checks if state is set upon expiry of timeout. Returns 0 on success,
+ * negative on failure.
+ */
+static int __idpf_wait_for_event(struct idpf_adapter *adapter,
+ struct idpf_vport *vport,
+ enum idpf_vport_vc_state state,
+ enum idpf_vport_vc_state err_check,
+ int timeout)
+{
+ int time_to_wait, num_waits;
+ wait_queue_head_t *vchnl_wq;
+ unsigned long *vc_state;
+
+ time_to_wait = ((timeout <= IDPF_MAX_WAIT) ? timeout : IDPF_MAX_WAIT);
+ num_waits = ((timeout <= IDPF_MAX_WAIT) ? 1 : timeout / IDPF_MAX_WAIT);
+
+ if (vport) {
+ vchnl_wq = &vport->vchnl_wq;
+ vc_state = vport->vc_state;
+ } else {
+ vchnl_wq = &adapter->vchnl_wq;
+ vc_state = adapter->vc_state;
+ }
+
+ while (num_waits) {
+ int event;
+
+ /* If we are here and a reset is detected do not wait but
+ * return. Reset timing is out of drivers control. So
+ * while we are cleaning resources as part of reset if the
+ * underlying HW mailbox is gone, wait on mailbox messages
+ * is not meaningful
+ */
+ if (idpf_is_reset_detected(adapter))
+ return 0;
+
+ event = wait_event_timeout(*vchnl_wq,
+ test_and_clear_bit(state, vc_state),
+ msecs_to_jiffies(time_to_wait));
+ if (event) {
+ if (test_and_clear_bit(err_check, vc_state)) {
+ dev_err(&adapter->pdev->dev, "VC response error %s\n",
+ idpf_vport_vc_state_str[err_check]);
+
+ return -EINVAL;
+ }
+
+ return 0;
+ }
+ num_waits--;
+ }
+
+ /* Timeout occurred */
+ dev_err(&adapter->pdev->dev, "VC timeout, state = %s\n",
+ idpf_vport_vc_state_str[state]);
+
+ return -ETIMEDOUT;
+}
+
+/**
+ * idpf_min_wait_for_event - wait for virtchannel response
+ * @adapter: Driver private data structure
+ * @vport: virtual port structure
+ * @state: check on state upon timeout
+ * @err_check: check if this specific error bit is set
+ *
+ * Returns 0 on success, negative on failure.
+ */
+static int idpf_min_wait_for_event(struct idpf_adapter *adapter,
+ struct idpf_vport *vport,
+ enum idpf_vport_vc_state state,
+ enum idpf_vport_vc_state err_check)
+{
+ return __idpf_wait_for_event(adapter, vport, state, err_check,
+ IDPF_WAIT_FOR_EVENT_TIMEO_MIN);
+}
+
+/**
+ * idpf_wait_for_event - wait for virtchannel response
+ * @adapter: Driver private data structure
+ * @vport: virtual port structure
+ * @state: check on state upon timeout after 500ms
+ * @err_check: check if this specific error bit is set
+ *
+ * Returns 0 on success, negative on failure.
+ */
+static int idpf_wait_for_event(struct idpf_adapter *adapter,
+ struct idpf_vport *vport,
+ enum idpf_vport_vc_state state,
+ enum idpf_vport_vc_state err_check)
+{
+ /* Increasing the timeout in __IDPF_INIT_SW flow to consider large
+ * number of VF's mailbox message responses. When a message is received
+ * on mailbox, this thread is woken up by the idpf_recv_mb_msg before
+ * the timeout expires. Only in the error case i.e. if no message is
+ * received on mailbox, we wait for the complete timeout which is
+ * less likely to happen.
+ */
+ return __idpf_wait_for_event(adapter, vport, state, err_check,
+ IDPF_WAIT_FOR_EVENT_TIMEO);
+}
+
+/**
+ * idpf_wait_for_marker_event - wait for software marker response
+ * @vport: virtual port data structure
+ *
+ * Returns 0 success, negative on failure.
+ **/
+static int idpf_wait_for_marker_event(struct idpf_vport *vport)
+{
+ int event;
+ int i;
+
+ for (i = 0; i < vport->num_txq; i++)
+ set_bit(__IDPF_Q_SW_MARKER, vport->txqs[i]->flags);
+
+ event = wait_event_timeout(vport->sw_marker_wq,
+ test_and_clear_bit(IDPF_VPORT_SW_MARKER,
+ vport->flags),
+ msecs_to_jiffies(500));
+
+ for (i = 0; i < vport->num_txq; i++)
+ clear_bit(__IDPF_Q_POLL_MODE, vport->txqs[i]->flags);
+
+ if (event)
+ return 0;
+
+ dev_warn(&vport->adapter->pdev->dev, "Failed to receive marker packets\n");
+
+ return -ETIMEDOUT;
+}
+
+/**
+ * idpf_send_ver_msg - send virtchnl version message
+ * @adapter: Driver specific private structure
+ *
+ * Send virtchnl version message. Returns 0 on success, negative on failure.
+ */
+static int idpf_send_ver_msg(struct idpf_adapter *adapter)
+{
+ struct virtchnl2_version_info vvi;
+
+ if (adapter->virt_ver_maj) {
+ vvi.major = cpu_to_le32(adapter->virt_ver_maj);
+ vvi.minor = cpu_to_le32(adapter->virt_ver_min);
+ } else {
+ vvi.major = cpu_to_le32(IDPF_VIRTCHNL_VERSION_MAJOR);
+ vvi.minor = cpu_to_le32(IDPF_VIRTCHNL_VERSION_MINOR);
+ }
+
+ return idpf_send_mb_msg(adapter, VIRTCHNL2_OP_VERSION, sizeof(vvi),
+ (u8 *)&vvi);
+}
+
+/**
+ * idpf_recv_ver_msg - Receive virtchnl version message
+ * @adapter: Driver specific private structure
+ *
+ * Receive virtchnl version message. Returns 0 on success, -EAGAIN if we need
+ * to send version message again, otherwise negative on failure.
+ */
+static int idpf_recv_ver_msg(struct idpf_adapter *adapter)
+{
+ struct virtchnl2_version_info vvi;
+ u32 major, minor;
+ int err;
+
+ err = idpf_recv_mb_msg(adapter, VIRTCHNL2_OP_VERSION, &vvi,
+ sizeof(vvi));
+ if (err)
+ return err;
+
+ major = le32_to_cpu(vvi.major);
+ minor = le32_to_cpu(vvi.minor);
+
+ if (major > IDPF_VIRTCHNL_VERSION_MAJOR) {
+ dev_warn(&adapter->pdev->dev,
+ "Virtchnl major version (%d) greater than supported\n",
+ major);
+
+ return -EINVAL;
+ }
+
+ if (major == IDPF_VIRTCHNL_VERSION_MAJOR &&
+ minor > IDPF_VIRTCHNL_VERSION_MINOR)
+ dev_warn(&adapter->pdev->dev,
+ "Virtchnl minor version (%d) didn't match\n", minor);
+
+ /* If we have a mismatch, resend version to update receiver on what
+ * version we will use.
+ */
+ if (!adapter->virt_ver_maj &&
+ major != IDPF_VIRTCHNL_VERSION_MAJOR &&
+ minor != IDPF_VIRTCHNL_VERSION_MINOR)
+ err = -EAGAIN;
+
+ adapter->virt_ver_maj = major;
+ adapter->virt_ver_min = minor;
+
+ return err;
+}
+
+/**
+ * idpf_send_get_caps_msg - Send virtchnl get capabilities message
+ * @adapter: Driver specific private structure
+ *
+ * Send virtchl get capabilities message. Returns 0 on success, negative on
+ * failure.
+ */
+static int idpf_send_get_caps_msg(struct idpf_adapter *adapter)
+{
+ struct virtchnl2_get_capabilities caps = { };
+
+ caps.csum_caps =
+ cpu_to_le32(VIRTCHNL2_CAP_TX_CSUM_L3_IPV4 |
+ VIRTCHNL2_CAP_TX_CSUM_L4_IPV4_TCP |
+ VIRTCHNL2_CAP_TX_CSUM_L4_IPV4_UDP |
+ VIRTCHNL2_CAP_TX_CSUM_L4_IPV4_SCTP |
+ VIRTCHNL2_CAP_TX_CSUM_L4_IPV6_TCP |
+ VIRTCHNL2_CAP_TX_CSUM_L4_IPV6_UDP |
+ VIRTCHNL2_CAP_TX_CSUM_L4_IPV6_SCTP |
+ VIRTCHNL2_CAP_RX_CSUM_L3_IPV4 |
+ VIRTCHNL2_CAP_RX_CSUM_L4_IPV4_TCP |
+ VIRTCHNL2_CAP_RX_CSUM_L4_IPV4_UDP |
+ VIRTCHNL2_CAP_RX_CSUM_L4_IPV4_SCTP |
+ VIRTCHNL2_CAP_RX_CSUM_L4_IPV6_TCP |
+ VIRTCHNL2_CAP_RX_CSUM_L4_IPV6_UDP |
+ VIRTCHNL2_CAP_RX_CSUM_L4_IPV6_SCTP |
+ VIRTCHNL2_CAP_TX_CSUM_L3_SINGLE_TUNNEL |
+ VIRTCHNL2_CAP_RX_CSUM_L3_SINGLE_TUNNEL |
+ VIRTCHNL2_CAP_TX_CSUM_L4_SINGLE_TUNNEL |
+ VIRTCHNL2_CAP_RX_CSUM_L4_SINGLE_TUNNEL |
+ VIRTCHNL2_CAP_RX_CSUM_GENERIC);
+
+ caps.seg_caps =
+ cpu_to_le32(VIRTCHNL2_CAP_SEG_IPV4_TCP |
+ VIRTCHNL2_CAP_SEG_IPV4_UDP |
+ VIRTCHNL2_CAP_SEG_IPV4_SCTP |
+ VIRTCHNL2_CAP_SEG_IPV6_TCP |
+ VIRTCHNL2_CAP_SEG_IPV6_UDP |
+ VIRTCHNL2_CAP_SEG_IPV6_SCTP |
+ VIRTCHNL2_CAP_SEG_TX_SINGLE_TUNNEL);
+
+ caps.rss_caps =
+ cpu_to_le64(VIRTCHNL2_CAP_RSS_IPV4_TCP |
+ VIRTCHNL2_CAP_RSS_IPV4_UDP |
+ VIRTCHNL2_CAP_RSS_IPV4_SCTP |
+ VIRTCHNL2_CAP_RSS_IPV4_OTHER |
+ VIRTCHNL2_CAP_RSS_IPV6_TCP |
+ VIRTCHNL2_CAP_RSS_IPV6_UDP |
+ VIRTCHNL2_CAP_RSS_IPV6_SCTP |
+ VIRTCHNL2_CAP_RSS_IPV6_OTHER);
+
+ caps.hsplit_caps =
+ cpu_to_le32(VIRTCHNL2_CAP_RX_HSPLIT_AT_L4V4 |
+ VIRTCHNL2_CAP_RX_HSPLIT_AT_L4V6);
+
+ caps.rsc_caps =
+ cpu_to_le32(VIRTCHNL2_CAP_RSC_IPV4_TCP |
+ VIRTCHNL2_CAP_RSC_IPV6_TCP);
+
+ caps.other_caps =
+ cpu_to_le64(VIRTCHNL2_CAP_SRIOV |
+ VIRTCHNL2_CAP_MACFILTER |
+ VIRTCHNL2_CAP_SPLITQ_QSCHED |
+ VIRTCHNL2_CAP_PROMISC |
+ VIRTCHNL2_CAP_LOOPBACK);
+
+ return idpf_send_mb_msg(adapter, VIRTCHNL2_OP_GET_CAPS, sizeof(caps),
+ (u8 *)&caps);
+}
+
+/**
+ * idpf_recv_get_caps_msg - Receive virtchnl get capabilities message
+ * @adapter: Driver specific private structure
+ *
+ * Receive virtchnl get capabilities message. Returns 0 on success, negative on
+ * failure.
+ */
+static int idpf_recv_get_caps_msg(struct idpf_adapter *adapter)
+{
+ return idpf_recv_mb_msg(adapter, VIRTCHNL2_OP_GET_CAPS, &adapter->caps,
+ sizeof(struct virtchnl2_get_capabilities));
+}
+
+/**
+ * idpf_vport_alloc_max_qs - Allocate max queues for a vport
+ * @adapter: Driver specific private structure
+ * @max_q: vport max queue structure
+ */
+int idpf_vport_alloc_max_qs(struct idpf_adapter *adapter,
+ struct idpf_vport_max_q *max_q)
+{
+ struct idpf_avail_queue_info *avail_queues = &adapter->avail_queues;
+ struct virtchnl2_get_capabilities *caps = &adapter->caps;
+ u16 default_vports = idpf_get_default_vports(adapter);
+ int max_rx_q, max_tx_q;
+
+ mutex_lock(&adapter->queue_lock);
+
+ max_rx_q = le16_to_cpu(caps->max_rx_q) / default_vports;
+ max_tx_q = le16_to_cpu(caps->max_tx_q) / default_vports;
+ if (adapter->num_alloc_vports < default_vports) {
+ max_q->max_rxq = min_t(u16, max_rx_q, IDPF_MAX_Q);
+ max_q->max_txq = min_t(u16, max_tx_q, IDPF_MAX_Q);
+ } else {
+ max_q->max_rxq = IDPF_MIN_Q;
+ max_q->max_txq = IDPF_MIN_Q;
+ }
+ max_q->max_bufq = max_q->max_rxq * IDPF_MAX_BUFQS_PER_RXQ_GRP;
+ max_q->max_complq = max_q->max_txq;
+
+ if (avail_queues->avail_rxq < max_q->max_rxq ||
+ avail_queues->avail_txq < max_q->max_txq ||
+ avail_queues->avail_bufq < max_q->max_bufq ||
+ avail_queues->avail_complq < max_q->max_complq) {
+ mutex_unlock(&adapter->queue_lock);
+
+ return -EINVAL;
+ }
+
+ avail_queues->avail_rxq -= max_q->max_rxq;
+ avail_queues->avail_txq -= max_q->max_txq;
+ avail_queues->avail_bufq -= max_q->max_bufq;
+ avail_queues->avail_complq -= max_q->max_complq;
+
+ mutex_unlock(&adapter->queue_lock);
+
+ return 0;
+}
+
+/**
+ * idpf_vport_dealloc_max_qs - Deallocate max queues of a vport
+ * @adapter: Driver specific private structure
+ * @max_q: vport max queue structure
+ */
+void idpf_vport_dealloc_max_qs(struct idpf_adapter *adapter,
+ struct idpf_vport_max_q *max_q)
+{
+ struct idpf_avail_queue_info *avail_queues;
+
+ mutex_lock(&adapter->queue_lock);
+ avail_queues = &adapter->avail_queues;
+
+ avail_queues->avail_rxq += max_q->max_rxq;
+ avail_queues->avail_txq += max_q->max_txq;
+ avail_queues->avail_bufq += max_q->max_bufq;
+ avail_queues->avail_complq += max_q->max_complq;
+
+ mutex_unlock(&adapter->queue_lock);
+}
+
+/**
+ * idpf_init_avail_queues - Initialize available queues on the device
+ * @adapter: Driver specific private structure
+ */
+static void idpf_init_avail_queues(struct idpf_adapter *adapter)
+{
+ struct idpf_avail_queue_info *avail_queues = &adapter->avail_queues;
+ struct virtchnl2_get_capabilities *caps = &adapter->caps;
+
+ avail_queues->avail_rxq = le16_to_cpu(caps->max_rx_q);
+ avail_queues->avail_txq = le16_to_cpu(caps->max_tx_q);
+ avail_queues->avail_bufq = le16_to_cpu(caps->max_rx_bufq);
+ avail_queues->avail_complq = le16_to_cpu(caps->max_tx_complq);
+}
+
+/**
+ * idpf_get_reg_intr_vecs - Get vector queue register offset
+ * @vport: virtual port structure
+ * @reg_vals: Register offsets to store in
+ *
+ * Returns number of registers that got populated
+ */
+int idpf_get_reg_intr_vecs(struct idpf_vport *vport,
+ struct idpf_vec_regs *reg_vals)
+{
+ struct virtchnl2_vector_chunks *chunks;
+ struct idpf_vec_regs reg_val;
+ u16 num_vchunks, num_vec;
+ int num_regs = 0, i, j;
+
+ chunks = &vport->adapter->req_vec_chunks->vchunks;
+ num_vchunks = le16_to_cpu(chunks->num_vchunks);
+
+ for (j = 0; j < num_vchunks; j++) {
+ struct virtchnl2_vector_chunk *chunk;
+ u32 dynctl_reg_spacing;
+ u32 itrn_reg_spacing;
+
+ chunk = &chunks->vchunks[j];
+ num_vec = le16_to_cpu(chunk->num_vectors);
+ reg_val.dyn_ctl_reg = le32_to_cpu(chunk->dynctl_reg_start);
+ reg_val.itrn_reg = le32_to_cpu(chunk->itrn_reg_start);
+ reg_val.itrn_index_spacing = le32_to_cpu(chunk->itrn_index_spacing);
+
+ dynctl_reg_spacing = le32_to_cpu(chunk->dynctl_reg_spacing);
+ itrn_reg_spacing = le32_to_cpu(chunk->itrn_reg_spacing);
+
+ for (i = 0; i < num_vec; i++) {
+ reg_vals[num_regs].dyn_ctl_reg = reg_val.dyn_ctl_reg;
+ reg_vals[num_regs].itrn_reg = reg_val.itrn_reg;
+ reg_vals[num_regs].itrn_index_spacing =
+ reg_val.itrn_index_spacing;
+
+ reg_val.dyn_ctl_reg += dynctl_reg_spacing;
+ reg_val.itrn_reg += itrn_reg_spacing;
+ num_regs++;
+ }
+ }
+
+ return num_regs;
+}
+
+/**
+ * idpf_vport_get_q_reg - Get the queue registers for the vport
+ * @reg_vals: register values needing to be set
+ * @num_regs: amount we expect to fill
+ * @q_type: queue model
+ * @chunks: queue regs received over mailbox
+ *
+ * This function parses the queue register offsets from the queue register
+ * chunk information, with a specific queue type and stores it into the array
+ * passed as an argument. It returns the actual number of queue registers that
+ * are filled.
+ */
+static int idpf_vport_get_q_reg(u32 *reg_vals, int num_regs, u32 q_type,
+ struct virtchnl2_queue_reg_chunks *chunks)
+{
+ u16 num_chunks = le16_to_cpu(chunks->num_chunks);
+ int reg_filled = 0, i;
+ u32 reg_val;
+
+ while (num_chunks--) {
+ struct virtchnl2_queue_reg_chunk *chunk;
+ u16 num_q;
+
+ chunk = &chunks->chunks[num_chunks];
+ if (le32_to_cpu(chunk->type) != q_type)
+ continue;
+
+ num_q = le32_to_cpu(chunk->num_queues);
+ reg_val = le64_to_cpu(chunk->qtail_reg_start);
+ for (i = 0; i < num_q && reg_filled < num_regs ; i++) {
+ reg_vals[reg_filled++] = reg_val;
+ reg_val += le32_to_cpu(chunk->qtail_reg_spacing);
+ }
+ }
+
+ return reg_filled;
+}
+
+/**
+ * __idpf_queue_reg_init - initialize queue registers
+ * @vport: virtual port structure
+ * @reg_vals: registers we are initializing
+ * @num_regs: how many registers there are in total
+ * @q_type: queue model
+ *
+ * Return number of queues that are initialized
+ */
+static int __idpf_queue_reg_init(struct idpf_vport *vport, u32 *reg_vals,
+ int num_regs, u32 q_type)
+{
+ struct idpf_adapter *adapter = vport->adapter;
+ struct idpf_queue *q;
+ int i, j, k = 0;
+
+ switch (q_type) {
+ case VIRTCHNL2_QUEUE_TYPE_TX:
+ for (i = 0; i < vport->num_txq_grp; i++) {
+ struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i];
+
+ for (j = 0; j < tx_qgrp->num_txq && k < num_regs; j++, k++)
+ tx_qgrp->txqs[j]->tail =
+ idpf_get_reg_addr(adapter, reg_vals[k]);
+ }
+ break;
+ case VIRTCHNL2_QUEUE_TYPE_RX:
+ for (i = 0; i < vport->num_rxq_grp; i++) {
+ struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i];
+ u16 num_rxq = rx_qgrp->singleq.num_rxq;
+
+ for (j = 0; j < num_rxq && k < num_regs; j++, k++) {
+ q = rx_qgrp->singleq.rxqs[j];
+ q->tail = idpf_get_reg_addr(adapter,
+ reg_vals[k]);
+ }
+ }
+ break;
+ case VIRTCHNL2_QUEUE_TYPE_RX_BUFFER:
+ for (i = 0; i < vport->num_rxq_grp; i++) {
+ struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i];
+ u8 num_bufqs = vport->num_bufqs_per_qgrp;
+
+ for (j = 0; j < num_bufqs && k < num_regs; j++, k++) {
+ q = &rx_qgrp->splitq.bufq_sets[j].bufq;
+ q->tail = idpf_get_reg_addr(adapter,
+ reg_vals[k]);
+ }
+ }
+ break;
+ default:
+ break;
+ }
+
+ return k;
+}
+
+/**
+ * idpf_queue_reg_init - initialize queue registers
+ * @vport: virtual port structure
+ *
+ * Return 0 on success, negative on failure
+ */
+int idpf_queue_reg_init(struct idpf_vport *vport)
+{
+ struct virtchnl2_create_vport *vport_params;
+ struct virtchnl2_queue_reg_chunks *chunks;
+ struct idpf_vport_config *vport_config;
+ u16 vport_idx = vport->idx;
+ int num_regs, ret = 0;
+ u32 *reg_vals;
+
+ /* We may never deal with more than 256 same type of queues */
+ reg_vals = kzalloc(sizeof(void *) * IDPF_LARGE_MAX_Q, GFP_KERNEL);
+ if (!reg_vals)
+ return -ENOMEM;
+
+ vport_config = vport->adapter->vport_config[vport_idx];
+ if (vport_config->req_qs_chunks) {
+ struct virtchnl2_add_queues *vc_aq =
+ (struct virtchnl2_add_queues *)vport_config->req_qs_chunks;
+ chunks = &vc_aq->chunks;
+ } else {
+ vport_params = vport->adapter->vport_params_recvd[vport_idx];
+ chunks = &vport_params->chunks;
+ }
+
+ /* Initialize Tx queue tail register address */
+ num_regs = idpf_vport_get_q_reg(reg_vals, IDPF_LARGE_MAX_Q,
+ VIRTCHNL2_QUEUE_TYPE_TX,
+ chunks);
+ if (num_regs < vport->num_txq) {
+ ret = -EINVAL;
+ goto free_reg_vals;
+ }
+
+ num_regs = __idpf_queue_reg_init(vport, reg_vals, num_regs,
+ VIRTCHNL2_QUEUE_TYPE_TX);
+ if (num_regs < vport->num_txq) {
+ ret = -EINVAL;
+ goto free_reg_vals;
+ }
+
+ /* Initialize Rx/buffer queue tail register address based on Rx queue
+ * model
+ */
+ if (idpf_is_queue_model_split(vport->rxq_model)) {
+ num_regs = idpf_vport_get_q_reg(reg_vals, IDPF_LARGE_MAX_Q,
+ VIRTCHNL2_QUEUE_TYPE_RX_BUFFER,
+ chunks);
+ if (num_regs < vport->num_bufq) {
+ ret = -EINVAL;
+ goto free_reg_vals;
+ }
+
+ num_regs = __idpf_queue_reg_init(vport, reg_vals, num_regs,
+ VIRTCHNL2_QUEUE_TYPE_RX_BUFFER);
+ if (num_regs < vport->num_bufq) {
+ ret = -EINVAL;
+ goto free_reg_vals;
+ }
+ } else {
+ num_regs = idpf_vport_get_q_reg(reg_vals, IDPF_LARGE_MAX_Q,
+ VIRTCHNL2_QUEUE_TYPE_RX,
+ chunks);
+ if (num_regs < vport->num_rxq) {
+ ret = -EINVAL;
+ goto free_reg_vals;
+ }
+
+ num_regs = __idpf_queue_reg_init(vport, reg_vals, num_regs,
+ VIRTCHNL2_QUEUE_TYPE_RX);
+ if (num_regs < vport->num_rxq) {
+ ret = -EINVAL;
+ goto free_reg_vals;
+ }
+ }
+
+free_reg_vals:
+ kfree(reg_vals);
+
+ return ret;
+}
+
+/**
+ * idpf_send_create_vport_msg - Send virtchnl create vport message
+ * @adapter: Driver specific private structure
+ * @max_q: vport max queue info
+ *
+ * send virtchnl creae vport message
+ *
+ * Returns 0 on success, negative on failure
+ */
+int idpf_send_create_vport_msg(struct idpf_adapter *adapter,
+ struct idpf_vport_max_q *max_q)
+{
+ struct virtchnl2_create_vport *vport_msg;
+ u16 idx = adapter->next_vport;
+ int err, buf_size;
+
+ buf_size = sizeof(struct virtchnl2_create_vport);
+ if (!adapter->vport_params_reqd[idx]) {
+ adapter->vport_params_reqd[idx] = kzalloc(buf_size,
+ GFP_KERNEL);
+ if (!adapter->vport_params_reqd[idx])
+ return -ENOMEM;
+ }
+
+ vport_msg = adapter->vport_params_reqd[idx];
+ vport_msg->vport_type = cpu_to_le16(VIRTCHNL2_VPORT_TYPE_DEFAULT);
+ vport_msg->vport_index = cpu_to_le16(idx);
+
+ if (adapter->req_tx_splitq)
+ vport_msg->txq_model = cpu_to_le16(VIRTCHNL2_QUEUE_MODEL_SPLIT);
+ else
+ vport_msg->txq_model = cpu_to_le16(VIRTCHNL2_QUEUE_MODEL_SINGLE);
+
+ if (adapter->req_rx_splitq)
+ vport_msg->rxq_model = cpu_to_le16(VIRTCHNL2_QUEUE_MODEL_SPLIT);
+ else
+ vport_msg->rxq_model = cpu_to_le16(VIRTCHNL2_QUEUE_MODEL_SINGLE);
+
+ err = idpf_vport_calc_total_qs(adapter, idx, vport_msg, max_q);
+ if (err) {
+ dev_err(&adapter->pdev->dev, "Enough queues are not available");
+
+ return err;
+ }
+
+ mutex_lock(&adapter->vc_buf_lock);
+
+ err = idpf_send_mb_msg(adapter, VIRTCHNL2_OP_CREATE_VPORT, buf_size,
+ (u8 *)vport_msg);
+ if (err)
+ goto rel_lock;
+
+ err = idpf_wait_for_event(adapter, NULL, IDPF_VC_CREATE_VPORT,
+ IDPF_VC_CREATE_VPORT_ERR);
+ if (err) {
+ dev_err(&adapter->pdev->dev, "Failed to receive create vport message");
+
+ goto rel_lock;
+ }
+
+ if (!adapter->vport_params_recvd[idx]) {
+ adapter->vport_params_recvd[idx] = kzalloc(IDPF_CTLQ_MAX_BUF_LEN,
+ GFP_KERNEL);
+ if (!adapter->vport_params_recvd[idx]) {
+ err = -ENOMEM;
+ goto rel_lock;
+ }
+ }
+
+ vport_msg = adapter->vport_params_recvd[idx];
+ memcpy(vport_msg, adapter->vc_msg, IDPF_CTLQ_MAX_BUF_LEN);
+
+rel_lock:
+ mutex_unlock(&adapter->vc_buf_lock);
+
+ return err;
+}
+
+/**
+ * idpf_check_supported_desc_ids - Verify we have required descriptor support
+ * @vport: virtual port structure
+ *
+ * Return 0 on success, error on failure
+ */
+int idpf_check_supported_desc_ids(struct idpf_vport *vport)
+{
+ struct idpf_adapter *adapter = vport->adapter;
+ struct virtchnl2_create_vport *vport_msg;
+ u64 rx_desc_ids, tx_desc_ids;
+
+ vport_msg = adapter->vport_params_recvd[vport->idx];
+
+ rx_desc_ids = le64_to_cpu(vport_msg->rx_desc_ids);
+ tx_desc_ids = le64_to_cpu(vport_msg->tx_desc_ids);
+
+ if (vport->rxq_model == VIRTCHNL2_QUEUE_MODEL_SPLIT) {
+ if (!(rx_desc_ids & VIRTCHNL2_RXDID_2_FLEX_SPLITQ_M)) {
+ dev_info(&adapter->pdev->dev, "Minimum RX descriptor support not provided, using the default\n");
+ vport_msg->rx_desc_ids = cpu_to_le64(VIRTCHNL2_RXDID_2_FLEX_SPLITQ_M);
+ }
+ } else {
+ if (!(rx_desc_ids & VIRTCHNL2_RXDID_2_FLEX_SQ_NIC_M))
+ vport->base_rxd = true;
+ }
+
+ if (vport->txq_model != VIRTCHNL2_QUEUE_MODEL_SPLIT)
+ return 0;
+
+ if ((tx_desc_ids & MIN_SUPPORT_TXDID) != MIN_SUPPORT_TXDID) {
+ dev_info(&adapter->pdev->dev, "Minimum TX descriptor support not provided, using the default\n");
+ vport_msg->tx_desc_ids = cpu_to_le64(MIN_SUPPORT_TXDID);
+ }
+
+ return 0;
+}
+
+/**
+ * idpf_send_destroy_vport_msg - Send virtchnl destroy vport message
+ * @vport: virtual port data structure
+ *
+ * Send virtchnl destroy vport message. Returns 0 on success, negative on
+ * failure.
+ */
+int idpf_send_destroy_vport_msg(struct idpf_vport *vport)
+{
+ struct idpf_adapter *adapter = vport->adapter;
+ struct virtchnl2_vport v_id;
+ int err;
+
+ v_id.vport_id = cpu_to_le32(vport->vport_id);
+
+ mutex_lock(&vport->vc_buf_lock);
+
+ err = idpf_send_mb_msg(adapter, VIRTCHNL2_OP_DESTROY_VPORT,
+ sizeof(v_id), (u8 *)&v_id);
+ if (err)
+ goto rel_lock;
+
+ err = idpf_min_wait_for_event(adapter, vport, IDPF_VC_DESTROY_VPORT,
+ IDPF_VC_DESTROY_VPORT_ERR);
+
+rel_lock:
+ mutex_unlock(&vport->vc_buf_lock);
+
+ return err;
+}
+
+/**
+ * idpf_send_enable_vport_msg - Send virtchnl enable vport message
+ * @vport: virtual port data structure
+ *
+ * Send enable vport virtchnl message. Returns 0 on success, negative on
+ * failure.
+ */
+int idpf_send_enable_vport_msg(struct idpf_vport *vport)
+{
+ struct idpf_adapter *adapter = vport->adapter;
+ struct virtchnl2_vport v_id;
+ int err;
+
+ v_id.vport_id = cpu_to_le32(vport->vport_id);
+
+ mutex_lock(&vport->vc_buf_lock);
+
+ err = idpf_send_mb_msg(adapter, VIRTCHNL2_OP_ENABLE_VPORT,
+ sizeof(v_id), (u8 *)&v_id);
+ if (err)
+ goto rel_lock;
+
+ err = idpf_wait_for_event(adapter, vport, IDPF_VC_ENA_VPORT,
+ IDPF_VC_ENA_VPORT_ERR);
+
+rel_lock:
+ mutex_unlock(&vport->vc_buf_lock);
+
+ return err;
+}
+
+/**
+ * idpf_send_disable_vport_msg - Send virtchnl disable vport message
+ * @vport: virtual port data structure
+ *
+ * Send disable vport virtchnl message. Returns 0 on success, negative on
+ * failure.
+ */
+int idpf_send_disable_vport_msg(struct idpf_vport *vport)
+{
+ struct idpf_adapter *adapter = vport->adapter;
+ struct virtchnl2_vport v_id;
+ int err;
+
+ v_id.vport_id = cpu_to_le32(vport->vport_id);
+
+ mutex_lock(&vport->vc_buf_lock);
+
+ err = idpf_send_mb_msg(adapter, VIRTCHNL2_OP_DISABLE_VPORT,
+ sizeof(v_id), (u8 *)&v_id);
+ if (err)
+ goto rel_lock;
+
+ err = idpf_min_wait_for_event(adapter, vport, IDPF_VC_DIS_VPORT,
+ IDPF_VC_DIS_VPORT_ERR);
+
+rel_lock:
+ mutex_unlock(&vport->vc_buf_lock);
+
+ return err;
+}
+
+/**
+ * idpf_send_config_tx_queues_msg - Send virtchnl config tx queues message
+ * @vport: virtual port data structure
+ *
+ * Send config tx queues virtchnl message. Returns 0 on success, negative on
+ * failure.
+ */
+static int idpf_send_config_tx_queues_msg(struct idpf_vport *vport)
+{
+ struct virtchnl2_config_tx_queues *ctq;
+ u32 config_sz, chunk_sz, buf_sz;
+ int totqs, num_msgs, num_chunks;
+ struct virtchnl2_txq_info *qi;
+ int err = 0, i, k = 0;
+
+ totqs = vport->num_txq + vport->num_complq;
+ qi = kcalloc(totqs, sizeof(struct virtchnl2_txq_info), GFP_KERNEL);
+ if (!qi)
+ return -ENOMEM;
+
+ /* Populate the queue info buffer with all queue context info */
+ for (i = 0; i < vport->num_txq_grp; i++) {
+ struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i];
+ int j;
+
+ for (j = 0; j < tx_qgrp->num_txq; j++, k++) {
+ qi[k].queue_id =
+ cpu_to_le32(tx_qgrp->txqs[j]->q_id);
+ qi[k].model =
+ cpu_to_le16(vport->txq_model);
+ qi[k].type =
+ cpu_to_le32(tx_qgrp->txqs[j]->q_type);
+ qi[k].ring_len =
+ cpu_to_le16(tx_qgrp->txqs[j]->desc_count);
+ qi[k].dma_ring_addr =
+ cpu_to_le64(tx_qgrp->txqs[j]->dma);
+ if (idpf_is_queue_model_split(vport->txq_model)) {
+ struct idpf_queue *q = tx_qgrp->txqs[j];
+
+ qi[k].tx_compl_queue_id =
+ cpu_to_le16(tx_qgrp->complq->q_id);
+ qi[k].relative_queue_id = cpu_to_le16(j);
+
+ if (test_bit(__IDPF_Q_FLOW_SCH_EN, q->flags))
+ qi[k].sched_mode =
+ cpu_to_le16(VIRTCHNL2_TXQ_SCHED_MODE_FLOW);
+ else
+ qi[k].sched_mode =
+ cpu_to_le16(VIRTCHNL2_TXQ_SCHED_MODE_QUEUE);
+ } else {
+ qi[k].sched_mode =
+ cpu_to_le16(VIRTCHNL2_TXQ_SCHED_MODE_QUEUE);
+ }
+ }
+
+ if (!idpf_is_queue_model_split(vport->txq_model))
+ continue;
+
+ qi[k].queue_id = cpu_to_le32(tx_qgrp->complq->q_id);
+ qi[k].model = cpu_to_le16(vport->txq_model);
+ qi[k].type = cpu_to_le32(tx_qgrp->complq->q_type);
+ qi[k].ring_len = cpu_to_le16(tx_qgrp->complq->desc_count);
+ qi[k].dma_ring_addr = cpu_to_le64(tx_qgrp->complq->dma);
+
+ k++;
+ }
+
+ /* Make sure accounting agrees */
+ if (k != totqs) {
+ err = -EINVAL;
+ goto error;
+ }
+
+ /* Chunk up the queue contexts into multiple messages to avoid
+ * sending a control queue message buffer that is too large
+ */
+ config_sz = sizeof(struct virtchnl2_config_tx_queues);
+ chunk_sz = sizeof(struct virtchnl2_txq_info);
+
+ num_chunks = min_t(u32, IDPF_NUM_CHUNKS_PER_MSG(config_sz, chunk_sz),
+ totqs);
+ num_msgs = DIV_ROUND_UP(totqs, num_chunks);
+
+ buf_sz = struct_size(ctq, qinfo, num_chunks);
+ ctq = kzalloc(buf_sz, GFP_KERNEL);
+ if (!ctq) {
+ err = -ENOMEM;
+ goto error;
+ }
+
+ mutex_lock(&vport->vc_buf_lock);
+
+ for (i = 0, k = 0; i < num_msgs; i++) {
+ memset(ctq, 0, buf_sz);
+ ctq->vport_id = cpu_to_le32(vport->vport_id);
+ ctq->num_qinfo = cpu_to_le16(num_chunks);
+ memcpy(ctq->qinfo, &qi[k], chunk_sz * num_chunks);
+
+ err = idpf_send_mb_msg(vport->adapter,
+ VIRTCHNL2_OP_CONFIG_TX_QUEUES,
+ buf_sz, (u8 *)ctq);
+ if (err)
+ goto mbx_error;
+
+ err = idpf_wait_for_event(vport->adapter, vport,
+ IDPF_VC_CONFIG_TXQ,
+ IDPF_VC_CONFIG_TXQ_ERR);
+ if (err)
+ goto mbx_error;
+
+ k += num_chunks;
+ totqs -= num_chunks;
+ num_chunks = min(num_chunks, totqs);
+ /* Recalculate buffer size */
+ buf_sz = struct_size(ctq, qinfo, num_chunks);
+ }
+
+mbx_error:
+ mutex_unlock(&vport->vc_buf_lock);
+ kfree(ctq);
+error:
+ kfree(qi);
+
+ return err;
+}
+
+/**
+ * idpf_send_config_rx_queues_msg - Send virtchnl config rx queues message
+ * @vport: virtual port data structure
+ *
+ * Send config rx queues virtchnl message. Returns 0 on success, negative on
+ * failure.
+ */
+static int idpf_send_config_rx_queues_msg(struct idpf_vport *vport)
+{
+ struct virtchnl2_config_rx_queues *crq;
+ u32 config_sz, chunk_sz, buf_sz;
+ int totqs, num_msgs, num_chunks;
+ struct virtchnl2_rxq_info *qi;
+ int err = 0, i, k = 0;
+
+ totqs = vport->num_rxq + vport->num_bufq;
+ qi = kcalloc(totqs, sizeof(struct virtchnl2_rxq_info), GFP_KERNEL);
+ if (!qi)
+ return -ENOMEM;
+
+ /* Populate the queue info buffer with all queue context info */
+ for (i = 0; i < vport->num_rxq_grp; i++) {
+ struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i];
+ u16 num_rxq;
+ int j;
+
+ if (!idpf_is_queue_model_split(vport->rxq_model))
+ goto setup_rxqs;
+
+ for (j = 0; j < vport->num_bufqs_per_qgrp; j++, k++) {
+ struct idpf_queue *bufq =
+ &rx_qgrp->splitq.bufq_sets[j].bufq;
+
+ qi[k].queue_id = cpu_to_le32(bufq->q_id);
+ qi[k].model = cpu_to_le16(vport->rxq_model);
+ qi[k].type = cpu_to_le32(bufq->q_type);
+ qi[k].desc_ids = cpu_to_le64(VIRTCHNL2_RXDID_2_FLEX_SPLITQ_M);
+ qi[k].ring_len = cpu_to_le16(bufq->desc_count);
+ qi[k].dma_ring_addr = cpu_to_le64(bufq->dma);
+ qi[k].data_buffer_size = cpu_to_le32(bufq->rx_buf_size);
+ qi[k].buffer_notif_stride = bufq->rx_buf_stride;
+ qi[k].rx_buffer_low_watermark =
+ cpu_to_le16(bufq->rx_buffer_low_watermark);
+ if (idpf_is_feature_ena(vport, NETIF_F_GRO_HW))
+ qi[k].qflags |= cpu_to_le16(VIRTCHNL2_RXQ_RSC);
+ }
+
+setup_rxqs:
+ if (idpf_is_queue_model_split(vport->rxq_model))
+ num_rxq = rx_qgrp->splitq.num_rxq_sets;
+ else
+ num_rxq = rx_qgrp->singleq.num_rxq;
+
+ for (j = 0; j < num_rxq; j++, k++) {
+ struct idpf_queue *rxq;
+
+ if (!idpf_is_queue_model_split(vport->rxq_model)) {
+ rxq = rx_qgrp->singleq.rxqs[j];
+ goto common_qi_fields;
+ }
+ rxq = &rx_qgrp->splitq.rxq_sets[j]->rxq;
+ qi[k].rx_bufq1_id =
+ cpu_to_le16(rxq->rxq_grp->splitq.bufq_sets[0].bufq.q_id);
+ if (vport->num_bufqs_per_qgrp > IDPF_SINGLE_BUFQ_PER_RXQ_GRP) {
+ qi[k].bufq2_ena = IDPF_BUFQ2_ENA;
+ qi[k].rx_bufq2_id =
+ cpu_to_le16(rxq->rxq_grp->splitq.bufq_sets[1].bufq.q_id);
+ }
+ qi[k].rx_buffer_low_watermark =
+ cpu_to_le16(rxq->rx_buffer_low_watermark);
+ if (idpf_is_feature_ena(vport, NETIF_F_GRO_HW))
+ qi[k].qflags |= cpu_to_le16(VIRTCHNL2_RXQ_RSC);
+
+common_qi_fields:
+ if (rxq->rx_hsplit_en) {
+ qi[k].qflags |=
+ cpu_to_le16(VIRTCHNL2_RXQ_HDR_SPLIT);
+ qi[k].hdr_buffer_size =
+ cpu_to_le16(rxq->rx_hbuf_size);
+ }
+ qi[k].queue_id = cpu_to_le32(rxq->q_id);
+ qi[k].model = cpu_to_le16(vport->rxq_model);
+ qi[k].type = cpu_to_le32(rxq->q_type);
+ qi[k].ring_len = cpu_to_le16(rxq->desc_count);
+ qi[k].dma_ring_addr = cpu_to_le64(rxq->dma);
+ qi[k].max_pkt_size = cpu_to_le32(rxq->rx_max_pkt_size);
+ qi[k].data_buffer_size = cpu_to_le32(rxq->rx_buf_size);
+ qi[k].qflags |=
+ cpu_to_le16(VIRTCHNL2_RX_DESC_SIZE_32BYTE);
+ qi[k].desc_ids = cpu_to_le64(rxq->rxdids);
+ }
+ }
+
+ /* Make sure accounting agrees */
+ if (k != totqs) {
+ err = -EINVAL;
+ goto error;
+ }
+
+ /* Chunk up the queue contexts into multiple messages to avoid
+ * sending a control queue message buffer that is too large
+ */
+ config_sz = sizeof(struct virtchnl2_config_rx_queues);
+ chunk_sz = sizeof(struct virtchnl2_rxq_info);
+
+ num_chunks = min_t(u32, IDPF_NUM_CHUNKS_PER_MSG(config_sz, chunk_sz),
+ totqs);
+ num_msgs = DIV_ROUND_UP(totqs, num_chunks);
+
+ buf_sz = struct_size(crq, qinfo, num_chunks);
+ crq = kzalloc(buf_sz, GFP_KERNEL);
+ if (!crq) {
+ err = -ENOMEM;
+ goto error;
+ }
+
+ mutex_lock(&vport->vc_buf_lock);
+
+ for (i = 0, k = 0; i < num_msgs; i++) {
+ memset(crq, 0, buf_sz);
+ crq->vport_id = cpu_to_le32(vport->vport_id);
+ crq->num_qinfo = cpu_to_le16(num_chunks);
+ memcpy(crq->qinfo, &qi[k], chunk_sz * num_chunks);
+
+ err = idpf_send_mb_msg(vport->adapter,
+ VIRTCHNL2_OP_CONFIG_RX_QUEUES,
+ buf_sz, (u8 *)crq);
+ if (err)
+ goto mbx_error;
+
+ err = idpf_wait_for_event(vport->adapter, vport,
+ IDPF_VC_CONFIG_RXQ,
+ IDPF_VC_CONFIG_RXQ_ERR);
+ if (err)
+ goto mbx_error;
+
+ k += num_chunks;
+ totqs -= num_chunks;
+ num_chunks = min(num_chunks, totqs);
+ /* Recalculate buffer size */
+ buf_sz = struct_size(crq, qinfo, num_chunks);
+ }
+
+mbx_error:
+ mutex_unlock(&vport->vc_buf_lock);
+ kfree(crq);
+error:
+ kfree(qi);
+
+ return err;
+}
+
+/**
+ * idpf_send_ena_dis_queues_msg - Send virtchnl enable or disable
+ * queues message
+ * @vport: virtual port data structure
+ * @vc_op: virtchnl op code to send
+ *
+ * Send enable or disable queues virtchnl message. Returns 0 on success,
+ * negative on failure.
+ */
+static int idpf_send_ena_dis_queues_msg(struct idpf_vport *vport, u32 vc_op)
+{
+ u32 num_msgs, num_chunks, num_txq, num_rxq, num_q;
+ struct idpf_adapter *adapter = vport->adapter;
+ struct virtchnl2_del_ena_dis_queues *eq;
+ struct virtchnl2_queue_chunks *qcs;
+ struct virtchnl2_queue_chunk *qc;
+ u32 config_sz, chunk_sz, buf_sz;
+ int i, j, k = 0, err = 0;
+
+ /* validate virtchnl op */
+ switch (vc_op) {
+ case VIRTCHNL2_OP_ENABLE_QUEUES:
+ case VIRTCHNL2_OP_DISABLE_QUEUES:
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ num_txq = vport->num_txq + vport->num_complq;
+ num_rxq = vport->num_rxq + vport->num_bufq;
+ num_q = num_txq + num_rxq;
+ buf_sz = sizeof(struct virtchnl2_queue_chunk) * num_q;
+ qc = kzalloc(buf_sz, GFP_KERNEL);
+ if (!qc)
+ return -ENOMEM;
+
+ for (i = 0; i < vport->num_txq_grp; i++) {
+ struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i];
+
+ for (j = 0; j < tx_qgrp->num_txq; j++, k++) {
+ qc[k].type = cpu_to_le32(tx_qgrp->txqs[j]->q_type);
+ qc[k].start_queue_id = cpu_to_le32(tx_qgrp->txqs[j]->q_id);
+ qc[k].num_queues = cpu_to_le32(IDPF_NUMQ_PER_CHUNK);
+ }
+ }
+ if (vport->num_txq != k) {
+ err = -EINVAL;
+ goto error;
+ }
+
+ if (!idpf_is_queue_model_split(vport->txq_model))
+ goto setup_rx;
+
+ for (i = 0; i < vport->num_txq_grp; i++, k++) {
+ struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i];
+
+ qc[k].type = cpu_to_le32(tx_qgrp->complq->q_type);
+ qc[k].start_queue_id = cpu_to_le32(tx_qgrp->complq->q_id);
+ qc[k].num_queues = cpu_to_le32(IDPF_NUMQ_PER_CHUNK);
+ }
+ if (vport->num_complq != (k - vport->num_txq)) {
+ err = -EINVAL;
+ goto error;
+ }
+
+setup_rx:
+ for (i = 0; i < vport->num_rxq_grp; i++) {
+ struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i];
+
+ if (idpf_is_queue_model_split(vport->rxq_model))
+ num_rxq = rx_qgrp->splitq.num_rxq_sets;
+ else
+ num_rxq = rx_qgrp->singleq.num_rxq;
+
+ for (j = 0; j < num_rxq; j++, k++) {
+ if (idpf_is_queue_model_split(vport->rxq_model)) {
+ qc[k].start_queue_id =
+ cpu_to_le32(rx_qgrp->splitq.rxq_sets[j]->rxq.q_id);
+ qc[k].type =
+ cpu_to_le32(rx_qgrp->splitq.rxq_sets[j]->rxq.q_type);
+ } else {
+ qc[k].start_queue_id =
+ cpu_to_le32(rx_qgrp->singleq.rxqs[j]->q_id);
+ qc[k].type =
+ cpu_to_le32(rx_qgrp->singleq.rxqs[j]->q_type);
+ }
+ qc[k].num_queues = cpu_to_le32(IDPF_NUMQ_PER_CHUNK);
+ }
+ }
+ if (vport->num_rxq != k - (vport->num_txq + vport->num_complq)) {
+ err = -EINVAL;
+ goto error;
+ }
+
+ if (!idpf_is_queue_model_split(vport->rxq_model))
+ goto send_msg;
+
+ for (i = 0; i < vport->num_rxq_grp; i++) {
+ struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i];
+
+ for (j = 0; j < vport->num_bufqs_per_qgrp; j++, k++) {
+ struct idpf_queue *q;
+
+ q = &rx_qgrp->splitq.bufq_sets[j].bufq;
+ qc[k].type = cpu_to_le32(q->q_type);
+ qc[k].start_queue_id = cpu_to_le32(q->q_id);
+ qc[k].num_queues = cpu_to_le32(IDPF_NUMQ_PER_CHUNK);
+ }
+ }
+ if (vport->num_bufq != k - (vport->num_txq +
+ vport->num_complq +
+ vport->num_rxq)) {
+ err = -EINVAL;
+ goto error;
+ }
+
+send_msg:
+ /* Chunk up the queue info into multiple messages */
+ config_sz = sizeof(struct virtchnl2_del_ena_dis_queues);
+ chunk_sz = sizeof(struct virtchnl2_queue_chunk);
+
+ num_chunks = min_t(u32, IDPF_NUM_CHUNKS_PER_MSG(config_sz, chunk_sz),
+ num_q);
+ num_msgs = DIV_ROUND_UP(num_q, num_chunks);
+
+ buf_sz = struct_size(eq, chunks.chunks, num_chunks);
+ eq = kzalloc(buf_sz, GFP_KERNEL);
+ if (!eq) {
+ err = -ENOMEM;
+ goto error;
+ }
+
+ mutex_lock(&vport->vc_buf_lock);
+
+ for (i = 0, k = 0; i < num_msgs; i++) {
+ memset(eq, 0, buf_sz);
+ eq->vport_id = cpu_to_le32(vport->vport_id);
+ eq->chunks.num_chunks = cpu_to_le16(num_chunks);
+ qcs = &eq->chunks;
+ memcpy(qcs->chunks, &qc[k], chunk_sz * num_chunks);
+
+ err = idpf_send_mb_msg(adapter, vc_op, buf_sz, (u8 *)eq);
+ if (err)
+ goto mbx_error;
+
+ if (vc_op == VIRTCHNL2_OP_ENABLE_QUEUES)
+ err = idpf_wait_for_event(adapter, vport,
+ IDPF_VC_ENA_QUEUES,
+ IDPF_VC_ENA_QUEUES_ERR);
+ else
+ err = idpf_min_wait_for_event(adapter, vport,
+ IDPF_VC_DIS_QUEUES,
+ IDPF_VC_DIS_QUEUES_ERR);
+ if (err)
+ goto mbx_error;
+
+ k += num_chunks;
+ num_q -= num_chunks;
+ num_chunks = min(num_chunks, num_q);
+ /* Recalculate buffer size */
+ buf_sz = struct_size(eq, chunks.chunks, num_chunks);
+ }
+
+mbx_error:
+ mutex_unlock(&vport->vc_buf_lock);
+ kfree(eq);
+error:
+ kfree(qc);
+
+ return err;
+}
+
+/**
+ * idpf_send_map_unmap_queue_vector_msg - Send virtchnl map or unmap queue
+ * vector message
+ * @vport: virtual port data structure
+ * @map: true for map and false for unmap
+ *
+ * Send map or unmap queue vector virtchnl message. Returns 0 on success,
+ * negative on failure.
+ */
+int idpf_send_map_unmap_queue_vector_msg(struct idpf_vport *vport, bool map)
+{
+ struct idpf_adapter *adapter = vport->adapter;
+ struct virtchnl2_queue_vector_maps *vqvm;
+ struct virtchnl2_queue_vector *vqv;
+ u32 config_sz, chunk_sz, buf_sz;
+ u32 num_msgs, num_chunks, num_q;
+ int i, j, k = 0, err = 0;
+
+ num_q = vport->num_txq + vport->num_rxq;
+
+ buf_sz = sizeof(struct virtchnl2_queue_vector) * num_q;
+ vqv = kzalloc(buf_sz, GFP_KERNEL);
+ if (!vqv)
+ return -ENOMEM;
+
+ for (i = 0; i < vport->num_txq_grp; i++) {
+ struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i];
+
+ for (j = 0; j < tx_qgrp->num_txq; j++, k++) {
+ vqv[k].queue_type = cpu_to_le32(tx_qgrp->txqs[j]->q_type);
+ vqv[k].queue_id = cpu_to_le32(tx_qgrp->txqs[j]->q_id);
+
+ if (idpf_is_queue_model_split(vport->txq_model)) {
+ vqv[k].vector_id =
+ cpu_to_le16(tx_qgrp->complq->q_vector->v_idx);
+ vqv[k].itr_idx =
+ cpu_to_le32(tx_qgrp->complq->q_vector->tx_itr_idx);
+ } else {
+ vqv[k].vector_id =
+ cpu_to_le16(tx_qgrp->txqs[j]->q_vector->v_idx);
+ vqv[k].itr_idx =
+ cpu_to_le32(tx_qgrp->txqs[j]->q_vector->tx_itr_idx);
+ }
+ }
+ }
+
+ if (vport->num_txq != k) {
+ err = -EINVAL;
+ goto error;
+ }
+
+ for (i = 0; i < vport->num_rxq_grp; i++) {
+ struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i];
+ u16 num_rxq;
+
+ if (idpf_is_queue_model_split(vport->rxq_model))
+ num_rxq = rx_qgrp->splitq.num_rxq_sets;
+ else
+ num_rxq = rx_qgrp->singleq.num_rxq;
+
+ for (j = 0; j < num_rxq; j++, k++) {
+ struct idpf_queue *rxq;
+
+ if (idpf_is_queue_model_split(vport->rxq_model))
+ rxq = &rx_qgrp->splitq.rxq_sets[j]->rxq;
+ else
+ rxq = rx_qgrp->singleq.rxqs[j];
+
+ vqv[k].queue_type = cpu_to_le32(rxq->q_type);
+ vqv[k].queue_id = cpu_to_le32(rxq->q_id);
+ vqv[k].vector_id = cpu_to_le16(rxq->q_vector->v_idx);
+ vqv[k].itr_idx = cpu_to_le32(rxq->q_vector->rx_itr_idx);
+ }
+ }
+
+ if (idpf_is_queue_model_split(vport->txq_model)) {
+ if (vport->num_rxq != k - vport->num_complq) {
+ err = -EINVAL;
+ goto error;
+ }
+ } else {
+ if (vport->num_rxq != k - vport->num_txq) {
+ err = -EINVAL;
+ goto error;
+ }
+ }
+
+ /* Chunk up the vector info into multiple messages */
+ config_sz = sizeof(struct virtchnl2_queue_vector_maps);
+ chunk_sz = sizeof(struct virtchnl2_queue_vector);
+
+ num_chunks = min_t(u32, IDPF_NUM_CHUNKS_PER_MSG(config_sz, chunk_sz),
+ num_q);
+ num_msgs = DIV_ROUND_UP(num_q, num_chunks);
+
+ buf_sz = struct_size(vqvm, qv_maps, num_chunks);
+ vqvm = kzalloc(buf_sz, GFP_KERNEL);
+ if (!vqvm) {
+ err = -ENOMEM;
+ goto error;
+ }
+
+ mutex_lock(&vport->vc_buf_lock);
+
+ for (i = 0, k = 0; i < num_msgs; i++) {
+ memset(vqvm, 0, buf_sz);
+ vqvm->vport_id = cpu_to_le32(vport->vport_id);
+ vqvm->num_qv_maps = cpu_to_le16(num_chunks);
+ memcpy(vqvm->qv_maps, &vqv[k], chunk_sz * num_chunks);
+
+ if (map) {
+ err = idpf_send_mb_msg(adapter,
+ VIRTCHNL2_OP_MAP_QUEUE_VECTOR,
+ buf_sz, (u8 *)vqvm);
+ if (!err)
+ err = idpf_wait_for_event(adapter, vport,
+ IDPF_VC_MAP_IRQ,
+ IDPF_VC_MAP_IRQ_ERR);
+ } else {
+ err = idpf_send_mb_msg(adapter,
+ VIRTCHNL2_OP_UNMAP_QUEUE_VECTOR,
+ buf_sz, (u8 *)vqvm);
+ if (!err)
+ err =
+ idpf_min_wait_for_event(adapter, vport,
+ IDPF_VC_UNMAP_IRQ,
+ IDPF_VC_UNMAP_IRQ_ERR);
+ }
+ if (err)
+ goto mbx_error;
+
+ k += num_chunks;
+ num_q -= num_chunks;
+ num_chunks = min(num_chunks, num_q);
+ /* Recalculate buffer size */
+ buf_sz = struct_size(vqvm, qv_maps, num_chunks);
+ }
+
+mbx_error:
+ mutex_unlock(&vport->vc_buf_lock);
+ kfree(vqvm);
+error:
+ kfree(vqv);
+
+ return err;
+}
+
+/**
+ * idpf_send_enable_queues_msg - send enable queues virtchnl message
+ * @vport: Virtual port private data structure
+ *
+ * Will send enable queues virtchnl message. Returns 0 on success, negative on
+ * failure.
+ */
+int idpf_send_enable_queues_msg(struct idpf_vport *vport)
+{
+ return idpf_send_ena_dis_queues_msg(vport, VIRTCHNL2_OP_ENABLE_QUEUES);
+}
+
+/**
+ * idpf_send_disable_queues_msg - send disable queues virtchnl message
+ * @vport: Virtual port private data structure
+ *
+ * Will send disable queues virtchnl message. Returns 0 on success, negative
+ * on failure.
+ */
+int idpf_send_disable_queues_msg(struct idpf_vport *vport)
+{
+ int err, i;
+
+ err = idpf_send_ena_dis_queues_msg(vport, VIRTCHNL2_OP_DISABLE_QUEUES);
+ if (err)
+ return err;
+
+ /* switch to poll mode as interrupts will be disabled after disable
+ * queues virtchnl message is sent
+ */
+ for (i = 0; i < vport->num_txq; i++)
+ set_bit(__IDPF_Q_POLL_MODE, vport->txqs[i]->flags);
+
+ /* schedule the napi to receive all the marker packets */
+ for (i = 0; i < vport->num_q_vectors; i++)
+ napi_schedule(&vport->q_vectors[i].napi);
+
+ return idpf_wait_for_marker_event(vport);
+}
+
+/**
+ * idpf_convert_reg_to_queue_chunks - Copy queue chunk information to the right
+ * structure
+ * @dchunks: Destination chunks to store data to
+ * @schunks: Source chunks to copy data from
+ * @num_chunks: number of chunks to copy
+ */
+static void idpf_convert_reg_to_queue_chunks(struct virtchnl2_queue_chunk *dchunks,
+ struct virtchnl2_queue_reg_chunk *schunks,
+ u16 num_chunks)
+{
+ u16 i;
+
+ for (i = 0; i < num_chunks; i++) {
+ dchunks[i].type = schunks[i].type;
+ dchunks[i].start_queue_id = schunks[i].start_queue_id;
+ dchunks[i].num_queues = schunks[i].num_queues;
+ }
+}
+
+/**
+ * idpf_send_delete_queues_msg - send delete queues virtchnl message
+ * @vport: Virtual port private data structure
+ *
+ * Will send delete queues virtchnl message. Return 0 on success, negative on
+ * failure.
+ */
+int idpf_send_delete_queues_msg(struct idpf_vport *vport)
+{
+ struct idpf_adapter *adapter = vport->adapter;
+ struct virtchnl2_create_vport *vport_params;
+ struct virtchnl2_queue_reg_chunks *chunks;
+ struct virtchnl2_del_ena_dis_queues *eq;
+ struct idpf_vport_config *vport_config;
+ u16 vport_idx = vport->idx;
+ int buf_size, err;
+ u16 num_chunks;
+
+ vport_config = adapter->vport_config[vport_idx];
+ if (vport_config->req_qs_chunks) {
+ struct virtchnl2_add_queues *vc_aq =
+ (struct virtchnl2_add_queues *)vport_config->req_qs_chunks;
+ chunks = &vc_aq->chunks;
+ } else {
+ vport_params = adapter->vport_params_recvd[vport_idx];
+ chunks = &vport_params->chunks;
+ }
+
+ num_chunks = le16_to_cpu(chunks->num_chunks);
+ buf_size = struct_size(eq, chunks.chunks, num_chunks);
+
+ eq = kzalloc(buf_size, GFP_KERNEL);
+ if (!eq)
+ return -ENOMEM;
+
+ eq->vport_id = cpu_to_le32(vport->vport_id);
+ eq->chunks.num_chunks = cpu_to_le16(num_chunks);
+
+ idpf_convert_reg_to_queue_chunks(eq->chunks.chunks, chunks->chunks,
+ num_chunks);
+
+ mutex_lock(&vport->vc_buf_lock);
+
+ err = idpf_send_mb_msg(adapter, VIRTCHNL2_OP_DEL_QUEUES,
+ buf_size, (u8 *)eq);
+ if (err)
+ goto rel_lock;
+
+ err = idpf_min_wait_for_event(adapter, vport, IDPF_VC_DEL_QUEUES,
+ IDPF_VC_DEL_QUEUES_ERR);
+
+rel_lock:
+ mutex_unlock(&vport->vc_buf_lock);
+ kfree(eq);
+
+ return err;
+}
+
+/**
+ * idpf_send_config_queues_msg - Send config queues virtchnl message
+ * @vport: Virtual port private data structure
+ *
+ * Will send config queues virtchnl message. Returns 0 on success, negative on
+ * failure.
+ */
+int idpf_send_config_queues_msg(struct idpf_vport *vport)
+{
+ int err;
+
+ err = idpf_send_config_tx_queues_msg(vport);
+ if (err)
+ return err;
+
+ return idpf_send_config_rx_queues_msg(vport);
+}
+
+/**
+ * idpf_send_add_queues_msg - Send virtchnl add queues message
+ * @vport: Virtual port private data structure
+ * @num_tx_q: number of transmit queues
+ * @num_complq: number of transmit completion queues
+ * @num_rx_q: number of receive queues
+ * @num_rx_bufq: number of receive buffer queues
+ *
+ * Returns 0 on success, negative on failure. vport _MUST_ be const here as
+ * we should not change any fields within vport itself in this function.
+ */
+int idpf_send_add_queues_msg(const struct idpf_vport *vport, u16 num_tx_q,
+ u16 num_complq, u16 num_rx_q, u16 num_rx_bufq)
+{
+ struct idpf_adapter *adapter = vport->adapter;
+ struct idpf_vport_config *vport_config;
+ struct virtchnl2_add_queues aq = { };
+ struct virtchnl2_add_queues *vc_msg;
+ u16 vport_idx = vport->idx;
+ int size, err;
+
+ vport_config = adapter->vport_config[vport_idx];
+
+ aq.vport_id = cpu_to_le32(vport->vport_id);
+ aq.num_tx_q = cpu_to_le16(num_tx_q);
+ aq.num_tx_complq = cpu_to_le16(num_complq);
+ aq.num_rx_q = cpu_to_le16(num_rx_q);
+ aq.num_rx_bufq = cpu_to_le16(num_rx_bufq);
+
+ mutex_lock(&((struct idpf_vport *)vport)->vc_buf_lock);
+
+ err = idpf_send_mb_msg(adapter, VIRTCHNL2_OP_ADD_QUEUES,
+ sizeof(struct virtchnl2_add_queues), (u8 *)&aq);
+ if (err)
+ goto rel_lock;
+
+ /* We want vport to be const to prevent incidental code changes making
+ * changes to the vport config. We're making a special exception here
+ * to discard const to use the virtchnl.
+ */
+ err = idpf_wait_for_event(adapter, (struct idpf_vport *)vport,
+ IDPF_VC_ADD_QUEUES, IDPF_VC_ADD_QUEUES_ERR);
+ if (err)
+ goto rel_lock;
+
+ kfree(vport_config->req_qs_chunks);
+ vport_config->req_qs_chunks = NULL;
+
+ vc_msg = (struct virtchnl2_add_queues *)vport->vc_msg;
+ /* compare vc_msg num queues with vport num queues */
+ if (le16_to_cpu(vc_msg->num_tx_q) != num_tx_q ||
+ le16_to_cpu(vc_msg->num_rx_q) != num_rx_q ||
+ le16_to_cpu(vc_msg->num_tx_complq) != num_complq ||
+ le16_to_cpu(vc_msg->num_rx_bufq) != num_rx_bufq) {
+ err = -EINVAL;
+ goto rel_lock;
+ }
+
+ size = struct_size(vc_msg, chunks.chunks,
+ le16_to_cpu(vc_msg->chunks.num_chunks));
+ vport_config->req_qs_chunks = kmemdup(vc_msg, size, GFP_KERNEL);
+ if (!vport_config->req_qs_chunks) {
+ err = -ENOMEM;
+ goto rel_lock;
+ }
+
+rel_lock:
+ mutex_unlock(&((struct idpf_vport *)vport)->vc_buf_lock);
+
+ return err;
+}
+
+/**
+ * idpf_send_alloc_vectors_msg - Send virtchnl alloc vectors message
+ * @adapter: Driver specific private structure
+ * @num_vectors: number of vectors to be allocated
+ *
+ * Returns 0 on success, negative on failure.
+ */
+int idpf_send_alloc_vectors_msg(struct idpf_adapter *adapter, u16 num_vectors)
+{
+ struct virtchnl2_alloc_vectors *alloc_vec, *rcvd_vec;
+ struct virtchnl2_alloc_vectors ac = { };
+ u16 num_vchunks;
+ int size, err;
+
+ ac.num_vectors = cpu_to_le16(num_vectors);
+
+ mutex_lock(&adapter->vc_buf_lock);
+
+ err = idpf_send_mb_msg(adapter, VIRTCHNL2_OP_ALLOC_VECTORS,
+ sizeof(ac), (u8 *)&ac);
+ if (err)
+ goto rel_lock;
+
+ err = idpf_wait_for_event(adapter, NULL, IDPF_VC_ALLOC_VECTORS,
+ IDPF_VC_ALLOC_VECTORS_ERR);
+ if (err)
+ goto rel_lock;
+
+ rcvd_vec = (struct virtchnl2_alloc_vectors *)adapter->vc_msg;
+ num_vchunks = le16_to_cpu(rcvd_vec->vchunks.num_vchunks);
+
+ size = struct_size(rcvd_vec, vchunks.vchunks, num_vchunks);
+ if (size > sizeof(adapter->vc_msg)) {
+ err = -EINVAL;
+ goto rel_lock;
+ }
+
+ kfree(adapter->req_vec_chunks);
+ adapter->req_vec_chunks = NULL;
+ adapter->req_vec_chunks = kmemdup(adapter->vc_msg, size, GFP_KERNEL);
+ if (!adapter->req_vec_chunks) {
+ err = -ENOMEM;
+ goto rel_lock;
+ }
+
+ alloc_vec = adapter->req_vec_chunks;
+ if (le16_to_cpu(alloc_vec->num_vectors) < num_vectors) {
+ kfree(adapter->req_vec_chunks);
+ adapter->req_vec_chunks = NULL;
+ err = -EINVAL;
+ }
+
+rel_lock:
+ mutex_unlock(&adapter->vc_buf_lock);
+
+ return err;
+}
+
+/**
+ * idpf_send_dealloc_vectors_msg - Send virtchnl de allocate vectors message
+ * @adapter: Driver specific private structure
+ *
+ * Returns 0 on success, negative on failure.
+ */
+int idpf_send_dealloc_vectors_msg(struct idpf_adapter *adapter)
+{
+ struct virtchnl2_alloc_vectors *ac = adapter->req_vec_chunks;
+ struct virtchnl2_vector_chunks *vcs = &ac->vchunks;
+ int buf_size, err;
+
+ buf_size = struct_size(vcs, vchunks, le16_to_cpu(vcs->num_vchunks));
+
+ mutex_lock(&adapter->vc_buf_lock);
+
+ err = idpf_send_mb_msg(adapter, VIRTCHNL2_OP_DEALLOC_VECTORS, buf_size,
+ (u8 *)vcs);
+ if (err)
+ goto rel_lock;
+
+ err = idpf_min_wait_for_event(adapter, NULL, IDPF_VC_DEALLOC_VECTORS,
+ IDPF_VC_DEALLOC_VECTORS_ERR);
+ if (err)
+ goto rel_lock;
+
+ kfree(adapter->req_vec_chunks);
+ adapter->req_vec_chunks = NULL;
+
+rel_lock:
+ mutex_unlock(&adapter->vc_buf_lock);
+
+ return err;
+}
+
+/**
+ * idpf_get_max_vfs - Get max number of vfs supported
+ * @adapter: Driver specific private structure
+ *
+ * Returns max number of VFs
+ */
+static int idpf_get_max_vfs(struct idpf_adapter *adapter)
+{
+ return le16_to_cpu(adapter->caps.max_sriov_vfs);
+}
+
+/**
+ * idpf_send_set_sriov_vfs_msg - Send virtchnl set sriov vfs message
+ * @adapter: Driver specific private structure
+ * @num_vfs: number of virtual functions to be created
+ *
+ * Returns 0 on success, negative on failure.
+ */
+int idpf_send_set_sriov_vfs_msg(struct idpf_adapter *adapter, u16 num_vfs)
+{
+ struct virtchnl2_sriov_vfs_info svi = { };
+ int err;
+
+ svi.num_vfs = cpu_to_le16(num_vfs);
+
+ mutex_lock(&adapter->vc_buf_lock);
+
+ err = idpf_send_mb_msg(adapter, VIRTCHNL2_OP_SET_SRIOV_VFS,
+ sizeof(svi), (u8 *)&svi);
+ if (err)
+ goto rel_lock;
+
+ err = idpf_wait_for_event(adapter, NULL, IDPF_VC_SET_SRIOV_VFS,
+ IDPF_VC_SET_SRIOV_VFS_ERR);
+
+rel_lock:
+ mutex_unlock(&adapter->vc_buf_lock);
+
+ return err;
+}
+
+/**
+ * idpf_send_get_stats_msg - Send virtchnl get statistics message
+ * @vport: vport to get stats for
+ *
+ * Returns 0 on success, negative on failure.
+ */
+int idpf_send_get_stats_msg(struct idpf_vport *vport)
+{
+ struct idpf_netdev_priv *np = netdev_priv(vport->netdev);
+ struct rtnl_link_stats64 *netstats = &np->netstats;
+ struct idpf_adapter *adapter = vport->adapter;
+ struct virtchnl2_vport_stats stats_msg = { };
+ struct virtchnl2_vport_stats *stats;
+ int err;
+
+ /* Don't send get_stats message if the link is down */
+ if (np->state <= __IDPF_VPORT_DOWN)
+ return 0;
+
+ stats_msg.vport_id = cpu_to_le32(vport->vport_id);
+
+ mutex_lock(&vport->vc_buf_lock);
+
+ err = idpf_send_mb_msg(adapter, VIRTCHNL2_OP_GET_STATS,
+ sizeof(struct virtchnl2_vport_stats),
+ (u8 *)&stats_msg);
+ if (err)
+ goto rel_lock;
+
+ err = idpf_wait_for_event(adapter, vport, IDPF_VC_GET_STATS,
+ IDPF_VC_GET_STATS_ERR);
+ if (err)
+ goto rel_lock;
+
+ stats = (struct virtchnl2_vport_stats *)vport->vc_msg;
+
+ spin_lock_bh(&np->stats_lock);
+
+ netstats->rx_packets = le64_to_cpu(stats->rx_unicast) +
+ le64_to_cpu(stats->rx_multicast) +
+ le64_to_cpu(stats->rx_broadcast);
+ netstats->rx_bytes = le64_to_cpu(stats->rx_bytes);
+ netstats->rx_dropped = le64_to_cpu(stats->rx_discards);
+ netstats->rx_over_errors = le64_to_cpu(stats->rx_overflow_drop);
+ netstats->rx_length_errors = le64_to_cpu(stats->rx_invalid_frame_length);
+
+ netstats->tx_packets = le64_to_cpu(stats->tx_unicast) +
+ le64_to_cpu(stats->tx_multicast) +
+ le64_to_cpu(stats->tx_broadcast);
+ netstats->tx_bytes = le64_to_cpu(stats->tx_bytes);
+ netstats->tx_errors = le64_to_cpu(stats->tx_errors);
+ netstats->tx_dropped = le64_to_cpu(stats->tx_discards);
+
+ vport->port_stats.vport_stats = *stats;
+
+ spin_unlock_bh(&np->stats_lock);
+
+rel_lock:
+ mutex_unlock(&vport->vc_buf_lock);
+
+ return err;
+}
+
+/**
+ * idpf_send_get_set_rss_lut_msg - Send virtchnl get or set rss lut message
+ * @vport: virtual port data structure
+ * @get: flag to set or get rss look up table
+ *
+ * Returns 0 on success, negative on failure.
+ */
+int idpf_send_get_set_rss_lut_msg(struct idpf_vport *vport, bool get)
+{
+ struct idpf_adapter *adapter = vport->adapter;
+ struct virtchnl2_rss_lut *recv_rl;
+ struct idpf_rss_data *rss_data;
+ struct virtchnl2_rss_lut *rl;
+ int buf_size, lut_buf_size;
+ int i, err;
+
+ rss_data = &adapter->vport_config[vport->idx]->user_config.rss_data;
+ buf_size = struct_size(rl, lut, rss_data->rss_lut_size);
+ rl = kzalloc(buf_size, GFP_KERNEL);
+ if (!rl)
+ return -ENOMEM;
+
+ rl->vport_id = cpu_to_le32(vport->vport_id);
+ mutex_lock(&vport->vc_buf_lock);
+
+ if (!get) {
+ rl->lut_entries = cpu_to_le16(rss_data->rss_lut_size);
+ for (i = 0; i < rss_data->rss_lut_size; i++)
+ rl->lut[i] = cpu_to_le32(rss_data->rss_lut[i]);
+
+ err = idpf_send_mb_msg(adapter, VIRTCHNL2_OP_SET_RSS_LUT,
+ buf_size, (u8 *)rl);
+ if (err)
+ goto free_mem;
+
+ err = idpf_wait_for_event(adapter, vport, IDPF_VC_SET_RSS_LUT,
+ IDPF_VC_SET_RSS_LUT_ERR);
+
+ goto free_mem;
+ }
+
+ err = idpf_send_mb_msg(adapter, VIRTCHNL2_OP_GET_RSS_LUT,
+ buf_size, (u8 *)rl);
+ if (err)
+ goto free_mem;
+
+ err = idpf_wait_for_event(adapter, vport, IDPF_VC_GET_RSS_LUT,
+ IDPF_VC_GET_RSS_LUT_ERR);
+ if (err)
+ goto free_mem;
+
+ recv_rl = (struct virtchnl2_rss_lut *)vport->vc_msg;
+ if (rss_data->rss_lut_size == le16_to_cpu(recv_rl->lut_entries))
+ goto do_memcpy;
+
+ rss_data->rss_lut_size = le16_to_cpu(recv_rl->lut_entries);
+ kfree(rss_data->rss_lut);
+
+ lut_buf_size = rss_data->rss_lut_size * sizeof(u32);
+ rss_data->rss_lut = kzalloc(lut_buf_size, GFP_KERNEL);
+ if (!rss_data->rss_lut) {
+ rss_data->rss_lut_size = 0;
+ err = -ENOMEM;
+ goto free_mem;
+ }
+
+do_memcpy:
+ memcpy(rss_data->rss_lut, vport->vc_msg, rss_data->rss_lut_size);
+free_mem:
+ mutex_unlock(&vport->vc_buf_lock);
+ kfree(rl);
+
+ return err;
+}
+
+/**
+ * idpf_send_get_set_rss_key_msg - Send virtchnl get or set rss key message
+ * @vport: virtual port data structure
+ * @get: flag to set or get rss look up table
+ *
+ * Returns 0 on success, negative on failure
+ */
+int idpf_send_get_set_rss_key_msg(struct idpf_vport *vport, bool get)
+{
+ struct idpf_adapter *adapter = vport->adapter;
+ struct virtchnl2_rss_key *recv_rk;
+ struct idpf_rss_data *rss_data;
+ struct virtchnl2_rss_key *rk;
+ int i, buf_size, err;
+
+ rss_data = &adapter->vport_config[vport->idx]->user_config.rss_data;
+ buf_size = struct_size(rk, key_flex, rss_data->rss_key_size);
+ rk = kzalloc(buf_size, GFP_KERNEL);
+ if (!rk)
+ return -ENOMEM;
+
+ rk->vport_id = cpu_to_le32(vport->vport_id);
+ mutex_lock(&vport->vc_buf_lock);
+
+ if (get) {
+ err = idpf_send_mb_msg(adapter, VIRTCHNL2_OP_GET_RSS_KEY,
+ buf_size, (u8 *)rk);
+ if (err)
+ goto error;
+
+ err = idpf_wait_for_event(adapter, vport, IDPF_VC_GET_RSS_KEY,
+ IDPF_VC_GET_RSS_KEY_ERR);
+ if (err)
+ goto error;
+
+ recv_rk = (struct virtchnl2_rss_key *)vport->vc_msg;
+ if (rss_data->rss_key_size !=
+ le16_to_cpu(recv_rk->key_len)) {
+ rss_data->rss_key_size =
+ min_t(u16, NETDEV_RSS_KEY_LEN,
+ le16_to_cpu(recv_rk->key_len));
+ kfree(rss_data->rss_key);
+ rss_data->rss_key = kzalloc(rss_data->rss_key_size,
+ GFP_KERNEL);
+ if (!rss_data->rss_key) {
+ rss_data->rss_key_size = 0;
+ err = -ENOMEM;
+ goto error;
+ }
+ }
+ memcpy(rss_data->rss_key, recv_rk->key_flex,
+ rss_data->rss_key_size);
+ } else {
+ rk->key_len = cpu_to_le16(rss_data->rss_key_size);
+ for (i = 0; i < rss_data->rss_key_size; i++)
+ rk->key_flex[i] = rss_data->rss_key[i];
+
+ err = idpf_send_mb_msg(adapter, VIRTCHNL2_OP_SET_RSS_KEY,
+ buf_size, (u8 *)rk);
+ if (err)
+ goto error;
+
+ err = idpf_wait_for_event(adapter, vport, IDPF_VC_SET_RSS_KEY,
+ IDPF_VC_SET_RSS_KEY_ERR);
+ }
+
+error:
+ mutex_unlock(&vport->vc_buf_lock);
+ kfree(rk);
+
+ return err;
+}
+
+/**
+ * idpf_fill_ptype_lookup - Fill L3 specific fields in ptype lookup table
+ * @ptype: ptype lookup table
+ * @pstate: state machine for ptype lookup table
+ * @ipv4: ipv4 or ipv6
+ * @frag: fragmentation allowed
+ *
+ */
+static void idpf_fill_ptype_lookup(struct idpf_rx_ptype_decoded *ptype,
+ struct idpf_ptype_state *pstate,
+ bool ipv4, bool frag)
+{
+ if (!pstate->outer_ip || !pstate->outer_frag) {
+ ptype->outer_ip = IDPF_RX_PTYPE_OUTER_IP;
+ pstate->outer_ip = true;
+
+ if (ipv4)
+ ptype->outer_ip_ver = IDPF_RX_PTYPE_OUTER_IPV4;
+ else
+ ptype->outer_ip_ver = IDPF_RX_PTYPE_OUTER_IPV6;
+
+ if (frag) {
+ ptype->outer_frag = IDPF_RX_PTYPE_FRAG;
+ pstate->outer_frag = true;
+ }
+ } else {
+ ptype->tunnel_type = IDPF_RX_PTYPE_TUNNEL_IP_IP;
+ pstate->tunnel_state = IDPF_PTYPE_TUNNEL_IP;
+
+ if (ipv4)
+ ptype->tunnel_end_prot =
+ IDPF_RX_PTYPE_TUNNEL_END_IPV4;
+ else
+ ptype->tunnel_end_prot =
+ IDPF_RX_PTYPE_TUNNEL_END_IPV6;
+
+ if (frag)
+ ptype->tunnel_end_frag = IDPF_RX_PTYPE_FRAG;
+ }
+}
+
+/**
+ * idpf_send_get_rx_ptype_msg - Send virtchnl for ptype info
+ * @vport: virtual port data structure
+ *
+ * Returns 0 on success, negative on failure.
+ */
+int idpf_send_get_rx_ptype_msg(struct idpf_vport *vport)
+{
+ struct idpf_rx_ptype_decoded *ptype_lkup = vport->rx_ptype_lkup;
+ struct virtchnl2_get_ptype_info get_ptype_info;
+ int max_ptype, ptypes_recvd = 0, ptype_offset;
+ struct idpf_adapter *adapter = vport->adapter;
+ struct virtchnl2_get_ptype_info *ptype_info;
+ u16 next_ptype_id = 0;
+ int err = 0, i, j, k;
+
+ if (idpf_is_queue_model_split(vport->rxq_model))
+ max_ptype = IDPF_RX_MAX_PTYPE;
+ else
+ max_ptype = IDPF_RX_MAX_BASE_PTYPE;
+
+ memset(vport->rx_ptype_lkup, 0, sizeof(vport->rx_ptype_lkup));
+
+ ptype_info = kzalloc(IDPF_CTLQ_MAX_BUF_LEN, GFP_KERNEL);
+ if (!ptype_info)
+ return -ENOMEM;
+
+ mutex_lock(&adapter->vc_buf_lock);
+
+ while (next_ptype_id < max_ptype) {
+ get_ptype_info.start_ptype_id = cpu_to_le16(next_ptype_id);
+
+ if ((next_ptype_id + IDPF_RX_MAX_PTYPES_PER_BUF) > max_ptype)
+ get_ptype_info.num_ptypes =
+ cpu_to_le16(max_ptype - next_ptype_id);
+ else
+ get_ptype_info.num_ptypes =
+ cpu_to_le16(IDPF_RX_MAX_PTYPES_PER_BUF);
+
+ err = idpf_send_mb_msg(adapter, VIRTCHNL2_OP_GET_PTYPE_INFO,
+ sizeof(struct virtchnl2_get_ptype_info),
+ (u8 *)&get_ptype_info);
+ if (err)
+ goto vc_buf_unlock;
+
+ err = idpf_wait_for_event(adapter, NULL, IDPF_VC_GET_PTYPE_INFO,
+ IDPF_VC_GET_PTYPE_INFO_ERR);
+ if (err)
+ goto vc_buf_unlock;
+
+ memcpy(ptype_info, adapter->vc_msg, IDPF_CTLQ_MAX_BUF_LEN);
+
+ ptypes_recvd += le16_to_cpu(ptype_info->num_ptypes);
+ if (ptypes_recvd > max_ptype) {
+ err = -EINVAL;
+ goto vc_buf_unlock;
+ }
+
+ next_ptype_id = le16_to_cpu(get_ptype_info.start_ptype_id) +
+ le16_to_cpu(get_ptype_info.num_ptypes);
+
+ ptype_offset = IDPF_RX_PTYPE_HDR_SZ;
+
+ for (i = 0; i < le16_to_cpu(ptype_info->num_ptypes); i++) {
+ struct idpf_ptype_state pstate = { };
+ struct virtchnl2_ptype *ptype;
+ u16 id;
+
+ ptype = (struct virtchnl2_ptype *)
+ ((u8 *)ptype_info + ptype_offset);
+
+ ptype_offset += IDPF_GET_PTYPE_SIZE(ptype);
+ if (ptype_offset > IDPF_CTLQ_MAX_BUF_LEN) {
+ err = -EINVAL;
+ goto vc_buf_unlock;
+ }
+
+ /* 0xFFFF indicates end of ptypes */
+ if (le16_to_cpu(ptype->ptype_id_10) ==
+ IDPF_INVALID_PTYPE_ID) {
+ err = 0;
+ goto vc_buf_unlock;
+ }
+
+ if (idpf_is_queue_model_split(vport->rxq_model))
+ k = le16_to_cpu(ptype->ptype_id_10);
+ else
+ k = ptype->ptype_id_8;
+
+ if (ptype->proto_id_count)
+ ptype_lkup[k].known = 1;
+
+ for (j = 0; j < ptype->proto_id_count; j++) {
+ id = le16_to_cpu(ptype->proto_id[j]);
+ switch (id) {
+ case VIRTCHNL2_PROTO_HDR_GRE:
+ if (pstate.tunnel_state ==
+ IDPF_PTYPE_TUNNEL_IP) {
+ ptype_lkup[k].tunnel_type =
+ IDPF_RX_PTYPE_TUNNEL_IP_GRENAT;
+ pstate.tunnel_state |=
+ IDPF_PTYPE_TUNNEL_IP_GRENAT;
+ }
+ break;
+ case VIRTCHNL2_PROTO_HDR_MAC:
+ ptype_lkup[k].outer_ip =
+ IDPF_RX_PTYPE_OUTER_L2;
+ if (pstate.tunnel_state ==
+ IDPF_TUN_IP_GRE) {
+ ptype_lkup[k].tunnel_type =
+ IDPF_RX_PTYPE_TUNNEL_IP_GRENAT_MAC;
+ pstate.tunnel_state |=
+ IDPF_PTYPE_TUNNEL_IP_GRENAT_MAC;
+ }
+ break;
+ case VIRTCHNL2_PROTO_HDR_IPV4:
+ idpf_fill_ptype_lookup(&ptype_lkup[k],
+ &pstate, true,
+ false);
+ break;
+ case VIRTCHNL2_PROTO_HDR_IPV6:
+ idpf_fill_ptype_lookup(&ptype_lkup[k],
+ &pstate, false,
+ false);
+ break;
+ case VIRTCHNL2_PROTO_HDR_IPV4_FRAG:
+ idpf_fill_ptype_lookup(&ptype_lkup[k],
+ &pstate, true,
+ true);
+ break;
+ case VIRTCHNL2_PROTO_HDR_IPV6_FRAG:
+ idpf_fill_ptype_lookup(&ptype_lkup[k],
+ &pstate, false,
+ true);
+ break;
+ case VIRTCHNL2_PROTO_HDR_UDP:
+ ptype_lkup[k].inner_prot =
+ IDPF_RX_PTYPE_INNER_PROT_UDP;
+ break;
+ case VIRTCHNL2_PROTO_HDR_TCP:
+ ptype_lkup[k].inner_prot =
+ IDPF_RX_PTYPE_INNER_PROT_TCP;
+ break;
+ case VIRTCHNL2_PROTO_HDR_SCTP:
+ ptype_lkup[k].inner_prot =
+ IDPF_RX_PTYPE_INNER_PROT_SCTP;
+ break;
+ case VIRTCHNL2_PROTO_HDR_ICMP:
+ ptype_lkup[k].inner_prot =
+ IDPF_RX_PTYPE_INNER_PROT_ICMP;
+ break;
+ case VIRTCHNL2_PROTO_HDR_PAY:
+ ptype_lkup[k].payload_layer =
+ IDPF_RX_PTYPE_PAYLOAD_LAYER_PAY2;
+ break;
+ case VIRTCHNL2_PROTO_HDR_ICMPV6:
+ case VIRTCHNL2_PROTO_HDR_IPV6_EH:
+ case VIRTCHNL2_PROTO_HDR_PRE_MAC:
+ case VIRTCHNL2_PROTO_HDR_POST_MAC:
+ case VIRTCHNL2_PROTO_HDR_ETHERTYPE:
+ case VIRTCHNL2_PROTO_HDR_SVLAN:
+ case VIRTCHNL2_PROTO_HDR_CVLAN:
+ case VIRTCHNL2_PROTO_HDR_MPLS:
+ case VIRTCHNL2_PROTO_HDR_MMPLS:
+ case VIRTCHNL2_PROTO_HDR_PTP:
+ case VIRTCHNL2_PROTO_HDR_CTRL:
+ case VIRTCHNL2_PROTO_HDR_LLDP:
+ case VIRTCHNL2_PROTO_HDR_ARP:
+ case VIRTCHNL2_PROTO_HDR_ECP:
+ case VIRTCHNL2_PROTO_HDR_EAPOL:
+ case VIRTCHNL2_PROTO_HDR_PPPOD:
+ case VIRTCHNL2_PROTO_HDR_PPPOE:
+ case VIRTCHNL2_PROTO_HDR_IGMP:
+ case VIRTCHNL2_PROTO_HDR_AH:
+ case VIRTCHNL2_PROTO_HDR_ESP:
+ case VIRTCHNL2_PROTO_HDR_IKE:
+ case VIRTCHNL2_PROTO_HDR_NATT_KEEP:
+ case VIRTCHNL2_PROTO_HDR_L2TPV2:
+ case VIRTCHNL2_PROTO_HDR_L2TPV2_CONTROL:
+ case VIRTCHNL2_PROTO_HDR_L2TPV3:
+ case VIRTCHNL2_PROTO_HDR_GTP:
+ case VIRTCHNL2_PROTO_HDR_GTP_EH:
+ case VIRTCHNL2_PROTO_HDR_GTPCV2:
+ case VIRTCHNL2_PROTO_HDR_GTPC_TEID:
+ case VIRTCHNL2_PROTO_HDR_GTPU:
+ case VIRTCHNL2_PROTO_HDR_GTPU_UL:
+ case VIRTCHNL2_PROTO_HDR_GTPU_DL:
+ case VIRTCHNL2_PROTO_HDR_ECPRI:
+ case VIRTCHNL2_PROTO_HDR_VRRP:
+ case VIRTCHNL2_PROTO_HDR_OSPF:
+ case VIRTCHNL2_PROTO_HDR_TUN:
+ case VIRTCHNL2_PROTO_HDR_NVGRE:
+ case VIRTCHNL2_PROTO_HDR_VXLAN:
+ case VIRTCHNL2_PROTO_HDR_VXLAN_GPE:
+ case VIRTCHNL2_PROTO_HDR_GENEVE:
+ case VIRTCHNL2_PROTO_HDR_NSH:
+ case VIRTCHNL2_PROTO_HDR_QUIC:
+ case VIRTCHNL2_PROTO_HDR_PFCP:
+ case VIRTCHNL2_PROTO_HDR_PFCP_NODE:
+ case VIRTCHNL2_PROTO_HDR_PFCP_SESSION:
+ case VIRTCHNL2_PROTO_HDR_RTP:
+ case VIRTCHNL2_PROTO_HDR_NO_PROTO:
+ break;
+ default:
+ break;
+ }
+ }
+ }
+ }
+
+vc_buf_unlock:
+ mutex_unlock(&adapter->vc_buf_lock);
+ kfree(ptype_info);
+
+ return err;
+}
+
+/**
+ * idpf_send_ena_dis_loopback_msg - Send virtchnl enable/disable loopback
+ * message
+ * @vport: virtual port data structure
+ *
+ * Returns 0 on success, negative on failure.
+ */
+int idpf_send_ena_dis_loopback_msg(struct idpf_vport *vport)
+{
+ struct virtchnl2_loopback loopback;
+ int err;
+
+ loopback.vport_id = cpu_to_le32(vport->vport_id);
+ loopback.enable = idpf_is_feature_ena(vport, NETIF_F_LOOPBACK);
+
+ mutex_lock(&vport->vc_buf_lock);
+
+ err = idpf_send_mb_msg(vport->adapter, VIRTCHNL2_OP_LOOPBACK,
+ sizeof(loopback), (u8 *)&loopback);
+ if (err)
+ goto rel_lock;
+
+ err = idpf_wait_for_event(vport->adapter, vport,
+ IDPF_VC_LOOPBACK_STATE,
+ IDPF_VC_LOOPBACK_STATE_ERR);
+
+rel_lock:
+ mutex_unlock(&vport->vc_buf_lock);
+
+ return err;
+}
+
+/**
+ * idpf_find_ctlq - Given a type and id, find ctlq info
+ * @hw: hardware struct
+ * @type: type of ctrlq to find
+ * @id: ctlq id to find
+ *
+ * Returns pointer to found ctlq info struct, NULL otherwise.
+ */
+static struct idpf_ctlq_info *idpf_find_ctlq(struct idpf_hw *hw,
+ enum idpf_ctlq_type type, int id)
+{
+ struct idpf_ctlq_info *cq, *tmp;
+
+ list_for_each_entry_safe(cq, tmp, &hw->cq_list_head, cq_list)
+ if (cq->q_id == id && cq->cq_type == type)
+ return cq;
+
+ return NULL;
+}
+
+/**
+ * idpf_init_dflt_mbx - Setup default mailbox parameters and make request
+ * @adapter: adapter info struct
+ *
+ * Returns 0 on success, negative otherwise
+ */
+int idpf_init_dflt_mbx(struct idpf_adapter *adapter)
+{
+ struct idpf_ctlq_create_info ctlq_info[] = {
+ {
+ .type = IDPF_CTLQ_TYPE_MAILBOX_TX,
+ .id = IDPF_DFLT_MBX_ID,
+ .len = IDPF_DFLT_MBX_Q_LEN,
+ .buf_size = IDPF_CTLQ_MAX_BUF_LEN
+ },
+ {
+ .type = IDPF_CTLQ_TYPE_MAILBOX_RX,
+ .id = IDPF_DFLT_MBX_ID,
+ .len = IDPF_DFLT_MBX_Q_LEN,
+ .buf_size = IDPF_CTLQ_MAX_BUF_LEN
+ }
+ };
+ struct idpf_hw *hw = &adapter->hw;
+ int err;
+
+ adapter->dev_ops.reg_ops.ctlq_reg_init(ctlq_info);
+
+ err = idpf_ctlq_init(hw, IDPF_NUM_DFLT_MBX_Q, ctlq_info);
+ if (err)
+ return err;
+
+ hw->asq = idpf_find_ctlq(hw, IDPF_CTLQ_TYPE_MAILBOX_TX,
+ IDPF_DFLT_MBX_ID);
+ hw->arq = idpf_find_ctlq(hw, IDPF_CTLQ_TYPE_MAILBOX_RX,
+ IDPF_DFLT_MBX_ID);
+
+ if (!hw->asq || !hw->arq) {
+ idpf_ctlq_deinit(hw);
+
+ return -ENOENT;
+ }
+
+ adapter->state = __IDPF_STARTUP;
+
+ return 0;
+}
+
+/**
+ * idpf_deinit_dflt_mbx - Free up ctlqs setup
+ * @adapter: Driver specific private data structure
+ */
+void idpf_deinit_dflt_mbx(struct idpf_adapter *adapter)
+{
+ if (adapter->hw.arq && adapter->hw.asq) {
+ idpf_mb_clean(adapter);
+ idpf_ctlq_deinit(&adapter->hw);
+ }
+ adapter->hw.arq = NULL;
+ adapter->hw.asq = NULL;
+}
+
+/**
+ * idpf_vport_params_buf_rel - Release memory for MailBox resources
+ * @adapter: Driver specific private data structure
+ *
+ * Will release memory to hold the vport parameters received on MailBox
+ */
+static void idpf_vport_params_buf_rel(struct idpf_adapter *adapter)
+{
+ kfree(adapter->vport_params_recvd);
+ adapter->vport_params_recvd = NULL;
+ kfree(adapter->vport_params_reqd);
+ adapter->vport_params_reqd = NULL;
+ kfree(adapter->vport_ids);
+ adapter->vport_ids = NULL;
+}
+
+/**
+ * idpf_vport_params_buf_alloc - Allocate memory for MailBox resources
+ * @adapter: Driver specific private data structure
+ *
+ * Will alloc memory to hold the vport parameters received on MailBox
+ */
+static int idpf_vport_params_buf_alloc(struct idpf_adapter *adapter)
+{
+ u16 num_max_vports = idpf_get_max_vports(adapter);
+
+ adapter->vport_params_reqd = kcalloc(num_max_vports,
+ sizeof(*adapter->vport_params_reqd),
+ GFP_KERNEL);
+ if (!adapter->vport_params_reqd)
+ return -ENOMEM;
+
+ adapter->vport_params_recvd = kcalloc(num_max_vports,
+ sizeof(*adapter->vport_params_recvd),
+ GFP_KERNEL);
+ if (!adapter->vport_params_recvd)
+ goto err_mem;
+
+ adapter->vport_ids = kcalloc(num_max_vports, sizeof(u32), GFP_KERNEL);
+ if (!adapter->vport_ids)
+ goto err_mem;
+
+ if (adapter->vport_config)
+ return 0;
+
+ adapter->vport_config = kcalloc(num_max_vports,
+ sizeof(*adapter->vport_config),
+ GFP_KERNEL);
+ if (!adapter->vport_config)
+ goto err_mem;
+
+ return 0;
+
+err_mem:
+ idpf_vport_params_buf_rel(adapter);
+
+ return -ENOMEM;
+}
+
+/**
+ * idpf_vc_core_init - Initialize state machine and get driver specific
+ * resources
+ * @adapter: Driver specific private structure
+ *
+ * This function will initialize the state machine and request all necessary
+ * resources required by the device driver. Once the state machine is
+ * initialized, allocate memory to store vport specific information and also
+ * requests required interrupts.
+ *
+ * Returns 0 on success, -EAGAIN function will get called again,
+ * otherwise negative on failure.
+ */
+int idpf_vc_core_init(struct idpf_adapter *adapter)
+{
+ int task_delay = 30;
+ u16 num_max_vports;
+ int err = 0;
+
+ while (adapter->state != __IDPF_INIT_SW) {
+ switch (adapter->state) {
+ case __IDPF_STARTUP:
+ if (idpf_send_ver_msg(adapter))
+ goto init_failed;
+ adapter->state = __IDPF_VER_CHECK;
+ goto restart;
+ case __IDPF_VER_CHECK:
+ err = idpf_recv_ver_msg(adapter);
+ if (err == -EIO) {
+ return err;
+ } else if (err == -EAGAIN) {
+ adapter->state = __IDPF_STARTUP;
+ goto restart;
+ } else if (err) {
+ goto init_failed;
+ }
+ if (idpf_send_get_caps_msg(adapter))
+ goto init_failed;
+ adapter->state = __IDPF_GET_CAPS;
+ goto restart;
+ case __IDPF_GET_CAPS:
+ if (idpf_recv_get_caps_msg(adapter))
+ goto init_failed;
+ adapter->state = __IDPF_INIT_SW;
+ break;
+ default:
+ dev_err(&adapter->pdev->dev, "Device is in bad state: %d\n",
+ adapter->state);
+ goto init_failed;
+ }
+ break;
+restart:
+ /* Give enough time before proceeding further with
+ * state machine
+ */
+ msleep(task_delay);
+ }
+
+ pci_sriov_set_totalvfs(adapter->pdev, idpf_get_max_vfs(adapter));
+ num_max_vports = idpf_get_max_vports(adapter);
+ adapter->max_vports = num_max_vports;
+ adapter->vports = kcalloc(num_max_vports, sizeof(*adapter->vports),
+ GFP_KERNEL);
+ if (!adapter->vports)
+ return -ENOMEM;
+
+ if (!adapter->netdevs) {
+ adapter->netdevs = kcalloc(num_max_vports,
+ sizeof(struct net_device *),
+ GFP_KERNEL);
+ if (!adapter->netdevs) {
+ err = -ENOMEM;
+ goto err_netdev_alloc;
+ }
+ }
+
+ err = idpf_vport_params_buf_alloc(adapter);
+ if (err) {
+ dev_err(&adapter->pdev->dev, "Failed to alloc vport params buffer: %d\n",
+ err);
+ goto err_netdev_alloc;
+ }
+
+ /* Start the mailbox task before requesting vectors. This will ensure
+ * vector information response from mailbox is handled
+ */
+ queue_delayed_work(adapter->mbx_wq, &adapter->mbx_task, 0);
+
+ queue_delayed_work(adapter->serv_wq, &adapter->serv_task,
+ msecs_to_jiffies(5 * (adapter->pdev->devfn & 0x07)));
+
+ err = idpf_intr_req(adapter);
+ if (err) {
+ dev_err(&adapter->pdev->dev, "failed to enable interrupt vectors: %d\n",
+ err);
+ goto err_intr_req;
+ }
+
+ idpf_init_avail_queues(adapter);
+
+ /* Skew the delay for init tasks for each function based on fn number
+ * to prevent every function from making the same call simultaneously.
+ */
+ queue_delayed_work(adapter->init_wq, &adapter->init_task,
+ msecs_to_jiffies(5 * (adapter->pdev->devfn & 0x07)));
+
+ goto no_err;
+
+err_intr_req:
+ cancel_delayed_work_sync(&adapter->serv_task);
+ idpf_vport_params_buf_rel(adapter);
+err_netdev_alloc:
+ kfree(adapter->vports);
+ adapter->vports = NULL;
+no_err:
+ return err;
+
+init_failed:
+ /* Don't retry if we're trying to go down, just bail. */
+ if (test_bit(IDPF_REMOVE_IN_PROG, adapter->flags))
+ return err;
+
+ if (++adapter->mb_wait_count > IDPF_MB_MAX_ERR) {
+ dev_err(&adapter->pdev->dev, "Failed to establish mailbox communications with hardware\n");
+
+ return -EFAULT;
+ }
+ /* If it reached here, it is possible that mailbox queue initialization
+ * register writes might not have taken effect. Retry to initialize
+ * the mailbox again
+ */
+ adapter->state = __IDPF_STARTUP;
+ idpf_deinit_dflt_mbx(adapter);
+ set_bit(IDPF_HR_DRV_LOAD, adapter->flags);
+ queue_delayed_work(adapter->vc_event_wq, &adapter->vc_event_task,
+ msecs_to_jiffies(task_delay));
+
+ return -EAGAIN;
+}
+
+/**
+ * idpf_vc_core_deinit - Device deinit routine
+ * @adapter: Driver specific private structure
+ *
+ */
+void idpf_vc_core_deinit(struct idpf_adapter *adapter)
+{
+ int i;
+
+ idpf_deinit_task(adapter);
+ idpf_intr_rel(adapter);
+ /* Set all bits as we dont know on which vc_state the vhnl_wq is
+ * waiting on and wakeup the virtchnl workqueue even if it is waiting
+ * for the response as we are going down
+ */
+ for (i = 0; i < IDPF_VC_NBITS; i++)
+ set_bit(i, adapter->vc_state);
+ wake_up(&adapter->vchnl_wq);
+
+ cancel_delayed_work_sync(&adapter->serv_task);
+ cancel_delayed_work_sync(&adapter->mbx_task);
+
+ idpf_vport_params_buf_rel(adapter);
+
+ /* Clear all the bits */
+ for (i = 0; i < IDPF_VC_NBITS; i++)
+ clear_bit(i, adapter->vc_state);
+
+ kfree(adapter->vports);
+ adapter->vports = NULL;
+}
+
+/**
+ * idpf_vport_alloc_vec_indexes - Get relative vector indexes
+ * @vport: virtual port data struct
+ *
+ * This function requests the vector information required for the vport and
+ * stores the vector indexes received from the 'global vector distribution'
+ * in the vport's queue vectors array.
+ *
+ * Return 0 on success, error on failure
+ */
+int idpf_vport_alloc_vec_indexes(struct idpf_vport *vport)
+{
+ struct idpf_vector_info vec_info;
+ int num_alloc_vecs;
+
+ vec_info.num_curr_vecs = vport->num_q_vectors;
+ vec_info.num_req_vecs = max(vport->num_txq, vport->num_rxq);
+ vec_info.default_vport = vport->default_vport;
+ vec_info.index = vport->idx;
+
+ num_alloc_vecs = idpf_req_rel_vector_indexes(vport->adapter,
+ vport->q_vector_idxs,
+ &vec_info);
+ if (num_alloc_vecs <= 0) {
+ dev_err(&vport->adapter->pdev->dev, "Vector distribution failed: %d\n",
+ num_alloc_vecs);
+ return -EINVAL;
+ }
+
+ vport->num_q_vectors = num_alloc_vecs;
+
+ return 0;
+}
+
+/**
+ * idpf_vport_init - Initialize virtual port
+ * @vport: virtual port to be initialized
+ * @max_q: vport max queue info
+ *
+ * Will initialize vport with the info received through MB earlier
+ */
+void idpf_vport_init(struct idpf_vport *vport, struct idpf_vport_max_q *max_q)
+{
+ struct idpf_adapter *adapter = vport->adapter;
+ struct virtchnl2_create_vport *vport_msg;
+ struct idpf_vport_config *vport_config;
+ u16 tx_itr[] = {2, 8, 64, 128, 256};
+ u16 rx_itr[] = {2, 8, 32, 96, 128};
+ struct idpf_rss_data *rss_data;
+ u16 idx = vport->idx;
+
+ vport_config = adapter->vport_config[idx];
+ rss_data = &vport_config->user_config.rss_data;
+ vport_msg = adapter->vport_params_recvd[idx];
+
+ vport_config->max_q.max_txq = max_q->max_txq;
+ vport_config->max_q.max_rxq = max_q->max_rxq;
+ vport_config->max_q.max_complq = max_q->max_complq;
+ vport_config->max_q.max_bufq = max_q->max_bufq;
+
+ vport->txq_model = le16_to_cpu(vport_msg->txq_model);
+ vport->rxq_model = le16_to_cpu(vport_msg->rxq_model);
+ vport->vport_type = le16_to_cpu(vport_msg->vport_type);
+ vport->vport_id = le32_to_cpu(vport_msg->vport_id);
+
+ rss_data->rss_key_size = min_t(u16, NETDEV_RSS_KEY_LEN,
+ le16_to_cpu(vport_msg->rss_key_size));
+ rss_data->rss_lut_size = le16_to_cpu(vport_msg->rss_lut_size);
+
+ ether_addr_copy(vport->default_mac_addr, vport_msg->default_mac_addr);
+ vport->max_mtu = le16_to_cpu(vport_msg->max_mtu) - IDPF_PACKET_HDR_PAD;
+
+ /* Initialize Tx and Rx profiles for Dynamic Interrupt Moderation */
+ memcpy(vport->rx_itr_profile, rx_itr, IDPF_DIM_PROFILE_SLOTS);
+ memcpy(vport->tx_itr_profile, tx_itr, IDPF_DIM_PROFILE_SLOTS);
+
+ idpf_vport_init_num_qs(vport, vport_msg);
+ idpf_vport_calc_num_q_desc(vport);
+ idpf_vport_calc_num_q_groups(vport);
+ idpf_vport_alloc_vec_indexes(vport);
+
+ vport->crc_enable = adapter->crc_enable;
+}
+
+/**
+ * idpf_get_vec_ids - Initialize vector id from Mailbox parameters
+ * @adapter: adapter structure to get the mailbox vector id
+ * @vecids: Array of vector ids
+ * @num_vecids: number of vector ids
+ * @chunks: vector ids received over mailbox
+ *
+ * Will initialize the mailbox vector id which is received from the
+ * get capabilities and data queue vector ids with ids received as
+ * mailbox parameters.
+ * Returns number of ids filled
+ */
+int idpf_get_vec_ids(struct idpf_adapter *adapter,
+ u16 *vecids, int num_vecids,
+ struct virtchnl2_vector_chunks *chunks)
+{
+ u16 num_chunks = le16_to_cpu(chunks->num_vchunks);
+ int num_vecid_filled = 0;
+ int i, j;
+
+ vecids[num_vecid_filled] = adapter->mb_vector.v_idx;
+ num_vecid_filled++;
+
+ for (j = 0; j < num_chunks; j++) {
+ struct virtchnl2_vector_chunk *chunk;
+ u16 start_vecid, num_vec;
+
+ chunk = &chunks->vchunks[j];
+ num_vec = le16_to_cpu(chunk->num_vectors);
+ start_vecid = le16_to_cpu(chunk->start_vector_id);
+
+ for (i = 0; i < num_vec; i++) {
+ if ((num_vecid_filled + i) < num_vecids) {
+ vecids[num_vecid_filled + i] = start_vecid;
+ start_vecid++;
+ } else {
+ break;
+ }
+ }
+ num_vecid_filled = num_vecid_filled + i;
+ }
+
+ return num_vecid_filled;
+}
+
+/**
+ * idpf_vport_get_queue_ids - Initialize queue id from Mailbox parameters
+ * @qids: Array of queue ids
+ * @num_qids: number of queue ids
+ * @q_type: queue model
+ * @chunks: queue ids received over mailbox
+ *
+ * Will initialize all queue ids with ids received as mailbox parameters
+ * Returns number of ids filled
+ */
+static int idpf_vport_get_queue_ids(u32 *qids, int num_qids, u16 q_type,
+ struct virtchnl2_queue_reg_chunks *chunks)
+{
+ u16 num_chunks = le16_to_cpu(chunks->num_chunks);
+ u32 num_q_id_filled = 0, i;
+ u32 start_q_id, num_q;
+
+ while (num_chunks--) {
+ struct virtchnl2_queue_reg_chunk *chunk;
+
+ chunk = &chunks->chunks[num_chunks];
+ if (le32_to_cpu(chunk->type) != q_type)
+ continue;
+
+ num_q = le32_to_cpu(chunk->num_queues);
+ start_q_id = le32_to_cpu(chunk->start_queue_id);
+
+ for (i = 0; i < num_q; i++) {
+ if ((num_q_id_filled + i) < num_qids) {
+ qids[num_q_id_filled + i] = start_q_id;
+ start_q_id++;
+ } else {
+ break;
+ }
+ }
+ num_q_id_filled = num_q_id_filled + i;
+ }
+
+ return num_q_id_filled;
+}
+
+/**
+ * __idpf_vport_queue_ids_init - Initialize queue ids from Mailbox parameters
+ * @vport: virtual port for which the queues ids are initialized
+ * @qids: queue ids
+ * @num_qids: number of queue ids
+ * @q_type: type of queue
+ *
+ * Will initialize all queue ids with ids received as mailbox
+ * parameters. Returns number of queue ids initialized.
+ */
+static int __idpf_vport_queue_ids_init(struct idpf_vport *vport,
+ const u32 *qids,
+ int num_qids,
+ u32 q_type)
+{
+ struct idpf_queue *q;
+ int i, j, k = 0;
+
+ switch (q_type) {
+ case VIRTCHNL2_QUEUE_TYPE_TX:
+ for (i = 0; i < vport->num_txq_grp; i++) {
+ struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i];
+
+ for (j = 0; j < tx_qgrp->num_txq && k < num_qids; j++, k++) {
+ tx_qgrp->txqs[j]->q_id = qids[k];
+ tx_qgrp->txqs[j]->q_type =
+ VIRTCHNL2_QUEUE_TYPE_TX;
+ }
+ }
+ break;
+ case VIRTCHNL2_QUEUE_TYPE_RX:
+ for (i = 0; i < vport->num_rxq_grp; i++) {
+ struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i];
+ u16 num_rxq;
+
+ if (idpf_is_queue_model_split(vport->rxq_model))
+ num_rxq = rx_qgrp->splitq.num_rxq_sets;
+ else
+ num_rxq = rx_qgrp->singleq.num_rxq;
+
+ for (j = 0; j < num_rxq && k < num_qids; j++, k++) {
+ if (idpf_is_queue_model_split(vport->rxq_model))
+ q = &rx_qgrp->splitq.rxq_sets[j]->rxq;
+ else
+ q = rx_qgrp->singleq.rxqs[j];
+ q->q_id = qids[k];
+ q->q_type = VIRTCHNL2_QUEUE_TYPE_RX;
+ }
+ }
+ break;
+ case VIRTCHNL2_QUEUE_TYPE_TX_COMPLETION:
+ for (i = 0; i < vport->num_txq_grp && k < num_qids; i++, k++) {
+ struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i];
+
+ tx_qgrp->complq->q_id = qids[k];
+ tx_qgrp->complq->q_type =
+ VIRTCHNL2_QUEUE_TYPE_TX_COMPLETION;
+ }
+ break;
+ case VIRTCHNL2_QUEUE_TYPE_RX_BUFFER:
+ for (i = 0; i < vport->num_rxq_grp; i++) {
+ struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i];
+ u8 num_bufqs = vport->num_bufqs_per_qgrp;
+
+ for (j = 0; j < num_bufqs && k < num_qids; j++, k++) {
+ q = &rx_qgrp->splitq.bufq_sets[j].bufq;
+ q->q_id = qids[k];
+ q->q_type = VIRTCHNL2_QUEUE_TYPE_RX_BUFFER;
+ }
+ }
+ break;
+ default:
+ break;
+ }
+
+ return k;
+}
+
+/**
+ * idpf_vport_queue_ids_init - Initialize queue ids from Mailbox parameters
+ * @vport: virtual port for which the queues ids are initialized
+ *
+ * Will initialize all queue ids with ids received as mailbox parameters.
+ * Returns 0 on success, negative if all the queues are not initialized.
+ */
+int idpf_vport_queue_ids_init(struct idpf_vport *vport)
+{
+ struct virtchnl2_create_vport *vport_params;
+ struct virtchnl2_queue_reg_chunks *chunks;
+ struct idpf_vport_config *vport_config;
+ u16 vport_idx = vport->idx;
+ int num_ids, err = 0;
+ u16 q_type;
+ u32 *qids;
+
+ vport_config = vport->adapter->vport_config[vport_idx];
+ if (vport_config->req_qs_chunks) {
+ struct virtchnl2_add_queues *vc_aq =
+ (struct virtchnl2_add_queues *)vport_config->req_qs_chunks;
+ chunks = &vc_aq->chunks;
+ } else {
+ vport_params = vport->adapter->vport_params_recvd[vport_idx];
+ chunks = &vport_params->chunks;
+ }
+
+ qids = kcalloc(IDPF_MAX_QIDS, sizeof(u32), GFP_KERNEL);
+ if (!qids)
+ return -ENOMEM;
+
+ num_ids = idpf_vport_get_queue_ids(qids, IDPF_MAX_QIDS,
+ VIRTCHNL2_QUEUE_TYPE_TX,
+ chunks);
+ if (num_ids < vport->num_txq) {
+ err = -EINVAL;
+ goto mem_rel;
+ }
+ num_ids = __idpf_vport_queue_ids_init(vport, qids, num_ids,
+ VIRTCHNL2_QUEUE_TYPE_TX);
+ if (num_ids < vport->num_txq) {
+ err = -EINVAL;
+ goto mem_rel;
+ }
+
+ num_ids = idpf_vport_get_queue_ids(qids, IDPF_MAX_QIDS,
+ VIRTCHNL2_QUEUE_TYPE_RX,
+ chunks);
+ if (num_ids < vport->num_rxq) {
+ err = -EINVAL;
+ goto mem_rel;
+ }
+ num_ids = __idpf_vport_queue_ids_init(vport, qids, num_ids,
+ VIRTCHNL2_QUEUE_TYPE_RX);
+ if (num_ids < vport->num_rxq) {
+ err = -EINVAL;
+ goto mem_rel;
+ }
+
+ if (!idpf_is_queue_model_split(vport->txq_model))
+ goto check_rxq;
+
+ q_type = VIRTCHNL2_QUEUE_TYPE_TX_COMPLETION;
+ num_ids = idpf_vport_get_queue_ids(qids, IDPF_MAX_QIDS, q_type, chunks);
+ if (num_ids < vport->num_complq) {
+ err = -EINVAL;
+ goto mem_rel;
+ }
+ num_ids = __idpf_vport_queue_ids_init(vport, qids, num_ids, q_type);
+ if (num_ids < vport->num_complq) {
+ err = -EINVAL;
+ goto mem_rel;
+ }
+
+check_rxq:
+ if (!idpf_is_queue_model_split(vport->rxq_model))
+ goto mem_rel;
+
+ q_type = VIRTCHNL2_QUEUE_TYPE_RX_BUFFER;
+ num_ids = idpf_vport_get_queue_ids(qids, IDPF_MAX_QIDS, q_type, chunks);
+ if (num_ids < vport->num_bufq) {
+ err = -EINVAL;
+ goto mem_rel;
+ }
+ num_ids = __idpf_vport_queue_ids_init(vport, qids, num_ids, q_type);
+ if (num_ids < vport->num_bufq)
+ err = -EINVAL;
+
+mem_rel:
+ kfree(qids);
+
+ return err;
+}
+
+/**
+ * idpf_vport_adjust_qs - Adjust to new requested queues
+ * @vport: virtual port data struct
+ *
+ * Renegotiate queues. Returns 0 on success, negative on failure.
+ */
+int idpf_vport_adjust_qs(struct idpf_vport *vport)
+{
+ struct virtchnl2_create_vport vport_msg;
+ int err;
+
+ vport_msg.txq_model = cpu_to_le16(vport->txq_model);
+ vport_msg.rxq_model = cpu_to_le16(vport->rxq_model);
+ err = idpf_vport_calc_total_qs(vport->adapter, vport->idx, &vport_msg,
+ NULL);
+ if (err)
+ return err;
+
+ idpf_vport_init_num_qs(vport, &vport_msg);
+ idpf_vport_calc_num_q_groups(vport);
+
+ return 0;
+}
+
+/**
+ * idpf_is_capability_ena - Default implementation of capability checking
+ * @adapter: Private data struct
+ * @all: all or one flag
+ * @field: caps field to check for flags
+ * @flag: flag to check
+ *
+ * Return true if all capabilities are supported, false otherwise
+ */
+bool idpf_is_capability_ena(struct idpf_adapter *adapter, bool all,
+ enum idpf_cap_field field, u64 flag)
+{
+ u8 *caps = (u8 *)&adapter->caps;
+ u32 *cap_field;
+
+ if (!caps)
+ return false;
+
+ if (field == IDPF_BASE_CAPS)
+ return false;
+
+ cap_field = (u32 *)(caps + field);
+
+ if (all)
+ return (*cap_field & flag) == flag;
+ else
+ return !!(*cap_field & flag);
+}
+
+/**
+ * idpf_get_vport_id: Get vport id
+ * @vport: virtual port structure
+ *
+ * Return vport id from the adapter persistent data
+ */
+u32 idpf_get_vport_id(struct idpf_vport *vport)
+{
+ struct virtchnl2_create_vport *vport_msg;
+
+ vport_msg = vport->adapter->vport_params_recvd[vport->idx];
+
+ return le32_to_cpu(vport_msg->vport_id);
+}
+
+/**
+ * idpf_add_del_mac_filters - Add/del mac filters
+ * @vport: Virtual port data structure
+ * @np: Netdev private structure
+ * @add: Add or delete flag
+ * @async: Don't wait for return message
+ *
+ * Returns 0 on success, error on failure.
+ **/
+int idpf_add_del_mac_filters(struct idpf_vport *vport,
+ struct idpf_netdev_priv *np,
+ bool add, bool async)
+{
+ struct virtchnl2_mac_addr_list *ma_list = NULL;
+ struct idpf_adapter *adapter = np->adapter;
+ struct idpf_vport_config *vport_config;
+ enum idpf_vport_config_flags mac_flag;
+ struct pci_dev *pdev = adapter->pdev;
+ enum idpf_vport_vc_state vc, vc_err;
+ struct virtchnl2_mac_addr *mac_addr;
+ struct idpf_mac_filter *f, *tmp;
+ u32 num_msgs, total_filters = 0;
+ int i = 0, k, err = 0;
+ u32 vop;
+
+ vport_config = adapter->vport_config[np->vport_idx];
+ spin_lock_bh(&vport_config->mac_filter_list_lock);
+
+ /* Find the number of newly added filters */
+ list_for_each_entry(f, &vport_config->user_config.mac_filter_list,
+ list) {
+ if (add && f->add)
+ total_filters++;
+ else if (!add && f->remove)
+ total_filters++;
+ }
+
+ if (!total_filters) {
+ spin_unlock_bh(&vport_config->mac_filter_list_lock);
+
+ return 0;
+ }
+
+ /* Fill all the new filters into virtchannel message */
+ mac_addr = kcalloc(total_filters, sizeof(struct virtchnl2_mac_addr),
+ GFP_ATOMIC);
+ if (!mac_addr) {
+ err = -ENOMEM;
+ spin_unlock_bh(&vport_config->mac_filter_list_lock);
+ goto error;
+ }
+
+ list_for_each_entry_safe(f, tmp, &vport_config->user_config.mac_filter_list,
+ list) {
+ if (add && f->add) {
+ ether_addr_copy(mac_addr[i].addr, f->macaddr);
+ i++;
+ f->add = false;
+ if (i == total_filters)
+ break;
+ }
+ if (!add && f->remove) {
+ ether_addr_copy(mac_addr[i].addr, f->macaddr);
+ i++;
+ f->remove = false;
+ if (i == total_filters)
+ break;
+ }
+ }
+
+ spin_unlock_bh(&vport_config->mac_filter_list_lock);
+
+ if (add) {
+ vop = VIRTCHNL2_OP_ADD_MAC_ADDR;
+ vc = IDPF_VC_ADD_MAC_ADDR;
+ vc_err = IDPF_VC_ADD_MAC_ADDR_ERR;
+ mac_flag = IDPF_VPORT_ADD_MAC_REQ;
+ } else {
+ vop = VIRTCHNL2_OP_DEL_MAC_ADDR;
+ vc = IDPF_VC_DEL_MAC_ADDR;
+ vc_err = IDPF_VC_DEL_MAC_ADDR_ERR;
+ mac_flag = IDPF_VPORT_DEL_MAC_REQ;
+ }
+
+ /* Chunk up the filters into multiple messages to avoid
+ * sending a control queue message buffer that is too large
+ */
+ num_msgs = DIV_ROUND_UP(total_filters, IDPF_NUM_FILTERS_PER_MSG);
+
+ if (!async)
+ mutex_lock(&vport->vc_buf_lock);
+
+ for (i = 0, k = 0; i < num_msgs; i++) {
+ u32 entries_size, buf_size, num_entries;
+
+ num_entries = min_t(u32, total_filters,
+ IDPF_NUM_FILTERS_PER_MSG);
+ entries_size = sizeof(struct virtchnl2_mac_addr) * num_entries;
+ buf_size = struct_size(ma_list, mac_addr_list, num_entries);
+
+ if (!ma_list || num_entries != IDPF_NUM_FILTERS_PER_MSG) {
+ kfree(ma_list);
+ ma_list = kzalloc(buf_size, GFP_ATOMIC);
+ if (!ma_list) {
+ err = -ENOMEM;
+ goto list_prep_error;
+ }
+ } else {
+ memset(ma_list, 0, buf_size);
+ }
+
+ ma_list->vport_id = cpu_to_le32(np->vport_id);
+ ma_list->num_mac_addr = cpu_to_le16(num_entries);
+ memcpy(ma_list->mac_addr_list, &mac_addr[k], entries_size);
+
+ if (async)
+ set_bit(mac_flag, vport_config->flags);
+
+ err = idpf_send_mb_msg(adapter, vop, buf_size, (u8 *)ma_list);
+ if (err)
+ goto mbx_error;
+
+ if (!async) {
+ err = idpf_wait_for_event(adapter, vport, vc, vc_err);
+ if (err)
+ goto mbx_error;
+ }
+
+ k += num_entries;
+ total_filters -= num_entries;
+ }
+
+mbx_error:
+ if (!async)
+ mutex_unlock(&vport->vc_buf_lock);
+ kfree(ma_list);
+list_prep_error:
+ kfree(mac_addr);
+error:
+ if (err)
+ dev_err(&pdev->dev, "Failed to add or del mac filters %d", err);
+
+ return err;
+}
+
+/**
+ * idpf_set_promiscuous - set promiscuous and send message to mailbox
+ * @adapter: Driver specific private structure
+ * @config_data: Vport specific config data
+ * @vport_id: Vport identifier
+ *
+ * Request to enable promiscuous mode for the vport. Message is sent
+ * asynchronously and won't wait for response. Returns 0 on success, negative
+ * on failure;
+ */
+int idpf_set_promiscuous(struct idpf_adapter *adapter,
+ struct idpf_vport_user_config_data *config_data,
+ u32 vport_id)
+{
+ struct virtchnl2_promisc_info vpi;
+ u16 flags = 0;
+ int err;
+
+ if (test_bit(__IDPF_PROMISC_UC, config_data->user_flags))
+ flags |= VIRTCHNL2_UNICAST_PROMISC;
+ if (test_bit(__IDPF_PROMISC_MC, config_data->user_flags))
+ flags |= VIRTCHNL2_MULTICAST_PROMISC;
+
+ vpi.vport_id = cpu_to_le32(vport_id);
+ vpi.flags = cpu_to_le16(flags);
+
+ err = idpf_send_mb_msg(adapter, VIRTCHNL2_OP_CONFIG_PROMISCUOUS_MODE,
+ sizeof(struct virtchnl2_promisc_info),
+ (u8 *)&vpi);
+
+ return err;
+}
diff --git a/drivers/net/ethernet/intel/idpf/virtchnl2.h b/drivers/net/ethernet/intel/idpf/virtchnl2.h
new file mode 100644
index 000000000000..07e72c72d156
--- /dev/null
+++ b/drivers/net/ethernet/intel/idpf/virtchnl2.h
@@ -0,0 +1,1273 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright (C) 2023 Intel Corporation */
+
+#ifndef _VIRTCHNL2_H_
+#define _VIRTCHNL2_H_
+
+/* All opcodes associated with virtchnl2 are prefixed with virtchnl2 or
+ * VIRTCHNL2. Any future opcodes, offloads/capabilities, structures,
+ * and defines must be prefixed with virtchnl2 or VIRTCHNL2 to avoid confusion.
+ *
+ * PF/VF uses the virtchnl2 interface defined in this header file to communicate
+ * with device Control Plane (CP). Driver and the CP may run on different
+ * platforms with different endianness. To avoid byte order discrepancies,
+ * all the structures in this header follow little-endian format.
+ *
+ * This is an interface definition file where existing enums and their values
+ * must remain unchanged over time, so we specify explicit values for all enums.
+ */
+
+#include "virtchnl2_lan_desc.h"
+
+/* This macro is used to generate compilation errors if a structure
+ * is not exactly the correct length.
+ */
+#define VIRTCHNL2_CHECK_STRUCT_LEN(n, X) \
+ static_assert((n) == sizeof(struct X))
+
+/* New major set of opcodes introduced and so leaving room for
+ * old misc opcodes to be added in future. Also these opcodes may only
+ * be used if both the PF and VF have successfully negotiated the
+ * VIRTCHNL version as 2.0 during VIRTCHNL2_OP_VERSION exchange.
+ */
+enum virtchnl2_op {
+ VIRTCHNL2_OP_UNKNOWN = 0,
+ VIRTCHNL2_OP_VERSION = 1,
+ VIRTCHNL2_OP_GET_CAPS = 500,
+ VIRTCHNL2_OP_CREATE_VPORT = 501,
+ VIRTCHNL2_OP_DESTROY_VPORT = 502,
+ VIRTCHNL2_OP_ENABLE_VPORT = 503,
+ VIRTCHNL2_OP_DISABLE_VPORT = 504,
+ VIRTCHNL2_OP_CONFIG_TX_QUEUES = 505,
+ VIRTCHNL2_OP_CONFIG_RX_QUEUES = 506,
+ VIRTCHNL2_OP_ENABLE_QUEUES = 507,
+ VIRTCHNL2_OP_DISABLE_QUEUES = 508,
+ VIRTCHNL2_OP_ADD_QUEUES = 509,
+ VIRTCHNL2_OP_DEL_QUEUES = 510,
+ VIRTCHNL2_OP_MAP_QUEUE_VECTOR = 511,
+ VIRTCHNL2_OP_UNMAP_QUEUE_VECTOR = 512,
+ VIRTCHNL2_OP_GET_RSS_KEY = 513,
+ VIRTCHNL2_OP_SET_RSS_KEY = 514,
+ VIRTCHNL2_OP_GET_RSS_LUT = 515,
+ VIRTCHNL2_OP_SET_RSS_LUT = 516,
+ VIRTCHNL2_OP_GET_RSS_HASH = 517,
+ VIRTCHNL2_OP_SET_RSS_HASH = 518,
+ VIRTCHNL2_OP_SET_SRIOV_VFS = 519,
+ VIRTCHNL2_OP_ALLOC_VECTORS = 520,
+ VIRTCHNL2_OP_DEALLOC_VECTORS = 521,
+ VIRTCHNL2_OP_EVENT = 522,
+ VIRTCHNL2_OP_GET_STATS = 523,
+ VIRTCHNL2_OP_RESET_VF = 524,
+ VIRTCHNL2_OP_GET_EDT_CAPS = 525,
+ VIRTCHNL2_OP_GET_PTYPE_INFO = 526,
+ /* Opcode 527 and 528 are reserved for VIRTCHNL2_OP_GET_PTYPE_ID and
+ * VIRTCHNL2_OP_GET_PTYPE_INFO_RAW.
+ * Opcodes 529, 530, 531, 532 and 533 are reserved.
+ */
+ VIRTCHNL2_OP_LOOPBACK = 534,
+ VIRTCHNL2_OP_ADD_MAC_ADDR = 535,
+ VIRTCHNL2_OP_DEL_MAC_ADDR = 536,
+ VIRTCHNL2_OP_CONFIG_PROMISCUOUS_MODE = 537,
+};
+
+/**
+ * enum virtchnl2_vport_type - Type of virtual port.
+ * @VIRTCHNL2_VPORT_TYPE_DEFAULT: Default virtual port type.
+ */
+enum virtchnl2_vport_type {
+ VIRTCHNL2_VPORT_TYPE_DEFAULT = 0,
+};
+
+/**
+ * enum virtchnl2_queue_model - Type of queue model.
+ * @VIRTCHNL2_QUEUE_MODEL_SINGLE: Single queue model.
+ * @VIRTCHNL2_QUEUE_MODEL_SPLIT: Split queue model.
+ *
+ * In the single queue model, the same transmit descriptor queue is used by
+ * software to post descriptors to hardware and by hardware to post completed
+ * descriptors to software.
+ * Likewise, the same receive descriptor queue is used by hardware to post
+ * completions to software and by software to post buffers to hardware.
+ *
+ * In the split queue model, hardware uses transmit completion queues to post
+ * descriptor/buffer completions to software, while software uses transmit
+ * descriptor queues to post descriptors to hardware.
+ * Likewise, hardware posts descriptor completions to the receive descriptor
+ * queue, while software uses receive buffer queues to post buffers to hardware.
+ */
+enum virtchnl2_queue_model {
+ VIRTCHNL2_QUEUE_MODEL_SINGLE = 0,
+ VIRTCHNL2_QUEUE_MODEL_SPLIT = 1,
+};
+
+/* Checksum offload capability flags */
+enum virtchnl2_cap_txrx_csum {
+ VIRTCHNL2_CAP_TX_CSUM_L3_IPV4 = BIT(0),
+ VIRTCHNL2_CAP_TX_CSUM_L4_IPV4_TCP = BIT(1),
+ VIRTCHNL2_CAP_TX_CSUM_L4_IPV4_UDP = BIT(2),
+ VIRTCHNL2_CAP_TX_CSUM_L4_IPV4_SCTP = BIT(3),
+ VIRTCHNL2_CAP_TX_CSUM_L4_IPV6_TCP = BIT(4),
+ VIRTCHNL2_CAP_TX_CSUM_L4_IPV6_UDP = BIT(5),
+ VIRTCHNL2_CAP_TX_CSUM_L4_IPV6_SCTP = BIT(6),
+ VIRTCHNL2_CAP_TX_CSUM_GENERIC = BIT(7),
+ VIRTCHNL2_CAP_RX_CSUM_L3_IPV4 = BIT(8),
+ VIRTCHNL2_CAP_RX_CSUM_L4_IPV4_TCP = BIT(9),
+ VIRTCHNL2_CAP_RX_CSUM_L4_IPV4_UDP = BIT(10),
+ VIRTCHNL2_CAP_RX_CSUM_L4_IPV4_SCTP = BIT(11),
+ VIRTCHNL2_CAP_RX_CSUM_L4_IPV6_TCP = BIT(12),
+ VIRTCHNL2_CAP_RX_CSUM_L4_IPV6_UDP = BIT(13),
+ VIRTCHNL2_CAP_RX_CSUM_L4_IPV6_SCTP = BIT(14),
+ VIRTCHNL2_CAP_RX_CSUM_GENERIC = BIT(15),
+ VIRTCHNL2_CAP_TX_CSUM_L3_SINGLE_TUNNEL = BIT(16),
+ VIRTCHNL2_CAP_TX_CSUM_L3_DOUBLE_TUNNEL = BIT(17),
+ VIRTCHNL2_CAP_RX_CSUM_L3_SINGLE_TUNNEL = BIT(18),
+ VIRTCHNL2_CAP_RX_CSUM_L3_DOUBLE_TUNNEL = BIT(19),
+ VIRTCHNL2_CAP_TX_CSUM_L4_SINGLE_TUNNEL = BIT(20),
+ VIRTCHNL2_CAP_TX_CSUM_L4_DOUBLE_TUNNEL = BIT(21),
+ VIRTCHNL2_CAP_RX_CSUM_L4_SINGLE_TUNNEL = BIT(22),
+ VIRTCHNL2_CAP_RX_CSUM_L4_DOUBLE_TUNNEL = BIT(23),
+};
+
+/* Segmentation offload capability flags */
+enum virtchnl2_cap_seg {
+ VIRTCHNL2_CAP_SEG_IPV4_TCP = BIT(0),
+ VIRTCHNL2_CAP_SEG_IPV4_UDP = BIT(1),
+ VIRTCHNL2_CAP_SEG_IPV4_SCTP = BIT(2),
+ VIRTCHNL2_CAP_SEG_IPV6_TCP = BIT(3),
+ VIRTCHNL2_CAP_SEG_IPV6_UDP = BIT(4),
+ VIRTCHNL2_CAP_SEG_IPV6_SCTP = BIT(5),
+ VIRTCHNL2_CAP_SEG_GENERIC = BIT(6),
+ VIRTCHNL2_CAP_SEG_TX_SINGLE_TUNNEL = BIT(7),
+ VIRTCHNL2_CAP_SEG_TX_DOUBLE_TUNNEL = BIT(8),
+};
+
+/* Receive Side Scaling Flow type capability flags */
+enum virtchnl2_cap_rss {
+ VIRTCHNL2_CAP_RSS_IPV4_TCP = BIT(0),
+ VIRTCHNL2_CAP_RSS_IPV4_UDP = BIT(1),
+ VIRTCHNL2_CAP_RSS_IPV4_SCTP = BIT(2),
+ VIRTCHNL2_CAP_RSS_IPV4_OTHER = BIT(3),
+ VIRTCHNL2_CAP_RSS_IPV6_TCP = BIT(4),
+ VIRTCHNL2_CAP_RSS_IPV6_UDP = BIT(5),
+ VIRTCHNL2_CAP_RSS_IPV6_SCTP = BIT(6),
+ VIRTCHNL2_CAP_RSS_IPV6_OTHER = BIT(7),
+ VIRTCHNL2_CAP_RSS_IPV4_AH = BIT(8),
+ VIRTCHNL2_CAP_RSS_IPV4_ESP = BIT(9),
+ VIRTCHNL2_CAP_RSS_IPV4_AH_ESP = BIT(10),
+ VIRTCHNL2_CAP_RSS_IPV6_AH = BIT(11),
+ VIRTCHNL2_CAP_RSS_IPV6_ESP = BIT(12),
+ VIRTCHNL2_CAP_RSS_IPV6_AH_ESP = BIT(13),
+};
+
+/* Header split capability flags */
+enum virtchnl2_cap_rx_hsplit_at {
+ /* for prepended metadata */
+ VIRTCHNL2_CAP_RX_HSPLIT_AT_L2 = BIT(0),
+ /* all VLANs go into header buffer */
+ VIRTCHNL2_CAP_RX_HSPLIT_AT_L3 = BIT(1),
+ VIRTCHNL2_CAP_RX_HSPLIT_AT_L4V4 = BIT(2),
+ VIRTCHNL2_CAP_RX_HSPLIT_AT_L4V6 = BIT(3),
+};
+
+/* Receive Side Coalescing offload capability flags */
+enum virtchnl2_cap_rsc {
+ VIRTCHNL2_CAP_RSC_IPV4_TCP = BIT(0),
+ VIRTCHNL2_CAP_RSC_IPV4_SCTP = BIT(1),
+ VIRTCHNL2_CAP_RSC_IPV6_TCP = BIT(2),
+ VIRTCHNL2_CAP_RSC_IPV6_SCTP = BIT(3),
+};
+
+/* Other capability flags */
+enum virtchnl2_cap_other {
+ VIRTCHNL2_CAP_RDMA = BIT_ULL(0),
+ VIRTCHNL2_CAP_SRIOV = BIT_ULL(1),
+ VIRTCHNL2_CAP_MACFILTER = BIT_ULL(2),
+ VIRTCHNL2_CAP_FLOW_DIRECTOR = BIT_ULL(3),
+ /* Queue based scheduling using split queue model */
+ VIRTCHNL2_CAP_SPLITQ_QSCHED = BIT_ULL(4),
+ VIRTCHNL2_CAP_CRC = BIT_ULL(5),
+ VIRTCHNL2_CAP_ADQ = BIT_ULL(6),
+ VIRTCHNL2_CAP_WB_ON_ITR = BIT_ULL(7),
+ VIRTCHNL2_CAP_PROMISC = BIT_ULL(8),
+ VIRTCHNL2_CAP_LINK_SPEED = BIT_ULL(9),
+ VIRTCHNL2_CAP_INLINE_IPSEC = BIT_ULL(10),
+ VIRTCHNL2_CAP_LARGE_NUM_QUEUES = BIT_ULL(11),
+ VIRTCHNL2_CAP_VLAN = BIT_ULL(12),
+ VIRTCHNL2_CAP_PTP = BIT_ULL(13),
+ /* EDT: Earliest Departure Time capability used for Timing Wheel */
+ VIRTCHNL2_CAP_EDT = BIT_ULL(14),
+ VIRTCHNL2_CAP_ADV_RSS = BIT_ULL(15),
+ VIRTCHNL2_CAP_FDIR = BIT_ULL(16),
+ VIRTCHNL2_CAP_RX_FLEX_DESC = BIT_ULL(17),
+ VIRTCHNL2_CAP_PTYPE = BIT_ULL(18),
+ VIRTCHNL2_CAP_LOOPBACK = BIT_ULL(19),
+ /* Other capability 20 is reserved */
+
+ /* this must be the last capability */
+ VIRTCHNL2_CAP_OEM = BIT_ULL(63),
+};
+
+/* underlying device type */
+enum virtchl2_device_type {
+ VIRTCHNL2_MEV_DEVICE = 0,
+};
+
+/**
+ * enum virtchnl2_txq_sched_mode - Transmit Queue Scheduling Modes.
+ * @VIRTCHNL2_TXQ_SCHED_MODE_QUEUE: Queue mode is the legacy mode i.e. inorder
+ * completions where descriptors and buffers
+ * are completed at the same time.
+ * @VIRTCHNL2_TXQ_SCHED_MODE_FLOW: Flow scheduling mode allows for out of order
+ * packet processing where descriptors are
+ * cleaned in order, but buffers can be
+ * completed out of order.
+ */
+enum virtchnl2_txq_sched_mode {
+ VIRTCHNL2_TXQ_SCHED_MODE_QUEUE = 0,
+ VIRTCHNL2_TXQ_SCHED_MODE_FLOW = 1,
+};
+
+/**
+ * enum virtchnl2_rxq_flags - Receive Queue Feature flags.
+ * @VIRTCHNL2_RXQ_RSC: Rx queue RSC flag.
+ * @VIRTCHNL2_RXQ_HDR_SPLIT: Rx queue header split flag.
+ * @VIRTCHNL2_RXQ_IMMEDIATE_WRITE_BACK: When set, packet descriptors are flushed
+ * by hardware immediately after processing
+ * each packet.
+ * @VIRTCHNL2_RX_DESC_SIZE_16BYTE: Rx queue 16 byte descriptor size.
+ * @VIRTCHNL2_RX_DESC_SIZE_32BYTE: Rx queue 32 byte descriptor size.
+ */
+enum virtchnl2_rxq_flags {
+ VIRTCHNL2_RXQ_RSC = BIT(0),
+ VIRTCHNL2_RXQ_HDR_SPLIT = BIT(1),
+ VIRTCHNL2_RXQ_IMMEDIATE_WRITE_BACK = BIT(2),
+ VIRTCHNL2_RX_DESC_SIZE_16BYTE = BIT(3),
+ VIRTCHNL2_RX_DESC_SIZE_32BYTE = BIT(4),
+};
+
+/* Type of RSS algorithm */
+enum virtchnl2_rss_alg {
+ VIRTCHNL2_RSS_ALG_TOEPLITZ_ASYMMETRIC = 0,
+ VIRTCHNL2_RSS_ALG_R_ASYMMETRIC = 1,
+ VIRTCHNL2_RSS_ALG_TOEPLITZ_SYMMETRIC = 2,
+ VIRTCHNL2_RSS_ALG_XOR_SYMMETRIC = 3,
+};
+
+/* Type of event */
+enum virtchnl2_event_codes {
+ VIRTCHNL2_EVENT_UNKNOWN = 0,
+ VIRTCHNL2_EVENT_LINK_CHANGE = 1,
+ /* Event type 2, 3 are reserved */
+};
+
+/* Transmit and Receive queue types are valid in legacy as well as split queue
+ * models. With Split Queue model, 2 additional types are introduced -
+ * TX_COMPLETION and RX_BUFFER. In split queue model, receive corresponds to
+ * the queue where hardware posts completions.
+ */
+enum virtchnl2_queue_type {
+ VIRTCHNL2_QUEUE_TYPE_TX = 0,
+ VIRTCHNL2_QUEUE_TYPE_RX = 1,
+ VIRTCHNL2_QUEUE_TYPE_TX_COMPLETION = 2,
+ VIRTCHNL2_QUEUE_TYPE_RX_BUFFER = 3,
+ VIRTCHNL2_QUEUE_TYPE_CONFIG_TX = 4,
+ VIRTCHNL2_QUEUE_TYPE_CONFIG_RX = 5,
+ /* Queue types 6, 7, 8, 9 are reserved */
+ VIRTCHNL2_QUEUE_TYPE_MBX_TX = 10,
+ VIRTCHNL2_QUEUE_TYPE_MBX_RX = 11,
+};
+
+/* Interrupt throttling rate index */
+enum virtchnl2_itr_idx {
+ VIRTCHNL2_ITR_IDX_0 = 0,
+ VIRTCHNL2_ITR_IDX_1 = 1,
+};
+
+/**
+ * enum virtchnl2_mac_addr_type - MAC address types.
+ * @VIRTCHNL2_MAC_ADDR_PRIMARY: PF/VF driver should set this type for the
+ * primary/device unicast MAC address filter for
+ * VIRTCHNL2_OP_ADD_MAC_ADDR and
+ * VIRTCHNL2_OP_DEL_MAC_ADDR. This allows for the
+ * underlying control plane function to accurately
+ * track the MAC address and for VM/function reset.
+ *
+ * @VIRTCHNL2_MAC_ADDR_EXTRA: PF/VF driver should set this type for any extra
+ * unicast and/or multicast filters that are being
+ * added/deleted via VIRTCHNL2_OP_ADD_MAC_ADDR or
+ * VIRTCHNL2_OP_DEL_MAC_ADDR.
+ */
+enum virtchnl2_mac_addr_type {
+ VIRTCHNL2_MAC_ADDR_PRIMARY = 1,
+ VIRTCHNL2_MAC_ADDR_EXTRA = 2,
+};
+
+/* Flags used for promiscuous mode */
+enum virtchnl2_promisc_flags {
+ VIRTCHNL2_UNICAST_PROMISC = BIT(0),
+ VIRTCHNL2_MULTICAST_PROMISC = BIT(1),
+};
+
+/* Protocol header type within a packet segment. A segment consists of one or
+ * more protocol headers that make up a logical group of protocol headers. Each
+ * logical group of protocol headers encapsulates or is encapsulated using/by
+ * tunneling or encapsulation protocols for network virtualization.
+ */
+enum virtchnl2_proto_hdr_type {
+ /* VIRTCHNL2_PROTO_HDR_ANY is a mandatory protocol id */
+ VIRTCHNL2_PROTO_HDR_ANY = 0,
+ VIRTCHNL2_PROTO_HDR_PRE_MAC = 1,
+ /* VIRTCHNL2_PROTO_HDR_MAC is a mandatory protocol id */
+ VIRTCHNL2_PROTO_HDR_MAC = 2,
+ VIRTCHNL2_PROTO_HDR_POST_MAC = 3,
+ VIRTCHNL2_PROTO_HDR_ETHERTYPE = 4,
+ VIRTCHNL2_PROTO_HDR_VLAN = 5,
+ VIRTCHNL2_PROTO_HDR_SVLAN = 6,
+ VIRTCHNL2_PROTO_HDR_CVLAN = 7,
+ VIRTCHNL2_PROTO_HDR_MPLS = 8,
+ VIRTCHNL2_PROTO_HDR_UMPLS = 9,
+ VIRTCHNL2_PROTO_HDR_MMPLS = 10,
+ VIRTCHNL2_PROTO_HDR_PTP = 11,
+ VIRTCHNL2_PROTO_HDR_CTRL = 12,
+ VIRTCHNL2_PROTO_HDR_LLDP = 13,
+ VIRTCHNL2_PROTO_HDR_ARP = 14,
+ VIRTCHNL2_PROTO_HDR_ECP = 15,
+ VIRTCHNL2_PROTO_HDR_EAPOL = 16,
+ VIRTCHNL2_PROTO_HDR_PPPOD = 17,
+ VIRTCHNL2_PROTO_HDR_PPPOE = 18,
+ /* VIRTCHNL2_PROTO_HDR_IPV4 is a mandatory protocol id */
+ VIRTCHNL2_PROTO_HDR_IPV4 = 19,
+ /* IPv4 and IPv6 Fragment header types are only associated to
+ * VIRTCHNL2_PROTO_HDR_IPV4 and VIRTCHNL2_PROTO_HDR_IPV6 respectively,
+ * cannot be used independently.
+ */
+ /* VIRTCHNL2_PROTO_HDR_IPV4_FRAG is a mandatory protocol id */
+ VIRTCHNL2_PROTO_HDR_IPV4_FRAG = 20,
+ /* VIRTCHNL2_PROTO_HDR_IPV6 is a mandatory protocol id */
+ VIRTCHNL2_PROTO_HDR_IPV6 = 21,
+ /* VIRTCHNL2_PROTO_HDR_IPV6_FRAG is a mandatory protocol id */
+ VIRTCHNL2_PROTO_HDR_IPV6_FRAG = 22,
+ VIRTCHNL2_PROTO_HDR_IPV6_EH = 23,
+ /* VIRTCHNL2_PROTO_HDR_UDP is a mandatory protocol id */
+ VIRTCHNL2_PROTO_HDR_UDP = 24,
+ /* VIRTCHNL2_PROTO_HDR_TCP is a mandatory protocol id */
+ VIRTCHNL2_PROTO_HDR_TCP = 25,
+ /* VIRTCHNL2_PROTO_HDR_SCTP is a mandatory protocol id */
+ VIRTCHNL2_PROTO_HDR_SCTP = 26,
+ /* VIRTCHNL2_PROTO_HDR_ICMP is a mandatory protocol id */
+ VIRTCHNL2_PROTO_HDR_ICMP = 27,
+ /* VIRTCHNL2_PROTO_HDR_ICMPV6 is a mandatory protocol id */
+ VIRTCHNL2_PROTO_HDR_ICMPV6 = 28,
+ VIRTCHNL2_PROTO_HDR_IGMP = 29,
+ VIRTCHNL2_PROTO_HDR_AH = 30,
+ VIRTCHNL2_PROTO_HDR_ESP = 31,
+ VIRTCHNL2_PROTO_HDR_IKE = 32,
+ VIRTCHNL2_PROTO_HDR_NATT_KEEP = 33,
+ /* VIRTCHNL2_PROTO_HDR_PAY is a mandatory protocol id */
+ VIRTCHNL2_PROTO_HDR_PAY = 34,
+ VIRTCHNL2_PROTO_HDR_L2TPV2 = 35,
+ VIRTCHNL2_PROTO_HDR_L2TPV2_CONTROL = 36,
+ VIRTCHNL2_PROTO_HDR_L2TPV3 = 37,
+ VIRTCHNL2_PROTO_HDR_GTP = 38,
+ VIRTCHNL2_PROTO_HDR_GTP_EH = 39,
+ VIRTCHNL2_PROTO_HDR_GTPCV2 = 40,
+ VIRTCHNL2_PROTO_HDR_GTPC_TEID = 41,
+ VIRTCHNL2_PROTO_HDR_GTPU = 42,
+ VIRTCHNL2_PROTO_HDR_GTPU_UL = 43,
+ VIRTCHNL2_PROTO_HDR_GTPU_DL = 44,
+ VIRTCHNL2_PROTO_HDR_ECPRI = 45,
+ VIRTCHNL2_PROTO_HDR_VRRP = 46,
+ VIRTCHNL2_PROTO_HDR_OSPF = 47,
+ /* VIRTCHNL2_PROTO_HDR_TUN is a mandatory protocol id */
+ VIRTCHNL2_PROTO_HDR_TUN = 48,
+ VIRTCHNL2_PROTO_HDR_GRE = 49,
+ VIRTCHNL2_PROTO_HDR_NVGRE = 50,
+ VIRTCHNL2_PROTO_HDR_VXLAN = 51,
+ VIRTCHNL2_PROTO_HDR_VXLAN_GPE = 52,
+ VIRTCHNL2_PROTO_HDR_GENEVE = 53,
+ VIRTCHNL2_PROTO_HDR_NSH = 54,
+ VIRTCHNL2_PROTO_HDR_QUIC = 55,
+ VIRTCHNL2_PROTO_HDR_PFCP = 56,
+ VIRTCHNL2_PROTO_HDR_PFCP_NODE = 57,
+ VIRTCHNL2_PROTO_HDR_PFCP_SESSION = 58,
+ VIRTCHNL2_PROTO_HDR_RTP = 59,
+ VIRTCHNL2_PROTO_HDR_ROCE = 60,
+ VIRTCHNL2_PROTO_HDR_ROCEV1 = 61,
+ VIRTCHNL2_PROTO_HDR_ROCEV2 = 62,
+ /* Protocol ids up to 32767 are reserved.
+ * 32768 - 65534 are used for user defined protocol ids.
+ * VIRTCHNL2_PROTO_HDR_NO_PROTO is a mandatory protocol id.
+ */
+ VIRTCHNL2_PROTO_HDR_NO_PROTO = 65535,
+};
+
+enum virtchl2_version {
+ VIRTCHNL2_VERSION_MINOR_0 = 0,
+ VIRTCHNL2_VERSION_MAJOR_2 = 2,
+};
+
+/**
+ * struct virtchnl2_edt_caps - Get EDT granularity and time horizon.
+ * @tstamp_granularity_ns: Timestamp granularity in nanoseconds.
+ * @time_horizon_ns: Total time window in nanoseconds.
+ *
+ * Associated with VIRTCHNL2_OP_GET_EDT_CAPS.
+ */
+struct virtchnl2_edt_caps {
+ __le64 tstamp_granularity_ns;
+ __le64 time_horizon_ns;
+};
+VIRTCHNL2_CHECK_STRUCT_LEN(16, virtchnl2_edt_caps);
+
+/**
+ * struct virtchnl2_version_info - Version information.
+ * @major: Major version.
+ * @minor: Minor version.
+ *
+ * PF/VF posts its version number to the CP. CP responds with its version number
+ * in the same format, along with a return code.
+ * If there is a major version mismatch, then the PF/VF cannot operate.
+ * If there is a minor version mismatch, then the PF/VF can operate but should
+ * add a warning to the system log.
+ *
+ * This version opcode MUST always be specified as == 1, regardless of other
+ * changes in the API. The CP must always respond to this message without
+ * error regardless of version mismatch.
+ *
+ * Associated with VIRTCHNL2_OP_VERSION.
+ */
+struct virtchnl2_version_info {
+ __le32 major;
+ __le32 minor;
+};
+VIRTCHNL2_CHECK_STRUCT_LEN(8, virtchnl2_version_info);
+
+/**
+ * struct virtchnl2_get_capabilities - Capabilities info.
+ * @csum_caps: See enum virtchnl2_cap_txrx_csum.
+ * @seg_caps: See enum virtchnl2_cap_seg.
+ * @hsplit_caps: See enum virtchnl2_cap_rx_hsplit_at.
+ * @rsc_caps: See enum virtchnl2_cap_rsc.
+ * @rss_caps: See enum virtchnl2_cap_rss.
+ * @other_caps: See enum virtchnl2_cap_other.
+ * @mailbox_dyn_ctl: DYN_CTL register offset and vector id for mailbox
+ * provided by CP.
+ * @mailbox_vector_id: Mailbox vector id.
+ * @num_allocated_vectors: Maximum number of allocated vectors for the device.
+ * @max_rx_q: Maximum number of supported Rx queues.
+ * @max_tx_q: Maximum number of supported Tx queues.
+ * @max_rx_bufq: Maximum number of supported buffer queues.
+ * @max_tx_complq: Maximum number of supported completion queues.
+ * @max_sriov_vfs: The PF sends the maximum VFs it is requesting. The CP
+ * responds with the maximum VFs granted.
+ * @max_vports: Maximum number of vports that can be supported.
+ * @default_num_vports: Default number of vports driver should allocate on load.
+ * @max_tx_hdr_size: Max header length hardware can parse/checksum, in bytes.
+ * @max_sg_bufs_per_tx_pkt: Max number of scatter gather buffers that can be
+ * sent per transmit packet without needing to be
+ * linearized.
+ * @pad: Padding.
+ * @reserved: Reserved.
+ * @device_type: See enum virtchl2_device_type.
+ * @min_sso_packet_len: Min packet length supported by device for single
+ * segment offload.
+ * @max_hdr_buf_per_lso: Max number of header buffers that can be used for
+ * an LSO.
+ * @pad1: Padding for future extensions.
+ *
+ * Dataplane driver sends this message to CP to negotiate capabilities and
+ * provides a virtchnl2_get_capabilities structure with its desired
+ * capabilities, max_sriov_vfs and num_allocated_vectors.
+ * CP responds with a virtchnl2_get_capabilities structure updated
+ * with allowed capabilities and the other fields as below.
+ * If PF sets max_sriov_vfs as 0, CP will respond with max number of VFs
+ * that can be created by this PF. For any other value 'n', CP responds
+ * with max_sriov_vfs set to min(n, x) where x is the max number of VFs
+ * allowed by CP's policy. max_sriov_vfs is not applicable for VFs.
+ * If dataplane driver sets num_allocated_vectors as 0, CP will respond with 1
+ * which is default vector associated with the default mailbox. For any other
+ * value 'n', CP responds with a value <= n based on the CP's policy of
+ * max number of vectors for a PF.
+ * CP will respond with the vector ID of mailbox allocated to the PF in
+ * mailbox_vector_id and the number of itr index registers in itr_idx_map.
+ * It also responds with default number of vports that the dataplane driver
+ * should comeup with in default_num_vports and maximum number of vports that
+ * can be supported in max_vports.
+ *
+ * Associated with VIRTCHNL2_OP_GET_CAPS.
+ */
+struct virtchnl2_get_capabilities {
+ __le32 csum_caps;
+ __le32 seg_caps;
+ __le32 hsplit_caps;
+ __le32 rsc_caps;
+ __le64 rss_caps;
+ __le64 other_caps;
+ __le32 mailbox_dyn_ctl;
+ __le16 mailbox_vector_id;
+ __le16 num_allocated_vectors;
+ __le16 max_rx_q;
+ __le16 max_tx_q;
+ __le16 max_rx_bufq;
+ __le16 max_tx_complq;
+ __le16 max_sriov_vfs;
+ __le16 max_vports;
+ __le16 default_num_vports;
+ __le16 max_tx_hdr_size;
+ u8 max_sg_bufs_per_tx_pkt;
+ u8 pad[3];
+ u8 reserved[4];
+ __le32 device_type;
+ u8 min_sso_packet_len;
+ u8 max_hdr_buf_per_lso;
+ u8 pad1[10];
+};
+VIRTCHNL2_CHECK_STRUCT_LEN(80, virtchnl2_get_capabilities);
+
+/**
+ * struct virtchnl2_queue_reg_chunk - Single queue chunk.
+ * @type: See enum virtchnl2_queue_type.
+ * @start_queue_id: Start Queue ID.
+ * @num_queues: Number of queues in the chunk.
+ * @pad: Padding.
+ * @qtail_reg_start: Queue tail register offset.
+ * @qtail_reg_spacing: Queue tail register spacing.
+ * @pad1: Padding for future extensions.
+ */
+struct virtchnl2_queue_reg_chunk {
+ __le32 type;
+ __le32 start_queue_id;
+ __le32 num_queues;
+ __le32 pad;
+ __le64 qtail_reg_start;
+ __le32 qtail_reg_spacing;
+ u8 pad1[4];
+};
+VIRTCHNL2_CHECK_STRUCT_LEN(32, virtchnl2_queue_reg_chunk);
+
+/**
+ * struct virtchnl2_queue_reg_chunks - Specify several chunks of contiguous
+ * queues.
+ * @num_chunks: Number of chunks.
+ * @pad: Padding.
+ * @chunks: Chunks of queue info.
+ */
+struct virtchnl2_queue_reg_chunks {
+ __le16 num_chunks;
+ u8 pad[6];
+ struct virtchnl2_queue_reg_chunk chunks[];
+};
+VIRTCHNL2_CHECK_STRUCT_LEN(8, virtchnl2_queue_reg_chunks);
+
+/**
+ * struct virtchnl2_create_vport - Create vport config info.
+ * @vport_type: See enum virtchnl2_vport_type.
+ * @txq_model: See virtchnl2_queue_model.
+ * @rxq_model: See virtchnl2_queue_model.
+ * @num_tx_q: Number of Tx queues.
+ * @num_tx_complq: Valid only if txq_model is split queue.
+ * @num_rx_q: Number of Rx queues.
+ * @num_rx_bufq: Valid only if rxq_model is split queue.
+ * @default_rx_q: Relative receive queue index to be used as default.
+ * @vport_index: Used to align PF and CP in case of default multiple vports,
+ * it is filled by the PF and CP returns the same value, to
+ * enable the driver to support multiple asynchronous parallel
+ * CREATE_VPORT requests and associate a response to a specific
+ * request.
+ * @max_mtu: Max MTU. CP populates this field on response.
+ * @vport_id: Vport id. CP populates this field on response.
+ * @default_mac_addr: Default MAC address.
+ * @pad: Padding.
+ * @rx_desc_ids: See VIRTCHNL2_RX_DESC_IDS definitions.
+ * @tx_desc_ids: See VIRTCHNL2_TX_DESC_IDS definitions.
+ * @pad1: Padding.
+ * @rss_algorithm: RSS algorithm.
+ * @rss_key_size: RSS key size.
+ * @rss_lut_size: RSS LUT size.
+ * @rx_split_pos: See enum virtchnl2_cap_rx_hsplit_at.
+ * @pad2: Padding.
+ * @chunks: Chunks of contiguous queues.
+ *
+ * PF sends this message to CP to create a vport by filling in required
+ * fields of virtchnl2_create_vport structure.
+ * CP responds with the updated virtchnl2_create_vport structure containing the
+ * necessary fields followed by chunks which in turn will have an array of
+ * num_chunks entries of virtchnl2_queue_chunk structures.
+ *
+ * Associated with VIRTCHNL2_OP_CREATE_VPORT.
+ */
+struct virtchnl2_create_vport {
+ __le16 vport_type;
+ __le16 txq_model;
+ __le16 rxq_model;
+ __le16 num_tx_q;
+ __le16 num_tx_complq;
+ __le16 num_rx_q;
+ __le16 num_rx_bufq;
+ __le16 default_rx_q;
+ __le16 vport_index;
+ /* CP populates the following fields on response */
+ __le16 max_mtu;
+ __le32 vport_id;
+ u8 default_mac_addr[ETH_ALEN];
+ __le16 pad;
+ __le64 rx_desc_ids;
+ __le64 tx_desc_ids;
+ u8 pad1[72];
+ __le32 rss_algorithm;
+ __le16 rss_key_size;
+ __le16 rss_lut_size;
+ __le32 rx_split_pos;
+ u8 pad2[20];
+ struct virtchnl2_queue_reg_chunks chunks;
+};
+VIRTCHNL2_CHECK_STRUCT_LEN(160, virtchnl2_create_vport);
+
+/**
+ * struct virtchnl2_vport - Vport ID info.
+ * @vport_id: Vport id.
+ * @pad: Padding for future extensions.
+ *
+ * PF sends this message to CP to destroy, enable or disable a vport by filling
+ * in the vport_id in virtchnl2_vport structure.
+ * CP responds with the status of the requested operation.
+ *
+ * Associated with VIRTCHNL2_OP_DESTROY_VPORT, VIRTCHNL2_OP_ENABLE_VPORT,
+ * VIRTCHNL2_OP_DISABLE_VPORT.
+ */
+struct virtchnl2_vport {
+ __le32 vport_id;
+ u8 pad[4];
+};
+VIRTCHNL2_CHECK_STRUCT_LEN(8, virtchnl2_vport);
+
+/**
+ * struct virtchnl2_txq_info - Transmit queue config info
+ * @dma_ring_addr: DMA address.
+ * @type: See enum virtchnl2_queue_type.
+ * @queue_id: Queue ID.
+ * @relative_queue_id: Valid only if queue model is split and type is transmit
+ * queue. Used in many to one mapping of transmit queues to
+ * completion queue.
+ * @model: See enum virtchnl2_queue_model.
+ * @sched_mode: See enum virtchnl2_txq_sched_mode.
+ * @qflags: TX queue feature flags.
+ * @ring_len: Ring length.
+ * @tx_compl_queue_id: Valid only if queue model is split and type is transmit
+ * queue.
+ * @peer_type: Valid only if queue type is VIRTCHNL2_QUEUE_TYPE_MAILBOX_TX
+ * @peer_rx_queue_id: Valid only if queue type is CONFIG_TX and used to deliver
+ * messages for the respective CONFIG_TX queue.
+ * @pad: Padding.
+ * @egress_pasid: Egress PASID info.
+ * @egress_hdr_pasid: Egress HDR passid.
+ * @egress_buf_pasid: Egress buf passid.
+ * @pad1: Padding for future extensions.
+ */
+struct virtchnl2_txq_info {
+ __le64 dma_ring_addr;
+ __le32 type;
+ __le32 queue_id;
+ __le16 relative_queue_id;
+ __le16 model;
+ __le16 sched_mode;
+ __le16 qflags;
+ __le16 ring_len;
+ __le16 tx_compl_queue_id;
+ __le16 peer_type;
+ __le16 peer_rx_queue_id;
+ u8 pad[4];
+ __le32 egress_pasid;
+ __le32 egress_hdr_pasid;
+ __le32 egress_buf_pasid;
+ u8 pad1[8];
+};
+VIRTCHNL2_CHECK_STRUCT_LEN(56, virtchnl2_txq_info);
+
+/**
+ * struct virtchnl2_config_tx_queues - TX queue config.
+ * @vport_id: Vport id.
+ * @num_qinfo: Number of virtchnl2_txq_info structs.
+ * @pad: Padding.
+ * @qinfo: Tx queues config info.
+ *
+ * PF sends this message to set up parameters for one or more transmit queues.
+ * This message contains an array of num_qinfo instances of virtchnl2_txq_info
+ * structures. CP configures requested queues and returns a status code. If
+ * num_qinfo specified is greater than the number of queues associated with the
+ * vport, an error is returned and no queues are configured.
+ *
+ * Associated with VIRTCHNL2_OP_CONFIG_TX_QUEUES.
+ */
+struct virtchnl2_config_tx_queues {
+ __le32 vport_id;
+ __le16 num_qinfo;
+ u8 pad[10];
+ struct virtchnl2_txq_info qinfo[];
+};
+VIRTCHNL2_CHECK_STRUCT_LEN(16, virtchnl2_config_tx_queues);
+
+/**
+ * struct virtchnl2_rxq_info - Receive queue config info.
+ * @desc_ids: See VIRTCHNL2_RX_DESC_IDS definitions.
+ * @dma_ring_addr: See VIRTCHNL2_RX_DESC_IDS definitions.
+ * @type: See enum virtchnl2_queue_type.
+ * @queue_id: Queue id.
+ * @model: See enum virtchnl2_queue_model.
+ * @hdr_buffer_size: Header buffer size.
+ * @data_buffer_size: Data buffer size.
+ * @max_pkt_size: Max packet size.
+ * @ring_len: Ring length.
+ * @buffer_notif_stride: Buffer notification stride in units of 32-descriptors.
+ * This field must be a power of 2.
+ * @pad: Padding.
+ * @dma_head_wb_addr: Applicable only for receive buffer queues.
+ * @qflags: Applicable only for receive completion queues.
+ * See enum virtchnl2_rxq_flags.
+ * @rx_buffer_low_watermark: Rx buffer low watermark.
+ * @rx_bufq1_id: Buffer queue index of the first buffer queue associated with
+ * the Rx queue. Valid only in split queue model.
+ * @rx_bufq2_id: Buffer queue index of the second buffer queue associated with
+ * the Rx queue. Valid only in split queue model.
+ * @bufq2_ena: It indicates if there is a second buffer, rx_bufq2_id is valid
+ * only if this field is set.
+ * @pad1: Padding.
+ * @ingress_pasid: Ingress PASID.
+ * @ingress_hdr_pasid: Ingress PASID header.
+ * @ingress_buf_pasid: Ingress PASID buffer.
+ * @pad2: Padding for future extensions.
+ */
+struct virtchnl2_rxq_info {
+ __le64 desc_ids;
+ __le64 dma_ring_addr;
+ __le32 type;
+ __le32 queue_id;
+ __le16 model;
+ __le16 hdr_buffer_size;
+ __le32 data_buffer_size;
+ __le32 max_pkt_size;
+ __le16 ring_len;
+ u8 buffer_notif_stride;
+ u8 pad;
+ __le64 dma_head_wb_addr;
+ __le16 qflags;
+ __le16 rx_buffer_low_watermark;
+ __le16 rx_bufq1_id;
+ __le16 rx_bufq2_id;
+ u8 bufq2_ena;
+ u8 pad1[3];
+ __le32 ingress_pasid;
+ __le32 ingress_hdr_pasid;
+ __le32 ingress_buf_pasid;
+ u8 pad2[16];
+};
+VIRTCHNL2_CHECK_STRUCT_LEN(88, virtchnl2_rxq_info);
+
+/**
+ * struct virtchnl2_config_rx_queues - Rx queues config.
+ * @vport_id: Vport id.
+ * @num_qinfo: Number of instances.
+ * @pad: Padding.
+ * @qinfo: Rx queues config info.
+ *
+ * PF sends this message to set up parameters for one or more receive queues.
+ * This message contains an array of num_qinfo instances of virtchnl2_rxq_info
+ * structures. CP configures requested queues and returns a status code.
+ * If the number of queues specified is greater than the number of queues
+ * associated with the vport, an error is returned and no queues are configured.
+ *
+ * Associated with VIRTCHNL2_OP_CONFIG_RX_QUEUES.
+ */
+struct virtchnl2_config_rx_queues {
+ __le32 vport_id;
+ __le16 num_qinfo;
+ u8 pad[18];
+ struct virtchnl2_rxq_info qinfo[];
+};
+VIRTCHNL2_CHECK_STRUCT_LEN(24, virtchnl2_config_rx_queues);
+
+/**
+ * struct virtchnl2_add_queues - data for VIRTCHNL2_OP_ADD_QUEUES.
+ * @vport_id: Vport id.
+ * @num_tx_q: Number of Tx qieues.
+ * @num_tx_complq: Number of Tx completion queues.
+ * @num_rx_q: Number of Rx queues.
+ * @num_rx_bufq: Number of Rx buffer queues.
+ * @pad: Padding.
+ * @chunks: Chunks of contiguous queues.
+ *
+ * PF sends this message to request additional transmit/receive queues beyond
+ * the ones that were assigned via CREATE_VPORT request. virtchnl2_add_queues
+ * structure is used to specify the number of each type of queues.
+ * CP responds with the same structure with the actual number of queues assigned
+ * followed by num_chunks of virtchnl2_queue_chunk structures.
+ *
+ * Associated with VIRTCHNL2_OP_ADD_QUEUES.
+ */
+struct virtchnl2_add_queues {
+ __le32 vport_id;
+ __le16 num_tx_q;
+ __le16 num_tx_complq;
+ __le16 num_rx_q;
+ __le16 num_rx_bufq;
+ u8 pad[4];
+ struct virtchnl2_queue_reg_chunks chunks;
+};
+VIRTCHNL2_CHECK_STRUCT_LEN(24, virtchnl2_add_queues);
+
+/**
+ * struct virtchnl2_vector_chunk - Structure to specify a chunk of contiguous
+ * interrupt vectors.
+ * @start_vector_id: Start vector id.
+ * @start_evv_id: Start EVV id.
+ * @num_vectors: Number of vectors.
+ * @pad: Padding.
+ * @dynctl_reg_start: DYN_CTL register offset.
+ * @dynctl_reg_spacing: register spacing between DYN_CTL registers of 2
+ * consecutive vectors.
+ * @itrn_reg_start: ITRN register offset.
+ * @itrn_reg_spacing: Register spacing between dynctl registers of 2
+ * consecutive vectors.
+ * @itrn_index_spacing: Register spacing between itrn registers of the same
+ * vector where n=0..2.
+ * @pad1: Padding for future extensions.
+ *
+ * Register offsets and spacing provided by CP.
+ * Dynamic control registers are used for enabling/disabling/re-enabling
+ * interrupts and updating interrupt rates in the hotpath. Any changes
+ * to interrupt rates in the dynamic control registers will be reflected
+ * in the interrupt throttling rate registers.
+ * itrn registers are used to update interrupt rates for specific
+ * interrupt indices without modifying the state of the interrupt.
+ */
+struct virtchnl2_vector_chunk {
+ __le16 start_vector_id;
+ __le16 start_evv_id;
+ __le16 num_vectors;
+ __le16 pad;
+ __le32 dynctl_reg_start;
+ __le32 dynctl_reg_spacing;
+ __le32 itrn_reg_start;
+ __le32 itrn_reg_spacing;
+ __le32 itrn_index_spacing;
+ u8 pad1[4];
+};
+VIRTCHNL2_CHECK_STRUCT_LEN(32, virtchnl2_vector_chunk);
+
+/**
+ * struct virtchnl2_vector_chunks - chunks of contiguous interrupt vectors.
+ * @num_vchunks: number of vector chunks.
+ * @pad: Padding.
+ * @vchunks: Chunks of contiguous vector info.
+ *
+ * PF sends virtchnl2_vector_chunks struct to specify the vectors it is giving
+ * away. CP performs requested action and returns status.
+ *
+ * Associated with VIRTCHNL2_OP_DEALLOC_VECTORS.
+ */
+struct virtchnl2_vector_chunks {
+ __le16 num_vchunks;
+ u8 pad[14];
+ struct virtchnl2_vector_chunk vchunks[];
+};
+VIRTCHNL2_CHECK_STRUCT_LEN(16, virtchnl2_vector_chunks);
+
+/**
+ * struct virtchnl2_alloc_vectors - vector allocation info.
+ * @num_vectors: Number of vectors.
+ * @pad: Padding.
+ * @vchunks: Chunks of contiguous vector info.
+ *
+ * PF sends this message to request additional interrupt vectors beyond the
+ * ones that were assigned via GET_CAPS request. virtchnl2_alloc_vectors
+ * structure is used to specify the number of vectors requested. CP responds
+ * with the same structure with the actual number of vectors assigned followed
+ * by virtchnl2_vector_chunks structure identifying the vector ids.
+ *
+ * Associated with VIRTCHNL2_OP_ALLOC_VECTORS.
+ */
+struct virtchnl2_alloc_vectors {
+ __le16 num_vectors;
+ u8 pad[14];
+ struct virtchnl2_vector_chunks vchunks;
+};
+VIRTCHNL2_CHECK_STRUCT_LEN(32, virtchnl2_alloc_vectors);
+
+/**
+ * struct virtchnl2_rss_lut - RSS LUT info.
+ * @vport_id: Vport id.
+ * @lut_entries_start: Start of LUT entries.
+ * @lut_entries: Number of LUT entrties.
+ * @pad: Padding.
+ * @lut: RSS lookup table.
+ *
+ * PF sends this message to get or set RSS lookup table. Only supported if
+ * both PF and CP drivers set the VIRTCHNL2_CAP_RSS bit during configuration
+ * negotiation.
+ *
+ * Associated with VIRTCHNL2_OP_GET_RSS_LUT and VIRTCHNL2_OP_SET_RSS_LUT.
+ */
+struct virtchnl2_rss_lut {
+ __le32 vport_id;
+ __le16 lut_entries_start;
+ __le16 lut_entries;
+ u8 pad[4];
+ __le32 lut[];
+};
+VIRTCHNL2_CHECK_STRUCT_LEN(12, virtchnl2_rss_lut);
+
+/**
+ * struct virtchnl2_rss_hash - RSS hash info.
+ * @ptype_groups: Packet type groups bitmap.
+ * @vport_id: Vport id.
+ * @pad: Padding for future extensions.
+ *
+ * PF sends these messages to get and set the hash filter enable bits for RSS.
+ * By default, the CP sets these to all possible traffic types that the
+ * hardware supports. The PF can query this value if it wants to change the
+ * traffic types that are hashed by the hardware.
+ * Only supported if both PF and CP drivers set the VIRTCHNL2_CAP_RSS bit
+ * during configuration negotiation.
+ *
+ * Associated with VIRTCHNL2_OP_GET_RSS_HASH and VIRTCHNL2_OP_SET_RSS_HASH
+ */
+struct virtchnl2_rss_hash {
+ __le64 ptype_groups;
+ __le32 vport_id;
+ u8 pad[4];
+};
+VIRTCHNL2_CHECK_STRUCT_LEN(16, virtchnl2_rss_hash);
+
+/**
+ * struct virtchnl2_sriov_vfs_info - VFs info.
+ * @num_vfs: Number of VFs.
+ * @pad: Padding for future extensions.
+ *
+ * This message is used to set number of SRIOV VFs to be created. The actual
+ * allocation of resources for the VFs in terms of vport, queues and interrupts
+ * is done by CP. When this call completes, the IDPF driver calls
+ * pci_enable_sriov to let the OS instantiate the SRIOV PCIE devices.
+ * The number of VFs set to 0 will destroy all the VFs of this function.
+ *
+ * Associated with VIRTCHNL2_OP_SET_SRIOV_VFS.
+ */
+struct virtchnl2_sriov_vfs_info {
+ __le16 num_vfs;
+ __le16 pad;
+};
+VIRTCHNL2_CHECK_STRUCT_LEN(4, virtchnl2_sriov_vfs_info);
+
+/**
+ * struct virtchnl2_ptype - Packet type info.
+ * @ptype_id_10: 10-bit packet type.
+ * @ptype_id_8: 8-bit packet type.
+ * @proto_id_count: Number of protocol ids the packet supports, maximum of 32
+ * protocol ids are supported.
+ * @pad: Padding.
+ * @proto_id: proto_id_count decides the allocation of protocol id array.
+ * See enum virtchnl2_proto_hdr_type.
+ *
+ * Based on the descriptor type the PF supports, CP fills ptype_id_10 or
+ * ptype_id_8 for flex and base descriptor respectively. If ptype_id_10 value
+ * is set to 0xFFFF, PF should consider this ptype as dummy one and it is the
+ * last ptype.
+ */
+struct virtchnl2_ptype {
+ __le16 ptype_id_10;
+ u8 ptype_id_8;
+ u8 proto_id_count;
+ __le16 pad;
+ __le16 proto_id[];
+};
+VIRTCHNL2_CHECK_STRUCT_LEN(6, virtchnl2_ptype);
+
+/**
+ * struct virtchnl2_get_ptype_info - Packet type info.
+ * @start_ptype_id: Starting ptype ID.
+ * @num_ptypes: Number of packet types from start_ptype_id.
+ * @pad: Padding for future extensions.
+ *
+ * The total number of supported packet types is based on the descriptor type.
+ * For the flex descriptor, it is 1024 (10-bit ptype), and for the base
+ * descriptor, it is 256 (8-bit ptype). Send this message to the CP by
+ * populating the 'start_ptype_id' and the 'num_ptypes'. CP responds with the
+ * 'start_ptype_id', 'num_ptypes', and the array of ptype (virtchnl2_ptype) that
+ * are added at the end of the 'virtchnl2_get_ptype_info' message (Note: There
+ * is no specific field for the ptypes but are added at the end of the
+ * ptype info message. PF/VF is expected to extract the ptypes accordingly.
+ * Reason for doing this is because compiler doesn't allow nested flexible
+ * array fields).
+ *
+ * If all the ptypes don't fit into one mailbox buffer, CP splits the
+ * ptype info into multiple messages, where each message will have its own
+ * 'start_ptype_id', 'num_ptypes', and the ptype array itself. When CP is done
+ * updating all the ptype information extracted from the package (the number of
+ * ptypes extracted might be less than what PF/VF expects), it will append a
+ * dummy ptype (which has 'ptype_id_10' of 'struct virtchnl2_ptype' as 0xFFFF)
+ * to the ptype array.
+ *
+ * PF/VF is expected to receive multiple VIRTCHNL2_OP_GET_PTYPE_INFO messages.
+ *
+ * Associated with VIRTCHNL2_OP_GET_PTYPE_INFO.
+ */
+struct virtchnl2_get_ptype_info {
+ __le16 start_ptype_id;
+ __le16 num_ptypes;
+ __le32 pad;
+};
+VIRTCHNL2_CHECK_STRUCT_LEN(8, virtchnl2_get_ptype_info);
+
+/**
+ * struct virtchnl2_vport_stats - Vport statistics.
+ * @vport_id: Vport id.
+ * @pad: Padding.
+ * @rx_bytes: Received bytes.
+ * @rx_unicast: Received unicast packets.
+ * @rx_multicast: Received multicast packets.
+ * @rx_broadcast: Received broadcast packets.
+ * @rx_discards: Discarded packets on receive.
+ * @rx_errors: Receive errors.
+ * @rx_unknown_protocol: Unlnown protocol.
+ * @tx_bytes: Transmitted bytes.
+ * @tx_unicast: Transmitted unicast packets.
+ * @tx_multicast: Transmitted multicast packets.
+ * @tx_broadcast: Transmitted broadcast packets.
+ * @tx_discards: Discarded packets on transmit.
+ * @tx_errors: Transmit errors.
+ * @rx_invalid_frame_length: Packets with invalid frame length.
+ * @rx_overflow_drop: Packets dropped on buffer overflow.
+ *
+ * PF/VF sends this message to CP to get the update stats by specifying the
+ * vport_id. CP responds with stats in struct virtchnl2_vport_stats.
+ *
+ * Associated with VIRTCHNL2_OP_GET_STATS.
+ */
+struct virtchnl2_vport_stats {
+ __le32 vport_id;
+ u8 pad[4];
+ __le64 rx_bytes;
+ __le64 rx_unicast;
+ __le64 rx_multicast;
+ __le64 rx_broadcast;
+ __le64 rx_discards;
+ __le64 rx_errors;
+ __le64 rx_unknown_protocol;
+ __le64 tx_bytes;
+ __le64 tx_unicast;
+ __le64 tx_multicast;
+ __le64 tx_broadcast;
+ __le64 tx_discards;
+ __le64 tx_errors;
+ __le64 rx_invalid_frame_length;
+ __le64 rx_overflow_drop;
+};
+VIRTCHNL2_CHECK_STRUCT_LEN(128, virtchnl2_vport_stats);
+
+/**
+ * struct virtchnl2_event - Event info.
+ * @event: Event opcode. See enum virtchnl2_event_codes.
+ * @link_speed: Link_speed provided in Mbps.
+ * @vport_id: Vport ID.
+ * @link_status: Link status.
+ * @pad: Padding.
+ * @reserved: Reserved.
+ *
+ * CP sends this message to inform the PF/VF driver of events that may affect
+ * it. No direct response is expected from the driver, though it may generate
+ * other messages in response to this one.
+ *
+ * Associated with VIRTCHNL2_OP_EVENT.
+ */
+struct virtchnl2_event {
+ __le32 event;
+ __le32 link_speed;
+ __le32 vport_id;
+ u8 link_status;
+ u8 pad;
+ __le16 reserved;
+};
+VIRTCHNL2_CHECK_STRUCT_LEN(16, virtchnl2_event);
+
+/**
+ * struct virtchnl2_rss_key - RSS key info.
+ * @vport_id: Vport id.
+ * @key_len: Length of RSS key.
+ * @pad: Padding.
+ * @key_flex: RSS hash key, packed bytes.
+ * PF/VF sends this message to get or set RSS key. Only supported if both
+ * PF/VF and CP drivers set the VIRTCHNL2_CAP_RSS bit during configuration
+ * negotiation.
+ *
+ * Associated with VIRTCHNL2_OP_GET_RSS_KEY and VIRTCHNL2_OP_SET_RSS_KEY.
+ */
+struct virtchnl2_rss_key {
+ __le32 vport_id;
+ __le16 key_len;
+ u8 pad;
+ __DECLARE_FLEX_ARRAY(u8, key_flex);
+};
+VIRTCHNL2_CHECK_STRUCT_LEN(8, virtchnl2_rss_key);
+
+/**
+ * struct virtchnl2_queue_chunk - chunk of contiguous queues
+ * @type: See enum virtchnl2_queue_type.
+ * @start_queue_id: Starting queue id.
+ * @num_queues: Number of queues.
+ * @pad: Padding for future extensions.
+ */
+struct virtchnl2_queue_chunk {
+ __le32 type;
+ __le32 start_queue_id;
+ __le32 num_queues;
+ u8 pad[4];
+};
+VIRTCHNL2_CHECK_STRUCT_LEN(16, virtchnl2_queue_chunk);
+
+/* struct virtchnl2_queue_chunks - chunks of contiguous queues
+ * @num_chunks: Number of chunks.
+ * @pad: Padding.
+ * @chunks: Chunks of contiguous queues info.
+ */
+struct virtchnl2_queue_chunks {
+ __le16 num_chunks;
+ u8 pad[6];
+ struct virtchnl2_queue_chunk chunks[];
+};
+VIRTCHNL2_CHECK_STRUCT_LEN(8, virtchnl2_queue_chunks);
+
+/**
+ * struct virtchnl2_del_ena_dis_queues - Enable/disable queues info.
+ * @vport_id: Vport id.
+ * @pad: Padding.
+ * @chunks: Chunks of contiguous queues info.
+ *
+ * PF sends these messages to enable, disable or delete queues specified in
+ * chunks. PF sends virtchnl2_del_ena_dis_queues struct to specify the queues
+ * to be enabled/disabled/deleted. Also applicable to single queue receive or
+ * transmit. CP performs requested action and returns status.
+ *
+ * Associated with VIRTCHNL2_OP_ENABLE_QUEUES, VIRTCHNL2_OP_DISABLE_QUEUES and
+ * VIRTCHNL2_OP_DISABLE_QUEUES.
+ */
+struct virtchnl2_del_ena_dis_queues {
+ __le32 vport_id;
+ u8 pad[4];
+ struct virtchnl2_queue_chunks chunks;
+};
+VIRTCHNL2_CHECK_STRUCT_LEN(16, virtchnl2_del_ena_dis_queues);
+
+/**
+ * struct virtchnl2_queue_vector - Queue to vector mapping.
+ * @queue_id: Queue id.
+ * @vector_id: Vector id.
+ * @pad: Padding.
+ * @itr_idx: See enum virtchnl2_itr_idx.
+ * @queue_type: See enum virtchnl2_queue_type.
+ * @pad1: Padding for future extensions.
+ */
+struct virtchnl2_queue_vector {
+ __le32 queue_id;
+ __le16 vector_id;
+ u8 pad[2];
+ __le32 itr_idx;
+ __le32 queue_type;
+ u8 pad1[8];
+};
+VIRTCHNL2_CHECK_STRUCT_LEN(24, virtchnl2_queue_vector);
+
+/**
+ * struct virtchnl2_queue_vector_maps - Map/unmap queues info.
+ * @vport_id: Vport id.
+ * @num_qv_maps: Number of queue vector maps.
+ * @pad: Padding.
+ * @qv_maps: Queue to vector maps.
+ *
+ * PF sends this message to map or unmap queues to vectors and interrupt
+ * throttling rate index registers. External data buffer contains
+ * virtchnl2_queue_vector_maps structure that contains num_qv_maps of
+ * virtchnl2_queue_vector structures. CP maps the requested queue vector maps
+ * after validating the queue and vector ids and returns a status code.
+ *
+ * Associated with VIRTCHNL2_OP_MAP_QUEUE_VECTOR and
+ * VIRTCHNL2_OP_UNMAP_QUEUE_VECTOR.
+ */
+struct virtchnl2_queue_vector_maps {
+ __le32 vport_id;
+ __le16 num_qv_maps;
+ u8 pad[10];
+ struct virtchnl2_queue_vector qv_maps[];
+};
+VIRTCHNL2_CHECK_STRUCT_LEN(16, virtchnl2_queue_vector_maps);
+
+/**
+ * struct virtchnl2_loopback - Loopback info.
+ * @vport_id: Vport id.
+ * @enable: Enable/disable.
+ * @pad: Padding for future extensions.
+ *
+ * PF/VF sends this message to transition to/from the loopback state. Setting
+ * the 'enable' to 1 enables the loopback state and setting 'enable' to 0
+ * disables it. CP configures the state to loopback and returns status.
+ *
+ * Associated with VIRTCHNL2_OP_LOOPBACK.
+ */
+struct virtchnl2_loopback {
+ __le32 vport_id;
+ u8 enable;
+ u8 pad[3];
+};
+VIRTCHNL2_CHECK_STRUCT_LEN(8, virtchnl2_loopback);
+
+/* struct virtchnl2_mac_addr - MAC address info.
+ * @addr: MAC address.
+ * @type: MAC type. See enum virtchnl2_mac_addr_type.
+ * @pad: Padding for future extensions.
+ */
+struct virtchnl2_mac_addr {
+ u8 addr[ETH_ALEN];
+ u8 type;
+ u8 pad;
+};
+VIRTCHNL2_CHECK_STRUCT_LEN(8, virtchnl2_mac_addr);
+
+/**
+ * struct virtchnl2_mac_addr_list - List of MAC addresses.
+ * @vport_id: Vport id.
+ * @num_mac_addr: Number of MAC addresses.
+ * @pad: Padding.
+ * @mac_addr_list: List with MAC address info.
+ *
+ * PF/VF driver uses this structure to send list of MAC addresses to be
+ * added/deleted to the CP where as CP performs the action and returns the
+ * status.
+ *
+ * Associated with VIRTCHNL2_OP_ADD_MAC_ADDR and VIRTCHNL2_OP_DEL_MAC_ADDR.
+ */
+struct virtchnl2_mac_addr_list {
+ __le32 vport_id;
+ __le16 num_mac_addr;
+ u8 pad[2];
+ struct virtchnl2_mac_addr mac_addr_list[];
+};
+VIRTCHNL2_CHECK_STRUCT_LEN(8, virtchnl2_mac_addr_list);
+
+/**
+ * struct virtchnl2_promisc_info - Promisc type info.
+ * @vport_id: Vport id.
+ * @flags: See enum virtchnl2_promisc_flags.
+ * @pad: Padding for future extensions.
+ *
+ * PF/VF sends vport id and flags to the CP where as CP performs the action
+ * and returns the status.
+ *
+ * Associated with VIRTCHNL2_OP_CONFIG_PROMISCUOUS_MODE.
+ */
+struct virtchnl2_promisc_info {
+ __le32 vport_id;
+ /* See VIRTCHNL2_PROMISC_FLAGS definitions */
+ __le16 flags;
+ u8 pad[2];
+};
+VIRTCHNL2_CHECK_STRUCT_LEN(8, virtchnl2_promisc_info);
+
+#endif /* _VIRTCHNL_2_H_ */
diff --git a/drivers/net/ethernet/intel/idpf/virtchnl2_lan_desc.h b/drivers/net/ethernet/intel/idpf/virtchnl2_lan_desc.h
new file mode 100644
index 000000000000..f1b577f1c452
--- /dev/null
+++ b/drivers/net/ethernet/intel/idpf/virtchnl2_lan_desc.h
@@ -0,0 +1,451 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright (C) 2023 Intel Corporation */
+
+#ifndef _VIRTCHNL2_LAN_DESC_H_
+#define _VIRTCHNL2_LAN_DESC_H_
+
+#include <linux/bits.h>
+
+/* This is an interface definition file where existing enums and their values
+ * must remain unchanged over time, so we specify explicit values for all enums.
+ */
+
+/* Transmit descriptor ID flags
+ */
+enum virtchnl2_tx_desc_ids {
+ VIRTCHNL2_TXDID_DATA = BIT(0),
+ VIRTCHNL2_TXDID_CTX = BIT(1),
+ /* TXDID bit 2 is reserved
+ * TXDID bit 3 is free for future use
+ * TXDID bit 4 is reserved
+ */
+ VIRTCHNL2_TXDID_FLEX_TSO_CTX = BIT(5),
+ /* TXDID bit 6 is reserved */
+ VIRTCHNL2_TXDID_FLEX_L2TAG1_L2TAG2 = BIT(7),
+ /* TXDID bits 8 and 9 are free for future use
+ * TXDID bit 10 is reserved
+ * TXDID bit 11 is free for future use
+ */
+ VIRTCHNL2_TXDID_FLEX_FLOW_SCHED = BIT(12),
+ /* TXDID bits 13 and 14 are free for future use */
+ VIRTCHNL2_TXDID_DESC_DONE = BIT(15),
+};
+
+/* Receive descriptor IDs */
+enum virtchnl2_rx_desc_ids {
+ VIRTCHNL2_RXDID_1_32B_BASE = 1,
+ /* FLEX_SQ_NIC and FLEX_SPLITQ share desc ids because they can be
+ * differentiated based on queue model; e.g. single queue model can
+ * only use FLEX_SQ_NIC and split queue model can only use FLEX_SPLITQ
+ * for DID 2.
+ */
+ VIRTCHNL2_RXDID_2_FLEX_SPLITQ = 2,
+ VIRTCHNL2_RXDID_2_FLEX_SQ_NIC = VIRTCHNL2_RXDID_2_FLEX_SPLITQ,
+ /* 3 through 6 are reserved */
+ VIRTCHNL2_RXDID_7_HW_RSVD = 7,
+ /* 8 through 15 are free */
+};
+
+/* Receive descriptor ID bitmasks */
+#define VIRTCHNL2_RXDID_M(bit) BIT_ULL(VIRTCHNL2_RXDID_##bit)
+
+enum virtchnl2_rx_desc_id_bitmasks {
+ VIRTCHNL2_RXDID_1_32B_BASE_M = VIRTCHNL2_RXDID_M(1_32B_BASE),
+ VIRTCHNL2_RXDID_2_FLEX_SPLITQ_M = VIRTCHNL2_RXDID_M(2_FLEX_SPLITQ),
+ VIRTCHNL2_RXDID_2_FLEX_SQ_NIC_M = VIRTCHNL2_RXDID_M(2_FLEX_SQ_NIC),
+ VIRTCHNL2_RXDID_7_HW_RSVD_M = VIRTCHNL2_RXDID_M(7_HW_RSVD),
+};
+
+/* For splitq virtchnl2_rx_flex_desc_adv_nic_3 desc members */
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_RXDID_M GENMASK(3, 0)
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_UMBCAST_M GENMASK(7, 6)
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_PTYPE_M GENMASK(9, 0)
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_RAW_CSUM_INV_S 12
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_RAW_CSUM_INV_M \
+ BIT_ULL(VIRTCHNL2_RX_FLEX_DESC_ADV_RAW_CSUM_INV_S)
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_FF0_M GENMASK(15, 13)
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_LEN_PBUF_M GENMASK(13, 0)
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_GEN_S 14
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_GEN_M \
+ BIT_ULL(VIRTCHNL2_RX_FLEX_DESC_ADV_GEN_S)
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_BUFQ_ID_S 15
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_BUFQ_ID_M \
+ BIT_ULL(VIRTCHNL2_RX_FLEX_DESC_ADV_BUFQ_ID_S)
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_LEN_HDR_M GENMASK(9, 0)
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_RSC_S 10
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_RSC_M \
+ BIT_ULL(VIRTCHNL2_RX_FLEX_DESC_ADV_RSC_S)
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_SPH_S 11
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_SPH_M \
+ BIT_ULL(VIRTCHNL2_RX_FLEX_DESC_ADV_SPH_S)
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_FF1_S 12
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_FF1_M GENMASK(14, 12)
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_MISS_S 15
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_MISS_M \
+ BIT_ULL(VIRTCHNL2_RX_FLEX_DESC_ADV_MISS_S)
+
+/* Bitmasks for splitq virtchnl2_rx_flex_desc_adv_nic_3 */
+enum virtchl2_rx_flex_desc_adv_status_error_0_qw1_bits {
+ VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_DD_M = BIT(0),
+ VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_EOF_M = BIT(1),
+ VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_HBO_M = BIT(2),
+ VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_L3L4P_M = BIT(3),
+ VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_IPE_M = BIT(4),
+ VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_L4E_M = BIT(5),
+ VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_EIPE_M = BIT(6),
+ VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_EUDPE_M = BIT(7),
+};
+
+/* Bitmasks for splitq virtchnl2_rx_flex_desc_adv_nic_3 */
+enum virtchnl2_rx_flex_desc_adv_status_error_0_qw0_bits {
+ VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_LPBK_M = BIT(0),
+ VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_IPV6EXADD_M = BIT(1),
+ VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_RXE_M = BIT(2),
+ VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_CRCP_M = BIT(3),
+ VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_RSS_VALID_M = BIT(4),
+ VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_L2TAG1P_M = BIT(5),
+ VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XTRMD0_VALID_M = BIT(6),
+ VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XTRMD1_VALID_M = BIT(7),
+};
+
+/* Bitmasks for splitq virtchnl2_rx_flex_desc_adv_nic_3 */
+enum virtchnl2_rx_flex_desc_adv_status_error_1_bits {
+ VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS1_RSVD_M = GENMASK(1, 0),
+ VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS1_ATRAEFAIL_M = BIT(2),
+ VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS1_L2TAG2P_M = BIT(3),
+ VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS1_XTRMD2_VALID_M = BIT(4),
+ VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS1_XTRMD3_VALID_M = BIT(5),
+ VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS1_XTRMD4_VALID_M = BIT(6),
+ VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS1_XTRMD5_VALID_M = BIT(7),
+};
+
+/* For singleq (flex) virtchnl2_rx_flex_desc fields
+ * For virtchnl2_rx_flex_desc.ptype_flex_flags0 member
+ */
+#define VIRTCHNL2_RX_FLEX_DESC_PTYPE_M GENMASK(9, 0)
+
+/* For virtchnl2_rx_flex_desc.pkt_len member */
+#define VIRTCHNL2_RX_FLEX_DESC_PKT_LEN_M GENMASK(13, 0)
+
+/* Bitmasks for singleq (flex) virtchnl2_rx_flex_desc */
+enum virtchnl2_rx_flex_desc_status_error_0_bits {
+ VIRTCHNL2_RX_FLEX_DESC_STATUS0_DD_M = BIT(0),
+ VIRTCHNL2_RX_FLEX_DESC_STATUS0_EOF_M = BIT(1),
+ VIRTCHNL2_RX_FLEX_DESC_STATUS0_HBO_M = BIT(2),
+ VIRTCHNL2_RX_FLEX_DESC_STATUS0_L3L4P_M = BIT(3),
+ VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_IPE_M = BIT(4),
+ VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_L4E_M = BIT(5),
+ VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_EIPE_M = BIT(6),
+ VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_M = BIT(7),
+ VIRTCHNL2_RX_FLEX_DESC_STATUS0_LPBK_M = BIT(8),
+ VIRTCHNL2_RX_FLEX_DESC_STATUS0_IPV6EXADD_M = BIT(9),
+ VIRTCHNL2_RX_FLEX_DESC_STATUS0_RXE_M = BIT(10),
+ VIRTCHNL2_RX_FLEX_DESC_STATUS0_CRCP_M = BIT(11),
+ VIRTCHNL2_RX_FLEX_DESC_STATUS0_RSS_VALID_M = BIT(12),
+ VIRTCHNL2_RX_FLEX_DESC_STATUS0_L2TAG1P_M = BIT(13),
+ VIRTCHNL2_RX_FLEX_DESC_STATUS0_XTRMD0_VALID_M = BIT(14),
+ VIRTCHNL2_RX_FLEX_DESC_STATUS0_XTRMD1_VALID_M = BIT(15),
+};
+
+/* Bitmasks for singleq (flex) virtchnl2_rx_flex_desc */
+enum virtchnl2_rx_flex_desc_status_error_1_bits {
+ VIRTCHNL2_RX_FLEX_DESC_STATUS1_CPM_M = GENMASK(3, 0),
+ VIRTCHNL2_RX_FLEX_DESC_STATUS1_NAT_M = BIT(4),
+ VIRTCHNL2_RX_FLEX_DESC_STATUS1_CRYPTO_M = BIT(5),
+ /* [10:6] reserved */
+ VIRTCHNL2_RX_FLEX_DESC_STATUS1_L2TAG2P_M = BIT(11),
+ VIRTCHNL2_RX_FLEX_DESC_STATUS1_XTRMD2_VALID_M = BIT(12),
+ VIRTCHNL2_RX_FLEX_DESC_STATUS1_XTRMD3_VALID_M = BIT(13),
+ VIRTCHNL2_RX_FLEX_DESC_STATUS1_XTRMD4_VALID_M = BIT(14),
+ VIRTCHNL2_RX_FLEX_DESC_STATUS1_XTRMD5_VALID_M = BIT(15),
+};
+
+/* For virtchnl2_rx_flex_desc.ts_low member */
+#define VIRTCHNL2_RX_FLEX_TSTAMP_VALID BIT(0)
+
+/* For singleq (non flex) virtchnl2_singleq_base_rx_desc legacy desc members */
+#define VIRTCHNL2_RX_BASE_DESC_QW1_LEN_PBUF_M GENMASK_ULL(51, 38)
+#define VIRTCHNL2_RX_BASE_DESC_QW1_PTYPE_M GENMASK_ULL(37, 30)
+#define VIRTCHNL2_RX_BASE_DESC_QW1_ERROR_M GENMASK_ULL(26, 19)
+#define VIRTCHNL2_RX_BASE_DESC_QW1_STATUS_M GENMASK_ULL(18, 0)
+
+/* Bitmasks for singleq (base) virtchnl2_rx_base_desc */
+enum virtchnl2_rx_base_desc_status_bits {
+ VIRTCHNL2_RX_BASE_DESC_STATUS_DD_M = BIT(0),
+ VIRTCHNL2_RX_BASE_DESC_STATUS_EOF_M = BIT(1),
+ VIRTCHNL2_RX_BASE_DESC_STATUS_L2TAG1P_M = BIT(2),
+ VIRTCHNL2_RX_BASE_DESC_STATUS_L3L4P_M = BIT(3),
+ VIRTCHNL2_RX_BASE_DESC_STATUS_CRCP_M = BIT(4),
+ VIRTCHNL2_RX_BASE_DESC_STATUS_RSVD_M = GENMASK(7, 5),
+ VIRTCHNL2_RX_BASE_DESC_STATUS_EXT_UDP_0_M = BIT(8),
+ VIRTCHNL2_RX_BASE_DESC_STATUS_UMBCAST_M = GENMASK(10, 9),
+ VIRTCHNL2_RX_BASE_DESC_STATUS_FLM_M = BIT(11),
+ VIRTCHNL2_RX_BASE_DESC_STATUS_FLTSTAT_M = GENMASK(13, 12),
+ VIRTCHNL2_RX_BASE_DESC_STATUS_LPBK_M = BIT(14),
+ VIRTCHNL2_RX_BASE_DESC_STATUS_IPV6EXADD_M = BIT(15),
+ VIRTCHNL2_RX_BASE_DESC_STATUS_RSVD1_M = GENMASK(17, 16),
+ VIRTCHNL2_RX_BASE_DESC_STATUS_INT_UDP_0_M = BIT(18),
+};
+
+/* Bitmasks for singleq (base) virtchnl2_rx_base_desc */
+enum virtchnl2_rx_base_desc_error_bits {
+ VIRTCHNL2_RX_BASE_DESC_ERROR_RXE_M = BIT(0),
+ VIRTCHNL2_RX_BASE_DESC_ERROR_ATRAEFAIL_M = BIT(1),
+ VIRTCHNL2_RX_BASE_DESC_ERROR_HBO_M = BIT(2),
+ VIRTCHNL2_RX_BASE_DESC_ERROR_L3L4E_M = GENMASK(5, 3),
+ VIRTCHNL2_RX_BASE_DESC_ERROR_IPE_M = BIT(3),
+ VIRTCHNL2_RX_BASE_DESC_ERROR_L4E_M = BIT(4),
+ VIRTCHNL2_RX_BASE_DESC_ERROR_EIPE_M = BIT(5),
+ VIRTCHNL2_RX_BASE_DESC_ERROR_OVERSIZE_M = BIT(6),
+ VIRTCHNL2_RX_BASE_DESC_ERROR_PPRS_M = BIT(7),
+};
+
+/* Bitmasks for singleq (base) virtchnl2_rx_base_desc */
+#define VIRTCHNL2_RX_BASE_DESC_FLTSTAT_RSS_HASH_M GENMASK(13, 12)
+
+/**
+ * struct virtchnl2_splitq_rx_buf_desc - SplitQ RX buffer descriptor format
+ * @qword0: RX buffer struct.
+ * @qword0.buf_id: Buffer identifier.
+ * @qword0.rsvd0: Reserved.
+ * @qword0.rsvd1: Reserved.
+ * @pkt_addr: Packet buffer address.
+ * @hdr_addr: Header buffer address.
+ * @rsvd2: Rerserved.
+ *
+ * Receive Descriptors
+ * SplitQ buffer
+ * | 16| 0|
+ * ----------------------------------------------------------------
+ * | RSV | Buffer ID |
+ * ----------------------------------------------------------------
+ * | Rx packet buffer address |
+ * ----------------------------------------------------------------
+ * | Rx header buffer address |
+ * ----------------------------------------------------------------
+ * | RSV |
+ * ----------------------------------------------------------------
+ * | 0|
+ */
+struct virtchnl2_splitq_rx_buf_desc {
+ struct {
+ __le16 buf_id;
+ __le16 rsvd0;
+ __le32 rsvd1;
+ } qword0;
+ __le64 pkt_addr;
+ __le64 hdr_addr;
+ __le64 rsvd2;
+};
+
+/**
+ * struct virtchnl2_singleq_rx_buf_desc - SingleQ RX buffer descriptor format.
+ * @pkt_addr: Packet buffer address.
+ * @hdr_addr: Header buffer address.
+ * @rsvd1: Reserved.
+ * @rsvd2: Reserved.
+ *
+ * SingleQ buffer
+ * | 0|
+ * ----------------------------------------------------------------
+ * | Rx packet buffer address |
+ * ----------------------------------------------------------------
+ * | Rx header buffer address |
+ * ----------------------------------------------------------------
+ * | RSV |
+ * ----------------------------------------------------------------
+ * | RSV |
+ * ----------------------------------------------------------------
+ * | 0|
+ */
+struct virtchnl2_singleq_rx_buf_desc {
+ __le64 pkt_addr;
+ __le64 hdr_addr;
+ __le64 rsvd1;
+ __le64 rsvd2;
+};
+
+/**
+ * struct virtchnl2_singleq_base_rx_desc - RX descriptor writeback format.
+ * @qword0: First quad word struct.
+ * @qword0.lo_dword: Lower dual word struct.
+ * @qword0.lo_dword.mirroring_status: Mirrored packet status.
+ * @qword0.lo_dword.l2tag1: Stripped L2 tag from the received packet.
+ * @qword0.hi_dword: High dual word union.
+ * @qword0.hi_dword.rss: RSS hash.
+ * @qword0.hi_dword.fd_id: Flow director filter id.
+ * @qword1: Second quad word struct.
+ * @qword1.status_error_ptype_len: Status/error/PTYPE/length.
+ * @qword2: Third quad word struct.
+ * @qword2.ext_status: Extended status.
+ * @qword2.rsvd: Reserved.
+ * @qword2.l2tag2_1: Extracted L2 tag 2 from the packet.
+ * @qword2.l2tag2_2: Reserved.
+ * @qword3: Fourth quad word struct.
+ * @qword3.reserved: Reserved.
+ * @qword3.fd_id: Flow director filter id.
+ *
+ * Profile ID 0x1, SingleQ, base writeback format
+ */
+struct virtchnl2_singleq_base_rx_desc {
+ struct {
+ struct {
+ __le16 mirroring_status;
+ __le16 l2tag1;
+ } lo_dword;
+ union {
+ __le32 rss;
+ __le32 fd_id;
+ } hi_dword;
+ } qword0;
+ struct {
+ __le64 status_error_ptype_len;
+ } qword1;
+ struct {
+ __le16 ext_status;
+ __le16 rsvd;
+ __le16 l2tag2_1;
+ __le16 l2tag2_2;
+ } qword2;
+ struct {
+ __le32 reserved;
+ __le32 fd_id;
+ } qword3;
+};
+
+/**
+ * struct virtchnl2_rx_flex_desc_nic - RX descriptor writeback format.
+ *
+ * @rxdid: Descriptor builder profile id.
+ * @mir_id_umb_cast: umb_cast=[7:6], mirror=[5:0]
+ * @ptype_flex_flags0: ff0=[15:10], ptype=[9:0]
+ * @pkt_len: Packet length, [15:14] are reserved.
+ * @hdr_len_sph_flex_flags1: ff1/ext=[15:12], sph=[11], header=[10:0].
+ * @status_error0: Status/Error section 0.
+ * @l2tag1: Stripped L2 tag from the received packet
+ * @rss_hash: RSS hash.
+ * @status_error1: Status/Error section 1.
+ * @flexi_flags2: Flexible flags section 2.
+ * @ts_low: Lower word of timestamp value.
+ * @l2tag2_1st: First L2TAG2.
+ * @l2tag2_2nd: Second L2TAG2.
+ * @flow_id: Flow id.
+ * @flex_ts: Timestamp and flexible flow id union.
+ * @flex_ts.ts_high: Timestamp higher word of the timestamp value.
+ * @flex_ts.flex.rsvd: Reserved.
+ * @flex_ts.flex.flow_id_ipv6: IPv6 flow id.
+ *
+ * Profile ID 0x2, SingleQ, flex writeback format
+ */
+struct virtchnl2_rx_flex_desc_nic {
+ /* Qword 0 */
+ u8 rxdid;
+ u8 mir_id_umb_cast;
+ __le16 ptype_flex_flags0;
+ __le16 pkt_len;
+ __le16 hdr_len_sph_flex_flags1;
+ /* Qword 1 */
+ __le16 status_error0;
+ __le16 l2tag1;
+ __le32 rss_hash;
+ /* Qword 2 */
+ __le16 status_error1;
+ u8 flexi_flags2;
+ u8 ts_low;
+ __le16 l2tag2_1st;
+ __le16 l2tag2_2nd;
+ /* Qword 3 */
+ __le32 flow_id;
+ union {
+ struct {
+ __le16 rsvd;
+ __le16 flow_id_ipv6;
+ } flex;
+ __le32 ts_high;
+ } flex_ts;
+};
+
+/**
+ * struct virtchnl2_rx_flex_desc_adv_nic_3 - RX descriptor writeback format.
+ * @rxdid_ucast: ucast=[7:6], rsvd=[5:4], profile_id=[3:0].
+ * @status_err0_qw0: Status/Error section 0 in quad word 0.
+ * @ptype_err_fflags0: ff0=[15:12], udp_len_err=[11], ip_hdr_err=[10],
+ * ptype=[9:0].
+ * @pktlen_gen_bufq_id: bufq_id=[15] only in splitq, gen=[14] only in splitq,
+ * plen=[13:0].
+ * @hdrlen_flags: miss_prepend=[15], trunc_mirr=[14], int_udp_0=[13],
+ * ext_udp0=[12], sph=[11] only in splitq, rsc=[10]
+ * only in splitq, header=[9:0].
+ * @status_err0_qw1: Status/Error section 0 in quad word 1.
+ * @status_err1: Status/Error section 1.
+ * @fflags1: Flexible flags section 1.
+ * @ts_low: Lower word of timestamp value.
+ * @buf_id: Buffer identifier. Only in splitq mode.
+ * @misc: Union.
+ * @misc.raw_cs: Raw checksum.
+ * @misc.l2tag1: Stripped L2 tag from the received packet
+ * @misc.rscseglen:
+ * @hash1: Lower bits of Rx hash value.
+ * @ff2_mirrid_hash2: Union.
+ * @ff2_mirrid_hash2.fflags2: Flexible flags section 2.
+ * @ff2_mirrid_hash2.mirrorid: Mirror id.
+ * @ff2_mirrid_hash2.rscseglen: RSC segment length.
+ * @hash3: Upper bits of Rx hash value.
+ * @l2tag2: Extracted L2 tag 2 from the packet.
+ * @fmd4: Flexible metadata container 4.
+ * @l2tag1: Stripped L2 tag from the received packet
+ * @fmd6: Flexible metadata container 6.
+ * @ts_high: Timestamp higher word of the timestamp value.
+ *
+ * Profile ID 0x2, SplitQ, flex writeback format
+ *
+ * Flex-field 0: BufferID
+ * Flex-field 1: Raw checksum/L2TAG1/RSC Seg Len (determined by HW)
+ * Flex-field 2: Hash[15:0]
+ * Flex-flags 2: Hash[23:16]
+ * Flex-field 3: L2TAG2
+ * Flex-field 5: L2TAG1
+ * Flex-field 7: Timestamp (upper 32 bits)
+ */
+struct virtchnl2_rx_flex_desc_adv_nic_3 {
+ /* Qword 0 */
+ u8 rxdid_ucast;
+ u8 status_err0_qw0;
+ __le16 ptype_err_fflags0;
+ __le16 pktlen_gen_bufq_id;
+ __le16 hdrlen_flags;
+ /* Qword 1 */
+ u8 status_err0_qw1;
+ u8 status_err1;
+ u8 fflags1;
+ u8 ts_low;
+ __le16 buf_id;
+ union {
+ __le16 raw_cs;
+ __le16 l2tag1;
+ __le16 rscseglen;
+ } misc;
+ /* Qword 2 */
+ __le16 hash1;
+ union {
+ u8 fflags2;
+ u8 mirrorid;
+ u8 hash2;
+ } ff2_mirrid_hash2;
+ u8 hash3;
+ __le16 l2tag2;
+ __le16 fmd4;
+ /* Qword 3 */
+ __le16 l2tag1;
+ __le16 fmd6;
+ __le32 ts_high;
+};
+
+/* Common union for accessing descriptor format structs */
+union virtchnl2_rx_desc {
+ struct virtchnl2_singleq_base_rx_desc base_wb;
+ struct virtchnl2_rx_flex_desc_nic flex_nic_wb;
+ struct virtchnl2_rx_flex_desc_adv_nic_3 flex_adv_nic_3_wb;
+};
+
+#endif /* _VIRTCHNL_LAN_DESC_H_ */
diff --git a/drivers/net/ethernet/korina.c b/drivers/net/ethernet/korina.c
index 5f6ae11212ae..81cf3361a1e5 100644
--- a/drivers/net/ethernet/korina.c
+++ b/drivers/net/ethernet/korina.c
@@ -1380,13 +1380,11 @@ static int korina_probe(struct platform_device *pdev)
return rc;
}
-static int korina_remove(struct platform_device *pdev)
+static void korina_remove(struct platform_device *pdev)
{
struct net_device *dev = platform_get_drvdata(pdev);
unregister_netdev(dev);
-
- return 0;
}
#ifdef CONFIG_OF
@@ -1405,7 +1403,7 @@ static struct platform_driver korina_driver = {
.of_match_table = of_match_ptr(korina_match),
},
.probe = korina_probe,
- .remove = korina_remove,
+ .remove_new = korina_remove,
};
module_platform_driver(korina_driver);
diff --git a/drivers/net/ethernet/lantiq_etop.c b/drivers/net/ethernet/lantiq_etop.c
index f5961bdcc480..1d5b7bb6380f 100644
--- a/drivers/net/ethernet/lantiq_etop.c
+++ b/drivers/net/ethernet/lantiq_etop.c
@@ -721,8 +721,7 @@ err_out:
return err;
}
-static int
-ltq_etop_remove(struct platform_device *pdev)
+static void ltq_etop_remove(struct platform_device *pdev)
{
struct net_device *dev = platform_get_drvdata(pdev);
@@ -732,11 +731,10 @@ ltq_etop_remove(struct platform_device *pdev)
ltq_etop_mdio_cleanup(dev);
unregister_netdev(dev);
}
- return 0;
}
static struct platform_driver ltq_mii_driver = {
- .remove = ltq_etop_remove,
+ .remove_new = ltq_etop_remove,
.driver = {
.name = "ltq_etop",
},
diff --git a/drivers/net/ethernet/lantiq_xrx200.c b/drivers/net/ethernet/lantiq_xrx200.c
index 8d646c7f8c82..8bd4def3622e 100644
--- a/drivers/net/ethernet/lantiq_xrx200.c
+++ b/drivers/net/ethernet/lantiq_xrx200.c
@@ -641,7 +641,7 @@ err_uninit_dma:
return err;
}
-static int xrx200_remove(struct platform_device *pdev)
+static void xrx200_remove(struct platform_device *pdev)
{
struct xrx200_priv *priv = platform_get_drvdata(pdev);
struct net_device *net_dev = priv->net_dev;
@@ -659,8 +659,6 @@ static int xrx200_remove(struct platform_device *pdev)
/* shut down hardware */
xrx200_hw_cleanup(priv);
-
- return 0;
}
static const struct of_device_id xrx200_match[] = {
@@ -671,7 +669,7 @@ MODULE_DEVICE_TABLE(of, xrx200_match);
static struct platform_driver xrx200_driver = {
.probe = xrx200_probe,
- .remove = xrx200_remove,
+ .remove_new = xrx200_remove,
.driver = {
.name = "lantiq,xrx200-net",
.of_match_table = xrx200_match,
diff --git a/drivers/net/ethernet/litex/litex_liteeth.c b/drivers/net/ethernet/litex/litex_liteeth.c
index ffa96059079c..5182fe737c37 100644
--- a/drivers/net/ethernet/litex/litex_liteeth.c
+++ b/drivers/net/ethernet/litex/litex_liteeth.c
@@ -294,13 +294,11 @@ static int liteeth_probe(struct platform_device *pdev)
return 0;
}
-static int liteeth_remove(struct platform_device *pdev)
+static void liteeth_remove(struct platform_device *pdev)
{
struct net_device *netdev = platform_get_drvdata(pdev);
unregister_netdev(netdev);
-
- return 0;
}
static const struct of_device_id liteeth_of_match[] = {
@@ -311,7 +309,7 @@ MODULE_DEVICE_TABLE(of, liteeth_of_match);
static struct platform_driver liteeth_driver = {
.probe = liteeth_probe,
- .remove = liteeth_remove,
+ .remove_new = liteeth_remove,
.driver = {
.name = DRV_NAME,
.of_match_table = liteeth_of_match,
diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c
index 3b129a1c3381..f0bdc06d253d 100644
--- a/drivers/net/ethernet/marvell/mv643xx_eth.c
+++ b/drivers/net/ethernet/marvell/mv643xx_eth.c
@@ -2892,19 +2892,18 @@ err_put_clk:
return ret;
}
-static int mv643xx_eth_shared_remove(struct platform_device *pdev)
+static void mv643xx_eth_shared_remove(struct platform_device *pdev)
{
struct mv643xx_eth_shared_private *msp = platform_get_drvdata(pdev);
mv643xx_eth_shared_of_remove();
if (!IS_ERR(msp->clk))
clk_disable_unprepare(msp->clk);
- return 0;
}
static struct platform_driver mv643xx_eth_shared_driver = {
.probe = mv643xx_eth_shared_probe,
- .remove = mv643xx_eth_shared_remove,
+ .remove_new = mv643xx_eth_shared_remove,
.driver = {
.name = MV643XX_ETH_SHARED_NAME,
.of_match_table = of_match_ptr(mv643xx_eth_shared_ids),
@@ -3279,7 +3278,7 @@ out:
return err;
}
-static int mv643xx_eth_remove(struct platform_device *pdev)
+static void mv643xx_eth_remove(struct platform_device *pdev)
{
struct mv643xx_eth_private *mp = platform_get_drvdata(pdev);
struct net_device *dev = mp->dev;
@@ -3293,8 +3292,6 @@ static int mv643xx_eth_remove(struct platform_device *pdev)
clk_disable_unprepare(mp->clk);
free_netdev(mp->dev);
-
- return 0;
}
static void mv643xx_eth_shutdown(struct platform_device *pdev)
@@ -3311,7 +3308,7 @@ static void mv643xx_eth_shutdown(struct platform_device *pdev)
static struct platform_driver mv643xx_eth_driver = {
.probe = mv643xx_eth_probe,
- .remove = mv643xx_eth_remove,
+ .remove_new = mv643xx_eth_remove,
.shutdown = mv643xx_eth_shutdown,
.driver = {
.name = MV643XX_ETH_NAME,
diff --git a/drivers/net/ethernet/marvell/mvmdio.c b/drivers/net/ethernet/marvell/mvmdio.c
index 674913184ebf..89f26402f8fb 100644
--- a/drivers/net/ethernet/marvell/mvmdio.c
+++ b/drivers/net/ethernet/marvell/mvmdio.c
@@ -388,7 +388,7 @@ out_clk:
return ret;
}
-static int orion_mdio_remove(struct platform_device *pdev)
+static void orion_mdio_remove(struct platform_device *pdev)
{
struct mii_bus *bus = platform_get_drvdata(pdev);
struct orion_mdio_dev *dev = bus->priv;
@@ -404,8 +404,6 @@ static int orion_mdio_remove(struct platform_device *pdev)
clk_disable_unprepare(dev->clk[i]);
clk_put(dev->clk[i]);
}
-
- return 0;
}
static const struct of_device_id orion_mdio_match[] = {
@@ -426,7 +424,7 @@ MODULE_DEVICE_TABLE(acpi, orion_mdio_acpi_match);
static struct platform_driver orion_mdio_driver = {
.probe = orion_mdio_probe,
- .remove = orion_mdio_remove,
+ .remove_new = orion_mdio_remove,
.driver = {
.name = "orion-mdio",
.of_match_table = orion_mdio_match,
diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index d483b8c00ec0..61a430e5bc91 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -5725,7 +5725,7 @@ err_free_irq:
}
/* Device removal routine */
-static int mvneta_remove(struct platform_device *pdev)
+static void mvneta_remove(struct platform_device *pdev)
{
struct net_device *dev = platform_get_drvdata(pdev);
struct mvneta_port *pp = netdev_priv(dev);
@@ -5744,8 +5744,6 @@ static int mvneta_remove(struct platform_device *pdev)
1 << pp->id);
mvneta_bm_put(pp->bm_priv);
}
-
- return 0;
}
#ifdef CONFIG_PM_SLEEP
@@ -5871,7 +5869,7 @@ MODULE_DEVICE_TABLE(of, mvneta_match);
static struct platform_driver mvneta_driver = {
.probe = mvneta_probe,
- .remove = mvneta_remove,
+ .remove_new = mvneta_remove,
.driver = {
.name = MVNETA_DRIVER_NAME,
.of_match_table = mvneta_match,
diff --git a/drivers/net/ethernet/marvell/mvneta_bm.c b/drivers/net/ethernet/marvell/mvneta_bm.c
index 46c942ef2287..3f46a0fed048 100644
--- a/drivers/net/ethernet/marvell/mvneta_bm.c
+++ b/drivers/net/ethernet/marvell/mvneta_bm.c
@@ -457,7 +457,7 @@ err_clk:
return err;
}
-static int mvneta_bm_remove(struct platform_device *pdev)
+static void mvneta_bm_remove(struct platform_device *pdev)
{
struct mvneta_bm *priv = platform_get_drvdata(pdev);
u8 all_ports_map = 0xff;
@@ -475,8 +475,6 @@ static int mvneta_bm_remove(struct platform_device *pdev)
mvneta_bm_write(priv, MVNETA_BM_COMMAND_REG, MVNETA_BM_STOP_MASK);
clk_disable_unprepare(priv->clk);
-
- return 0;
}
static const struct of_device_id mvneta_bm_match[] = {
@@ -487,7 +485,7 @@ MODULE_DEVICE_TABLE(of, mvneta_bm_match);
static struct platform_driver mvneta_bm_driver = {
.probe = mvneta_bm_probe,
- .remove = mvneta_bm_remove,
+ .remove_new = mvneta_bm_remove,
.driver = {
.name = MVNETA_BM_DRIVER_NAME,
.of_match_table = mvneta_bm_match,
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
index 21c3f9b015c8..463e89d3f17a 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
@@ -7662,7 +7662,7 @@ err_pp_clk:
return err;
}
-static int mvpp2_remove(struct platform_device *pdev)
+static void mvpp2_remove(struct platform_device *pdev)
{
struct mvpp2 *priv = platform_get_drvdata(pdev);
struct fwnode_handle *fwnode = pdev->dev.fwnode;
@@ -7700,15 +7700,13 @@ static int mvpp2_remove(struct platform_device *pdev)
}
if (is_acpi_node(port_fwnode))
- return 0;
+ return;
clk_disable_unprepare(priv->axi_clk);
clk_disable_unprepare(priv->mg_core_clk);
clk_disable_unprepare(priv->mg_clk);
clk_disable_unprepare(priv->pp_clk);
clk_disable_unprepare(priv->gop_clk);
-
- return 0;
}
static const struct of_device_id mvpp2_match[] = {
@@ -7734,7 +7732,7 @@ MODULE_DEVICE_TABLE(acpi, mvpp2_acpi_match);
static struct platform_driver mvpp2_driver = {
.probe = mvpp2_probe,
- .remove = mvpp2_remove,
+ .remove_new = mvpp2_remove,
.driver = {
.name = MVPP2_DRIVER_NAME,
.of_match_table = mvpp2_match,
diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_cn9k_pf.c b/drivers/net/ethernet/marvell/octeon_ep/octep_cn9k_pf.c
index 90c3a419932d..d4ee2454675b 100644
--- a/drivers/net/ethernet/marvell/octeon_ep/octep_cn9k_pf.c
+++ b/drivers/net/ethernet/marvell/octeon_ep/octep_cn9k_pf.c
@@ -16,9 +16,6 @@
#define CTRL_MBOX_MAX_PF 128
#define CTRL_MBOX_SZ ((size_t)(0x400000 / CTRL_MBOX_MAX_PF))
-#define FW_HB_INTERVAL_IN_SECS 1
-#define FW_HB_MISS_COUNT 10
-
/* Names of Hardware non-queue generic interrupts */
static char *cn93_non_ioq_msix_names[] = {
"epf_ire_rint",
@@ -250,12 +247,11 @@ static void octep_init_config_cn93_pf(struct octep_device *oct)
link = PCI_DEVFN(PCI_SLOT(oct->pdev->devfn), link);
}
conf->ctrl_mbox_cfg.barmem_addr = (void __iomem *)oct->mmio[2].hw_addr +
- (0x400000ull * 7) +
+ CN93_PEM_BAR4_INDEX_OFFSET +
(link * CTRL_MBOX_SZ);
- conf->hb_interval = FW_HB_INTERVAL_IN_SECS;
- conf->max_hb_miss_cnt = FW_HB_MISS_COUNT;
-
+ conf->fw_info.hb_interval = OCTEP_DEFAULT_FW_HB_INTERVAL;
+ conf->fw_info.hb_miss_count = OCTEP_DEFAULT_FW_HB_MISS_COUNT;
}
/* Setup registers for a hardware Tx Queue */
@@ -373,34 +369,40 @@ static void octep_setup_mbox_regs_cn93_pf(struct octep_device *oct, int q_no)
mbox->mbox_read_reg = oct->mmio[0].hw_addr + CN93_SDP_R_MBOX_VF_PF_DATA(q_no);
}
-/* Process non-ioq interrupts required to keep pf interface running.
- * OEI_RINT is needed for control mailbox
- */
-static bool octep_poll_non_ioq_interrupts_cn93_pf(struct octep_device *oct)
-{
- bool handled = false;
- u64 reg0;
-
- /* Check for OEI INTR */
- reg0 = octep_read_csr64(oct, CN93_SDP_EPF_OEI_RINT);
- if (reg0) {
- dev_info(&oct->pdev->dev,
- "Received OEI_RINT intr: 0x%llx\n",
- reg0);
- octep_write_csr64(oct, CN93_SDP_EPF_OEI_RINT, reg0);
- if (reg0 & CN93_SDP_EPF_OEI_RINT_DATA_BIT_MBOX)
+/* Poll OEI events like heartbeat */
+static void octep_poll_oei_cn93_pf(struct octep_device *oct)
+{
+ u64 reg;
+
+ reg = octep_read_csr64(oct, CN93_SDP_EPF_OEI_RINT);
+ if (reg) {
+ octep_write_csr64(oct, CN93_SDP_EPF_OEI_RINT, reg);
+ if (reg & CN93_SDP_EPF_OEI_RINT_DATA_BIT_MBOX)
queue_work(octep_wq, &oct->ctrl_mbox_task);
- else if (reg0 & CN93_SDP_EPF_OEI_RINT_DATA_BIT_HBEAT)
+ else if (reg & CN93_SDP_EPF_OEI_RINT_DATA_BIT_HBEAT)
atomic_set(&oct->hb_miss_cnt, 0);
-
- handled = true;
}
+}
+
+/* OEI interrupt handler */
+static irqreturn_t octep_oei_intr_handler_cn93_pf(void *dev)
+{
+ struct octep_device *oct = (struct octep_device *)dev;
+
+ octep_poll_oei_cn93_pf(oct);
+ return IRQ_HANDLED;
+}
- return handled;
+/* Process non-ioq interrupts required to keep pf interface running.
+ * OEI_RINT is needed for control mailbox
+ */
+static void octep_poll_non_ioq_interrupts_cn93_pf(struct octep_device *oct)
+{
+ octep_poll_oei_cn93_pf(oct);
}
-/* Interrupts handler for all non-queue generic interrupts. */
-static irqreturn_t octep_non_ioq_intr_handler_cn93_pf(void *dev)
+/* Interrupt handler for input ring error interrupts. */
+static irqreturn_t octep_ire_intr_handler_cn93_pf(void *dev)
{
struct octep_device *oct = (struct octep_device *)dev;
struct pci_dev *pdev = oct->pdev;
@@ -425,8 +427,17 @@ static irqreturn_t octep_non_ioq_intr_handler_cn93_pf(void *dev)
reg_val);
}
}
- goto irq_handled;
}
+ return IRQ_HANDLED;
+}
+
+/* Interrupt handler for output ring error interrupts. */
+static irqreturn_t octep_ore_intr_handler_cn93_pf(void *dev)
+{
+ struct octep_device *oct = (struct octep_device *)dev;
+ struct pci_dev *pdev = oct->pdev;
+ u64 reg_val = 0;
+ int i = 0;
/* Check for ORERR INTR */
reg_val = octep_read_csr64(oct, CN93_SDP_EPF_ORERR_RINT);
@@ -444,9 +455,16 @@ static irqreturn_t octep_non_ioq_intr_handler_cn93_pf(void *dev)
reg_val);
}
}
-
- goto irq_handled;
}
+ return IRQ_HANDLED;
+}
+
+/* Interrupt handler for vf input ring error interrupts. */
+static irqreturn_t octep_vfire_intr_handler_cn93_pf(void *dev)
+{
+ struct octep_device *oct = (struct octep_device *)dev;
+ struct pci_dev *pdev = oct->pdev;
+ u64 reg_val = 0;
/* Check for VFIRE INTR */
reg_val = octep_read_csr64(oct, CN93_SDP_EPF_VFIRE_RINT(0));
@@ -454,8 +472,16 @@ static irqreturn_t octep_non_ioq_intr_handler_cn93_pf(void *dev)
dev_info(&pdev->dev,
"Received VFIRE_RINT intr: 0x%llx\n", reg_val);
octep_write_csr64(oct, CN93_SDP_EPF_VFIRE_RINT(0), reg_val);
- goto irq_handled;
}
+ return IRQ_HANDLED;
+}
+
+/* Interrupt handler for vf output ring error interrupts. */
+static irqreturn_t octep_vfore_intr_handler_cn93_pf(void *dev)
+{
+ struct octep_device *oct = (struct octep_device *)dev;
+ struct pci_dev *pdev = oct->pdev;
+ u64 reg_val = 0;
/* Check for VFORE INTR */
reg_val = octep_read_csr64(oct, CN93_SDP_EPF_VFORE_RINT(0));
@@ -463,19 +489,30 @@ static irqreturn_t octep_non_ioq_intr_handler_cn93_pf(void *dev)
dev_info(&pdev->dev,
"Received VFORE_RINT intr: 0x%llx\n", reg_val);
octep_write_csr64(oct, CN93_SDP_EPF_VFORE_RINT(0), reg_val);
- goto irq_handled;
}
+ return IRQ_HANDLED;
+}
- /* Check for MBOX INTR and OEI INTR */
- if (octep_poll_non_ioq_interrupts_cn93_pf(oct))
- goto irq_handled;
+/* Interrupt handler for dpi dma related interrupts. */
+static irqreturn_t octep_dma_intr_handler_cn93_pf(void *dev)
+{
+ struct octep_device *oct = (struct octep_device *)dev;
+ u64 reg_val = 0;
/* Check for DMA INTR */
reg_val = octep_read_csr64(oct, CN93_SDP_EPF_DMA_RINT);
if (reg_val) {
octep_write_csr64(oct, CN93_SDP_EPF_DMA_RINT, reg_val);
- goto irq_handled;
}
+ return IRQ_HANDLED;
+}
+
+/* Interrupt handler for dpi dma transaction error interrupts for VFs */
+static irqreturn_t octep_dma_vf_intr_handler_cn93_pf(void *dev)
+{
+ struct octep_device *oct = (struct octep_device *)dev;
+ struct pci_dev *pdev = oct->pdev;
+ u64 reg_val = 0;
/* Check for DMA VF INTR */
reg_val = octep_read_csr64(oct, CN93_SDP_EPF_DMA_VF_RINT(0));
@@ -483,8 +520,16 @@ static irqreturn_t octep_non_ioq_intr_handler_cn93_pf(void *dev)
dev_info(&pdev->dev,
"Received DMA_VF_RINT intr: 0x%llx\n", reg_val);
octep_write_csr64(oct, CN93_SDP_EPF_DMA_VF_RINT(0), reg_val);
- goto irq_handled;
}
+ return IRQ_HANDLED;
+}
+
+/* Interrupt handler for pp transaction error interrupts for VFs */
+static irqreturn_t octep_pp_vf_intr_handler_cn93_pf(void *dev)
+{
+ struct octep_device *oct = (struct octep_device *)dev;
+ struct pci_dev *pdev = oct->pdev;
+ u64 reg_val = 0;
/* Check for PPVF INTR */
reg_val = octep_read_csr64(oct, CN93_SDP_EPF_PP_VF_RINT(0));
@@ -492,8 +537,16 @@ static irqreturn_t octep_non_ioq_intr_handler_cn93_pf(void *dev)
dev_info(&pdev->dev,
"Received PP_VF_RINT intr: 0x%llx\n", reg_val);
octep_write_csr64(oct, CN93_SDP_EPF_PP_VF_RINT(0), reg_val);
- goto irq_handled;
}
+ return IRQ_HANDLED;
+}
+
+/* Interrupt handler for mac related interrupts. */
+static irqreturn_t octep_misc_intr_handler_cn93_pf(void *dev)
+{
+ struct octep_device *oct = (struct octep_device *)dev;
+ struct pci_dev *pdev = oct->pdev;
+ u64 reg_val = 0;
/* Check for MISC INTR */
reg_val = octep_read_csr64(oct, CN93_SDP_EPF_MISC_RINT);
@@ -501,11 +554,17 @@ static irqreturn_t octep_non_ioq_intr_handler_cn93_pf(void *dev)
dev_info(&pdev->dev,
"Received MISC_RINT intr: 0x%llx\n", reg_val);
octep_write_csr64(oct, CN93_SDP_EPF_MISC_RINT, reg_val);
- goto irq_handled;
}
+ return IRQ_HANDLED;
+}
+
+/* Interrupts handler for all reserved interrupts. */
+static irqreturn_t octep_rsvd_intr_handler_cn93_pf(void *dev)
+{
+ struct octep_device *oct = (struct octep_device *)dev;
+ struct pci_dev *pdev = oct->pdev;
dev_info(&pdev->dev, "Reserved interrupts raised; Ignore\n");
-irq_handled:
return IRQ_HANDLED;
}
@@ -569,8 +628,15 @@ static void octep_enable_interrupts_cn93_pf(struct octep_device *oct)
octep_write_csr64(oct, CN93_SDP_EPF_IRERR_RINT_ENA_W1S, intr_mask);
octep_write_csr64(oct, CN93_SDP_EPF_ORERR_RINT_ENA_W1S, intr_mask);
octep_write_csr64(oct, CN93_SDP_EPF_OEI_RINT_ENA_W1S, -1ULL);
+
+ octep_write_csr64(oct, CN93_SDP_EPF_VFIRE_RINT_ENA_W1S(0), -1ULL);
+ octep_write_csr64(oct, CN93_SDP_EPF_VFORE_RINT_ENA_W1S(0), -1ULL);
+
octep_write_csr64(oct, CN93_SDP_EPF_MISC_RINT_ENA_W1S, intr_mask);
octep_write_csr64(oct, CN93_SDP_EPF_DMA_RINT_ENA_W1S, intr_mask);
+
+ octep_write_csr64(oct, CN93_SDP_EPF_DMA_VF_RINT_ENA_W1S(0), -1ULL);
+ octep_write_csr64(oct, CN93_SDP_EPF_PP_VF_RINT_ENA_W1S(0), -1ULL);
}
/* Disable all interrupts */
@@ -588,8 +654,15 @@ static void octep_disable_interrupts_cn93_pf(struct octep_device *oct)
octep_write_csr64(oct, CN93_SDP_EPF_IRERR_RINT_ENA_W1C, intr_mask);
octep_write_csr64(oct, CN93_SDP_EPF_ORERR_RINT_ENA_W1C, intr_mask);
octep_write_csr64(oct, CN93_SDP_EPF_OEI_RINT_ENA_W1C, -1ULL);
+
+ octep_write_csr64(oct, CN93_SDP_EPF_VFIRE_RINT_ENA_W1C(0), -1ULL);
+ octep_write_csr64(oct, CN93_SDP_EPF_VFORE_RINT_ENA_W1C(0), -1ULL);
+
octep_write_csr64(oct, CN93_SDP_EPF_MISC_RINT_ENA_W1C, intr_mask);
octep_write_csr64(oct, CN93_SDP_EPF_DMA_RINT_ENA_W1C, intr_mask);
+
+ octep_write_csr64(oct, CN93_SDP_EPF_DMA_VF_RINT_ENA_W1C(0), -1ULL);
+ octep_write_csr64(oct, CN93_SDP_EPF_PP_VF_RINT_ENA_W1C(0), -1ULL);
}
/* Get new Octeon Read Index: index of descriptor that Octeon reads next. */
@@ -722,7 +795,16 @@ void octep_device_setup_cn93_pf(struct octep_device *oct)
oct->hw_ops.setup_oq_regs = octep_setup_oq_regs_cn93_pf;
oct->hw_ops.setup_mbox_regs = octep_setup_mbox_regs_cn93_pf;
- oct->hw_ops.non_ioq_intr_handler = octep_non_ioq_intr_handler_cn93_pf;
+ oct->hw_ops.oei_intr_handler = octep_oei_intr_handler_cn93_pf;
+ oct->hw_ops.ire_intr_handler = octep_ire_intr_handler_cn93_pf;
+ oct->hw_ops.ore_intr_handler = octep_ore_intr_handler_cn93_pf;
+ oct->hw_ops.vfire_intr_handler = octep_vfire_intr_handler_cn93_pf;
+ oct->hw_ops.vfore_intr_handler = octep_vfore_intr_handler_cn93_pf;
+ oct->hw_ops.dma_intr_handler = octep_dma_intr_handler_cn93_pf;
+ oct->hw_ops.dma_vf_intr_handler = octep_dma_vf_intr_handler_cn93_pf;
+ oct->hw_ops.pp_vf_intr_handler = octep_pp_vf_intr_handler_cn93_pf;
+ oct->hw_ops.misc_intr_handler = octep_misc_intr_handler_cn93_pf;
+ oct->hw_ops.rsvd_intr_handler = octep_rsvd_intr_handler_cn93_pf;
oct->hw_ops.ioq_intr_handler = octep_ioq_intr_handler_cn93_pf;
oct->hw_ops.soft_reset = octep_soft_reset_cn93_pf;
oct->hw_ops.reinit_regs = octep_reinit_regs_cn93_pf;
diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_config.h b/drivers/net/ethernet/marvell/octeon_ep/octep_config.h
index df7cd39d9fce..1622a6ebf036 100644
--- a/drivers/net/ethernet/marvell/octeon_ep/octep_config.h
+++ b/drivers/net/ethernet/marvell/octeon_ep/octep_config.h
@@ -49,6 +49,11 @@
/* Default MTU */
#define OCTEP_DEFAULT_MTU 1500
+/* pf heartbeat interval in milliseconds */
+#define OCTEP_DEFAULT_FW_HB_INTERVAL 1000
+/* pf heartbeat miss count */
+#define OCTEP_DEFAULT_FW_HB_MISS_COUNT 20
+
/* Macros to get octeon config params */
#define CFG_GET_IQ_CFG(cfg) ((cfg)->iq)
#define CFG_GET_IQ_NUM_DESC(cfg) ((cfg)->iq.num_descs)
@@ -181,6 +186,16 @@ struct octep_ctrl_mbox_config {
void __iomem *barmem_addr;
};
+/* Info from firmware */
+struct octep_fw_info {
+ /* interface pkind */
+ u16 pkind;
+ /* heartbeat interval in milliseconds */
+ u16 hb_interval;
+ /* heartbeat miss count */
+ u16 hb_miss_count;
+};
+
/* Data Structure to hold configuration limits and active config */
struct octep_config {
/* Input Queue attributes. */
@@ -201,10 +216,7 @@ struct octep_config {
/* ctrl mbox config */
struct octep_ctrl_mbox_config ctrl_mbox_cfg;
- /* Configured maximum heartbeat miss count */
- u32 max_hb_miss_cnt;
-
- /* Configured firmware heartbeat interval in secs */
- u32 hb_interval;
+ /* fw info */
+ struct octep_fw_info fw_info;
};
#endif /* _OCTEP_CONFIG_H_ */
diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_net.c b/drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_net.c
index 17bfd5cdf462..0594607a2585 100644
--- a/drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_net.c
+++ b/drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_net.c
@@ -26,7 +26,7 @@ static atomic_t ctrl_net_msg_id;
/* Control plane version in which OCTEP_CTRL_NET_H2F_CMD was added */
static const u32 octep_ctrl_net_h2f_cmd_versions[OCTEP_CTRL_NET_H2F_CMD_MAX] = {
- [OCTEP_CTRL_NET_H2F_CMD_INVALID ... OCTEP_CTRL_NET_H2F_CMD_LINK_INFO] =
+ [OCTEP_CTRL_NET_H2F_CMD_INVALID ... OCTEP_CTRL_NET_H2F_CMD_GET_INFO] =
OCTEP_CP_VERSION(1, 0, 0)
};
@@ -353,6 +353,28 @@ void octep_ctrl_net_recv_fw_messages(struct octep_device *oct)
}
}
+int octep_ctrl_net_get_info(struct octep_device *oct, int vfid,
+ struct octep_fw_info *info)
+{
+ struct octep_ctrl_net_wait_data d = {0};
+ struct octep_ctrl_net_h2f_resp *resp;
+ struct octep_ctrl_net_h2f_req *req;
+ int err;
+
+ req = &d.data.req;
+ init_send_req(&d.msg, req, 0, vfid);
+ req->hdr.s.cmd = OCTEP_CTRL_NET_H2F_CMD_GET_INFO;
+ req->link_info.cmd = OCTEP_CTRL_NET_CMD_GET;
+ err = octep_send_mbox_req(oct, &d, true);
+ if (err < 0)
+ return err;
+
+ resp = &d.data.resp;
+ memcpy(info, &resp->info.fw_info, sizeof(struct octep_fw_info));
+
+ return 0;
+}
+
int octep_ctrl_net_uninit(struct octep_device *oct)
{
struct octep_ctrl_net_wait_data *pos, *n;
diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_net.h b/drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_net.h
index 1c2ef4ee31d9..b330f370131b 100644
--- a/drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_net.h
+++ b/drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_net.h
@@ -41,6 +41,7 @@ enum octep_ctrl_net_h2f_cmd {
OCTEP_CTRL_NET_H2F_CMD_LINK_STATUS,
OCTEP_CTRL_NET_H2F_CMD_RX_STATE,
OCTEP_CTRL_NET_H2F_CMD_LINK_INFO,
+ OCTEP_CTRL_NET_H2F_CMD_GET_INFO,
OCTEP_CTRL_NET_H2F_CMD_MAX
};
@@ -161,6 +162,11 @@ struct octep_ctrl_net_h2f_resp_cmd_state {
u16 state;
};
+/* get info request */
+struct octep_ctrl_net_h2f_resp_cmd_get_info {
+ struct octep_fw_info fw_info;
+};
+
/* Host to fw response data */
struct octep_ctrl_net_h2f_resp {
union octep_ctrl_net_resp_hdr hdr;
@@ -171,6 +177,7 @@ struct octep_ctrl_net_h2f_resp {
struct octep_ctrl_net_h2f_resp_cmd_state link;
struct octep_ctrl_net_h2f_resp_cmd_state rx;
struct octep_ctrl_net_link_info link_info;
+ struct octep_ctrl_net_h2f_resp_cmd_get_info info;
};
} __packed;
@@ -330,6 +337,17 @@ int octep_ctrl_net_set_link_info(struct octep_device *oct,
*/
void octep_ctrl_net_recv_fw_messages(struct octep_device *oct);
+/** Get info from firmware.
+ *
+ * @param oct: non-null pointer to struct octep_device.
+ * @param vfid: Index of virtual function.
+ * @param info: non-null pointer to struct octep_fw_info.
+ *
+ * return value: 0 on success, -errno on failure.
+ */
+int octep_ctrl_net_get_info(struct octep_device *oct, int vfid,
+ struct octep_fw_info *info);
+
/** Uninitialize data for ctrl net.
*
* @param oct: non-null pointer to struct octep_device.
diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c
index 5b46ca47c8e5..552970c7dec0 100644
--- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c
+++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c
@@ -155,18 +155,153 @@ static void octep_disable_msix(struct octep_device *oct)
}
/**
- * octep_non_ioq_intr_handler() - common handler for all generic interrupts.
+ * octep_oei_intr_handler() - common handler for output endpoint interrupts.
*
* @irq: Interrupt number.
* @data: interrupt data.
*
- * this is common handler for all non-queue (generic) interrupts.
+ * this is common handler for all output endpoint interrupts.
+ */
+static irqreturn_t octep_oei_intr_handler(int irq, void *data)
+{
+ struct octep_device *oct = data;
+
+ return oct->hw_ops.oei_intr_handler(oct);
+}
+
+/**
+ * octep_ire_intr_handler() - common handler for input ring error interrupts.
+ *
+ * @irq: Interrupt number.
+ * @data: interrupt data.
+ *
+ * this is common handler for input ring error interrupts.
+ */
+static irqreturn_t octep_ire_intr_handler(int irq, void *data)
+{
+ struct octep_device *oct = data;
+
+ return oct->hw_ops.ire_intr_handler(oct);
+}
+
+/**
+ * octep_ore_intr_handler() - common handler for output ring error interrupts.
+ *
+ * @irq: Interrupt number.
+ * @data: interrupt data.
+ *
+ * this is common handler for output ring error interrupts.
+ */
+static irqreturn_t octep_ore_intr_handler(int irq, void *data)
+{
+ struct octep_device *oct = data;
+
+ return oct->hw_ops.ore_intr_handler(oct);
+}
+
+/**
+ * octep_vfire_intr_handler() - common handler for vf input ring error interrupts.
+ *
+ * @irq: Interrupt number.
+ * @data: interrupt data.
+ *
+ * this is common handler for vf input ring error interrupts.
+ */
+static irqreturn_t octep_vfire_intr_handler(int irq, void *data)
+{
+ struct octep_device *oct = data;
+
+ return oct->hw_ops.vfire_intr_handler(oct);
+}
+
+/**
+ * octep_vfore_intr_handler() - common handler for vf output ring error interrupts.
+ *
+ * @irq: Interrupt number.
+ * @data: interrupt data.
+ *
+ * this is common handler for vf output ring error interrupts.
+ */
+static irqreturn_t octep_vfore_intr_handler(int irq, void *data)
+{
+ struct octep_device *oct = data;
+
+ return oct->hw_ops.vfore_intr_handler(oct);
+}
+
+/**
+ * octep_dma_intr_handler() - common handler for dpi dma related interrupts.
+ *
+ * @irq: Interrupt number.
+ * @data: interrupt data.
+ *
+ * this is common handler for dpi dma related interrupts.
+ */
+static irqreturn_t octep_dma_intr_handler(int irq, void *data)
+{
+ struct octep_device *oct = data;
+
+ return oct->hw_ops.dma_intr_handler(oct);
+}
+
+/**
+ * octep_dma_vf_intr_handler() - common handler for dpi dma transaction error interrupts for VFs.
+ *
+ * @irq: Interrupt number.
+ * @data: interrupt data.
+ *
+ * this is common handler for dpi dma transaction error interrupts for VFs.
*/
-static irqreturn_t octep_non_ioq_intr_handler(int irq, void *data)
+static irqreturn_t octep_dma_vf_intr_handler(int irq, void *data)
{
struct octep_device *oct = data;
- return oct->hw_ops.non_ioq_intr_handler(oct);
+ return oct->hw_ops.dma_vf_intr_handler(oct);
+}
+
+/**
+ * octep_pp_vf_intr_handler() - common handler for pp transaction error interrupts for VFs.
+ *
+ * @irq: Interrupt number.
+ * @data: interrupt data.
+ *
+ * this is common handler for pp transaction error interrupts for VFs.
+ */
+static irqreturn_t octep_pp_vf_intr_handler(int irq, void *data)
+{
+ struct octep_device *oct = data;
+
+ return oct->hw_ops.pp_vf_intr_handler(oct);
+}
+
+/**
+ * octep_misc_intr_handler() - common handler for mac related interrupts.
+ *
+ * @irq: Interrupt number.
+ * @data: interrupt data.
+ *
+ * this is common handler for mac related interrupts.
+ */
+static irqreturn_t octep_misc_intr_handler(int irq, void *data)
+{
+ struct octep_device *oct = data;
+
+ return oct->hw_ops.misc_intr_handler(oct);
+}
+
+/**
+ * octep_rsvd_intr_handler() - common handler for reserved interrupts (future use).
+ *
+ * @irq: Interrupt number.
+ * @data: interrupt data.
+ *
+ * this is common handler for all reserved interrupts.
+ */
+static irqreturn_t octep_rsvd_intr_handler(int irq, void *data)
+{
+ struct octep_device *oct = data;
+
+ return oct->hw_ops.rsvd_intr_handler(oct);
}
/**
@@ -222,9 +357,57 @@ static int octep_request_irqs(struct octep_device *oct)
snprintf(irq_name, OCTEP_MSIX_NAME_SIZE,
"%s-%s", netdev->name, non_ioq_msix_names[i]);
- ret = request_irq(msix_entry->vector,
- octep_non_ioq_intr_handler, 0,
- irq_name, oct);
+ if (!strncmp(non_ioq_msix_names[i], "epf_oei_rint",
+ strlen("epf_oei_rint"))) {
+ ret = request_irq(msix_entry->vector,
+ octep_oei_intr_handler, 0,
+ irq_name, oct);
+ } else if (!strncmp(non_ioq_msix_names[i], "epf_ire_rint",
+ strlen("epf_ire_rint"))) {
+ ret = request_irq(msix_entry->vector,
+ octep_ire_intr_handler, 0,
+ irq_name, oct);
+ } else if (!strncmp(non_ioq_msix_names[i], "epf_ore_rint",
+ strlen("epf_ore_rint"))) {
+ ret = request_irq(msix_entry->vector,
+ octep_ore_intr_handler, 0,
+ irq_name, oct);
+ } else if (!strncmp(non_ioq_msix_names[i], "epf_vfire_rint",
+ strlen("epf_vfire_rint"))) {
+ ret = request_irq(msix_entry->vector,
+ octep_vfire_intr_handler, 0,
+ irq_name, oct);
+ } else if (!strncmp(non_ioq_msix_names[i], "epf_vfore_rint",
+ strlen("epf_vfore_rint"))) {
+ ret = request_irq(msix_entry->vector,
+ octep_vfore_intr_handler, 0,
+ irq_name, oct);
+ } else if (!strncmp(non_ioq_msix_names[i], "epf_dma_rint",
+ strlen("epf_dma_rint"))) {
+ ret = request_irq(msix_entry->vector,
+ octep_dma_intr_handler, 0,
+ irq_name, oct);
+ } else if (!strncmp(non_ioq_msix_names[i], "epf_dma_vf_rint",
+ strlen("epf_dma_vf_rint"))) {
+ ret = request_irq(msix_entry->vector,
+ octep_dma_vf_intr_handler, 0,
+ irq_name, oct);
+ } else if (!strncmp(non_ioq_msix_names[i], "epf_pp_vf_rint",
+ strlen("epf_pp_vf_rint"))) {
+ ret = request_irq(msix_entry->vector,
+ octep_pp_vf_intr_handler, 0,
+ irq_name, oct);
+ } else if (!strncmp(non_ioq_msix_names[i], "epf_misc_rint",
+ strlen("epf_misc_rint"))) {
+ ret = request_irq(msix_entry->vector,
+ octep_misc_intr_handler, 0,
+ irq_name, oct);
+ } else {
+ ret = request_irq(msix_entry->vector,
+ octep_rsvd_intr_handler, 0,
+ irq_name, oct);
+ }
+
if (ret) {
netdev_err(netdev,
"request_irq failed for %s; err=%d",
@@ -917,9 +1100,9 @@ static void octep_hb_timeout_task(struct work_struct *work)
int miss_cnt;
miss_cnt = atomic_inc_return(&oct->hb_miss_cnt);
- if (miss_cnt < oct->conf->max_hb_miss_cnt) {
+ if (miss_cnt < oct->conf->fw_info.hb_miss_count) {
queue_delayed_work(octep_wq, &oct->hb_task,
- msecs_to_jiffies(oct->conf->hb_interval * 1000));
+ msecs_to_jiffies(oct->conf->fw_info.hb_interval));
return;
}
@@ -1012,8 +1195,7 @@ int octep_device_setup(struct octep_device *oct)
atomic_set(&oct->hb_miss_cnt, 0);
INIT_DELAYED_WORK(&oct->hb_task, octep_hb_timeout_task);
- queue_delayed_work(octep_wq, &oct->hb_task,
- msecs_to_jiffies(oct->conf->hb_interval * 1000));
+
return 0;
unsupported_dev:
@@ -1142,6 +1324,15 @@ static int octep_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
dev_err(&pdev->dev, "Device setup failed\n");
goto err_octep_config;
}
+
+ octep_ctrl_net_get_info(octep_dev, OCTEP_CTRL_NET_INVALID_VFID,
+ &octep_dev->conf->fw_info);
+ dev_info(&octep_dev->pdev->dev, "Heartbeat interval %u msecs Heartbeat miss count %u\n",
+ octep_dev->conf->fw_info.hb_interval,
+ octep_dev->conf->fw_info.hb_miss_count);
+ queue_delayed_work(octep_wq, &octep_dev->hb_task,
+ msecs_to_jiffies(octep_dev->conf->fw_info.hb_interval));
+
INIT_WORK(&octep_dev->tx_timeout_task, octep_tx_timeout_task);
INIT_WORK(&octep_dev->ctrl_mbox_task, octep_ctrl_mbox_task);
INIT_DELAYED_WORK(&octep_dev->intr_poll_task, octep_intr_poll_task);
diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.h b/drivers/net/ethernet/marvell/octeon_ep/octep_main.h
index e0907a719133..6df902ebb7f3 100644
--- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.h
+++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.h
@@ -65,7 +65,16 @@ struct octep_hw_ops {
void (*setup_oq_regs)(struct octep_device *oct, int q);
void (*setup_mbox_regs)(struct octep_device *oct, int mbox);
- irqreturn_t (*non_ioq_intr_handler)(void *ioq_vector);
+ irqreturn_t (*oei_intr_handler)(void *ioq_vector);
+ irqreturn_t (*ire_intr_handler)(void *ioq_vector);
+ irqreturn_t (*ore_intr_handler)(void *ioq_vector);
+ irqreturn_t (*vfire_intr_handler)(void *ioq_vector);
+ irqreturn_t (*vfore_intr_handler)(void *ioq_vector);
+ irqreturn_t (*dma_intr_handler)(void *ioq_vector);
+ irqreturn_t (*dma_vf_intr_handler)(void *ioq_vector);
+ irqreturn_t (*pp_vf_intr_handler)(void *ioq_vector);
+ irqreturn_t (*misc_intr_handler)(void *ioq_vector);
+ irqreturn_t (*rsvd_intr_handler)(void *ioq_vector);
irqreturn_t (*ioq_intr_handler)(void *ioq_vector);
int (*soft_reset)(struct octep_device *oct);
void (*reinit_regs)(struct octep_device *oct);
@@ -73,7 +82,7 @@ struct octep_hw_ops {
void (*enable_interrupts)(struct octep_device *oct);
void (*disable_interrupts)(struct octep_device *oct);
- bool (*poll_non_ioq_interrupts)(struct octep_device *oct);
+ void (*poll_non_ioq_interrupts)(struct octep_device *oct);
void (*enable_io_queues)(struct octep_device *oct);
void (*disable_io_queues)(struct octep_device *oct);
diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cn9k_pf.h b/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cn9k_pf.h
index b25c3093dc7b..0a43983e9101 100644
--- a/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cn9k_pf.h
+++ b/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cn9k_pf.h
@@ -370,4 +370,8 @@
/* bit 1 for firmware heartbeat interrupt */
#define CN93_SDP_EPF_OEI_RINT_DATA_BIT_HBEAT BIT_ULL(1)
+#define CN93_PEM_BAR4_INDEX 7
+#define CN93_PEM_BAR4_INDEX_SIZE 0x400000ULL
+#define CN93_PEM_BAR4_INDEX_OFFSET (CN93_PEM_BAR4_INDEX * CN93_PEM_BAR4_INDEX_SIZE)
+
#endif /* _OCTEP_REGS_CN9K_PF_H_ */
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
index 6b5b06c2b4e9..c5bd7747fc0e 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
@@ -1574,7 +1574,7 @@ enum ptp_op {
PTP_OP_GET_CLOCK = 1,
PTP_OP_GET_TSTMP = 2,
PTP_OP_SET_THRESH = 3,
- PTP_OP_EXTTS_ON = 4,
+ PTP_OP_PPS_ON = 4,
PTP_OP_ADJTIME = 5,
PTP_OP_SET_CLOCK = 6,
};
@@ -1584,7 +1584,8 @@ struct ptp_req {
u8 op;
s64 scaled_ppm;
u64 thresh;
- int extts_on;
+ u64 period;
+ int pps_on;
s64 delta;
u64 clk;
};
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/ptp.c b/drivers/net/ethernet/marvell/octeontx2/af/ptp.c
index ffbd22797163..bcc96eed2481 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/ptp.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/ptp.c
@@ -46,6 +46,7 @@
#define PTP_PPS_HI_INCR 0xF60ULL
#define PTP_PPS_LO_INCR 0xF68ULL
+#define PTP_PPS_THRESH_LO 0xF50ULL
#define PTP_PPS_THRESH_HI 0xF58ULL
#define PTP_CLOCK_LO 0xF08ULL
@@ -411,29 +412,12 @@ void ptp_start(struct rvu *rvu, u64 sclk, u32 ext_clk_freq, u32 extts)
}
clock_cfg |= PTP_CLOCK_CFG_PTP_EN;
- clock_cfg |= PTP_CLOCK_CFG_PPS_EN | PTP_CLOCK_CFG_PPS_INV;
writeq(clock_cfg, ptp->reg_base + PTP_CLOCK_CFG);
clock_cfg = readq(ptp->reg_base + PTP_CLOCK_CFG);
clock_cfg &= ~PTP_CLOCK_CFG_ATOMIC_OP_MASK;
clock_cfg |= (ATOMIC_SET << 26);
writeq(clock_cfg, ptp->reg_base + PTP_CLOCK_CFG);
- /* Set 50% duty cycle for 1Hz output */
- writeq(0x1dcd650000000000, ptp->reg_base + PTP_PPS_HI_INCR);
- writeq(0x1dcd650000000000, ptp->reg_base + PTP_PPS_LO_INCR);
- if (cn10k_ptp_errata(ptp)) {
- /* The ptp_clock_hi rollsover to zero once clock cycle before it
- * reaches one second boundary. so, program the pps_lo_incr in
- * such a way that the pps threshold value comparison at one
- * second boundary will succeed and pps edge changes. After each
- * one second boundary, the hrtimer handler will be invoked and
- * reprograms the pps threshold value.
- */
- ptp->clock_period = NSEC_PER_SEC / ptp->clock_rate;
- writeq((0x1dcd6500ULL - ptp->clock_period) << 32,
- ptp->reg_base + PTP_PPS_LO_INCR);
- }
-
if (cn10k_ptp_errata(ptp))
clock_comp = ptp_calc_adjusted_comp(ptp->clock_rate);
else
@@ -465,20 +449,68 @@ static int ptp_set_thresh(struct ptp *ptp, u64 thresh)
return 0;
}
-static int ptp_extts_on(struct ptp *ptp, int on)
+static int ptp_config_hrtimer(struct ptp *ptp, int on)
{
u64 ptp_clock_hi;
- if (cn10k_ptp_errata(ptp)) {
- if (on) {
- ptp_clock_hi = readq(ptp->reg_base + PTP_CLOCK_HI);
- ptp_hrtimer_start(ptp, (ktime_t)ptp_clock_hi);
- } else {
- if (hrtimer_active(&ptp->hrtimer))
- hrtimer_cancel(&ptp->hrtimer);
+ if (on) {
+ ptp_clock_hi = readq(ptp->reg_base + PTP_CLOCK_HI);
+ ptp_hrtimer_start(ptp, (ktime_t)ptp_clock_hi);
+ } else {
+ if (hrtimer_active(&ptp->hrtimer))
+ hrtimer_cancel(&ptp->hrtimer);
+ }
+
+ return 0;
+}
+
+static int ptp_pps_on(struct ptp *ptp, int on, u64 period)
+{
+ u64 clock_cfg;
+
+ clock_cfg = readq(ptp->reg_base + PTP_CLOCK_CFG);
+ if (on) {
+ if (cn10k_ptp_errata(ptp) && period != NSEC_PER_SEC) {
+ dev_err(&ptp->pdev->dev, "Supports max period value as 1 second\n");
+ return -EINVAL;
}
+
+ if (period > (8 * NSEC_PER_SEC)) {
+ dev_err(&ptp->pdev->dev, "Supports max period as 8 seconds\n");
+ return -EINVAL;
+ }
+
+ clock_cfg |= PTP_CLOCK_CFG_PPS_EN | PTP_CLOCK_CFG_PPS_INV;
+ writeq(clock_cfg, ptp->reg_base + PTP_CLOCK_CFG);
+
+ writeq(0, ptp->reg_base + PTP_PPS_THRESH_HI);
+ writeq(0, ptp->reg_base + PTP_PPS_THRESH_LO);
+
+ /* Configure high/low phase time */
+ period = period / 2;
+ writeq(((u64)period << 32), ptp->reg_base + PTP_PPS_HI_INCR);
+ writeq(((u64)period << 32), ptp->reg_base + PTP_PPS_LO_INCR);
+ } else {
+ clock_cfg &= ~(PTP_CLOCK_CFG_PPS_EN | PTP_CLOCK_CFG_PPS_INV);
+ writeq(clock_cfg, ptp->reg_base + PTP_CLOCK_CFG);
}
+ if (on && cn10k_ptp_errata(ptp)) {
+ /* The ptp_clock_hi rollsover to zero once clock cycle before it
+ * reaches one second boundary. so, program the pps_lo_incr in
+ * such a way that the pps threshold value comparison at one
+ * second boundary will succeed and pps edge changes. After each
+ * one second boundary, the hrtimer handler will be invoked and
+ * reprograms the pps threshold value.
+ */
+ ptp->clock_period = NSEC_PER_SEC / ptp->clock_rate;
+ writeq((0x1dcd6500ULL - ptp->clock_period) << 32,
+ ptp->reg_base + PTP_PPS_LO_INCR);
+ }
+
+ if (cn10k_ptp_errata(ptp))
+ ptp_config_hrtimer(ptp, on);
+
return 0;
}
@@ -613,8 +645,8 @@ int rvu_mbox_handler_ptp_op(struct rvu *rvu, struct ptp_req *req,
case PTP_OP_SET_THRESH:
err = ptp_set_thresh(rvu->ptp, req->thresh);
break;
- case PTP_OP_EXTTS_ON:
- err = ptp_extts_on(rvu->ptp, req->extts_on);
+ case PTP_OP_PPS_ON:
+ err = ptp_pps_on(rvu->ptp, req->pps_on, req->period);
break;
case PTP_OP_ADJTIME:
ptp_atomic_adjtime(rvu->ptp, req->delta);
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.c
index 3a72b0793d4a..63130ba37e9d 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.c
@@ -175,7 +175,7 @@ static int ptp_set_thresh(struct otx2_ptp *ptp, u64 thresh)
return otx2_sync_mbox_msg(&ptp->nic->mbox);
}
-static int ptp_extts_on(struct otx2_ptp *ptp, int on)
+static int ptp_pps_on(struct otx2_ptp *ptp, int on, u64 period)
{
struct ptp_req *req;
@@ -186,8 +186,9 @@ static int ptp_extts_on(struct otx2_ptp *ptp, int on)
if (!req)
return -ENOMEM;
- req->op = PTP_OP_EXTTS_ON;
- req->extts_on = on;
+ req->op = PTP_OP_PPS_ON;
+ req->pps_on = on;
+ req->period = period;
return otx2_sync_mbox_msg(&ptp->nic->mbox);
}
@@ -276,8 +277,8 @@ static int otx2_ptp_verify_pin(struct ptp_clock_info *ptp, unsigned int pin,
switch (func) {
case PTP_PF_NONE:
case PTP_PF_EXTTS:
- break;
case PTP_PF_PEROUT:
+ break;
case PTP_PF_PHYSYNC:
return -1;
}
@@ -340,6 +341,7 @@ static int otx2_ptp_enable(struct ptp_clock_info *ptp_info,
{
struct otx2_ptp *ptp = container_of(ptp_info, struct otx2_ptp,
ptp_info);
+ u64 period = 0;
int pin;
if (!ptp->nic)
@@ -351,12 +353,24 @@ static int otx2_ptp_enable(struct ptp_clock_info *ptp_info,
rq->extts.index);
if (pin < 0)
return -EBUSY;
- if (on) {
- ptp_extts_on(ptp, on);
+ if (on)
schedule_delayed_work(&ptp->extts_work, msecs_to_jiffies(200));
- } else {
- ptp_extts_on(ptp, on);
+ else
cancel_delayed_work_sync(&ptp->extts_work);
+
+ return 0;
+ case PTP_CLK_REQ_PEROUT:
+ if (rq->perout.flags)
+ return -EOPNOTSUPP;
+
+ if (rq->perout.index >= ptp_info->n_pins)
+ return -EINVAL;
+ if (on) {
+ period = rq->perout.period.sec * NSEC_PER_SEC +
+ rq->perout.period.nsec;
+ ptp_pps_on(ptp, on, period);
+ } else {
+ ptp_pps_on(ptp, on, period);
}
return 0;
default:
@@ -411,6 +425,7 @@ int otx2_ptp_init(struct otx2_nic *pfvf)
.name = "OcteonTX2 PTP",
.max_adj = 1000000000ull,
.n_ext_ts = 1,
+ .n_per_out = 1,
.n_pins = 1,
.pps = 0,
.pin_config = &ptp_ptr->extts_config,
diff --git a/drivers/net/ethernet/marvell/pxa168_eth.c b/drivers/net/ethernet/marvell/pxa168_eth.c
index d5691b6a2bc5..dd6ca2e4fd51 100644
--- a/drivers/net/ethernet/marvell/pxa168_eth.c
+++ b/drivers/net/ethernet/marvell/pxa168_eth.c
@@ -1528,7 +1528,7 @@ err_clk:
return err;
}
-static int pxa168_eth_remove(struct platform_device *pdev)
+static void pxa168_eth_remove(struct platform_device *pdev)
{
struct net_device *dev = platform_get_drvdata(pdev);
struct pxa168_eth_private *pep = netdev_priv(dev);
@@ -1547,7 +1547,6 @@ static int pxa168_eth_remove(struct platform_device *pdev)
mdiobus_free(pep->smi_bus);
unregister_netdev(dev);
free_netdev(dev);
- return 0;
}
static void pxa168_eth_shutdown(struct platform_device *pdev)
@@ -1580,7 +1579,7 @@ MODULE_DEVICE_TABLE(of, pxa168_eth_of_match);
static struct platform_driver pxa168_eth_driver = {
.probe = pxa168_eth_probe,
- .remove = pxa168_eth_remove,
+ .remove_new = pxa168_eth_remove,
.shutdown = pxa168_eth_shutdown,
.resume = pxa168_eth_resume,
.suspend = pxa168_eth_suspend,
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 20afe79f380a..7669b446915a 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -197,6 +197,7 @@ static const struct mtk_reg_map mt7988_reg_map = {
.wdma_base = {
[0] = 0x4800,
[1] = 0x4c00,
+ [2] = 0x5000,
},
.pse_iq_sta = 0x0180,
.pse_oq_sta = 0x01a0,
@@ -5002,7 +5003,7 @@ err_destroy_sgmii:
return err;
}
-static int mtk_remove(struct platform_device *pdev)
+static void mtk_remove(struct platform_device *pdev)
{
struct mtk_eth *eth = platform_get_drvdata(pdev);
struct mtk_mac *mac;
@@ -5024,8 +5025,6 @@ static int mtk_remove(struct platform_device *pdev)
netif_napi_del(&eth->rx_napi);
mtk_cleanup(eth);
mtk_mdio_cleanup(eth);
-
- return 0;
}
static const struct mtk_soc_data mt2701_data = {
@@ -5226,7 +5225,7 @@ MODULE_DEVICE_TABLE(of, of_mtk_match);
static struct platform_driver mtk_driver = {
.probe = mtk_probe,
- .remove = mtk_remove,
+ .remove_new = mtk_remove,
.driver = {
.name = "mtk_soc_eth",
.of_match_table = of_mtk_match,
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
index 403219d987ef..9ae3b8a71d0e 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
@@ -1132,7 +1132,7 @@ struct mtk_reg_map {
u32 gdm1_cnt;
u32 gdma_to_ppe;
u32 ppe_base;
- u32 wdma_base[2];
+ u32 wdma_base[3];
u32 pse_iq_sta;
u32 pse_oq_sta;
};
diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.c b/drivers/net/ethernet/mediatek/mtk_ppe.c
index 86f32f486043..b2a5d9c3733d 100644
--- a/drivers/net/ethernet/mediatek/mtk_ppe.c
+++ b/drivers/net/ethernet/mediatek/mtk_ppe.c
@@ -425,7 +425,8 @@ int mtk_foe_entry_set_pppoe(struct mtk_eth *eth, struct mtk_foe_entry *entry,
}
int mtk_foe_entry_set_wdma(struct mtk_eth *eth, struct mtk_foe_entry *entry,
- int wdma_idx, int txq, int bss, int wcid)
+ int wdma_idx, int txq, int bss, int wcid,
+ bool amsdu_en)
{
struct mtk_foe_mac_info *l2 = mtk_foe_entry_l2(eth, entry);
u32 *ib2 = mtk_foe_entry_ib2(eth, entry);
@@ -437,6 +438,7 @@ int mtk_foe_entry_set_wdma(struct mtk_eth *eth, struct mtk_foe_entry *entry,
MTK_FOE_IB2_WDMA_WINFO_V2;
l2->w3info = FIELD_PREP(MTK_FOE_WINFO_WCID_V3, wcid) |
FIELD_PREP(MTK_FOE_WINFO_BSS_V3, bss);
+ l2->amsdu = FIELD_PREP(MTK_FOE_WINFO_AMSDU_EN, amsdu_en);
break;
case 2:
*ib2 &= ~MTK_FOE_IB2_PORT_MG_V2;
diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.h b/drivers/net/ethernet/mediatek/mtk_ppe.h
index e3d0ec72bc69..691806bca372 100644
--- a/drivers/net/ethernet/mediatek/mtk_ppe.h
+++ b/drivers/net/ethernet/mediatek/mtk_ppe.h
@@ -88,13 +88,13 @@ enum {
#define MTK_FOE_WINFO_BSS_V3 GENMASK(23, 16)
#define MTK_FOE_WINFO_WCID_V3 GENMASK(15, 0)
-#define MTK_FOE_WINFO_PAO_USR_INFO GENMASK(15, 0)
-#define MTK_FOE_WINFO_PAO_TID GENMASK(19, 16)
-#define MTK_FOE_WINFO_PAO_IS_FIXEDRATE BIT(20)
-#define MTK_FOE_WINFO_PAO_IS_PRIOR BIT(21)
-#define MTK_FOE_WINFO_PAO_IS_SP BIT(22)
-#define MTK_FOE_WINFO_PAO_HF BIT(23)
-#define MTK_FOE_WINFO_PAO_AMSDU_EN BIT(24)
+#define MTK_FOE_WINFO_AMSDU_USR_INFO GENMASK(15, 0)
+#define MTK_FOE_WINFO_AMSDU_TID GENMASK(19, 16)
+#define MTK_FOE_WINFO_AMSDU_IS_FIXEDRATE BIT(20)
+#define MTK_FOE_WINFO_AMSDU_IS_PRIOR BIT(21)
+#define MTK_FOE_WINFO_AMSDU_IS_SP BIT(22)
+#define MTK_FOE_WINFO_AMSDU_HF BIT(23)
+#define MTK_FOE_WINFO_AMSDU_EN BIT(24)
enum {
MTK_FOE_STATE_INVALID,
@@ -123,7 +123,7 @@ struct mtk_foe_mac_info {
/* netsys_v3 */
u32 w3info;
- u32 wpao;
+ u32 amsdu;
};
/* software-only entry type */
@@ -392,7 +392,8 @@ int mtk_foe_entry_set_vlan(struct mtk_eth *eth, struct mtk_foe_entry *entry,
int mtk_foe_entry_set_pppoe(struct mtk_eth *eth, struct mtk_foe_entry *entry,
int sid);
int mtk_foe_entry_set_wdma(struct mtk_eth *eth, struct mtk_foe_entry *entry,
- int wdma_idx, int txq, int bss, int wcid);
+ int wdma_idx, int txq, int bss, int wcid,
+ bool amsdu_en);
int mtk_foe_entry_set_queue(struct mtk_eth *eth, struct mtk_foe_entry *entry,
unsigned int queue);
int mtk_foe_entry_commit(struct mtk_ppe *ppe, struct mtk_flow_entry *entry);
diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
index a4efbeb16208..e073d2b5542c 100644
--- a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
+++ b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
@@ -111,6 +111,7 @@ mtk_flow_get_wdma_info(struct net_device *dev, const u8 *addr, struct mtk_wdma_i
info->queue = path->mtk_wdma.queue;
info->bss = path->mtk_wdma.bss;
info->wcid = path->mtk_wdma.wcid;
+ info->amsdu = path->mtk_wdma.amsdu;
return 0;
}
@@ -192,14 +193,17 @@ mtk_flow_set_output_device(struct mtk_eth *eth, struct mtk_foe_entry *foe,
if (mtk_flow_get_wdma_info(dev, dest_mac, &info) == 0) {
mtk_foe_entry_set_wdma(eth, foe, info.wdma_idx, info.queue,
- info.bss, info.wcid);
+ info.bss, info.wcid, info.amsdu);
if (mtk_is_netsys_v2_or_greater(eth)) {
switch (info.wdma_idx) {
case 0:
- pse_port = 8;
+ pse_port = PSE_WDMA0_PORT;
break;
case 1:
- pse_port = 9;
+ pse_port = PSE_WDMA1_PORT;
+ break;
+ case 2:
+ pse_port = PSE_WDMA2_PORT;
break;
default:
return -EINVAL;
diff --git a/drivers/net/ethernet/mediatek/mtk_wed.c b/drivers/net/ethernet/mediatek/mtk_wed.c
index 94376aa2b34c..9a6744c0d458 100644
--- a/drivers/net/ethernet/mediatek/mtk_wed.c
+++ b/drivers/net/ethernet/mediatek/mtk_wed.c
@@ -17,17 +17,21 @@
#include <net/flow_offload.h>
#include <net/pkt_cls.h>
#include "mtk_eth_soc.h"
-#include "mtk_wed_regs.h"
#include "mtk_wed.h"
#include "mtk_ppe.h"
#include "mtk_wed_wo.h"
#define MTK_PCIE_BASE(n) (0x1a143000 + (n) * 0x2000)
-#define MTK_WED_PKT_SIZE 1900
+#define MTK_WED_PKT_SIZE 1920
#define MTK_WED_BUF_SIZE 2048
+#define MTK_WED_PAGE_BUF_SIZE 128
#define MTK_WED_BUF_PER_PAGE (PAGE_SIZE / 2048)
+#define MTK_WED_RX_BUF_PER_PAGE (PAGE_SIZE / MTK_WED_PAGE_BUF_SIZE)
#define MTK_WED_RX_RING_SIZE 1536
+#define MTK_WED_RX_PG_BM_CNT 8192
+#define MTK_WED_AMSDU_BUF_SIZE (PAGE_SIZE << 4)
+#define MTK_WED_AMSDU_NPAGES 32
#define MTK_WED_TX_RING_SIZE 2048
#define MTK_WED_WDMA_RING_SIZE 1024
@@ -41,7 +45,10 @@
#define MTK_WED_RRO_QUE_CNT 8192
#define MTK_WED_MIOD_ENTRY_CNT 128
-static struct mtk_wed_hw *hw_list[2];
+#define MTK_WED_TX_BM_DMA_SIZE 65536
+#define MTK_WED_TX_BM_PKT_CNT 32768
+
+static struct mtk_wed_hw *hw_list[3];
static DEFINE_MUTEX(hw_lock);
struct mtk_wed_flow_block_priv {
@@ -49,6 +56,39 @@ struct mtk_wed_flow_block_priv {
struct net_device *dev;
};
+static const struct mtk_wed_soc_data mt7622_data = {
+ .regmap = {
+ .tx_bm_tkid = 0x088,
+ .wpdma_rx_ring0 = 0x770,
+ .reset_idx_tx_mask = GENMASK(3, 0),
+ .reset_idx_rx_mask = GENMASK(17, 16),
+ },
+ .tx_ring_desc_size = sizeof(struct mtk_wdma_desc),
+ .wdma_desc_size = sizeof(struct mtk_wdma_desc),
+};
+
+static const struct mtk_wed_soc_data mt7986_data = {
+ .regmap = {
+ .tx_bm_tkid = 0x0c8,
+ .wpdma_rx_ring0 = 0x770,
+ .reset_idx_tx_mask = GENMASK(1, 0),
+ .reset_idx_rx_mask = GENMASK(7, 6),
+ },
+ .tx_ring_desc_size = sizeof(struct mtk_wdma_desc),
+ .wdma_desc_size = 2 * sizeof(struct mtk_wdma_desc),
+};
+
+static const struct mtk_wed_soc_data mt7988_data = {
+ .regmap = {
+ .tx_bm_tkid = 0x0c8,
+ .wpdma_rx_ring0 = 0x7d0,
+ .reset_idx_tx_mask = GENMASK(1, 0),
+ .reset_idx_rx_mask = GENMASK(7, 6),
+ },
+ .tx_ring_desc_size = sizeof(struct mtk_wed_bm_desc),
+ .wdma_desc_size = 2 * sizeof(struct mtk_wdma_desc),
+};
+
static void
wed_m32(struct mtk_wed_device *dev, u32 reg, u32 mask, u32 val)
{
@@ -109,6 +149,90 @@ mtk_wdma_read_reset(struct mtk_wed_device *dev)
return wdma_r32(dev, MTK_WDMA_GLO_CFG);
}
+static void
+mtk_wdma_v3_rx_reset(struct mtk_wed_device *dev)
+{
+ u32 status;
+
+ if (!mtk_wed_is_v3_or_greater(dev->hw))
+ return;
+
+ wdma_clr(dev, MTK_WDMA_PREF_TX_CFG, MTK_WDMA_PREF_TX_CFG_PREF_EN);
+ wdma_clr(dev, MTK_WDMA_PREF_RX_CFG, MTK_WDMA_PREF_RX_CFG_PREF_EN);
+
+ if (read_poll_timeout(wdma_r32, status,
+ !(status & MTK_WDMA_PREF_TX_CFG_PREF_BUSY),
+ 0, 10000, false, dev, MTK_WDMA_PREF_TX_CFG))
+ dev_err(dev->hw->dev, "rx reset failed\n");
+
+ if (read_poll_timeout(wdma_r32, status,
+ !(status & MTK_WDMA_PREF_RX_CFG_PREF_BUSY),
+ 0, 10000, false, dev, MTK_WDMA_PREF_RX_CFG))
+ dev_err(dev->hw->dev, "rx reset failed\n");
+
+ wdma_clr(dev, MTK_WDMA_WRBK_TX_CFG, MTK_WDMA_WRBK_TX_CFG_WRBK_EN);
+ wdma_clr(dev, MTK_WDMA_WRBK_RX_CFG, MTK_WDMA_WRBK_RX_CFG_WRBK_EN);
+
+ if (read_poll_timeout(wdma_r32, status,
+ !(status & MTK_WDMA_WRBK_TX_CFG_WRBK_BUSY),
+ 0, 10000, false, dev, MTK_WDMA_WRBK_TX_CFG))
+ dev_err(dev->hw->dev, "rx reset failed\n");
+
+ if (read_poll_timeout(wdma_r32, status,
+ !(status & MTK_WDMA_WRBK_RX_CFG_WRBK_BUSY),
+ 0, 10000, false, dev, MTK_WDMA_WRBK_RX_CFG))
+ dev_err(dev->hw->dev, "rx reset failed\n");
+
+ /* prefetch FIFO */
+ wdma_w32(dev, MTK_WDMA_PREF_RX_FIFO_CFG,
+ MTK_WDMA_PREF_RX_FIFO_CFG_RING0_CLEAR |
+ MTK_WDMA_PREF_RX_FIFO_CFG_RING1_CLEAR);
+ wdma_clr(dev, MTK_WDMA_PREF_RX_FIFO_CFG,
+ MTK_WDMA_PREF_RX_FIFO_CFG_RING0_CLEAR |
+ MTK_WDMA_PREF_RX_FIFO_CFG_RING1_CLEAR);
+
+ /* core FIFO */
+ wdma_w32(dev, MTK_WDMA_XDMA_RX_FIFO_CFG,
+ MTK_WDMA_XDMA_RX_FIFO_CFG_RX_PAR_FIFO_CLEAR |
+ MTK_WDMA_XDMA_RX_FIFO_CFG_RX_CMD_FIFO_CLEAR |
+ MTK_WDMA_XDMA_RX_FIFO_CFG_RX_DMAD_FIFO_CLEAR |
+ MTK_WDMA_XDMA_RX_FIFO_CFG_RX_ARR_FIFO_CLEAR |
+ MTK_WDMA_XDMA_RX_FIFO_CFG_RX_LEN_FIFO_CLEAR |
+ MTK_WDMA_XDMA_RX_FIFO_CFG_RX_WID_FIFO_CLEAR |
+ MTK_WDMA_XDMA_RX_FIFO_CFG_RX_BID_FIFO_CLEAR);
+ wdma_clr(dev, MTK_WDMA_XDMA_RX_FIFO_CFG,
+ MTK_WDMA_XDMA_RX_FIFO_CFG_RX_PAR_FIFO_CLEAR |
+ MTK_WDMA_XDMA_RX_FIFO_CFG_RX_CMD_FIFO_CLEAR |
+ MTK_WDMA_XDMA_RX_FIFO_CFG_RX_DMAD_FIFO_CLEAR |
+ MTK_WDMA_XDMA_RX_FIFO_CFG_RX_ARR_FIFO_CLEAR |
+ MTK_WDMA_XDMA_RX_FIFO_CFG_RX_LEN_FIFO_CLEAR |
+ MTK_WDMA_XDMA_RX_FIFO_CFG_RX_WID_FIFO_CLEAR |
+ MTK_WDMA_XDMA_RX_FIFO_CFG_RX_BID_FIFO_CLEAR);
+
+ /* writeback FIFO */
+ wdma_w32(dev, MTK_WDMA_WRBK_RX_FIFO_CFG(0),
+ MTK_WDMA_WRBK_RX_FIFO_CFG_RING_CLEAR);
+ wdma_w32(dev, MTK_WDMA_WRBK_RX_FIFO_CFG(1),
+ MTK_WDMA_WRBK_RX_FIFO_CFG_RING_CLEAR);
+
+ wdma_clr(dev, MTK_WDMA_WRBK_RX_FIFO_CFG(0),
+ MTK_WDMA_WRBK_RX_FIFO_CFG_RING_CLEAR);
+ wdma_clr(dev, MTK_WDMA_WRBK_RX_FIFO_CFG(1),
+ MTK_WDMA_WRBK_RX_FIFO_CFG_RING_CLEAR);
+
+ /* prefetch ring status */
+ wdma_w32(dev, MTK_WDMA_PREF_SIDX_CFG,
+ MTK_WDMA_PREF_SIDX_CFG_RX_RING_CLEAR);
+ wdma_clr(dev, MTK_WDMA_PREF_SIDX_CFG,
+ MTK_WDMA_PREF_SIDX_CFG_RX_RING_CLEAR);
+
+ /* writeback ring status */
+ wdma_w32(dev, MTK_WDMA_WRBK_SIDX_CFG,
+ MTK_WDMA_WRBK_SIDX_CFG_RX_RING_CLEAR);
+ wdma_clr(dev, MTK_WDMA_WRBK_SIDX_CFG,
+ MTK_WDMA_WRBK_SIDX_CFG_RX_RING_CLEAR);
+}
+
static int
mtk_wdma_rx_reset(struct mtk_wed_device *dev)
{
@@ -121,6 +245,7 @@ mtk_wdma_rx_reset(struct mtk_wed_device *dev)
if (ret)
dev_err(dev->hw->dev, "rx reset failed\n");
+ mtk_wdma_v3_rx_reset(dev);
wdma_w32(dev, MTK_WDMA_RESET_IDX, MTK_WDMA_RESET_IDX_RX);
wdma_w32(dev, MTK_WDMA_RESET_IDX, 0);
@@ -135,6 +260,101 @@ mtk_wdma_rx_reset(struct mtk_wed_device *dev)
return ret;
}
+static u32
+mtk_wed_check_busy(struct mtk_wed_device *dev, u32 reg, u32 mask)
+{
+ return !!(wed_r32(dev, reg) & mask);
+}
+
+static int
+mtk_wed_poll_busy(struct mtk_wed_device *dev, u32 reg, u32 mask)
+{
+ int sleep = 15000;
+ int timeout = 100 * sleep;
+ u32 val;
+
+ return read_poll_timeout(mtk_wed_check_busy, val, !val, sleep,
+ timeout, false, dev, reg, mask);
+}
+
+static void
+mtk_wdma_v3_tx_reset(struct mtk_wed_device *dev)
+{
+ u32 status;
+
+ if (!mtk_wed_is_v3_or_greater(dev->hw))
+ return;
+
+ wdma_clr(dev, MTK_WDMA_PREF_TX_CFG, MTK_WDMA_PREF_TX_CFG_PREF_EN);
+ wdma_clr(dev, MTK_WDMA_PREF_RX_CFG, MTK_WDMA_PREF_RX_CFG_PREF_EN);
+
+ if (read_poll_timeout(wdma_r32, status,
+ !(status & MTK_WDMA_PREF_TX_CFG_PREF_BUSY),
+ 0, 10000, false, dev, MTK_WDMA_PREF_TX_CFG))
+ dev_err(dev->hw->dev, "tx reset failed\n");
+
+ if (read_poll_timeout(wdma_r32, status,
+ !(status & MTK_WDMA_PREF_RX_CFG_PREF_BUSY),
+ 0, 10000, false, dev, MTK_WDMA_PREF_RX_CFG))
+ dev_err(dev->hw->dev, "tx reset failed\n");
+
+ wdma_clr(dev, MTK_WDMA_WRBK_TX_CFG, MTK_WDMA_WRBK_TX_CFG_WRBK_EN);
+ wdma_clr(dev, MTK_WDMA_WRBK_RX_CFG, MTK_WDMA_WRBK_RX_CFG_WRBK_EN);
+
+ if (read_poll_timeout(wdma_r32, status,
+ !(status & MTK_WDMA_WRBK_TX_CFG_WRBK_BUSY),
+ 0, 10000, false, dev, MTK_WDMA_WRBK_TX_CFG))
+ dev_err(dev->hw->dev, "tx reset failed\n");
+
+ if (read_poll_timeout(wdma_r32, status,
+ !(status & MTK_WDMA_WRBK_RX_CFG_WRBK_BUSY),
+ 0, 10000, false, dev, MTK_WDMA_WRBK_RX_CFG))
+ dev_err(dev->hw->dev, "tx reset failed\n");
+
+ /* prefetch FIFO */
+ wdma_w32(dev, MTK_WDMA_PREF_TX_FIFO_CFG,
+ MTK_WDMA_PREF_TX_FIFO_CFG_RING0_CLEAR |
+ MTK_WDMA_PREF_TX_FIFO_CFG_RING1_CLEAR);
+ wdma_clr(dev, MTK_WDMA_PREF_TX_FIFO_CFG,
+ MTK_WDMA_PREF_TX_FIFO_CFG_RING0_CLEAR |
+ MTK_WDMA_PREF_TX_FIFO_CFG_RING1_CLEAR);
+
+ /* core FIFO */
+ wdma_w32(dev, MTK_WDMA_XDMA_TX_FIFO_CFG,
+ MTK_WDMA_XDMA_TX_FIFO_CFG_TX_PAR_FIFO_CLEAR |
+ MTK_WDMA_XDMA_TX_FIFO_CFG_TX_CMD_FIFO_CLEAR |
+ MTK_WDMA_XDMA_TX_FIFO_CFG_TX_DMAD_FIFO_CLEAR |
+ MTK_WDMA_XDMA_TX_FIFO_CFG_TX_ARR_FIFO_CLEAR);
+ wdma_clr(dev, MTK_WDMA_XDMA_TX_FIFO_CFG,
+ MTK_WDMA_XDMA_TX_FIFO_CFG_TX_PAR_FIFO_CLEAR |
+ MTK_WDMA_XDMA_TX_FIFO_CFG_TX_CMD_FIFO_CLEAR |
+ MTK_WDMA_XDMA_TX_FIFO_CFG_TX_DMAD_FIFO_CLEAR |
+ MTK_WDMA_XDMA_TX_FIFO_CFG_TX_ARR_FIFO_CLEAR);
+
+ /* writeback FIFO */
+ wdma_w32(dev, MTK_WDMA_WRBK_TX_FIFO_CFG(0),
+ MTK_WDMA_WRBK_TX_FIFO_CFG_RING_CLEAR);
+ wdma_w32(dev, MTK_WDMA_WRBK_TX_FIFO_CFG(1),
+ MTK_WDMA_WRBK_TX_FIFO_CFG_RING_CLEAR);
+
+ wdma_clr(dev, MTK_WDMA_WRBK_TX_FIFO_CFG(0),
+ MTK_WDMA_WRBK_TX_FIFO_CFG_RING_CLEAR);
+ wdma_clr(dev, MTK_WDMA_WRBK_TX_FIFO_CFG(1),
+ MTK_WDMA_WRBK_TX_FIFO_CFG_RING_CLEAR);
+
+ /* prefetch ring status */
+ wdma_w32(dev, MTK_WDMA_PREF_SIDX_CFG,
+ MTK_WDMA_PREF_SIDX_CFG_TX_RING_CLEAR);
+ wdma_clr(dev, MTK_WDMA_PREF_SIDX_CFG,
+ MTK_WDMA_PREF_SIDX_CFG_TX_RING_CLEAR);
+
+ /* writeback ring status */
+ wdma_w32(dev, MTK_WDMA_WRBK_SIDX_CFG,
+ MTK_WDMA_WRBK_SIDX_CFG_TX_RING_CLEAR);
+ wdma_clr(dev, MTK_WDMA_WRBK_SIDX_CFG,
+ MTK_WDMA_WRBK_SIDX_CFG_TX_RING_CLEAR);
+}
+
static void
mtk_wdma_tx_reset(struct mtk_wed_device *dev)
{
@@ -146,6 +366,7 @@ mtk_wdma_tx_reset(struct mtk_wed_device *dev)
!(status & mask), 0, 10000))
dev_err(dev->hw->dev, "tx reset failed\n");
+ mtk_wdma_v3_tx_reset(dev);
wdma_w32(dev, MTK_WDMA_RESET_IDX, MTK_WDMA_RESET_IDX_TX);
wdma_w32(dev, MTK_WDMA_RESET_IDX, 0);
@@ -278,7 +499,7 @@ mtk_wed_assign(struct mtk_wed_device *dev)
if (!hw->wed_dev)
goto out;
- if (hw->version == 1)
+ if (mtk_wed_is_v1(hw))
return NULL;
/* MT7986 WED devices do not have any pcie slot restrictions */
@@ -298,35 +519,152 @@ out:
}
static int
+mtk_wed_amsdu_buffer_alloc(struct mtk_wed_device *dev)
+{
+ struct mtk_wed_hw *hw = dev->hw;
+ struct mtk_wed_amsdu *wed_amsdu;
+ int i;
+
+ if (!mtk_wed_is_v3_or_greater(hw))
+ return 0;
+
+ wed_amsdu = devm_kcalloc(hw->dev, MTK_WED_AMSDU_NPAGES,
+ sizeof(*wed_amsdu), GFP_KERNEL);
+ if (!wed_amsdu)
+ return -ENOMEM;
+
+ for (i = 0; i < MTK_WED_AMSDU_NPAGES; i++) {
+ void *ptr;
+
+ /* each segment is 64K */
+ ptr = (void *)__get_free_pages(GFP_KERNEL | __GFP_NOWARN |
+ __GFP_ZERO | __GFP_COMP |
+ GFP_DMA32,
+ get_order(MTK_WED_AMSDU_BUF_SIZE));
+ if (!ptr)
+ goto error;
+
+ wed_amsdu[i].txd = ptr;
+ wed_amsdu[i].txd_phy = dma_map_single(hw->dev, ptr,
+ MTK_WED_AMSDU_BUF_SIZE,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(hw->dev, wed_amsdu[i].txd_phy))
+ goto error;
+ }
+ dev->hw->wed_amsdu = wed_amsdu;
+
+ return 0;
+
+error:
+ for (i--; i >= 0; i--)
+ dma_unmap_single(hw->dev, wed_amsdu[i].txd_phy,
+ MTK_WED_AMSDU_BUF_SIZE, DMA_TO_DEVICE);
+ return -ENOMEM;
+}
+
+static void
+mtk_wed_amsdu_free_buffer(struct mtk_wed_device *dev)
+{
+ struct mtk_wed_amsdu *wed_amsdu = dev->hw->wed_amsdu;
+ int i;
+
+ if (!wed_amsdu)
+ return;
+
+ for (i = 0; i < MTK_WED_AMSDU_NPAGES; i++) {
+ dma_unmap_single(dev->hw->dev, wed_amsdu[i].txd_phy,
+ MTK_WED_AMSDU_BUF_SIZE, DMA_TO_DEVICE);
+ free_pages((unsigned long)wed_amsdu[i].txd,
+ get_order(MTK_WED_AMSDU_BUF_SIZE));
+ }
+}
+
+static int
+mtk_wed_amsdu_init(struct mtk_wed_device *dev)
+{
+ struct mtk_wed_amsdu *wed_amsdu = dev->hw->wed_amsdu;
+ int i, ret;
+
+ if (!wed_amsdu)
+ return 0;
+
+ for (i = 0; i < MTK_WED_AMSDU_NPAGES; i++)
+ wed_w32(dev, MTK_WED_AMSDU_HIFTXD_BASE_L(i),
+ wed_amsdu[i].txd_phy);
+
+ /* init all sta parameter */
+ wed_w32(dev, MTK_WED_AMSDU_STA_INFO_INIT, MTK_WED_AMSDU_STA_RMVL |
+ MTK_WED_AMSDU_STA_WTBL_HDRT_MODE |
+ FIELD_PREP(MTK_WED_AMSDU_STA_MAX_AMSDU_LEN,
+ dev->wlan.amsdu_max_len >> 8) |
+ FIELD_PREP(MTK_WED_AMSDU_STA_MAX_AMSDU_NUM,
+ dev->wlan.amsdu_max_subframes));
+
+ wed_w32(dev, MTK_WED_AMSDU_STA_INFO, MTK_WED_AMSDU_STA_INFO_DO_INIT);
+
+ ret = mtk_wed_poll_busy(dev, MTK_WED_AMSDU_STA_INFO,
+ MTK_WED_AMSDU_STA_INFO_DO_INIT);
+ if (ret) {
+ dev_err(dev->hw->dev, "amsdu initialization failed\n");
+ return ret;
+ }
+
+ /* init partial amsdu offload txd src */
+ wed_set(dev, MTK_WED_AMSDU_HIFTXD_CFG,
+ FIELD_PREP(MTK_WED_AMSDU_HIFTXD_SRC, dev->hw->index));
+
+ /* init qmem */
+ wed_set(dev, MTK_WED_AMSDU_PSE, MTK_WED_AMSDU_PSE_RESET);
+ ret = mtk_wed_poll_busy(dev, MTK_WED_MON_AMSDU_QMEM_STS1, BIT(29));
+ if (ret) {
+ pr_info("%s: amsdu qmem initialization failed\n", __func__);
+ return ret;
+ }
+
+ /* eagle E1 PCIE1 tx ring 22 flow control issue */
+ if (dev->wlan.id == 0x7991)
+ wed_clr(dev, MTK_WED_AMSDU_FIFO, MTK_WED_AMSDU_IS_PRIOR0_RING);
+
+ wed_set(dev, MTK_WED_CTRL, MTK_WED_CTRL_TX_AMSDU_EN);
+
+ return 0;
+}
+
+static int
mtk_wed_tx_buffer_alloc(struct mtk_wed_device *dev)
{
- struct mtk_wdma_desc *desc;
- dma_addr_t desc_phys;
- void **page_list;
+ u32 desc_size = dev->hw->soc->tx_ring_desc_size;
+ int i, page_idx = 0, n_pages, ring_size;
int token = dev->wlan.token_start;
- int ring_size;
- int n_pages;
- int i, page_idx;
+ struct mtk_wed_buf *page_list;
+ dma_addr_t desc_phys;
+ void *desc_ptr;
- ring_size = dev->wlan.nbuf & ~(MTK_WED_BUF_PER_PAGE - 1);
- n_pages = ring_size / MTK_WED_BUF_PER_PAGE;
+ if (!mtk_wed_is_v3_or_greater(dev->hw)) {
+ ring_size = dev->wlan.nbuf & ~(MTK_WED_BUF_PER_PAGE - 1);
+ dev->tx_buf_ring.size = ring_size;
+ } else {
+ dev->tx_buf_ring.size = MTK_WED_TX_BM_DMA_SIZE;
+ ring_size = MTK_WED_TX_BM_PKT_CNT;
+ }
+ n_pages = dev->tx_buf_ring.size / MTK_WED_BUF_PER_PAGE;
page_list = kcalloc(n_pages, sizeof(*page_list), GFP_KERNEL);
if (!page_list)
return -ENOMEM;
- dev->tx_buf_ring.size = ring_size;
dev->tx_buf_ring.pages = page_list;
- desc = dma_alloc_coherent(dev->hw->dev, ring_size * sizeof(*desc),
- &desc_phys, GFP_KERNEL);
- if (!desc)
+ desc_ptr = dma_alloc_coherent(dev->hw->dev,
+ dev->tx_buf_ring.size * desc_size,
+ &desc_phys, GFP_KERNEL);
+ if (!desc_ptr)
return -ENOMEM;
- dev->tx_buf_ring.desc = desc;
+ dev->tx_buf_ring.desc = desc_ptr;
dev->tx_buf_ring.desc_phys = desc_phys;
- for (i = 0, page_idx = 0; i < ring_size; i += MTK_WED_BUF_PER_PAGE) {
+ for (i = 0; i < ring_size; i += MTK_WED_BUF_PER_PAGE) {
dma_addr_t page_phys, buf_phys;
struct page *page;
void *buf;
@@ -343,7 +681,8 @@ mtk_wed_tx_buffer_alloc(struct mtk_wed_device *dev)
return -ENOMEM;
}
- page_list[page_idx++] = page;
+ page_list[page_idx].p = page;
+ page_list[page_idx++].phy_addr = page_phys;
dma_sync_single_for_cpu(dev->hw->dev, page_phys, PAGE_SIZE,
DMA_BIDIRECTIONAL);
@@ -351,28 +690,31 @@ mtk_wed_tx_buffer_alloc(struct mtk_wed_device *dev)
buf_phys = page_phys;
for (s = 0; s < MTK_WED_BUF_PER_PAGE; s++) {
- u32 txd_size;
- u32 ctrl;
-
- txd_size = dev->wlan.init_buf(buf, buf_phys, token++);
+ struct mtk_wdma_desc *desc = desc_ptr;
desc->buf0 = cpu_to_le32(buf_phys);
- desc->buf1 = cpu_to_le32(buf_phys + txd_size);
-
- if (dev->hw->version == 1)
- ctrl = FIELD_PREP(MTK_WDMA_DESC_CTRL_LEN0, txd_size) |
- FIELD_PREP(MTK_WDMA_DESC_CTRL_LEN1,
- MTK_WED_BUF_SIZE - txd_size) |
- MTK_WDMA_DESC_CTRL_LAST_SEG1;
- else
- ctrl = FIELD_PREP(MTK_WDMA_DESC_CTRL_LEN0, txd_size) |
- FIELD_PREP(MTK_WDMA_DESC_CTRL_LEN1_V2,
- MTK_WED_BUF_SIZE - txd_size) |
- MTK_WDMA_DESC_CTRL_LAST_SEG0;
- desc->ctrl = cpu_to_le32(ctrl);
- desc->info = 0;
- desc++;
-
+ if (!mtk_wed_is_v3_or_greater(dev->hw)) {
+ u32 txd_size, ctrl;
+
+ txd_size = dev->wlan.init_buf(buf, buf_phys,
+ token++);
+ desc->buf1 = cpu_to_le32(buf_phys + txd_size);
+ ctrl = FIELD_PREP(MTK_WDMA_DESC_CTRL_LEN0, txd_size);
+ if (mtk_wed_is_v1(dev->hw))
+ ctrl |= MTK_WDMA_DESC_CTRL_LAST_SEG1 |
+ FIELD_PREP(MTK_WDMA_DESC_CTRL_LEN1,
+ MTK_WED_BUF_SIZE - txd_size);
+ else
+ ctrl |= MTK_WDMA_DESC_CTRL_LAST_SEG0 |
+ FIELD_PREP(MTK_WDMA_DESC_CTRL_LEN1_V2,
+ MTK_WED_BUF_SIZE - txd_size);
+ desc->ctrl = cpu_to_le32(ctrl);
+ desc->info = 0;
+ } else {
+ desc->ctrl = cpu_to_le32(token << 16);
+ }
+
+ desc_ptr += desc_size;
buf += MTK_WED_BUF_SIZE;
buf_phys += MTK_WED_BUF_SIZE;
}
@@ -387,42 +729,103 @@ mtk_wed_tx_buffer_alloc(struct mtk_wed_device *dev)
static void
mtk_wed_free_tx_buffer(struct mtk_wed_device *dev)
{
- struct mtk_wdma_desc *desc = dev->tx_buf_ring.desc;
- void **page_list = dev->tx_buf_ring.pages;
- int page_idx;
- int i;
+ struct mtk_wed_buf *page_list = dev->tx_buf_ring.pages;
+ struct mtk_wed_hw *hw = dev->hw;
+ int i, page_idx = 0;
if (!page_list)
return;
- if (!desc)
+ if (!dev->tx_buf_ring.desc)
goto free_pagelist;
- for (i = 0, page_idx = 0; i < dev->tx_buf_ring.size;
- i += MTK_WED_BUF_PER_PAGE) {
- void *page = page_list[page_idx++];
- dma_addr_t buf_addr;
+ for (i = 0; i < dev->tx_buf_ring.size; i += MTK_WED_BUF_PER_PAGE) {
+ dma_addr_t page_phy = page_list[page_idx].phy_addr;
+ void *page = page_list[page_idx++].p;
if (!page)
break;
- buf_addr = le32_to_cpu(desc[i].buf0);
- dma_unmap_page(dev->hw->dev, buf_addr, PAGE_SIZE,
+ dma_unmap_page(dev->hw->dev, page_phy, PAGE_SIZE,
DMA_BIDIRECTIONAL);
__free_page(page);
}
- dma_free_coherent(dev->hw->dev, dev->tx_buf_ring.size * sizeof(*desc),
- desc, dev->tx_buf_ring.desc_phys);
+ dma_free_coherent(dev->hw->dev,
+ dev->tx_buf_ring.size * hw->soc->tx_ring_desc_size,
+ dev->tx_buf_ring.desc,
+ dev->tx_buf_ring.desc_phys);
free_pagelist:
kfree(page_list);
}
static int
+mtk_wed_hwrro_buffer_alloc(struct mtk_wed_device *dev)
+{
+ int n_pages = MTK_WED_RX_PG_BM_CNT / MTK_WED_RX_BUF_PER_PAGE;
+ struct mtk_wed_buf *page_list;
+ struct mtk_wed_bm_desc *desc;
+ dma_addr_t desc_phys;
+ int i, page_idx = 0;
+
+ if (!dev->wlan.hw_rro)
+ return 0;
+
+ page_list = kcalloc(n_pages, sizeof(*page_list), GFP_KERNEL);
+ if (!page_list)
+ return -ENOMEM;
+
+ dev->hw_rro.size = dev->wlan.rx_nbuf & ~(MTK_WED_BUF_PER_PAGE - 1);
+ dev->hw_rro.pages = page_list;
+ desc = dma_alloc_coherent(dev->hw->dev,
+ dev->wlan.rx_nbuf * sizeof(*desc),
+ &desc_phys, GFP_KERNEL);
+ if (!desc)
+ return -ENOMEM;
+
+ dev->hw_rro.desc = desc;
+ dev->hw_rro.desc_phys = desc_phys;
+
+ for (i = 0; i < MTK_WED_RX_PG_BM_CNT; i += MTK_WED_RX_BUF_PER_PAGE) {
+ dma_addr_t page_phys, buf_phys;
+ struct page *page;
+ int s;
+
+ page = __dev_alloc_page(GFP_KERNEL);
+ if (!page)
+ return -ENOMEM;
+
+ page_phys = dma_map_page(dev->hw->dev, page, 0, PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+ if (dma_mapping_error(dev->hw->dev, page_phys)) {
+ __free_page(page);
+ return -ENOMEM;
+ }
+
+ page_list[page_idx].p = page;
+ page_list[page_idx++].phy_addr = page_phys;
+ dma_sync_single_for_cpu(dev->hw->dev, page_phys, PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+
+ buf_phys = page_phys;
+ for (s = 0; s < MTK_WED_RX_BUF_PER_PAGE; s++) {
+ desc->buf0 = cpu_to_le32(buf_phys);
+ buf_phys += MTK_WED_PAGE_BUF_SIZE;
+ desc++;
+ }
+
+ dma_sync_single_for_device(dev->hw->dev, page_phys, PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+ }
+
+ return 0;
+}
+
+static int
mtk_wed_rx_buffer_alloc(struct mtk_wed_device *dev)
{
- struct mtk_rxbm_desc *desc;
+ struct mtk_wed_bm_desc *desc;
dma_addr_t desc_phys;
dev->rx_buf_ring.size = dev->wlan.rx_nbuf;
@@ -436,13 +839,48 @@ mtk_wed_rx_buffer_alloc(struct mtk_wed_device *dev)
dev->rx_buf_ring.desc_phys = desc_phys;
dev->wlan.init_rx_buf(dev, dev->wlan.rx_npkt);
- return 0;
+ return mtk_wed_hwrro_buffer_alloc(dev);
+}
+
+static void
+mtk_wed_hwrro_free_buffer(struct mtk_wed_device *dev)
+{
+ struct mtk_wed_buf *page_list = dev->hw_rro.pages;
+ struct mtk_wed_bm_desc *desc = dev->hw_rro.desc;
+ int i, page_idx = 0;
+
+ if (!dev->wlan.hw_rro)
+ return;
+
+ if (!page_list)
+ return;
+
+ if (!desc)
+ goto free_pagelist;
+
+ for (i = 0; i < MTK_WED_RX_PG_BM_CNT; i += MTK_WED_RX_BUF_PER_PAGE) {
+ dma_addr_t buf_addr = page_list[page_idx].phy_addr;
+ void *page = page_list[page_idx++].p;
+
+ if (!page)
+ break;
+
+ dma_unmap_page(dev->hw->dev, buf_addr, PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+ __free_page(page);
+ }
+
+ dma_free_coherent(dev->hw->dev, dev->hw_rro.size * sizeof(*desc),
+ desc, dev->hw_rro.desc_phys);
+
+free_pagelist:
+ kfree(page_list);
}
static void
mtk_wed_free_rx_buffer(struct mtk_wed_device *dev)
{
- struct mtk_rxbm_desc *desc = dev->rx_buf_ring.desc;
+ struct mtk_wed_bm_desc *desc = dev->rx_buf_ring.desc;
if (!desc)
return;
@@ -450,6 +888,28 @@ mtk_wed_free_rx_buffer(struct mtk_wed_device *dev)
dev->wlan.release_rx_buf(dev);
dma_free_coherent(dev->hw->dev, dev->rx_buf_ring.size * sizeof(*desc),
desc, dev->rx_buf_ring.desc_phys);
+
+ mtk_wed_hwrro_free_buffer(dev);
+}
+
+static void
+mtk_wed_hwrro_init(struct mtk_wed_device *dev)
+{
+ if (!mtk_wed_get_rx_capa(dev) || !dev->wlan.hw_rro)
+ return;
+
+ wed_set(dev, MTK_WED_RRO_PG_BM_RX_DMAM,
+ FIELD_PREP(MTK_WED_RRO_PG_BM_RX_SDL0, 128));
+
+ wed_w32(dev, MTK_WED_RRO_PG_BM_BASE, dev->hw_rro.desc_phys);
+
+ wed_w32(dev, MTK_WED_RRO_PG_BM_INIT_PTR,
+ MTK_WED_RRO_PG_BM_INIT_SW_TAIL_IDX |
+ FIELD_PREP(MTK_WED_RRO_PG_BM_SW_TAIL_IDX,
+ MTK_WED_RX_PG_BM_CNT));
+
+ /* enable rx_page_bm to fetch dmad */
+ wed_set(dev, MTK_WED_CTRL, MTK_WED_CTRL_WED_RX_PG_BM_EN);
}
static void
@@ -463,6 +923,8 @@ mtk_wed_rx_buffer_hw_init(struct mtk_wed_device *dev)
wed_w32(dev, MTK_WED_RX_BM_DYN_ALLOC_TH,
FIELD_PREP(MTK_WED_RX_BM_DYN_ALLOC_TH_H, 0xffff));
wed_set(dev, MTK_WED_CTRL, MTK_WED_CTRL_WED_RX_BM_EN);
+
+ mtk_wed_hwrro_init(dev);
}
static void
@@ -498,13 +960,23 @@ mtk_wed_set_ext_int(struct mtk_wed_device *dev, bool en)
{
u32 mask = MTK_WED_EXT_INT_STATUS_ERROR_MASK;
- if (dev->hw->version == 1)
+ switch (dev->hw->version) {
+ case 1:
mask |= MTK_WED_EXT_INT_STATUS_TX_DRV_R_RESP_ERR;
- else
+ break;
+ case 2:
mask |= MTK_WED_EXT_INT_STATUS_RX_FBUF_LO_TH |
MTK_WED_EXT_INT_STATUS_RX_FBUF_HI_TH |
MTK_WED_EXT_INT_STATUS_RX_DRV_COHERENT |
MTK_WED_EXT_INT_STATUS_TX_DMA_W_RESP_ERR;
+ break;
+ case 3:
+ mask = MTK_WED_EXT_INT_STATUS_RX_DRV_COHERENT |
+ MTK_WED_EXT_INT_STATUS_TKID_WO_PYLD;
+ break;
+ default:
+ break;
+ }
if (!dev->hw->num_flows)
mask &= ~MTK_WED_EXT_INT_STATUS_TKID_WO_PYLD;
@@ -516,6 +988,9 @@ mtk_wed_set_ext_int(struct mtk_wed_device *dev, bool en)
static void
mtk_wed_set_512_support(struct mtk_wed_device *dev, bool enable)
{
+ if (!mtk_wed_is_v2(dev->hw))
+ return;
+
if (enable) {
wed_w32(dev, MTK_WED_TXDP_CTRL, MTK_WED_TXDP_DW9_OVERWR);
wed_w32(dev, MTK_WED_TXP_DW1,
@@ -527,22 +1002,15 @@ mtk_wed_set_512_support(struct mtk_wed_device *dev, bool enable)
}
}
-#define MTK_WFMDA_RX_DMA_EN BIT(2)
-static void
-mtk_wed_check_wfdma_rx_fill(struct mtk_wed_device *dev, int idx)
+static int
+mtk_wed_check_wfdma_rx_fill(struct mtk_wed_device *dev,
+ struct mtk_wed_ring *ring)
{
- u32 val;
int i;
- if (!(dev->rx_ring[idx].flags & MTK_WED_RING_CONFIGURED))
- return; /* queue is not configured by mt76 */
-
for (i = 0; i < 3; i++) {
- u32 cur_idx;
+ u32 cur_idx = readl(ring->wpdma + MTK_WED_RING_OFS_CPU_IDX);
- cur_idx = wed_r32(dev,
- MTK_WED_WPDMA_RING_RX_DATA(idx) +
- MTK_WED_RING_OFS_CPU_IDX);
if (cur_idx == MTK_WED_RX_RING_SIZE - 1)
break;
@@ -551,12 +1019,10 @@ mtk_wed_check_wfdma_rx_fill(struct mtk_wed_device *dev, int idx)
if (i == 3) {
dev_err(dev->hw->dev, "rx dma enable failed\n");
- return;
+ return -ETIMEDOUT;
}
- val = wifi_r32(dev, dev->wlan.wpdma_rx_glo - dev->wlan.phy_base) |
- MTK_WFMDA_RX_DMA_EN;
- wifi_w32(dev, dev->wlan.wpdma_rx_glo - dev->wlan.phy_base, val);
+ return 0;
}
static void
@@ -577,7 +1043,7 @@ mtk_wed_dma_disable(struct mtk_wed_device *dev)
MTK_WDMA_GLO_CFG_RX_INFO1_PRERES |
MTK_WDMA_GLO_CFG_RX_INFO2_PRERES);
- if (dev->hw->version == 1) {
+ if (mtk_wed_is_v1(dev->hw)) {
regmap_write(dev->hw->mirror, dev->hw->index * 4, 0);
wdma_clr(dev, MTK_WDMA_GLO_CFG,
MTK_WDMA_GLO_CFG_RX_INFO3_PRERES);
@@ -590,6 +1056,14 @@ mtk_wed_dma_disable(struct mtk_wed_device *dev)
MTK_WED_WPDMA_RX_D_RX_DRV_EN);
wed_clr(dev, MTK_WED_WDMA_GLO_CFG,
MTK_WED_WDMA_GLO_CFG_TX_DDONE_CHK);
+
+ if (mtk_wed_is_v3_or_greater(dev->hw) &&
+ mtk_wed_get_rx_capa(dev)) {
+ wdma_clr(dev, MTK_WDMA_PREF_TX_CFG,
+ MTK_WDMA_PREF_TX_CFG_PREF_EN);
+ wdma_clr(dev, MTK_WDMA_PREF_RX_CFG,
+ MTK_WDMA_PREF_RX_CFG_PREF_EN);
+ }
}
mtk_wed_set_512_support(dev, false);
@@ -606,7 +1080,7 @@ mtk_wed_stop(struct mtk_wed_device *dev)
wdma_w32(dev, MTK_WDMA_INT_GRP2, 0);
wed_w32(dev, MTK_WED_WPDMA_INT_MASK, 0);
- if (dev->hw->version == 1)
+ if (!mtk_wed_get_rx_capa(dev))
return;
wed_w32(dev, MTK_WED_EXT_INT_MASK1, 0);
@@ -625,13 +1099,21 @@ mtk_wed_deinit(struct mtk_wed_device *dev)
MTK_WED_CTRL_WED_TX_BM_EN |
MTK_WED_CTRL_WED_TX_FREE_AGENT_EN);
- if (dev->hw->version == 1)
+ if (mtk_wed_is_v1(dev->hw))
return;
wed_clr(dev, MTK_WED_CTRL,
MTK_WED_CTRL_RX_ROUTE_QM_EN |
MTK_WED_CTRL_WED_RX_BM_EN |
MTK_WED_CTRL_RX_RRO_QM_EN);
+
+ if (mtk_wed_is_v3_or_greater(dev->hw)) {
+ wed_clr(dev, MTK_WED_CTRL, MTK_WED_CTRL_TX_AMSDU_EN);
+ wed_clr(dev, MTK_WED_RESET, MTK_WED_RESET_TX_AMSDU);
+ wed_clr(dev, MTK_WED_PCIE_INT_CTRL,
+ MTK_WED_PCIE_INT_CTRL_MSK_EN_POLA |
+ MTK_WED_PCIE_INT_CTRL_MSK_IRQ_FILTER);
+ }
}
static void
@@ -643,6 +1125,7 @@ __mtk_wed_detach(struct mtk_wed_device *dev)
mtk_wdma_rx_reset(dev);
mtk_wed_reset(dev, MTK_WED_RESET_WED);
+ mtk_wed_amsdu_free_buffer(dev);
mtk_wed_free_tx_buffer(dev);
mtk_wed_free_tx_rings(dev);
@@ -681,21 +1164,37 @@ mtk_wed_detach(struct mtk_wed_device *dev)
mutex_unlock(&hw_lock);
}
-#define PCIE_BASE_ADDR0 0x11280000
static void
mtk_wed_bus_init(struct mtk_wed_device *dev)
{
switch (dev->wlan.bus_type) {
case MTK_WED_BUS_PCIE: {
struct device_node *np = dev->hw->eth->dev->of_node;
- struct regmap *regs;
- regs = syscon_regmap_lookup_by_phandle(np,
- "mediatek,wed-pcie");
- if (IS_ERR(regs))
- break;
+ if (mtk_wed_is_v2(dev->hw)) {
+ struct regmap *regs;
+
+ regs = syscon_regmap_lookup_by_phandle(np,
+ "mediatek,wed-pcie");
+ if (IS_ERR(regs))
+ break;
- regmap_update_bits(regs, 0, BIT(0), BIT(0));
+ regmap_update_bits(regs, 0, BIT(0), BIT(0));
+ }
+
+ if (dev->wlan.msi) {
+ wed_w32(dev, MTK_WED_PCIE_CFG_INTM,
+ dev->hw->pcie_base | 0xc08);
+ wed_w32(dev, MTK_WED_PCIE_CFG_BASE,
+ dev->hw->pcie_base | 0xc04);
+ wed_w32(dev, MTK_WED_PCIE_INT_TRIGGER, BIT(8));
+ } else {
+ wed_w32(dev, MTK_WED_PCIE_CFG_INTM,
+ dev->hw->pcie_base | 0x180);
+ wed_w32(dev, MTK_WED_PCIE_CFG_BASE,
+ dev->hw->pcie_base | 0x184);
+ wed_w32(dev, MTK_WED_PCIE_INT_TRIGGER, BIT(24));
+ }
wed_w32(dev, MTK_WED_PCIE_INT_CTRL,
FIELD_PREP(MTK_WED_PCIE_INT_CTRL_POLL_EN, 2));
@@ -703,19 +1202,9 @@ mtk_wed_bus_init(struct mtk_wed_device *dev)
/* pcie interrupt control: pola/source selection */
wed_set(dev, MTK_WED_PCIE_INT_CTRL,
MTK_WED_PCIE_INT_CTRL_MSK_EN_POLA |
- FIELD_PREP(MTK_WED_PCIE_INT_CTRL_SRC_SEL, 1));
- wed_r32(dev, MTK_WED_PCIE_INT_CTRL);
-
- wed_w32(dev, MTK_WED_PCIE_CFG_INTM, PCIE_BASE_ADDR0 | 0x180);
- wed_w32(dev, MTK_WED_PCIE_CFG_BASE, PCIE_BASE_ADDR0 | 0x184);
-
- /* pcie interrupt status trigger register */
- wed_w32(dev, MTK_WED_PCIE_INT_TRIGGER, BIT(24));
- wed_r32(dev, MTK_WED_PCIE_INT_TRIGGER);
-
- /* pola setting */
- wed_set(dev, MTK_WED_PCIE_INT_CTRL,
- MTK_WED_PCIE_INT_CTRL_MSK_EN_POLA);
+ MTK_WED_PCIE_INT_CTRL_MSK_IRQ_FILTER |
+ FIELD_PREP(MTK_WED_PCIE_INT_CTRL_SRC_SEL,
+ dev->hw->index));
break;
}
case MTK_WED_BUS_AXI:
@@ -731,38 +1220,55 @@ mtk_wed_bus_init(struct mtk_wed_device *dev)
static void
mtk_wed_set_wpdma(struct mtk_wed_device *dev)
{
- if (dev->hw->version == 1) {
- wed_w32(dev, MTK_WED_WPDMA_CFG_BASE, dev->wlan.wpdma_phys);
- } else {
- mtk_wed_bus_init(dev);
+ int i;
- wed_w32(dev, MTK_WED_WPDMA_CFG_BASE, dev->wlan.wpdma_int);
- wed_w32(dev, MTK_WED_WPDMA_CFG_INT_MASK, dev->wlan.wpdma_mask);
- wed_w32(dev, MTK_WED_WPDMA_CFG_TX, dev->wlan.wpdma_tx);
- wed_w32(dev, MTK_WED_WPDMA_CFG_TX_FREE, dev->wlan.wpdma_txfree);
- wed_w32(dev, MTK_WED_WPDMA_RX_GLO_CFG, dev->wlan.wpdma_rx_glo);
- wed_w32(dev, MTK_WED_WPDMA_RX_RING, dev->wlan.wpdma_rx);
+ if (mtk_wed_is_v1(dev->hw)) {
+ wed_w32(dev, MTK_WED_WPDMA_CFG_BASE, dev->wlan.wpdma_phys);
+ return;
}
+
+ mtk_wed_bus_init(dev);
+
+ wed_w32(dev, MTK_WED_WPDMA_CFG_BASE, dev->wlan.wpdma_int);
+ wed_w32(dev, MTK_WED_WPDMA_CFG_INT_MASK, dev->wlan.wpdma_mask);
+ wed_w32(dev, MTK_WED_WPDMA_CFG_TX, dev->wlan.wpdma_tx);
+ wed_w32(dev, MTK_WED_WPDMA_CFG_TX_FREE, dev->wlan.wpdma_txfree);
+
+ if (!mtk_wed_get_rx_capa(dev))
+ return;
+
+ wed_w32(dev, MTK_WED_WPDMA_RX_GLO_CFG, dev->wlan.wpdma_rx_glo);
+ wed_w32(dev, dev->hw->soc->regmap.wpdma_rx_ring0, dev->wlan.wpdma_rx);
+
+ if (!dev->wlan.hw_rro)
+ return;
+
+ wed_w32(dev, MTK_WED_RRO_RX_D_CFG(0), dev->wlan.wpdma_rx_rro[0]);
+ wed_w32(dev, MTK_WED_RRO_RX_D_CFG(1), dev->wlan.wpdma_rx_rro[1]);
+ for (i = 0; i < MTK_WED_RX_PAGE_QUEUES; i++)
+ wed_w32(dev, MTK_WED_RRO_MSDU_PG_RING_CFG(i),
+ dev->wlan.wpdma_rx_pg + i * 0x10);
}
static void
mtk_wed_hw_init_early(struct mtk_wed_device *dev)
{
- u32 mask, set;
+ u32 set = FIELD_PREP(MTK_WED_WDMA_GLO_CFG_BT_SIZE, 2);
+ u32 mask = MTK_WED_WDMA_GLO_CFG_BT_SIZE;
mtk_wed_deinit(dev);
mtk_wed_reset(dev, MTK_WED_RESET_WED);
mtk_wed_set_wpdma(dev);
- mask = MTK_WED_WDMA_GLO_CFG_BT_SIZE |
- MTK_WED_WDMA_GLO_CFG_DYNAMIC_DMAD_RECYCLE |
- MTK_WED_WDMA_GLO_CFG_RX_DIS_FSM_AUTO_IDLE;
- set = FIELD_PREP(MTK_WED_WDMA_GLO_CFG_BT_SIZE, 2) |
- MTK_WED_WDMA_GLO_CFG_DYNAMIC_SKIP_DMAD_PREP |
- MTK_WED_WDMA_GLO_CFG_IDLE_DMAD_SUPPLY;
+ if (!mtk_wed_is_v3_or_greater(dev->hw)) {
+ mask |= MTK_WED_WDMA_GLO_CFG_DYNAMIC_DMAD_RECYCLE |
+ MTK_WED_WDMA_GLO_CFG_RX_DIS_FSM_AUTO_IDLE;
+ set |= MTK_WED_WDMA_GLO_CFG_DYNAMIC_SKIP_DMAD_PREP |
+ MTK_WED_WDMA_GLO_CFG_IDLE_DMAD_SUPPLY;
+ }
wed_m32(dev, MTK_WED_WDMA_GLO_CFG, mask, set);
- if (dev->hw->version == 1) {
+ if (mtk_wed_is_v1(dev->hw)) {
u32 offset = dev->hw->index ? 0x04000400 : 0;
wdma_set(dev, MTK_WDMA_GLO_CFG,
@@ -907,11 +1413,18 @@ mtk_wed_route_qm_hw_init(struct mtk_wed_device *dev)
}
/* configure RX_ROUTE_QM */
- wed_clr(dev, MTK_WED_RTQM_GLO_CFG, MTK_WED_RTQM_Q_RST);
- wed_clr(dev, MTK_WED_RTQM_GLO_CFG, MTK_WED_RTQM_TXDMAD_FPORT);
- wed_set(dev, MTK_WED_RTQM_GLO_CFG,
- FIELD_PREP(MTK_WED_RTQM_TXDMAD_FPORT, 0x3 + dev->hw->index));
- wed_clr(dev, MTK_WED_RTQM_GLO_CFG, MTK_WED_RTQM_Q_RST);
+ if (mtk_wed_is_v2(dev->hw)) {
+ wed_clr(dev, MTK_WED_RTQM_GLO_CFG, MTK_WED_RTQM_Q_RST);
+ wed_clr(dev, MTK_WED_RTQM_GLO_CFG, MTK_WED_RTQM_TXDMAD_FPORT);
+ wed_set(dev, MTK_WED_RTQM_GLO_CFG,
+ FIELD_PREP(MTK_WED_RTQM_TXDMAD_FPORT,
+ 0x3 + dev->hw->index));
+ wed_clr(dev, MTK_WED_RTQM_GLO_CFG, MTK_WED_RTQM_Q_RST);
+ } else {
+ wed_set(dev, MTK_WED_RTQM_ENQ_CFG0,
+ FIELD_PREP(MTK_WED_RTQM_ENQ_CFG_TXDMAD_FPORT,
+ 0x3 + dev->hw->index));
+ }
/* enable RX_ROUTE_QM */
wed_set(dev, MTK_WED_CTRL, MTK_WED_CTRL_RX_ROUTE_QM_EN);
}
@@ -924,34 +1437,30 @@ mtk_wed_hw_init(struct mtk_wed_device *dev)
dev->init_done = true;
mtk_wed_set_ext_int(dev, false);
- wed_w32(dev, MTK_WED_TX_BM_CTRL,
- MTK_WED_TX_BM_CTRL_PAUSE |
- FIELD_PREP(MTK_WED_TX_BM_CTRL_VLD_GRP_NUM,
- dev->tx_buf_ring.size / 128) |
- FIELD_PREP(MTK_WED_TX_BM_CTRL_RSV_GRP_NUM,
- MTK_WED_TX_RING_SIZE / 256));
wed_w32(dev, MTK_WED_TX_BM_BASE, dev->tx_buf_ring.desc_phys);
-
wed_w32(dev, MTK_WED_TX_BM_BUF_LEN, MTK_WED_PKT_SIZE);
- if (dev->hw->version == 1) {
- wed_w32(dev, MTK_WED_TX_BM_TKID,
- FIELD_PREP(MTK_WED_TX_BM_TKID_START,
- dev->wlan.token_start) |
- FIELD_PREP(MTK_WED_TX_BM_TKID_END,
- dev->wlan.token_start +
- dev->wlan.nbuf - 1));
+ if (mtk_wed_is_v1(dev->hw)) {
+ wed_w32(dev, MTK_WED_TX_BM_CTRL,
+ MTK_WED_TX_BM_CTRL_PAUSE |
+ FIELD_PREP(MTK_WED_TX_BM_CTRL_VLD_GRP_NUM,
+ dev->tx_buf_ring.size / 128) |
+ FIELD_PREP(MTK_WED_TX_BM_CTRL_RSV_GRP_NUM,
+ MTK_WED_TX_RING_SIZE / 256));
wed_w32(dev, MTK_WED_TX_BM_DYN_THR,
FIELD_PREP(MTK_WED_TX_BM_DYN_THR_LO, 1) |
MTK_WED_TX_BM_DYN_THR_HI);
- } else {
- wed_w32(dev, MTK_WED_TX_BM_TKID_V2,
- FIELD_PREP(MTK_WED_TX_BM_TKID_START,
- dev->wlan.token_start) |
- FIELD_PREP(MTK_WED_TX_BM_TKID_END,
- dev->wlan.token_start +
- dev->wlan.nbuf - 1));
+ } else if (mtk_wed_is_v2(dev->hw)) {
+ wed_w32(dev, MTK_WED_TX_BM_CTRL,
+ MTK_WED_TX_BM_CTRL_PAUSE |
+ FIELD_PREP(MTK_WED_TX_BM_CTRL_VLD_GRP_NUM,
+ dev->tx_buf_ring.size / 128) |
+ FIELD_PREP(MTK_WED_TX_BM_CTRL_RSV_GRP_NUM,
+ MTK_WED_TX_RING_SIZE / 256));
+ wed_w32(dev, MTK_WED_TX_TKID_DYN_THR,
+ FIELD_PREP(MTK_WED_TX_TKID_DYN_THR_LO, 0) |
+ MTK_WED_TX_TKID_DYN_THR_HI);
wed_w32(dev, MTK_WED_TX_BM_DYN_THR,
FIELD_PREP(MTK_WED_TX_BM_DYN_THR_LO_V2, 0) |
MTK_WED_TX_BM_DYN_THR_HI_V2);
@@ -961,31 +1470,71 @@ mtk_wed_hw_init(struct mtk_wed_device *dev)
dev->tx_buf_ring.size / 128) |
FIELD_PREP(MTK_WED_TX_TKID_CTRL_RSV_GRP_NUM,
dev->tx_buf_ring.size / 128));
- wed_w32(dev, MTK_WED_TX_TKID_DYN_THR,
- FIELD_PREP(MTK_WED_TX_TKID_DYN_THR_LO, 0) |
- MTK_WED_TX_TKID_DYN_THR_HI);
}
+ wed_w32(dev, dev->hw->soc->regmap.tx_bm_tkid,
+ FIELD_PREP(MTK_WED_TX_BM_TKID_START, dev->wlan.token_start) |
+ FIELD_PREP(MTK_WED_TX_BM_TKID_END,
+ dev->wlan.token_start + dev->wlan.nbuf - 1));
+
mtk_wed_reset(dev, MTK_WED_RESET_TX_BM);
- if (dev->hw->version == 1) {
+ if (mtk_wed_is_v3_or_greater(dev->hw)) {
+ /* switch to new bm architecture */
+ wed_clr(dev, MTK_WED_TX_BM_CTRL,
+ MTK_WED_TX_BM_CTRL_LEGACY_EN);
+
+ wed_w32(dev, MTK_WED_TX_TKID_CTRL,
+ MTK_WED_TX_TKID_CTRL_PAUSE |
+ FIELD_PREP(MTK_WED_TX_TKID_CTRL_VLD_GRP_NUM_V3,
+ dev->wlan.nbuf / 128) |
+ FIELD_PREP(MTK_WED_TX_TKID_CTRL_RSV_GRP_NUM_V3,
+ dev->wlan.nbuf / 128));
+ /* return SKBID + SDP back to bm */
+ wed_set(dev, MTK_WED_TX_TKID_CTRL,
+ MTK_WED_TX_TKID_CTRL_FREE_FORMAT);
+
+ wed_w32(dev, MTK_WED_TX_BM_INIT_PTR,
+ MTK_WED_TX_BM_PKT_CNT |
+ MTK_WED_TX_BM_INIT_SW_TAIL_IDX);
+ }
+
+ if (mtk_wed_is_v1(dev->hw)) {
wed_set(dev, MTK_WED_CTRL,
MTK_WED_CTRL_WED_TX_BM_EN |
MTK_WED_CTRL_WED_TX_FREE_AGENT_EN);
- } else {
- wed_clr(dev, MTK_WED_TX_TKID_CTRL, MTK_WED_TX_TKID_CTRL_PAUSE);
+ } else if (mtk_wed_get_rx_capa(dev)) {
/* rx hw init */
wed_w32(dev, MTK_WED_WPDMA_RX_D_RST_IDX,
MTK_WED_WPDMA_RX_D_RST_CRX_IDX |
MTK_WED_WPDMA_RX_D_RST_DRV_IDX);
wed_w32(dev, MTK_WED_WPDMA_RX_D_RST_IDX, 0);
+ /* reset prefetch index of ring */
+ wed_set(dev, MTK_WED_WPDMA_RX_D_PREF_RX0_SIDX,
+ MTK_WED_WPDMA_RX_D_PREF_SIDX_IDX_CLR);
+ wed_clr(dev, MTK_WED_WPDMA_RX_D_PREF_RX0_SIDX,
+ MTK_WED_WPDMA_RX_D_PREF_SIDX_IDX_CLR);
+
+ wed_set(dev, MTK_WED_WPDMA_RX_D_PREF_RX1_SIDX,
+ MTK_WED_WPDMA_RX_D_PREF_SIDX_IDX_CLR);
+ wed_clr(dev, MTK_WED_WPDMA_RX_D_PREF_RX1_SIDX,
+ MTK_WED_WPDMA_RX_D_PREF_SIDX_IDX_CLR);
+
+ /* reset prefetch FIFO of ring */
+ wed_set(dev, MTK_WED_WPDMA_RX_D_PREF_FIFO_CFG,
+ MTK_WED_WPDMA_RX_D_PREF_FIFO_CFG_R0_CLR |
+ MTK_WED_WPDMA_RX_D_PREF_FIFO_CFG_R1_CLR);
+ wed_w32(dev, MTK_WED_WPDMA_RX_D_PREF_FIFO_CFG, 0);
+
mtk_wed_rx_buffer_hw_init(dev);
mtk_wed_rro_hw_init(dev);
mtk_wed_route_qm_hw_init(dev);
}
wed_clr(dev, MTK_WED_TX_BM_CTRL, MTK_WED_TX_BM_CTRL_PAUSE);
+ if (!mtk_wed_is_v1(dev->hw))
+ wed_clr(dev, MTK_WED_TX_TKID_CTRL, MTK_WED_TX_TKID_CTRL_PAUSE);
}
static void
@@ -1008,23 +1557,6 @@ mtk_wed_ring_reset(struct mtk_wed_ring *ring, int size, bool tx)
}
}
-static u32
-mtk_wed_check_busy(struct mtk_wed_device *dev, u32 reg, u32 mask)
-{
- return !!(wed_r32(dev, reg) & mask);
-}
-
-static int
-mtk_wed_poll_busy(struct mtk_wed_device *dev, u32 reg, u32 mask)
-{
- int sleep = 15000;
- int timeout = 100 * sleep;
- u32 val;
-
- return read_poll_timeout(mtk_wed_check_busy, val, !val, sleep,
- timeout, false, dev, reg, mask);
-}
-
static int
mtk_wed_rx_reset(struct mtk_wed_device *dev)
{
@@ -1038,13 +1570,33 @@ mtk_wed_rx_reset(struct mtk_wed_device *dev)
if (ret)
return ret;
+ if (dev->wlan.hw_rro) {
+ wed_clr(dev, MTK_WED_CTRL, MTK_WED_CTRL_WED_RX_IND_CMD_EN);
+ mtk_wed_poll_busy(dev, MTK_WED_RRO_RX_HW_STS,
+ MTK_WED_RX_IND_CMD_BUSY);
+ mtk_wed_reset(dev, MTK_WED_RESET_RRO_RX_TO_PG);
+ }
+
wed_clr(dev, MTK_WED_WPDMA_RX_D_GLO_CFG, MTK_WED_WPDMA_RX_D_RX_DRV_EN);
ret = mtk_wed_poll_busy(dev, MTK_WED_WPDMA_RX_D_GLO_CFG,
MTK_WED_WPDMA_RX_D_RX_DRV_BUSY);
+ if (!ret && mtk_wed_is_v3_or_greater(dev->hw))
+ ret = mtk_wed_poll_busy(dev, MTK_WED_WPDMA_RX_D_PREF_CFG,
+ MTK_WED_WPDMA_RX_D_PREF_BUSY);
if (ret) {
mtk_wed_reset(dev, MTK_WED_RESET_WPDMA_INT_AGENT);
mtk_wed_reset(dev, MTK_WED_RESET_WPDMA_RX_D_DRV);
} else {
+ if (mtk_wed_is_v3_or_greater(dev->hw)) {
+ /* 1.a. disable prefetch HW */
+ wed_clr(dev, MTK_WED_WPDMA_RX_D_PREF_CFG,
+ MTK_WED_WPDMA_RX_D_PREF_EN);
+ mtk_wed_poll_busy(dev, MTK_WED_WPDMA_RX_D_PREF_CFG,
+ MTK_WED_WPDMA_RX_D_PREF_BUSY);
+ wed_w32(dev, MTK_WED_WPDMA_RX_D_RST_IDX,
+ MTK_WED_WPDMA_RX_D_RST_DRV_IDX_ALL);
+ }
+
wed_w32(dev, MTK_WED_WPDMA_RX_D_RST_IDX,
MTK_WED_WPDMA_RX_D_RST_CRX_IDX |
MTK_WED_WPDMA_RX_D_RST_DRV_IDX);
@@ -1072,23 +1624,52 @@ mtk_wed_rx_reset(struct mtk_wed_device *dev)
wed_w32(dev, MTK_WED_RROQM_RST_IDX, 0);
}
+ if (dev->wlan.hw_rro) {
+ /* disable rro msdu page drv */
+ wed_clr(dev, MTK_WED_RRO_MSDU_PG_RING2_CFG,
+ MTK_WED_RRO_MSDU_PG_DRV_EN);
+
+ /* disable rro data drv */
+ wed_clr(dev, MTK_WED_RRO_RX_D_CFG(2), MTK_WED_RRO_RX_D_DRV_EN);
+
+ /* rro msdu page drv reset */
+ wed_w32(dev, MTK_WED_RRO_MSDU_PG_RING2_CFG,
+ MTK_WED_RRO_MSDU_PG_DRV_CLR);
+ mtk_wed_poll_busy(dev, MTK_WED_RRO_MSDU_PG_RING2_CFG,
+ MTK_WED_RRO_MSDU_PG_DRV_CLR);
+
+ /* rro data drv reset */
+ wed_w32(dev, MTK_WED_RRO_RX_D_CFG(2),
+ MTK_WED_RRO_RX_D_DRV_CLR);
+ mtk_wed_poll_busy(dev, MTK_WED_RRO_RX_D_CFG(2),
+ MTK_WED_RRO_RX_D_DRV_CLR);
+ }
+
/* reset route qm */
wed_clr(dev, MTK_WED_CTRL, MTK_WED_CTRL_RX_ROUTE_QM_EN);
ret = mtk_wed_poll_busy(dev, MTK_WED_CTRL,
MTK_WED_CTRL_RX_ROUTE_QM_BUSY);
- if (ret)
+ if (ret) {
mtk_wed_reset(dev, MTK_WED_RESET_RX_ROUTE_QM);
- else
- wed_set(dev, MTK_WED_RTQM_GLO_CFG,
- MTK_WED_RTQM_Q_RST);
+ } else if (mtk_wed_is_v3_or_greater(dev->hw)) {
+ wed_set(dev, MTK_WED_RTQM_RST, BIT(0));
+ wed_clr(dev, MTK_WED_RTQM_RST, BIT(0));
+ mtk_wed_reset(dev, MTK_WED_RESET_RX_ROUTE_QM);
+ } else {
+ wed_set(dev, MTK_WED_RTQM_GLO_CFG, MTK_WED_RTQM_Q_RST);
+ }
/* reset tx wdma */
mtk_wdma_tx_reset(dev);
/* reset tx wdma drv */
wed_clr(dev, MTK_WED_WDMA_GLO_CFG, MTK_WED_WDMA_GLO_CFG_TX_DRV_EN);
- mtk_wed_poll_busy(dev, MTK_WED_CTRL,
- MTK_WED_CTRL_WDMA_INT_AGENT_BUSY);
+ if (mtk_wed_is_v3_or_greater(dev->hw))
+ mtk_wed_poll_busy(dev, MTK_WED_WPDMA_STATUS,
+ MTK_WED_WPDMA_STATUS_TX_DRV);
+ else
+ mtk_wed_poll_busy(dev, MTK_WED_CTRL,
+ MTK_WED_CTRL_WDMA_INT_AGENT_BUSY);
mtk_wed_reset(dev, MTK_WED_RESET_WDMA_TX_DRV);
/* reset wed rx dma */
@@ -1098,13 +1679,8 @@ mtk_wed_rx_reset(struct mtk_wed_device *dev)
if (ret) {
mtk_wed_reset(dev, MTK_WED_RESET_WED_RX_DMA);
} else {
- struct mtk_eth *eth = dev->hw->eth;
-
- if (mtk_is_netsys_v2_or_greater(eth))
- wed_set(dev, MTK_WED_RESET_IDX,
- MTK_WED_RESET_IDX_RX_V2);
- else
- wed_set(dev, MTK_WED_RESET_IDX, MTK_WED_RESET_IDX_RX);
+ wed_set(dev, MTK_WED_RESET_IDX,
+ dev->hw->soc->regmap.reset_idx_rx_mask);
wed_w32(dev, MTK_WED_RESET_IDX, 0);
}
@@ -1114,6 +1690,14 @@ mtk_wed_rx_reset(struct mtk_wed_device *dev)
MTK_WED_CTRL_WED_RX_BM_BUSY);
mtk_wed_reset(dev, MTK_WED_RESET_RX_BM);
+ if (dev->wlan.hw_rro) {
+ wed_clr(dev, MTK_WED_CTRL, MTK_WED_CTRL_WED_RX_PG_BM_EN);
+ mtk_wed_poll_busy(dev, MTK_WED_CTRL,
+ MTK_WED_CTRL_WED_RX_PG_BM_BUSY);
+ wed_set(dev, MTK_WED_RESET, MTK_WED_RESET_RX_PG_BM);
+ wed_clr(dev, MTK_WED_RESET, MTK_WED_RESET_RX_PG_BM);
+ }
+
/* wo change to enable state */
val = MTK_WED_WO_STATE_ENABLE;
ret = mtk_wed_mcu_send_msg(wo, MTK_WED_MODULE_ID_WO,
@@ -1131,6 +1715,7 @@ mtk_wed_rx_reset(struct mtk_wed_device *dev)
false);
}
mtk_wed_free_rx_buffer(dev);
+ mtk_wed_hwrro_free_buffer(dev);
return 0;
}
@@ -1157,21 +1742,48 @@ mtk_wed_reset_dma(struct mtk_wed_device *dev)
if (busy) {
mtk_wed_reset(dev, MTK_WED_RESET_WED_TX_DMA);
} else {
- wed_w32(dev, MTK_WED_RESET_IDX, MTK_WED_RESET_IDX_TX);
+ wed_w32(dev, MTK_WED_RESET_IDX,
+ dev->hw->soc->regmap.reset_idx_tx_mask);
wed_w32(dev, MTK_WED_RESET_IDX, 0);
}
/* 2. reset WDMA rx DMA */
busy = !!mtk_wdma_rx_reset(dev);
- wed_clr(dev, MTK_WED_WDMA_GLO_CFG, MTK_WED_WDMA_GLO_CFG_RX_DRV_EN);
+ if (mtk_wed_is_v3_or_greater(dev->hw)) {
+ val = MTK_WED_WDMA_GLO_CFG_RX_DIS_FSM_AUTO_IDLE |
+ wed_r32(dev, MTK_WED_WDMA_GLO_CFG);
+ val &= ~MTK_WED_WDMA_GLO_CFG_RX_DRV_EN;
+ wed_w32(dev, MTK_WED_WDMA_GLO_CFG, val);
+ } else {
+ wed_clr(dev, MTK_WED_WDMA_GLO_CFG,
+ MTK_WED_WDMA_GLO_CFG_RX_DRV_EN);
+ }
+
if (!busy)
busy = mtk_wed_poll_busy(dev, MTK_WED_WDMA_GLO_CFG,
MTK_WED_WDMA_GLO_CFG_RX_DRV_BUSY);
+ if (!busy && mtk_wed_is_v3_or_greater(dev->hw))
+ busy = mtk_wed_poll_busy(dev, MTK_WED_WDMA_RX_PREF_CFG,
+ MTK_WED_WDMA_RX_PREF_BUSY);
if (busy) {
mtk_wed_reset(dev, MTK_WED_RESET_WDMA_INT_AGENT);
mtk_wed_reset(dev, MTK_WED_RESET_WDMA_RX_DRV);
} else {
+ if (mtk_wed_is_v3_or_greater(dev->hw)) {
+ /* 1.a. disable prefetch HW */
+ wed_clr(dev, MTK_WED_WDMA_RX_PREF_CFG,
+ MTK_WED_WDMA_RX_PREF_EN);
+ mtk_wed_poll_busy(dev, MTK_WED_WDMA_RX_PREF_CFG,
+ MTK_WED_WDMA_RX_PREF_BUSY);
+ wed_clr(dev, MTK_WED_WDMA_RX_PREF_CFG,
+ MTK_WED_WDMA_RX_PREF_DDONE2_EN);
+
+ /* 2. Reset dma index */
+ wed_w32(dev, MTK_WED_WDMA_RESET_IDX,
+ MTK_WED_WDMA_RESET_IDX_RX_ALL);
+ }
+
wed_w32(dev, MTK_WED_WDMA_RESET_IDX,
MTK_WED_WDMA_RESET_IDX_RX | MTK_WED_WDMA_RESET_IDX_DRV);
wed_w32(dev, MTK_WED_WDMA_RESET_IDX, 0);
@@ -1187,8 +1799,13 @@ mtk_wed_reset_dma(struct mtk_wed_device *dev)
wed_clr(dev, MTK_WED_CTRL, MTK_WED_CTRL_WED_TX_FREE_AGENT_EN);
for (i = 0; i < 100; i++) {
- val = wed_r32(dev, MTK_WED_TX_BM_INTF);
- if (FIELD_GET(MTK_WED_TX_BM_INTF_TKFIFO_FDEP, val) == 0x40)
+ if (mtk_wed_is_v1(dev->hw))
+ val = FIELD_GET(MTK_WED_TX_BM_INTF_TKFIFO_FDEP,
+ wed_r32(dev, MTK_WED_TX_BM_INTF));
+ else
+ val = FIELD_GET(MTK_WED_TX_TKID_INTF_TKFIFO_FDEP,
+ wed_r32(dev, MTK_WED_TX_TKID_INTF));
+ if (val == 0x40)
break;
}
@@ -1210,6 +1827,8 @@ mtk_wed_reset_dma(struct mtk_wed_device *dev)
mtk_wed_reset(dev, MTK_WED_RESET_WPDMA_INT_AGENT);
mtk_wed_reset(dev, MTK_WED_RESET_WPDMA_TX_DRV);
mtk_wed_reset(dev, MTK_WED_RESET_WPDMA_RX_DRV);
+ if (mtk_wed_is_v3_or_greater(dev->hw))
+ wed_w32(dev, MTK_WED_RX1_CTRL2, 0);
} else {
wed_w32(dev, MTK_WED_WPDMA_RESET_IDX,
MTK_WED_WPDMA_RESET_IDX_TX |
@@ -1218,7 +1837,7 @@ mtk_wed_reset_dma(struct mtk_wed_device *dev)
}
dev->init_done = false;
- if (dev->hw->version == 1)
+ if (mtk_wed_is_v1(dev->hw))
return;
if (!busy) {
@@ -1226,7 +1845,14 @@ mtk_wed_reset_dma(struct mtk_wed_device *dev)
wed_w32(dev, MTK_WED_RESET_IDX, 0);
}
- mtk_wed_rx_reset(dev);
+ if (mtk_wed_is_v3_or_greater(dev->hw)) {
+ /* reset amsdu engine */
+ wed_clr(dev, MTK_WED_CTRL, MTK_WED_CTRL_TX_AMSDU_EN);
+ mtk_wed_reset(dev, MTK_WED_RESET_TX_AMSDU);
+ }
+
+ if (mtk_wed_get_rx_capa(dev))
+ mtk_wed_rx_reset(dev);
}
static int
@@ -1249,7 +1875,6 @@ static int
mtk_wed_wdma_rx_ring_setup(struct mtk_wed_device *dev, int idx, int size,
bool reset)
{
- u32 desc_size = sizeof(struct mtk_wdma_desc) * dev->hw->version;
struct mtk_wed_ring *wdma;
if (idx >= ARRAY_SIZE(dev->rx_wdma))
@@ -1257,7 +1882,7 @@ mtk_wed_wdma_rx_ring_setup(struct mtk_wed_device *dev, int idx, int size,
wdma = &dev->rx_wdma[idx];
if (!reset && mtk_wed_ring_alloc(dev, wdma, MTK_WED_WDMA_RING_SIZE,
- desc_size, true))
+ dev->hw->soc->wdma_desc_size, true))
return -ENOMEM;
wdma_w32(dev, MTK_WDMA_RING_RX(idx) + MTK_WED_RING_OFS_BASE,
@@ -1278,7 +1903,6 @@ static int
mtk_wed_wdma_tx_ring_setup(struct mtk_wed_device *dev, int idx, int size,
bool reset)
{
- u32 desc_size = sizeof(struct mtk_wdma_desc) * dev->hw->version;
struct mtk_wed_ring *wdma;
if (idx >= ARRAY_SIZE(dev->tx_wdma))
@@ -1286,9 +1910,27 @@ mtk_wed_wdma_tx_ring_setup(struct mtk_wed_device *dev, int idx, int size,
wdma = &dev->tx_wdma[idx];
if (!reset && mtk_wed_ring_alloc(dev, wdma, MTK_WED_WDMA_RING_SIZE,
- desc_size, true))
+ dev->hw->soc->wdma_desc_size, true))
return -ENOMEM;
+ if (mtk_wed_is_v3_or_greater(dev->hw)) {
+ struct mtk_wdma_desc *desc = wdma->desc;
+ int i;
+
+ for (i = 0; i < MTK_WED_WDMA_RING_SIZE; i++) {
+ desc->buf0 = 0;
+ desc->ctrl = cpu_to_le32(MTK_WDMA_DESC_CTRL_DMA_DONE);
+ desc->buf1 = 0;
+ desc->info = cpu_to_le32(MTK_WDMA_TXD0_DESC_INFO_DMA_DONE);
+ desc++;
+ desc->buf0 = 0;
+ desc->ctrl = cpu_to_le32(MTK_WDMA_DESC_CTRL_DMA_DONE);
+ desc->buf1 = 0;
+ desc->info = cpu_to_le32(MTK_WDMA_TXD1_DESC_INFO_DMA_DONE);
+ desc++;
+ }
+ }
+
wdma_w32(dev, MTK_WDMA_RING_TX(idx) + MTK_WED_RING_OFS_BASE,
wdma->desc_phys);
wdma_w32(dev, MTK_WDMA_RING_TX(idx) + MTK_WED_RING_OFS_COUNT,
@@ -1344,7 +1986,7 @@ mtk_wed_configure_irq(struct mtk_wed_device *dev, u32 irq_mask)
MTK_WED_CTRL_WED_TX_BM_EN |
MTK_WED_CTRL_WED_TX_FREE_AGENT_EN);
- if (dev->hw->version == 1) {
+ if (mtk_wed_is_v1(dev->hw)) {
wed_w32(dev, MTK_WED_PCIE_INT_TRIGGER,
MTK_WED_PCIE_INT_TRIGGER_STATUS);
@@ -1354,8 +1996,9 @@ mtk_wed_configure_irq(struct mtk_wed_device *dev, u32 irq_mask)
wed_clr(dev, MTK_WED_WDMA_INT_CTRL, wdma_mask);
} else {
- wdma_mask |= FIELD_PREP(MTK_WDMA_INT_MASK_TX_DONE,
- GENMASK(1, 0));
+ if (mtk_wed_is_v3_or_greater(dev->hw))
+ wed_set(dev, MTK_WED_CTRL, MTK_WED_CTRL_TX_TKID_ALI_EN);
+
/* initail tx interrupt trigger */
wed_w32(dev, MTK_WED_WPDMA_INT_CTRL_TX,
MTK_WED_WPDMA_INT_CTRL_TX0_DONE_EN |
@@ -1374,15 +2017,20 @@ mtk_wed_configure_irq(struct mtk_wed_device *dev, u32 irq_mask)
FIELD_PREP(MTK_WED_WPDMA_INT_CTRL_TX_FREE_DONE_TRIG,
dev->wlan.txfree_tbit));
- wed_w32(dev, MTK_WED_WPDMA_INT_CTRL_RX,
- MTK_WED_WPDMA_INT_CTRL_RX0_EN |
- MTK_WED_WPDMA_INT_CTRL_RX0_CLR |
- MTK_WED_WPDMA_INT_CTRL_RX1_EN |
- MTK_WED_WPDMA_INT_CTRL_RX1_CLR |
- FIELD_PREP(MTK_WED_WPDMA_INT_CTRL_RX0_DONE_TRIG,
- dev->wlan.rx_tbit[0]) |
- FIELD_PREP(MTK_WED_WPDMA_INT_CTRL_RX1_DONE_TRIG,
- dev->wlan.rx_tbit[1]));
+ if (mtk_wed_get_rx_capa(dev)) {
+ wed_w32(dev, MTK_WED_WPDMA_INT_CTRL_RX,
+ MTK_WED_WPDMA_INT_CTRL_RX0_EN |
+ MTK_WED_WPDMA_INT_CTRL_RX0_CLR |
+ MTK_WED_WPDMA_INT_CTRL_RX1_EN |
+ MTK_WED_WPDMA_INT_CTRL_RX1_CLR |
+ FIELD_PREP(MTK_WED_WPDMA_INT_CTRL_RX0_DONE_TRIG,
+ dev->wlan.rx_tbit[0]) |
+ FIELD_PREP(MTK_WED_WPDMA_INT_CTRL_RX1_DONE_TRIG,
+ dev->wlan.rx_tbit[1]));
+
+ wdma_mask |= FIELD_PREP(MTK_WDMA_INT_MASK_TX_DONE,
+ GENMASK(1, 0));
+ }
wed_w32(dev, MTK_WED_WDMA_INT_CLR, wdma_mask);
wed_set(dev, MTK_WED_WDMA_INT_CTRL,
@@ -1398,55 +2046,280 @@ mtk_wed_configure_irq(struct mtk_wed_device *dev, u32 irq_mask)
wed_w32(dev, MTK_WED_INT_MASK, irq_mask);
}
+#define MTK_WFMDA_RX_DMA_EN BIT(2)
static void
mtk_wed_dma_enable(struct mtk_wed_device *dev)
{
- wed_set(dev, MTK_WED_WPDMA_INT_CTRL, MTK_WED_WPDMA_INT_CTRL_SUBRT_ADV);
+ int i;
+
+ if (!mtk_wed_is_v3_or_greater(dev->hw)) {
+ wed_set(dev, MTK_WED_WPDMA_INT_CTRL,
+ MTK_WED_WPDMA_INT_CTRL_SUBRT_ADV);
+ wed_set(dev, MTK_WED_WPDMA_GLO_CFG,
+ MTK_WED_WPDMA_GLO_CFG_TX_DRV_EN |
+ MTK_WED_WPDMA_GLO_CFG_RX_DRV_EN);
+ wdma_set(dev, MTK_WDMA_GLO_CFG,
+ MTK_WDMA_GLO_CFG_TX_DMA_EN |
+ MTK_WDMA_GLO_CFG_RX_INFO1_PRERES |
+ MTK_WDMA_GLO_CFG_RX_INFO2_PRERES);
+ wed_set(dev, MTK_WED_WPDMA_CTRL, MTK_WED_WPDMA_CTRL_SDL1_FIXED);
+ } else {
+ wed_set(dev, MTK_WED_WPDMA_GLO_CFG,
+ MTK_WED_WPDMA_GLO_CFG_TX_DRV_EN |
+ MTK_WED_WPDMA_GLO_CFG_RX_DRV_EN |
+ MTK_WED_WPDMA_GLO_CFG_RX_DDONE2_WR);
+ wdma_set(dev, MTK_WDMA_GLO_CFG, MTK_WDMA_GLO_CFG_TX_DMA_EN);
+ }
wed_set(dev, MTK_WED_GLO_CFG,
MTK_WED_GLO_CFG_TX_DMA_EN |
MTK_WED_GLO_CFG_RX_DMA_EN);
- wed_set(dev, MTK_WED_WPDMA_GLO_CFG,
- MTK_WED_WPDMA_GLO_CFG_TX_DRV_EN |
- MTK_WED_WPDMA_GLO_CFG_RX_DRV_EN);
+
wed_set(dev, MTK_WED_WDMA_GLO_CFG,
MTK_WED_WDMA_GLO_CFG_RX_DRV_EN);
- wdma_set(dev, MTK_WDMA_GLO_CFG,
- MTK_WDMA_GLO_CFG_TX_DMA_EN |
- MTK_WDMA_GLO_CFG_RX_INFO1_PRERES |
- MTK_WDMA_GLO_CFG_RX_INFO2_PRERES);
-
- if (dev->hw->version == 1) {
+ if (mtk_wed_is_v1(dev->hw)) {
wdma_set(dev, MTK_WDMA_GLO_CFG,
MTK_WDMA_GLO_CFG_RX_INFO3_PRERES);
- } else {
- int i;
+ return;
+ }
- wed_set(dev, MTK_WED_WPDMA_CTRL,
- MTK_WED_WPDMA_CTRL_SDL1_FIXED);
+ wed_set(dev, MTK_WED_WPDMA_GLO_CFG,
+ MTK_WED_WPDMA_GLO_CFG_RX_DRV_R0_PKT_PROC |
+ MTK_WED_WPDMA_GLO_CFG_RX_DRV_R0_CRX_SYNC);
- wed_set(dev, MTK_WED_WDMA_GLO_CFG,
- MTK_WED_WDMA_GLO_CFG_TX_DRV_EN |
- MTK_WED_WDMA_GLO_CFG_TX_DDONE_CHK);
+ if (mtk_wed_is_v3_or_greater(dev->hw)) {
+ wed_set(dev, MTK_WED_WDMA_RX_PREF_CFG,
+ FIELD_PREP(MTK_WED_WDMA_RX_PREF_BURST_SIZE, 0x10) |
+ FIELD_PREP(MTK_WED_WDMA_RX_PREF_LOW_THRES, 0x8));
+ wed_clr(dev, MTK_WED_WDMA_RX_PREF_CFG,
+ MTK_WED_WDMA_RX_PREF_DDONE2_EN);
+ wed_set(dev, MTK_WED_WDMA_RX_PREF_CFG, MTK_WED_WDMA_RX_PREF_EN);
+ wed_clr(dev, MTK_WED_WPDMA_GLO_CFG,
+ MTK_WED_WPDMA_GLO_CFG_TX_DDONE_CHK_LAST);
wed_set(dev, MTK_WED_WPDMA_GLO_CFG,
- MTK_WED_WPDMA_GLO_CFG_RX_DRV_R0_PKT_PROC |
- MTK_WED_WPDMA_GLO_CFG_RX_DRV_R0_CRX_SYNC);
+ MTK_WED_WPDMA_GLO_CFG_TX_DDONE_CHK |
+ MTK_WED_WPDMA_GLO_CFG_RX_DRV_EVENT_PKT_FMT_CHK |
+ MTK_WED_WPDMA_GLO_CFG_RX_DRV_UNS_VER_FORCE_4);
- wed_clr(dev, MTK_WED_WPDMA_GLO_CFG,
- MTK_WED_WPDMA_GLO_CFG_TX_TKID_KEEP |
- MTK_WED_WPDMA_GLO_CFG_TX_DMAD_DW3_PREV);
+ wdma_set(dev, MTK_WDMA_PREF_RX_CFG, MTK_WDMA_PREF_RX_CFG_PREF_EN);
+ wdma_set(dev, MTK_WDMA_WRBK_RX_CFG, MTK_WDMA_WRBK_RX_CFG_WRBK_EN);
+ }
- wed_set(dev, MTK_WED_WPDMA_RX_D_GLO_CFG,
- MTK_WED_WPDMA_RX_D_RX_DRV_EN |
- FIELD_PREP(MTK_WED_WPDMA_RX_D_RXD_READ_LEN, 0x18) |
- FIELD_PREP(MTK_WED_WPDMA_RX_D_INIT_PHASE_RXEN_SEL,
- 0x2));
+ wed_clr(dev, MTK_WED_WPDMA_GLO_CFG,
+ MTK_WED_WPDMA_GLO_CFG_TX_TKID_KEEP |
+ MTK_WED_WPDMA_GLO_CFG_TX_DMAD_DW3_PREV);
+
+ if (!mtk_wed_get_rx_capa(dev))
+ return;
+
+ wed_set(dev, MTK_WED_WDMA_GLO_CFG,
+ MTK_WED_WDMA_GLO_CFG_TX_DRV_EN |
+ MTK_WED_WDMA_GLO_CFG_TX_DDONE_CHK);
+
+ wed_clr(dev, MTK_WED_WPDMA_RX_D_GLO_CFG, MTK_WED_WPDMA_RX_D_RXD_READ_LEN);
+ wed_set(dev, MTK_WED_WPDMA_RX_D_GLO_CFG,
+ MTK_WED_WPDMA_RX_D_RX_DRV_EN |
+ FIELD_PREP(MTK_WED_WPDMA_RX_D_RXD_READ_LEN, 0x18) |
+ FIELD_PREP(MTK_WED_WPDMA_RX_D_INIT_PHASE_RXEN_SEL, 0x2));
+
+ if (mtk_wed_is_v3_or_greater(dev->hw)) {
+ wed_set(dev, MTK_WED_WPDMA_RX_D_PREF_CFG,
+ MTK_WED_WPDMA_RX_D_PREF_EN |
+ FIELD_PREP(MTK_WED_WPDMA_RX_D_PREF_BURST_SIZE, 0x10) |
+ FIELD_PREP(MTK_WED_WPDMA_RX_D_PREF_LOW_THRES, 0x8));
+
+ wed_set(dev, MTK_WED_RRO_RX_D_CFG(2), MTK_WED_RRO_RX_D_DRV_EN);
+ wdma_set(dev, MTK_WDMA_PREF_TX_CFG, MTK_WDMA_PREF_TX_CFG_PREF_EN);
+ wdma_set(dev, MTK_WDMA_WRBK_TX_CFG, MTK_WDMA_WRBK_TX_CFG_WRBK_EN);
+ }
+
+ for (i = 0; i < MTK_WED_RX_QUEUES; i++) {
+ struct mtk_wed_ring *ring = &dev->rx_ring[i];
+ u32 val;
+
+ if (!(ring->flags & MTK_WED_RING_CONFIGURED))
+ continue; /* queue is not configured by mt76 */
+
+ if (mtk_wed_check_wfdma_rx_fill(dev, ring)) {
+ dev_err(dev->hw->dev,
+ "rx_ring(%d) dma enable failed\n", i);
+ continue;
+ }
+
+ val = wifi_r32(dev,
+ dev->wlan.wpdma_rx_glo -
+ dev->wlan.phy_base) | MTK_WFMDA_RX_DMA_EN;
+ wifi_w32(dev,
+ dev->wlan.wpdma_rx_glo - dev->wlan.phy_base,
+ val);
+ }
+}
+
+static void
+mtk_wed_start_hw_rro(struct mtk_wed_device *dev, u32 irq_mask, bool reset)
+{
+ int i;
+
+ wed_w32(dev, MTK_WED_WPDMA_INT_MASK, irq_mask);
+ wed_w32(dev, MTK_WED_INT_MASK, irq_mask);
+
+ if (!mtk_wed_get_rx_capa(dev) || !dev->wlan.hw_rro)
+ return;
+
+ if (reset) {
+ wed_set(dev, MTK_WED_RRO_MSDU_PG_RING2_CFG,
+ MTK_WED_RRO_MSDU_PG_DRV_EN);
+ return;
+ }
+
+ wed_set(dev, MTK_WED_RRO_RX_D_CFG(2), MTK_WED_RRO_MSDU_PG_DRV_CLR);
+ wed_w32(dev, MTK_WED_RRO_MSDU_PG_RING2_CFG,
+ MTK_WED_RRO_MSDU_PG_DRV_CLR);
+
+ wed_w32(dev, MTK_WED_WPDMA_INT_CTRL_RRO_RX,
+ MTK_WED_WPDMA_INT_CTRL_RRO_RX0_EN |
+ MTK_WED_WPDMA_INT_CTRL_RRO_RX0_CLR |
+ MTK_WED_WPDMA_INT_CTRL_RRO_RX1_EN |
+ MTK_WED_WPDMA_INT_CTRL_RRO_RX1_CLR |
+ FIELD_PREP(MTK_WED_WPDMA_INT_CTRL_RRO_RX0_DONE_TRIG,
+ dev->wlan.rro_rx_tbit[0]) |
+ FIELD_PREP(MTK_WED_WPDMA_INT_CTRL_RRO_RX1_DONE_TRIG,
+ dev->wlan.rro_rx_tbit[1]));
+
+ wed_w32(dev, MTK_WED_WPDMA_INT_CTRL_RRO_MSDU_PG,
+ MTK_WED_WPDMA_INT_CTRL_RRO_PG0_EN |
+ MTK_WED_WPDMA_INT_CTRL_RRO_PG0_CLR |
+ MTK_WED_WPDMA_INT_CTRL_RRO_PG1_EN |
+ MTK_WED_WPDMA_INT_CTRL_RRO_PG1_CLR |
+ MTK_WED_WPDMA_INT_CTRL_RRO_PG2_EN |
+ MTK_WED_WPDMA_INT_CTRL_RRO_PG2_CLR |
+ FIELD_PREP(MTK_WED_WPDMA_INT_CTRL_RRO_PG0_DONE_TRIG,
+ dev->wlan.rx_pg_tbit[0]) |
+ FIELD_PREP(MTK_WED_WPDMA_INT_CTRL_RRO_PG1_DONE_TRIG,
+ dev->wlan.rx_pg_tbit[1]) |
+ FIELD_PREP(MTK_WED_WPDMA_INT_CTRL_RRO_PG2_DONE_TRIG,
+ dev->wlan.rx_pg_tbit[2]));
+
+ /* RRO_MSDU_PG_RING2_CFG1_FLD_DRV_EN should be enabled after
+ * WM FWDL completed, otherwise RRO_MSDU_PG ring may broken
+ */
+ wed_set(dev, MTK_WED_RRO_MSDU_PG_RING2_CFG,
+ MTK_WED_RRO_MSDU_PG_DRV_EN);
+
+ for (i = 0; i < MTK_WED_RX_QUEUES; i++) {
+ struct mtk_wed_ring *ring = &dev->rx_rro_ring[i];
+
+ if (!(ring->flags & MTK_WED_RING_CONFIGURED))
+ continue;
+
+ if (mtk_wed_check_wfdma_rx_fill(dev, ring))
+ dev_err(dev->hw->dev,
+ "rx_rro_ring(%d) initialization failed\n", i);
+ }
+
+ for (i = 0; i < MTK_WED_RX_PAGE_QUEUES; i++) {
+ struct mtk_wed_ring *ring = &dev->rx_page_ring[i];
+
+ if (!(ring->flags & MTK_WED_RING_CONFIGURED))
+ continue;
+
+ if (mtk_wed_check_wfdma_rx_fill(dev, ring))
+ dev_err(dev->hw->dev,
+ "rx_page_ring(%d) initialization failed\n", i);
+ }
+}
+
+static void
+mtk_wed_rro_rx_ring_setup(struct mtk_wed_device *dev, int idx,
+ void __iomem *regs)
+{
+ struct mtk_wed_ring *ring = &dev->rx_rro_ring[idx];
+
+ ring->wpdma = regs;
+ wed_w32(dev, MTK_WED_RRO_RX_D_RX(idx) + MTK_WED_RING_OFS_BASE,
+ readl(regs));
+ wed_w32(dev, MTK_WED_RRO_RX_D_RX(idx) + MTK_WED_RING_OFS_COUNT,
+ readl(regs + MTK_WED_RING_OFS_COUNT));
+ ring->flags |= MTK_WED_RING_CONFIGURED;
+}
+
+static void
+mtk_wed_msdu_pg_rx_ring_setup(struct mtk_wed_device *dev, int idx, void __iomem *regs)
+{
+ struct mtk_wed_ring *ring = &dev->rx_page_ring[idx];
+
+ ring->wpdma = regs;
+ wed_w32(dev, MTK_WED_RRO_MSDU_PG_CTRL0(idx) + MTK_WED_RING_OFS_BASE,
+ readl(regs));
+ wed_w32(dev, MTK_WED_RRO_MSDU_PG_CTRL0(idx) + MTK_WED_RING_OFS_COUNT,
+ readl(regs + MTK_WED_RING_OFS_COUNT));
+ ring->flags |= MTK_WED_RING_CONFIGURED;
+}
+
+static int
+mtk_wed_ind_rx_ring_setup(struct mtk_wed_device *dev, void __iomem *regs)
+{
+ struct mtk_wed_ring *ring = &dev->ind_cmd_ring;
+ u32 val = readl(regs + MTK_WED_RING_OFS_COUNT);
+ int i, count = 0;
- for (i = 0; i < MTK_WED_RX_QUEUES; i++)
- mtk_wed_check_wfdma_rx_fill(dev, i);
+ ring->wpdma = regs;
+ wed_w32(dev, MTK_WED_IND_CMD_RX_CTRL1 + MTK_WED_RING_OFS_BASE,
+ readl(regs) & 0xfffffff0);
+
+ wed_w32(dev, MTK_WED_IND_CMD_RX_CTRL1 + MTK_WED_RING_OFS_COUNT,
+ readl(regs + MTK_WED_RING_OFS_COUNT));
+
+ /* ack sn cr */
+ wed_w32(dev, MTK_WED_RRO_CFG0, dev->wlan.phy_base +
+ dev->wlan.ind_cmd.ack_sn_addr);
+ wed_w32(dev, MTK_WED_RRO_CFG1,
+ FIELD_PREP(MTK_WED_RRO_CFG1_MAX_WIN_SZ,
+ dev->wlan.ind_cmd.win_size) |
+ FIELD_PREP(MTK_WED_RRO_CFG1_PARTICL_SE_ID,
+ dev->wlan.ind_cmd.particular_sid));
+
+ /* particular session addr element */
+ wed_w32(dev, MTK_WED_ADDR_ELEM_CFG0,
+ dev->wlan.ind_cmd.particular_se_phys);
+
+ for (i = 0; i < dev->wlan.ind_cmd.se_group_nums; i++) {
+ wed_w32(dev, MTK_WED_RADDR_ELEM_TBL_WDATA,
+ dev->wlan.ind_cmd.addr_elem_phys[i] >> 4);
+ wed_w32(dev, MTK_WED_ADDR_ELEM_TBL_CFG,
+ MTK_WED_ADDR_ELEM_TBL_WR | (i & 0x7f));
+
+ val = wed_r32(dev, MTK_WED_ADDR_ELEM_TBL_CFG);
+ while (!(val & MTK_WED_ADDR_ELEM_TBL_WR_RDY) && count++ < 100)
+ val = wed_r32(dev, MTK_WED_ADDR_ELEM_TBL_CFG);
+ if (count >= 100)
+ dev_err(dev->hw->dev,
+ "write ba session base failed\n");
+ }
+
+ /* pn check init */
+ for (i = 0; i < dev->wlan.ind_cmd.particular_sid; i++) {
+ wed_w32(dev, MTK_WED_PN_CHECK_WDATA_M,
+ MTK_WED_PN_CHECK_IS_FIRST);
+
+ wed_w32(dev, MTK_WED_PN_CHECK_CFG, MTK_WED_PN_CHECK_WR |
+ FIELD_PREP(MTK_WED_PN_CHECK_SE_ID, i));
+
+ count = 0;
+ val = wed_r32(dev, MTK_WED_PN_CHECK_CFG);
+ while (!(val & MTK_WED_PN_CHECK_WR_RDY) && count++ < 100)
+ val = wed_r32(dev, MTK_WED_PN_CHECK_CFG);
+ if (count >= 100)
+ dev_err(dev->hw->dev,
+ "session(%d) initialization failed\n", i);
}
+
+ wed_w32(dev, MTK_WED_RX_IND_CMD_CNT0, MTK_WED_RX_IND_CMD_DBG_CNT_EN);
+ wed_set(dev, MTK_WED_CTRL, MTK_WED_CTRL_WED_RX_IND_CMD_EN);
+
+ return 0;
}
static void
@@ -1466,14 +2339,14 @@ mtk_wed_start(struct mtk_wed_device *dev, u32 irq_mask)
mtk_wed_set_ext_int(dev, true);
- if (dev->hw->version == 1) {
+ if (mtk_wed_is_v1(dev->hw)) {
u32 val = dev->wlan.wpdma_phys | MTK_PCIE_MIRROR_MAP_EN |
FIELD_PREP(MTK_PCIE_MIRROR_MAP_WED_ID,
dev->hw->index);
val |= BIT(0) | (BIT(1) * !!dev->hw->index);
regmap_write(dev->hw->mirror, dev->hw->index * 4, val);
- } else {
+ } else if (mtk_wed_get_rx_capa(dev)) {
/* driver set mid ready and only once */
wed_w32(dev, MTK_WED_EXT_INT_MASK1,
MTK_WED_EXT_INT_STATUS_WPDMA_MID_RDY);
@@ -1483,12 +2356,18 @@ mtk_wed_start(struct mtk_wed_device *dev, u32 irq_mask)
wed_r32(dev, MTK_WED_EXT_INT_MASK1);
wed_r32(dev, MTK_WED_EXT_INT_MASK2);
+ if (mtk_wed_is_v3_or_greater(dev->hw)) {
+ wed_w32(dev, MTK_WED_EXT_INT_MASK3,
+ MTK_WED_EXT_INT_STATUS_WPDMA_MID_RDY);
+ wed_r32(dev, MTK_WED_EXT_INT_MASK3);
+ }
+
if (mtk_wed_rro_cfg(dev))
return;
-
}
mtk_wed_set_512_support(dev, dev->wlan.wcid_512);
+ mtk_wed_amsdu_init(dev);
mtk_wed_dma_enable(dev);
dev->running = true;
@@ -1535,6 +2414,7 @@ mtk_wed_attach(struct mtk_wed_device *dev)
dev->irq = hw->irq;
dev->wdma_idx = hw->index;
dev->version = hw->version;
+ dev->hw->pcie_base = mtk_wed_get_pcie_base(dev);
if (hw->eth->dma_dev == hw->eth->dev &&
of_dma_is_coherent(hw->eth->dev->of_node))
@@ -1544,6 +2424,10 @@ mtk_wed_attach(struct mtk_wed_device *dev)
if (ret)
goto out;
+ ret = mtk_wed_amsdu_buffer_alloc(dev);
+ if (ret)
+ goto out;
+
if (mtk_wed_get_rx_capa(dev)) {
ret = mtk_wed_rro_alloc(dev);
if (ret)
@@ -1551,13 +2435,14 @@ mtk_wed_attach(struct mtk_wed_device *dev)
}
mtk_wed_hw_init_early(dev);
- if (hw->version == 1) {
+ if (mtk_wed_is_v1(hw))
regmap_update_bits(hw->hifsys, HIFSYS_DMA_AG_MAP,
BIT(hw->index), 0);
- } else {
+ else
dev->rev_id = wed_r32(dev, MTK_WED_REV_ID);
+
+ if (mtk_wed_get_rx_capa(dev))
ret = mtk_wed_wo_init(hw);
- }
out:
if (ret) {
dev_err(dev->hw->dev, "failed to attach wed device\n");
@@ -1601,6 +2486,23 @@ mtk_wed_tx_ring_setup(struct mtk_wed_device *dev, int idx, void __iomem *regs,
ring->reg_base = MTK_WED_RING_TX(idx);
ring->wpdma = regs;
+ if (mtk_wed_is_v3_or_greater(dev->hw) && idx == 1) {
+ /* reset prefetch index */
+ wed_set(dev, MTK_WED_WDMA_RX_PREF_CFG,
+ MTK_WED_WDMA_RX_PREF_RX0_SIDX_CLR |
+ MTK_WED_WDMA_RX_PREF_RX1_SIDX_CLR);
+
+ wed_clr(dev, MTK_WED_WDMA_RX_PREF_CFG,
+ MTK_WED_WDMA_RX_PREF_RX0_SIDX_CLR |
+ MTK_WED_WDMA_RX_PREF_RX1_SIDX_CLR);
+
+ /* reset prefetch FIFO */
+ wed_w32(dev, MTK_WED_WDMA_RX_PREF_FIFO_CFG,
+ MTK_WED_WDMA_RX_PREF_FIFO_RX0_CLR |
+ MTK_WED_WDMA_RX_PREF_FIFO_RX1_CLR);
+ wed_w32(dev, MTK_WED_WDMA_RX_PREF_FIFO_CFG, 0);
+ }
+
/* WED -> WPDMA */
wpdma_tx_w32(dev, idx, MTK_WED_RING_OFS_BASE, ring->desc_phys);
wpdma_tx_w32(dev, idx, MTK_WED_RING_OFS_COUNT, MTK_WED_TX_RING_SIZE);
@@ -1619,7 +2521,7 @@ static int
mtk_wed_txfree_ring_setup(struct mtk_wed_device *dev, void __iomem *regs)
{
struct mtk_wed_ring *ring = &dev->txfree_ring;
- int i, index = dev->hw->version == 1;
+ int i, index = mtk_wed_is_v1(dev->hw);
/*
* For txfree event handling, the same DMA ring is shared between WED
@@ -1675,15 +2577,13 @@ mtk_wed_rx_ring_setup(struct mtk_wed_device *dev, int idx, void __iomem *regs,
static u32
mtk_wed_irq_get(struct mtk_wed_device *dev, u32 mask)
{
- u32 val, ext_mask = MTK_WED_EXT_INT_STATUS_ERROR_MASK;
+ u32 val, ext_mask;
- if (dev->hw->version == 1)
- ext_mask |= MTK_WED_EXT_INT_STATUS_TX_DRV_R_RESP_ERR;
+ if (mtk_wed_is_v3_or_greater(dev->hw))
+ ext_mask = MTK_WED_EXT_INT_STATUS_RX_DRV_COHERENT |
+ MTK_WED_EXT_INT_STATUS_TKID_WO_PYLD;
else
- ext_mask |= MTK_WED_EXT_INT_STATUS_RX_FBUF_LO_TH |
- MTK_WED_EXT_INT_STATUS_RX_FBUF_HI_TH |
- MTK_WED_EXT_INT_STATUS_RX_DRV_COHERENT |
- MTK_WED_EXT_INT_STATUS_TX_DMA_W_RESP_ERR;
+ ext_mask = MTK_WED_EXT_INT_STATUS_ERROR_MASK;
val = wed_r32(dev, MTK_WED_EXT_INT_STATUS);
wed_w32(dev, MTK_WED_EXT_INT_STATUS, val);
@@ -1713,19 +2613,20 @@ mtk_wed_irq_set_mask(struct mtk_wed_device *dev, u32 mask)
int mtk_wed_flow_add(int index)
{
struct mtk_wed_hw *hw = hw_list[index];
- int ret;
+ int ret = 0;
- if (!hw || !hw->wed_dev)
- return -ENODEV;
+ mutex_lock(&hw_lock);
- if (hw->num_flows) {
- hw->num_flows++;
- return 0;
+ if (!hw || !hw->wed_dev) {
+ ret = -ENODEV;
+ goto out;
}
- mutex_lock(&hw_lock);
- if (!hw->wed_dev) {
- ret = -ENODEV;
+ if (!hw->wed_dev->wlan.offload_enable)
+ goto out;
+
+ if (hw->num_flows) {
+ hw->num_flows++;
goto out;
}
@@ -1744,14 +2645,15 @@ void mtk_wed_flow_remove(int index)
{
struct mtk_wed_hw *hw = hw_list[index];
- if (!hw)
- return;
+ mutex_lock(&hw_lock);
- if (--hw->num_flows)
- return;
+ if (!hw || !hw->wed_dev)
+ goto out;
- mutex_lock(&hw_lock);
- if (!hw->wed_dev)
+ if (!hw->wed_dev->wlan.offload_disable)
+ goto out;
+
+ if (--hw->num_flows)
goto out;
hw->wed_dev->wlan.offload_disable(hw->wed_dev);
@@ -1842,7 +2744,7 @@ mtk_wed_setup_tc(struct mtk_wed_device *wed, struct net_device *dev,
{
struct mtk_wed_hw *hw = wed->hw;
- if (hw->version < 2)
+ if (mtk_wed_is_v1(hw))
return -EOPNOTSUPP;
switch (type) {
@@ -1874,6 +2776,10 @@ void mtk_wed_add_hw(struct device_node *np, struct mtk_eth *eth,
.detach = mtk_wed_detach,
.ppe_check = mtk_wed_ppe_check,
.setup_tc = mtk_wed_setup_tc,
+ .start_hw_rro = mtk_wed_start_hw_rro,
+ .rro_rx_ring_setup = mtk_wed_rro_rx_ring_setup,
+ .msdu_pg_rx_ring_setup = mtk_wed_msdu_pg_rx_ring_setup,
+ .ind_rx_ring_setup = mtk_wed_ind_rx_ring_setup,
};
struct device_node *eth_np = eth->dev->of_node;
struct platform_device *pdev;
@@ -1916,9 +2822,17 @@ void mtk_wed_add_hw(struct device_node *np, struct mtk_eth *eth,
hw->wdma = wdma;
hw->index = index;
hw->irq = irq;
- hw->version = mtk_is_netsys_v1(eth) ? 1 : 2;
+ hw->version = eth->soc->version;
- if (hw->version == 1) {
+ switch (hw->version) {
+ case 2:
+ hw->soc = &mt7986_data;
+ break;
+ case 3:
+ hw->soc = &mt7988_data;
+ break;
+ default:
+ case 1:
hw->mirror = syscon_regmap_lookup_by_phandle(eth_np,
"mediatek,pcie-mirror");
hw->hifsys = syscon_regmap_lookup_by_phandle(eth_np,
@@ -1932,6 +2846,8 @@ void mtk_wed_add_hw(struct device_node *np, struct mtk_eth *eth,
regmap_write(hw->mirror, 0, 0);
regmap_write(hw->mirror, 4, 0);
}
+ hw->soc = &mt7622_data;
+ break;
}
mtk_wed_hw_add_debugfs(hw);
diff --git a/drivers/net/ethernet/mediatek/mtk_wed.h b/drivers/net/ethernet/mediatek/mtk_wed.h
index 43ab77eaf683..c1f0479d7a71 100644
--- a/drivers/net/ethernet/mediatek/mtk_wed.h
+++ b/drivers/net/ethernet/mediatek/mtk_wed.h
@@ -9,10 +9,29 @@
#include <linux/regmap.h>
#include <linux/netdevice.h>
+#include "mtk_wed_regs.h"
+
struct mtk_eth;
struct mtk_wed_wo;
+struct mtk_wed_soc_data {
+ struct {
+ u32 tx_bm_tkid;
+ u32 wpdma_rx_ring0;
+ u32 reset_idx_tx_mask;
+ u32 reset_idx_rx_mask;
+ } regmap;
+ u32 tx_ring_desc_size;
+ u32 wdma_desc_size;
+};
+
+struct mtk_wed_amsdu {
+ void *txd;
+ dma_addr_t txd_phy;
+};
+
struct mtk_wed_hw {
+ const struct mtk_wed_soc_data *soc;
struct device_node *node;
struct mtk_eth *eth;
struct regmap *regs;
@@ -24,6 +43,8 @@ struct mtk_wed_hw {
struct dentry *debugfs_dir;
struct mtk_wed_device *wed_dev;
struct mtk_wed_wo *wed_wo;
+ struct mtk_wed_amsdu *wed_amsdu;
+ u32 pcie_base;
u32 debugfs_reg;
u32 num_flows;
u8 version;
@@ -37,9 +58,30 @@ struct mtk_wdma_info {
u8 queue;
u16 wcid;
u8 bss;
+ u8 amsdu;
};
#ifdef CONFIG_NET_MEDIATEK_SOC_WED
+static inline bool mtk_wed_is_v1(struct mtk_wed_hw *hw)
+{
+ return hw->version == 1;
+}
+
+static inline bool mtk_wed_is_v2(struct mtk_wed_hw *hw)
+{
+ return hw->version == 2;
+}
+
+static inline bool mtk_wed_is_v3(struct mtk_wed_hw *hw)
+{
+ return hw->version == 3;
+}
+
+static inline bool mtk_wed_is_v3_or_greater(struct mtk_wed_hw *hw)
+{
+ return hw->version > 2;
+}
+
static inline void
wed_w32(struct mtk_wed_device *dev, u32 reg, u32 val)
{
@@ -122,6 +164,21 @@ wpdma_txfree_w32(struct mtk_wed_device *dev, u32 reg, u32 val)
writel(val, dev->txfree_ring.wpdma + reg);
}
+static inline u32 mtk_wed_get_pcie_base(struct mtk_wed_device *dev)
+{
+ if (!mtk_wed_is_v3_or_greater(dev->hw))
+ return MTK_WED_PCIE_BASE;
+
+ switch (dev->hw->index) {
+ case 1:
+ return MTK_WED_PCIE_BASE1;
+ case 2:
+ return MTK_WED_PCIE_BASE2;
+ default:
+ return MTK_WED_PCIE_BASE0;
+ }
+}
+
void mtk_wed_add_hw(struct device_node *np, struct mtk_eth *eth,
void __iomem *wdma, phys_addr_t wdma_phy,
int index);
diff --git a/drivers/net/ethernet/mediatek/mtk_wed_debugfs.c b/drivers/net/ethernet/mediatek/mtk_wed_debugfs.c
index e24afeaea0da..781c691473e1 100644
--- a/drivers/net/ethernet/mediatek/mtk_wed_debugfs.c
+++ b/drivers/net/ethernet/mediatek/mtk_wed_debugfs.c
@@ -11,6 +11,7 @@ struct reg_dump {
u16 offset;
u8 type;
u8 base;
+ u32 mask;
};
enum {
@@ -25,6 +26,8 @@ enum {
#define DUMP_STR(_str) { _str, 0, DUMP_TYPE_STRING }
#define DUMP_REG(_reg, ...) { #_reg, MTK_##_reg, __VA_ARGS__ }
+#define DUMP_REG_MASK(_reg, _mask) \
+ { #_mask, MTK_##_reg, DUMP_TYPE_WED, 0, MTK_##_mask }
#define DUMP_RING(_prefix, _base, ...) \
{ _prefix " BASE", _base, __VA_ARGS__ }, \
{ _prefix " CNT", _base + 0x4, __VA_ARGS__ }, \
@@ -32,6 +35,7 @@ enum {
{ _prefix " DIDX", _base + 0xc, __VA_ARGS__ }
#define DUMP_WED(_reg) DUMP_REG(_reg, DUMP_TYPE_WED)
+#define DUMP_WED_MASK(_reg, _mask) DUMP_REG_MASK(_reg, _mask)
#define DUMP_WED_RING(_base) DUMP_RING(#_base, MTK_##_base, DUMP_TYPE_WED)
#define DUMP_WDMA(_reg) DUMP_REG(_reg, DUMP_TYPE_WDMA)
@@ -151,7 +155,7 @@ DEFINE_SHOW_ATTRIBUTE(wed_txinfo);
static int
wed_rxinfo_show(struct seq_file *s, void *data)
{
- static const struct reg_dump regs[] = {
+ static const struct reg_dump regs_common[] = {
DUMP_STR("WPDMA RX"),
DUMP_WPDMA_RX_RING(0),
DUMP_WPDMA_RX_RING(1),
@@ -169,7 +173,7 @@ wed_rxinfo_show(struct seq_file *s, void *data)
DUMP_WED_RING(WED_RING_RX_DATA(0)),
DUMP_WED_RING(WED_RING_RX_DATA(1)),
- DUMP_STR("WED RRO"),
+ DUMP_STR("WED WO RRO"),
DUMP_WED_RRO_RING(WED_RROQM_MIOD_CTRL0),
DUMP_WED(WED_RROQM_MID_MIB),
DUMP_WED(WED_RROQM_MOD_MIB),
@@ -180,17 +184,6 @@ wed_rxinfo_show(struct seq_file *s, void *data)
DUMP_WED(WED_RROQM_FDBK_ANC_MIB),
DUMP_WED(WED_RROQM_FDBK_ANC2H_MIB),
- DUMP_STR("WED Route QM"),
- DUMP_WED(WED_RTQM_R2H_MIB(0)),
- DUMP_WED(WED_RTQM_R2Q_MIB(0)),
- DUMP_WED(WED_RTQM_Q2H_MIB(0)),
- DUMP_WED(WED_RTQM_R2H_MIB(1)),
- DUMP_WED(WED_RTQM_R2Q_MIB(1)),
- DUMP_WED(WED_RTQM_Q2H_MIB(1)),
- DUMP_WED(WED_RTQM_Q2N_MIB),
- DUMP_WED(WED_RTQM_Q2B_MIB),
- DUMP_WED(WED_RTQM_PFDBK_MIB),
-
DUMP_STR("WED WDMA TX"),
DUMP_WED(WED_WDMA_TX_MIB),
DUMP_WED_RING(WED_WDMA_RING_TX),
@@ -211,6 +204,287 @@ wed_rxinfo_show(struct seq_file *s, void *data)
DUMP_WED(WED_RX_BM_INTF),
DUMP_WED(WED_RX_BM_ERR_STS),
};
+ static const struct reg_dump regs_wed_v2[] = {
+ DUMP_STR("WED Route QM"),
+ DUMP_WED(WED_RTQM_R2H_MIB(0)),
+ DUMP_WED(WED_RTQM_R2Q_MIB(0)),
+ DUMP_WED(WED_RTQM_Q2H_MIB(0)),
+ DUMP_WED(WED_RTQM_R2H_MIB(1)),
+ DUMP_WED(WED_RTQM_R2Q_MIB(1)),
+ DUMP_WED(WED_RTQM_Q2H_MIB(1)),
+ DUMP_WED(WED_RTQM_Q2N_MIB),
+ DUMP_WED(WED_RTQM_Q2B_MIB),
+ DUMP_WED(WED_RTQM_PFDBK_MIB),
+ };
+ static const struct reg_dump regs_wed_v3[] = {
+ DUMP_STR("WED RX RRO DATA"),
+ DUMP_WED_RING(WED_RRO_RX_D_RX(0)),
+ DUMP_WED_RING(WED_RRO_RX_D_RX(1)),
+
+ DUMP_STR("WED RX MSDU PAGE"),
+ DUMP_WED_RING(WED_RRO_MSDU_PG_CTRL0(0)),
+ DUMP_WED_RING(WED_RRO_MSDU_PG_CTRL0(1)),
+ DUMP_WED_RING(WED_RRO_MSDU_PG_CTRL0(2)),
+
+ DUMP_STR("WED RX IND CMD"),
+ DUMP_WED(WED_IND_CMD_RX_CTRL1),
+ DUMP_WED_MASK(WED_IND_CMD_RX_CTRL2, WED_IND_CMD_MAX_CNT),
+ DUMP_WED_MASK(WED_IND_CMD_RX_CTRL0, WED_IND_CMD_PROC_IDX),
+ DUMP_WED_MASK(RRO_IND_CMD_SIGNATURE, RRO_IND_CMD_DMA_IDX),
+ DUMP_WED_MASK(WED_IND_CMD_RX_CTRL0, WED_IND_CMD_MAGIC_CNT),
+ DUMP_WED_MASK(RRO_IND_CMD_SIGNATURE, RRO_IND_CMD_MAGIC_CNT),
+ DUMP_WED_MASK(WED_IND_CMD_RX_CTRL0,
+ WED_IND_CMD_PREFETCH_FREE_CNT),
+ DUMP_WED_MASK(WED_RRO_CFG1, WED_RRO_CFG1_PARTICL_SE_ID),
+
+ DUMP_STR("WED ADDR ELEM"),
+ DUMP_WED(WED_ADDR_ELEM_CFG0),
+ DUMP_WED_MASK(WED_ADDR_ELEM_CFG1,
+ WED_ADDR_ELEM_PREFETCH_FREE_CNT),
+
+ DUMP_STR("WED Route QM"),
+ DUMP_WED(WED_RTQM_ENQ_I2Q_DMAD_CNT),
+ DUMP_WED(WED_RTQM_ENQ_I2N_DMAD_CNT),
+ DUMP_WED(WED_RTQM_ENQ_I2Q_PKT_CNT),
+ DUMP_WED(WED_RTQM_ENQ_I2N_PKT_CNT),
+ DUMP_WED(WED_RTQM_ENQ_USED_ENTRY_CNT),
+ DUMP_WED(WED_RTQM_ENQ_ERR_CNT),
+
+ DUMP_WED(WED_RTQM_DEQ_DMAD_CNT),
+ DUMP_WED(WED_RTQM_DEQ_Q2I_DMAD_CNT),
+ DUMP_WED(WED_RTQM_DEQ_PKT_CNT),
+ DUMP_WED(WED_RTQM_DEQ_Q2I_PKT_CNT),
+ DUMP_WED(WED_RTQM_DEQ_USED_PFDBK_CNT),
+ DUMP_WED(WED_RTQM_DEQ_ERR_CNT),
+ };
+ struct mtk_wed_hw *hw = s->private;
+ struct mtk_wed_device *dev = hw->wed_dev;
+
+ if (dev) {
+ dump_wed_regs(s, dev, regs_common, ARRAY_SIZE(regs_common));
+ if (mtk_wed_is_v2(hw))
+ dump_wed_regs(s, dev,
+ regs_wed_v2, ARRAY_SIZE(regs_wed_v2));
+ else
+ dump_wed_regs(s, dev,
+ regs_wed_v3, ARRAY_SIZE(regs_wed_v3));
+ }
+
+ return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(wed_rxinfo);
+
+static int
+wed_amsdu_show(struct seq_file *s, void *data)
+{
+ static const struct reg_dump regs[] = {
+ DUMP_STR("WED AMDSU INFO"),
+ DUMP_WED(WED_MON_AMSDU_FIFO_DMAD),
+
+ DUMP_STR("WED AMDSU ENG0 INFO"),
+ DUMP_WED(WED_MON_AMSDU_ENG_DMAD(0)),
+ DUMP_WED(WED_MON_AMSDU_ENG_QFPL(0)),
+ DUMP_WED(WED_MON_AMSDU_ENG_QENI(0)),
+ DUMP_WED(WED_MON_AMSDU_ENG_QENO(0)),
+ DUMP_WED(WED_MON_AMSDU_ENG_MERG(0)),
+ DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT8(0),
+ WED_AMSDU_ENG_MAX_PL_CNT),
+ DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT8(0),
+ WED_AMSDU_ENG_MAX_QGPP_CNT),
+ DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT9(0),
+ WED_AMSDU_ENG_CUR_ENTRY),
+ DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT9(0),
+ WED_AMSDU_ENG_MAX_BUF_MERGED),
+ DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT9(0),
+ WED_AMSDU_ENG_MAX_MSDU_MERGED),
+
+ DUMP_STR("WED AMDSU ENG1 INFO"),
+ DUMP_WED(WED_MON_AMSDU_ENG_DMAD(1)),
+ DUMP_WED(WED_MON_AMSDU_ENG_QFPL(1)),
+ DUMP_WED(WED_MON_AMSDU_ENG_QENI(1)),
+ DUMP_WED(WED_MON_AMSDU_ENG_QENO(1)),
+ DUMP_WED(WED_MON_AMSDU_ENG_MERG(1)),
+ DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT8(1),
+ WED_AMSDU_ENG_MAX_PL_CNT),
+ DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT8(1),
+ WED_AMSDU_ENG_MAX_QGPP_CNT),
+ DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT9(1),
+ WED_AMSDU_ENG_CUR_ENTRY),
+ DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT9(2),
+ WED_AMSDU_ENG_MAX_BUF_MERGED),
+ DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT9(2),
+ WED_AMSDU_ENG_MAX_MSDU_MERGED),
+
+ DUMP_STR("WED AMDSU ENG2 INFO"),
+ DUMP_WED(WED_MON_AMSDU_ENG_DMAD(2)),
+ DUMP_WED(WED_MON_AMSDU_ENG_QFPL(2)),
+ DUMP_WED(WED_MON_AMSDU_ENG_QENI(2)),
+ DUMP_WED(WED_MON_AMSDU_ENG_QENO(2)),
+ DUMP_WED(WED_MON_AMSDU_ENG_MERG(2)),
+ DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT8(2),
+ WED_AMSDU_ENG_MAX_PL_CNT),
+ DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT8(2),
+ WED_AMSDU_ENG_MAX_QGPP_CNT),
+ DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT9(2),
+ WED_AMSDU_ENG_CUR_ENTRY),
+ DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT9(2),
+ WED_AMSDU_ENG_MAX_BUF_MERGED),
+ DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT9(2),
+ WED_AMSDU_ENG_MAX_MSDU_MERGED),
+
+ DUMP_STR("WED AMDSU ENG3 INFO"),
+ DUMP_WED(WED_MON_AMSDU_ENG_DMAD(3)),
+ DUMP_WED(WED_MON_AMSDU_ENG_QFPL(3)),
+ DUMP_WED(WED_MON_AMSDU_ENG_QENI(3)),
+ DUMP_WED(WED_MON_AMSDU_ENG_QENO(3)),
+ DUMP_WED(WED_MON_AMSDU_ENG_MERG(3)),
+ DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT8(3),
+ WED_AMSDU_ENG_MAX_PL_CNT),
+ DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT8(3),
+ WED_AMSDU_ENG_MAX_QGPP_CNT),
+ DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT9(3),
+ WED_AMSDU_ENG_CUR_ENTRY),
+ DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT9(3),
+ WED_AMSDU_ENG_MAX_BUF_MERGED),
+ DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT9(3),
+ WED_AMSDU_ENG_MAX_MSDU_MERGED),
+
+ DUMP_STR("WED AMDSU ENG4 INFO"),
+ DUMP_WED(WED_MON_AMSDU_ENG_DMAD(4)),
+ DUMP_WED(WED_MON_AMSDU_ENG_QFPL(4)),
+ DUMP_WED(WED_MON_AMSDU_ENG_QENI(4)),
+ DUMP_WED(WED_MON_AMSDU_ENG_QENO(4)),
+ DUMP_WED(WED_MON_AMSDU_ENG_MERG(4)),
+ DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT8(4),
+ WED_AMSDU_ENG_MAX_PL_CNT),
+ DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT8(4),
+ WED_AMSDU_ENG_MAX_QGPP_CNT),
+ DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT9(4),
+ WED_AMSDU_ENG_CUR_ENTRY),
+ DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT9(4),
+ WED_AMSDU_ENG_MAX_BUF_MERGED),
+ DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT9(4),
+ WED_AMSDU_ENG_MAX_MSDU_MERGED),
+
+ DUMP_STR("WED AMDSU ENG5 INFO"),
+ DUMP_WED(WED_MON_AMSDU_ENG_DMAD(5)),
+ DUMP_WED(WED_MON_AMSDU_ENG_QFPL(5)),
+ DUMP_WED(WED_MON_AMSDU_ENG_QENI(5)),
+ DUMP_WED(WED_MON_AMSDU_ENG_QENO(5)),
+ DUMP_WED(WED_MON_AMSDU_ENG_MERG(5)),
+ DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT8(5),
+ WED_AMSDU_ENG_MAX_PL_CNT),
+ DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT8(5),
+ WED_AMSDU_ENG_MAX_QGPP_CNT),
+ DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT9(5),
+ WED_AMSDU_ENG_CUR_ENTRY),
+ DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT9(5),
+ WED_AMSDU_ENG_MAX_BUF_MERGED),
+ DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT9(5),
+ WED_AMSDU_ENG_MAX_MSDU_MERGED),
+
+ DUMP_STR("WED AMDSU ENG6 INFO"),
+ DUMP_WED(WED_MON_AMSDU_ENG_DMAD(6)),
+ DUMP_WED(WED_MON_AMSDU_ENG_QFPL(6)),
+ DUMP_WED(WED_MON_AMSDU_ENG_QENI(6)),
+ DUMP_WED(WED_MON_AMSDU_ENG_QENO(6)),
+ DUMP_WED(WED_MON_AMSDU_ENG_MERG(6)),
+ DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT8(6),
+ WED_AMSDU_ENG_MAX_PL_CNT),
+ DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT8(6),
+ WED_AMSDU_ENG_MAX_QGPP_CNT),
+ DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT9(6),
+ WED_AMSDU_ENG_CUR_ENTRY),
+ DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT9(6),
+ WED_AMSDU_ENG_MAX_BUF_MERGED),
+ DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT9(6),
+ WED_AMSDU_ENG_MAX_MSDU_MERGED),
+
+ DUMP_STR("WED AMDSU ENG7 INFO"),
+ DUMP_WED(WED_MON_AMSDU_ENG_DMAD(7)),
+ DUMP_WED(WED_MON_AMSDU_ENG_QFPL(7)),
+ DUMP_WED(WED_MON_AMSDU_ENG_QENI(7)),
+ DUMP_WED(WED_MON_AMSDU_ENG_QENO(7)),
+ DUMP_WED(WED_MON_AMSDU_ENG_MERG(7)),
+ DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT8(7),
+ WED_AMSDU_ENG_MAX_PL_CNT),
+ DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT8(7),
+ WED_AMSDU_ENG_MAX_QGPP_CNT),
+ DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT9(7),
+ WED_AMSDU_ENG_CUR_ENTRY),
+ DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT9(7),
+ WED_AMSDU_ENG_MAX_BUF_MERGED),
+ DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT9(4),
+ WED_AMSDU_ENG_MAX_MSDU_MERGED),
+
+ DUMP_STR("WED AMDSU ENG8 INFO"),
+ DUMP_WED(WED_MON_AMSDU_ENG_DMAD(8)),
+ DUMP_WED(WED_MON_AMSDU_ENG_QFPL(8)),
+ DUMP_WED(WED_MON_AMSDU_ENG_QENI(8)),
+ DUMP_WED(WED_MON_AMSDU_ENG_QENO(8)),
+ DUMP_WED(WED_MON_AMSDU_ENG_MERG(8)),
+ DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT8(8),
+ WED_AMSDU_ENG_MAX_PL_CNT),
+ DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT8(8),
+ WED_AMSDU_ENG_MAX_QGPP_CNT),
+ DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT9(8),
+ WED_AMSDU_ENG_CUR_ENTRY),
+ DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT9(8),
+ WED_AMSDU_ENG_MAX_BUF_MERGED),
+ DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT9(8),
+ WED_AMSDU_ENG_MAX_MSDU_MERGED),
+
+ DUMP_STR("WED QMEM INFO"),
+ DUMP_WED_MASK(WED_MON_AMSDU_QMEM_CNT(0), WED_AMSDU_QMEM_FQ_CNT),
+ DUMP_WED_MASK(WED_MON_AMSDU_QMEM_CNT(0), WED_AMSDU_QMEM_SP_QCNT),
+ DUMP_WED_MASK(WED_MON_AMSDU_QMEM_CNT(1), WED_AMSDU_QMEM_TID0_QCNT),
+ DUMP_WED_MASK(WED_MON_AMSDU_QMEM_CNT(1), WED_AMSDU_QMEM_TID1_QCNT),
+ DUMP_WED_MASK(WED_MON_AMSDU_QMEM_CNT(2), WED_AMSDU_QMEM_TID2_QCNT),
+ DUMP_WED_MASK(WED_MON_AMSDU_QMEM_CNT(2), WED_AMSDU_QMEM_TID3_QCNT),
+ DUMP_WED_MASK(WED_MON_AMSDU_QMEM_CNT(3), WED_AMSDU_QMEM_TID4_QCNT),
+ DUMP_WED_MASK(WED_MON_AMSDU_QMEM_CNT(3), WED_AMSDU_QMEM_TID5_QCNT),
+ DUMP_WED_MASK(WED_MON_AMSDU_QMEM_CNT(4), WED_AMSDU_QMEM_TID6_QCNT),
+ DUMP_WED_MASK(WED_MON_AMSDU_QMEM_CNT(4), WED_AMSDU_QMEM_TID7_QCNT),
+
+ DUMP_STR("WED QMEM HEAD INFO"),
+ DUMP_WED_MASK(WED_MON_AMSDU_QMEM_PTR(0), WED_AMSDU_QMEM_FQ_HEAD),
+ DUMP_WED_MASK(WED_MON_AMSDU_QMEM_PTR(0), WED_AMSDU_QMEM_SP_QHEAD),
+ DUMP_WED_MASK(WED_MON_AMSDU_QMEM_PTR(1), WED_AMSDU_QMEM_TID0_QHEAD),
+ DUMP_WED_MASK(WED_MON_AMSDU_QMEM_PTR(1), WED_AMSDU_QMEM_TID1_QHEAD),
+ DUMP_WED_MASK(WED_MON_AMSDU_QMEM_PTR(2), WED_AMSDU_QMEM_TID2_QHEAD),
+ DUMP_WED_MASK(WED_MON_AMSDU_QMEM_PTR(2), WED_AMSDU_QMEM_TID3_QHEAD),
+ DUMP_WED_MASK(WED_MON_AMSDU_QMEM_PTR(3), WED_AMSDU_QMEM_TID4_QHEAD),
+ DUMP_WED_MASK(WED_MON_AMSDU_QMEM_PTR(3), WED_AMSDU_QMEM_TID5_QHEAD),
+ DUMP_WED_MASK(WED_MON_AMSDU_QMEM_PTR(4), WED_AMSDU_QMEM_TID6_QHEAD),
+ DUMP_WED_MASK(WED_MON_AMSDU_QMEM_PTR(4), WED_AMSDU_QMEM_TID7_QHEAD),
+
+ DUMP_STR("WED QMEM TAIL INFO"),
+ DUMP_WED_MASK(WED_MON_AMSDU_QMEM_PTR(5), WED_AMSDU_QMEM_FQ_TAIL),
+ DUMP_WED_MASK(WED_MON_AMSDU_QMEM_PTR(5), WED_AMSDU_QMEM_SP_QTAIL),
+ DUMP_WED_MASK(WED_MON_AMSDU_QMEM_PTR(6), WED_AMSDU_QMEM_TID0_QTAIL),
+ DUMP_WED_MASK(WED_MON_AMSDU_QMEM_PTR(6), WED_AMSDU_QMEM_TID1_QTAIL),
+ DUMP_WED_MASK(WED_MON_AMSDU_QMEM_PTR(7), WED_AMSDU_QMEM_TID2_QTAIL),
+ DUMP_WED_MASK(WED_MON_AMSDU_QMEM_PTR(7), WED_AMSDU_QMEM_TID3_QTAIL),
+ DUMP_WED_MASK(WED_MON_AMSDU_QMEM_PTR(8), WED_AMSDU_QMEM_TID4_QTAIL),
+ DUMP_WED_MASK(WED_MON_AMSDU_QMEM_PTR(8), WED_AMSDU_QMEM_TID5_QTAIL),
+ DUMP_WED_MASK(WED_MON_AMSDU_QMEM_PTR(9), WED_AMSDU_QMEM_TID6_QTAIL),
+ DUMP_WED_MASK(WED_MON_AMSDU_QMEM_PTR(9), WED_AMSDU_QMEM_TID7_QTAIL),
+
+ DUMP_STR("WED HIFTXD MSDU INFO"),
+ DUMP_WED(WED_MON_AMSDU_HIFTXD_FETCH_MSDU(1)),
+ DUMP_WED(WED_MON_AMSDU_HIFTXD_FETCH_MSDU(2)),
+ DUMP_WED(WED_MON_AMSDU_HIFTXD_FETCH_MSDU(3)),
+ DUMP_WED(WED_MON_AMSDU_HIFTXD_FETCH_MSDU(4)),
+ DUMP_WED(WED_MON_AMSDU_HIFTXD_FETCH_MSDU(5)),
+ DUMP_WED(WED_MON_AMSDU_HIFTXD_FETCH_MSDU(6)),
+ DUMP_WED(WED_MON_AMSDU_HIFTXD_FETCH_MSDU(7)),
+ DUMP_WED(WED_MON_AMSDU_HIFTXD_FETCH_MSDU(8)),
+ DUMP_WED(WED_MON_AMSDU_HIFTXD_FETCH_MSDU(9)),
+ DUMP_WED(WED_MON_AMSDU_HIFTXD_FETCH_MSDU(10)),
+ DUMP_WED(WED_MON_AMSDU_HIFTXD_FETCH_MSDU(11)),
+ DUMP_WED(WED_MON_AMSDU_HIFTXD_FETCH_MSDU(12)),
+ DUMP_WED(WED_MON_AMSDU_HIFTXD_FETCH_MSDU(13)),
+ };
struct mtk_wed_hw *hw = s->private;
struct mtk_wed_device *dev = hw->wed_dev;
@@ -219,7 +493,94 @@ wed_rxinfo_show(struct seq_file *s, void *data)
return 0;
}
-DEFINE_SHOW_ATTRIBUTE(wed_rxinfo);
+DEFINE_SHOW_ATTRIBUTE(wed_amsdu);
+
+static int
+wed_rtqm_show(struct seq_file *s, void *data)
+{
+ static const struct reg_dump regs[] = {
+ DUMP_STR("WED Route QM IGRS0(N2H + Recycle)"),
+ DUMP_WED(WED_RTQM_IGRS0_I2HW_DMAD_CNT),
+ DUMP_WED(WED_RTQM_IGRS0_I2H_DMAD_CNT(0)),
+ DUMP_WED(WED_RTQM_IGRS0_I2H_DMAD_CNT(1)),
+ DUMP_WED(WED_RTQM_IGRS0_I2HW_PKT_CNT),
+ DUMP_WED(WED_RTQM_IGRS0_I2H_PKT_CNT(0)),
+ DUMP_WED(WED_RTQM_IGRS0_I2H_PKT_CNT(0)),
+ DUMP_WED(WED_RTQM_IGRS0_FDROP_CNT),
+
+ DUMP_STR("WED Route QM IGRS1(Legacy)"),
+ DUMP_WED(WED_RTQM_IGRS1_I2HW_DMAD_CNT),
+ DUMP_WED(WED_RTQM_IGRS1_I2H_DMAD_CNT(0)),
+ DUMP_WED(WED_RTQM_IGRS1_I2H_DMAD_CNT(1)),
+ DUMP_WED(WED_RTQM_IGRS1_I2HW_PKT_CNT),
+ DUMP_WED(WED_RTQM_IGRS1_I2H_PKT_CNT(0)),
+ DUMP_WED(WED_RTQM_IGRS1_I2H_PKT_CNT(1)),
+ DUMP_WED(WED_RTQM_IGRS1_FDROP_CNT),
+
+ DUMP_STR("WED Route QM IGRS2(RRO3.0)"),
+ DUMP_WED(WED_RTQM_IGRS2_I2HW_DMAD_CNT),
+ DUMP_WED(WED_RTQM_IGRS2_I2H_DMAD_CNT(0)),
+ DUMP_WED(WED_RTQM_IGRS2_I2H_DMAD_CNT(1)),
+ DUMP_WED(WED_RTQM_IGRS2_I2HW_PKT_CNT),
+ DUMP_WED(WED_RTQM_IGRS2_I2H_PKT_CNT(0)),
+ DUMP_WED(WED_RTQM_IGRS2_I2H_PKT_CNT(1)),
+ DUMP_WED(WED_RTQM_IGRS2_FDROP_CNT),
+
+ DUMP_STR("WED Route QM IGRS3(DEBUG)"),
+ DUMP_WED(WED_RTQM_IGRS2_I2HW_DMAD_CNT),
+ DUMP_WED(WED_RTQM_IGRS3_I2H_DMAD_CNT(0)),
+ DUMP_WED(WED_RTQM_IGRS3_I2H_DMAD_CNT(1)),
+ DUMP_WED(WED_RTQM_IGRS3_I2HW_PKT_CNT),
+ DUMP_WED(WED_RTQM_IGRS3_I2H_PKT_CNT(0)),
+ DUMP_WED(WED_RTQM_IGRS3_I2H_PKT_CNT(1)),
+ DUMP_WED(WED_RTQM_IGRS3_FDROP_CNT),
+ };
+ struct mtk_wed_hw *hw = s->private;
+ struct mtk_wed_device *dev = hw->wed_dev;
+
+ if (dev)
+ dump_wed_regs(s, dev, regs, ARRAY_SIZE(regs));
+
+ return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(wed_rtqm);
+
+static int
+wed_rro_show(struct seq_file *s, void *data)
+{
+ static const struct reg_dump regs[] = {
+ DUMP_STR("RRO/IND CMD CNT"),
+ DUMP_WED(WED_RX_IND_CMD_CNT(1)),
+ DUMP_WED(WED_RX_IND_CMD_CNT(2)),
+ DUMP_WED(WED_RX_IND_CMD_CNT(3)),
+ DUMP_WED(WED_RX_IND_CMD_CNT(4)),
+ DUMP_WED(WED_RX_IND_CMD_CNT(5)),
+ DUMP_WED(WED_RX_IND_CMD_CNT(6)),
+ DUMP_WED(WED_RX_IND_CMD_CNT(7)),
+ DUMP_WED(WED_RX_IND_CMD_CNT(8)),
+ DUMP_WED_MASK(WED_RX_IND_CMD_CNT(9),
+ WED_IND_CMD_MAGIC_CNT_FAIL_CNT),
+
+ DUMP_WED(WED_RX_ADDR_ELEM_CNT(0)),
+ DUMP_WED_MASK(WED_RX_ADDR_ELEM_CNT(1),
+ WED_ADDR_ELEM_SIG_FAIL_CNT),
+ DUMP_WED(WED_RX_MSDU_PG_CNT(1)),
+ DUMP_WED(WED_RX_MSDU_PG_CNT(2)),
+ DUMP_WED(WED_RX_MSDU_PG_CNT(3)),
+ DUMP_WED(WED_RX_MSDU_PG_CNT(4)),
+ DUMP_WED(WED_RX_MSDU_PG_CNT(5)),
+ DUMP_WED_MASK(WED_RX_PN_CHK_CNT,
+ WED_PN_CHK_FAIL_CNT),
+ };
+ struct mtk_wed_hw *hw = s->private;
+ struct mtk_wed_device *dev = hw->wed_dev;
+
+ if (dev)
+ dump_wed_regs(s, dev, regs, ARRAY_SIZE(regs));
+
+ return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(wed_rro);
static int
mtk_wed_reg_set(void *data, u64 val)
@@ -261,7 +622,16 @@ void mtk_wed_hw_add_debugfs(struct mtk_wed_hw *hw)
debugfs_create_u32("regidx", 0600, dir, &hw->debugfs_reg);
debugfs_create_file_unsafe("regval", 0600, dir, hw, &fops_regval);
debugfs_create_file_unsafe("txinfo", 0400, dir, hw, &wed_txinfo_fops);
- if (hw->version != 1)
+ if (!mtk_wed_is_v1(hw)) {
debugfs_create_file_unsafe("rxinfo", 0400, dir, hw,
&wed_rxinfo_fops);
+ if (mtk_wed_is_v3_or_greater(hw)) {
+ debugfs_create_file_unsafe("amsdu", 0400, dir, hw,
+ &wed_amsdu_fops);
+ debugfs_create_file_unsafe("rtqm", 0400, dir, hw,
+ &wed_rtqm_fops);
+ debugfs_create_file_unsafe("rro", 0400, dir, hw,
+ &wed_rro_fops);
+ }
+ }
}
diff --git a/drivers/net/ethernet/mediatek/mtk_wed_mcu.c b/drivers/net/ethernet/mediatek/mtk_wed_mcu.c
index 071ed3dea860..65a78e274009 100644
--- a/drivers/net/ethernet/mediatek/mtk_wed_mcu.c
+++ b/drivers/net/ethernet/mediatek/mtk_wed_mcu.c
@@ -16,14 +16,30 @@
#include "mtk_wed_wo.h"
#include "mtk_wed.h"
+static struct mtk_wed_wo_memory_region mem_region[] = {
+ [MTK_WED_WO_REGION_EMI] = {
+ .name = "wo-emi",
+ },
+ [MTK_WED_WO_REGION_ILM] = {
+ .name = "wo-ilm",
+ },
+ [MTK_WED_WO_REGION_DATA] = {
+ .name = "wo-data",
+ .shared = true,
+ },
+ [MTK_WED_WO_REGION_BOOT] = {
+ .name = "wo-boot",
+ },
+};
+
static u32 wo_r32(struct mtk_wed_wo *wo, u32 reg)
{
- return readl(wo->boot.addr + reg);
+ return readl(mem_region[MTK_WED_WO_REGION_BOOT].addr + reg);
}
static void wo_w32(struct mtk_wed_wo *wo, u32 reg, u32 val)
{
- writel(val, wo->boot.addr + reg);
+ writel(val, mem_region[MTK_WED_WO_REGION_BOOT].addr + reg);
}
static struct sk_buff *
@@ -68,6 +84,9 @@ mtk_wed_update_rx_stats(struct mtk_wed_device *wed, struct sk_buff *skb)
struct mtk_wed_wo_rx_stats *stats;
int i;
+ if (!wed->wlan.update_wo_rx_stats)
+ return;
+
if (count * sizeof(*stats) > skb->len - sizeof(u32))
return;
@@ -204,7 +223,7 @@ int mtk_wed_mcu_msg_update(struct mtk_wed_device *dev, int id, void *data,
{
struct mtk_wed_wo *wo = dev->hw->wed_wo;
- if (dev->hw->version == 1)
+ if (!mtk_wed_get_rx_capa(dev))
return 0;
if (WARN_ON(!wo))
@@ -215,19 +234,13 @@ int mtk_wed_mcu_msg_update(struct mtk_wed_device *dev, int id, void *data,
}
static int
-mtk_wed_get_memory_region(struct mtk_wed_wo *wo,
+mtk_wed_get_memory_region(struct mtk_wed_hw *hw, int index,
struct mtk_wed_wo_memory_region *region)
{
struct reserved_mem *rmem;
struct device_node *np;
- int index;
-
- index = of_property_match_string(wo->hw->node, "memory-region-names",
- region->name);
- if (index < 0)
- return index;
- np = of_parse_phandle(wo->hw->node, "memory-region", index);
+ np = of_parse_phandle(hw->node, "memory-region", index);
if (!np)
return -ENODEV;
@@ -239,7 +252,7 @@ mtk_wed_get_memory_region(struct mtk_wed_wo *wo,
region->phy_addr = rmem->base;
region->size = rmem->size;
- region->addr = devm_ioremap(wo->hw->dev, region->phy_addr, region->size);
+ region->addr = devm_ioremap(hw->dev, region->phy_addr, region->size);
return !region->addr ? -EINVAL : 0;
}
@@ -252,6 +265,9 @@ mtk_wed_mcu_run_firmware(struct mtk_wed_wo *wo, const struct firmware *fw,
const struct mtk_wed_fw_trailer *trailer;
const struct mtk_wed_fw_region *fw_region;
+ if (!region->phy_addr || !region->size)
+ return 0;
+
trailer_ptr = fw->data + fw->size - sizeof(*trailer);
trailer = (const struct mtk_wed_fw_trailer *)trailer_ptr;
region_ptr = trailer_ptr - trailer->num_region * sizeof(*fw_region);
@@ -291,18 +307,6 @@ next:
static int
mtk_wed_mcu_load_firmware(struct mtk_wed_wo *wo)
{
- static struct mtk_wed_wo_memory_region mem_region[] = {
- [MTK_WED_WO_REGION_EMI] = {
- .name = "wo-emi",
- },
- [MTK_WED_WO_REGION_ILM] = {
- .name = "wo-ilm",
- },
- [MTK_WED_WO_REGION_DATA] = {
- .name = "wo-data",
- .shared = true,
- },
- };
const struct mtk_wed_fw_trailer *trailer;
const struct firmware *fw;
const char *fw_name;
@@ -311,25 +315,38 @@ mtk_wed_mcu_load_firmware(struct mtk_wed_wo *wo)
/* load firmware region metadata */
for (i = 0; i < ARRAY_SIZE(mem_region); i++) {
- ret = mtk_wed_get_memory_region(wo, &mem_region[i]);
+ int index = of_property_match_string(wo->hw->node,
+ "memory-region-names",
+ mem_region[i].name);
+ if (index < 0)
+ continue;
+
+ ret = mtk_wed_get_memory_region(wo->hw, index, &mem_region[i]);
if (ret)
return ret;
}
- wo->boot.name = "wo-boot";
- ret = mtk_wed_get_memory_region(wo, &wo->boot);
- if (ret)
- return ret;
-
/* set dummy cr */
wed_w32(wo->hw->wed_dev, MTK_WED_SCR0 + 4 * MTK_WED_DUMMY_CR_FWDL,
wo->hw->index + 1);
/* load firmware */
- if (of_device_is_compatible(wo->hw->node, "mediatek,mt7981-wed"))
- fw_name = MT7981_FIRMWARE_WO;
- else
- fw_name = wo->hw->index ? MT7986_FIRMWARE_WO1 : MT7986_FIRMWARE_WO0;
+ switch (wo->hw->version) {
+ case 2:
+ if (of_device_is_compatible(wo->hw->node,
+ "mediatek,mt7981-wed"))
+ fw_name = MT7981_FIRMWARE_WO;
+ else
+ fw_name = wo->hw->index ? MT7986_FIRMWARE_WO1
+ : MT7986_FIRMWARE_WO0;
+ break;
+ case 3:
+ fw_name = wo->hw->index ? MT7988_FIRMWARE_WO1
+ : MT7988_FIRMWARE_WO0;
+ break;
+ default:
+ return -EINVAL;
+ }
ret = request_firmware(&fw, fw_name, wo->hw->dev);
if (ret)
@@ -350,15 +367,16 @@ mtk_wed_mcu_load_firmware(struct mtk_wed_wo *wo)
}
/* set the start address */
- boot_cr = wo->hw->index ? MTK_WO_MCU_CFG_LS_WA_BOOT_ADDR_ADDR
- : MTK_WO_MCU_CFG_LS_WM_BOOT_ADDR_ADDR;
+ if (!mtk_wed_is_v3_or_greater(wo->hw) && wo->hw->index)
+ boot_cr = MTK_WO_MCU_CFG_LS_WA_BOOT_ADDR_ADDR;
+ else
+ boot_cr = MTK_WO_MCU_CFG_LS_WM_BOOT_ADDR_ADDR;
wo_w32(wo, boot_cr, mem_region[MTK_WED_WO_REGION_EMI].phy_addr >> 16);
/* wo firmware reset */
wo_w32(wo, MTK_WO_MCU_CFG_LS_WF_MCCR_CLR_ADDR, 0xc00);
- val = wo_r32(wo, MTK_WO_MCU_CFG_LS_WF_MCU_CFG_WM_WA_ADDR);
- val |= wo->hw->index ? MTK_WO_MCU_CFG_LS_WF_WM_WA_WA_CPU_RSTB_MASK
- : MTK_WO_MCU_CFG_LS_WF_WM_WA_WM_CPU_RSTB_MASK;
+ val = wo_r32(wo, MTK_WO_MCU_CFG_LS_WF_MCU_CFG_WM_WA_ADDR) |
+ MTK_WO_MCU_CFG_LS_WF_WM_WA_WM_CPU_RSTB_MASK;
wo_w32(wo, MTK_WO_MCU_CFG_LS_WF_MCU_CFG_WM_WA_ADDR, val);
out:
release_firmware(fw);
@@ -393,3 +411,5 @@ int mtk_wed_mcu_init(struct mtk_wed_wo *wo)
MODULE_FIRMWARE(MT7981_FIRMWARE_WO);
MODULE_FIRMWARE(MT7986_FIRMWARE_WO0);
MODULE_FIRMWARE(MT7986_FIRMWARE_WO1);
+MODULE_FIRMWARE(MT7988_FIRMWARE_WO0);
+MODULE_FIRMWARE(MT7988_FIRMWARE_WO1);
diff --git a/drivers/net/ethernet/mediatek/mtk_wed_regs.h b/drivers/net/ethernet/mediatek/mtk_wed_regs.h
index 47ea69feb3b2..c71190924816 100644
--- a/drivers/net/ethernet/mediatek/mtk_wed_regs.h
+++ b/drivers/net/ethernet/mediatek/mtk_wed_regs.h
@@ -13,6 +13,9 @@
#define MTK_WDMA_DESC_CTRL_LAST_SEG0 BIT(30)
#define MTK_WDMA_DESC_CTRL_DMA_DONE BIT(31)
+#define MTK_WDMA_TXD0_DESC_INFO_DMA_DONE BIT(29)
+#define MTK_WDMA_TXD1_DESC_INFO_DMA_DONE BIT(31)
+
struct mtk_wdma_desc {
__le32 buf0;
__le32 ctrl;
@@ -25,6 +28,8 @@ struct mtk_wdma_desc {
#define MTK_WED_RESET 0x008
#define MTK_WED_RESET_TX_BM BIT(0)
#define MTK_WED_RESET_RX_BM BIT(1)
+#define MTK_WED_RESET_RX_PG_BM BIT(2)
+#define MTK_WED_RESET_RRO_RX_TO_PG BIT(3)
#define MTK_WED_RESET_TX_FREE_AGENT BIT(4)
#define MTK_WED_RESET_WPDMA_TX_DRV BIT(8)
#define MTK_WED_RESET_WPDMA_RX_DRV BIT(9)
@@ -37,6 +42,7 @@ struct mtk_wdma_desc {
#define MTK_WED_RESET_WDMA_INT_AGENT BIT(19)
#define MTK_WED_RESET_RX_RRO_QM BIT(20)
#define MTK_WED_RESET_RX_ROUTE_QM BIT(21)
+#define MTK_WED_RESET_TX_AMSDU BIT(22)
#define MTK_WED_RESET_WED BIT(31)
#define MTK_WED_CTRL 0x00c
@@ -44,6 +50,9 @@ struct mtk_wdma_desc {
#define MTK_WED_CTRL_WPDMA_INT_AGENT_BUSY BIT(1)
#define MTK_WED_CTRL_WDMA_INT_AGENT_EN BIT(2)
#define MTK_WED_CTRL_WDMA_INT_AGENT_BUSY BIT(3)
+#define MTK_WED_CTRL_WED_RX_IND_CMD_EN BIT(5)
+#define MTK_WED_CTRL_WED_RX_PG_BM_EN BIT(6)
+#define MTK_WED_CTRL_WED_RX_PG_BM_BUSY BIT(7)
#define MTK_WED_CTRL_WED_TX_BM_EN BIT(8)
#define MTK_WED_CTRL_WED_TX_BM_BUSY BIT(9)
#define MTK_WED_CTRL_WED_TX_FREE_AGENT_EN BIT(10)
@@ -54,9 +63,14 @@ struct mtk_wdma_desc {
#define MTK_WED_CTRL_RX_RRO_QM_BUSY BIT(15)
#define MTK_WED_CTRL_RX_ROUTE_QM_EN BIT(16)
#define MTK_WED_CTRL_RX_ROUTE_QM_BUSY BIT(17)
+#define MTK_WED_CTRL_TX_TKID_ALI_EN BIT(20)
+#define MTK_WED_CTRL_TX_TKID_ALI_BUSY BIT(21)
+#define MTK_WED_CTRL_TX_AMSDU_EN BIT(22)
+#define MTK_WED_CTRL_TX_AMSDU_BUSY BIT(23)
#define MTK_WED_CTRL_FINAL_DIDX_READ BIT(24)
#define MTK_WED_CTRL_ETH_DMAD_FMT BIT(25)
#define MTK_WED_CTRL_MIB_READ_CLEAR BIT(28)
+#define MTK_WED_CTRL_FLD_MIB_RD_CLR BIT(28)
#define MTK_WED_EXT_INT_STATUS 0x020
#define MTK_WED_EXT_INT_STATUS_TF_LEN_ERR BIT(0)
@@ -64,8 +78,8 @@ struct mtk_wdma_desc {
#define MTK_WED_EXT_INT_STATUS_TKID_TITO_INVALID BIT(4)
#define MTK_WED_EXT_INT_STATUS_TX_FBUF_LO_TH BIT(8)
#define MTK_WED_EXT_INT_STATUS_TX_FBUF_HI_TH BIT(9)
-#define MTK_WED_EXT_INT_STATUS_RX_FBUF_LO_TH BIT(12)
-#define MTK_WED_EXT_INT_STATUS_RX_FBUF_HI_TH BIT(13)
+#define MTK_WED_EXT_INT_STATUS_RX_FBUF_LO_TH BIT(10) /* wed v2 */
+#define MTK_WED_EXT_INT_STATUS_RX_FBUF_HI_TH BIT(11) /* wed v2 */
#define MTK_WED_EXT_INT_STATUS_RX_DRV_R_RESP_ERR BIT(16)
#define MTK_WED_EXT_INT_STATUS_RX_DRV_W_RESP_ERR BIT(17)
#define MTK_WED_EXT_INT_STATUS_RX_DRV_COHERENT BIT(18)
@@ -89,19 +103,26 @@ struct mtk_wdma_desc {
#define MTK_WED_EXT_INT_MASK 0x028
#define MTK_WED_EXT_INT_MASK1 0x02c
#define MTK_WED_EXT_INT_MASK2 0x030
+#define MTK_WED_EXT_INT_MASK3 0x034
#define MTK_WED_STATUS 0x060
#define MTK_WED_STATUS_TX GENMASK(15, 8)
+#define MTK_WED_WPDMA_STATUS 0x068
+#define MTK_WED_WPDMA_STATUS_TX_DRV GENMASK(15, 8)
+
#define MTK_WED_TX_BM_CTRL 0x080
#define MTK_WED_TX_BM_CTRL_VLD_GRP_NUM GENMASK(6, 0)
#define MTK_WED_TX_BM_CTRL_RSV_GRP_NUM GENMASK(22, 16)
+#define MTK_WED_TX_BM_CTRL_LEGACY_EN BIT(26)
+#define MTK_WED_TX_TKID_CTRL_FREE_FORMAT BIT(27)
#define MTK_WED_TX_BM_CTRL_PAUSE BIT(28)
#define MTK_WED_TX_BM_BASE 0x084
+#define MTK_WED_TX_BM_INIT_PTR 0x088
+#define MTK_WED_TX_BM_SW_TAIL_IDX GENMASK(16, 0)
+#define MTK_WED_TX_BM_INIT_SW_TAIL_IDX BIT(16)
-#define MTK_WED_TX_BM_TKID 0x088
-#define MTK_WED_TX_BM_TKID_V2 0x0c8
#define MTK_WED_TX_BM_TKID_START GENMASK(15, 0)
#define MTK_WED_TX_BM_TKID_END GENMASK(31, 16)
@@ -124,6 +145,12 @@ struct mtk_wdma_desc {
#define MTK_WED_TX_TKID_CTRL_RSV_GRP_NUM GENMASK(22, 16)
#define MTK_WED_TX_TKID_CTRL_PAUSE BIT(28)
+#define MTK_WED_TX_TKID_INTF 0x0dc
+#define MTK_WED_TX_TKID_INTF_TKFIFO_FDEP GENMASK(25, 16)
+
+#define MTK_WED_TX_TKID_CTRL_VLD_GRP_NUM_V3 GENMASK(7, 0)
+#define MTK_WED_TX_TKID_CTRL_RSV_GRP_NUM_V3 GENMASK(23, 16)
+
#define MTK_WED_TX_TKID_DYN_THR 0x0e0
#define MTK_WED_TX_TKID_DYN_THR_LO GENMASK(6, 0)
#define MTK_WED_TX_TKID_DYN_THR_HI GENMASK(22, 16)
@@ -160,9 +187,6 @@ struct mtk_wdma_desc {
#define MTK_WED_GLO_CFG_RX_2B_OFFSET BIT(31)
#define MTK_WED_RESET_IDX 0x20c
-#define MTK_WED_RESET_IDX_TX GENMASK(3, 0)
-#define MTK_WED_RESET_IDX_RX GENMASK(17, 16)
-#define MTK_WED_RESET_IDX_RX_V2 GENMASK(7, 6)
#define MTK_WED_RESET_WPDMA_IDX_RX GENMASK(31, 30)
#define MTK_WED_TX_MIB(_n) (0x2a0 + (_n) * 4)
@@ -174,6 +198,7 @@ struct mtk_wdma_desc {
#define MTK_WED_RING_RX_DATA(_n) (0x420 + (_n) * 0x10)
#define MTK_WED_SCR0 0x3c0
+#define MTK_WED_RX1_CTRL2 0x418
#define MTK_WED_WPDMA_INT_TRIGGER 0x504
#define MTK_WED_WPDMA_INT_TRIGGER_RX_DONE BIT(1)
#define MTK_WED_WPDMA_INT_TRIGGER_TX_DONE GENMASK(5, 4)
@@ -204,12 +229,15 @@ struct mtk_wdma_desc {
#define MTK_WED_WPDMA_GLO_CFG_RX_DRV_R1_PKT_PROC BIT(5)
#define MTK_WED_WPDMA_GLO_CFG_RX_DRV_R0_CRX_SYNC BIT(6)
#define MTK_WED_WPDMA_GLO_CFG_RX_DRV_R1_CRX_SYNC BIT(7)
-#define MTK_WED_WPDMA_GLO_CFG_RX_DRV_EVENT_PKT_FMT_VER GENMASK(18, 16)
+#define MTK_WED_WPDMA_GLO_CFG_RX_DRV_EVENT_PKT_FMT_VER GENMASK(15, 12)
+#define MTK_WED_WPDMA_GLO_CFG_RX_DRV_UNS_VER_FORCE_4 BIT(18)
#define MTK_WED_WPDMA_GLO_CFG_RX_DRV_UNSUPPORT_FMT BIT(19)
-#define MTK_WED_WPDMA_GLO_CFG_RX_DRV_UEVENT_PKT_FMT_CHK BIT(20)
+#define MTK_WED_WPDMA_GLO_CFG_RX_DRV_EVENT_PKT_FMT_CHK BIT(20)
#define MTK_WED_WPDMA_GLO_CFG_RX_DDONE2_WR BIT(21)
#define MTK_WED_WPDMA_GLO_CFG_TX_TKID_KEEP BIT(24)
+#define MTK_WED_WPDMA_GLO_CFG_TX_DDONE_CHK_LAST BIT(25)
#define MTK_WED_WPDMA_GLO_CFG_TX_DMAD_DW3_PREV BIT(28)
+#define MTK_WED_WPDMA_GLO_CFG_TX_DDONE_CHK BIT(30)
#define MTK_WED_WPDMA_RESET_IDX 0x50c
#define MTK_WED_WPDMA_RESET_IDX_TX GENMASK(3, 0)
@@ -255,9 +283,10 @@ struct mtk_wdma_desc {
#define MTK_WED_PCIE_INT_TRIGGER_STATUS BIT(16)
#define MTK_WED_PCIE_INT_CTRL 0x57c
-#define MTK_WED_PCIE_INT_CTRL_MSK_EN_POLA BIT(20)
-#define MTK_WED_PCIE_INT_CTRL_SRC_SEL GENMASK(17, 16)
#define MTK_WED_PCIE_INT_CTRL_POLL_EN GENMASK(13, 12)
+#define MTK_WED_PCIE_INT_CTRL_SRC_SEL GENMASK(17, 16)
+#define MTK_WED_PCIE_INT_CTRL_MSK_EN_POLA BIT(20)
+#define MTK_WED_PCIE_INT_CTRL_MSK_IRQ_FILTER BIT(21)
#define MTK_WED_WPDMA_CFG_BASE 0x580
#define MTK_WED_WPDMA_CFG_INT_MASK 0x584
@@ -283,15 +312,30 @@ struct mtk_wdma_desc {
#define MTK_WED_WPDMA_RX_D_RST_IDX 0x760
#define MTK_WED_WPDMA_RX_D_RST_CRX_IDX GENMASK(17, 16)
+#define MTK_WED_WPDMA_RX_D_RST_DRV_IDX_ALL BIT(20)
#define MTK_WED_WPDMA_RX_D_RST_DRV_IDX GENMASK(25, 24)
#define MTK_WED_WPDMA_RX_GLO_CFG 0x76c
-#define MTK_WED_WPDMA_RX_RING 0x770
#define MTK_WED_WPDMA_RX_D_MIB(_n) (0x774 + (_n) * 4)
#define MTK_WED_WPDMA_RX_D_PROCESSED_MIB(_n) (0x784 + (_n) * 4)
#define MTK_WED_WPDMA_RX_D_COHERENT_MIB 0x78c
+#define MTK_WED_WPDMA_RX_D_PREF_CFG 0x7b4
+#define MTK_WED_WPDMA_RX_D_PREF_EN BIT(0)
+#define MTK_WED_WPDMA_RX_D_PREF_BUSY BIT(1)
+#define MTK_WED_WPDMA_RX_D_PREF_BURST_SIZE GENMASK(12, 8)
+#define MTK_WED_WPDMA_RX_D_PREF_LOW_THRES GENMASK(21, 16)
+
+#define MTK_WED_WPDMA_RX_D_PREF_RX0_SIDX 0x7b8
+#define MTK_WED_WPDMA_RX_D_PREF_SIDX_IDX_CLR BIT(15)
+
+#define MTK_WED_WPDMA_RX_D_PREF_RX1_SIDX 0x7bc
+
+#define MTK_WED_WPDMA_RX_D_PREF_FIFO_CFG 0x7c0
+#define MTK_WED_WPDMA_RX_D_PREF_FIFO_CFG_R0_CLR BIT(0)
+#define MTK_WED_WPDMA_RX_D_PREF_FIFO_CFG_R1_CLR BIT(16)
+
#define MTK_WED_WDMA_RING_TX 0x800
#define MTK_WED_WDMA_TX_MIB 0x810
@@ -299,6 +343,20 @@ struct mtk_wdma_desc {
#define MTK_WED_WDMA_RING_RX(_n) (0x900 + (_n) * 0x10)
#define MTK_WED_WDMA_RX_THRES(_n) (0x940 + (_n) * 0x4)
+#define MTK_WED_WDMA_RX_PREF_CFG 0x950
+#define MTK_WED_WDMA_RX_PREF_EN BIT(0)
+#define MTK_WED_WDMA_RX_PREF_BUSY BIT(1)
+#define MTK_WED_WDMA_RX_PREF_BURST_SIZE GENMASK(12, 8)
+#define MTK_WED_WDMA_RX_PREF_LOW_THRES GENMASK(21, 16)
+#define MTK_WED_WDMA_RX_PREF_RX0_SIDX_CLR BIT(24)
+#define MTK_WED_WDMA_RX_PREF_RX1_SIDX_CLR BIT(25)
+#define MTK_WED_WDMA_RX_PREF_DDONE2_EN BIT(26)
+#define MTK_WED_WDMA_RX_PREF_DDONE2_BUSY BIT(27)
+
+#define MTK_WED_WDMA_RX_PREF_FIFO_CFG 0x95C
+#define MTK_WED_WDMA_RX_PREF_FIFO_RX0_CLR BIT(0)
+#define MTK_WED_WDMA_RX_PREF_FIFO_RX1_CLR BIT(16)
+
#define MTK_WED_WDMA_GLO_CFG 0xa04
#define MTK_WED_WDMA_GLO_CFG_TX_DRV_EN BIT(0)
#define MTK_WED_WDMA_GLO_CFG_TX_DDONE_CHK BIT(1)
@@ -322,6 +380,7 @@ struct mtk_wdma_desc {
#define MTK_WED_WDMA_RESET_IDX 0xa08
#define MTK_WED_WDMA_RESET_IDX_RX GENMASK(17, 16)
+#define MTK_WED_WDMA_RESET_IDX_RX_ALL BIT(20)
#define MTK_WED_WDMA_RESET_IDX_DRV GENMASK(25, 24)
#define MTK_WED_WDMA_INT_CLR 0xa24
@@ -331,6 +390,7 @@ struct mtk_wdma_desc {
#define MTK_WED_WDMA_INT_TRIGGER_RX_DONE GENMASK(17, 16)
#define MTK_WED_WDMA_INT_CTRL 0xa2c
+#define MTK_WED_WDMA_INT_POLL_PRD GENMASK(7, 0)
#define MTK_WED_WDMA_INT_CTRL_POLL_SRC_SEL GENMASK(17, 16)
#define MTK_WED_WDMA_CFG_BASE 0xaa0
@@ -391,9 +451,62 @@ struct mtk_wdma_desc {
#define MTK_WDMA_INT_MASK_RX_DELAY BIT(30)
#define MTK_WDMA_INT_MASK_RX_COHERENT BIT(31)
+#define MTK_WDMA_XDMA_TX_FIFO_CFG 0x238
+#define MTK_WDMA_XDMA_TX_FIFO_CFG_TX_PAR_FIFO_CLEAR BIT(0)
+#define MTK_WDMA_XDMA_TX_FIFO_CFG_TX_CMD_FIFO_CLEAR BIT(4)
+#define MTK_WDMA_XDMA_TX_FIFO_CFG_TX_DMAD_FIFO_CLEAR BIT(8)
+#define MTK_WDMA_XDMA_TX_FIFO_CFG_TX_ARR_FIFO_CLEAR BIT(12)
+
+#define MTK_WDMA_XDMA_RX_FIFO_CFG 0x23c
+#define MTK_WDMA_XDMA_RX_FIFO_CFG_RX_PAR_FIFO_CLEAR BIT(0)
+#define MTK_WDMA_XDMA_RX_FIFO_CFG_RX_CMD_FIFO_CLEAR BIT(4)
+#define MTK_WDMA_XDMA_RX_FIFO_CFG_RX_DMAD_FIFO_CLEAR BIT(8)
+#define MTK_WDMA_XDMA_RX_FIFO_CFG_RX_ARR_FIFO_CLEAR BIT(12)
+#define MTK_WDMA_XDMA_RX_FIFO_CFG_RX_LEN_FIFO_CLEAR BIT(15)
+#define MTK_WDMA_XDMA_RX_FIFO_CFG_RX_WID_FIFO_CLEAR BIT(18)
+#define MTK_WDMA_XDMA_RX_FIFO_CFG_RX_BID_FIFO_CLEAR BIT(21)
+
#define MTK_WDMA_INT_GRP1 0x250
#define MTK_WDMA_INT_GRP2 0x254
+#define MTK_WDMA_PREF_TX_CFG 0x2d0
+#define MTK_WDMA_PREF_TX_CFG_PREF_EN BIT(0)
+#define MTK_WDMA_PREF_TX_CFG_PREF_BUSY BIT(1)
+
+#define MTK_WDMA_PREF_RX_CFG 0x2dc
+#define MTK_WDMA_PREF_RX_CFG_PREF_EN BIT(0)
+#define MTK_WDMA_PREF_RX_CFG_PREF_BUSY BIT(1)
+
+#define MTK_WDMA_PREF_RX_FIFO_CFG 0x2e0
+#define MTK_WDMA_PREF_RX_FIFO_CFG_RING0_CLEAR BIT(0)
+#define MTK_WDMA_PREF_RX_FIFO_CFG_RING1_CLEAR BIT(16)
+
+#define MTK_WDMA_PREF_TX_FIFO_CFG 0x2d4
+#define MTK_WDMA_PREF_TX_FIFO_CFG_RING0_CLEAR BIT(0)
+#define MTK_WDMA_PREF_TX_FIFO_CFG_RING1_CLEAR BIT(16)
+
+#define MTK_WDMA_PREF_SIDX_CFG 0x2e4
+#define MTK_WDMA_PREF_SIDX_CFG_TX_RING_CLEAR GENMASK(3, 0)
+#define MTK_WDMA_PREF_SIDX_CFG_RX_RING_CLEAR GENMASK(5, 4)
+
+#define MTK_WDMA_WRBK_TX_CFG 0x300
+#define MTK_WDMA_WRBK_TX_CFG_WRBK_BUSY BIT(0)
+#define MTK_WDMA_WRBK_TX_CFG_WRBK_EN BIT(30)
+
+#define MTK_WDMA_WRBK_TX_FIFO_CFG(_n) (0x304 + (_n) * 0x4)
+#define MTK_WDMA_WRBK_TX_FIFO_CFG_RING_CLEAR BIT(0)
+
+#define MTK_WDMA_WRBK_RX_CFG 0x344
+#define MTK_WDMA_WRBK_RX_CFG_WRBK_BUSY BIT(0)
+#define MTK_WDMA_WRBK_RX_CFG_WRBK_EN BIT(30)
+
+#define MTK_WDMA_WRBK_RX_FIFO_CFG(_n) (0x348 + (_n) * 0x4)
+#define MTK_WDMA_WRBK_RX_FIFO_CFG_RING_CLEAR BIT(0)
+
+#define MTK_WDMA_WRBK_SIDX_CFG 0x388
+#define MTK_WDMA_WRBK_SIDX_CFG_TX_RING_CLEAR GENMASK(3, 0)
+#define MTK_WDMA_WRBK_SIDX_CFG_RX_RING_CLEAR GENMASK(5, 4)
+
#define MTK_PCIE_MIRROR_MAP(n) ((n) ? 0x4 : 0x0)
#define MTK_PCIE_MIRROR_MAP_EN BIT(0)
#define MTK_PCIE_MIRROR_MAP_WED_ID BIT(1)
@@ -407,6 +520,32 @@ struct mtk_wdma_desc {
#define MTK_WED_RTQM_Q_DBG_BYPASS BIT(5)
#define MTK_WED_RTQM_TXDMAD_FPORT GENMASK(23, 20)
+#define MTK_WED_RTQM_RST 0xb04
+
+#define MTK_WED_RTQM_IGRS0_I2HW_DMAD_CNT 0xb1c
+#define MTK_WED_RTQM_IGRS0_I2H_DMAD_CNT(_n) (0xb20 + (_n) * 0x4)
+#define MTK_WED_RTQM_IGRS0_I2HW_PKT_CNT 0xb28
+#define MTK_WED_RTQM_IGRS0_I2H_PKT_CNT(_n) (0xb2c + (_n) * 0x4)
+#define MTK_WED_RTQM_IGRS0_FDROP_CNT 0xb34
+
+#define MTK_WED_RTQM_IGRS1_I2HW_DMAD_CNT 0xb44
+#define MTK_WED_RTQM_IGRS1_I2H_DMAD_CNT(_n) (0xb48 + (_n) * 0x4)
+#define MTK_WED_RTQM_IGRS1_I2HW_PKT_CNT 0xb50
+#define MTK_WED_RTQM_IGRS1_I2H_PKT_CNT(_n) (0xb54 + (_n) * 0x4)
+#define MTK_WED_RTQM_IGRS1_FDROP_CNT 0xb5c
+
+#define MTK_WED_RTQM_IGRS2_I2HW_DMAD_CNT 0xb6c
+#define MTK_WED_RTQM_IGRS2_I2H_DMAD_CNT(_n) (0xb70 + (_n) * 0x4)
+#define MTK_WED_RTQM_IGRS2_I2HW_PKT_CNT 0xb78
+#define MTK_WED_RTQM_IGRS2_I2H_PKT_CNT(_n) (0xb7c + (_n) * 0x4)
+#define MTK_WED_RTQM_IGRS2_FDROP_CNT 0xb84
+
+#define MTK_WED_RTQM_IGRS3_I2HW_DMAD_CNT 0xb94
+#define MTK_WED_RTQM_IGRS3_I2H_DMAD_CNT(_n) (0xb98 + (_n) * 0x4)
+#define MTK_WED_RTQM_IGRS3_I2HW_PKT_CNT 0xba0
+#define MTK_WED_RTQM_IGRS3_I2H_PKT_CNT(_n) (0xba4 + (_n) * 0x4)
+#define MTK_WED_RTQM_IGRS3_FDROP_CNT 0xbac
+
#define MTK_WED_RTQM_R2H_MIB(_n) (0xb70 + (_n) * 0x4)
#define MTK_WED_RTQM_R2Q_MIB(_n) (0xb78 + (_n) * 0x4)
#define MTK_WED_RTQM_Q2N_MIB 0xb80
@@ -415,6 +554,24 @@ struct mtk_wdma_desc {
#define MTK_WED_RTQM_Q2B_MIB 0xb8c
#define MTK_WED_RTQM_PFDBK_MIB 0xb90
+#define MTK_WED_RTQM_ENQ_CFG0 0xbb8
+#define MTK_WED_RTQM_ENQ_CFG_TXDMAD_FPORT GENMASK(15, 12)
+
+#define MTK_WED_RTQM_FDROP_MIB 0xb84
+#define MTK_WED_RTQM_ENQ_I2Q_DMAD_CNT 0xbbc
+#define MTK_WED_RTQM_ENQ_I2N_DMAD_CNT 0xbc0
+#define MTK_WED_RTQM_ENQ_I2Q_PKT_CNT 0xbc4
+#define MTK_WED_RTQM_ENQ_I2N_PKT_CNT 0xbc8
+#define MTK_WED_RTQM_ENQ_USED_ENTRY_CNT 0xbcc
+#define MTK_WED_RTQM_ENQ_ERR_CNT 0xbd0
+
+#define MTK_WED_RTQM_DEQ_DMAD_CNT 0xbd8
+#define MTK_WED_RTQM_DEQ_Q2I_DMAD_CNT 0xbdc
+#define MTK_WED_RTQM_DEQ_PKT_CNT 0xbe0
+#define MTK_WED_RTQM_DEQ_Q2I_PKT_CNT 0xbe4
+#define MTK_WED_RTQM_DEQ_USED_PFDBK_CNT 0xbe8
+#define MTK_WED_RTQM_DEQ_ERR_CNT 0xbec
+
#define MTK_WED_RROQM_GLO_CFG 0xc04
#define MTK_WED_RROQM_RST_IDX 0xc08
#define MTK_WED_RROQM_RST_IDX_MIOD BIT(0)
@@ -464,7 +621,195 @@ struct mtk_wdma_desc {
#define MTK_WED_RX_BM_INTF 0xd9c
#define MTK_WED_RX_BM_ERR_STS 0xda8
+#define MTK_RRO_IND_CMD_SIGNATURE 0xe00
+#define MTK_RRO_IND_CMD_DMA_IDX GENMASK(11, 0)
+#define MTK_RRO_IND_CMD_MAGIC_CNT GENMASK(30, 28)
+
+#define MTK_WED_IND_CMD_RX_CTRL0 0xe04
+#define MTK_WED_IND_CMD_PROC_IDX GENMASK(11, 0)
+#define MTK_WED_IND_CMD_PREFETCH_FREE_CNT GENMASK(19, 16)
+#define MTK_WED_IND_CMD_MAGIC_CNT GENMASK(30, 28)
+
+#define MTK_WED_IND_CMD_RX_CTRL1 0xe08
+#define MTK_WED_IND_CMD_RX_CTRL2 0xe0c
+#define MTK_WED_IND_CMD_MAX_CNT GENMASK(11, 0)
+#define MTK_WED_IND_CMD_BASE_M GENMASK(19, 16)
+
+#define MTK_WED_RRO_CFG0 0xe10
+#define MTK_WED_RRO_CFG1 0xe14
+#define MTK_WED_RRO_CFG1_MAX_WIN_SZ GENMASK(31, 29)
+#define MTK_WED_RRO_CFG1_ACK_SN_BASE_M GENMASK(19, 16)
+#define MTK_WED_RRO_CFG1_PARTICL_SE_ID GENMASK(11, 0)
+
+#define MTK_WED_ADDR_ELEM_CFG0 0xe18
+#define MTK_WED_ADDR_ELEM_CFG1 0xe1c
+#define MTK_WED_ADDR_ELEM_PREFETCH_FREE_CNT GENMASK(19, 16)
+
+#define MTK_WED_ADDR_ELEM_TBL_CFG 0xe20
+#define MTK_WED_ADDR_ELEM_TBL_OFFSET GENMASK(6, 0)
+#define MTK_WED_ADDR_ELEM_TBL_RD_RDY BIT(28)
+#define MTK_WED_ADDR_ELEM_TBL_WR_RDY BIT(29)
+#define MTK_WED_ADDR_ELEM_TBL_RD BIT(30)
+#define MTK_WED_ADDR_ELEM_TBL_WR BIT(31)
+
+#define MTK_WED_RADDR_ELEM_TBL_WDATA 0xe24
+#define MTK_WED_RADDR_ELEM_TBL_RDATA 0xe28
+
+#define MTK_WED_PN_CHECK_CFG 0xe30
+#define MTK_WED_PN_CHECK_SE_ID GENMASK(11, 0)
+#define MTK_WED_PN_CHECK_RD_RDY BIT(28)
+#define MTK_WED_PN_CHECK_WR_RDY BIT(29)
+#define MTK_WED_PN_CHECK_RD BIT(30)
+#define MTK_WED_PN_CHECK_WR BIT(31)
+
+#define MTK_WED_PN_CHECK_WDATA_M 0xe38
+#define MTK_WED_PN_CHECK_IS_FIRST BIT(17)
+
+#define MTK_WED_RRO_MSDU_PG_RING_CFG(_n) (0xe44 + (_n) * 0x8)
+
+#define MTK_WED_RRO_MSDU_PG_RING2_CFG 0xe58
+#define MTK_WED_RRO_MSDU_PG_DRV_CLR BIT(26)
+#define MTK_WED_RRO_MSDU_PG_DRV_EN BIT(31)
+
+#define MTK_WED_RRO_MSDU_PG_CTRL0(_n) (0xe5c + (_n) * 0xc)
+#define MTK_WED_RRO_MSDU_PG_CTRL1(_n) (0xe60 + (_n) * 0xc)
+#define MTK_WED_RRO_MSDU_PG_CTRL2(_n) (0xe64 + (_n) * 0xc)
+
+#define MTK_WED_RRO_RX_D_RX(_n) (0xe80 + (_n) * 0x10)
+
+#define MTK_WED_RRO_RX_MAGIC_CNT BIT(13)
+
+#define MTK_WED_RRO_RX_D_CFG(_n) (0xea0 + (_n) * 0x4)
+#define MTK_WED_RRO_RX_D_DRV_CLR BIT(26)
+#define MTK_WED_RRO_RX_D_DRV_EN BIT(31)
+
+#define MTK_WED_RRO_PG_BM_RX_DMAM 0xeb0
+#define MTK_WED_RRO_PG_BM_RX_SDL0 GENMASK(13, 0)
+
+#define MTK_WED_RRO_PG_BM_BASE 0xeb4
+#define MTK_WED_RRO_PG_BM_INIT_PTR 0xeb8
+#define MTK_WED_RRO_PG_BM_SW_TAIL_IDX GENMASK(15, 0)
+#define MTK_WED_RRO_PG_BM_INIT_SW_TAIL_IDX BIT(16)
+
+#define MTK_WED_WPDMA_INT_CTRL_RRO_RX 0xeec
+#define MTK_WED_WPDMA_INT_CTRL_RRO_RX0_EN BIT(0)
+#define MTK_WED_WPDMA_INT_CTRL_RRO_RX0_CLR BIT(1)
+#define MTK_WED_WPDMA_INT_CTRL_RRO_RX0_DONE_TRIG GENMASK(6, 2)
+#define MTK_WED_WPDMA_INT_CTRL_RRO_RX1_EN BIT(8)
+#define MTK_WED_WPDMA_INT_CTRL_RRO_RX1_CLR BIT(9)
+#define MTK_WED_WPDMA_INT_CTRL_RRO_RX1_DONE_TRIG GENMASK(14, 10)
+
+#define MTK_WED_WPDMA_INT_CTRL_RRO_MSDU_PG 0xef4
+#define MTK_WED_WPDMA_INT_CTRL_RRO_PG0_EN BIT(0)
+#define MTK_WED_WPDMA_INT_CTRL_RRO_PG0_CLR BIT(1)
+#define MTK_WED_WPDMA_INT_CTRL_RRO_PG0_DONE_TRIG GENMASK(6, 2)
+#define MTK_WED_WPDMA_INT_CTRL_RRO_PG1_EN BIT(8)
+#define MTK_WED_WPDMA_INT_CTRL_RRO_PG1_CLR BIT(9)
+#define MTK_WED_WPDMA_INT_CTRL_RRO_PG1_DONE_TRIG GENMASK(14, 10)
+#define MTK_WED_WPDMA_INT_CTRL_RRO_PG2_EN BIT(16)
+#define MTK_WED_WPDMA_INT_CTRL_RRO_PG2_CLR BIT(17)
+#define MTK_WED_WPDMA_INT_CTRL_RRO_PG2_DONE_TRIG GENMASK(22, 18)
+
+#define MTK_WED_RRO_RX_HW_STS 0xf00
+#define MTK_WED_RX_IND_CMD_BUSY GENMASK(31, 0)
+
+#define MTK_WED_RX_IND_CMD_CNT0 0xf20
+#define MTK_WED_RX_IND_CMD_DBG_CNT_EN BIT(31)
+
+#define MTK_WED_RX_IND_CMD_CNT(_n) (0xf20 + (_n) * 0x4)
+#define MTK_WED_IND_CMD_MAGIC_CNT_FAIL_CNT GENMASK(15, 0)
+
+#define MTK_WED_RX_ADDR_ELEM_CNT(_n) (0xf48 + (_n) * 0x4)
+#define MTK_WED_ADDR_ELEM_SIG_FAIL_CNT GENMASK(15, 0)
+#define MTK_WED_ADDR_ELEM_FIRST_SIG_FAIL_CNT GENMASK(31, 16)
+#define MTK_WED_ADDR_ELEM_ACKSN_CNT GENMASK(27, 0)
+
+#define MTK_WED_RX_MSDU_PG_CNT(_n) (0xf5c + (_n) * 0x4)
+
+#define MTK_WED_RX_PN_CHK_CNT 0xf70
+#define MTK_WED_PN_CHK_FAIL_CNT GENMASK(15, 0)
+
#define MTK_WED_WOCPU_VIEW_MIOD_BASE 0x8000
#define MTK_WED_PCIE_INT_MASK 0x0
+#define MTK_WED_AMSDU_FIFO 0x1800
+#define MTK_WED_AMSDU_IS_PRIOR0_RING BIT(10)
+
+#define MTK_WED_AMSDU_STA_INFO 0x01810
+#define MTK_WED_AMSDU_STA_INFO_DO_INIT BIT(0)
+#define MTK_WED_AMSDU_STA_INFO_SET_INIT BIT(1)
+
+#define MTK_WED_AMSDU_STA_INFO_INIT 0x01814
+#define MTK_WED_AMSDU_STA_WTBL_HDRT_MODE BIT(0)
+#define MTK_WED_AMSDU_STA_RMVL BIT(1)
+#define MTK_WED_AMSDU_STA_MAX_AMSDU_LEN GENMASK(7, 2)
+#define MTK_WED_AMSDU_STA_MAX_AMSDU_NUM GENMASK(11, 8)
+
+#define MTK_WED_AMSDU_HIFTXD_BASE_L(_n) (0x1980 + (_n) * 0x4)
+
+#define MTK_WED_AMSDU_PSE 0x1910
+#define MTK_WED_AMSDU_PSE_RESET BIT(16)
+
+#define MTK_WED_AMSDU_HIFTXD_CFG 0x1968
+#define MTK_WED_AMSDU_HIFTXD_SRC GENMASK(16, 15)
+
+#define MTK_WED_MON_AMSDU_FIFO_DMAD 0x1a34
+
+#define MTK_WED_MON_AMSDU_ENG_DMAD(_n) (0x1a80 + (_n) * 0x50)
+#define MTK_WED_MON_AMSDU_ENG_QFPL(_n) (0x1a84 + (_n) * 0x50)
+#define MTK_WED_MON_AMSDU_ENG_QENI(_n) (0x1a88 + (_n) * 0x50)
+#define MTK_WED_MON_AMSDU_ENG_QENO(_n) (0x1a8c + (_n) * 0x50)
+#define MTK_WED_MON_AMSDU_ENG_MERG(_n) (0x1a90 + (_n) * 0x50)
+
+#define MTK_WED_MON_AMSDU_ENG_CNT8(_n) (0x1a94 + (_n) * 0x50)
+#define MTK_WED_AMSDU_ENG_MAX_QGPP_CNT GENMASK(10, 0)
+#define MTK_WED_AMSDU_ENG_MAX_PL_CNT GENMASK(27, 16)
+
+#define MTK_WED_MON_AMSDU_ENG_CNT9(_n) (0x1a98 + (_n) * 0x50)
+#define MTK_WED_AMSDU_ENG_CUR_ENTRY GENMASK(10, 0)
+#define MTK_WED_AMSDU_ENG_MAX_BUF_MERGED GENMASK(20, 16)
+#define MTK_WED_AMSDU_ENG_MAX_MSDU_MERGED GENMASK(28, 24)
+
+#define MTK_WED_MON_AMSDU_QMEM_STS1 0x1e04
+
+#define MTK_WED_MON_AMSDU_QMEM_CNT(_n) (0x1e0c + (_n) * 0x4)
+#define MTK_WED_AMSDU_QMEM_FQ_CNT GENMASK(27, 16)
+#define MTK_WED_AMSDU_QMEM_SP_QCNT GENMASK(11, 0)
+#define MTK_WED_AMSDU_QMEM_TID0_QCNT GENMASK(27, 16)
+#define MTK_WED_AMSDU_QMEM_TID1_QCNT GENMASK(11, 0)
+#define MTK_WED_AMSDU_QMEM_TID2_QCNT GENMASK(27, 16)
+#define MTK_WED_AMSDU_QMEM_TID3_QCNT GENMASK(11, 0)
+#define MTK_WED_AMSDU_QMEM_TID4_QCNT GENMASK(27, 16)
+#define MTK_WED_AMSDU_QMEM_TID5_QCNT GENMASK(11, 0)
+#define MTK_WED_AMSDU_QMEM_TID6_QCNT GENMASK(27, 16)
+#define MTK_WED_AMSDU_QMEM_TID7_QCNT GENMASK(11, 0)
+
+#define MTK_WED_MON_AMSDU_QMEM_PTR(_n) (0x1e20 + (_n) * 0x4)
+#define MTK_WED_AMSDU_QMEM_FQ_HEAD GENMASK(27, 16)
+#define MTK_WED_AMSDU_QMEM_SP_QHEAD GENMASK(11, 0)
+#define MTK_WED_AMSDU_QMEM_TID0_QHEAD GENMASK(27, 16)
+#define MTK_WED_AMSDU_QMEM_TID1_QHEAD GENMASK(11, 0)
+#define MTK_WED_AMSDU_QMEM_TID2_QHEAD GENMASK(27, 16)
+#define MTK_WED_AMSDU_QMEM_TID3_QHEAD GENMASK(11, 0)
+#define MTK_WED_AMSDU_QMEM_TID4_QHEAD GENMASK(27, 16)
+#define MTK_WED_AMSDU_QMEM_TID5_QHEAD GENMASK(11, 0)
+#define MTK_WED_AMSDU_QMEM_TID6_QHEAD GENMASK(27, 16)
+#define MTK_WED_AMSDU_QMEM_TID7_QHEAD GENMASK(11, 0)
+#define MTK_WED_AMSDU_QMEM_FQ_TAIL GENMASK(27, 16)
+#define MTK_WED_AMSDU_QMEM_SP_QTAIL GENMASK(11, 0)
+#define MTK_WED_AMSDU_QMEM_TID0_QTAIL GENMASK(27, 16)
+#define MTK_WED_AMSDU_QMEM_TID1_QTAIL GENMASK(11, 0)
+#define MTK_WED_AMSDU_QMEM_TID2_QTAIL GENMASK(27, 16)
+#define MTK_WED_AMSDU_QMEM_TID3_QTAIL GENMASK(11, 0)
+#define MTK_WED_AMSDU_QMEM_TID4_QTAIL GENMASK(27, 16)
+#define MTK_WED_AMSDU_QMEM_TID5_QTAIL GENMASK(11, 0)
+#define MTK_WED_AMSDU_QMEM_TID6_QTAIL GENMASK(27, 16)
+#define MTK_WED_AMSDU_QMEM_TID7_QTAIL GENMASK(11, 0)
+
+#define MTK_WED_MON_AMSDU_HIFTXD_FETCH_MSDU(_n) (0x1ec4 + (_n) * 0x4)
+
+#define MTK_WED_PCIE_BASE 0x11280000
+#define MTK_WED_PCIE_BASE0 0x11300000
+#define MTK_WED_PCIE_BASE1 0x11310000
+#define MTK_WED_PCIE_BASE2 0x11290000
#endif
diff --git a/drivers/net/ethernet/mediatek/mtk_wed_wo.h b/drivers/net/ethernet/mediatek/mtk_wed_wo.h
index 7a1a2a28f1ac..87a67fa3868d 100644
--- a/drivers/net/ethernet/mediatek/mtk_wed_wo.h
+++ b/drivers/net/ethernet/mediatek/mtk_wed_wo.h
@@ -91,6 +91,8 @@ enum mtk_wed_dummy_cr_idx {
#define MT7981_FIRMWARE_WO "mediatek/mt7981_wo.bin"
#define MT7986_FIRMWARE_WO0 "mediatek/mt7986_wo_0.bin"
#define MT7986_FIRMWARE_WO1 "mediatek/mt7986_wo_1.bin"
+#define MT7988_FIRMWARE_WO0 "mediatek/mt7988_wo_0.bin"
+#define MT7988_FIRMWARE_WO1 "mediatek/mt7988_wo_1.bin"
#define MTK_WO_MCU_CFG_LS_BASE 0
#define MTK_WO_MCU_CFG_LS_HW_VER_ADDR (MTK_WO_MCU_CFG_LS_BASE + 0x000)
@@ -228,7 +230,6 @@ struct mtk_wed_wo_queue {
struct mtk_wed_wo {
struct mtk_wed_hw *hw;
- struct mtk_wed_wo_memory_region boot;
struct mtk_wed_wo_queue q_tx;
struct mtk_wed_wo_queue q_rx;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
index c4f4de82e29e..685335832a93 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
@@ -189,3 +189,11 @@ config MLX5_SF_MANAGER
port is managed through devlink. A subfunction supports RDMA, netdevice
and vdpa device. It is similar to a SRIOV VF but it doesn't require
SRIOV support.
+
+config MLX5_DPLL
+ tristate "Mellanox 5th generation network adapters (ConnectX series) DPLL support"
+ depends on NETDEVICES && ETHERNET && PCI && MLX5_CORE
+ select DPLL
+ help
+ DPLL support in Mellanox Technologies ConnectX NICs.
+
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 7e94caca4888..c44870b175f9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -128,3 +128,6 @@ mlx5_core-$(CONFIG_MLX5_SF) += sf/vhca_event.o sf/dev/dev.o sf/dev/driver.o irq_
# SF manager
#
mlx5_core-$(CONFIG_MLX5_SF_MANAGER) += sf/cmd.o sf/hw_table.o sf/devlink.o
+
+obj-$(CONFIG_MLX5_DPLL) += mlx5_dpll.o
+mlx5_dpll-y := dpll.o
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
index 7909f378dc93..1fc03480c2ff 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
@@ -206,6 +206,19 @@ static bool is_ib_enabled(struct mlx5_core_dev *dev)
return err ? false : val.vbool;
}
+static bool is_dpll_supported(struct mlx5_core_dev *dev)
+{
+ if (!IS_ENABLED(CONFIG_MLX5_DPLL))
+ return false;
+
+ if (!MLX5_CAP_MCAM_REG2(dev, synce_registers)) {
+ mlx5_core_warn(dev, "Missing SyncE capability\n");
+ return false;
+ }
+
+ return true;
+}
+
enum {
MLX5_INTERFACE_PROTOCOL_ETH,
MLX5_INTERFACE_PROTOCOL_ETH_REP,
@@ -215,6 +228,8 @@ enum {
MLX5_INTERFACE_PROTOCOL_MPIB,
MLX5_INTERFACE_PROTOCOL_VNET,
+
+ MLX5_INTERFACE_PROTOCOL_DPLL,
};
static const struct mlx5_adev_device {
@@ -237,6 +252,8 @@ static const struct mlx5_adev_device {
.is_supported = &is_ib_rep_supported },
[MLX5_INTERFACE_PROTOCOL_MPIB] = { .suffix = "multiport",
.is_supported = &is_mp_supported },
+ [MLX5_INTERFACE_PROTOCOL_DPLL] = { .suffix = "dpll",
+ .is_supported = &is_dpll_supported },
};
int mlx5_adev_idx_alloc(void)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
index af8460bb257b..3e064234f6fe 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
@@ -138,7 +138,6 @@ static int mlx5_devlink_reload_down(struct devlink *devlink, bool netns_change,
{
struct mlx5_core_dev *dev = devlink_priv(devlink);
struct pci_dev *pdev = dev->pdev;
- bool sf_dev_allocated;
int ret = 0;
if (mlx5_dev_is_lightweight(dev)) {
@@ -148,16 +147,6 @@ static int mlx5_devlink_reload_down(struct devlink *devlink, bool netns_change,
return 0;
}
- sf_dev_allocated = mlx5_sf_dev_allocated(dev);
- if (sf_dev_allocated) {
- /* Reload results in deleting SF device which further results in
- * unregistering devlink instance while holding devlink_mutext.
- * Hence, do not support reload.
- */
- NL_SET_ERR_MSG_MOD(extack, "reload is unsupported when SFs are allocated");
- return -EOPNOTSUPP;
- }
-
if (mlx5_lag_is_active(dev)) {
NL_SET_ERR_MSG_MOD(extack, "reload is unsupported in Lag mode");
return -EOPNOTSUPP;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dpll.c b/drivers/net/ethernet/mellanox/mlx5/core/dpll.c
new file mode 100644
index 000000000000..74f0c7867120
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/dpll.c
@@ -0,0 +1,432 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#include <linux/dpll.h>
+#include <linux/mlx5/driver.h>
+
+/* This structure represents a reference to DPLL, one is created
+ * per mdev instance.
+ */
+struct mlx5_dpll {
+ struct dpll_device *dpll;
+ struct dpll_pin *dpll_pin;
+ struct mlx5_core_dev *mdev;
+ struct workqueue_struct *wq;
+ struct delayed_work work;
+ struct {
+ bool valid;
+ enum dpll_lock_status lock_status;
+ enum dpll_pin_state pin_state;
+ } last;
+ struct notifier_block mdev_nb;
+ struct net_device *tracking_netdev;
+};
+
+static int mlx5_dpll_clock_id_get(struct mlx5_core_dev *mdev, u64 *clock_id)
+{
+ u32 out[MLX5_ST_SZ_DW(msecq_reg)] = {};
+ u32 in[MLX5_ST_SZ_DW(msecq_reg)] = {};
+ int err;
+
+ err = mlx5_core_access_reg(mdev, in, sizeof(in), out, sizeof(out),
+ MLX5_REG_MSECQ, 0, 0);
+ if (err)
+ return err;
+ *clock_id = MLX5_GET64(msecq_reg, out, local_clock_identity);
+ return 0;
+}
+
+static int
+mlx5_dpll_synce_status_get(struct mlx5_core_dev *mdev,
+ enum mlx5_msees_admin_status *admin_status,
+ enum mlx5_msees_oper_status *oper_status,
+ bool *ho_acq)
+{
+ u32 out[MLX5_ST_SZ_DW(msees_reg)] = {};
+ u32 in[MLX5_ST_SZ_DW(msees_reg)] = {};
+ int err;
+
+ err = mlx5_core_access_reg(mdev, in, sizeof(in), out, sizeof(out),
+ MLX5_REG_MSEES, 0, 0);
+ if (err)
+ return err;
+ if (admin_status)
+ *admin_status = MLX5_GET(msees_reg, out, admin_status);
+ *oper_status = MLX5_GET(msees_reg, out, oper_status);
+ if (ho_acq)
+ *ho_acq = MLX5_GET(msees_reg, out, ho_acq);
+ return 0;
+}
+
+static int
+mlx5_dpll_synce_status_set(struct mlx5_core_dev *mdev,
+ enum mlx5_msees_admin_status admin_status)
+{
+ u32 out[MLX5_ST_SZ_DW(msees_reg)] = {};
+ u32 in[MLX5_ST_SZ_DW(msees_reg)] = {};
+
+ MLX5_SET(msees_reg, in, field_select,
+ MLX5_MSEES_FIELD_SELECT_ENABLE |
+ MLX5_MSEES_FIELD_SELECT_ADMIN_STATUS);
+ MLX5_SET(msees_reg, in, admin_status, admin_status);
+ return mlx5_core_access_reg(mdev, in, sizeof(in), out, sizeof(out),
+ MLX5_REG_MSEES, 0, 1);
+}
+
+static enum dpll_lock_status
+mlx5_dpll_lock_status_get(enum mlx5_msees_oper_status oper_status, bool ho_acq)
+{
+ switch (oper_status) {
+ case MLX5_MSEES_OPER_STATUS_SELF_TRACK:
+ fallthrough;
+ case MLX5_MSEES_OPER_STATUS_OTHER_TRACK:
+ return ho_acq ? DPLL_LOCK_STATUS_LOCKED_HO_ACQ :
+ DPLL_LOCK_STATUS_LOCKED;
+ case MLX5_MSEES_OPER_STATUS_HOLDOVER:
+ fallthrough;
+ case MLX5_MSEES_OPER_STATUS_FAIL_HOLDOVER:
+ return DPLL_LOCK_STATUS_HOLDOVER;
+ default:
+ return DPLL_LOCK_STATUS_UNLOCKED;
+ }
+}
+
+static enum dpll_pin_state
+mlx5_dpll_pin_state_get(enum mlx5_msees_admin_status admin_status,
+ enum mlx5_msees_oper_status oper_status)
+{
+ return (admin_status == MLX5_MSEES_ADMIN_STATUS_TRACK &&
+ (oper_status == MLX5_MSEES_OPER_STATUS_SELF_TRACK ||
+ oper_status == MLX5_MSEES_OPER_STATUS_OTHER_TRACK)) ?
+ DPLL_PIN_STATE_CONNECTED : DPLL_PIN_STATE_DISCONNECTED;
+}
+
+static int mlx5_dpll_device_lock_status_get(const struct dpll_device *dpll,
+ void *priv,
+ enum dpll_lock_status *status,
+ struct netlink_ext_ack *extack)
+{
+ enum mlx5_msees_oper_status oper_status;
+ struct mlx5_dpll *mdpll = priv;
+ bool ho_acq;
+ int err;
+
+ err = mlx5_dpll_synce_status_get(mdpll->mdev, NULL,
+ &oper_status, &ho_acq);
+ if (err)
+ return err;
+
+ *status = mlx5_dpll_lock_status_get(oper_status, ho_acq);
+ return 0;
+}
+
+static int mlx5_dpll_device_mode_get(const struct dpll_device *dpll,
+ void *priv,
+ u32 *mode, struct netlink_ext_ack *extack)
+{
+ *mode = DPLL_MODE_MANUAL;
+ return 0;
+}
+
+static bool mlx5_dpll_device_mode_supported(const struct dpll_device *dpll,
+ void *priv,
+ enum dpll_mode mode,
+ struct netlink_ext_ack *extack)
+{
+ return mode == DPLL_MODE_MANUAL;
+}
+
+static const struct dpll_device_ops mlx5_dpll_device_ops = {
+ .lock_status_get = mlx5_dpll_device_lock_status_get,
+ .mode_get = mlx5_dpll_device_mode_get,
+ .mode_supported = mlx5_dpll_device_mode_supported,
+};
+
+static int mlx5_dpll_pin_direction_get(const struct dpll_pin *pin,
+ void *pin_priv,
+ const struct dpll_device *dpll,
+ void *dpll_priv,
+ enum dpll_pin_direction *direction,
+ struct netlink_ext_ack *extack)
+{
+ *direction = DPLL_PIN_DIRECTION_INPUT;
+ return 0;
+}
+
+static int mlx5_dpll_state_on_dpll_get(const struct dpll_pin *pin,
+ void *pin_priv,
+ const struct dpll_device *dpll,
+ void *dpll_priv,
+ enum dpll_pin_state *state,
+ struct netlink_ext_ack *extack)
+{
+ enum mlx5_msees_admin_status admin_status;
+ enum mlx5_msees_oper_status oper_status;
+ struct mlx5_dpll *mdpll = pin_priv;
+ int err;
+
+ err = mlx5_dpll_synce_status_get(mdpll->mdev, &admin_status,
+ &oper_status, NULL);
+ if (err)
+ return err;
+ *state = mlx5_dpll_pin_state_get(admin_status, oper_status);
+ return 0;
+}
+
+static int mlx5_dpll_state_on_dpll_set(const struct dpll_pin *pin,
+ void *pin_priv,
+ const struct dpll_device *dpll,
+ void *dpll_priv,
+ enum dpll_pin_state state,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_dpll *mdpll = pin_priv;
+
+ return mlx5_dpll_synce_status_set(mdpll->mdev,
+ state == DPLL_PIN_STATE_CONNECTED ?
+ MLX5_MSEES_ADMIN_STATUS_TRACK :
+ MLX5_MSEES_ADMIN_STATUS_FREE_RUNNING);
+}
+
+static const struct dpll_pin_ops mlx5_dpll_pins_ops = {
+ .direction_get = mlx5_dpll_pin_direction_get,
+ .state_on_dpll_get = mlx5_dpll_state_on_dpll_get,
+ .state_on_dpll_set = mlx5_dpll_state_on_dpll_set,
+};
+
+static const struct dpll_pin_properties mlx5_dpll_pin_properties = {
+ .type = DPLL_PIN_TYPE_SYNCE_ETH_PORT,
+ .capabilities = DPLL_PIN_CAPABILITIES_STATE_CAN_CHANGE,
+};
+
+#define MLX5_DPLL_PERIODIC_WORK_INTERVAL 500 /* ms */
+
+static void mlx5_dpll_periodic_work_queue(struct mlx5_dpll *mdpll)
+{
+ queue_delayed_work(mdpll->wq, &mdpll->work,
+ msecs_to_jiffies(MLX5_DPLL_PERIODIC_WORK_INTERVAL));
+}
+
+static void mlx5_dpll_periodic_work(struct work_struct *work)
+{
+ struct mlx5_dpll *mdpll = container_of(work, struct mlx5_dpll,
+ work.work);
+ enum mlx5_msees_admin_status admin_status;
+ enum mlx5_msees_oper_status oper_status;
+ enum dpll_lock_status lock_status;
+ enum dpll_pin_state pin_state;
+ bool ho_acq;
+ int err;
+
+ err = mlx5_dpll_synce_status_get(mdpll->mdev, &admin_status,
+ &oper_status, &ho_acq);
+ if (err)
+ goto err_out;
+ lock_status = mlx5_dpll_lock_status_get(oper_status, ho_acq);
+ pin_state = mlx5_dpll_pin_state_get(admin_status, oper_status);
+
+ if (!mdpll->last.valid)
+ goto invalid_out;
+
+ if (mdpll->last.lock_status != lock_status)
+ dpll_device_change_ntf(mdpll->dpll);
+ if (mdpll->last.pin_state != pin_state)
+ dpll_pin_change_ntf(mdpll->dpll_pin);
+
+invalid_out:
+ mdpll->last.lock_status = lock_status;
+ mdpll->last.pin_state = pin_state;
+ mdpll->last.valid = true;
+err_out:
+ mlx5_dpll_periodic_work_queue(mdpll);
+}
+
+static void mlx5_dpll_netdev_dpll_pin_set(struct mlx5_dpll *mdpll,
+ struct net_device *netdev)
+{
+ if (mdpll->tracking_netdev)
+ return;
+ netdev_dpll_pin_set(netdev, mdpll->dpll_pin);
+ mdpll->tracking_netdev = netdev;
+}
+
+static void mlx5_dpll_netdev_dpll_pin_clear(struct mlx5_dpll *mdpll)
+{
+ if (!mdpll->tracking_netdev)
+ return;
+ netdev_dpll_pin_clear(mdpll->tracking_netdev);
+ mdpll->tracking_netdev = NULL;
+}
+
+static int mlx5_dpll_mdev_notifier_event(struct notifier_block *nb,
+ unsigned long event, void *data)
+{
+ struct mlx5_dpll *mdpll = container_of(nb, struct mlx5_dpll, mdev_nb);
+ struct net_device *netdev = data;
+
+ switch (event) {
+ case MLX5_DRIVER_EVENT_UPLINK_NETDEV:
+ if (netdev)
+ mlx5_dpll_netdev_dpll_pin_set(mdpll, netdev);
+ else
+ mlx5_dpll_netdev_dpll_pin_clear(mdpll);
+ break;
+ default:
+ return NOTIFY_DONE;
+ }
+
+ return NOTIFY_OK;
+}
+
+static void mlx5_dpll_mdev_netdev_track(struct mlx5_dpll *mdpll,
+ struct mlx5_core_dev *mdev)
+{
+ mdpll->mdev_nb.notifier_call = mlx5_dpll_mdev_notifier_event;
+ mlx5_blocking_notifier_register(mdev, &mdpll->mdev_nb);
+ mlx5_core_uplink_netdev_event_replay(mdev);
+}
+
+static void mlx5_dpll_mdev_netdev_untrack(struct mlx5_dpll *mdpll,
+ struct mlx5_core_dev *mdev)
+{
+ mlx5_blocking_notifier_unregister(mdev, &mdpll->mdev_nb);
+ mlx5_dpll_netdev_dpll_pin_clear(mdpll);
+}
+
+static int mlx5_dpll_probe(struct auxiliary_device *adev,
+ const struct auxiliary_device_id *id)
+{
+ struct mlx5_adev *edev = container_of(adev, struct mlx5_adev, adev);
+ struct mlx5_core_dev *mdev = edev->mdev;
+ struct mlx5_dpll *mdpll;
+ u64 clock_id;
+ int err;
+
+ err = mlx5_dpll_synce_status_set(mdev,
+ MLX5_MSEES_ADMIN_STATUS_FREE_RUNNING);
+ if (err)
+ return err;
+
+ err = mlx5_dpll_clock_id_get(mdev, &clock_id);
+ if (err)
+ return err;
+
+ mdpll = kzalloc(sizeof(*mdpll), GFP_KERNEL);
+ if (!mdpll)
+ return -ENOMEM;
+ mdpll->mdev = mdev;
+ auxiliary_set_drvdata(adev, mdpll);
+
+ /* Multiple mdev instances might share one DPLL device. */
+ mdpll->dpll = dpll_device_get(clock_id, 0, THIS_MODULE);
+ if (IS_ERR(mdpll->dpll)) {
+ err = PTR_ERR(mdpll->dpll);
+ goto err_free_mdpll;
+ }
+
+ err = dpll_device_register(mdpll->dpll, DPLL_TYPE_EEC,
+ &mlx5_dpll_device_ops, mdpll);
+ if (err)
+ goto err_put_dpll_device;
+
+ /* Multiple mdev instances might share one DPLL pin. */
+ mdpll->dpll_pin = dpll_pin_get(clock_id, mlx5_get_dev_index(mdev),
+ THIS_MODULE, &mlx5_dpll_pin_properties);
+ if (IS_ERR(mdpll->dpll_pin)) {
+ err = PTR_ERR(mdpll->dpll_pin);
+ goto err_unregister_dpll_device;
+ }
+
+ err = dpll_pin_register(mdpll->dpll, mdpll->dpll_pin,
+ &mlx5_dpll_pins_ops, mdpll);
+ if (err)
+ goto err_put_dpll_pin;
+
+ mdpll->wq = create_singlethread_workqueue("mlx5_dpll");
+ if (!mdpll->wq) {
+ err = -ENOMEM;
+ goto err_unregister_dpll_pin;
+ }
+
+ mlx5_dpll_mdev_netdev_track(mdpll, mdev);
+
+ INIT_DELAYED_WORK(&mdpll->work, &mlx5_dpll_periodic_work);
+ mlx5_dpll_periodic_work_queue(mdpll);
+
+ return 0;
+
+err_unregister_dpll_pin:
+ dpll_pin_unregister(mdpll->dpll, mdpll->dpll_pin,
+ &mlx5_dpll_pins_ops, mdpll);
+err_put_dpll_pin:
+ dpll_pin_put(mdpll->dpll_pin);
+err_unregister_dpll_device:
+ dpll_device_unregister(mdpll->dpll, &mlx5_dpll_device_ops, mdpll);
+err_put_dpll_device:
+ dpll_device_put(mdpll->dpll);
+err_free_mdpll:
+ kfree(mdpll);
+ return err;
+}
+
+static void mlx5_dpll_remove(struct auxiliary_device *adev)
+{
+ struct mlx5_dpll *mdpll = auxiliary_get_drvdata(adev);
+ struct mlx5_core_dev *mdev = mdpll->mdev;
+
+ cancel_delayed_work(&mdpll->work);
+ mlx5_dpll_mdev_netdev_untrack(mdpll, mdev);
+ destroy_workqueue(mdpll->wq);
+ dpll_pin_unregister(mdpll->dpll, mdpll->dpll_pin,
+ &mlx5_dpll_pins_ops, mdpll);
+ dpll_pin_put(mdpll->dpll_pin);
+ dpll_device_unregister(mdpll->dpll, &mlx5_dpll_device_ops, mdpll);
+ dpll_device_put(mdpll->dpll);
+ kfree(mdpll);
+
+ mlx5_dpll_synce_status_set(mdev,
+ MLX5_MSEES_ADMIN_STATUS_FREE_RUNNING);
+}
+
+static int mlx5_dpll_suspend(struct auxiliary_device *adev, pm_message_t state)
+{
+ return 0;
+}
+
+static int mlx5_dpll_resume(struct auxiliary_device *adev)
+{
+ return 0;
+}
+
+static const struct auxiliary_device_id mlx5_dpll_id_table[] = {
+ { .name = MLX5_ADEV_NAME ".dpll", },
+ {},
+};
+
+MODULE_DEVICE_TABLE(auxiliary, mlx5_dpll_id_table);
+
+static struct auxiliary_driver mlx5_dpll_driver = {
+ .name = "dpll",
+ .probe = mlx5_dpll_probe,
+ .remove = mlx5_dpll_remove,
+ .suspend = mlx5_dpll_suspend,
+ .resume = mlx5_dpll_resume,
+ .id_table = mlx5_dpll_id_table,
+};
+
+static int __init mlx5_dpll_init(void)
+{
+ return auxiliary_driver_register(&mlx5_dpll_driver);
+}
+
+static void __exit mlx5_dpll_exit(void)
+{
+ auxiliary_driver_unregister(&mlx5_dpll_driver);
+}
+
+module_init(mlx5_dpll_init);
+module_exit(mlx5_dpll_exit);
+
+MODULE_AUTHOR("Jiri Pirko <jiri@nvidia.com>");
+MODULE_DESCRIPTION("Mellanox 5th generation network adapters (ConnectX series) DPLL driver");
+MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c
index c6b6e290fd79..0b1ac6e5c890 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c
@@ -12,11 +12,19 @@ struct mlx5e_dev *mlx5e_create_devlink(struct device *dev,
{
struct mlx5e_dev *mlx5e_dev;
struct devlink *devlink;
+ int err;
devlink = devlink_alloc_ns(&mlx5e_devlink_ops, sizeof(*mlx5e_dev),
devlink_net(priv_to_devlink(mdev)), dev);
if (!devlink)
return ERR_PTR(-ENOMEM);
+
+ err = devl_nested_devlink_set(priv_to_devlink(mdev), devlink);
+ if (err) {
+ devlink_free(devlink);
+ return ERR_PTR(err);
+ }
+
devlink_register(devlink);
return devlink_priv(devlink);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 15561965d2af..d17c9c31b165 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1405,9 +1405,9 @@ err_irq_table:
static void mlx5_unload(struct mlx5_core_dev *dev)
{
+ mlx5_eswitch_disable(dev->priv.eswitch);
mlx5_devlink_traps_unregister(priv_to_devlink(dev));
mlx5_sf_dev_table_destroy(dev);
- mlx5_eswitch_disable(dev->priv.eswitch);
mlx5_sriov_detach(dev);
mlx5_lag_remove_mdev(dev);
mlx5_ec_cleanup(dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.h b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.h
index 2a66a427ef15..b99131e95e37 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.h
@@ -19,6 +19,12 @@ struct mlx5_sf_dev {
u16 fn_id;
};
+struct mlx5_sf_peer_devlink_event_ctx {
+ u16 fn_id;
+ struct devlink *devlink;
+ int err;
+};
+
void mlx5_sf_dev_table_create(struct mlx5_core_dev *dev);
void mlx5_sf_dev_table_destroy(struct mlx5_core_dev *dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c
index 8fe82f1191bb..169c2c68ed5c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c
@@ -8,6 +8,20 @@
#include "dev.h"
#include "devlink.h"
+static int mlx5_core_peer_devlink_set(struct mlx5_sf_dev *sf_dev, struct devlink *devlink)
+{
+ struct mlx5_sf_peer_devlink_event_ctx event_ctx = {
+ .fn_id = sf_dev->fn_id,
+ .devlink = devlink,
+ };
+ int ret;
+
+ ret = mlx5_blocking_notifier_call_chain(sf_dev->parent_mdev,
+ MLX5_DRIVER_EVENT_SF_PEER_DEVLINK,
+ &event_ctx);
+ return ret == NOTIFY_OK ? event_ctx.err : 0;
+}
+
static int mlx5_sf_dev_probe(struct auxiliary_device *adev, const struct auxiliary_device_id *id)
{
struct mlx5_sf_dev *sf_dev = container_of(adev, struct mlx5_sf_dev, adev);
@@ -54,9 +68,21 @@ static int mlx5_sf_dev_probe(struct auxiliary_device *adev, const struct auxilia
mlx5_core_warn(mdev, "mlx5_init_one err=%d\n", err);
goto init_one_err;
}
+
+ err = mlx5_core_peer_devlink_set(sf_dev, devlink);
+ if (err) {
+ mlx5_core_warn(mdev, "mlx5_core_peer_devlink_set err=%d\n", err);
+ goto peer_devlink_set_err;
+ }
+
devlink_register(devlink);
return 0;
+peer_devlink_set_err:
+ if (mlx5_dev_is_lightweight(sf_dev->mdev))
+ mlx5_uninit_one_light(sf_dev->mdev);
+ else
+ mlx5_uninit_one(sf_dev->mdev);
init_one_err:
iounmap(mdev->iseg);
remap_err:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c
index e34a8f88c518..964a5b1876f3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c
@@ -28,6 +28,7 @@ struct mlx5_sf_table {
struct mutex sf_state_lock; /* Serializes sf state among user cmds & vhca event handler. */
struct notifier_block esw_nb;
struct notifier_block vhca_nb;
+ struct notifier_block mdev_nb;
};
static struct mlx5_sf *
@@ -511,6 +512,35 @@ static int mlx5_sf_esw_event(struct notifier_block *nb, unsigned long event, voi
return 0;
}
+static int mlx5_sf_mdev_event(struct notifier_block *nb, unsigned long event, void *data)
+{
+ struct mlx5_sf_table *table = container_of(nb, struct mlx5_sf_table, mdev_nb);
+ struct mlx5_sf_peer_devlink_event_ctx *event_ctx = data;
+ int ret = NOTIFY_DONE;
+ struct mlx5_sf *sf;
+
+ if (event != MLX5_DRIVER_EVENT_SF_PEER_DEVLINK)
+ return NOTIFY_DONE;
+
+ table = mlx5_sf_table_try_get(table->dev);
+ if (!table)
+ return NOTIFY_DONE;
+
+ mutex_lock(&table->sf_state_lock);
+ sf = mlx5_sf_lookup_by_function_id(table, event_ctx->fn_id);
+ if (!sf)
+ goto out;
+
+ event_ctx->err = devl_port_fn_devlink_set(&sf->dl_port.dl_port,
+ event_ctx->devlink);
+
+ ret = NOTIFY_OK;
+out:
+ mutex_unlock(&table->sf_state_lock);
+ mlx5_sf_table_put(table);
+ return ret;
+}
+
static bool mlx5_sf_table_supported(const struct mlx5_core_dev *dev)
{
return dev->priv.eswitch && MLX5_ESWITCH_MANAGER(dev) &&
@@ -544,6 +574,9 @@ int mlx5_sf_table_init(struct mlx5_core_dev *dev)
if (err)
goto vhca_err;
+ table->mdev_nb.notifier_call = mlx5_sf_mdev_event;
+ mlx5_blocking_notifier_register(dev, &table->mdev_nb);
+
return 0;
vhca_err:
@@ -562,6 +595,7 @@ void mlx5_sf_table_cleanup(struct mlx5_core_dev *dev)
if (!table)
return;
+ mlx5_blocking_notifier_unregister(dev, &table->mdev_nb);
mlx5_vhca_event_notifier_unregister(table->dev, &table->vhca_nb);
mlx5_esw_event_notifier_unregister(dev->priv.eswitch, &table->esw_nb);
WARN_ON(refcount_read(&table->refcount));
diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c
index 694de9513b9f..954ba0826c61 100644
--- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c
+++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c
@@ -471,7 +471,7 @@ out:
return err;
}
-static int mlxbf_gige_remove(struct platform_device *pdev)
+static void mlxbf_gige_remove(struct platform_device *pdev)
{
struct mlxbf_gige *priv = platform_get_drvdata(pdev);
@@ -479,8 +479,6 @@ static int mlxbf_gige_remove(struct platform_device *pdev)
phy_disconnect(priv->netdev->phydev);
mlxbf_gige_mdio_remove(priv);
platform_set_drvdata(pdev, NULL);
-
- return 0;
}
static void mlxbf_gige_shutdown(struct platform_device *pdev)
@@ -499,7 +497,7 @@ MODULE_DEVICE_TABLE(acpi, mlxbf_gige_acpi_match);
static struct platform_driver mlxbf_gige_driver = {
.probe = mlxbf_gige_probe,
- .remove = mlxbf_gige_remove,
+ .remove_new = mlxbf_gige_remove,
.shutdown = mlxbf_gige_shutdown,
.driver = {
.name = KBUILD_MODNAME,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_linecard_dev.c b/drivers/net/ethernet/mellanox/mlxsw/core_linecard_dev.c
index af37e650a8ad..e8d6fe35bf36 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_linecard_dev.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_linecard_dev.c
@@ -132,6 +132,7 @@ static int mlxsw_linecard_bdev_probe(struct auxiliary_device *adev,
struct mlxsw_linecard *linecard = linecard_bdev->linecard;
struct mlxsw_linecard_dev *linecard_dev;
struct devlink *devlink;
+ int err;
devlink = devlink_alloc(&mlxsw_linecard_dev_devlink_ops,
sizeof(*linecard_dev), &adev->dev);
@@ -141,8 +142,12 @@ static int mlxsw_linecard_bdev_probe(struct auxiliary_device *adev,
linecard_dev->linecard = linecard_bdev->linecard;
linecard_bdev->linecard_dev = linecard_dev;
+ err = devlink_linecard_nested_dl_set(linecard->devlink_linecard, devlink);
+ if (err) {
+ devlink_free(devlink);
+ return err;
+ }
devlink_register(devlink);
- devlink_linecard_nested_dl_set(linecard->devlink_linecard, devlink);
return 0;
}
@@ -151,9 +156,7 @@ static void mlxsw_linecard_bdev_remove(struct auxiliary_device *adev)
struct mlxsw_linecard_bdev *linecard_bdev =
container_of(adev, struct mlxsw_linecard_bdev, adev);
struct devlink *devlink = priv_to_devlink(linecard_bdev->linecard_dev);
- struct mlxsw_linecard *linecard = linecard_bdev->linecard;
- devlink_linecard_nested_dl_set(linecard->devlink_linecard, NULL);
devlink_unregister(devlink);
devlink_free(devlink);
}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_bloom_filter.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_bloom_filter.c
index e2aced7ab454..95f63fcf4ba1 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_bloom_filter.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_bloom_filter.c
@@ -496,7 +496,7 @@ mlxsw_sp_acl_bf_init(struct mlxsw_sp *mlxsw_sp, unsigned int num_erp_banks)
* is 2^ACL_MAX_BF_LOG
*/
bf_bank_size = 1 << MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_MAX_BF_LOG);
- bf = kzalloc(struct_size(bf, refcnt, bf_bank_size * num_erp_banks),
+ bf = kzalloc(struct_size(bf, refcnt, size_mul(bf_bank_size, num_erp_banks)),
GFP_KERNEL);
if (!bf)
return ERR_PTR(-ENOMEM);
diff --git a/drivers/net/ethernet/micrel/ks8842.c b/drivers/net/ethernet/micrel/ks8842.c
index c11b118dc415..ddd87ef71caf 100644
--- a/drivers/net/ethernet/micrel/ks8842.c
+++ b/drivers/net/ethernet/micrel/ks8842.c
@@ -1228,7 +1228,7 @@ err_mem_region:
return err;
}
-static int ks8842_remove(struct platform_device *pdev)
+static void ks8842_remove(struct platform_device *pdev)
{
struct net_device *netdev = platform_get_drvdata(pdev);
struct ks8842_adapter *adapter = netdev_priv(netdev);
@@ -1239,7 +1239,6 @@ static int ks8842_remove(struct platform_device *pdev)
iounmap(adapter->hw_addr);
free_netdev(netdev);
release_mem_region(iomem->start, resource_size(iomem));
- return 0;
}
@@ -1248,7 +1247,7 @@ static struct platform_driver ks8842_platform_driver = {
.name = DRV_NAME,
},
.probe = ks8842_probe,
- .remove = ks8842_remove,
+ .remove_new = ks8842_remove,
};
module_platform_driver(ks8842_platform_driver);
diff --git a/drivers/net/ethernet/micrel/ks8851_par.c b/drivers/net/ethernet/micrel/ks8851_par.c
index 7f49042484bd..2a7f29854267 100644
--- a/drivers/net/ethernet/micrel/ks8851_par.c
+++ b/drivers/net/ethernet/micrel/ks8851_par.c
@@ -327,11 +327,9 @@ static int ks8851_probe_par(struct platform_device *pdev)
return ks8851_probe_common(netdev, dev, msg_enable);
}
-static int ks8851_remove_par(struct platform_device *pdev)
+static void ks8851_remove_par(struct platform_device *pdev)
{
ks8851_remove_common(&pdev->dev);
-
- return 0;
}
static const struct of_device_id ks8851_match_table[] = {
@@ -347,7 +345,7 @@ static struct platform_driver ks8851_driver = {
.pm = &ks8851_pm_ops,
},
.probe = ks8851_probe_par,
- .remove = ks8851_remove_par,
+ .remove_new = ks8851_remove_par,
};
module_platform_driver(ks8851_driver);
diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c
index c81cdeb4d4e7..f940895b14e8 100644
--- a/drivers/net/ethernet/microchip/lan743x_main.c
+++ b/drivers/net/ethernet/microchip/lan743x_main.c
@@ -1466,9 +1466,15 @@ static void lan743x_phy_link_status_change(struct net_device *netdev)
static void lan743x_phy_close(struct lan743x_adapter *adapter)
{
struct net_device *netdev = adapter->netdev;
+ struct phy_device *phydev = netdev->phydev;
phy_stop(netdev->phydev);
phy_disconnect(netdev->phydev);
+
+ /* using phydev here as phy_disconnect NULLs netdev->phydev */
+ if (phy_is_pseudo_fixed_link(phydev))
+ fixed_phy_unregister(phydev);
+
}
static void lan743x_phy_interface_select(struct lan743x_adapter *adapter)
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c
index 0d6e79af2410..8e4101628fbd 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c
@@ -1261,7 +1261,7 @@ cleanup_ports:
return err;
}
-static int lan966x_remove(struct platform_device *pdev)
+static void lan966x_remove(struct platform_device *pdev)
{
struct lan966x *lan966x = platform_get_drvdata(pdev);
@@ -1280,13 +1280,11 @@ static int lan966x_remove(struct platform_device *pdev)
lan966x_ptp_deinit(lan966x);
debugfs_remove_recursive(lan966x->debugfs_root);
-
- return 0;
}
static struct platform_driver lan966x_driver = {
.probe = lan966x_probe,
- .remove = lan966x_remove,
+ .remove_new = lan966x_remove,
.driver = {
.name = "lan966x-switch",
.of_match_table = lan966x_match,
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c
index dc9af480bfea..d1f7fc8b1b71 100644
--- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c
@@ -911,7 +911,7 @@ cleanup_pnode:
return err;
}
-static int mchp_sparx5_remove(struct platform_device *pdev)
+static void mchp_sparx5_remove(struct platform_device *pdev)
{
struct sparx5 *sparx5 = platform_get_drvdata(pdev);
@@ -931,8 +931,6 @@ static int mchp_sparx5_remove(struct platform_device *pdev)
/* Unregister netdevs */
sparx5_unregister_notifier_blocks(sparx5);
destroy_workqueue(sparx5->mact_queue);
-
- return 0;
}
static const struct of_device_id mchp_sparx5_match[] = {
@@ -943,7 +941,7 @@ MODULE_DEVICE_TABLE(of, mchp_sparx5_match);
static struct platform_driver mchp_sparx5_driver = {
.probe = mchp_sparx5_probe,
- .remove = mchp_sparx5_remove,
+ .remove_new = mchp_sparx5_remove,
.driver = {
.name = "sparx5-switch",
.of_match_table = mchp_sparx5_match,
diff --git a/drivers/net/ethernet/moxa/moxart_ether.c b/drivers/net/ethernet/moxa/moxart_ether.c
index 3da99b62797d..96dc69e7141f 100644
--- a/drivers/net/ethernet/moxa/moxart_ether.c
+++ b/drivers/net/ethernet/moxa/moxart_ether.c
@@ -558,7 +558,7 @@ irq_map_fail:
return ret;
}
-static int moxart_remove(struct platform_device *pdev)
+static void moxart_remove(struct platform_device *pdev)
{
struct net_device *ndev = platform_get_drvdata(pdev);
@@ -566,8 +566,6 @@ static int moxart_remove(struct platform_device *pdev)
devm_free_irq(&pdev->dev, ndev->irq, ndev);
moxart_mac_free_memory(ndev);
free_netdev(ndev);
-
- return 0;
}
static const struct of_device_id moxart_mac_match[] = {
@@ -578,7 +576,7 @@ MODULE_DEVICE_TABLE(of, moxart_mac_match);
static struct platform_driver moxart_mac_driver = {
.probe = moxart_mac_probe,
- .remove = moxart_remove,
+ .remove_new = moxart_remove,
.driver = {
.name = "moxart-ethernet",
.of_match_table = moxart_mac_match,
diff --git a/drivers/net/ethernet/mscc/ocelot_vsc7514.c b/drivers/net/ethernet/mscc/ocelot_vsc7514.c
index 151b42465348..993212c3a7da 100644
--- a/drivers/net/ethernet/mscc/ocelot_vsc7514.c
+++ b/drivers/net/ethernet/mscc/ocelot_vsc7514.c
@@ -392,7 +392,7 @@ out_free_devlink:
return err;
}
-static int mscc_ocelot_remove(struct platform_device *pdev)
+static void mscc_ocelot_remove(struct platform_device *pdev)
{
struct ocelot *ocelot = platform_get_drvdata(pdev);
@@ -408,13 +408,11 @@ static int mscc_ocelot_remove(struct platform_device *pdev)
unregister_switchdev_notifier(&ocelot_switchdev_nb);
unregister_netdevice_notifier(&ocelot_netdevice_nb);
devlink_free(ocelot->devlink);
-
- return 0;
}
static struct platform_driver mscc_ocelot_driver = {
.probe = mscc_ocelot_probe,
- .remove = mscc_ocelot_remove,
+ .remove_new = mscc_ocelot_remove,
.driver = {
.name = "ocelot-switch",
.of_match_table = mscc_ocelot_match,
diff --git a/drivers/net/ethernet/natsemi/jazzsonic.c b/drivers/net/ethernet/natsemi/jazzsonic.c
index 3f371faeb6d0..2b6e097df28f 100644
--- a/drivers/net/ethernet/natsemi/jazzsonic.c
+++ b/drivers/net/ethernet/natsemi/jazzsonic.c
@@ -227,7 +227,7 @@ MODULE_ALIAS("platform:jazzsonic");
#include "sonic.c"
-static int jazz_sonic_device_remove(struct platform_device *pdev)
+static void jazz_sonic_device_remove(struct platform_device *pdev)
{
struct net_device *dev = platform_get_drvdata(pdev);
struct sonic_local* lp = netdev_priv(dev);
@@ -237,13 +237,11 @@ static int jazz_sonic_device_remove(struct platform_device *pdev)
lp->descriptors, lp->descriptors_laddr);
release_mem_region(dev->base_addr, SONIC_MEM_SIZE);
free_netdev(dev);
-
- return 0;
}
static struct platform_driver jazz_sonic_driver = {
.probe = jazz_sonic_probe,
- .remove = jazz_sonic_device_remove,
+ .remove_new = jazz_sonic_device_remove,
.driver = {
.name = jazz_sonic_string,
},
diff --git a/drivers/net/ethernet/natsemi/macsonic.c b/drivers/net/ethernet/natsemi/macsonic.c
index b16f7c830f9b..2fc63860dbdb 100644
--- a/drivers/net/ethernet/natsemi/macsonic.c
+++ b/drivers/net/ethernet/natsemi/macsonic.c
@@ -532,7 +532,7 @@ MODULE_ALIAS("platform:macsonic");
#include "sonic.c"
-static int mac_sonic_platform_remove(struct platform_device *pdev)
+static void mac_sonic_platform_remove(struct platform_device *pdev)
{
struct net_device *dev = platform_get_drvdata(pdev);
struct sonic_local* lp = netdev_priv(dev);
@@ -541,13 +541,11 @@ static int mac_sonic_platform_remove(struct platform_device *pdev)
dma_free_coherent(lp->device, SIZEOF_SONIC_DESC * SONIC_BUS_SCALE(lp->dma_bitmode),
lp->descriptors, lp->descriptors_laddr);
free_netdev(dev);
-
- return 0;
}
static struct platform_driver mac_sonic_platform_driver = {
.probe = mac_sonic_platform_probe,
- .remove = mac_sonic_platform_remove,
+ .remove_new = mac_sonic_platform_remove,
.driver = {
.name = "macsonic",
},
diff --git a/drivers/net/ethernet/natsemi/xtsonic.c b/drivers/net/ethernet/natsemi/xtsonic.c
index 52fef34d43f9..8943e7244310 100644
--- a/drivers/net/ethernet/natsemi/xtsonic.c
+++ b/drivers/net/ethernet/natsemi/xtsonic.c
@@ -249,7 +249,7 @@ MODULE_DESCRIPTION("Xtensa XT2000 SONIC ethernet driver");
#include "sonic.c"
-static int xtsonic_device_remove(struct platform_device *pdev)
+static void xtsonic_device_remove(struct platform_device *pdev)
{
struct net_device *dev = platform_get_drvdata(pdev);
struct sonic_local *lp = netdev_priv(dev);
@@ -260,13 +260,11 @@ static int xtsonic_device_remove(struct platform_device *pdev)
lp->descriptors, lp->descriptors_laddr);
release_region (dev->base_addr, SONIC_MEM_SIZE);
free_netdev(dev);
-
- return 0;
}
static struct platform_driver xtsonic_driver = {
.probe = xtsonic_probe,
- .remove = xtsonic_device_remove,
+ .remove_new = xtsonic_device_remove,
.driver = {
.name = xtsonic_string,
},
diff --git a/drivers/net/ethernet/ni/nixge.c b/drivers/net/ethernet/ni/nixge.c
index ba27bbc68f85..97f4798f4b42 100644
--- a/drivers/net/ethernet/ni/nixge.c
+++ b/drivers/net/ethernet/ni/nixge.c
@@ -1397,7 +1397,7 @@ free_netdev:
return err;
}
-static int nixge_remove(struct platform_device *pdev)
+static void nixge_remove(struct platform_device *pdev)
{
struct net_device *ndev = platform_get_drvdata(pdev);
struct nixge_priv *priv = netdev_priv(ndev);
@@ -1412,13 +1412,11 @@ static int nixge_remove(struct platform_device *pdev)
mdiobus_unregister(priv->mii_bus);
free_netdev(ndev);
-
- return 0;
}
static struct platform_driver nixge_driver = {
.probe = nixge_probe,
- .remove = nixge_remove,
+ .remove_new = nixge_remove,
.driver = {
.name = "nixge",
.of_match_table = nixge_dt_ids,
diff --git a/drivers/net/ethernet/nxp/lpc_eth.c b/drivers/net/ethernet/nxp/lpc_eth.c
index 1a4a272f4c5c..dd3e58a1319c 100644
--- a/drivers/net/ethernet/nxp/lpc_eth.c
+++ b/drivers/net/ethernet/nxp/lpc_eth.c
@@ -1417,7 +1417,7 @@ err_exit:
return ret;
}
-static int lpc_eth_drv_remove(struct platform_device *pdev)
+static void lpc_eth_drv_remove(struct platform_device *pdev)
{
struct net_device *ndev = platform_get_drvdata(pdev);
struct netdata_local *pldat = netdev_priv(ndev);
@@ -1436,8 +1436,6 @@ static int lpc_eth_drv_remove(struct platform_device *pdev)
clk_disable_unprepare(pldat->clk);
clk_put(pldat->clk);
free_netdev(ndev);
-
- return 0;
}
#ifdef CONFIG_PM
@@ -1505,7 +1503,7 @@ MODULE_DEVICE_TABLE(of, lpc_eth_match);
static struct platform_driver lpc_eth_driver = {
.probe = lpc_eth_drv_probe,
- .remove = lpc_eth_drv_remove,
+ .remove_new = lpc_eth_drv_remove,
#ifdef CONFIG_PM
.suspend = lpc_eth_drv_suspend,
.resume = lpc_eth_drv_resume,
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.h b/drivers/net/ethernet/pensando/ionic/ionic_dev.h
index aae4131f146a..1dbc3cb50b1d 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_dev.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.h
@@ -7,6 +7,7 @@
#include <linux/atomic.h>
#include <linux/mutex.h>
#include <linux/workqueue.h>
+#include <linux/skbuff.h>
#include "ionic_if.h"
#include "ionic_regs.h"
@@ -217,7 +218,7 @@ struct ionic_desc_info {
};
unsigned int bytes;
unsigned int nbufs;
- struct ionic_buf_info bufs[IONIC_MAX_FRAGS];
+ struct ionic_buf_info bufs[MAX_SKB_FRAGS + 1];
ionic_desc_cb cb;
void *cb_arg;
};
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
index 2c3e36b2dd7f..edc14730ce88 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
@@ -3831,6 +3831,18 @@ static void ionic_lif_queue_identify(struct ionic_lif *lif)
qtype, qti->max_sg_elems);
dev_dbg(ionic->dev, " qtype[%d].sg_desc_stride = %d\n",
qtype, qti->sg_desc_stride);
+
+ if (qti->max_sg_elems >= IONIC_MAX_FRAGS) {
+ qti->max_sg_elems = IONIC_MAX_FRAGS - 1;
+ dev_dbg(ionic->dev, "limiting qtype %d max_sg_elems to IONIC_MAX_FRAGS-1 %d\n",
+ qtype, qti->max_sg_elems);
+ }
+
+ if (qti->max_sg_elems > MAX_SKB_FRAGS) {
+ qti->max_sg_elems = MAX_SKB_FRAGS;
+ dev_dbg(ionic->dev, "limiting qtype %d max_sg_elems to MAX_SKB_FRAGS %d\n",
+ qtype, qti->max_sg_elems);
+ }
}
}
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
index 44466e8c5d77..ccc1b1d407e4 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
@@ -1243,25 +1243,84 @@ static int ionic_tx(struct ionic_queue *q, struct sk_buff *skb)
static int ionic_tx_descs_needed(struct ionic_queue *q, struct sk_buff *skb)
{
struct ionic_tx_stats *stats = q_to_tx_stats(q);
+ bool too_many_frags = false;
+ skb_frag_t *frag;
+ int desc_bufs;
+ int chunk_len;
+ int frag_rem;
+ int tso_rem;
+ int seg_rem;
+ bool encap;
+ int hdrlen;
int ndescs;
int err;
/* Each desc is mss long max, so a descriptor for each gso_seg */
- if (skb_is_gso(skb))
+ if (skb_is_gso(skb)) {
ndescs = skb_shinfo(skb)->gso_segs;
- else
+ } else {
ndescs = 1;
+ if (skb_shinfo(skb)->nr_frags > q->max_sg_elems) {
+ too_many_frags = true;
+ goto linearize;
+ }
+ }
- /* If non-TSO, just need 1 desc and nr_frags sg elems */
- if (skb_shinfo(skb)->nr_frags <= q->max_sg_elems)
+ /* If non-TSO, or no frags to check, we're done */
+ if (!skb_is_gso(skb) || !skb_shinfo(skb)->nr_frags)
return ndescs;
- /* Too many frags, so linearize */
- err = skb_linearize(skb);
- if (err)
- return err;
+ /* We need to scan the skb to be sure that none of the MTU sized
+ * packets in the TSO will require more sgs per descriptor than we
+ * can support. We loop through the frags, add up the lengths for
+ * a packet, and count the number of sgs used per packet.
+ */
+ tso_rem = skb->len;
+ frag = skb_shinfo(skb)->frags;
+ encap = skb->encapsulation;
+
+ /* start with just hdr in first part of first descriptor */
+ if (encap)
+ hdrlen = skb_inner_tcp_all_headers(skb);
+ else
+ hdrlen = skb_tcp_all_headers(skb);
+ seg_rem = min_t(int, tso_rem, hdrlen + skb_shinfo(skb)->gso_size);
+ frag_rem = hdrlen;
+
+ while (tso_rem > 0) {
+ desc_bufs = 0;
+ while (seg_rem > 0) {
+ desc_bufs++;
+
+ /* We add the +1 because we can take buffers for one
+ * more than we have SGs: one for the initial desc data
+ * in addition to the SG segments that might follow.
+ */
+ if (desc_bufs > q->max_sg_elems + 1) {
+ too_many_frags = true;
+ goto linearize;
+ }
+
+ if (frag_rem == 0) {
+ frag_rem = skb_frag_size(frag);
+ frag++;
+ }
+ chunk_len = min(frag_rem, seg_rem);
+ frag_rem -= chunk_len;
+ tso_rem -= chunk_len;
+ seg_rem -= chunk_len;
+ }
+
+ seg_rem = min_t(int, tso_rem, skb_shinfo(skb)->gso_size);
+ }
- stats->linearize++;
+linearize:
+ if (too_many_frags) {
+ err = skb_linearize(skb);
+ if (err)
+ return err;
+ stats->linearize++;
+ }
return ndescs;
}
diff --git a/drivers/net/ethernet/qualcomm/emac/emac.c b/drivers/net/ethernet/qualcomm/emac/emac.c
index 19bb16daf4e7..3270df72541b 100644
--- a/drivers/net/ethernet/qualcomm/emac/emac.c
+++ b/drivers/net/ethernet/qualcomm/emac/emac.c
@@ -718,7 +718,7 @@ err_undo_netdev:
return ret;
}
-static int emac_remove(struct platform_device *pdev)
+static void emac_remove(struct platform_device *pdev)
{
struct net_device *netdev = dev_get_drvdata(&pdev->dev);
struct emac_adapter *adpt = netdev_priv(netdev);
@@ -742,8 +742,6 @@ static int emac_remove(struct platform_device *pdev)
iounmap(adpt->phy.base);
free_netdev(netdev);
-
- return 0;
}
static void emac_shutdown(struct platform_device *pdev)
@@ -762,7 +760,7 @@ static void emac_shutdown(struct platform_device *pdev)
static struct platform_driver emac_platform_driver = {
.probe = emac_probe,
- .remove = emac_remove,
+ .remove_new = emac_remove,
.driver = {
.name = "qcom-emac",
.of_match_table = emac_dt_match,
diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
index 0ef0b88b7145..c70cff80cc99 100644
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@ -2880,7 +2880,7 @@ out_release:
return error;
}
-static int ravb_remove(struct platform_device *pdev)
+static void ravb_remove(struct platform_device *pdev)
{
struct net_device *ndev = platform_get_drvdata(pdev);
struct ravb_private *priv = netdev_priv(ndev);
@@ -2907,8 +2907,6 @@ static int ravb_remove(struct platform_device *pdev)
reset_control_assert(priv->rstc);
free_netdev(ndev);
platform_set_drvdata(pdev, NULL);
-
- return 0;
}
static int ravb_wol_setup(struct net_device *ndev)
@@ -3046,7 +3044,7 @@ static const struct dev_pm_ops ravb_dev_pm_ops = {
static struct platform_driver ravb_driver = {
.probe = ravb_probe,
- .remove = ravb_remove,
+ .remove_new = ravb_remove,
.driver = {
.name = "ravb",
.pm = &ravb_dev_pm_ops,
diff --git a/drivers/net/ethernet/renesas/rswitch.c b/drivers/net/ethernet/renesas/rswitch.c
index 0fc0b6bea753..112e605f104a 100644
--- a/drivers/net/ethernet/renesas/rswitch.c
+++ b/drivers/net/ethernet/renesas/rswitch.c
@@ -1981,7 +1981,7 @@ static void rswitch_deinit(struct rswitch_private *priv)
rswitch_clock_disable(priv);
}
-static int renesas_eth_sw_remove(struct platform_device *pdev)
+static void renesas_eth_sw_remove(struct platform_device *pdev)
{
struct rswitch_private *priv = platform_get_drvdata(pdev);
@@ -1991,13 +1991,11 @@ static int renesas_eth_sw_remove(struct platform_device *pdev)
pm_runtime_disable(&pdev->dev);
platform_set_drvdata(pdev, NULL);
-
- return 0;
}
static struct platform_driver renesas_eth_sw_driver_platform = {
.probe = renesas_eth_sw_probe,
- .remove = renesas_eth_sw_remove,
+ .remove_new = renesas_eth_sw_remove,
.driver = {
.name = "renesas_eth_sw",
.of_match_table = renesas_eth_sw_of_table,
diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index 274ea16c0a1f..475e1e8c1d35 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -3431,7 +3431,7 @@ out_release:
return ret;
}
-static int sh_eth_drv_remove(struct platform_device *pdev)
+static void sh_eth_drv_remove(struct platform_device *pdev)
{
struct net_device *ndev = platform_get_drvdata(pdev);
struct sh_eth_private *mdp = netdev_priv(ndev);
@@ -3441,8 +3441,6 @@ static int sh_eth_drv_remove(struct platform_device *pdev)
sh_mdio_release(mdp);
pm_runtime_disable(&pdev->dev);
free_netdev(ndev);
-
- return 0;
}
#ifdef CONFIG_PM
@@ -3562,7 +3560,7 @@ MODULE_DEVICE_TABLE(platform, sh_eth_id_table);
static struct platform_driver sh_eth_driver = {
.probe = sh_eth_drv_probe,
- .remove = sh_eth_drv_remove,
+ .remove_new = sh_eth_drv_remove,
.id_table = sh_eth_id_table,
.driver = {
.name = CARDNAME,
diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_platform.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_platform.c
index fb59ff94509a..e6e130dbe1de 100644
--- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_platform.c
+++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_platform.c
@@ -169,13 +169,11 @@ err_out:
* Description: this function calls the main to free the net resources
* and calls the platforms hook and release the resources (e.g. mem).
*/
-static int sxgbe_platform_remove(struct platform_device *pdev)
+static void sxgbe_platform_remove(struct platform_device *pdev)
{
struct net_device *ndev = platform_get_drvdata(pdev);
sxgbe_drv_remove(ndev);
-
- return 0;
}
#ifdef CONFIG_PM
@@ -226,7 +224,7 @@ MODULE_DEVICE_TABLE(of, sxgbe_dt_ids);
static struct platform_driver sxgbe_platform_driver = {
.probe = sxgbe_platform_probe,
- .remove = sxgbe_platform_remove,
+ .remove_new = sxgbe_platform_remove,
.driver = {
.name = SXGBE_RESOURCE_NAME,
.pm = &sxgbe_platform_pm_ops,
diff --git a/drivers/net/ethernet/seeq/sgiseeq.c b/drivers/net/ethernet/seeq/sgiseeq.c
index 96065dfc747b..76356dadf233 100644
--- a/drivers/net/ethernet/seeq/sgiseeq.c
+++ b/drivers/net/ethernet/seeq/sgiseeq.c
@@ -819,7 +819,7 @@ err_out:
return err;
}
-static int sgiseeq_remove(struct platform_device *pdev)
+static void sgiseeq_remove(struct platform_device *pdev)
{
struct net_device *dev = platform_get_drvdata(pdev);
struct sgiseeq_private *sp = netdev_priv(dev);
@@ -828,13 +828,11 @@ static int sgiseeq_remove(struct platform_device *pdev)
dma_free_noncoherent(&pdev->dev, sizeof(*sp->srings), sp->srings,
sp->srings_dma, DMA_BIDIRECTIONAL);
free_netdev(dev);
-
- return 0;
}
static struct platform_driver sgiseeq_driver = {
.probe = sgiseeq_probe,
- .remove = sgiseeq_remove,
+ .remove_new = sgiseeq_remove,
.driver = {
.name = "sgiseeq",
}
diff --git a/drivers/net/ethernet/sfc/ptp.c b/drivers/net/ethernet/sfc/ptp.c
index f54200f03e15..b04fdbb8aece 100644
--- a/drivers/net/ethernet/sfc/ptp.c
+++ b/drivers/net/ethernet/sfc/ptp.c
@@ -108,11 +108,17 @@
#define PTP_MIN_LENGTH 63
#define PTP_ADDR_IPV4 0xe0000181 /* 224.0.1.129 */
-#define PTP_ADDR_IPV6 {0xff, 0x0e, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
- 0, 0x01, 0x81} /* ff0e::181 */
+
+/* ff0e::181 */
+static const struct in6_addr ptp_addr_ipv6 = { { {
+ 0xff, 0x0e, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x01, 0x81 } } };
+
+/* 01-1B-19-00-00-00 */
+static const u8 ptp_addr_ether[ETH_ALEN] __aligned(2) = {
+ 0x01, 0x1b, 0x19, 0x00, 0x00, 0x00 };
+
#define PTP_EVENT_PORT 319
#define PTP_GENERAL_PORT 320
-#define PTP_ADDR_ETHER {0x01, 0x1b, 0x19, 0, 0, 0} /* 01-1B-19-00-00-00 */
/* Annoyingly the format of the version numbers are different between
* versions 1 and 2 so it isn't possible to simply look for 1 or 2.
@@ -1296,7 +1302,7 @@ static int efx_ptp_insert_ipv4_filter(struct efx_nic *efx,
static int efx_ptp_insert_ipv6_filter(struct efx_nic *efx,
struct list_head *filter_list,
- struct in6_addr *addr, u16 port,
+ const struct in6_addr *addr, u16 port,
unsigned long expiry)
{
struct efx_filter_spec spec;
@@ -1309,11 +1315,10 @@ static int efx_ptp_insert_ipv6_filter(struct efx_nic *efx,
static int efx_ptp_insert_eth_multicast_filter(struct efx_nic *efx)
{
struct efx_ptp_data *ptp = efx->ptp_data;
- const u8 addr[ETH_ALEN] = PTP_ADDR_ETHER;
struct efx_filter_spec spec;
efx_ptp_init_filter(efx, &spec);
- efx_filter_set_eth_local(&spec, EFX_FILTER_VID_UNSPEC, addr);
+ efx_filter_set_eth_local(&spec, EFX_FILTER_VID_UNSPEC, ptp_addr_ether);
spec.match_flags |= EFX_FILTER_MATCH_ETHER_TYPE;
spec.ether_type = htons(ETH_P_1588);
return efx_ptp_insert_filter(efx, &ptp->rxfilters_mcast, &spec, 0);
@@ -1346,15 +1351,13 @@ static int efx_ptp_insert_multicast_filters(struct efx_nic *efx)
* PTP over IPv6 and Ethernet
*/
if (efx_ptp_use_mac_tx_timestamps(efx)) {
- struct in6_addr ipv6_addr = {{PTP_ADDR_IPV6}};
-
rc = efx_ptp_insert_ipv6_filter(efx, &ptp->rxfilters_mcast,
- &ipv6_addr, PTP_EVENT_PORT, 0);
+ &ptp_addr_ipv6, PTP_EVENT_PORT, 0);
if (rc < 0)
goto fail;
rc = efx_ptp_insert_ipv6_filter(efx, &ptp->rxfilters_mcast,
- &ipv6_addr, PTP_GENERAL_PORT, 0);
+ &ptp_addr_ipv6, PTP_GENERAL_PORT, 0);
if (rc < 0)
goto fail;
@@ -1379,9 +1382,7 @@ static bool efx_ptp_valid_unicast_event_pkt(struct sk_buff *skb)
ip_hdr(skb)->protocol == IPPROTO_UDP &&
udp_hdr(skb)->source == htons(PTP_EVENT_PORT);
} else if (skb->protocol == htons(ETH_P_IPV6)) {
- struct in6_addr mcast_addr = {{PTP_ADDR_IPV6}};
-
- return !ipv6_addr_equal(&ipv6_hdr(skb)->daddr, &mcast_addr) &&
+ return !ipv6_addr_equal(&ipv6_hdr(skb)->daddr, &ptp_addr_ipv6) &&
ipv6_hdr(skb)->nexthdr == IPPROTO_UDP &&
udp_hdr(skb)->source == htons(PTP_EVENT_PORT);
}
diff --git a/drivers/net/ethernet/sgi/ioc3-eth.c b/drivers/net/ethernet/sgi/ioc3-eth.c
index 8fc3f5272fa7..98d0b561a057 100644
--- a/drivers/net/ethernet/sgi/ioc3-eth.c
+++ b/drivers/net/ethernet/sgi/ioc3-eth.c
@@ -962,7 +962,7 @@ out_free:
return err;
}
-static int ioc3eth_remove(struct platform_device *pdev)
+static void ioc3eth_remove(struct platform_device *pdev)
{
struct net_device *dev = platform_get_drvdata(pdev);
struct ioc3_private *ip = netdev_priv(dev);
@@ -973,8 +973,6 @@ static int ioc3eth_remove(struct platform_device *pdev)
unregister_netdev(dev);
del_timer_sync(&ip->ioc3_timer);
free_netdev(dev);
-
- return 0;
}
@@ -1275,7 +1273,7 @@ static void ioc3_set_multicast_list(struct net_device *dev)
static struct platform_driver ioc3eth_driver = {
.probe = ioc3eth_probe,
- .remove = ioc3eth_remove,
+ .remove_new = ioc3eth_remove,
.driver = {
.name = "ioc3-eth",
}
diff --git a/drivers/net/ethernet/sgi/meth.c b/drivers/net/ethernet/sgi/meth.c
index 6d850ea2b94c..18b6f93d875e 100644
--- a/drivers/net/ethernet/sgi/meth.c
+++ b/drivers/net/ethernet/sgi/meth.c
@@ -854,19 +854,17 @@ static int meth_probe(struct platform_device *pdev)
return 0;
}
-static int meth_remove(struct platform_device *pdev)
+static void meth_remove(struct platform_device *pdev)
{
struct net_device *dev = platform_get_drvdata(pdev);
unregister_netdev(dev);
free_netdev(dev);
-
- return 0;
}
static struct platform_driver meth_driver = {
.probe = meth_probe,
- .remove = meth_remove,
+ .remove_new = meth_remove,
.driver = {
.name = "meth",
}
diff --git a/drivers/net/ethernet/smsc/smc91x.c b/drivers/net/ethernet/smsc/smc91x.c
index 032eccf8eb42..758347616535 100644
--- a/drivers/net/ethernet/smsc/smc91x.c
+++ b/drivers/net/ethernet/smsc/smc91x.c
@@ -2411,7 +2411,7 @@ static int smc_drv_probe(struct platform_device *pdev)
return ret;
}
-static int smc_drv_remove(struct platform_device *pdev)
+static void smc_drv_remove(struct platform_device *pdev)
{
struct net_device *ndev = platform_get_drvdata(pdev);
struct smc_local *lp = netdev_priv(ndev);
@@ -2436,8 +2436,6 @@ static int smc_drv_remove(struct platform_device *pdev)
release_mem_region(res->start, SMC_IO_EXTENT);
free_netdev(ndev);
-
- return 0;
}
static int smc_drv_suspend(struct device *dev)
@@ -2480,7 +2478,7 @@ static const struct dev_pm_ops smc_drv_pm_ops = {
static struct platform_driver smc_driver = {
.probe = smc_drv_probe,
- .remove = smc_drv_remove,
+ .remove_new = smc_drv_remove,
.driver = {
.name = CARDNAME,
.pm = &smc_drv_pm_ops,
diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c
index cb590db625e8..31cb7d0166f0 100644
--- a/drivers/net/ethernet/smsc/smsc911x.c
+++ b/drivers/net/ethernet/smsc/smsc911x.c
@@ -2314,7 +2314,7 @@ static int smsc911x_init(struct net_device *dev)
return 0;
}
-static int smsc911x_drv_remove(struct platform_device *pdev)
+static void smsc911x_drv_remove(struct platform_device *pdev)
{
struct net_device *dev;
struct smsc911x_data *pdata;
@@ -2348,8 +2348,6 @@ static int smsc911x_drv_remove(struct platform_device *pdev)
free_netdev(dev);
pm_runtime_disable(&pdev->dev);
-
- return 0;
}
/* standard register acces */
@@ -2668,7 +2666,7 @@ MODULE_DEVICE_TABLE(acpi, smsc911x_acpi_match);
static struct platform_driver smsc911x_driver = {
.probe = smsc911x_drv_probe,
- .remove = smsc911x_drv_remove,
+ .remove_new = smsc911x_drv_remove,
.driver = {
.name = SMSC_CHIPNAME,
.pm = SMSC911X_PM_OPS,
diff --git a/drivers/net/ethernet/socionext/netsec.c b/drivers/net/ethernet/socionext/netsec.c
index f358ea003193..d9cfafb96085 100644
--- a/drivers/net/ethernet/socionext/netsec.c
+++ b/drivers/net/ethernet/socionext/netsec.c
@@ -2150,7 +2150,7 @@ free_ndev:
return ret;
}
-static int netsec_remove(struct platform_device *pdev)
+static void netsec_remove(struct platform_device *pdev)
{
struct netsec_priv *priv = platform_get_drvdata(pdev);
@@ -2162,8 +2162,6 @@ static int netsec_remove(struct platform_device *pdev)
pm_runtime_disable(&pdev->dev);
free_netdev(priv->ndev);
-
- return 0;
}
#ifdef CONFIG_PM
@@ -2211,7 +2209,7 @@ MODULE_DEVICE_TABLE(acpi, netsec_acpi_ids);
static struct platform_driver netsec_driver = {
.probe = netsec_probe,
- .remove = netsec_remove,
+ .remove_new = netsec_remove,
.driver = {
.name = "netsec",
.pm = &netsec_pm_ops,
diff --git a/drivers/net/ethernet/socionext/sni_ave.c b/drivers/net/ethernet/socionext/sni_ave.c
index 4838d2383a43..eed24e67c5a6 100644
--- a/drivers/net/ethernet/socionext/sni_ave.c
+++ b/drivers/net/ethernet/socionext/sni_ave.c
@@ -1719,7 +1719,7 @@ out_del_napi:
return ret;
}
-static int ave_remove(struct platform_device *pdev)
+static void ave_remove(struct platform_device *pdev)
{
struct net_device *ndev = platform_get_drvdata(pdev);
struct ave_private *priv = netdev_priv(ndev);
@@ -1727,8 +1727,6 @@ static int ave_remove(struct platform_device *pdev)
unregister_netdev(ndev);
netif_napi_del(&priv->napi_rx);
netif_napi_del(&priv->napi_tx);
-
- return 0;
}
#ifdef CONFIG_PM_SLEEP
@@ -1976,7 +1974,7 @@ MODULE_DEVICE_TABLE(of, of_ave_match);
static struct platform_driver ave_driver = {
.probe = ave_probe,
- .remove = ave_remove,
+ .remove_new = ave_remove,
.driver = {
.name = "ave",
.pm = AVE_PM_OPS,
diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig
index 06c6871f8788..a2b9e289aa36 100644
--- a/drivers/net/ethernet/stmicro/stmmac/Kconfig
+++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig
@@ -239,6 +239,17 @@ config DWMAC_INTEL_PLAT
the stmmac device driver. This driver is used for the Intel Keem Bay
SoC.
+config DWMAC_LOONGSON1
+ tristate "Loongson1 GMAC support"
+ default MACH_LOONGSON32
+ depends on OF && (MACH_LOONGSON32 || COMPILE_TEST)
+ help
+ Support for ethernet controller on Loongson1 SoC.
+
+ This selects Loongson1 SoC glue layer support for the stmmac
+ device driver. This driver is used for Loongson1-based boards
+ like Loongson LS1B/LS1C.
+
config DWMAC_TEGRA
tristate "NVIDIA Tegra MGBE support"
depends on ARCH_TEGRA || COMPILE_TEST
diff --git a/drivers/net/ethernet/stmicro/stmmac/Makefile b/drivers/net/ethernet/stmicro/stmmac/Makefile
index 5b57aee19267..80e598bd4255 100644
--- a/drivers/net/ethernet/stmicro/stmmac/Makefile
+++ b/drivers/net/ethernet/stmicro/stmmac/Makefile
@@ -29,6 +29,7 @@ obj-$(CONFIG_DWMAC_SUNXI) += dwmac-sunxi.o
obj-$(CONFIG_DWMAC_SUN8I) += dwmac-sun8i.o
obj-$(CONFIG_DWMAC_DWC_QOS_ETH) += dwmac-dwc-qos-eth.o
obj-$(CONFIG_DWMAC_INTEL_PLAT) += dwmac-intel-plat.o
+obj-$(CONFIG_DWMAC_LOONGSON1) += dwmac-loongson1.o
obj-$(CONFIG_DWMAC_GENERIC) += dwmac-generic.o
obj-$(CONFIG_DWMAC_IMX8) += dwmac-imx.o
obj-$(CONFIG_DWMAC_TEGRA) += dwmac-tegra.o
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-anarion.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-anarion.c
index 58a7f08e8d78..643ee6d8d4dd 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-anarion.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-anarion.c
@@ -115,7 +115,7 @@ static int anarion_dwmac_probe(struct platform_device *pdev)
if (IS_ERR(gmac))
return PTR_ERR(gmac);
- plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac);
+ plat_dat = devm_stmmac_probe_config_dt(pdev, stmmac_res.mac);
if (IS_ERR(plat_dat))
return PTR_ERR(plat_dat);
@@ -124,13 +124,7 @@ static int anarion_dwmac_probe(struct platform_device *pdev)
anarion_gmac_init(pdev, gmac);
plat_dat->bsp_priv = gmac;
- ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
- if (ret) {
- stmmac_remove_config_dt(pdev, plat_dat);
- return ret;
- }
-
- return 0;
+ return stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
}
static const struct of_device_id anarion_dwmac_match[] = {
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c
index 61ebf36da13d..ec924c6c76c6 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c
@@ -435,15 +435,14 @@ static int dwc_eth_dwmac_probe(struct platform_device *pdev)
if (IS_ERR(stmmac_res.addr))
return PTR_ERR(stmmac_res.addr);
- plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac);
+ plat_dat = devm_stmmac_probe_config_dt(pdev, stmmac_res.mac);
if (IS_ERR(plat_dat))
return PTR_ERR(plat_dat);
ret = data->probe(pdev, plat_dat, &stmmac_res);
if (ret < 0) {
dev_err_probe(&pdev->dev, ret, "failed to probe subdriver\n");
-
- goto remove_config;
+ return ret;
}
ret = dwc_eth_dwmac_config_dt(pdev, plat_dat);
@@ -458,25 +457,17 @@ static int dwc_eth_dwmac_probe(struct platform_device *pdev)
remove:
data->remove(pdev);
-remove_config:
- stmmac_remove_config_dt(pdev, plat_dat);
return ret;
}
static void dwc_eth_dwmac_remove(struct platform_device *pdev)
{
- struct net_device *ndev = platform_get_drvdata(pdev);
- struct stmmac_priv *priv = netdev_priv(ndev);
- const struct dwc_eth_dwmac_data *data;
-
- data = device_get_match_data(&pdev->dev);
+ const struct dwc_eth_dwmac_data *data = device_get_match_data(&pdev->dev);
stmmac_dvr_remove(&pdev->dev);
data->remove(pdev);
-
- stmmac_remove_config_dt(pdev, priv->plat);
}
static const struct of_device_id dwc_eth_dwmac_match[] = {
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c
index 20fc455b3337..598eff926815 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c
@@ -27,7 +27,7 @@ static int dwmac_generic_probe(struct platform_device *pdev)
return ret;
if (pdev->dev.of_node) {
- plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac);
+ plat_dat = devm_stmmac_probe_config_dt(pdev, stmmac_res.mac);
if (IS_ERR(plat_dat)) {
dev_err(&pdev->dev, "dt configuration failed\n");
return PTR_ERR(plat_dat);
@@ -46,17 +46,7 @@ static int dwmac_generic_probe(struct platform_device *pdev)
plat_dat->unicast_filter_entries = 1;
}
- ret = stmmac_pltfr_probe(pdev, plat_dat, &stmmac_res);
- if (ret)
- goto err_remove_config_dt;
-
- return 0;
-
-err_remove_config_dt:
- if (pdev->dev.of_node)
- stmmac_remove_config_dt(pdev, plat_dat);
-
- return ret;
+ return devm_stmmac_pltfr_probe(pdev, plat_dat, &stmmac_res);
}
static const struct of_device_id dwmac_generic_match[] = {
@@ -77,7 +67,6 @@ MODULE_DEVICE_TABLE(of, dwmac_generic_match);
static struct platform_driver dwmac_generic_driver = {
.probe = dwmac_generic_probe,
- .remove_new = stmmac_pltfr_remove,
.driver = {
.name = STMMAC_RESOURCE_NAME,
.pm = &stmmac_pltfr_pm_ops,
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c
index df34e34cc14f..8f730ada71f9 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c
@@ -331,15 +331,14 @@ static int imx_dwmac_probe(struct platform_device *pdev)
if (!dwmac)
return -ENOMEM;
- plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac);
+ plat_dat = devm_stmmac_probe_config_dt(pdev, stmmac_res.mac);
if (IS_ERR(plat_dat))
return PTR_ERR(plat_dat);
data = of_device_get_match_data(&pdev->dev);
if (!data) {
dev_err(&pdev->dev, "failed to get match data\n");
- ret = -EINVAL;
- goto err_match_data;
+ return -EINVAL;
}
dwmac->ops = data;
@@ -348,7 +347,7 @@ static int imx_dwmac_probe(struct platform_device *pdev)
ret = imx_dwmac_parse_dt(dwmac, &pdev->dev);
if (ret) {
dev_err(&pdev->dev, "failed to parse OF data\n");
- goto err_parse_dt;
+ return ret;
}
if (data->flags & STMMAC_FLAG_HWTSTAMP_CORRECT_LATENCY)
@@ -365,7 +364,7 @@ static int imx_dwmac_probe(struct platform_device *pdev)
ret = imx_dwmac_clks_config(dwmac, true);
if (ret)
- goto err_clks_config;
+ return ret;
ret = imx_dwmac_init(pdev, dwmac);
if (ret)
@@ -385,10 +384,6 @@ err_drv_probe:
imx_dwmac_exit(pdev, plat_dat->bsp_priv);
err_dwmac_init:
imx_dwmac_clks_config(dwmac, false);
-err_clks_config:
-err_parse_dt:
-err_match_data:
- stmmac_remove_config_dt(pdev, plat_dat);
return ret;
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c
index 0a20c3d24722..19c93b998fb3 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c
@@ -241,29 +241,25 @@ static int ingenic_mac_probe(struct platform_device *pdev)
if (ret)
return ret;
- plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac);
+ plat_dat = devm_stmmac_probe_config_dt(pdev, stmmac_res.mac);
if (IS_ERR(plat_dat))
return PTR_ERR(plat_dat);
mac = devm_kzalloc(&pdev->dev, sizeof(*mac), GFP_KERNEL);
- if (!mac) {
- ret = -ENOMEM;
- goto err_remove_config_dt;
- }
+ if (!mac)
+ return -ENOMEM;
data = of_device_get_match_data(&pdev->dev);
if (!data) {
dev_err(&pdev->dev, "No of match data provided\n");
- ret = -EINVAL;
- goto err_remove_config_dt;
+ return -EINVAL;
}
/* Get MAC PHY control register */
mac->regmap = syscon_regmap_lookup_by_phandle(pdev->dev.of_node, "mode-reg");
if (IS_ERR(mac->regmap)) {
dev_err(&pdev->dev, "%s: Failed to get syscon regmap\n", __func__);
- ret = PTR_ERR(mac->regmap);
- goto err_remove_config_dt;
+ return PTR_ERR(mac->regmap);
}
if (!of_property_read_u32(pdev->dev.of_node, "tx-clk-delay-ps", &tx_delay_ps)) {
@@ -272,8 +268,7 @@ static int ingenic_mac_probe(struct platform_device *pdev)
mac->tx_delay = tx_delay_ps * 1000;
} else {
dev_err(&pdev->dev, "Invalid TX clock delay: %dps\n", tx_delay_ps);
- ret = -EINVAL;
- goto err_remove_config_dt;
+ return -EINVAL;
}
}
@@ -283,8 +278,7 @@ static int ingenic_mac_probe(struct platform_device *pdev)
mac->rx_delay = rx_delay_ps * 1000;
} else {
dev_err(&pdev->dev, "Invalid RX clock delay: %dps\n", rx_delay_ps);
- ret = -EINVAL;
- goto err_remove_config_dt;
+ return -EINVAL;
}
}
@@ -295,18 +289,9 @@ static int ingenic_mac_probe(struct platform_device *pdev)
ret = ingenic_mac_init(plat_dat);
if (ret)
- goto err_remove_config_dt;
-
- ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
- if (ret)
- goto err_remove_config_dt;
-
- return 0;
-
-err_remove_config_dt:
- stmmac_remove_config_dt(pdev, plat_dat);
+ return ret;
- return ret;
+ return stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
}
#ifdef CONFIG_PM_SLEEP
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c
index d352a14f9d48..70edc5232379 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c
@@ -85,17 +85,15 @@ static int intel_eth_plat_probe(struct platform_device *pdev)
if (ret)
return ret;
- plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac);
+ plat_dat = devm_stmmac_probe_config_dt(pdev, stmmac_res.mac);
if (IS_ERR(plat_dat)) {
dev_err(&pdev->dev, "dt configuration failed\n");
return PTR_ERR(plat_dat);
}
dwmac = devm_kzalloc(&pdev->dev, sizeof(*dwmac), GFP_KERNEL);
- if (!dwmac) {
- ret = -ENOMEM;
- goto err_remove_config_dt;
- }
+ if (!dwmac)
+ return -ENOMEM;
dwmac->dev = &pdev->dev;
dwmac->tx_clk = NULL;
@@ -110,10 +108,8 @@ static int intel_eth_plat_probe(struct platform_device *pdev)
/* Enable TX clock */
if (dwmac->data->tx_clk_en) {
dwmac->tx_clk = devm_clk_get(&pdev->dev, "tx_clk");
- if (IS_ERR(dwmac->tx_clk)) {
- ret = PTR_ERR(dwmac->tx_clk);
- goto err_remove_config_dt;
- }
+ if (IS_ERR(dwmac->tx_clk))
+ return PTR_ERR(dwmac->tx_clk);
clk_prepare_enable(dwmac->tx_clk);
@@ -126,7 +122,7 @@ static int intel_eth_plat_probe(struct platform_device *pdev)
if (ret) {
dev_err(&pdev->dev,
"Failed to set tx_clk\n");
- goto err_remove_config_dt;
+ return ret;
}
}
}
@@ -140,7 +136,7 @@ static int intel_eth_plat_probe(struct platform_device *pdev)
if (ret) {
dev_err(&pdev->dev,
"Failed to set clk_ptp_ref\n");
- goto err_remove_config_dt;
+ return ret;
}
}
}
@@ -158,15 +154,10 @@ static int intel_eth_plat_probe(struct platform_device *pdev)
ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
if (ret) {
clk_disable_unprepare(dwmac->tx_clk);
- goto err_remove_config_dt;
+ return ret;
}
return 0;
-
-err_remove_config_dt:
- stmmac_remove_config_dt(pdev, plat_dat);
-
- return ret;
}
static void intel_eth_plat_remove(struct platform_device *pdev)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c
index 9b0200749109..281687d7083b 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c
@@ -384,22 +384,20 @@ static int ipq806x_gmac_probe(struct platform_device *pdev)
if (val)
return val;
- plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac);
+ plat_dat = devm_stmmac_probe_config_dt(pdev, stmmac_res.mac);
if (IS_ERR(plat_dat))
return PTR_ERR(plat_dat);
gmac = devm_kzalloc(dev, sizeof(*gmac), GFP_KERNEL);
- if (!gmac) {
- err = -ENOMEM;
- goto err_remove_config_dt;
- }
+ if (!gmac)
+ return -ENOMEM;
gmac->pdev = pdev;
err = ipq806x_gmac_of_parse(gmac);
if (err) {
dev_err(dev, "device tree parsing error\n");
- goto err_remove_config_dt;
+ return err;
}
regmap_write(gmac->qsgmii_csr, QSGMII_PCS_CAL_LCKDT_CTL,
@@ -459,11 +457,11 @@ static int ipq806x_gmac_probe(struct platform_device *pdev)
if (gmac->phy_mode == PHY_INTERFACE_MODE_SGMII) {
err = ipq806x_gmac_configure_qsgmii_params(gmac);
if (err)
- goto err_remove_config_dt;
+ return err;
err = ipq806x_gmac_configure_qsgmii_pcs_speed(gmac);
if (err)
- goto err_remove_config_dt;
+ return err;
}
plat_dat->has_gmac = true;
@@ -473,21 +471,12 @@ static int ipq806x_gmac_probe(struct platform_device *pdev)
plat_dat->tx_fifo_size = 8192;
plat_dat->rx_fifo_size = 8192;
- err = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
- if (err)
- goto err_remove_config_dt;
-
- return 0;
+ return stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
err_unsupported_phy:
dev_err(&pdev->dev, "Unsupported PHY mode: \"%s\"\n",
phy_modes(gmac->phy_mode));
- err = -EINVAL;
-
-err_remove_config_dt:
- stmmac_remove_config_dt(pdev, plat_dat);
-
- return err;
+ return -EINVAL;
}
static const struct of_device_id ipq806x_gmac_dwmac_match[] = {
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson1.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson1.c
new file mode 100644
index 000000000000..3e86810717d3
--- /dev/null
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson1.c
@@ -0,0 +1,209 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Loongson-1 DWMAC glue layer
+ *
+ * Copyright (C) 2011-2023 Keguang Zhang <keguang.zhang@gmail.com>
+ */
+
+#include <linux/mfd/syscon.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/phy.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+
+#include "stmmac.h"
+#include "stmmac_platform.h"
+
+#define LS1B_GMAC0_BASE (0x1fe10000)
+#define LS1B_GMAC1_BASE (0x1fe20000)
+
+/* Loongson-1 SYSCON Registers */
+#define LS1X_SYSCON0 (0x0)
+#define LS1X_SYSCON1 (0x4)
+
+/* Loongson-1B SYSCON Register Bits */
+#define GMAC1_USE_UART1 BIT(4)
+#define GMAC1_USE_UART0 BIT(3)
+
+#define GMAC1_SHUT BIT(13)
+#define GMAC0_SHUT BIT(12)
+
+#define GMAC1_USE_TXCLK BIT(3)
+#define GMAC0_USE_TXCLK BIT(2)
+#define GMAC1_USE_PWM23 BIT(1)
+#define GMAC0_USE_PWM01 BIT(0)
+
+/* Loongson-1C SYSCON Register Bits */
+#define GMAC_SHUT BIT(6)
+
+#define PHY_INTF_SELI GENMASK(30, 28)
+#define PHY_INTF_MII FIELD_PREP(PHY_INTF_SELI, 0)
+#define PHY_INTF_RMII FIELD_PREP(PHY_INTF_SELI, 4)
+
+struct ls1x_dwmac {
+ struct plat_stmmacenet_data *plat_dat;
+ struct regmap *regmap;
+};
+
+static int ls1b_dwmac_syscon_init(struct platform_device *pdev, void *priv)
+{
+ struct ls1x_dwmac *dwmac = priv;
+ struct plat_stmmacenet_data *plat = dwmac->plat_dat;
+ struct regmap *regmap = dwmac->regmap;
+ struct resource *res;
+ unsigned long reg_base;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (!res) {
+ dev_err(&pdev->dev, "Could not get IO_MEM resources\n");
+ return -EINVAL;
+ }
+ reg_base = (unsigned long)res->start;
+
+ if (reg_base == LS1B_GMAC0_BASE) {
+ switch (plat->phy_interface) {
+ case PHY_INTERFACE_MODE_RGMII_ID:
+ regmap_update_bits(regmap, LS1X_SYSCON0,
+ GMAC0_USE_TXCLK | GMAC0_USE_PWM01,
+ 0);
+ break;
+ case PHY_INTERFACE_MODE_MII:
+ regmap_update_bits(regmap, LS1X_SYSCON0,
+ GMAC0_USE_TXCLK | GMAC0_USE_PWM01,
+ GMAC0_USE_TXCLK | GMAC0_USE_PWM01);
+ break;
+ default:
+ dev_err(&pdev->dev, "Unsupported PHY mode %u\n",
+ plat->phy_interface);
+ return -EOPNOTSUPP;
+ }
+
+ regmap_update_bits(regmap, LS1X_SYSCON0, GMAC0_SHUT, 0);
+ } else if (reg_base == LS1B_GMAC1_BASE) {
+ regmap_update_bits(regmap, LS1X_SYSCON0,
+ GMAC1_USE_UART1 | GMAC1_USE_UART0,
+ GMAC1_USE_UART1 | GMAC1_USE_UART0);
+
+ switch (plat->phy_interface) {
+ case PHY_INTERFACE_MODE_RGMII_ID:
+ regmap_update_bits(regmap, LS1X_SYSCON1,
+ GMAC1_USE_TXCLK | GMAC1_USE_PWM23,
+ 0);
+
+ break;
+ case PHY_INTERFACE_MODE_MII:
+ regmap_update_bits(regmap, LS1X_SYSCON1,
+ GMAC1_USE_TXCLK | GMAC1_USE_PWM23,
+ GMAC1_USE_TXCLK | GMAC1_USE_PWM23);
+ break;
+ default:
+ dev_err(&pdev->dev, "Unsupported PHY mode %u\n",
+ plat->phy_interface);
+ return -EOPNOTSUPP;
+ }
+
+ regmap_update_bits(regmap, LS1X_SYSCON1, GMAC1_SHUT, 0);
+ } else {
+ dev_err(&pdev->dev, "Invalid Ethernet MAC base address %lx",
+ reg_base);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int ls1c_dwmac_syscon_init(struct platform_device *pdev, void *priv)
+{
+ struct ls1x_dwmac *dwmac = priv;
+ struct plat_stmmacenet_data *plat = dwmac->plat_dat;
+ struct regmap *regmap = dwmac->regmap;
+
+ switch (plat->phy_interface) {
+ case PHY_INTERFACE_MODE_MII:
+ regmap_update_bits(regmap, LS1X_SYSCON1, PHY_INTF_SELI,
+ PHY_INTF_MII);
+ break;
+ case PHY_INTERFACE_MODE_RMII:
+ regmap_update_bits(regmap, LS1X_SYSCON1, PHY_INTF_SELI,
+ PHY_INTF_RMII);
+ break;
+ default:
+ dev_err(&pdev->dev, "Unsupported PHY-mode %u\n",
+ plat->phy_interface);
+ return -EOPNOTSUPP;
+ }
+
+ regmap_update_bits(regmap, LS1X_SYSCON0, GMAC0_SHUT, 0);
+
+ return 0;
+}
+
+static int ls1x_dwmac_probe(struct platform_device *pdev)
+{
+ struct plat_stmmacenet_data *plat_dat;
+ struct stmmac_resources stmmac_res;
+ struct regmap *regmap;
+ struct ls1x_dwmac *dwmac;
+ int (*init)(struct platform_device *pdev, void *priv);
+ int ret;
+
+ ret = stmmac_get_platform_resources(pdev, &stmmac_res);
+ if (ret)
+ return ret;
+
+ /* Probe syscon */
+ regmap = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
+ "loongson,ls1-syscon");
+ if (IS_ERR(regmap))
+ return dev_err_probe(&pdev->dev, PTR_ERR(regmap),
+ "Unable to find syscon\n");
+
+ init = of_device_get_match_data(&pdev->dev);
+ if (!init) {
+ dev_err(&pdev->dev, "No of match data provided\n");
+ return -EINVAL;
+ }
+
+ dwmac = devm_kzalloc(&pdev->dev, sizeof(*dwmac), GFP_KERNEL);
+ if (!dwmac)
+ return -ENOMEM;
+
+ plat_dat = devm_stmmac_probe_config_dt(pdev, stmmac_res.mac);
+ if (IS_ERR(plat_dat))
+ return dev_err_probe(&pdev->dev, PTR_ERR(plat_dat),
+ "dt configuration failed\n");
+
+ plat_dat->bsp_priv = dwmac;
+ plat_dat->init = init;
+ dwmac->plat_dat = plat_dat;
+ dwmac->regmap = regmap;
+
+ return devm_stmmac_pltfr_probe(pdev, plat_dat, &stmmac_res);
+}
+
+static const struct of_device_id ls1x_dwmac_match[] = {
+ {
+ .compatible = "loongson,ls1b-gmac",
+ .data = &ls1b_dwmac_syscon_init,
+ },
+ {
+ .compatible = "loongson,ls1c-emac",
+ .data = &ls1c_dwmac_syscon_init,
+ },
+ { }
+};
+MODULE_DEVICE_TABLE(of, ls1x_dwmac_match);
+
+static struct platform_driver ls1x_dwmac_driver = {
+ .probe = ls1x_dwmac_probe,
+ .driver = {
+ .name = "loongson1-dwmac",
+ .of_match_table = ls1x_dwmac_match,
+ },
+};
+module_platform_driver(ls1x_dwmac_driver);
+
+MODULE_AUTHOR("Keguang Zhang <keguang.zhang@gmail.com>");
+MODULE_DESCRIPTION("Loongson-1 DWMAC glue layer");
+MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c
index d0aa674ce705..4c810d8f5bea 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c
@@ -37,7 +37,7 @@ static int lpc18xx_dwmac_probe(struct platform_device *pdev)
if (ret)
return ret;
- plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac);
+ plat_dat = devm_stmmac_probe_config_dt(pdev, stmmac_res.mac);
if (IS_ERR(plat_dat))
return PTR_ERR(plat_dat);
@@ -46,8 +46,7 @@ static int lpc18xx_dwmac_probe(struct platform_device *pdev)
reg = syscon_regmap_lookup_by_compatible("nxp,lpc1850-creg");
if (IS_ERR(reg)) {
dev_err(&pdev->dev, "syscon lookup failed\n");
- ret = PTR_ERR(reg);
- goto err_remove_config_dt;
+ return PTR_ERR(reg);
}
if (plat_dat->mac_interface == PHY_INTERFACE_MODE_MII) {
@@ -56,23 +55,13 @@ static int lpc18xx_dwmac_probe(struct platform_device *pdev)
ethmode = LPC18XX_CREG_CREG6_ETHMODE_RMII;
} else {
dev_err(&pdev->dev, "Only MII and RMII mode supported\n");
- ret = -EINVAL;
- goto err_remove_config_dt;
+ return -EINVAL;
}
regmap_update_bits(reg, LPC18XX_CREG_CREG6,
LPC18XX_CREG_CREG6_ETHMODE_MASK, ethmode);
- ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
- if (ret)
- goto err_remove_config_dt;
-
- return 0;
-
-err_remove_config_dt:
- stmmac_remove_config_dt(pdev, plat_dat);
-
- return ret;
+ return stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
}
static const struct of_device_id lpc18xx_dwmac_match[] = {
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c
index cd796ec04132..2a9132d6d743 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c
@@ -656,7 +656,7 @@ static int mediatek_dwmac_probe(struct platform_device *pdev)
if (ret)
return ret;
- plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac);
+ plat_dat = devm_stmmac_probe_config_dt(pdev, stmmac_res.mac);
if (IS_ERR(plat_dat))
return PTR_ERR(plat_dat);
@@ -665,7 +665,7 @@ static int mediatek_dwmac_probe(struct platform_device *pdev)
ret = mediatek_dwmac_clks_config(priv_plat, true);
if (ret)
- goto err_remove_config_dt;
+ return ret;
ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
if (ret)
@@ -675,8 +675,6 @@ static int mediatek_dwmac_probe(struct platform_device *pdev)
err_drv_probe:
mediatek_dwmac_clks_config(priv_plat, false);
-err_remove_config_dt:
- stmmac_remove_config_dt(pdev, plat_dat);
return ret;
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson.c
index 959f88c6da16..a16bfa9089ea 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson.c
@@ -52,35 +52,22 @@ static int meson6_dwmac_probe(struct platform_device *pdev)
if (ret)
return ret;
- plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac);
+ plat_dat = devm_stmmac_probe_config_dt(pdev, stmmac_res.mac);
if (IS_ERR(plat_dat))
return PTR_ERR(plat_dat);
dwmac = devm_kzalloc(&pdev->dev, sizeof(*dwmac), GFP_KERNEL);
- if (!dwmac) {
- ret = -ENOMEM;
- goto err_remove_config_dt;
- }
+ if (!dwmac)
+ return -ENOMEM;
dwmac->reg = devm_platform_ioremap_resource(pdev, 1);
- if (IS_ERR(dwmac->reg)) {
- ret = PTR_ERR(dwmac->reg);
- goto err_remove_config_dt;
- }
+ if (IS_ERR(dwmac->reg))
+ return PTR_ERR(dwmac->reg);
plat_dat->bsp_priv = dwmac;
plat_dat->fix_mac_speed = meson6_dwmac_fix_mac_speed;
- ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
- if (ret)
- goto err_remove_config_dt;
-
- return 0;
-
-err_remove_config_dt:
- stmmac_remove_config_dt(pdev, plat_dat);
-
- return ret;
+ return stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
}
static const struct of_device_id meson6_dwmac_match[] = {
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
index 0b159dc0d5f6..b23944aa344e 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
@@ -400,33 +400,27 @@ static int meson8b_dwmac_probe(struct platform_device *pdev)
if (ret)
return ret;
- plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac);
+ plat_dat = devm_stmmac_probe_config_dt(pdev, stmmac_res.mac);
if (IS_ERR(plat_dat))
return PTR_ERR(plat_dat);
dwmac = devm_kzalloc(&pdev->dev, sizeof(*dwmac), GFP_KERNEL);
- if (!dwmac) {
- ret = -ENOMEM;
- goto err_remove_config_dt;
- }
+ if (!dwmac)
+ return -ENOMEM;
dwmac->data = (const struct meson8b_dwmac_data *)
of_device_get_match_data(&pdev->dev);
- if (!dwmac->data) {
- ret = -EINVAL;
- goto err_remove_config_dt;
- }
+ if (!dwmac->data)
+ return -EINVAL;
dwmac->regs = devm_platform_ioremap_resource(pdev, 1);
- if (IS_ERR(dwmac->regs)) {
- ret = PTR_ERR(dwmac->regs);
- goto err_remove_config_dt;
- }
+ if (IS_ERR(dwmac->regs))
+ return PTR_ERR(dwmac->regs);
dwmac->dev = &pdev->dev;
ret = of_get_phy_mode(pdev->dev.of_node, &dwmac->phy_mode);
if (ret) {
dev_err(&pdev->dev, "missing phy-mode property\n");
- goto err_remove_config_dt;
+ return ret;
}
/* use 2ns as fallback since this value was previously hardcoded */
@@ -448,53 +442,40 @@ static int meson8b_dwmac_probe(struct platform_device *pdev)
if (dwmac->rx_delay_ps > 3000 || dwmac->rx_delay_ps % 200) {
dev_err(dwmac->dev,
"The RGMII RX delay range is 0..3000ps in 200ps steps");
- ret = -EINVAL;
- goto err_remove_config_dt;
+ return -EINVAL;
}
} else {
if (dwmac->rx_delay_ps != 0 && dwmac->rx_delay_ps != 2000) {
dev_err(dwmac->dev,
"The only allowed RGMII RX delays values are: 0ps, 2000ps");
- ret = -EINVAL;
- goto err_remove_config_dt;
+ return -EINVAL;
}
}
dwmac->timing_adj_clk = devm_clk_get_optional(dwmac->dev,
"timing-adjustment");
- if (IS_ERR(dwmac->timing_adj_clk)) {
- ret = PTR_ERR(dwmac->timing_adj_clk);
- goto err_remove_config_dt;
- }
+ if (IS_ERR(dwmac->timing_adj_clk))
+ return PTR_ERR(dwmac->timing_adj_clk);
ret = meson8b_init_rgmii_delays(dwmac);
if (ret)
- goto err_remove_config_dt;
+ return ret;
ret = meson8b_init_rgmii_tx_clk(dwmac);
if (ret)
- goto err_remove_config_dt;
+ return ret;
ret = dwmac->data->set_phy_mode(dwmac);
if (ret)
- goto err_remove_config_dt;
+ return ret;
ret = meson8b_init_prg_eth(dwmac);
if (ret)
- goto err_remove_config_dt;
+ return ret;
plat_dat->bsp_priv = dwmac;
- ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
- if (ret)
- goto err_remove_config_dt;
-
- return 0;
-
-err_remove_config_dt:
- stmmac_remove_config_dt(pdev, plat_dat);
-
- return ret;
+ return stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
}
static const struct meson8b_dwmac_data meson8b_dwmac_data = {
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
index d920a50dd16c..382e8de1255d 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
@@ -1824,7 +1824,7 @@ static int rk_gmac_probe(struct platform_device *pdev)
if (ret)
return ret;
- plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac);
+ plat_dat = devm_stmmac_probe_config_dt(pdev, stmmac_res.mac);
if (IS_ERR(plat_dat))
return PTR_ERR(plat_dat);
@@ -1836,18 +1836,16 @@ static int rk_gmac_probe(struct platform_device *pdev)
plat_dat->fix_mac_speed = rk_fix_speed;
plat_dat->bsp_priv = rk_gmac_setup(pdev, plat_dat, data);
- if (IS_ERR(plat_dat->bsp_priv)) {
- ret = PTR_ERR(plat_dat->bsp_priv);
- goto err_remove_config_dt;
- }
+ if (IS_ERR(plat_dat->bsp_priv))
+ return PTR_ERR(plat_dat->bsp_priv);
ret = rk_gmac_clk_init(plat_dat);
if (ret)
- goto err_remove_config_dt;
+ return ret;
ret = rk_gmac_powerup(plat_dat->bsp_priv);
if (ret)
- goto err_remove_config_dt;
+ return ret;
ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
if (ret)
@@ -1857,8 +1855,6 @@ static int rk_gmac_probe(struct platform_device *pdev)
err_gmac_powerdown:
rk_gmac_powerdown(plat_dat->bsp_priv);
-err_remove_config_dt:
- stmmac_remove_config_dt(pdev, plat_dat);
return ret;
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c
index 9bf102bbc6a0..ba2ce776bd4d 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c
@@ -400,21 +400,19 @@ static int socfpga_dwmac_probe(struct platform_device *pdev)
if (ret)
return ret;
- plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac);
+ plat_dat = devm_stmmac_probe_config_dt(pdev, stmmac_res.mac);
if (IS_ERR(plat_dat))
return PTR_ERR(plat_dat);
dwmac = devm_kzalloc(dev, sizeof(*dwmac), GFP_KERNEL);
- if (!dwmac) {
- ret = -ENOMEM;
- goto err_remove_config_dt;
- }
+ if (!dwmac)
+ return -ENOMEM;
dwmac->stmmac_ocp_rst = devm_reset_control_get_optional(dev, "stmmaceth-ocp");
if (IS_ERR(dwmac->stmmac_ocp_rst)) {
ret = PTR_ERR(dwmac->stmmac_ocp_rst);
dev_err(dev, "error getting reset control of ocp %d\n", ret);
- goto err_remove_config_dt;
+ return ret;
}
reset_control_deassert(dwmac->stmmac_ocp_rst);
@@ -422,7 +420,7 @@ static int socfpga_dwmac_probe(struct platform_device *pdev)
ret = socfpga_dwmac_parse_data(dwmac, dev);
if (ret) {
dev_err(dev, "Unable to parse OF data\n");
- goto err_remove_config_dt;
+ return ret;
}
dwmac->ops = ops;
@@ -431,7 +429,7 @@ static int socfpga_dwmac_probe(struct platform_device *pdev)
ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
if (ret)
- goto err_remove_config_dt;
+ return ret;
ndev = platform_get_drvdata(pdev);
stpriv = netdev_priv(ndev);
@@ -492,8 +490,6 @@ static int socfpga_dwmac_probe(struct platform_device *pdev)
err_dvr_remove:
stmmac_dvr_remove(&pdev->dev);
-err_remove_config_dt:
- stmmac_remove_config_dt(pdev, plat_dat);
return ret;
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-starfive.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-starfive.c
index 9289bb87c3e3..5d630affb4d1 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-starfive.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-starfive.c
@@ -105,7 +105,7 @@ static int starfive_dwmac_probe(struct platform_device *pdev)
return dev_err_probe(&pdev->dev, err,
"failed to get resources\n");
- plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac);
+ plat_dat = devm_stmmac_probe_config_dt(pdev, stmmac_res.mac);
if (IS_ERR(plat_dat))
return dev_err_probe(&pdev->dev, PTR_ERR(plat_dat),
"dt configuration failed\n");
@@ -141,13 +141,7 @@ static int starfive_dwmac_probe(struct platform_device *pdev)
if (err)
return err;
- err = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
- if (err) {
- stmmac_remove_config_dt(pdev, plat_dat);
- return err;
- }
-
- return 0;
+ return stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
}
static const struct of_device_id starfive_dwmac_match[] = {
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c
index 0d653bbb931b..4445cddc4cbe 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c
@@ -273,20 +273,18 @@ static int sti_dwmac_probe(struct platform_device *pdev)
if (ret)
return ret;
- plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac);
+ plat_dat = devm_stmmac_probe_config_dt(pdev, stmmac_res.mac);
if (IS_ERR(plat_dat))
return PTR_ERR(plat_dat);
dwmac = devm_kzalloc(&pdev->dev, sizeof(*dwmac), GFP_KERNEL);
- if (!dwmac) {
- ret = -ENOMEM;
- goto err_remove_config_dt;
- }
+ if (!dwmac)
+ return -ENOMEM;
ret = sti_dwmac_parse_data(dwmac, pdev);
if (ret) {
dev_err(&pdev->dev, "Unable to parse OF data\n");
- goto err_remove_config_dt;
+ return ret;
}
dwmac->fix_retime_src = data->fix_retime_src;
@@ -296,7 +294,7 @@ static int sti_dwmac_probe(struct platform_device *pdev)
ret = clk_prepare_enable(dwmac->clk);
if (ret)
- goto err_remove_config_dt;
+ return ret;
ret = sti_dwmac_set_mode(dwmac);
if (ret)
@@ -310,8 +308,6 @@ static int sti_dwmac_probe(struct platform_device *pdev)
disable_clk:
clk_disable_unprepare(dwmac->clk);
-err_remove_config_dt:
- stmmac_remove_config_dt(pdev, plat_dat);
return ret;
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c
index a0e276783e65..d8d3c729f219 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c
@@ -372,21 +372,18 @@ static int stm32_dwmac_probe(struct platform_device *pdev)
if (ret)
return ret;
- plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac);
+ plat_dat = devm_stmmac_probe_config_dt(pdev, stmmac_res.mac);
if (IS_ERR(plat_dat))
return PTR_ERR(plat_dat);
dwmac = devm_kzalloc(&pdev->dev, sizeof(*dwmac), GFP_KERNEL);
- if (!dwmac) {
- ret = -ENOMEM;
- goto err_remove_config_dt;
- }
+ if (!dwmac)
+ return -ENOMEM;
data = of_device_get_match_data(&pdev->dev);
if (!data) {
dev_err(&pdev->dev, "no of match data provided\n");
- ret = -EINVAL;
- goto err_remove_config_dt;
+ return -EINVAL;
}
dwmac->ops = data;
@@ -395,14 +392,14 @@ static int stm32_dwmac_probe(struct platform_device *pdev)
ret = stm32_dwmac_parse_data(dwmac, &pdev->dev);
if (ret) {
dev_err(&pdev->dev, "Unable to parse OF data\n");
- goto err_remove_config_dt;
+ return ret;
}
plat_dat->bsp_priv = dwmac;
ret = stm32_dwmac_init(plat_dat);
if (ret)
- goto err_remove_config_dt;
+ return ret;
ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
if (ret)
@@ -412,8 +409,6 @@ static int stm32_dwmac_probe(struct platform_device *pdev)
err_clk_disable:
stm32_dwmac_clk_disable(dwmac);
-err_remove_config_dt:
- stmmac_remove_config_dt(pdev, plat_dat);
return ret;
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
index 465ff1fd4785..137741b94122 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
@@ -1224,7 +1224,7 @@ static int sun8i_dwmac_probe(struct platform_device *pdev)
if (ret)
return -EINVAL;
- plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac);
+ plat_dat = devm_stmmac_probe_config_dt(pdev, stmmac_res.mac);
if (IS_ERR(plat_dat))
return PTR_ERR(plat_dat);
@@ -1244,7 +1244,7 @@ static int sun8i_dwmac_probe(struct platform_device *pdev)
ret = sun8i_dwmac_set_syscon(&pdev->dev, plat_dat);
if (ret)
- goto dwmac_deconfig;
+ return ret;
ret = sun8i_dwmac_init(pdev, plat_dat->bsp_priv);
if (ret)
@@ -1295,8 +1295,6 @@ dwmac_exit:
sun8i_dwmac_exit(pdev, gmac);
dwmac_syscon:
sun8i_dwmac_unset_syscon(gmac);
-dwmac_deconfig:
- stmmac_remove_config_dt(pdev, plat_dat);
return ret;
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c
index beceeae579bf..2653a9f0958c 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c
@@ -108,36 +108,31 @@ static int sun7i_gmac_probe(struct platform_device *pdev)
if (ret)
return ret;
- plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac);
+ plat_dat = devm_stmmac_probe_config_dt(pdev, stmmac_res.mac);
if (IS_ERR(plat_dat))
return PTR_ERR(plat_dat);
gmac = devm_kzalloc(dev, sizeof(*gmac), GFP_KERNEL);
- if (!gmac) {
- ret = -ENOMEM;
- goto err_remove_config_dt;
- }
+ if (!gmac)
+ return -ENOMEM;
ret = of_get_phy_mode(dev->of_node, &gmac->interface);
if (ret && ret != -ENODEV) {
dev_err(dev, "Can't get phy-mode\n");
- goto err_remove_config_dt;
+ return ret;
}
gmac->tx_clk = devm_clk_get(dev, "allwinner_gmac_tx");
if (IS_ERR(gmac->tx_clk)) {
dev_err(dev, "could not get tx clock\n");
- ret = PTR_ERR(gmac->tx_clk);
- goto err_remove_config_dt;
+ return PTR_ERR(gmac->tx_clk);
}
/* Optional regulator for PHY */
gmac->regulator = devm_regulator_get_optional(dev, "phy");
if (IS_ERR(gmac->regulator)) {
- if (PTR_ERR(gmac->regulator) == -EPROBE_DEFER) {
- ret = -EPROBE_DEFER;
- goto err_remove_config_dt;
- }
+ if (PTR_ERR(gmac->regulator) == -EPROBE_DEFER)
+ return -EPROBE_DEFER;
dev_info(dev, "no regulator found\n");
gmac->regulator = NULL;
}
@@ -155,7 +150,7 @@ static int sun7i_gmac_probe(struct platform_device *pdev)
ret = sun7i_gmac_init(pdev, plat_dat->bsp_priv);
if (ret)
- goto err_remove_config_dt;
+ return ret;
ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
if (ret)
@@ -165,8 +160,6 @@ static int sun7i_gmac_probe(struct platform_device *pdev)
err_gmac_exit:
sun7i_gmac_exit(pdev, plat_dat->bsp_priv);
-err_remove_config_dt:
- stmmac_remove_config_dt(pdev, plat_dat);
return ret;
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-tegra.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-tegra.c
index e0f3cbd36852..362f85136c3e 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-tegra.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-tegra.c
@@ -284,7 +284,7 @@ static int tegra_mgbe_probe(struct platform_device *pdev)
if (err < 0)
goto disable_clks;
- plat = stmmac_probe_config_dt(pdev, res.mac);
+ plat = devm_stmmac_probe_config_dt(pdev, res.mac);
if (IS_ERR(plat)) {
err = PTR_ERR(plat);
goto disable_clks;
@@ -303,7 +303,7 @@ static int tegra_mgbe_probe(struct platform_device *pdev)
GFP_KERNEL);
if (!plat->mdio_bus_data) {
err = -ENOMEM;
- goto remove;
+ goto disable_clks;
}
}
@@ -321,7 +321,7 @@ static int tegra_mgbe_probe(struct platform_device *pdev)
500, 500 * 2000);
if (err < 0) {
dev_err(mgbe->dev, "timeout waiting for TX lane to become enabled\n");
- goto remove;
+ goto disable_clks;
}
plat->serdes_powerup = mgbe_uphy_lane_bringup_serdes_up;
@@ -342,12 +342,10 @@ static int tegra_mgbe_probe(struct platform_device *pdev)
err = stmmac_dvr_probe(&pdev->dev, plat, &res);
if (err < 0)
- goto remove;
+ goto disable_clks;
return 0;
-remove:
- stmmac_remove_config_dt(pdev, plat);
disable_clks:
clk_bulk_disable_unprepare(ARRAY_SIZE(mgbe_clks), mgbe->clks);
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c
index 22d113fb8e09..a5a5cfa989c6 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c
@@ -220,15 +220,13 @@ static int visconti_eth_dwmac_probe(struct platform_device *pdev)
if (ret)
return ret;
- plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac);
+ plat_dat = devm_stmmac_probe_config_dt(pdev, stmmac_res.mac);
if (IS_ERR(plat_dat))
return PTR_ERR(plat_dat);
dwmac = devm_kzalloc(&pdev->dev, sizeof(*dwmac), GFP_KERNEL);
- if (!dwmac) {
- ret = -ENOMEM;
- goto remove_config;
- }
+ if (!dwmac)
+ return -ENOMEM;
spin_lock_init(&dwmac->lock);
dwmac->reg = stmmac_res.addr;
@@ -238,7 +236,7 @@ static int visconti_eth_dwmac_probe(struct platform_device *pdev)
ret = visconti_eth_clock_probe(pdev, plat_dat);
if (ret)
- goto remove_config;
+ return ret;
visconti_eth_init_hw(pdev, plat_dat);
@@ -252,22 +250,14 @@ static int visconti_eth_dwmac_probe(struct platform_device *pdev)
remove:
visconti_eth_clock_remove(pdev);
-remove_config:
- stmmac_remove_config_dt(pdev, plat_dat);
return ret;
}
static void visconti_eth_dwmac_remove(struct platform_device *pdev)
{
- struct net_device *ndev = platform_get_drvdata(pdev);
- struct stmmac_priv *priv = netdev_priv(ndev);
-
stmmac_pltfr_remove(pdev);
-
visconti_eth_clock_remove(pdev);
-
- stmmac_remove_config_dt(pdev, priv->plat);
}
static const struct of_device_id visconti_eth_dwmac_match[] = {
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 5801f4d50f95..bd1249a9d7f2 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -4415,6 +4415,16 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
WARN_ON(tx_q->tx_skbuff[first_entry]);
csum_insertion = (skb->ip_summed == CHECKSUM_PARTIAL);
+ /* DWMAC IPs can be synthesized to support tx coe only for a few tx
+ * queues. In that case, checksum offloading for those queues that don't
+ * support tx coe needs to fallback to software checksum calculation.
+ */
+ if (csum_insertion &&
+ priv->plat->tx_queues_cfg[queue].coe_unsupported) {
+ if (unlikely(skb_checksum_help(skb)))
+ goto dma_map_err;
+ csum_insertion = !csum_insertion;
+ }
if (likely(priv->extend_desc))
desc = (struct dma_desc *)(tx_q->dma_etx + entry);
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
index 2f0678f15fb7..1ffde555da47 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
@@ -276,6 +276,9 @@ static int stmmac_mtl_setup(struct platform_device *pdev,
plat->tx_queues_cfg[queue].use_prio = true;
}
+ plat->tx_queues_cfg[queue].coe_unsupported =
+ of_property_read_bool(q_node, "snps,coe-unsupported");
+
queue++;
}
if (queue != plat->tx_queues_to_use) {
@@ -385,6 +388,22 @@ static int stmmac_of_get_mac_mode(struct device_node *np)
}
/**
+ * stmmac_remove_config_dt - undo the effects of stmmac_probe_config_dt()
+ * @pdev: platform_device structure
+ * @plat: driver data platform structure
+ *
+ * Release resources claimed by stmmac_probe_config_dt().
+ */
+static void stmmac_remove_config_dt(struct platform_device *pdev,
+ struct plat_stmmacenet_data *plat)
+{
+ clk_disable_unprepare(plat->stmmac_clk);
+ clk_disable_unprepare(plat->pclk);
+ of_node_put(plat->phy_node);
+ of_node_put(plat->mdio_node);
+}
+
+/**
* stmmac_probe_config_dt - parse device-tree driver parameters
* @pdev: platform_device structure
* @mac: MAC address to use
@@ -392,7 +411,7 @@ static int stmmac_of_get_mac_mode(struct device_node *np)
* this function is to read the driver parameters from device-tree and
* set some private fields that will be used by the main at runtime.
*/
-struct plat_stmmacenet_data *
+static struct plat_stmmacenet_data *
stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac)
{
struct device_node *np = pdev->dev.of_node;
@@ -662,43 +681,14 @@ devm_stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac)
return plat;
}
-
-/**
- * stmmac_remove_config_dt - undo the effects of stmmac_probe_config_dt()
- * @pdev: platform_device structure
- * @plat: driver data platform structure
- *
- * Release resources claimed by stmmac_probe_config_dt().
- */
-void stmmac_remove_config_dt(struct platform_device *pdev,
- struct plat_stmmacenet_data *plat)
-{
- clk_disable_unprepare(plat->stmmac_clk);
- clk_disable_unprepare(plat->pclk);
- of_node_put(plat->phy_node);
- of_node_put(plat->mdio_node);
-}
#else
struct plat_stmmacenet_data *
-stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac)
-{
- return ERR_PTR(-EINVAL);
-}
-
-struct plat_stmmacenet_data *
devm_stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac)
{
return ERR_PTR(-EINVAL);
}
-
-void stmmac_remove_config_dt(struct platform_device *pdev,
- struct plat_stmmacenet_data *plat)
-{
-}
#endif /* CONFIG_OF */
-EXPORT_SYMBOL_GPL(stmmac_probe_config_dt);
EXPORT_SYMBOL_GPL(devm_stmmac_probe_config_dt);
-EXPORT_SYMBOL_GPL(stmmac_remove_config_dt);
int stmmac_get_platform_resources(struct platform_device *pdev,
struct stmmac_resources *stmmac_res)
@@ -807,7 +797,7 @@ static void devm_stmmac_pltfr_remove(void *data)
{
struct platform_device *pdev = data;
- stmmac_pltfr_remove_no_dt(pdev);
+ stmmac_pltfr_remove(pdev);
}
/**
@@ -834,12 +824,12 @@ int devm_stmmac_pltfr_probe(struct platform_device *pdev,
EXPORT_SYMBOL_GPL(devm_stmmac_pltfr_probe);
/**
- * stmmac_pltfr_remove_no_dt
+ * stmmac_pltfr_remove
* @pdev: pointer to the platform device
* Description: This undoes the effects of stmmac_pltfr_probe() by removing the
* driver and calling the platform's exit() callback.
*/
-void stmmac_pltfr_remove_no_dt(struct platform_device *pdev)
+void stmmac_pltfr_remove(struct platform_device *pdev)
{
struct net_device *ndev = platform_get_drvdata(pdev);
struct stmmac_priv *priv = netdev_priv(ndev);
@@ -848,23 +838,6 @@ void stmmac_pltfr_remove_no_dt(struct platform_device *pdev)
stmmac_dvr_remove(&pdev->dev);
stmmac_pltfr_exit(pdev, plat);
}
-EXPORT_SYMBOL_GPL(stmmac_pltfr_remove_no_dt);
-
-/**
- * stmmac_pltfr_remove
- * @pdev: platform device pointer
- * Description: this function calls the main to free the net resources
- * and calls the platforms hook and release the resources (e.g. mem).
- */
-void stmmac_pltfr_remove(struct platform_device *pdev)
-{
- struct net_device *ndev = platform_get_drvdata(pdev);
- struct stmmac_priv *priv = netdev_priv(ndev);
- struct plat_stmmacenet_data *plat = priv->plat;
-
- stmmac_pltfr_remove_no_dt(pdev);
- stmmac_remove_config_dt(pdev, plat);
-}
EXPORT_SYMBOL_GPL(stmmac_pltfr_remove);
/**
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h
index c5565b2a70ac..bb6fc7e59aed 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h
@@ -12,11 +12,7 @@
#include "stmmac.h"
struct plat_stmmacenet_data *
-stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac);
-struct plat_stmmacenet_data *
devm_stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac);
-void stmmac_remove_config_dt(struct platform_device *pdev,
- struct plat_stmmacenet_data *plat);
int stmmac_get_platform_resources(struct platform_device *pdev,
struct stmmac_resources *stmmac_res);
@@ -32,7 +28,6 @@ int stmmac_pltfr_probe(struct platform_device *pdev,
int devm_stmmac_pltfr_probe(struct platform_device *pdev,
struct plat_stmmacenet_data *plat,
struct stmmac_resources *res);
-void stmmac_pltfr_remove_no_dt(struct platform_device *pdev);
void stmmac_pltfr_remove(struct platform_device *pdev);
extern const struct dev_pm_ops stmmac_pltfr_pm_ops;
diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c
index 011d74087f86..21431f43e4c2 100644
--- a/drivers/net/ethernet/sun/niu.c
+++ b/drivers/net/ethernet/sun/niu.c
@@ -10132,7 +10132,7 @@ err_out:
return err;
}
-static int niu_of_remove(struct platform_device *op)
+static void niu_of_remove(struct platform_device *op)
{
struct net_device *dev = platform_get_drvdata(op);
@@ -10165,7 +10165,6 @@ static int niu_of_remove(struct platform_device *op)
free_netdev(dev);
}
- return 0;
}
static const struct of_device_id niu_match[] = {
@@ -10183,7 +10182,7 @@ static struct platform_driver niu_of_driver = {
.of_match_table = niu_match,
},
.probe = niu_of_probe,
- .remove = niu_of_remove,
+ .remove_new = niu_of_remove,
};
#endif /* CONFIG_SPARC64 */
diff --git a/drivers/net/ethernet/sun/sunbmac.c b/drivers/net/ethernet/sun/sunbmac.c
index cc34d92d2e3d..16c86b13c185 100644
--- a/drivers/net/ethernet/sun/sunbmac.c
+++ b/drivers/net/ethernet/sun/sunbmac.c
@@ -1234,7 +1234,7 @@ static int bigmac_sbus_probe(struct platform_device *op)
return bigmac_ether_init(op, qec_op);
}
-static int bigmac_sbus_remove(struct platform_device *op)
+static void bigmac_sbus_remove(struct platform_device *op)
{
struct bigmac *bp = platform_get_drvdata(op);
struct device *parent = op->dev.parent;
@@ -1255,8 +1255,6 @@ static int bigmac_sbus_remove(struct platform_device *op)
bp->bblock_dvma);
free_netdev(net_dev);
-
- return 0;
}
static const struct of_device_id bigmac_sbus_match[] = {
@@ -1274,7 +1272,7 @@ static struct platform_driver bigmac_sbus_driver = {
.of_match_table = bigmac_sbus_match,
},
.probe = bigmac_sbus_probe,
- .remove = bigmac_sbus_remove,
+ .remove_new = bigmac_sbus_remove,
};
module_platform_driver(bigmac_sbus_driver);
diff --git a/drivers/net/ethernet/sun/sunqe.c b/drivers/net/ethernet/sun/sunqe.c
index b37360f44972..aedd13c94225 100644
--- a/drivers/net/ethernet/sun/sunqe.c
+++ b/drivers/net/ethernet/sun/sunqe.c
@@ -933,7 +933,7 @@ static int qec_sbus_probe(struct platform_device *op)
return qec_ether_init(op);
}
-static int qec_sbus_remove(struct platform_device *op)
+static void qec_sbus_remove(struct platform_device *op)
{
struct sunqe *qp = platform_get_drvdata(op);
struct net_device *net_dev = qp->dev;
@@ -948,8 +948,6 @@ static int qec_sbus_remove(struct platform_device *op)
qp->buffers, qp->buffers_dvma);
free_netdev(net_dev);
-
- return 0;
}
static const struct of_device_id qec_sbus_match[] = {
@@ -967,7 +965,7 @@ static struct platform_driver qec_sbus_driver = {
.of_match_table = qec_sbus_match,
},
.probe = qec_sbus_probe,
- .remove = qec_sbus_remove,
+ .remove_new = qec_sbus_remove,
};
static int __init qec_init(void)
diff --git a/drivers/net/ethernet/sunplus/spl2sw_driver.c b/drivers/net/ethernet/sunplus/spl2sw_driver.c
index c499a14314f1..391a1bc7f446 100644
--- a/drivers/net/ethernet/sunplus/spl2sw_driver.c
+++ b/drivers/net/ethernet/sunplus/spl2sw_driver.c
@@ -511,7 +511,7 @@ out_clk_disable:
return ret;
}
-static int spl2sw_remove(struct platform_device *pdev)
+static void spl2sw_remove(struct platform_device *pdev)
{
struct spl2sw_common *comm;
int i;
@@ -538,8 +538,6 @@ static int spl2sw_remove(struct platform_device *pdev)
spl2sw_mdio_remove(comm);
clk_disable_unprepare(comm->clk);
-
- return 0;
}
static const struct of_device_id spl2sw_of_match[] = {
@@ -551,7 +549,7 @@ MODULE_DEVICE_TABLE(of, spl2sw_of_match);
static struct platform_driver spl2sw_driver = {
.probe = spl2sw_probe,
- .remove = spl2sw_remove,
+ .remove_new = spl2sw_remove,
.driver = {
.name = "sp7021_emac",
.of_match_table = spl2sw_of_match,
diff --git a/drivers/net/ethernet/ti/cpmac.c b/drivers/net/ethernet/ti/cpmac.c
index 80eeeb463c4f..bc9b5d18427d 100644
--- a/drivers/net/ethernet/ti/cpmac.c
+++ b/drivers/net/ethernet/ti/cpmac.c
@@ -1151,14 +1151,12 @@ fail:
return rc;
}
-static int cpmac_remove(struct platform_device *pdev)
+static void cpmac_remove(struct platform_device *pdev)
{
struct net_device *dev = platform_get_drvdata(pdev);
unregister_netdev(dev);
free_netdev(dev);
-
- return 0;
}
static struct platform_driver cpmac_driver = {
@@ -1166,7 +1164,7 @@ static struct platform_driver cpmac_driver = {
.name = "cpmac",
},
.probe = cpmac_probe,
- .remove = cpmac_remove,
+ .remove_new = cpmac_remove,
};
int __init cpmac_init(void)
diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c
index 2eb9d5a32588..5d756df133eb 100644
--- a/drivers/net/ethernet/ti/davinci_emac.c
+++ b/drivers/net/ethernet/ti/davinci_emac.c
@@ -2002,7 +2002,7 @@ err_free_netdev:
* Called when removing the device driver. We disable clock usage and release
* the resources taken up by the driver and unregister network device
*/
-static int davinci_emac_remove(struct platform_device *pdev)
+static void davinci_emac_remove(struct platform_device *pdev)
{
struct net_device *ndev = platform_get_drvdata(pdev);
struct emac_priv *priv = netdev_priv(ndev);
@@ -2022,8 +2022,6 @@ static int davinci_emac_remove(struct platform_device *pdev)
if (of_phy_is_fixed_link(np))
of_phy_deregister_fixed_link(np);
free_netdev(ndev);
-
- return 0;
}
static int davinci_emac_suspend(struct device *dev)
@@ -2076,7 +2074,7 @@ static struct platform_driver davinci_emac_driver = {
.of_match_table = davinci_emac_of_match,
},
.probe = davinci_emac_probe,
- .remove = davinci_emac_remove,
+ .remove_new = davinci_emac_remove,
};
/**
diff --git a/drivers/net/ethernet/ti/davinci_mdio.c b/drivers/net/ethernet/ti/davinci_mdio.c
index 89b6d23e9937..628c87dc1d28 100644
--- a/drivers/net/ethernet/ti/davinci_mdio.c
+++ b/drivers/net/ethernet/ti/davinci_mdio.c
@@ -673,7 +673,7 @@ bail_out:
return ret;
}
-static int davinci_mdio_remove(struct platform_device *pdev)
+static void davinci_mdio_remove(struct platform_device *pdev)
{
struct davinci_mdio_data *data = platform_get_drvdata(pdev);
@@ -686,8 +686,6 @@ static int davinci_mdio_remove(struct platform_device *pdev)
pm_runtime_dont_use_autosuspend(&pdev->dev);
pm_runtime_disable(&pdev->dev);
-
- return 0;
}
#ifdef CONFIG_PM
@@ -766,7 +764,7 @@ static struct platform_driver davinci_mdio_driver = {
.of_match_table = of_match_ptr(davinci_mdio_of_mtable),
},
.probe = davinci_mdio_probe,
- .remove = davinci_mdio_remove,
+ .remove_new = davinci_mdio_remove,
};
static int __init davinci_mdio_init(void)
diff --git a/drivers/net/ethernet/ti/icssg/icssg_config.c b/drivers/net/ethernet/ti/icssg/icssg_config.c
index b272361e378f..99de8a40ed60 100644
--- a/drivers/net/ethernet/ti/icssg/icssg_config.c
+++ b/drivers/net/ethernet/ti/icssg/icssg_config.c
@@ -433,6 +433,17 @@ int emac_set_port_state(struct prueth_emac *emac,
return ret;
}
+void icssg_config_half_duplex(struct prueth_emac *emac)
+{
+ u32 val;
+
+ if (!emac->half_duplex)
+ return;
+
+ val = get_random_u32();
+ writel(val, emac->dram.va + HD_RAND_SEED_OFFSET);
+}
+
void icssg_config_set_speed(struct prueth_emac *emac)
{
u8 fw_speed;
@@ -453,5 +464,8 @@ void icssg_config_set_speed(struct prueth_emac *emac)
return;
}
+ if (emac->duplex == DUPLEX_HALF)
+ fw_speed |= FW_LINK_SPEED_HD;
+
writeb(fw_speed, emac->dram.va + PORT_LINK_SPEED_OFFSET);
}
diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.c b/drivers/net/ethernet/ti/icssg/icssg_prueth.c
index 4914d0ef58e9..482ae82a99c1 100644
--- a/drivers/net/ethernet/ti/icssg/icssg_prueth.c
+++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.c
@@ -1029,6 +1029,8 @@ static void emac_adjust_link(struct net_device *ndev)
* values
*/
if (emac->link) {
+ if (emac->duplex == DUPLEX_HALF)
+ icssg_config_half_duplex(emac);
/* Set the RGMII cfg for gig en and full duplex */
icssg_update_rgmii_cfg(prueth->miig_rt, emac);
@@ -1147,9 +1149,13 @@ static int emac_phy_connect(struct prueth_emac *emac)
return -ENODEV;
}
+ if (!emac->half_duplex) {
+ dev_dbg(prueth->dev, "half duplex mode is not supported\n");
+ phy_remove_link_mode(ndev->phydev, ETHTOOL_LINK_MODE_10baseT_Half_BIT);
+ phy_remove_link_mode(ndev->phydev, ETHTOOL_LINK_MODE_100baseT_Half_BIT);
+ }
+
/* remove unsupported modes */
- phy_remove_link_mode(ndev->phydev, ETHTOOL_LINK_MODE_10baseT_Half_BIT);
- phy_remove_link_mode(ndev->phydev, ETHTOOL_LINK_MODE_100baseT_Half_BIT);
phy_remove_link_mode(ndev->phydev, ETHTOOL_LINK_MODE_1000baseT_Half_BIT);
phy_remove_link_mode(ndev->phydev, ETHTOOL_LINK_MODE_Pause_BIT);
phy_remove_link_mode(ndev->phydev, ETHTOOL_LINK_MODE_Asym_Pause_BIT);
@@ -2113,6 +2119,10 @@ static int prueth_probe(struct platform_device *pdev)
eth0_node->name);
goto exit_iep;
}
+
+ if (of_find_property(eth0_node, "ti,half-duplex-capable", NULL))
+ prueth->emac[PRUETH_MAC0]->half_duplex = 1;
+
prueth->emac[PRUETH_MAC0]->iep = prueth->iep0;
}
@@ -2124,6 +2134,9 @@ static int prueth_probe(struct platform_device *pdev)
goto netdev_exit;
}
+ if (of_find_property(eth1_node, "ti,half-duplex-capable", NULL))
+ prueth->emac[PRUETH_MAC1]->half_duplex = 1;
+
prueth->emac[PRUETH_MAC1]->iep = prueth->iep0;
}
@@ -2313,8 +2326,13 @@ static const struct prueth_pdata am654_icssg_pdata = {
.quirk_10m_link_issue = 1,
};
+static const struct prueth_pdata am64x_icssg_pdata = {
+ .fdqring_mode = K3_RINGACC_RING_MODE_RING,
+};
+
static const struct of_device_id prueth_dt_match[] = {
{ .compatible = "ti,am654-icssg-prueth", .data = &am654_icssg_pdata },
+ { .compatible = "ti,am642-icssg-prueth", .data = &am64x_icssg_pdata },
{ /* sentinel */ }
};
MODULE_DEVICE_TABLE(of, prueth_dt_match);
diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.h b/drivers/net/ethernet/ti/icssg/icssg_prueth.h
index 3fe80a8758d3..8b6d6b497010 100644
--- a/drivers/net/ethernet/ti/icssg/icssg_prueth.h
+++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.h
@@ -145,6 +145,7 @@ struct prueth_emac {
struct icss_iep *iep;
unsigned int rx_ts_enabled : 1;
unsigned int tx_ts_enabled : 1;
+ unsigned int half_duplex : 1;
/* DMA related */
struct prueth_tx_chn tx_chns[PRUETH_MAX_TX_QUEUES];
@@ -271,6 +272,7 @@ int icssg_config(struct prueth *prueth, struct prueth_emac *emac,
int emac_set_port_state(struct prueth_emac *emac,
enum icssg_port_state_cmd state);
void icssg_config_set_speed(struct prueth_emac *emac);
+void icssg_config_half_duplex(struct prueth_emac *emac);
/* Buffer queue helpers */
int icssg_queue_pop(struct prueth *prueth, u8 queue);
diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c
index d829113c16ee..11b90e1da0c6 100644
--- a/drivers/net/ethernet/ti/netcp_core.c
+++ b/drivers/net/ethernet/ti/netcp_core.c
@@ -2228,7 +2228,7 @@ probe_quit:
return ret;
}
-static int netcp_remove(struct platform_device *pdev)
+static void netcp_remove(struct platform_device *pdev)
{
struct netcp_device *netcp_device = platform_get_drvdata(pdev);
struct netcp_intf *netcp_intf, *netcp_tmp;
@@ -2256,7 +2256,6 @@ static int netcp_remove(struct platform_device *pdev)
pm_runtime_put_sync(&pdev->dev);
pm_runtime_disable(&pdev->dev);
platform_set_drvdata(pdev, NULL);
- return 0;
}
static const struct of_device_id of_match[] = {
@@ -2271,7 +2270,7 @@ static struct platform_driver netcp_driver = {
.of_match_table = of_match,
},
.probe = netcp_probe,
- .remove = netcp_remove,
+ .remove_new = netcp_remove,
};
module_platform_driver(netcp_driver);
diff --git a/drivers/net/ethernet/toshiba/spider_net.c b/drivers/net/ethernet/toshiba/spider_net.c
index 50d7eacfec58..87e67121477c 100644
--- a/drivers/net/ethernet/toshiba/spider_net.c
+++ b/drivers/net/ethernet/toshiba/spider_net.c
@@ -2332,7 +2332,7 @@ spider_net_alloc_card(void)
struct spider_net_card *card;
netdev = alloc_etherdev(struct_size(card, darray,
- tx_descriptors + rx_descriptors));
+ size_add(tx_descriptors, rx_descriptors)));
if (!netdev)
return NULL;
diff --git a/drivers/net/ethernet/tundra/tsi108_eth.c b/drivers/net/ethernet/tundra/tsi108_eth.c
index d09d352e1c0a..554aff7c8f3b 100644
--- a/drivers/net/ethernet/tundra/tsi108_eth.c
+++ b/drivers/net/ethernet/tundra/tsi108_eth.c
@@ -1660,7 +1660,7 @@ static void tsi108_timed_checker(struct timer_list *t)
mod_timer(&data->timer, jiffies + CHECK_PHY_INTERVAL);
}
-static int tsi108_ether_remove(struct platform_device *pdev)
+static void tsi108_ether_remove(struct platform_device *pdev)
{
struct net_device *dev = platform_get_drvdata(pdev);
struct tsi108_prv_data *priv = netdev_priv(dev);
@@ -1670,15 +1670,13 @@ static int tsi108_ether_remove(struct platform_device *pdev)
iounmap(priv->regs);
iounmap(priv->phyregs);
free_netdev(dev);
-
- return 0;
}
/* Structure for a device driver */
static struct platform_driver tsi_eth_driver = {
.probe = tsi108_init_one,
- .remove = tsi108_ether_remove,
+ .remove_new = tsi108_ether_remove,
.driver = {
.name = "tsi-ethernet",
},
diff --git a/drivers/net/ethernet/via/via-rhine.c b/drivers/net/ethernet/via/via-rhine.c
index 3e09e5036490..e80c02948801 100644
--- a/drivers/net/ethernet/via/via-rhine.c
+++ b/drivers/net/ethernet/via/via-rhine.c
@@ -2443,7 +2443,7 @@ static void rhine_remove_one_pci(struct pci_dev *pdev)
pci_disable_device(pdev);
}
-static int rhine_remove_one_platform(struct platform_device *pdev)
+static void rhine_remove_one_platform(struct platform_device *pdev)
{
struct net_device *dev = platform_get_drvdata(pdev);
struct rhine_private *rp = netdev_priv(dev);
@@ -2453,8 +2453,6 @@ static int rhine_remove_one_platform(struct platform_device *pdev)
iounmap(rp->base);
free_netdev(dev);
-
- return 0;
}
static void rhine_shutdown_pci(struct pci_dev *pdev)
@@ -2572,7 +2570,7 @@ static struct pci_driver rhine_driver_pci = {
static struct platform_driver rhine_driver_platform = {
.probe = rhine_init_one_platform,
- .remove = rhine_remove_one_platform,
+ .remove_new = rhine_remove_one_platform,
.driver = {
.name = DRV_NAME,
.of_match_table = rhine_of_tbl,
diff --git a/drivers/net/ethernet/via/via-velocity.c b/drivers/net/ethernet/via/via-velocity.c
index 731f689412e6..1c6b2a9bba08 100644
--- a/drivers/net/ethernet/via/via-velocity.c
+++ b/drivers/net/ethernet/via/via-velocity.c
@@ -2957,11 +2957,9 @@ static int velocity_platform_probe(struct platform_device *pdev)
return velocity_probe(&pdev->dev, irq, info, BUS_PLATFORM);
}
-static int velocity_platform_remove(struct platform_device *pdev)
+static void velocity_platform_remove(struct platform_device *pdev)
{
velocity_remove(&pdev->dev);
-
- return 0;
}
#ifdef CONFIG_PM_SLEEP
@@ -3249,7 +3247,7 @@ static struct pci_driver velocity_pci_driver = {
static struct platform_driver velocity_platform_driver = {
.probe = velocity_platform_probe,
- .remove = velocity_platform_remove,
+ .remove_new = velocity_platform_remove,
.driver = {
.name = "via-velocity",
.of_match_table = velocity_of_ids,
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_hw.c b/drivers/net/ethernet/wangxun/libwx/wx_hw.c
index 85dc16faca54..f0063d569c80 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_hw.c
+++ b/drivers/net/ethernet/wangxun/libwx/wx_hw.c
@@ -12,6 +12,98 @@
#include "wx_lib.h"
#include "wx_hw.h"
+static int wx_phy_read_reg_mdi(struct mii_bus *bus, int phy_addr, int devnum, int regnum)
+{
+ struct wx *wx = bus->priv;
+ u32 command, val;
+ int ret;
+
+ /* setup and write the address cycle command */
+ command = WX_MSCA_RA(regnum) |
+ WX_MSCA_PA(phy_addr) |
+ WX_MSCA_DA(devnum);
+ wr32(wx, WX_MSCA, command);
+
+ command = WX_MSCC_CMD(WX_MSCA_CMD_READ) | WX_MSCC_BUSY;
+ if (wx->mac.type == wx_mac_em)
+ command |= WX_MDIO_CLK(6);
+ wr32(wx, WX_MSCC, command);
+
+ /* wait to complete */
+ ret = read_poll_timeout(rd32, val, !(val & WX_MSCC_BUSY), 1000,
+ 100000, false, wx, WX_MSCC);
+ if (ret) {
+ wx_err(wx, "Mdio read c22 command did not complete.\n");
+ return ret;
+ }
+
+ return (u16)rd32(wx, WX_MSCC);
+}
+
+static int wx_phy_write_reg_mdi(struct mii_bus *bus, int phy_addr,
+ int devnum, int regnum, u16 value)
+{
+ struct wx *wx = bus->priv;
+ u32 command, val;
+ int ret;
+
+ /* setup and write the address cycle command */
+ command = WX_MSCA_RA(regnum) |
+ WX_MSCA_PA(phy_addr) |
+ WX_MSCA_DA(devnum);
+ wr32(wx, WX_MSCA, command);
+
+ command = value | WX_MSCC_CMD(WX_MSCA_CMD_WRITE) | WX_MSCC_BUSY;
+ if (wx->mac.type == wx_mac_em)
+ command |= WX_MDIO_CLK(6);
+ wr32(wx, WX_MSCC, command);
+
+ /* wait to complete */
+ ret = read_poll_timeout(rd32, val, !(val & WX_MSCC_BUSY), 1000,
+ 100000, false, wx, WX_MSCC);
+ if (ret)
+ wx_err(wx, "Mdio write c22 command did not complete.\n");
+
+ return ret;
+}
+
+int wx_phy_read_reg_mdi_c22(struct mii_bus *bus, int phy_addr, int regnum)
+{
+ struct wx *wx = bus->priv;
+
+ wr32(wx, WX_MDIO_CLAUSE_SELECT, 0xF);
+ return wx_phy_read_reg_mdi(bus, phy_addr, 0, regnum);
+}
+EXPORT_SYMBOL(wx_phy_read_reg_mdi_c22);
+
+int wx_phy_write_reg_mdi_c22(struct mii_bus *bus, int phy_addr, int regnum, u16 value)
+{
+ struct wx *wx = bus->priv;
+
+ wr32(wx, WX_MDIO_CLAUSE_SELECT, 0xF);
+ return wx_phy_write_reg_mdi(bus, phy_addr, 0, regnum, value);
+}
+EXPORT_SYMBOL(wx_phy_write_reg_mdi_c22);
+
+int wx_phy_read_reg_mdi_c45(struct mii_bus *bus, int phy_addr, int devnum, int regnum)
+{
+ struct wx *wx = bus->priv;
+
+ wr32(wx, WX_MDIO_CLAUSE_SELECT, 0);
+ return wx_phy_read_reg_mdi(bus, phy_addr, devnum, regnum);
+}
+EXPORT_SYMBOL(wx_phy_read_reg_mdi_c45);
+
+int wx_phy_write_reg_mdi_c45(struct mii_bus *bus, int phy_addr,
+ int devnum, int regnum, u16 value)
+{
+ struct wx *wx = bus->priv;
+
+ wr32(wx, WX_MDIO_CLAUSE_SELECT, 0);
+ return wx_phy_write_reg_mdi(bus, phy_addr, devnum, regnum, value);
+}
+EXPORT_SYMBOL(wx_phy_write_reg_mdi_c45);
+
static void wx_intr_disable(struct wx *wx, u64 qmask)
{
u32 mask;
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_hw.h b/drivers/net/ethernet/wangxun/libwx/wx_hw.h
index 0b3447bc6f2f..48d3ccabc272 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_hw.h
+++ b/drivers/net/ethernet/wangxun/libwx/wx_hw.h
@@ -4,6 +4,13 @@
#ifndef _WX_HW_H_
#define _WX_HW_H_
+#include <linux/phy.h>
+
+int wx_phy_read_reg_mdi_c22(struct mii_bus *bus, int phy_addr, int regnum);
+int wx_phy_write_reg_mdi_c22(struct mii_bus *bus, int phy_addr, int regnum, u16 value);
+int wx_phy_read_reg_mdi_c45(struct mii_bus *bus, int phy_addr, int devnum, int regnum);
+int wx_phy_write_reg_mdi_c45(struct mii_bus *bus, int phy_addr,
+ int devnum, int regnum, u16 value);
void wx_intr_enable(struct wx *wx, u64 qmask);
void wx_irq_disable(struct wx *wx);
int wx_check_flash_load(struct wx *wx, u32 check_bit);
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_type.h b/drivers/net/ethernet/wangxun/libwx/wx_type.h
index c5cbd177ef62..e3fc49284219 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_type.h
+++ b/drivers/net/ethernet/wangxun/libwx/wx_type.h
@@ -251,6 +251,7 @@ enum WX_MSCA_CMD_value {
#define WX_MSCC_SADDR BIT(18)
#define WX_MSCC_BUSY BIT(22)
#define WX_MDIO_CLK(v) FIELD_PREP(GENMASK(21, 19), v)
+#define WX_MDIO_CLAUSE_SELECT 0x11220
#define WX_MMC_CONTROL 0x11800
#define WX_MMC_CONTROL_RSTONRD BIT(2) /* reset on read */
diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_mdio.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_mdio.c
index 591f5b7b6da6..6302ecca71bb 100644
--- a/drivers/net/ethernet/wangxun/ngbe/ngbe_mdio.c
+++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_mdio.c
@@ -29,117 +29,6 @@ static int ngbe_phy_write_reg_internal(struct mii_bus *bus, int phy_addr, int re
return 0;
}
-static int ngbe_phy_read_reg_mdi_c22(struct mii_bus *bus, int phy_addr, int regnum)
-{
- u32 command, val, device_type = 0;
- struct wx *wx = bus->priv;
- int ret;
-
- wr32(wx, NGBE_MDIO_CLAUSE_SELECT, 0xF);
- /* setup and write the address cycle command */
- command = WX_MSCA_RA(regnum) |
- WX_MSCA_PA(phy_addr) |
- WX_MSCA_DA(device_type);
- wr32(wx, WX_MSCA, command);
- command = WX_MSCC_CMD(WX_MSCA_CMD_READ) |
- WX_MSCC_BUSY |
- WX_MDIO_CLK(6);
- wr32(wx, WX_MSCC, command);
-
- /* wait to complete */
- ret = read_poll_timeout(rd32, val, !(val & WX_MSCC_BUSY), 1000,
- 100000, false, wx, WX_MSCC);
- if (ret) {
- wx_err(wx, "Mdio read c22 command did not complete.\n");
- return ret;
- }
-
- return (u16)rd32(wx, WX_MSCC);
-}
-
-static int ngbe_phy_write_reg_mdi_c22(struct mii_bus *bus, int phy_addr, int regnum, u16 value)
-{
- u32 command, val, device_type = 0;
- struct wx *wx = bus->priv;
- int ret;
-
- wr32(wx, NGBE_MDIO_CLAUSE_SELECT, 0xF);
- /* setup and write the address cycle command */
- command = WX_MSCA_RA(regnum) |
- WX_MSCA_PA(phy_addr) |
- WX_MSCA_DA(device_type);
- wr32(wx, WX_MSCA, command);
- command = value |
- WX_MSCC_CMD(WX_MSCA_CMD_WRITE) |
- WX_MSCC_BUSY |
- WX_MDIO_CLK(6);
- wr32(wx, WX_MSCC, command);
-
- /* wait to complete */
- ret = read_poll_timeout(rd32, val, !(val & WX_MSCC_BUSY), 1000,
- 100000, false, wx, WX_MSCC);
- if (ret)
- wx_err(wx, "Mdio write c22 command did not complete.\n");
-
- return ret;
-}
-
-static int ngbe_phy_read_reg_mdi_c45(struct mii_bus *bus, int phy_addr, int devnum, int regnum)
-{
- struct wx *wx = bus->priv;
- u32 val, command;
- int ret;
-
- wr32(wx, NGBE_MDIO_CLAUSE_SELECT, 0x0);
- /* setup and write the address cycle command */
- command = WX_MSCA_RA(regnum) |
- WX_MSCA_PA(phy_addr) |
- WX_MSCA_DA(devnum);
- wr32(wx, WX_MSCA, command);
- command = WX_MSCC_CMD(WX_MSCA_CMD_READ) |
- WX_MSCC_BUSY |
- WX_MDIO_CLK(6);
- wr32(wx, WX_MSCC, command);
-
- /* wait to complete */
- ret = read_poll_timeout(rd32, val, !(val & WX_MSCC_BUSY), 1000,
- 100000, false, wx, WX_MSCC);
- if (ret) {
- wx_err(wx, "Mdio read c45 command did not complete.\n");
- return ret;
- }
-
- return (u16)rd32(wx, WX_MSCC);
-}
-
-static int ngbe_phy_write_reg_mdi_c45(struct mii_bus *bus, int phy_addr,
- int devnum, int regnum, u16 value)
-{
- struct wx *wx = bus->priv;
- int ret, command;
- u16 val;
-
- wr32(wx, NGBE_MDIO_CLAUSE_SELECT, 0x0);
- /* setup and write the address cycle command */
- command = WX_MSCA_RA(regnum) |
- WX_MSCA_PA(phy_addr) |
- WX_MSCA_DA(devnum);
- wr32(wx, WX_MSCA, command);
- command = value |
- WX_MSCC_CMD(WX_MSCA_CMD_WRITE) |
- WX_MSCC_BUSY |
- WX_MDIO_CLK(6);
- wr32(wx, WX_MSCC, command);
-
- /* wait to complete */
- ret = read_poll_timeout(rd32, val, !(val & WX_MSCC_BUSY), 1000,
- 100000, false, wx, WX_MSCC);
- if (ret)
- wx_err(wx, "Mdio write c45 command did not complete.\n");
-
- return ret;
-}
-
static int ngbe_phy_read_reg_c22(struct mii_bus *bus, int phy_addr, int regnum)
{
struct wx *wx = bus->priv;
@@ -148,7 +37,7 @@ static int ngbe_phy_read_reg_c22(struct mii_bus *bus, int phy_addr, int regnum)
if (wx->mac_type == em_mac_type_mdi)
phy_data = ngbe_phy_read_reg_internal(bus, phy_addr, regnum);
else
- phy_data = ngbe_phy_read_reg_mdi_c22(bus, phy_addr, regnum);
+ phy_data = wx_phy_read_reg_mdi_c22(bus, phy_addr, regnum);
return phy_data;
}
@@ -162,7 +51,7 @@ static int ngbe_phy_write_reg_c22(struct mii_bus *bus, int phy_addr,
if (wx->mac_type == em_mac_type_mdi)
ret = ngbe_phy_write_reg_internal(bus, phy_addr, regnum, value);
else
- ret = ngbe_phy_write_reg_mdi_c22(bus, phy_addr, regnum, value);
+ ret = wx_phy_write_reg_mdi_c22(bus, phy_addr, regnum, value);
return ret;
}
@@ -262,8 +151,8 @@ int ngbe_mdio_init(struct wx *wx)
mii_bus->priv = wx;
if (wx->mac_type == em_mac_type_rgmii) {
- mii_bus->read_c45 = ngbe_phy_read_reg_mdi_c45;
- mii_bus->write_c45 = ngbe_phy_write_reg_mdi_c45;
+ mii_bus->read_c45 = wx_phy_read_reg_mdi_c45;
+ mii_bus->write_c45 = wx_phy_write_reg_mdi_c45;
}
snprintf(mii_bus->id, MII_BUS_ID_SIZE, "ngbe-%x", pci_dev_id(pdev));
diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h b/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h
index 72c8cd2d5575..ff754d69bdf6 100644
--- a/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h
+++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h
@@ -59,9 +59,6 @@
#define NGBE_EEPROM_VERSION_L 0x1D
#define NGBE_EEPROM_VERSION_H 0x1E
-/* Media-dependent registers. */
-#define NGBE_MDIO_CLAUSE_SELECT 0x11220
-
/* GPIO Registers */
#define NGBE_GPIO_DR 0x14800
#define NGBE_GPIO_DDR 0x14804
diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c
index 4159c84035fd..b6c06adb8656 100644
--- a/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c
+++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c
@@ -647,58 +647,6 @@ static int txgbe_sfp_register(struct txgbe *txgbe)
return 0;
}
-static int txgbe_phy_read(struct mii_bus *bus, int phy_addr,
- int devnum, int regnum)
-{
- struct wx *wx = bus->priv;
- u32 val, command;
- int ret;
-
- /* setup and write the address cycle command */
- command = WX_MSCA_RA(regnum) |
- WX_MSCA_PA(phy_addr) |
- WX_MSCA_DA(devnum);
- wr32(wx, WX_MSCA, command);
-
- command = WX_MSCC_CMD(WX_MSCA_CMD_READ) | WX_MSCC_BUSY;
- wr32(wx, WX_MSCC, command);
-
- /* wait to complete */
- ret = read_poll_timeout(rd32, val, !(val & WX_MSCC_BUSY), 1000,
- 100000, false, wx, WX_MSCC);
- if (ret) {
- wx_err(wx, "Mdio read c45 command did not complete.\n");
- return ret;
- }
-
- return (u16)rd32(wx, WX_MSCC);
-}
-
-static int txgbe_phy_write(struct mii_bus *bus, int phy_addr,
- int devnum, int regnum, u16 value)
-{
- struct wx *wx = bus->priv;
- int ret, command;
- u16 val;
-
- /* setup and write the address cycle command */
- command = WX_MSCA_RA(regnum) |
- WX_MSCA_PA(phy_addr) |
- WX_MSCA_DA(devnum);
- wr32(wx, WX_MSCA, command);
-
- command = value | WX_MSCC_CMD(WX_MSCA_CMD_WRITE) | WX_MSCC_BUSY;
- wr32(wx, WX_MSCC, command);
-
- /* wait to complete */
- ret = read_poll_timeout(rd32, val, !(val & WX_MSCC_BUSY), 1000,
- 100000, false, wx, WX_MSCC);
- if (ret)
- wx_err(wx, "Mdio write c45 command did not complete.\n");
-
- return ret;
-}
-
static int txgbe_ext_phy_init(struct txgbe *txgbe)
{
struct phy_device *phydev;
@@ -715,8 +663,8 @@ static int txgbe_ext_phy_init(struct txgbe *txgbe)
return -ENOMEM;
mii_bus->name = "txgbe_mii_bus";
- mii_bus->read_c45 = &txgbe_phy_read;
- mii_bus->write_c45 = &txgbe_phy_write;
+ mii_bus->read_c45 = &wx_phy_read_reg_mdi_c45;
+ mii_bus->write_c45 = &wx_phy_write_reg_mdi_c45;
mii_bus->parent = &pdev->dev;
mii_bus->phy_mask = GENMASK(31, 1);
mii_bus->priv = wx;
diff --git a/drivers/net/ethernet/wiznet/w5100.c b/drivers/net/ethernet/wiznet/w5100.c
index 634946e87e5f..341ee2f249fd 100644
--- a/drivers/net/ethernet/wiznet/w5100.c
+++ b/drivers/net/ethernet/wiznet/w5100.c
@@ -1062,11 +1062,9 @@ static int w5100_mmio_probe(struct platform_device *pdev)
mac_addr, irq, data ? data->link_gpio : -EINVAL);
}
-static int w5100_mmio_remove(struct platform_device *pdev)
+static void w5100_mmio_remove(struct platform_device *pdev)
{
w5100_remove(&pdev->dev);
-
- return 0;
}
void *w5100_ops_priv(const struct net_device *ndev)
@@ -1273,6 +1271,6 @@ static struct platform_driver w5100_mmio_driver = {
.pm = &w5100_pm_ops,
},
.probe = w5100_mmio_probe,
- .remove = w5100_mmio_remove,
+ .remove_new = w5100_mmio_remove,
};
module_platform_driver(w5100_mmio_driver);
diff --git a/drivers/net/ethernet/wiznet/w5300.c b/drivers/net/ethernet/wiznet/w5300.c
index b0958fe8111e..3318b50a5911 100644
--- a/drivers/net/ethernet/wiznet/w5300.c
+++ b/drivers/net/ethernet/wiznet/w5300.c
@@ -627,7 +627,7 @@ err_register:
return err;
}
-static int w5300_remove(struct platform_device *pdev)
+static void w5300_remove(struct platform_device *pdev)
{
struct net_device *ndev = platform_get_drvdata(pdev);
struct w5300_priv *priv = netdev_priv(ndev);
@@ -639,7 +639,6 @@ static int w5300_remove(struct platform_device *pdev)
unregister_netdev(ndev);
free_netdev(ndev);
- return 0;
}
#ifdef CONFIG_PM_SLEEP
@@ -683,7 +682,7 @@ static struct platform_driver w5300_driver = {
.pm = &w5300_pm_ops,
},
.probe = w5300_probe,
- .remove = w5300_remove,
+ .remove_new = w5300_remove,
};
module_platform_driver(w5300_driver);
diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c
index 1444b855e7aa..9df39cf8b097 100644
--- a/drivers/net/ethernet/xilinx/ll_temac_main.c
+++ b/drivers/net/ethernet/xilinx/ll_temac_main.c
@@ -1626,7 +1626,7 @@ err_sysfs_create:
return rc;
}
-static int temac_remove(struct platform_device *pdev)
+static void temac_remove(struct platform_device *pdev)
{
struct net_device *ndev = platform_get_drvdata(pdev);
struct temac_local *lp = netdev_priv(ndev);
@@ -1636,7 +1636,6 @@ static int temac_remove(struct platform_device *pdev)
if (lp->phy_node)
of_node_put(lp->phy_node);
temac_mdio_teardown(lp);
- return 0;
}
static const struct of_device_id temac_of_match[] = {
@@ -1650,7 +1649,7 @@ MODULE_DEVICE_TABLE(of, temac_of_match);
static struct platform_driver temac_driver = {
.probe = temac_probe,
- .remove = temac_remove,
+ .remove_new = temac_remove,
.driver = {
.name = "xilinx_temac",
.of_match_table = temac_of_match,
diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
index b7ec4dafae90..82d0d44b2b02 100644
--- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
+++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
@@ -2183,7 +2183,7 @@ free_netdev:
return ret;
}
-static int axienet_remove(struct platform_device *pdev)
+static void axienet_remove(struct platform_device *pdev)
{
struct net_device *ndev = platform_get_drvdata(pdev);
struct axienet_local *lp = netdev_priv(ndev);
@@ -2202,8 +2202,6 @@ static int axienet_remove(struct platform_device *pdev)
clk_disable_unprepare(lp->axi_clk);
free_netdev(ndev);
-
- return 0;
}
static void axienet_shutdown(struct platform_device *pdev)
@@ -2256,7 +2254,7 @@ static DEFINE_SIMPLE_DEV_PM_OPS(axienet_pm_ops,
static struct platform_driver axienet_driver = {
.probe = axienet_probe,
- .remove = axienet_remove,
+ .remove_new = axienet_remove,
.shutdown = axienet_shutdown,
.driver = {
.name = "xilinx_axienet",
diff --git a/drivers/net/ethernet/xilinx/xilinx_emaclite.c b/drivers/net/ethernet/xilinx/xilinx_emaclite.c
index b358ecc67227..32a502e7318b 100644
--- a/drivers/net/ethernet/xilinx/xilinx_emaclite.c
+++ b/drivers/net/ethernet/xilinx/xilinx_emaclite.c
@@ -1183,7 +1183,7 @@ error:
*
* Return: 0, always.
*/
-static int xemaclite_of_remove(struct platform_device *of_dev)
+static void xemaclite_of_remove(struct platform_device *of_dev)
{
struct net_device *ndev = platform_get_drvdata(of_dev);
@@ -1202,8 +1202,6 @@ static int xemaclite_of_remove(struct platform_device *of_dev)
lp->phy_node = NULL;
free_netdev(ndev);
-
- return 0;
}
#ifdef CONFIG_NET_POLL_CONTROLLER
@@ -1262,7 +1260,7 @@ static struct platform_driver xemaclite_of_driver = {
.of_match_table = xemaclite_of_match,
},
.probe = xemaclite_of_probe,
- .remove = xemaclite_of_remove,
+ .remove_new = xemaclite_of_remove,
};
module_platform_driver(xemaclite_of_driver);
diff --git a/drivers/net/ethernet/xscale/ixp4xx_eth.c b/drivers/net/ethernet/xscale/ixp4xx_eth.c
index 3b0c5f177447..b242aa61d8ab 100644
--- a/drivers/net/ethernet/xscale/ixp4xx_eth.c
+++ b/drivers/net/ethernet/xscale/ixp4xx_eth.c
@@ -1533,7 +1533,7 @@ err_free_mem:
return err;
}
-static int ixp4xx_eth_remove(struct platform_device *pdev)
+static void ixp4xx_eth_remove(struct platform_device *pdev)
{
struct net_device *ndev = platform_get_drvdata(pdev);
struct phy_device *phydev = ndev->phydev;
@@ -1544,7 +1544,6 @@ static int ixp4xx_eth_remove(struct platform_device *pdev)
ixp4xx_mdio_remove();
npe_port_tab[NPE_ID(port->id)] = NULL;
npe_release(port->npe);
- return 0;
}
static const struct of_device_id ixp4xx_eth_of_match[] = {
@@ -1560,7 +1559,7 @@ static struct platform_driver ixp4xx_eth_driver = {
.of_match_table = of_match_ptr(ixp4xx_eth_of_match),
},
.probe = ixp4xx_eth_probe,
- .remove = ixp4xx_eth_remove,
+ .remove_new = ixp4xx_eth_remove,
};
module_platform_driver(ixp4xx_eth_driver);
diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c
index b22596b18ee8..b1919278e931 100644
--- a/drivers/net/gtp.c
+++ b/drivers/net/gtp.c
@@ -630,7 +630,7 @@ static void __gtp_encap_destroy(struct sock *sk)
gtp->sk0 = NULL;
else
gtp->sk1u = NULL;
- udp_sk(sk)->encap_type = 0;
+ WRITE_ONCE(udp_sk(sk)->encap_type, 0);
rcu_assign_sk_user_data(sk, NULL);
release_sock(sk);
sock_put(sk);
@@ -682,7 +682,7 @@ static int gtp_encap_recv(struct sock *sk, struct sk_buff *skb)
netdev_dbg(gtp->dev, "encap_recv sk=%p\n", sk);
- switch (udp_sk(sk)->encap_type) {
+ switch (READ_ONCE(udp_sk(sk)->encap_type)) {
case UDP_ENCAP_GTP0:
netdev_dbg(gtp->dev, "received GTP0 packet\n");
ret = gtp0_udp_encap_recv(gtp, skb);
diff --git a/drivers/net/mdio/mdio-aspeed.c b/drivers/net/mdio/mdio-aspeed.c
index c727103c8b05..70edeeb7771e 100644
--- a/drivers/net/mdio/mdio-aspeed.c
+++ b/drivers/net/mdio/mdio-aspeed.c
@@ -177,15 +177,13 @@ static int aspeed_mdio_probe(struct platform_device *pdev)
return 0;
}
-static int aspeed_mdio_remove(struct platform_device *pdev)
+static void aspeed_mdio_remove(struct platform_device *pdev)
{
struct mii_bus *bus = (struct mii_bus *)platform_get_drvdata(pdev);
struct aspeed_mdio *ctx = bus->priv;
reset_control_assert(ctx->reset);
mdiobus_unregister(bus);
-
- return 0;
}
static const struct of_device_id aspeed_mdio_of_match[] = {
@@ -200,7 +198,7 @@ static struct platform_driver aspeed_mdio_driver = {
.of_match_table = aspeed_mdio_of_match,
},
.probe = aspeed_mdio_probe,
- .remove = aspeed_mdio_remove,
+ .remove_new = aspeed_mdio_remove,
};
module_platform_driver(aspeed_mdio_driver);
diff --git a/drivers/net/mdio/mdio-bcm-iproc.c b/drivers/net/mdio/mdio-bcm-iproc.c
index 77fc970cdfde..5a2d26c6afdc 100644
--- a/drivers/net/mdio/mdio-bcm-iproc.c
+++ b/drivers/net/mdio/mdio-bcm-iproc.c
@@ -168,14 +168,12 @@ err_iproc_mdio:
return rc;
}
-static int iproc_mdio_remove(struct platform_device *pdev)
+static void iproc_mdio_remove(struct platform_device *pdev)
{
struct iproc_mdio_priv *priv = platform_get_drvdata(pdev);
mdiobus_unregister(priv->mii_bus);
mdiobus_free(priv->mii_bus);
-
- return 0;
}
#ifdef CONFIG_PM_SLEEP
@@ -210,7 +208,7 @@ static struct platform_driver iproc_mdio_driver = {
#endif
},
.probe = iproc_mdio_probe,
- .remove = iproc_mdio_remove,
+ .remove_new = iproc_mdio_remove,
};
module_platform_driver(iproc_mdio_driver);
diff --git a/drivers/net/mdio/mdio-bcm-unimac.c b/drivers/net/mdio/mdio-bcm-unimac.c
index 6b26a0803696..e8cd8eef319b 100644
--- a/drivers/net/mdio/mdio-bcm-unimac.c
+++ b/drivers/net/mdio/mdio-bcm-unimac.c
@@ -296,15 +296,13 @@ out_clk_disable:
return ret;
}
-static int unimac_mdio_remove(struct platform_device *pdev)
+static void unimac_mdio_remove(struct platform_device *pdev)
{
struct unimac_mdio_priv *priv = platform_get_drvdata(pdev);
mdiobus_unregister(priv->mii_bus);
mdiobus_free(priv->mii_bus);
clk_disable_unprepare(priv->clk);
-
- return 0;
}
static int __maybe_unused unimac_mdio_suspend(struct device *d)
@@ -353,7 +351,7 @@ static struct platform_driver unimac_mdio_driver = {
.pm = &unimac_mdio_pm_ops,
},
.probe = unimac_mdio_probe,
- .remove = unimac_mdio_remove,
+ .remove_new = unimac_mdio_remove,
};
module_platform_driver(unimac_mdio_driver);
diff --git a/drivers/net/mdio/mdio-gpio.c b/drivers/net/mdio/mdio-gpio.c
index 0fb3c2de0845..897b88c50bbb 100644
--- a/drivers/net/mdio/mdio-gpio.c
+++ b/drivers/net/mdio/mdio-gpio.c
@@ -194,11 +194,9 @@ static int mdio_gpio_probe(struct platform_device *pdev)
return ret;
}
-static int mdio_gpio_remove(struct platform_device *pdev)
+static void mdio_gpio_remove(struct platform_device *pdev)
{
mdio_gpio_bus_destroy(&pdev->dev);
-
- return 0;
}
static const struct of_device_id mdio_gpio_of_match[] = {
@@ -210,7 +208,7 @@ MODULE_DEVICE_TABLE(of, mdio_gpio_of_match);
static struct platform_driver mdio_gpio_driver = {
.probe = mdio_gpio_probe,
- .remove = mdio_gpio_remove,
+ .remove_new = mdio_gpio_remove,
.driver = {
.name = "mdio-gpio",
.of_match_table = mdio_gpio_of_match,
diff --git a/drivers/net/mdio/mdio-hisi-femac.c b/drivers/net/mdio/mdio-hisi-femac.c
index f231c2fbb1de..6703f626ee83 100644
--- a/drivers/net/mdio/mdio-hisi-femac.c
+++ b/drivers/net/mdio/mdio-hisi-femac.c
@@ -118,7 +118,7 @@ err_out_free_mdiobus:
return ret;
}
-static int hisi_femac_mdio_remove(struct platform_device *pdev)
+static void hisi_femac_mdio_remove(struct platform_device *pdev)
{
struct mii_bus *bus = platform_get_drvdata(pdev);
struct hisi_femac_mdio_data *data = bus->priv;
@@ -126,8 +126,6 @@ static int hisi_femac_mdio_remove(struct platform_device *pdev)
mdiobus_unregister(bus);
clk_disable_unprepare(data->clk);
mdiobus_free(bus);
-
- return 0;
}
static const struct of_device_id hisi_femac_mdio_dt_ids[] = {
@@ -138,7 +136,7 @@ MODULE_DEVICE_TABLE(of, hisi_femac_mdio_dt_ids);
static struct platform_driver hisi_femac_mdio_driver = {
.probe = hisi_femac_mdio_probe,
- .remove = hisi_femac_mdio_remove,
+ .remove_new = hisi_femac_mdio_remove,
.driver = {
.name = "hisi-femac-mdio",
.of_match_table = hisi_femac_mdio_dt_ids,
diff --git a/drivers/net/mdio/mdio-ipq4019.c b/drivers/net/mdio/mdio-ipq4019.c
index 78b93de636f5..abd8b508ec16 100644
--- a/drivers/net/mdio/mdio-ipq4019.c
+++ b/drivers/net/mdio/mdio-ipq4019.c
@@ -278,13 +278,11 @@ static int ipq4019_mdio_probe(struct platform_device *pdev)
return 0;
}
-static int ipq4019_mdio_remove(struct platform_device *pdev)
+static void ipq4019_mdio_remove(struct platform_device *pdev)
{
struct mii_bus *bus = platform_get_drvdata(pdev);
mdiobus_unregister(bus);
-
- return 0;
}
static const struct of_device_id ipq4019_mdio_dt_ids[] = {
@@ -296,7 +294,7 @@ MODULE_DEVICE_TABLE(of, ipq4019_mdio_dt_ids);
static struct platform_driver ipq4019_mdio_driver = {
.probe = ipq4019_mdio_probe,
- .remove = ipq4019_mdio_remove,
+ .remove_new = ipq4019_mdio_remove,
.driver = {
.name = "ipq4019-mdio",
.of_match_table = ipq4019_mdio_dt_ids,
diff --git a/drivers/net/mdio/mdio-ipq8064.c b/drivers/net/mdio/mdio-ipq8064.c
index fd9716960106..f71b6e1c66e4 100644
--- a/drivers/net/mdio/mdio-ipq8064.c
+++ b/drivers/net/mdio/mdio-ipq8064.c
@@ -147,14 +147,11 @@ ipq8064_mdio_probe(struct platform_device *pdev)
return 0;
}
-static int
-ipq8064_mdio_remove(struct platform_device *pdev)
+static void ipq8064_mdio_remove(struct platform_device *pdev)
{
struct mii_bus *bus = platform_get_drvdata(pdev);
mdiobus_unregister(bus);
-
- return 0;
}
static const struct of_device_id ipq8064_mdio_dt_ids[] = {
@@ -165,7 +162,7 @@ MODULE_DEVICE_TABLE(of, ipq8064_mdio_dt_ids);
static struct platform_driver ipq8064_mdio_driver = {
.probe = ipq8064_mdio_probe,
- .remove = ipq8064_mdio_remove,
+ .remove_new = ipq8064_mdio_remove,
.driver = {
.name = "ipq8064-mdio",
.of_match_table = ipq8064_mdio_dt_ids,
diff --git a/drivers/net/mdio/mdio-moxart.c b/drivers/net/mdio/mdio-moxart.c
index f0cff584e176..d35af8cd7c4d 100644
--- a/drivers/net/mdio/mdio-moxart.c
+++ b/drivers/net/mdio/mdio-moxart.c
@@ -155,14 +155,12 @@ err_out_free_mdiobus:
return ret;
}
-static int moxart_mdio_remove(struct platform_device *pdev)
+static void moxart_mdio_remove(struct platform_device *pdev)
{
struct mii_bus *bus = platform_get_drvdata(pdev);
mdiobus_unregister(bus);
mdiobus_free(bus);
-
- return 0;
}
static const struct of_device_id moxart_mdio_dt_ids[] = {
@@ -173,7 +171,7 @@ MODULE_DEVICE_TABLE(of, moxart_mdio_dt_ids);
static struct platform_driver moxart_mdio_driver = {
.probe = moxart_mdio_probe,
- .remove = moxart_mdio_remove,
+ .remove_new = moxart_mdio_remove,
.driver = {
.name = "moxart-mdio",
.of_match_table = moxart_mdio_dt_ids,
diff --git a/drivers/net/mdio/mdio-mscc-miim.c b/drivers/net/mdio/mdio-mscc-miim.c
index 1a1b95ae95fa..c29377c85307 100644
--- a/drivers/net/mdio/mdio-mscc-miim.c
+++ b/drivers/net/mdio/mdio-mscc-miim.c
@@ -335,15 +335,13 @@ out_disable_clk:
return ret;
}
-static int mscc_miim_remove(struct platform_device *pdev)
+static void mscc_miim_remove(struct platform_device *pdev)
{
struct mii_bus *bus = platform_get_drvdata(pdev);
struct mscc_miim_dev *miim = bus->priv;
clk_disable_unprepare(miim->clk);
mdiobus_unregister(bus);
-
- return 0;
}
static const struct mscc_miim_info mscc_ocelot_miim_info = {
@@ -371,7 +369,7 @@ MODULE_DEVICE_TABLE(of, mscc_miim_match);
static struct platform_driver mscc_miim_driver = {
.probe = mscc_miim_probe,
- .remove = mscc_miim_remove,
+ .remove_new = mscc_miim_remove,
.driver = {
.name = "mscc-miim",
.of_match_table = mscc_miim_match,
diff --git a/drivers/net/mdio/mdio-mux-bcm-iproc.c b/drivers/net/mdio/mdio-mux-bcm-iproc.c
index 956d54846b62..a750bd4c77a0 100644
--- a/drivers/net/mdio/mdio-mux-bcm-iproc.c
+++ b/drivers/net/mdio/mdio-mux-bcm-iproc.c
@@ -287,15 +287,13 @@ out_clk:
return rc;
}
-static int mdio_mux_iproc_remove(struct platform_device *pdev)
+static void mdio_mux_iproc_remove(struct platform_device *pdev)
{
struct iproc_mdiomux_desc *md = platform_get_drvdata(pdev);
mdio_mux_uninit(md->mux_handle);
mdiobus_unregister(md->mii_bus);
clk_disable_unprepare(md->core_clk);
-
- return 0;
}
#ifdef CONFIG_PM_SLEEP
@@ -342,7 +340,7 @@ static struct platform_driver mdiomux_iproc_driver = {
.pm = &mdio_mux_iproc_pm_ops,
},
.probe = mdio_mux_iproc_probe,
- .remove = mdio_mux_iproc_remove,
+ .remove_new = mdio_mux_iproc_remove,
};
module_platform_driver(mdiomux_iproc_driver);
diff --git a/drivers/net/mdio/mdio-mux-bcm6368.c b/drivers/net/mdio/mdio-mux-bcm6368.c
index 8b444a8eb6b5..1b77e0e3e6e1 100644
--- a/drivers/net/mdio/mdio-mux-bcm6368.c
+++ b/drivers/net/mdio/mdio-mux-bcm6368.c
@@ -153,14 +153,12 @@ out_register:
return rc;
}
-static int bcm6368_mdiomux_remove(struct platform_device *pdev)
+static void bcm6368_mdiomux_remove(struct platform_device *pdev)
{
struct bcm6368_mdiomux_desc *md = platform_get_drvdata(pdev);
mdio_mux_uninit(md->mux_handle);
mdiobus_unregister(md->mii_bus);
-
- return 0;
}
static const struct of_device_id bcm6368_mdiomux_ids[] = {
@@ -175,7 +173,7 @@ static struct platform_driver bcm6368_mdiomux_driver = {
.of_match_table = bcm6368_mdiomux_ids,
},
.probe = bcm6368_mdiomux_probe,
- .remove = bcm6368_mdiomux_remove,
+ .remove_new = bcm6368_mdiomux_remove,
};
module_platform_driver(bcm6368_mdiomux_driver);
diff --git a/drivers/net/mdio/mdio-mux-gpio.c b/drivers/net/mdio/mdio-mux-gpio.c
index 3c7f16f06b45..38fb031f8979 100644
--- a/drivers/net/mdio/mdio-mux-gpio.c
+++ b/drivers/net/mdio/mdio-mux-gpio.c
@@ -62,11 +62,10 @@ static int mdio_mux_gpio_probe(struct platform_device *pdev)
return 0;
}
-static int mdio_mux_gpio_remove(struct platform_device *pdev)
+static void mdio_mux_gpio_remove(struct platform_device *pdev)
{
struct mdio_mux_gpio_state *s = dev_get_platdata(&pdev->dev);
mdio_mux_uninit(s->mux_handle);
- return 0;
}
static const struct of_device_id mdio_mux_gpio_match[] = {
@@ -87,7 +86,7 @@ static struct platform_driver mdio_mux_gpio_driver = {
.of_match_table = mdio_mux_gpio_match,
},
.probe = mdio_mux_gpio_probe,
- .remove = mdio_mux_gpio_remove,
+ .remove_new = mdio_mux_gpio_remove,
};
module_platform_driver(mdio_mux_gpio_driver);
diff --git a/drivers/net/mdio/mdio-mux-meson-g12a.c b/drivers/net/mdio/mdio-mux-meson-g12a.c
index 910e5cf74e89..754b0f2cf15b 100644
--- a/drivers/net/mdio/mdio-mux-meson-g12a.c
+++ b/drivers/net/mdio/mdio-mux-meson-g12a.c
@@ -336,7 +336,7 @@ static int g12a_mdio_mux_probe(struct platform_device *pdev)
return ret;
}
-static int g12a_mdio_mux_remove(struct platform_device *pdev)
+static void g12a_mdio_mux_remove(struct platform_device *pdev)
{
struct g12a_mdio_mux *priv = platform_get_drvdata(pdev);
@@ -344,13 +344,11 @@ static int g12a_mdio_mux_remove(struct platform_device *pdev)
if (__clk_is_enabled(priv->pll))
clk_disable_unprepare(priv->pll);
-
- return 0;
}
static struct platform_driver g12a_mdio_mux_driver = {
.probe = g12a_mdio_mux_probe,
- .remove = g12a_mdio_mux_remove,
+ .remove_new = g12a_mdio_mux_remove,
.driver = {
.name = "g12a-mdio_mux",
.of_match_table = g12a_mdio_mux_match,
diff --git a/drivers/net/mdio/mdio-mux-meson-gxl.c b/drivers/net/mdio/mdio-mux-meson-gxl.c
index 76188575ca1f..89554021b5cc 100644
--- a/drivers/net/mdio/mdio-mux-meson-gxl.c
+++ b/drivers/net/mdio/mdio-mux-meson-gxl.c
@@ -140,18 +140,16 @@ static int gxl_mdio_mux_probe(struct platform_device *pdev)
return ret;
}
-static int gxl_mdio_mux_remove(struct platform_device *pdev)
+static void gxl_mdio_mux_remove(struct platform_device *pdev)
{
struct gxl_mdio_mux *priv = platform_get_drvdata(pdev);
mdio_mux_uninit(priv->mux_handle);
-
- return 0;
}
static struct platform_driver gxl_mdio_mux_driver = {
.probe = gxl_mdio_mux_probe,
- .remove = gxl_mdio_mux_remove,
+ .remove_new = gxl_mdio_mux_remove,
.driver = {
.name = "gxl-mdio-mux",
.of_match_table = gxl_mdio_mux_match,
diff --git a/drivers/net/mdio/mdio-mux-mmioreg.c b/drivers/net/mdio/mdio-mux-mmioreg.c
index 09af150ed774..de08419d0c98 100644
--- a/drivers/net/mdio/mdio-mux-mmioreg.c
+++ b/drivers/net/mdio/mdio-mux-mmioreg.c
@@ -169,13 +169,11 @@ static int mdio_mux_mmioreg_probe(struct platform_device *pdev)
return 0;
}
-static int mdio_mux_mmioreg_remove(struct platform_device *pdev)
+static void mdio_mux_mmioreg_remove(struct platform_device *pdev)
{
struct mdio_mux_mmioreg_state *s = dev_get_platdata(&pdev->dev);
mdio_mux_uninit(s->mux_handle);
-
- return 0;
}
static const struct of_device_id mdio_mux_mmioreg_match[] = {
@@ -192,7 +190,7 @@ static struct platform_driver mdio_mux_mmioreg_driver = {
.of_match_table = mdio_mux_mmioreg_match,
},
.probe = mdio_mux_mmioreg_probe,
- .remove = mdio_mux_mmioreg_remove,
+ .remove_new = mdio_mux_mmioreg_remove,
};
module_platform_driver(mdio_mux_mmioreg_driver);
diff --git a/drivers/net/mdio/mdio-mux-multiplexer.c b/drivers/net/mdio/mdio-mux-multiplexer.c
index bfa5af577b0a..569b13383191 100644
--- a/drivers/net/mdio/mdio-mux-multiplexer.c
+++ b/drivers/net/mdio/mdio-mux-multiplexer.c
@@ -85,7 +85,7 @@ static int mdio_mux_multiplexer_probe(struct platform_device *pdev)
return ret;
}
-static int mdio_mux_multiplexer_remove(struct platform_device *pdev)
+static void mdio_mux_multiplexer_remove(struct platform_device *pdev)
{
struct mdio_mux_multiplexer_state *s = platform_get_drvdata(pdev);
@@ -93,8 +93,6 @@ static int mdio_mux_multiplexer_remove(struct platform_device *pdev)
if (s->do_deselect)
mux_control_deselect(s->muxc);
-
- return 0;
}
static const struct of_device_id mdio_mux_multiplexer_match[] = {
@@ -109,7 +107,7 @@ static struct platform_driver mdio_mux_multiplexer_driver = {
.of_match_table = mdio_mux_multiplexer_match,
},
.probe = mdio_mux_multiplexer_probe,
- .remove = mdio_mux_multiplexer_remove,
+ .remove_new = mdio_mux_multiplexer_remove,
};
module_platform_driver(mdio_mux_multiplexer_driver);
diff --git a/drivers/net/mdio/mdio-octeon.c b/drivers/net/mdio/mdio-octeon.c
index 7c65c547d377..037a38cfed56 100644
--- a/drivers/net/mdio/mdio-octeon.c
+++ b/drivers/net/mdio/mdio-octeon.c
@@ -78,7 +78,7 @@ fail_register:
return err;
}
-static int octeon_mdiobus_remove(struct platform_device *pdev)
+static void octeon_mdiobus_remove(struct platform_device *pdev)
{
struct cavium_mdiobus *bus;
union cvmx_smix_en smi_en;
@@ -88,7 +88,6 @@ static int octeon_mdiobus_remove(struct platform_device *pdev)
mdiobus_unregister(bus->mii_bus);
smi_en.u64 = 0;
oct_mdio_writeq(smi_en.u64, bus->register_base + SMI_EN);
- return 0;
}
static const struct of_device_id octeon_mdiobus_match[] = {
@@ -105,7 +104,7 @@ static struct platform_driver octeon_mdiobus_driver = {
.of_match_table = octeon_mdiobus_match,
},
.probe = octeon_mdiobus_probe,
- .remove = octeon_mdiobus_remove,
+ .remove_new = octeon_mdiobus_remove,
};
module_platform_driver(octeon_mdiobus_driver);
diff --git a/drivers/net/mdio/mdio-sun4i.c b/drivers/net/mdio/mdio-sun4i.c
index f798de3276dc..4511bcc73b36 100644
--- a/drivers/net/mdio/mdio-sun4i.c
+++ b/drivers/net/mdio/mdio-sun4i.c
@@ -142,7 +142,7 @@ err_out_free_mdiobus:
return ret;
}
-static int sun4i_mdio_remove(struct platform_device *pdev)
+static void sun4i_mdio_remove(struct platform_device *pdev)
{
struct mii_bus *bus = platform_get_drvdata(pdev);
struct sun4i_mdio_data *data = bus->priv;
@@ -151,8 +151,6 @@ static int sun4i_mdio_remove(struct platform_device *pdev)
if (data->regulator)
regulator_disable(data->regulator);
mdiobus_free(bus);
-
- return 0;
}
static const struct of_device_id sun4i_mdio_dt_ids[] = {
@@ -166,7 +164,7 @@ MODULE_DEVICE_TABLE(of, sun4i_mdio_dt_ids);
static struct platform_driver sun4i_mdio_driver = {
.probe = sun4i_mdio_probe,
- .remove = sun4i_mdio_remove,
+ .remove_new = sun4i_mdio_remove,
.driver = {
.name = "sun4i-mdio",
.of_match_table = sun4i_mdio_dt_ids,
diff --git a/drivers/net/mdio/mdio-xgene.c b/drivers/net/mdio/mdio-xgene.c
index 1190a793555a..7909d7caf45c 100644
--- a/drivers/net/mdio/mdio-xgene.c
+++ b/drivers/net/mdio/mdio-xgene.c
@@ -432,7 +432,7 @@ out_clk:
return ret;
}
-static int xgene_mdio_remove(struct platform_device *pdev)
+static void xgene_mdio_remove(struct platform_device *pdev)
{
struct xgene_mdio_pdata *pdata = platform_get_drvdata(pdev);
struct mii_bus *mdio_bus = pdata->mdio_bus;
@@ -443,8 +443,6 @@ static int xgene_mdio_remove(struct platform_device *pdev)
if (dev->of_node)
clk_disable_unprepare(pdata->clk);
-
- return 0;
}
static struct platform_driver xgene_mdio_driver = {
@@ -454,7 +452,7 @@ static struct platform_driver xgene_mdio_driver = {
.acpi_match_table = ACPI_PTR(xgene_mdio_acpi_match),
},
.probe = xgene_mdio_probe,
- .remove = xgene_mdio_remove,
+ .remove_new = xgene_mdio_remove,
};
module_platform_driver(xgene_mdio_driver);
diff --git a/drivers/net/phy/ax88796b.c b/drivers/net/phy/ax88796b.c
index 0f1e617a26c9..eb74a8cf8df1 100644
--- a/drivers/net/phy/ax88796b.c
+++ b/drivers/net/phy/ax88796b.c
@@ -90,7 +90,7 @@ static void asix_ax88772a_link_change_notify(struct phy_device *phydev)
*/
if (phydev->state == PHY_NOLINK) {
phy_init_hw(phydev);
- phy_start_aneg(phydev);
+ _phy_start_aneg(phydev);
}
}
diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index df54c137c5f5..a5fa077650e8 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -981,7 +981,7 @@ static int phy_check_link_status(struct phy_device *phydev)
* If the PHYCONTROL Layer is operating, we change the state to
* reflect the beginning of Auto-negotiation or forcing.
*/
-static int _phy_start_aneg(struct phy_device *phydev)
+int _phy_start_aneg(struct phy_device *phydev)
{
int err;
@@ -1002,6 +1002,7 @@ static int _phy_start_aneg(struct phy_device *phydev)
return err;
}
+EXPORT_SYMBOL(_phy_start_aneg);
/**
* phy_start_aneg - start auto-negotiation for this PHY device
@@ -1231,9 +1232,7 @@ static void phy_error_precise(struct phy_device *phydev,
const void *func, int err)
{
WARN(1, "%pS: returned: %d\n", func, err);
- mutex_lock(&phydev->lock);
phy_process_error(phydev);
- mutex_unlock(&phydev->lock);
}
/**
@@ -1355,6 +1354,113 @@ void phy_free_interrupt(struct phy_device *phydev)
}
EXPORT_SYMBOL(phy_free_interrupt);
+enum phy_state_work {
+ PHY_STATE_WORK_NONE,
+ PHY_STATE_WORK_ANEG,
+ PHY_STATE_WORK_SUSPEND,
+};
+
+static enum phy_state_work _phy_state_machine(struct phy_device *phydev)
+{
+ enum phy_state_work state_work = PHY_STATE_WORK_NONE;
+ struct net_device *dev = phydev->attached_dev;
+ enum phy_state old_state = phydev->state;
+ const void *func = NULL;
+ bool finished = false;
+ int err = 0;
+
+ switch (phydev->state) {
+ case PHY_DOWN:
+ case PHY_READY:
+ break;
+ case PHY_UP:
+ state_work = PHY_STATE_WORK_ANEG;
+ break;
+ case PHY_NOLINK:
+ case PHY_RUNNING:
+ err = phy_check_link_status(phydev);
+ func = &phy_check_link_status;
+ break;
+ case PHY_CABLETEST:
+ err = phydev->drv->cable_test_get_status(phydev, &finished);
+ if (err) {
+ phy_abort_cable_test(phydev);
+ netif_testing_off(dev);
+ state_work = PHY_STATE_WORK_ANEG;
+ phydev->state = PHY_UP;
+ break;
+ }
+
+ if (finished) {
+ ethnl_cable_test_finished(phydev);
+ netif_testing_off(dev);
+ state_work = PHY_STATE_WORK_ANEG;
+ phydev->state = PHY_UP;
+ }
+ break;
+ case PHY_HALTED:
+ case PHY_ERROR:
+ if (phydev->link) {
+ phydev->link = 0;
+ phy_link_down(phydev);
+ }
+ state_work = PHY_STATE_WORK_SUSPEND;
+ break;
+ }
+
+ if (state_work == PHY_STATE_WORK_ANEG) {
+ err = _phy_start_aneg(phydev);
+ func = &_phy_start_aneg;
+ }
+
+ if (err == -ENODEV)
+ return state_work;
+
+ if (err < 0)
+ phy_error_precise(phydev, func, err);
+
+ phy_process_state_change(phydev, old_state);
+
+ /* Only re-schedule a PHY state machine change if we are polling the
+ * PHY, if PHY_MAC_INTERRUPT is set, then we will be moving
+ * between states from phy_mac_interrupt().
+ *
+ * In state PHY_HALTED the PHY gets suspended, so rescheduling the
+ * state machine would be pointless and possibly error prone when
+ * called from phy_disconnect() synchronously.
+ */
+ if (phy_polling_mode(phydev) && phy_is_started(phydev))
+ phy_queue_state_machine(phydev, PHY_STATE_TIME);
+
+ return state_work;
+}
+
+/* unlocked part of the PHY state machine */
+static void _phy_state_machine_post_work(struct phy_device *phydev,
+ enum phy_state_work state_work)
+{
+ if (state_work == PHY_STATE_WORK_SUSPEND)
+ phy_suspend(phydev);
+}
+
+/**
+ * phy_state_machine - Handle the state machine
+ * @work: work_struct that describes the work to be done
+ */
+void phy_state_machine(struct work_struct *work)
+{
+ struct delayed_work *dwork = to_delayed_work(work);
+ struct phy_device *phydev =
+ container_of(dwork, struct phy_device, state_queue);
+ enum phy_state_work state_work;
+
+ mutex_lock(&phydev->lock);
+ state_work = _phy_state_machine(phydev);
+ mutex_unlock(&phydev->lock);
+
+ _phy_state_machine_post_work(phydev, state_work);
+}
+
/**
* phy_stop - Bring down the PHY link, and stop checking the status
* @phydev: target phy_device struct
@@ -1362,6 +1468,7 @@ EXPORT_SYMBOL(phy_free_interrupt);
void phy_stop(struct phy_device *phydev)
{
struct net_device *dev = phydev->attached_dev;
+ enum phy_state_work state_work;
enum phy_state old_state;
if (!phy_is_started(phydev) && phydev->state != PHY_DOWN &&
@@ -1385,9 +1492,10 @@ void phy_stop(struct phy_device *phydev)
phydev->state = PHY_HALTED;
phy_process_state_change(phydev, old_state);
+ state_work = _phy_state_machine(phydev);
mutex_unlock(&phydev->lock);
- phy_state_machine(&phydev->state_queue.work);
+ _phy_state_machine_post_work(phydev, state_work);
phy_stop_machine(phydev);
/* Cannot call flush_scheduled_work() here as desired because
@@ -1432,97 +1540,6 @@ out:
EXPORT_SYMBOL(phy_start);
/**
- * phy_state_machine - Handle the state machine
- * @work: work_struct that describes the work to be done
- */
-void phy_state_machine(struct work_struct *work)
-{
- struct delayed_work *dwork = to_delayed_work(work);
- struct phy_device *phydev =
- container_of(dwork, struct phy_device, state_queue);
- struct net_device *dev = phydev->attached_dev;
- bool needs_aneg = false, do_suspend = false;
- enum phy_state old_state;
- const void *func = NULL;
- bool finished = false;
- int err = 0;
-
- mutex_lock(&phydev->lock);
-
- old_state = phydev->state;
-
- switch (phydev->state) {
- case PHY_DOWN:
- case PHY_READY:
- break;
- case PHY_UP:
- needs_aneg = true;
-
- break;
- case PHY_NOLINK:
- case PHY_RUNNING:
- err = phy_check_link_status(phydev);
- func = &phy_check_link_status;
- break;
- case PHY_CABLETEST:
- err = phydev->drv->cable_test_get_status(phydev, &finished);
- if (err) {
- phy_abort_cable_test(phydev);
- netif_testing_off(dev);
- needs_aneg = true;
- phydev->state = PHY_UP;
- break;
- }
-
- if (finished) {
- ethnl_cable_test_finished(phydev);
- netif_testing_off(dev);
- needs_aneg = true;
- phydev->state = PHY_UP;
- }
- break;
- case PHY_HALTED:
- case PHY_ERROR:
- if (phydev->link) {
- phydev->link = 0;
- phy_link_down(phydev);
- }
- do_suspend = true;
- break;
- }
-
- mutex_unlock(&phydev->lock);
-
- if (needs_aneg) {
- err = phy_start_aneg(phydev);
- func = &phy_start_aneg;
- } else if (do_suspend) {
- phy_suspend(phydev);
- }
-
- if (err == -ENODEV)
- return;
-
- if (err < 0)
- phy_error_precise(phydev, func, err);
-
- phy_process_state_change(phydev, old_state);
-
- /* Only re-schedule a PHY state machine change if we are polling the
- * PHY, if PHY_MAC_INTERRUPT is set, then we will be moving
- * between states from phy_mac_interrupt().
- *
- * In state PHY_HALTED the PHY gets suspended, so rescheduling the
- * state machine would be pointless and possibly error prone when
- * called from phy_disconnect() synchronously.
- */
- mutex_lock(&phydev->lock);
- if (phy_polling_mode(phydev) && phy_is_started(phydev))
- phy_queue_state_machine(phydev, PHY_STATE_TIME);
- mutex_unlock(&phydev->lock);
-}
-
-/**
* phy_mac_interrupt - MAC says the link has changed
* @phydev: phy_device struct with changed link
*
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c b/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c
index fc7ace638ce8..e7d8e03f826f 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c
@@ -591,7 +591,7 @@ static void mt7915_mmio_wed_release_rx_buf(struct mtk_wed_device *wed)
static u32 mt7915_mmio_wed_init_rx_buf(struct mtk_wed_device *wed, int size)
{
- struct mtk_rxbm_desc *desc = wed->rx_buf_ring.desc;
+ struct mtk_wed_bm_desc *desc = wed->rx_buf_ring.desc;
struct mt76_txwi_cache *t = NULL;
struct mt7915_dev *dev;
struct mt76_queue *q;
diff --git a/drivers/net/wwan/wwan_core.c b/drivers/net/wwan/wwan_core.c
index 284ab1f56391..87df60916960 100644
--- a/drivers/net/wwan/wwan_core.c
+++ b/drivers/net/wwan/wwan_core.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2021, Linaro Ltd <loic.poulain@linaro.org> */
+#include <linux/bitmap.h>
#include <linux/err.h>
#include <linux/errno.h>
#include <linux/debugfs.h>
@@ -395,7 +396,7 @@ static int __wwan_port_dev_assign_name(struct wwan_port *port, const char *fmt)
char buf[0x20];
int id;
- idmap = (unsigned long *)get_zeroed_page(GFP_KERNEL);
+ idmap = bitmap_zalloc(max_ports, GFP_KERNEL);
if (!idmap)
return -ENOMEM;
@@ -414,7 +415,7 @@ static int __wwan_port_dev_assign_name(struct wwan_port *port, const char *fmt)
/* Allocate unique id */
id = find_first_zero_bit(idmap, max_ports);
- free_page((unsigned long)idmap);
+ bitmap_free(idmap);
snprintf(buf, sizeof(buf), fmt, id); /* Name generation */
diff --git a/drivers/ptp/Kconfig b/drivers/ptp/Kconfig
index ed9d97a032f1..5dd5f188e14f 100644
--- a/drivers/ptp/Kconfig
+++ b/drivers/ptp/Kconfig
@@ -188,6 +188,7 @@ config PTP_1588_CLOCK_OCP
depends on COMMON_CLK
select NET_DEVLINK
select CRC16
+ select DPLL
help
This driver adds support for an OpenCompute time card.
diff --git a/drivers/ptp/ptp_ocp.c b/drivers/ptp/ptp_ocp.c
index a7a6947ab4bc..41eaffcae462 100644
--- a/drivers/ptp/ptp_ocp.c
+++ b/drivers/ptp/ptp_ocp.c
@@ -23,6 +23,7 @@
#include <linux/mtd/mtd.h>
#include <linux/nvmem-consumer.h>
#include <linux/crc16.h>
+#include <linux/dpll.h>
#define PCI_VENDOR_ID_FACEBOOK 0x1d9b
#define PCI_DEVICE_ID_FACEBOOK_TIMECARD 0x0400
@@ -260,12 +261,21 @@ enum ptp_ocp_sma_mode {
SMA_MODE_OUT,
};
+static struct dpll_pin_frequency ptp_ocp_sma_freq[] = {
+ DPLL_PIN_FREQUENCY_1PPS,
+ DPLL_PIN_FREQUENCY_10MHZ,
+ DPLL_PIN_FREQUENCY_IRIG_B,
+ DPLL_PIN_FREQUENCY_DCF77,
+};
+
struct ptp_ocp_sma_connector {
enum ptp_ocp_sma_mode mode;
bool fixed_fcn;
bool fixed_dir;
bool disabled;
u8 default_fcn;
+ struct dpll_pin *dpll_pin;
+ struct dpll_pin_properties dpll_prop;
};
struct ocp_attr_group {
@@ -294,6 +304,7 @@ struct ptp_ocp_serial_port {
#define OCP_BOARD_ID_LEN 13
#define OCP_SERIAL_LEN 6
+#define OCP_SMA_NUM 4
struct ptp_ocp {
struct pci_dev *pdev;
@@ -331,7 +342,9 @@ struct ptp_ocp {
const struct attribute_group **attr_group;
const struct ptp_ocp_eeprom_map *eeprom_map;
struct dentry *debug_root;
+ bool sync;
time64_t gnss_lost;
+ struct delayed_work sync_work;
int id;
int n_irqs;
struct ptp_ocp_serial_port gnss_port;
@@ -350,8 +363,9 @@ struct ptp_ocp {
u32 ts_window_adjust;
u64 fw_cap;
struct ptp_ocp_signal signal[4];
- struct ptp_ocp_sma_connector sma[4];
+ struct ptp_ocp_sma_connector sma[OCP_SMA_NUM];
const struct ocp_sma_op *sma_op;
+ struct dpll_device *dpll;
};
#define OCP_REQ_TIMESTAMP BIT(0)
@@ -835,6 +849,7 @@ static DEFINE_IDR(ptp_ocp_idr);
struct ocp_selector {
const char *name;
int value;
+ u64 frequency;
};
static const struct ocp_selector ptp_ocp_clock[] = {
@@ -855,31 +870,31 @@ static const struct ocp_selector ptp_ocp_clock[] = {
#define SMA_SELECT_MASK GENMASK(14, 0)
static const struct ocp_selector ptp_ocp_sma_in[] = {
- { .name = "10Mhz", .value = 0x0000 },
- { .name = "PPS1", .value = 0x0001 },
- { .name = "PPS2", .value = 0x0002 },
- { .name = "TS1", .value = 0x0004 },
- { .name = "TS2", .value = 0x0008 },
- { .name = "IRIG", .value = 0x0010 },
- { .name = "DCF", .value = 0x0020 },
- { .name = "TS3", .value = 0x0040 },
- { .name = "TS4", .value = 0x0080 },
- { .name = "FREQ1", .value = 0x0100 },
- { .name = "FREQ2", .value = 0x0200 },
- { .name = "FREQ3", .value = 0x0400 },
- { .name = "FREQ4", .value = 0x0800 },
- { .name = "None", .value = SMA_DISABLE },
+ { .name = "10Mhz", .value = 0x0000, .frequency = 10000000 },
+ { .name = "PPS1", .value = 0x0001, .frequency = 1 },
+ { .name = "PPS2", .value = 0x0002, .frequency = 1 },
+ { .name = "TS1", .value = 0x0004, .frequency = 0 },
+ { .name = "TS2", .value = 0x0008, .frequency = 0 },
+ { .name = "IRIG", .value = 0x0010, .frequency = 10000 },
+ { .name = "DCF", .value = 0x0020, .frequency = 77500 },
+ { .name = "TS3", .value = 0x0040, .frequency = 0 },
+ { .name = "TS4", .value = 0x0080, .frequency = 0 },
+ { .name = "FREQ1", .value = 0x0100, .frequency = 0 },
+ { .name = "FREQ2", .value = 0x0200, .frequency = 0 },
+ { .name = "FREQ3", .value = 0x0400, .frequency = 0 },
+ { .name = "FREQ4", .value = 0x0800, .frequency = 0 },
+ { .name = "None", .value = SMA_DISABLE, .frequency = 0 },
{ }
};
static const struct ocp_selector ptp_ocp_sma_out[] = {
- { .name = "10Mhz", .value = 0x0000 },
- { .name = "PHC", .value = 0x0001 },
- { .name = "MAC", .value = 0x0002 },
- { .name = "GNSS1", .value = 0x0004 },
- { .name = "GNSS2", .value = 0x0008 },
- { .name = "IRIG", .value = 0x0010 },
- { .name = "DCF", .value = 0x0020 },
+ { .name = "10Mhz", .value = 0x0000, .frequency = 10000000 },
+ { .name = "PHC", .value = 0x0001, .frequency = 1 },
+ { .name = "MAC", .value = 0x0002, .frequency = 1 },
+ { .name = "GNSS1", .value = 0x0004, .frequency = 1 },
+ { .name = "GNSS2", .value = 0x0008, .frequency = 1 },
+ { .name = "IRIG", .value = 0x0010, .frequency = 10000 },
+ { .name = "DCF", .value = 0x0020, .frequency = 77000 },
{ .name = "GEN1", .value = 0x0040 },
{ .name = "GEN2", .value = 0x0080 },
{ .name = "GEN3", .value = 0x0100 },
@@ -890,15 +905,15 @@ static const struct ocp_selector ptp_ocp_sma_out[] = {
};
static const struct ocp_selector ptp_ocp_art_sma_in[] = {
- { .name = "PPS1", .value = 0x0001 },
- { .name = "10Mhz", .value = 0x0008 },
+ { .name = "PPS1", .value = 0x0001, .frequency = 1 },
+ { .name = "10Mhz", .value = 0x0008, .frequency = 1000000 },
{ }
};
static const struct ocp_selector ptp_ocp_art_sma_out[] = {
- { .name = "PHC", .value = 0x0002 },
- { .name = "GNSS", .value = 0x0004 },
- { .name = "10Mhz", .value = 0x0010 },
+ { .name = "PHC", .value = 0x0002, .frequency = 1 },
+ { .name = "GNSS", .value = 0x0004, .frequency = 1 },
+ { .name = "10Mhz", .value = 0x0010, .frequency = 10000000 },
{ }
};
@@ -1351,7 +1366,6 @@ static int
ptp_ocp_init_clock(struct ptp_ocp *bp)
{
struct timespec64 ts;
- bool sync;
u32 ctrl;
ctrl = OCP_CTRL_ENABLE;
@@ -1375,8 +1389,8 @@ ptp_ocp_init_clock(struct ptp_ocp *bp)
ptp_ocp_estimate_pci_timing(bp);
- sync = ioread32(&bp->reg->status) & OCP_STATUS_IN_SYNC;
- if (!sync) {
+ bp->sync = ioread32(&bp->reg->status) & OCP_STATUS_IN_SYNC;
+ if (!bp->sync) {
ktime_get_clocktai_ts64(&ts);
ptp_ocp_settime(&bp->ptp_info, &ts);
}
@@ -2289,22 +2303,35 @@ ptp_ocp_sma_fb_set_inputs(struct ptp_ocp *bp, int sma_nr, u32 val)
static void
ptp_ocp_sma_fb_init(struct ptp_ocp *bp)
{
+ struct dpll_pin_properties prop = {
+ .board_label = NULL,
+ .type = DPLL_PIN_TYPE_EXT,
+ .capabilities = DPLL_PIN_CAPABILITIES_DIRECTION_CAN_CHANGE,
+ .freq_supported_num = ARRAY_SIZE(ptp_ocp_sma_freq),
+ .freq_supported = ptp_ocp_sma_freq,
+
+ };
u32 reg;
int i;
/* defaults */
+ for (i = 0; i < OCP_SMA_NUM; i++) {
+ bp->sma[i].default_fcn = i & 1;
+ bp->sma[i].dpll_prop = prop;
+ bp->sma[i].dpll_prop.board_label =
+ bp->ptp_info.pin_config[i].name;
+ }
bp->sma[0].mode = SMA_MODE_IN;
bp->sma[1].mode = SMA_MODE_IN;
bp->sma[2].mode = SMA_MODE_OUT;
bp->sma[3].mode = SMA_MODE_OUT;
- for (i = 0; i < 4; i++)
- bp->sma[i].default_fcn = i & 1;
-
/* If no SMA1 map, the pin functions and directions are fixed. */
if (!bp->sma_map1) {
- for (i = 0; i < 4; i++) {
+ for (i = 0; i < OCP_SMA_NUM; i++) {
bp->sma[i].fixed_fcn = true;
bp->sma[i].fixed_dir = true;
+ bp->sma[1].dpll_prop.capabilities &=
+ ~DPLL_PIN_CAPABILITIES_DIRECTION_CAN_CHANGE;
}
return;
}
@@ -2314,7 +2341,7 @@ ptp_ocp_sma_fb_init(struct ptp_ocp *bp)
*/
reg = ioread32(&bp->sma_map2->gpio2);
if (reg == 0xffffffff) {
- for (i = 0; i < 4; i++)
+ for (i = 0; i < OCP_SMA_NUM; i++)
bp->sma[i].fixed_dir = true;
} else {
reg = ioread32(&bp->sma_map1->gpio1);
@@ -2336,7 +2363,7 @@ static const struct ocp_sma_op ocp_fb_sma_op = {
};
static int
-ptp_ocp_fb_set_pins(struct ptp_ocp *bp)
+ptp_ocp_set_pins(struct ptp_ocp *bp)
{
struct ptp_pin_desc *config;
int i;
@@ -2403,16 +2430,16 @@ ptp_ocp_fb_board_init(struct ptp_ocp *bp, struct ocp_resource *r)
ptp_ocp_tod_init(bp);
ptp_ocp_nmea_out_init(bp);
- ptp_ocp_sma_init(bp);
ptp_ocp_signal_init(bp);
err = ptp_ocp_attr_group_add(bp, fb_timecard_groups);
if (err)
return err;
- err = ptp_ocp_fb_set_pins(bp);
+ err = ptp_ocp_set_pins(bp);
if (err)
return err;
+ ptp_ocp_sma_init(bp);
return ptp_ocp_init_clock(bp);
}
@@ -2452,6 +2479,14 @@ ptp_ocp_register_resources(struct ptp_ocp *bp, kernel_ulong_t driver_data)
static void
ptp_ocp_art_sma_init(struct ptp_ocp *bp)
{
+ struct dpll_pin_properties prop = {
+ .board_label = NULL,
+ .type = DPLL_PIN_TYPE_EXT,
+ .capabilities = 0,
+ .freq_supported_num = ARRAY_SIZE(ptp_ocp_sma_freq),
+ .freq_supported = ptp_ocp_sma_freq,
+
+ };
u32 reg;
int i;
@@ -2466,16 +2501,16 @@ ptp_ocp_art_sma_init(struct ptp_ocp *bp)
bp->sma[2].default_fcn = 0x10; /* OUT: 10Mhz */
bp->sma[3].default_fcn = 0x02; /* OUT: PHC */
- /* If no SMA map, the pin functions and directions are fixed. */
- if (!bp->art_sma) {
- for (i = 0; i < 4; i++) {
+ for (i = 0; i < OCP_SMA_NUM; i++) {
+ /* If no SMA map, the pin functions and directions are fixed. */
+ bp->sma[i].dpll_prop = prop;
+ bp->sma[i].dpll_prop.board_label =
+ bp->ptp_info.pin_config[i].name;
+ if (!bp->art_sma) {
bp->sma[i].fixed_fcn = true;
bp->sma[i].fixed_dir = true;
+ continue;
}
- return;
- }
-
- for (i = 0; i < 4; i++) {
reg = ioread32(&bp->art_sma->map[i].gpio);
switch (reg & 0xff) {
@@ -2486,9 +2521,13 @@ ptp_ocp_art_sma_init(struct ptp_ocp *bp)
case 1:
case 8:
bp->sma[i].mode = SMA_MODE_IN;
+ bp->sma[i].dpll_prop.capabilities =
+ DPLL_PIN_CAPABILITIES_DIRECTION_CAN_CHANGE;
break;
default:
bp->sma[i].mode = SMA_MODE_OUT;
+ bp->sma[i].dpll_prop.capabilities =
+ DPLL_PIN_CAPABILITIES_DIRECTION_CAN_CHANGE;
break;
}
}
@@ -2555,6 +2594,9 @@ ptp_ocp_art_board_init(struct ptp_ocp *bp, struct ocp_resource *r)
/* Enable MAC serial port during initialisation */
iowrite32(1, &bp->board_config->mro50_serial_activate);
+ err = ptp_ocp_set_pins(bp);
+ if (err)
+ return err;
ptp_ocp_sma_init(bp);
err = ptp_ocp_attr_group_add(bp, art_timecard_groups);
@@ -2696,16 +2738,9 @@ sma4_show(struct device *dev, struct device_attribute *attr, char *buf)
}
static int
-ptp_ocp_sma_store(struct ptp_ocp *bp, const char *buf, int sma_nr)
+ptp_ocp_sma_store_val(struct ptp_ocp *bp, int val, enum ptp_ocp_sma_mode mode, int sma_nr)
{
struct ptp_ocp_sma_connector *sma = &bp->sma[sma_nr - 1];
- enum ptp_ocp_sma_mode mode;
- int val;
-
- mode = sma->mode;
- val = sma_parse_inputs(bp->sma_op->tbl, buf, &mode);
- if (val < 0)
- return val;
if (sma->fixed_dir && (mode != sma->mode || val & SMA_DISABLE))
return -EOPNOTSUPP;
@@ -2740,6 +2775,20 @@ ptp_ocp_sma_store(struct ptp_ocp *bp, const char *buf, int sma_nr)
return val;
}
+static int
+ptp_ocp_sma_store(struct ptp_ocp *bp, const char *buf, int sma_nr)
+{
+ struct ptp_ocp_sma_connector *sma = &bp->sma[sma_nr - 1];
+ enum ptp_ocp_sma_mode mode;
+ int val;
+
+ mode = sma->mode;
+ val = sma_parse_inputs(bp->sma_op->tbl, buf, &mode);
+ if (val < 0)
+ return val;
+ return ptp_ocp_sma_store_val(bp, val, mode, sma_nr);
+}
+
static ssize_t
sma1_store(struct device *dev, struct device_attribute *attr,
const char *buf, size_t count)
@@ -3834,9 +3883,8 @@ ptp_ocp_summary_show(struct seq_file *s, void *data)
strcpy(buf, "unknown");
break;
}
- val = ioread32(&bp->reg->status);
seq_printf(s, "%7s: %s, state: %s\n", "PHC src", buf,
- val & OCP_STATUS_IN_SYNC ? "sync" : "unsynced");
+ bp->sync ? "sync" : "unsynced");
if (!ptp_ocp_gettimex(&bp->ptp_info, &ts, &sts)) {
struct timespec64 sys_ts;
@@ -4067,7 +4115,6 @@ ptp_ocp_phc_info(struct ptp_ocp *bp)
{
struct timespec64 ts;
u32 version, select;
- bool sync;
version = ioread32(&bp->reg->version);
select = ioread32(&bp->reg->select);
@@ -4076,11 +4123,10 @@ ptp_ocp_phc_info(struct ptp_ocp *bp)
ptp_ocp_select_name_from_val(ptp_ocp_clock, select >> 16),
ptp_clock_index(bp->ptp));
- sync = ioread32(&bp->reg->status) & OCP_STATUS_IN_SYNC;
if (!ptp_ocp_gettimex(&bp->ptp_info, &ts, NULL))
dev_info(&bp->pdev->dev, "Time: %lld.%ld, %s\n",
ts.tv_sec, ts.tv_nsec,
- sync ? "in-sync" : "UNSYNCED");
+ bp->sync ? "in-sync" : "UNSYNCED");
}
static void
@@ -4177,12 +4223,168 @@ ptp_ocp_detach(struct ptp_ocp *bp)
device_unregister(&bp->dev);
}
+static int ptp_ocp_dpll_lock_status_get(const struct dpll_device *dpll,
+ void *priv,
+ enum dpll_lock_status *status,
+ struct netlink_ext_ack *extack)
+{
+ struct ptp_ocp *bp = priv;
+
+ *status = bp->sync ? DPLL_LOCK_STATUS_LOCKED : DPLL_LOCK_STATUS_UNLOCKED;
+
+ return 0;
+}
+
+static int ptp_ocp_dpll_state_get(const struct dpll_pin *pin, void *pin_priv,
+ const struct dpll_device *dpll, void *priv,
+ enum dpll_pin_state *state,
+ struct netlink_ext_ack *extack)
+{
+ struct ptp_ocp *bp = priv;
+ int idx;
+
+ if (bp->pps_select) {
+ idx = ioread32(&bp->pps_select->gpio1);
+ *state = (&bp->sma[idx] == pin_priv) ? DPLL_PIN_STATE_CONNECTED :
+ DPLL_PIN_STATE_SELECTABLE;
+ return 0;
+ }
+ NL_SET_ERR_MSG(extack, "pin selection is not supported on current HW");
+ return -EINVAL;
+}
+
+static int ptp_ocp_dpll_mode_get(const struct dpll_device *dpll, void *priv,
+ u32 *mode, struct netlink_ext_ack *extack)
+{
+ *mode = DPLL_MODE_AUTOMATIC;
+ return 0;
+}
+
+static bool ptp_ocp_dpll_mode_supported(const struct dpll_device *dpll,
+ void *priv, const enum dpll_mode mode,
+ struct netlink_ext_ack *extack)
+{
+ return mode == DPLL_MODE_AUTOMATIC;
+}
+
+static int ptp_ocp_dpll_direction_get(const struct dpll_pin *pin,
+ void *pin_priv,
+ const struct dpll_device *dpll,
+ void *priv,
+ enum dpll_pin_direction *direction,
+ struct netlink_ext_ack *extack)
+{
+ struct ptp_ocp_sma_connector *sma = pin_priv;
+
+ *direction = sma->mode == SMA_MODE_IN ?
+ DPLL_PIN_DIRECTION_INPUT :
+ DPLL_PIN_DIRECTION_OUTPUT;
+ return 0;
+}
+
+static int ptp_ocp_dpll_direction_set(const struct dpll_pin *pin,
+ void *pin_priv,
+ const struct dpll_device *dpll,
+ void *dpll_priv,
+ enum dpll_pin_direction direction,
+ struct netlink_ext_ack *extack)
+{
+ struct ptp_ocp_sma_connector *sma = pin_priv;
+ struct ptp_ocp *bp = dpll_priv;
+ enum ptp_ocp_sma_mode mode;
+ int sma_nr = (sma - bp->sma);
+
+ if (sma->fixed_dir)
+ return -EOPNOTSUPP;
+ mode = direction == DPLL_PIN_DIRECTION_INPUT ?
+ SMA_MODE_IN : SMA_MODE_OUT;
+ return ptp_ocp_sma_store_val(bp, 0, mode, sma_nr);
+}
+
+static int ptp_ocp_dpll_frequency_set(const struct dpll_pin *pin,
+ void *pin_priv,
+ const struct dpll_device *dpll,
+ void *dpll_priv, u64 frequency,
+ struct netlink_ext_ack *extack)
+{
+ struct ptp_ocp_sma_connector *sma = pin_priv;
+ struct ptp_ocp *bp = dpll_priv;
+ const struct ocp_selector *tbl;
+ int sma_nr = (sma - bp->sma);
+ int i;
+
+ if (sma->fixed_fcn)
+ return -EOPNOTSUPP;
+
+ tbl = bp->sma_op->tbl[sma->mode];
+ for (i = 0; tbl[i].name; i++)
+ if (tbl[i].frequency == frequency)
+ return ptp_ocp_sma_store_val(bp, i, sma->mode, sma_nr);
+ return -EINVAL;
+}
+
+static int ptp_ocp_dpll_frequency_get(const struct dpll_pin *pin,
+ void *pin_priv,
+ const struct dpll_device *dpll,
+ void *dpll_priv, u64 *frequency,
+ struct netlink_ext_ack *extack)
+{
+ struct ptp_ocp_sma_connector *sma = pin_priv;
+ struct ptp_ocp *bp = dpll_priv;
+ const struct ocp_selector *tbl;
+ int sma_nr = (sma - bp->sma);
+ u32 val;
+ int i;
+
+ val = bp->sma_op->get(bp, sma_nr);
+ tbl = bp->sma_op->tbl[sma->mode];
+ for (i = 0; tbl[i].name; i++)
+ if (val == tbl[i].value) {
+ *frequency = tbl[i].frequency;
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
+static const struct dpll_device_ops dpll_ops = {
+ .lock_status_get = ptp_ocp_dpll_lock_status_get,
+ .mode_get = ptp_ocp_dpll_mode_get,
+ .mode_supported = ptp_ocp_dpll_mode_supported,
+};
+
+static const struct dpll_pin_ops dpll_pins_ops = {
+ .frequency_get = ptp_ocp_dpll_frequency_get,
+ .frequency_set = ptp_ocp_dpll_frequency_set,
+ .direction_get = ptp_ocp_dpll_direction_get,
+ .direction_set = ptp_ocp_dpll_direction_set,
+ .state_on_dpll_get = ptp_ocp_dpll_state_get,
+};
+
+static void
+ptp_ocp_sync_work(struct work_struct *work)
+{
+ struct ptp_ocp *bp;
+ bool sync;
+
+ bp = container_of(work, struct ptp_ocp, sync_work.work);
+ sync = !!(ioread32(&bp->reg->status) & OCP_STATUS_IN_SYNC);
+
+ if (bp->sync != sync)
+ dpll_device_change_ntf(bp->dpll);
+
+ bp->sync = sync;
+
+ queue_delayed_work(system_power_efficient_wq, &bp->sync_work, HZ);
+}
+
static int
ptp_ocp_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
struct devlink *devlink;
struct ptp_ocp *bp;
- int err;
+ int err, i;
+ u64 clkid;
devlink = devlink_alloc(&ptp_ocp_devlink_ops, sizeof(*bp), &pdev->dev);
if (!devlink) {
@@ -4201,6 +4403,8 @@ ptp_ocp_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (err)
goto out_disable;
+ INIT_DELAYED_WORK(&bp->sync_work, ptp_ocp_sync_work);
+
/* compat mode.
* Older FPGA firmware only returns 2 irq's.
* allow this - if not all of the IRQ's are returned, skip the
@@ -4232,8 +4436,43 @@ ptp_ocp_probe(struct pci_dev *pdev, const struct pci_device_id *id)
ptp_ocp_info(bp);
devlink_register(devlink);
- return 0;
+ clkid = pci_get_dsn(pdev);
+ bp->dpll = dpll_device_get(clkid, 0, THIS_MODULE);
+ if (IS_ERR(bp->dpll)) {
+ err = PTR_ERR(bp->dpll);
+ dev_err(&pdev->dev, "dpll_device_alloc failed\n");
+ goto out;
+ }
+
+ err = dpll_device_register(bp->dpll, DPLL_TYPE_PPS, &dpll_ops, bp);
+ if (err)
+ goto out;
+
+ for (i = 0; i < OCP_SMA_NUM; i++) {
+ bp->sma[i].dpll_pin = dpll_pin_get(clkid, i, THIS_MODULE, &bp->sma[i].dpll_prop);
+ if (IS_ERR(bp->sma[i].dpll_pin)) {
+ err = PTR_ERR(bp->dpll);
+ goto out_dpll;
+ }
+
+ err = dpll_pin_register(bp->dpll, bp->sma[i].dpll_pin, &dpll_pins_ops,
+ &bp->sma[i]);
+ if (err) {
+ dpll_pin_put(bp->sma[i].dpll_pin);
+ goto out_dpll;
+ }
+ }
+ queue_delayed_work(system_power_efficient_wq, &bp->sync_work, HZ);
+
+ return 0;
+out_dpll:
+ while (i) {
+ --i;
+ dpll_pin_unregister(bp->dpll, bp->sma[i].dpll_pin, &dpll_pins_ops, &bp->sma[i]);
+ dpll_pin_put(bp->sma[i].dpll_pin);
+ }
+ dpll_device_put(bp->dpll);
out:
ptp_ocp_detach(bp);
out_disable:
@@ -4248,7 +4487,17 @@ ptp_ocp_remove(struct pci_dev *pdev)
{
struct ptp_ocp *bp = pci_get_drvdata(pdev);
struct devlink *devlink = priv_to_devlink(bp);
+ int i;
+ cancel_delayed_work_sync(&bp->sync_work);
+ for (i = 0; i < OCP_SMA_NUM; i++) {
+ if (bp->sma[i].dpll_pin) {
+ dpll_pin_unregister(bp->dpll, bp->sma[i].dpll_pin, &dpll_pins_ops, bp);
+ dpll_pin_put(bp->sma[i].dpll_pin);
+ }
+ }
+ dpll_device_unregister(bp->dpll, &dpll_ops, bp);
+ dpll_device_put(bp->dpll);
devlink_unregister(devlink);
ptp_ocp_detach(bp);
pci_disable_device(pdev);
diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
index 817d377a3f36..83711aad855c 100644
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c
@@ -114,6 +114,7 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
struct sk_buff *skb;
unsigned out, in;
size_t nbytes;
+ u32 offset;
int head;
skb = virtio_vsock_skb_dequeue(&vsock->send_pkt_queue);
@@ -156,7 +157,8 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
}
iov_iter_init(&iov_iter, ITER_DEST, &vq->iov[out], in, iov_len);
- payload_len = skb->len;
+ offset = VIRTIO_VSOCK_SKB_CB(skb)->offset;
+ payload_len = skb->len - offset;
hdr = virtio_vsock_hdr(skb);
/* If the packet is greater than the space available in the
@@ -197,8 +199,10 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
break;
}
- nbytes = copy_to_iter(skb->data, payload_len, &iov_iter);
- if (nbytes != payload_len) {
+ if (skb_copy_datagram_iter(skb,
+ offset,
+ &iov_iter,
+ payload_len)) {
kfree_skb(skb);
vq_err(vq, "Faulted on copying pkt buf\n");
break;
@@ -212,13 +216,13 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
vhost_add_used(vq, head, sizeof(*hdr) + payload_len);
added = true;
- skb_pull(skb, payload_len);
+ VIRTIO_VSOCK_SKB_CB(skb)->offset += payload_len;
total_len += payload_len;
/* If we didn't send all the payload we can requeue the packet
* to send it with the next available buffer.
*/
- if (skb->len > 0) {
+ if (VIRTIO_VSOCK_SKB_CB(skb)->offset < skb->len) {
hdr->flags |= cpu_to_le32(flags_to_restore);
/* We are queueing the same skb to handle
diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h
index d0807ad43f93..dd71d3009771 100644
--- a/include/linux/avf/virtchnl.h
+++ b/include/linux/avf/virtchnl.h
@@ -240,6 +240,7 @@ VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_vsi_resource);
#define VIRTCHNL_VF_OFFLOAD_REQ_QUEUES BIT(6)
/* used to negotiate communicating link speeds in Mbps */
#define VIRTCHNL_VF_CAP_ADV_LINK_SPEED BIT(7)
+#define VIRTCHNL_VF_OFFLOAD_CRC BIT(10)
#define VIRTCHNL_VF_OFFLOAD_VLAN_V2 BIT(15)
#define VIRTCHNL_VF_OFFLOAD_VLAN BIT(16)
#define VIRTCHNL_VF_OFFLOAD_RX_POLLING BIT(17)
@@ -295,7 +296,13 @@ VIRTCHNL_CHECK_STRUCT_LEN(24, virtchnl_txq_info);
/* VIRTCHNL_OP_CONFIG_RX_QUEUE
* VF sends this message to set up parameters for one RX queue.
* External data buffer contains one instance of virtchnl_rxq_info.
- * PF configures requested queue and returns a status code.
+ * PF configures requested queue and returns a status code. The
+ * crc_disable flag disables CRC stripping on the VF. Setting
+ * the crc_disable flag to 1 will disable CRC stripping for each
+ * queue in the VF where the flag is set. The VIRTCHNL_VF_OFFLOAD_CRC
+ * offload must have been set prior to sending this info or the PF
+ * will ignore the request. This flag should be set the same for
+ * all of the queues for a VF.
*/
/* Rx queue config info */
@@ -307,7 +314,7 @@ struct virtchnl_rxq_info {
u16 splithdr_enabled; /* deprecated with AVF 1.0 */
u32 databuffer_size;
u32 max_pkt_size;
- u8 pad0;
+ u8 crc_disable;
u8 rxdid;
u8 pad1[2];
u64 dma_ring_addr;
diff --git a/include/linux/bpf-cgroup-defs.h b/include/linux/bpf-cgroup-defs.h
index 7b121bd780eb..0985221d5478 100644
--- a/include/linux/bpf-cgroup-defs.h
+++ b/include/linux/bpf-cgroup-defs.h
@@ -28,19 +28,24 @@ enum cgroup_bpf_attach_type {
CGROUP_INET6_BIND,
CGROUP_INET4_CONNECT,
CGROUP_INET6_CONNECT,
+ CGROUP_UNIX_CONNECT,
CGROUP_INET4_POST_BIND,
CGROUP_INET6_POST_BIND,
CGROUP_UDP4_SENDMSG,
CGROUP_UDP6_SENDMSG,
+ CGROUP_UNIX_SENDMSG,
CGROUP_SYSCTL,
CGROUP_UDP4_RECVMSG,
CGROUP_UDP6_RECVMSG,
+ CGROUP_UNIX_RECVMSG,
CGROUP_GETSOCKOPT,
CGROUP_SETSOCKOPT,
CGROUP_INET4_GETPEERNAME,
CGROUP_INET6_GETPEERNAME,
+ CGROUP_UNIX_GETPEERNAME,
CGROUP_INET4_GETSOCKNAME,
CGROUP_INET6_GETSOCKNAME,
+ CGROUP_UNIX_GETSOCKNAME,
CGROUP_INET_SOCK_RELEASE,
CGROUP_LSM_START,
CGROUP_LSM_END = CGROUP_LSM_START + CGROUP_LSM_NUM - 1,
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 8506690dbb9c..98b8cea904fe 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -48,19 +48,24 @@ to_cgroup_bpf_attach_type(enum bpf_attach_type attach_type)
CGROUP_ATYPE(CGROUP_INET6_BIND);
CGROUP_ATYPE(CGROUP_INET4_CONNECT);
CGROUP_ATYPE(CGROUP_INET6_CONNECT);
+ CGROUP_ATYPE(CGROUP_UNIX_CONNECT);
CGROUP_ATYPE(CGROUP_INET4_POST_BIND);
CGROUP_ATYPE(CGROUP_INET6_POST_BIND);
CGROUP_ATYPE(CGROUP_UDP4_SENDMSG);
CGROUP_ATYPE(CGROUP_UDP6_SENDMSG);
+ CGROUP_ATYPE(CGROUP_UNIX_SENDMSG);
CGROUP_ATYPE(CGROUP_SYSCTL);
CGROUP_ATYPE(CGROUP_UDP4_RECVMSG);
CGROUP_ATYPE(CGROUP_UDP6_RECVMSG);
+ CGROUP_ATYPE(CGROUP_UNIX_RECVMSG);
CGROUP_ATYPE(CGROUP_GETSOCKOPT);
CGROUP_ATYPE(CGROUP_SETSOCKOPT);
CGROUP_ATYPE(CGROUP_INET4_GETPEERNAME);
CGROUP_ATYPE(CGROUP_INET6_GETPEERNAME);
+ CGROUP_ATYPE(CGROUP_UNIX_GETPEERNAME);
CGROUP_ATYPE(CGROUP_INET4_GETSOCKNAME);
CGROUP_ATYPE(CGROUP_INET6_GETSOCKNAME);
+ CGROUP_ATYPE(CGROUP_UNIX_GETSOCKNAME);
CGROUP_ATYPE(CGROUP_INET_SOCK_RELEASE);
default:
return CGROUP_BPF_ATTACH_TYPE_INVALID;
@@ -120,6 +125,7 @@ int __cgroup_bpf_run_filter_sk(struct sock *sk,
int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
struct sockaddr *uaddr,
+ int *uaddrlen,
enum cgroup_bpf_attach_type atype,
void *t_ctx,
u32 *flags);
@@ -230,22 +236,22 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk,
#define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk) \
BPF_CGROUP_RUN_SK_PROG(sk, CGROUP_INET6_POST_BIND)
-#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, atype) \
+#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, uaddrlen, atype) \
({ \
int __ret = 0; \
if (cgroup_bpf_enabled(atype)) \
- __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, atype, \
- NULL, NULL); \
+ __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, uaddrlen, \
+ atype, NULL, NULL); \
__ret; \
})
-#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, atype, t_ctx) \
+#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, atype, t_ctx) \
({ \
int __ret = 0; \
if (cgroup_bpf_enabled(atype)) { \
lock_sock(sk); \
- __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, atype, \
- t_ctx, NULL); \
+ __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, uaddrlen, \
+ atype, t_ctx, NULL); \
release_sock(sk); \
} \
__ret; \
@@ -256,14 +262,14 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk,
* (at bit position 0) is to indicate CAP_NET_BIND_SERVICE capability check
* should be bypassed (BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE).
*/
-#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, atype, bind_flags) \
+#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, uaddrlen, atype, bind_flags) \
({ \
u32 __flags = 0; \
int __ret = 0; \
if (cgroup_bpf_enabled(atype)) { \
lock_sock(sk); \
- __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, atype, \
- NULL, &__flags); \
+ __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, uaddrlen, \
+ atype, NULL, &__flags); \
release_sock(sk); \
if (__flags & BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE) \
*bind_flags |= BIND_NO_CAP_NET_BIND_SERVICE; \
@@ -276,29 +282,38 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk,
cgroup_bpf_enabled(CGROUP_INET6_CONNECT)) && \
(sk)->sk_prot->pre_connect)
-#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) \
- BPF_CGROUP_RUN_SA_PROG(sk, uaddr, CGROUP_INET4_CONNECT)
+#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr, uaddrlen) \
+ BPF_CGROUP_RUN_SA_PROG(sk, uaddr, uaddrlen, CGROUP_INET4_CONNECT)
-#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr) \
- BPF_CGROUP_RUN_SA_PROG(sk, uaddr, CGROUP_INET6_CONNECT)
+#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr, uaddrlen) \
+ BPF_CGROUP_RUN_SA_PROG(sk, uaddr, uaddrlen, CGROUP_INET6_CONNECT)
-#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr) \
- BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_INET4_CONNECT, NULL)
+#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr, uaddrlen) \
+ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_INET4_CONNECT, NULL)
-#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr) \
- BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_INET6_CONNECT, NULL)
+#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr, uaddrlen) \
+ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_INET6_CONNECT, NULL)
-#define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, t_ctx) \
- BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_UDP4_SENDMSG, t_ctx)
+#define BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, uaddr, uaddrlen) \
+ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UNIX_CONNECT, NULL)
-#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, t_ctx) \
- BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_UDP6_SENDMSG, t_ctx)
+#define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) \
+ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP4_SENDMSG, t_ctx)
-#define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr) \
- BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_UDP4_RECVMSG, NULL)
+#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) \
+ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP6_SENDMSG, t_ctx)
-#define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr) \
- BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_UDP6_RECVMSG, NULL)
+#define BPF_CGROUP_RUN_PROG_UNIX_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) \
+ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UNIX_SENDMSG, t_ctx)
+
+#define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr, uaddrlen) \
+ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP4_RECVMSG, NULL)
+
+#define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr, uaddrlen) \
+ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP6_RECVMSG, NULL)
+
+#define BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk, uaddr, uaddrlen) \
+ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UNIX_RECVMSG, NULL)
/* The SOCK_OPS"_SK" macro should be used when sock_ops->sk is not a
* fullsock and its parent fullsock cannot be traced by
@@ -477,24 +492,27 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map,
}
#define cgroup_bpf_enabled(atype) (0)
-#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, atype, t_ctx) ({ 0; })
-#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, atype) ({ 0; })
+#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, atype, t_ctx) ({ 0; })
+#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, uaddrlen, atype) ({ 0; })
#define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (0)
#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET_SOCK_RELEASE(sk) ({ 0; })
-#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, atype, flags) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, uaddrlen, atype, flags) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk) ({ 0; })
-#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) ({ 0; })
-#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr) ({ 0; })
-#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr) ({ 0; })
-#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr) ({ 0; })
-#define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, t_ctx) ({ 0; })
-#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, t_ctx) ({ 0; })
-#define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr) ({ 0; })
-#define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr, uaddrlen) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr, uaddrlen) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr, uaddrlen) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr, uaddrlen) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, uaddr, uaddrlen) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_UNIX_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr, uaddrlen) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr, uaddrlen) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk, uaddr, uaddrlen) ({ 0; })
#define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; })
#define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(atype, major, minor, access) ({ 0; })
#define BPF_CGROUP_RUN_PROG_SYSCTL(head,table,write,buf,count,pos) ({ 0; })
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 49f8b691496c..d3c51a507508 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -55,8 +55,8 @@ struct cgroup;
extern struct idr btf_idr;
extern spinlock_t btf_idr_lock;
extern struct kobject *btf_kobj;
-extern struct bpf_mem_alloc bpf_global_ma;
-extern bool bpf_global_ma_set;
+extern struct bpf_mem_alloc bpf_global_ma, bpf_global_percpu_ma;
+extern bool bpf_global_ma_set, bpf_global_percpu_ma_set;
typedef u64 (*bpf_callback_t)(u64, u64, u64, u64, u64);
typedef int (*bpf_iter_init_seq_priv_t)(void *private_data,
@@ -180,14 +180,15 @@ enum btf_field_type {
BPF_TIMER = (1 << 1),
BPF_KPTR_UNREF = (1 << 2),
BPF_KPTR_REF = (1 << 3),
- BPF_KPTR = BPF_KPTR_UNREF | BPF_KPTR_REF,
- BPF_LIST_HEAD = (1 << 4),
- BPF_LIST_NODE = (1 << 5),
- BPF_RB_ROOT = (1 << 6),
- BPF_RB_NODE = (1 << 7),
+ BPF_KPTR_PERCPU = (1 << 4),
+ BPF_KPTR = BPF_KPTR_UNREF | BPF_KPTR_REF | BPF_KPTR_PERCPU,
+ BPF_LIST_HEAD = (1 << 5),
+ BPF_LIST_NODE = (1 << 6),
+ BPF_RB_ROOT = (1 << 7),
+ BPF_RB_NODE = (1 << 8),
BPF_GRAPH_NODE_OR_ROOT = BPF_LIST_NODE | BPF_LIST_HEAD |
BPF_RB_NODE | BPF_RB_ROOT,
- BPF_REFCOUNT = (1 << 8),
+ BPF_REFCOUNT = (1 << 9),
};
typedef void (*btf_dtor_kfunc_t)(void *);
@@ -300,6 +301,8 @@ static inline const char *btf_field_type_name(enum btf_field_type type)
case BPF_KPTR_UNREF:
case BPF_KPTR_REF:
return "kptr";
+ case BPF_KPTR_PERCPU:
+ return "percpu_kptr";
case BPF_LIST_HEAD:
return "bpf_list_head";
case BPF_LIST_NODE:
@@ -325,6 +328,7 @@ static inline u32 btf_field_type_size(enum btf_field_type type)
return sizeof(struct bpf_timer);
case BPF_KPTR_UNREF:
case BPF_KPTR_REF:
+ case BPF_KPTR_PERCPU:
return sizeof(u64);
case BPF_LIST_HEAD:
return sizeof(struct bpf_list_head);
@@ -351,6 +355,7 @@ static inline u32 btf_field_type_align(enum btf_field_type type)
return __alignof__(struct bpf_timer);
case BPF_KPTR_UNREF:
case BPF_KPTR_REF:
+ case BPF_KPTR_PERCPU:
return __alignof__(u64);
case BPF_LIST_HEAD:
return __alignof__(struct bpf_list_head);
@@ -389,6 +394,7 @@ static inline void bpf_obj_init_field(const struct btf_field *field, void *addr)
case BPF_TIMER:
case BPF_KPTR_UNREF:
case BPF_KPTR_REF:
+ case BPF_KPTR_PERCPU:
break;
default:
WARN_ON_ONCE(1);
@@ -1029,6 +1035,11 @@ struct btf_func_model {
*/
#define BPF_TRAMP_F_SHARE_IPMODIFY BIT(6)
+/* Indicate that current trampoline is in a tail call context. Then, it has to
+ * cache and restore tail_call_cnt to avoid infinite tail call loop.
+ */
+#define BPF_TRAMP_F_TAIL_CALL_CTX BIT(7)
+
/* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50
* bytes on x86.
*/
@@ -1378,6 +1389,7 @@ struct bpf_prog_aux {
u32 stack_depth;
u32 id;
u32 func_cnt; /* used by non-func prog as the number of func progs */
+ u32 real_func_cnt; /* includes hidden progs, only used for JIT and freeing progs */
u32 func_idx; /* 0 for non-func prog, the index in func array for func prog */
u32 attach_btf_id; /* in-kernel BTF type id to attach to */
u32 ctx_arg_info_size;
@@ -1398,6 +1410,8 @@ struct bpf_prog_aux {
bool sleepable;
bool tail_call_reachable;
bool xdp_has_frags;
+ bool exception_cb;
+ bool exception_boundary;
/* BTF_KIND_FUNC_PROTO for valid attach_btf_id */
const struct btf_type *attach_func_proto;
/* function name for valid attach_btf_id */
@@ -1420,6 +1434,7 @@ struct bpf_prog_aux {
int cgroup_atype; /* enum cgroup_bpf_attach_type */
struct bpf_map *cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE];
char name[BPF_OBJ_NAME_LEN];
+ unsigned int (*bpf_exception_cb)(u64 cookie, u64 sp, u64 bp);
#ifdef CONFIG_SECURITY
void *security;
#endif
@@ -2149,12 +2164,12 @@ static inline bool bpf_allow_uninit_stack(void)
static inline bool bpf_bypass_spec_v1(void)
{
- return perfmon_capable();
+ return cpu_mitigations_off() || perfmon_capable();
}
static inline bool bpf_bypass_spec_v4(void)
{
- return perfmon_capable();
+ return cpu_mitigations_off() || perfmon_capable();
}
int bpf_map_new_fd(struct bpf_map *map, int flags);
@@ -2407,9 +2422,11 @@ int btf_check_subprog_arg_match(struct bpf_verifier_env *env, int subprog,
int btf_check_subprog_call(struct bpf_verifier_env *env, int subprog,
struct bpf_reg_state *regs);
int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog,
- struct bpf_reg_state *reg);
+ struct bpf_reg_state *reg, bool is_ex_cb);
int btf_check_type_match(struct bpf_verifier_log *log, const struct bpf_prog *prog,
struct btf *btf, const struct btf_type *t);
+const char *btf_find_decl_tag_value(const struct btf *btf, const struct btf_type *pt,
+ int comp_idx, const char *tag_key);
struct bpf_prog *bpf_prog_by_id(u32 id);
struct bpf_link *bpf_link_by_id(u32 id);
@@ -2905,6 +2922,22 @@ static inline int sock_map_bpf_prog_query(const union bpf_attr *attr,
#endif /* CONFIG_BPF_SYSCALL */
#endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */
+static __always_inline void
+bpf_prog_inc_misses_counters(const struct bpf_prog_array *array)
+{
+ const struct bpf_prog_array_item *item;
+ struct bpf_prog *prog;
+
+ if (unlikely(!array))
+ return;
+
+ item = &array->items[0];
+ while ((prog = READ_ONCE(item->prog))) {
+ bpf_prog_inc_misses_counter(prog);
+ item++;
+ }
+}
+
#if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL)
void bpf_sk_reuseport_detach(struct sock *sk);
int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key,
@@ -3183,4 +3216,9 @@ static inline gfp_t bpf_memcg_flags(gfp_t flags)
return flags;
}
+static inline bool bpf_is_subprog(const struct bpf_prog *prog)
+{
+ return prog->aux->func_idx != 0;
+}
+
#endif /* _LINUX_BPF_H */
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index b6e58dab8e27..94ec766432f5 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -300,6 +300,7 @@ struct bpf_func_state {
bool in_callback_fn;
struct tnum callback_ret_range;
bool in_async_callback_fn;
+ bool in_exception_callback_fn;
/* The following fields should be last. See copy_func_state() */
int acquired_refs;
@@ -480,6 +481,7 @@ struct bpf_insn_aux_data {
bool zext_dst; /* this insn zero extends dst reg */
bool storage_get_func_atomic; /* bpf_*_storage_get() with atomic memory alloc */
bool is_iter_next; /* bpf_iter_<type>_next() kfunc call */
+ bool call_with_percpu_alloc_ptr; /* {this,per}_cpu_ptr() with prog percpu alloc */
u8 alu_state; /* used in combination with alu_limit */
/* below fields are initialized once */
@@ -540,7 +542,9 @@ struct bpf_subprog_info {
bool has_tail_call;
bool tail_call_reachable;
bool has_ld_abs;
+ bool is_cb;
bool is_async_cb;
+ bool is_exception_cb;
};
struct bpf_verifier_env;
@@ -587,6 +591,8 @@ struct bpf_verifier_env {
u32 used_map_cnt; /* number of used maps */
u32 used_btf_cnt; /* number of used BTF objects */
u32 id_gen; /* used to generate unique reg IDs */
+ u32 hidden_subprog_cnt; /* number of hidden subprogs */
+ int exception_callback_subprog;
bool explore_alu_limits;
bool allow_ptr_leaks;
bool allow_uninit_stack;
@@ -594,10 +600,11 @@ struct bpf_verifier_env {
bool bypass_spec_v1;
bool bypass_spec_v4;
bool seen_direct_write;
+ bool seen_exception;
struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */
const struct bpf_line_info *prev_linfo;
struct bpf_verifier_log log;
- struct bpf_subprog_info subprog_info[BPF_MAX_SUBPROGS + 1];
+ struct bpf_subprog_info subprog_info[BPF_MAX_SUBPROGS + 2]; /* max + 2 for the fake and exception subprogs */
union {
struct bpf_idmap idmap_scratch;
struct bpf_idset idset_scratch;
diff --git a/include/linux/ceph/mon_client.h b/include/linux/ceph/mon_client.h
index b658961156a0..7a9a40163c0f 100644
--- a/include/linux/ceph/mon_client.h
+++ b/include/linux/ceph/mon_client.h
@@ -19,7 +19,7 @@ struct ceph_monmap {
struct ceph_fsid fsid;
u32 epoch;
u32 num_mon;
- struct ceph_entity_inst mon_inst[];
+ struct ceph_entity_inst mon_inst[] __counted_by(num_mon);
};
struct ceph_mon_client;
diff --git a/include/linux/dpll.h b/include/linux/dpll.h
new file mode 100644
index 000000000000..bbc480cd2932
--- /dev/null
+++ b/include/linux/dpll.h
@@ -0,0 +1,152 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2023 Meta Platforms, Inc. and affiliates
+ * Copyright (c) 2023 Intel and affiliates
+ */
+
+#ifndef __DPLL_H__
+#define __DPLL_H__
+
+#include <uapi/linux/dpll.h>
+#include <linux/device.h>
+#include <linux/netlink.h>
+
+struct dpll_device;
+struct dpll_pin;
+
+struct dpll_device_ops {
+ int (*mode_get)(const struct dpll_device *dpll, void *dpll_priv,
+ enum dpll_mode *mode, struct netlink_ext_ack *extack);
+ bool (*mode_supported)(const struct dpll_device *dpll, void *dpll_priv,
+ const enum dpll_mode mode,
+ struct netlink_ext_ack *extack);
+ int (*lock_status_get)(const struct dpll_device *dpll, void *dpll_priv,
+ enum dpll_lock_status *status,
+ struct netlink_ext_ack *extack);
+ int (*temp_get)(const struct dpll_device *dpll, void *dpll_priv,
+ s32 *temp, struct netlink_ext_ack *extack);
+};
+
+struct dpll_pin_ops {
+ int (*frequency_set)(const struct dpll_pin *pin, void *pin_priv,
+ const struct dpll_device *dpll, void *dpll_priv,
+ const u64 frequency,
+ struct netlink_ext_ack *extack);
+ int (*frequency_get)(const struct dpll_pin *pin, void *pin_priv,
+ const struct dpll_device *dpll, void *dpll_priv,
+ u64 *frequency, struct netlink_ext_ack *extack);
+ int (*direction_set)(const struct dpll_pin *pin, void *pin_priv,
+ const struct dpll_device *dpll, void *dpll_priv,
+ const enum dpll_pin_direction direction,
+ struct netlink_ext_ack *extack);
+ int (*direction_get)(const struct dpll_pin *pin, void *pin_priv,
+ const struct dpll_device *dpll, void *dpll_priv,
+ enum dpll_pin_direction *direction,
+ struct netlink_ext_ack *extack);
+ int (*state_on_pin_get)(const struct dpll_pin *pin, void *pin_priv,
+ const struct dpll_pin *parent_pin,
+ void *parent_pin_priv,
+ enum dpll_pin_state *state,
+ struct netlink_ext_ack *extack);
+ int (*state_on_dpll_get)(const struct dpll_pin *pin, void *pin_priv,
+ const struct dpll_device *dpll,
+ void *dpll_priv, enum dpll_pin_state *state,
+ struct netlink_ext_ack *extack);
+ int (*state_on_pin_set)(const struct dpll_pin *pin, void *pin_priv,
+ const struct dpll_pin *parent_pin,
+ void *parent_pin_priv,
+ const enum dpll_pin_state state,
+ struct netlink_ext_ack *extack);
+ int (*state_on_dpll_set)(const struct dpll_pin *pin, void *pin_priv,
+ const struct dpll_device *dpll,
+ void *dpll_priv,
+ const enum dpll_pin_state state,
+ struct netlink_ext_ack *extack);
+ int (*prio_get)(const struct dpll_pin *pin, void *pin_priv,
+ const struct dpll_device *dpll, void *dpll_priv,
+ u32 *prio, struct netlink_ext_ack *extack);
+ int (*prio_set)(const struct dpll_pin *pin, void *pin_priv,
+ const struct dpll_device *dpll, void *dpll_priv,
+ const u32 prio, struct netlink_ext_ack *extack);
+};
+
+struct dpll_pin_frequency {
+ u64 min;
+ u64 max;
+};
+
+#define DPLL_PIN_FREQUENCY_RANGE(_min, _max) \
+ { \
+ .min = _min, \
+ .max = _max, \
+ }
+
+#define DPLL_PIN_FREQUENCY(_val) DPLL_PIN_FREQUENCY_RANGE(_val, _val)
+#define DPLL_PIN_FREQUENCY_1PPS \
+ DPLL_PIN_FREQUENCY(DPLL_PIN_FREQUENCY_1_HZ)
+#define DPLL_PIN_FREQUENCY_10MHZ \
+ DPLL_PIN_FREQUENCY(DPLL_PIN_FREQUENCY_10_MHZ)
+#define DPLL_PIN_FREQUENCY_IRIG_B \
+ DPLL_PIN_FREQUENCY(DPLL_PIN_FREQUENCY_10_KHZ)
+#define DPLL_PIN_FREQUENCY_DCF77 \
+ DPLL_PIN_FREQUENCY(DPLL_PIN_FREQUENCY_77_5_KHZ)
+
+struct dpll_pin_properties {
+ const char *board_label;
+ const char *panel_label;
+ const char *package_label;
+ enum dpll_pin_type type;
+ unsigned long capabilities;
+ u32 freq_supported_num;
+ struct dpll_pin_frequency *freq_supported;
+};
+
+#if IS_ENABLED(CONFIG_DPLL)
+size_t dpll_msg_pin_handle_size(struct dpll_pin *pin);
+int dpll_msg_add_pin_handle(struct sk_buff *msg, struct dpll_pin *pin);
+#else
+static inline size_t dpll_msg_pin_handle_size(struct dpll_pin *pin)
+{
+ return 0;
+}
+
+static inline int dpll_msg_add_pin_handle(struct sk_buff *msg, struct dpll_pin *pin)
+{
+ return 0;
+}
+#endif
+
+struct dpll_device *
+dpll_device_get(u64 clock_id, u32 dev_driver_id, struct module *module);
+
+void dpll_device_put(struct dpll_device *dpll);
+
+int dpll_device_register(struct dpll_device *dpll, enum dpll_type type,
+ const struct dpll_device_ops *ops, void *priv);
+
+void dpll_device_unregister(struct dpll_device *dpll,
+ const struct dpll_device_ops *ops, void *priv);
+
+struct dpll_pin *
+dpll_pin_get(u64 clock_id, u32 dev_driver_id, struct module *module,
+ const struct dpll_pin_properties *prop);
+
+int dpll_pin_register(struct dpll_device *dpll, struct dpll_pin *pin,
+ const struct dpll_pin_ops *ops, void *priv);
+
+void dpll_pin_unregister(struct dpll_device *dpll, struct dpll_pin *pin,
+ const struct dpll_pin_ops *ops, void *priv);
+
+void dpll_pin_put(struct dpll_pin *pin);
+
+int dpll_pin_on_pin_register(struct dpll_pin *parent, struct dpll_pin *pin,
+ const struct dpll_pin_ops *ops, void *priv);
+
+void dpll_pin_on_pin_unregister(struct dpll_pin *parent, struct dpll_pin *pin,
+ const struct dpll_pin_ops *ops, void *priv);
+
+int dpll_device_change_ntf(struct dpll_device *dpll);
+
+int dpll_pin_change_ntf(struct dpll_pin *pin);
+
+#endif
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 761af6b3cf2b..bcd2bc15ff56 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -117,21 +117,25 @@ struct ctl_table_header;
/* ALU ops on immediates, bpf_add|sub|...: dst_reg += imm32 */
-#define BPF_ALU64_IMM(OP, DST, IMM) \
+#define BPF_ALU64_IMM_OFF(OP, DST, IMM, OFF) \
((struct bpf_insn) { \
.code = BPF_ALU64 | BPF_OP(OP) | BPF_K, \
.dst_reg = DST, \
.src_reg = 0, \
- .off = 0, \
+ .off = OFF, \
.imm = IMM })
+#define BPF_ALU64_IMM(OP, DST, IMM) \
+ BPF_ALU64_IMM_OFF(OP, DST, IMM, 0)
-#define BPF_ALU32_IMM(OP, DST, IMM) \
+#define BPF_ALU32_IMM_OFF(OP, DST, IMM, OFF) \
((struct bpf_insn) { \
.code = BPF_ALU | BPF_OP(OP) | BPF_K, \
.dst_reg = DST, \
.src_reg = 0, \
- .off = 0, \
+ .off = OFF, \
.imm = IMM })
+#define BPF_ALU32_IMM(OP, DST, IMM) \
+ BPF_ALU32_IMM_OFF(OP, DST, IMM, 0)
/* Endianess conversion, cpu_to_{l,b}e(), {l,b}e_to_cpu() */
@@ -143,6 +147,16 @@ struct ctl_table_header;
.off = 0, \
.imm = LEN })
+/* Byte Swap, bswap16/32/64 */
+
+#define BPF_BSWAP(DST, LEN) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU64 | BPF_END | BPF_SRC(BPF_TO_LE), \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = LEN })
+
/* Short form of mov, dst_reg = src_reg */
#define BPF_MOV64_REG(DST, SRC) \
@@ -179,6 +193,24 @@ struct ctl_table_header;
.off = 0, \
.imm = IMM })
+/* Short form of movsx, dst_reg = (s8,s16,s32)src_reg */
+
+#define BPF_MOVSX64_REG(DST, SRC, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU64 | BPF_MOV | BPF_X, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = OFF, \
+ .imm = 0 })
+
+#define BPF_MOVSX32_REG(DST, SRC, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU | BPF_MOV | BPF_X, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = OFF, \
+ .imm = 0 })
+
/* Special form of mov32, used for doing explicit zero extension on dst. */
#define BPF_ZEXT_REG(DST) \
((struct bpf_insn) { \
@@ -263,6 +295,16 @@ static inline bool insn_is_zext(const struct bpf_insn *insn)
.off = OFF, \
.imm = 0 })
+/* Memory load, dst_reg = *(signed size *) (src_reg + off16) */
+
+#define BPF_LDX_MEMSX(SIZE, DST, SRC, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_LDX | BPF_SIZE(SIZE) | BPF_MEMSX, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = OFF, \
+ .imm = 0 })
+
/* Memory store, *(uint *) (dst_reg + off16) = src_reg */
#define BPF_STX_MEM(SIZE, DST, SRC, OFF) \
@@ -694,7 +736,7 @@ static inline void bpf_compute_and_save_data_end(
cb->data_end = skb->data + skb_headlen(skb);
}
-/* Restore data saved by bpf_compute_data_pointers(). */
+/* Restore data saved by bpf_compute_and_save_data_end(). */
static inline void bpf_restore_data_end(
struct sk_buff *skb, void *saved_data_end)
{
@@ -912,6 +954,8 @@ bool bpf_jit_needs_zext(void);
bool bpf_jit_supports_subprog_tailcalls(void);
bool bpf_jit_supports_kfunc_call(void);
bool bpf_jit_supports_far_kfunc_call(void);
+bool bpf_jit_supports_exceptions(void);
+void arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp), void *cookie);
bool bpf_helper_changes_pkt_data(void *func);
static inline bool bpf_dump_raw_ok(const struct cred *cred)
@@ -1127,6 +1171,7 @@ const char *__bpf_address_lookup(unsigned long addr, unsigned long *size,
bool is_bpf_text_address(unsigned long addr);
int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
char *sym);
+struct bpf_prog *bpf_prog_ksym_find(unsigned long addr);
static inline const char *
bpf_address_lookup(unsigned long addr, unsigned long *size,
@@ -1194,6 +1239,11 @@ static inline int bpf_get_kallsym(unsigned int symnum, unsigned long *value,
return -ERANGE;
}
+static inline struct bpf_prog *bpf_prog_ksym_find(unsigned long addr)
+{
+ return NULL;
+}
+
static inline const char *
bpf_address_lookup(unsigned long addr, unsigned long *size,
unsigned long *off, char **modname, char *sym)
@@ -1285,6 +1335,7 @@ struct bpf_sock_addr_kern {
*/
u64 tmp_reg;
void *t_ctx; /* Attach type specific context. */
+ u32 uaddrlen;
};
struct bpf_sock_ops_kern {
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index af8a771a053c..e400ff757f13 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -213,28 +213,9 @@ struct ipv6_pinfo {
__be32 flow_label;
__u32 frag_size;
- /*
- * Packed in 16bits.
- * Omit one shift by putting the signed field at MSB.
- */
-#if defined(__BIG_ENDIAN_BITFIELD)
- __s16 hop_limit:9;
- __u16 __unused_1:7;
-#else
- __u16 __unused_1:7;
- __s16 hop_limit:9;
-#endif
+ s16 hop_limit;
+ u8 mcast_hops;
-#if defined(__BIG_ENDIAN_BITFIELD)
- /* Packed in 16bits. */
- __s16 mcast_hops:9;
- __u16 __unused_2:6,
- mc_loop:1;
-#else
- __u16 mc_loop:1,
- __unused_2:6;
- __s16 mcast_hops:9;
-#endif
int ucast_oif;
int mcast_oif;
@@ -262,21 +243,11 @@ struct ipv6_pinfo {
} rxopt;
/* sockopt flags */
- __u16 recverr:1,
- sndflow:1,
- repflow:1,
- pmtudisc:3,
- padding:1, /* 1 bit hole */
- srcprefs:3, /* 001: prefer temporary address
+ __u8 srcprefs; /* 001: prefer temporary address
* 010: prefer public address
* 100: prefer care-of address
*/
- dontfrag:1,
- autoflowlabel:1,
- autoflowlabel_set:1,
- mc_all:1,
- recverr_rfc4884:1,
- rtalert_isolate:1;
+ __u8 pmtudisc;
__u8 min_hopcount;
__u8 tclass;
__be32 rcv_flowinfo;
@@ -293,6 +264,18 @@ struct ipv6_pinfo {
struct inet6_cork cork;
};
+/* We currently use available bits from inet_sk(sk)->inet_flags,
+ * this could change in the future.
+ */
+#define inet6_test_bit(nr, sk) \
+ test_bit(INET_FLAGS_##nr, &inet_sk(sk)->inet_flags)
+#define inet6_set_bit(nr, sk) \
+ set_bit(INET_FLAGS_##nr, &inet_sk(sk)->inet_flags)
+#define inet6_clear_bit(nr, sk) \
+ clear_bit(INET_FLAGS_##nr, &inet_sk(sk)->inet_flags)
+#define inet6_assign_bit(nr, sk, val) \
+ assign_bit(INET_FLAGS_##nr, &inet_sk(sk)->inet_flags, val)
+
/* WARNING: don't change the layout of the members in {raw,udp,tcp}6_sock! */
struct raw6_sock {
/* inet_sock has to be the first member of raw6_sock */
diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index 71fa9a40fb5a..72cb693b075b 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -285,8 +285,10 @@ static inline bool kasan_check_byte(const void *address)
#if defined(CONFIG_KASAN) && defined(CONFIG_KASAN_STACK)
void kasan_unpoison_task_stack(struct task_struct *task);
+asmlinkage void kasan_unpoison_task_stack_below(const void *watermark);
#else
static inline void kasan_unpoison_task_stack(struct task_struct *task) {}
+static inline void kasan_unpoison_task_stack_below(const void *watermark) {}
#endif
#ifdef CONFIG_KASAN_GENERIC
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 4d5be378fa8c..8fbe22de16ef 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -366,6 +366,7 @@ enum mlx5_driver_event {
MLX5_DRIVER_EVENT_UPLINK_NETDEV,
MLX5_DRIVER_EVENT_MACSEC_SA_ADDED,
MLX5_DRIVER_EVENT_MACSEC_SA_DELETED,
+ MLX5_DRIVER_EVENT_SF_PEER_DEVLINK,
};
enum {
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 3033bbaeac81..92434814c855 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -155,6 +155,8 @@ enum {
MLX5_REG_MCC = 0x9062,
MLX5_REG_MCDA = 0x9063,
MLX5_REG_MCAM = 0x907f,
+ MLX5_REG_MSECQ = 0x9155,
+ MLX5_REG_MSEES = 0x9156,
MLX5_REG_MIRC = 0x9162,
MLX5_REG_SBCAM = 0xB01F,
MLX5_REG_RESOURCE_DUMP = 0xC000,
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index fc3db401f8a2..dd8421d021cf 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -10176,7 +10176,9 @@ struct mlx5_ifc_mcam_access_reg_bits2 {
u8 mirc[0x1];
u8 regs_97_to_96[0x2];
- u8 regs_95_to_64[0x20];
+ u8 regs_95_to_87[0x09];
+ u8 synce_registers[0x2];
+ u8 regs_84_to_64[0x15];
u8 regs_63_to_32[0x20];
@@ -12549,4 +12551,59 @@ struct mlx5_ifc_modify_page_track_obj_in_bits {
struct mlx5_ifc_page_track_bits obj_context;
};
+struct mlx5_ifc_msecq_reg_bits {
+ u8 reserved_at_0[0x20];
+
+ u8 reserved_at_20[0x12];
+ u8 network_option[0x2];
+ u8 local_ssm_code[0x4];
+ u8 local_enhanced_ssm_code[0x8];
+
+ u8 local_clock_identity[0x40];
+
+ u8 reserved_at_80[0x180];
+};
+
+enum {
+ MLX5_MSEES_FIELD_SELECT_ENABLE = BIT(0),
+ MLX5_MSEES_FIELD_SELECT_ADMIN_STATUS = BIT(1),
+ MLX5_MSEES_FIELD_SELECT_ADMIN_FREQ_MEASURE = BIT(2),
+};
+
+enum mlx5_msees_admin_status {
+ MLX5_MSEES_ADMIN_STATUS_FREE_RUNNING = 0x0,
+ MLX5_MSEES_ADMIN_STATUS_TRACK = 0x1,
+};
+
+enum mlx5_msees_oper_status {
+ MLX5_MSEES_OPER_STATUS_FREE_RUNNING = 0x0,
+ MLX5_MSEES_OPER_STATUS_SELF_TRACK = 0x1,
+ MLX5_MSEES_OPER_STATUS_OTHER_TRACK = 0x2,
+ MLX5_MSEES_OPER_STATUS_HOLDOVER = 0x3,
+ MLX5_MSEES_OPER_STATUS_FAIL_HOLDOVER = 0x4,
+ MLX5_MSEES_OPER_STATUS_FAIL_FREE_RUNNING = 0x5,
+};
+
+struct mlx5_ifc_msees_reg_bits {
+ u8 reserved_at_0[0x8];
+ u8 local_port[0x8];
+ u8 pnat[0x2];
+ u8 lp_msb[0x2];
+ u8 reserved_at_14[0xc];
+
+ u8 field_select[0x20];
+
+ u8 admin_status[0x4];
+ u8 oper_status[0x4];
+ u8 ho_acq[0x1];
+ u8 reserved_at_49[0xc];
+ u8 admin_freq_measure[0x1];
+ u8 oper_freq_measure[0x1];
+ u8 failure_reason[0x9];
+
+ u8 frequency_diff[0x20];
+
+ u8 reserved_at_80[0x180];
+};
+
#endif /* MLX5_IFC_H */
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 0896aaa91dd7..7e520c14eb8c 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -79,6 +79,8 @@ struct xdp_buff;
struct xdp_frame;
struct xdp_metadata_ops;
struct xdp_md;
+/* DPLL specific */
+struct dpll_pin;
typedef u32 xdp_features_t;
@@ -917,6 +919,7 @@ struct net_device_path {
u8 queue;
u16 wcid;
u8 bss;
+ u8 amsdu;
} mtk_wdma;
};
};
@@ -2049,6 +2052,9 @@ enum netdev_ml_priv_type {
* SET_NETDEV_DEVLINK_PORT macro. This pointer is static
* during the time netdevice is registered.
*
+ * @dpll_pin: Pointer to the SyncE source pin of a DPLL subsystem,
+ * where the clock is recovered.
+ *
* FIXME: cleanup struct net_device such that network protocol info
* moves out.
*/
@@ -2405,6 +2411,10 @@ struct net_device {
struct rtnl_hw_stats64 *offload_xstats_l3;
struct devlink_port *devlink_port;
+
+#if IS_ENABLED(CONFIG_DPLL)
+ struct dpll_pin *dpll_pin;
+#endif
};
#define to_net_dev(d) container_of(d, struct net_device, dev)
@@ -3940,6 +3950,18 @@ int dev_get_mac_address(struct sockaddr *sa, struct net *net, char *dev_name);
int dev_get_port_parent_id(struct net_device *dev,
struct netdev_phys_item_id *ppid, bool recurse);
bool netdev_port_same_parent_id(struct net_device *a, struct net_device *b);
+void netdev_dpll_pin_set(struct net_device *dev, struct dpll_pin *dpll_pin);
+void netdev_dpll_pin_clear(struct net_device *dev);
+
+static inline struct dpll_pin *netdev_dpll_pin(const struct net_device *dev)
+{
+#if IS_ENABLED(CONFIG_DPLL)
+ return dev->dpll_pin;
+#else
+ return NULL;
+#endif
+}
+
struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev, bool *again);
struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
struct netdev_queue *txq, int *ret);
diff --git a/include/linux/pds/pds_core_if.h b/include/linux/pds/pds_core_if.h
index e838a2b90440..17a87c1a55d7 100644
--- a/include/linux/pds/pds_core_if.h
+++ b/include/linux/pds/pds_core_if.h
@@ -79,6 +79,7 @@ enum pds_core_status_code {
PDS_RC_EVFID = 31, /* VF ID does not exist */
PDS_RC_BAD_FW = 32, /* FW file is invalid or corrupted */
PDS_RC_ECLIENT = 33, /* No such client id */
+ PDS_RC_BAD_PCI = 255, /* Broken PCI when reading status */
};
/**
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 1351b802ffcf..3cc52826f18e 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -1736,6 +1736,7 @@ void phy_detach(struct phy_device *phydev);
void phy_start(struct phy_device *phydev);
void phy_stop(struct phy_device *phydev);
int phy_config_aneg(struct phy_device *phydev);
+int _phy_start_aneg(struct phy_device *phydev);
int phy_start_aneg(struct phy_device *phydev);
int phy_aneg_done(struct phy_device *phydev);
int phy_speed_down(struct phy_device *phydev, bool sync);
diff --git a/include/linux/soc/mediatek/mtk_wed.h b/include/linux/soc/mediatek/mtk_wed.h
index b2b28180dff7..a476648858a6 100644
--- a/include/linux/soc/mediatek/mtk_wed.h
+++ b/include/linux/soc/mediatek/mtk_wed.h
@@ -10,6 +10,7 @@
#define MTK_WED_TX_QUEUES 2
#define MTK_WED_RX_QUEUES 2
+#define MTK_WED_RX_PAGE_QUEUES 3
#define WED_WO_STA_REC 0x6
@@ -45,7 +46,7 @@ enum mtk_wed_wo_cmd {
MTK_WED_WO_CMD_WED_END
};
-struct mtk_rxbm_desc {
+struct mtk_wed_bm_desc {
__le32 buf0;
__le32 token;
} __packed __aligned(4);
@@ -76,6 +77,11 @@ struct mtk_wed_wo_rx_stats {
__le32 rx_drop_cnt;
};
+struct mtk_wed_buf {
+ void *p;
+ dma_addr_t phy_addr;
+};
+
struct mtk_wed_device {
#ifdef CONFIG_NET_MEDIATEK_SOC_WED
const struct mtk_wed_ops *ops;
@@ -94,17 +100,20 @@ struct mtk_wed_device {
struct mtk_wed_ring txfree_ring;
struct mtk_wed_ring tx_wdma[MTK_WED_TX_QUEUES];
struct mtk_wed_ring rx_wdma[MTK_WED_RX_QUEUES];
+ struct mtk_wed_ring rx_rro_ring[MTK_WED_RX_QUEUES];
+ struct mtk_wed_ring rx_page_ring[MTK_WED_RX_PAGE_QUEUES];
+ struct mtk_wed_ring ind_cmd_ring;
struct {
int size;
- void **pages;
+ struct mtk_wed_buf *pages;
struct mtk_wdma_desc *desc;
dma_addr_t desc_phys;
} tx_buf_ring;
struct {
int size;
- struct mtk_rxbm_desc *desc;
+ struct mtk_wed_bm_desc *desc;
dma_addr_t desc_phys;
} rx_buf_ring;
@@ -114,6 +123,13 @@ struct mtk_wed_device {
dma_addr_t fdbk_phys;
} rro;
+ struct {
+ int size;
+ struct mtk_wed_buf *pages;
+ struct mtk_wed_bm_desc *desc;
+ dma_addr_t desc_phys;
+ } hw_rro;
+
/* filled by driver: */
struct {
union {
@@ -123,6 +139,7 @@ struct mtk_wed_device {
enum mtk_wed_bus_tye bus_type;
void __iomem *base;
u32 phy_base;
+ u32 id;
u32 wpdma_phys;
u32 wpdma_int;
@@ -131,18 +148,35 @@ struct mtk_wed_device {
u32 wpdma_txfree;
u32 wpdma_rx_glo;
u32 wpdma_rx;
+ u32 wpdma_rx_rro[MTK_WED_RX_QUEUES];
+ u32 wpdma_rx_pg;
bool wcid_512;
+ bool hw_rro;
+ bool msi;
u16 token_start;
unsigned int nbuf;
unsigned int rx_nbuf;
unsigned int rx_npkt;
unsigned int rx_size;
+ unsigned int amsdu_max_len;
u8 tx_tbit[MTK_WED_TX_QUEUES];
u8 rx_tbit[MTK_WED_RX_QUEUES];
+ u8 rro_rx_tbit[MTK_WED_RX_QUEUES];
+ u8 rx_pg_tbit[MTK_WED_RX_PAGE_QUEUES];
u8 txfree_tbit;
+ u8 amsdu_max_subframes;
+
+ struct {
+ u8 se_group_nums;
+ u16 win_size;
+ u16 particular_sid;
+ u32 ack_sn_addr;
+ dma_addr_t particular_se_phys;
+ dma_addr_t addr_elem_phys[1024];
+ } ind_cmd;
u32 (*init_buf)(void *ptr, dma_addr_t phys, int token_id);
int (*offload_enable)(struct mtk_wed_device *wed);
@@ -182,6 +216,14 @@ struct mtk_wed_ops {
void (*irq_set_mask)(struct mtk_wed_device *dev, u32 mask);
int (*setup_tc)(struct mtk_wed_device *wed, struct net_device *dev,
enum tc_setup_type type, void *type_data);
+ void (*start_hw_rro)(struct mtk_wed_device *dev, u32 irq_mask,
+ bool reset);
+ void (*rro_rx_ring_setup)(struct mtk_wed_device *dev, int ring,
+ void __iomem *regs);
+ void (*msdu_pg_rx_ring_setup)(struct mtk_wed_device *dev, int ring,
+ void __iomem *regs);
+ int (*ind_rx_ring_setup)(struct mtk_wed_device *dev,
+ void __iomem *regs);
};
extern const struct mtk_wed_ops __rcu *mtk_soc_wed_ops;
@@ -206,16 +248,27 @@ mtk_wed_device_attach(struct mtk_wed_device *dev)
return ret;
}
-static inline bool
-mtk_wed_get_rx_capa(struct mtk_wed_device *dev)
+static inline bool mtk_wed_get_rx_capa(struct mtk_wed_device *dev)
{
#ifdef CONFIG_NET_MEDIATEK_SOC_WED
+ if (dev->version == 3)
+ return dev->wlan.hw_rro;
+
return dev->version != 1;
#else
return false;
#endif
}
+static inline bool mtk_wed_is_amsdu_supported(struct mtk_wed_device *dev)
+{
+#ifdef CONFIG_NET_MEDIATEK_SOC_WED
+ return dev->version == 3;
+#else
+ return false;
+#endif
+}
+
#ifdef CONFIG_NET_MEDIATEK_SOC_WED
#define mtk_wed_device_active(_dev) !!(_dev)->ops
#define mtk_wed_device_detach(_dev) (_dev)->ops->detach(_dev)
@@ -242,6 +295,15 @@ mtk_wed_get_rx_capa(struct mtk_wed_device *dev)
#define mtk_wed_device_dma_reset(_dev) (_dev)->ops->reset_dma(_dev)
#define mtk_wed_device_setup_tc(_dev, _netdev, _type, _type_data) \
(_dev)->ops->setup_tc(_dev, _netdev, _type, _type_data)
+#define mtk_wed_device_start_hw_rro(_dev, _mask, _reset) \
+ (_dev)->ops->start_hw_rro(_dev, _mask, _reset)
+#define mtk_wed_device_rro_rx_ring_setup(_dev, _ring, _regs) \
+ (_dev)->ops->rro_rx_ring_setup(_dev, _ring, _regs)
+#define mtk_wed_device_msdu_pg_rx_ring_setup(_dev, _ring, _regs) \
+ (_dev)->ops->msdu_pg_rx_ring_setup(_dev, _ring, _regs)
+#define mtk_wed_device_ind_rx_ring_setup(_dev, _regs) \
+ (_dev)->ops->ind_rx_ring_setup(_dev, _regs)
+
#else
static inline bool mtk_wed_device_active(struct mtk_wed_device *dev)
{
@@ -261,6 +323,10 @@ static inline bool mtk_wed_device_active(struct mtk_wed_device *dev)
#define mtk_wed_device_stop(_dev) do {} while (0)
#define mtk_wed_device_dma_reset(_dev) do {} while (0)
#define mtk_wed_device_setup_tc(_dev, _netdev, _type, _type_data) -EOPNOTSUPP
+#define mtk_wed_device_start_hw_rro(_dev, _mask, _reset) do {} while (0)
+#define mtk_wed_device_rro_rx_ring_setup(_dev, _ring, _regs) -ENODEV
+#define mtk_wed_device_msdu_pg_rx_ring_setup(_dev, _ring, _regs) -ENODEV
+#define mtk_wed_device_ind_rx_ring_setup(_dev, _regs) -ENODEV
#endif
#endif
diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index ce89cc3e4913..c0079a7574ae 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -139,6 +139,7 @@ struct stmmac_rxq_cfg {
struct stmmac_txq_cfg {
u32 weight;
+ bool coe_unsupported;
u8 mode_to_use;
/* Credit Base Shaper parameters */
u32 send_slope;
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 3c5efeeb024f..e15452df9804 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -377,6 +377,14 @@ struct tcp_sock {
* Total data bytes retransmitted
*/
u32 total_retrans; /* Total retransmits for entire connection */
+ u32 rto_stamp; /* Start time (ms) of last CA_Loss recovery */
+ u16 total_rto; /* Total number of RTO timeouts, including
+ * SYN/SYN-ACK and recurring timeouts.
+ */
+ u16 total_rto_recoveries; /* Total number of RTO recoveries,
+ * including any unfinished recovery.
+ */
+ u32 total_rto_time; /* ms spent in (completed) RTO recoveries. */
u32 urg_seq; /* Seq of received urgent pointer */
unsigned int keepalive_time; /* time before keep alive takes place */
@@ -463,15 +471,17 @@ enum tsq_enum {
TCP_MTU_REDUCED_DEFERRED, /* tcp_v{4|6}_err() could not call
* tcp_v{4|6}_mtu_reduced()
*/
+ TCP_ACK_DEFERRED, /* TX pure ack is deferred */
};
enum tsq_flags {
- TSQF_THROTTLED = (1UL << TSQ_THROTTLED),
- TSQF_QUEUED = (1UL << TSQ_QUEUED),
- TCPF_TSQ_DEFERRED = (1UL << TCP_TSQ_DEFERRED),
- TCPF_WRITE_TIMER_DEFERRED = (1UL << TCP_WRITE_TIMER_DEFERRED),
- TCPF_DELACK_TIMER_DEFERRED = (1UL << TCP_DELACK_TIMER_DEFERRED),
- TCPF_MTU_REDUCED_DEFERRED = (1UL << TCP_MTU_REDUCED_DEFERRED),
+ TSQF_THROTTLED = BIT(TSQ_THROTTLED),
+ TSQF_QUEUED = BIT(TSQ_QUEUED),
+ TCPF_TSQ_DEFERRED = BIT(TCP_TSQ_DEFERRED),
+ TCPF_WRITE_TIMER_DEFERRED = BIT(TCP_WRITE_TIMER_DEFERRED),
+ TCPF_DELACK_TIMER_DEFERRED = BIT(TCP_DELACK_TIMER_DEFERRED),
+ TCPF_MTU_REDUCED_DEFERRED = BIT(TCP_MTU_REDUCED_DEFERRED),
+ TCPF_ACK_DEFERRED = BIT(TCP_ACK_DEFERRED),
};
#define tcp_sk(ptr) container_of_const(ptr, struct tcp_sock, inet_conn.icsk_inet.sk)
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 21ae37e49319..5eb88a66eb68 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -761,7 +761,8 @@ struct bpf_raw_event_map *bpf_get_raw_tracepoint(const char *name);
void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp);
int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,
u32 *fd_type, const char **buf,
- u64 *probe_offset, u64 *probe_addr);
+ u64 *probe_offset, u64 *probe_addr,
+ unsigned long *missed);
int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
#else
@@ -801,7 +802,7 @@ static inline void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp)
static inline int bpf_get_perf_event_info(const struct perf_event *event,
u32 *prog_id, u32 *fd_type,
const char **buf, u64 *probe_offset,
- u64 *probe_addr)
+ u64 *probe_addr, unsigned long *missed)
{
return -EOPNOTSUPP;
}
@@ -877,6 +878,7 @@ extern void perf_kprobe_destroy(struct perf_event *event);
extern int bpf_get_kprobe_info(const struct perf_event *event,
u32 *fd_type, const char **symbol,
u64 *probe_offset, u64 *probe_addr,
+ unsigned long *missed,
bool perf_type_tracepoint);
#endif
#ifdef CONFIG_UPROBE_EVENTS
diff --git a/include/linux/udp.h b/include/linux/udp.h
index 43c1fb2d2c21..d04188714dca 100644
--- a/include/linux/udp.h
+++ b/include/linux/udp.h
@@ -32,25 +32,30 @@ static inline u32 udp_hashfn(const struct net *net, u32 num, u32 mask)
return (num + net_hash_mix(net)) & mask;
}
+enum {
+ UDP_FLAGS_CORK, /* Cork is required */
+ UDP_FLAGS_NO_CHECK6_TX, /* Send zero UDP6 checksums on TX? */
+ UDP_FLAGS_NO_CHECK6_RX, /* Allow zero UDP6 checksums on RX? */
+ UDP_FLAGS_GRO_ENABLED, /* Request GRO aggregation */
+ UDP_FLAGS_ACCEPT_FRAGLIST,
+ UDP_FLAGS_ACCEPT_L4,
+ UDP_FLAGS_ENCAP_ENABLED, /* This socket enabled encap */
+ UDP_FLAGS_UDPLITE_SEND_CC, /* set via udplite setsockopt */
+ UDP_FLAGS_UDPLITE_RECV_CC, /* set via udplite setsockopt */
+};
+
struct udp_sock {
/* inet_sock has to be the first member */
struct inet_sock inet;
#define udp_port_hash inet.sk.__sk_common.skc_u16hashes[0]
#define udp_portaddr_hash inet.sk.__sk_common.skc_u16hashes[1]
#define udp_portaddr_node inet.sk.__sk_common.skc_portaddr_node
+
+ unsigned long udp_flags;
+
int pending; /* Any pending frames ? */
- unsigned int corkflag; /* Cork is required */
__u8 encap_type; /* Is this an Encapsulation socket? */
- unsigned char no_check6_tx:1,/* Send zero UDP6 checksums on TX? */
- no_check6_rx:1,/* Allow zero UDP6 checksums on RX? */
- encap_enabled:1, /* This socket enabled encap
- * processing; UDP tunnels and
- * different encapsulation layer set
- * this
- */
- gro_enabled:1, /* Request GRO aggregation */
- accept_udp_l4:1,
- accept_udp_fraglist:1;
+
/*
* Following member retains the information to create a UDP header
* when the socket is uncorked.
@@ -62,12 +67,6 @@ struct udp_sock {
*/
__u16 pcslen;
__u16 pcrlen;
-/* indicator bits used by pcflag: */
-#define UDPLITE_BIT 0x1 /* set by udplite proto init function */
-#define UDPLITE_SEND_CC 0x2 /* set via udplite setsockopt */
-#define UDPLITE_RECV_CC 0x4 /* set via udplite setsocktopt */
- __u8 pcflag; /* marks socket as UDP-Lite if > 0 */
- __u8 unused[3];
/*
* For encapsulation sockets.
*/
@@ -95,28 +94,39 @@ struct udp_sock {
int forward_threshold;
};
+#define udp_test_bit(nr, sk) \
+ test_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags)
+#define udp_set_bit(nr, sk) \
+ set_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags)
+#define udp_test_and_set_bit(nr, sk) \
+ test_and_set_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags)
+#define udp_clear_bit(nr, sk) \
+ clear_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags)
+#define udp_assign_bit(nr, sk, val) \
+ assign_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags, val)
+
#define UDP_MAX_SEGMENTS (1 << 6UL)
#define udp_sk(ptr) container_of_const(ptr, struct udp_sock, inet.sk)
static inline void udp_set_no_check6_tx(struct sock *sk, bool val)
{
- udp_sk(sk)->no_check6_tx = val;
+ udp_assign_bit(NO_CHECK6_TX, sk, val);
}
static inline void udp_set_no_check6_rx(struct sock *sk, bool val)
{
- udp_sk(sk)->no_check6_rx = val;
+ udp_assign_bit(NO_CHECK6_RX, sk, val);
}
-static inline bool udp_get_no_check6_tx(struct sock *sk)
+static inline bool udp_get_no_check6_tx(const struct sock *sk)
{
- return udp_sk(sk)->no_check6_tx;
+ return udp_test_bit(NO_CHECK6_TX, sk);
}
-static inline bool udp_get_no_check6_rx(struct sock *sk)
+static inline bool udp_get_no_check6_rx(const struct sock *sk)
{
- return udp_sk(sk)->no_check6_rx;
+ return udp_test_bit(NO_CHECK6_RX, sk);
}
static inline void udp_cmsg_recv(struct msghdr *msg, struct sock *sk,
@@ -135,10 +145,12 @@ static inline bool udp_unexpected_gso(struct sock *sk, struct sk_buff *skb)
if (!skb_is_gso(skb))
return false;
- if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4 && !udp_sk(sk)->accept_udp_l4)
+ if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4 &&
+ !udp_test_bit(ACCEPT_L4, sk))
return true;
- if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST && !udp_sk(sk)->accept_udp_fraglist)
+ if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST &&
+ !udp_test_bit(ACCEPT_FRAGLIST, sk))
return true;
return false;
@@ -146,8 +158,8 @@ static inline bool udp_unexpected_gso(struct sock *sk, struct sk_buff *skb)
static inline void udp_allow_gso(struct sock *sk)
{
- udp_sk(sk)->accept_udp_l4 = 1;
- udp_sk(sk)->accept_udp_fraglist = 1;
+ udp_set_bit(ACCEPT_L4, sk);
+ udp_set_bit(ACCEPT_FRAGLIST, sk);
}
#define udp_portaddr_for_each_entry(__sk, list) \
diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h
index c58453699ee9..ebb3ce63d64d 100644
--- a/include/linux/virtio_vsock.h
+++ b/include/linux/virtio_vsock.h
@@ -12,6 +12,7 @@
struct virtio_vsock_skb_cb {
bool reply;
bool tap_delivered;
+ u32 offset;
};
#define VIRTIO_VSOCK_SKB_CB(skb) ((struct virtio_vsock_skb_cb *)((skb)->cb))
@@ -159,6 +160,15 @@ struct virtio_transport {
/* Takes ownership of the packet */
int (*send_pkt)(struct sk_buff *skb);
+
+ /* Used in MSG_ZEROCOPY mode. Checks, that provided data
+ * (number of buffers) could be transmitted with zerocopy
+ * mode. If this callback is not implemented for the current
+ * transport - this means that this transport doesn't need
+ * extra checks and can perform zerocopy transmission by
+ * default.
+ */
+ bool (*can_msgzerocopy)(int bufs_num);
};
ssize_t
diff --git a/include/net/devlink.h b/include/net/devlink.h
index 29fd1b4ee654..fad8e36e3d98 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -150,6 +150,7 @@ struct devlink_port {
struct devlink_rate *devlink_rate;
struct devlink_linecard *linecard;
+ u32 rel_index;
};
struct devlink_port_new_attrs {
@@ -1697,6 +1698,8 @@ void devlink_port_attrs_pci_vf_set(struct devlink_port *devlink_port, u32 contro
void devlink_port_attrs_pci_sf_set(struct devlink_port *devlink_port,
u32 controller, u16 pf, u32 sf,
bool external);
+int devl_port_fn_devlink_set(struct devlink_port *devlink_port,
+ struct devlink *fn_devlink);
struct devlink_rate *
devl_rate_node_create(struct devlink *devlink, void *priv, char *node_name,
struct devlink_rate *parent);
@@ -1717,8 +1720,8 @@ void devlink_linecard_provision_clear(struct devlink_linecard *linecard);
void devlink_linecard_provision_fail(struct devlink_linecard *linecard);
void devlink_linecard_activate(struct devlink_linecard *linecard);
void devlink_linecard_deactivate(struct devlink_linecard *linecard);
-void devlink_linecard_nested_dl_set(struct devlink_linecard *linecard,
- struct devlink *nested_devlink);
+int devlink_linecard_nested_dl_set(struct devlink_linecard *linecard,
+ struct devlink *nested_devlink);
int devl_sb_register(struct devlink *devlink, unsigned int sb_index,
u32 size, u16 ingress_pools_count,
u16 egress_pools_count, u16 ingress_tc_count,
@@ -1918,6 +1921,8 @@ devlink_health_reporter_state_update(struct devlink_health_reporter *reporter,
void
devlink_health_reporter_recovery_done(struct devlink_health_reporter *reporter);
+int devl_nested_devlink_set(struct devlink *devlink,
+ struct devlink *nested_devlink);
bool devlink_is_reload_failed(const struct devlink *devlink);
void devlink_remote_reload_actions_performed(struct devlink *devlink,
enum devlink_reload_limit limit,
diff --git a/include/net/dst.h b/include/net/dst.h
index 78884429deed..f8b8599a0600 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -392,10 +392,10 @@ static inline int dst_discard(struct sk_buff *skb)
{
return dst_discard_out(&init_net, skb->sk, skb);
}
-void *dst_alloc(struct dst_ops *ops, struct net_device *dev, int initial_ref,
+void *dst_alloc(struct dst_ops *ops, struct net_device *dev,
int initial_obsolete, unsigned short flags);
void dst_init(struct dst_entry *dst, struct dst_ops *ops,
- struct net_device *dev, int initial_ref, int initial_obsolete,
+ struct net_device *dev, int initial_obsolete,
unsigned short flags);
struct dst_entry *dst_destroy(struct dst_entry *dst);
void dst_dev_put(struct dst_entry *dst);
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index 2de0e4d4a027..98e11958cdff 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -268,6 +268,16 @@ enum {
INET_FLAGS_NODEFRAG = 17,
INET_FLAGS_BIND_ADDRESS_NO_PORT = 18,
INET_FLAGS_DEFER_CONNECT = 19,
+ INET_FLAGS_MC6_LOOP = 20,
+ INET_FLAGS_RECVERR6_RFC4884 = 21,
+ INET_FLAGS_MC6_ALL = 22,
+ INET_FLAGS_AUTOFLOWLABEL_SET = 23,
+ INET_FLAGS_AUTOFLOWLABEL = 24,
+ INET_FLAGS_DONTFRAG = 25,
+ INET_FLAGS_RECVERR6 = 26,
+ INET_FLAGS_REPFLOW = 27,
+ INET_FLAGS_RTALERT_ISOLATE = 28,
+ INET_FLAGS_SNDFLOW = 29,
};
/* cmsg flags for inet */
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index b32539bb0fb0..28b065790261 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -53,13 +53,12 @@ struct route_info {
*/
static inline int rt6_srcprefs2flags(unsigned int srcprefs)
{
- /* No need to bitmask because srcprefs have only 3 bits. */
- return srcprefs << 3;
+ return (srcprefs & IPV6_PREFER_SRC_MASK) << 3;
}
static inline unsigned int rt6_flags2srcprefs(int flags)
{
- return (flags >> 3) & 7;
+ return (flags >> 3) & IPV6_PREFER_SRC_MASK;
}
static inline bool rt6_need_strict(const struct in6_addr *daddr)
@@ -266,7 +265,7 @@ static inline unsigned int ip6_skb_dst_mtu(const struct sk_buff *skb)
const struct dst_entry *dst = skb_dst(skb);
unsigned int mtu;
- if (np && np->pmtudisc >= IPV6_PMTUDISC_PROBE) {
+ if (np && READ_ONCE(np->pmtudisc) >= IPV6_PMTUDISC_PROBE) {
mtu = READ_ONCE(dst->dev->mtu);
mtu -= lwtunnel_headroom(dst->lwtstate, mtu);
} else {
@@ -277,14 +276,18 @@ static inline unsigned int ip6_skb_dst_mtu(const struct sk_buff *skb)
static inline bool ip6_sk_accept_pmtu(const struct sock *sk)
{
- return inet6_sk(sk)->pmtudisc != IPV6_PMTUDISC_INTERFACE &&
- inet6_sk(sk)->pmtudisc != IPV6_PMTUDISC_OMIT;
+ u8 pmtudisc = READ_ONCE(inet6_sk(sk)->pmtudisc);
+
+ return pmtudisc != IPV6_PMTUDISC_INTERFACE &&
+ pmtudisc != IPV6_PMTUDISC_OMIT;
}
static inline bool ip6_sk_ignore_df(const struct sock *sk)
{
- return inet6_sk(sk)->pmtudisc < IPV6_PMTUDISC_DO ||
- inet6_sk(sk)->pmtudisc == IPV6_PMTUDISC_OMIT;
+ u8 pmtudisc = READ_ONCE(inet6_sk(sk)->pmtudisc);
+
+ return pmtudisc < IPV6_PMTUDISC_DO ||
+ pmtudisc == IPV6_PMTUDISC_OMIT;
}
static inline const struct in6_addr *rt6_nexthop(const struct rt6_info *rt,
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index c6932d1a3fa8..b3444c8a6f74 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -373,12 +373,12 @@ static inline void ipcm6_init(struct ipcm6_cookie *ipc6)
}
static inline void ipcm6_init_sk(struct ipcm6_cookie *ipc6,
- const struct ipv6_pinfo *np)
+ const struct sock *sk)
{
*ipc6 = (struct ipcm6_cookie) {
.hlimit = -1,
- .tclass = np->tclass,
- .dontfrag = np->dontfrag,
+ .tclass = inet6_sk(sk)->tclass,
+ .dontfrag = inet6_test_bit(DONTFRAG, sk),
};
}
@@ -428,7 +428,7 @@ int ipv6_flowlabel_opt_get(struct sock *sk, struct in6_flowlabel_req *freq,
int flags);
int ip6_flowlabel_init(void);
void ip6_flowlabel_cleanup(void);
-bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np);
+bool ip6_autoflowlabel(struct net *net, const struct sock *sk);
static inline void fl6_sock_release(struct ip6_flowlabel *fl)
{
@@ -914,9 +914,9 @@ static inline int ip6_sk_dst_hoplimit(struct ipv6_pinfo *np, struct flowi6 *fl6,
int hlimit;
if (ipv6_addr_is_multicast(&fl6->daddr))
- hlimit = np->mcast_hops;
+ hlimit = READ_ONCE(np->mcast_hops);
else
- hlimit = np->hop_limit;
+ hlimit = READ_ONCE(np->hop_limit);
if (hlimit < 0)
hlimit = ip6_dst_hoplimit(dst);
return hlimit;
@@ -1303,15 +1303,16 @@ static inline int ip6_sock_set_v6only(struct sock *sk)
static inline void ip6_sock_set_recverr(struct sock *sk)
{
- lock_sock(sk);
- inet6_sk(sk)->recverr = true;
- release_sock(sk);
+ inet6_set_bit(RECVERR6, sk);
}
-static inline int __ip6_sock_set_addr_preferences(struct sock *sk, int val)
+#define IPV6_PREFER_SRC_MASK (IPV6_PREFER_SRC_TMP | IPV6_PREFER_SRC_PUBLIC | \
+ IPV6_PREFER_SRC_COA)
+
+static inline int ip6_sock_set_addr_preferences(struct sock *sk, int val)
{
+ unsigned int prefmask = ~IPV6_PREFER_SRC_MASK;
unsigned int pref = 0;
- unsigned int prefmask = ~0;
/* check PUBLIC/TMP/PUBTMP_DEFAULT conflicts */
switch (val & (IPV6_PREFER_SRC_PUBLIC |
@@ -1361,20 +1362,11 @@ static inline int __ip6_sock_set_addr_preferences(struct sock *sk, int val)
return -EINVAL;
}
- inet6_sk(sk)->srcprefs = (inet6_sk(sk)->srcprefs & prefmask) | pref;
+ WRITE_ONCE(inet6_sk(sk)->srcprefs,
+ (READ_ONCE(inet6_sk(sk)->srcprefs) & prefmask) | pref);
return 0;
}
-static inline int ip6_sock_set_addr_preferences(struct sock *sk, int val)
-{
- int ret;
-
- lock_sock(sk);
- ret = __ip6_sock_set_addr_preferences(sk, val);
- release_sock(sk);
- return ret;
-}
-
static inline void ip6_sock_set_recvpktinfo(struct sock *sk)
{
lock_sock(sk);
diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h
index c48186bf4737..21da31e1dff5 100644
--- a/include/net/ipv6_stubs.h
+++ b/include/net/ipv6_stubs.h
@@ -85,6 +85,11 @@ struct ipv6_bpf_stub {
sockptr_t optval, unsigned int optlen);
int (*ipv6_getsockopt)(struct sock *sk, int level, int optname,
sockptr_t optval, sockptr_t optlen);
+ int (*ipv6_dev_get_saddr)(struct net *net,
+ const struct net_device *dst_dev,
+ const struct in6_addr *daddr,
+ unsigned int prefs,
+ struct in6_addr *saddr);
};
extern const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly;
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 7a41c4791536..d96d05b08819 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -132,6 +132,7 @@ struct netns_ipv4 {
u8 sysctl_tcp_syncookies;
u8 sysctl_tcp_migrate_req;
u8 sysctl_tcp_comp_sack_nr;
+ u8 sysctl_tcp_backlog_ack_defer;
int sysctl_tcp_reordering;
u8 sysctl_tcp_retries1;
u8 sysctl_tcp_retries2;
diff --git a/include/net/sock.h b/include/net/sock.h
index 92f7ea62a915..3f2ecef884f8 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1821,12 +1821,11 @@ static inline bool sock_owned_by_user_nocheck(const struct sock *sk)
static inline void sock_release_ownership(struct sock *sk)
{
- if (sock_owned_by_user_nocheck(sk)) {
- sk->sk_lock.owned = 0;
+ DEBUG_NET_WARN_ON_ONCE(!sock_owned_by_user_nocheck(sk));
+ sk->sk_lock.owned = 0;
- /* The sk_lock has mutex_unlock() semantics: */
- mutex_release(&sk->sk_lock.dep_map, _RET_IP_);
- }
+ /* The sk_lock has mutex_unlock() semantics: */
+ mutex_release(&sk->sk_lock.dep_map, _RET_IP_);
}
/* no reclassification while locks are held */
@@ -2237,7 +2236,7 @@ static inline void sock_confirm_neigh(struct sk_buff *skb, struct neighbour *n)
}
}
-bool sk_mc_loop(struct sock *sk);
+bool sk_mc_loop(const struct sock *sk);
static inline bool sk_can_gso(const struct sock *sk)
{
diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h
index 0ca9b7a11baf..29251c3519cf 100644
--- a/include/net/udp_tunnel.h
+++ b/include/net/udp_tunnel.h
@@ -174,16 +174,13 @@ static inline int udp_tunnel_handle_offloads(struct sk_buff *skb, bool udp_csum)
}
#endif
-static inline void udp_tunnel_encap_enable(struct socket *sock)
+static inline void udp_tunnel_encap_enable(struct sock *sk)
{
- struct udp_sock *up = udp_sk(sock->sk);
-
- if (up->encap_enabled)
+ if (udp_test_and_set_bit(ENCAP_ENABLED, sk))
return;
- up->encap_enabled = 1;
#if IS_ENABLED(CONFIG_IPV6)
- if (sock->sk->sk_family == PF_INET6)
+ if (READ_ONCE(sk->sk_family) == PF_INET6)
ipv6_stub->udpv6_encap_enable();
#endif
udp_encap_enable();
diff --git a/include/net/udplite.h b/include/net/udplite.h
index bd33ff2b8f42..786919d29f8d 100644
--- a/include/net/udplite.h
+++ b/include/net/udplite.h
@@ -66,14 +66,18 @@ static inline int udplite_checksum_init(struct sk_buff *skb, struct udphdr *uh)
/* Fast-path computation of checksum. Socket may not be locked. */
static inline __wsum udplite_csum(struct sk_buff *skb)
{
- const struct udp_sock *up = udp_sk(skb->sk);
const int off = skb_transport_offset(skb);
+ const struct sock *sk = skb->sk;
int len = skb->len - off;
- if ((up->pcflag & UDPLITE_SEND_CC) && up->pcslen < len) {
- if (0 < up->pcslen)
- len = up->pcslen;
- udp_hdr(skb)->len = htons(up->pcslen);
+ if (udp_test_bit(UDPLITE_SEND_CC, sk)) {
+ u16 pcslen = READ_ONCE(udp_sk(sk)->pcslen);
+
+ if (pcslen < len) {
+ if (pcslen > 0)
+ len = pcslen;
+ udp_hdr(skb)->len = htons(pcslen);
+ }
}
skb->ip_summed = CHECKSUM_NONE; /* no HW support for checksumming */
diff --git a/include/net/xdp.h b/include/net/xdp.h
index de08c8e0d134..349c36fb5fd8 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -383,14 +383,25 @@ void xdp_attachment_setup(struct xdp_attachment_info *info,
#define DEV_MAP_BULK_SIZE XDP_BULK_QUEUE_SIZE
+/* Define the relationship between xdp-rx-metadata kfunc and
+ * various other entities:
+ * - xdp_rx_metadata enum
+ * - netdev netlink enum (Documentation/netlink/specs/netdev.yaml)
+ * - kfunc name
+ * - xdp_metadata_ops field
+ */
#define XDP_METADATA_KFUNC_xxx \
XDP_METADATA_KFUNC(XDP_METADATA_KFUNC_RX_TIMESTAMP, \
- bpf_xdp_metadata_rx_timestamp) \
+ NETDEV_XDP_RX_METADATA_TIMESTAMP, \
+ bpf_xdp_metadata_rx_timestamp, \
+ xmo_rx_timestamp) \
XDP_METADATA_KFUNC(XDP_METADATA_KFUNC_RX_HASH, \
- bpf_xdp_metadata_rx_hash) \
+ NETDEV_XDP_RX_METADATA_HASH, \
+ bpf_xdp_metadata_rx_hash, \
+ xmo_rx_hash) \
-enum {
-#define XDP_METADATA_KFUNC(name, _) name,
+enum xdp_rx_metadata {
+#define XDP_METADATA_KFUNC(name, _, __, ___) name,
XDP_METADATA_KFUNC_xxx
#undef XDP_METADATA_KFUNC
MAX_XDP_METADATA_KFUNC,
diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index 1617af380162..69b472604b86 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -14,6 +14,8 @@
#include <linux/mm.h>
#include <net/sock.h>
+#define XDP_UMEM_SG_FLAG (1 << 1)
+
struct net_device;
struct xsk_queue;
struct xdp_buff;
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 363c7d510554..98d7aa78adda 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -2166,7 +2166,7 @@ static inline bool xfrm6_local_dontfrag(const struct sock *sk)
proto = sk->sk_protocol;
if (proto == IPPROTO_UDP || proto == IPPROTO_RAW)
- return inet6_sk(sk)->dontfrag;
+ return inet6_test_bit(DONTFRAG, sk);
return false;
}
diff --git a/include/trace/events/vsock_virtio_transport_common.h b/include/trace/events/vsock_virtio_transport_common.h
index d0b3f0ea9ba1..f1ebe36787c3 100644
--- a/include/trace/events/vsock_virtio_transport_common.h
+++ b/include/trace/events/vsock_virtio_transport_common.h
@@ -43,7 +43,8 @@ TRACE_EVENT(virtio_transport_alloc_pkt,
__u32 len,
__u16 type,
__u16 op,
- __u32 flags
+ __u32 flags,
+ bool zcopy
),
TP_ARGS(
src_cid, src_port,
@@ -51,7 +52,8 @@ TRACE_EVENT(virtio_transport_alloc_pkt,
len,
type,
op,
- flags
+ flags,
+ zcopy
),
TP_STRUCT__entry(
__field(__u32, src_cid)
@@ -62,6 +64,7 @@ TRACE_EVENT(virtio_transport_alloc_pkt,
__field(__u16, type)
__field(__u16, op)
__field(__u32, flags)
+ __field(bool, zcopy)
),
TP_fast_assign(
__entry->src_cid = src_cid;
@@ -72,14 +75,15 @@ TRACE_EVENT(virtio_transport_alloc_pkt,
__entry->type = type;
__entry->op = op;
__entry->flags = flags;
+ __entry->zcopy = zcopy;
),
- TP_printk("%u:%u -> %u:%u len=%u type=%s op=%s flags=%#x",
+ TP_printk("%u:%u -> %u:%u len=%u type=%s op=%s flags=%#x zcopy=%s",
__entry->src_cid, __entry->src_port,
__entry->dst_cid, __entry->dst_port,
__entry->len,
show_type(__entry->type),
show_op(__entry->op),
- __entry->flags)
+ __entry->flags, __entry->zcopy ? "true" : "false")
);
TRACE_EVENT(virtio_transport_recv_pkt,
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 0448700890f7..7ba61b75bc0e 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -932,7 +932,14 @@ enum bpf_map_type {
*/
BPF_MAP_TYPE_CGROUP_STORAGE = BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED,
BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
- BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
+ BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE_DEPRECATED,
+ /* BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE is available to bpf programs
+ * attaching to a cgroup. The new mechanism (BPF_MAP_TYPE_CGRP_STORAGE +
+ * local percpu kptr) supports all BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE
+ * functionality and more. So mark * BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE
+ * deprecated.
+ */
+ BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE = BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE_DEPRECATED,
BPF_MAP_TYPE_QUEUE,
BPF_MAP_TYPE_STACK,
BPF_MAP_TYPE_SK_STORAGE,
@@ -1040,6 +1047,11 @@ enum bpf_attach_type {
BPF_TCX_INGRESS,
BPF_TCX_EGRESS,
BPF_TRACE_UPROBE_MULTI,
+ BPF_CGROUP_UNIX_CONNECT,
+ BPF_CGROUP_UNIX_SENDMSG,
+ BPF_CGROUP_UNIX_RECVMSG,
+ BPF_CGROUP_UNIX_GETPEERNAME,
+ BPF_CGROUP_UNIX_GETSOCKNAME,
__MAX_BPF_ATTACH_TYPE
};
@@ -2697,8 +2709,8 @@ union bpf_attr {
* *bpf_socket* should be one of the following:
*
* * **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**.
- * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**
- * and **BPF_CGROUP_INET6_CONNECT**.
+ * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**,
+ * **BPF_CGROUP_INET6_CONNECT** and **BPF_CGROUP_UNIX_CONNECT**.
*
* This helper actually implements a subset of **setsockopt()**.
* It supports the following *level*\ s:
@@ -2936,8 +2948,8 @@ union bpf_attr {
* *bpf_socket* should be one of the following:
*
* * **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**.
- * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**
- * and **BPF_CGROUP_INET6_CONNECT**.
+ * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**,
+ * **BPF_CGROUP_INET6_CONNECT** and **BPF_CGROUP_UNIX_CONNECT**.
*
* This helper actually implements a subset of **getsockopt()**.
* It supports the same set of *optname*\ s that is supported by
@@ -3257,6 +3269,11 @@ union bpf_attr {
* and *params*->smac will not be set as output. A common
* use case is to call **bpf_redirect_neigh**\ () after
* doing **bpf_fib_lookup**\ ().
+ * **BPF_FIB_LOOKUP_SRC**
+ * Derive and set source IP addr in *params*->ipv{4,6}_src
+ * for the nexthop. If the src addr cannot be derived,
+ * **BPF_FIB_LKUP_RET_NO_SRC_ADDR** is returned. In this
+ * case, *params*->dmac and *params*->smac are not set either.
*
* *ctx* is either **struct xdp_md** for XDP programs or
* **struct sk_buff** tc cls_act programs.
@@ -5089,6 +5106,8 @@ union bpf_attr {
* **BPF_F_TIMER_ABS**
* Start the timer in absolute expire value instead of the
* default relative one.
+ * **BPF_F_TIMER_CPU_PIN**
+ * Timer will be pinned to the CPU of the caller.
*
* Return
* 0 on success.
@@ -6525,6 +6544,7 @@ struct bpf_link_info {
__aligned_u64 addrs;
__u32 count; /* in/out: kprobe_multi function count */
__u32 flags;
+ __u64 missed;
} kprobe_multi;
struct {
__u32 type; /* enum bpf_perf_event_type */
@@ -6540,6 +6560,7 @@ struct bpf_link_info {
__u32 name_len;
__u32 offset; /* offset from func_name */
__u64 addr;
+ __u64 missed;
} kprobe; /* BPF_PERF_EVENT_KPROBE, BPF_PERF_EVENT_KRETPROBE */
struct {
__aligned_u64 tp_name; /* in/out */
@@ -6953,6 +6974,7 @@ enum {
BPF_FIB_LOOKUP_OUTPUT = (1U << 1),
BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2),
BPF_FIB_LOOKUP_TBID = (1U << 3),
+ BPF_FIB_LOOKUP_SRC = (1U << 4),
};
enum {
@@ -6965,6 +6987,7 @@ enum {
BPF_FIB_LKUP_RET_UNSUPP_LWT, /* fwd requires encapsulation */
BPF_FIB_LKUP_RET_NO_NEIGH, /* no neighbor entry for nh */
BPF_FIB_LKUP_RET_FRAG_NEEDED, /* fragmentation required to fwd */
+ BPF_FIB_LKUP_RET_NO_SRC_ADDR, /* failed to derive IP src addr */
};
struct bpf_fib_lookup {
@@ -6999,6 +7022,9 @@ struct bpf_fib_lookup {
__u32 rt_metric;
};
+ /* input: source address to consider for lookup
+ * output: source address result from lookup
+ */
union {
__be32 ipv4_src;
__u32 ipv6_src[4]; /* in6_addr; network order */
@@ -7300,9 +7326,11 @@ struct bpf_core_relo {
* Flags to control bpf_timer_start() behaviour.
* - BPF_F_TIMER_ABS: Timeout passed is absolute time, by default it is
* relative to current time.
+ * - BPF_F_TIMER_CPU_PIN: Timer will be pinned to the CPU of the caller.
*/
enum {
BPF_F_TIMER_ABS = (1ULL << 0),
+ BPF_F_TIMER_CPU_PIN = (1ULL << 1),
};
/* BPF numbers iterator state */
diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index 03875e078be8..cd4b82458d1b 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -680,6 +680,7 @@ enum devlink_port_function_attr {
DEVLINK_PORT_FN_ATTR_STATE, /* u8 */
DEVLINK_PORT_FN_ATTR_OPSTATE, /* u8 */
DEVLINK_PORT_FN_ATTR_CAPS, /* bitfield32 */
+ DEVLINK_PORT_FN_ATTR_DEVLINK, /* nested */
__DEVLINK_PORT_FUNCTION_ATTR_MAX,
DEVLINK_PORT_FUNCTION_ATTR_MAX = __DEVLINK_PORT_FUNCTION_ATTR_MAX - 1
diff --git a/include/uapi/linux/dpll.h b/include/uapi/linux/dpll.h
new file mode 100644
index 000000000000..20ef0718f8dc
--- /dev/null
+++ b/include/uapi/linux/dpll.h
@@ -0,0 +1,201 @@
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
+/* Do not edit directly, auto-generated from: */
+/* Documentation/netlink/specs/dpll.yaml */
+/* YNL-GEN uapi header */
+
+#ifndef _UAPI_LINUX_DPLL_H
+#define _UAPI_LINUX_DPLL_H
+
+#define DPLL_FAMILY_NAME "dpll"
+#define DPLL_FAMILY_VERSION 1
+
+/**
+ * enum dpll_mode - working modes a dpll can support, differentiates if and how
+ * dpll selects one of its inputs to syntonize with it, valid values for
+ * DPLL_A_MODE attribute
+ * @DPLL_MODE_MANUAL: input can be only selected by sending a request to dpll
+ * @DPLL_MODE_AUTOMATIC: highest prio input pin auto selected by dpll
+ */
+enum dpll_mode {
+ DPLL_MODE_MANUAL = 1,
+ DPLL_MODE_AUTOMATIC,
+
+ /* private: */
+ __DPLL_MODE_MAX,
+ DPLL_MODE_MAX = (__DPLL_MODE_MAX - 1)
+};
+
+/**
+ * enum dpll_lock_status - provides information of dpll device lock status,
+ * valid values for DPLL_A_LOCK_STATUS attribute
+ * @DPLL_LOCK_STATUS_UNLOCKED: dpll was not yet locked to any valid input (or
+ * forced by setting DPLL_A_MODE to DPLL_MODE_DETACHED)
+ * @DPLL_LOCK_STATUS_LOCKED: dpll is locked to a valid signal, but no holdover
+ * available
+ * @DPLL_LOCK_STATUS_LOCKED_HO_ACQ: dpll is locked and holdover acquired
+ * @DPLL_LOCK_STATUS_HOLDOVER: dpll is in holdover state - lost a valid lock or
+ * was forced by disconnecting all the pins (latter possible only when dpll
+ * lock-state was already DPLL_LOCK_STATUS_LOCKED_HO_ACQ, if dpll lock-state
+ * was not DPLL_LOCK_STATUS_LOCKED_HO_ACQ, the dpll's lock-state shall remain
+ * DPLL_LOCK_STATUS_UNLOCKED)
+ */
+enum dpll_lock_status {
+ DPLL_LOCK_STATUS_UNLOCKED = 1,
+ DPLL_LOCK_STATUS_LOCKED,
+ DPLL_LOCK_STATUS_LOCKED_HO_ACQ,
+ DPLL_LOCK_STATUS_HOLDOVER,
+
+ /* private: */
+ __DPLL_LOCK_STATUS_MAX,
+ DPLL_LOCK_STATUS_MAX = (__DPLL_LOCK_STATUS_MAX - 1)
+};
+
+#define DPLL_TEMP_DIVIDER 1000
+
+/**
+ * enum dpll_type - type of dpll, valid values for DPLL_A_TYPE attribute
+ * @DPLL_TYPE_PPS: dpll produces Pulse-Per-Second signal
+ * @DPLL_TYPE_EEC: dpll drives the Ethernet Equipment Clock
+ */
+enum dpll_type {
+ DPLL_TYPE_PPS = 1,
+ DPLL_TYPE_EEC,
+
+ /* private: */
+ __DPLL_TYPE_MAX,
+ DPLL_TYPE_MAX = (__DPLL_TYPE_MAX - 1)
+};
+
+/**
+ * enum dpll_pin_type - defines possible types of a pin, valid values for
+ * DPLL_A_PIN_TYPE attribute
+ * @DPLL_PIN_TYPE_MUX: aggregates another layer of selectable pins
+ * @DPLL_PIN_TYPE_EXT: external input
+ * @DPLL_PIN_TYPE_SYNCE_ETH_PORT: ethernet port PHY's recovered clock
+ * @DPLL_PIN_TYPE_INT_OSCILLATOR: device internal oscillator
+ * @DPLL_PIN_TYPE_GNSS: GNSS recovered clock
+ */
+enum dpll_pin_type {
+ DPLL_PIN_TYPE_MUX = 1,
+ DPLL_PIN_TYPE_EXT,
+ DPLL_PIN_TYPE_SYNCE_ETH_PORT,
+ DPLL_PIN_TYPE_INT_OSCILLATOR,
+ DPLL_PIN_TYPE_GNSS,
+
+ /* private: */
+ __DPLL_PIN_TYPE_MAX,
+ DPLL_PIN_TYPE_MAX = (__DPLL_PIN_TYPE_MAX - 1)
+};
+
+/**
+ * enum dpll_pin_direction - defines possible direction of a pin, valid values
+ * for DPLL_A_PIN_DIRECTION attribute
+ * @DPLL_PIN_DIRECTION_INPUT: pin used as a input of a signal
+ * @DPLL_PIN_DIRECTION_OUTPUT: pin used to output the signal
+ */
+enum dpll_pin_direction {
+ DPLL_PIN_DIRECTION_INPUT = 1,
+ DPLL_PIN_DIRECTION_OUTPUT,
+
+ /* private: */
+ __DPLL_PIN_DIRECTION_MAX,
+ DPLL_PIN_DIRECTION_MAX = (__DPLL_PIN_DIRECTION_MAX - 1)
+};
+
+#define DPLL_PIN_FREQUENCY_1_HZ 1
+#define DPLL_PIN_FREQUENCY_10_KHZ 10000
+#define DPLL_PIN_FREQUENCY_77_5_KHZ 77500
+#define DPLL_PIN_FREQUENCY_10_MHZ 10000000
+
+/**
+ * enum dpll_pin_state - defines possible states of a pin, valid values for
+ * DPLL_A_PIN_STATE attribute
+ * @DPLL_PIN_STATE_CONNECTED: pin connected, active input of phase locked loop
+ * @DPLL_PIN_STATE_DISCONNECTED: pin disconnected, not considered as a valid
+ * input
+ * @DPLL_PIN_STATE_SELECTABLE: pin enabled for automatic input selection
+ */
+enum dpll_pin_state {
+ DPLL_PIN_STATE_CONNECTED = 1,
+ DPLL_PIN_STATE_DISCONNECTED,
+ DPLL_PIN_STATE_SELECTABLE,
+
+ /* private: */
+ __DPLL_PIN_STATE_MAX,
+ DPLL_PIN_STATE_MAX = (__DPLL_PIN_STATE_MAX - 1)
+};
+
+/**
+ * enum dpll_pin_capabilities - defines possible capabilities of a pin, valid
+ * flags on DPLL_A_PIN_CAPABILITIES attribute
+ * @DPLL_PIN_CAPABILITIES_DIRECTION_CAN_CHANGE: pin direction can be changed
+ * @DPLL_PIN_CAPABILITIES_PRIORITY_CAN_CHANGE: pin priority can be changed
+ * @DPLL_PIN_CAPABILITIES_STATE_CAN_CHANGE: pin state can be changed
+ */
+enum dpll_pin_capabilities {
+ DPLL_PIN_CAPABILITIES_DIRECTION_CAN_CHANGE = 1,
+ DPLL_PIN_CAPABILITIES_PRIORITY_CAN_CHANGE = 2,
+ DPLL_PIN_CAPABILITIES_STATE_CAN_CHANGE = 4,
+};
+
+enum dpll_a {
+ DPLL_A_ID = 1,
+ DPLL_A_MODULE_NAME,
+ DPLL_A_PAD,
+ DPLL_A_CLOCK_ID,
+ DPLL_A_MODE,
+ DPLL_A_MODE_SUPPORTED,
+ DPLL_A_LOCK_STATUS,
+ DPLL_A_TEMP,
+ DPLL_A_TYPE,
+
+ __DPLL_A_MAX,
+ DPLL_A_MAX = (__DPLL_A_MAX - 1)
+};
+
+enum dpll_a_pin {
+ DPLL_A_PIN_ID = 1,
+ DPLL_A_PIN_PARENT_ID,
+ DPLL_A_PIN_MODULE_NAME,
+ DPLL_A_PIN_PAD,
+ DPLL_A_PIN_CLOCK_ID,
+ DPLL_A_PIN_BOARD_LABEL,
+ DPLL_A_PIN_PANEL_LABEL,
+ DPLL_A_PIN_PACKAGE_LABEL,
+ DPLL_A_PIN_TYPE,
+ DPLL_A_PIN_DIRECTION,
+ DPLL_A_PIN_FREQUENCY,
+ DPLL_A_PIN_FREQUENCY_SUPPORTED,
+ DPLL_A_PIN_FREQUENCY_MIN,
+ DPLL_A_PIN_FREQUENCY_MAX,
+ DPLL_A_PIN_PRIO,
+ DPLL_A_PIN_STATE,
+ DPLL_A_PIN_CAPABILITIES,
+ DPLL_A_PIN_PARENT_DEVICE,
+ DPLL_A_PIN_PARENT_PIN,
+
+ __DPLL_A_PIN_MAX,
+ DPLL_A_PIN_MAX = (__DPLL_A_PIN_MAX - 1)
+};
+
+enum dpll_cmd {
+ DPLL_CMD_DEVICE_ID_GET = 1,
+ DPLL_CMD_DEVICE_GET,
+ DPLL_CMD_DEVICE_SET,
+ DPLL_CMD_DEVICE_CREATE_NTF,
+ DPLL_CMD_DEVICE_DELETE_NTF,
+ DPLL_CMD_DEVICE_CHANGE_NTF,
+ DPLL_CMD_PIN_ID_GET,
+ DPLL_CMD_PIN_GET,
+ DPLL_CMD_PIN_SET,
+ DPLL_CMD_PIN_CREATE_NTF,
+ DPLL_CMD_PIN_DELETE_NTF,
+ DPLL_CMD_PIN_CHANGE_NTF,
+
+ __DPLL_CMD_MAX,
+ DPLL_CMD_MAX = (__DPLL_CMD_MAX - 1)
+};
+
+#define DPLL_MCGRP_MONITOR "monitor"
+
+#endif /* _UAPI_LINUX_DPLL_H */
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index ce3117df9cec..fac351a93aed 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -376,7 +376,7 @@ enum {
IFLA_GSO_IPV4_MAX_SIZE,
IFLA_GRO_IPV4_MAX_SIZE,
-
+ IFLA_DPLL_PIN,
__IFLA_MAX
};
diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h
index c1634b95c223..2943a151d4f1 100644
--- a/include/uapi/linux/netdev.h
+++ b/include/uapi/linux/netdev.h
@@ -38,11 +38,27 @@ enum netdev_xdp_act {
NETDEV_XDP_ACT_MASK = 127,
};
+/**
+ * enum netdev_xdp_rx_metadata
+ * @NETDEV_XDP_RX_METADATA_TIMESTAMP: Device is capable of exposing receive HW
+ * timestamp via bpf_xdp_metadata_rx_timestamp().
+ * @NETDEV_XDP_RX_METADATA_HASH: Device is capable of exposing receive packet
+ * hash via bpf_xdp_metadata_rx_hash().
+ */
+enum netdev_xdp_rx_metadata {
+ NETDEV_XDP_RX_METADATA_TIMESTAMP = 1,
+ NETDEV_XDP_RX_METADATA_HASH = 2,
+
+ /* private: */
+ NETDEV_XDP_RX_METADATA_MASK = 3,
+};
+
enum {
NETDEV_A_DEV_IFINDEX = 1,
NETDEV_A_DEV_PAD,
NETDEV_A_DEV_XDP_FEATURES,
NETDEV_A_DEV_XDP_ZC_MAX_SEGS,
+ NETDEV_A_DEV_XDP_RX_METADATA_FEATURES,
__NETDEV_A_DEV_MAX,
NETDEV_A_DEV_MAX = (__NETDEV_A_DEV_MAX - 1)
diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
index 879eeb0a084b..d1d08da6331a 100644
--- a/include/uapi/linux/tcp.h
+++ b/include/uapi/linux/tcp.h
@@ -289,6 +289,18 @@ struct tcp_info {
*/
__u32 tcpi_rehash; /* PLB or timeout triggered rehash attempts */
+
+ __u16 tcpi_total_rto; /* Total number of RTO timeouts, including
+ * SYN/SYN-ACK and recurring timeouts.
+ */
+ __u16 tcpi_total_rto_recoveries; /* Total number of RTO
+ * recoveries, including any
+ * unfinished recovery.
+ */
+ __u32 tcpi_total_rto_time; /* Total time spent in RTO recoveries
+ * in milliseconds, including any
+ * unfinished recovery.
+ */
};
/* netlink attributes types for SCM_TIMESTAMPING_OPT_STATS */
diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
index 96856f130cbf..833faa04461b 100644
--- a/kernel/bpf/bpf_iter.c
+++ b/kernel/bpf/bpf_iter.c
@@ -793,8 +793,6 @@ __bpf_kfunc int bpf_iter_num_new(struct bpf_iter_num *it, int start, int end)
BUILD_BUG_ON(sizeof(struct bpf_iter_num_kern) != sizeof(struct bpf_iter_num));
BUILD_BUG_ON(__alignof__(struct bpf_iter_num_kern) != __alignof__(struct bpf_iter_num));
- BTF_TYPE_EMIT(struct btf_iter_num);
-
/* start == end is legit, it's an empty range and we'll just get NULL
* on first (and any subsequent) bpf_iter_num_next() call
*/
diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
index fdc3e8705a3c..db6176fb64dc 100644
--- a/kernel/bpf/bpf_struct_ops.c
+++ b/kernel/bpf/bpf_struct_ops.c
@@ -615,7 +615,10 @@ static void __bpf_struct_ops_map_free(struct bpf_map *map)
if (st_map->links)
bpf_struct_ops_map_put_progs(st_map);
bpf_map_area_free(st_map->links);
- bpf_jit_free_exec(st_map->image);
+ if (st_map->image) {
+ bpf_jit_free_exec(st_map->image);
+ bpf_jit_uncharge_modmem(PAGE_SIZE);
+ }
bpf_map_area_free(st_map->uvalue);
bpf_map_area_free(st_map);
}
@@ -657,6 +660,7 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
struct bpf_struct_ops_map *st_map;
const struct btf_type *t, *vt;
struct bpf_map *map;
+ int ret;
st_ops = bpf_struct_ops_find_value(attr->btf_vmlinux_value_type_id);
if (!st_ops)
@@ -681,12 +685,27 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
st_map->st_ops = st_ops;
map = &st_map->map;
+ ret = bpf_jit_charge_modmem(PAGE_SIZE);
+ if (ret) {
+ __bpf_struct_ops_map_free(map);
+ return ERR_PTR(ret);
+ }
+
+ st_map->image = bpf_jit_alloc_exec(PAGE_SIZE);
+ if (!st_map->image) {
+ /* __bpf_struct_ops_map_free() uses st_map->image as flag
+ * for "charged or not". In this case, we need to unchange
+ * here.
+ */
+ bpf_jit_uncharge_modmem(PAGE_SIZE);
+ __bpf_struct_ops_map_free(map);
+ return ERR_PTR(-ENOMEM);
+ }
st_map->uvalue = bpf_map_area_alloc(vt->size, NUMA_NO_NODE);
st_map->links =
bpf_map_area_alloc(btf_type_vlen(t) * sizeof(struct bpf_links *),
NUMA_NO_NODE);
- st_map->image = bpf_jit_alloc_exec(PAGE_SIZE);
- if (!st_map->uvalue || !st_map->links || !st_map->image) {
+ if (!st_map->uvalue || !st_map->links) {
__bpf_struct_ops_map_free(map);
return ERR_PTR(-ENOMEM);
}
@@ -907,4 +926,3 @@ err_out:
kfree(link);
return err;
}
-
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 8090d7fb11ef..15d71d2986d3 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -3293,6 +3293,8 @@ static int btf_find_kptr(const struct btf *btf, const struct btf_type *t,
type = BPF_KPTR_UNREF;
else if (!strcmp("kptr", __btf_name_by_offset(btf, t->name_off)))
type = BPF_KPTR_REF;
+ else if (!strcmp("percpu_kptr", __btf_name_by_offset(btf, t->name_off)))
+ type = BPF_KPTR_PERCPU;
else
return -EINVAL;
@@ -3308,10 +3310,10 @@ static int btf_find_kptr(const struct btf *btf, const struct btf_type *t,
return BTF_FIELD_FOUND;
}
-static const char *btf_find_decl_tag_value(const struct btf *btf,
- const struct btf_type *pt,
- int comp_idx, const char *tag_key)
+const char *btf_find_decl_tag_value(const struct btf *btf, const struct btf_type *pt,
+ int comp_idx, const char *tag_key)
{
+ const char *value = NULL;
int i;
for (i = 1; i < btf_nr_types(btf); i++) {
@@ -3325,9 +3327,14 @@ static const char *btf_find_decl_tag_value(const struct btf *btf,
continue;
if (strncmp(__btf_name_by_offset(btf, t->name_off), tag_key, len))
continue;
- return __btf_name_by_offset(btf, t->name_off) + len;
+ /* Prevent duplicate entries for same type */
+ if (value)
+ return ERR_PTR(-EEXIST);
+ value = __btf_name_by_offset(btf, t->name_off) + len;
}
- return NULL;
+ if (!value)
+ return ERR_PTR(-ENOENT);
+ return value;
}
static int
@@ -3345,7 +3352,7 @@ btf_find_graph_root(const struct btf *btf, const struct btf_type *pt,
if (t->size != sz)
return BTF_FIELD_IGNORE;
value_type = btf_find_decl_tag_value(btf, pt, comp_idx, "contains:");
- if (!value_type)
+ if (IS_ERR(value_type))
return -EINVAL;
node_field_name = strstr(value_type, ":");
if (!node_field_name)
@@ -3457,6 +3464,7 @@ static int btf_find_struct_field(const struct btf *btf,
break;
case BPF_KPTR_UNREF:
case BPF_KPTR_REF:
+ case BPF_KPTR_PERCPU:
ret = btf_find_kptr(btf, member_type, off, sz,
idx < info_cnt ? &info[idx] : &tmp);
if (ret < 0)
@@ -3523,6 +3531,7 @@ static int btf_find_datasec_var(const struct btf *btf, const struct btf_type *t,
break;
case BPF_KPTR_UNREF:
case BPF_KPTR_REF:
+ case BPF_KPTR_PERCPU:
ret = btf_find_kptr(btf, var_type, off, sz,
idx < info_cnt ? &info[idx] : &tmp);
if (ret < 0)
@@ -3783,6 +3792,7 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type
break;
case BPF_KPTR_UNREF:
case BPF_KPTR_REF:
+ case BPF_KPTR_PERCPU:
ret = btf_parse_kptr(btf, &rec->fields[i], &info_arr[i]);
if (ret < 0)
goto end;
@@ -6949,7 +6959,7 @@ int btf_check_subprog_call(struct bpf_verifier_env *env, int subprog,
* (either PTR_TO_CTX or SCALAR_VALUE).
*/
int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog,
- struct bpf_reg_state *regs)
+ struct bpf_reg_state *regs, bool is_ex_cb)
{
struct bpf_verifier_log *log = &env->log;
struct bpf_prog *prog = env->prog;
@@ -7006,7 +7016,7 @@ int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog,
tname, nargs, MAX_BPF_FUNC_REG_ARGS);
return -EINVAL;
}
- /* check that function returns int */
+ /* check that function returns int, exception cb also requires this */
t = btf_type_by_id(btf, t->type);
while (btf_type_is_modifier(t))
t = btf_type_by_id(btf, t->type);
@@ -7055,6 +7065,14 @@ int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog,
i, btf_type_str(t), tname);
return -EINVAL;
}
+ /* We have already ensured that the callback returns an integer, just
+ * like all global subprogs. We need to determine it only has a single
+ * scalar argument.
+ */
+ if (is_ex_cb && (nargs != 1 || regs[BPF_REG_1].type != SCALAR_VALUE)) {
+ bpf_log(log, "exception cb only supports single integer argument\n");
+ return -EINVAL;
+ }
return 0;
}
@@ -7832,6 +7850,7 @@ static int bpf_prog_type_to_kfunc_hook(enum bpf_prog_type prog_type)
case BPF_PROG_TYPE_SYSCALL:
return BTF_KFUNC_HOOK_SYSCALL;
case BPF_PROG_TYPE_CGROUP_SKB:
+ case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
return BTF_KFUNC_HOOK_CGROUP_SKB;
case BPF_PROG_TYPE_SCHED_ACT:
return BTF_KFUNC_HOOK_SCHED_ACT;
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 03b3d4492980..74ad2215e1ba 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -1450,18 +1450,22 @@ EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
* provided by user sockaddr
* @sk: sock struct that will use sockaddr
* @uaddr: sockaddr struct provided by user
+ * @uaddrlen: Pointer to the size of the sockaddr struct provided by user. It is
+ * read-only for AF_INET[6] uaddr but can be modified for AF_UNIX
+ * uaddr.
* @atype: The type of program to be executed
* @t_ctx: Pointer to attach type specific context
* @flags: Pointer to u32 which contains higher bits of BPF program
* return value (OR'ed together).
*
- * socket is expected to be of type INET or INET6.
+ * socket is expected to be of type INET, INET6 or UNIX.
*
* This function will return %-EPERM if an attached program is found and
* returned value != 1 during execution. In all other cases, 0 is returned.
*/
int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
struct sockaddr *uaddr,
+ int *uaddrlen,
enum cgroup_bpf_attach_type atype,
void *t_ctx,
u32 *flags)
@@ -1473,21 +1477,31 @@ int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
};
struct sockaddr_storage unspec;
struct cgroup *cgrp;
+ int ret;
/* Check socket family since not all sockets represent network
* endpoint (e.g. AF_UNIX).
*/
- if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6)
+ if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6 &&
+ sk->sk_family != AF_UNIX)
return 0;
if (!ctx.uaddr) {
memset(&unspec, 0, sizeof(unspec));
ctx.uaddr = (struct sockaddr *)&unspec;
+ ctx.uaddrlen = 0;
+ } else {
+ ctx.uaddrlen = *uaddrlen;
}
cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
- return bpf_prog_run_array_cg(&cgrp->bpf, atype, &ctx, bpf_prog_run,
- 0, flags);
+ ret = bpf_prog_run_array_cg(&cgrp->bpf, atype, &ctx, bpf_prog_run,
+ 0, flags);
+
+ if (!ret && uaddr)
+ *uaddrlen = ctx.uaddrlen;
+
+ return ret;
}
EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_addr);
@@ -2520,10 +2534,13 @@ cgroup_common_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_CGROUP_SOCK_OPS:
case BPF_CGROUP_UDP4_RECVMSG:
case BPF_CGROUP_UDP6_RECVMSG:
+ case BPF_CGROUP_UNIX_RECVMSG:
case BPF_CGROUP_INET4_GETPEERNAME:
case BPF_CGROUP_INET6_GETPEERNAME:
+ case BPF_CGROUP_UNIX_GETPEERNAME:
case BPF_CGROUP_INET4_GETSOCKNAME:
case BPF_CGROUP_INET6_GETSOCKNAME:
+ case BPF_CGROUP_UNIX_GETSOCKNAME:
return NULL;
default:
return &bpf_get_retval_proto;
@@ -2535,10 +2552,13 @@ cgroup_common_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_CGROUP_SOCK_OPS:
case BPF_CGROUP_UDP4_RECVMSG:
case BPF_CGROUP_UDP6_RECVMSG:
+ case BPF_CGROUP_UNIX_RECVMSG:
case BPF_CGROUP_INET4_GETPEERNAME:
case BPF_CGROUP_INET6_GETPEERNAME:
+ case BPF_CGROUP_UNIX_GETPEERNAME:
case BPF_CGROUP_INET4_GETSOCKNAME:
case BPF_CGROUP_INET6_GETSOCKNAME:
+ case BPF_CGROUP_UNIX_GETSOCKNAME:
return NULL;
default:
return &bpf_set_retval_proto;
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 4e3ce0542e31..08626b519ce2 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -64,8 +64,8 @@
#define OFF insn->off
#define IMM insn->imm
-struct bpf_mem_alloc bpf_global_ma;
-bool bpf_global_ma_set;
+struct bpf_mem_alloc bpf_global_ma, bpf_global_percpu_ma;
+bool bpf_global_ma_set, bpf_global_percpu_ma_set;
/* No hurry in this branch
*
@@ -212,7 +212,7 @@ void bpf_prog_fill_jited_linfo(struct bpf_prog *prog,
const struct bpf_line_info *linfo;
void **jited_linfo;
- if (!prog->aux->jited_linfo)
+ if (!prog->aux->jited_linfo || prog->aux->func_idx > prog->aux->func_cnt)
/* Userspace did not provide linfo */
return;
@@ -539,7 +539,7 @@ static void bpf_prog_kallsyms_del_subprogs(struct bpf_prog *fp)
{
int i;
- for (i = 0; i < fp->aux->func_cnt; i++)
+ for (i = 0; i < fp->aux->real_func_cnt; i++)
bpf_prog_kallsyms_del(fp->aux->func[i]);
}
@@ -589,7 +589,7 @@ bpf_prog_ksym_set_name(struct bpf_prog *prog)
sym = bin2hex(sym, prog->tag, sizeof(prog->tag));
/* prog->aux->name will be ignored if full btf name is available */
- if (prog->aux->func_info_cnt) {
+ if (prog->aux->func_info_cnt && prog->aux->func_idx < prog->aux->func_info_cnt) {
type = btf_type_by_id(prog->aux->btf,
prog->aux->func_info[prog->aux->func_idx].type_id);
func_name = btf_name_by_offset(prog->aux->btf, type->name_off);
@@ -623,7 +623,11 @@ static __always_inline int bpf_tree_comp(void *key, struct latch_tree_node *n)
if (val < ksym->start)
return -1;
- if (val >= ksym->end)
+ /* Ensure that we detect return addresses as part of the program, when
+ * the final instruction is a call for a program part of the stack
+ * trace. Therefore, do val > ksym->end instead of val >= ksym->end.
+ */
+ if (val > ksym->end)
return 1;
return 0;
@@ -733,7 +737,7 @@ bool is_bpf_text_address(unsigned long addr)
return ret;
}
-static struct bpf_prog *bpf_prog_ksym_find(unsigned long addr)
+struct bpf_prog *bpf_prog_ksym_find(unsigned long addr)
{
struct bpf_ksym *ksym = bpf_ksym_find(addr);
@@ -1208,7 +1212,7 @@ int bpf_jit_get_func_addr(const struct bpf_prog *prog,
if (!extra_pass)
addr = NULL;
else if (prog->aux->func &&
- off >= 0 && off < prog->aux->func_cnt)
+ off >= 0 && off < prog->aux->real_func_cnt)
addr = (u8 *)prog->aux->func[off]->bpf_func;
else
return -EINVAL;
@@ -2721,7 +2725,7 @@ static void bpf_prog_free_deferred(struct work_struct *work)
#endif
if (aux->dst_trampoline)
bpf_trampoline_put(aux->dst_trampoline);
- for (i = 0; i < aux->func_cnt; i++) {
+ for (i = 0; i < aux->real_func_cnt; i++) {
/* We can just unlink the subprog poke descriptor table as
* it was originally linked to the main program and is also
* released along with it.
@@ -2729,7 +2733,7 @@ static void bpf_prog_free_deferred(struct work_struct *work)
aux->func[i]->aux->poke_tab = NULL;
bpf_jit_free(aux->func[i]);
}
- if (aux->func_cnt) {
+ if (aux->real_func_cnt) {
kfree(aux->func);
bpf_prog_unlock_free(aux->prog);
} else {
@@ -2914,6 +2918,15 @@ int __weak bpf_arch_text_invalidate(void *dst, size_t len)
return -ENOTSUPP;
}
+bool __weak bpf_jit_supports_exceptions(void)
+{
+ return false;
+}
+
+void __weak arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp), void *cookie)
+{
+}
+
#ifdef CONFIG_BPF_SYSCALL
static int __init bpf_global_ma_init(void)
{
@@ -2921,7 +2934,9 @@ static int __init bpf_global_ma_init(void)
ret = bpf_mem_alloc_init(&bpf_global_ma, 0, false);
bpf_global_ma_set = !ret;
- return ret;
+ ret = bpf_mem_alloc_init(&bpf_global_percpu_ma, 0, true);
+ bpf_global_percpu_ma_set = !ret;
+ return !bpf_global_ma_set || !bpf_global_percpu_ma_set;
}
late_initcall(bpf_global_ma_init);
#endif
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 8bd3812fb8df..62a53ebfedf9 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -22,6 +22,7 @@
#include <linux/security.h>
#include <linux/btf_ids.h>
#include <linux/bpf_mem_alloc.h>
+#include <linux/kasan.h>
#include "../../lib/kstrtox.h"
@@ -1271,7 +1272,7 @@ BPF_CALL_3(bpf_timer_start, struct bpf_timer_kern *, timer, u64, nsecs, u64, fla
if (in_nmi())
return -EOPNOTSUPP;
- if (flags > BPF_F_TIMER_ABS)
+ if (flags & ~(BPF_F_TIMER_ABS | BPF_F_TIMER_CPU_PIN))
return -EINVAL;
__bpf_spin_lock_irqsave(&timer->lock);
t = timer->timer;
@@ -1285,6 +1286,9 @@ BPF_CALL_3(bpf_timer_start, struct bpf_timer_kern *, timer, u64, nsecs, u64, fla
else
mode = HRTIMER_MODE_REL_SOFT;
+ if (flags & BPF_F_TIMER_CPU_PIN)
+ mode |= HRTIMER_MODE_PINNED;
+
hrtimer_start(&t->timer, ns_to_ktime(nsecs), mode);
out:
__bpf_spin_unlock_irqrestore(&timer->lock);
@@ -1902,6 +1906,14 @@ __bpf_kfunc void *bpf_obj_new_impl(u64 local_type_id__k, void *meta__ign)
return p;
}
+__bpf_kfunc void *bpf_percpu_obj_new_impl(u64 local_type_id__k, void *meta__ign)
+{
+ u64 size = local_type_id__k;
+
+ /* The verifier has ensured that meta__ign must be NULL */
+ return bpf_mem_alloc(&bpf_global_percpu_ma, size);
+}
+
/* Must be called under migrate_disable(), as required by bpf_mem_free */
void __bpf_obj_drop_impl(void *p, const struct btf_record *rec)
{
@@ -1930,6 +1942,12 @@ __bpf_kfunc void bpf_obj_drop_impl(void *p__alloc, void *meta__ign)
__bpf_obj_drop_impl(p, meta ? meta->record : NULL);
}
+__bpf_kfunc void bpf_percpu_obj_drop_impl(void *p__alloc, void *meta__ign)
+{
+ /* The verifier has ensured that meta__ign must be NULL */
+ bpf_mem_free_rcu(&bpf_global_percpu_ma, p__alloc);
+}
+
__bpf_kfunc void *bpf_refcount_acquire_impl(void *p__refcounted_kptr, void *meta__ign)
{
struct btf_struct_meta *meta = meta__ign;
@@ -2435,6 +2453,49 @@ __bpf_kfunc void bpf_rcu_read_unlock(void)
rcu_read_unlock();
}
+struct bpf_throw_ctx {
+ struct bpf_prog_aux *aux;
+ u64 sp;
+ u64 bp;
+ int cnt;
+};
+
+static bool bpf_stack_walker(void *cookie, u64 ip, u64 sp, u64 bp)
+{
+ struct bpf_throw_ctx *ctx = cookie;
+ struct bpf_prog *prog;
+
+ if (!is_bpf_text_address(ip))
+ return !ctx->cnt;
+ prog = bpf_prog_ksym_find(ip);
+ ctx->cnt++;
+ if (bpf_is_subprog(prog))
+ return true;
+ ctx->aux = prog->aux;
+ ctx->sp = sp;
+ ctx->bp = bp;
+ return false;
+}
+
+__bpf_kfunc void bpf_throw(u64 cookie)
+{
+ struct bpf_throw_ctx ctx = {};
+
+ arch_bpf_stack_walk(bpf_stack_walker, &ctx);
+ WARN_ON_ONCE(!ctx.aux);
+ if (ctx.aux)
+ WARN_ON_ONCE(!ctx.aux->exception_boundary);
+ WARN_ON_ONCE(!ctx.bp);
+ WARN_ON_ONCE(!ctx.cnt);
+ /* Prevent KASAN false positives for CONFIG_KASAN_STACK by unpoisoning
+ * deeper stack depths than ctx.sp as we do not return from bpf_throw,
+ * which skips compiler generated instrumentation to do the same.
+ */
+ kasan_unpoison_task_stack_below((void *)(long)ctx.sp);
+ ctx.aux->bpf_exception_cb(cookie, ctx.sp, ctx.bp);
+ WARN(1, "A call to BPF exception callback should never return\n");
+}
+
__diag_pop();
BTF_SET8_START(generic_btf_ids)
@@ -2442,7 +2503,9 @@ BTF_SET8_START(generic_btf_ids)
BTF_ID_FLAGS(func, crash_kexec, KF_DESTRUCTIVE)
#endif
BTF_ID_FLAGS(func, bpf_obj_new_impl, KF_ACQUIRE | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_percpu_obj_new_impl, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_obj_drop_impl, KF_RELEASE)
+BTF_ID_FLAGS(func, bpf_percpu_obj_drop_impl, KF_RELEASE)
BTF_ID_FLAGS(func, bpf_refcount_acquire_impl, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_list_push_front_impl)
BTF_ID_FLAGS(func, bpf_list_push_back_impl)
@@ -2462,6 +2525,7 @@ BTF_ID_FLAGS(func, bpf_cgroup_from_id, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_task_under_cgroup, KF_RCU)
#endif
BTF_ID_FLAGS(func, bpf_task_from_pid, KF_ACQUIRE | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_throw)
BTF_SET8_END(generic_btf_ids)
static const struct btf_kfunc_id_set generic_kfunc_set = {
@@ -2488,6 +2552,9 @@ BTF_ID_FLAGS(func, bpf_dynptr_slice_rdwr, KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_iter_num_new, KF_ITER_NEW)
BTF_ID_FLAGS(func, bpf_iter_num_next, KF_ITER_NEXT | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_iter_num_destroy, KF_ITER_DESTROY)
+BTF_ID_FLAGS(func, bpf_iter_task_vma_new, KF_ITER_NEW | KF_RCU)
+BTF_ID_FLAGS(func, bpf_iter_task_vma_next, KF_ITER_NEXT | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_iter_task_vma_destroy, KF_ITER_DESTROY)
BTF_ID_FLAGS(func, bpf_dynptr_adjust)
BTF_ID_FLAGS(func, bpf_dynptr_is_null)
BTF_ID_FLAGS(func, bpf_dynptr_is_rdonly)
diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c
index d93ddac283d4..39ea316c55e7 100644
--- a/kernel/bpf/memalloc.c
+++ b/kernel/bpf/memalloc.c
@@ -526,15 +526,16 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu)
struct bpf_mem_cache *c, __percpu *pc;
struct obj_cgroup *objcg = NULL;
+ /* room for llist_node and per-cpu pointer */
+ if (percpu)
+ percpu_size = LLIST_NODE_SZ + sizeof(void *);
+
if (size) {
pc = __alloc_percpu_gfp(sizeof(*pc), 8, GFP_KERNEL);
if (!pc)
return -ENOMEM;
- if (percpu)
- /* room for llist_node and per-cpu pointer */
- percpu_size = LLIST_NODE_SZ + sizeof(void *);
- else
+ if (!percpu)
size += LLIST_NODE_SZ; /* room for llist_node */
unit_size = size;
@@ -555,10 +556,6 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu)
return 0;
}
- /* size == 0 && percpu is an invalid combination */
- if (WARN_ON_ONCE(percpu))
- return -EINVAL;
-
pcc = __alloc_percpu_gfp(sizeof(*cc), 8, GFP_KERNEL);
if (!pcc)
return -ENOMEM;
@@ -572,6 +569,7 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu)
c = &cc->cache[i];
c->unit_size = sizes[i];
c->objcg = objcg;
+ c->percpu_size = percpu_size;
c->tgt = c;
init_refill_work(c);
@@ -782,12 +780,17 @@ static void notrace *unit_alloc(struct bpf_mem_cache *c)
}
}
local_dec(&c->active);
- local_irq_restore(flags);
WARN_ON(cnt < 0);
if (cnt < c->low_watermark)
irq_work_raise(c);
+ /* Enable IRQ after the enqueue of irq work completes, so irq work
+ * will run after IRQ is enabled and free_llist may be refilled by
+ * irq work before other task preempts current task.
+ */
+ local_irq_restore(flags);
+
return llnode;
}
@@ -823,11 +826,16 @@ static void notrace unit_free(struct bpf_mem_cache *c, void *ptr)
llist_add(llnode, &c->free_llist_extra);
}
local_dec(&c->active);
- local_irq_restore(flags);
if (cnt > c->high_watermark)
/* free few objects from current cpu into global kmalloc pool */
irq_work_raise(c);
+ /* Enable IRQ after irq_work_raise() completes, otherwise when current
+ * task is preempted by task which does unit_alloc(), unit_alloc() may
+ * return NULL unexpectedly because irq work is already pending but can
+ * not been triggered and free_llist can not be refilled timely.
+ */
+ local_irq_restore(flags);
}
static void notrace unit_free_rcu(struct bpf_mem_cache *c, void *ptr)
@@ -845,10 +853,10 @@ static void notrace unit_free_rcu(struct bpf_mem_cache *c, void *ptr)
llist_add(llnode, &c->free_llist_extra_rcu);
}
local_dec(&c->active);
- local_irq_restore(flags);
if (!atomic_read(&c->call_rcu_in_progress))
irq_work_raise(c);
+ local_irq_restore(flags);
}
/* Called from BPF program or from sys_bpf syscall.
diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
index 87d6693d8233..1a4fec330eaa 100644
--- a/kernel/bpf/offload.c
+++ b/kernel/bpf/offload.c
@@ -234,7 +234,14 @@ int bpf_prog_dev_bound_init(struct bpf_prog *prog, union bpf_attr *attr)
attr->prog_type != BPF_PROG_TYPE_XDP)
return -EINVAL;
- if (attr->prog_flags & ~BPF_F_XDP_DEV_BOUND_ONLY)
+ if (attr->prog_flags & ~(BPF_F_XDP_DEV_BOUND_ONLY | BPF_F_XDP_HAS_FRAGS))
+ return -EINVAL;
+
+ /* Frags are allowed only if program is dev-bound-only, but not
+ * if it is requesting bpf offload.
+ */
+ if (attr->prog_flags & BPF_F_XDP_HAS_FRAGS &&
+ !(attr->prog_flags & BPF_F_XDP_DEV_BOUND_ONLY))
return -EINVAL;
if (attr->prog_type == BPF_PROG_TYPE_SCHED_CLS &&
@@ -847,10 +854,11 @@ void *bpf_dev_bound_resolve_kfunc(struct bpf_prog *prog, u32 func_id)
if (!ops)
goto out;
- if (func_id == bpf_xdp_metadata_kfunc_id(XDP_METADATA_KFUNC_RX_TIMESTAMP))
- p = ops->xmo_rx_timestamp;
- else if (func_id == bpf_xdp_metadata_kfunc_id(XDP_METADATA_KFUNC_RX_HASH))
- p = ops->xmo_rx_hash;
+#define XDP_METADATA_KFUNC(name, _, __, xmo) \
+ if (func_id == bpf_xdp_metadata_kfunc_id(name)) p = ops->xmo;
+ XDP_METADATA_KFUNC_xxx
+#undef XDP_METADATA_KFUNC
+
out:
up_read(&bpf_devs_lock);
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index 458bb80b14d5..d6b277482085 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -28,7 +28,7 @@ struct bpf_stack_map {
void *elems;
struct pcpu_freelist freelist;
u32 n_buckets;
- struct stack_map_bucket *buckets[];
+ struct stack_map_bucket *buckets[] __counted_by(n_buckets);
};
static inline bool stack_map_use_build_id(struct bpf_map *map)
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index d77b2f8b9364..341f8cb4405c 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -514,6 +514,7 @@ void btf_record_free(struct btf_record *rec)
switch (rec->fields[i].type) {
case BPF_KPTR_UNREF:
case BPF_KPTR_REF:
+ case BPF_KPTR_PERCPU:
if (rec->fields[i].kptr.module)
module_put(rec->fields[i].kptr.module);
btf_put(rec->fields[i].kptr.btf);
@@ -560,6 +561,7 @@ struct btf_record *btf_record_dup(const struct btf_record *rec)
switch (fields[i].type) {
case BPF_KPTR_UNREF:
case BPF_KPTR_REF:
+ case BPF_KPTR_PERCPU:
btf_get(fields[i].kptr.btf);
if (fields[i].kptr.module && !try_module_get(fields[i].kptr.module)) {
ret = -ENXIO;
@@ -650,6 +652,7 @@ void bpf_obj_free_fields(const struct btf_record *rec, void *obj)
WRITE_ONCE(*(u64 *)field_ptr, 0);
break;
case BPF_KPTR_REF:
+ case BPF_KPTR_PERCPU:
xchgd_field = (void *)xchg((unsigned long *)field_ptr, 0);
if (!xchgd_field)
break;
@@ -1045,6 +1048,7 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
break;
case BPF_KPTR_UNREF:
case BPF_KPTR_REF:
+ case BPF_KPTR_PERCPU:
case BPF_REFCOUNT:
if (map->map_type != BPF_MAP_TYPE_HASH &&
map->map_type != BPF_MAP_TYPE_PERCPU_HASH &&
@@ -2442,14 +2446,19 @@ bpf_prog_load_check_attach(enum bpf_prog_type prog_type,
case BPF_CGROUP_INET6_BIND:
case BPF_CGROUP_INET4_CONNECT:
case BPF_CGROUP_INET6_CONNECT:
+ case BPF_CGROUP_UNIX_CONNECT:
case BPF_CGROUP_INET4_GETPEERNAME:
case BPF_CGROUP_INET6_GETPEERNAME:
+ case BPF_CGROUP_UNIX_GETPEERNAME:
case BPF_CGROUP_INET4_GETSOCKNAME:
case BPF_CGROUP_INET6_GETSOCKNAME:
+ case BPF_CGROUP_UNIX_GETSOCKNAME:
case BPF_CGROUP_UDP4_SENDMSG:
case BPF_CGROUP_UDP6_SENDMSG:
+ case BPF_CGROUP_UNIX_SENDMSG:
case BPF_CGROUP_UDP4_RECVMSG:
case BPF_CGROUP_UDP6_RECVMSG:
+ case BPF_CGROUP_UNIX_RECVMSG:
return 0;
default:
return -EINVAL;
@@ -2745,7 +2754,7 @@ free_used_maps:
* period before we can tear down JIT memory since symbols
* are already exposed under kallsyms.
*/
- __bpf_prog_put_noref(prog, prog->aux->func_cnt);
+ __bpf_prog_put_noref(prog, prog->aux->real_func_cnt);
return err;
free_prog_sec:
free_uid(prog->aux->user);
@@ -3370,7 +3379,7 @@ static void bpf_perf_link_dealloc(struct bpf_link *link)
static int bpf_perf_link_fill_common(const struct perf_event *event,
char __user *uname, u32 ulen,
u64 *probe_offset, u64 *probe_addr,
- u32 *fd_type)
+ u32 *fd_type, unsigned long *missed)
{
const char *buf;
u32 prog_id;
@@ -3381,7 +3390,7 @@ static int bpf_perf_link_fill_common(const struct perf_event *event,
return -EINVAL;
err = bpf_get_perf_event_info(event, &prog_id, fd_type, &buf,
- probe_offset, probe_addr);
+ probe_offset, probe_addr, missed);
if (err)
return err;
if (!uname)
@@ -3404,6 +3413,7 @@ static int bpf_perf_link_fill_common(const struct perf_event *event,
static int bpf_perf_link_fill_kprobe(const struct perf_event *event,
struct bpf_link_info *info)
{
+ unsigned long missed;
char __user *uname;
u64 addr, offset;
u32 ulen, type;
@@ -3412,7 +3422,7 @@ static int bpf_perf_link_fill_kprobe(const struct perf_event *event,
uname = u64_to_user_ptr(info->perf_event.kprobe.func_name);
ulen = info->perf_event.kprobe.name_len;
err = bpf_perf_link_fill_common(event, uname, ulen, &offset, &addr,
- &type);
+ &type, &missed);
if (err)
return err;
if (type == BPF_FD_TYPE_KRETPROBE)
@@ -3421,6 +3431,7 @@ static int bpf_perf_link_fill_kprobe(const struct perf_event *event,
info->perf_event.type = BPF_PERF_EVENT_KPROBE;
info->perf_event.kprobe.offset = offset;
+ info->perf_event.kprobe.missed = missed;
if (!kallsyms_show_value(current_cred()))
addr = 0;
info->perf_event.kprobe.addr = addr;
@@ -3440,7 +3451,7 @@ static int bpf_perf_link_fill_uprobe(const struct perf_event *event,
uname = u64_to_user_ptr(info->perf_event.uprobe.file_name);
ulen = info->perf_event.uprobe.name_len;
err = bpf_perf_link_fill_common(event, uname, ulen, &offset, &addr,
- &type);
+ &type, NULL);
if (err)
return err;
@@ -3476,7 +3487,7 @@ static int bpf_perf_link_fill_tracepoint(const struct perf_event *event,
uname = u64_to_user_ptr(info->perf_event.tracepoint.tp_name);
ulen = info->perf_event.tracepoint.name_len;
info->perf_event.type = BPF_PERF_EVENT_TRACEPOINT;
- return bpf_perf_link_fill_common(event, uname, ulen, NULL, NULL, NULL);
+ return bpf_perf_link_fill_common(event, uname, ulen, NULL, NULL, NULL, NULL);
}
static int bpf_perf_link_fill_perf_event(const struct perf_event *event,
@@ -3672,14 +3683,19 @@ attach_type_to_prog_type(enum bpf_attach_type attach_type)
case BPF_CGROUP_INET6_BIND:
case BPF_CGROUP_INET4_CONNECT:
case BPF_CGROUP_INET6_CONNECT:
+ case BPF_CGROUP_UNIX_CONNECT:
case BPF_CGROUP_INET4_GETPEERNAME:
case BPF_CGROUP_INET6_GETPEERNAME:
+ case BPF_CGROUP_UNIX_GETPEERNAME:
case BPF_CGROUP_INET4_GETSOCKNAME:
case BPF_CGROUP_INET6_GETSOCKNAME:
+ case BPF_CGROUP_UNIX_GETSOCKNAME:
case BPF_CGROUP_UDP4_SENDMSG:
case BPF_CGROUP_UDP6_SENDMSG:
+ case BPF_CGROUP_UNIX_SENDMSG:
case BPF_CGROUP_UDP4_RECVMSG:
case BPF_CGROUP_UDP6_RECVMSG:
+ case BPF_CGROUP_UNIX_RECVMSG:
return BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
case BPF_CGROUP_SOCK_OPS:
return BPF_PROG_TYPE_SOCK_OPS;
@@ -3945,14 +3961,19 @@ static int bpf_prog_query(const union bpf_attr *attr,
case BPF_CGROUP_INET6_POST_BIND:
case BPF_CGROUP_INET4_CONNECT:
case BPF_CGROUP_INET6_CONNECT:
+ case BPF_CGROUP_UNIX_CONNECT:
case BPF_CGROUP_INET4_GETPEERNAME:
case BPF_CGROUP_INET6_GETPEERNAME:
+ case BPF_CGROUP_UNIX_GETPEERNAME:
case BPF_CGROUP_INET4_GETSOCKNAME:
case BPF_CGROUP_INET6_GETSOCKNAME:
+ case BPF_CGROUP_UNIX_GETSOCKNAME:
case BPF_CGROUP_UDP4_SENDMSG:
case BPF_CGROUP_UDP6_SENDMSG:
+ case BPF_CGROUP_UNIX_SENDMSG:
case BPF_CGROUP_UDP4_RECVMSG:
case BPF_CGROUP_UDP6_RECVMSG:
+ case BPF_CGROUP_UNIX_RECVMSG:
case BPF_CGROUP_SOCK_OPS:
case BPF_CGROUP_DEVICE:
case BPF_CGROUP_SYSCTL:
@@ -4818,7 +4839,7 @@ static int bpf_task_fd_query(const union bpf_attr *attr,
err = bpf_get_perf_event_info(event, &prog_id, &fd_type,
&buf, &probe_offset,
- &probe_addr);
+ &probe_addr, NULL);
if (!err)
err = bpf_task_fd_query_copy(attr, uattr, prog_id,
fd_type, buf,
diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c
index c4ab9d6cdbe9..fef17628341f 100644
--- a/kernel/bpf/task_iter.c
+++ b/kernel/bpf/task_iter.c
@@ -7,7 +7,9 @@
#include <linux/fs.h>
#include <linux/fdtable.h>
#include <linux/filter.h>
+#include <linux/bpf_mem_alloc.h>
#include <linux/btf_ids.h>
+#include <linux/mm_types.h>
#include "mmap_unlock_work.h"
static const char * const iter_task_type_names[] = {
@@ -35,16 +37,13 @@ static struct task_struct *task_group_seq_get_next(struct bpf_iter_seq_task_comm
u32 *tid,
bool skip_if_dup_files)
{
- struct task_struct *task, *next_task;
+ struct task_struct *task;
struct pid *pid;
- u32 saved_tid;
+ u32 next_tid;
if (!*tid) {
/* The first time, the iterator calls this function. */
pid = find_pid_ns(common->pid, common->ns);
- if (!pid)
- return NULL;
-
task = get_pid_task(pid, PIDTYPE_TGID);
if (!task)
return NULL;
@@ -66,44 +65,27 @@ static struct task_struct *task_group_seq_get_next(struct bpf_iter_seq_task_comm
return task;
}
- pid = find_pid_ns(common->pid_visiting, common->ns);
- if (!pid)
- return NULL;
-
- task = get_pid_task(pid, PIDTYPE_PID);
+ task = find_task_by_pid_ns(common->pid_visiting, common->ns);
if (!task)
return NULL;
retry:
- if (!pid_alive(task)) {
- put_task_struct(task);
- return NULL;
- }
-
- next_task = next_thread(task);
- put_task_struct(task);
- if (!next_task)
- return NULL;
+ task = next_thread(task);
- saved_tid = *tid;
- *tid = __task_pid_nr_ns(next_task, PIDTYPE_PID, common->ns);
- if (!*tid || *tid == common->pid) {
+ next_tid = __task_pid_nr_ns(task, PIDTYPE_PID, common->ns);
+ if (!next_tid || next_tid == common->pid) {
/* Run out of tasks of a process. The tasks of a
* thread_group are linked as circular linked list.
*/
- *tid = saved_tid;
return NULL;
}
- get_task_struct(next_task);
- common->pid_visiting = *tid;
-
- if (skip_if_dup_files && task->files == task->group_leader->files) {
- task = next_task;
+ if (skip_if_dup_files && task->files == task->group_leader->files)
goto retry;
- }
- return next_task;
+ *tid = common->pid_visiting = next_tid;
+ get_task_struct(task);
+ return task;
}
static struct task_struct *task_seq_get_next(struct bpf_iter_seq_task_common *common,
@@ -823,6 +805,95 @@ const struct bpf_func_proto bpf_find_vma_proto = {
.arg5_type = ARG_ANYTHING,
};
+struct bpf_iter_task_vma_kern_data {
+ struct task_struct *task;
+ struct mm_struct *mm;
+ struct mmap_unlock_irq_work *work;
+ struct vma_iterator vmi;
+};
+
+struct bpf_iter_task_vma {
+ /* opaque iterator state; having __u64 here allows to preserve correct
+ * alignment requirements in vmlinux.h, generated from BTF
+ */
+ __u64 __opaque[1];
+} __attribute__((aligned(8)));
+
+/* Non-opaque version of bpf_iter_task_vma */
+struct bpf_iter_task_vma_kern {
+ struct bpf_iter_task_vma_kern_data *data;
+} __attribute__((aligned(8)));
+
+__diag_push();
+__diag_ignore_all("-Wmissing-prototypes",
+ "Global functions as their definitions will be in vmlinux BTF");
+
+__bpf_kfunc int bpf_iter_task_vma_new(struct bpf_iter_task_vma *it,
+ struct task_struct *task, u64 addr)
+{
+ struct bpf_iter_task_vma_kern *kit = (void *)it;
+ bool irq_work_busy = false;
+ int err;
+
+ BUILD_BUG_ON(sizeof(struct bpf_iter_task_vma_kern) != sizeof(struct bpf_iter_task_vma));
+ BUILD_BUG_ON(__alignof__(struct bpf_iter_task_vma_kern) != __alignof__(struct bpf_iter_task_vma));
+
+ /* is_iter_reg_valid_uninit guarantees that kit hasn't been initialized
+ * before, so non-NULL kit->data doesn't point to previously
+ * bpf_mem_alloc'd bpf_iter_task_vma_kern_data
+ */
+ kit->data = bpf_mem_alloc(&bpf_global_ma, sizeof(struct bpf_iter_task_vma_kern_data));
+ if (!kit->data)
+ return -ENOMEM;
+
+ kit->data->task = get_task_struct(task);
+ kit->data->mm = task->mm;
+ if (!kit->data->mm) {
+ err = -ENOENT;
+ goto err_cleanup_iter;
+ }
+
+ /* kit->data->work == NULL is valid after bpf_mmap_unlock_get_irq_work */
+ irq_work_busy = bpf_mmap_unlock_get_irq_work(&kit->data->work);
+ if (irq_work_busy || !mmap_read_trylock(kit->data->mm)) {
+ err = -EBUSY;
+ goto err_cleanup_iter;
+ }
+
+ vma_iter_init(&kit->data->vmi, kit->data->mm, addr);
+ return 0;
+
+err_cleanup_iter:
+ if (kit->data->task)
+ put_task_struct(kit->data->task);
+ bpf_mem_free(&bpf_global_ma, kit->data);
+ /* NULL kit->data signals failed bpf_iter_task_vma initialization */
+ kit->data = NULL;
+ return err;
+}
+
+__bpf_kfunc struct vm_area_struct *bpf_iter_task_vma_next(struct bpf_iter_task_vma *it)
+{
+ struct bpf_iter_task_vma_kern *kit = (void *)it;
+
+ if (!kit->data) /* bpf_iter_task_vma_new failed */
+ return NULL;
+ return vma_next(&kit->data->vmi);
+}
+
+__bpf_kfunc void bpf_iter_task_vma_destroy(struct bpf_iter_task_vma *it)
+{
+ struct bpf_iter_task_vma_kern *kit = (void *)it;
+
+ if (kit->data) {
+ bpf_mmap_unlock_mm(kit->data->work, kit->data->mm);
+ put_task_struct(kit->data->task);
+ bpf_mem_free(&bpf_global_ma, kit->data);
+ }
+}
+
+__diag_pop();
+
DEFINE_PER_CPU(struct mmap_unlock_irq_work, mmap_unlock_work);
static void do_mmap_read_unlock(struct irq_work *entry)
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index 53ff50cac61e..e97aeda3a86b 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -415,8 +415,8 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mut
goto out;
}
- /* clear all bits except SHARE_IPMODIFY */
- tr->flags &= BPF_TRAMP_F_SHARE_IPMODIFY;
+ /* clear all bits except SHARE_IPMODIFY and TAIL_CALL_CTX */
+ tr->flags &= (BPF_TRAMP_F_SHARE_IPMODIFY | BPF_TRAMP_F_TAIL_CALL_CTX);
if (tlinks[BPF_TRAMP_FEXIT].nr_links ||
tlinks[BPF_TRAMP_MODIFY_RETURN].nr_links) {
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 873ade146f3d..bb58987e4844 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -304,7 +304,7 @@ struct bpf_kfunc_call_arg_meta {
/* arg_{btf,btf_id,owning_ref} are used by kfunc-specific handling,
* generally to pass info about user-defined local kptr types to later
* verification logic
- * bpf_obj_drop
+ * bpf_obj_drop/bpf_percpu_obj_drop
* Record the local kptr type to be drop'd
* bpf_refcount_acquire (via KF_ARG_PTR_TO_REFCOUNTED_KPTR arg type)
* Record the local kptr type to be refcount_incr'd and use
@@ -543,6 +543,7 @@ static bool is_dynptr_ref_function(enum bpf_func_id func_id)
}
static bool is_callback_calling_kfunc(u32 btf_id);
+static bool is_bpf_throw_kfunc(struct bpf_insn *insn);
static bool is_callback_calling_function(enum bpf_func_id func_id)
{
@@ -1341,6 +1342,50 @@ static void scrub_spilled_slot(u8 *stype)
*stype = STACK_MISC;
}
+static void print_scalar_ranges(struct bpf_verifier_env *env,
+ const struct bpf_reg_state *reg,
+ const char **sep)
+{
+ struct {
+ const char *name;
+ u64 val;
+ bool omit;
+ } minmaxs[] = {
+ {"smin", reg->smin_value, reg->smin_value == S64_MIN},
+ {"smax", reg->smax_value, reg->smax_value == S64_MAX},
+ {"umin", reg->umin_value, reg->umin_value == 0},
+ {"umax", reg->umax_value, reg->umax_value == U64_MAX},
+ {"smin32", (s64)reg->s32_min_value, reg->s32_min_value == S32_MIN},
+ {"smax32", (s64)reg->s32_max_value, reg->s32_max_value == S32_MAX},
+ {"umin32", reg->u32_min_value, reg->u32_min_value == 0},
+ {"umax32", reg->u32_max_value, reg->u32_max_value == U32_MAX},
+ }, *m1, *m2, *mend = &minmaxs[ARRAY_SIZE(minmaxs)];
+ bool neg1, neg2;
+
+ for (m1 = &minmaxs[0]; m1 < mend; m1++) {
+ if (m1->omit)
+ continue;
+
+ neg1 = m1->name[0] == 's' && (s64)m1->val < 0;
+
+ verbose(env, "%s%s=", *sep, m1->name);
+ *sep = ",";
+
+ for (m2 = m1 + 2; m2 < mend; m2 += 2) {
+ if (m2->omit || m2->val != m1->val)
+ continue;
+ /* don't mix negatives with positives */
+ neg2 = m2->name[0] == 's' && (s64)m2->val < 0;
+ if (neg2 != neg1)
+ continue;
+ m2->omit = true;
+ verbose(env, "%s=", m2->name);
+ }
+
+ verbose(env, m1->name[0] == 's' ? "%lld" : "%llu", m1->val);
+ }
+}
+
static void print_verifier_state(struct bpf_verifier_env *env,
const struct bpf_func_state *state,
bool print_all)
@@ -1404,34 +1449,13 @@ static void print_verifier_state(struct bpf_verifier_env *env,
*/
verbose_a("imm=%llx", reg->var_off.value);
} else {
- if (reg->smin_value != reg->umin_value &&
- reg->smin_value != S64_MIN)
- verbose_a("smin=%lld", (long long)reg->smin_value);
- if (reg->smax_value != reg->umax_value &&
- reg->smax_value != S64_MAX)
- verbose_a("smax=%lld", (long long)reg->smax_value);
- if (reg->umin_value != 0)
- verbose_a("umin=%llu", (unsigned long long)reg->umin_value);
- if (reg->umax_value != U64_MAX)
- verbose_a("umax=%llu", (unsigned long long)reg->umax_value);
+ print_scalar_ranges(env, reg, &sep);
if (!tnum_is_unknown(reg->var_off)) {
char tn_buf[48];
tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
verbose_a("var_off=%s", tn_buf);
}
- if (reg->s32_min_value != reg->smin_value &&
- reg->s32_min_value != S32_MIN)
- verbose_a("s32_min=%d", (int)(reg->s32_min_value));
- if (reg->s32_max_value != reg->smax_value &&
- reg->s32_max_value != S32_MAX)
- verbose_a("s32_max=%d", (int)(reg->s32_max_value));
- if (reg->u32_min_value != reg->umin_value &&
- reg->u32_min_value != U32_MIN)
- verbose_a("u32_min=%d", (int)(reg->u32_min_value));
- if (reg->u32_max_value != reg->umax_value &&
- reg->u32_max_value != U32_MAX)
- verbose_a("u32_max=%d", (int)(reg->u32_max_value));
}
#undef verbose_a
@@ -1515,7 +1539,8 @@ static void print_verifier_state(struct bpf_verifier_env *env,
if (state->in_async_callback_fn)
verbose(env, " async_cb");
verbose(env, "\n");
- mark_verifier_state_clean(env);
+ if (!print_all)
+ mark_verifier_state_clean(env);
}
static inline u32 vlog_alignment(u32 pos)
@@ -1748,7 +1773,9 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state,
return -ENOMEM;
dst_state->jmp_history_cnt = src->jmp_history_cnt;
- /* if dst has more stack frames then src frame, free them */
+ /* if dst has more stack frames then src frame, free them, this is also
+ * necessary in case of exceptional exits using bpf_throw.
+ */
for (i = src->curframe + 1; i <= dst_state->curframe; i++) {
free_func_state(dst_state->frame[i]);
dst_state->frame[i] = NULL;
@@ -2454,6 +2481,68 @@ static int add_subprog(struct bpf_verifier_env *env, int off)
return env->subprog_cnt - 1;
}
+static int bpf_find_exception_callback_insn_off(struct bpf_verifier_env *env)
+{
+ struct bpf_prog_aux *aux = env->prog->aux;
+ struct btf *btf = aux->btf;
+ const struct btf_type *t;
+ u32 main_btf_id, id;
+ const char *name;
+ int ret, i;
+
+ /* Non-zero func_info_cnt implies valid btf */
+ if (!aux->func_info_cnt)
+ return 0;
+ main_btf_id = aux->func_info[0].type_id;
+
+ t = btf_type_by_id(btf, main_btf_id);
+ if (!t) {
+ verbose(env, "invalid btf id for main subprog in func_info\n");
+ return -EINVAL;
+ }
+
+ name = btf_find_decl_tag_value(btf, t, -1, "exception_callback:");
+ if (IS_ERR(name)) {
+ ret = PTR_ERR(name);
+ /* If there is no tag present, there is no exception callback */
+ if (ret == -ENOENT)
+ ret = 0;
+ else if (ret == -EEXIST)
+ verbose(env, "multiple exception callback tags for main subprog\n");
+ return ret;
+ }
+
+ ret = btf_find_by_name_kind(btf, name, BTF_KIND_FUNC);
+ if (ret < 0) {
+ verbose(env, "exception callback '%s' could not be found in BTF\n", name);
+ return ret;
+ }
+ id = ret;
+ t = btf_type_by_id(btf, id);
+ if (btf_func_linkage(t) != BTF_FUNC_GLOBAL) {
+ verbose(env, "exception callback '%s' must have global linkage\n", name);
+ return -EINVAL;
+ }
+ ret = 0;
+ for (i = 0; i < aux->func_info_cnt; i++) {
+ if (aux->func_info[i].type_id != id)
+ continue;
+ ret = aux->func_info[i].insn_off;
+ /* Further func_info and subprog checks will also happen
+ * later, so assume this is the right insn_off for now.
+ */
+ if (!ret) {
+ verbose(env, "invalid exception callback insn_off in func_info: 0\n");
+ ret = -EINVAL;
+ }
+ }
+ if (!ret) {
+ verbose(env, "exception callback type id not found in func_info\n");
+ ret = -EINVAL;
+ }
+ return ret;
+}
+
#define MAX_KFUNC_DESCS 256
#define MAX_KFUNC_BTFS 256
@@ -2793,8 +2882,8 @@ bpf_jit_find_kfunc_model(const struct bpf_prog *prog,
static int add_subprog_and_kfunc(struct bpf_verifier_env *env)
{
struct bpf_subprog_info *subprog = env->subprog_info;
+ int i, ret, insn_cnt = env->prog->len, ex_cb_insn;
struct bpf_insn *insn = env->prog->insnsi;
- int i, ret, insn_cnt = env->prog->len;
/* Add entry function. */
ret = add_subprog(env, 0);
@@ -2820,6 +2909,26 @@ static int add_subprog_and_kfunc(struct bpf_verifier_env *env)
return ret;
}
+ ret = bpf_find_exception_callback_insn_off(env);
+ if (ret < 0)
+ return ret;
+ ex_cb_insn = ret;
+
+ /* If ex_cb_insn > 0, this means that the main program has a subprog
+ * marked using BTF decl tag to serve as the exception callback.
+ */
+ if (ex_cb_insn) {
+ ret = add_subprog(env, ex_cb_insn);
+ if (ret < 0)
+ return ret;
+ for (i = 1; i < env->subprog_cnt; i++) {
+ if (env->subprog_info[i].start != ex_cb_insn)
+ continue;
+ env->exception_callback_subprog = i;
+ break;
+ }
+ }
+
/* Add a fake 'exit' subprog which could simplify subprog iteration
* logic. 'subprog_cnt' should not be increased.
*/
@@ -2868,7 +2977,7 @@ next:
if (i == subprog_end - 1) {
/* to avoid fall-through from one subprog into another
* the last insn of the subprog should be either exit
- * or unconditional jump back
+ * or unconditional jump back or bpf_throw call
*/
if (code != (BPF_JMP | BPF_EXIT) &&
code != (BPF_JMP32 | BPF_JA) &&
@@ -3029,7 +3138,7 @@ static bool is_reg64(struct bpf_verifier_env *env, struct bpf_insn *insn,
if (class == BPF_LDX) {
if (t != SRC_OP)
- return BPF_SIZE(code) == BPF_DW;
+ return BPF_SIZE(code) == BPF_DW || BPF_MODE(code) == BPF_MEMSX;
/* LDX source must be ptr. */
return true;
}
@@ -4999,6 +5108,8 @@ static int map_kptr_match_type(struct bpf_verifier_env *env,
perm_flags |= PTR_UNTRUSTED;
} else {
perm_flags = PTR_MAYBE_NULL | MEM_ALLOC;
+ if (kptr_field->type == BPF_KPTR_PERCPU)
+ perm_flags |= MEM_PERCPU;
}
if (base_type(reg->type) != PTR_TO_BTF_ID || (type_flag(reg->type) & ~perm_flags))
@@ -5042,7 +5153,7 @@ static int map_kptr_match_type(struct bpf_verifier_env *env,
*/
if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, reg->off,
kptr_field->kptr.btf, kptr_field->kptr.btf_id,
- kptr_field->type == BPF_KPTR_REF))
+ kptr_field->type != BPF_KPTR_UNREF))
goto bad_type;
return 0;
bad_type:
@@ -5086,7 +5197,18 @@ static bool rcu_safe_kptr(const struct btf_field *field)
{
const struct btf_field_kptr *kptr = &field->kptr;
- return field->type == BPF_KPTR_REF && rcu_protected_object(kptr->btf, kptr->btf_id);
+ return field->type == BPF_KPTR_PERCPU ||
+ (field->type == BPF_KPTR_REF && rcu_protected_object(kptr->btf, kptr->btf_id));
+}
+
+static u32 btf_ld_kptr_type(struct bpf_verifier_env *env, struct btf_field *kptr_field)
+{
+ if (rcu_safe_kptr(kptr_field) && in_rcu_cs(env)) {
+ if (kptr_field->type != BPF_KPTR_PERCPU)
+ return PTR_MAYBE_NULL | MEM_RCU;
+ return PTR_MAYBE_NULL | MEM_RCU | MEM_PERCPU;
+ }
+ return PTR_MAYBE_NULL | PTR_UNTRUSTED;
}
static int check_map_kptr_access(struct bpf_verifier_env *env, u32 regno,
@@ -5112,7 +5234,8 @@ static int check_map_kptr_access(struct bpf_verifier_env *env, u32 regno,
/* We only allow loading referenced kptr, since it will be marked as
* untrusted, similar to unreferenced kptr.
*/
- if (class != BPF_LDX && kptr_field->type == BPF_KPTR_REF) {
+ if (class != BPF_LDX &&
+ (kptr_field->type == BPF_KPTR_REF || kptr_field->type == BPF_KPTR_PERCPU)) {
verbose(env, "store to referenced kptr disallowed\n");
return -EACCES;
}
@@ -5123,10 +5246,7 @@ static int check_map_kptr_access(struct bpf_verifier_env *env, u32 regno,
* value from map as PTR_TO_BTF_ID, with the correct type.
*/
mark_btf_ld_reg(env, cur_regs(env), value_regno, PTR_TO_BTF_ID, kptr_field->kptr.btf,
- kptr_field->kptr.btf_id,
- rcu_safe_kptr(kptr_field) && in_rcu_cs(env) ?
- PTR_MAYBE_NULL | MEM_RCU :
- PTR_MAYBE_NULL | PTR_UNTRUSTED);
+ kptr_field->kptr.btf_id, btf_ld_kptr_type(env, kptr_field));
/* For mark_ptr_or_null_reg */
val_reg->id = ++env->id_gen;
} else if (class == BPF_STX) {
@@ -5180,6 +5300,7 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno,
switch (field->type) {
case BPF_KPTR_UNREF:
case BPF_KPTR_REF:
+ case BPF_KPTR_PERCPU:
if (src != ACCESS_DIRECT) {
verbose(env, "kptr cannot be accessed indirectly by helper\n");
return -EACCES;
@@ -5647,6 +5768,27 @@ continue_func:
for (; i < subprog_end; i++) {
int next_insn, sidx;
+ if (bpf_pseudo_kfunc_call(insn + i) && !insn[i].off) {
+ bool err = false;
+
+ if (!is_bpf_throw_kfunc(insn + i))
+ continue;
+ if (subprog[idx].is_cb)
+ err = true;
+ for (int c = 0; c < frame && !err; c++) {
+ if (subprog[ret_prog[c]].is_cb) {
+ err = true;
+ break;
+ }
+ }
+ if (!err)
+ continue;
+ verbose(env,
+ "bpf_throw kfunc (insn %d) cannot be called from callback subprog %d\n",
+ i, idx);
+ return -EINVAL;
+ }
+
if (!bpf_pseudo_call(insn + i) && !bpf_pseudo_func(insn + i))
continue;
/* remember insn and function to return to */
@@ -5669,6 +5811,10 @@ continue_func:
/* async callbacks don't increase bpf prog stack size unless called directly */
if (!bpf_pseudo_call(insn + i))
continue;
+ if (subprog[sidx].is_exception_cb) {
+ verbose(env, "insn %d cannot call exception cb directly\n", i);
+ return -EINVAL;
+ }
}
i = next_insn;
idx = sidx;
@@ -5690,8 +5836,13 @@ continue_func:
* tail call counter throughout bpf2bpf calls combined with tailcalls
*/
if (tail_call_reachable)
- for (j = 0; j < frame; j++)
+ for (j = 0; j < frame; j++) {
+ if (subprog[ret_prog[j]].is_exception_cb) {
+ verbose(env, "cannot tail call within exception cb\n");
+ return -EINVAL;
+ }
subprog[ret_prog[j]].tail_call_reachable = true;
+ }
if (subprog[0].tail_call_reachable)
env->prog->aux->tail_call_reachable = true;
@@ -6207,7 +6358,7 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
}
if (type_is_alloc(reg->type) && !type_is_non_owning_ref(reg->type) &&
- !reg->ref_obj_id) {
+ !(reg->type & MEM_RCU) && !reg->ref_obj_id) {
verbose(env, "verifier internal error: ref_obj_id for allocated object must be non-zero\n");
return -EFAULT;
}
@@ -7318,7 +7469,7 @@ static int process_kptr_func(struct bpf_verifier_env *env, int regno,
verbose(env, "off=%d doesn't point to kptr\n", kptr_off);
return -EACCES;
}
- if (kptr_field->type != BPF_KPTR_REF) {
+ if (kptr_field->type != BPF_KPTR_REF && kptr_field->type != BPF_KPTR_PERCPU) {
verbose(env, "off=%d kptr isn't referenced kptr\n", kptr_off);
return -EACCES;
}
@@ -7751,6 +7902,7 @@ static const struct bpf_reg_types btf_ptr_types = {
static const struct bpf_reg_types percpu_btf_ptr_types = {
.types = {
PTR_TO_BTF_ID | MEM_PERCPU,
+ PTR_TO_BTF_ID | MEM_PERCPU | MEM_RCU,
PTR_TO_BTF_ID | MEM_PERCPU | PTR_TRUSTED,
}
};
@@ -7829,8 +7981,10 @@ static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
if (base_type(arg_type) == ARG_PTR_TO_MEM)
type &= ~DYNPTR_TYPE_FLAG_MASK;
- if (meta->func_id == BPF_FUNC_kptr_xchg && type_is_alloc(type))
+ if (meta->func_id == BPF_FUNC_kptr_xchg && type_is_alloc(type)) {
type &= ~MEM_ALLOC;
+ type &= ~MEM_PERCPU;
+ }
for (i = 0; i < ARRAY_SIZE(compatible->types); i++) {
expected = compatible->types[i];
@@ -7913,6 +8067,7 @@ found:
break;
}
case PTR_TO_BTF_ID | MEM_ALLOC:
+ case PTR_TO_BTF_ID | MEM_PERCPU | MEM_ALLOC:
if (meta->func_id != BPF_FUNC_spin_lock && meta->func_id != BPF_FUNC_spin_unlock &&
meta->func_id != BPF_FUNC_kptr_xchg) {
verbose(env, "verifier internal error: unimplemented handling of MEM_ALLOC\n");
@@ -7924,6 +8079,7 @@ found:
}
break;
case PTR_TO_BTF_ID | MEM_PERCPU:
+ case PTR_TO_BTF_ID | MEM_PERCPU | MEM_RCU:
case PTR_TO_BTF_ID | MEM_PERCPU | PTR_TRUSTED:
/* Handled by helper specific checks */
break;
@@ -8900,6 +9056,7 @@ static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn
* callbacks
*/
if (set_callee_state_cb != set_callee_state) {
+ env->subprog_info[subprog].is_cb = true;
if (bpf_pseudo_kfunc_call(insn) &&
!is_callback_calling_kfunc(insn->imm)) {
verbose(env, "verifier bug: kfunc %s#%d not marked as callback-calling\n",
@@ -9289,7 +9446,8 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
verbose(env, "to caller at %d:\n", *insn_idx);
print_verifier_state(env, caller, true);
}
- /* clear everything in the callee */
+ /* clear everything in the callee. In case of exceptional exits using
+ * bpf_throw, this will be done by copy_verifier_state for extra frames. */
free_func_state(callee);
state->frame[state->curframe--] = NULL;
return 0;
@@ -9413,17 +9571,17 @@ record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
return 0;
}
-static int check_reference_leak(struct bpf_verifier_env *env)
+static int check_reference_leak(struct bpf_verifier_env *env, bool exception_exit)
{
struct bpf_func_state *state = cur_func(env);
bool refs_lingering = false;
int i;
- if (state->frameno && !state->in_callback_fn)
+ if (!exception_exit && state->frameno && !state->in_callback_fn)
return 0;
for (i = 0; i < state->acquired_refs; i++) {
- if (state->in_callback_fn && state->refs[i].callback_ref != state->frameno)
+ if (!exception_exit && state->in_callback_fn && state->refs[i].callback_ref != state->frameno)
continue;
verbose(env, "Unreleased reference id=%d alloc_insn=%d\n",
state->refs[i].id, state->refs[i].insn_idx);
@@ -9530,6 +9688,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
int *insn_idx_p)
{
enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
+ bool returns_cpu_specific_alloc_ptr = false;
const struct bpf_func_proto *fn = NULL;
enum bpf_return_type ret_type;
enum bpf_type_flag ret_flag;
@@ -9640,6 +9799,26 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
return -EFAULT;
}
err = unmark_stack_slots_dynptr(env, &regs[meta.release_regno]);
+ } else if (func_id == BPF_FUNC_kptr_xchg && meta.ref_obj_id) {
+ u32 ref_obj_id = meta.ref_obj_id;
+ bool in_rcu = in_rcu_cs(env);
+ struct bpf_func_state *state;
+ struct bpf_reg_state *reg;
+
+ err = release_reference_state(cur_func(env), ref_obj_id);
+ if (!err) {
+ bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
+ if (reg->ref_obj_id == ref_obj_id) {
+ if (in_rcu && (reg->type & MEM_ALLOC) && (reg->type & MEM_PERCPU)) {
+ reg->ref_obj_id = 0;
+ reg->type &= ~MEM_ALLOC;
+ reg->type |= MEM_RCU;
+ } else {
+ mark_reg_invalid(env, reg);
+ }
+ }
+ }));
+ }
} else if (meta.ref_obj_id) {
err = release_reference(env, meta.ref_obj_id);
} else if (register_is_null(&regs[meta.release_regno])) {
@@ -9657,7 +9836,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
switch (func_id) {
case BPF_FUNC_tail_call:
- err = check_reference_leak(env);
+ err = check_reference_leak(env, false);
if (err) {
verbose(env, "tail_call would lead to reference leak\n");
return err;
@@ -9768,6 +9947,23 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
break;
}
+ case BPF_FUNC_per_cpu_ptr:
+ case BPF_FUNC_this_cpu_ptr:
+ {
+ struct bpf_reg_state *reg = &regs[BPF_REG_1];
+ const struct btf_type *type;
+
+ if (reg->type & MEM_RCU) {
+ type = btf_type_by_id(reg->btf, reg->btf_id);
+ if (!type || !btf_type_is_struct(type)) {
+ verbose(env, "Helper has invalid btf/btf_id in R1\n");
+ return -EFAULT;
+ }
+ returns_cpu_specific_alloc_ptr = true;
+ env->insn_aux_data[insn_idx].call_with_percpu_alloc_ptr = true;
+ }
+ break;
+ }
case BPF_FUNC_user_ringbuf_drain:
err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
set_user_ringbuf_callback_state);
@@ -9857,14 +10053,18 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag;
regs[BPF_REG_0].mem_size = tsize;
} else {
- /* MEM_RDONLY may be carried from ret_flag, but it
- * doesn't apply on PTR_TO_BTF_ID. Fold it, otherwise
- * it will confuse the check of PTR_TO_BTF_ID in
- * check_mem_access().
- */
- ret_flag &= ~MEM_RDONLY;
+ if (returns_cpu_specific_alloc_ptr) {
+ regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC | MEM_RCU;
+ } else {
+ /* MEM_RDONLY may be carried from ret_flag, but it
+ * doesn't apply on PTR_TO_BTF_ID. Fold it, otherwise
+ * it will confuse the check of PTR_TO_BTF_ID in
+ * check_mem_access().
+ */
+ ret_flag &= ~MEM_RDONLY;
+ regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag;
+ }
- regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag;
regs[BPF_REG_0].btf = meta.ret_btf;
regs[BPF_REG_0].btf_id = meta.ret_btf_id;
}
@@ -9880,8 +10080,11 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
if (func_id == BPF_FUNC_kptr_xchg) {
ret_btf = meta.kptr_field->kptr.btf;
ret_btf_id = meta.kptr_field->kptr.btf_id;
- if (!btf_is_kernel(ret_btf))
+ if (!btf_is_kernel(ret_btf)) {
regs[BPF_REG_0].type |= MEM_ALLOC;
+ if (meta.kptr_field->type == BPF_KPTR_PERCPU)
+ regs[BPF_REG_0].type |= MEM_PERCPU;
+ }
} else {
if (fn->ret_btf_id == BPF_PTR_POISON) {
verbose(env, "verifier internal error:");
@@ -10266,6 +10469,9 @@ enum special_kfunc_type {
KF_bpf_dynptr_slice,
KF_bpf_dynptr_slice_rdwr,
KF_bpf_dynptr_clone,
+ KF_bpf_percpu_obj_new_impl,
+ KF_bpf_percpu_obj_drop_impl,
+ KF_bpf_throw,
};
BTF_SET_START(special_kfunc_set)
@@ -10286,6 +10492,9 @@ BTF_ID(func, bpf_dynptr_from_xdp)
BTF_ID(func, bpf_dynptr_slice)
BTF_ID(func, bpf_dynptr_slice_rdwr)
BTF_ID(func, bpf_dynptr_clone)
+BTF_ID(func, bpf_percpu_obj_new_impl)
+BTF_ID(func, bpf_percpu_obj_drop_impl)
+BTF_ID(func, bpf_throw)
BTF_SET_END(special_kfunc_set)
BTF_ID_LIST(special_kfunc_list)
@@ -10308,6 +10517,9 @@ BTF_ID(func, bpf_dynptr_from_xdp)
BTF_ID(func, bpf_dynptr_slice)
BTF_ID(func, bpf_dynptr_slice_rdwr)
BTF_ID(func, bpf_dynptr_clone)
+BTF_ID(func, bpf_percpu_obj_new_impl)
+BTF_ID(func, bpf_percpu_obj_drop_impl)
+BTF_ID(func, bpf_throw)
static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta)
{
@@ -10625,6 +10837,12 @@ static bool is_callback_calling_kfunc(u32 btf_id)
return btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl];
}
+static bool is_bpf_throw_kfunc(struct bpf_insn *insn)
+{
+ return bpf_pseudo_kfunc_call(insn) && insn->off == 0 &&
+ insn->imm == special_kfunc_list[KF_bpf_throw];
+}
+
static bool is_rbtree_lock_required_kfunc(u32 btf_id)
{
return is_bpf_rbtree_api_kfunc(btf_id);
@@ -11002,7 +11220,17 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
}
break;
case KF_ARG_PTR_TO_ALLOC_BTF_ID:
- if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
+ if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC)) {
+ if (meta->func_id != special_kfunc_list[KF_bpf_obj_drop_impl]) {
+ verbose(env, "arg#%d expected for bpf_obj_drop_impl()\n", i);
+ return -EINVAL;
+ }
+ } else if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC | MEM_PERCPU)) {
+ if (meta->func_id != special_kfunc_list[KF_bpf_percpu_obj_drop_impl]) {
+ verbose(env, "arg#%d expected for bpf_percpu_obj_drop_impl()\n", i);
+ return -EINVAL;
+ }
+ } else {
verbose(env, "arg#%d expected pointer to allocated object\n", i);
return -EINVAL;
}
@@ -11010,8 +11238,7 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
verbose(env, "allocated object must be referenced\n");
return -EINVAL;
}
- if (meta->btf == btf_vmlinux &&
- meta->func_id == special_kfunc_list[KF_bpf_obj_drop_impl]) {
+ if (meta->btf == btf_vmlinux) {
meta->arg_btf = reg->btf;
meta->arg_btf_id = reg->btf_id;
}
@@ -11202,6 +11429,10 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
break;
}
case KF_ARG_PTR_TO_CALLBACK:
+ if (reg->type != PTR_TO_FUNC) {
+ verbose(env, "arg%d expected pointer to func\n", i);
+ return -EINVAL;
+ }
meta->subprogno = reg->subprogno;
break;
case KF_ARG_PTR_TO_REFCOUNTED_KPTR:
@@ -11280,6 +11511,8 @@ static int fetch_kfunc_meta(struct bpf_verifier_env *env,
return 0;
}
+static int check_return_code(struct bpf_verifier_env *env, int regno);
+
static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
int *insn_idx_p)
{
@@ -11401,6 +11634,24 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
}
}
+ if (meta.func_id == special_kfunc_list[KF_bpf_throw]) {
+ if (!bpf_jit_supports_exceptions()) {
+ verbose(env, "JIT does not support calling kfunc %s#%d\n",
+ func_name, meta.func_id);
+ return -ENOTSUPP;
+ }
+ env->seen_exception = true;
+
+ /* In the case of the default callback, the cookie value passed
+ * to bpf_throw becomes the return value of the program.
+ */
+ if (!env->exception_callback_subprog) {
+ err = check_return_code(env, BPF_REG_1);
+ if (err < 0)
+ return err;
+ }
+ }
+
for (i = 0; i < CALLER_SAVED_REGS; i++)
mark_reg_not_init(env, regs, caller_saved[i]);
@@ -11411,6 +11662,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
/* Only exception is bpf_obj_new_impl */
if (meta.btf != btf_vmlinux ||
(meta.func_id != special_kfunc_list[KF_bpf_obj_new_impl] &&
+ meta.func_id != special_kfunc_list[KF_bpf_percpu_obj_new_impl] &&
meta.func_id != special_kfunc_list[KF_bpf_refcount_acquire_impl])) {
verbose(env, "acquire kernel function does not return PTR_TO_BTF_ID\n");
return -EINVAL;
@@ -11424,11 +11676,16 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
ptr_type = btf_type_skip_modifiers(desc_btf, t->type, &ptr_type_id);
if (meta.btf == btf_vmlinux && btf_id_set_contains(&special_kfunc_set, meta.func_id)) {
- if (meta.func_id == special_kfunc_list[KF_bpf_obj_new_impl]) {
+ if (meta.func_id == special_kfunc_list[KF_bpf_obj_new_impl] ||
+ meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) {
+ struct btf_struct_meta *struct_meta;
struct btf *ret_btf;
u32 ret_btf_id;
- if (unlikely(!bpf_global_ma_set))
+ if (meta.func_id == special_kfunc_list[KF_bpf_obj_new_impl] && !bpf_global_ma_set)
+ return -ENOMEM;
+
+ if (meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl] && !bpf_global_percpu_ma_set)
return -ENOMEM;
if (((u64)(u32)meta.arg_constant.value) != meta.arg_constant.value) {
@@ -11441,24 +11698,38 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
/* This may be NULL due to user not supplying a BTF */
if (!ret_btf) {
- verbose(env, "bpf_obj_new requires prog BTF\n");
+ verbose(env, "bpf_obj_new/bpf_percpu_obj_new requires prog BTF\n");
return -EINVAL;
}
ret_t = btf_type_by_id(ret_btf, ret_btf_id);
if (!ret_t || !__btf_type_is_struct(ret_t)) {
- verbose(env, "bpf_obj_new type ID argument must be of a struct\n");
+ verbose(env, "bpf_obj_new/bpf_percpu_obj_new type ID argument must be of a struct\n");
return -EINVAL;
}
+ struct_meta = btf_find_struct_meta(ret_btf, ret_btf_id);
+ if (meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) {
+ if (!__btf_type_is_scalar_struct(env, ret_btf, ret_t, 0)) {
+ verbose(env, "bpf_percpu_obj_new type ID argument must be of a struct of scalars\n");
+ return -EINVAL;
+ }
+
+ if (struct_meta) {
+ verbose(env, "bpf_percpu_obj_new type ID argument must not contain special fields\n");
+ return -EINVAL;
+ }
+ }
+
mark_reg_known_zero(env, regs, BPF_REG_0);
regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC;
regs[BPF_REG_0].btf = ret_btf;
regs[BPF_REG_0].btf_id = ret_btf_id;
+ if (meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl])
+ regs[BPF_REG_0].type |= MEM_PERCPU;
insn_aux->obj_new_size = ret_t->size;
- insn_aux->kptr_struct_meta =
- btf_find_struct_meta(ret_btf, ret_btf_id);
+ insn_aux->kptr_struct_meta = struct_meta;
} else if (meta.func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl]) {
mark_reg_known_zero(env, regs, BPF_REG_0);
regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC;
@@ -11595,7 +11866,8 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
regs[BPF_REG_0].id = ++env->id_gen;
} else if (btf_type_is_void(t)) {
if (meta.btf == btf_vmlinux && btf_id_set_contains(&special_kfunc_set, meta.func_id)) {
- if (meta.func_id == special_kfunc_list[KF_bpf_obj_drop_impl]) {
+ if (meta.func_id == special_kfunc_list[KF_bpf_obj_drop_impl] ||
+ meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_drop_impl]) {
insn_aux->kptr_struct_meta =
btf_find_struct_meta(meta.arg_btf,
meta.arg_btf_id);
@@ -14135,6 +14407,8 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
!sanitize_speculative_path(env, insn, *insn_idx + 1,
*insn_idx))
return -EFAULT;
+ if (env->log.level & BPF_LOG_LEVEL)
+ print_insn_state(env, this_branch->frame[this_branch->curframe]);
*insn_idx += insn->off;
return 0;
} else if (pred == 0) {
@@ -14147,6 +14421,8 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
*insn_idx + insn->off + 1,
*insn_idx))
return -EFAULT;
+ if (env->log.level & BPF_LOG_LEVEL)
+ print_insn_state(env, this_branch->frame[this_branch->curframe]);
return 0;
}
@@ -14425,7 +14701,7 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
* gen_ld_abs() may terminate the program at runtime, leading to
* reference leak.
*/
- err = check_reference_leak(env);
+ err = check_reference_leak(env, false);
if (err) {
verbose(env, "BPF_LD_[ABS|IND] cannot be mixed with socket references\n");
return err;
@@ -14474,7 +14750,7 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
return 0;
}
-static int check_return_code(struct bpf_verifier_env *env)
+static int check_return_code(struct bpf_verifier_env *env, int regno)
{
struct tnum enforce_attach_type_range = tnum_unknown;
const struct bpf_prog *prog = env->prog;
@@ -14486,7 +14762,7 @@ static int check_return_code(struct bpf_verifier_env *env)
const bool is_subprog = frame->subprogno;
/* LSM and struct_ops func-ptr's return type could be "void" */
- if (!is_subprog) {
+ if (!is_subprog || frame->in_exception_callback_fn) {
switch (prog_type) {
case BPF_PROG_TYPE_LSM:
if (prog->expected_attach_type == BPF_LSM_CGROUP)
@@ -14508,22 +14784,22 @@ static int check_return_code(struct bpf_verifier_env *env)
* of bpf_exit, which means that program wrote
* something into it earlier
*/
- err = check_reg_arg(env, BPF_REG_0, SRC_OP);
+ err = check_reg_arg(env, regno, SRC_OP);
if (err)
return err;
- if (is_pointer_value(env, BPF_REG_0)) {
- verbose(env, "R0 leaks addr as return value\n");
+ if (is_pointer_value(env, regno)) {
+ verbose(env, "R%d leaks addr as return value\n", regno);
return -EACCES;
}
- reg = cur_regs(env) + BPF_REG_0;
+ reg = cur_regs(env) + regno;
if (frame->in_async_callback_fn) {
/* enforce return zero from async callbacks like timer */
if (reg->type != SCALAR_VALUE) {
- verbose(env, "In async callback the register R0 is not a known value (%s)\n",
- reg_type_str(env, reg->type));
+ verbose(env, "In async callback the register R%d is not a known value (%s)\n",
+ regno, reg_type_str(env, reg->type));
return -EINVAL;
}
@@ -14534,10 +14810,10 @@ static int check_return_code(struct bpf_verifier_env *env)
return 0;
}
- if (is_subprog) {
+ if (is_subprog && !frame->in_exception_callback_fn) {
if (reg->type != SCALAR_VALUE) {
- verbose(env, "At subprogram exit the register R0 is not a scalar value (%s)\n",
- reg_type_str(env, reg->type));
+ verbose(env, "At subprogram exit the register R%d is not a scalar value (%s)\n",
+ regno, reg_type_str(env, reg->type));
return -EINVAL;
}
return 0;
@@ -14547,10 +14823,13 @@ static int check_return_code(struct bpf_verifier_env *env)
case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
if (env->prog->expected_attach_type == BPF_CGROUP_UDP4_RECVMSG ||
env->prog->expected_attach_type == BPF_CGROUP_UDP6_RECVMSG ||
+ env->prog->expected_attach_type == BPF_CGROUP_UNIX_RECVMSG ||
env->prog->expected_attach_type == BPF_CGROUP_INET4_GETPEERNAME ||
env->prog->expected_attach_type == BPF_CGROUP_INET6_GETPEERNAME ||
+ env->prog->expected_attach_type == BPF_CGROUP_UNIX_GETPEERNAME ||
env->prog->expected_attach_type == BPF_CGROUP_INET4_GETSOCKNAME ||
- env->prog->expected_attach_type == BPF_CGROUP_INET6_GETSOCKNAME)
+ env->prog->expected_attach_type == BPF_CGROUP_INET6_GETSOCKNAME ||
+ env->prog->expected_attach_type == BPF_CGROUP_UNIX_GETSOCKNAME)
range = tnum_range(1, 1);
if (env->prog->expected_attach_type == BPF_CGROUP_INET4_BIND ||
env->prog->expected_attach_type == BPF_CGROUP_INET6_BIND)
@@ -14619,8 +14898,8 @@ static int check_return_code(struct bpf_verifier_env *env)
}
if (reg->type != SCALAR_VALUE) {
- verbose(env, "At program exit the register R0 is not a known value (%s)\n",
- reg_type_str(env, reg->type));
+ verbose(env, "At program exit the register R%d is not a known value (%s)\n",
+ regno, reg_type_str(env, reg->type));
return -EINVAL;
}
@@ -14891,8 +15170,8 @@ static int check_cfg(struct bpf_verifier_env *env)
{
int insn_cnt = env->prog->len;
int *insn_stack, *insn_state;
- int ret = 0;
- int i;
+ int ex_insn_beg, i, ret = 0;
+ bool ex_done = false;
insn_state = env->cfg.insn_state = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
if (!insn_state)
@@ -14908,6 +15187,7 @@ static int check_cfg(struct bpf_verifier_env *env)
insn_stack[0] = 0; /* 0 is the first instruction */
env->cfg.cur_stack = 1;
+walk_cfg:
while (env->cfg.cur_stack > 0) {
int t = insn_stack[env->cfg.cur_stack - 1];
@@ -14934,6 +15214,16 @@ static int check_cfg(struct bpf_verifier_env *env)
goto err_free;
}
+ if (env->exception_callback_subprog && !ex_done) {
+ ex_insn_beg = env->subprog_info[env->exception_callback_subprog].start;
+
+ insn_state[ex_insn_beg] = DISCOVERED;
+ insn_stack[0] = ex_insn_beg;
+ env->cfg.cur_stack = 1;
+ ex_done = true;
+ goto walk_cfg;
+ }
+
for (i = 0; i < insn_cnt; i++) {
if (insn_state[i] != EXPLORED) {
verbose(env, "unreachable insn %d\n", i);
@@ -14971,20 +15261,18 @@ static int check_abnormal_return(struct bpf_verifier_env *env)
#define MIN_BPF_FUNCINFO_SIZE 8
#define MAX_FUNCINFO_REC_SIZE 252
-static int check_btf_func(struct bpf_verifier_env *env,
- const union bpf_attr *attr,
- bpfptr_t uattr)
+static int check_btf_func_early(struct bpf_verifier_env *env,
+ const union bpf_attr *attr,
+ bpfptr_t uattr)
{
- const struct btf_type *type, *func_proto, *ret_type;
- u32 i, nfuncs, urec_size, min_size;
u32 krec_size = sizeof(struct bpf_func_info);
+ const struct btf_type *type, *func_proto;
+ u32 i, nfuncs, urec_size, min_size;
struct bpf_func_info *krecord;
- struct bpf_func_info_aux *info_aux = NULL;
struct bpf_prog *prog;
const struct btf *btf;
- bpfptr_t urecord;
u32 prev_offset = 0;
- bool scalar_return;
+ bpfptr_t urecord;
int ret = -ENOMEM;
nfuncs = attr->func_info_cnt;
@@ -14994,11 +15282,6 @@ static int check_btf_func(struct bpf_verifier_env *env,
return 0;
}
- if (nfuncs != env->subprog_cnt) {
- verbose(env, "number of funcs in func_info doesn't match number of subprogs\n");
- return -EINVAL;
- }
-
urec_size = attr->func_info_rec_size;
if (urec_size < MIN_BPF_FUNCINFO_SIZE ||
urec_size > MAX_FUNCINFO_REC_SIZE ||
@@ -15016,9 +15299,6 @@ static int check_btf_func(struct bpf_verifier_env *env,
krecord = kvcalloc(nfuncs, krec_size, GFP_KERNEL | __GFP_NOWARN);
if (!krecord)
return -ENOMEM;
- info_aux = kcalloc(nfuncs, sizeof(*info_aux), GFP_KERNEL | __GFP_NOWARN);
- if (!info_aux)
- goto err_free;
for (i = 0; i < nfuncs; i++) {
ret = bpf_check_uarg_tail_zero(urecord, krec_size, urec_size);
@@ -15057,11 +15337,6 @@ static int check_btf_func(struct bpf_verifier_env *env,
goto err_free;
}
- if (env->subprog_info[i].start != krecord[i].insn_off) {
- verbose(env, "func_info BTF section doesn't match subprog layout in BPF program\n");
- goto err_free;
- }
-
/* check type_id */
type = btf_type_by_id(btf, krecord[i].type_id);
if (!type || !btf_type_is_func(type)) {
@@ -15069,12 +15344,77 @@ static int check_btf_func(struct bpf_verifier_env *env,
krecord[i].type_id);
goto err_free;
}
- info_aux[i].linkage = BTF_INFO_VLEN(type->info);
func_proto = btf_type_by_id(btf, type->type);
if (unlikely(!func_proto || !btf_type_is_func_proto(func_proto)))
/* btf_func_check() already verified it during BTF load */
goto err_free;
+
+ prev_offset = krecord[i].insn_off;
+ bpfptr_add(&urecord, urec_size);
+ }
+
+ prog->aux->func_info = krecord;
+ prog->aux->func_info_cnt = nfuncs;
+ return 0;
+
+err_free:
+ kvfree(krecord);
+ return ret;
+}
+
+static int check_btf_func(struct bpf_verifier_env *env,
+ const union bpf_attr *attr,
+ bpfptr_t uattr)
+{
+ const struct btf_type *type, *func_proto, *ret_type;
+ u32 i, nfuncs, urec_size;
+ struct bpf_func_info *krecord;
+ struct bpf_func_info_aux *info_aux = NULL;
+ struct bpf_prog *prog;
+ const struct btf *btf;
+ bpfptr_t urecord;
+ bool scalar_return;
+ int ret = -ENOMEM;
+
+ nfuncs = attr->func_info_cnt;
+ if (!nfuncs) {
+ if (check_abnormal_return(env))
+ return -EINVAL;
+ return 0;
+ }
+ if (nfuncs != env->subprog_cnt) {
+ verbose(env, "number of funcs in func_info doesn't match number of subprogs\n");
+ return -EINVAL;
+ }
+
+ urec_size = attr->func_info_rec_size;
+
+ prog = env->prog;
+ btf = prog->aux->btf;
+
+ urecord = make_bpfptr(attr->func_info, uattr.is_kernel);
+
+ krecord = prog->aux->func_info;
+ info_aux = kcalloc(nfuncs, sizeof(*info_aux), GFP_KERNEL | __GFP_NOWARN);
+ if (!info_aux)
+ return -ENOMEM;
+
+ for (i = 0; i < nfuncs; i++) {
+ /* check insn_off */
+ ret = -EINVAL;
+
+ if (env->subprog_info[i].start != krecord[i].insn_off) {
+ verbose(env, "func_info BTF section doesn't match subprog layout in BPF program\n");
+ goto err_free;
+ }
+
+ /* Already checked type_id */
+ type = btf_type_by_id(btf, krecord[i].type_id);
+ info_aux[i].linkage = BTF_INFO_VLEN(type->info);
+ /* Already checked func_proto */
+ func_proto = btf_type_by_id(btf, type->type);
+
ret_type = btf_type_skip_modifiers(btf, func_proto->type, NULL);
scalar_return =
btf_type_is_small_int(ret_type) || btf_is_any_enum(ret_type);
@@ -15087,17 +15427,13 @@ static int check_btf_func(struct bpf_verifier_env *env,
goto err_free;
}
- prev_offset = krecord[i].insn_off;
bpfptr_add(&urecord, urec_size);
}
- prog->aux->func_info = krecord;
- prog->aux->func_info_cnt = nfuncs;
prog->aux->func_info_aux = info_aux;
return 0;
err_free:
- kvfree(krecord);
kfree(info_aux);
return ret;
}
@@ -15110,7 +15446,8 @@ static void adjust_btf_func(struct bpf_verifier_env *env)
if (!aux->func_info)
return;
- for (i = 0; i < env->subprog_cnt; i++)
+ /* func_info is not available for hidden subprogs */
+ for (i = 0; i < env->subprog_cnt - env->hidden_subprog_cnt; i++)
aux->func_info[i].insn_off = env->subprog_info[i].start;
}
@@ -15314,9 +15651,9 @@ static int check_core_relo(struct bpf_verifier_env *env,
return err;
}
-static int check_btf_info(struct bpf_verifier_env *env,
- const union bpf_attr *attr,
- bpfptr_t uattr)
+static int check_btf_info_early(struct bpf_verifier_env *env,
+ const union bpf_attr *attr,
+ bpfptr_t uattr)
{
struct btf *btf;
int err;
@@ -15336,6 +15673,24 @@ static int check_btf_info(struct bpf_verifier_env *env,
}
env->prog->aux->btf = btf;
+ err = check_btf_func_early(env, attr, uattr);
+ if (err)
+ return err;
+ return 0;
+}
+
+static int check_btf_info(struct bpf_verifier_env *env,
+ const union bpf_attr *attr,
+ bpfptr_t uattr)
+{
+ int err;
+
+ if (!attr->func_info_cnt && !attr->line_info_cnt) {
+ if (check_abnormal_return(env))
+ return -EINVAL;
+ return 0;
+ }
+
err = check_btf_func(env, attr, uattr);
if (err)
return err;
@@ -16438,6 +16793,7 @@ static int do_check(struct bpf_verifier_env *env)
int prev_insn_idx = -1;
for (;;) {
+ bool exception_exit = false;
struct bpf_insn *insn;
u8 class;
int err;
@@ -16652,12 +17008,17 @@ static int do_check(struct bpf_verifier_env *env)
return -EINVAL;
}
}
- if (insn->src_reg == BPF_PSEUDO_CALL)
+ if (insn->src_reg == BPF_PSEUDO_CALL) {
err = check_func_call(env, insn, &env->insn_idx);
- else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL)
+ } else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
err = check_kfunc_call(env, insn, &env->insn_idx);
- else
+ if (!err && is_bpf_throw_kfunc(insn)) {
+ exception_exit = true;
+ goto process_bpf_exit_full;
+ }
+ } else {
err = check_helper_call(env, insn, &env->insn_idx);
+ }
if (err)
return err;
@@ -16687,7 +17048,7 @@ static int do_check(struct bpf_verifier_env *env)
verbose(env, "BPF_EXIT uses reserved fields\n");
return -EINVAL;
}
-
+process_bpf_exit_full:
if (env->cur_state->active_lock.ptr &&
!in_rbtree_lock_required_cb(env)) {
verbose(env, "bpf_spin_unlock is missing\n");
@@ -16706,10 +17067,23 @@ static int do_check(struct bpf_verifier_env *env)
* function, for which reference_state must
* match caller reference state when it exits.
*/
- err = check_reference_leak(env);
+ err = check_reference_leak(env, exception_exit);
if (err)
return err;
+ /* The side effect of the prepare_func_exit
+ * which is being skipped is that it frees
+ * bpf_func_state. Typically, process_bpf_exit
+ * will only be hit with outermost exit.
+ * copy_verifier_state in pop_stack will handle
+ * freeing of any extra bpf_func_state left over
+ * from not processing all nested function
+ * exits. We also skip return code checks as
+ * they are not needed for exceptional exits.
+ */
+ if (exception_exit)
+ goto process_bpf_exit;
+
if (state->curframe) {
/* exit from nested function */
err = prepare_func_exit(env, &env->insn_idx);
@@ -16719,7 +17093,7 @@ static int do_check(struct bpf_verifier_env *env)
continue;
}
- err = check_return_code(env);
+ err = check_return_code(env, BPF_REG_0);
if (err)
return err;
process_bpf_exit:
@@ -18012,6 +18386,9 @@ static int jit_subprogs(struct bpf_verifier_env *env)
}
func[i]->aux->num_exentries = num_exentries;
func[i]->aux->tail_call_reachable = env->subprog_info[i].tail_call_reachable;
+ func[i]->aux->exception_cb = env->subprog_info[i].is_exception_cb;
+ if (!i)
+ func[i]->aux->exception_boundary = env->seen_exception;
func[i] = bpf_int_jit_compile(func[i]);
if (!func[i]->jited) {
err = -ENOTSUPP;
@@ -18051,7 +18428,8 @@ static int jit_subprogs(struct bpf_verifier_env *env)
* the call instruction, as an index for this list
*/
func[i]->aux->func = func;
- func[i]->aux->func_cnt = env->subprog_cnt;
+ func[i]->aux->func_cnt = env->subprog_cnt - env->hidden_subprog_cnt;
+ func[i]->aux->real_func_cnt = env->subprog_cnt;
}
for (i = 0; i < env->subprog_cnt; i++) {
old_bpf_func = func[i]->bpf_func;
@@ -18097,7 +18475,10 @@ static int jit_subprogs(struct bpf_verifier_env *env)
prog->aux->extable = func[0]->aux->extable;
prog->aux->num_exentries = func[0]->aux->num_exentries;
prog->aux->func = func;
- prog->aux->func_cnt = env->subprog_cnt;
+ prog->aux->func_cnt = env->subprog_cnt - env->hidden_subprog_cnt;
+ prog->aux->real_func_cnt = env->subprog_cnt;
+ prog->aux->bpf_exception_cb = (void *)func[env->exception_callback_subprog]->bpf_func;
+ prog->aux->exception_boundary = func[0]->aux->exception_boundary;
bpf_prog_jit_attempt_done(prog);
return 0;
out_free:
@@ -18264,21 +18645,35 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
insn->imm = BPF_CALL_IMM(desc->addr);
if (insn->off)
return 0;
- if (desc->func_id == special_kfunc_list[KF_bpf_obj_new_impl]) {
+ if (desc->func_id == special_kfunc_list[KF_bpf_obj_new_impl] ||
+ desc->func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) {
struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
struct bpf_insn addr[2] = { BPF_LD_IMM64(BPF_REG_2, (long)kptr_struct_meta) };
u64 obj_new_size = env->insn_aux_data[insn_idx].obj_new_size;
+ if (desc->func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl] && kptr_struct_meta) {
+ verbose(env, "verifier internal error: NULL kptr_struct_meta expected at insn_idx %d\n",
+ insn_idx);
+ return -EFAULT;
+ }
+
insn_buf[0] = BPF_MOV64_IMM(BPF_REG_1, obj_new_size);
insn_buf[1] = addr[0];
insn_buf[2] = addr[1];
insn_buf[3] = *insn;
*cnt = 4;
} else if (desc->func_id == special_kfunc_list[KF_bpf_obj_drop_impl] ||
+ desc->func_id == special_kfunc_list[KF_bpf_percpu_obj_drop_impl] ||
desc->func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl]) {
struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
struct bpf_insn addr[2] = { BPF_LD_IMM64(BPF_REG_2, (long)kptr_struct_meta) };
+ if (desc->func_id == special_kfunc_list[KF_bpf_percpu_obj_drop_impl] && kptr_struct_meta) {
+ verbose(env, "verifier internal error: NULL kptr_struct_meta expected at insn_idx %d\n",
+ insn_idx);
+ return -EFAULT;
+ }
+
if (desc->func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl] &&
!kptr_struct_meta) {
verbose(env, "verifier internal error: kptr_struct_meta expected at insn_idx %d\n",
@@ -18319,6 +18714,33 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
return 0;
}
+/* The function requires that first instruction in 'patch' is insnsi[prog->len - 1] */
+static int add_hidden_subprog(struct bpf_verifier_env *env, struct bpf_insn *patch, int len)
+{
+ struct bpf_subprog_info *info = env->subprog_info;
+ int cnt = env->subprog_cnt;
+ struct bpf_prog *prog;
+
+ /* We only reserve one slot for hidden subprogs in subprog_info. */
+ if (env->hidden_subprog_cnt) {
+ verbose(env, "verifier internal error: only one hidden subprog supported\n");
+ return -EFAULT;
+ }
+ /* We're not patching any existing instruction, just appending the new
+ * ones for the hidden subprog. Hence all of the adjustment operations
+ * in bpf_patch_insn_data are no-ops.
+ */
+ prog = bpf_patch_insn_data(env, env->prog->len - 1, patch, len);
+ if (!prog)
+ return -ENOMEM;
+ env->prog = prog;
+ info[cnt + 1].start = info[cnt].start;
+ info[cnt].start = prog->len - len + 1;
+ env->subprog_cnt++;
+ env->hidden_subprog_cnt++;
+ return 0;
+}
+
/* Do various post-verification rewrites in a single program pass.
* These rewrites simplify JIT and interpreter implementations.
*/
@@ -18337,6 +18759,26 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
struct bpf_map *map_ptr;
int i, ret, cnt, delta = 0;
+ if (env->seen_exception && !env->exception_callback_subprog) {
+ struct bpf_insn patch[] = {
+ env->prog->insnsi[insn_cnt - 1],
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ };
+
+ ret = add_hidden_subprog(env, patch, ARRAY_SIZE(patch));
+ if (ret < 0)
+ return ret;
+ prog = env->prog;
+ insn = prog->insnsi;
+
+ env->exception_callback_subprog = env->subprog_cnt - 1;
+ /* Don't update insn_cnt, as add_hidden_subprog always appends insns */
+ env->subprog_info[env->exception_callback_subprog].is_cb = true;
+ env->subprog_info[env->exception_callback_subprog].is_async_cb = true;
+ env->subprog_info[env->exception_callback_subprog].is_exception_cb = true;
+ }
+
for (i = 0; i < insn_cnt; i++, insn++) {
/* Make divide-by-zero exceptions impossible. */
if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) ||
@@ -18606,6 +19048,25 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
goto patch_call_imm;
}
+ /* bpf_per_cpu_ptr() and bpf_this_cpu_ptr() */
+ if (env->insn_aux_data[i + delta].call_with_percpu_alloc_ptr) {
+ /* patch with 'r1 = *(u64 *)(r1 + 0)' since for percpu data,
+ * bpf_mem_alloc() returns a ptr to the percpu data ptr.
+ */
+ insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, 0);
+ insn_buf[1] = *insn;
+ cnt = 2;
+
+ new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+ if (!new_prog)
+ return -ENOMEM;
+
+ delta += cnt - 1;
+ env->prog = prog = new_prog;
+ insn = new_prog->insnsi + i + delta;
+ goto patch_call_imm;
+ }
+
/* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup
* and other inlining handlers are currently limited to 64 bit
* only.
@@ -19015,7 +19476,7 @@ static void free_states(struct bpf_verifier_env *env)
}
}
-static int do_check_common(struct bpf_verifier_env *env, int subprog)
+static int do_check_common(struct bpf_verifier_env *env, int subprog, bool is_ex_cb)
{
bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
struct bpf_verifier_state *state;
@@ -19046,7 +19507,7 @@ static int do_check_common(struct bpf_verifier_env *env, int subprog)
regs = state->frame[state->curframe]->regs;
if (subprog || env->prog->type == BPF_PROG_TYPE_EXT) {
- ret = btf_prepare_func_args(env, subprog, regs);
+ ret = btf_prepare_func_args(env, subprog, regs, is_ex_cb);
if (ret)
goto out;
for (i = BPF_REG_1; i <= BPF_REG_5; i++) {
@@ -19062,6 +19523,12 @@ static int do_check_common(struct bpf_verifier_env *env, int subprog)
regs[i].id = ++env->id_gen;
}
}
+ if (is_ex_cb) {
+ state->frame[0]->in_exception_callback_fn = true;
+ env->subprog_info[subprog].is_cb = true;
+ env->subprog_info[subprog].is_async_cb = true;
+ env->subprog_info[subprog].is_exception_cb = true;
+ }
} else {
/* 1st arg to a function */
regs[BPF_REG_1].type = PTR_TO_CTX;
@@ -19126,7 +19593,7 @@ static int do_check_subprogs(struct bpf_verifier_env *env)
continue;
env->insn_idx = env->subprog_info[i].start;
WARN_ON_ONCE(env->insn_idx == 0);
- ret = do_check_common(env, i);
+ ret = do_check_common(env, i, env->exception_callback_subprog == i);
if (ret) {
return ret;
} else if (env->log.level & BPF_LOG_LEVEL) {
@@ -19143,7 +19610,7 @@ static int do_check_main(struct bpf_verifier_env *env)
int ret;
env->insn_idx = 0;
- ret = do_check_common(env, 0);
+ ret = do_check_common(env, 0, false);
if (!ret)
env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
return ret;
@@ -19312,6 +19779,12 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
bpf_log(log, "Subprog %s doesn't exist\n", tname);
return -EINVAL;
}
+ if (aux->func && aux->func[subprog]->aux->exception_cb) {
+ bpf_log(log,
+ "%s programs cannot attach to exception callback\n",
+ prog_extension ? "Extension" : "FENTRY/FEXIT");
+ return -EINVAL;
+ }
conservative = aux->func_info_aux[subprog].unreliable;
if (prog_extension) {
if (conservative) {
@@ -19641,6 +20114,9 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
if (!tr)
return -ENOMEM;
+ if (tgt_prog && tgt_prog->aux->tail_call_reachable)
+ tr->flags = BPF_TRAMP_F_TAIL_CALL_CTX;
+
prog->aux->dst_trampoline = tr;
return 0;
}
@@ -19736,6 +20212,10 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3
if (!env->explored_states)
goto skip_full_check;
+ ret = check_btf_info_early(env, attr, uattr);
+ if (ret < 0)
+ goto skip_full_check;
+
ret = add_subprog_and_kfunc(env);
if (ret < 0)
goto skip_full_check;
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 868008f56fec..df697c74d519 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -117,6 +117,9 @@ unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
* and don't send kprobe event into ring-buffer,
* so return zero here
*/
+ rcu_read_lock();
+ bpf_prog_inc_misses_counters(rcu_dereference(call->prog_array));
+ rcu_read_unlock();
ret = 0;
goto out;
}
@@ -2384,7 +2387,8 @@ int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog)
int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,
u32 *fd_type, const char **buf,
- u64 *probe_offset, u64 *probe_addr)
+ u64 *probe_offset, u64 *probe_addr,
+ unsigned long *missed)
{
bool is_tracepoint, is_syscall_tp;
struct bpf_prog *prog;
@@ -2419,7 +2423,7 @@ int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,
#ifdef CONFIG_KPROBE_EVENTS
if (flags & TRACE_EVENT_FL_KPROBE)
err = bpf_get_kprobe_info(event, fd_type, buf,
- probe_offset, probe_addr,
+ probe_offset, probe_addr, missed,
event->attr.type == PERF_TYPE_TRACEPOINT);
#endif
#ifdef CONFIG_UPROBE_EVENTS
@@ -2614,6 +2618,7 @@ static int bpf_kprobe_multi_link_fill_link_info(const struct bpf_link *link,
kmulti_link = container_of(link, struct bpf_kprobe_multi_link, link);
info->kprobe_multi.count = kmulti_link->cnt;
info->kprobe_multi.flags = kmulti_link->flags;
+ info->kprobe_multi.missed = kmulti_link->fp.nmissed;
if (!uaddrs)
return 0;
@@ -2710,6 +2715,7 @@ kprobe_multi_link_prog_run(struct bpf_kprobe_multi_link *link,
int err;
if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) {
+ bpf_prog_inc_misses_counter(link->link.prog);
err = 0;
goto out;
}
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index e834f149695b..a3442db35670 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1249,6 +1249,12 @@ static const struct file_operations kprobe_events_ops = {
.write = probes_write,
};
+static unsigned long trace_kprobe_missed(struct trace_kprobe *tk)
+{
+ return trace_kprobe_is_return(tk) ?
+ tk->rp.kp.nmissed + tk->rp.nmissed : tk->rp.kp.nmissed;
+}
+
/* Probes profiling interfaces */
static int probes_profile_seq_show(struct seq_file *m, void *v)
{
@@ -1260,8 +1266,7 @@ static int probes_profile_seq_show(struct seq_file *m, void *v)
return 0;
tk = to_trace_kprobe(ev);
- nmissed = trace_kprobe_is_return(tk) ?
- tk->rp.kp.nmissed + tk->rp.nmissed : tk->rp.kp.nmissed;
+ nmissed = trace_kprobe_missed(tk);
seq_printf(m, " %-44s %15lu %15lu\n",
trace_probe_name(&tk->tp),
trace_kprobe_nhit(tk),
@@ -1607,7 +1612,8 @@ NOKPROBE_SYMBOL(kretprobe_perf_func);
int bpf_get_kprobe_info(const struct perf_event *event, u32 *fd_type,
const char **symbol, u64 *probe_offset,
- u64 *probe_addr, bool perf_type_tracepoint)
+ u64 *probe_addr, unsigned long *missed,
+ bool perf_type_tracepoint)
{
const char *pevent = trace_event_name(event->tp_event);
const char *group = event->tp_event->class->system;
@@ -1626,6 +1632,8 @@ int bpf_get_kprobe_info(const struct perf_event *event, u32 *fd_type,
*probe_addr = kallsyms_show_value(current_cred()) ?
(unsigned long)tk->rp.kp.addr : 0;
*symbol = tk->symbol;
+ if (missed)
+ *missed = trace_kprobe_missed(tk);
return 0;
}
#endif /* CONFIG_PERF_EVENTS */
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index de753403cdaf..9c581d6da843 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -556,7 +556,7 @@ static int perf_call_bpf_enter(struct trace_event_call *call, struct pt_regs *re
{
struct syscall_tp_t {
struct trace_entry ent;
- unsigned long syscall_nr;
+ int syscall_nr;
unsigned long args[SYSCALL_DEFINE_MAXARGS];
} __aligned(8) param;
int i;
@@ -661,7 +661,7 @@ static int perf_call_bpf_exit(struct trace_event_call *call, struct pt_regs *reg
{
struct syscall_tp_t {
struct trace_entry ent;
- unsigned long syscall_nr;
+ int syscall_nr;
unsigned long ret;
} __aligned(8) param;
diff --git a/lib/test_bpf.c b/lib/test_bpf.c
index ecde4216201e..7916503e6a6a 100644
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -5111,6 +5111,104 @@ static struct bpf_test tests[] = {
{ },
{ { 0, 0xffffffff } }
},
+ /* MOVSX32 */
+ {
+ "ALU_MOVSX | BPF_B",
+ .u.insns_int = {
+ BPF_LD_IMM64(R2, 0x00000000ffffffefLL),
+ BPF_LD_IMM64(R3, 0xdeadbeefdeadbeefLL),
+ BPF_MOVSX32_REG(R1, R3, 8),
+ BPF_JMP_REG(BPF_JEQ, R2, R1, 2),
+ BPF_MOV32_IMM(R0, 2),
+ BPF_EXIT_INSN(),
+ BPF_MOV32_IMM(R0, 1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x1 } },
+ },
+ {
+ "ALU_MOVSX | BPF_H",
+ .u.insns_int = {
+ BPF_LD_IMM64(R2, 0x00000000ffffbeefLL),
+ BPF_LD_IMM64(R3, 0xdeadbeefdeadbeefLL),
+ BPF_MOVSX32_REG(R1, R3, 16),
+ BPF_JMP_REG(BPF_JEQ, R2, R1, 2),
+ BPF_MOV32_IMM(R0, 2),
+ BPF_EXIT_INSN(),
+ BPF_MOV32_IMM(R0, 1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x1 } },
+ },
+ {
+ "ALU_MOVSX | BPF_W",
+ .u.insns_int = {
+ BPF_LD_IMM64(R2, 0x00000000deadbeefLL),
+ BPF_LD_IMM64(R3, 0xdeadbeefdeadbeefLL),
+ BPF_MOVSX32_REG(R1, R3, 32),
+ BPF_JMP_REG(BPF_JEQ, R2, R1, 2),
+ BPF_MOV32_IMM(R0, 2),
+ BPF_EXIT_INSN(),
+ BPF_MOV32_IMM(R0, 1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x1 } },
+ },
+ /* MOVSX64 REG */
+ {
+ "ALU64_MOVSX | BPF_B",
+ .u.insns_int = {
+ BPF_LD_IMM64(R2, 0xffffffffffffffefLL),
+ BPF_LD_IMM64(R3, 0xdeadbeefdeadbeefLL),
+ BPF_MOVSX64_REG(R1, R3, 8),
+ BPF_JMP_REG(BPF_JEQ, R2, R1, 2),
+ BPF_MOV32_IMM(R0, 2),
+ BPF_EXIT_INSN(),
+ BPF_MOV32_IMM(R0, 1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x1 } },
+ },
+ {
+ "ALU64_MOVSX | BPF_H",
+ .u.insns_int = {
+ BPF_LD_IMM64(R2, 0xffffffffffffbeefLL),
+ BPF_LD_IMM64(R3, 0xdeadbeefdeadbeefLL),
+ BPF_MOVSX64_REG(R1, R3, 16),
+ BPF_JMP_REG(BPF_JEQ, R2, R1, 2),
+ BPF_MOV32_IMM(R0, 2),
+ BPF_EXIT_INSN(),
+ BPF_MOV32_IMM(R0, 1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x1 } },
+ },
+ {
+ "ALU64_MOVSX | BPF_W",
+ .u.insns_int = {
+ BPF_LD_IMM64(R2, 0xffffffffdeadbeefLL),
+ BPF_LD_IMM64(R3, 0xdeadbeefdeadbeefLL),
+ BPF_MOVSX64_REG(R1, R3, 32),
+ BPF_JMP_REG(BPF_JEQ, R2, R1, 2),
+ BPF_MOV32_IMM(R0, 2),
+ BPF_EXIT_INSN(),
+ BPF_MOV32_IMM(R0, 1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x1 } },
+ },
/* BPF_ALU | BPF_ADD | BPF_X */
{
"ALU_ADD_X: 1 + 2 = 3",
@@ -6105,6 +6203,106 @@ static struct bpf_test tests[] = {
{ },
{ { 0, 2 } },
},
+ /* BPF_ALU | BPF_DIV | BPF_X off=1 (SDIV) */
+ {
+ "ALU_SDIV_X: -6 / 2 = -3",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, -6),
+ BPF_ALU32_IMM(BPF_MOV, R1, 2),
+ BPF_ALU32_REG_OFF(BPF_DIV, R0, R1, 1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, -3 } },
+ },
+ /* BPF_ALU | BPF_DIV | BPF_K off=1 (SDIV) */
+ {
+ "ALU_SDIV_K: -6 / 2 = -3",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, -6),
+ BPF_ALU32_IMM_OFF(BPF_DIV, R0, 2, 1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, -3 } },
+ },
+ /* BPF_ALU64 | BPF_DIV | BPF_X off=1 (SDIV64) */
+ {
+ "ALU64_SDIV_X: -6 / 2 = -3",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, -6),
+ BPF_ALU32_IMM(BPF_MOV, R1, 2),
+ BPF_ALU64_REG_OFF(BPF_DIV, R0, R1, 1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, -3 } },
+ },
+ /* BPF_ALU64 | BPF_DIV | BPF_K off=1 (SDIV64) */
+ {
+ "ALU64_SDIV_K: -6 / 2 = -3",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, -6),
+ BPF_ALU64_IMM_OFF(BPF_DIV, R0, 2, 1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, -3 } },
+ },
+ /* BPF_ALU | BPF_MOD | BPF_X off=1 (SMOD) */
+ {
+ "ALU_SMOD_X: -7 % 2 = -1",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, -7),
+ BPF_ALU32_IMM(BPF_MOV, R1, 2),
+ BPF_ALU32_REG_OFF(BPF_MOD, R0, R1, 1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, -1 } },
+ },
+ /* BPF_ALU | BPF_MOD | BPF_K off=1 (SMOD) */
+ {
+ "ALU_SMOD_K: -7 % 2 = -1",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, -7),
+ BPF_ALU32_IMM_OFF(BPF_MOD, R0, 2, 1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, -1 } },
+ },
+ /* BPF_ALU64 | BPF_MOD | BPF_X off=1 (SMOD64) */
+ {
+ "ALU64_SMOD_X: -7 % 2 = -1",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, -7),
+ BPF_ALU32_IMM(BPF_MOV, R1, 2),
+ BPF_ALU64_REG_OFF(BPF_MOD, R0, R1, 1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, -1 } },
+ },
+ /* BPF_ALU64 | BPF_MOD | BPF_K off=1 (SMOD64) */
+ {
+ "ALU64_SMOD_X: -7 % 2 = -1",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, -7),
+ BPF_ALU64_IMM_OFF(BPF_MOD, R0, 2, 1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, -1 } },
+ },
/* BPF_ALU | BPF_AND | BPF_X */
{
"ALU_AND_X: 3 & 2 = 2",
@@ -7837,6 +8035,104 @@ static struct bpf_test tests[] = {
{ },
{ { 0, (u32) (cpu_to_le64(0xfedcba9876543210ULL) >> 32) } },
},
+ /* BSWAP */
+ {
+ "BSWAP 16: 0x0123456789abcdef -> 0xefcd",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+ BPF_BSWAP(R0, 16),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0xefcd } },
+ },
+ {
+ "BSWAP 32: 0x0123456789abcdef -> 0xefcdab89",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+ BPF_BSWAP(R0, 32),
+ BPF_ALU64_REG(BPF_MOV, R1, R0),
+ BPF_ALU64_IMM(BPF_RSH, R1, 32),
+ BPF_ALU32_REG(BPF_ADD, R0, R1), /* R1 = 0 */
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0xefcdab89 } },
+ },
+ {
+ "BSWAP 64: 0x0123456789abcdef -> 0x67452301",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+ BPF_BSWAP(R0, 64),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x67452301 } },
+ },
+ {
+ "BSWAP 64: 0x0123456789abcdef >> 32 -> 0xefcdab89",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+ BPF_BSWAP(R0, 64),
+ BPF_ALU64_IMM(BPF_RSH, R0, 32),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0xefcdab89 } },
+ },
+ /* BSWAP, reversed */
+ {
+ "BSWAP 16: 0xfedcba9876543210 -> 0x1032",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0xfedcba9876543210ULL),
+ BPF_BSWAP(R0, 16),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x1032 } },
+ },
+ {
+ "BSWAP 32: 0xfedcba9876543210 -> 0x10325476",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0xfedcba9876543210ULL),
+ BPF_BSWAP(R0, 32),
+ BPF_ALU64_REG(BPF_MOV, R1, R0),
+ BPF_ALU64_IMM(BPF_RSH, R1, 32),
+ BPF_ALU32_REG(BPF_ADD, R0, R1), /* R1 = 0 */
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x10325476 } },
+ },
+ {
+ "BSWAP 64: 0xfedcba9876543210 -> 0x98badcfe",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0xfedcba9876543210ULL),
+ BPF_BSWAP(R0, 64),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x98badcfe } },
+ },
+ {
+ "BSWAP 64: 0xfedcba9876543210 >> 32 -> 0x10325476",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0xfedcba9876543210ULL),
+ BPF_BSWAP(R0, 64),
+ BPF_ALU64_IMM(BPF_RSH, R0, 32),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x10325476 } },
+ },
/* BPF_LDX_MEM B/H/W/DW */
{
"BPF_LDX_MEM | BPF_B, base",
@@ -8228,6 +8524,67 @@ static struct bpf_test tests[] = {
{ { 32, 0 } },
.stack_depth = 0,
},
+ /* BPF_LDX_MEMSX B/H/W */
+ {
+ "BPF_LDX_MEMSX | BPF_B",
+ .u.insns_int = {
+ BPF_LD_IMM64(R1, 0xdead0000000000f0ULL),
+ BPF_LD_IMM64(R2, 0xfffffffffffffff0ULL),
+ BPF_STX_MEM(BPF_DW, R10, R1, -8),
+#ifdef __BIG_ENDIAN
+ BPF_LDX_MEMSX(BPF_B, R0, R10, -1),
+#else
+ BPF_LDX_MEMSX(BPF_B, R0, R10, -8),
+#endif
+ BPF_JMP_REG(BPF_JNE, R0, R2, 1),
+ BPF_ALU64_IMM(BPF_MOV, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0 } },
+ .stack_depth = 8,
+ },
+ {
+ "BPF_LDX_MEMSX | BPF_H",
+ .u.insns_int = {
+ BPF_LD_IMM64(R1, 0xdead00000000f123ULL),
+ BPF_LD_IMM64(R2, 0xfffffffffffff123ULL),
+ BPF_STX_MEM(BPF_DW, R10, R1, -8),
+#ifdef __BIG_ENDIAN
+ BPF_LDX_MEMSX(BPF_H, R0, R10, -2),
+#else
+ BPF_LDX_MEMSX(BPF_H, R0, R10, -8),
+#endif
+ BPF_JMP_REG(BPF_JNE, R0, R2, 1),
+ BPF_ALU64_IMM(BPF_MOV, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0 } },
+ .stack_depth = 8,
+ },
+ {
+ "BPF_LDX_MEMSX | BPF_W",
+ .u.insns_int = {
+ BPF_LD_IMM64(R1, 0x00000000deadbeefULL),
+ BPF_LD_IMM64(R2, 0xffffffffdeadbeefULL),
+ BPF_STX_MEM(BPF_DW, R10, R1, -8),
+#ifdef __BIG_ENDIAN
+ BPF_LDX_MEMSX(BPF_W, R0, R10, -4),
+#else
+ BPF_LDX_MEMSX(BPF_W, R0, R10, -8),
+#endif
+ BPF_JMP_REG(BPF_JNE, R0, R2, 1),
+ BPF_ALU64_IMM(BPF_MOV, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0 } },
+ .stack_depth = 8,
+ },
/* BPF_STX_MEM B/H/W/DW */
{
"BPF_STX_MEM | BPF_B",
@@ -9474,6 +9831,20 @@ static struct bpf_test tests[] = {
{ },
{ { 0, 1 } },
},
+ /* BPF_JMP32 | BPF_JA */
+ {
+ "JMP32_JA: Unconditional jump: if (true) return 1",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
+ BPF_JMP32_IMM(BPF_JA, 0, 1, 0),
+ BPF_EXIT_INSN(),
+ BPF_ALU32_IMM(BPF_MOV, R0, 1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 1 } },
+ },
/* BPF_JMP | BPF_JSLT | BPF_K */
{
"JMP_JSLT_K: Signed jump: if (-2 < -1) return 1",
diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h
index d37831b8511c..8b06bab5c406 100644
--- a/mm/kasan/kasan.h
+++ b/mm/kasan/kasan.h
@@ -562,7 +562,6 @@ void kasan_restore_multi_shot(bool enabled);
* code. Declared here to avoid warnings about missing declarations.
*/
-asmlinkage void kasan_unpoison_task_stack_below(const void *watermark);
void __asan_register_globals(void *globals, ssize_t size);
void __asan_unregister_globals(void *globals, ssize_t size);
void __asan_handle_no_return(void);
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index faabad6603db..f263f7e91a21 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -1136,6 +1136,7 @@ static int build_initial_monmap(struct ceph_mon_client *monc)
GFP_KERNEL);
if (!monc->monmap)
return -ENOMEM;
+ monc->monmap->num_mon = num_mon;
for (i = 0; i < num_mon; i++) {
struct ceph_entity_inst *inst = &monc->monmap->mon_inst[i];
@@ -1147,7 +1148,6 @@ static int build_initial_monmap(struct ceph_mon_client *monc)
inst->name.type = CEPH_ENTITY_TYPE_MON;
inst->name.num = cpu_to_le64(i);
}
- monc->monmap->num_mon = num_mon;
return 0;
}
diff --git a/net/core/dev.c b/net/core/dev.c
index 9f3f8930c691..17ba4c5be97e 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -9052,6 +9052,28 @@ bool netdev_port_same_parent_id(struct net_device *a, struct net_device *b)
}
EXPORT_SYMBOL(netdev_port_same_parent_id);
+static void netdev_dpll_pin_assign(struct net_device *dev, struct dpll_pin *dpll_pin)
+{
+#if IS_ENABLED(CONFIG_DPLL)
+ rtnl_lock();
+ dev->dpll_pin = dpll_pin;
+ rtnl_unlock();
+#endif
+}
+
+void netdev_dpll_pin_set(struct net_device *dev, struct dpll_pin *dpll_pin)
+{
+ WARN_ON(!dpll_pin);
+ netdev_dpll_pin_assign(dev, dpll_pin);
+}
+EXPORT_SYMBOL(netdev_dpll_pin_set);
+
+void netdev_dpll_pin_clear(struct net_device *dev)
+{
+ netdev_dpll_pin_assign(dev, NULL);
+}
+EXPORT_SYMBOL(netdev_dpll_pin_clear);
+
/**
* dev_change_proto_down - set carrier according to proto_down.
*
diff --git a/net/core/dst.c b/net/core/dst.c
index 980e2fd2f013..6838d3212c37 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -45,7 +45,7 @@ const struct dst_metrics dst_default_metrics = {
EXPORT_SYMBOL(dst_default_metrics);
void dst_init(struct dst_entry *dst, struct dst_ops *ops,
- struct net_device *dev, int initial_ref, int initial_obsolete,
+ struct net_device *dev, int initial_obsolete,
unsigned short flags)
{
dst->dev = dev;
@@ -66,7 +66,7 @@ void dst_init(struct dst_entry *dst, struct dst_ops *ops,
dst->tclassid = 0;
#endif
dst->lwtstate = NULL;
- rcuref_init(&dst->__rcuref, initial_ref);
+ rcuref_init(&dst->__rcuref, 1);
INIT_LIST_HEAD(&dst->rt_uncached);
dst->__use = 0;
dst->lastuse = jiffies;
@@ -77,7 +77,7 @@ void dst_init(struct dst_entry *dst, struct dst_ops *ops,
EXPORT_SYMBOL(dst_init);
void *dst_alloc(struct dst_ops *ops, struct net_device *dev,
- int initial_ref, int initial_obsolete, unsigned short flags)
+ int initial_obsolete, unsigned short flags)
{
struct dst_entry *dst;
@@ -90,7 +90,7 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev,
if (!dst)
return NULL;
- dst_init(dst, ops, dev, initial_ref, initial_obsolete, flags);
+ dst_init(dst, ops, dev, initial_obsolete, flags);
return dst;
}
@@ -270,7 +270,7 @@ static void __metadata_dst_init(struct metadata_dst *md_dst,
struct dst_entry *dst;
dst = &md_dst->dst;
- dst_init(dst, &dst_blackhole_ops, NULL, 1, DST_OBSOLETE_NONE,
+ dst_init(dst, &dst_blackhole_ops, NULL, DST_OBSOLETE_NONE,
DST_METADATA | DST_NOCOUNT);
memset(dst + 1, 0, sizeof(*md_dst) + optslen - sizeof(*dst));
md_dst->type = type;
diff --git a/net/core/filter.c b/net/core/filter.c
index a094694899c9..cc2e4babc85f 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -81,6 +81,7 @@
#include <net/xdp.h>
#include <net/mptcp.h>
#include <net/netfilter/nf_conntrack_bpf.h>
+#include <linux/un.h>
static const struct bpf_func_proto *
bpf_sk_base_func_proto(enum bpf_func_id func_id);
@@ -5850,6 +5851,9 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
params->rt_metric = res.fi->fib_priority;
params->ifindex = dev->ifindex;
+ if (flags & BPF_FIB_LOOKUP_SRC)
+ params->ipv4_src = fib_result_prefsrc(net, &res);
+
/* xdp and cls_bpf programs are run in RCU-bh so
* rcu_read_lock_bh is not needed here
*/
@@ -5992,6 +5996,18 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
params->rt_metric = res.f6i->fib6_metric;
params->ifindex = dev->ifindex;
+ if (flags & BPF_FIB_LOOKUP_SRC) {
+ if (res.f6i->fib6_prefsrc.plen) {
+ *src = res.f6i->fib6_prefsrc.addr;
+ } else {
+ err = ipv6_bpf_stub->ipv6_dev_get_saddr(net, dev,
+ &fl6.daddr, 0,
+ src);
+ if (err)
+ return BPF_FIB_LKUP_RET_NO_SRC_ADDR;
+ }
+ }
+
if (flags & BPF_FIB_LOOKUP_SKIP_NEIGH)
goto set_fwd_params;
@@ -6010,7 +6026,8 @@ set_fwd_params:
#endif
#define BPF_FIB_LOOKUP_MASK (BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT | \
- BPF_FIB_LOOKUP_SKIP_NEIGH | BPF_FIB_LOOKUP_TBID)
+ BPF_FIB_LOOKUP_SKIP_NEIGH | BPF_FIB_LOOKUP_TBID | \
+ BPF_FIB_LOOKUP_SRC)
BPF_CALL_4(bpf_xdp_fib_lookup, struct xdp_buff *, ctx,
struct bpf_fib_lookup *, params, int, plen, u32, flags)
@@ -7858,14 +7875,19 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_CGROUP_INET6_BIND:
case BPF_CGROUP_INET4_CONNECT:
case BPF_CGROUP_INET6_CONNECT:
+ case BPF_CGROUP_UNIX_CONNECT:
case BPF_CGROUP_UDP4_RECVMSG:
case BPF_CGROUP_UDP6_RECVMSG:
+ case BPF_CGROUP_UNIX_RECVMSG:
case BPF_CGROUP_UDP4_SENDMSG:
case BPF_CGROUP_UDP6_SENDMSG:
+ case BPF_CGROUP_UNIX_SENDMSG:
case BPF_CGROUP_INET4_GETPEERNAME:
case BPF_CGROUP_INET6_GETPEERNAME:
+ case BPF_CGROUP_UNIX_GETPEERNAME:
case BPF_CGROUP_INET4_GETSOCKNAME:
case BPF_CGROUP_INET6_GETSOCKNAME:
+ case BPF_CGROUP_UNIX_GETSOCKNAME:
return &bpf_sock_addr_setsockopt_proto;
default:
return NULL;
@@ -7876,14 +7898,19 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_CGROUP_INET6_BIND:
case BPF_CGROUP_INET4_CONNECT:
case BPF_CGROUP_INET6_CONNECT:
+ case BPF_CGROUP_UNIX_CONNECT:
case BPF_CGROUP_UDP4_RECVMSG:
case BPF_CGROUP_UDP6_RECVMSG:
+ case BPF_CGROUP_UNIX_RECVMSG:
case BPF_CGROUP_UDP4_SENDMSG:
case BPF_CGROUP_UDP6_SENDMSG:
+ case BPF_CGROUP_UNIX_SENDMSG:
case BPF_CGROUP_INET4_GETPEERNAME:
case BPF_CGROUP_INET6_GETPEERNAME:
+ case BPF_CGROUP_UNIX_GETPEERNAME:
case BPF_CGROUP_INET4_GETSOCKNAME:
case BPF_CGROUP_INET6_GETSOCKNAME:
+ case BPF_CGROUP_UNIX_GETSOCKNAME:
return &bpf_sock_addr_getsockopt_proto;
default:
return NULL;
@@ -8931,8 +8958,8 @@ static bool sock_addr_is_valid_access(int off, int size,
if (off % size != 0)
return false;
- /* Disallow access to IPv6 fields from IPv4 contex and vise
- * versa.
+ /* Disallow access to fields not belonging to the attach type's address
+ * family.
*/
switch (off) {
case bpf_ctx_range(struct bpf_sock_addr, user_ip4):
@@ -11752,6 +11779,27 @@ __bpf_kfunc int bpf_dynptr_from_xdp(struct xdp_buff *xdp, u64 flags,
return 0;
}
+
+__bpf_kfunc int bpf_sock_addr_set_sun_path(struct bpf_sock_addr_kern *sa_kern,
+ const u8 *sun_path, u32 sun_path__sz)
+{
+ struct sockaddr_un *un;
+
+ if (sa_kern->sk->sk_family != AF_UNIX)
+ return -EINVAL;
+
+ /* We do not allow changing the address to unnamed or larger than the
+ * maximum allowed address size for a unix sockaddr.
+ */
+ if (sun_path__sz == 0 || sun_path__sz > UNIX_PATH_MAX)
+ return -EINVAL;
+
+ un = (struct sockaddr_un *)sa_kern->uaddr;
+ memcpy(un->sun_path, sun_path, sun_path__sz);
+ sa_kern->uaddrlen = offsetof(struct sockaddr_un, sun_path) + sun_path__sz;
+
+ return 0;
+}
__diag_pop();
int bpf_dynptr_from_skb_rdonly(struct sk_buff *skb, u64 flags,
@@ -11776,6 +11824,10 @@ BTF_SET8_START(bpf_kfunc_check_set_xdp)
BTF_ID_FLAGS(func, bpf_dynptr_from_xdp)
BTF_SET8_END(bpf_kfunc_check_set_xdp)
+BTF_SET8_START(bpf_kfunc_check_set_sock_addr)
+BTF_ID_FLAGS(func, bpf_sock_addr_set_sun_path)
+BTF_SET8_END(bpf_kfunc_check_set_sock_addr)
+
static const struct btf_kfunc_id_set bpf_kfunc_set_skb = {
.owner = THIS_MODULE,
.set = &bpf_kfunc_check_set_skb,
@@ -11786,6 +11838,11 @@ static const struct btf_kfunc_id_set bpf_kfunc_set_xdp = {
.set = &bpf_kfunc_check_set_xdp,
};
+static const struct btf_kfunc_id_set bpf_kfunc_set_sock_addr = {
+ .owner = THIS_MODULE,
+ .set = &bpf_kfunc_check_set_sock_addr,
+};
+
static int __init bpf_kfunc_init(void)
{
int ret;
@@ -11800,7 +11857,9 @@ static int __init bpf_kfunc_init(void)
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_LWT_XMIT, &bpf_kfunc_set_skb);
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_LWT_SEG6LOCAL, &bpf_kfunc_set_skb);
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_NETFILTER, &bpf_kfunc_set_skb);
- return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &bpf_kfunc_set_xdp);
+ ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &bpf_kfunc_set_xdp);
+ return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+ &bpf_kfunc_set_sock_addr);
}
late_initcall(bpf_kfunc_init);
diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c
index c1aea8b756b6..fe61f85bcf33 100644
--- a/net/core/netdev-genl.c
+++ b/net/core/netdev-genl.c
@@ -5,6 +5,7 @@
#include <linux/rtnetlink.h>
#include <net/net_namespace.h>
#include <net/sock.h>
+#include <net/xdp.h>
#include "netdev-genl-gen.h"
@@ -12,15 +13,24 @@ static int
netdev_nl_dev_fill(struct net_device *netdev, struct sk_buff *rsp,
const struct genl_info *info)
{
+ u64 xdp_rx_meta = 0;
void *hdr;
hdr = genlmsg_iput(rsp, info);
if (!hdr)
return -EMSGSIZE;
+#define XDP_METADATA_KFUNC(_, flag, __, xmo) \
+ if (netdev->xdp_metadata_ops && netdev->xdp_metadata_ops->xmo) \
+ xdp_rx_meta |= flag;
+XDP_METADATA_KFUNC_xxx
+#undef XDP_METADATA_KFUNC
+
if (nla_put_u32(rsp, NETDEV_A_DEV_IFINDEX, netdev->ifindex) ||
nla_put_u64_64bit(rsp, NETDEV_A_DEV_XDP_FEATURES,
- netdev->xdp_features, NETDEV_A_DEV_PAD)) {
+ netdev->xdp_features, NETDEV_A_DEV_PAD) ||
+ nla_put_u64_64bit(rsp, NETDEV_A_DEV_XDP_RX_METADATA_FEATURES,
+ xdp_rx_meta, NETDEV_A_DEV_PAD)) {
genlmsg_cancel(rsp, hdr);
return -EINVAL;
}
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 53c377d054f0..4363f913ce7c 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -57,6 +57,7 @@
#if IS_ENABLED(CONFIG_IPV6)
#include <net/addrconf.h>
#endif
+#include <linux/dpll.h>
#include "dev.h"
@@ -1055,6 +1056,15 @@ static size_t rtnl_devlink_port_size(const struct net_device *dev)
return size;
}
+static size_t rtnl_dpll_pin_size(const struct net_device *dev)
+{
+ size_t size = nla_total_size(0); /* nest IFLA_DPLL_PIN */
+
+ size += dpll_msg_pin_handle_size(netdev_dpll_pin(dev));
+
+ return size;
+}
+
static noinline size_t if_nlmsg_size(const struct net_device *dev,
u32 ext_filter_mask)
{
@@ -1111,6 +1121,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
+ rtnl_prop_list_size(dev)
+ nla_total_size(MAX_ADDR_LEN) /* IFLA_PERM_ADDRESS */
+ rtnl_devlink_port_size(dev)
+ + rtnl_dpll_pin_size(dev)
+ 0;
}
@@ -1774,6 +1785,28 @@ nest_cancel:
return ret;
}
+static int rtnl_fill_dpll_pin(struct sk_buff *skb,
+ const struct net_device *dev)
+{
+ struct nlattr *dpll_pin_nest;
+ int ret;
+
+ dpll_pin_nest = nla_nest_start(skb, IFLA_DPLL_PIN);
+ if (!dpll_pin_nest)
+ return -EMSGSIZE;
+
+ ret = dpll_msg_add_pin_handle(skb, netdev_dpll_pin(dev));
+ if (ret < 0)
+ goto nest_cancel;
+
+ nla_nest_end(skb, dpll_pin_nest);
+ return 0;
+
+nest_cancel:
+ nla_nest_cancel(skb, dpll_pin_nest);
+ return ret;
+}
+
static int rtnl_fill_ifinfo(struct sk_buff *skb,
struct net_device *dev, struct net *src_net,
int type, u32 pid, u32 seq, u32 change,
@@ -1916,6 +1949,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
if (rtnl_fill_devlink_port(skb, dev))
goto nla_put_failure;
+ if (rtnl_fill_dpll_pin(skb, dev))
+ goto nla_put_failure;
+
nlmsg_end(skb, nlh);
return 0;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 4eaf7ed0d1f4..2198979470ec 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -847,6 +847,8 @@ EXPORT_SYMBOL(__napi_alloc_skb);
void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off,
int size, unsigned int truesize)
{
+ DEBUG_NET_WARN_ON_ONCE(size > truesize);
+
skb_fill_page_desc(skb, i, page, off, size);
skb->len += size;
skb->data_len += size;
@@ -859,6 +861,8 @@ void skb_coalesce_rx_frag(struct sk_buff *skb, int i, int size,
{
skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+ DEBUG_NET_WARN_ON_ONCE(size > truesize);
+
skb_frag_size_add(frag, size);
skb->len += size;
skb->data_len += size;
diff --git a/net/core/sock.c b/net/core/sock.c
index 16584e2dd648..a5995750c5c5 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -759,7 +759,7 @@ out:
return ret;
}
-bool sk_mc_loop(struct sock *sk)
+bool sk_mc_loop(const struct sock *sk)
{
if (dev_recursion_level())
return false;
@@ -771,7 +771,7 @@ bool sk_mc_loop(struct sock *sk)
return inet_test_bit(MC_LOOP, sk);
#if IS_ENABLED(CONFIG_IPV6)
case AF_INET6:
- return inet6_sk(sk)->mc_loop;
+ return inet6_test_bit(MC6_LOOP, sk);
#endif
}
WARN_ON_ONCE(1);
@@ -3001,6 +3001,11 @@ void __sk_flush_backlog(struct sock *sk)
{
spin_lock_bh(&sk->sk_lock.slock);
__release_sock(sk);
+
+ if (sk->sk_prot->release_cb)
+ INDIRECT_CALL_INET_1(sk->sk_prot->release_cb,
+ tcp_release_cb, sk);
+
spin_unlock_bh(&sk->sk_lock.slock);
}
EXPORT_SYMBOL_GPL(__sk_flush_backlog);
@@ -3519,11 +3524,9 @@ void release_sock(struct sock *sk)
if (sk->sk_backlog.tail)
__release_sock(sk);
- /* Warning : release_cb() might need to release sk ownership,
- * ie call sock_release_ownership(sk) before us.
- */
if (sk->sk_prot->release_cb)
- sk->sk_prot->release_cb(sk);
+ INDIRECT_CALL_INET_1(sk->sk_prot->release_cb,
+ tcp_release_cb, sk);
sock_release_ownership(sk);
if (waitqueue_active(&sk->sk_lock.wq))
diff --git a/net/core/xdp.c b/net/core/xdp.c
index a70670fe9a2d..df4789ab512d 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -741,7 +741,7 @@ __bpf_kfunc int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, u32 *hash,
__diag_pop();
BTF_SET8_START(xdp_metadata_kfunc_ids)
-#define XDP_METADATA_KFUNC(_, name) BTF_ID_FLAGS(func, name, KF_TRUSTED_ARGS)
+#define XDP_METADATA_KFUNC(_, __, name, ___) BTF_ID_FLAGS(func, name, KF_TRUSTED_ARGS)
XDP_METADATA_KFUNC_xxx
#undef XDP_METADATA_KFUNC
BTF_SET8_END(xdp_metadata_kfunc_ids)
@@ -752,7 +752,7 @@ static const struct btf_kfunc_id_set xdp_metadata_kfunc_set = {
};
BTF_ID_LIST(xdp_metadata_kfunc_ids_unsorted)
-#define XDP_METADATA_KFUNC(name, str) BTF_ID(func, str)
+#define XDP_METADATA_KFUNC(name, _, str, __) BTF_ID(func, str)
XDP_METADATA_KFUNC_xxx
#undef XDP_METADATA_KFUNC
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index c693a570682f..80b956b39252 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -180,7 +180,7 @@ static int dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
goto out;
}
- if (!sock_owned_by_user(sk) && np->recverr) {
+ if (!sock_owned_by_user(sk) && inet6_test_bit(RECVERR6, sk)) {
sk->sk_err = err;
sk_error_report(sk);
} else {
@@ -671,10 +671,10 @@ ipv6_pktoptions:
if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
np->mcast_oif = inet6_iif(opt_skb);
if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
- np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
+ WRITE_ONCE(np->mcast_hops, ipv6_hdr(opt_skb)->hop_limit);
if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
- if (np->repflow)
+ if (inet6_test_bit(REPFLOW, sk))
np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
if (ipv6_opt_accepted(sk, opt_skb,
&DCCP_SKB_CB(opt_skb)->header.h6)) {
@@ -839,7 +839,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
memset(&fl6, 0, sizeof(fl6));
- if (np->sndflow) {
+ if (inet6_test_bit(SNDFLOW, sk)) {
fl6.flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK;
IP6_ECN_flow_init(fl6.flowlabel);
if (fl6.flowlabel & IPV6_FLOWLABEL_MASK) {
diff --git a/net/devlink/core.c b/net/devlink/core.c
index 6cec4afb01fb..bcbbb952569f 100644
--- a/net/devlink/core.c
+++ b/net/devlink/core.c
@@ -16,6 +16,219 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_trap_report);
DEFINE_XARRAY_FLAGS(devlinks, XA_FLAGS_ALLOC);
+static struct devlink *devlinks_xa_get(unsigned long index)
+{
+ struct devlink *devlink;
+
+ rcu_read_lock();
+ devlink = xa_find(&devlinks, &index, index, DEVLINK_REGISTERED);
+ if (!devlink || !devlink_try_get(devlink))
+ devlink = NULL;
+ rcu_read_unlock();
+ return devlink;
+}
+
+/* devlink_rels xarray contains 1:1 relationships between
+ * devlink object and related nested devlink instance.
+ * The xarray index is used to get the nested object from
+ * the nested-in object code.
+ */
+static DEFINE_XARRAY_FLAGS(devlink_rels, XA_FLAGS_ALLOC1);
+
+#define DEVLINK_REL_IN_USE XA_MARK_0
+
+struct devlink_rel {
+ u32 index;
+ refcount_t refcount;
+ u32 devlink_index;
+ struct {
+ u32 devlink_index;
+ u32 obj_index;
+ devlink_rel_notify_cb_t *notify_cb;
+ devlink_rel_cleanup_cb_t *cleanup_cb;
+ struct work_struct notify_work;
+ } nested_in;
+};
+
+static void devlink_rel_free(struct devlink_rel *rel)
+{
+ xa_erase(&devlink_rels, rel->index);
+ kfree(rel);
+}
+
+static void __devlink_rel_get(struct devlink_rel *rel)
+{
+ refcount_inc(&rel->refcount);
+}
+
+static void __devlink_rel_put(struct devlink_rel *rel)
+{
+ if (refcount_dec_and_test(&rel->refcount))
+ devlink_rel_free(rel);
+}
+
+static void devlink_rel_nested_in_notify_work(struct work_struct *work)
+{
+ struct devlink_rel *rel = container_of(work, struct devlink_rel,
+ nested_in.notify_work);
+ struct devlink *devlink;
+
+ devlink = devlinks_xa_get(rel->nested_in.devlink_index);
+ if (!devlink)
+ goto rel_put;
+ if (!devl_trylock(devlink)) {
+ devlink_put(devlink);
+ goto reschedule_work;
+ }
+ if (!devl_is_registered(devlink)) {
+ devl_unlock(devlink);
+ devlink_put(devlink);
+ goto rel_put;
+ }
+ if (!xa_get_mark(&devlink_rels, rel->index, DEVLINK_REL_IN_USE))
+ rel->nested_in.cleanup_cb(devlink, rel->nested_in.obj_index, rel->index);
+ rel->nested_in.notify_cb(devlink, rel->nested_in.obj_index);
+ devl_unlock(devlink);
+ devlink_put(devlink);
+
+rel_put:
+ __devlink_rel_put(rel);
+ return;
+
+reschedule_work:
+ schedule_work(&rel->nested_in.notify_work);
+}
+
+static void devlink_rel_nested_in_notify_work_schedule(struct devlink_rel *rel)
+{
+ __devlink_rel_get(rel);
+ schedule_work(&rel->nested_in.notify_work);
+}
+
+static struct devlink_rel *devlink_rel_alloc(void)
+{
+ struct devlink_rel *rel;
+ static u32 next;
+ int err;
+
+ rel = kzalloc(sizeof(*rel), GFP_KERNEL);
+ if (!rel)
+ return ERR_PTR(-ENOMEM);
+
+ err = xa_alloc_cyclic(&devlink_rels, &rel->index, rel,
+ xa_limit_32b, &next, GFP_KERNEL);
+ if (err) {
+ kfree(rel);
+ return ERR_PTR(err);
+ }
+
+ refcount_set(&rel->refcount, 1);
+ INIT_WORK(&rel->nested_in.notify_work,
+ &devlink_rel_nested_in_notify_work);
+ return rel;
+}
+
+static void devlink_rel_put(struct devlink *devlink)
+{
+ struct devlink_rel *rel = devlink->rel;
+
+ if (!rel)
+ return;
+ xa_clear_mark(&devlink_rels, rel->index, DEVLINK_REL_IN_USE);
+ devlink_rel_nested_in_notify_work_schedule(rel);
+ __devlink_rel_put(rel);
+ devlink->rel = NULL;
+}
+
+void devlink_rel_nested_in_clear(u32 rel_index)
+{
+ xa_clear_mark(&devlink_rels, rel_index, DEVLINK_REL_IN_USE);
+}
+
+int devlink_rel_nested_in_add(u32 *rel_index, u32 devlink_index,
+ u32 obj_index, devlink_rel_notify_cb_t *notify_cb,
+ devlink_rel_cleanup_cb_t *cleanup_cb,
+ struct devlink *devlink)
+{
+ struct devlink_rel *rel = devlink_rel_alloc();
+
+ ASSERT_DEVLINK_NOT_REGISTERED(devlink);
+
+ if (IS_ERR(rel))
+ return PTR_ERR(rel);
+
+ rel->devlink_index = devlink->index;
+ rel->nested_in.devlink_index = devlink_index;
+ rel->nested_in.obj_index = obj_index;
+ rel->nested_in.notify_cb = notify_cb;
+ rel->nested_in.cleanup_cb = cleanup_cb;
+ *rel_index = rel->index;
+ xa_set_mark(&devlink_rels, rel->index, DEVLINK_REL_IN_USE);
+ devlink->rel = rel;
+ return 0;
+}
+
+void devlink_rel_nested_in_notify(struct devlink *devlink)
+{
+ struct devlink_rel *rel = devlink->rel;
+
+ if (!rel)
+ return;
+ devlink_rel_nested_in_notify_work_schedule(rel);
+}
+
+static struct devlink_rel *devlink_rel_find(unsigned long rel_index)
+{
+ return xa_find(&devlink_rels, &rel_index, rel_index,
+ DEVLINK_REL_IN_USE);
+}
+
+static struct devlink *devlink_rel_devlink_get_lock(u32 rel_index)
+{
+ struct devlink *devlink;
+ struct devlink_rel *rel;
+ u32 devlink_index;
+
+ if (!rel_index)
+ return NULL;
+ xa_lock(&devlink_rels);
+ rel = devlink_rel_find(rel_index);
+ if (rel)
+ devlink_index = rel->devlink_index;
+ xa_unlock(&devlink_rels);
+ if (!rel)
+ return NULL;
+ devlink = devlinks_xa_get(devlink_index);
+ if (!devlink)
+ return NULL;
+ devl_lock(devlink);
+ if (!devl_is_registered(devlink)) {
+ devl_unlock(devlink);
+ devlink_put(devlink);
+ return NULL;
+ }
+ return devlink;
+}
+
+int devlink_rel_devlink_handle_put(struct sk_buff *msg, struct devlink *devlink,
+ u32 rel_index, int attrtype,
+ bool *msg_updated)
+{
+ struct net *net = devlink_net(devlink);
+ struct devlink *rel_devlink;
+ int err;
+
+ rel_devlink = devlink_rel_devlink_get_lock(rel_index);
+ if (!rel_devlink)
+ return 0;
+ err = devlink_nl_put_nested_handle(msg, net, rel_devlink, attrtype);
+ devl_unlock(rel_devlink);
+ devlink_put(rel_devlink);
+ if (!err && msg_updated)
+ *msg_updated = true;
+ return err;
+}
+
void *devlink_priv(struct devlink *devlink)
{
return &devlink->priv;
@@ -142,6 +355,7 @@ int devl_register(struct devlink *devlink)
xa_set_mark(&devlinks, devlink->index, DEVLINK_REGISTERED);
devlink_notify_register(devlink);
+ devlink_rel_nested_in_notify(devlink);
return 0;
}
@@ -166,6 +380,7 @@ void devl_unregister(struct devlink *devlink)
devlink_notify_unregister(devlink);
xa_clear_mark(&devlinks, devlink->index, DEVLINK_REGISTERED);
+ devlink_rel_put(devlink);
}
EXPORT_SYMBOL_GPL(devl_unregister);
@@ -215,6 +430,7 @@ struct devlink *devlink_alloc_ns(const struct devlink_ops *ops,
xa_init_flags(&devlink->ports, XA_FLAGS_ALLOC);
xa_init_flags(&devlink->params, XA_FLAGS_ALLOC);
xa_init_flags(&devlink->snapshot_ids, XA_FLAGS_ALLOC);
+ xa_init_flags(&devlink->nested_rels, XA_FLAGS_ALLOC);
write_pnet(&devlink->_net, net);
INIT_LIST_HEAD(&devlink->rate_list);
INIT_LIST_HEAD(&devlink->linecard_list);
@@ -261,6 +477,7 @@ void devlink_free(struct devlink *devlink)
WARN_ON(!list_empty(&devlink->linecard_list));
WARN_ON(!xa_empty(&devlink->ports));
+ xa_destroy(&devlink->nested_rels);
xa_destroy(&devlink->snapshot_ids);
xa_destroy(&devlink->params);
xa_destroy(&devlink->ports);
diff --git a/net/devlink/dev.c b/net/devlink/dev.c
index bba4ace7d22b..dc8039ca2b38 100644
--- a/net/devlink/dev.c
+++ b/net/devlink/dev.c
@@ -138,6 +138,23 @@ nla_put_failure:
return -EMSGSIZE;
}
+static int devlink_nl_nested_fill(struct sk_buff *msg, struct devlink *devlink)
+{
+ unsigned long rel_index;
+ void *unused;
+ int err;
+
+ xa_for_each(&devlink->nested_rels, rel_index, unused) {
+ err = devlink_rel_devlink_handle_put(msg, devlink,
+ rel_index,
+ DEVLINK_ATTR_NESTED_DEVLINK,
+ NULL);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
static int devlink_nl_fill(struct sk_buff *msg, struct devlink *devlink,
enum devlink_command cmd, u32 portid,
u32 seq, int flags)
@@ -164,6 +181,10 @@ static int devlink_nl_fill(struct sk_buff *msg, struct devlink *devlink,
goto dev_stats_nest_cancel;
nla_nest_end(msg, dev_stats);
+
+ if (devlink_nl_nested_fill(msg, devlink))
+ goto nla_put_failure;
+
genlmsg_end(msg, hdr);
return 0;
@@ -230,6 +251,34 @@ int devlink_nl_get_dumpit(struct sk_buff *msg, struct netlink_callback *cb)
return devlink_nl_dumpit(msg, cb, devlink_nl_get_dump_one);
}
+static void devlink_rel_notify_cb(struct devlink *devlink, u32 obj_index)
+{
+ devlink_notify(devlink, DEVLINK_CMD_NEW);
+}
+
+static void devlink_rel_cleanup_cb(struct devlink *devlink, u32 obj_index,
+ u32 rel_index)
+{
+ xa_erase(&devlink->nested_rels, rel_index);
+}
+
+int devl_nested_devlink_set(struct devlink *devlink,
+ struct devlink *nested_devlink)
+{
+ u32 rel_index;
+ int err;
+
+ err = devlink_rel_nested_in_add(&rel_index, devlink->index, 0,
+ devlink_rel_notify_cb,
+ devlink_rel_cleanup_cb,
+ nested_devlink);
+ if (err)
+ return err;
+ return xa_insert(&devlink->nested_rels, rel_index,
+ xa_mk_value(0), GFP_KERNEL);
+}
+EXPORT_SYMBOL_GPL(devl_nested_devlink_set);
+
void devlink_notify_register(struct devlink *devlink)
{
devlink_notify(devlink, DEVLINK_CMD_NEW);
@@ -372,6 +421,7 @@ static void devlink_reload_netns_change(struct devlink *devlink,
devlink_notify_unregister(devlink);
write_pnet(&devlink->_net, dest_net);
devlink_notify_register(devlink);
+ devlink_rel_nested_in_notify(devlink);
}
int devlink_reload(struct devlink *devlink, struct net *dest_net,
diff --git a/net/devlink/devl_internal.h b/net/devlink/devl_internal.h
index f6b5fea2e13c..741d1bf1bec8 100644
--- a/net/devlink/devl_internal.h
+++ b/net/devlink/devl_internal.h
@@ -17,6 +17,8 @@
#include "netlink_gen.h"
+struct devlink_rel;
+
#define DEVLINK_REGISTERED XA_MARK_1
#define DEVLINK_RELOAD_STATS_ARRAY_SIZE \
@@ -55,6 +57,8 @@ struct devlink {
u8 reload_failed:1;
refcount_t refcount;
struct rcu_work rwork;
+ struct devlink_rel *rel;
+ struct xarray nested_rels;
char priv[] __aligned(NETDEV_ALIGN);
};
@@ -92,6 +96,20 @@ static inline bool devl_is_registered(struct devlink *devlink)
return xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED);
}
+typedef void devlink_rel_notify_cb_t(struct devlink *devlink, u32 obj_index);
+typedef void devlink_rel_cleanup_cb_t(struct devlink *devlink, u32 obj_index,
+ u32 rel_index);
+
+void devlink_rel_nested_in_clear(u32 rel_index);
+int devlink_rel_nested_in_add(u32 *rel_index, u32 devlink_index,
+ u32 obj_index, devlink_rel_notify_cb_t *notify_cb,
+ devlink_rel_cleanup_cb_t *cleanup_cb,
+ struct devlink *devlink);
+void devlink_rel_nested_in_notify(struct devlink *devlink);
+int devlink_rel_devlink_handle_put(struct sk_buff *msg, struct devlink *devlink,
+ u32 rel_index, int attrtype,
+ bool *msg_updated);
+
/* Netlink */
#define DEVLINK_NL_FLAG_NEED_PORT BIT(0)
#define DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT BIT(1)
@@ -145,6 +163,8 @@ devlink_nl_put_handle(struct sk_buff *msg, struct devlink *devlink)
return 0;
}
+int devlink_nl_put_nested_handle(struct sk_buff *msg, struct net *net,
+ struct devlink *devlink, int attrtype);
int devlink_nl_msg_reply_and_new(struct sk_buff **msg, struct genl_info *info);
/* Notify */
@@ -206,19 +226,7 @@ int devlink_rate_nodes_check(struct devlink *devlink, u16 mode,
struct netlink_ext_ack *extack);
/* Linecards */
-struct devlink_linecard {
- struct list_head list;
- struct devlink *devlink;
- unsigned int index;
- const struct devlink_linecard_ops *ops;
- void *priv;
- enum devlink_linecard_state state;
- struct mutex state_lock; /* Protects state */
- const char *type;
- struct devlink_linecard_type *types;
- unsigned int types_count;
- struct devlink *nested_devlink;
-};
+unsigned int devlink_linecard_index(struct devlink_linecard *linecard);
/* Devlink nl cmds */
int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info);
diff --git a/net/devlink/linecard.c b/net/devlink/linecard.c
index 85c32c314b0f..9ff1813f88c5 100644
--- a/net/devlink/linecard.c
+++ b/net/devlink/linecard.c
@@ -6,6 +6,25 @@
#include "devl_internal.h"
+struct devlink_linecard {
+ struct list_head list;
+ struct devlink *devlink;
+ unsigned int index;
+ const struct devlink_linecard_ops *ops;
+ void *priv;
+ enum devlink_linecard_state state;
+ struct mutex state_lock; /* Protects state */
+ const char *type;
+ struct devlink_linecard_type *types;
+ unsigned int types_count;
+ u32 rel_index;
+};
+
+unsigned int devlink_linecard_index(struct devlink_linecard *linecard)
+{
+ return linecard->index;
+}
+
static struct devlink_linecard *
devlink_linecard_get_by_index(struct devlink *devlink,
unsigned int linecard_index)
@@ -46,24 +65,6 @@ devlink_linecard_get_from_info(struct devlink *devlink, struct genl_info *info)
return devlink_linecard_get_from_attrs(devlink, info->attrs);
}
-static int devlink_nl_put_nested_handle(struct sk_buff *msg, struct devlink *devlink)
-{
- struct nlattr *nested_attr;
-
- nested_attr = nla_nest_start(msg, DEVLINK_ATTR_NESTED_DEVLINK);
- if (!nested_attr)
- return -EMSGSIZE;
- if (devlink_nl_put_handle(msg, devlink))
- goto nla_put_failure;
-
- nla_nest_end(msg, nested_attr);
- return 0;
-
-nla_put_failure:
- nla_nest_cancel(msg, nested_attr);
- return -EMSGSIZE;
-}
-
struct devlink_linecard_type {
const char *type;
const void *priv;
@@ -111,8 +112,10 @@ static int devlink_nl_linecard_fill(struct sk_buff *msg,
nla_nest_end(msg, attr);
}
- if (linecard->nested_devlink &&
- devlink_nl_put_nested_handle(msg, linecard->nested_devlink))
+ if (devlink_rel_devlink_handle_put(msg, devlink,
+ linecard->rel_index,
+ DEVLINK_ATTR_NESTED_DEVLINK,
+ NULL))
goto nla_put_failure;
genlmsg_end(msg, hdr);
@@ -521,7 +524,6 @@ EXPORT_SYMBOL_GPL(devlink_linecard_provision_set);
void devlink_linecard_provision_clear(struct devlink_linecard *linecard)
{
mutex_lock(&linecard->state_lock);
- WARN_ON(linecard->nested_devlink);
linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONED;
linecard->type = NULL;
devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
@@ -540,7 +542,6 @@ EXPORT_SYMBOL_GPL(devlink_linecard_provision_clear);
void devlink_linecard_provision_fail(struct devlink_linecard *linecard)
{
mutex_lock(&linecard->state_lock);
- WARN_ON(linecard->nested_devlink);
linecard->state = DEVLINK_LINECARD_STATE_PROVISIONING_FAILED;
devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
mutex_unlock(&linecard->state_lock);
@@ -588,6 +589,27 @@ void devlink_linecard_deactivate(struct devlink_linecard *linecard)
}
EXPORT_SYMBOL_GPL(devlink_linecard_deactivate);
+static void devlink_linecard_rel_notify_cb(struct devlink *devlink,
+ u32 linecard_index)
+{
+ struct devlink_linecard *linecard;
+
+ linecard = devlink_linecard_get_by_index(devlink, linecard_index);
+ if (!linecard)
+ return;
+ devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
+}
+
+static void devlink_linecard_rel_cleanup_cb(struct devlink *devlink,
+ u32 linecard_index, u32 rel_index)
+{
+ struct devlink_linecard *linecard;
+
+ linecard = devlink_linecard_get_by_index(devlink, linecard_index);
+ if (linecard && linecard->rel_index == rel_index)
+ linecard->rel_index = 0;
+}
+
/**
* devlink_linecard_nested_dl_set - Attach/detach nested devlink
* instance to linecard.
@@ -595,12 +617,14 @@ EXPORT_SYMBOL_GPL(devlink_linecard_deactivate);
* @linecard: devlink linecard
* @nested_devlink: devlink instance to attach or NULL to detach
*/
-void devlink_linecard_nested_dl_set(struct devlink_linecard *linecard,
- struct devlink *nested_devlink)
+int devlink_linecard_nested_dl_set(struct devlink_linecard *linecard,
+ struct devlink *nested_devlink)
{
- mutex_lock(&linecard->state_lock);
- linecard->nested_devlink = nested_devlink;
- devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
- mutex_unlock(&linecard->state_lock);
+ return devlink_rel_nested_in_add(&linecard->rel_index,
+ linecard->devlink->index,
+ linecard->index,
+ devlink_linecard_rel_notify_cb,
+ devlink_linecard_rel_cleanup_cb,
+ nested_devlink);
}
EXPORT_SYMBOL_GPL(devlink_linecard_nested_dl_set);
diff --git a/net/devlink/netlink.c b/net/devlink/netlink.c
index fc3e7c029a3b..499304d9de49 100644
--- a/net/devlink/netlink.c
+++ b/net/devlink/netlink.c
@@ -82,6 +82,32 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
[DEVLINK_ATTR_REGION_DIRECT] = { .type = NLA_FLAG },
};
+int devlink_nl_put_nested_handle(struct sk_buff *msg, struct net *net,
+ struct devlink *devlink, int attrtype)
+{
+ struct nlattr *nested_attr;
+
+ nested_attr = nla_nest_start(msg, attrtype);
+ if (!nested_attr)
+ return -EMSGSIZE;
+ if (devlink_nl_put_handle(msg, devlink))
+ goto nla_put_failure;
+ if (!net_eq(net, devlink_net(devlink))) {
+ int id = peernet2id_alloc(net, devlink_net(devlink),
+ GFP_KERNEL);
+
+ if (nla_put_s32(msg, DEVLINK_ATTR_NETNS_ID, id))
+ return -EMSGSIZE;
+ }
+
+ nla_nest_end(msg, nested_attr);
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(msg, nested_attr);
+ return -EMSGSIZE;
+}
+
int devlink_nl_msg_reply_and_new(struct sk_buff **msg, struct genl_info *info)
{
int err;
diff --git a/net/devlink/port.c b/net/devlink/port.c
index 4763b42885fb..4e9003242448 100644
--- a/net/devlink/port.c
+++ b/net/devlink/port.c
@@ -428,6 +428,13 @@ devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *por
if (err)
goto out;
err = devlink_port_fn_state_fill(port, msg, extack, &msg_updated);
+ if (err)
+ goto out;
+ err = devlink_rel_devlink_handle_put(msg, port->devlink,
+ port->rel_index,
+ DEVLINK_PORT_FN_ATTR_DEVLINK,
+ &msg_updated);
+
out:
if (err || !msg_updated)
nla_nest_cancel(msg, function_attr);
@@ -483,7 +490,7 @@ static int devlink_nl_port_fill(struct sk_buff *msg,
goto nla_put_failure;
if (devlink_port->linecard &&
nla_put_u32(msg, DEVLINK_ATTR_LINECARD_INDEX,
- devlink_port->linecard->index))
+ devlink_linecard_index(devlink_port->linecard)))
goto nla_put_failure;
genlmsg_end(msg, hdr);
@@ -1392,6 +1399,50 @@ void devlink_port_attrs_pci_sf_set(struct devlink_port *devlink_port, u32 contro
}
EXPORT_SYMBOL_GPL(devlink_port_attrs_pci_sf_set);
+static void devlink_port_rel_notify_cb(struct devlink *devlink, u32 port_index)
+{
+ struct devlink_port *devlink_port;
+
+ devlink_port = devlink_port_get_by_index(devlink, port_index);
+ if (!devlink_port)
+ return;
+ devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
+}
+
+static void devlink_port_rel_cleanup_cb(struct devlink *devlink, u32 port_index,
+ u32 rel_index)
+{
+ struct devlink_port *devlink_port;
+
+ devlink_port = devlink_port_get_by_index(devlink, port_index);
+ if (devlink_port && devlink_port->rel_index == rel_index)
+ devlink_port->rel_index = 0;
+}
+
+/**
+ * devl_port_fn_devlink_set - Attach peer devlink
+ * instance to port function.
+ * @devlink_port: devlink port
+ * @fn_devlink: devlink instance to attach
+ */
+int devl_port_fn_devlink_set(struct devlink_port *devlink_port,
+ struct devlink *fn_devlink)
+{
+ ASSERT_DEVLINK_PORT_REGISTERED(devlink_port);
+
+ if (WARN_ON(devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_PCI_SF ||
+ devlink_port->attrs.pci_sf.external))
+ return -EINVAL;
+
+ return devlink_rel_nested_in_add(&devlink_port->rel_index,
+ devlink_port->devlink->index,
+ devlink_port->index,
+ devlink_port_rel_notify_cb,
+ devlink_port_rel_cleanup_cb,
+ fn_devlink);
+}
+EXPORT_SYMBOL_GPL(devl_port_fn_devlink_set);
+
/**
* devlink_port_linecard_set - Link port with a linecard
*
@@ -1420,7 +1471,7 @@ static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port,
case DEVLINK_PORT_FLAVOUR_PHYSICAL:
if (devlink_port->linecard)
n = snprintf(name, len, "l%u",
- devlink_port->linecard->index);
+ devlink_linecard_index(devlink_port->linecard));
if (n < len)
n += snprintf(name + n, len - n, "p%u",
attrs->phys.port_number);
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 2713c9b06c4c..fb81de10d332 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -452,7 +452,7 @@ int inet_bind_sk(struct sock *sk, struct sockaddr *uaddr, int addr_len)
/* BPF prog is run before any checks are done so that if the prog
* changes context in a wrong way it will be caught.
*/
- err = BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr,
+ err = BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, &addr_len,
CGROUP_INET4_BIND, &flags);
if (err)
return err;
@@ -794,6 +794,7 @@ int inet_getname(struct socket *sock, struct sockaddr *uaddr,
struct sock *sk = sock->sk;
struct inet_sock *inet = inet_sk(sk);
DECLARE_SOCKADDR(struct sockaddr_in *, sin, uaddr);
+ int sin_addr_len = sizeof(*sin);
sin->sin_family = AF_INET;
lock_sock(sk);
@@ -806,7 +807,7 @@ int inet_getname(struct socket *sock, struct sockaddr *uaddr,
}
sin->sin_port = inet->inet_dport;
sin->sin_addr.s_addr = inet->inet_daddr;
- BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin,
+ BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, &sin_addr_len,
CGROUP_INET4_GETPEERNAME);
} else {
__be32 addr = inet->inet_rcv_saddr;
@@ -814,12 +815,12 @@ int inet_getname(struct socket *sock, struct sockaddr *uaddr,
addr = inet->inet_saddr;
sin->sin_port = inet->inet_sport;
sin->sin_addr.s_addr = addr;
- BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin,
+ BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, &sin_addr_len,
CGROUP_INET4_GETSOCKNAME);
}
release_sock(sk);
memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
- return sizeof(*sin);
+ return sin_addr_len;
}
EXPORT_SYMBOL(inet_getname);
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 418e5fb58fd3..76c3ea75b8dd 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -2944,8 +2944,6 @@ static struct ip_sf_list *igmp_mcf_get_next(struct seq_file *seq, struct ip_sf_l
continue;
state->im = rcu_dereference(state->idev->mc_list);
}
- if (!state->im)
- break;
spin_lock_bh(&state->im->lock);
psf = state->im->sources;
}
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 75e0aee35eb7..2887177822c9 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -301,7 +301,7 @@ static int ping_pre_connect(struct sock *sk, struct sockaddr *uaddr,
if (addr_len < sizeof(struct sockaddr_in))
return -EINVAL;
- return BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr);
+ return BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr, &addr_len);
}
/* Checks the bind address and possibly modifies sk->sk_bound_dev_if. */
@@ -581,7 +581,7 @@ void ping_err(struct sk_buff *skb, int offset, u32 info)
* 4.1.3.3.
*/
if ((family == AF_INET && !inet_test_bit(RECVERR, sk)) ||
- (family == AF_INET6 && !inet6_sk(sk)->recverr)) {
+ (family == AF_INET6 && !inet6_test_bit(RECVERR6, sk))) {
if (!harderr || sk->sk_state != TCP_ESTABLISHED)
goto out;
} else {
@@ -899,7 +899,6 @@ int ping_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags,
#if IS_ENABLED(CONFIG_IPV6)
} else if (family == AF_INET6) {
- struct ipv6_pinfo *np = inet6_sk(sk);
struct ipv6hdr *ip6 = ipv6_hdr(skb);
DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name);
@@ -908,7 +907,7 @@ int ping_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags,
sin6->sin6_port = 0;
sin6->sin6_addr = ip6->saddr;
sin6->sin6_flowinfo = 0;
- if (np->sndflow)
+ if (inet6_test_bit(SNDFLOW, sk))
sin6->sin6_flowinfo = ip6_flowinfo(ip6);
sin6->sin6_scope_id =
ipv6_iface_scope_id(&sin6->sin6_addr,
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index b214b5a2e045..e2bf4602b559 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1632,7 +1632,7 @@ struct rtable *rt_dst_alloc(struct net_device *dev,
{
struct rtable *rt;
- rt = dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK,
+ rt = dst_alloc(&ipv4_dst_ops, dev, DST_OBSOLETE_FORCE_CHK,
(noxfrm ? DST_NOXFRM : 0));
if (rt) {
@@ -1660,7 +1660,7 @@ struct rtable *rt_dst_clone(struct net_device *dev, struct rtable *rt)
{
struct rtable *new_rt;
- new_rt = dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK,
+ new_rt = dst_alloc(&ipv4_dst_ops, dev, DST_OBSOLETE_FORCE_CHK,
rt->dst.flags);
if (new_rt) {
@@ -2834,7 +2834,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
struct rtable *ort = (struct rtable *) dst_orig;
struct rtable *rt;
- rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, DST_OBSOLETE_DEAD, 0);
+ rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, DST_OBSOLETE_DEAD, 0);
if (rt) {
struct dst_entry *new = &rt->dst;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 6ac890b4073f..e7f024d93572 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -1367,6 +1367,15 @@ static struct ctl_table ipv4_net_table[] = {
.extra1 = SYSCTL_ZERO,
},
{
+ .procname = "tcp_backlog_ack_defer",
+ .data = &init_net.ipv4.sysctl_tcp_backlog_ack_defer,
+ .maxlen = sizeof(u8),
+ .mode = 0644,
+ .proc_handler = proc_dou8vec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+ {
.procname = "tcp_reflect_tos",
.data = &init_net.ipv4.sysctl_tcp_reflect_tos,
.maxlen = sizeof(u8),
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 3d3a24f79573..30e8e5d35983 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3814,6 +3814,15 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
info->tcpi_rcv_wnd = tp->rcv_wnd;
info->tcpi_rehash = tp->plb_rehash + tp->timeout_rehash;
info->tcpi_fastopen_client_fail = tp->fastopen_client_fail;
+
+ info->tcpi_total_rto = tp->total_rto;
+ info->tcpi_total_rto_recoveries = tp->total_rto_recoveries;
+ info->tcpi_total_rto_time = tp->total_rto_time;
+ if (tp->rto_stamp) {
+ info->tcpi_total_rto_time += tcp_time_stamp_raw() -
+ tp->rto_stamp;
+ }
+
unlock_sock_fast(sk, slow);
}
EXPORT_SYMBOL_GPL(tcp_get_info);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 804821d6bd4d..2ce14ff2b00c 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2101,6 +2101,10 @@ void tcp_clear_retrans(struct tcp_sock *tp)
tp->undo_marker = 0;
tp->undo_retrans = -1;
tp->sacked_out = 0;
+ tp->rto_stamp = 0;
+ tp->total_rto = 0;
+ tp->total_rto_recoveries = 0;
+ tp->total_rto_time = 0;
}
static inline void tcp_init_undo(struct tcp_sock *tp)
@@ -2839,6 +2843,14 @@ void tcp_enter_recovery(struct sock *sk, bool ece_ack)
tcp_set_ca_state(sk, TCP_CA_Recovery);
}
+static void tcp_update_rto_time(struct tcp_sock *tp)
+{
+ if (tp->rto_stamp) {
+ tp->total_rto_time += tcp_time_stamp(tp) - tp->rto_stamp;
+ tp->rto_stamp = 0;
+ }
+}
+
/* Process an ACK in CA_Loss state. Move to CA_Open if lost data are
* recovered or spurious. Otherwise retransmits more on partial ACKs.
*/
@@ -3043,6 +3055,8 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
break;
case TCP_CA_Loss:
tcp_process_loss(sk, flag, num_dupack, rexmit);
+ if (icsk->icsk_ca_state != TCP_CA_Loss)
+ tcp_update_rto_time(tp);
tcp_identify_packet_loss(sk, ack_flag);
if (!(icsk->icsk_ca_state == TCP_CA_Open ||
(*ack_flag & FLAG_LOST_RETRANS)))
@@ -5567,6 +5581,14 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
tcp_in_quickack_mode(sk) ||
/* Protocol state mandates a one-time immediate ACK */
inet_csk(sk)->icsk_ack.pending & ICSK_ACK_NOW) {
+ /* If we are running from __release_sock() in user context,
+ * Defer the ack until tcp_release_cb().
+ */
+ if (sock_owned_by_user_nocheck(sk) &&
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_backlog_ack_defer)) {
+ set_bit(TCP_ACK_DEFERRED, &sk->sk_tsq_flags);
+ return;
+ }
send_now:
tcp_send_ack(sk);
return;
@@ -6450,22 +6472,24 @@ reset_and_undo:
static void tcp_rcv_synrecv_state_fastopen(struct sock *sk)
{
+ struct tcp_sock *tp = tcp_sk(sk);
struct request_sock *req;
/* If we are still handling the SYNACK RTO, see if timestamp ECR allows
* undo. If peer SACKs triggered fast recovery, we can't undo here.
*/
- if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss)
- tcp_try_undo_loss(sk, false);
+ if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss && !tp->packets_out)
+ tcp_try_undo_recovery(sk);
/* Reset rtx states to prevent spurious retransmits_timed_out() */
- tcp_sk(sk)->retrans_stamp = 0;
+ tcp_update_rto_time(tp);
+ tp->retrans_stamp = 0;
inet_csk(sk)->icsk_retransmits = 0;
/* Once we leave TCP_SYN_RECV or TCP_FIN_WAIT_1,
* we no longer need req so release it.
*/
- req = rcu_dereference_protected(tcp_sk(sk)->fastopen_rsk,
+ req = rcu_dereference_protected(tp->fastopen_rsk,
lockdep_sock_is_held(sk));
reqsk_fastopen_remove(sk, req, false);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 4167e8a48b60..38892ae83e17 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -194,7 +194,7 @@ static int tcp_v4_pre_connect(struct sock *sk, struct sockaddr *uaddr,
sock_owned_by_me(sk);
- return BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr);
+ return BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr, &addr_len);
}
/* This will initiate an outgoing connection. */
@@ -3264,6 +3264,7 @@ static int __net_init tcp_sk_init(struct net *net)
net->ipv4.sysctl_tcp_comp_sack_delay_ns = NSEC_PER_MSEC;
net->ipv4.sysctl_tcp_comp_sack_slack_ns = 100 * NSEC_PER_USEC;
net->ipv4.sysctl_tcp_comp_sack_nr = 44;
+ net->ipv4.sysctl_tcp_backlog_ack_defer = 1;
net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE;
net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 0;
atomic_set(&net->ipv4.tfo_active_disable_times, 0);
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index b98d476f1594..eee8ab1bfa0e 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -565,6 +565,10 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
newtp->undo_marker = treq->snt_isn;
newtp->retrans_stamp = div_u64(treq->snt_synack,
USEC_PER_SEC / TCP_TS_HZ);
+ newtp->total_rto = req->num_timeout;
+ newtp->total_rto_recoveries = 1;
+ newtp->total_rto_time = tcp_time_stamp_raw() -
+ newtp->retrans_stamp;
}
newtp->tsoffset = treq->ts_off;
#ifdef CONFIG_TCP_MD5SIG
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index f0723460753c..3b833fd76b24 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1076,7 +1076,8 @@ static void tcp_tasklet_func(struct tasklet_struct *t)
#define TCP_DEFERRED_ALL (TCPF_TSQ_DEFERRED | \
TCPF_WRITE_TIMER_DEFERRED | \
TCPF_DELACK_TIMER_DEFERRED | \
- TCPF_MTU_REDUCED_DEFERRED)
+ TCPF_MTU_REDUCED_DEFERRED | \
+ TCPF_ACK_DEFERRED)
/**
* tcp_release_cb - tcp release_sock() callback
* @sk: socket
@@ -1100,16 +1101,6 @@ void tcp_release_cb(struct sock *sk)
tcp_tsq_write(sk);
__sock_put(sk);
}
- /* Here begins the tricky part :
- * We are called from release_sock() with :
- * 1) BH disabled
- * 2) sk_lock.slock spinlock held
- * 3) socket owned by us (sk->sk_lock.owned == 1)
- *
- * But following code is meant to be called from BH handlers,
- * so we should keep BH disabled, but early release socket ownership
- */
- sock_release_ownership(sk);
if (flags & TCPF_WRITE_TIMER_DEFERRED) {
tcp_write_timer_handler(sk);
@@ -1123,6 +1114,8 @@ void tcp_release_cb(struct sock *sk)
inet_csk(sk)->icsk_af_ops->mtu_reduced(sk);
__sock_put(sk);
}
+ if ((flags & TCPF_ACK_DEFERRED) && inet_csk_ack_scheduled(sk))
+ tcp_send_ack(sk);
}
EXPORT_SYMBOL(tcp_release_cb);
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 984ab4a0421e..3f61c6a70a1f 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -394,7 +394,7 @@ static void tcp_probe_timer(struct sock *sk)
if (user_timeout &&
(s32)(tcp_jiffies32 - icsk->icsk_probes_tstamp) >=
msecs_to_jiffies(user_timeout))
- goto abort;
+ goto abort;
}
max_probes = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_retries2);
if (sock_flag(sk, SOCK_DEAD)) {
@@ -415,6 +415,19 @@ abort: tcp_write_err(sk);
}
}
+static void tcp_update_rto_stats(struct sock *sk)
+{
+ struct inet_connection_sock *icsk = inet_csk(sk);
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ if (!icsk->icsk_retransmits) {
+ tp->total_rto_recoveries++;
+ tp->rto_stamp = tcp_time_stamp(tp);
+ }
+ icsk->icsk_retransmits++;
+ tp->total_rto++;
+}
+
/*
* Timer for Fast Open socket to retransmit SYNACK. Note that the
* sk here is the child socket, not the parent (listener) socket.
@@ -447,7 +460,7 @@ static void tcp_fastopen_synack_timer(struct sock *sk, struct request_sock *req)
*/
inet_rtx_syn_ack(sk, req);
req->num_timeout++;
- icsk->icsk_retransmits++;
+ tcp_update_rto_stats(sk);
if (!tp->retrans_stamp)
tp->retrans_stamp = tcp_time_stamp(tp);
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
@@ -575,7 +588,7 @@ void tcp_retransmit_timer(struct sock *sk)
tcp_enter_loss(sk);
- icsk->icsk_retransmits++;
+ tcp_update_rto_stats(sk);
if (tcp_retransmit_skb(sk, tcp_rtx_queue_head(sk), 1) > 0) {
/* Retransmission failed because of local congestion,
* Let senders fight for local resources conservatively.
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index f39b9c844580..7b21a51dd25a 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -714,7 +714,7 @@ int __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
iph->saddr, uh->source, skb->dev->ifindex,
inet_sdif(skb), udptable, NULL);
- if (!sk || udp_sk(sk)->encap_type) {
+ if (!sk || READ_ONCE(udp_sk(sk)->encap_type)) {
/* No socket for error: try tunnels before discarding */
if (static_branch_unlikely(&udp_encap_needed_key)) {
sk = __udp4_lib_err_encap(net, iph, uh, udptable, sk, skb,
@@ -1051,7 +1051,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
u8 tos, scope;
__be16 dport;
int err, is_udplite = IS_UDPLITE(sk);
- int corkreq = READ_ONCE(up->corkflag) || msg->msg_flags&MSG_MORE;
+ int corkreq = udp_test_bit(CORK, sk) || msg->msg_flags & MSG_MORE;
int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
struct sk_buff *skb;
struct ip_options_data opt_copy;
@@ -1143,7 +1143,9 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (cgroup_bpf_enabled(CGROUP_UDP4_SENDMSG) && !connected) {
err = BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk,
- (struct sockaddr *)usin, &ipc.addr);
+ (struct sockaddr *)usin,
+ &msg->msg_namelen,
+ &ipc.addr);
if (err)
goto out_free;
if (usin) {
@@ -1315,11 +1317,11 @@ void udp_splice_eof(struct socket *sock)
struct sock *sk = sock->sk;
struct udp_sock *up = udp_sk(sk);
- if (!up->pending || READ_ONCE(up->corkflag))
+ if (!up->pending || udp_test_bit(CORK, sk))
return;
lock_sock(sk);
- if (up->pending && !READ_ONCE(up->corkflag))
+ if (up->pending && !udp_test_bit(CORK, sk))
udp_push_pending_frames(sk);
release_sock(sk);
}
@@ -1865,10 +1867,11 @@ try_again:
*addr_len = sizeof(*sin);
BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk,
- (struct sockaddr *)sin);
+ (struct sockaddr *)sin,
+ addr_len);
}
- if (udp_sk(sk)->gro_enabled)
+ if (udp_test_bit(GRO_ENABLED, sk))
udp_cmsg_recv(msg, sk, skb);
if (inet_cmsg_flags(inet))
@@ -1904,7 +1907,7 @@ int udp_pre_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
if (addr_len < sizeof(struct sockaddr_in))
return -EINVAL;
- return BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr);
+ return BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr, &addr_len);
}
EXPORT_SYMBOL(udp_pre_connect);
@@ -2081,7 +2084,8 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)
}
nf_reset_ct(skb);
- if (static_branch_unlikely(&udp_encap_needed_key) && up->encap_type) {
+ if (static_branch_unlikely(&udp_encap_needed_key) &&
+ READ_ONCE(up->encap_type)) {
int (*encap_rcv)(struct sock *sk, struct sk_buff *skb);
/*
@@ -2119,7 +2123,8 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)
/*
* UDP-Lite specific tests, ignored on UDP sockets
*/
- if ((up->pcflag & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) {
+ if (udp_test_bit(UDPLITE_RECV_CC, sk) && UDP_SKB_CB(skb)->partial_cov) {
+ u16 pcrlen = READ_ONCE(up->pcrlen);
/*
* MIB statistics other than incrementing the error count are
@@ -2132,7 +2137,7 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)
* delivery of packets with coverage values less than a value
* provided by the application."
*/
- if (up->pcrlen == 0) { /* full coverage was set */
+ if (pcrlen == 0) { /* full coverage was set */
net_dbg_ratelimited("UDPLite: partial coverage %d while full coverage %d requested\n",
UDP_SKB_CB(skb)->cscov, skb->len);
goto drop;
@@ -2143,9 +2148,9 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)
* that it wants x while sender emits packets of smaller size y.
* Therefore the above ...()->partial_cov statement is essential.
*/
- if (UDP_SKB_CB(skb)->cscov < up->pcrlen) {
+ if (UDP_SKB_CB(skb)->cscov < pcrlen) {
net_dbg_ratelimited("UDPLite: coverage %d too small, need min %d\n",
- UDP_SKB_CB(skb)->cscov, up->pcrlen);
+ UDP_SKB_CB(skb)->cscov, pcrlen);
goto drop;
}
}
@@ -2618,7 +2623,7 @@ void udp_destroy_sock(struct sock *sk)
if (encap_destroy)
encap_destroy(sk);
}
- if (up->encap_enabled)
+ if (udp_test_bit(ENCAP_ENABLED, sk))
static_branch_dec(&udp_encap_needed_key);
}
}
@@ -2658,9 +2663,9 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
switch (optname) {
case UDP_CORK:
if (val != 0) {
- WRITE_ONCE(up->corkflag, 1);
+ udp_set_bit(CORK, sk);
} else {
- WRITE_ONCE(up->corkflag, 0);
+ udp_clear_bit(CORK, sk);
lock_sock(sk);
push_pending_frames(sk);
release_sock(sk);
@@ -2675,17 +2680,17 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
case UDP_ENCAP_ESPINUDP_NON_IKE:
#if IS_ENABLED(CONFIG_IPV6)
if (sk->sk_family == AF_INET6)
- up->encap_rcv = ipv6_stub->xfrm6_udp_encap_rcv;
+ WRITE_ONCE(up->encap_rcv,
+ ipv6_stub->xfrm6_udp_encap_rcv);
else
#endif
- up->encap_rcv = xfrm4_udp_encap_rcv;
+ WRITE_ONCE(up->encap_rcv,
+ xfrm4_udp_encap_rcv);
#endif
fallthrough;
case UDP_ENCAP_L2TPINUDP:
- up->encap_type = val;
- lock_sock(sk);
- udp_tunnel_encap_enable(sk->sk_socket);
- release_sock(sk);
+ WRITE_ONCE(up->encap_type, val);
+ udp_tunnel_encap_enable(sk);
break;
default:
err = -ENOPROTOOPT;
@@ -2694,11 +2699,11 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
break;
case UDP_NO_CHECK6_TX:
- up->no_check6_tx = valbool;
+ udp_set_no_check6_tx(sk, valbool);
break;
case UDP_NO_CHECK6_RX:
- up->no_check6_rx = valbool;
+ udp_set_no_check6_rx(sk, valbool);
break;
case UDP_SEGMENT:
@@ -2708,14 +2713,12 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
break;
case UDP_GRO:
- lock_sock(sk);
/* when enabling GRO, accept the related GSO packet type */
if (valbool)
- udp_tunnel_encap_enable(sk->sk_socket);
- up->gro_enabled = valbool;
- up->accept_udp_l4 = valbool;
- release_sock(sk);
+ udp_tunnel_encap_enable(sk);
+ udp_assign_bit(GRO_ENABLED, sk, valbool);
+ udp_assign_bit(ACCEPT_L4, sk, valbool);
break;
/*
@@ -2730,8 +2733,8 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
val = 8;
else if (val > USHRT_MAX)
val = USHRT_MAX;
- up->pcslen = val;
- up->pcflag |= UDPLITE_SEND_CC;
+ WRITE_ONCE(up->pcslen, val);
+ udp_set_bit(UDPLITE_SEND_CC, sk);
break;
/* The receiver specifies a minimum checksum coverage value. To make
@@ -2744,8 +2747,8 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
val = 8;
else if (val > USHRT_MAX)
val = USHRT_MAX;
- up->pcrlen = val;
- up->pcflag |= UDPLITE_RECV_CC;
+ WRITE_ONCE(up->pcrlen, val);
+ udp_set_bit(UDPLITE_RECV_CC, sk);
break;
default:
@@ -2783,19 +2786,19 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname,
switch (optname) {
case UDP_CORK:
- val = READ_ONCE(up->corkflag);
+ val = udp_test_bit(CORK, sk);
break;
case UDP_ENCAP:
- val = up->encap_type;
+ val = READ_ONCE(up->encap_type);
break;
case UDP_NO_CHECK6_TX:
- val = up->no_check6_tx;
+ val = udp_get_no_check6_tx(sk);
break;
case UDP_NO_CHECK6_RX:
- val = up->no_check6_rx;
+ val = udp_get_no_check6_rx(sk);
break;
case UDP_SEGMENT:
@@ -2803,17 +2806,17 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname,
break;
case UDP_GRO:
- val = up->gro_enabled;
+ val = udp_test_bit(GRO_ENABLED, sk);
break;
/* The following two cannot be changed on UDP sockets, the return is
* always 0 (which corresponds to the full checksum coverage of UDP). */
case UDPLITE_SEND_CSCOV:
- val = up->pcslen;
+ val = READ_ONCE(up->pcslen);
break;
case UDPLITE_RECV_CSCOV:
- val = up->pcrlen;
+ val = READ_ONCE(up->pcrlen);
break;
default:
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 0f46b3c2e4ac..6c95d28d0c4a 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -557,10 +557,10 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
NAPI_GRO_CB(skb)->is_flist = 0;
if (!sk || !udp_sk(sk)->gro_receive) {
if (skb->dev->features & NETIF_F_GRO_FRAGLIST)
- NAPI_GRO_CB(skb)->is_flist = sk ? !udp_sk(sk)->gro_enabled : 1;
+ NAPI_GRO_CB(skb)->is_flist = sk ? !udp_test_bit(GRO_ENABLED, sk) : 1;
if ((!sk && (skb->dev->features & NETIF_F_GRO_UDP_FWD)) ||
- (sk && udp_sk(sk)->gro_enabled) || NAPI_GRO_CB(skb)->is_flist)
+ (sk && udp_test_bit(GRO_ENABLED, sk)) || NAPI_GRO_CB(skb)->is_flist)
return call_gro_receive(udp_gro_receive_segment, head, skb);
/* no GRO, be sure flush the current packet */
diff --git a/net/ipv4/udp_tunnel_core.c b/net/ipv4/udp_tunnel_core.c
index 9b18f371af0d..1e7e4aecdc48 100644
--- a/net/ipv4/udp_tunnel_core.c
+++ b/net/ipv4/udp_tunnel_core.c
@@ -78,7 +78,7 @@ void setup_udp_tunnel_sock(struct net *net, struct socket *sock,
udp_sk(sk)->gro_receive = cfg->gro_receive;
udp_sk(sk)->gro_complete = cfg->gro_complete;
- udp_tunnel_encap_enable(sock);
+ udp_tunnel_encap_enable(sk);
}
EXPORT_SYMBOL_GPL(setup_udp_tunnel_sock);
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c
index 39ecdad1b50c..af37af3ab727 100644
--- a/net/ipv4/udplite.c
+++ b/net/ipv4/udplite.c
@@ -21,7 +21,6 @@ EXPORT_SYMBOL(udplite_table);
static int udplite_sk_init(struct sock *sk)
{
udp_init_sock(sk);
- udp_sk(sk)->pcflag = UDPLITE_BIT;
pr_warn_once("UDP-Lite is deprecated and scheduled to be removed in 2025, "
"please contact the netdev mailing list\n");
return 0;
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index eac206a290d0..183f6dc37242 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -85,11 +85,11 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
struct udphdr *uh;
struct iphdr *iph;
int iphlen, len;
-
__u8 *udpdata;
__be32 *udpdata32;
- __u16 encap_type = up->encap_type;
+ u16 encap_type;
+ encap_type = READ_ONCE(up->encap_type);
/* if this is not encapsulated socket, then just return now */
if (!encap_type)
return 1;
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 368824fe9719..c35d302a3da9 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -217,10 +217,11 @@ lookup_protocol:
inet_sk(sk)->pinet6 = np = inet6_sk_generic(sk);
np->hop_limit = -1;
np->mcast_hops = IPV6_DEFAULT_MCASTHOPS;
- np->mc_loop = 1;
- np->mc_all = 1;
+ inet6_set_bit(MC6_LOOP, sk);
+ inet6_set_bit(MC6_ALL, sk);
np->pmtudisc = IPV6_PMTUDISC_WANT;
- np->repflow = net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_ESTABLISHED;
+ inet6_assign_bit(REPFLOW, sk, net->ipv6.sysctl.flowlabel_reflect &
+ FLOWLABEL_REFLECT_ESTABLISHED);
sk->sk_ipv6only = net->ipv6.sysctl.bindv6only;
sk->sk_txrehash = READ_ONCE(net->core.sysctl_txrehash);
@@ -453,7 +454,7 @@ int inet6_bind_sk(struct sock *sk, struct sockaddr *uaddr, int addr_len)
/* BPF prog is run before any checks are done so that if the prog
* changes context in a wrong way it will be caught.
*/
- err = BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr,
+ err = BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, &addr_len,
CGROUP_INET6_BIND, &flags);
if (err)
return err;
@@ -519,6 +520,7 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
int peer)
{
struct sockaddr_in6 *sin = (struct sockaddr_in6 *)uaddr;
+ int sin_addr_len = sizeof(*sin);
struct sock *sk = sock->sk;
struct inet_sock *inet = inet_sk(sk);
struct ipv6_pinfo *np = inet6_sk(sk);
@@ -536,9 +538,9 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
}
sin->sin6_port = inet->inet_dport;
sin->sin6_addr = sk->sk_v6_daddr;
- if (np->sndflow)
+ if (inet6_test_bit(SNDFLOW, sk))
sin->sin6_flowinfo = np->flow_label;
- BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin,
+ BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, &sin_addr_len,
CGROUP_INET6_GETPEERNAME);
} else {
if (ipv6_addr_any(&sk->sk_v6_rcv_saddr))
@@ -546,13 +548,13 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
else
sin->sin6_addr = sk->sk_v6_rcv_saddr;
sin->sin6_port = inet->inet_sport;
- BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin,
+ BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, &sin_addr_len,
CGROUP_INET6_GETSOCKNAME);
}
sin->sin6_scope_id = ipv6_iface_scope_id(&sin->sin6_addr,
sk->sk_bound_dev_if);
release_sock(sk);
- return sizeof(*sin);
+ return sin_addr_len;
}
EXPORT_SYMBOL(inet6_getname);
@@ -1060,6 +1062,7 @@ static const struct ipv6_bpf_stub ipv6_bpf_stub_impl = {
.udp6_lib_lookup = __udp6_lib_lookup,
.ipv6_setsockopt = do_ipv6_setsockopt,
.ipv6_getsockopt = do_ipv6_getsockopt,
+ .ipv6_dev_get_saddr = ipv6_dev_get_saddr,
};
static int __init inet6_init(void)
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 41ebc4e57473..cc6a502db39d 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -80,7 +80,8 @@ int ip6_datagram_dst_update(struct sock *sk, bool fix_sk_saddr)
struct flowi6 fl6;
int err = 0;
- if (np->sndflow && (np->flow_label & IPV6_FLOWLABEL_MASK)) {
+ if (inet6_test_bit(SNDFLOW, sk) &&
+ (np->flow_label & IPV6_FLOWLABEL_MASK)) {
flowlabel = fl6_sock_lookup(sk, np->flow_label);
if (IS_ERR(flowlabel))
return -EINVAL;
@@ -163,7 +164,7 @@ int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr,
if (usin->sin6_family != AF_INET6)
return -EAFNOSUPPORT;
- if (np->sndflow)
+ if (inet6_test_bit(SNDFLOW, sk))
fl6_flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK;
if (ipv6_addr_any(&usin->sin6_addr)) {
@@ -305,11 +306,10 @@ static void ipv6_icmp_error_rfc4884(const struct sk_buff *skb,
void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
__be16 port, u32 info, u8 *payload)
{
- struct ipv6_pinfo *np = inet6_sk(sk);
struct icmp6hdr *icmph = icmp6_hdr(skb);
struct sock_exterr_skb *serr;
- if (!np->recverr)
+ if (!inet6_test_bit(RECVERR6, sk))
return;
skb = skb_clone(skb, GFP_ATOMIC);
@@ -332,7 +332,7 @@ void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
__skb_pull(skb, payload - skb->data);
- if (inet6_sk(sk)->recverr_rfc4884)
+ if (inet6_test_bit(RECVERR6_RFC4884, sk))
ipv6_icmp_error_rfc4884(skb, &serr->ee.ee_rfc4884);
skb_reset_transport_header(skb);
@@ -344,12 +344,11 @@ EXPORT_SYMBOL_GPL(ipv6_icmp_error);
void ipv6_local_error(struct sock *sk, int err, struct flowi6 *fl6, u32 info)
{
- const struct ipv6_pinfo *np = inet6_sk(sk);
struct sock_exterr_skb *serr;
struct ipv6hdr *iph;
struct sk_buff *skb;
- if (!np->recverr)
+ if (!inet6_test_bit(RECVERR6, sk))
return;
skb = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
@@ -493,7 +492,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
const struct ipv6hdr *ip6h = container_of((struct in6_addr *)(nh + serr->addr_offset),
struct ipv6hdr, daddr);
sin->sin6_addr = ip6h->daddr;
- if (np->sndflow)
+ if (inet6_test_bit(SNDFLOW, sk))
sin->sin6_flowinfo = ip6_flowinfo(ip6h);
sin->sin6_scope_id =
ipv6_iface_scope_id(&sin->sin6_addr,
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 93a594a901d1..8fb4a791881a 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -588,7 +588,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
else if (!fl6.flowi6_oif)
fl6.flowi6_oif = np->ucast_oif;
- ipcm6_init_sk(&ipc6, np);
+ ipcm6_init_sk(&ipc6, sk);
ipc6.sockc.mark = mark;
fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
@@ -791,7 +791,7 @@ static enum skb_drop_reason icmpv6_echo_reply(struct sk_buff *skb)
msg.offset = 0;
msg.type = type;
- ipcm6_init_sk(&ipc6, np);
+ ipcm6_init_sk(&ipc6, sk);
ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb));
ipc6.sockc.mark = mark;
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index b3ca4beb4405..eca07e10e21f 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -513,7 +513,7 @@ int ipv6_flowlabel_opt_get(struct sock *sk, struct in6_flowlabel_req *freq,
return 0;
}
- if (np->repflow) {
+ if (inet6_test_bit(REPFLOW, sk)) {
freq->flr_label = np->flow_label;
return 0;
}
@@ -551,10 +551,10 @@ static int ipv6_flowlabel_put(struct sock *sk, struct in6_flowlabel_req *freq)
if (freq->flr_flags & IPV6_FL_F_REFLECT) {
if (sk->sk_protocol != IPPROTO_TCP)
return -ENOPROTOOPT;
- if (!np->repflow)
+ if (!inet6_test_bit(REPFLOW, sk))
return -ESRCH;
np->flow_label = 0;
- np->repflow = 0;
+ inet6_clear_bit(REPFLOW, sk);
return 0;
}
@@ -626,7 +626,7 @@ static int ipv6_flowlabel_get(struct sock *sk, struct in6_flowlabel_req *freq,
if (sk->sk_protocol != IPPROTO_TCP)
return -ENOPROTOOPT;
- np->repflow = 1;
+ inet6_set_bit(REPFLOW, sk);
return 0;
}
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 54fc4c711f2c..951ba8089b5b 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -232,12 +232,11 @@ int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
}
EXPORT_SYMBOL(ip6_output);
-bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
+bool ip6_autoflowlabel(struct net *net, const struct sock *sk)
{
- if (!np->autoflowlabel_set)
+ if (!inet6_test_bit(AUTOFLOWLABEL_SET, sk))
return ip6_default_np_autolabel(net);
- else
- return np->autoflowlabel;
+ return inet6_test_bit(AUTOFLOWLABEL, sk);
}
/*
@@ -309,12 +308,12 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
* Fill in the IPv6 header
*/
if (np)
- hlimit = np->hop_limit;
+ hlimit = READ_ONCE(np->hop_limit);
if (hlimit < 0)
hlimit = ip6_dst_hoplimit(dst);
ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
- ip6_autoflowlabel(net, np), fl6));
+ ip6_autoflowlabel(net, sk), fl6));
hdr->payload_len = htons(seg_len);
hdr->nexthdr = proto;
@@ -369,9 +368,8 @@ static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
if (sk && ra->sel == sel &&
(!sk->sk_bound_dev_if ||
sk->sk_bound_dev_if == skb->dev->ifindex)) {
- struct ipv6_pinfo *np = inet6_sk(sk);
- if (np && np->rtalert_isolate &&
+ if (inet6_test_bit(RTALERT_ISOLATE, sk) &&
!net_eq(sock_net(sk), dev_net(skb->dev))) {
continue;
}
@@ -881,9 +879,11 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
mtu = IPV6_MIN_MTU;
}
- if (np && np->frag_size < mtu) {
- if (np->frag_size)
- mtu = np->frag_size;
+ if (np) {
+ u32 frag_size = READ_ONCE(np->frag_size);
+
+ if (frag_size && frag_size < mtu)
+ mtu = frag_size;
}
if (mtu < hlen + sizeof(struct frag_hdr) + 8)
goto fail_toobig;
@@ -1113,7 +1113,7 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
rcu_read_lock();
from = rt ? rcu_dereference(rt->from) : NULL;
err = ip6_route_get_saddr(net, from, &fl6->daddr,
- sk ? inet6_sk(sk)->srcprefs : 0,
+ sk ? READ_ONCE(inet6_sk(sk)->srcprefs) : 0,
&fl6->saddr);
rcu_read_unlock();
@@ -1392,7 +1392,7 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
struct rt6_info *rt)
{
struct ipv6_pinfo *np = inet6_sk(sk);
- unsigned int mtu;
+ unsigned int mtu, frag_size;
struct ipv6_txoptions *nopt, *opt = ipc6->opt;
/* callers pass dst together with a reference, set it first so
@@ -1436,15 +1436,16 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
v6_cork->hop_limit = ipc6->hlimit;
v6_cork->tclass = ipc6->tclass;
if (rt->dst.flags & DST_XFRM_TUNNEL)
- mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
+ mtu = READ_ONCE(np->pmtudisc) >= IPV6_PMTUDISC_PROBE ?
READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
else
- mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
+ mtu = READ_ONCE(np->pmtudisc) >= IPV6_PMTUDISC_PROBE ?
READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst));
- if (np->frag_size < mtu) {
- if (np->frag_size)
- mtu = np->frag_size;
- }
+
+ frag_size = READ_ONCE(np->frag_size);
+ if (frag_size && frag_size < mtu)
+ mtu = frag_size;
+
cork->base.fragsize = mtu;
cork->base.gso_size = ipc6->gso_size;
cork->base.tx_flags = 0;
@@ -1935,7 +1936,6 @@ struct sk_buff *__ip6_make_skb(struct sock *sk,
struct sk_buff *skb, *tmp_skb;
struct sk_buff **tail_skb;
struct in6_addr *final_dst;
- struct ipv6_pinfo *np = inet6_sk(sk);
struct net *net = sock_net(sk);
struct ipv6hdr *hdr;
struct ipv6_txoptions *opt = v6_cork->opt;
@@ -1978,7 +1978,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk,
ip6_flow_hdr(hdr, v6_cork->tclass,
ip6_make_flowlabel(net, skb, fl6->flowlabel,
- ip6_autoflowlabel(net, np), fl6));
+ ip6_autoflowlabel(net, sk), fl6));
hdr->hop_limit = v6_cork->hop_limit;
hdr->nexthdr = proto;
hdr->saddr = fl6->saddr;
@@ -2091,7 +2091,7 @@ struct sk_buff *ip6_make_skb(struct sock *sk,
return ERR_PTR(err);
}
if (ipc6->dontfrag < 0)
- ipc6->dontfrag = inet6_sk(sk)->dontfrag;
+ ipc6->dontfrag = inet6_test_bit(DONTFRAG, sk);
err = __ip6_append_data(sk, &queue, cork, &v6_cork,
&current->task_frag, getfrag, from,
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 0e2a0847b387..7d661735cb9d 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -415,6 +415,101 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
if (ip6_mroute_opt(optname))
return ip6_mroute_setsockopt(sk, optname, optval, optlen);
+ /* Handle options that can be set without locking the socket. */
+ switch (optname) {
+ case IPV6_UNICAST_HOPS:
+ if (optlen < sizeof(int))
+ return -EINVAL;
+ if (val > 255 || val < -1)
+ return -EINVAL;
+ WRITE_ONCE(np->hop_limit, val);
+ return 0;
+ case IPV6_MULTICAST_LOOP:
+ if (optlen < sizeof(int))
+ return -EINVAL;
+ if (val != valbool)
+ return -EINVAL;
+ inet6_assign_bit(MC6_LOOP, sk, valbool);
+ return 0;
+ case IPV6_MULTICAST_HOPS:
+ if (sk->sk_type == SOCK_STREAM)
+ return retv;
+ if (optlen < sizeof(int))
+ return -EINVAL;
+ if (val > 255 || val < -1)
+ return -EINVAL;
+ WRITE_ONCE(np->mcast_hops,
+ val == -1 ? IPV6_DEFAULT_MCASTHOPS : val);
+ return 0;
+ case IPV6_MTU:
+ if (optlen < sizeof(int))
+ return -EINVAL;
+ if (val && val < IPV6_MIN_MTU)
+ return -EINVAL;
+ WRITE_ONCE(np->frag_size, val);
+ return 0;
+ case IPV6_MINHOPCOUNT:
+ if (optlen < sizeof(int))
+ return -EINVAL;
+ if (val < 0 || val > 255)
+ return -EINVAL;
+
+ if (val)
+ static_branch_enable(&ip6_min_hopcount);
+
+ /* tcp_v6_err() and tcp_v6_rcv() might read min_hopcount
+ * while we are changing it.
+ */
+ WRITE_ONCE(np->min_hopcount, val);
+ return 0;
+ case IPV6_RECVERR_RFC4884:
+ if (optlen < sizeof(int))
+ return -EINVAL;
+ if (val < 0 || val > 1)
+ return -EINVAL;
+ inet6_assign_bit(RECVERR6_RFC4884, sk, valbool);
+ return 0;
+ case IPV6_MULTICAST_ALL:
+ if (optlen < sizeof(int))
+ return -EINVAL;
+ inet6_assign_bit(MC6_ALL, sk, valbool);
+ return 0;
+ case IPV6_AUTOFLOWLABEL:
+ inet6_assign_bit(AUTOFLOWLABEL, sk, valbool);
+ inet6_set_bit(AUTOFLOWLABEL_SET, sk);
+ return 0;
+ case IPV6_DONTFRAG:
+ inet6_assign_bit(DONTFRAG, sk, valbool);
+ return 0;
+ case IPV6_RECVERR:
+ if (optlen < sizeof(int))
+ return -EINVAL;
+ inet6_assign_bit(RECVERR6, sk, valbool);
+ if (!val)
+ skb_errqueue_purge(&sk->sk_error_queue);
+ return 0;
+ case IPV6_ROUTER_ALERT_ISOLATE:
+ if (optlen < sizeof(int))
+ return -EINVAL;
+ inet6_assign_bit(RTALERT_ISOLATE, sk, valbool);
+ return 0;
+ case IPV6_MTU_DISCOVER:
+ if (optlen < sizeof(int))
+ return -EINVAL;
+ if (val < IPV6_PMTUDISC_DONT || val > IPV6_PMTUDISC_OMIT)
+ return -EINVAL;
+ WRITE_ONCE(np->pmtudisc, val);
+ return 0;
+ case IPV6_FLOWINFO_SEND:
+ if (optlen < sizeof(int))
+ return -EINVAL;
+ inet6_assign_bit(SNDFLOW, sk, valbool);
+ return 0;
+ case IPV6_ADDR_PREFERENCES:
+ if (optlen < sizeof(int))
+ return -EINVAL;
+ return ip6_sock_set_addr_preferences(sk, val);
+ }
if (needs_rtnl)
rtnl_lock();
sockopt_lock_sock(sk);
@@ -733,34 +828,7 @@ done:
}
break;
}
- case IPV6_UNICAST_HOPS:
- if (optlen < sizeof(int))
- goto e_inval;
- if (val > 255 || val < -1)
- goto e_inval;
- np->hop_limit = val;
- retv = 0;
- break;
- case IPV6_MULTICAST_HOPS:
- if (sk->sk_type == SOCK_STREAM)
- break;
- if (optlen < sizeof(int))
- goto e_inval;
- if (val > 255 || val < -1)
- goto e_inval;
- np->mcast_hops = (val == -1 ? IPV6_DEFAULT_MCASTHOPS : val);
- retv = 0;
- break;
-
- case IPV6_MULTICAST_LOOP:
- if (optlen < sizeof(int))
- goto e_inval;
- if (val != valbool)
- goto e_inval;
- np->mc_loop = valbool;
- retv = 0;
- break;
case IPV6_UNICAST_IF:
{
@@ -862,13 +930,6 @@ done:
retv = ipv6_sock_ac_drop(sk, mreq.ipv6mr_ifindex, &mreq.ipv6mr_acaddr);
break;
}
- case IPV6_MULTICAST_ALL:
- if (optlen < sizeof(int))
- goto e_inval;
- np->mc_all = valbool;
- retv = 0;
- break;
-
case MCAST_JOIN_GROUP:
case MCAST_LEAVE_GROUP:
if (in_compat_syscall())
@@ -896,42 +957,6 @@ done:
goto e_inval;
retv = ip6_ra_control(sk, val);
break;
- case IPV6_ROUTER_ALERT_ISOLATE:
- if (optlen < sizeof(int))
- goto e_inval;
- np->rtalert_isolate = valbool;
- retv = 0;
- break;
- case IPV6_MTU_DISCOVER:
- if (optlen < sizeof(int))
- goto e_inval;
- if (val < IPV6_PMTUDISC_DONT || val > IPV6_PMTUDISC_OMIT)
- goto e_inval;
- np->pmtudisc = val;
- retv = 0;
- break;
- case IPV6_MTU:
- if (optlen < sizeof(int))
- goto e_inval;
- if (val && val < IPV6_MIN_MTU)
- goto e_inval;
- np->frag_size = val;
- retv = 0;
- break;
- case IPV6_RECVERR:
- if (optlen < sizeof(int))
- goto e_inval;
- np->recverr = valbool;
- if (!val)
- skb_errqueue_purge(&sk->sk_error_queue);
- retv = 0;
- break;
- case IPV6_FLOWINFO_SEND:
- if (optlen < sizeof(int))
- goto e_inval;
- np->sndflow = valbool;
- retv = 0;
- break;
case IPV6_FLOWLABEL_MGR:
retv = ipv6_flowlabel_opt(sk, optval, optlen);
break;
@@ -943,47 +968,10 @@ done:
retv = xfrm_user_policy(sk, optname, optval, optlen);
break;
- case IPV6_ADDR_PREFERENCES:
- if (optlen < sizeof(int))
- goto e_inval;
- retv = __ip6_sock_set_addr_preferences(sk, val);
- break;
- case IPV6_MINHOPCOUNT:
- if (optlen < sizeof(int))
- goto e_inval;
- if (val < 0 || val > 255)
- goto e_inval;
-
- if (val)
- static_branch_enable(&ip6_min_hopcount);
-
- /* tcp_v6_err() and tcp_v6_rcv() might read min_hopcount
- * while we are changing it.
- */
- WRITE_ONCE(np->min_hopcount, val);
- retv = 0;
- break;
- case IPV6_DONTFRAG:
- np->dontfrag = valbool;
- retv = 0;
- break;
- case IPV6_AUTOFLOWLABEL:
- np->autoflowlabel = valbool;
- np->autoflowlabel_set = 1;
- retv = 0;
- break;
case IPV6_RECVFRAGSIZE:
np->rxopt.bits.recvfragsize = valbool;
retv = 0;
break;
- case IPV6_RECVERR_RFC4884:
- if (optlen < sizeof(int))
- goto e_inval;
- if (val < 0 || val > 1)
- goto e_inval;
- np->recverr_rfc4884 = valbool;
- retv = 0;
- break;
}
unlock:
@@ -1180,7 +1168,8 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
put_cmsg(&msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info);
}
if (np->rxopt.bits.rxhlim) {
- int hlim = np->mcast_hops;
+ int hlim = READ_ONCE(np->mcast_hops);
+
put_cmsg(&msg, SOL_IPV6, IPV6_HOPLIMIT, sizeof(hlim), &hlim);
}
if (np->rxopt.bits.rxtclass) {
@@ -1197,7 +1186,8 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
put_cmsg(&msg, SOL_IPV6, IPV6_2292PKTINFO, sizeof(src_info), &src_info);
}
if (np->rxopt.bits.rxohlim) {
- int hlim = np->mcast_hops;
+ int hlim = READ_ONCE(np->mcast_hops);
+
put_cmsg(&msg, SOL_IPV6, IPV6_2292HOPLIMIT, sizeof(hlim), &hlim);
}
if (np->rxopt.bits.rxflow) {
@@ -1347,9 +1337,9 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
struct dst_entry *dst;
if (optname == IPV6_UNICAST_HOPS)
- val = np->hop_limit;
+ val = READ_ONCE(np->hop_limit);
else
- val = np->mcast_hops;
+ val = READ_ONCE(np->mcast_hops);
if (val < 0) {
rcu_read_lock();
@@ -1365,7 +1355,7 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
}
case IPV6_MULTICAST_LOOP:
- val = np->mc_loop;
+ val = inet6_test_bit(MC6_LOOP, sk);
break;
case IPV6_MULTICAST_IF:
@@ -1373,7 +1363,7 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
break;
case IPV6_MULTICAST_ALL:
- val = np->mc_all;
+ val = inet6_test_bit(MC6_ALL, sk);
break;
case IPV6_UNICAST_IF:
@@ -1381,15 +1371,15 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
break;
case IPV6_MTU_DISCOVER:
- val = np->pmtudisc;
+ val = READ_ONCE(np->pmtudisc);
break;
case IPV6_RECVERR:
- val = np->recverr;
+ val = inet6_test_bit(RECVERR6, sk);
break;
case IPV6_FLOWINFO_SEND:
- val = np->sndflow;
+ val = inet6_test_bit(SNDFLOW, sk);
break;
case IPV6_FLOWLABEL_MGR:
@@ -1424,33 +1414,35 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
}
case IPV6_ADDR_PREFERENCES:
+ {
+ u8 srcprefs = READ_ONCE(np->srcprefs);
val = 0;
- if (np->srcprefs & IPV6_PREFER_SRC_TMP)
+ if (srcprefs & IPV6_PREFER_SRC_TMP)
val |= IPV6_PREFER_SRC_TMP;
- else if (np->srcprefs & IPV6_PREFER_SRC_PUBLIC)
+ else if (srcprefs & IPV6_PREFER_SRC_PUBLIC)
val |= IPV6_PREFER_SRC_PUBLIC;
else {
/* XXX: should we return system default? */
val |= IPV6_PREFER_SRC_PUBTMP_DEFAULT;
}
- if (np->srcprefs & IPV6_PREFER_SRC_COA)
+ if (srcprefs & IPV6_PREFER_SRC_COA)
val |= IPV6_PREFER_SRC_COA;
else
val |= IPV6_PREFER_SRC_HOME;
break;
-
+ }
case IPV6_MINHOPCOUNT:
- val = np->min_hopcount;
+ val = READ_ONCE(np->min_hopcount);
break;
case IPV6_DONTFRAG:
- val = np->dontfrag;
+ val = inet6_test_bit(DONTFRAG, sk);
break;
case IPV6_AUTOFLOWLABEL:
- val = ip6_autoflowlabel(sock_net(sk), np);
+ val = ip6_autoflowlabel(sock_net(sk), sk);
break;
case IPV6_RECVFRAGSIZE:
@@ -1458,11 +1450,11 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
break;
case IPV6_ROUTER_ALERT_ISOLATE:
- val = np->rtalert_isolate;
+ val = inet6_test_bit(RTALERT_ISOLATE, sk);
break;
case IPV6_RECVERR_RFC4884:
- val = np->recverr_rfc4884;
+ val = inet6_test_bit(RECVERR6_RFC4884, sk);
break;
default:
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 5ce25bcb9974..99e28b444a4c 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -642,7 +642,7 @@ bool inet6_mc_check(const struct sock *sk, const struct in6_addr *mc_addr,
}
if (!mc) {
rcu_read_unlock();
- return np->mc_all;
+ return inet6_test_bit(MC6_ALL, sk);
}
psl = rcu_dereference(mc->sflist);
if (!psl) {
@@ -1716,7 +1716,7 @@ static void ip6_mc_hdr(const struct sock *sk, struct sk_buff *skb,
hdr->payload_len = htons(len);
hdr->nexthdr = proto;
- hdr->hop_limit = inet6_sk(sk)->hop_limit;
+ hdr->hop_limit = READ_ONCE(inet6_sk(sk)->hop_limit);
hdr->saddr = *saddr;
hdr->daddr = *daddr;
@@ -3011,8 +3011,6 @@ static struct ip6_sf_list *igmp6_mcf_get_next(struct seq_file *seq, struct ip6_s
continue;
state->im = rcu_dereference(state->idev->mc_list);
}
- if (!state->im)
- break;
psf = rcu_dereference(state->im->mca_sources);
}
out:
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 553c8664e0a7..679443d7ecb5 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -500,7 +500,7 @@ void ndisc_send_skb(struct sk_buff *skb, const struct in6_addr *daddr,
csum_partial(icmp6h,
skb->len, 0));
- ip6_nd_hdr(skb, saddr, daddr, inet6_sk(sk)->hop_limit, skb->len);
+ ip6_nd_hdr(skb, saddr, daddr, READ_ONCE(inet6_sk(sk)->hop_limit), skb->len);
rcu_read_lock();
idev = __in6_dev_get(dst->dev);
@@ -1996,7 +1996,7 @@ static int __net_init ndisc_net_init(struct net *net)
np = inet6_sk(sk);
np->hop_limit = 255;
/* Do not loopback ndisc messages */
- np->mc_loop = 0;
+ inet6_clear_bit(MC6_LOOP, sk);
return 0;
}
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index 5831aaa53d75..d2098dd4ceae 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -56,7 +56,7 @@ static int ping_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
if (addr_len < SIN6_LEN_RFC2133)
return -EINVAL;
- return BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr);
+ return BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr, &addr_len);
}
static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
@@ -89,7 +89,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
return -EAFNOSUPPORT;
}
daddr = &(u->sin6_addr);
- if (np->sndflow)
+ if (inet6_test_bit(SNDFLOW, sk))
fl6.flowlabel = u->sin6_flowinfo & IPV6_FLOWINFO_MASK;
if (__ipv6_addr_needs_scope_id(ipv6_addr_type(daddr)))
oif = u->sin6_scope_id;
@@ -118,7 +118,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
l3mdev_master_ifindex_by_index(sock_net(sk), oif) != sk->sk_bound_dev_if))
return -EINVAL;
- ipcm6_init_sk(&ipc6, np);
+ ipcm6_init_sk(&ipc6, sk);
ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags);
ipc6.sockc.mark = READ_ONCE(sk->sk_mark);
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 42fcec3ecf5e..a2aa54a2baae 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -291,6 +291,7 @@ static void rawv6_err(struct sock *sk, struct sk_buff *skb,
struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info)
{
+ bool recverr = inet6_test_bit(RECVERR6, sk);
struct ipv6_pinfo *np = inet6_sk(sk);
int err;
int harderr;
@@ -300,26 +301,26 @@ static void rawv6_err(struct sock *sk, struct sk_buff *skb,
2. Socket is connected (otherwise the error indication
is useless without recverr and error is hard.
*/
- if (!np->recverr && sk->sk_state != TCP_ESTABLISHED)
+ if (!recverr && sk->sk_state != TCP_ESTABLISHED)
return;
harderr = icmpv6_err_convert(type, code, &err);
if (type == ICMPV6_PKT_TOOBIG) {
ip6_sk_update_pmtu(skb, sk, info);
- harderr = (np->pmtudisc == IPV6_PMTUDISC_DO);
+ harderr = (READ_ONCE(np->pmtudisc) == IPV6_PMTUDISC_DO);
}
if (type == NDISC_REDIRECT) {
ip6_sk_redirect(skb, sk);
return;
}
- if (np->recverr) {
+ if (recverr) {
u8 *payload = skb->data;
if (!inet_test_bit(HDRINCL, sk))
payload += offset;
ipv6_icmp_error(sk, skb, err, 0, ntohl(info), payload);
}
- if (np->recverr || harderr) {
+ if (recverr || harderr) {
sk->sk_err = err;
sk_error_report(sk);
}
@@ -587,7 +588,6 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length,
struct flowi6 *fl6, struct dst_entry **dstp,
unsigned int flags, const struct sockcm_cookie *sockc)
{
- struct ipv6_pinfo *np = inet6_sk(sk);
struct net *net = sock_net(sk);
struct ipv6hdr *iph;
struct sk_buff *skb;
@@ -668,7 +668,7 @@ out:
error:
IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
error_check:
- if (err == -ENOBUFS && !np->recverr)
+ if (err == -ENOBUFS && !inet6_test_bit(RECVERR6, sk))
err = 0;
return err;
}
@@ -795,7 +795,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
return -EINVAL;
daddr = &sin6->sin6_addr;
- if (np->sndflow) {
+ if (inet6_test_bit(SNDFLOW, sk)) {
fl6.flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK;
if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
@@ -898,7 +898,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
if (ipc6.dontfrag < 0)
- ipc6.dontfrag = np->dontfrag;
+ ipc6.dontfrag = inet6_test_bit(DONTFRAG, sk);
if (msg->msg_flags&MSG_CONFIRM)
goto do_confirm;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 9c687b357e6a..b132feae3393 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -341,7 +341,7 @@ struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev,
int flags)
{
struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
- 1, DST_OBSOLETE_FORCE_CHK, flags);
+ DST_OBSOLETE_FORCE_CHK, flags);
if (rt) {
rt6_info_init(rt);
@@ -2622,7 +2622,7 @@ static struct dst_entry *ip6_route_output_flags_noref(struct net *net,
if (!any_src)
flags |= RT6_LOOKUP_F_HAS_SADDR;
else if (sk)
- flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
+ flags |= rt6_srcprefs2flags(READ_ONCE(inet6_sk(sk)->srcprefs));
return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output);
}
@@ -2655,7 +2655,7 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori
struct net_device *loopback_dev = net->loopback_dev;
struct dst_entry *new = NULL;
- rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1,
+ rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev,
DST_OBSOLETE_DEAD, 0);
if (rt) {
rt6_info_init(rt);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 44b6949d72b2..4eb0c6386aca 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -135,7 +135,7 @@ static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
sock_owned_by_me(sk);
- return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
+ return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr, &addr_len);
}
static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
@@ -163,7 +163,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
memset(&fl6, 0, sizeof(fl6));
- if (np->sndflow) {
+ if (inet6_test_bit(SNDFLOW, sk)) {
fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
IP6_ECN_flow_init(fl6.flowlabel);
if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
@@ -508,7 +508,7 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
tcp_ld_RTO_revert(sk, seq);
}
- if (!sock_owned_by_user(sk) && np->recverr) {
+ if (!sock_owned_by_user(sk) && inet6_test_bit(RECVERR6, sk)) {
WRITE_ONCE(sk->sk_err, err);
sk_error_report(sk);
} else {
@@ -548,7 +548,7 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
&ireq->ir_v6_rmt_addr);
fl6->daddr = ireq->ir_v6_rmt_addr;
- if (np->repflow && ireq->pktopts)
+ if (inet6_test_bit(REPFLOW, sk) && ireq->pktopts)
fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
@@ -797,7 +797,7 @@ static void tcp_v6_init_req(struct request_sock *req,
(ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
np->rxopt.bits.rxinfo ||
np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
- np->rxopt.bits.rxohlim || np->repflow)) {
+ np->rxopt.bits.rxohlim || inet6_test_bit(REPFLOW, sk_listener))) {
refcount_inc(&skb->users);
ireq->pktopts = skb;
}
@@ -1055,10 +1055,8 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
if (sk) {
oif = sk->sk_bound_dev_if;
if (sk_fullsock(sk)) {
- const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
-
trace_tcp_send_reset(sk, skb);
- if (np->repflow)
+ if (inet6_test_bit(REPFLOW, sk))
label = ip6_flowlabel(ipv6h);
priority = sk->sk_priority;
txhash = sk->sk_txhash;
@@ -1247,7 +1245,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
newnp->mcast_oif = inet_iif(skb);
newnp->mcast_hops = ip_hdr(skb)->ttl;
newnp->rcv_flowinfo = 0;
- if (np->repflow)
+ if (inet6_test_bit(REPFLOW, sk))
newnp->flow_label = 0;
/*
@@ -1320,7 +1318,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
newnp->mcast_oif = tcp_v6_iif(skb);
newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
- if (np->repflow)
+ if (inet6_test_bit(REPFLOW, sk))
newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
/* Set ToS of the new socket based upon the value of incoming SYN.
@@ -1542,10 +1540,11 @@ ipv6_pktoptions:
if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
np->mcast_oif = tcp_v6_iif(opt_skb);
if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
- np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
+ WRITE_ONCE(np->mcast_hops,
+ ipv6_hdr(opt_skb)->hop_limit);
if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
- if (np->repflow)
+ if (inet6_test_bit(REPFLOW, sk))
np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
tcp_v6_restore_cb(opt_skb);
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 86b5d509a468..622b10a549f7 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -410,10 +410,11 @@ try_again:
*addr_len = sizeof(*sin6);
BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk,
- (struct sockaddr *)sin6);
+ (struct sockaddr *)sin6,
+ addr_len);
}
- if (udp_sk(sk)->gro_enabled)
+ if (udp_test_bit(GRO_ENABLED, sk))
udp_cmsg_recv(msg, sk, skb);
if (np->rxopt.all)
@@ -571,7 +572,7 @@ int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source,
inet6_iif(skb), inet6_sdif(skb), udptable, NULL);
- if (!sk || udp_sk(sk)->encap_type) {
+ if (!sk || READ_ONCE(udp_sk(sk)->encap_type)) {
/* No socket for error: try tunnels before discarding */
if (static_branch_unlikely(&udpv6_encap_needed_key)) {
sk = __udp6_lib_err_encap(net, hdr, offset, uh,
@@ -598,7 +599,7 @@ int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (!ip6_sk_accept_pmtu(sk))
goto out;
ip6_sk_update_pmtu(skb, sk, info);
- if (np->pmtudisc != IPV6_PMTUDISC_DONT)
+ if (READ_ONCE(np->pmtudisc) != IPV6_PMTUDISC_DONT)
harderr = 1;
}
if (type == NDISC_REDIRECT) {
@@ -619,7 +620,7 @@ int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
goto out;
}
- if (!np->recverr) {
+ if (!inet6_test_bit(RECVERR6, sk)) {
if (!harderr || sk->sk_state != TCP_ESTABLISHED)
goto out;
} else {
@@ -688,7 +689,8 @@ static int udpv6_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)
}
nf_reset_ct(skb);
- if (static_branch_unlikely(&udpv6_encap_needed_key) && up->encap_type) {
+ if (static_branch_unlikely(&udpv6_encap_needed_key) &&
+ READ_ONCE(up->encap_type)) {
int (*encap_rcv)(struct sock *sk, struct sk_buff *skb);
/*
@@ -726,16 +728,17 @@ static int udpv6_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)
/*
* UDP-Lite specific tests, ignored on UDP sockets (see net/ipv4/udp.c).
*/
- if ((up->pcflag & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) {
+ if (udp_test_bit(UDPLITE_RECV_CC, sk) && UDP_SKB_CB(skb)->partial_cov) {
+ u16 pcrlen = READ_ONCE(up->pcrlen);
- if (up->pcrlen == 0) { /* full coverage was set */
+ if (pcrlen == 0) { /* full coverage was set */
net_dbg_ratelimited("UDPLITE6: partial coverage %d while full coverage %d requested\n",
UDP_SKB_CB(skb)->cscov, skb->len);
goto drop;
}
- if (UDP_SKB_CB(skb)->cscov < up->pcrlen) {
+ if (UDP_SKB_CB(skb)->cscov < pcrlen) {
net_dbg_ratelimited("UDPLITE6: coverage %d too small, need min %d\n",
- UDP_SKB_CB(skb)->cscov, up->pcrlen);
+ UDP_SKB_CB(skb)->cscov, pcrlen);
goto drop;
}
}
@@ -858,7 +861,7 @@ start_lookup:
/* If zero checksum and no_check is not on for
* the socket then skip it.
*/
- if (!uh->check && !udp_sk(sk)->no_check6_rx)
+ if (!uh->check && !udp_get_no_check6_rx(sk))
continue;
if (!first) {
first = sk;
@@ -980,7 +983,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
if (unlikely(rcu_dereference(sk->sk_rx_dst) != dst))
udp6_sk_rx_dst_set(sk, dst);
- if (!uh->check && !udp_sk(sk)->no_check6_rx) {
+ if (!uh->check && !udp_get_no_check6_rx(sk)) {
if (refcounted)
sock_put(sk);
goto report_csum_error;
@@ -1002,7 +1005,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
/* Unicast */
sk = __udp6_lib_lookup_skb(skb, uh->source, uh->dest, udptable);
if (sk) {
- if (!uh->check && !udp_sk(sk)->no_check6_rx)
+ if (!uh->check && !udp_get_no_check6_rx(sk))
goto report_csum_error;
return udp6_unicast_rcv_skb(sk, skb, uh);
}
@@ -1155,7 +1158,7 @@ static int udpv6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
if (addr_len < SIN6_LEN_RFC2133)
return -EINVAL;
- return BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr);
+ return BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr, &addr_len);
}
/**
@@ -1241,7 +1244,7 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6,
kfree_skb(skb);
return -EINVAL;
}
- if (udp_sk(sk)->no_check6_tx) {
+ if (udp_get_no_check6_tx(sk)) {
kfree_skb(skb);
return -EINVAL;
}
@@ -1262,7 +1265,7 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6,
if (is_udplite)
csum = udplite_csum(skb);
- else if (udp_sk(sk)->no_check6_tx) { /* UDP csum disabled */
+ else if (udp_get_no_check6_tx(sk)) { /* UDP csum disabled */
skb->ip_summed = CHECKSUM_NONE;
goto send;
} else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */
@@ -1281,7 +1284,7 @@ csum_partial:
send:
err = ip6_send_skb(skb);
if (err) {
- if (err == -ENOBUFS && !inet6_sk(sk)->recverr) {
+ if (err == -ENOBUFS && !inet6_test_bit(RECVERR6, sk)) {
UDP6_INC_STATS(sock_net(sk),
UDP_MIB_SNDBUFERRORS, is_udplite);
err = 0;
@@ -1332,7 +1335,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
int addr_len = msg->msg_namelen;
bool connected = false;
int ulen = len;
- int corkreq = READ_ONCE(up->corkflag) || msg->msg_flags&MSG_MORE;
+ int corkreq = udp_test_bit(CORK, sk) || msg->msg_flags & MSG_MORE;
int err;
int is_udplite = IS_UDPLITE(sk);
int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
@@ -1427,7 +1430,7 @@ do_udp_sendmsg:
fl6->fl6_dport = sin6->sin6_port;
daddr = &sin6->sin6_addr;
- if (np->sndflow) {
+ if (inet6_test_bit(SNDFLOW, sk)) {
fl6->flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK;
if (fl6->flowlabel & IPV6_FLOWLABEL_MASK) {
flowlabel = fl6_sock_lookup(sk, fl6->flowlabel);
@@ -1508,6 +1511,7 @@ do_udp_sendmsg:
if (cgroup_bpf_enabled(CGROUP_UDP6_SENDMSG) && !connected) {
err = BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk,
(struct sockaddr *)sin6,
+ &addr_len,
&fl6->saddr);
if (err)
goto out_no_dst;
@@ -1593,7 +1597,7 @@ back_from_confirm:
do_append_data:
if (ipc6.dontfrag < 0)
- ipc6.dontfrag = np->dontfrag;
+ ipc6.dontfrag = inet6_test_bit(DONTFRAG, sk);
up->len += ulen;
err = ip6_append_data(sk, getfrag, msg, ulen, sizeof(struct udphdr),
&ipc6, fl6, (struct rt6_info *)dst,
@@ -1606,7 +1610,7 @@ do_append_data:
up->pending = 0;
if (err > 0)
- err = np->recverr ? net_xmit_errno(err) : 0;
+ err = inet6_test_bit(RECVERR6, sk) ? net_xmit_errno(err) : 0;
release_sock(sk);
out:
@@ -1644,11 +1648,11 @@ static void udpv6_splice_eof(struct socket *sock)
struct sock *sk = sock->sk;
struct udp_sock *up = udp_sk(sk);
- if (!up->pending || READ_ONCE(up->corkflag))
+ if (!up->pending || udp_test_bit(CORK, sk))
return;
lock_sock(sk);
- if (up->pending && !READ_ONCE(up->corkflag))
+ if (up->pending && !udp_test_bit(CORK, sk))
udp_v6_push_pending_frames(sk);
release_sock(sk);
}
@@ -1670,7 +1674,7 @@ void udpv6_destroy_sock(struct sock *sk)
if (encap_destroy)
encap_destroy(sk);
}
- if (up->encap_enabled) {
+ if (udp_test_bit(ENCAP_ENABLED, sk)) {
static_branch_dec(&udpv6_encap_needed_key);
udp_encap_disable();
}
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
index 267d491e9707..a60bec9b14f1 100644
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c
@@ -17,7 +17,6 @@
static int udplitev6_sk_init(struct sock *sk)
{
udpv6_init_sock(sk);
- udp_sk(sk)->pcflag = UDPLITE_BIT;
pr_warn_once("UDP-Lite is deprecated and scheduled to be removed in 2025, "
"please contact the netdev mailing list\n");
return 0;
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index 4907ab241d6b..4156387248e4 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -81,14 +81,14 @@ int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
struct ipv6hdr *ip6h;
int len;
int ip6hlen = sizeof(struct ipv6hdr);
-
__u8 *udpdata;
__be32 *udpdata32;
- __u16 encap_type = up->encap_type;
+ u16 encap_type;
if (skb->protocol == htons(ETH_P_IP))
return xfrm4_udp_encap_rcv(sk, skb);
+ encap_type = READ_ONCE(up->encap_type);
/* if this is not encapsulated socket, then just return now */
if (!encap_type)
return 1;
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 03608d3ded4b..8d21ff25f160 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -1139,9 +1139,9 @@ static void l2tp_tunnel_destruct(struct sock *sk)
switch (tunnel->encap) {
case L2TP_ENCAPTYPE_UDP:
/* No longer an encapsulation socket. See net/ipv4/udp.c */
- (udp_sk(sk))->encap_type = 0;
- (udp_sk(sk))->encap_rcv = NULL;
- (udp_sk(sk))->encap_destroy = NULL;
+ WRITE_ONCE(udp_sk(sk)->encap_type, 0);
+ udp_sk(sk)->encap_rcv = NULL;
+ udp_sk(sk)->encap_destroy = NULL;
break;
case L2TP_ENCAPTYPE_IP:
break;
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index 11f3d375cec0..bb373e249237 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -431,7 +431,7 @@ static int l2tp_ip6_getname(struct socket *sock, struct sockaddr *uaddr,
return -ENOTCONN;
lsa->l2tp_conn_id = lsk->peer_conn_id;
lsa->l2tp_addr = sk->sk_v6_daddr;
- if (np->sndflow)
+ if (inet6_test_bit(SNDFLOW, sk))
lsa->l2tp_flowinfo = np->flow_label;
} else {
if (ipv6_addr_any(&sk->sk_v6_rcv_saddr))
@@ -528,7 +528,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
return -EAFNOSUPPORT;
daddr = &lsa->l2tp_addr;
- if (np->sndflow) {
+ if (inet6_test_bit(SNDFLOW, sk)) {
fl6.flowlabel = lsa->l2tp_flowinfo & IPV6_FLOWINFO_MASK;
if (fl6.flowlabel & IPV6_FLOWLABEL_MASK) {
flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
@@ -620,7 +620,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
if (ipc6.dontfrag < 0)
- ipc6.dontfrag = np->dontfrag;
+ ipc6.dontfrag = inet6_test_bit(DONTFRAG, sk);
if (msg->msg_flags & MSG_CONFIRM)
goto do_confirm;
diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c
index a4af3b7675ef..c3a60fd19c37 100644
--- a/net/mac80211/tdls.c
+++ b/net/mac80211/tdls.c
@@ -309,7 +309,7 @@ ieee80211_tdls_chandef_vht_upgrade(struct ieee80211_sub_if_data *sdata,
struct sta_info *sta)
{
/* IEEE802.11ac-2013 Table E-4 */
- u16 centers_80mhz[] = { 5210, 5290, 5530, 5610, 5690, 5775 };
+ static const u16 centers_80mhz[] = { 5210, 5290, 5530, 5610, 5690, 5775 };
struct cfg80211_chan_def uc = sta->tdls_chandef;
enum nl80211_chan_width max_width =
ieee80211_sta_cap_chan_bw(&sta->deflink);
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index 4174076c66fa..05d47c0e5ec3 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -1298,17 +1298,13 @@ static void set_sock_size(struct sock *sk, int mode, int val)
static void set_mcast_loop(struct sock *sk, u_char loop)
{
/* setsockopt(sock, SOL_IP, IP_MULTICAST_LOOP, &loop, sizeof(loop)); */
- lock_sock(sk);
inet_assign_bit(MC_LOOP, sk, loop);
#ifdef CONFIG_IP_VS_IPV6
- if (sk->sk_family == AF_INET6) {
- struct ipv6_pinfo *np = inet6_sk(sk);
-
+ if (READ_ONCE(sk->sk_family) == AF_INET6) {
/* IPV6_MULTICAST_LOOP */
- np->mc_loop = loop ? 1 : 0;
+ inet6_assign_bit(MC6_LOOP, sk, loop);
}
#endif
- release_sock(sk);
}
/*
@@ -1326,7 +1322,7 @@ static void set_mcast_ttl(struct sock *sk, u_char ttl)
struct ipv6_pinfo *np = inet6_sk(sk);
/* IPV6_MULTICAST_HOPS */
- np->mcast_hops = ttl;
+ WRITE_ONCE(np->mcast_hops, ttl);
}
#endif
release_sock(sk);
@@ -1345,7 +1341,7 @@ static void set_mcast_pmtudisc(struct sock *sk, int val)
struct ipv6_pinfo *np = inet6_sk(sk);
/* IPV6_MTU_DISCOVER */
- np->pmtudisc = val;
+ WRITE_ONCE(np->pmtudisc, val);
}
#endif
release_sock(sk);
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index fd66014d8a76..5f8094acd056 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -873,7 +873,7 @@ static void ovs_fragment(struct net *net, struct vport *vport,
prepare_frag(vport, skb, orig_network_offset,
ovs_key_mac_proto(key));
- dst_init(&ovs_rt.dst, &ovs_dst_ops, NULL, 1,
+ dst_init(&ovs_rt.dst, &ovs_dst_ops, NULL,
DST_OBSOLETE_NONE, DST_NOCOUNT);
ovs_rt.dst.dev = vport->dev;
@@ -890,7 +890,7 @@ static void ovs_fragment(struct net *net, struct vport *vport,
prepare_frag(vport, skb, orig_network_offset,
ovs_key_mac_proto(key));
memset(&ovs_rt, 0, sizeof(ovs_rt));
- dst_init(&ovs_rt.dst, &ovs_dst_ops, NULL, 1,
+ dst_init(&ovs_rt.dst, &ovs_dst_ops, NULL,
DST_OBSOLETE_NONE, DST_NOCOUNT);
ovs_rt.dst.dev = vport->dev;
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index 1e20bbd687f1..1424bfeaca73 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -375,9 +375,9 @@ out:
static const struct nla_policy route4_policy[TCA_ROUTE4_MAX + 1] = {
[TCA_ROUTE4_CLASSID] = { .type = NLA_U32 },
- [TCA_ROUTE4_TO] = { .type = NLA_U32 },
- [TCA_ROUTE4_FROM] = { .type = NLA_U32 },
- [TCA_ROUTE4_IIF] = { .type = NLA_U32 },
+ [TCA_ROUTE4_TO] = NLA_POLICY_MAX(NLA_U32, 0xFF),
+ [TCA_ROUTE4_FROM] = NLA_POLICY_MAX(NLA_U32, 0xFF),
+ [TCA_ROUTE4_IIF] = NLA_POLICY_MAX(NLA_U32, 0x7FFF),
};
static int route4_set_parms(struct net *net, struct tcf_proto *tp,
@@ -397,33 +397,37 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp,
return err;
if (tb[TCA_ROUTE4_TO]) {
- if (new && handle & 0x8000)
+ if (new && handle & 0x8000) {
+ NL_SET_ERR_MSG(extack, "Invalid handle");
return -EINVAL;
+ }
to = nla_get_u32(tb[TCA_ROUTE4_TO]);
- if (to > 0xFF)
- return -EINVAL;
nhandle = to;
}
+ if (tb[TCA_ROUTE4_FROM] && tb[TCA_ROUTE4_IIF]) {
+ NL_SET_ERR_MSG_ATTR(extack, tb[TCA_ROUTE4_FROM],
+ "'from' and 'fromif' are mutually exclusive");
+ return -EINVAL;
+ }
+
if (tb[TCA_ROUTE4_FROM]) {
- if (tb[TCA_ROUTE4_IIF])
- return -EINVAL;
id = nla_get_u32(tb[TCA_ROUTE4_FROM]);
- if (id > 0xFF)
- return -EINVAL;
nhandle |= id << 16;
} else if (tb[TCA_ROUTE4_IIF]) {
id = nla_get_u32(tb[TCA_ROUTE4_IIF]);
- if (id > 0x7FFF)
- return -EINVAL;
nhandle |= (id | 0x8000) << 16;
} else
nhandle |= 0xFFFF << 16;
if (handle && new) {
nhandle |= handle & 0x7F00;
- if (nhandle != handle)
+ if (nhandle != handle) {
+ NL_SET_ERR_MSG_FMT(extack,
+ "Handle mismatch constructed: %x (expected: %x)",
+ handle, nhandle);
return -EINVAL;
+ }
}
if (!nhandle) {
@@ -478,7 +482,6 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
struct route4_filter __rcu **fp;
struct route4_filter *fold, *f1, *pfp, *f = NULL;
struct route4_bucket *b;
- struct nlattr *opt = tca[TCA_OPTIONS];
struct nlattr *tb[TCA_ROUTE4_MAX + 1];
unsigned int h, th;
int err;
@@ -489,10 +492,12 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
return -EINVAL;
}
- if (opt == NULL)
+ if (NL_REQ_ATTR_CHECK(extack, NULL, tca, TCA_OPTIONS)) {
+ NL_SET_ERR_MSG_MOD(extack, "Missing options");
return -EINVAL;
+ }
- err = nla_parse_nested_deprecated(tb, TCA_ROUTE4_MAX, opt,
+ err = nla_parse_nested_deprecated(tb, TCA_ROUTE4_MAX, tca[TCA_OPTIONS],
route4_policy, NULL);
if (err < 0)
return err;
diff --git a/net/sched/sch_frag.c b/net/sched/sch_frag.c
index a9bd0a235890..ce63414185fd 100644
--- a/net/sched/sch_frag.c
+++ b/net/sched/sch_frag.c
@@ -96,7 +96,7 @@ static int sch_fragment(struct net *net, struct sk_buff *skb,
unsigned long orig_dst;
sch_frag_prepare_frag(skb, xmit);
- dst_init(&sch_frag_rt.dst, &sch_frag_dst_ops, NULL, 1,
+ dst_init(&sch_frag_rt.dst, &sch_frag_dst_ops, NULL,
DST_OBSOLETE_NONE, DST_NOCOUNT);
sch_frag_rt.dst.dev = skb->dev;
@@ -112,7 +112,7 @@ static int sch_fragment(struct net *net, struct sk_buff *skb,
sch_frag_prepare_frag(skb, xmit);
memset(&sch_frag_rt, 0, sizeof(sch_frag_rt));
- dst_init(&sch_frag_rt.dst, &sch_frag_dst_ops, NULL, 1,
+ dst_init(&sch_frag_rt.dst, &sch_frag_dst_ops, NULL,
DST_OBSOLETE_NONE, DST_NOCOUNT);
sch_frag_rt.dst.dev = skb->dev;
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 43f2731bf590..5c0ed5909d85 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -128,7 +128,6 @@ static void sctp_v6_err_handle(struct sctp_transport *t, struct sk_buff *skb,
{
struct sctp_association *asoc = t->asoc;
struct sock *sk = asoc->base.sk;
- struct ipv6_pinfo *np;
int err = 0;
switch (type) {
@@ -149,9 +148,8 @@ static void sctp_v6_err_handle(struct sctp_transport *t, struct sk_buff *skb,
break;
}
- np = inet6_sk(sk);
icmpv6_err_convert(type, code, &err);
- if (!sock_owned_by_user(sk) && np->recverr) {
+ if (!sock_owned_by_user(sk) && inet6_test_bit(RECVERR6, sk)) {
sk->sk_err = err;
sk_error_report(sk);
} else {
@@ -298,7 +296,8 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
if (t->flowlabel & SCTP_FLOWLABEL_SET_MASK)
fl6->flowlabel = htonl(t->flowlabel & SCTP_FLOWLABEL_VAL_MASK);
- if (np->sndflow && (fl6->flowlabel & IPV6_FLOWLABEL_MASK)) {
+ if (inet6_test_bit(SNDFLOW, sk) &&
+ (fl6->flowlabel & IPV6_FLOWLABEL_MASK)) {
struct ip6_flowlabel *flowlabel;
flowlabel = fl6_sock_lookup(sk, fl6->flowlabel);
diff --git a/net/tipc/link.c b/net/tipc/link.c
index e33b4f29f77c..d0143823658d 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -1446,7 +1446,7 @@ u16 tipc_get_gap_ack_blks(struct tipc_gap_ack_blks **ga, struct tipc_link *l,
p = (struct tipc_gap_ack_blks *)msg_data(hdr);
sz = ntohs(p->len);
/* Sanity check */
- if (sz == struct_size(p, gacks, p->ugack_cnt + p->bgack_cnt)) {
+ if (sz == struct_size(p, gacks, size_add(p->ugack_cnt, p->bgack_cnt))) {
/* Good, check if the desired type exists */
if ((uc && p->ugack_cnt) || (!uc && p->bgack_cnt))
goto ok;
@@ -1533,7 +1533,7 @@ static u16 tipc_build_gap_ack_blks(struct tipc_link *l, struct tipc_msg *hdr)
__tipc_build_gap_ack_blks(ga, l, ga->bgack_cnt) : 0;
/* Total len */
- len = struct_size(ga, gacks, ga->bgack_cnt + ga->ugack_cnt);
+ len = struct_size(ga, gacks, size_add(ga->bgack_cnt, ga->ugack_cnt));
ga->len = htons(len);
return len;
}
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index e9d1e83a859d..9634dfd636fd 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -1491,7 +1491,7 @@ static int tls_decrypt_sg(struct sock *sk, struct iov_iter *out_iov,
*/
aead_size = sizeof(*aead_req) + crypto_aead_reqsize(ctx->aead_recv);
aead_size = ALIGN(aead_size, __alignof__(*dctx));
- mem = kmalloc(aead_size + struct_size(dctx, sg, n_sgin + n_sgout),
+ mem = kmalloc(aead_size + struct_size(dctx, sg, size_add(n_sgin, n_sgout)),
sk->sk_allocation);
if (!mem) {
err = -ENOMEM;
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 3e8a04a13668..e10d07c76044 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -116,6 +116,7 @@
#include <linux/freezer.h>
#include <linux/file.h>
#include <linux/btf_ids.h>
+#include <linux/bpf-cgroup.h>
#include "scm.h"
@@ -1381,6 +1382,10 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
if (err)
goto out;
+ err = BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, addr, &alen);
+ if (err)
+ goto out;
+
if ((test_bit(SOCK_PASSCRED, &sock->flags) ||
test_bit(SOCK_PASSPIDFD, &sock->flags)) &&
!unix_sk(sk)->addr) {
@@ -1490,6 +1495,10 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
if (err)
goto out;
+ err = BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, uaddr, &addr_len);
+ if (err)
+ goto out;
+
if ((test_bit(SOCK_PASSCRED, &sock->flags) ||
test_bit(SOCK_PASSPIDFD, &sock->flags)) && !u->addr) {
err = unix_autobind(sk);
@@ -1770,6 +1779,13 @@ static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
} else {
err = addr->len;
memcpy(sunaddr, addr->name, addr->len);
+
+ if (peer)
+ BPF_CGROUP_RUN_SA_PROG(sk, uaddr, &err,
+ CGROUP_UNIX_GETPEERNAME);
+ else
+ BPF_CGROUP_RUN_SA_PROG(sk, uaddr, &err,
+ CGROUP_UNIX_GETSOCKNAME);
}
sock_put(sk);
out:
@@ -1922,6 +1938,13 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
err = unix_validate_addr(sunaddr, msg->msg_namelen);
if (err)
goto out;
+
+ err = BPF_CGROUP_RUN_PROG_UNIX_SENDMSG_LOCK(sk,
+ msg->msg_name,
+ &msg->msg_namelen,
+ NULL);
+ if (err)
+ goto out;
} else {
sunaddr = NULL;
err = -ENOTCONN;
@@ -2390,9 +2413,14 @@ int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size,
EPOLLOUT | EPOLLWRNORM |
EPOLLWRBAND);
- if (msg->msg_name)
+ if (msg->msg_name) {
unix_copy_addr(msg, skb->sk);
+ BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk,
+ msg->msg_name,
+ &msg->msg_namelen);
+ }
+
if (size > skb->len - skip)
size = skb->len - skip;
else if (size < skb->len - skip)
@@ -2744,6 +2772,11 @@ unlock:
DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
state->msg->msg_name);
unix_copy_addr(state->msg, skb->sk);
+
+ BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk,
+ state->msg->msg_name,
+ &state->msg->msg_namelen);
+
sunaddr = NULL;
}
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 020cf17ab7e4..013b65241b65 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -1921,6 +1921,9 @@ out_err:
err = total_written;
}
out:
+ if (sk->sk_type == SOCK_STREAM)
+ err = sk_stream_error(sk, msg->msg_flags, err);
+
release_sock(sk);
return err;
}
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index b80bf681327b..fdb95da12029 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -63,6 +63,17 @@ struct virtio_vsock {
u32 guest_cid;
bool seqpacket_allow;
+
+ /* These fields are used only in tx path in function
+ * 'virtio_transport_send_pkt_work()', so to save
+ * stack space in it, place both of them here. Each
+ * pointer from 'out_sgs' points to the corresponding
+ * element in 'out_bufs' - this is initialized in
+ * 'virtio_vsock_probe()'. Both fields are protected
+ * by 'tx_lock'. +1 is needed for packet header.
+ */
+ struct scatterlist *out_sgs[MAX_SKB_FRAGS + 1];
+ struct scatterlist out_bufs[MAX_SKB_FRAGS + 1];
};
static u32 virtio_transport_get_local_cid(void)
@@ -100,8 +111,8 @@ virtio_transport_send_pkt_work(struct work_struct *work)
vq = vsock->vqs[VSOCK_VQ_TX];
for (;;) {
- struct scatterlist hdr, buf, *sgs[2];
int ret, in_sg = 0, out_sg = 0;
+ struct scatterlist **sgs;
struct sk_buff *skb;
bool reply;
@@ -111,12 +122,43 @@ virtio_transport_send_pkt_work(struct work_struct *work)
virtio_transport_deliver_tap_pkt(skb);
reply = virtio_vsock_skb_reply(skb);
-
- sg_init_one(&hdr, virtio_vsock_hdr(skb), sizeof(*virtio_vsock_hdr(skb)));
- sgs[out_sg++] = &hdr;
- if (skb->len > 0) {
- sg_init_one(&buf, skb->data, skb->len);
- sgs[out_sg++] = &buf;
+ sgs = vsock->out_sgs;
+ sg_init_one(sgs[out_sg], virtio_vsock_hdr(skb),
+ sizeof(*virtio_vsock_hdr(skb)));
+ out_sg++;
+
+ if (!skb_is_nonlinear(skb)) {
+ if (skb->len > 0) {
+ sg_init_one(sgs[out_sg], skb->data, skb->len);
+ out_sg++;
+ }
+ } else {
+ struct skb_shared_info *si;
+ int i;
+
+ /* If skb is nonlinear, then its buffer must contain
+ * only header and nothing more. Data is stored in
+ * the fragged part.
+ */
+ WARN_ON_ONCE(skb_headroom(skb) != sizeof(*virtio_vsock_hdr(skb)));
+
+ si = skb_shinfo(skb);
+
+ for (i = 0; i < si->nr_frags; i++) {
+ skb_frag_t *skb_frag = &si->frags[i];
+ void *va;
+
+ /* We will use 'page_to_virt()' for the userspace page
+ * here, because virtio or dma-mapping layers will call
+ * 'virt_to_phys()' later to fill the buffer descriptor.
+ * We don't touch memory at "virtual" address of this page.
+ */
+ va = page_to_virt(skb_frag->bv_page);
+ sg_init_one(sgs[out_sg],
+ va + skb_frag->bv_offset,
+ skb_frag->bv_len);
+ out_sg++;
+ }
}
ret = virtqueue_add_sgs(vq, sgs, out_sg, in_sg, skb, GFP_KERNEL);
@@ -413,6 +455,37 @@ static void virtio_vsock_rx_done(struct virtqueue *vq)
queue_work(virtio_vsock_workqueue, &vsock->rx_work);
}
+static bool virtio_transport_can_msgzerocopy(int bufs_num)
+{
+ struct virtio_vsock *vsock;
+ bool res = false;
+
+ rcu_read_lock();
+
+ vsock = rcu_dereference(the_virtio_vsock);
+ if (vsock) {
+ struct virtqueue *vq = vsock->vqs[VSOCK_VQ_TX];
+
+ /* Check that tx queue is large enough to keep whole
+ * data to send. This is needed, because when there is
+ * not enough free space in the queue, current skb to
+ * send will be reinserted to the head of tx list of
+ * the socket to retry transmission later, so if skb
+ * is bigger than whole queue, it will be reinserted
+ * again and again, thus blocking other skbs to be sent.
+ * Each page of the user provided buffer will be added
+ * as a single buffer to the tx virtqueue, so compare
+ * number of pages against maximum capacity of the queue.
+ */
+ if (bufs_num <= vq->num_max)
+ res = true;
+ }
+
+ rcu_read_unlock();
+
+ return res;
+}
+
static bool virtio_transport_seqpacket_allow(u32 remote_cid);
static struct virtio_transport virtio_transport = {
@@ -462,6 +535,7 @@ static struct virtio_transport virtio_transport = {
},
.send_pkt = virtio_transport_send_pkt,
+ .can_msgzerocopy = virtio_transport_can_msgzerocopy,
};
static bool virtio_transport_seqpacket_allow(u32 remote_cid)
@@ -635,6 +709,7 @@ static int virtio_vsock_probe(struct virtio_device *vdev)
{
struct virtio_vsock *vsock = NULL;
int ret;
+ int i;
ret = mutex_lock_interruptible(&the_virtio_vsock_mutex);
if (ret)
@@ -677,6 +752,9 @@ static int virtio_vsock_probe(struct virtio_device *vdev)
if (ret < 0)
goto out;
+ for (i = 0; i < ARRAY_SIZE(vsock->out_sgs); i++)
+ vsock->out_sgs[i] = &vsock->out_bufs[i];
+
rcu_assign_pointer(the_virtio_vsock, vsock);
virtio_vsock_vqs_start(vsock);
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index 352d042b130b..e22c81435ef7 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -37,27 +37,89 @@ virtio_transport_get_ops(struct vsock_sock *vsk)
return container_of(t, struct virtio_transport, transport);
}
-/* Returns a new packet on success, otherwise returns NULL.
- *
- * If NULL is returned, errp is set to a negative errno.
- */
-static struct sk_buff *
-virtio_transport_alloc_skb(struct virtio_vsock_pkt_info *info,
- size_t len,
- u32 src_cid,
- u32 src_port,
- u32 dst_cid,
- u32 dst_port)
-{
- const size_t skb_len = VIRTIO_VSOCK_SKB_HEADROOM + len;
- struct virtio_vsock_hdr *hdr;
- struct sk_buff *skb;
- void *payload;
- int err;
+static bool virtio_transport_can_zcopy(const struct virtio_transport *t_ops,
+ struct virtio_vsock_pkt_info *info,
+ size_t pkt_len)
+{
+ struct iov_iter *iov_iter;
- skb = virtio_vsock_alloc_skb(skb_len, GFP_KERNEL);
- if (!skb)
- return NULL;
+ if (!info->msg)
+ return false;
+
+ iov_iter = &info->msg->msg_iter;
+
+ if (iov_iter->iov_offset)
+ return false;
+
+ /* We can't send whole iov. */
+ if (iov_iter->count > pkt_len)
+ return false;
+
+ /* Check that transport can send data in zerocopy mode. */
+ t_ops = virtio_transport_get_ops(info->vsk);
+
+ if (t_ops->can_msgzerocopy) {
+ int pages_in_iov = iov_iter_npages(iov_iter, MAX_SKB_FRAGS);
+ int pages_to_send = min(pages_in_iov, MAX_SKB_FRAGS);
+
+ /* +1 is for packet header. */
+ return t_ops->can_msgzerocopy(pages_to_send + 1);
+ }
+
+ return true;
+}
+
+static int virtio_transport_init_zcopy_skb(struct vsock_sock *vsk,
+ struct sk_buff *skb,
+ struct msghdr *msg,
+ bool zerocopy)
+{
+ struct ubuf_info *uarg;
+
+ if (msg->msg_ubuf) {
+ uarg = msg->msg_ubuf;
+ net_zcopy_get(uarg);
+ } else {
+ struct iov_iter *iter = &msg->msg_iter;
+ struct ubuf_info_msgzc *uarg_zc;
+
+ uarg = msg_zerocopy_realloc(sk_vsock(vsk),
+ iter->count,
+ NULL);
+ if (!uarg)
+ return -1;
+
+ uarg_zc = uarg_to_msgzc(uarg);
+ uarg_zc->zerocopy = zerocopy ? 1 : 0;
+ }
+
+ skb_zcopy_init(skb, uarg);
+
+ return 0;
+}
+
+static int virtio_transport_fill_skb(struct sk_buff *skb,
+ struct virtio_vsock_pkt_info *info,
+ size_t len,
+ bool zcopy)
+{
+ if (zcopy)
+ return __zerocopy_sg_from_iter(info->msg, NULL, skb,
+ &info->msg->msg_iter,
+ len);
+
+ return memcpy_from_msg(skb_put(skb, len), info->msg, len);
+}
+
+static void virtio_transport_init_hdr(struct sk_buff *skb,
+ struct virtio_vsock_pkt_info *info,
+ size_t payload_len,
+ u32 src_cid,
+ u32 src_port,
+ u32 dst_cid,
+ u32 dst_port)
+{
+ struct virtio_vsock_hdr *hdr;
hdr = virtio_vsock_hdr(skb);
hdr->type = cpu_to_le16(info->type);
@@ -67,43 +129,28 @@ virtio_transport_alloc_skb(struct virtio_vsock_pkt_info *info,
hdr->src_port = cpu_to_le32(src_port);
hdr->dst_port = cpu_to_le32(dst_port);
hdr->flags = cpu_to_le32(info->flags);
- hdr->len = cpu_to_le32(len);
-
- if (info->msg && len > 0) {
- payload = skb_put(skb, len);
- err = memcpy_from_msg(payload, info->msg, len);
- if (err)
- goto out;
-
- if (msg_data_left(info->msg) == 0 &&
- info->type == VIRTIO_VSOCK_TYPE_SEQPACKET) {
- hdr->flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOM);
-
- if (info->msg->msg_flags & MSG_EOR)
- hdr->flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOR);
- }
- }
+ hdr->len = cpu_to_le32(payload_len);
+}
- if (info->reply)
- virtio_vsock_skb_set_reply(skb);
+static void virtio_transport_copy_nonlinear_skb(const struct sk_buff *skb,
+ void *dst,
+ size_t len)
+{
+ struct iov_iter iov_iter = { 0 };
+ struct kvec kvec;
+ size_t to_copy;
- trace_virtio_transport_alloc_pkt(src_cid, src_port,
- dst_cid, dst_port,
- len,
- info->type,
- info->op,
- info->flags);
+ kvec.iov_base = dst;
+ kvec.iov_len = len;
- if (info->vsk && !skb_set_owner_sk_safe(skb, sk_vsock(info->vsk))) {
- WARN_ONCE(1, "failed to allocate skb on vsock socket with sk_refcnt == 0\n");
- goto out;
- }
+ iov_iter.iter_type = ITER_KVEC;
+ iov_iter.kvec = &kvec;
+ iov_iter.nr_segs = 1;
- return skb;
+ to_copy = min_t(size_t, len, skb->len);
-out:
- kfree_skb(skb);
- return NULL;
+ skb_copy_datagram_iter(skb, VIRTIO_VSOCK_SKB_CB(skb)->offset,
+ &iov_iter, to_copy);
}
/* Packet capture */
@@ -114,7 +161,6 @@ static struct sk_buff *virtio_transport_build_skb(void *opaque)
struct af_vsockmon_hdr *hdr;
struct sk_buff *skb;
size_t payload_len;
- void *payload_buf;
/* A packet could be split to fit the RX buffer, so we can retrieve
* the payload length from the header and the buffer pointer taking
@@ -122,7 +168,6 @@ static struct sk_buff *virtio_transport_build_skb(void *opaque)
*/
pkt_hdr = virtio_vsock_hdr(pkt);
payload_len = pkt->len;
- payload_buf = pkt->data;
skb = alloc_skb(sizeof(*hdr) + sizeof(*pkt_hdr) + payload_len,
GFP_ATOMIC);
@@ -165,7 +210,13 @@ static struct sk_buff *virtio_transport_build_skb(void *opaque)
skb_put_data(skb, pkt_hdr, sizeof(*pkt_hdr));
if (payload_len) {
- skb_put_data(skb, payload_buf, payload_len);
+ if (skb_is_nonlinear(pkt)) {
+ void *data = skb_put(skb, payload_len);
+
+ virtio_transport_copy_nonlinear_skb(pkt, data, payload_len);
+ } else {
+ skb_put_data(skb, pkt->data, payload_len);
+ }
}
return skb;
@@ -189,6 +240,82 @@ static u16 virtio_transport_get_type(struct sock *sk)
return VIRTIO_VSOCK_TYPE_SEQPACKET;
}
+/* Returns new sk_buff on success, otherwise returns NULL. */
+static struct sk_buff *virtio_transport_alloc_skb(struct virtio_vsock_pkt_info *info,
+ size_t payload_len,
+ bool zcopy,
+ u32 src_cid,
+ u32 src_port,
+ u32 dst_cid,
+ u32 dst_port)
+{
+ struct vsock_sock *vsk;
+ struct sk_buff *skb;
+ size_t skb_len;
+
+ skb_len = VIRTIO_VSOCK_SKB_HEADROOM;
+
+ if (!zcopy)
+ skb_len += payload_len;
+
+ skb = virtio_vsock_alloc_skb(skb_len, GFP_KERNEL);
+ if (!skb)
+ return NULL;
+
+ virtio_transport_init_hdr(skb, info, payload_len, src_cid, src_port,
+ dst_cid, dst_port);
+
+ vsk = info->vsk;
+
+ /* If 'vsk' != NULL then payload is always present, so we
+ * will never call '__zerocopy_sg_from_iter()' below without
+ * setting skb owner in 'skb_set_owner_w()'. The only case
+ * when 'vsk' == NULL is VIRTIO_VSOCK_OP_RST control message
+ * without payload.
+ */
+ WARN_ON_ONCE(!(vsk && (info->msg && payload_len)) && zcopy);
+
+ /* Set owner here, because '__zerocopy_sg_from_iter()' uses
+ * owner of skb without check to update 'sk_wmem_alloc'.
+ */
+ if (vsk)
+ skb_set_owner_w(skb, sk_vsock(vsk));
+
+ if (info->msg && payload_len > 0) {
+ int err;
+
+ err = virtio_transport_fill_skb(skb, info, payload_len, zcopy);
+ if (err)
+ goto out;
+
+ if (msg_data_left(info->msg) == 0 &&
+ info->type == VIRTIO_VSOCK_TYPE_SEQPACKET) {
+ struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
+
+ hdr->flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOM);
+
+ if (info->msg->msg_flags & MSG_EOR)
+ hdr->flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOR);
+ }
+ }
+
+ if (info->reply)
+ virtio_vsock_skb_set_reply(skb);
+
+ trace_virtio_transport_alloc_pkt(src_cid, src_port,
+ dst_cid, dst_port,
+ payload_len,
+ info->type,
+ info->op,
+ info->flags,
+ zcopy);
+
+ return skb;
+out:
+ kfree_skb(skb);
+ return NULL;
+}
+
/* This function can only be used on connecting/connected sockets,
* since a socket assigned to a transport is required.
*
@@ -197,10 +324,12 @@ static u16 virtio_transport_get_type(struct sock *sk)
static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
struct virtio_vsock_pkt_info *info)
{
+ u32 max_skb_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE;
u32 src_cid, src_port, dst_cid, dst_port;
const struct virtio_transport *t_ops;
struct virtio_vsock_sock *vvs;
u32 pkt_len = info->pkt_len;
+ bool can_zcopy = false;
u32 rest_len;
int ret;
@@ -229,15 +358,30 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW)
return pkt_len;
+ if (info->msg) {
+ /* If zerocopy is not enabled by 'setsockopt()', we behave as
+ * there is no MSG_ZEROCOPY flag set.
+ */
+ if (!sock_flag(sk_vsock(vsk), SOCK_ZEROCOPY))
+ info->msg->msg_flags &= ~MSG_ZEROCOPY;
+
+ if (info->msg->msg_flags & MSG_ZEROCOPY)
+ can_zcopy = virtio_transport_can_zcopy(t_ops, info, pkt_len);
+
+ if (can_zcopy)
+ max_skb_len = min_t(u32, VIRTIO_VSOCK_MAX_PKT_BUF_SIZE,
+ (MAX_SKB_FRAGS * PAGE_SIZE));
+ }
+
rest_len = pkt_len;
do {
struct sk_buff *skb;
size_t skb_len;
- skb_len = min_t(u32, VIRTIO_VSOCK_MAX_PKT_BUF_SIZE, rest_len);
+ skb_len = min(max_skb_len, rest_len);
- skb = virtio_transport_alloc_skb(info, skb_len,
+ skb = virtio_transport_alloc_skb(info, skb_len, can_zcopy,
src_cid, src_port,
dst_cid, dst_port);
if (!skb) {
@@ -245,6 +389,21 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
break;
}
+ /* We process buffer part by part, allocating skb on
+ * each iteration. If this is last skb for this buffer
+ * and MSG_ZEROCOPY mode is in use - we must allocate
+ * completion for the current syscall.
+ */
+ if (info->msg && info->msg->msg_flags & MSG_ZEROCOPY &&
+ skb_len == rest_len && info->op == VIRTIO_VSOCK_OP_RW) {
+ if (virtio_transport_init_zcopy_skb(vsk, skb,
+ info->msg,
+ can_zcopy)) {
+ ret = -ENOMEM;
+ break;
+ }
+ }
+
virtio_transport_inc_tx_pkt(vvs, skb);
ret = t_ops->send_pkt(skb);
@@ -364,9 +523,10 @@ virtio_transport_stream_do_peek(struct vsock_sock *vsk,
spin_unlock_bh(&vvs->rx_lock);
/* sk_lock is held by caller so no one else can dequeue.
- * Unlock rx_lock since memcpy_to_msg() may sleep.
+ * Unlock rx_lock since skb_copy_datagram_iter() may sleep.
*/
- err = memcpy_to_msg(msg, skb->data, bytes);
+ err = skb_copy_datagram_iter(skb, VIRTIO_VSOCK_SKB_CB(skb)->offset,
+ &msg->msg_iter, bytes);
if (err)
goto out;
@@ -410,25 +570,27 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk,
while (total < len && !skb_queue_empty(&vvs->rx_queue)) {
skb = skb_peek(&vvs->rx_queue);
- bytes = len - total;
- if (bytes > skb->len)
- bytes = skb->len;
+ bytes = min_t(size_t, len - total,
+ skb->len - VIRTIO_VSOCK_SKB_CB(skb)->offset);
/* sk_lock is held by caller so no one else can dequeue.
- * Unlock rx_lock since memcpy_to_msg() may sleep.
+ * Unlock rx_lock since skb_copy_datagram_iter() may sleep.
*/
spin_unlock_bh(&vvs->rx_lock);
- err = memcpy_to_msg(msg, skb->data, bytes);
+ err = skb_copy_datagram_iter(skb,
+ VIRTIO_VSOCK_SKB_CB(skb)->offset,
+ &msg->msg_iter, bytes);
if (err)
goto out;
spin_lock_bh(&vvs->rx_lock);
total += bytes;
- skb_pull(skb, bytes);
- if (skb->len == 0) {
+ VIRTIO_VSOCK_SKB_CB(skb)->offset += bytes;
+
+ if (skb->len == VIRTIO_VSOCK_SKB_CB(skb)->offset) {
u32 pkt_len = le32_to_cpu(virtio_vsock_hdr(skb)->len);
virtio_transport_dec_rx_pkt(vvs, pkt_len);
@@ -492,9 +654,10 @@ virtio_transport_seqpacket_do_peek(struct vsock_sock *vsk,
spin_unlock_bh(&vvs->rx_lock);
/* sk_lock is held by caller so no one else can dequeue.
- * Unlock rx_lock since memcpy_to_msg() may sleep.
+ * Unlock rx_lock since skb_copy_datagram_iter() may sleep.
*/
- err = memcpy_to_msg(msg, skb->data, bytes);
+ err = skb_copy_datagram_iter(skb, VIRTIO_VSOCK_SKB_CB(skb)->offset,
+ &msg->msg_iter, bytes);
if (err)
return err;
@@ -553,11 +716,13 @@ static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk,
int err;
/* sk_lock is held by caller so no one else can dequeue.
- * Unlock rx_lock since memcpy_to_msg() may sleep.
+ * Unlock rx_lock since skb_copy_datagram_iter() may sleep.
*/
spin_unlock_bh(&vvs->rx_lock);
- err = memcpy_to_msg(msg, skb->data, bytes_to_copy);
+ err = skb_copy_datagram_iter(skb, 0,
+ &msg->msg_iter,
+ bytes_to_copy);
if (err) {
/* Copy of message failed. Rest of
* fragments will be freed without copy.
@@ -954,7 +1119,7 @@ static int virtio_transport_reset_no_sock(const struct virtio_transport *t,
if (!t)
return -ENOTCONN;
- reply = virtio_transport_alloc_skb(&info, 0,
+ reply = virtio_transport_alloc_skb(&info, 0, false,
le64_to_cpu(hdr->dst_cid),
le32_to_cpu(hdr->dst_port),
le64_to_cpu(hdr->src_cid),
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 55f8b9b0e06d..7482d0aca504 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -1228,7 +1228,7 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
xs->dev = dev;
xs->zc = xs->umem->zc;
- xs->sg = !!(flags & XDP_USE_SG);
+ xs->sg = !!(xs->umem->flags & XDP_UMEM_SG_FLAG);
xs->queue_id = qid;
xp_add_xsk(xs->pool, xs);
diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c
index b3f7b310811e..49cb9f9a09be 100644
--- a/net/xdp/xsk_buff_pool.c
+++ b/net/xdp/xsk_buff_pool.c
@@ -170,6 +170,9 @@ int xp_assign_dev(struct xsk_buff_pool *pool,
if (err)
return err;
+ if (flags & XDP_USE_SG)
+ pool->umem->flags |= XDP_UMEM_SG_FLAG;
+
if (flags & XDP_USE_NEED_WAKEUP)
pool->uses_need_wakeup = true;
/* Tx needs to be explicitly woken up the first time. Also
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index d24b4d4f620e..5cdd3bca3637 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -2566,7 +2566,7 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)
default:
BUG();
}
- xdst = dst_alloc(dst_ops, NULL, 1, DST_OBSOLETE_NONE, 0);
+ xdst = dst_alloc(dst_ops, NULL, DST_OBSOLETE_NONE, 0);
if (likely(xdst)) {
memset_after(xdst, 0, u.dst);
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 4ccf4236031c..90af76fa9dd8 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -169,12 +169,16 @@ endif
TPROGS_CFLAGS += -Wall -O2
TPROGS_CFLAGS += -Wmissing-prototypes
TPROGS_CFLAGS += -Wstrict-prototypes
+TPROGS_CFLAGS += $(call try-run,\
+ printf "int main() { return 0; }" |\
+ $(CC) -Werror -fsanitize=bounds -x c - -o "$$TMP",-fsanitize=bounds,)
TPROGS_CFLAGS += -I$(objtree)/usr/include
TPROGS_CFLAGS += -I$(srctree)/tools/testing/selftests/bpf/
TPROGS_CFLAGS += -I$(LIBBPF_INCLUDE)
TPROGS_CFLAGS += -I$(srctree)/tools/include
TPROGS_CFLAGS += -I$(srctree)/tools/perf
+TPROGS_CFLAGS += -I$(srctree)/tools/lib
TPROGS_CFLAGS += -DHAVE_ATTR_TEST=0
ifdef SYSROOT
@@ -314,6 +318,9 @@ XDP_SAMPLE_CFLAGS += -Wall -O2 \
$(obj)/$(XDP_SAMPLE): TPROGS_CFLAGS = $(XDP_SAMPLE_CFLAGS)
$(obj)/$(XDP_SAMPLE): $(src)/xdp_sample_user.h $(src)/xdp_sample_shared.h
+# Override includes for trace_helpers.o because __must_check won't be defined
+# in our include path.
+$(obj)/$(TRACE_HELPERS): TPROGS_CFLAGS := $(TPROGS_CFLAGS) -D__must_check=
-include $(BPF_SAMPLES_PATH)/Makefile.target
diff --git a/samples/bpf/syscall_tp_user.c b/samples/bpf/syscall_tp_user.c
index 7a788bb837fc..7a09ac74fac0 100644
--- a/samples/bpf/syscall_tp_user.c
+++ b/samples/bpf/syscall_tp_user.c
@@ -17,9 +17,9 @@
static void usage(const char *cmd)
{
- printf("USAGE: %s [-i num_progs] [-h]\n", cmd);
- printf(" -i num_progs # number of progs of the test\n");
- printf(" -h # help\n");
+ printf("USAGE: %s [-i nr_tests] [-h]\n", cmd);
+ printf(" -i nr_tests # rounds of test to run\n");
+ printf(" -h # help\n");
}
static void verify_map(int map_id)
@@ -45,14 +45,14 @@ static void verify_map(int map_id)
}
}
-static int test(char *filename, int num_progs)
+static int test(char *filename, int nr_tests)
{
- int map0_fds[num_progs], map1_fds[num_progs], fd, i, j = 0;
- struct bpf_link *links[num_progs * 4];
- struct bpf_object *objs[num_progs];
+ int map0_fds[nr_tests], map1_fds[nr_tests], fd, i, j = 0;
+ struct bpf_link **links = NULL;
+ struct bpf_object *objs[nr_tests];
struct bpf_program *prog;
- for (i = 0; i < num_progs; i++) {
+ for (i = 0; i < nr_tests; i++) {
objs[i] = bpf_object__open_file(filename, NULL);
if (libbpf_get_error(objs[i])) {
fprintf(stderr, "opening BPF object file failed\n");
@@ -60,6 +60,19 @@ static int test(char *filename, int num_progs)
goto cleanup;
}
+ /* One-time initialization */
+ if (!links) {
+ int nr_progs = 0;
+
+ bpf_object__for_each_program(prog, objs[i])
+ nr_progs += 1;
+
+ links = calloc(nr_progs * nr_tests, sizeof(struct bpf_link *));
+
+ if (!links)
+ goto cleanup;
+ }
+
/* load BPF program */
if (bpf_object__load(objs[i])) {
fprintf(stderr, "loading BPF object file failed\n");
@@ -101,14 +114,18 @@ static int test(char *filename, int num_progs)
close(fd);
/* verify the map */
- for (i = 0; i < num_progs; i++) {
+ for (i = 0; i < nr_tests; i++) {
verify_map(map0_fds[i]);
verify_map(map1_fds[i]);
}
cleanup:
- for (j--; j >= 0; j--)
- bpf_link__destroy(links[j]);
+ if (links) {
+ for (j--; j >= 0; j--)
+ bpf_link__destroy(links[j]);
+
+ free(links);
+ }
for (i--; i >= 0; i--)
bpf_object__close(objs[i]);
@@ -117,13 +134,13 @@ cleanup:
int main(int argc, char **argv)
{
- int opt, num_progs = 1;
+ int opt, nr_tests = 1;
char filename[256];
while ((opt = getopt(argc, argv, "i:h")) != -1) {
switch (opt) {
case 'i':
- num_progs = atoi(optarg);
+ nr_tests = atoi(optarg);
break;
case 'h':
default:
@@ -134,5 +151,5 @@ int main(int argc, char **argv)
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
- return test(filename, num_progs);
+ return test(filename, nr_tests);
}
diff --git a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
index bd015ec9847b..2ce900f66d6e 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
@@ -36,11 +36,14 @@ CGROUP COMMANDS
| **cgroup_device** | **cgroup_inet4_bind** | **cgroup_inet6_bind** |
| **cgroup_inet4_post_bind** | **cgroup_inet6_post_bind** |
| **cgroup_inet4_connect** | **cgroup_inet6_connect** |
-| **cgroup_inet4_getpeername** | **cgroup_inet6_getpeername** |
+| **cgroup_unix_connect** | **cgroup_inet4_getpeername** |
+| **cgroup_inet6_getpeername** | **cgroup_unix_getpeername** |
| **cgroup_inet4_getsockname** | **cgroup_inet6_getsockname** |
-| **cgroup_udp4_sendmsg** | **cgroup_udp6_sendmsg** |
+| **cgroup_unix_getsockname** | **cgroup_udp4_sendmsg** |
+| **cgroup_udp6_sendmsg** | **cgroup_unix_sendmsg** |
| **cgroup_udp4_recvmsg** | **cgroup_udp6_recvmsg** |
-| **cgroup_sysctl** | **cgroup_getsockopt** | **cgroup_setsockopt** |
+| **cgroup_unix_recvmsg** | **cgroup_sysctl** |
+| **cgroup_getsockopt** | **cgroup_setsockopt** |
| **cgroup_inet_sock_release** }
| *ATTACH_FLAGS* := { **multi** | **override** }
@@ -102,21 +105,28 @@ DESCRIPTION
**post_bind6** return from bind(2) for an inet6 socket (since 4.17);
**connect4** call to connect(2) for an inet4 socket (since 4.17);
**connect6** call to connect(2) for an inet6 socket (since 4.17);
+ **connect_unix** call to connect(2) for a unix socket (since 6.7);
**sendmsg4** call to sendto(2), sendmsg(2), sendmmsg(2) for an
unconnected udp4 socket (since 4.18);
**sendmsg6** call to sendto(2), sendmsg(2), sendmmsg(2) for an
unconnected udp6 socket (since 4.18);
+ **sendmsg_unix** call to sendto(2), sendmsg(2), sendmmsg(2) for
+ an unconnected unix socket (since 6.7);
**recvmsg4** call to recvfrom(2), recvmsg(2), recvmmsg(2) for
an unconnected udp4 socket (since 5.2);
**recvmsg6** call to recvfrom(2), recvmsg(2), recvmmsg(2) for
an unconnected udp6 socket (since 5.2);
+ **recvmsg_unix** call to recvfrom(2), recvmsg(2), recvmmsg(2) for
+ an unconnected unix socket (since 6.7);
**sysctl** sysctl access (since 5.2);
**getsockopt** call to getsockopt (since 5.3);
**setsockopt** call to setsockopt (since 5.3);
**getpeername4** call to getpeername(2) for an inet4 socket (since 5.8);
**getpeername6** call to getpeername(2) for an inet6 socket (since 5.8);
+ **getpeername_unix** call to getpeername(2) for a unix socket (since 6.7);
**getsockname4** call to getsockname(2) for an inet4 socket (since 5.8);
**getsockname6** call to getsockname(2) for an inet6 socket (since 5.8).
+ **getsockname_unix** call to getsockname(2) for a unix socket (since 6.7);
**sock_release** closing an userspace inet socket (since 5.9).
**bpftool cgroup detach** *CGROUP* *ATTACH_TYPE* *PROG*
diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
index dcae81bd27ed..58e6a5b10ef7 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
@@ -47,9 +47,11 @@ PROG COMMANDS
| **cgroup/sock** | **cgroup/dev** | **lwt_in** | **lwt_out** | **lwt_xmit** |
| **lwt_seg6local** | **sockops** | **sk_skb** | **sk_msg** | **lirc_mode2** |
| **cgroup/bind4** | **cgroup/bind6** | **cgroup/post_bind4** | **cgroup/post_bind6** |
-| **cgroup/connect4** | **cgroup/connect6** | **cgroup/getpeername4** | **cgroup/getpeername6** |
-| **cgroup/getsockname4** | **cgroup/getsockname6** | **cgroup/sendmsg4** | **cgroup/sendmsg6** |
-| **cgroup/recvmsg4** | **cgroup/recvmsg6** | **cgroup/sysctl** |
+| **cgroup/connect4** | **cgroup/connect6** | **cgroup/connect_unix** |
+| **cgroup/getpeername4** | **cgroup/getpeername6** | **cgroup/getpeername_unix** |
+| **cgroup/getsockname4** | **cgroup/getsockname6** | **cgroup/getsockname_unix** |
+| **cgroup/sendmsg4** | **cgroup/sendmsg6** | **cgroup/sendmsg_unix** |
+| **cgroup/recvmsg4** | **cgroup/recvmsg6** | **cgroup/recvmsg_unix** | **cgroup/sysctl** |
| **cgroup/getsockopt** | **cgroup/setsockopt** | **cgroup/sock_release** |
| **struct_ops** | **fentry** | **fexit** | **freplace** | **sk_lookup**
| }
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index 085bf18f3659..6e4f7ce6bc01 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -480,13 +480,13 @@ _bpftool()
action tracepoint raw_tracepoint \
xdp perf_event cgroup/skb cgroup/sock \
cgroup/dev lwt_in lwt_out lwt_xmit \
- lwt_seg6local sockops sk_skb sk_msg \
- lirc_mode2 cgroup/bind4 cgroup/bind6 \
- cgroup/connect4 cgroup/connect6 \
- cgroup/getpeername4 cgroup/getpeername6 \
- cgroup/getsockname4 cgroup/getsockname6 \
- cgroup/sendmsg4 cgroup/sendmsg6 \
- cgroup/recvmsg4 cgroup/recvmsg6 \
+ lwt_seg6local sockops sk_skb sk_msg lirc_mode2 \
+ cgroup/bind4 cgroup/bind6 \
+ cgroup/connect4 cgroup/connect6 cgroup/connect_unix \
+ cgroup/getpeername4 cgroup/getpeername6 cgroup/getpeername_unix \
+ cgroup/getsockname4 cgroup/getsockname6 cgroup/getsockname_unix \
+ cgroup/sendmsg4 cgroup/sendmsg6 cgroup/sendmsg_unix \
+ cgroup/recvmsg4 cgroup/recvmsg6 cgroup/recvmsg_unix \
cgroup/post_bind4 cgroup/post_bind6 \
cgroup/sysctl cgroup/getsockopt \
cgroup/setsockopt cgroup/sock_release struct_ops \
diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c
index ac846b0805b4..af6898c0f388 100644
--- a/tools/bpf/bpftool/cgroup.c
+++ b/tools/bpf/bpftool/cgroup.c
@@ -28,13 +28,15 @@
" cgroup_device | cgroup_inet4_bind |\n" \
" cgroup_inet6_bind | cgroup_inet4_post_bind |\n" \
" cgroup_inet6_post_bind | cgroup_inet4_connect |\n" \
- " cgroup_inet6_connect | cgroup_inet4_getpeername |\n" \
- " cgroup_inet6_getpeername | cgroup_inet4_getsockname |\n" \
- " cgroup_inet6_getsockname | cgroup_udp4_sendmsg |\n" \
- " cgroup_udp6_sendmsg | cgroup_udp4_recvmsg |\n" \
- " cgroup_udp6_recvmsg | cgroup_sysctl |\n" \
- " cgroup_getsockopt | cgroup_setsockopt |\n" \
- " cgroup_inet_sock_release }"
+ " cgroup_inet6_connect | cgroup_unix_connect |\n" \
+ " cgroup_inet4_getpeername | cgroup_inet6_getpeername |\n" \
+ " cgroup_unix_getpeername | cgroup_inet4_getsockname |\n" \
+ " cgroup_inet6_getsockname | cgroup_unix_getsockname |\n" \
+ " cgroup_udp4_sendmsg | cgroup_udp6_sendmsg |\n" \
+ " cgroup_unix_sendmsg | cgroup_udp4_recvmsg |\n" \
+ " cgroup_udp6_recvmsg | cgroup_unix_recvmsg |\n" \
+ " cgroup_sysctl | cgroup_getsockopt |\n" \
+ " cgroup_setsockopt | cgroup_inet_sock_release }"
static unsigned int query_flags;
static struct btf *btf_vmlinux;
diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c
index 2883660d6b67..ee3ce2b8000d 100644
--- a/tools/bpf/bpftool/gen.c
+++ b/tools/bpf/bpftool/gen.c
@@ -708,17 +708,22 @@ static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *h
codegen("\
\n\
- skel->%1$s = skel_prep_map_data((void *)\"\\ \n\
- ", ident);
+ { \n\
+ static const char data[] __attribute__((__aligned__(8))) = \"\\\n\
+ ");
mmap_data = bpf_map__initial_value(map, &mmap_size);
print_hex(mmap_data, mmap_size);
codegen("\
\n\
- \", %1$zd, %2$zd); \n\
- if (!skel->%3$s) \n\
- goto cleanup; \n\
- skel->maps.%3$s.initial_value = (__u64) (long) skel->%3$s;\n\
- ", bpf_map_mmap_sz(map), mmap_size, ident);
+ \"; \n\
+ \n\
+ skel->%1$s = skel_prep_map_data((void *)data, %2$zd,\n\
+ sizeof(data) - 1);\n\
+ if (!skel->%1$s) \n\
+ goto cleanup; \n\
+ skel->maps.%1$s.initial_value = (__u64) (long) skel->%1$s;\n\
+ } \n\
+ ", ident, bpf_map_mmap_sz(map));
}
codegen("\
\n\
@@ -733,32 +738,30 @@ static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *h
{ \n\
struct bpf_load_and_run_opts opts = {}; \n\
int err; \n\
- \n\
- opts.ctx = (struct bpf_loader_ctx *)skel; \n\
- opts.data_sz = %2$d; \n\
- opts.data = (void *)\"\\ \n\
+ static const char opts_data[] __attribute__((__aligned__(8))) = \"\\\n\
",
- obj_name, opts.data_sz);
+ obj_name);
print_hex(opts.data, opts.data_sz);
codegen("\
\n\
\"; \n\
+ static const char opts_insn[] __attribute__((__aligned__(8))) = \"\\\n\
");
-
- codegen("\
- \n\
- opts.insns_sz = %d; \n\
- opts.insns = (void *)\"\\ \n\
- ",
- opts.insns_sz);
print_hex(opts.insns, opts.insns_sz);
codegen("\
\n\
\"; \n\
+ \n\
+ opts.ctx = (struct bpf_loader_ctx *)skel; \n\
+ opts.data_sz = sizeof(opts_data) - 1; \n\
+ opts.data = (void *)opts_data; \n\
+ opts.insns_sz = sizeof(opts_insn) - 1; \n\
+ opts.insns = (void *)opts_insn; \n\
+ \n\
err = bpf_load_and_run(&opts); \n\
if (err < 0) \n\
return err; \n\
- ", obj_name);
+ ");
bpf_object__for_each_map(map, obj) {
const char *mmap_flags;
@@ -1209,7 +1212,7 @@ static int do_skeleton(int argc, char **argv)
codegen("\
\n\
\n\
- s->data = (void *)%2$s__elf_bytes(&s->data_sz); \n\
+ s->data = %1$s__elf_bytes(&s->data_sz); \n\
\n\
obj->skeleton = s; \n\
return 0; \n\
@@ -1218,12 +1221,12 @@ static int do_skeleton(int argc, char **argv)
return err; \n\
} \n\
\n\
- static inline const void *%2$s__elf_bytes(size_t *sz) \n\
+ static inline const void *%1$s__elf_bytes(size_t *sz) \n\
{ \n\
- *sz = %1$d; \n\
- return (const void *)\"\\ \n\
- "
- , file_sz, obj_name);
+ static const char data[] __attribute__((__aligned__(8))) = \"\\\n\
+ ",
+ obj_name
+ );
/* embed contents of BPF object file */
print_hex(obj_data, file_sz);
@@ -1231,6 +1234,9 @@ static int do_skeleton(int argc, char **argv)
codegen("\
\n\
\"; \n\
+ \n\
+ *sz = sizeof(data) - 1; \n\
+ return (const void *)data; \n\
} \n\
\n\
#ifdef __cplusplus \n\
diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c
index 2e5c231e08ac..4b1407b05056 100644
--- a/tools/bpf/bpftool/link.c
+++ b/tools/bpf/bpftool/link.c
@@ -265,6 +265,7 @@ show_kprobe_multi_json(struct bpf_link_info *info, json_writer_t *wtr)
jsonw_bool_field(json_wtr, "retprobe",
info->kprobe_multi.flags & BPF_F_KPROBE_MULTI_RETURN);
jsonw_uint_field(json_wtr, "func_cnt", info->kprobe_multi.count);
+ jsonw_uint_field(json_wtr, "missed", info->kprobe_multi.missed);
jsonw_name(json_wtr, "funcs");
jsonw_start_array(json_wtr);
addrs = u64_to_ptr(info->kprobe_multi.addrs);
@@ -301,6 +302,7 @@ show_perf_event_kprobe_json(struct bpf_link_info *info, json_writer_t *wtr)
jsonw_string_field(wtr, "func",
u64_to_ptr(info->perf_event.kprobe.func_name));
jsonw_uint_field(wtr, "offset", info->perf_event.kprobe.offset);
+ jsonw_uint_field(wtr, "missed", info->perf_event.kprobe.missed);
}
static void
@@ -641,6 +643,8 @@ static void show_kprobe_multi_plain(struct bpf_link_info *info)
else
printf("\n\tkprobe.multi ");
printf("func_cnt %u ", info->kprobe_multi.count);
+ if (info->kprobe_multi.missed)
+ printf("missed %llu ", info->kprobe_multi.missed);
addrs = (__u64 *)u64_to_ptr(info->kprobe_multi.addrs);
qsort(addrs, info->kprobe_multi.count, sizeof(__u64), cmp_u64);
@@ -683,6 +687,8 @@ static void show_perf_event_kprobe_plain(struct bpf_link_info *info)
printf("%s", buf);
if (info->perf_event.kprobe.offset)
printf("+%#x", info->perf_event.kprobe.offset);
+ if (info->perf_event.kprobe.missed)
+ printf(" missed %llu", info->perf_event.kprobe.missed);
printf(" ");
}
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index 8443a149dd17..7ec4f5671e7a 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -2475,9 +2475,10 @@ static int do_help(int argc, char **argv)
" sk_reuseport | flow_dissector | cgroup/sysctl |\n"
" cgroup/bind4 | cgroup/bind6 | cgroup/post_bind4 |\n"
" cgroup/post_bind6 | cgroup/connect4 | cgroup/connect6 |\n"
- " cgroup/getpeername4 | cgroup/getpeername6 |\n"
- " cgroup/getsockname4 | cgroup/getsockname6 | cgroup/sendmsg4 |\n"
- " cgroup/sendmsg6 | cgroup/recvmsg4 | cgroup/recvmsg6 |\n"
+ " cgroup/connect_unix | cgroup/getpeername4 | cgroup/getpeername6 |\n"
+ " cgroup/getpeername_unix | cgroup/getsockname4 | cgroup/getsockname6 |\n"
+ " cgroup/getsockname_unix | cgroup/sendmsg4 | cgroup/sendmsg6 |\n"
+ " cgroup/sendmsg°unix | cgroup/recvmsg4 | cgroup/recvmsg6 | cgroup/recvmsg_unix |\n"
" cgroup/getsockopt | cgroup/setsockopt | cgroup/sock_release |\n"
" struct_ops | fentry | fexit | freplace | sk_lookup }\n"
" ATTACH_TYPE := { sk_msg_verdict | sk_skb_verdict | sk_skb_stream_verdict |\n"
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 0448700890f7..7ba61b75bc0e 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -932,7 +932,14 @@ enum bpf_map_type {
*/
BPF_MAP_TYPE_CGROUP_STORAGE = BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED,
BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
- BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
+ BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE_DEPRECATED,
+ /* BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE is available to bpf programs
+ * attaching to a cgroup. The new mechanism (BPF_MAP_TYPE_CGRP_STORAGE +
+ * local percpu kptr) supports all BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE
+ * functionality and more. So mark * BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE
+ * deprecated.
+ */
+ BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE = BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE_DEPRECATED,
BPF_MAP_TYPE_QUEUE,
BPF_MAP_TYPE_STACK,
BPF_MAP_TYPE_SK_STORAGE,
@@ -1040,6 +1047,11 @@ enum bpf_attach_type {
BPF_TCX_INGRESS,
BPF_TCX_EGRESS,
BPF_TRACE_UPROBE_MULTI,
+ BPF_CGROUP_UNIX_CONNECT,
+ BPF_CGROUP_UNIX_SENDMSG,
+ BPF_CGROUP_UNIX_RECVMSG,
+ BPF_CGROUP_UNIX_GETPEERNAME,
+ BPF_CGROUP_UNIX_GETSOCKNAME,
__MAX_BPF_ATTACH_TYPE
};
@@ -2697,8 +2709,8 @@ union bpf_attr {
* *bpf_socket* should be one of the following:
*
* * **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**.
- * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**
- * and **BPF_CGROUP_INET6_CONNECT**.
+ * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**,
+ * **BPF_CGROUP_INET6_CONNECT** and **BPF_CGROUP_UNIX_CONNECT**.
*
* This helper actually implements a subset of **setsockopt()**.
* It supports the following *level*\ s:
@@ -2936,8 +2948,8 @@ union bpf_attr {
* *bpf_socket* should be one of the following:
*
* * **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**.
- * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**
- * and **BPF_CGROUP_INET6_CONNECT**.
+ * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**,
+ * **BPF_CGROUP_INET6_CONNECT** and **BPF_CGROUP_UNIX_CONNECT**.
*
* This helper actually implements a subset of **getsockopt()**.
* It supports the same set of *optname*\ s that is supported by
@@ -3257,6 +3269,11 @@ union bpf_attr {
* and *params*->smac will not be set as output. A common
* use case is to call **bpf_redirect_neigh**\ () after
* doing **bpf_fib_lookup**\ ().
+ * **BPF_FIB_LOOKUP_SRC**
+ * Derive and set source IP addr in *params*->ipv{4,6}_src
+ * for the nexthop. If the src addr cannot be derived,
+ * **BPF_FIB_LKUP_RET_NO_SRC_ADDR** is returned. In this
+ * case, *params*->dmac and *params*->smac are not set either.
*
* *ctx* is either **struct xdp_md** for XDP programs or
* **struct sk_buff** tc cls_act programs.
@@ -5089,6 +5106,8 @@ union bpf_attr {
* **BPF_F_TIMER_ABS**
* Start the timer in absolute expire value instead of the
* default relative one.
+ * **BPF_F_TIMER_CPU_PIN**
+ * Timer will be pinned to the CPU of the caller.
*
* Return
* 0 on success.
@@ -6525,6 +6544,7 @@ struct bpf_link_info {
__aligned_u64 addrs;
__u32 count; /* in/out: kprobe_multi function count */
__u32 flags;
+ __u64 missed;
} kprobe_multi;
struct {
__u32 type; /* enum bpf_perf_event_type */
@@ -6540,6 +6560,7 @@ struct bpf_link_info {
__u32 name_len;
__u32 offset; /* offset from func_name */
__u64 addr;
+ __u64 missed;
} kprobe; /* BPF_PERF_EVENT_KPROBE, BPF_PERF_EVENT_KRETPROBE */
struct {
__aligned_u64 tp_name; /* in/out */
@@ -6953,6 +6974,7 @@ enum {
BPF_FIB_LOOKUP_OUTPUT = (1U << 1),
BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2),
BPF_FIB_LOOKUP_TBID = (1U << 3),
+ BPF_FIB_LOOKUP_SRC = (1U << 4),
};
enum {
@@ -6965,6 +6987,7 @@ enum {
BPF_FIB_LKUP_RET_UNSUPP_LWT, /* fwd requires encapsulation */
BPF_FIB_LKUP_RET_NO_NEIGH, /* no neighbor entry for nh */
BPF_FIB_LKUP_RET_FRAG_NEEDED, /* fragmentation required to fwd */
+ BPF_FIB_LKUP_RET_NO_SRC_ADDR, /* failed to derive IP src addr */
};
struct bpf_fib_lookup {
@@ -6999,6 +7022,9 @@ struct bpf_fib_lookup {
__u32 rt_metric;
};
+ /* input: source address to consider for lookup
+ * output: source address result from lookup
+ */
union {
__be32 ipv4_src;
__u32 ipv6_src[4]; /* in6_addr; network order */
@@ -7300,9 +7326,11 @@ struct bpf_core_relo {
* Flags to control bpf_timer_start() behaviour.
* - BPF_F_TIMER_ABS: Timeout passed is absolute time, by default it is
* relative to current time.
+ * - BPF_F_TIMER_CPU_PIN: Timer will be pinned to the CPU of the caller.
*/
enum {
BPF_F_TIMER_ABS = (1ULL << 0),
+ BPF_F_TIMER_CPU_PIN = (1ULL << 1),
};
/* BPF numbers iterator state */
diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h
index c1634b95c223..2943a151d4f1 100644
--- a/tools/include/uapi/linux/netdev.h
+++ b/tools/include/uapi/linux/netdev.h
@@ -38,11 +38,27 @@ enum netdev_xdp_act {
NETDEV_XDP_ACT_MASK = 127,
};
+/**
+ * enum netdev_xdp_rx_metadata
+ * @NETDEV_XDP_RX_METADATA_TIMESTAMP: Device is capable of exposing receive HW
+ * timestamp via bpf_xdp_metadata_rx_timestamp().
+ * @NETDEV_XDP_RX_METADATA_HASH: Device is capable of exposing receive packet
+ * hash via bpf_xdp_metadata_rx_hash().
+ */
+enum netdev_xdp_rx_metadata {
+ NETDEV_XDP_RX_METADATA_TIMESTAMP = 1,
+ NETDEV_XDP_RX_METADATA_HASH = 2,
+
+ /* private: */
+ NETDEV_XDP_RX_METADATA_MASK = 3,
+};
+
enum {
NETDEV_A_DEV_IFINDEX = 1,
NETDEV_A_DEV_PAD,
NETDEV_A_DEV_XDP_FEATURES,
NETDEV_A_DEV_XDP_ZC_MAX_SEGS,
+ NETDEV_A_DEV_XDP_RX_METADATA_FEATURES,
__NETDEV_A_DEV_MAX,
NETDEV_A_DEV_MAX = (__NETDEV_A_DEV_MAX - 1)
diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h
index bbab9ad9dc5a..77ceea575dc7 100644
--- a/tools/lib/bpf/bpf_helpers.h
+++ b/tools/lib/bpf/bpf_helpers.h
@@ -181,6 +181,7 @@ enum libbpf_tristate {
#define __ksym __attribute__((section(".ksyms")))
#define __kptr_untrusted __attribute__((btf_type_tag("kptr_untrusted")))
#define __kptr __attribute__((btf_type_tag("kptr")))
+#define __percpu_kptr __attribute__((btf_type_tag("percpu_kptr")))
#define bpf_ksym_exists(sym) ({ \
_Static_assert(!__builtin_constant_p(!!sym), #sym " should be marked as __weak"); \
diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h
index 3803479dbe10..1c13f8e88833 100644
--- a/tools/lib/bpf/bpf_tracing.h
+++ b/tools/lib/bpf/bpf_tracing.h
@@ -362,8 +362,6 @@ struct pt_regs___arm64 {
#define __PT_PARM7_REG a6
#define __PT_PARM8_REG a7
-/* riscv does not select ARCH_HAS_SYSCALL_WRAPPER. */
-#define PT_REGS_SYSCALL_REGS(ctx) ctx
#define __PT_PARM1_SYSCALL_REG __PT_PARM1_REG
#define __PT_PARM2_SYSCALL_REG __PT_PARM2_REG
#define __PT_PARM3_SYSCALL_REG __PT_PARM3_REG
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 8484b563b53d..ee95fd379d4d 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -448,6 +448,165 @@ static int btf_parse_type_sec(struct btf *btf)
return 0;
}
+static int btf_validate_str(const struct btf *btf, __u32 str_off, const char *what, __u32 type_id)
+{
+ const char *s;
+
+ s = btf__str_by_offset(btf, str_off);
+ if (!s) {
+ pr_warn("btf: type [%u]: invalid %s (string offset %u)\n", type_id, what, str_off);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int btf_validate_id(const struct btf *btf, __u32 id, __u32 ctx_id)
+{
+ const struct btf_type *t;
+
+ t = btf__type_by_id(btf, id);
+ if (!t) {
+ pr_warn("btf: type [%u]: invalid referenced type ID %u\n", ctx_id, id);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int btf_validate_type(const struct btf *btf, const struct btf_type *t, __u32 id)
+{
+ __u32 kind = btf_kind(t);
+ int err, i, n;
+
+ err = btf_validate_str(btf, t->name_off, "type name", id);
+ if (err)
+ return err;
+
+ switch (kind) {
+ case BTF_KIND_UNKN:
+ case BTF_KIND_INT:
+ case BTF_KIND_FWD:
+ case BTF_KIND_FLOAT:
+ break;
+ case BTF_KIND_PTR:
+ case BTF_KIND_TYPEDEF:
+ case BTF_KIND_VOLATILE:
+ case BTF_KIND_CONST:
+ case BTF_KIND_RESTRICT:
+ case BTF_KIND_VAR:
+ case BTF_KIND_DECL_TAG:
+ case BTF_KIND_TYPE_TAG:
+ err = btf_validate_id(btf, t->type, id);
+ if (err)
+ return err;
+ break;
+ case BTF_KIND_ARRAY: {
+ const struct btf_array *a = btf_array(t);
+
+ err = btf_validate_id(btf, a->type, id);
+ err = err ?: btf_validate_id(btf, a->index_type, id);
+ if (err)
+ return err;
+ break;
+ }
+ case BTF_KIND_STRUCT:
+ case BTF_KIND_UNION: {
+ const struct btf_member *m = btf_members(t);
+
+ n = btf_vlen(t);
+ for (i = 0; i < n; i++, m++) {
+ err = btf_validate_str(btf, m->name_off, "field name", id);
+ err = err ?: btf_validate_id(btf, m->type, id);
+ if (err)
+ return err;
+ }
+ break;
+ }
+ case BTF_KIND_ENUM: {
+ const struct btf_enum *m = btf_enum(t);
+
+ n = btf_vlen(t);
+ for (i = 0; i < n; i++, m++) {
+ err = btf_validate_str(btf, m->name_off, "enum name", id);
+ if (err)
+ return err;
+ }
+ break;
+ }
+ case BTF_KIND_ENUM64: {
+ const struct btf_enum64 *m = btf_enum64(t);
+
+ n = btf_vlen(t);
+ for (i = 0; i < n; i++, m++) {
+ err = btf_validate_str(btf, m->name_off, "enum name", id);
+ if (err)
+ return err;
+ }
+ break;
+ }
+ case BTF_KIND_FUNC: {
+ const struct btf_type *ft;
+
+ err = btf_validate_id(btf, t->type, id);
+ if (err)
+ return err;
+ ft = btf__type_by_id(btf, t->type);
+ if (btf_kind(ft) != BTF_KIND_FUNC_PROTO) {
+ pr_warn("btf: type [%u]: referenced type [%u] is not FUNC_PROTO\n", id, t->type);
+ return -EINVAL;
+ }
+ break;
+ }
+ case BTF_KIND_FUNC_PROTO: {
+ const struct btf_param *m = btf_params(t);
+
+ n = btf_vlen(t);
+ for (i = 0; i < n; i++, m++) {
+ err = btf_validate_str(btf, m->name_off, "param name", id);
+ err = err ?: btf_validate_id(btf, m->type, id);
+ if (err)
+ return err;
+ }
+ break;
+ }
+ case BTF_KIND_DATASEC: {
+ const struct btf_var_secinfo *m = btf_var_secinfos(t);
+
+ n = btf_vlen(t);
+ for (i = 0; i < n; i++, m++) {
+ err = btf_validate_id(btf, m->type, id);
+ if (err)
+ return err;
+ }
+ break;
+ }
+ default:
+ pr_warn("btf: type [%u]: unrecognized kind %u\n", id, kind);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+/* Validate basic sanity of BTF. It's intentionally less thorough than
+ * kernel's validation and validates only properties of BTF that libbpf relies
+ * on to be correct (e.g., valid type IDs, valid string offsets, etc)
+ */
+static int btf_sanity_check(const struct btf *btf)
+{
+ const struct btf_type *t;
+ __u32 i, n = btf__type_cnt(btf);
+ int err;
+
+ for (i = 1; i < n; i++) {
+ t = btf_type_by_id(btf, i);
+ err = btf_validate_type(btf, t, i);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
__u32 btf__type_cnt(const struct btf *btf)
{
return btf->start_id + btf->nr_types;
@@ -902,6 +1061,7 @@ static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf)
err = btf_parse_str_sec(btf);
err = err ?: btf_parse_type_sec(btf);
+ err = err ?: btf_sanity_check(btf);
if (err)
goto done;
diff --git a/tools/lib/bpf/elf.c b/tools/lib/bpf/elf.c
index 9d0296c1726a..2a158e8a8b7c 100644
--- a/tools/lib/bpf/elf.c
+++ b/tools/lib/bpf/elf.c
@@ -1,5 +1,8 @@
// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
#include <libelf.h>
#include <gelf.h>
#include <fcntl.h>
@@ -10,6 +13,17 @@
#define STRERR_BUFSIZE 128
+/* A SHT_GNU_versym section holds 16-bit words. This bit is set if
+ * the symbol is hidden and can only be seen when referenced using an
+ * explicit version number. This is a GNU extension.
+ */
+#define VERSYM_HIDDEN 0x8000
+
+/* This is the mask for the rest of the data in a word read from a
+ * SHT_GNU_versym section.
+ */
+#define VERSYM_VERSION 0x7fff
+
int elf_open(const char *binary_path, struct elf_fd *elf_fd)
{
char errmsg[STRERR_BUFSIZE];
@@ -64,13 +78,18 @@ struct elf_sym {
const char *name;
GElf_Sym sym;
GElf_Shdr sh;
+ int ver;
+ bool hidden;
};
struct elf_sym_iter {
Elf *elf;
Elf_Data *syms;
+ Elf_Data *versyms;
+ Elf_Data *verdefs;
size_t nr_syms;
size_t strtabidx;
+ size_t verdef_strtabidx;
size_t next_sym_idx;
struct elf_sym sym;
int st_type;
@@ -111,6 +130,26 @@ static int elf_sym_iter_new(struct elf_sym_iter *iter,
iter->nr_syms = iter->syms->d_size / sh.sh_entsize;
iter->elf = elf;
iter->st_type = st_type;
+
+ /* Version symbol table is meaningful to dynsym only */
+ if (sh_type != SHT_DYNSYM)
+ return 0;
+
+ scn = elf_find_next_scn_by_type(elf, SHT_GNU_versym, NULL);
+ if (!scn)
+ return 0;
+ iter->versyms = elf_getdata(scn, 0);
+
+ scn = elf_find_next_scn_by_type(elf, SHT_GNU_verdef, NULL);
+ if (!scn) {
+ pr_debug("elf: failed to find verdef ELF sections in '%s'\n", binary_path);
+ return -ENOENT;
+ }
+ if (!gelf_getshdr(scn, &sh))
+ return -EINVAL;
+ iter->verdef_strtabidx = sh.sh_link;
+ iter->verdefs = elf_getdata(scn, 0);
+
return 0;
}
@@ -119,6 +158,7 @@ static struct elf_sym *elf_sym_iter_next(struct elf_sym_iter *iter)
struct elf_sym *ret = &iter->sym;
GElf_Sym *sym = &ret->sym;
const char *name = NULL;
+ GElf_Versym versym;
Elf_Scn *sym_scn;
size_t idx;
@@ -138,12 +178,80 @@ static struct elf_sym *elf_sym_iter_next(struct elf_sym_iter *iter)
iter->next_sym_idx = idx + 1;
ret->name = name;
+ ret->ver = 0;
+ ret->hidden = false;
+
+ if (iter->versyms) {
+ if (!gelf_getversym(iter->versyms, idx, &versym))
+ continue;
+ ret->ver = versym & VERSYM_VERSION;
+ ret->hidden = versym & VERSYM_HIDDEN;
+ }
return ret;
}
return NULL;
}
+static const char *elf_get_vername(struct elf_sym_iter *iter, int ver)
+{
+ GElf_Verdaux verdaux;
+ GElf_Verdef verdef;
+ int offset;
+
+ offset = 0;
+ while (gelf_getverdef(iter->verdefs, offset, &verdef)) {
+ if (verdef.vd_ndx != ver) {
+ if (!verdef.vd_next)
+ break;
+
+ offset += verdef.vd_next;
+ continue;
+ }
+
+ if (!gelf_getverdaux(iter->verdefs, offset + verdef.vd_aux, &verdaux))
+ break;
+
+ return elf_strptr(iter->elf, iter->verdef_strtabidx, verdaux.vda_name);
+
+ }
+ return NULL;
+}
+
+static bool symbol_match(struct elf_sym_iter *iter, int sh_type, struct elf_sym *sym,
+ const char *name, size_t name_len, const char *lib_ver)
+{
+ const char *ver_name;
+
+ /* Symbols are in forms of func, func@LIB_VER or func@@LIB_VER
+ * make sure the func part matches the user specified name
+ */
+ if (strncmp(sym->name, name, name_len) != 0)
+ return false;
+
+ /* ...but we don't want a search for "foo" to match 'foo2" also, so any
+ * additional characters in sname should be of the form "@@LIB".
+ */
+ if (sym->name[name_len] != '\0' && sym->name[name_len] != '@')
+ return false;
+
+ /* If user does not specify symbol version, then we got a match */
+ if (!lib_ver)
+ return true;
+
+ /* If user specifies symbol version, for dynamic symbols,
+ * get version name from ELF verdef section for comparison.
+ */
+ if (sh_type == SHT_DYNSYM) {
+ ver_name = elf_get_vername(iter, sym->ver);
+ if (!ver_name)
+ return false;
+ return strcmp(ver_name, lib_ver) == 0;
+ }
+
+ /* For normal symbols, it is already in form of func@LIB_VER */
+ return strcmp(sym->name, name) == 0;
+}
/* Transform symbol's virtual address (absolute for binaries and relative
* for shared libs) into file offset, which is what kernel is expecting
@@ -166,7 +274,8 @@ static unsigned long elf_sym_offset(struct elf_sym *sym)
long elf_find_func_offset(Elf *elf, const char *binary_path, const char *name)
{
int i, sh_types[2] = { SHT_DYNSYM, SHT_SYMTAB };
- bool is_shared_lib, is_name_qualified;
+ const char *at_symbol, *lib_ver;
+ bool is_shared_lib;
long ret = -ENOENT;
size_t name_len;
GElf_Ehdr ehdr;
@@ -179,9 +288,18 @@ long elf_find_func_offset(Elf *elf, const char *binary_path, const char *name)
/* for shared lib case, we do not need to calculate relative offset */
is_shared_lib = ehdr.e_type == ET_DYN;
- name_len = strlen(name);
- /* Does name specify "@@LIB"? */
- is_name_qualified = strstr(name, "@@") != NULL;
+ /* Does name specify "@@LIB_VER" or "@LIB_VER" ? */
+ at_symbol = strchr(name, '@');
+ if (at_symbol) {
+ name_len = at_symbol - name;
+ /* skip second @ if it's @@LIB_VER case */
+ if (at_symbol[1] == '@')
+ at_symbol++;
+ lib_ver = at_symbol + 1;
+ } else {
+ name_len = strlen(name);
+ lib_ver = NULL;
+ }
/* Search SHT_DYNSYM, SHT_SYMTAB for symbol. This search order is used because if
* a binary is stripped, it may only have SHT_DYNSYM, and a fully-statically
@@ -201,20 +319,17 @@ long elf_find_func_offset(Elf *elf, const char *binary_path, const char *name)
goto out;
while ((sym = elf_sym_iter_next(&iter))) {
- /* User can specify func, func@@LIB or func@@LIB_VERSION. */
- if (strncmp(sym->name, name, name_len) != 0)
- continue;
- /* ...but we don't want a search for "foo" to match 'foo2" also, so any
- * additional characters in sname should be of the form "@@LIB".
- */
- if (!is_name_qualified && sym->name[name_len] != '\0' && sym->name[name_len] != '@')
+ if (!symbol_match(&iter, sh_types[i], sym, name, name_len, lib_ver))
continue;
cur_bind = GELF_ST_BIND(sym->sym.st_info);
if (ret > 0) {
/* handle multiple matches */
- if (last_bind != STB_WEAK && cur_bind != STB_WEAK) {
+ if (elf_sym_offset(sym) == ret) {
+ /* same offset, no problem */
+ continue;
+ } else if (last_bind != STB_WEAK && cur_bind != STB_WEAK) {
/* Only accept one non-weak bind. */
pr_warn("elf: ambiguous match for '%s', '%s' in '%s'\n",
sym->name, name, binary_path);
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 96ff1aa4bf6a..a295f6acf98f 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -82,17 +82,22 @@ static const char * const attach_type_name[] = {
[BPF_CGROUP_INET6_BIND] = "cgroup_inet6_bind",
[BPF_CGROUP_INET4_CONNECT] = "cgroup_inet4_connect",
[BPF_CGROUP_INET6_CONNECT] = "cgroup_inet6_connect",
+ [BPF_CGROUP_UNIX_CONNECT] = "cgroup_unix_connect",
[BPF_CGROUP_INET4_POST_BIND] = "cgroup_inet4_post_bind",
[BPF_CGROUP_INET6_POST_BIND] = "cgroup_inet6_post_bind",
[BPF_CGROUP_INET4_GETPEERNAME] = "cgroup_inet4_getpeername",
[BPF_CGROUP_INET6_GETPEERNAME] = "cgroup_inet6_getpeername",
+ [BPF_CGROUP_UNIX_GETPEERNAME] = "cgroup_unix_getpeername",
[BPF_CGROUP_INET4_GETSOCKNAME] = "cgroup_inet4_getsockname",
[BPF_CGROUP_INET6_GETSOCKNAME] = "cgroup_inet6_getsockname",
+ [BPF_CGROUP_UNIX_GETSOCKNAME] = "cgroup_unix_getsockname",
[BPF_CGROUP_UDP4_SENDMSG] = "cgroup_udp4_sendmsg",
[BPF_CGROUP_UDP6_SENDMSG] = "cgroup_udp6_sendmsg",
+ [BPF_CGROUP_UNIX_SENDMSG] = "cgroup_unix_sendmsg",
[BPF_CGROUP_SYSCTL] = "cgroup_sysctl",
[BPF_CGROUP_UDP4_RECVMSG] = "cgroup_udp4_recvmsg",
[BPF_CGROUP_UDP6_RECVMSG] = "cgroup_udp6_recvmsg",
+ [BPF_CGROUP_UNIX_RECVMSG] = "cgroup_unix_recvmsg",
[BPF_CGROUP_GETSOCKOPT] = "cgroup_getsockopt",
[BPF_CGROUP_SETSOCKOPT] = "cgroup_setsockopt",
[BPF_SK_SKB_STREAM_PARSER] = "sk_skb_stream_parser",
@@ -436,9 +441,11 @@ struct bpf_program {
int fd;
bool autoload;
bool autoattach;
+ bool sym_global;
bool mark_btf_static;
enum bpf_prog_type type;
enum bpf_attach_type expected_attach_type;
+ int exception_cb_idx;
int prog_ifindex;
__u32 attach_btf_obj_fd;
@@ -765,6 +772,7 @@ bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog,
prog->type = BPF_PROG_TYPE_UNSPEC;
prog->fd = -1;
+ prog->exception_cb_idx = -1;
/* libbpf's convention for SEC("?abc...") is that it's just like
* SEC("abc...") but the corresponding bpf_program starts out with
@@ -871,14 +879,16 @@ bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data,
if (err)
return err;
+ if (ELF64_ST_BIND(sym->st_info) != STB_LOCAL)
+ prog->sym_global = true;
+
/* if function is a global/weak symbol, but has restricted
* (STV_HIDDEN or STV_INTERNAL) visibility, mark its BTF FUNC
* as static to enable more permissive BPF verification mode
* with more outside context available to BPF verifier
*/
- if (ELF64_ST_BIND(sym->st_info) != STB_LOCAL
- && (ELF64_ST_VISIBILITY(sym->st_other) == STV_HIDDEN
- || ELF64_ST_VISIBILITY(sym->st_other) == STV_INTERNAL))
+ if (prog->sym_global && (ELF64_ST_VISIBILITY(sym->st_other) == STV_HIDDEN
+ || ELF64_ST_VISIBILITY(sym->st_other) == STV_INTERNAL))
prog->mark_btf_static = true;
nr_progs++;
@@ -3142,6 +3152,86 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
}
}
+ if (!kernel_supports(obj, FEAT_BTF_DECL_TAG))
+ goto skip_exception_cb;
+ for (i = 0; i < obj->nr_programs; i++) {
+ struct bpf_program *prog = &obj->programs[i];
+ int j, k, n;
+
+ if (prog_is_subprog(obj, prog))
+ continue;
+ n = btf__type_cnt(obj->btf);
+ for (j = 1; j < n; j++) {
+ const char *str = "exception_callback:", *name;
+ size_t len = strlen(str);
+ struct btf_type *t;
+
+ t = btf_type_by_id(obj->btf, j);
+ if (!btf_is_decl_tag(t) || btf_decl_tag(t)->component_idx != -1)
+ continue;
+
+ name = btf__str_by_offset(obj->btf, t->name_off);
+ if (strncmp(name, str, len))
+ continue;
+
+ t = btf_type_by_id(obj->btf, t->type);
+ if (!btf_is_func(t) || btf_func_linkage(t) != BTF_FUNC_GLOBAL) {
+ pr_warn("prog '%s': exception_callback:<value> decl tag not applied to the main program\n",
+ prog->name);
+ return -EINVAL;
+ }
+ if (strcmp(prog->name, btf__str_by_offset(obj->btf, t->name_off)))
+ continue;
+ /* Multiple callbacks are specified for the same prog,
+ * the verifier will eventually return an error for this
+ * case, hence simply skip appending a subprog.
+ */
+ if (prog->exception_cb_idx >= 0) {
+ prog->exception_cb_idx = -1;
+ break;
+ }
+
+ name += len;
+ if (str_is_empty(name)) {
+ pr_warn("prog '%s': exception_callback:<value> decl tag contains empty value\n",
+ prog->name);
+ return -EINVAL;
+ }
+
+ for (k = 0; k < obj->nr_programs; k++) {
+ struct bpf_program *subprog = &obj->programs[k];
+
+ if (!prog_is_subprog(obj, subprog))
+ continue;
+ if (strcmp(name, subprog->name))
+ continue;
+ /* Enforce non-hidden, as from verifier point of
+ * view it expects global functions, whereas the
+ * mark_btf_static fixes up linkage as static.
+ */
+ if (!subprog->sym_global || subprog->mark_btf_static) {
+ pr_warn("prog '%s': exception callback %s must be a global non-hidden function\n",
+ prog->name, subprog->name);
+ return -EINVAL;
+ }
+ /* Let's see if we already saw a static exception callback with the same name */
+ if (prog->exception_cb_idx >= 0) {
+ pr_warn("prog '%s': multiple subprogs with same name as exception callback '%s'\n",
+ prog->name, subprog->name);
+ return -EINVAL;
+ }
+ prog->exception_cb_idx = k;
+ break;
+ }
+
+ if (prog->exception_cb_idx >= 0)
+ continue;
+ pr_warn("prog '%s': cannot find exception callback '%s'\n", prog->name, name);
+ return -ENOENT;
+ }
+ }
+skip_exception_cb:
+
sanitize = btf_needs_sanitization(obj);
if (sanitize) {
const void *raw_data;
@@ -6235,13 +6325,45 @@ static int append_subprog_relos(struct bpf_program *main_prog, struct bpf_progra
}
static int
+bpf_object__append_subprog_code(struct bpf_object *obj, struct bpf_program *main_prog,
+ struct bpf_program *subprog)
+{
+ struct bpf_insn *insns;
+ size_t new_cnt;
+ int err;
+
+ subprog->sub_insn_off = main_prog->insns_cnt;
+
+ new_cnt = main_prog->insns_cnt + subprog->insns_cnt;
+ insns = libbpf_reallocarray(main_prog->insns, new_cnt, sizeof(*insns));
+ if (!insns) {
+ pr_warn("prog '%s': failed to realloc prog code\n", main_prog->name);
+ return -ENOMEM;
+ }
+ main_prog->insns = insns;
+ main_prog->insns_cnt = new_cnt;
+
+ memcpy(main_prog->insns + subprog->sub_insn_off, subprog->insns,
+ subprog->insns_cnt * sizeof(*insns));
+
+ pr_debug("prog '%s': added %zu insns from sub-prog '%s'\n",
+ main_prog->name, subprog->insns_cnt, subprog->name);
+
+ /* The subprog insns are now appended. Append its relos too. */
+ err = append_subprog_relos(main_prog, subprog);
+ if (err)
+ return err;
+ return 0;
+}
+
+static int
bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog,
struct bpf_program *prog)
{
- size_t sub_insn_idx, insn_idx, new_cnt;
+ size_t sub_insn_idx, insn_idx;
struct bpf_program *subprog;
- struct bpf_insn *insns, *insn;
struct reloc_desc *relo;
+ struct bpf_insn *insn;
int err;
err = reloc_prog_func_and_line_info(obj, main_prog, prog);
@@ -6316,25 +6438,7 @@ bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog,
* and relocate.
*/
if (subprog->sub_insn_off == 0) {
- subprog->sub_insn_off = main_prog->insns_cnt;
-
- new_cnt = main_prog->insns_cnt + subprog->insns_cnt;
- insns = libbpf_reallocarray(main_prog->insns, new_cnt, sizeof(*insns));
- if (!insns) {
- pr_warn("prog '%s': failed to realloc prog code\n", main_prog->name);
- return -ENOMEM;
- }
- main_prog->insns = insns;
- main_prog->insns_cnt = new_cnt;
-
- memcpy(main_prog->insns + subprog->sub_insn_off, subprog->insns,
- subprog->insns_cnt * sizeof(*insns));
-
- pr_debug("prog '%s': added %zu insns from sub-prog '%s'\n",
- main_prog->name, subprog->insns_cnt, subprog->name);
-
- /* The subprog insns are now appended. Append its relos too. */
- err = append_subprog_relos(main_prog, subprog);
+ err = bpf_object__append_subprog_code(obj, main_prog, subprog);
if (err)
return err;
err = bpf_object__reloc_code(obj, main_prog, subprog);
@@ -6568,6 +6672,25 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path)
prog->name, err);
return err;
}
+
+ /* Now, also append exception callback if it has not been done already. */
+ if (prog->exception_cb_idx >= 0) {
+ struct bpf_program *subprog = &obj->programs[prog->exception_cb_idx];
+
+ /* Calling exception callback directly is disallowed, which the
+ * verifier will reject later. In case it was processed already,
+ * we can skip this step, otherwise for all other valid cases we
+ * have to append exception callback now.
+ */
+ if (subprog->sub_insn_off == 0) {
+ err = bpf_object__append_subprog_code(obj, prog, subprog);
+ if (err)
+ return err;
+ err = bpf_object__reloc_code(obj, prog, subprog);
+ if (err)
+ return err;
+ }
+ }
}
/* Process data relos for main programs */
for (i = 0; i < obj->nr_programs; i++) {
@@ -8842,14 +8965,19 @@ static const struct bpf_sec_def section_defs[] = {
SEC_DEF("cgroup/bind6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_BIND, SEC_ATTACHABLE),
SEC_DEF("cgroup/connect4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_CONNECT, SEC_ATTACHABLE),
SEC_DEF("cgroup/connect6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_CONNECT, SEC_ATTACHABLE),
+ SEC_DEF("cgroup/connect_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_CONNECT, SEC_ATTACHABLE),
SEC_DEF("cgroup/sendmsg4", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_SENDMSG, SEC_ATTACHABLE),
SEC_DEF("cgroup/sendmsg6", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_SENDMSG, SEC_ATTACHABLE),
+ SEC_DEF("cgroup/sendmsg_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_SENDMSG, SEC_ATTACHABLE),
SEC_DEF("cgroup/recvmsg4", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_RECVMSG, SEC_ATTACHABLE),
SEC_DEF("cgroup/recvmsg6", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_RECVMSG, SEC_ATTACHABLE),
+ SEC_DEF("cgroup/recvmsg_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_RECVMSG, SEC_ATTACHABLE),
SEC_DEF("cgroup/getpeername4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETPEERNAME, SEC_ATTACHABLE),
SEC_DEF("cgroup/getpeername6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETPEERNAME, SEC_ATTACHABLE),
+ SEC_DEF("cgroup/getpeername_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_GETPEERNAME, SEC_ATTACHABLE),
SEC_DEF("cgroup/getsockname4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETSOCKNAME, SEC_ATTACHABLE),
SEC_DEF("cgroup/getsockname6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETSOCKNAME, SEC_ATTACHABLE),
+ SEC_DEF("cgroup/getsockname_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_GETSOCKNAME, SEC_ATTACHABLE),
SEC_DEF("cgroup/sysctl", CGROUP_SYSCTL, BPF_CGROUP_SYSCTL, SEC_ATTACHABLE),
SEC_DEF("cgroup/getsockopt", CGROUP_SOCKOPT, BPF_CGROUP_GETSOCKOPT, SEC_ATTACHABLE),
SEC_DEF("cgroup/setsockopt", CGROUP_SOCKOPT, BPF_CGROUP_SETSOCKOPT, SEC_ATTACHABLE),
@@ -10996,7 +11124,7 @@ static int attach_uprobe_multi(const struct bpf_program *prog, long cookie, stru
*link = NULL;
- n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%ms",
+ n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%m[^\n]",
&probe_type, &binary_path, &func_name);
switch (n) {
case 1:
@@ -11506,14 +11634,14 @@ err_out:
static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link)
{
DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts);
- char *probe_type = NULL, *binary_path = NULL, *func_name = NULL;
- int n, ret = -EINVAL;
+ char *probe_type = NULL, *binary_path = NULL, *func_name = NULL, *func_off;
+ int n, c, ret = -EINVAL;
long offset = 0;
*link = NULL;
- n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%m[a-zA-Z0-9_.]+%li",
- &probe_type, &binary_path, &func_name, &offset);
+ n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%m[^\n]",
+ &probe_type, &binary_path, &func_name);
switch (n) {
case 1:
/* handle SEC("u[ret]probe") - format is valid, but auto-attach is impossible. */
@@ -11524,7 +11652,17 @@ static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf
prog->name, prog->sec_name);
break;
case 3:
- case 4:
+ /* check if user specifies `+offset`, if yes, this should be
+ * the last part of the string, make sure sscanf read to EOL
+ */
+ func_off = strrchr(func_name, '+');
+ if (func_off) {
+ n = sscanf(func_off, "+%li%n", &offset, &c);
+ if (n == 1 && *(func_off + c) == '\0')
+ func_off[0] = '\0';
+ else
+ offset = 0;
+ }
opts.retprobe = strcmp(probe_type, "uretprobe") == 0 ||
strcmp(probe_type, "uretprobe.s") == 0;
if (opts.retprobe && offset != 0) {
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 0e52621cba43..475378438545 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -1229,6 +1229,7 @@ LIBBPF_API int bpf_tc_query(const struct bpf_tc_hook *hook,
/* Ring buffer APIs */
struct ring_buffer;
+struct ring;
struct user_ring_buffer;
typedef int (*ring_buffer_sample_fn)(void *ctx, void *data, size_t size);
@@ -1249,6 +1250,78 @@ LIBBPF_API int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms);
LIBBPF_API int ring_buffer__consume(struct ring_buffer *rb);
LIBBPF_API int ring_buffer__epoll_fd(const struct ring_buffer *rb);
+/**
+ * @brief **ring_buffer__ring()** returns the ringbuffer object inside a given
+ * ringbuffer manager representing a single BPF_MAP_TYPE_RINGBUF map instance.
+ *
+ * @param rb A ringbuffer manager object.
+ * @param idx An index into the ringbuffers contained within the ringbuffer
+ * manager object. The index is 0-based and corresponds to the order in which
+ * ring_buffer__add was called.
+ * @return A ringbuffer object on success; NULL and errno set if the index is
+ * invalid.
+ */
+LIBBPF_API struct ring *ring_buffer__ring(struct ring_buffer *rb,
+ unsigned int idx);
+
+/**
+ * @brief **ring__consumer_pos()** returns the current consumer position in the
+ * given ringbuffer.
+ *
+ * @param r A ringbuffer object.
+ * @return The current consumer position.
+ */
+LIBBPF_API unsigned long ring__consumer_pos(const struct ring *r);
+
+/**
+ * @brief **ring__producer_pos()** returns the current producer position in the
+ * given ringbuffer.
+ *
+ * @param r A ringbuffer object.
+ * @return The current producer position.
+ */
+LIBBPF_API unsigned long ring__producer_pos(const struct ring *r);
+
+/**
+ * @brief **ring__avail_data_size()** returns the number of bytes in the
+ * ringbuffer not yet consumed. This has no locking associated with it, so it
+ * can be inaccurate if operations are ongoing while this is called. However, it
+ * should still show the correct trend over the long-term.
+ *
+ * @param r A ringbuffer object.
+ * @return The number of bytes not yet consumed.
+ */
+LIBBPF_API size_t ring__avail_data_size(const struct ring *r);
+
+/**
+ * @brief **ring__size()** returns the total size of the ringbuffer's map data
+ * area (excluding special producer/consumer pages). Effectively this gives the
+ * amount of usable bytes of data inside the ringbuffer.
+ *
+ * @param r A ringbuffer object.
+ * @return The total size of the ringbuffer map data area.
+ */
+LIBBPF_API size_t ring__size(const struct ring *r);
+
+/**
+ * @brief **ring__map_fd()** returns the file descriptor underlying the given
+ * ringbuffer.
+ *
+ * @param r A ringbuffer object.
+ * @return The underlying ringbuffer file descriptor
+ */
+LIBBPF_API int ring__map_fd(const struct ring *r);
+
+/**
+ * @brief **ring__consume()** consumes available ringbuffer data without event
+ * polling.
+ *
+ * @param r A ringbuffer object.
+ * @return The number of records consumed (or INT_MAX, whichever is less), or
+ * a negative number if any of the callbacks return an error.
+ */
+LIBBPF_API int ring__consume(struct ring *r);
+
struct user_ring_buffer_opts {
size_t sz; /* size of this struct, for forward/backward compatibility */
};
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 57712321490f..cc973b678a39 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -400,4 +400,11 @@ LIBBPF_1.3.0 {
bpf_program__attach_netfilter;
bpf_program__attach_tcx;
bpf_program__attach_uprobe_multi;
+ ring__avail_data_size;
+ ring__consume;
+ ring__consumer_pos;
+ ring__map_fd;
+ ring__producer_pos;
+ ring__size;
+ ring_buffer__ring;
} LIBBPF_1.2.0;
diff --git a/tools/lib/bpf/ringbuf.c b/tools/lib/bpf/ringbuf.c
index 02199364db13..aacb64278a01 100644
--- a/tools/lib/bpf/ringbuf.c
+++ b/tools/lib/bpf/ringbuf.c
@@ -34,7 +34,7 @@ struct ring {
struct ring_buffer {
struct epoll_event *events;
- struct ring *rings;
+ struct ring **rings;
size_t page_size;
int epoll_fd;
int ring_cnt;
@@ -57,7 +57,7 @@ struct ringbuf_hdr {
__u32 pad;
};
-static void ringbuf_unmap_ring(struct ring_buffer *rb, struct ring *r)
+static void ringbuf_free_ring(struct ring_buffer *rb, struct ring *r)
{
if (r->consumer_pos) {
munmap(r->consumer_pos, rb->page_size);
@@ -67,6 +67,8 @@ static void ringbuf_unmap_ring(struct ring_buffer *rb, struct ring *r)
munmap(r->producer_pos, rb->page_size + 2 * (r->mask + 1));
r->producer_pos = NULL;
}
+
+ free(r);
}
/* Add extra RINGBUF maps to this ring buffer manager */
@@ -107,8 +109,10 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd,
return libbpf_err(-ENOMEM);
rb->events = tmp;
- r = &rb->rings[rb->ring_cnt];
- memset(r, 0, sizeof(*r));
+ r = calloc(1, sizeof(*r));
+ if (!r)
+ return libbpf_err(-ENOMEM);
+ rb->rings[rb->ring_cnt] = r;
r->map_fd = map_fd;
r->sample_cb = sample_cb;
@@ -121,7 +125,7 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd,
err = -errno;
pr_warn("ringbuf: failed to mmap consumer page for map fd=%d: %d\n",
map_fd, err);
- return libbpf_err(err);
+ goto err_out;
}
r->consumer_pos = tmp;
@@ -131,16 +135,16 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd,
*/
mmap_sz = rb->page_size + 2 * (__u64)info.max_entries;
if (mmap_sz != (__u64)(size_t)mmap_sz) {
+ err = -E2BIG;
pr_warn("ringbuf: ring buffer size (%u) is too big\n", info.max_entries);
- return libbpf_err(-E2BIG);
+ goto err_out;
}
tmp = mmap(NULL, (size_t)mmap_sz, PROT_READ, MAP_SHARED, map_fd, rb->page_size);
if (tmp == MAP_FAILED) {
err = -errno;
- ringbuf_unmap_ring(rb, r);
pr_warn("ringbuf: failed to mmap data pages for map fd=%d: %d\n",
map_fd, err);
- return libbpf_err(err);
+ goto err_out;
}
r->producer_pos = tmp;
r->data = tmp + rb->page_size;
@@ -152,14 +156,17 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd,
e->data.fd = rb->ring_cnt;
if (epoll_ctl(rb->epoll_fd, EPOLL_CTL_ADD, map_fd, e) < 0) {
err = -errno;
- ringbuf_unmap_ring(rb, r);
pr_warn("ringbuf: failed to epoll add map fd=%d: %d\n",
map_fd, err);
- return libbpf_err(err);
+ goto err_out;
}
rb->ring_cnt++;
return 0;
+
+err_out:
+ ringbuf_free_ring(rb, r);
+ return libbpf_err(err);
}
void ring_buffer__free(struct ring_buffer *rb)
@@ -170,7 +177,7 @@ void ring_buffer__free(struct ring_buffer *rb)
return;
for (i = 0; i < rb->ring_cnt; ++i)
- ringbuf_unmap_ring(rb, &rb->rings[i]);
+ ringbuf_free_ring(rb, rb->rings[i]);
if (rb->epoll_fd >= 0)
close(rb->epoll_fd);
@@ -278,7 +285,7 @@ int ring_buffer__consume(struct ring_buffer *rb)
int i;
for (i = 0; i < rb->ring_cnt; i++) {
- struct ring *ring = &rb->rings[i];
+ struct ring *ring = rb->rings[i];
err = ringbuf_process_ring(ring);
if (err < 0)
@@ -305,7 +312,7 @@ int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms)
for (i = 0; i < cnt; i++) {
__u32 ring_id = rb->events[i].data.fd;
- struct ring *ring = &rb->rings[ring_id];
+ struct ring *ring = rb->rings[ring_id];
err = ringbuf_process_ring(ring);
if (err < 0)
@@ -323,6 +330,58 @@ int ring_buffer__epoll_fd(const struct ring_buffer *rb)
return rb->epoll_fd;
}
+struct ring *ring_buffer__ring(struct ring_buffer *rb, unsigned int idx)
+{
+ if (idx >= rb->ring_cnt)
+ return errno = ERANGE, NULL;
+
+ return rb->rings[idx];
+}
+
+unsigned long ring__consumer_pos(const struct ring *r)
+{
+ /* Synchronizes with smp_store_release() in ringbuf_process_ring(). */
+ return smp_load_acquire(r->consumer_pos);
+}
+
+unsigned long ring__producer_pos(const struct ring *r)
+{
+ /* Synchronizes with smp_store_release() in __bpf_ringbuf_reserve() in
+ * the kernel.
+ */
+ return smp_load_acquire(r->producer_pos);
+}
+
+size_t ring__avail_data_size(const struct ring *r)
+{
+ unsigned long cons_pos, prod_pos;
+
+ cons_pos = ring__consumer_pos(r);
+ prod_pos = ring__producer_pos(r);
+ return prod_pos - cons_pos;
+}
+
+size_t ring__size(const struct ring *r)
+{
+ return r->mask + 1;
+}
+
+int ring__map_fd(const struct ring *r)
+{
+ return r->map_fd;
+}
+
+int ring__consume(struct ring *r)
+{
+ int64_t res;
+
+ res = ringbuf_process_ring(r);
+ if (res < 0)
+ return libbpf_err(res);
+
+ return res > INT_MAX ? INT_MAX : res;
+}
+
static void user_ringbuf_unmap_ring(struct user_ring_buffer *rb)
{
if (rb->consumer_pos) {
diff --git a/tools/net/ynl/generated/netdev-user.c b/tools/net/ynl/generated/netdev-user.c
index 68b408ca0f7f..b5ffe8cd1144 100644
--- a/tools/net/ynl/generated/netdev-user.c
+++ b/tools/net/ynl/generated/netdev-user.c
@@ -45,12 +45,26 @@ const char *netdev_xdp_act_str(enum netdev_xdp_act value)
return netdev_xdp_act_strmap[value];
}
+static const char * const netdev_xdp_rx_metadata_strmap[] = {
+ [0] = "timestamp",
+ [1] = "hash",
+};
+
+const char *netdev_xdp_rx_metadata_str(enum netdev_xdp_rx_metadata value)
+{
+ value = ffs(value) - 1;
+ if (value < 0 || value >= (int)MNL_ARRAY_SIZE(netdev_xdp_rx_metadata_strmap))
+ return NULL;
+ return netdev_xdp_rx_metadata_strmap[value];
+}
+
/* Policies */
struct ynl_policy_attr netdev_dev_policy[NETDEV_A_DEV_MAX + 1] = {
[NETDEV_A_DEV_IFINDEX] = { .name = "ifindex", .type = YNL_PT_U32, },
[NETDEV_A_DEV_PAD] = { .name = "pad", .type = YNL_PT_IGNORE, },
[NETDEV_A_DEV_XDP_FEATURES] = { .name = "xdp-features", .type = YNL_PT_U64, },
[NETDEV_A_DEV_XDP_ZC_MAX_SEGS] = { .name = "xdp-zc-max-segs", .type = YNL_PT_U32, },
+ [NETDEV_A_DEV_XDP_RX_METADATA_FEATURES] = { .name = "xdp-rx-metadata-features", .type = YNL_PT_U64, },
};
struct ynl_policy_nest netdev_dev_nest = {
@@ -97,6 +111,11 @@ int netdev_dev_get_rsp_parse(const struct nlmsghdr *nlh, void *data)
return MNL_CB_ERROR;
dst->_present.xdp_zc_max_segs = 1;
dst->xdp_zc_max_segs = mnl_attr_get_u32(attr);
+ } else if (type == NETDEV_A_DEV_XDP_RX_METADATA_FEATURES) {
+ if (ynl_attr_validate(yarg, attr))
+ return MNL_CB_ERROR;
+ dst->_present.xdp_rx_metadata_features = 1;
+ dst->xdp_rx_metadata_features = mnl_attr_get_u64(attr);
}
}
diff --git a/tools/net/ynl/generated/netdev-user.h b/tools/net/ynl/generated/netdev-user.h
index 0952d3261f4d..b4351ff34595 100644
--- a/tools/net/ynl/generated/netdev-user.h
+++ b/tools/net/ynl/generated/netdev-user.h
@@ -18,6 +18,7 @@ extern const struct ynl_family ynl_netdev_family;
/* Enums */
const char *netdev_op_str(int op);
const char *netdev_xdp_act_str(enum netdev_xdp_act value);
+const char *netdev_xdp_rx_metadata_str(enum netdev_xdp_rx_metadata value);
/* Common nested types */
/* ============== NETDEV_CMD_DEV_GET ============== */
@@ -48,11 +49,13 @@ struct netdev_dev_get_rsp {
__u32 ifindex:1;
__u32 xdp_features:1;
__u32 xdp_zc_max_segs:1;
+ __u32 xdp_rx_metadata_features:1;
} _present;
__u32 ifindex;
__u64 xdp_features;
__u32 xdp_zc_max_segs;
+ __u64 xdp_rx_metadata_features;
};
void netdev_dev_get_rsp_free(struct netdev_dev_get_rsp *rsp);
diff --git a/tools/net/ynl/samples/Makefile b/tools/net/ynl/samples/Makefile
index f2db8bb78309..32abbc0af39e 100644
--- a/tools/net/ynl/samples/Makefile
+++ b/tools/net/ynl/samples/Makefile
@@ -4,7 +4,7 @@ include ../Makefile.deps
CC=gcc
CFLAGS=-std=gnu11 -O2 -W -Wall -Wextra -Wno-unused-parameter -Wshadow \
- -I../lib/ -I../generated/ -idirafter $(UAPI_PATH)
+ -I../../../include/uapi -I../lib/ -I../generated/ -idirafter $(UAPI_PATH)
ifeq ("$(DEBUG)","1")
CFLAGS += -g -fsanitize=address -fsanitize=leak -static-libasan
endif
diff --git a/tools/net/ynl/samples/netdev.c b/tools/net/ynl/samples/netdev.c
index 06433400dddd..b828225daad0 100644
--- a/tools/net/ynl/samples/netdev.c
+++ b/tools/net/ynl/samples/netdev.c
@@ -32,12 +32,18 @@ static void netdev_print_device(struct netdev_dev_get_rsp *d, unsigned int op)
if (!d->_present.xdp_features)
return;
- printf("%llx:", d->xdp_features);
+ printf("xdp-features (%llx):", d->xdp_features);
for (int i = 0; d->xdp_features > 1U << i; i++) {
if (d->xdp_features & (1U << i))
printf(" %s", netdev_xdp_act_str(1 << i));
}
+ printf(" xdp-rx-metadata-features (%llx):", d->xdp_rx_metadata_features);
+ for (int i = 0; d->xdp_rx_metadata_features > 1U << i; i++) {
+ if (d->xdp_rx_metadata_features & (1U << i))
+ printf(" %s", netdev_xdp_rx_metadata_str(1 << i));
+ }
+
printf(" xdp-zc-max-segs=%u", d->xdp_zc_max_segs);
name = netdev_op_str(op);
diff --git a/tools/testing/selftests/bpf/DENYLIST.aarch64 b/tools/testing/selftests/bpf/DENYLIST.aarch64
index 3babaf3eee5c..5c2cc7e8c5d0 100644
--- a/tools/testing/selftests/bpf/DENYLIST.aarch64
+++ b/tools/testing/selftests/bpf/DENYLIST.aarch64
@@ -1,5 +1,6 @@
bpf_cookie/multi_kprobe_attach_api # kprobe_multi_link_api_subtest:FAIL:fentry_raw_skel_load unexpected error: -3
bpf_cookie/multi_kprobe_link_api # kprobe_multi_link_api_subtest:FAIL:fentry_raw_skel_load unexpected error: -3
+exceptions # JIT does not support calling kfunc bpf_throw: -524
fexit_sleep # The test never returns. The remaining tests cannot start.
kprobe_multi_bench_attach # needs CONFIG_FPROBE
kprobe_multi_test # needs CONFIG_FPROBE
@@ -9,3 +10,4 @@ fexit_test/fexit_many_args # fexit_many_args:FAIL:fexit_ma
fill_link_info/kprobe_multi_link_info # bpf_program__attach_kprobe_multi_opts unexpected error: -95
fill_link_info/kretprobe_multi_link_info # bpf_program__attach_kprobe_multi_opts unexpected error: -95
fill_link_info/kprobe_multi_invalid_ubuff # bpf_program__attach_kprobe_multi_opts unexpected error: -95
+missed/kprobe_recursion # missed_kprobe_recursion__attach unexpected error: -95 (errno 95)
diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x
index 5061d9e24c16..1a63996c0304 100644
--- a/tools/testing/selftests/bpf/DENYLIST.s390x
+++ b/tools/testing/selftests/bpf/DENYLIST.s390x
@@ -1,29 +1,5 @@
# TEMPORARY
# Alphabetical order
-bloom_filter_map # failed to find kernel BTF type ID of '__x64_sys_getpgid': -3 (?)
-bpf_cookie # failed to open_and_load program: -524 (trampoline)
-bpf_loop # attaches to __x64_sys_nanosleep
-cgrp_local_storage # prog_attach unexpected error: -524 (trampoline)
-dynptr/test_dynptr_skb_data
-dynptr/test_skb_readonly
-fexit_sleep # fexit_skel_load fexit skeleton failed (trampoline)
+exceptions # JIT does not support calling kfunc bpf_throw (exceptions)
get_stack_raw_tp # user_stack corrupted user stack (no backchain userspace)
-iters/testmod_seq* # s390x doesn't support kfuncs in modules yet
-kprobe_multi_bench_attach # bpf_program__attach_kprobe_multi_opts unexpected error: -95
-kprobe_multi_test # relies on fentry
-ksyms_btf/weak_ksyms* # test_ksyms_weak__open_and_load unexpected error: -22 (kfunc)
-ksyms_module # test_ksyms_module__open_and_load unexpected error: -9 (?)
-ksyms_module_libbpf # JIT does not support calling kernel function (kfunc)
-ksyms_module_lskel # test_ksyms_module_lskel__open_and_load unexpected error: -9 (?)
-module_attach # skel_attach skeleton attach failed: -524 (trampoline)
-ringbuf # skel_load skeleton load failed (?)
stacktrace_build_id # compare_map_keys stackid_hmap vs. stackmap err -2 errno 2 (?)
-test_lsm # attach unexpected error: -524 (trampoline)
-trace_printk # trace_printk__load unexpected error: -2 (errno 2) (?)
-trace_vprintk # trace_vprintk__open_and_load unexpected error: -9 (?)
-unpriv_bpf_disabled # fentry
-user_ringbuf # failed to find kernel BTF type ID of '__s390x_sys_prctl': -3 (?)
-verif_stats # trace_vprintk__open_and_load unexpected error: -9 (?)
-xdp_bonding # failed to auto-attach program 'trace_on_entry': -524 (trampoline)
-xdp_metadata # JIT does not support calling kernel function (kfunc)
-test_task_under_cgroup # JIT does not support calling kernel function (kfunc)
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index caede9b574cb..4225f975fce3 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -27,7 +27,11 @@ endif
BPF_GCC ?= $(shell command -v bpf-gcc;)
SAN_CFLAGS ?=
SAN_LDFLAGS ?= $(SAN_CFLAGS)
-CFLAGS += -g -O0 -rdynamic -Wall -Werror $(GENFLAGS) $(SAN_CFLAGS) \
+RELEASE ?=
+OPT_FLAGS ?= $(if $(RELEASE),-O2,-O0)
+CFLAGS += -g $(OPT_FLAGS) -rdynamic \
+ -Wall -Werror \
+ $(GENFLAGS) $(SAN_CFLAGS) \
-I$(CURDIR) -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR) \
-I$(TOOLSINCDIR) -I$(APIDIR) -I$(OUTPUT)
LDFLAGS += $(SAN_LDFLAGS)
@@ -104,7 +108,7 @@ TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \
xskxceiver xdp_redirect_multi xdp_synproxy veristat xdp_hw_metadata \
xdp_features
-TEST_GEN_FILES += liburandom_read.so urandom_read sign-file
+TEST_GEN_FILES += liburandom_read.so urandom_read sign-file uprobe_multi
# Emit succinct information message describing current building step
# $1 - generic step name (e.g., CC, LINK, etc);
@@ -188,7 +192,7 @@ $(OUTPUT)/%:%.c
$(Q)$(LINK.c) $^ $(LDLIBS) -o $@
# LLVM's ld.lld doesn't support all the architectures, so use it only on x86
-ifeq ($(SRCARCH),x86)
+ifeq ($(SRCARCH),$(filter $(SRCARCH),x86 riscv))
LLD := lld
else
LLD := ld
@@ -196,17 +200,20 @@ endif
# Filter out -static for liburandom_read.so and its dependent targets so that static builds
# do not fail. Static builds leave urandom_read relying on system-wide shared libraries.
-$(OUTPUT)/liburandom_read.so: urandom_read_lib1.c urandom_read_lib2.c
+$(OUTPUT)/liburandom_read.so: urandom_read_lib1.c urandom_read_lib2.c liburandom_read.map
$(call msg,LIB,,$@)
- $(Q)$(CLANG) $(filter-out -static,$(CFLAGS) $(LDFLAGS)) \
- $^ $(filter-out -static,$(LDLIBS)) \
+ $(Q)$(CLANG) $(CLANG_TARGET_ARCH) \
+ $(filter-out -static,$(CFLAGS) $(LDFLAGS)) \
+ $(filter %.c,$^) $(filter-out -static,$(LDLIBS)) \
-fuse-ld=$(LLD) -Wl,-znoseparate-code -Wl,--build-id=sha1 \
+ -Wl,--version-script=liburandom_read.map \
-fPIC -shared -o $@
$(OUTPUT)/urandom_read: urandom_read.c urandom_read_aux.c $(OUTPUT)/liburandom_read.so
$(call msg,BINARY,,$@)
- $(Q)$(CLANG) $(filter-out -static,$(CFLAGS) $(LDFLAGS)) $(filter %.c,$^) \
- -lurandom_read $(filter-out -static,$(LDLIBS)) -L$(OUTPUT) \
+ $(Q)$(CLANG) $(CLANG_TARGET_ARCH) \
+ $(filter-out -static,$(CFLAGS) $(LDFLAGS)) $(filter %.c,$^) \
+ -lurandom_read $(filter-out -static,$(LDLIBS)) -L$(OUTPUT) \
-fuse-ld=$(LLD) -Wl,-znoseparate-code -Wl,--build-id=sha1 \
-Wl,-rpath=. -o $@
@@ -238,7 +245,7 @@ $(OUTPUT)/runqslower: $(BPFOBJ) | $(DEFAULT_BPFTOOL) $(RUNQSLOWER_OUTPUT)
BPFTOOL_OUTPUT=$(HOST_BUILD_DIR)/bpftool/ \
BPFOBJ_OUTPUT=$(BUILD_DIR)/libbpf \
BPFOBJ=$(BPFOBJ) BPF_INCLUDE=$(INCLUDE_DIR) \
- EXTRA_CFLAGS='-g -O0 $(SAN_CFLAGS)' \
+ EXTRA_CFLAGS='-g $(OPT_FLAGS) $(SAN_CFLAGS)' \
EXTRA_LDFLAGS='$(SAN_LDFLAGS)' && \
cp $(RUNQSLOWER_OUTPUT)runqslower $@
@@ -276,7 +283,7 @@ $(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \
$(HOST_BPFOBJ) | $(HOST_BUILD_DIR)/bpftool
$(Q)$(MAKE) $(submake_extras) -C $(BPFTOOLDIR) \
ARCH= CROSS_COMPILE= CC="$(HOSTCC)" LD="$(HOSTLD)" \
- EXTRA_CFLAGS='-g -O0' \
+ EXTRA_CFLAGS='-g $(OPT_FLAGS)' \
OUTPUT=$(HOST_BUILD_DIR)/bpftool/ \
LIBBPF_OUTPUT=$(HOST_BUILD_DIR)/libbpf/ \
LIBBPF_DESTDIR=$(HOST_SCRATCH_DIR)/ \
@@ -287,7 +294,7 @@ $(CROSS_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \
$(BPFOBJ) | $(BUILD_DIR)/bpftool
$(Q)$(MAKE) $(submake_extras) -C $(BPFTOOLDIR) \
ARCH=$(ARCH) CROSS_COMPILE=$(CROSS_COMPILE) \
- EXTRA_CFLAGS='-g -O0' \
+ EXTRA_CFLAGS='-g $(OPT_FLAGS)' \
OUTPUT=$(BUILD_DIR)/bpftool/ \
LIBBPF_OUTPUT=$(BUILD_DIR)/libbpf/ \
LIBBPF_DESTDIR=$(SCRATCH_DIR)/ \
@@ -310,7 +317,7 @@ $(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \
$(APIDIR)/linux/bpf.h \
| $(BUILD_DIR)/libbpf
$(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) OUTPUT=$(BUILD_DIR)/libbpf/ \
- EXTRA_CFLAGS='-g -O0 $(SAN_CFLAGS)' \
+ EXTRA_CFLAGS='-g $(OPT_FLAGS) $(SAN_CFLAGS)' \
EXTRA_LDFLAGS='$(SAN_LDFLAGS)' \
DESTDIR=$(SCRATCH_DIR) prefix= all install_headers
@@ -319,7 +326,7 @@ $(HOST_BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \
$(APIDIR)/linux/bpf.h \
| $(HOST_BUILD_DIR)/libbpf
$(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) \
- EXTRA_CFLAGS='-g -O0' ARCH= CROSS_COMPILE= \
+ EXTRA_CFLAGS='-g $(OPT_FLAGS)' ARCH= CROSS_COMPILE= \
OUTPUT=$(HOST_BUILD_DIR)/libbpf/ \
CC="$(HOSTCC)" LD="$(HOSTLD)" \
DESTDIR=$(HOST_SCRATCH_DIR)/ prefix= all install_headers
@@ -640,7 +647,9 @@ $(OUTPUT)/test_verifier: test_verifier.c verifier/tests.h $(BPFOBJ) | $(OUTPUT)
$(call msg,BINARY,,$@)
$(Q)$(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@
-$(OUTPUT)/xskxceiver: xskxceiver.c xskxceiver.h $(OUTPUT)/xsk.o $(OUTPUT)/xsk_xdp_progs.skel.h $(BPFOBJ) | $(OUTPUT)
+# Include find_bit.c to compile xskxceiver.
+EXTRA_SRC := $(TOOLSDIR)/lib/find_bit.c
+$(OUTPUT)/xskxceiver: $(EXTRA_SRC) xskxceiver.c xskxceiver.h $(OUTPUT)/xsk.o $(OUTPUT)/xsk_xdp_progs.skel.h $(BPFOBJ) | $(OUTPUT)
$(call msg,BINARY,,$@)
$(Q)$(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@
diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h
index 209811b1993a..2c8cb3f61529 100644
--- a/tools/testing/selftests/bpf/bpf_experimental.h
+++ b/tools/testing/selftests/bpf/bpf_experimental.h
@@ -131,4 +131,331 @@ extern int bpf_rbtree_add_impl(struct bpf_rb_root *root, struct bpf_rb_node *nod
*/
extern struct bpf_rb_node *bpf_rbtree_first(struct bpf_rb_root *root) __ksym;
+/* Description
+ * Allocates a percpu object of the type represented by 'local_type_id' in
+ * program BTF. User may use the bpf_core_type_id_local macro to pass the
+ * type ID of a struct in program BTF.
+ *
+ * The 'local_type_id' parameter must be a known constant.
+ * The 'meta' parameter is rewritten by the verifier, no need for BPF
+ * program to set it.
+ * Returns
+ * A pointer to a percpu object of the type corresponding to the passed in
+ * 'local_type_id', or NULL on failure.
+ */
+extern void *bpf_percpu_obj_new_impl(__u64 local_type_id, void *meta) __ksym;
+
+/* Convenience macro to wrap over bpf_percpu_obj_new_impl */
+#define bpf_percpu_obj_new(type) ((type __percpu_kptr *)bpf_percpu_obj_new_impl(bpf_core_type_id_local(type), NULL))
+
+/* Description
+ * Free an allocated percpu object. All fields of the object that require
+ * destruction will be destructed before the storage is freed.
+ *
+ * The 'meta' parameter is rewritten by the verifier, no need for BPF
+ * program to set it.
+ * Returns
+ * Void.
+ */
+extern void bpf_percpu_obj_drop_impl(void *kptr, void *meta) __ksym;
+
+struct bpf_iter_task_vma;
+
+extern int bpf_iter_task_vma_new(struct bpf_iter_task_vma *it,
+ struct task_struct *task,
+ unsigned long addr) __ksym;
+extern struct vm_area_struct *bpf_iter_task_vma_next(struct bpf_iter_task_vma *it) __ksym;
+extern void bpf_iter_task_vma_destroy(struct bpf_iter_task_vma *it) __ksym;
+
+/* Convenience macro to wrap over bpf_obj_drop_impl */
+#define bpf_percpu_obj_drop(kptr) bpf_percpu_obj_drop_impl(kptr, NULL)
+
+/* Description
+ * Throw a BPF exception from the program, immediately terminating its
+ * execution and unwinding the stack. The supplied 'cookie' parameter
+ * will be the return value of the program when an exception is thrown,
+ * and the default exception callback is used. Otherwise, if an exception
+ * callback is set using the '__exception_cb(callback)' declaration tag
+ * on the main program, the 'cookie' parameter will be the callback's only
+ * input argument.
+ *
+ * Thus, in case of default exception callback, 'cookie' is subjected to
+ * constraints on the program's return value (as with R0 on exit).
+ * Otherwise, the return value of the marked exception callback will be
+ * subjected to the same checks.
+ *
+ * Note that throwing an exception with lingering resources (locks,
+ * references, etc.) will lead to a verification error.
+ *
+ * Note that callbacks *cannot* call this helper.
+ * Returns
+ * Never.
+ * Throws
+ * An exception with the specified 'cookie' value.
+ */
+extern void bpf_throw(u64 cookie) __ksym;
+
+/* This macro must be used to mark the exception callback corresponding to the
+ * main program. For example:
+ *
+ * int exception_cb(u64 cookie) {
+ * return cookie;
+ * }
+ *
+ * SEC("tc")
+ * __exception_cb(exception_cb)
+ * int main_prog(struct __sk_buff *ctx) {
+ * ...
+ * return TC_ACT_OK;
+ * }
+ *
+ * Here, exception callback for the main program will be 'exception_cb'. Note
+ * that this attribute can only be used once, and multiple exception callbacks
+ * specified for the main program will lead to verification error.
+ */
+#define __exception_cb(name) __attribute__((btf_decl_tag("exception_callback:" #name)))
+
+#define __bpf_assert_signed(x) _Generic((x), \
+ unsigned long: 0, \
+ unsigned long long: 0, \
+ signed long: 1, \
+ signed long long: 1 \
+)
+
+#define __bpf_assert_check(LHS, op, RHS) \
+ _Static_assert(sizeof(&(LHS)), "1st argument must be an lvalue expression"); \
+ _Static_assert(sizeof(LHS) == 8, "Only 8-byte integers are supported\n"); \
+ _Static_assert(__builtin_constant_p(__bpf_assert_signed(LHS)), "internal static assert"); \
+ _Static_assert(__builtin_constant_p((RHS)), "2nd argument must be a constant expression")
+
+#define __bpf_assert(LHS, op, cons, RHS, VAL) \
+ ({ \
+ (void)bpf_throw; \
+ asm volatile ("if %[lhs] " op " %[rhs] goto +2; r1 = %[value]; call bpf_throw" \
+ : : [lhs] "r"(LHS), [rhs] cons(RHS), [value] "ri"(VAL) : ); \
+ })
+
+#define __bpf_assert_op_sign(LHS, op, cons, RHS, VAL, supp_sign) \
+ ({ \
+ __bpf_assert_check(LHS, op, RHS); \
+ if (__bpf_assert_signed(LHS) && !(supp_sign)) \
+ __bpf_assert(LHS, "s" #op, cons, RHS, VAL); \
+ else \
+ __bpf_assert(LHS, #op, cons, RHS, VAL); \
+ })
+
+#define __bpf_assert_op(LHS, op, RHS, VAL, supp_sign) \
+ ({ \
+ if (sizeof(typeof(RHS)) == 8) { \
+ const typeof(RHS) rhs_var = (RHS); \
+ __bpf_assert_op_sign(LHS, op, "r", rhs_var, VAL, supp_sign); \
+ } else { \
+ __bpf_assert_op_sign(LHS, op, "i", RHS, VAL, supp_sign); \
+ } \
+ })
+
+/* Description
+ * Assert that a conditional expression is true.
+ * Returns
+ * Void.
+ * Throws
+ * An exception with the value zero when the assertion fails.
+ */
+#define bpf_assert(cond) if (!(cond)) bpf_throw(0);
+
+/* Description
+ * Assert that a conditional expression is true.
+ * Returns
+ * Void.
+ * Throws
+ * An exception with the specified value when the assertion fails.
+ */
+#define bpf_assert_with(cond, value) if (!(cond)) bpf_throw(value);
+
+/* Description
+ * Assert that LHS is equal to RHS. This statement updates the known value
+ * of LHS during verification. Note that RHS must be a constant value, and
+ * must fit within the data type of LHS.
+ * Returns
+ * Void.
+ * Throws
+ * An exception with the value zero when the assertion fails.
+ */
+#define bpf_assert_eq(LHS, RHS) \
+ ({ \
+ barrier_var(LHS); \
+ __bpf_assert_op(LHS, ==, RHS, 0, true); \
+ })
+
+/* Description
+ * Assert that LHS is equal to RHS. This statement updates the known value
+ * of LHS during verification. Note that RHS must be a constant value, and
+ * must fit within the data type of LHS.
+ * Returns
+ * Void.
+ * Throws
+ * An exception with the specified value when the assertion fails.
+ */
+#define bpf_assert_eq_with(LHS, RHS, value) \
+ ({ \
+ barrier_var(LHS); \
+ __bpf_assert_op(LHS, ==, RHS, value, true); \
+ })
+
+/* Description
+ * Assert that LHS is less than RHS. This statement updates the known
+ * bounds of LHS during verification. Note that RHS must be a constant
+ * value, and must fit within the data type of LHS.
+ * Returns
+ * Void.
+ * Throws
+ * An exception with the value zero when the assertion fails.
+ */
+#define bpf_assert_lt(LHS, RHS) \
+ ({ \
+ barrier_var(LHS); \
+ __bpf_assert_op(LHS, <, RHS, 0, false); \
+ })
+
+/* Description
+ * Assert that LHS is less than RHS. This statement updates the known
+ * bounds of LHS during verification. Note that RHS must be a constant
+ * value, and must fit within the data type of LHS.
+ * Returns
+ * Void.
+ * Throws
+ * An exception with the specified value when the assertion fails.
+ */
+#define bpf_assert_lt_with(LHS, RHS, value) \
+ ({ \
+ barrier_var(LHS); \
+ __bpf_assert_op(LHS, <, RHS, value, false); \
+ })
+
+/* Description
+ * Assert that LHS is greater than RHS. This statement updates the known
+ * bounds of LHS during verification. Note that RHS must be a constant
+ * value, and must fit within the data type of LHS.
+ * Returns
+ * Void.
+ * Throws
+ * An exception with the value zero when the assertion fails.
+ */
+#define bpf_assert_gt(LHS, RHS) \
+ ({ \
+ barrier_var(LHS); \
+ __bpf_assert_op(LHS, >, RHS, 0, false); \
+ })
+
+/* Description
+ * Assert that LHS is greater than RHS. This statement updates the known
+ * bounds of LHS during verification. Note that RHS must be a constant
+ * value, and must fit within the data type of LHS.
+ * Returns
+ * Void.
+ * Throws
+ * An exception with the specified value when the assertion fails.
+ */
+#define bpf_assert_gt_with(LHS, RHS, value) \
+ ({ \
+ barrier_var(LHS); \
+ __bpf_assert_op(LHS, >, RHS, value, false); \
+ })
+
+/* Description
+ * Assert that LHS is less than or equal to RHS. This statement updates the
+ * known bounds of LHS during verification. Note that RHS must be a
+ * constant value, and must fit within the data type of LHS.
+ * Returns
+ * Void.
+ * Throws
+ * An exception with the value zero when the assertion fails.
+ */
+#define bpf_assert_le(LHS, RHS) \
+ ({ \
+ barrier_var(LHS); \
+ __bpf_assert_op(LHS, <=, RHS, 0, false); \
+ })
+
+/* Description
+ * Assert that LHS is less than or equal to RHS. This statement updates the
+ * known bounds of LHS during verification. Note that RHS must be a
+ * constant value, and must fit within the data type of LHS.
+ * Returns
+ * Void.
+ * Throws
+ * An exception with the specified value when the assertion fails.
+ */
+#define bpf_assert_le_with(LHS, RHS, value) \
+ ({ \
+ barrier_var(LHS); \
+ __bpf_assert_op(LHS, <=, RHS, value, false); \
+ })
+
+/* Description
+ * Assert that LHS is greater than or equal to RHS. This statement updates
+ * the known bounds of LHS during verification. Note that RHS must be a
+ * constant value, and must fit within the data type of LHS.
+ * Returns
+ * Void.
+ * Throws
+ * An exception with the value zero when the assertion fails.
+ */
+#define bpf_assert_ge(LHS, RHS) \
+ ({ \
+ barrier_var(LHS); \
+ __bpf_assert_op(LHS, >=, RHS, 0, false); \
+ })
+
+/* Description
+ * Assert that LHS is greater than or equal to RHS. This statement updates
+ * the known bounds of LHS during verification. Note that RHS must be a
+ * constant value, and must fit within the data type of LHS.
+ * Returns
+ * Void.
+ * Throws
+ * An exception with the specified value when the assertion fails.
+ */
+#define bpf_assert_ge_with(LHS, RHS, value) \
+ ({ \
+ barrier_var(LHS); \
+ __bpf_assert_op(LHS, >=, RHS, value, false); \
+ })
+
+/* Description
+ * Assert that LHS is in the range [BEG, END] (inclusive of both). This
+ * statement updates the known bounds of LHS during verification. Note
+ * that both BEG and END must be constant values, and must fit within the
+ * data type of LHS.
+ * Returns
+ * Void.
+ * Throws
+ * An exception with the value zero when the assertion fails.
+ */
+#define bpf_assert_range(LHS, BEG, END) \
+ ({ \
+ _Static_assert(BEG <= END, "BEG must be <= END"); \
+ barrier_var(LHS); \
+ __bpf_assert_op(LHS, >=, BEG, 0, false); \
+ __bpf_assert_op(LHS, <=, END, 0, false); \
+ })
+
+/* Description
+ * Assert that LHS is in the range [BEG, END] (inclusive of both). This
+ * statement updates the known bounds of LHS during verification. Note
+ * that both BEG and END must be constant values, and must fit within the
+ * data type of LHS.
+ * Returns
+ * Void.
+ * Throws
+ * An exception with the specified value when the assertion fails.
+ */
+#define bpf_assert_range_with(LHS, BEG, END, value) \
+ ({ \
+ _Static_assert(BEG <= END, "BEG must be <= END"); \
+ barrier_var(LHS); \
+ __bpf_assert_op(LHS, >=, BEG, value, false); \
+ __bpf_assert_op(LHS, <=, END, value, false); \
+ })
+
#endif
diff --git a/tools/testing/selftests/bpf/bpf_kfuncs.h b/tools/testing/selftests/bpf/bpf_kfuncs.h
index 642dda0e758a..5ca68ff0b59f 100644
--- a/tools/testing/selftests/bpf/bpf_kfuncs.h
+++ b/tools/testing/selftests/bpf/bpf_kfuncs.h
@@ -1,6 +1,8 @@
#ifndef __BPF_KFUNCS__
#define __BPF_KFUNCS__
+struct bpf_sock_addr_kern;
+
/* Description
* Initializes an skb-type dynptr
* Returns
@@ -41,4 +43,16 @@ extern bool bpf_dynptr_is_rdonly(const struct bpf_dynptr *ptr) __ksym;
extern __u32 bpf_dynptr_size(const struct bpf_dynptr *ptr) __ksym;
extern int bpf_dynptr_clone(const struct bpf_dynptr *ptr, struct bpf_dynptr *clone__init) __ksym;
+/* Description
+ * Modify the address of a AF_UNIX sockaddr.
+ * Returns__bpf_kfunc
+ * -EINVAL if the address size is too big or, 0 if the sockaddr was successfully modified.
+ */
+extern int bpf_sock_addr_set_sun_path(struct bpf_sock_addr_kern *sa_kern,
+ const __u8 *sun_path, __u32 sun_path__sz) __ksym;
+
+void *bpf_cast_to_kern_ctx(void *) __ksym;
+
+void *bpf_rdonly_cast(void *obj, __u32 btf_id) __ksym;
+
#endif
diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
index cefc5dd72573..a5e246f7b202 100644
--- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
+++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
@@ -138,6 +138,10 @@ __bpf_kfunc void bpf_iter_testmod_seq_destroy(struct bpf_iter_testmod_seq *it)
it->cnt = 0;
}
+__bpf_kfunc void bpf_kfunc_common_test(void)
+{
+}
+
struct bpf_testmod_btf_type_tag_1 {
int a;
};
@@ -343,6 +347,7 @@ BTF_SET8_START(bpf_testmod_common_kfunc_ids)
BTF_ID_FLAGS(func, bpf_iter_testmod_seq_new, KF_ITER_NEW)
BTF_ID_FLAGS(func, bpf_iter_testmod_seq_next, KF_ITER_NEXT | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_iter_testmod_seq_destroy, KF_ITER_DESTROY)
+BTF_ID_FLAGS(func, bpf_kfunc_common_test)
BTF_SET8_END(bpf_testmod_common_kfunc_ids)
static const struct btf_kfunc_id_set bpf_testmod_common_kfunc_set = {
diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h
index f5c5b1375c24..7c664dd61059 100644
--- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h
+++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h
@@ -104,4 +104,6 @@ void bpf_kfunc_call_test_fail1(struct prog_test_fail1 *p);
void bpf_kfunc_call_test_fail2(struct prog_test_fail2 *p);
void bpf_kfunc_call_test_fail3(struct prog_test_fail3 *p);
void bpf_kfunc_call_test_mem_len_fail1(void *mem, int len);
+
+void bpf_kfunc_common_test(void) __ksym;
#endif /* _BPF_TESTMOD_KFUNC_H */
diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c
index 2caee8423ee0..5b1da2a32ea7 100644
--- a/tools/testing/selftests/bpf/cgroup_helpers.c
+++ b/tools/testing/selftests/bpf/cgroup_helpers.c
@@ -49,6 +49,10 @@
snprintf(buf, sizeof(buf), "%s%s", NETCLS_MOUNT_PATH, \
CGROUP_WORK_DIR)
+static __thread bool cgroup_workdir_mounted;
+
+static void __cleanup_cgroup_environment(void);
+
static int __enable_controllers(const char *cgroup_path, const char *controllers)
{
char path[PATH_MAX + 1];
@@ -195,6 +199,11 @@ int setup_cgroup_environment(void)
format_cgroup_path(cgroup_workdir, "");
+ if (mkdir(CGROUP_MOUNT_PATH, 0777) && errno != EEXIST) {
+ log_err("mkdir mount");
+ return 1;
+ }
+
if (unshare(CLONE_NEWNS)) {
log_err("unshare");
return 1;
@@ -209,9 +218,10 @@ int setup_cgroup_environment(void)
log_err("mount cgroup2");
return 1;
}
+ cgroup_workdir_mounted = true;
/* Cleanup existing failed runs, now that the environment is setup */
- cleanup_cgroup_environment();
+ __cleanup_cgroup_environment();
if (mkdir(cgroup_workdir, 0777) && errno != EEXIST) {
log_err("mkdir cgroup work dir");
@@ -306,10 +316,25 @@ int join_parent_cgroup(const char *relative_path)
}
/**
+ * __cleanup_cgroup_environment() - Delete temporary cgroups
+ *
+ * This is a helper for cleanup_cgroup_environment() that is responsible for
+ * deletion of all temporary cgroups that have been created during the test.
+ */
+static void __cleanup_cgroup_environment(void)
+{
+ char cgroup_workdir[PATH_MAX + 1];
+
+ format_cgroup_path(cgroup_workdir, "");
+ join_cgroup_from_top(CGROUP_MOUNT_PATH);
+ nftw(cgroup_workdir, nftwfunc, WALK_FD_LIMIT, FTW_DEPTH | FTW_MOUNT);
+}
+
+/**
* cleanup_cgroup_environment() - Cleanup Cgroup Testing Environment
*
* This is an idempotent function to delete all temporary cgroups that
- * have been created during the test, including the cgroup testing work
+ * have been created during the test and unmount the cgroup testing work
* directory.
*
* At call time, it moves the calling process to the root cgroup, and then
@@ -320,11 +345,10 @@ int join_parent_cgroup(const char *relative_path)
*/
void cleanup_cgroup_environment(void)
{
- char cgroup_workdir[PATH_MAX + 1];
-
- format_cgroup_path(cgroup_workdir, "");
- join_cgroup_from_top(CGROUP_MOUNT_PATH);
- nftw(cgroup_workdir, nftwfunc, WALK_FD_LIMIT, FTW_DEPTH | FTW_MOUNT);
+ __cleanup_cgroup_environment();
+ if (cgroup_workdir_mounted && umount(CGROUP_MOUNT_PATH))
+ log_err("umount cgroup2");
+ cgroup_workdir_mounted = false;
}
/**
diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index e41eb33b2704..02dd4409200e 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -84,3 +84,4 @@ CONFIG_USERFAULTFD=y
CONFIG_VXLAN=y
CONFIG_XDP_SOCKETS=y
CONFIG_XFRM_INTERFACE=y
+CONFIG_VSOCKETS=y
diff --git a/tools/testing/selftests/bpf/liburandom_read.map b/tools/testing/selftests/bpf/liburandom_read.map
new file mode 100644
index 000000000000..38a97a419a04
--- /dev/null
+++ b/tools/testing/selftests/bpf/liburandom_read.map
@@ -0,0 +1,15 @@
+LIBURANDOM_READ_1.0.0 {
+ global:
+ urandlib_api;
+ urandlib_api_sameoffset;
+ urandlib_read_without_sema;
+ urandlib_read_with_sema;
+ urandlib_read_with_sema_semaphore;
+ local:
+ *;
+};
+
+LIBURANDOM_READ_2.0.0 {
+ global:
+ urandlib_api;
+} LIBURANDOM_READ_1.0.0;
diff --git a/tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c b/tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c
index 16f1671e4bde..66191ae9863c 100644
--- a/tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c
+++ b/tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c
@@ -33,11 +33,11 @@ static void create_inner_maps(enum bpf_map_type map_type,
{
int map_fd, map_index, ret;
__u32 map_key = 0, map_id;
- char map_name[15];
+ char map_name[16];
for (map_index = 0; map_index < OUTER_MAP_ENTRIES; map_index++) {
memset(map_name, 0, sizeof(map_name));
- sprintf(map_name, "inner_map_fd_%d", map_index);
+ snprintf(map_name, sizeof(map_name), "inner_map_fd_%d", map_index);
map_fd = bpf_map_create(map_type, map_name, sizeof(__u32),
sizeof(__u32), 1, NULL);
CHECK(map_fd < 0,
diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c
index da72a3a66230..6db27a9088e9 100644
--- a/tools/testing/selftests/bpf/network_helpers.c
+++ b/tools/testing/selftests/bpf/network_helpers.c
@@ -11,6 +11,7 @@
#include <arpa/inet.h>
#include <sys/mount.h>
#include <sys/stat.h>
+#include <sys/un.h>
#include <linux/err.h>
#include <linux/in.h>
@@ -257,6 +258,26 @@ static int connect_fd_to_addr(int fd,
return 0;
}
+int connect_to_addr(const struct sockaddr_storage *addr, socklen_t addrlen, int type)
+{
+ int fd;
+
+ fd = socket(addr->ss_family, type, 0);
+ if (fd < 0) {
+ log_err("Failed to create client socket");
+ return -1;
+ }
+
+ if (connect_fd_to_addr(fd, addr, addrlen, false))
+ goto error_close;
+
+ return fd;
+
+error_close:
+ save_errno_close(fd);
+ return -1;
+}
+
static const struct network_helper_opts default_opts;
int connect_to_fd_opts(int server_fd, const struct network_helper_opts *opts)
@@ -380,6 +401,19 @@ int make_sockaddr(int family, const char *addr_str, __u16 port,
if (len)
*len = sizeof(*sin6);
return 0;
+ } else if (family == AF_UNIX) {
+ /* Note that we always use abstract unix sockets to avoid having
+ * to clean up leftover files.
+ */
+ struct sockaddr_un *sun = (void *)addr;
+
+ memset(addr, 0, sizeof(*sun));
+ sun->sun_family = family;
+ sun->sun_path[0] = 0;
+ strcpy(sun->sun_path + 1, addr_str);
+ if (len)
+ *len = offsetof(struct sockaddr_un, sun_path) + 1 + strlen(addr_str);
+ return 0;
}
return -1;
}
diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h
index 5eccc67d1a99..34f1200a781b 100644
--- a/tools/testing/selftests/bpf/network_helpers.h
+++ b/tools/testing/selftests/bpf/network_helpers.h
@@ -51,6 +51,7 @@ int *start_reuseport_server(int family, int type, const char *addr_str,
__u16 port, int timeout_ms,
unsigned int nr_listens);
void free_fds(int *fds, unsigned int nr_close_fds);
+int connect_to_addr(const struct sockaddr_storage *addr, socklen_t len, int type);
int connect_to_fd(int server_fd, int timeout_ms);
int connect_to_fd_opts(int server_fd, const struct network_helper_opts *opts);
int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms);
diff --git a/tools/testing/selftests/bpf/prog_tests/align.c b/tools/testing/selftests/bpf/prog_tests/align.c
index b92770592563..465c1c3a3d3c 100644
--- a/tools/testing/selftests/bpf/prog_tests/align.c
+++ b/tools/testing/selftests/bpf/prog_tests/align.c
@@ -6,6 +6,7 @@
struct bpf_reg_match {
unsigned int line;
+ const char *reg;
const char *match;
};
@@ -39,13 +40,13 @@ static struct bpf_align_test tests[] = {
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.matches = {
- {0, "R1=ctx(off=0,imm=0)"},
- {0, "R10=fp0"},
- {0, "R3_w=2"},
- {1, "R3_w=4"},
- {2, "R3_w=8"},
- {3, "R3_w=16"},
- {4, "R3_w=32"},
+ {0, "R1", "ctx(off=0,imm=0)"},
+ {0, "R10", "fp0"},
+ {0, "R3_w", "2"},
+ {1, "R3_w", "4"},
+ {2, "R3_w", "8"},
+ {3, "R3_w", "16"},
+ {4, "R3_w", "32"},
},
},
{
@@ -67,19 +68,19 @@ static struct bpf_align_test tests[] = {
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.matches = {
- {0, "R1=ctx(off=0,imm=0)"},
- {0, "R10=fp0"},
- {0, "R3_w=1"},
- {1, "R3_w=2"},
- {2, "R3_w=4"},
- {3, "R3_w=8"},
- {4, "R3_w=16"},
- {5, "R3_w=1"},
- {6, "R4_w=32"},
- {7, "R4_w=16"},
- {8, "R4_w=8"},
- {9, "R4_w=4"},
- {10, "R4_w=2"},
+ {0, "R1", "ctx(off=0,imm=0)"},
+ {0, "R10", "fp0"},
+ {0, "R3_w", "1"},
+ {1, "R3_w", "2"},
+ {2, "R3_w", "4"},
+ {3, "R3_w", "8"},
+ {4, "R3_w", "16"},
+ {5, "R3_w", "1"},
+ {6, "R4_w", "32"},
+ {7, "R4_w", "16"},
+ {8, "R4_w", "8"},
+ {9, "R4_w", "4"},
+ {10, "R4_w", "2"},
},
},
{
@@ -96,14 +97,14 @@ static struct bpf_align_test tests[] = {
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.matches = {
- {0, "R1=ctx(off=0,imm=0)"},
- {0, "R10=fp0"},
- {0, "R3_w=4"},
- {1, "R3_w=8"},
- {2, "R3_w=10"},
- {3, "R4_w=8"},
- {4, "R4_w=12"},
- {5, "R4_w=14"},
+ {0, "R1", "ctx(off=0,imm=0)"},
+ {0, "R10", "fp0"},
+ {0, "R3_w", "4"},
+ {1, "R3_w", "8"},
+ {2, "R3_w", "10"},
+ {3, "R4_w", "8"},
+ {4, "R4_w", "12"},
+ {5, "R4_w", "14"},
},
},
{
@@ -118,12 +119,12 @@ static struct bpf_align_test tests[] = {
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.matches = {
- {0, "R1=ctx(off=0,imm=0)"},
- {0, "R10=fp0"},
- {0, "R3_w=7"},
- {1, "R3_w=7"},
- {2, "R3_w=14"},
- {3, "R3_w=56"},
+ {0, "R1", "ctx(off=0,imm=0)"},
+ {0, "R10", "fp0"},
+ {0, "R3_w", "7"},
+ {1, "R3_w", "7"},
+ {2, "R3_w", "14"},
+ {3, "R3_w", "56"},
},
},
@@ -161,19 +162,19 @@ static struct bpf_align_test tests[] = {
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.matches = {
- {6, "R0_w=pkt(off=8,r=8,imm=0)"},
- {6, "R3_w=scalar(umax=255,var_off=(0x0; 0xff))"},
- {7, "R3_w=scalar(umax=510,var_off=(0x0; 0x1fe))"},
- {8, "R3_w=scalar(umax=1020,var_off=(0x0; 0x3fc))"},
- {9, "R3_w=scalar(umax=2040,var_off=(0x0; 0x7f8))"},
- {10, "R3_w=scalar(umax=4080,var_off=(0x0; 0xff0))"},
- {12, "R3_w=pkt_end(off=0,imm=0)"},
- {17, "R4_w=scalar(umax=255,var_off=(0x0; 0xff))"},
- {18, "R4_w=scalar(umax=8160,var_off=(0x0; 0x1fe0))"},
- {19, "R4_w=scalar(umax=4080,var_off=(0x0; 0xff0))"},
- {20, "R4_w=scalar(umax=2040,var_off=(0x0; 0x7f8))"},
- {21, "R4_w=scalar(umax=1020,var_off=(0x0; 0x3fc))"},
- {22, "R4_w=scalar(umax=510,var_off=(0x0; 0x1fe))"},
+ {6, "R0_w", "pkt(off=8,r=8,imm=0)"},
+ {6, "R3_w", "var_off=(0x0; 0xff)"},
+ {7, "R3_w", "var_off=(0x0; 0x1fe)"},
+ {8, "R3_w", "var_off=(0x0; 0x3fc)"},
+ {9, "R3_w", "var_off=(0x0; 0x7f8)"},
+ {10, "R3_w", "var_off=(0x0; 0xff0)"},
+ {12, "R3_w", "pkt_end(off=0,imm=0)"},
+ {17, "R4_w", "var_off=(0x0; 0xff)"},
+ {18, "R4_w", "var_off=(0x0; 0x1fe0)"},
+ {19, "R4_w", "var_off=(0x0; 0xff0)"},
+ {20, "R4_w", "var_off=(0x0; 0x7f8)"},
+ {21, "R4_w", "var_off=(0x0; 0x3fc)"},
+ {22, "R4_w", "var_off=(0x0; 0x1fe)"},
},
},
{
@@ -194,16 +195,16 @@ static struct bpf_align_test tests[] = {
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.matches = {
- {6, "R3_w=scalar(umax=255,var_off=(0x0; 0xff))"},
- {7, "R4_w=scalar(id=1,umax=255,var_off=(0x0; 0xff))"},
- {8, "R4_w=scalar(umax=255,var_off=(0x0; 0xff))"},
- {9, "R4_w=scalar(id=1,umax=255,var_off=(0x0; 0xff))"},
- {10, "R4_w=scalar(umax=510,var_off=(0x0; 0x1fe))"},
- {11, "R4_w=scalar(id=1,umax=255,var_off=(0x0; 0xff))"},
- {12, "R4_w=scalar(umax=1020,var_off=(0x0; 0x3fc))"},
- {13, "R4_w=scalar(id=1,umax=255,var_off=(0x0; 0xff))"},
- {14, "R4_w=scalar(umax=2040,var_off=(0x0; 0x7f8))"},
- {15, "R4_w=scalar(umax=4080,var_off=(0x0; 0xff0))"},
+ {6, "R3_w", "var_off=(0x0; 0xff)"},
+ {7, "R4_w", "var_off=(0x0; 0xff)"},
+ {8, "R4_w", "var_off=(0x0; 0xff)"},
+ {9, "R4_w", "var_off=(0x0; 0xff)"},
+ {10, "R4_w", "var_off=(0x0; 0x1fe)"},
+ {11, "R4_w", "var_off=(0x0; 0xff)"},
+ {12, "R4_w", "var_off=(0x0; 0x3fc)"},
+ {13, "R4_w", "var_off=(0x0; 0xff)"},
+ {14, "R4_w", "var_off=(0x0; 0x7f8)"},
+ {15, "R4_w", "var_off=(0x0; 0xff0)"},
},
},
{
@@ -234,14 +235,14 @@ static struct bpf_align_test tests[] = {
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.matches = {
- {2, "R5_w=pkt(off=0,r=0,imm=0)"},
- {4, "R5_w=pkt(off=14,r=0,imm=0)"},
- {5, "R4_w=pkt(off=14,r=0,imm=0)"},
- {9, "R2=pkt(off=0,r=18,imm=0)"},
- {10, "R5=pkt(off=14,r=18,imm=0)"},
- {10, "R4_w=scalar(umax=255,var_off=(0x0; 0xff))"},
- {13, "R4_w=scalar(umax=65535,var_off=(0x0; 0xffff))"},
- {14, "R4_w=scalar(umax=65535,var_off=(0x0; 0xffff))"},
+ {2, "R5_w", "pkt(off=0,r=0,imm=0)"},
+ {4, "R5_w", "pkt(off=14,r=0,imm=0)"},
+ {5, "R4_w", "pkt(off=14,r=0,imm=0)"},
+ {9, "R2", "pkt(off=0,r=18,imm=0)"},
+ {10, "R5", "pkt(off=14,r=18,imm=0)"},
+ {10, "R4_w", "var_off=(0x0; 0xff)"},
+ {13, "R4_w", "var_off=(0x0; 0xffff)"},
+ {14, "R4_w", "var_off=(0x0; 0xffff)"},
},
},
{
@@ -298,20 +299,20 @@ static struct bpf_align_test tests[] = {
/* Calculated offset in R6 has unknown value, but known
* alignment of 4.
*/
- {6, "R2_w=pkt(off=0,r=8,imm=0)"},
- {7, "R6_w=scalar(umax=1020,var_off=(0x0; 0x3fc))"},
+ {6, "R2_w", "pkt(off=0,r=8,imm=0)"},
+ {7, "R6_w", "var_off=(0x0; 0x3fc)"},
/* Offset is added to packet pointer R5, resulting in
* known fixed offset, and variable offset from R6.
*/
- {11, "R5_w=pkt(id=1,off=14,r=0,umax=1020,var_off=(0x0; 0x3fc))"},
+ {11, "R5_w", "pkt(id=1,off=14,"},
/* At the time the word size load is performed from R5,
* it's total offset is NET_IP_ALIGN + reg->off (0) +
* reg->aux_off (14) which is 16. Then the variable
* offset is considered using reg->aux_off_align which
* is 4 and meets the load's requirements.
*/
- {15, "R4=pkt(id=1,off=18,r=18,umax=1020,var_off=(0x0; 0x3fc))"},
- {15, "R5=pkt(id=1,off=14,r=18,umax=1020,var_off=(0x0; 0x3fc))"},
+ {15, "R4", "var_off=(0x0; 0x3fc)"},
+ {15, "R5", "var_off=(0x0; 0x3fc)"},
/* Variable offset is added to R5 packet pointer,
* resulting in auxiliary alignment of 4. To avoid BPF
* verifier's precision backtracking logging
@@ -319,46 +320,46 @@ static struct bpf_align_test tests[] = {
* instruction to validate R5 state. We also check
* that R4 is what it should be in such case.
*/
- {18, "R4_w=pkt(id=2,off=0,r=0,umax=1020,var_off=(0x0; 0x3fc))"},
- {18, "R5_w=pkt(id=2,off=0,r=0,umax=1020,var_off=(0x0; 0x3fc))"},
+ {18, "R4_w", "var_off=(0x0; 0x3fc)"},
+ {18, "R5_w", "var_off=(0x0; 0x3fc)"},
/* Constant offset is added to R5, resulting in
* reg->off of 14.
*/
- {19, "R5_w=pkt(id=2,off=14,r=0,umax=1020,var_off=(0x0; 0x3fc))"},
+ {19, "R5_w", "pkt(id=2,off=14,"},
/* At the time the word size load is performed from R5,
* its total fixed offset is NET_IP_ALIGN + reg->off
* (14) which is 16. Then the variable offset is 4-byte
* aligned, so the total offset is 4-byte aligned and
* meets the load's requirements.
*/
- {24, "R4=pkt(id=2,off=18,r=18,umax=1020,var_off=(0x0; 0x3fc))"},
- {24, "R5=pkt(id=2,off=14,r=18,umax=1020,var_off=(0x0; 0x3fc))"},
+ {24, "R4", "var_off=(0x0; 0x3fc)"},
+ {24, "R5", "var_off=(0x0; 0x3fc)"},
/* Constant offset is added to R5 packet pointer,
* resulting in reg->off value of 14.
*/
- {26, "R5_w=pkt(off=14,r=8"},
+ {26, "R5_w", "pkt(off=14,r=8,"},
/* Variable offset is added to R5, resulting in a
* variable offset of (4n). See comment for insn #18
* for R4 = R5 trick.
*/
- {28, "R4_w=pkt(id=3,off=14,r=0,umax=1020,var_off=(0x0; 0x3fc))"},
- {28, "R5_w=pkt(id=3,off=14,r=0,umax=1020,var_off=(0x0; 0x3fc))"},
+ {28, "R4_w", "var_off=(0x0; 0x3fc)"},
+ {28, "R5_w", "var_off=(0x0; 0x3fc)"},
/* Constant is added to R5 again, setting reg->off to 18. */
- {29, "R5_w=pkt(id=3,off=18,r=0,umax=1020,var_off=(0x0; 0x3fc))"},
+ {29, "R5_w", "pkt(id=3,off=18,"},
/* And once more we add a variable; resulting var_off
* is still (4n), fixed offset is not changed.
* Also, we create a new reg->id.
*/
- {31, "R4_w=pkt(id=4,off=18,r=0,umax=2040,var_off=(0x0; 0x7fc)"},
- {31, "R5_w=pkt(id=4,off=18,r=0,umax=2040,var_off=(0x0; 0x7fc)"},
+ {31, "R4_w", "var_off=(0x0; 0x7fc)"},
+ {31, "R5_w", "var_off=(0x0; 0x7fc)"},
/* At the time the word size load is performed from R5,
* its total fixed offset is NET_IP_ALIGN + reg->off (18)
* which is 20. Then the variable offset is (4n), so
* the total offset is 4-byte aligned and meets the
* load's requirements.
*/
- {35, "R4=pkt(id=4,off=22,r=22,umax=2040,var_off=(0x0; 0x7fc)"},
- {35, "R5=pkt(id=4,off=18,r=22,umax=2040,var_off=(0x0; 0x7fc)"},
+ {35, "R4", "var_off=(0x0; 0x7fc)"},
+ {35, "R5", "var_off=(0x0; 0x7fc)"},
},
},
{
@@ -396,36 +397,36 @@ static struct bpf_align_test tests[] = {
/* Calculated offset in R6 has unknown value, but known
* alignment of 4.
*/
- {6, "R2_w=pkt(off=0,r=8,imm=0)"},
- {7, "R6_w=scalar(umax=1020,var_off=(0x0; 0x3fc))"},
+ {6, "R2_w", "pkt(off=0,r=8,imm=0)"},
+ {7, "R6_w", "var_off=(0x0; 0x3fc)"},
/* Adding 14 makes R6 be (4n+2) */
- {8, "R6_w=scalar(umin=14,umax=1034,var_off=(0x2; 0x7fc))"},
+ {8, "R6_w", "var_off=(0x2; 0x7fc)"},
/* Packet pointer has (4n+2) offset */
- {11, "R5_w=pkt(id=1,off=0,r=0,umin=14,umax=1034,var_off=(0x2; 0x7fc)"},
- {12, "R4=pkt(id=1,off=4,r=0,umin=14,umax=1034,var_off=(0x2; 0x7fc)"},
+ {11, "R5_w", "var_off=(0x2; 0x7fc)"},
+ {12, "R4", "var_off=(0x2; 0x7fc)"},
/* At the time the word size load is performed from R5,
* its total fixed offset is NET_IP_ALIGN + reg->off (0)
* which is 2. Then the variable offset is (4n+2), so
* the total offset is 4-byte aligned and meets the
* load's requirements.
*/
- {15, "R5=pkt(id=1,off=0,r=4,umin=14,umax=1034,var_off=(0x2; 0x7fc)"},
+ {15, "R5", "var_off=(0x2; 0x7fc)"},
/* Newly read value in R6 was shifted left by 2, so has
* known alignment of 4.
*/
- {17, "R6_w=scalar(umax=1020,var_off=(0x0; 0x3fc))"},
+ {17, "R6_w", "var_off=(0x0; 0x3fc)"},
/* Added (4n) to packet pointer's (4n+2) var_off, giving
* another (4n+2).
*/
- {19, "R5_w=pkt(id=2,off=0,r=0,umin=14,umax=2054,var_off=(0x2; 0xffc)"},
- {20, "R4=pkt(id=2,off=4,r=0,umin=14,umax=2054,var_off=(0x2; 0xffc)"},
+ {19, "R5_w", "var_off=(0x2; 0xffc)"},
+ {20, "R4", "var_off=(0x2; 0xffc)"},
/* At the time the word size load is performed from R5,
* its total fixed offset is NET_IP_ALIGN + reg->off (0)
* which is 2. Then the variable offset is (4n+2), so
* the total offset is 4-byte aligned and meets the
* load's requirements.
*/
- {23, "R5=pkt(id=2,off=0,r=4,umin=14,umax=2054,var_off=(0x2; 0xffc)"},
+ {23, "R5", "var_off=(0x2; 0xffc)"},
},
},
{
@@ -458,18 +459,18 @@ static struct bpf_align_test tests[] = {
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.result = REJECT,
.matches = {
- {3, "R5_w=pkt_end(off=0,imm=0)"},
+ {3, "R5_w", "pkt_end(off=0,imm=0)"},
/* (ptr - ptr) << 2 == unknown, (4n) */
- {5, "R5_w=scalar(smax=9223372036854775804,umax=18446744073709551612,var_off=(0x0; 0xfffffffffffffffc)"},
+ {5, "R5_w", "var_off=(0x0; 0xfffffffffffffffc)"},
/* (4n) + 14 == (4n+2). We blow our bounds, because
* the add could overflow.
*/
- {6, "R5_w=scalar(smin=-9223372036854775806,smax=9223372036854775806,umin=2,umax=18446744073709551614,var_off=(0x2; 0xfffffffffffffffc)"},
+ {6, "R5_w", "var_off=(0x2; 0xfffffffffffffffc)"},
/* Checked s>=0 */
- {9, "R5=scalar(umin=2,umax=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc)"},
+ {9, "R5", "var_off=(0x2; 0x7ffffffffffffffc)"},
/* packet pointer + nonnegative (4n+2) */
- {11, "R6_w=pkt(id=1,off=0,r=0,umin=2,umax=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc)"},
- {12, "R4_w=pkt(id=1,off=4,r=0,umin=2,umax=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc)"},
+ {11, "R6_w", "var_off=(0x2; 0x7ffffffffffffffc)"},
+ {12, "R4_w", "var_off=(0x2; 0x7ffffffffffffffc)"},
/* NET_IP_ALIGN + (4n+2) == (4n), alignment is fine.
* We checked the bounds, but it might have been able
* to overflow if the packet pointer started in the
@@ -477,7 +478,7 @@ static struct bpf_align_test tests[] = {
* So we did not get a 'range' on R6, and the access
* attempt will fail.
*/
- {15, "R6_w=pkt(id=1,off=0,r=0,umin=2,umax=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc)"},
+ {15, "R6_w", "var_off=(0x2; 0x7ffffffffffffffc)"},
}
},
{
@@ -512,24 +513,23 @@ static struct bpf_align_test tests[] = {
/* Calculated offset in R6 has unknown value, but known
* alignment of 4.
*/
- {6, "R2_w=pkt(off=0,r=8,imm=0)"},
- {8, "R6_w=scalar(umax=1020,var_off=(0x0; 0x3fc))"},
+ {6, "R2_w", "pkt(off=0,r=8,imm=0)"},
+ {8, "R6_w", "var_off=(0x0; 0x3fc)"},
/* Adding 14 makes R6 be (4n+2) */
- {9, "R6_w=scalar(umin=14,umax=1034,var_off=(0x2; 0x7fc))"},
+ {9, "R6_w", "var_off=(0x2; 0x7fc)"},
/* New unknown value in R7 is (4n) */
- {10, "R7_w=scalar(umax=1020,var_off=(0x0; 0x3fc))"},
+ {10, "R7_w", "var_off=(0x0; 0x3fc)"},
/* Subtracting it from R6 blows our unsigned bounds */
- {11, "R6=scalar(smin=-1006,smax=1034,umin=2,umax=18446744073709551614,var_off=(0x2; 0xfffffffffffffffc)"},
+ {11, "R6", "var_off=(0x2; 0xfffffffffffffffc)"},
/* Checked s>= 0 */
- {14, "R6=scalar(umin=2,umax=1034,var_off=(0x2; 0x7fc))"},
+ {14, "R6", "var_off=(0x2; 0x7fc)"},
/* At the time the word size load is performed from R5,
* its total fixed offset is NET_IP_ALIGN + reg->off (0)
* which is 2. Then the variable offset is (4n+2), so
* the total offset is 4-byte aligned and meets the
* load's requirements.
*/
- {20, "R5=pkt(id=2,off=0,r=4,umin=2,umax=1034,var_off=(0x2; 0x7fc)"},
-
+ {20, "R5", "var_off=(0x2; 0x7fc)"},
},
},
{
@@ -566,23 +566,23 @@ static struct bpf_align_test tests[] = {
/* Calculated offset in R6 has unknown value, but known
* alignment of 4.
*/
- {6, "R2_w=pkt(off=0,r=8,imm=0)"},
- {9, "R6_w=scalar(umax=60,var_off=(0x0; 0x3c))"},
+ {6, "R2_w", "pkt(off=0,r=8,imm=0)"},
+ {9, "R6_w", "var_off=(0x0; 0x3c)"},
/* Adding 14 makes R6 be (4n+2) */
- {10, "R6_w=scalar(umin=14,umax=74,var_off=(0x2; 0x7c))"},
+ {10, "R6_w", "var_off=(0x2; 0x7c)"},
/* Subtracting from packet pointer overflows ubounds */
- {13, "R5_w=pkt(id=2,off=0,r=8,umin=18446744073709551542,umax=18446744073709551602,var_off=(0xffffffffffffff82; 0x7c)"},
+ {13, "R5_w", "var_off=(0xffffffffffffff82; 0x7c)"},
/* New unknown value in R7 is (4n), >= 76 */
- {14, "R7_w=scalar(umin=76,umax=1096,var_off=(0x0; 0x7fc))"},
+ {14, "R7_w", "var_off=(0x0; 0x7fc)"},
/* Adding it to packet pointer gives nice bounds again */
- {16, "R5_w=pkt(id=3,off=0,r=0,umin=2,umax=1082,var_off=(0x2; 0x7fc)"},
+ {16, "R5_w", "var_off=(0x2; 0x7fc)"},
/* At the time the word size load is performed from R5,
* its total fixed offset is NET_IP_ALIGN + reg->off (0)
* which is 2. Then the variable offset is (4n+2), so
* the total offset is 4-byte aligned and meets the
* load's requirements.
*/
- {20, "R5=pkt(id=3,off=0,r=4,umin=2,umax=1082,var_off=(0x2; 0x7fc)"},
+ {20, "R5", "var_off=(0x2; 0x7fc)"},
},
},
};
@@ -635,6 +635,7 @@ static int do_test_single(struct bpf_align_test *test)
line_ptr = strtok(bpf_vlog_copy, "\n");
for (i = 0; i < MAX_MATCHES; i++) {
struct bpf_reg_match m = test->matches[i];
+ const char *p;
int tmp;
if (!m.match)
@@ -649,8 +650,8 @@ static int do_test_single(struct bpf_align_test *test)
line_ptr = strtok(NULL, "\n");
}
if (!line_ptr) {
- printf("Failed to find line %u for match: %s\n",
- m.line, m.match);
+ printf("Failed to find line %u for match: %s=%s\n",
+ m.line, m.reg, m.match);
ret = 1;
printf("%s", bpf_vlog);
break;
@@ -667,15 +668,15 @@ static int do_test_single(struct bpf_align_test *test)
* 6: R0_w=pkt(off=8,r=8,imm=0) R1=ctx(off=0,imm=0) R2_w=pkt(off=0,r=8,imm=0) R3_w=pkt_end(off=0,imm=0) R10=fp0
* 6: (71) r3 = *(u8 *)(r2 +0) ; R2_w=pkt(off=0,r=8,imm=0) R3_w=scalar(umax=255,var_off=(0x0; 0xff))
*/
- while (!strstr(line_ptr, m.match)) {
+ while (!(p = strstr(line_ptr, m.reg)) || !strstr(p, m.match)) {
cur_line = -1;
line_ptr = strtok(NULL, "\n");
sscanf(line_ptr ?: "", "%u: ", &cur_line);
if (!line_ptr || cur_line != m.line)
break;
}
- if (cur_line != m.line || !line_ptr || !strstr(line_ptr, m.match)) {
- printf("Failed to find match %u: %s\n", m.line, m.match);
+ if (cur_line != m.line || !line_ptr || !(p = strstr(line_ptr, m.reg)) || !strstr(p, m.match)) {
+ printf("Failed to find match %u: %s=%s\n", m.line, m.reg, m.match);
ret = 1;
printf("%s", bpf_vlog);
break;
diff --git a/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c b/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c
index d2d9e965eba5..053f4d6da77a 100644
--- a/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c
+++ b/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c
@@ -193,8 +193,8 @@ error:
void test_bloom_filter_map(void)
{
- __u32 *rand_vals, nr_rand_vals;
- struct bloom_filter_map *skel;
+ __u32 *rand_vals = NULL, nr_rand_vals = 0;
+ struct bloom_filter_map *skel = NULL;
int err;
test_fail_cases();
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
index 1f02168103dd..41aba139b20b 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -10,7 +10,7 @@
#include "bpf_iter_task.skel.h"
#include "bpf_iter_task_stack.skel.h"
#include "bpf_iter_task_file.skel.h"
-#include "bpf_iter_task_vma.skel.h"
+#include "bpf_iter_task_vmas.skel.h"
#include "bpf_iter_task_btf.skel.h"
#include "bpf_iter_tcp4.skel.h"
#include "bpf_iter_tcp6.skel.h"
@@ -1399,19 +1399,19 @@ static void str_strip_first_line(char *str)
static void test_task_vma_common(struct bpf_iter_attach_opts *opts)
{
int err, iter_fd = -1, proc_maps_fd = -1;
- struct bpf_iter_task_vma *skel;
+ struct bpf_iter_task_vmas *skel;
int len, read_size = 4;
char maps_path[64];
- skel = bpf_iter_task_vma__open();
- if (!ASSERT_OK_PTR(skel, "bpf_iter_task_vma__open"))
+ skel = bpf_iter_task_vmas__open();
+ if (!ASSERT_OK_PTR(skel, "bpf_iter_task_vmas__open"))
return;
skel->bss->pid = getpid();
skel->bss->one_task = opts ? 1 : 0;
- err = bpf_iter_task_vma__load(skel);
- if (!ASSERT_OK(err, "bpf_iter_task_vma__load"))
+ err = bpf_iter_task_vmas__load(skel);
+ if (!ASSERT_OK(err, "bpf_iter_task_vmas__load"))
goto out;
skel->links.proc_maps = bpf_program__attach_iter(
@@ -1462,25 +1462,25 @@ static void test_task_vma_common(struct bpf_iter_attach_opts *opts)
out:
close(proc_maps_fd);
close(iter_fd);
- bpf_iter_task_vma__destroy(skel);
+ bpf_iter_task_vmas__destroy(skel);
}
static void test_task_vma_dead_task(void)
{
- struct bpf_iter_task_vma *skel;
+ struct bpf_iter_task_vmas *skel;
int wstatus, child_pid = -1;
time_t start_tm, cur_tm;
int err, iter_fd = -1;
int wait_sec = 3;
- skel = bpf_iter_task_vma__open();
- if (!ASSERT_OK_PTR(skel, "bpf_iter_task_vma__open"))
+ skel = bpf_iter_task_vmas__open();
+ if (!ASSERT_OK_PTR(skel, "bpf_iter_task_vmas__open"))
return;
skel->bss->pid = getpid();
- err = bpf_iter_task_vma__load(skel);
- if (!ASSERT_OK(err, "bpf_iter_task_vma__load"))
+ err = bpf_iter_task_vmas__load(skel);
+ if (!ASSERT_OK(err, "bpf_iter_task_vmas__load"))
goto out;
skel->links.proc_maps = bpf_program__attach_iter(
@@ -1533,7 +1533,7 @@ static void test_task_vma_dead_task(void)
out:
waitpid(child_pid, &wstatus, 0);
close(iter_fd);
- bpf_iter_task_vma__destroy(skel);
+ bpf_iter_task_vmas__destroy(skel);
}
void test_bpf_sockmap_map_iter_fd(void)
diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c
index 4e0cdb593318..92d51f377fe5 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf.c
@@ -7296,7 +7296,7 @@ static struct btf_dedup_test dedup_tests[] = {
BTF_FUNC_PROTO_ENC(0, 2), /* [3] */
BTF_FUNC_PROTO_ARG_ENC(NAME_NTH(2), 1),
BTF_FUNC_PROTO_ARG_ENC(NAME_NTH(3), 1),
- BTF_FUNC_ENC(NAME_NTH(4), 2), /* [4] */
+ BTF_FUNC_ENC(NAME_NTH(4), 3), /* [4] */
/* tag -> t */
BTF_DECL_TAG_ENC(NAME_NTH(5), 2, -1), /* [5] */
BTF_DECL_TAG_ENC(NAME_NTH(5), 2, -1), /* [6] */
@@ -7317,7 +7317,7 @@ static struct btf_dedup_test dedup_tests[] = {
BTF_FUNC_PROTO_ENC(0, 2), /* [3] */
BTF_FUNC_PROTO_ARG_ENC(NAME_NTH(2), 1),
BTF_FUNC_PROTO_ARG_ENC(NAME_NTH(3), 1),
- BTF_FUNC_ENC(NAME_NTH(4), 2), /* [4] */
+ BTF_FUNC_ENC(NAME_NTH(4), 3), /* [4] */
BTF_DECL_TAG_ENC(NAME_NTH(5), 2, -1), /* [5] */
BTF_DECL_TAG_ENC(NAME_NTH(5), 4, -1), /* [6] */
BTF_DECL_TAG_ENC(NAME_NTH(5), 4, 1), /* [7] */
diff --git a/tools/testing/selftests/bpf/prog_tests/connect_ping.c b/tools/testing/selftests/bpf/prog_tests/connect_ping.c
index 289218c2216c..40fe571f2fe7 100644
--- a/tools/testing/selftests/bpf/prog_tests/connect_ping.c
+++ b/tools/testing/selftests/bpf/prog_tests/connect_ping.c
@@ -28,9 +28,9 @@ static void subtest(int cgroup_fd, struct connect_ping *skel,
.sin6_family = AF_INET6,
.sin6_addr = IN6ADDR_LOOPBACK_INIT,
};
- struct sockaddr *sa;
+ struct sockaddr *sa = NULL;
socklen_t sa_len;
- int protocol;
+ int protocol = -1;
int sock_fd;
switch (family) {
diff --git a/tools/testing/selftests/bpf/prog_tests/exceptions.c b/tools/testing/selftests/bpf/prog_tests/exceptions.c
new file mode 100644
index 000000000000..516f4a13013c
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/exceptions.c
@@ -0,0 +1,409 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <network_helpers.h>
+
+#include "exceptions.skel.h"
+#include "exceptions_ext.skel.h"
+#include "exceptions_fail.skel.h"
+#include "exceptions_assert.skel.h"
+
+static char log_buf[1024 * 1024];
+
+static void test_exceptions_failure(void)
+{
+ RUN_TESTS(exceptions_fail);
+}
+
+static void test_exceptions_success(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, ropts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+ struct exceptions_ext *eskel = NULL;
+ struct exceptions *skel;
+ int ret;
+
+ skel = exceptions__open();
+ if (!ASSERT_OK_PTR(skel, "exceptions__open"))
+ return;
+
+ ret = exceptions__load(skel);
+ if (!ASSERT_OK(ret, "exceptions__load"))
+ goto done;
+
+ if (!ASSERT_OK(bpf_map_update_elem(bpf_map__fd(skel->maps.jmp_table), &(int){0},
+ &(int){bpf_program__fd(skel->progs.exception_tail_call_target)}, BPF_ANY),
+ "bpf_map_update_elem jmp_table"))
+ goto done;
+
+#define RUN_SUCCESS(_prog, return_val) \
+ if (!test__start_subtest(#_prog)) goto _prog##_##return_val; \
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs._prog), &ropts); \
+ ASSERT_OK(ret, #_prog " prog run ret"); \
+ ASSERT_EQ(ropts.retval, return_val, #_prog " prog run retval"); \
+ _prog##_##return_val:
+
+ RUN_SUCCESS(exception_throw_always_1, 64);
+ RUN_SUCCESS(exception_throw_always_2, 32);
+ RUN_SUCCESS(exception_throw_unwind_1, 16);
+ RUN_SUCCESS(exception_throw_unwind_2, 32);
+ RUN_SUCCESS(exception_throw_default, 0);
+ RUN_SUCCESS(exception_throw_default_value, 5);
+ RUN_SUCCESS(exception_tail_call, 24);
+ RUN_SUCCESS(exception_ext, 0);
+ RUN_SUCCESS(exception_ext_mod_cb_runtime, 35);
+ RUN_SUCCESS(exception_throw_subprog, 1);
+ RUN_SUCCESS(exception_assert_nz_gfunc, 1);
+ RUN_SUCCESS(exception_assert_zero_gfunc, 1);
+ RUN_SUCCESS(exception_assert_neg_gfunc, 1);
+ RUN_SUCCESS(exception_assert_pos_gfunc, 1);
+ RUN_SUCCESS(exception_assert_negeq_gfunc, 1);
+ RUN_SUCCESS(exception_assert_poseq_gfunc, 1);
+ RUN_SUCCESS(exception_assert_nz_gfunc_with, 1);
+ RUN_SUCCESS(exception_assert_zero_gfunc_with, 1);
+ RUN_SUCCESS(exception_assert_neg_gfunc_with, 1);
+ RUN_SUCCESS(exception_assert_pos_gfunc_with, 1);
+ RUN_SUCCESS(exception_assert_negeq_gfunc_with, 1);
+ RUN_SUCCESS(exception_assert_poseq_gfunc_with, 1);
+ RUN_SUCCESS(exception_bad_assert_nz_gfunc, 0);
+ RUN_SUCCESS(exception_bad_assert_zero_gfunc, 0);
+ RUN_SUCCESS(exception_bad_assert_neg_gfunc, 0);
+ RUN_SUCCESS(exception_bad_assert_pos_gfunc, 0);
+ RUN_SUCCESS(exception_bad_assert_negeq_gfunc, 0);
+ RUN_SUCCESS(exception_bad_assert_poseq_gfunc, 0);
+ RUN_SUCCESS(exception_bad_assert_nz_gfunc_with, 100);
+ RUN_SUCCESS(exception_bad_assert_zero_gfunc_with, 105);
+ RUN_SUCCESS(exception_bad_assert_neg_gfunc_with, 200);
+ RUN_SUCCESS(exception_bad_assert_pos_gfunc_with, 0);
+ RUN_SUCCESS(exception_bad_assert_negeq_gfunc_with, 101);
+ RUN_SUCCESS(exception_bad_assert_poseq_gfunc_with, 99);
+ RUN_SUCCESS(exception_assert_range, 1);
+ RUN_SUCCESS(exception_assert_range_with, 1);
+ RUN_SUCCESS(exception_bad_assert_range, 0);
+ RUN_SUCCESS(exception_bad_assert_range_with, 10);
+
+#define RUN_EXT(load_ret, attach_err, expr, msg, after_link) \
+ { \
+ LIBBPF_OPTS(bpf_object_open_opts, o, .kernel_log_buf = log_buf, \
+ .kernel_log_size = sizeof(log_buf), \
+ .kernel_log_level = 2); \
+ exceptions_ext__destroy(eskel); \
+ eskel = exceptions_ext__open_opts(&o); \
+ struct bpf_program *prog = NULL; \
+ struct bpf_link *link = NULL; \
+ if (!ASSERT_OK_PTR(eskel, "exceptions_ext__open")) \
+ goto done; \
+ (expr); \
+ ASSERT_OK_PTR(bpf_program__name(prog), bpf_program__name(prog)); \
+ if (!ASSERT_EQ(exceptions_ext__load(eskel), load_ret, \
+ "exceptions_ext__load")) { \
+ printf("%s\n", log_buf); \
+ goto done; \
+ } \
+ if (load_ret != 0) { \
+ if (!ASSERT_OK_PTR(strstr(log_buf, msg), "strstr")) { \
+ printf("%s\n", log_buf); \
+ goto done; \
+ } \
+ } \
+ if (!load_ret && attach_err) { \
+ if (!ASSERT_ERR_PTR(link = bpf_program__attach(prog), "attach err")) \
+ goto done; \
+ } else if (!load_ret) { \
+ if (!ASSERT_OK_PTR(link = bpf_program__attach(prog), "attach ok")) \
+ goto done; \
+ (void)(after_link); \
+ bpf_link__destroy(link); \
+ } \
+ }
+
+ if (test__start_subtest("non-throwing fentry -> exception_cb"))
+ RUN_EXT(-EINVAL, true, ({
+ prog = eskel->progs.pfentry;
+ bpf_program__set_autoload(prog, true);
+ if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+ bpf_program__fd(skel->progs.exception_ext_mod_cb_runtime),
+ "exception_cb_mod"), "set_attach_target"))
+ goto done;
+ }), "FENTRY/FEXIT programs cannot attach to exception callback", 0);
+
+ if (test__start_subtest("throwing fentry -> exception_cb"))
+ RUN_EXT(-EINVAL, true, ({
+ prog = eskel->progs.throwing_fentry;
+ bpf_program__set_autoload(prog, true);
+ if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+ bpf_program__fd(skel->progs.exception_ext_mod_cb_runtime),
+ "exception_cb_mod"), "set_attach_target"))
+ goto done;
+ }), "FENTRY/FEXIT programs cannot attach to exception callback", 0);
+
+ if (test__start_subtest("non-throwing fexit -> exception_cb"))
+ RUN_EXT(-EINVAL, true, ({
+ prog = eskel->progs.pfexit;
+ bpf_program__set_autoload(prog, true);
+ if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+ bpf_program__fd(skel->progs.exception_ext_mod_cb_runtime),
+ "exception_cb_mod"), "set_attach_target"))
+ goto done;
+ }), "FENTRY/FEXIT programs cannot attach to exception callback", 0);
+
+ if (test__start_subtest("throwing fexit -> exception_cb"))
+ RUN_EXT(-EINVAL, true, ({
+ prog = eskel->progs.throwing_fexit;
+ bpf_program__set_autoload(prog, true);
+ if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+ bpf_program__fd(skel->progs.exception_ext_mod_cb_runtime),
+ "exception_cb_mod"), "set_attach_target"))
+ goto done;
+ }), "FENTRY/FEXIT programs cannot attach to exception callback", 0);
+
+ if (test__start_subtest("throwing extension (with custom cb) -> exception_cb"))
+ RUN_EXT(-EINVAL, true, ({
+ prog = eskel->progs.throwing_exception_cb_extension;
+ bpf_program__set_autoload(prog, true);
+ if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+ bpf_program__fd(skel->progs.exception_ext_mod_cb_runtime),
+ "exception_cb_mod"), "set_attach_target"))
+ goto done;
+ }), "Extension programs cannot attach to exception callback", 0);
+
+ if (test__start_subtest("throwing extension -> global func in exception_cb"))
+ RUN_EXT(0, false, ({
+ prog = eskel->progs.throwing_exception_cb_extension;
+ bpf_program__set_autoload(prog, true);
+ if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+ bpf_program__fd(skel->progs.exception_ext_mod_cb_runtime),
+ "exception_cb_mod_global"), "set_attach_target"))
+ goto done;
+ }), "", ({ RUN_SUCCESS(exception_ext_mod_cb_runtime, 131); }));
+
+ if (test__start_subtest("throwing extension (with custom cb) -> global func in exception_cb"))
+ RUN_EXT(0, false, ({
+ prog = eskel->progs.throwing_extension;
+ bpf_program__set_autoload(prog, true);
+ if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+ bpf_program__fd(skel->progs.exception_ext),
+ "exception_ext_global"), "set_attach_target"))
+ goto done;
+ }), "", ({ RUN_SUCCESS(exception_ext, 128); }));
+
+ if (test__start_subtest("non-throwing fentry -> non-throwing subprog"))
+ /* non-throwing fentry -> non-throwing subprog : OK */
+ RUN_EXT(0, false, ({
+ prog = eskel->progs.pfentry;
+ bpf_program__set_autoload(prog, true);
+ if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+ bpf_program__fd(skel->progs.exception_throw_subprog),
+ "subprog"), "set_attach_target"))
+ goto done;
+ }), "", 0);
+
+ if (test__start_subtest("throwing fentry -> non-throwing subprog"))
+ /* throwing fentry -> non-throwing subprog : OK */
+ RUN_EXT(0, false, ({
+ prog = eskel->progs.throwing_fentry;
+ bpf_program__set_autoload(prog, true);
+ if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+ bpf_program__fd(skel->progs.exception_throw_subprog),
+ "subprog"), "set_attach_target"))
+ goto done;
+ }), "", 0);
+
+ if (test__start_subtest("non-throwing fentry -> throwing subprog"))
+ /* non-throwing fentry -> throwing subprog : OK */
+ RUN_EXT(0, false, ({
+ prog = eskel->progs.pfentry;
+ bpf_program__set_autoload(prog, true);
+ if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+ bpf_program__fd(skel->progs.exception_throw_subprog),
+ "throwing_subprog"), "set_attach_target"))
+ goto done;
+ }), "", 0);
+
+ if (test__start_subtest("throwing fentry -> throwing subprog"))
+ /* throwing fentry -> throwing subprog : OK */
+ RUN_EXT(0, false, ({
+ prog = eskel->progs.throwing_fentry;
+ bpf_program__set_autoload(prog, true);
+ if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+ bpf_program__fd(skel->progs.exception_throw_subprog),
+ "throwing_subprog"), "set_attach_target"))
+ goto done;
+ }), "", 0);
+
+ if (test__start_subtest("non-throwing fexit -> non-throwing subprog"))
+ /* non-throwing fexit -> non-throwing subprog : OK */
+ RUN_EXT(0, false, ({
+ prog = eskel->progs.pfexit;
+ bpf_program__set_autoload(prog, true);
+ if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+ bpf_program__fd(skel->progs.exception_throw_subprog),
+ "subprog"), "set_attach_target"))
+ goto done;
+ }), "", 0);
+
+ if (test__start_subtest("throwing fexit -> non-throwing subprog"))
+ /* throwing fexit -> non-throwing subprog : OK */
+ RUN_EXT(0, false, ({
+ prog = eskel->progs.throwing_fexit;
+ bpf_program__set_autoload(prog, true);
+ if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+ bpf_program__fd(skel->progs.exception_throw_subprog),
+ "subprog"), "set_attach_target"))
+ goto done;
+ }), "", 0);
+
+ if (test__start_subtest("non-throwing fexit -> throwing subprog"))
+ /* non-throwing fexit -> throwing subprog : OK */
+ RUN_EXT(0, false, ({
+ prog = eskel->progs.pfexit;
+ bpf_program__set_autoload(prog, true);
+ if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+ bpf_program__fd(skel->progs.exception_throw_subprog),
+ "throwing_subprog"), "set_attach_target"))
+ goto done;
+ }), "", 0);
+
+ if (test__start_subtest("throwing fexit -> throwing subprog"))
+ /* throwing fexit -> throwing subprog : OK */
+ RUN_EXT(0, false, ({
+ prog = eskel->progs.throwing_fexit;
+ bpf_program__set_autoload(prog, true);
+ if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+ bpf_program__fd(skel->progs.exception_throw_subprog),
+ "throwing_subprog"), "set_attach_target"))
+ goto done;
+ }), "", 0);
+
+ /* fmod_ret not allowed for subprog - Check so we remember to handle its
+ * throwing specification compatibility with target when supported.
+ */
+ if (test__start_subtest("non-throwing fmod_ret -> non-throwing subprog"))
+ RUN_EXT(-EINVAL, true, ({
+ prog = eskel->progs.pfmod_ret;
+ bpf_program__set_autoload(prog, true);
+ if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+ bpf_program__fd(skel->progs.exception_throw_subprog),
+ "subprog"), "set_attach_target"))
+ goto done;
+ }), "can't modify return codes of BPF program", 0);
+
+ /* fmod_ret not allowed for subprog - Check so we remember to handle its
+ * throwing specification compatibility with target when supported.
+ */
+ if (test__start_subtest("non-throwing fmod_ret -> non-throwing global subprog"))
+ RUN_EXT(-EINVAL, true, ({
+ prog = eskel->progs.pfmod_ret;
+ bpf_program__set_autoload(prog, true);
+ if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+ bpf_program__fd(skel->progs.exception_throw_subprog),
+ "global_subprog"), "set_attach_target"))
+ goto done;
+ }), "can't modify return codes of BPF program", 0);
+
+ if (test__start_subtest("non-throwing extension -> non-throwing subprog"))
+ /* non-throwing extension -> non-throwing subprog : BAD (!global) */
+ RUN_EXT(-EINVAL, true, ({
+ prog = eskel->progs.extension;
+ bpf_program__set_autoload(prog, true);
+ if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+ bpf_program__fd(skel->progs.exception_throw_subprog),
+ "subprog"), "set_attach_target"))
+ goto done;
+ }), "subprog() is not a global function", 0);
+
+ if (test__start_subtest("non-throwing extension -> throwing subprog"))
+ /* non-throwing extension -> throwing subprog : BAD (!global) */
+ RUN_EXT(-EINVAL, true, ({
+ prog = eskel->progs.extension;
+ bpf_program__set_autoload(prog, true);
+ if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+ bpf_program__fd(skel->progs.exception_throw_subprog),
+ "throwing_subprog"), "set_attach_target"))
+ goto done;
+ }), "throwing_subprog() is not a global function", 0);
+
+ if (test__start_subtest("non-throwing extension -> non-throwing subprog"))
+ /* non-throwing extension -> non-throwing global subprog : OK */
+ RUN_EXT(0, false, ({
+ prog = eskel->progs.extension;
+ bpf_program__set_autoload(prog, true);
+ if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+ bpf_program__fd(skel->progs.exception_throw_subprog),
+ "global_subprog"), "set_attach_target"))
+ goto done;
+ }), "", 0);
+
+ if (test__start_subtest("non-throwing extension -> throwing global subprog"))
+ /* non-throwing extension -> throwing global subprog : OK */
+ RUN_EXT(0, false, ({
+ prog = eskel->progs.extension;
+ bpf_program__set_autoload(prog, true);
+ if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+ bpf_program__fd(skel->progs.exception_throw_subprog),
+ "throwing_global_subprog"), "set_attach_target"))
+ goto done;
+ }), "", 0);
+
+ if (test__start_subtest("throwing extension -> throwing global subprog"))
+ /* throwing extension -> throwing global subprog : OK */
+ RUN_EXT(0, false, ({
+ prog = eskel->progs.throwing_extension;
+ bpf_program__set_autoload(prog, true);
+ if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+ bpf_program__fd(skel->progs.exception_throw_subprog),
+ "throwing_global_subprog"), "set_attach_target"))
+ goto done;
+ }), "", 0);
+
+ if (test__start_subtest("throwing extension -> non-throwing global subprog"))
+ /* throwing extension -> non-throwing global subprog : OK */
+ RUN_EXT(0, false, ({
+ prog = eskel->progs.throwing_extension;
+ bpf_program__set_autoload(prog, true);
+ if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+ bpf_program__fd(skel->progs.exception_throw_subprog),
+ "global_subprog"), "set_attach_target"))
+ goto done;
+ }), "", 0);
+
+ if (test__start_subtest("non-throwing extension -> main subprog"))
+ /* non-throwing extension -> main subprog : OK */
+ RUN_EXT(0, false, ({
+ prog = eskel->progs.extension;
+ bpf_program__set_autoload(prog, true);
+ if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+ bpf_program__fd(skel->progs.exception_throw_subprog),
+ "exception_throw_subprog"), "set_attach_target"))
+ goto done;
+ }), "", 0);
+
+ if (test__start_subtest("throwing extension -> main subprog"))
+ /* throwing extension -> main subprog : OK */
+ RUN_EXT(0, false, ({
+ prog = eskel->progs.throwing_extension;
+ bpf_program__set_autoload(prog, true);
+ if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+ bpf_program__fd(skel->progs.exception_throw_subprog),
+ "exception_throw_subprog"), "set_attach_target"))
+ goto done;
+ }), "", 0);
+
+done:
+ exceptions_ext__destroy(eskel);
+ exceptions__destroy(skel);
+}
+
+static void test_exceptions_assertions(void)
+{
+ RUN_TESTS(exceptions_assert);
+}
+
+void test_exceptions(void)
+{
+ test_exceptions_success();
+ test_exceptions_failure();
+ test_exceptions_assertions();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/fib_lookup.c b/tools/testing/selftests/bpf/prog_tests/fib_lookup.c
index 2fd05649bad1..4ad4cd69152e 100644
--- a/tools/testing/selftests/bpf/prog_tests/fib_lookup.c
+++ b/tools/testing/selftests/bpf/prog_tests/fib_lookup.c
@@ -11,9 +11,13 @@
#define NS_TEST "fib_lookup_ns"
#define IPV6_IFACE_ADDR "face::face"
+#define IPV6_IFACE_ADDR_SEC "cafe::cafe"
+#define IPV6_ADDR_DST "face::3"
#define IPV6_NUD_FAILED_ADDR "face::1"
#define IPV6_NUD_STALE_ADDR "face::2"
#define IPV4_IFACE_ADDR "10.0.0.254"
+#define IPV4_IFACE_ADDR_SEC "10.1.0.254"
+#define IPV4_ADDR_DST "10.2.0.254"
#define IPV4_NUD_FAILED_ADDR "10.0.0.1"
#define IPV4_NUD_STALE_ADDR "10.0.0.2"
#define IPV4_TBID_ADDR "172.0.0.254"
@@ -31,6 +35,7 @@ struct fib_lookup_test {
const char *desc;
const char *daddr;
int expected_ret;
+ const char *expected_src;
int lookup_flags;
__u32 tbid;
__u8 dmac[6];
@@ -69,6 +74,22 @@ static const struct fib_lookup_test tests[] = {
.daddr = IPV6_TBID_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
.lookup_flags = BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_TBID, .tbid = 100,
.dmac = DMAC_INIT2, },
+ { .desc = "IPv4 set src addr from netdev",
+ .daddr = IPV4_NUD_FAILED_ADDR, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
+ .expected_src = IPV4_IFACE_ADDR,
+ .lookup_flags = BPF_FIB_LOOKUP_SRC | BPF_FIB_LOOKUP_SKIP_NEIGH, },
+ { .desc = "IPv6 set src addr from netdev",
+ .daddr = IPV6_NUD_FAILED_ADDR, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
+ .expected_src = IPV6_IFACE_ADDR,
+ .lookup_flags = BPF_FIB_LOOKUP_SRC | BPF_FIB_LOOKUP_SKIP_NEIGH, },
+ { .desc = "IPv4 set prefsrc addr from route",
+ .daddr = IPV4_ADDR_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
+ .expected_src = IPV4_IFACE_ADDR_SEC,
+ .lookup_flags = BPF_FIB_LOOKUP_SRC | BPF_FIB_LOOKUP_SKIP_NEIGH, },
+ { .desc = "IPv6 set prefsrc addr route",
+ .daddr = IPV6_ADDR_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
+ .expected_src = IPV6_IFACE_ADDR_SEC,
+ .lookup_flags = BPF_FIB_LOOKUP_SRC | BPF_FIB_LOOKUP_SKIP_NEIGH, },
};
static int ifindex;
@@ -97,6 +118,13 @@ static int setup_netns(void)
SYS(fail, "ip neigh add %s dev veth1 nud failed", IPV4_NUD_FAILED_ADDR);
SYS(fail, "ip neigh add %s dev veth1 lladdr %s nud stale", IPV4_NUD_STALE_ADDR, DMAC);
+ /* Setup for prefsrc IP addr selection */
+ SYS(fail, "ip addr add %s/24 dev veth1", IPV4_IFACE_ADDR_SEC);
+ SYS(fail, "ip route add %s/32 dev veth1 src %s", IPV4_ADDR_DST, IPV4_IFACE_ADDR_SEC);
+
+ SYS(fail, "ip addr add %s/64 dev veth1 nodad", IPV6_IFACE_ADDR_SEC);
+ SYS(fail, "ip route add %s/128 dev veth1 src %s", IPV6_ADDR_DST, IPV6_IFACE_ADDR_SEC);
+
/* Setup for tbid lookup tests */
SYS(fail, "ip addr add %s/24 dev veth2", IPV4_TBID_ADDR);
SYS(fail, "ip route del %s/24 dev veth2", IPV4_TBID_NET);
@@ -133,9 +161,12 @@ static int set_lookup_params(struct bpf_fib_lookup *params, const struct fib_loo
if (inet_pton(AF_INET6, test->daddr, params->ipv6_dst) == 1) {
params->family = AF_INET6;
- ret = inet_pton(AF_INET6, IPV6_IFACE_ADDR, params->ipv6_src);
- if (!ASSERT_EQ(ret, 1, "inet_pton(IPV6_IFACE_ADDR)"))
- return -1;
+ if (!(test->lookup_flags & BPF_FIB_LOOKUP_SRC)) {
+ ret = inet_pton(AF_INET6, IPV6_IFACE_ADDR, params->ipv6_src);
+ if (!ASSERT_EQ(ret, 1, "inet_pton(IPV6_IFACE_ADDR)"))
+ return -1;
+ }
+
return 0;
}
@@ -143,9 +174,12 @@ static int set_lookup_params(struct bpf_fib_lookup *params, const struct fib_loo
if (!ASSERT_EQ(ret, 1, "convert IP[46] address"))
return -1;
params->family = AF_INET;
- ret = inet_pton(AF_INET, IPV4_IFACE_ADDR, &params->ipv4_src);
- if (!ASSERT_EQ(ret, 1, "inet_pton(IPV4_IFACE_ADDR)"))
- return -1;
+
+ if (!(test->lookup_flags & BPF_FIB_LOOKUP_SRC)) {
+ ret = inet_pton(AF_INET, IPV4_IFACE_ADDR, &params->ipv4_src);
+ if (!ASSERT_EQ(ret, 1, "inet_pton(IPV4_IFACE_ADDR)"))
+ return -1;
+ }
return 0;
}
@@ -156,6 +190,40 @@ static void mac_str(char *b, const __u8 *mac)
mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]);
}
+static void assert_src_ip(struct bpf_fib_lookup *fib_params, const char *expected_src)
+{
+ int ret;
+ __u32 src6[4];
+ __be32 src4;
+
+ switch (fib_params->family) {
+ case AF_INET6:
+ ret = inet_pton(AF_INET6, expected_src, src6);
+ ASSERT_EQ(ret, 1, "inet_pton(expected_src)");
+
+ ret = memcmp(src6, fib_params->ipv6_src, sizeof(fib_params->ipv6_src));
+ if (!ASSERT_EQ(ret, 0, "fib_lookup ipv6 src")) {
+ char str_src6[64];
+
+ inet_ntop(AF_INET6, fib_params->ipv6_src, str_src6,
+ sizeof(str_src6));
+ printf("ipv6 expected %s actual %s ", expected_src,
+ str_src6);
+ }
+
+ break;
+ case AF_INET:
+ ret = inet_pton(AF_INET, expected_src, &src4);
+ ASSERT_EQ(ret, 1, "inet_pton(expected_src)");
+
+ ASSERT_EQ(fib_params->ipv4_src, src4, "fib_lookup ipv4 src");
+
+ break;
+ default:
+ PRINT_FAIL("invalid addr family: %d", fib_params->family);
+ }
+}
+
void test_fib_lookup(void)
{
struct bpf_fib_lookup *fib_params;
@@ -207,6 +275,9 @@ void test_fib_lookup(void)
ASSERT_EQ(skel->bss->fib_lookup_ret, tests[i].expected_ret,
"fib_lookup_ret");
+ if (tests[i].expected_src)
+ assert_src_ip(fib_params, tests[i].expected_src);
+
ret = memcmp(tests[i].dmac, fib_params->dmac, sizeof(tests[i].dmac));
if (!ASSERT_EQ(ret, 0, "dmac not match")) {
char expected[18], actual[18];
diff --git a/tools/testing/selftests/bpf/prog_tests/fill_link_info.c b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c
index 9d768e083714..97142a4db374 100644
--- a/tools/testing/selftests/bpf/prog_tests/fill_link_info.c
+++ b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c
@@ -308,7 +308,7 @@ void test_fill_link_info(void)
return;
/* load kallsyms to compare the addr */
- if (!ASSERT_OK(load_kallsyms_refresh(), "load_kallsyms_refresh"))
+ if (!ASSERT_OK(load_kallsyms(), "load_kallsyms"))
goto cleanup;
kprobe_addr = ksym_get_addr(KPROBE_FUNC);
diff --git a/tools/testing/selftests/bpf/prog_tests/iters.c b/tools/testing/selftests/bpf/prog_tests/iters.c
index 10804ae5ae97..b696873c5455 100644
--- a/tools/testing/selftests/bpf/prog_tests/iters.c
+++ b/tools/testing/selftests/bpf/prog_tests/iters.c
@@ -8,6 +8,7 @@
#include "iters_looping.skel.h"
#include "iters_num.skel.h"
#include "iters_testmod_seq.skel.h"
+#include "iters_task_vma.skel.h"
static void subtest_num_iters(void)
{
@@ -90,6 +91,61 @@ cleanup:
iters_testmod_seq__destroy(skel);
}
+static void subtest_task_vma_iters(void)
+{
+ unsigned long start, end, bpf_iter_start, bpf_iter_end;
+ struct iters_task_vma *skel;
+ char rest_of_line[1000];
+ unsigned int seen;
+ FILE *f = NULL;
+ int err;
+
+ skel = iters_task_vma__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+ return;
+
+ skel->bss->target_pid = getpid();
+
+ err = iters_task_vma__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto cleanup;
+
+ getpgid(skel->bss->target_pid);
+ iters_task_vma__detach(skel);
+
+ if (!ASSERT_GT(skel->bss->vmas_seen, 0, "vmas_seen_gt_zero"))
+ goto cleanup;
+
+ f = fopen("/proc/self/maps", "r");
+ if (!ASSERT_OK_PTR(f, "proc_maps_fopen"))
+ goto cleanup;
+
+ seen = 0;
+ while (fscanf(f, "%lx-%lx %[^\n]\n", &start, &end, rest_of_line) == 3) {
+ /* [vsyscall] vma isn't _really_ part of task->mm vmas.
+ * /proc/PID/maps returns it when out of vmas - see get_gate_vma
+ * calls in fs/proc/task_mmu.c
+ */
+ if (strstr(rest_of_line, "[vsyscall]"))
+ continue;
+
+ bpf_iter_start = skel->bss->vm_ranges[seen].vm_start;
+ bpf_iter_end = skel->bss->vm_ranges[seen].vm_end;
+
+ ASSERT_EQ(bpf_iter_start, start, "vma->vm_start match");
+ ASSERT_EQ(bpf_iter_end, end, "vma->vm_end match");
+ seen++;
+ }
+
+ if (!ASSERT_EQ(skel->bss->vmas_seen, seen, "vmas_seen_eq"))
+ goto cleanup;
+
+cleanup:
+ if (f)
+ fclose(f);
+ iters_task_vma__destroy(skel);
+}
+
void test_iters(void)
{
RUN_TESTS(iters_state_safety);
@@ -103,4 +159,6 @@ void test_iters(void)
subtest_num_iters();
if (test__start_subtest("testmod_seq"))
subtest_testmod_seq_iters();
+ if (test__start_subtest("task_vma"))
+ subtest_task_vma_iters();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_testmod_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_testmod_test.c
index 1fbe7e4ac00a..9d03528f05db 100644
--- a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_testmod_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_testmod_test.c
@@ -4,6 +4,8 @@
#include "trace_helpers.h"
#include "bpf/libbpf_internal.h"
+static struct ksyms *ksyms;
+
static void kprobe_multi_testmod_check(struct kprobe_multi *skel)
{
ASSERT_EQ(skel->bss->kprobe_testmod_test1_result, 1, "kprobe_test1_result");
@@ -50,12 +52,12 @@ static void test_testmod_attach_api_addrs(void)
LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
unsigned long long addrs[3];
- addrs[0] = ksym_get_addr("bpf_testmod_fentry_test1");
- ASSERT_NEQ(addrs[0], 0, "ksym_get_addr");
- addrs[1] = ksym_get_addr("bpf_testmod_fentry_test2");
- ASSERT_NEQ(addrs[1], 0, "ksym_get_addr");
- addrs[2] = ksym_get_addr("bpf_testmod_fentry_test3");
- ASSERT_NEQ(addrs[2], 0, "ksym_get_addr");
+ addrs[0] = ksym_get_addr_local(ksyms, "bpf_testmod_fentry_test1");
+ ASSERT_NEQ(addrs[0], 0, "ksym_get_addr_local");
+ addrs[1] = ksym_get_addr_local(ksyms, "bpf_testmod_fentry_test2");
+ ASSERT_NEQ(addrs[1], 0, "ksym_get_addr_local");
+ addrs[2] = ksym_get_addr_local(ksyms, "bpf_testmod_fentry_test3");
+ ASSERT_NEQ(addrs[2], 0, "ksym_get_addr_local");
opts.addrs = (const unsigned long *) addrs;
opts.cnt = ARRAY_SIZE(addrs);
@@ -79,11 +81,15 @@ static void test_testmod_attach_api_syms(void)
void serial_test_kprobe_multi_testmod_test(void)
{
- if (!ASSERT_OK(load_kallsyms_refresh(), "load_kallsyms_refresh"))
+ ksyms = load_kallsyms_local();
+ if (!ASSERT_OK_PTR(ksyms, "load_kallsyms_local"))
return;
if (test__start_subtest("testmod_attach_api_syms"))
test_testmod_attach_api_syms();
+
if (test__start_subtest("testmod_attach_api_addrs"))
test_testmod_attach_api_addrs();
+
+ free_kallsyms_local(ksyms);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/libbpf_str.c b/tools/testing/selftests/bpf/prog_tests/libbpf_str.c
index efb8bd43653c..c440ea3311ed 100644
--- a/tools/testing/selftests/bpf/prog_tests/libbpf_str.c
+++ b/tools/testing/selftests/bpf/prog_tests/libbpf_str.c
@@ -142,10 +142,14 @@ static void test_libbpf_bpf_map_type_str(void)
/* Special case for map_type_name BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED
* where it and BPF_MAP_TYPE_CGROUP_STORAGE have the same enum value
* (map_type). For this enum value, libbpf_bpf_map_type_str() picks
- * BPF_MAP_TYPE_CGROUP_STORAGE.
+ * BPF_MAP_TYPE_CGROUP_STORAGE. The same for
+ * BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE_DEPRECATED and
+ * BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE.
*/
if (strcmp(map_type_name, "BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED") == 0)
continue;
+ if (strcmp(map_type_name, "BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE_DEPRECATED") == 0)
+ continue;
ASSERT_STREQ(buf, map_type_name, "exp_str_value");
}
diff --git a/tools/testing/selftests/bpf/prog_tests/linked_list.c b/tools/testing/selftests/bpf/prog_tests/linked_list.c
index 18cf7b17463d..69dc31383b78 100644
--- a/tools/testing/selftests/bpf/prog_tests/linked_list.c
+++ b/tools/testing/selftests/bpf/prog_tests/linked_list.c
@@ -65,8 +65,8 @@ static struct {
{ "map_compat_raw_tp", "tracing progs cannot use bpf_{list_head,rb_root} yet" },
{ "map_compat_raw_tp_w", "tracing progs cannot use bpf_{list_head,rb_root} yet" },
{ "obj_type_id_oor", "local type ID argument must be in range [0, U32_MAX]" },
- { "obj_new_no_composite", "bpf_obj_new type ID argument must be of a struct" },
- { "obj_new_no_struct", "bpf_obj_new type ID argument must be of a struct" },
+ { "obj_new_no_composite", "bpf_obj_new/bpf_percpu_obj_new type ID argument must be of a struct" },
+ { "obj_new_no_struct", "bpf_obj_new/bpf_percpu_obj_new type ID argument must be of a struct" },
{ "obj_drop_non_zero_off", "R1 must have zero offset when passed to release func" },
{ "new_null_ret", "R0 invalid mem access 'ptr_or_null_'" },
{ "obj_new_acq", "Unreleased reference id=" },
@@ -268,7 +268,7 @@ end:
static void list_and_rb_node_same_struct(bool refcount_field)
{
- int bpf_rb_node_btf_id, bpf_refcount_btf_id, foo_btf_id;
+ int bpf_rb_node_btf_id, bpf_refcount_btf_id = 0, foo_btf_id;
struct btf *btf;
int id, err;
diff --git a/tools/testing/selftests/bpf/prog_tests/lwt_helpers.h b/tools/testing/selftests/bpf/prog_tests/lwt_helpers.h
index 61333f2a03f9..e9190574e79f 100644
--- a/tools/testing/selftests/bpf/prog_tests/lwt_helpers.h
+++ b/tools/testing/selftests/bpf/prog_tests/lwt_helpers.h
@@ -49,7 +49,8 @@ static int open_tuntap(const char *dev_name, bool need_mac)
return -1;
ifr.ifr_flags = IFF_NO_PI | (need_mac ? IFF_TAP : IFF_TUN);
- memcpy(ifr.ifr_name, dev_name, IFNAMSIZ);
+ strncpy(ifr.ifr_name, dev_name, IFNAMSIZ - 1);
+ ifr.ifr_name[IFNAMSIZ - 1] = '\0';
err = ioctl(fd, TUNSETIFF, &ifr);
if (!ASSERT_OK(err, "ioctl(TUNSETIFF)")) {
diff --git a/tools/testing/selftests/bpf/prog_tests/missed.c b/tools/testing/selftests/bpf/prog_tests/missed.c
new file mode 100644
index 000000000000..70d90c43537c
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/missed.c
@@ -0,0 +1,138 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include "missed_kprobe.skel.h"
+#include "missed_kprobe_recursion.skel.h"
+#include "missed_tp_recursion.skel.h"
+
+/*
+ * Putting kprobe on bpf_fentry_test1 that calls bpf_kfunc_common_test
+ * kfunc, which has also kprobe on. The latter won't get triggered due
+ * to kprobe recursion check and kprobe missed counter is incremented.
+ */
+static void test_missed_perf_kprobe(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ struct bpf_link_info info = {};
+ struct missed_kprobe *skel;
+ __u32 len = sizeof(info);
+ int err, prog_fd;
+
+ skel = missed_kprobe__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "missed_kprobe__open_and_load"))
+ goto cleanup;
+
+ err = missed_kprobe__attach(skel);
+ if (!ASSERT_OK(err, "missed_kprobe__attach"))
+ goto cleanup;
+
+ prog_fd = bpf_program__fd(skel->progs.trigger);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(topts.retval, 0, "test_run");
+
+ err = bpf_link_get_info_by_fd(bpf_link__fd(skel->links.test2), &info, &len);
+ if (!ASSERT_OK(err, "bpf_link_get_info_by_fd"))
+ goto cleanup;
+
+ ASSERT_EQ(info.type, BPF_LINK_TYPE_PERF_EVENT, "info.type");
+ ASSERT_EQ(info.perf_event.type, BPF_PERF_EVENT_KPROBE, "info.perf_event.type");
+ ASSERT_EQ(info.perf_event.kprobe.missed, 1, "info.perf_event.kprobe.missed");
+
+cleanup:
+ missed_kprobe__destroy(skel);
+}
+
+static __u64 get_missed_count(int fd)
+{
+ struct bpf_prog_info info = {};
+ __u32 len = sizeof(info);
+ int err;
+
+ err = bpf_prog_get_info_by_fd(fd, &info, &len);
+ if (!ASSERT_OK(err, "bpf_prog_get_info_by_fd"))
+ return (__u64) -1;
+ return info.recursion_misses;
+}
+
+/*
+ * Putting kprobe.multi on bpf_fentry_test1 that calls bpf_kfunc_common_test
+ * kfunc which has 3 perf event kprobes and 1 kprobe.multi attached.
+ *
+ * Because fprobe (kprobe.multi attach layear) does not have strict recursion
+ * check the kprobe's bpf_prog_active check is hit for test2-5.
+ */
+static void test_missed_kprobe_recursion(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ struct missed_kprobe_recursion *skel;
+ int err, prog_fd;
+
+ skel = missed_kprobe_recursion__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "missed_kprobe_recursion__open_and_load"))
+ goto cleanup;
+
+ err = missed_kprobe_recursion__attach(skel);
+ if (!ASSERT_OK(err, "missed_kprobe_recursion__attach"))
+ goto cleanup;
+
+ prog_fd = bpf_program__fd(skel->progs.trigger);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(topts.retval, 0, "test_run");
+
+ ASSERT_EQ(get_missed_count(bpf_program__fd(skel->progs.test1)), 0, "test1_recursion_misses");
+ ASSERT_GE(get_missed_count(bpf_program__fd(skel->progs.test2)), 1, "test2_recursion_misses");
+ ASSERT_GE(get_missed_count(bpf_program__fd(skel->progs.test3)), 1, "test3_recursion_misses");
+ ASSERT_GE(get_missed_count(bpf_program__fd(skel->progs.test4)), 1, "test4_recursion_misses");
+ ASSERT_GE(get_missed_count(bpf_program__fd(skel->progs.test5)), 1, "test5_recursion_misses");
+
+cleanup:
+ missed_kprobe_recursion__destroy(skel);
+}
+
+/*
+ * Putting kprobe on bpf_fentry_test1 that calls bpf_printk and invokes
+ * bpf_trace_printk tracepoint. The bpf_trace_printk tracepoint has test[234]
+ * programs attached to it.
+ *
+ * Because kprobe execution goes through bpf_prog_active check, programs
+ * attached to the tracepoint will fail the recursion check and increment
+ * the recursion_misses stats.
+ */
+static void test_missed_tp_recursion(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ struct missed_tp_recursion *skel;
+ int err, prog_fd;
+
+ skel = missed_tp_recursion__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "missed_tp_recursion__open_and_load"))
+ goto cleanup;
+
+ err = missed_tp_recursion__attach(skel);
+ if (!ASSERT_OK(err, "missed_tp_recursion__attach"))
+ goto cleanup;
+
+ prog_fd = bpf_program__fd(skel->progs.trigger);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(topts.retval, 0, "test_run");
+
+ ASSERT_EQ(get_missed_count(bpf_program__fd(skel->progs.test1)), 0, "test1_recursion_misses");
+ ASSERT_EQ(get_missed_count(bpf_program__fd(skel->progs.test2)), 1, "test2_recursion_misses");
+ ASSERT_EQ(get_missed_count(bpf_program__fd(skel->progs.test3)), 1, "test3_recursion_misses");
+ ASSERT_EQ(get_missed_count(bpf_program__fd(skel->progs.test4)), 1, "test4_recursion_misses");
+
+cleanup:
+ missed_tp_recursion__destroy(skel);
+}
+
+void test_missed(void)
+{
+ if (test__start_subtest("perf_kprobe"))
+ test_missed_perf_kprobe();
+ if (test__start_subtest("kprobe_recursion"))
+ test_missed_kprobe_recursion();
+ if (test__start_subtest("tp_recursion"))
+ test_missed_tp_recursion();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/module_fentry_shadow.c b/tools/testing/selftests/bpf/prog_tests/module_fentry_shadow.c
index c7636e18b1eb..aa9f67eb1c95 100644
--- a/tools/testing/selftests/bpf/prog_tests/module_fentry_shadow.c
+++ b/tools/testing/selftests/bpf/prog_tests/module_fentry_shadow.c
@@ -61,6 +61,11 @@ void test_module_fentry_shadow(void)
int link_fd[2] = {};
__s32 btf_id[2] = {};
+ if (!env.has_testmod) {
+ test__skip();
+ return;
+ }
+
LIBBPF_OPTS(bpf_prog_load_opts, load_opts,
.expected_attach_type = BPF_TRACE_FENTRY,
);
diff --git a/tools/testing/selftests/bpf/prog_tests/percpu_alloc.c b/tools/testing/selftests/bpf/prog_tests/percpu_alloc.c
new file mode 100644
index 000000000000..343da65864d6
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/percpu_alloc.c
@@ -0,0 +1,128 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include "percpu_alloc_array.skel.h"
+#include "percpu_alloc_cgrp_local_storage.skel.h"
+#include "percpu_alloc_fail.skel.h"
+
+static void test_array(void)
+{
+ struct percpu_alloc_array *skel;
+ int err, prog_fd;
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+
+ skel = percpu_alloc_array__open();
+ if (!ASSERT_OK_PTR(skel, "percpu_alloc_array__open"))
+ return;
+
+ bpf_program__set_autoload(skel->progs.test_array_map_1, true);
+ bpf_program__set_autoload(skel->progs.test_array_map_2, true);
+ bpf_program__set_autoload(skel->progs.test_array_map_3, true);
+ bpf_program__set_autoload(skel->progs.test_array_map_4, true);
+
+ skel->bss->my_pid = getpid();
+ skel->rodata->nr_cpus = libbpf_num_possible_cpus();
+
+ err = percpu_alloc_array__load(skel);
+ if (!ASSERT_OK(err, "percpu_alloc_array__load"))
+ goto out;
+
+ err = percpu_alloc_array__attach(skel);
+ if (!ASSERT_OK(err, "percpu_alloc_array__attach"))
+ goto out;
+
+ prog_fd = bpf_program__fd(skel->progs.test_array_map_1);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run array_map 1-4");
+ ASSERT_EQ(topts.retval, 0, "test_run array_map 1-4");
+ ASSERT_EQ(skel->bss->cpu0_field_d, 2, "cpu0_field_d");
+ ASSERT_EQ(skel->bss->sum_field_c, 1, "sum_field_c");
+out:
+ percpu_alloc_array__destroy(skel);
+}
+
+static void test_array_sleepable(void)
+{
+ struct percpu_alloc_array *skel;
+ int err, prog_fd;
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+
+ skel = percpu_alloc_array__open();
+ if (!ASSERT_OK_PTR(skel, "percpu_alloc__open"))
+ return;
+
+ bpf_program__set_autoload(skel->progs.test_array_map_10, true);
+
+ skel->bss->my_pid = getpid();
+ skel->rodata->nr_cpus = libbpf_num_possible_cpus();
+
+ err = percpu_alloc_array__load(skel);
+ if (!ASSERT_OK(err, "percpu_alloc_array__load"))
+ goto out;
+
+ err = percpu_alloc_array__attach(skel);
+ if (!ASSERT_OK(err, "percpu_alloc_array__attach"))
+ goto out;
+
+ prog_fd = bpf_program__fd(skel->progs.test_array_map_10);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run array_map_10");
+ ASSERT_EQ(topts.retval, 0, "test_run array_map_10");
+ ASSERT_EQ(skel->bss->cpu0_field_d, 2, "cpu0_field_d");
+ ASSERT_EQ(skel->bss->sum_field_c, 1, "sum_field_c");
+out:
+ percpu_alloc_array__destroy(skel);
+}
+
+static void test_cgrp_local_storage(void)
+{
+ struct percpu_alloc_cgrp_local_storage *skel;
+ int err, cgroup_fd, prog_fd;
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+
+ cgroup_fd = test__join_cgroup("/percpu_alloc");
+ if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup /percpu_alloc"))
+ return;
+
+ skel = percpu_alloc_cgrp_local_storage__open();
+ if (!ASSERT_OK_PTR(skel, "percpu_alloc_cgrp_local_storage__open"))
+ goto close_fd;
+
+ skel->bss->my_pid = getpid();
+ skel->rodata->nr_cpus = libbpf_num_possible_cpus();
+
+ err = percpu_alloc_cgrp_local_storage__load(skel);
+ if (!ASSERT_OK(err, "percpu_alloc_cgrp_local_storage__load"))
+ goto destroy_skel;
+
+ err = percpu_alloc_cgrp_local_storage__attach(skel);
+ if (!ASSERT_OK(err, "percpu_alloc_cgrp_local_storage__attach"))
+ goto destroy_skel;
+
+ prog_fd = bpf_program__fd(skel->progs.test_cgrp_local_storage_1);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run cgrp_local_storage 1-3");
+ ASSERT_EQ(topts.retval, 0, "test_run cgrp_local_storage 1-3");
+ ASSERT_EQ(skel->bss->cpu0_field_d, 2, "cpu0_field_d");
+ ASSERT_EQ(skel->bss->sum_field_c, 1, "sum_field_c");
+
+destroy_skel:
+ percpu_alloc_cgrp_local_storage__destroy(skel);
+close_fd:
+ close(cgroup_fd);
+}
+
+static void test_failure(void) {
+ RUN_TESTS(percpu_alloc_fail);
+}
+
+void test_percpu_alloc(void)
+{
+ if (test__start_subtest("array"))
+ test_array();
+ if (test__start_subtest("array_sleepable"))
+ test_array_sleepable();
+ if (test__start_subtest("cgrp_local_storage"))
+ test_cgrp_local_storage();
+ if (test__start_subtest("failure_tests"))
+ test_failure();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/preempted_bpf_ma_op.c b/tools/testing/selftests/bpf/prog_tests/preempted_bpf_ma_op.c
new file mode 100644
index 000000000000..3a2ec3923fca
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/preempted_bpf_ma_op.c
@@ -0,0 +1,89 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023. Huawei Technologies Co., Ltd */
+#define _GNU_SOURCE
+#include <sched.h>
+#include <pthread.h>
+#include <stdbool.h>
+#include <test_progs.h>
+
+#include "preempted_bpf_ma_op.skel.h"
+
+#define ALLOC_THREAD_NR 4
+#define ALLOC_LOOP_NR 512
+
+struct alloc_ctx {
+ /* output */
+ int run_err;
+ /* input */
+ int fd;
+ bool *nomem_err;
+};
+
+static void *run_alloc_prog(void *data)
+{
+ struct alloc_ctx *ctx = data;
+ cpu_set_t cpu_set;
+ int i;
+
+ CPU_ZERO(&cpu_set);
+ CPU_SET(0, &cpu_set);
+ pthread_setaffinity_np(pthread_self(), sizeof(cpu_set), &cpu_set);
+
+ for (i = 0; i < ALLOC_LOOP_NR && !*ctx->nomem_err; i++) {
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ int err;
+
+ err = bpf_prog_test_run_opts(ctx->fd, &topts);
+ ctx->run_err |= err | topts.retval;
+ }
+
+ return NULL;
+}
+
+void test_preempted_bpf_ma_op(void)
+{
+ struct alloc_ctx ctx[ALLOC_THREAD_NR];
+ struct preempted_bpf_ma_op *skel;
+ pthread_t tid[ALLOC_THREAD_NR];
+ int i, err;
+
+ skel = preempted_bpf_ma_op__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_and_load"))
+ return;
+
+ err = preempted_bpf_ma_op__attach(skel);
+ if (!ASSERT_OK(err, "attach"))
+ goto out;
+
+ for (i = 0; i < ARRAY_SIZE(ctx); i++) {
+ struct bpf_program *prog;
+ char name[8];
+
+ snprintf(name, sizeof(name), "test%d", i);
+ prog = bpf_object__find_program_by_name(skel->obj, name);
+ if (!ASSERT_OK_PTR(prog, "no test prog"))
+ goto out;
+
+ ctx[i].run_err = 0;
+ ctx[i].fd = bpf_program__fd(prog);
+ ctx[i].nomem_err = &skel->bss->nomem_err;
+ }
+
+ memset(tid, 0, sizeof(tid));
+ for (i = 0; i < ARRAY_SIZE(tid); i++) {
+ err = pthread_create(&tid[i], NULL, run_alloc_prog, &ctx[i]);
+ if (!ASSERT_OK(err, "pthread_create"))
+ break;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(tid); i++) {
+ if (!tid[i])
+ break;
+ pthread_join(tid[i], NULL);
+ ASSERT_EQ(ctx[i].run_err, 0, "run prog err");
+ }
+
+ ASSERT_FALSE(skel->bss->nomem_err, "ENOMEM");
+out:
+ preempted_bpf_ma_op__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c b/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c
index 722c5f2a7776..a043af9cd6d9 100644
--- a/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c
+++ b/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c
@@ -14,7 +14,7 @@ static void test_queue_stack_map_by_type(int type)
int i, err, prog_fd, map_in_fd, map_out_fd;
char file[32], buf[128];
struct bpf_object *obj;
- struct iphdr iph;
+ struct iphdr iph = {};
LIBBPF_OPTS(bpf_test_run_opts, topts,
.data_in = &pkt_v4,
.data_size_in = sizeof(pkt_v4),
diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf.c b/tools/testing/selftests/bpf/prog_tests/ringbuf.c
index ac104dc652e3..48c5695b7abf 100644
--- a/tools/testing/selftests/bpf/prog_tests/ringbuf.c
+++ b/tools/testing/selftests/bpf/prog_tests/ringbuf.c
@@ -91,6 +91,9 @@ static void ringbuf_subtest(void)
int err, cnt, rb_fd;
int page_size = getpagesize();
void *mmap_ptr, *tmp_ptr;
+ struct ring *ring;
+ int map_fd;
+ unsigned long avail_data, ring_size, cons_pos, prod_pos;
skel = test_ringbuf_lskel__open();
if (CHECK(!skel, "skel_open", "skeleton open failed\n"))
@@ -162,6 +165,13 @@ static void ringbuf_subtest(void)
trigger_samples();
+ ring = ring_buffer__ring(ringbuf, 0);
+ if (!ASSERT_OK_PTR(ring, "ring_buffer__ring_idx_0"))
+ goto cleanup;
+
+ map_fd = ring__map_fd(ring);
+ ASSERT_EQ(map_fd, skel->maps.ringbuf.map_fd, "ring_map_fd");
+
/* 2 submitted + 1 discarded records */
CHECK(skel->bss->avail_data != 3 * rec_sz,
"err_avail_size", "exp %ld, got %ld\n",
@@ -176,6 +186,18 @@ static void ringbuf_subtest(void)
"err_prod_pos", "exp %ld, got %ld\n",
3L * rec_sz, skel->bss->prod_pos);
+ /* verify getting this data directly via the ring object yields the same
+ * results
+ */
+ avail_data = ring__avail_data_size(ring);
+ ASSERT_EQ(avail_data, 3 * rec_sz, "ring_avail_size");
+ ring_size = ring__size(ring);
+ ASSERT_EQ(ring_size, page_size, "ring_ring_size");
+ cons_pos = ring__consumer_pos(ring);
+ ASSERT_EQ(cons_pos, 0, "ring_cons_pos");
+ prod_pos = ring__producer_pos(ring);
+ ASSERT_EQ(prod_pos, 3 * rec_sz, "ring_prod_pos");
+
/* poll for samples */
err = ring_buffer__poll(ringbuf, -1);
@@ -282,6 +304,10 @@ static void ringbuf_subtest(void)
err = ring_buffer__consume(ringbuf);
CHECK(err < 0, "rb_consume", "failed: %d\b", err);
+ /* also consume using ring__consume to make sure it works the same */
+ err = ring__consume(ring);
+ ASSERT_GE(err, 0, "ring_consume");
+
/* 3 rounds, 2 samples each */
cnt = atomic_xchg(&sample_cnt, 0);
CHECK(cnt != 6, "cnt", "exp %d samples, got %d\n", 6, cnt);
diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c
index 1455911d9fcb..58522195081b 100644
--- a/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c
+++ b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c
@@ -42,6 +42,8 @@ void test_ringbuf_multi(void)
{
struct test_ringbuf_multi *skel;
struct ring_buffer *ringbuf = NULL;
+ struct ring *ring_old;
+ struct ring *ring;
int err;
int page_size = getpagesize();
int proto_fd = -1;
@@ -84,11 +86,24 @@ void test_ringbuf_multi(void)
if (CHECK(!ringbuf, "ringbuf_create", "failed to create ringbuf\n"))
goto cleanup;
+ /* verify ring_buffer__ring returns expected results */
+ ring = ring_buffer__ring(ringbuf, 0);
+ if (!ASSERT_OK_PTR(ring, "ring_buffer__ring_idx_0"))
+ goto cleanup;
+ ring_old = ring;
+ ring = ring_buffer__ring(ringbuf, 1);
+ ASSERT_ERR_PTR(ring, "ring_buffer__ring_idx_1");
+
err = ring_buffer__add(ringbuf, bpf_map__fd(skel->maps.ringbuf2),
process_sample, (void *)(long)2);
if (CHECK(err, "ringbuf_add", "failed to add another ring\n"))
goto cleanup;
+ /* verify adding a new ring didn't invalidate our older pointer */
+ ring = ring_buffer__ring(ringbuf, 0);
+ if (!ASSERT_EQ(ring, ring_old, "ring_buffer__ring_again"))
+ goto cleanup;
+
err = test_ringbuf_multi__attach(skel);
if (CHECK(err, "skel_attach", "skeleton attachment failed: %d\n", err))
goto cleanup;
diff --git a/tools/testing/selftests/bpf/prog_tests/section_names.c b/tools/testing/selftests/bpf/prog_tests/section_names.c
index 8b571890c57e..c3d78846f31a 100644
--- a/tools/testing/selftests/bpf/prog_tests/section_names.c
+++ b/tools/testing/selftests/bpf/prog_tests/section_names.c
@@ -124,6 +124,11 @@ static struct sec_name_test tests[] = {
{0, BPF_CGROUP_INET6_CONNECT},
},
{
+ "cgroup/connect_unix",
+ {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_CONNECT},
+ {0, BPF_CGROUP_UNIX_CONNECT},
+ },
+ {
"cgroup/sendmsg4",
{0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_SENDMSG},
{0, BPF_CGROUP_UDP4_SENDMSG},
@@ -134,6 +139,11 @@ static struct sec_name_test tests[] = {
{0, BPF_CGROUP_UDP6_SENDMSG},
},
{
+ "cgroup/sendmsg_unix",
+ {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_SENDMSG},
+ {0, BPF_CGROUP_UNIX_SENDMSG},
+ },
+ {
"cgroup/recvmsg4",
{0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_RECVMSG},
{0, BPF_CGROUP_UDP4_RECVMSG},
@@ -144,6 +154,11 @@ static struct sec_name_test tests[] = {
{0, BPF_CGROUP_UDP6_RECVMSG},
},
{
+ "cgroup/recvmsg_unix",
+ {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_RECVMSG},
+ {0, BPF_CGROUP_UNIX_RECVMSG},
+ },
+ {
"cgroup/sysctl",
{0, BPF_PROG_TYPE_CGROUP_SYSCTL, BPF_CGROUP_SYSCTL},
{0, BPF_CGROUP_SYSCTL},
@@ -158,6 +173,36 @@ static struct sec_name_test tests[] = {
{0, BPF_PROG_TYPE_CGROUP_SOCKOPT, BPF_CGROUP_SETSOCKOPT},
{0, BPF_CGROUP_SETSOCKOPT},
},
+ {
+ "cgroup/getpeername4",
+ {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETPEERNAME},
+ {0, BPF_CGROUP_INET4_GETPEERNAME},
+ },
+ {
+ "cgroup/getpeername6",
+ {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETPEERNAME},
+ {0, BPF_CGROUP_INET6_GETPEERNAME},
+ },
+ {
+ "cgroup/getpeername_unix",
+ {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_GETPEERNAME},
+ {0, BPF_CGROUP_UNIX_GETPEERNAME},
+ },
+ {
+ "cgroup/getsockname4",
+ {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETSOCKNAME},
+ {0, BPF_CGROUP_INET4_GETSOCKNAME},
+ },
+ {
+ "cgroup/getsockname6",
+ {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETSOCKNAME},
+ {0, BPF_CGROUP_INET6_GETSOCKNAME},
+ },
+ {
+ "cgroup/getsockname_unix",
+ {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_GETSOCKNAME},
+ {0, BPF_CGROUP_UNIX_GETSOCKNAME},
+ },
};
static void test_prog_type_by_name(const struct sec_name_test *test)
diff --git a/tools/testing/selftests/bpf/prog_tests/sock_addr.c b/tools/testing/selftests/bpf/prog_tests/sock_addr.c
new file mode 100644
index 000000000000..5fd617718991
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/sock_addr.c
@@ -0,0 +1,612 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <sys/un.h>
+
+#include "test_progs.h"
+
+#include "connect_unix_prog.skel.h"
+#include "sendmsg_unix_prog.skel.h"
+#include "recvmsg_unix_prog.skel.h"
+#include "getsockname_unix_prog.skel.h"
+#include "getpeername_unix_prog.skel.h"
+#include "network_helpers.h"
+
+#define SERVUN_ADDRESS "bpf_cgroup_unix_test"
+#define SERVUN_REWRITE_ADDRESS "bpf_cgroup_unix_test_rewrite"
+#define SRCUN_ADDRESS "bpf_cgroup_unix_test_src"
+
+enum sock_addr_test_type {
+ SOCK_ADDR_TEST_BIND,
+ SOCK_ADDR_TEST_CONNECT,
+ SOCK_ADDR_TEST_SENDMSG,
+ SOCK_ADDR_TEST_RECVMSG,
+ SOCK_ADDR_TEST_GETSOCKNAME,
+ SOCK_ADDR_TEST_GETPEERNAME,
+};
+
+typedef void *(*load_fn)(int cgroup_fd);
+typedef void (*destroy_fn)(void *skel);
+
+struct sock_addr_test {
+ enum sock_addr_test_type type;
+ const char *name;
+ /* BPF prog properties */
+ load_fn loadfn;
+ destroy_fn destroyfn;
+ /* Socket properties */
+ int socket_family;
+ int socket_type;
+ /* IP:port pairs for BPF prog to override */
+ const char *requested_addr;
+ unsigned short requested_port;
+ const char *expected_addr;
+ unsigned short expected_port;
+ const char *expected_src_addr;
+};
+
+static void *connect_unix_prog_load(int cgroup_fd)
+{
+ struct connect_unix_prog *skel;
+
+ skel = connect_unix_prog__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ goto cleanup;
+
+ skel->links.connect_unix_prog = bpf_program__attach_cgroup(
+ skel->progs.connect_unix_prog, cgroup_fd);
+ if (!ASSERT_OK_PTR(skel->links.connect_unix_prog, "prog_attach"))
+ goto cleanup;
+
+ return skel;
+cleanup:
+ connect_unix_prog__destroy(skel);
+ return NULL;
+}
+
+static void connect_unix_prog_destroy(void *skel)
+{
+ connect_unix_prog__destroy(skel);
+}
+
+static void *sendmsg_unix_prog_load(int cgroup_fd)
+{
+ struct sendmsg_unix_prog *skel;
+
+ skel = sendmsg_unix_prog__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ goto cleanup;
+
+ skel->links.sendmsg_unix_prog = bpf_program__attach_cgroup(
+ skel->progs.sendmsg_unix_prog, cgroup_fd);
+ if (!ASSERT_OK_PTR(skel->links.sendmsg_unix_prog, "prog_attach"))
+ goto cleanup;
+
+ return skel;
+cleanup:
+ sendmsg_unix_prog__destroy(skel);
+ return NULL;
+}
+
+static void sendmsg_unix_prog_destroy(void *skel)
+{
+ sendmsg_unix_prog__destroy(skel);
+}
+
+static void *recvmsg_unix_prog_load(int cgroup_fd)
+{
+ struct recvmsg_unix_prog *skel;
+
+ skel = recvmsg_unix_prog__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ goto cleanup;
+
+ skel->links.recvmsg_unix_prog = bpf_program__attach_cgroup(
+ skel->progs.recvmsg_unix_prog, cgroup_fd);
+ if (!ASSERT_OK_PTR(skel->links.recvmsg_unix_prog, "prog_attach"))
+ goto cleanup;
+
+ return skel;
+cleanup:
+ recvmsg_unix_prog__destroy(skel);
+ return NULL;
+}
+
+static void recvmsg_unix_prog_destroy(void *skel)
+{
+ recvmsg_unix_prog__destroy(skel);
+}
+
+static void *getsockname_unix_prog_load(int cgroup_fd)
+{
+ struct getsockname_unix_prog *skel;
+
+ skel = getsockname_unix_prog__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ goto cleanup;
+
+ skel->links.getsockname_unix_prog = bpf_program__attach_cgroup(
+ skel->progs.getsockname_unix_prog, cgroup_fd);
+ if (!ASSERT_OK_PTR(skel->links.getsockname_unix_prog, "prog_attach"))
+ goto cleanup;
+
+ return skel;
+cleanup:
+ getsockname_unix_prog__destroy(skel);
+ return NULL;
+}
+
+static void getsockname_unix_prog_destroy(void *skel)
+{
+ getsockname_unix_prog__destroy(skel);
+}
+
+static void *getpeername_unix_prog_load(int cgroup_fd)
+{
+ struct getpeername_unix_prog *skel;
+
+ skel = getpeername_unix_prog__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ goto cleanup;
+
+ skel->links.getpeername_unix_prog = bpf_program__attach_cgroup(
+ skel->progs.getpeername_unix_prog, cgroup_fd);
+ if (!ASSERT_OK_PTR(skel->links.getpeername_unix_prog, "prog_attach"))
+ goto cleanup;
+
+ return skel;
+cleanup:
+ getpeername_unix_prog__destroy(skel);
+ return NULL;
+}
+
+static void getpeername_unix_prog_destroy(void *skel)
+{
+ getpeername_unix_prog__destroy(skel);
+}
+
+static struct sock_addr_test tests[] = {
+ {
+ SOCK_ADDR_TEST_CONNECT,
+ "connect_unix",
+ connect_unix_prog_load,
+ connect_unix_prog_destroy,
+ AF_UNIX,
+ SOCK_STREAM,
+ SERVUN_ADDRESS,
+ 0,
+ SERVUN_REWRITE_ADDRESS,
+ 0,
+ NULL,
+ },
+ {
+ SOCK_ADDR_TEST_SENDMSG,
+ "sendmsg_unix",
+ sendmsg_unix_prog_load,
+ sendmsg_unix_prog_destroy,
+ AF_UNIX,
+ SOCK_DGRAM,
+ SERVUN_ADDRESS,
+ 0,
+ SERVUN_REWRITE_ADDRESS,
+ 0,
+ NULL,
+ },
+ {
+ SOCK_ADDR_TEST_RECVMSG,
+ "recvmsg_unix-dgram",
+ recvmsg_unix_prog_load,
+ recvmsg_unix_prog_destroy,
+ AF_UNIX,
+ SOCK_DGRAM,
+ SERVUN_REWRITE_ADDRESS,
+ 0,
+ SERVUN_REWRITE_ADDRESS,
+ 0,
+ SERVUN_ADDRESS,
+ },
+ {
+ SOCK_ADDR_TEST_RECVMSG,
+ "recvmsg_unix-stream",
+ recvmsg_unix_prog_load,
+ recvmsg_unix_prog_destroy,
+ AF_UNIX,
+ SOCK_STREAM,
+ SERVUN_REWRITE_ADDRESS,
+ 0,
+ SERVUN_REWRITE_ADDRESS,
+ 0,
+ SERVUN_ADDRESS,
+ },
+ {
+ SOCK_ADDR_TEST_GETSOCKNAME,
+ "getsockname_unix",
+ getsockname_unix_prog_load,
+ getsockname_unix_prog_destroy,
+ AF_UNIX,
+ SOCK_STREAM,
+ SERVUN_ADDRESS,
+ 0,
+ SERVUN_REWRITE_ADDRESS,
+ 0,
+ NULL,
+ },
+ {
+ SOCK_ADDR_TEST_GETPEERNAME,
+ "getpeername_unix",
+ getpeername_unix_prog_load,
+ getpeername_unix_prog_destroy,
+ AF_UNIX,
+ SOCK_STREAM,
+ SERVUN_ADDRESS,
+ 0,
+ SERVUN_REWRITE_ADDRESS,
+ 0,
+ NULL,
+ },
+};
+
+typedef int (*info_fn)(int, struct sockaddr *, socklen_t *);
+
+static int cmp_addr(const struct sockaddr_storage *addr1, socklen_t addr1_len,
+ const struct sockaddr_storage *addr2, socklen_t addr2_len,
+ bool cmp_port)
+{
+ const struct sockaddr_in *four1, *four2;
+ const struct sockaddr_in6 *six1, *six2;
+ const struct sockaddr_un *un1, *un2;
+
+ if (addr1->ss_family != addr2->ss_family)
+ return -1;
+
+ if (addr1_len != addr2_len)
+ return -1;
+
+ if (addr1->ss_family == AF_INET) {
+ four1 = (const struct sockaddr_in *)addr1;
+ four2 = (const struct sockaddr_in *)addr2;
+ return !((four1->sin_port == four2->sin_port || !cmp_port) &&
+ four1->sin_addr.s_addr == four2->sin_addr.s_addr);
+ } else if (addr1->ss_family == AF_INET6) {
+ six1 = (const struct sockaddr_in6 *)addr1;
+ six2 = (const struct sockaddr_in6 *)addr2;
+ return !((six1->sin6_port == six2->sin6_port || !cmp_port) &&
+ !memcmp(&six1->sin6_addr, &six2->sin6_addr,
+ sizeof(struct in6_addr)));
+ } else if (addr1->ss_family == AF_UNIX) {
+ un1 = (const struct sockaddr_un *)addr1;
+ un2 = (const struct sockaddr_un *)addr2;
+ return memcmp(un1, un2, addr1_len);
+ }
+
+ return -1;
+}
+
+static int cmp_sock_addr(info_fn fn, int sock1,
+ const struct sockaddr_storage *addr2,
+ socklen_t addr2_len, bool cmp_port)
+{
+ struct sockaddr_storage addr1;
+ socklen_t len1 = sizeof(addr1);
+
+ memset(&addr1, 0, len1);
+ if (fn(sock1, (struct sockaddr *)&addr1, (socklen_t *)&len1) != 0)
+ return -1;
+
+ return cmp_addr(&addr1, len1, addr2, addr2_len, cmp_port);
+}
+
+static int cmp_local_addr(int sock1, const struct sockaddr_storage *addr2,
+ socklen_t addr2_len, bool cmp_port)
+{
+ return cmp_sock_addr(getsockname, sock1, addr2, addr2_len, cmp_port);
+}
+
+static int cmp_peer_addr(int sock1, const struct sockaddr_storage *addr2,
+ socklen_t addr2_len, bool cmp_port)
+{
+ return cmp_sock_addr(getpeername, sock1, addr2, addr2_len, cmp_port);
+}
+
+static void test_bind(struct sock_addr_test *test)
+{
+ struct sockaddr_storage expected_addr;
+ socklen_t expected_addr_len = sizeof(struct sockaddr_storage);
+ int serv = -1, client = -1, err;
+
+ serv = start_server(test->socket_family, test->socket_type,
+ test->requested_addr, test->requested_port, 0);
+ if (!ASSERT_GE(serv, 0, "start_server"))
+ goto cleanup;
+
+ err = make_sockaddr(test->socket_family,
+ test->expected_addr, test->expected_port,
+ &expected_addr, &expected_addr_len);
+ if (!ASSERT_EQ(err, 0, "make_sockaddr"))
+ goto cleanup;
+
+ err = cmp_local_addr(serv, &expected_addr, expected_addr_len, true);
+ if (!ASSERT_EQ(err, 0, "cmp_local_addr"))
+ goto cleanup;
+
+ /* Try to connect to server just in case */
+ client = connect_to_addr(&expected_addr, expected_addr_len, test->socket_type);
+ if (!ASSERT_GE(client, 0, "connect_to_addr"))
+ goto cleanup;
+
+cleanup:
+ if (client != -1)
+ close(client);
+ if (serv != -1)
+ close(serv);
+}
+
+static void test_connect(struct sock_addr_test *test)
+{
+ struct sockaddr_storage addr, expected_addr, expected_src_addr;
+ socklen_t addr_len = sizeof(struct sockaddr_storage),
+ expected_addr_len = sizeof(struct sockaddr_storage),
+ expected_src_addr_len = sizeof(struct sockaddr_storage);
+ int serv = -1, client = -1, err;
+
+ serv = start_server(test->socket_family, test->socket_type,
+ test->expected_addr, test->expected_port, 0);
+ if (!ASSERT_GE(serv, 0, "start_server"))
+ goto cleanup;
+
+ err = make_sockaddr(test->socket_family, test->requested_addr, test->requested_port,
+ &addr, &addr_len);
+ if (!ASSERT_EQ(err, 0, "make_sockaddr"))
+ goto cleanup;
+
+ client = connect_to_addr(&addr, addr_len, test->socket_type);
+ if (!ASSERT_GE(client, 0, "connect_to_addr"))
+ goto cleanup;
+
+ err = make_sockaddr(test->socket_family, test->expected_addr, test->expected_port,
+ &expected_addr, &expected_addr_len);
+ if (!ASSERT_EQ(err, 0, "make_sockaddr"))
+ goto cleanup;
+
+ if (test->expected_src_addr) {
+ err = make_sockaddr(test->socket_family, test->expected_src_addr, 0,
+ &expected_src_addr, &expected_src_addr_len);
+ if (!ASSERT_EQ(err, 0, "make_sockaddr"))
+ goto cleanup;
+ }
+
+ err = cmp_peer_addr(client, &expected_addr, expected_addr_len, true);
+ if (!ASSERT_EQ(err, 0, "cmp_peer_addr"))
+ goto cleanup;
+
+ if (test->expected_src_addr) {
+ err = cmp_local_addr(client, &expected_src_addr, expected_src_addr_len, false);
+ if (!ASSERT_EQ(err, 0, "cmp_local_addr"))
+ goto cleanup;
+ }
+cleanup:
+ if (client != -1)
+ close(client);
+ if (serv != -1)
+ close(serv);
+}
+
+static void test_xmsg(struct sock_addr_test *test)
+{
+ struct sockaddr_storage addr, src_addr;
+ socklen_t addr_len = sizeof(struct sockaddr_storage),
+ src_addr_len = sizeof(struct sockaddr_storage);
+ struct msghdr hdr;
+ struct iovec iov;
+ char data = 'a';
+ int serv = -1, client = -1, err;
+
+ /* Unlike the other tests, here we test that we can rewrite the src addr
+ * with a recvmsg() hook.
+ */
+
+ serv = start_server(test->socket_family, test->socket_type,
+ test->expected_addr, test->expected_port, 0);
+ if (!ASSERT_GE(serv, 0, "start_server"))
+ goto cleanup;
+
+ client = socket(test->socket_family, test->socket_type, 0);
+ if (!ASSERT_GE(client, 0, "socket"))
+ goto cleanup;
+
+ /* AF_UNIX sockets have to be bound to something to trigger the recvmsg bpf program. */
+ if (test->socket_family == AF_UNIX) {
+ err = make_sockaddr(AF_UNIX, SRCUN_ADDRESS, 0, &src_addr, &src_addr_len);
+ if (!ASSERT_EQ(err, 0, "make_sockaddr"))
+ goto cleanup;
+
+ err = bind(client, (const struct sockaddr *) &src_addr, src_addr_len);
+ if (!ASSERT_OK(err, "bind"))
+ goto cleanup;
+ }
+
+ err = make_sockaddr(test->socket_family, test->requested_addr, test->requested_port,
+ &addr, &addr_len);
+ if (!ASSERT_EQ(err, 0, "make_sockaddr"))
+ goto cleanup;
+
+ if (test->socket_type == SOCK_DGRAM) {
+ memset(&iov, 0, sizeof(iov));
+ iov.iov_base = &data;
+ iov.iov_len = sizeof(data);
+
+ memset(&hdr, 0, sizeof(hdr));
+ hdr.msg_name = (void *)&addr;
+ hdr.msg_namelen = addr_len;
+ hdr.msg_iov = &iov;
+ hdr.msg_iovlen = 1;
+
+ err = sendmsg(client, &hdr, 0);
+ if (!ASSERT_EQ(err, sizeof(data), "sendmsg"))
+ goto cleanup;
+ } else {
+ /* Testing with connection-oriented sockets is only valid for
+ * recvmsg() tests.
+ */
+ if (!ASSERT_EQ(test->type, SOCK_ADDR_TEST_RECVMSG, "recvmsg"))
+ goto cleanup;
+
+ err = connect(client, (const struct sockaddr *)&addr, addr_len);
+ if (!ASSERT_OK(err, "connect"))
+ goto cleanup;
+
+ err = send(client, &data, sizeof(data), 0);
+ if (!ASSERT_EQ(err, sizeof(data), "send"))
+ goto cleanup;
+
+ err = listen(serv, 0);
+ if (!ASSERT_OK(err, "listen"))
+ goto cleanup;
+
+ err = accept(serv, NULL, NULL);
+ if (!ASSERT_GE(err, 0, "accept"))
+ goto cleanup;
+
+ close(serv);
+ serv = err;
+ }
+
+ addr_len = src_addr_len = sizeof(struct sockaddr_storage);
+
+ err = recvfrom(serv, &data, sizeof(data), 0, (struct sockaddr *) &src_addr, &src_addr_len);
+ if (!ASSERT_EQ(err, sizeof(data), "recvfrom"))
+ goto cleanup;
+
+ ASSERT_EQ(data, 'a', "data mismatch");
+
+ if (test->expected_src_addr) {
+ err = make_sockaddr(test->socket_family, test->expected_src_addr, 0,
+ &addr, &addr_len);
+ if (!ASSERT_EQ(err, 0, "make_sockaddr"))
+ goto cleanup;
+
+ err = cmp_addr(&src_addr, src_addr_len, &addr, addr_len, false);
+ if (!ASSERT_EQ(err, 0, "cmp_addr"))
+ goto cleanup;
+ }
+
+cleanup:
+ if (client != -1)
+ close(client);
+ if (serv != -1)
+ close(serv);
+}
+
+static void test_getsockname(struct sock_addr_test *test)
+{
+ struct sockaddr_storage expected_addr;
+ socklen_t expected_addr_len = sizeof(struct sockaddr_storage);
+ int serv = -1, err;
+
+ serv = start_server(test->socket_family, test->socket_type,
+ test->requested_addr, test->requested_port, 0);
+ if (!ASSERT_GE(serv, 0, "start_server"))
+ goto cleanup;
+
+ err = make_sockaddr(test->socket_family,
+ test->expected_addr, test->expected_port,
+ &expected_addr, &expected_addr_len);
+ if (!ASSERT_EQ(err, 0, "make_sockaddr"))
+ goto cleanup;
+
+ err = cmp_local_addr(serv, &expected_addr, expected_addr_len, true);
+ if (!ASSERT_EQ(err, 0, "cmp_local_addr"))
+ goto cleanup;
+
+cleanup:
+ if (serv != -1)
+ close(serv);
+}
+
+static void test_getpeername(struct sock_addr_test *test)
+{
+ struct sockaddr_storage addr, expected_addr;
+ socklen_t addr_len = sizeof(struct sockaddr_storage),
+ expected_addr_len = sizeof(struct sockaddr_storage);
+ int serv = -1, client = -1, err;
+
+ serv = start_server(test->socket_family, test->socket_type,
+ test->requested_addr, test->requested_port, 0);
+ if (!ASSERT_GE(serv, 0, "start_server"))
+ goto cleanup;
+
+ err = make_sockaddr(test->socket_family, test->requested_addr, test->requested_port,
+ &addr, &addr_len);
+ if (!ASSERT_EQ(err, 0, "make_sockaddr"))
+ goto cleanup;
+
+ client = connect_to_addr(&addr, addr_len, test->socket_type);
+ if (!ASSERT_GE(client, 0, "connect_to_addr"))
+ goto cleanup;
+
+ err = make_sockaddr(test->socket_family, test->expected_addr, test->expected_port,
+ &expected_addr, &expected_addr_len);
+ if (!ASSERT_EQ(err, 0, "make_sockaddr"))
+ goto cleanup;
+
+ err = cmp_peer_addr(client, &expected_addr, expected_addr_len, true);
+ if (!ASSERT_EQ(err, 0, "cmp_peer_addr"))
+ goto cleanup;
+
+cleanup:
+ if (client != -1)
+ close(client);
+ if (serv != -1)
+ close(serv);
+}
+
+void test_sock_addr(void)
+{
+ int cgroup_fd = -1;
+ void *skel;
+
+ cgroup_fd = test__join_cgroup("/sock_addr");
+ if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup"))
+ goto cleanup;
+
+ for (size_t i = 0; i < ARRAY_SIZE(tests); ++i) {
+ struct sock_addr_test *test = &tests[i];
+
+ if (!test__start_subtest(test->name))
+ continue;
+
+ skel = test->loadfn(cgroup_fd);
+ if (!skel)
+ continue;
+
+ switch (test->type) {
+ /* Not exercised yet but we leave this code here for when the
+ * INET and INET6 sockaddr tests are migrated to this file in
+ * the future.
+ */
+ case SOCK_ADDR_TEST_BIND:
+ test_bind(test);
+ break;
+ case SOCK_ADDR_TEST_CONNECT:
+ test_connect(test);
+ break;
+ case SOCK_ADDR_TEST_SENDMSG:
+ case SOCK_ADDR_TEST_RECVMSG:
+ test_xmsg(test);
+ break;
+ case SOCK_ADDR_TEST_GETSOCKNAME:
+ test_getsockname(test);
+ break;
+ case SOCK_ADDR_TEST_GETPEERNAME:
+ test_getpeername(test);
+ break;
+ default:
+ ASSERT_TRUE(false, "Unknown sock addr test type");
+ break;
+ }
+
+ test->destroyfn(skel);
+ }
+
+cleanup:
+ if (cgroup_fd >= 0)
+ close(cgroup_fd);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
index dda7060e86a0..f75f84d0b3d7 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
@@ -359,7 +359,7 @@ out:
static void test_sockmap_skb_verdict_shutdown(void)
{
struct epoll_event ev, events[MAX_EVENTS];
- int n, err, map, verdict, s, c1, p1;
+ int n, err, map, verdict, s, c1 = -1, p1 = -1;
struct test_sockmap_pass_prog *skel;
int epollfd;
int zero = 0;
@@ -414,9 +414,9 @@ out:
static void test_sockmap_skb_verdict_fionread(bool pass_prog)
{
int expected, zero = 0, sent, recvd, avail;
- int err, map, verdict, s, c0, c1, p0, p1;
- struct test_sockmap_pass_prog *pass;
- struct test_sockmap_drop_prog *drop;
+ int err, map, verdict, s, c0 = -1, c1 = -1, p0 = -1, p1 = -1;
+ struct test_sockmap_pass_prog *pass = NULL;
+ struct test_sockmap_drop_prog *drop = NULL;
char buf[256] = "0123456789";
if (pass_prog) {
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h b/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h
index 36d829a65aa4..e880f97bc44d 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h
@@ -378,7 +378,7 @@ static inline int enable_reuseport(int s, int progfd)
static inline int socket_loopback_reuseport(int family, int sotype, int progfd)
{
struct sockaddr_storage addr;
- socklen_t len;
+ socklen_t len = 0;
int err, s;
init_addr_loopback(family, &addr, &len);
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
index 8df8cbb447f1..a934d430c20c 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
@@ -73,7 +73,7 @@ static void test_insert_bound(struct test_sockmap_listen *skel __always_unused,
int family, int sotype, int mapfd)
{
struct sockaddr_storage addr;
- socklen_t len;
+ socklen_t len = 0;
u32 key = 0;
u64 value;
int err, s;
@@ -871,7 +871,7 @@ static void test_msg_redir_to_listening(struct test_sockmap_listen *skel,
static void redir_partial(int family, int sotype, int sock_map, int parser_map)
{
- int s, c0, c1, p0, p1;
+ int s, c0 = -1, c1 = -1, p0 = -1, p1 = -1;
int err, n, key, value;
char buf[] = "abc";
@@ -1336,53 +1336,59 @@ static void test_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
}
}
-static void unix_redir_to_connected(int sotype, int sock_mapfd,
- int verd_mapfd, enum redir_mode mode)
+static void pairs_redir_to_connected(int cli0, int peer0, int cli1, int peer1,
+ int sock_mapfd, int verd_mapfd, enum redir_mode mode)
{
const char *log_prefix = redir_mode_str(mode);
- int c0, c1, p0, p1;
unsigned int pass;
int err, n;
- int sfd[2];
u32 key;
char b;
zero_verdict_count(verd_mapfd);
- if (socketpair(AF_UNIX, sotype | SOCK_NONBLOCK, 0, sfd))
- return;
- c0 = sfd[0], p0 = sfd[1];
-
- if (socketpair(AF_UNIX, sotype | SOCK_NONBLOCK, 0, sfd))
- goto close0;
- c1 = sfd[0], p1 = sfd[1];
-
- err = add_to_sockmap(sock_mapfd, p0, p1);
+ err = add_to_sockmap(sock_mapfd, peer0, peer1);
if (err)
- goto close;
+ return;
- n = write(c1, "a", 1);
+ n = write(cli1, "a", 1);
if (n < 0)
FAIL_ERRNO("%s: write", log_prefix);
if (n == 0)
FAIL("%s: incomplete write", log_prefix);
if (n < 1)
- goto close;
+ return;
key = SK_PASS;
err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
if (err)
- goto close;
+ return;
if (pass != 1)
FAIL("%s: want pass count 1, have %d", log_prefix, pass);
- n = recv_timeout(mode == REDIR_INGRESS ? p0 : c0, &b, 1, 0, IO_TIMEOUT_SEC);
+ n = recv_timeout(mode == REDIR_INGRESS ? peer0 : cli0, &b, 1, 0, IO_TIMEOUT_SEC);
if (n < 0)
FAIL_ERRNO("%s: recv_timeout", log_prefix);
if (n == 0)
FAIL("%s: incomplete recv", log_prefix);
+}
+
+static void unix_redir_to_connected(int sotype, int sock_mapfd,
+ int verd_mapfd, enum redir_mode mode)
+{
+ int c0, c1, p0, p1;
+ int sfd[2];
+
+ if (socketpair(AF_UNIX, sotype | SOCK_NONBLOCK, 0, sfd))
+ return;
+ c0 = sfd[0], p0 = sfd[1];
+
+ if (socketpair(AF_UNIX, sotype | SOCK_NONBLOCK, 0, sfd))
+ goto close0;
+ c1 = sfd[0], p1 = sfd[1];
+
+ pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, verd_mapfd, mode);
-close:
xclose(c1);
xclose(p1);
close0:
@@ -1661,14 +1667,8 @@ close_peer0:
static void udp_redir_to_connected(int family, int sock_mapfd, int verd_mapfd,
enum redir_mode mode)
{
- const char *log_prefix = redir_mode_str(mode);
int c0, c1, p0, p1;
- unsigned int pass;
- int err, n;
- u32 key;
- char b;
-
- zero_verdict_count(verd_mapfd);
+ int err;
err = inet_socketpair(family, SOCK_DGRAM, &p0, &c0);
if (err)
@@ -1677,32 +1677,8 @@ static void udp_redir_to_connected(int family, int sock_mapfd, int verd_mapfd,
if (err)
goto close_cli0;
- err = add_to_sockmap(sock_mapfd, p0, p1);
- if (err)
- goto close_cli1;
+ pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, verd_mapfd, mode);
- n = write(c1, "a", 1);
- if (n < 0)
- FAIL_ERRNO("%s: write", log_prefix);
- if (n == 0)
- FAIL("%s: incomplete write", log_prefix);
- if (n < 1)
- goto close_cli1;
-
- key = SK_PASS;
- err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
- if (err)
- goto close_cli1;
- if (pass != 1)
- FAIL("%s: want pass count 1, have %d", log_prefix, pass);
-
- n = recv_timeout(mode == REDIR_INGRESS ? p0 : c0, &b, 1, 0, IO_TIMEOUT_SEC);
- if (n < 0)
- FAIL_ERRNO("%s: recv_timeout", log_prefix);
- if (n == 0)
- FAIL("%s: incomplete recv", log_prefix);
-
-close_cli1:
xclose(c1);
xclose(p1);
close_cli0:
@@ -1747,15 +1723,9 @@ static void test_udp_redir(struct test_sockmap_listen *skel, struct bpf_map *map
static void inet_unix_redir_to_connected(int family, int type, int sock_mapfd,
int verd_mapfd, enum redir_mode mode)
{
- const char *log_prefix = redir_mode_str(mode);
int c0, c1, p0, p1;
- unsigned int pass;
- int err, n;
int sfd[2];
- u32 key;
- char b;
-
- zero_verdict_count(verd_mapfd);
+ int err;
if (socketpair(AF_UNIX, SOCK_DGRAM | SOCK_NONBLOCK, 0, sfd))
return;
@@ -1765,32 +1735,8 @@ static void inet_unix_redir_to_connected(int family, int type, int sock_mapfd,
if (err)
goto close;
- err = add_to_sockmap(sock_mapfd, p0, p1);
- if (err)
- goto close_cli1;
-
- n = write(c1, "a", 1);
- if (n < 0)
- FAIL_ERRNO("%s: write", log_prefix);
- if (n == 0)
- FAIL("%s: incomplete write", log_prefix);
- if (n < 1)
- goto close_cli1;
-
- key = SK_PASS;
- err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
- if (err)
- goto close_cli1;
- if (pass != 1)
- FAIL("%s: want pass count 1, have %d", log_prefix, pass);
-
- n = recv_timeout(mode == REDIR_INGRESS ? p0 : c0, &b, 1, 0, IO_TIMEOUT_SEC);
- if (n < 0)
- FAIL_ERRNO("%s: recv_timeout", log_prefix);
- if (n == 0)
- FAIL("%s: incomplete recv", log_prefix);
+ pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, verd_mapfd, mode);
-close_cli1:
xclose(c1);
xclose(p1);
close:
@@ -1827,15 +1773,9 @@ static void inet_unix_skb_redir_to_connected(struct test_sockmap_listen *skel,
static void unix_inet_redir_to_connected(int family, int type, int sock_mapfd,
int verd_mapfd, enum redir_mode mode)
{
- const char *log_prefix = redir_mode_str(mode);
int c0, c1, p0, p1;
- unsigned int pass;
- int err, n;
int sfd[2];
- u32 key;
- char b;
-
- zero_verdict_count(verd_mapfd);
+ int err;
err = inet_socketpair(family, SOCK_DGRAM, &p0, &c0);
if (err)
@@ -1845,32 +1785,8 @@ static void unix_inet_redir_to_connected(int family, int type, int sock_mapfd,
goto close_cli0;
c1 = sfd[0], p1 = sfd[1];
- err = add_to_sockmap(sock_mapfd, p0, p1);
- if (err)
- goto close;
-
- n = write(c1, "a", 1);
- if (n < 0)
- FAIL_ERRNO("%s: write", log_prefix);
- if (n == 0)
- FAIL("%s: incomplete write", log_prefix);
- if (n < 1)
- goto close;
-
- key = SK_PASS;
- err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
- if (err)
- goto close;
- if (pass != 1)
- FAIL("%s: want pass count 1, have %d", log_prefix, pass);
-
- n = recv_timeout(mode == REDIR_INGRESS ? p0 : c0, &b, 1, 0, IO_TIMEOUT_SEC);
- if (n < 0)
- FAIL_ERRNO("%s: recv_timeout", log_prefix);
- if (n == 0)
- FAIL("%s: incomplete recv", log_prefix);
+ pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, verd_mapfd, mode);
-close:
xclose(c1);
xclose(p1);
close_cli0:
diff --git a/tools/testing/selftests/bpf/prog_tests/tailcalls.c b/tools/testing/selftests/bpf/prog_tests/tailcalls.c
index 58fe2c586ed7..fc6b2954e8f5 100644
--- a/tools/testing/selftests/bpf/prog_tests/tailcalls.c
+++ b/tools/testing/selftests/bpf/prog_tests/tailcalls.c
@@ -218,12 +218,14 @@ out:
bpf_object__close(obj);
}
-static void test_tailcall_count(const char *which)
+static void test_tailcall_count(const char *which, bool test_fentry,
+ bool test_fexit)
{
+ struct bpf_object *obj = NULL, *fentry_obj = NULL, *fexit_obj = NULL;
+ struct bpf_link *fentry_link = NULL, *fexit_link = NULL;
int err, map_fd, prog_fd, main_fd, data_fd, i, val;
struct bpf_map *prog_array, *data_map;
struct bpf_program *prog;
- struct bpf_object *obj;
char buff[128] = {};
LIBBPF_OPTS(bpf_test_run_opts, topts,
.data_in = buff,
@@ -265,23 +267,105 @@ static void test_tailcall_count(const char *which)
if (CHECK_FAIL(err))
goto out;
+ if (test_fentry) {
+ fentry_obj = bpf_object__open_file("tailcall_bpf2bpf_fentry.bpf.o",
+ NULL);
+ if (!ASSERT_OK_PTR(fentry_obj, "open fentry_obj file"))
+ goto out;
+
+ prog = bpf_object__find_program_by_name(fentry_obj, "fentry");
+ if (!ASSERT_OK_PTR(prog, "find fentry prog"))
+ goto out;
+
+ err = bpf_program__set_attach_target(prog, prog_fd,
+ "subprog_tail");
+ if (!ASSERT_OK(err, "set_attach_target subprog_tail"))
+ goto out;
+
+ err = bpf_object__load(fentry_obj);
+ if (!ASSERT_OK(err, "load fentry_obj"))
+ goto out;
+
+ fentry_link = bpf_program__attach_trace(prog);
+ if (!ASSERT_OK_PTR(fentry_link, "attach_trace"))
+ goto out;
+ }
+
+ if (test_fexit) {
+ fexit_obj = bpf_object__open_file("tailcall_bpf2bpf_fexit.bpf.o",
+ NULL);
+ if (!ASSERT_OK_PTR(fexit_obj, "open fexit_obj file"))
+ goto out;
+
+ prog = bpf_object__find_program_by_name(fexit_obj, "fexit");
+ if (!ASSERT_OK_PTR(prog, "find fexit prog"))
+ goto out;
+
+ err = bpf_program__set_attach_target(prog, prog_fd,
+ "subprog_tail");
+ if (!ASSERT_OK(err, "set_attach_target subprog_tail"))
+ goto out;
+
+ err = bpf_object__load(fexit_obj);
+ if (!ASSERT_OK(err, "load fexit_obj"))
+ goto out;
+
+ fexit_link = bpf_program__attach_trace(prog);
+ if (!ASSERT_OK_PTR(fexit_link, "attach_trace"))
+ goto out;
+ }
+
err = bpf_prog_test_run_opts(main_fd, &topts);
ASSERT_OK(err, "tailcall");
ASSERT_EQ(topts.retval, 1, "tailcall retval");
data_map = bpf_object__find_map_by_name(obj, "tailcall.bss");
if (CHECK_FAIL(!data_map || !bpf_map__is_internal(data_map)))
- return;
+ goto out;
data_fd = bpf_map__fd(data_map);
- if (CHECK_FAIL(map_fd < 0))
- return;
+ if (CHECK_FAIL(data_fd < 0))
+ goto out;
i = 0;
err = bpf_map_lookup_elem(data_fd, &i, &val);
ASSERT_OK(err, "tailcall count");
ASSERT_EQ(val, 33, "tailcall count");
+ if (test_fentry) {
+ data_map = bpf_object__find_map_by_name(fentry_obj, ".bss");
+ if (!ASSERT_FALSE(!data_map || !bpf_map__is_internal(data_map),
+ "find tailcall_bpf2bpf_fentry.bss map"))
+ goto out;
+
+ data_fd = bpf_map__fd(data_map);
+ if (!ASSERT_FALSE(data_fd < 0,
+ "find tailcall_bpf2bpf_fentry.bss map fd"))
+ goto out;
+
+ i = 0;
+ err = bpf_map_lookup_elem(data_fd, &i, &val);
+ ASSERT_OK(err, "fentry count");
+ ASSERT_EQ(val, 33, "fentry count");
+ }
+
+ if (test_fexit) {
+ data_map = bpf_object__find_map_by_name(fexit_obj, ".bss");
+ if (!ASSERT_FALSE(!data_map || !bpf_map__is_internal(data_map),
+ "find tailcall_bpf2bpf_fexit.bss map"))
+ goto out;
+
+ data_fd = bpf_map__fd(data_map);
+ if (!ASSERT_FALSE(data_fd < 0,
+ "find tailcall_bpf2bpf_fexit.bss map fd"))
+ goto out;
+
+ i = 0;
+ err = bpf_map_lookup_elem(data_fd, &i, &val);
+ ASSERT_OK(err, "fexit count");
+ ASSERT_EQ(val, 33, "fexit count");
+ }
+
i = 0;
err = bpf_map_delete_elem(map_fd, &i);
if (CHECK_FAIL(err))
@@ -291,6 +375,10 @@ static void test_tailcall_count(const char *which)
ASSERT_OK(err, "tailcall");
ASSERT_OK(topts.retval, "tailcall retval");
out:
+ bpf_link__destroy(fentry_link);
+ bpf_link__destroy(fexit_link);
+ bpf_object__close(fentry_obj);
+ bpf_object__close(fexit_obj);
bpf_object__close(obj);
}
@@ -299,7 +387,7 @@ out:
*/
static void test_tailcall_3(void)
{
- test_tailcall_count("tailcall3.bpf.o");
+ test_tailcall_count("tailcall3.bpf.o", false, false);
}
/* test_tailcall_6 checks that the count value of the tail call limit
@@ -307,7 +395,7 @@ static void test_tailcall_3(void)
*/
static void test_tailcall_6(void)
{
- test_tailcall_count("tailcall6.bpf.o");
+ test_tailcall_count("tailcall6.bpf.o", false, false);
}
/* test_tailcall_4 checks that the kernel properly selects indirect jump
@@ -352,11 +440,11 @@ static void test_tailcall_4(void)
data_map = bpf_object__find_map_by_name(obj, "tailcall.bss");
if (CHECK_FAIL(!data_map || !bpf_map__is_internal(data_map)))
- return;
+ goto out;
data_fd = bpf_map__fd(data_map);
- if (CHECK_FAIL(map_fd < 0))
- return;
+ if (CHECK_FAIL(data_fd < 0))
+ goto out;
for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
@@ -442,11 +530,11 @@ static void test_tailcall_5(void)
data_map = bpf_object__find_map_by_name(obj, "tailcall.bss");
if (CHECK_FAIL(!data_map || !bpf_map__is_internal(data_map)))
- return;
+ goto out;
data_fd = bpf_map__fd(data_map);
- if (CHECK_FAIL(map_fd < 0))
- return;
+ if (CHECK_FAIL(data_fd < 0))
+ goto out;
for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
@@ -631,11 +719,11 @@ static void test_tailcall_bpf2bpf_2(void)
data_map = bpf_object__find_map_by_name(obj, "tailcall.bss");
if (CHECK_FAIL(!data_map || !bpf_map__is_internal(data_map)))
- return;
+ goto out;
data_fd = bpf_map__fd(data_map);
- if (CHECK_FAIL(map_fd < 0))
- return;
+ if (CHECK_FAIL(data_fd < 0))
+ goto out;
i = 0;
err = bpf_map_lookup_elem(data_fd, &i, &val);
@@ -805,11 +893,11 @@ static void test_tailcall_bpf2bpf_4(bool noise)
data_map = bpf_object__find_map_by_name(obj, "tailcall.bss");
if (CHECK_FAIL(!data_map || !bpf_map__is_internal(data_map)))
- return;
+ goto out;
data_fd = bpf_map__fd(data_map);
- if (CHECK_FAIL(map_fd < 0))
- return;
+ if (CHECK_FAIL(data_fd < 0))
+ goto out;
i = 0;
val.noise = noise;
@@ -872,7 +960,7 @@ static void test_tailcall_bpf2bpf_6(void)
ASSERT_EQ(topts.retval, 0, "tailcall retval");
data_fd = bpf_map__fd(obj->maps.bss);
- if (!ASSERT_GE(map_fd, 0, "bss map fd"))
+ if (!ASSERT_GE(data_fd, 0, "bss map fd"))
goto out;
i = 0;
@@ -884,6 +972,139 @@ out:
tailcall_bpf2bpf6__destroy(obj);
}
+/* test_tailcall_bpf2bpf_fentry checks that the count value of the tail call
+ * limit enforcement matches with expectations when tailcall is preceded with
+ * bpf2bpf call, and the bpf2bpf call is traced by fentry.
+ */
+static void test_tailcall_bpf2bpf_fentry(void)
+{
+ test_tailcall_count("tailcall_bpf2bpf2.bpf.o", true, false);
+}
+
+/* test_tailcall_bpf2bpf_fexit checks that the count value of the tail call
+ * limit enforcement matches with expectations when tailcall is preceded with
+ * bpf2bpf call, and the bpf2bpf call is traced by fexit.
+ */
+static void test_tailcall_bpf2bpf_fexit(void)
+{
+ test_tailcall_count("tailcall_bpf2bpf2.bpf.o", false, true);
+}
+
+/* test_tailcall_bpf2bpf_fentry_fexit checks that the count value of the tail
+ * call limit enforcement matches with expectations when tailcall is preceded
+ * with bpf2bpf call, and the bpf2bpf call is traced by both fentry and fexit.
+ */
+static void test_tailcall_bpf2bpf_fentry_fexit(void)
+{
+ test_tailcall_count("tailcall_bpf2bpf2.bpf.o", true, true);
+}
+
+/* test_tailcall_bpf2bpf_fentry_entry checks that the count value of the tail
+ * call limit enforcement matches with expectations when tailcall is preceded
+ * with bpf2bpf call, and the bpf2bpf caller is traced by fentry.
+ */
+static void test_tailcall_bpf2bpf_fentry_entry(void)
+{
+ struct bpf_object *tgt_obj = NULL, *fentry_obj = NULL;
+ int err, map_fd, prog_fd, data_fd, i, val;
+ struct bpf_map *prog_array, *data_map;
+ struct bpf_link *fentry_link = NULL;
+ struct bpf_program *prog;
+ char buff[128] = {};
+
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = buff,
+ .data_size_in = sizeof(buff),
+ .repeat = 1,
+ );
+
+ err = bpf_prog_test_load("tailcall_bpf2bpf2.bpf.o",
+ BPF_PROG_TYPE_SCHED_CLS,
+ &tgt_obj, &prog_fd);
+ if (!ASSERT_OK(err, "load tgt_obj"))
+ return;
+
+ prog_array = bpf_object__find_map_by_name(tgt_obj, "jmp_table");
+ if (!ASSERT_OK_PTR(prog_array, "find jmp_table map"))
+ goto out;
+
+ map_fd = bpf_map__fd(prog_array);
+ if (!ASSERT_FALSE(map_fd < 0, "find jmp_table map fd"))
+ goto out;
+
+ prog = bpf_object__find_program_by_name(tgt_obj, "classifier_0");
+ if (!ASSERT_OK_PTR(prog, "find classifier_0 prog"))
+ goto out;
+
+ prog_fd = bpf_program__fd(prog);
+ if (!ASSERT_FALSE(prog_fd < 0, "find classifier_0 prog fd"))
+ goto out;
+
+ i = 0;
+ err = bpf_map_update_elem(map_fd, &i, &prog_fd, BPF_ANY);
+ if (!ASSERT_OK(err, "update jmp_table"))
+ goto out;
+
+ fentry_obj = bpf_object__open_file("tailcall_bpf2bpf_fentry.bpf.o",
+ NULL);
+ if (!ASSERT_OK_PTR(fentry_obj, "open fentry_obj file"))
+ goto out;
+
+ prog = bpf_object__find_program_by_name(fentry_obj, "fentry");
+ if (!ASSERT_OK_PTR(prog, "find fentry prog"))
+ goto out;
+
+ err = bpf_program__set_attach_target(prog, prog_fd, "classifier_0");
+ if (!ASSERT_OK(err, "set_attach_target classifier_0"))
+ goto out;
+
+ err = bpf_object__load(fentry_obj);
+ if (!ASSERT_OK(err, "load fentry_obj"))
+ goto out;
+
+ fentry_link = bpf_program__attach_trace(prog);
+ if (!ASSERT_OK_PTR(fentry_link, "attach_trace"))
+ goto out;
+
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "tailcall");
+ ASSERT_EQ(topts.retval, 1, "tailcall retval");
+
+ data_map = bpf_object__find_map_by_name(tgt_obj, "tailcall.bss");
+ if (!ASSERT_FALSE(!data_map || !bpf_map__is_internal(data_map),
+ "find tailcall.bss map"))
+ goto out;
+
+ data_fd = bpf_map__fd(data_map);
+ if (!ASSERT_FALSE(data_fd < 0, "find tailcall.bss map fd"))
+ goto out;
+
+ i = 0;
+ err = bpf_map_lookup_elem(data_fd, &i, &val);
+ ASSERT_OK(err, "tailcall count");
+ ASSERT_EQ(val, 34, "tailcall count");
+
+ data_map = bpf_object__find_map_by_name(fentry_obj, ".bss");
+ if (!ASSERT_FALSE(!data_map || !bpf_map__is_internal(data_map),
+ "find tailcall_bpf2bpf_fentry.bss map"))
+ goto out;
+
+ data_fd = bpf_map__fd(data_map);
+ if (!ASSERT_FALSE(data_fd < 0,
+ "find tailcall_bpf2bpf_fentry.bss map fd"))
+ goto out;
+
+ i = 0;
+ err = bpf_map_lookup_elem(data_fd, &i, &val);
+ ASSERT_OK(err, "fentry count");
+ ASSERT_EQ(val, 1, "fentry count");
+
+out:
+ bpf_link__destroy(fentry_link);
+ bpf_object__close(fentry_obj);
+ bpf_object__close(tgt_obj);
+}
+
void test_tailcalls(void)
{
if (test__start_subtest("tailcall_1"))
@@ -910,4 +1131,12 @@ void test_tailcalls(void)
test_tailcall_bpf2bpf_4(true);
if (test__start_subtest("tailcall_bpf2bpf_6"))
test_tailcall_bpf2bpf_6();
+ if (test__start_subtest("tailcall_bpf2bpf_fentry"))
+ test_tailcall_bpf2bpf_fentry();
+ if (test__start_subtest("tailcall_bpf2bpf_fexit"))
+ test_tailcall_bpf2bpf_fexit();
+ if (test__start_subtest("tailcall_bpf2bpf_fentry_fexit"))
+ test_tailcall_bpf2bpf_fentry_fexit();
+ if (test__start_subtest("tailcall_bpf2bpf_fentry_entry"))
+ test_tailcall_bpf2bpf_fentry_entry();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/timer.c b/tools/testing/selftests/bpf/prog_tests/timer.c
index ce2c61d62fc6..760ad96b4be0 100644
--- a/tools/testing/selftests/bpf/prog_tests/timer.c
+++ b/tools/testing/selftests/bpf/prog_tests/timer.c
@@ -15,6 +15,7 @@ static int timer(struct timer *timer_skel)
ASSERT_EQ(timer_skel->data->callback_check, 52, "callback_check1");
ASSERT_EQ(timer_skel->data->callback2_check, 52, "callback2_check1");
+ ASSERT_EQ(timer_skel->bss->pinned_callback_check, 0, "pinned_callback_check1");
prog_fd = bpf_program__fd(timer_skel->progs.test1);
err = bpf_prog_test_run_opts(prog_fd, &topts);
@@ -33,6 +34,9 @@ static int timer(struct timer *timer_skel)
/* check that timer_cb3() was executed twice */
ASSERT_EQ(timer_skel->bss->abs_data, 12, "abs_data");
+ /* check that timer_cb_pinned() was executed twice */
+ ASSERT_EQ(timer_skel->bss->pinned_callback_check, 2, "pinned_callback_check");
+
/* check that there were no errors in timer execution */
ASSERT_EQ(timer_skel->bss->err, 0, "err");
diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe.c b/tools/testing/selftests/bpf/prog_tests/uprobe.c
new file mode 100644
index 000000000000..cf3e0e7a64fa
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/uprobe.c
@@ -0,0 +1,95 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Hengqi Chen */
+
+#include <test_progs.h>
+#include "test_uprobe.skel.h"
+
+static FILE *urand_spawn(int *pid)
+{
+ FILE *f;
+
+ /* urandom_read's stdout is wired into f */
+ f = popen("./urandom_read 1 report-pid", "r");
+ if (!f)
+ return NULL;
+
+ if (fscanf(f, "%d", pid) != 1) {
+ pclose(f);
+ errno = EINVAL;
+ return NULL;
+ }
+
+ return f;
+}
+
+static int urand_trigger(FILE **urand_pipe)
+{
+ int exit_code;
+
+ /* pclose() waits for child process to exit and returns their exit code */
+ exit_code = pclose(*urand_pipe);
+ *urand_pipe = NULL;
+
+ return exit_code;
+}
+
+void test_uprobe(void)
+{
+ LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts);
+ struct test_uprobe *skel;
+ FILE *urand_pipe = NULL;
+ int urand_pid = 0, err;
+
+ skel = test_uprobe__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ urand_pipe = urand_spawn(&urand_pid);
+ if (!ASSERT_OK_PTR(urand_pipe, "urand_spawn"))
+ goto cleanup;
+
+ skel->bss->my_pid = urand_pid;
+
+ /* Manual attach uprobe to urandlib_api
+ * There are two `urandlib_api` symbols in .dynsym section:
+ * - urandlib_api@LIBURANDOM_READ_1.0.0
+ * - urandlib_api@@LIBURANDOM_READ_2.0.0
+ * Both are global bind and would cause a conflict if user
+ * specify the symbol name without a version suffix
+ */
+ uprobe_opts.func_name = "urandlib_api";
+ skel->links.test4 = bpf_program__attach_uprobe_opts(skel->progs.test4,
+ urand_pid,
+ "./liburandom_read.so",
+ 0 /* offset */,
+ &uprobe_opts);
+ if (!ASSERT_ERR_PTR(skel->links.test4, "urandlib_api_attach_conflict"))
+ goto cleanup;
+
+ uprobe_opts.func_name = "urandlib_api@LIBURANDOM_READ_1.0.0";
+ skel->links.test4 = bpf_program__attach_uprobe_opts(skel->progs.test4,
+ urand_pid,
+ "./liburandom_read.so",
+ 0 /* offset */,
+ &uprobe_opts);
+ if (!ASSERT_OK_PTR(skel->links.test4, "urandlib_api_attach_ok"))
+ goto cleanup;
+
+ /* Auto attach 3 u[ret]probes to urandlib_api_sameoffset */
+ err = test_uprobe__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto cleanup;
+
+ /* trigger urandom_read */
+ ASSERT_OK(urand_trigger(&urand_pipe), "urand_exit_code");
+
+ ASSERT_EQ(skel->bss->test1_result, 1, "urandlib_api_sameoffset");
+ ASSERT_EQ(skel->bss->test2_result, 1, "urandlib_api_sameoffset@v1");
+ ASSERT_EQ(skel->bss->test3_result, 3, "urandlib_api_sameoffset@@v2");
+ ASSERT_EQ(skel->bss->test4_result, 1, "urandlib_api");
+
+cleanup:
+ if (urand_pipe)
+ pclose(urand_pipe);
+ test_uprobe__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c
index 626c461fa34d..4439ba9392f8 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c
@@ -226,7 +226,7 @@ static int verify_xsk_metadata(struct xsk *xsk)
__u64 comp_addr;
void *data;
__u64 addr;
- __u32 idx;
+ __u32 idx = 0;
int ret;
ret = recvfrom(xsk_socket__fd(xsk->socket), NULL, 0, MSG_DONTWAIT, NULL, NULL);
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_vma.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_vmas.c
index dd923dc637d5..dd923dc637d5 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_task_vma.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_vmas.c
diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h
index 38a57a2e70db..799fff4995d8 100644
--- a/tools/testing/selftests/bpf/progs/bpf_misc.h
+++ b/tools/testing/selftests/bpf/progs/bpf_misc.h
@@ -99,6 +99,9 @@
#elif defined(__TARGET_ARCH_arm64)
#define SYSCALL_WRAPPER 1
#define SYS_PREFIX "__arm64_"
+#elif defined(__TARGET_ARCH_riscv)
+#define SYSCALL_WRAPPER 1
+#define SYS_PREFIX "__riscv_"
#else
#define SYSCALL_WRAPPER 0
#define SYS_PREFIX "__se_"
diff --git a/tools/testing/selftests/bpf/progs/connect_unix_prog.c b/tools/testing/selftests/bpf/progs/connect_unix_prog.c
new file mode 100644
index 000000000000..ca8aa2f116b3
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/connect_unix_prog.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+
+#include <string.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_kfuncs.h"
+
+__u8 SERVUN_REWRITE_ADDRESS[] = "\0bpf_cgroup_unix_test_rewrite";
+
+SEC("cgroup/connect_unix")
+int connect_unix_prog(struct bpf_sock_addr *ctx)
+{
+ struct bpf_sock_addr_kern *sa_kern = bpf_cast_to_kern_ctx(ctx);
+ struct sockaddr_un *sa_kern_unaddr;
+ __u32 unaddrlen = offsetof(struct sockaddr_un, sun_path) +
+ sizeof(SERVUN_REWRITE_ADDRESS) - 1;
+ int ret;
+
+ /* Rewrite destination. */
+ ret = bpf_sock_addr_set_sun_path(sa_kern, SERVUN_REWRITE_ADDRESS,
+ sizeof(SERVUN_REWRITE_ADDRESS) - 1);
+ if (ret)
+ return 0;
+
+ if (sa_kern->uaddrlen != unaddrlen)
+ return 0;
+
+ sa_kern_unaddr = bpf_rdonly_cast(sa_kern->uaddr,
+ bpf_core_type_id_kernel(struct sockaddr_un));
+ if (memcmp(sa_kern_unaddr->sun_path, SERVUN_REWRITE_ADDRESS,
+ sizeof(SERVUN_REWRITE_ADDRESS) - 1) != 0)
+ return 0;
+
+ return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/exceptions.c b/tools/testing/selftests/bpf/progs/exceptions.c
new file mode 100644
index 000000000000..2811ee842b01
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/exceptions.c
@@ -0,0 +1,368 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include <bpf/bpf_endian.h>
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+
+#ifndef ETH_P_IP
+#define ETH_P_IP 0x0800
+#endif
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 4);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u32));
+} jmp_table SEC(".maps");
+
+static __noinline int static_func(u64 i)
+{
+ bpf_throw(32);
+ return i;
+}
+
+__noinline int global2static_simple(u64 i)
+{
+ static_func(i + 2);
+ return i - 1;
+}
+
+__noinline int global2static(u64 i)
+{
+ if (i == ETH_P_IP)
+ bpf_throw(16);
+ return static_func(i);
+}
+
+static __noinline int static2global(u64 i)
+{
+ return global2static(i) + i;
+}
+
+SEC("tc")
+int exception_throw_always_1(struct __sk_buff *ctx)
+{
+ bpf_throw(64);
+ return 0;
+}
+
+/* In this case, the global func will never be seen executing after call to
+ * static subprog, hence verifier will DCE the remaining instructions. Ensure we
+ * are resilient to that.
+ */
+SEC("tc")
+int exception_throw_always_2(struct __sk_buff *ctx)
+{
+ return global2static_simple(ctx->protocol);
+}
+
+SEC("tc")
+int exception_throw_unwind_1(struct __sk_buff *ctx)
+{
+ return static2global(bpf_ntohs(ctx->protocol));
+}
+
+SEC("tc")
+int exception_throw_unwind_2(struct __sk_buff *ctx)
+{
+ return static2global(bpf_ntohs(ctx->protocol) - 1);
+}
+
+SEC("tc")
+int exception_throw_default(struct __sk_buff *ctx)
+{
+ bpf_throw(0);
+ return 1;
+}
+
+SEC("tc")
+int exception_throw_default_value(struct __sk_buff *ctx)
+{
+ bpf_throw(5);
+ return 1;
+}
+
+SEC("tc")
+int exception_tail_call_target(struct __sk_buff *ctx)
+{
+ bpf_throw(16);
+ return 0;
+}
+
+static __noinline
+int exception_tail_call_subprog(struct __sk_buff *ctx)
+{
+ volatile int ret = 10;
+
+ bpf_tail_call_static(ctx, &jmp_table, 0);
+ return ret;
+}
+
+SEC("tc")
+int exception_tail_call(struct __sk_buff *ctx) {
+ volatile int ret = 0;
+
+ ret = exception_tail_call_subprog(ctx);
+ return ret + 8;
+}
+
+__noinline int exception_ext_global(struct __sk_buff *ctx)
+{
+ volatile int ret = 0;
+
+ return ret;
+}
+
+static __noinline int exception_ext_static(struct __sk_buff *ctx)
+{
+ return exception_ext_global(ctx);
+}
+
+SEC("tc")
+int exception_ext(struct __sk_buff *ctx)
+{
+ return exception_ext_static(ctx);
+}
+
+__noinline int exception_cb_mod_global(u64 cookie)
+{
+ volatile int ret = 0;
+
+ return ret;
+}
+
+/* Example of how the exception callback supplied during verification can still
+ * introduce extensions by calling to dummy global functions, and alter runtime
+ * behavior.
+ *
+ * Right now we don't allow freplace attachment to exception callback itself,
+ * but if the need arises this restriction is technically feasible to relax in
+ * the future.
+ */
+__noinline int exception_cb_mod(u64 cookie)
+{
+ return exception_cb_mod_global(cookie) + cookie + 10;
+}
+
+SEC("tc")
+__exception_cb(exception_cb_mod)
+int exception_ext_mod_cb_runtime(struct __sk_buff *ctx)
+{
+ bpf_throw(25);
+ return 0;
+}
+
+__noinline static int subprog(struct __sk_buff *ctx)
+{
+ return bpf_ktime_get_ns();
+}
+
+__noinline static int throwing_subprog(struct __sk_buff *ctx)
+{
+ if (ctx->tstamp)
+ bpf_throw(0);
+ return bpf_ktime_get_ns();
+}
+
+__noinline int global_subprog(struct __sk_buff *ctx)
+{
+ return bpf_ktime_get_ns();
+}
+
+__noinline int throwing_global_subprog(struct __sk_buff *ctx)
+{
+ if (ctx->tstamp)
+ bpf_throw(0);
+ return bpf_ktime_get_ns();
+}
+
+SEC("tc")
+int exception_throw_subprog(struct __sk_buff *ctx)
+{
+ switch (ctx->protocol) {
+ case 1:
+ return subprog(ctx);
+ case 2:
+ return global_subprog(ctx);
+ case 3:
+ return throwing_subprog(ctx);
+ case 4:
+ return throwing_global_subprog(ctx);
+ default:
+ break;
+ }
+ bpf_throw(1);
+ return 0;
+}
+
+__noinline int assert_nz_gfunc(u64 c)
+{
+ volatile u64 cookie = c;
+
+ bpf_assert(cookie != 0);
+ return 0;
+}
+
+__noinline int assert_zero_gfunc(u64 c)
+{
+ volatile u64 cookie = c;
+
+ bpf_assert_eq(cookie, 0);
+ return 0;
+}
+
+__noinline int assert_neg_gfunc(s64 c)
+{
+ volatile s64 cookie = c;
+
+ bpf_assert_lt(cookie, 0);
+ return 0;
+}
+
+__noinline int assert_pos_gfunc(s64 c)
+{
+ volatile s64 cookie = c;
+
+ bpf_assert_gt(cookie, 0);
+ return 0;
+}
+
+__noinline int assert_negeq_gfunc(s64 c)
+{
+ volatile s64 cookie = c;
+
+ bpf_assert_le(cookie, -1);
+ return 0;
+}
+
+__noinline int assert_poseq_gfunc(s64 c)
+{
+ volatile s64 cookie = c;
+
+ bpf_assert_ge(cookie, 1);
+ return 0;
+}
+
+__noinline int assert_nz_gfunc_with(u64 c)
+{
+ volatile u64 cookie = c;
+
+ bpf_assert_with(cookie != 0, cookie + 100);
+ return 0;
+}
+
+__noinline int assert_zero_gfunc_with(u64 c)
+{
+ volatile u64 cookie = c;
+
+ bpf_assert_eq_with(cookie, 0, cookie + 100);
+ return 0;
+}
+
+__noinline int assert_neg_gfunc_with(s64 c)
+{
+ volatile s64 cookie = c;
+
+ bpf_assert_lt_with(cookie, 0, cookie + 100);
+ return 0;
+}
+
+__noinline int assert_pos_gfunc_with(s64 c)
+{
+ volatile s64 cookie = c;
+
+ bpf_assert_gt_with(cookie, 0, cookie + 100);
+ return 0;
+}
+
+__noinline int assert_negeq_gfunc_with(s64 c)
+{
+ volatile s64 cookie = c;
+
+ bpf_assert_le_with(cookie, -1, cookie + 100);
+ return 0;
+}
+
+__noinline int assert_poseq_gfunc_with(s64 c)
+{
+ volatile s64 cookie = c;
+
+ bpf_assert_ge_with(cookie, 1, cookie + 100);
+ return 0;
+}
+
+#define check_assert(name, cookie, tag) \
+SEC("tc") \
+int exception##tag##name(struct __sk_buff *ctx) \
+{ \
+ return name(cookie) + 1; \
+}
+
+check_assert(assert_nz_gfunc, 5, _);
+check_assert(assert_zero_gfunc, 0, _);
+check_assert(assert_neg_gfunc, -100, _);
+check_assert(assert_pos_gfunc, 100, _);
+check_assert(assert_negeq_gfunc, -1, _);
+check_assert(assert_poseq_gfunc, 1, _);
+
+check_assert(assert_nz_gfunc_with, 5, _);
+check_assert(assert_zero_gfunc_with, 0, _);
+check_assert(assert_neg_gfunc_with, -100, _);
+check_assert(assert_pos_gfunc_with, 100, _);
+check_assert(assert_negeq_gfunc_with, -1, _);
+check_assert(assert_poseq_gfunc_with, 1, _);
+
+check_assert(assert_nz_gfunc, 0, _bad_);
+check_assert(assert_zero_gfunc, 5, _bad_);
+check_assert(assert_neg_gfunc, 100, _bad_);
+check_assert(assert_pos_gfunc, -100, _bad_);
+check_assert(assert_negeq_gfunc, 1, _bad_);
+check_assert(assert_poseq_gfunc, -1, _bad_);
+
+check_assert(assert_nz_gfunc_with, 0, _bad_);
+check_assert(assert_zero_gfunc_with, 5, _bad_);
+check_assert(assert_neg_gfunc_with, 100, _bad_);
+check_assert(assert_pos_gfunc_with, -100, _bad_);
+check_assert(assert_negeq_gfunc_with, 1, _bad_);
+check_assert(assert_poseq_gfunc_with, -1, _bad_);
+
+SEC("tc")
+int exception_assert_range(struct __sk_buff *ctx)
+{
+ u64 time = bpf_ktime_get_ns();
+
+ bpf_assert_range(time, 0, ~0ULL);
+ return 1;
+}
+
+SEC("tc")
+int exception_assert_range_with(struct __sk_buff *ctx)
+{
+ u64 time = bpf_ktime_get_ns();
+
+ bpf_assert_range_with(time, 0, ~0ULL, 10);
+ return 1;
+}
+
+SEC("tc")
+int exception_bad_assert_range(struct __sk_buff *ctx)
+{
+ u64 time = bpf_ktime_get_ns();
+
+ bpf_assert_range(time, -100, 100);
+ return 1;
+}
+
+SEC("tc")
+int exception_bad_assert_range_with(struct __sk_buff *ctx)
+{
+ u64 time = bpf_ktime_get_ns();
+
+ bpf_assert_range_with(time, -1000, 1000, 10);
+ return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/exceptions_assert.c b/tools/testing/selftests/bpf/progs/exceptions_assert.c
new file mode 100644
index 000000000000..e1e5c54a6a11
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/exceptions_assert.c
@@ -0,0 +1,135 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <limits.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include <bpf/bpf_endian.h>
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+
+#define check_assert(type, op, name, value) \
+ SEC("?tc") \
+ __log_level(2) __failure \
+ int check_assert_##op##_##name(void *ctx) \
+ { \
+ type num = bpf_ktime_get_ns(); \
+ bpf_assert_##op(num, value); \
+ return *(u64 *)num; \
+ }
+
+__msg(": R0_w=-2147483648 R10=fp0")
+check_assert(s64, eq, int_min, INT_MIN);
+__msg(": R0_w=2147483647 R10=fp0")
+check_assert(s64, eq, int_max, INT_MAX);
+__msg(": R0_w=0 R10=fp0")
+check_assert(s64, eq, zero, 0);
+__msg(": R0_w=-9223372036854775808 R1_w=-9223372036854775808 R10=fp0")
+check_assert(s64, eq, llong_min, LLONG_MIN);
+__msg(": R0_w=9223372036854775807 R1_w=9223372036854775807 R10=fp0")
+check_assert(s64, eq, llong_max, LLONG_MAX);
+
+__msg(": R0_w=scalar(smax=2147483646) R10=fp0")
+check_assert(s64, lt, pos, INT_MAX);
+__msg(": R0_w=scalar(smax=-1,umin=9223372036854775808,var_off=(0x8000000000000000; 0x7fffffffffffffff))")
+check_assert(s64, lt, zero, 0);
+__msg(": R0_w=scalar(smax=-2147483649,umin=9223372036854775808,umax=18446744071562067967,var_off=(0x8000000000000000; 0x7fffffffffffffff))")
+check_assert(s64, lt, neg, INT_MIN);
+
+__msg(": R0_w=scalar(smax=2147483647) R10=fp0")
+check_assert(s64, le, pos, INT_MAX);
+__msg(": R0_w=scalar(smax=0) R10=fp0")
+check_assert(s64, le, zero, 0);
+__msg(": R0_w=scalar(smax=-2147483648,umin=9223372036854775808,umax=18446744071562067968,var_off=(0x8000000000000000; 0x7fffffffffffffff))")
+check_assert(s64, le, neg, INT_MIN);
+
+__msg(": R0_w=scalar(smin=umin=2147483648,umax=9223372036854775807,var_off=(0x0; 0x7fffffffffffffff))")
+check_assert(s64, gt, pos, INT_MAX);
+__msg(": R0_w=scalar(smin=umin=1,umax=9223372036854775807,var_off=(0x0; 0x7fffffffffffffff))")
+check_assert(s64, gt, zero, 0);
+__msg(": R0_w=scalar(smin=-2147483647) R10=fp0")
+check_assert(s64, gt, neg, INT_MIN);
+
+__msg(": R0_w=scalar(smin=umin=2147483647,umax=9223372036854775807,var_off=(0x0; 0x7fffffffffffffff))")
+check_assert(s64, ge, pos, INT_MAX);
+__msg(": R0_w=scalar(smin=0,umax=9223372036854775807,var_off=(0x0; 0x7fffffffffffffff)) R10=fp0")
+check_assert(s64, ge, zero, 0);
+__msg(": R0_w=scalar(smin=-2147483648) R10=fp0")
+check_assert(s64, ge, neg, INT_MIN);
+
+SEC("?tc")
+__log_level(2) __failure
+__msg(": R0=0 R1=ctx(off=0,imm=0) R2=scalar(smin=smin32=-2147483646,smax=smax32=2147483645) R10=fp0")
+int check_assert_range_s64(struct __sk_buff *ctx)
+{
+ struct bpf_sock *sk = ctx->sk;
+ s64 num;
+
+ _Static_assert(_Generic((sk->rx_queue_mapping), s32: 1, default: 0), "type match");
+ if (!sk)
+ return 0;
+ num = sk->rx_queue_mapping;
+ bpf_assert_range(num, INT_MIN + 2, INT_MAX - 2);
+ return *((u8 *)ctx + num);
+}
+
+SEC("?tc")
+__log_level(2) __failure
+__msg(": R1=ctx(off=0,imm=0) R2=scalar(smin=umin=smin32=umin32=4096,smax=umax=smax32=umax32=8192,var_off=(0x0; 0x3fff))")
+int check_assert_range_u64(struct __sk_buff *ctx)
+{
+ u64 num = ctx->len;
+
+ bpf_assert_range(num, 4096, 8192);
+ return *((u8 *)ctx + num);
+}
+
+SEC("?tc")
+__log_level(2) __failure
+__msg(": R0=0 R1=ctx(off=0,imm=0) R2=4096 R10=fp0")
+int check_assert_single_range_s64(struct __sk_buff *ctx)
+{
+ struct bpf_sock *sk = ctx->sk;
+ s64 num;
+
+ _Static_assert(_Generic((sk->rx_queue_mapping), s32: 1, default: 0), "type match");
+ if (!sk)
+ return 0;
+ num = sk->rx_queue_mapping;
+
+ bpf_assert_range(num, 4096, 4096);
+ return *((u8 *)ctx + num);
+}
+
+SEC("?tc")
+__log_level(2) __failure
+__msg(": R1=ctx(off=0,imm=0) R2=4096 R10=fp0")
+int check_assert_single_range_u64(struct __sk_buff *ctx)
+{
+ u64 num = ctx->len;
+
+ bpf_assert_range(num, 4096, 4096);
+ return *((u8 *)ctx + num);
+}
+
+SEC("?tc")
+__log_level(2) __failure
+__msg(": R1=pkt(off=64,r=64,imm=0) R2=pkt_end(off=0,imm=0) R6=pkt(off=0,r=64,imm=0) R10=fp0")
+int check_assert_generic(struct __sk_buff *ctx)
+{
+ u8 *data_end = (void *)(long)ctx->data_end;
+ u8 *data = (void *)(long)ctx->data;
+
+ bpf_assert(data + 64 <= data_end);
+ return data[128];
+}
+
+SEC("?fentry/bpf_check")
+__failure __msg("At program exit the register R0 has value (0x40; 0x0)")
+int check_assert_with_return(void *ctx)
+{
+ bpf_assert_with(!ctx, 64);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/exceptions_ext.c b/tools/testing/selftests/bpf/progs/exceptions_ext.c
new file mode 100644
index 000000000000..743c05185d9b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/exceptions_ext.c
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_experimental.h"
+
+SEC("?fentry")
+int pfentry(void *ctx)
+{
+ return 0;
+}
+
+SEC("?fentry")
+int throwing_fentry(void *ctx)
+{
+ bpf_throw(0);
+ return 0;
+}
+
+__noinline int exception_cb(u64 cookie)
+{
+ return cookie + 64;
+}
+
+SEC("?freplace")
+int extension(struct __sk_buff *ctx)
+{
+ return 0;
+}
+
+SEC("?freplace")
+__exception_cb(exception_cb)
+int throwing_exception_cb_extension(u64 cookie)
+{
+ bpf_throw(32);
+ return 0;
+}
+
+SEC("?freplace")
+__exception_cb(exception_cb)
+int throwing_extension(struct __sk_buff *ctx)
+{
+ bpf_throw(64);
+ return 0;
+}
+
+SEC("?fexit")
+int pfexit(void *ctx)
+{
+ return 0;
+}
+
+SEC("?fexit")
+int throwing_fexit(void *ctx)
+{
+ bpf_throw(0);
+ return 0;
+}
+
+SEC("?fmod_ret")
+int pfmod_ret(void *ctx)
+{
+ return 0;
+}
+
+SEC("?fmod_ret")
+int throwing_fmod_ret(void *ctx)
+{
+ bpf_throw(0);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/exceptions_fail.c b/tools/testing/selftests/bpf/progs/exceptions_fail.c
new file mode 100644
index 000000000000..4c39e920dac2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/exceptions_fail.c
@@ -0,0 +1,347 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+
+extern void bpf_rcu_read_lock(void) __ksym;
+
+#define private(name) SEC(".bss." #name) __hidden __attribute__((aligned(8)))
+
+struct foo {
+ struct bpf_rb_node node;
+};
+
+struct hmap_elem {
+ struct bpf_timer timer;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 64);
+ __type(key, int);
+ __type(value, struct hmap_elem);
+} hmap SEC(".maps");
+
+private(A) struct bpf_spin_lock lock;
+private(A) struct bpf_rb_root rbtree __contains(foo, node);
+
+__noinline void *exception_cb_bad_ret_type(u64 cookie)
+{
+ return NULL;
+}
+
+__noinline int exception_cb_bad_arg_0(void)
+{
+ return 0;
+}
+
+__noinline int exception_cb_bad_arg_2(int a, int b)
+{
+ return 0;
+}
+
+__noinline int exception_cb_ok_arg_small(int a)
+{
+ return 0;
+}
+
+SEC("?tc")
+__exception_cb(exception_cb_bad_ret_type)
+__failure __msg("Global function exception_cb_bad_ret_type() doesn't return scalar.")
+int reject_exception_cb_type_1(struct __sk_buff *ctx)
+{
+ bpf_throw(0);
+ return 0;
+}
+
+SEC("?tc")
+__exception_cb(exception_cb_bad_arg_0)
+__failure __msg("exception cb only supports single integer argument")
+int reject_exception_cb_type_2(struct __sk_buff *ctx)
+{
+ bpf_throw(0);
+ return 0;
+}
+
+SEC("?tc")
+__exception_cb(exception_cb_bad_arg_2)
+__failure __msg("exception cb only supports single integer argument")
+int reject_exception_cb_type_3(struct __sk_buff *ctx)
+{
+ bpf_throw(0);
+ return 0;
+}
+
+SEC("?tc")
+__exception_cb(exception_cb_ok_arg_small)
+__success
+int reject_exception_cb_type_4(struct __sk_buff *ctx)
+{
+ bpf_throw(0);
+ return 0;
+}
+
+__noinline
+static int timer_cb(void *map, int *key, struct bpf_timer *timer)
+{
+ bpf_throw(0);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("cannot be called from callback subprog")
+int reject_async_callback_throw(struct __sk_buff *ctx)
+{
+ struct hmap_elem *elem;
+
+ elem = bpf_map_lookup_elem(&hmap, &(int){0});
+ if (!elem)
+ return 0;
+ return bpf_timer_set_callback(&elem->timer, timer_cb);
+}
+
+__noinline static int subprog_lock(struct __sk_buff *ctx)
+{
+ volatile int ret = 0;
+
+ bpf_spin_lock(&lock);
+ if (ctx->len)
+ bpf_throw(0);
+ return ret;
+}
+
+SEC("?tc")
+__failure __msg("function calls are not allowed while holding a lock")
+int reject_with_lock(void *ctx)
+{
+ bpf_spin_lock(&lock);
+ bpf_throw(0);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("function calls are not allowed while holding a lock")
+int reject_subprog_with_lock(void *ctx)
+{
+ return subprog_lock(ctx);
+}
+
+SEC("?tc")
+__failure __msg("bpf_rcu_read_unlock is missing")
+int reject_with_rcu_read_lock(void *ctx)
+{
+ bpf_rcu_read_lock();
+ bpf_throw(0);
+ return 0;
+}
+
+__noinline static int throwing_subprog(struct __sk_buff *ctx)
+{
+ if (ctx->len)
+ bpf_throw(0);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("bpf_rcu_read_unlock is missing")
+int reject_subprog_with_rcu_read_lock(void *ctx)
+{
+ bpf_rcu_read_lock();
+ return throwing_subprog(ctx);
+}
+
+static bool rbless(struct bpf_rb_node *n1, const struct bpf_rb_node *n2)
+{
+ bpf_throw(0);
+ return true;
+}
+
+SEC("?tc")
+__failure __msg("function calls are not allowed while holding a lock")
+int reject_with_rbtree_add_throw(void *ctx)
+{
+ struct foo *f;
+
+ f = bpf_obj_new(typeof(*f));
+ if (!f)
+ return 0;
+ bpf_spin_lock(&lock);
+ bpf_rbtree_add(&rbtree, &f->node, rbless);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("Unreleased reference")
+int reject_with_reference(void *ctx)
+{
+ struct foo *f;
+
+ f = bpf_obj_new(typeof(*f));
+ if (!f)
+ return 0;
+ bpf_throw(0);
+ return 0;
+}
+
+__noinline static int subprog_ref(struct __sk_buff *ctx)
+{
+ struct foo *f;
+
+ f = bpf_obj_new(typeof(*f));
+ if (!f)
+ return 0;
+ bpf_throw(0);
+ return 0;
+}
+
+__noinline static int subprog_cb_ref(u32 i, void *ctx)
+{
+ bpf_throw(0);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("Unreleased reference")
+int reject_with_cb_reference(void *ctx)
+{
+ struct foo *f;
+
+ f = bpf_obj_new(typeof(*f));
+ if (!f)
+ return 0;
+ bpf_loop(5, subprog_cb_ref, NULL, 0);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("cannot be called from callback")
+int reject_with_cb(void *ctx)
+{
+ bpf_loop(5, subprog_cb_ref, NULL, 0);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("Unreleased reference")
+int reject_with_subprog_reference(void *ctx)
+{
+ return subprog_ref(ctx) + 1;
+}
+
+__noinline int throwing_exception_cb(u64 c)
+{
+ bpf_throw(0);
+ return c;
+}
+
+__noinline int exception_cb1(u64 c)
+{
+ return c;
+}
+
+__noinline int exception_cb2(u64 c)
+{
+ return c;
+}
+
+static __noinline int static_func(struct __sk_buff *ctx)
+{
+ return exception_cb1(ctx->tstamp);
+}
+
+__noinline int global_func(struct __sk_buff *ctx)
+{
+ return exception_cb1(ctx->tstamp);
+}
+
+SEC("?tc")
+__exception_cb(throwing_exception_cb)
+__failure __msg("cannot be called from callback subprog")
+int reject_throwing_exception_cb(struct __sk_buff *ctx)
+{
+ return 0;
+}
+
+SEC("?tc")
+__exception_cb(exception_cb1)
+__failure __msg("cannot call exception cb directly")
+int reject_exception_cb_call_global_func(struct __sk_buff *ctx)
+{
+ return global_func(ctx);
+}
+
+SEC("?tc")
+__exception_cb(exception_cb1)
+__failure __msg("cannot call exception cb directly")
+int reject_exception_cb_call_static_func(struct __sk_buff *ctx)
+{
+ return static_func(ctx);
+}
+
+SEC("?tc")
+__exception_cb(exception_cb1)
+__exception_cb(exception_cb2)
+__failure __msg("multiple exception callback tags for main subprog")
+int reject_multiple_exception_cb(struct __sk_buff *ctx)
+{
+ bpf_throw(0);
+ return 16;
+}
+
+__noinline int exception_cb_bad_ret(u64 c)
+{
+ return c;
+}
+
+SEC("?fentry/bpf_check")
+__exception_cb(exception_cb_bad_ret)
+__failure __msg("At program exit the register R0 has unknown scalar value should")
+int reject_set_exception_cb_bad_ret1(void *ctx)
+{
+ return 0;
+}
+
+SEC("?fentry/bpf_check")
+__failure __msg("At program exit the register R0 has value (0x40; 0x0) should")
+int reject_set_exception_cb_bad_ret2(void *ctx)
+{
+ bpf_throw(64);
+ return 0;
+}
+
+__noinline static int loop_cb1(u32 index, int *ctx)
+{
+ bpf_throw(0);
+ return 0;
+}
+
+__noinline static int loop_cb2(u32 index, int *ctx)
+{
+ bpf_throw(0);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("cannot be called from callback")
+int reject_exception_throw_cb(struct __sk_buff *ctx)
+{
+ bpf_loop(5, loop_cb1, NULL, 0);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("cannot be called from callback")
+int reject_exception_throw_cb_diff(struct __sk_buff *ctx)
+{
+ if (ctx->protocol)
+ bpf_loop(5, loop_cb1, NULL, 0);
+ else
+ bpf_loop(5, loop_cb2, NULL, 0);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/getpeername_unix_prog.c b/tools/testing/selftests/bpf/progs/getpeername_unix_prog.c
new file mode 100644
index 000000000000..9c078f34bbb2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/getpeername_unix_prog.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+
+#include <string.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_kfuncs.h"
+
+__u8 SERVUN_REWRITE_ADDRESS[] = "\0bpf_cgroup_unix_test_rewrite";
+
+SEC("cgroup/getpeername_unix")
+int getpeername_unix_prog(struct bpf_sock_addr *ctx)
+{
+ struct bpf_sock_addr_kern *sa_kern = bpf_cast_to_kern_ctx(ctx);
+ struct sockaddr_un *sa_kern_unaddr;
+ __u32 unaddrlen = offsetof(struct sockaddr_un, sun_path) +
+ sizeof(SERVUN_REWRITE_ADDRESS) - 1;
+ int ret;
+
+ ret = bpf_sock_addr_set_sun_path(sa_kern, SERVUN_REWRITE_ADDRESS,
+ sizeof(SERVUN_REWRITE_ADDRESS) - 1);
+ if (ret)
+ return 1;
+
+ if (sa_kern->uaddrlen != unaddrlen)
+ return 1;
+
+ sa_kern_unaddr = bpf_rdonly_cast(sa_kern->uaddr,
+ bpf_core_type_id_kernel(struct sockaddr_un));
+ if (memcmp(sa_kern_unaddr->sun_path, SERVUN_REWRITE_ADDRESS,
+ sizeof(SERVUN_REWRITE_ADDRESS) - 1) != 0)
+ return 1;
+
+ return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/getsockname_unix_prog.c b/tools/testing/selftests/bpf/progs/getsockname_unix_prog.c
new file mode 100644
index 000000000000..ac7145111497
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/getsockname_unix_prog.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+
+#include <string.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_kfuncs.h"
+
+__u8 SERVUN_REWRITE_ADDRESS[] = "\0bpf_cgroup_unix_test_rewrite";
+
+SEC("cgroup/getsockname_unix")
+int getsockname_unix_prog(struct bpf_sock_addr *ctx)
+{
+ struct bpf_sock_addr_kern *sa_kern = bpf_cast_to_kern_ctx(ctx);
+ struct sockaddr_un *sa_kern_unaddr;
+ __u32 unaddrlen = offsetof(struct sockaddr_un, sun_path) +
+ sizeof(SERVUN_REWRITE_ADDRESS) - 1;
+ int ret;
+
+ ret = bpf_sock_addr_set_sun_path(sa_kern, SERVUN_REWRITE_ADDRESS,
+ sizeof(SERVUN_REWRITE_ADDRESS) - 1);
+ if (ret)
+ return 1;
+
+ if (sa_kern->uaddrlen != unaddrlen)
+ return 1;
+
+ sa_kern_unaddr = bpf_rdonly_cast(sa_kern->uaddr,
+ bpf_core_type_id_kernel(struct sockaddr_un));
+ if (memcmp(sa_kern_unaddr->sun_path, SERVUN_REWRITE_ADDRESS,
+ sizeof(SERVUN_REWRITE_ADDRESS) - 1) != 0)
+ return 1;
+
+ return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/iters_task_vma.c b/tools/testing/selftests/bpf/progs/iters_task_vma.c
new file mode 100644
index 000000000000..44edecfdfaee
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/iters_task_vma.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include "bpf_experimental.h"
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+pid_t target_pid = 0;
+unsigned int vmas_seen = 0;
+
+struct {
+ __u64 vm_start;
+ __u64 vm_end;
+} vm_ranges[1000];
+
+SEC("raw_tp/sys_enter")
+int iter_task_vma_for_each(const void *ctx)
+{
+ struct task_struct *task = bpf_get_current_task_btf();
+ struct vm_area_struct *vma;
+ unsigned int seen = 0;
+
+ if (task->pid != target_pid)
+ return 0;
+
+ if (vmas_seen)
+ return 0;
+
+ bpf_for_each(task_vma, vma, task, 0) {
+ if (seen >= 1000)
+ break;
+
+ vm_ranges[seen].vm_start = vma->vm_start;
+ vm_ranges[seen].vm_end = vma->vm_end;
+ seen++;
+ }
+
+ vmas_seen = seen;
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/missed_kprobe.c b/tools/testing/selftests/bpf/progs/missed_kprobe.c
new file mode 100644
index 000000000000..7f9ef701f5de
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/missed_kprobe.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "../bpf_testmod/bpf_testmod_kfunc.h"
+
+char _license[] SEC("license") = "GPL";
+
+/*
+ * No tests in here, just to trigger 'bpf_fentry_test*'
+ * through tracing test_run
+ */
+SEC("fentry/bpf_modify_return_test")
+int BPF_PROG(trigger)
+{
+ return 0;
+}
+
+SEC("kprobe/bpf_fentry_test1")
+int test1(struct pt_regs *ctx)
+{
+ bpf_kfunc_common_test();
+ return 0;
+}
+
+SEC("kprobe/bpf_kfunc_common_test")
+int test2(struct pt_regs *ctx)
+{
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/missed_kprobe_recursion.c b/tools/testing/selftests/bpf/progs/missed_kprobe_recursion.c
new file mode 100644
index 000000000000..8ea71cbd6c45
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/missed_kprobe_recursion.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "../bpf_testmod/bpf_testmod_kfunc.h"
+
+char _license[] SEC("license") = "GPL";
+
+/*
+ * No tests in here, just to trigger 'bpf_fentry_test*'
+ * through tracing test_run
+ */
+SEC("fentry/bpf_modify_return_test")
+int BPF_PROG(trigger)
+{
+ return 0;
+}
+
+SEC("kprobe.multi/bpf_fentry_test1")
+int test1(struct pt_regs *ctx)
+{
+ bpf_kfunc_common_test();
+ return 0;
+}
+
+SEC("kprobe/bpf_kfunc_common_test")
+int test2(struct pt_regs *ctx)
+{
+ return 0;
+}
+
+SEC("kprobe/bpf_kfunc_common_test")
+int test3(struct pt_regs *ctx)
+{
+ return 0;
+}
+
+SEC("kprobe/bpf_kfunc_common_test")
+int test4(struct pt_regs *ctx)
+{
+ return 0;
+}
+
+SEC("kprobe.multi/bpf_kfunc_common_test")
+int test5(struct pt_regs *ctx)
+{
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/missed_tp_recursion.c b/tools/testing/selftests/bpf/progs/missed_tp_recursion.c
new file mode 100644
index 000000000000..762385f827c5
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/missed_tp_recursion.c
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+/*
+ * No tests in here, just to trigger 'bpf_fentry_test*'
+ * through tracing test_run
+ */
+SEC("fentry/bpf_modify_return_test")
+int BPF_PROG(trigger)
+{
+ return 0;
+}
+
+SEC("kprobe/bpf_fentry_test1")
+int test1(struct pt_regs *ctx)
+{
+ bpf_printk("test");
+ return 0;
+}
+
+SEC("tp/bpf_trace/bpf_trace_printk")
+int test2(struct pt_regs *ctx)
+{
+ return 0;
+}
+
+SEC("tp/bpf_trace/bpf_trace_printk")
+int test3(struct pt_regs *ctx)
+{
+ return 0;
+}
+
+SEC("tp/bpf_trace/bpf_trace_printk")
+int test4(struct pt_regs *ctx)
+{
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/percpu_alloc_array.c b/tools/testing/selftests/bpf/progs/percpu_alloc_array.c
new file mode 100644
index 000000000000..37c2d2608ec0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/percpu_alloc_array.c
@@ -0,0 +1,190 @@
+#include "bpf_experimental.h"
+
+struct val_t {
+ long b, c, d;
+};
+
+struct elem {
+ long sum;
+ struct val_t __percpu_kptr *pc;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct elem);
+} array SEC(".maps");
+
+void bpf_rcu_read_lock(void) __ksym;
+void bpf_rcu_read_unlock(void) __ksym;
+
+const volatile int nr_cpus;
+
+/* Initialize the percpu object */
+SEC("?fentry/bpf_fentry_test1")
+int BPF_PROG(test_array_map_1)
+{
+ struct val_t __percpu_kptr *p;
+ struct elem *e;
+ int index = 0;
+
+ e = bpf_map_lookup_elem(&array, &index);
+ if (!e)
+ return 0;
+
+ p = bpf_percpu_obj_new(struct val_t);
+ if (!p)
+ return 0;
+
+ p = bpf_kptr_xchg(&e->pc, p);
+ if (p)
+ bpf_percpu_obj_drop(p);
+
+ return 0;
+}
+
+/* Update percpu data */
+SEC("?fentry/bpf_fentry_test2")
+int BPF_PROG(test_array_map_2)
+{
+ struct val_t __percpu_kptr *p;
+ struct val_t *v;
+ struct elem *e;
+ int index = 0;
+
+ e = bpf_map_lookup_elem(&array, &index);
+ if (!e)
+ return 0;
+
+ p = e->pc;
+ if (!p)
+ return 0;
+
+ v = bpf_per_cpu_ptr(p, 0);
+ if (!v)
+ return 0;
+ v->c = 1;
+ v->d = 2;
+
+ return 0;
+}
+
+int cpu0_field_d, sum_field_c;
+int my_pid;
+
+/* Summarize percpu data */
+SEC("?fentry/bpf_fentry_test3")
+int BPF_PROG(test_array_map_3)
+{
+ struct val_t __percpu_kptr *p;
+ int i, index = 0;
+ struct val_t *v;
+ struct elem *e;
+
+ if ((bpf_get_current_pid_tgid() >> 32) != my_pid)
+ return 0;
+
+ e = bpf_map_lookup_elem(&array, &index);
+ if (!e)
+ return 0;
+
+ p = e->pc;
+ if (!p)
+ return 0;
+
+ bpf_for(i, 0, nr_cpus) {
+ v = bpf_per_cpu_ptr(p, i);
+ if (v) {
+ if (i == 0)
+ cpu0_field_d = v->d;
+ sum_field_c += v->c;
+ }
+ }
+
+ return 0;
+}
+
+/* Explicitly free allocated percpu data */
+SEC("?fentry/bpf_fentry_test4")
+int BPF_PROG(test_array_map_4)
+{
+ struct val_t __percpu_kptr *p;
+ struct elem *e;
+ int index = 0;
+
+ e = bpf_map_lookup_elem(&array, &index);
+ if (!e)
+ return 0;
+
+ /* delete */
+ p = bpf_kptr_xchg(&e->pc, NULL);
+ if (p) {
+ bpf_percpu_obj_drop(p);
+ }
+
+ return 0;
+}
+
+SEC("?fentry.s/bpf_fentry_test1")
+int BPF_PROG(test_array_map_10)
+{
+ struct val_t __percpu_kptr *p, *p1;
+ int i, index = 0;
+ struct val_t *v;
+ struct elem *e;
+
+ if ((bpf_get_current_pid_tgid() >> 32) != my_pid)
+ return 0;
+
+ e = bpf_map_lookup_elem(&array, &index);
+ if (!e)
+ return 0;
+
+ bpf_rcu_read_lock();
+ p = e->pc;
+ if (!p) {
+ p = bpf_percpu_obj_new(struct val_t);
+ if (!p)
+ goto out;
+
+ p1 = bpf_kptr_xchg(&e->pc, p);
+ if (p1) {
+ /* race condition */
+ bpf_percpu_obj_drop(p1);
+ }
+ }
+
+ v = bpf_this_cpu_ptr(p);
+ v->c = 3;
+ v = bpf_this_cpu_ptr(p);
+ v->c = 0;
+
+ v = bpf_per_cpu_ptr(p, 0);
+ if (!v)
+ goto out;
+ v->c = 1;
+ v->d = 2;
+
+ /* delete */
+ p1 = bpf_kptr_xchg(&e->pc, NULL);
+ if (!p1)
+ goto out;
+
+ bpf_for(i, 0, nr_cpus) {
+ v = bpf_per_cpu_ptr(p, i);
+ if (v) {
+ if (i == 0)
+ cpu0_field_d = v->d;
+ sum_field_c += v->c;
+ }
+ }
+
+ /* finally release p */
+ bpf_percpu_obj_drop(p1);
+out:
+ bpf_rcu_read_unlock();
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/percpu_alloc_cgrp_local_storage.c b/tools/testing/selftests/bpf/progs/percpu_alloc_cgrp_local_storage.c
new file mode 100644
index 000000000000..a2acf9aa6c24
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/percpu_alloc_cgrp_local_storage.c
@@ -0,0 +1,109 @@
+#include "bpf_experimental.h"
+
+struct val_t {
+ long b, c, d;
+};
+
+struct elem {
+ long sum;
+ struct val_t __percpu_kptr *pc;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_CGRP_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, struct elem);
+} cgrp SEC(".maps");
+
+const volatile int nr_cpus;
+
+/* Initialize the percpu object */
+SEC("fentry/bpf_fentry_test1")
+int BPF_PROG(test_cgrp_local_storage_1)
+{
+ struct task_struct *task;
+ struct val_t __percpu_kptr *p;
+ struct elem *e;
+
+ task = bpf_get_current_task_btf();
+ e = bpf_cgrp_storage_get(&cgrp, task->cgroups->dfl_cgrp, 0,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (!e)
+ return 0;
+
+ p = bpf_percpu_obj_new(struct val_t);
+ if (!p)
+ return 0;
+
+ p = bpf_kptr_xchg(&e->pc, p);
+ if (p)
+ bpf_percpu_obj_drop(p);
+
+ return 0;
+}
+
+/* Percpu data collection */
+SEC("fentry/bpf_fentry_test2")
+int BPF_PROG(test_cgrp_local_storage_2)
+{
+ struct task_struct *task;
+ struct val_t __percpu_kptr *p;
+ struct val_t *v;
+ struct elem *e;
+
+ task = bpf_get_current_task_btf();
+ e = bpf_cgrp_storage_get(&cgrp, task->cgroups->dfl_cgrp, 0, 0);
+ if (!e)
+ return 0;
+
+ p = e->pc;
+ if (!p)
+ return 0;
+
+ v = bpf_per_cpu_ptr(p, 0);
+ if (!v)
+ return 0;
+ v->c = 1;
+ v->d = 2;
+ return 0;
+}
+
+int cpu0_field_d, sum_field_c;
+int my_pid;
+
+/* Summarize percpu data collection */
+SEC("fentry/bpf_fentry_test3")
+int BPF_PROG(test_cgrp_local_storage_3)
+{
+ struct task_struct *task;
+ struct val_t __percpu_kptr *p;
+ struct val_t *v;
+ struct elem *e;
+ int i;
+
+ if ((bpf_get_current_pid_tgid() >> 32) != my_pid)
+ return 0;
+
+ task = bpf_get_current_task_btf();
+ e = bpf_cgrp_storage_get(&cgrp, task->cgroups->dfl_cgrp, 0, 0);
+ if (!e)
+ return 0;
+
+ p = e->pc;
+ if (!p)
+ return 0;
+
+ bpf_for(i, 0, nr_cpus) {
+ v = bpf_per_cpu_ptr(p, i);
+ if (v) {
+ if (i == 0)
+ cpu0_field_d = v->d;
+ sum_field_c += v->c;
+ }
+ }
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/percpu_alloc_fail.c b/tools/testing/selftests/bpf/progs/percpu_alloc_fail.c
new file mode 100644
index 000000000000..1a891d30f1fe
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/percpu_alloc_fail.c
@@ -0,0 +1,164 @@
+#include "bpf_experimental.h"
+#include "bpf_misc.h"
+
+struct val_t {
+ long b, c, d;
+};
+
+struct val2_t {
+ long b;
+};
+
+struct val_with_ptr_t {
+ char *p;
+};
+
+struct val_with_rb_root_t {
+ struct bpf_spin_lock lock;
+};
+
+struct elem {
+ long sum;
+ struct val_t __percpu_kptr *pc;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct elem);
+} array SEC(".maps");
+
+long ret;
+
+SEC("?fentry/bpf_fentry_test1")
+__failure __msg("store to referenced kptr disallowed")
+int BPF_PROG(test_array_map_1)
+{
+ struct val_t __percpu_kptr *p;
+ struct elem *e;
+ int index = 0;
+
+ e = bpf_map_lookup_elem(&array, &index);
+ if (!e)
+ return 0;
+
+ p = bpf_percpu_obj_new(struct val_t);
+ if (!p)
+ return 0;
+
+ p = bpf_kptr_xchg(&e->pc, p);
+ if (p)
+ bpf_percpu_obj_drop(p);
+
+ e->pc = (struct val_t __percpu_kptr *)ret;
+ return 0;
+}
+
+SEC("?fentry/bpf_fentry_test1")
+__failure __msg("invalid kptr access, R2 type=percpu_ptr_val2_t expected=ptr_val_t")
+int BPF_PROG(test_array_map_2)
+{
+ struct val2_t __percpu_kptr *p2;
+ struct val_t __percpu_kptr *p;
+ struct elem *e;
+ int index = 0;
+
+ e = bpf_map_lookup_elem(&array, &index);
+ if (!e)
+ return 0;
+
+ p2 = bpf_percpu_obj_new(struct val2_t);
+ if (!p2)
+ return 0;
+
+ p = bpf_kptr_xchg(&e->pc, p2);
+ if (p)
+ bpf_percpu_obj_drop(p);
+
+ return 0;
+}
+
+SEC("?fentry.s/bpf_fentry_test1")
+__failure __msg("R1 type=scalar expected=percpu_ptr_, percpu_rcu_ptr_, percpu_trusted_ptr_")
+int BPF_PROG(test_array_map_3)
+{
+ struct val_t __percpu_kptr *p, *p1;
+ struct val_t *v;
+ struct elem *e;
+ int index = 0;
+
+ e = bpf_map_lookup_elem(&array, &index);
+ if (!e)
+ return 0;
+
+ p = bpf_percpu_obj_new(struct val_t);
+ if (!p)
+ return 0;
+
+ p1 = bpf_kptr_xchg(&e->pc, p);
+ if (p1)
+ bpf_percpu_obj_drop(p1);
+
+ v = bpf_this_cpu_ptr(p);
+ ret = v->b;
+ return 0;
+}
+
+SEC("?fentry.s/bpf_fentry_test1")
+__failure __msg("arg#0 expected for bpf_percpu_obj_drop_impl()")
+int BPF_PROG(test_array_map_4)
+{
+ struct val_t __percpu_kptr *p;
+
+ p = bpf_percpu_obj_new(struct val_t);
+ if (!p)
+ return 0;
+
+ bpf_obj_drop(p);
+ return 0;
+}
+
+SEC("?fentry.s/bpf_fentry_test1")
+__failure __msg("arg#0 expected for bpf_obj_drop_impl()")
+int BPF_PROG(test_array_map_5)
+{
+ struct val_t *p;
+
+ p = bpf_obj_new(struct val_t);
+ if (!p)
+ return 0;
+
+ bpf_percpu_obj_drop(p);
+ return 0;
+}
+
+SEC("?fentry.s/bpf_fentry_test1")
+__failure __msg("bpf_percpu_obj_new type ID argument must be of a struct of scalars")
+int BPF_PROG(test_array_map_6)
+{
+ struct val_with_ptr_t __percpu_kptr *p;
+
+ p = bpf_percpu_obj_new(struct val_with_ptr_t);
+ if (!p)
+ return 0;
+
+ bpf_percpu_obj_drop(p);
+ return 0;
+}
+
+SEC("?fentry.s/bpf_fentry_test1")
+__failure __msg("bpf_percpu_obj_new type ID argument must not contain special fields")
+int BPF_PROG(test_array_map_7)
+{
+ struct val_with_rb_root_t __percpu_kptr *p;
+
+ p = bpf_percpu_obj_new(struct val_with_rb_root_t);
+ if (!p)
+ return 0;
+
+ bpf_percpu_obj_drop(p);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/preempted_bpf_ma_op.c b/tools/testing/selftests/bpf/progs/preempted_bpf_ma_op.c
new file mode 100644
index 000000000000..55907ef961bf
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/preempted_bpf_ma_op.c
@@ -0,0 +1,106 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023. Huawei Technologies Co., Ltd */
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+#include "bpf_experimental.h"
+
+struct bin_data {
+ char data[256];
+ struct bpf_spin_lock lock;
+};
+
+struct map_value {
+ struct bin_data __kptr * data;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, struct map_value);
+ __uint(max_entries, 2048);
+} array SEC(".maps");
+
+char _license[] SEC("license") = "GPL";
+
+bool nomem_err = false;
+
+static int del_array(unsigned int i, int *from)
+{
+ struct map_value *value;
+ struct bin_data *old;
+
+ value = bpf_map_lookup_elem(&array, from);
+ if (!value)
+ return 1;
+
+ old = bpf_kptr_xchg(&value->data, NULL);
+ if (old)
+ bpf_obj_drop(old);
+
+ (*from)++;
+ return 0;
+}
+
+static int add_array(unsigned int i, int *from)
+{
+ struct bin_data *old, *new;
+ struct map_value *value;
+
+ value = bpf_map_lookup_elem(&array, from);
+ if (!value)
+ return 1;
+
+ new = bpf_obj_new(typeof(*new));
+ if (!new) {
+ nomem_err = true;
+ return 1;
+ }
+
+ old = bpf_kptr_xchg(&value->data, new);
+ if (old)
+ bpf_obj_drop(old);
+
+ (*from)++;
+ return 0;
+}
+
+static void del_then_add_array(int from)
+{
+ int i;
+
+ i = from;
+ bpf_loop(512, del_array, &i, 0);
+
+ i = from;
+ bpf_loop(512, add_array, &i, 0);
+}
+
+SEC("fentry/bpf_fentry_test1")
+int BPF_PROG2(test0, int, a)
+{
+ del_then_add_array(0);
+ return 0;
+}
+
+SEC("fentry/bpf_fentry_test2")
+int BPF_PROG2(test1, int, a, u64, b)
+{
+ del_then_add_array(512);
+ return 0;
+}
+
+SEC("fentry/bpf_fentry_test3")
+int BPF_PROG2(test2, char, a, int, b, u64, c)
+{
+ del_then_add_array(1024);
+ return 0;
+}
+
+SEC("fentry/bpf_fentry_test4")
+int BPF_PROG2(test3, void *, a, char, b, int, c, u64, d)
+{
+ del_then_add_array(1536);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/profiler.inc.h b/tools/testing/selftests/bpf/progs/profiler.inc.h
index f799d87e8700..897061930cb7 100644
--- a/tools/testing/selftests/bpf/progs/profiler.inc.h
+++ b/tools/testing/selftests/bpf/progs/profiler.inc.h
@@ -609,7 +609,7 @@ out:
}
SEC("tracepoint/syscalls/sys_enter_kill")
-int tracepoint__syscalls__sys_enter_kill(struct trace_event_raw_sys_enter* ctx)
+int tracepoint__syscalls__sys_enter_kill(struct syscall_trace_enter* ctx)
{
struct bpf_func_stats_ctx stats_ctx;
diff --git a/tools/testing/selftests/bpf/progs/recvmsg_unix_prog.c b/tools/testing/selftests/bpf/progs/recvmsg_unix_prog.c
new file mode 100644
index 000000000000..4dfbc8552558
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/recvmsg_unix_prog.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+
+#include <string.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_kfuncs.h"
+
+__u8 SERVUN_ADDRESS[] = "\0bpf_cgroup_unix_test";
+
+SEC("cgroup/recvmsg_unix")
+int recvmsg_unix_prog(struct bpf_sock_addr *ctx)
+{
+ struct bpf_sock_addr_kern *sa_kern = bpf_cast_to_kern_ctx(ctx);
+ struct sockaddr_un *sa_kern_unaddr;
+ __u32 unaddrlen = offsetof(struct sockaddr_un, sun_path) +
+ sizeof(SERVUN_ADDRESS) - 1;
+ int ret;
+
+ ret = bpf_sock_addr_set_sun_path(sa_kern, SERVUN_ADDRESS,
+ sizeof(SERVUN_ADDRESS) - 1);
+ if (ret)
+ return 1;
+
+ if (sa_kern->uaddrlen != unaddrlen)
+ return 1;
+
+ sa_kern_unaddr = bpf_rdonly_cast(sa_kern->uaddr,
+ bpf_core_type_id_kernel(struct sockaddr_un));
+ if (memcmp(sa_kern_unaddr->sun_path, SERVUN_ADDRESS,
+ sizeof(SERVUN_ADDRESS) - 1) != 0)
+ return 1;
+
+ return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/sendmsg_unix_prog.c b/tools/testing/selftests/bpf/progs/sendmsg_unix_prog.c
new file mode 100644
index 000000000000..1f67e832666e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/sendmsg_unix_prog.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+
+#include <string.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_kfuncs.h"
+
+__u8 SERVUN_REWRITE_ADDRESS[] = "\0bpf_cgroup_unix_test_rewrite";
+
+SEC("cgroup/sendmsg_unix")
+int sendmsg_unix_prog(struct bpf_sock_addr *ctx)
+{
+ struct bpf_sock_addr_kern *sa_kern = bpf_cast_to_kern_ctx(ctx);
+ struct sockaddr_un *sa_kern_unaddr;
+ __u32 unaddrlen = offsetof(struct sockaddr_un, sun_path) +
+ sizeof(SERVUN_REWRITE_ADDRESS) - 1;
+ int ret;
+
+ /* Rewrite destination. */
+ ret = bpf_sock_addr_set_sun_path(sa_kern, SERVUN_REWRITE_ADDRESS,
+ sizeof(SERVUN_REWRITE_ADDRESS) - 1);
+ if (ret)
+ return 0;
+
+ if (sa_kern->uaddrlen != unaddrlen)
+ return 0;
+
+ sa_kern_unaddr = bpf_rdonly_cast(sa_kern->uaddr,
+ bpf_core_type_id_kernel(struct sockaddr_un));
+ if (memcmp(sa_kern_unaddr->sun_path, SERVUN_REWRITE_ADDRESS,
+ sizeof(SERVUN_REWRITE_ADDRESS) - 1) != 0)
+ return 0;
+
+ return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_fentry.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_fentry.c
new file mode 100644
index 000000000000..8436c6729167
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_fentry.c
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Leon Hwang */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+int count = 0;
+
+SEC("fentry/subprog_tail")
+int BPF_PROG(fentry, struct sk_buff *skb)
+{
+ count++;
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_fexit.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_fexit.c
new file mode 100644
index 000000000000..fe16412c6e6e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_fexit.c
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Leon Hwang */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+int count = 0;
+
+SEC("fexit/subprog_tail")
+int BPF_PROG(fexit, struct sk_buff *skb)
+{
+ count++;
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_ldsx_insn.c b/tools/testing/selftests/bpf/progs/test_ldsx_insn.c
index 67c14ba1e87b..3ddcb3777912 100644
--- a/tools/testing/selftests/bpf/progs/test_ldsx_insn.c
+++ b/tools/testing/selftests/bpf/progs/test_ldsx_insn.c
@@ -6,7 +6,8 @@
#include <bpf/bpf_tracing.h>
#if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
- (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64)) && __clang_major__ >= 18
+ (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \
+ defined(__TARGET_ARCH_s390)) && __clang_major__ >= 18
const volatile int skip = 0;
#else
const volatile int skip = 1;
@@ -104,7 +105,11 @@ int _tc(volatile struct __sk_buff *skb)
"%[tmp_mark] = r1"
: [tmp_mark]"=r"(tmp_mark)
: [ctx]"r"(skb),
- [off_mark]"i"(offsetof(struct __sk_buff, mark))
+ [off_mark]"i"(offsetof(struct __sk_buff, mark)
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+ + sizeof(skb->mark) - 1
+#endif
+ )
: "r1");
#else
tmp_mark = (char)skb->mark;
diff --git a/tools/testing/selftests/bpf/progs/test_uprobe.c b/tools/testing/selftests/bpf/progs/test_uprobe.c
new file mode 100644
index 000000000000..896c88a4960d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_uprobe.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Hengqi Chen */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+pid_t my_pid = 0;
+
+int test1_result = 0;
+int test2_result = 0;
+int test3_result = 0;
+int test4_result = 0;
+
+SEC("uprobe/./liburandom_read.so:urandlib_api_sameoffset")
+int BPF_UPROBE(test1)
+{
+ pid_t pid = bpf_get_current_pid_tgid() >> 32;
+
+ if (pid != my_pid)
+ return 0;
+
+ test1_result = 1;
+ return 0;
+}
+
+SEC("uprobe/./liburandom_read.so:urandlib_api_sameoffset@LIBURANDOM_READ_1.0.0")
+int BPF_UPROBE(test2)
+{
+ pid_t pid = bpf_get_current_pid_tgid() >> 32;
+
+ if (pid != my_pid)
+ return 0;
+
+ test2_result = 1;
+ return 0;
+}
+
+SEC("uretprobe/./liburandom_read.so:urandlib_api_sameoffset@@LIBURANDOM_READ_2.0.0")
+int BPF_URETPROBE(test3, int ret)
+{
+ pid_t pid = bpf_get_current_pid_tgid() >> 32;
+
+ if (pid != my_pid)
+ return 0;
+
+ test3_result = ret;
+ return 0;
+}
+
+SEC("uprobe")
+int BPF_UPROBE(test4)
+{
+ pid_t pid = bpf_get_current_pid_tgid() >> 32;
+
+ if (pid != my_pid)
+ return 0;
+
+ test4_result = 1;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_vmlinux.c b/tools/testing/selftests/bpf/progs/test_vmlinux.c
index 4b8e37f7fd06..78b23934d9f8 100644
--- a/tools/testing/selftests/bpf/progs/test_vmlinux.c
+++ b/tools/testing/selftests/bpf/progs/test_vmlinux.c
@@ -16,12 +16,12 @@ bool kprobe_called = false;
bool fentry_called = false;
SEC("tp/syscalls/sys_enter_nanosleep")
-int handle__tp(struct trace_event_raw_sys_enter *args)
+int handle__tp(struct syscall_trace_enter *args)
{
struct __kernel_timespec *ts;
long tv_nsec;
- if (args->id != __NR_nanosleep)
+ if (args->nr != __NR_nanosleep)
return 0;
ts = (void *)args->args[0];
diff --git a/tools/testing/selftests/bpf/progs/timer.c b/tools/testing/selftests/bpf/progs/timer.c
index 9a16d95213e1..8b946c8188c6 100644
--- a/tools/testing/selftests/bpf/progs/timer.c
+++ b/tools/testing/selftests/bpf/progs/timer.c
@@ -51,7 +51,7 @@ struct {
__uint(max_entries, 1);
__type(key, int);
__type(value, struct elem);
-} abs_timer SEC(".maps");
+} abs_timer SEC(".maps"), soft_timer_pinned SEC(".maps"), abs_timer_pinned SEC(".maps");
__u64 bss_data;
__u64 abs_data;
@@ -59,6 +59,8 @@ __u64 err;
__u64 ok;
__u64 callback_check = 52;
__u64 callback2_check = 52;
+__u64 pinned_callback_check;
+__s32 pinned_cpu;
#define ARRAY 1
#define HTAB 2
@@ -329,3 +331,62 @@ int BPF_PROG2(test3, int, a)
return 0;
}
+
+/* callback for pinned timer */
+static int timer_cb_pinned(void *map, int *key, struct bpf_timer *timer)
+{
+ __s32 cpu = bpf_get_smp_processor_id();
+
+ if (cpu != pinned_cpu)
+ err |= 16384;
+
+ pinned_callback_check++;
+ return 0;
+}
+
+static void test_pinned_timer(bool soft)
+{
+ int key = 0;
+ void *map;
+ struct bpf_timer *timer;
+ __u64 flags = BPF_F_TIMER_CPU_PIN;
+ __u64 start_time;
+
+ if (soft) {
+ map = &soft_timer_pinned;
+ start_time = 0;
+ } else {
+ map = &abs_timer_pinned;
+ start_time = bpf_ktime_get_boot_ns();
+ flags |= BPF_F_TIMER_ABS;
+ }
+
+ timer = bpf_map_lookup_elem(map, &key);
+ if (timer) {
+ if (bpf_timer_init(timer, map, CLOCK_BOOTTIME) != 0)
+ err |= 4096;
+ bpf_timer_set_callback(timer, timer_cb_pinned);
+ pinned_cpu = bpf_get_smp_processor_id();
+ bpf_timer_start(timer, start_time + 1000, flags);
+ } else {
+ err |= 8192;
+ }
+}
+
+SEC("fentry/bpf_fentry_test4")
+int BPF_PROG2(test4, int, a)
+{
+ bpf_printk("test4");
+ test_pinned_timer(true);
+
+ return 0;
+}
+
+SEC("fentry/bpf_fentry_test5")
+int BPF_PROG2(test5, int, a)
+{
+ bpf_printk("test5");
+ test_pinned_timer(false);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/verifier_bswap.c b/tools/testing/selftests/bpf/progs/verifier_bswap.c
index 8893094725f0..107525fb4a6a 100644
--- a/tools/testing/selftests/bpf/progs/verifier_bswap.c
+++ b/tools/testing/selftests/bpf/progs/verifier_bswap.c
@@ -5,7 +5,9 @@
#include "bpf_misc.h"
#if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
- (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64)) && __clang_major__ >= 18
+ (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \
+ defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390)) && \
+ __clang_major__ >= 18
SEC("socket")
__description("BSWAP, 16")
diff --git a/tools/testing/selftests/bpf/progs/verifier_gotol.c b/tools/testing/selftests/bpf/progs/verifier_gotol.c
index 2dae5322a18e..9f202eda952f 100644
--- a/tools/testing/selftests/bpf/progs/verifier_gotol.c
+++ b/tools/testing/selftests/bpf/progs/verifier_gotol.c
@@ -5,7 +5,9 @@
#include "bpf_misc.h"
#if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
- (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64)) && __clang_major__ >= 18
+ (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \
+ defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390)) && \
+ __clang_major__ >= 18
SEC("socket")
__description("gotol, small_imm")
diff --git a/tools/testing/selftests/bpf/progs/verifier_ldsx.c b/tools/testing/selftests/bpf/progs/verifier_ldsx.c
index 0c638f45aaf1..375525329637 100644
--- a/tools/testing/selftests/bpf/progs/verifier_ldsx.c
+++ b/tools/testing/selftests/bpf/progs/verifier_ldsx.c
@@ -5,19 +5,25 @@
#include "bpf_misc.h"
#if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
- (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64)) && __clang_major__ >= 18
+ (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \
+ defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390)) && \
+ __clang_major__ >= 18
SEC("socket")
__description("LDSX, S8")
__success __success_unpriv __retval(-2)
__naked void ldsx_s8(void)
{
- asm volatile (" \
- r1 = 0x3fe; \
- *(u64 *)(r10 - 8) = r1; \
- r0 = *(s8 *)(r10 - 8); \
- exit; \
-" ::: __clobber_all);
+ asm volatile (
+ "r1 = 0x3fe;"
+ "*(u64 *)(r10 - 8) = r1;"
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ "r0 = *(s8 *)(r10 - 8);"
+#else
+ "r0 = *(s8 *)(r10 - 1);"
+#endif
+ "exit;"
+ ::: __clobber_all);
}
SEC("socket")
@@ -25,12 +31,16 @@ __description("LDSX, S16")
__success __success_unpriv __retval(-2)
__naked void ldsx_s16(void)
{
- asm volatile (" \
- r1 = 0x3fffe; \
- *(u64 *)(r10 - 8) = r1; \
- r0 = *(s16 *)(r10 - 8); \
- exit; \
-" ::: __clobber_all);
+ asm volatile (
+ "r1 = 0x3fffe;"
+ "*(u64 *)(r10 - 8) = r1;"
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ "r0 = *(s16 *)(r10 - 8);"
+#else
+ "r0 = *(s16 *)(r10 - 2);"
+#endif
+ "exit;"
+ ::: __clobber_all);
}
SEC("socket")
@@ -38,35 +48,43 @@ __description("LDSX, S32")
__success __success_unpriv __retval(-1)
__naked void ldsx_s32(void)
{
- asm volatile (" \
- r1 = 0xfffffffe; \
- *(u64 *)(r10 - 8) = r1; \
- r0 = *(s32 *)(r10 - 8); \
- r0 >>= 1; \
- exit; \
-" ::: __clobber_all);
+ asm volatile (
+ "r1 = 0xfffffffe;"
+ "*(u64 *)(r10 - 8) = r1;"
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ "r0 = *(s32 *)(r10 - 8);"
+#else
+ "r0 = *(s32 *)(r10 - 4);"
+#endif
+ "r0 >>= 1;"
+ "exit;"
+ ::: __clobber_all);
}
SEC("socket")
__description("LDSX, S8 range checking, privileged")
__log_level(2) __success __retval(1)
-__msg("R1_w=scalar(smin=-128,smax=127)")
+__msg("R1_w=scalar(smin=smin32=-128,smax=smax32=127)")
__naked void ldsx_s8_range_priv(void)
{
- asm volatile (" \
- call %[bpf_get_prandom_u32]; \
- *(u64 *)(r10 - 8) = r0; \
- r1 = *(s8 *)(r10 - 8); \
- /* r1 with s8 range */ \
- if r1 s> 0x7f goto l0_%=; \
- if r1 s< -0x80 goto l0_%=; \
- r0 = 1; \
-l1_%=: \
- exit; \
-l0_%=: \
- r0 = 2; \
- goto l1_%=; \
-" :
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "*(u64 *)(r10 - 8) = r0;"
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ "r1 = *(s8 *)(r10 - 8);"
+#else
+ "r1 = *(s8 *)(r10 - 1);"
+#endif
+ /* r1 with s8 range */
+ "if r1 s> 0x7f goto l0_%=;"
+ "if r1 s< -0x80 goto l0_%=;"
+ "r0 = 1;"
+"l1_%=:"
+ "exit;"
+"l0_%=:"
+ "r0 = 2;"
+ "goto l1_%=;"
+ :
: __imm(bpf_get_prandom_u32)
: __clobber_all);
}
@@ -76,20 +94,24 @@ __description("LDSX, S16 range checking")
__success __success_unpriv __retval(1)
__naked void ldsx_s16_range(void)
{
- asm volatile (" \
- call %[bpf_get_prandom_u32]; \
- *(u64 *)(r10 - 8) = r0; \
- r1 = *(s16 *)(r10 - 8); \
- /* r1 with s16 range */ \
- if r1 s> 0x7fff goto l0_%=; \
- if r1 s< -0x8000 goto l0_%=; \
- r0 = 1; \
-l1_%=: \
- exit; \
-l0_%=: \
- r0 = 2; \
- goto l1_%=; \
-" :
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "*(u64 *)(r10 - 8) = r0;"
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ "r1 = *(s16 *)(r10 - 8);"
+#else
+ "r1 = *(s16 *)(r10 - 2);"
+#endif
+ /* r1 with s16 range */
+ "if r1 s> 0x7fff goto l0_%=;"
+ "if r1 s< -0x8000 goto l0_%=;"
+ "r0 = 1;"
+"l1_%=:"
+ "exit;"
+"l0_%=:"
+ "r0 = 2;"
+ "goto l1_%=;"
+ :
: __imm(bpf_get_prandom_u32)
: __clobber_all);
}
@@ -99,20 +121,24 @@ __description("LDSX, S32 range checking")
__success __success_unpriv __retval(1)
__naked void ldsx_s32_range(void)
{
- asm volatile (" \
- call %[bpf_get_prandom_u32]; \
- *(u64 *)(r10 - 8) = r0; \
- r1 = *(s32 *)(r10 - 8); \
- /* r1 with s16 range */ \
- if r1 s> 0x7fffFFFF goto l0_%=; \
- if r1 s< -0x80000000 goto l0_%=; \
- r0 = 1; \
-l1_%=: \
- exit; \
-l0_%=: \
- r0 = 2; \
- goto l1_%=; \
-" :
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "*(u64 *)(r10 - 8) = r0;"
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ "r1 = *(s32 *)(r10 - 8);"
+#else
+ "r1 = *(s32 *)(r10 - 4);"
+#endif
+ /* r1 with s16 range */
+ "if r1 s> 0x7fffFFFF goto l0_%=;"
+ "if r1 s< -0x80000000 goto l0_%=;"
+ "r0 = 1;"
+"l1_%=:"
+ "exit;"
+"l0_%=:"
+ "r0 = 2;"
+ "goto l1_%=;"
+ :
: __imm(bpf_get_prandom_u32)
: __clobber_all);
}
diff --git a/tools/testing/selftests/bpf/progs/verifier_movsx.c b/tools/testing/selftests/bpf/progs/verifier_movsx.c
index 3c8ac2c57b1b..b2a04d1179d0 100644
--- a/tools/testing/selftests/bpf/progs/verifier_movsx.c
+++ b/tools/testing/selftests/bpf/progs/verifier_movsx.c
@@ -5,7 +5,9 @@
#include "bpf_misc.h"
#if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
- (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64)) && __clang_major__ >= 18
+ (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \
+ defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390)) && \
+ __clang_major__ >= 18
SEC("socket")
__description("MOV32SX, S8")
diff --git a/tools/testing/selftests/bpf/progs/verifier_sdiv.c b/tools/testing/selftests/bpf/progs/verifier_sdiv.c
index 0990f8825675..8fc5174808b2 100644
--- a/tools/testing/selftests/bpf/progs/verifier_sdiv.c
+++ b/tools/testing/selftests/bpf/progs/verifier_sdiv.c
@@ -5,7 +5,9 @@
#include "bpf_misc.h"
#if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
- (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64)) && __clang_major__ >= 18
+ (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \
+ defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390)) && \
+ __clang_major__ >= 18
SEC("socket")
__description("SDIV32, non-zero imm divisor, check 1")
diff --git a/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c b/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c
index 24369f242853..ccde6a4c6319 100644
--- a/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c
+++ b/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c
@@ -3,11 +3,12 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
-#include "xsk_xdp_metadata.h"
+#include <linux/if_ether.h>
+#include "xsk_xdp_common.h"
struct {
__uint(type, BPF_MAP_TYPE_XSKMAP);
- __uint(max_entries, 1);
+ __uint(max_entries, 2);
__uint(key_size, sizeof(int));
__uint(value_size, sizeof(int));
} xsk SEC(".maps");
@@ -52,4 +53,21 @@ SEC("xdp.frags") int xsk_xdp_populate_metadata(struct xdp_md *xdp)
return bpf_redirect_map(&xsk, 0, XDP_DROP);
}
+SEC("xdp") int xsk_xdp_shared_umem(struct xdp_md *xdp)
+{
+ void *data = (void *)(long)xdp->data;
+ void *data_end = (void *)(long)xdp->data_end;
+ struct ethhdr *eth = data;
+
+ if (eth + 1 > data_end)
+ return XDP_DROP;
+
+ /* Redirecting packets based on the destination MAC address */
+ idx = ((unsigned int)(eth->h_dest[5])) / 2;
+ if (idx > MAX_SOCKETS)
+ return XDP_DROP;
+
+ return bpf_redirect_map(&xsk, idx, XDP_DROP);
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_bpftool_synctypes.py b/tools/testing/selftests/bpf/test_bpftool_synctypes.py
index 0cfece7ff4f8..0ed67b6b31dd 100755
--- a/tools/testing/selftests/bpf/test_bpftool_synctypes.py
+++ b/tools/testing/selftests/bpf/test_bpftool_synctypes.py
@@ -509,6 +509,15 @@ def main():
source_map_types.remove('cgroup_storage_deprecated')
source_map_types.add('cgroup_storage')
+ # The same applied to BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE_DEPRECATED and
+ # BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE which share the same enum value
+ # and source_map_types picks
+ # BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE_DEPRECATED/percpu_cgroup_storage_deprecated.
+ # Replace 'percpu_cgroup_storage_deprecated' with 'percpu_cgroup_storage'
+ # so it aligns with what `bpftool map help` shows.
+ source_map_types.remove('percpu_cgroup_storage_deprecated')
+ source_map_types.add('percpu_cgroup_storage')
+
help_map_types = map_info.get_map_help()
help_map_options = map_info.get_options()
map_info.close()
diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c
index b4edd8454934..37ffa57f28a1 100644
--- a/tools/testing/selftests/bpf/test_loader.c
+++ b/tools/testing/selftests/bpf/test_loader.c
@@ -69,7 +69,7 @@ static int tester_init(struct test_loader *tester)
{
if (!tester->log_buf) {
tester->log_buf_sz = TEST_LOADER_LOG_BUF_SZ;
- tester->log_buf = malloc(tester->log_buf_sz);
+ tester->log_buf = calloc(tester->log_buf_sz, 1);
if (!ASSERT_OK_PTR(tester->log_buf, "tester_log_buf"))
return -ENOMEM;
}
@@ -538,7 +538,7 @@ void run_subtest(struct test_loader *tester,
bool unpriv)
{
struct test_subspec *subspec = unpriv ? &spec->unpriv : &spec->priv;
- struct bpf_program *tprog, *tprog_iter;
+ struct bpf_program *tprog = NULL, *tprog_iter;
struct test_spec *spec_iter;
struct cap_state caps = {};
struct bpf_object *tobj;
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 4d582cac2c09..1b9387890148 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -255,7 +255,7 @@ static void print_subtest_name(int test_num, int subtest_num,
const char *test_name, char *subtest_name,
char *result)
{
- char test_num_str[TEST_NUM_WIDTH + 1];
+ char test_num_str[32];
snprintf(test_num_str, sizeof(test_num_str), "%d/%d", test_num, subtest_num);
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index 77bd492c6024..2f9f6f250f17 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -417,6 +417,8 @@ int get_bpf_max_tramp_links(void);
#define SYS_NANOSLEEP_KPROBE_NAME "__s390x_sys_nanosleep"
#elif defined(__aarch64__)
#define SYS_NANOSLEEP_KPROBE_NAME "__arm64_sys_nanosleep"
+#elif defined(__riscv)
+#define SYS_NANOSLEEP_KPROBE_NAME "__riscv_sys_nanosleep"
#else
#define SYS_NANOSLEEP_KPROBE_NAME "sys_nanosleep"
#endif
diff --git a/tools/testing/selftests/bpf/test_xsk.sh b/tools/testing/selftests/bpf/test_xsk.sh
index 2aa5a3445056..65aafe0003db 100755
--- a/tools/testing/selftests/bpf/test_xsk.sh
+++ b/tools/testing/selftests/bpf/test_xsk.sh
@@ -73,17 +73,33 @@
#
# Run test suite for physical device in loopback mode
# sudo ./test_xsk.sh -i IFACE
+#
+# Run test suite in a specific mode only [skb,drv,zc]
+# sudo ./test_xsk.sh -m MODE
+#
+# List available tests
+# ./test_xsk.sh -l
+#
+# Run a specific test from the test suite
+# sudo ./test_xsk.sh -t TEST_NAME
+#
+# Display the available command line options
+# ./test_xsk.sh -h
. xsk_prereqs.sh
ETH=""
-while getopts "vi:d" flag
+while getopts "vi:dm:lt:h" flag
do
case "${flag}" in
v) verbose=1;;
d) debug=1;;
i) ETH=${OPTARG};;
+ m) MODE=${OPTARG};;
+ l) list=1;;
+ t) TEST=${OPTARG};;
+ h) help=1;;
esac
done
@@ -131,6 +147,16 @@ setup_vethPairs() {
ip link set ${VETH0} up
}
+if [[ $list -eq 1 ]]; then
+ ./${XSKOBJ} -l
+ exit
+fi
+
+if [[ $help -eq 1 ]]; then
+ ./${XSKOBJ}
+ exit
+fi
+
if [ ! -z $ETH ]; then
VETH0=${ETH}
VETH1=${ETH}
@@ -153,6 +179,14 @@ if [[ $verbose -eq 1 ]]; then
ARGS+="-v "
fi
+if [ -n "$MODE" ]; then
+ ARGS+="-m ${MODE} "
+fi
+
+if [ -n "$TEST" ]; then
+ ARGS+="-t ${TEST} "
+fi
+
retval=$?
test_status $retval "${TEST_NAME}"
@@ -175,6 +209,10 @@ else
cleanup_iface ${ETH} ${MTU}
fi
+if [[ $list -eq 1 ]]; then
+ exit
+fi
+
TEST_NAME="XSK_SELFTESTS_${VETH0}_BUSY_POLL"
busy_poll=1
diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c
index f83d9f65c65b..4faa898ff7fc 100644
--- a/tools/testing/selftests/bpf/trace_helpers.c
+++ b/tools/testing/selftests/bpf/trace_helpers.c
@@ -7,6 +7,7 @@
#include <errno.h>
#include <fcntl.h>
#include <poll.h>
+#include <pthread.h>
#include <unistd.h>
#include <linux/perf_event.h>
#include <sys/mman.h>
@@ -14,104 +15,165 @@
#include <linux/limits.h>
#include <libelf.h>
#include <gelf.h>
+#include "bpf/libbpf_internal.h"
#define TRACEFS_PIPE "/sys/kernel/tracing/trace_pipe"
#define DEBUGFS_PIPE "/sys/kernel/debug/tracing/trace_pipe"
-#define MAX_SYMS 400000
-static struct ksym syms[MAX_SYMS];
-static int sym_cnt;
+struct ksyms {
+ struct ksym *syms;
+ size_t sym_cap;
+ size_t sym_cnt;
+};
+
+static struct ksyms *ksyms;
+static pthread_mutex_t ksyms_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+static int ksyms__add_symbol(struct ksyms *ksyms, const char *name,
+ unsigned long addr)
+{
+ void *tmp;
+
+ tmp = strdup(name);
+ if (!tmp)
+ return -ENOMEM;
+ ksyms->syms[ksyms->sym_cnt].addr = addr;
+ ksyms->syms[ksyms->sym_cnt].name = tmp;
+ ksyms->sym_cnt++;
+ return 0;
+}
+
+void free_kallsyms_local(struct ksyms *ksyms)
+{
+ unsigned int i;
+
+ if (!ksyms)
+ return;
+
+ if (!ksyms->syms) {
+ free(ksyms);
+ return;
+ }
+
+ for (i = 0; i < ksyms->sym_cnt; i++)
+ free(ksyms->syms[i].name);
+ free(ksyms->syms);
+ free(ksyms);
+}
static int ksym_cmp(const void *p1, const void *p2)
{
return ((struct ksym *)p1)->addr - ((struct ksym *)p2)->addr;
}
-int load_kallsyms_refresh(void)
+struct ksyms *load_kallsyms_local(void)
{
FILE *f;
char func[256], buf[256];
char symbol;
void *addr;
- int i = 0;
-
- sym_cnt = 0;
+ int ret;
+ struct ksyms *ksyms;
f = fopen("/proc/kallsyms", "r");
if (!f)
- return -ENOENT;
+ return NULL;
+
+ ksyms = calloc(1, sizeof(struct ksyms));
+ if (!ksyms) {
+ fclose(f);
+ return NULL;
+ }
while (fgets(buf, sizeof(buf), f)) {
if (sscanf(buf, "%p %c %s", &addr, &symbol, func) != 3)
break;
if (!addr)
continue;
- if (i >= MAX_SYMS)
- return -EFBIG;
- syms[i].addr = (long) addr;
- syms[i].name = strdup(func);
- i++;
+ ret = libbpf_ensure_mem((void **) &ksyms->syms, &ksyms->sym_cap,
+ sizeof(struct ksym), ksyms->sym_cnt + 1);
+ if (ret)
+ goto error;
+ ret = ksyms__add_symbol(ksyms, func, (unsigned long)addr);
+ if (ret)
+ goto error;
}
fclose(f);
- sym_cnt = i;
- qsort(syms, sym_cnt, sizeof(struct ksym), ksym_cmp);
- return 0;
+ qsort(ksyms->syms, ksyms->sym_cnt, sizeof(struct ksym), ksym_cmp);
+ return ksyms;
+
+error:
+ fclose(f);
+ free_kallsyms_local(ksyms);
+ return NULL;
}
int load_kallsyms(void)
{
- /*
- * This is called/used from multiplace places,
- * load symbols just once.
- */
- if (sym_cnt)
- return 0;
- return load_kallsyms_refresh();
+ pthread_mutex_lock(&ksyms_mutex);
+ if (!ksyms)
+ ksyms = load_kallsyms_local();
+ pthread_mutex_unlock(&ksyms_mutex);
+ return ksyms ? 0 : 1;
}
-struct ksym *ksym_search(long key)
+struct ksym *ksym_search_local(struct ksyms *ksyms, long key)
{
- int start = 0, end = sym_cnt;
+ int start = 0, end = ksyms->sym_cnt;
int result;
/* kallsyms not loaded. return NULL */
- if (sym_cnt <= 0)
+ if (ksyms->sym_cnt <= 0)
return NULL;
while (start < end) {
size_t mid = start + (end - start) / 2;
- result = key - syms[mid].addr;
+ result = key - ksyms->syms[mid].addr;
if (result < 0)
end = mid;
else if (result > 0)
start = mid + 1;
else
- return &syms[mid];
+ return &ksyms->syms[mid];
}
- if (start >= 1 && syms[start - 1].addr < key &&
- key < syms[start].addr)
+ if (start >= 1 && ksyms->syms[start - 1].addr < key &&
+ key < ksyms->syms[start].addr)
/* valid ksym */
- return &syms[start - 1];
+ return &ksyms->syms[start - 1];
/* out of range. return _stext */
- return &syms[0];
+ return &ksyms->syms[0];
}
-long ksym_get_addr(const char *name)
+struct ksym *ksym_search(long key)
+{
+ if (!ksyms)
+ return NULL;
+ return ksym_search_local(ksyms, key);
+}
+
+long ksym_get_addr_local(struct ksyms *ksyms, const char *name)
{
int i;
- for (i = 0; i < sym_cnt; i++) {
- if (strcmp(syms[i].name, name) == 0)
- return syms[i].addr;
+ for (i = 0; i < ksyms->sym_cnt; i++) {
+ if (strcmp(ksyms->syms[i].name, name) == 0)
+ return ksyms->syms[i].addr;
}
return 0;
}
+long ksym_get_addr(const char *name)
+{
+ if (!ksyms)
+ return 0;
+ return ksym_get_addr_local(ksyms, name);
+}
+
/* open kallsyms and read symbol addresses on the fly. Without caching all symbols,
* this is faster than load + find.
*/
diff --git a/tools/testing/selftests/bpf/trace_helpers.h b/tools/testing/selftests/bpf/trace_helpers.h
index 876f3e711df6..04fd1da7079d 100644
--- a/tools/testing/selftests/bpf/trace_helpers.h
+++ b/tools/testing/selftests/bpf/trace_helpers.h
@@ -11,13 +11,17 @@ struct ksym {
long addr;
char *name;
};
+struct ksyms;
int load_kallsyms(void);
-int load_kallsyms_refresh(void);
-
struct ksym *ksym_search(long key);
long ksym_get_addr(const char *name);
+struct ksyms *load_kallsyms_local(void);
+struct ksym *ksym_search_local(struct ksyms *ksyms, long key);
+long ksym_get_addr_local(struct ksyms *ksyms, const char *name);
+void free_kallsyms_local(struct ksyms *ksyms);
+
/* open kallsyms and find addresses on the fly, faster than load + search. */
int kallsyms_find(const char *sym, unsigned long long *addr);
diff --git a/tools/testing/selftests/bpf/urandom_read.c b/tools/testing/selftests/bpf/urandom_read.c
index e92644d0fa75..4ed795655b9f 100644
--- a/tools/testing/selftests/bpf/urandom_read.c
+++ b/tools/testing/selftests/bpf/urandom_read.c
@@ -11,6 +11,9 @@
#define _SDT_HAS_SEMAPHORES 1
#include "sdt.h"
+#define SHARED 1
+#include "bpf/libbpf_internal.h"
+
#define SEC(name) __attribute__((section(name), used))
#define BUF_SIZE 256
@@ -21,10 +24,14 @@ void urand_read_without_sema(int iter_num, int iter_cnt, int read_sz);
void urandlib_read_with_sema(int iter_num, int iter_cnt, int read_sz);
void urandlib_read_without_sema(int iter_num, int iter_cnt, int read_sz);
+int urandlib_api(void);
+COMPAT_VERSION(urandlib_api_old, urandlib_api, LIBURANDOM_READ_1.0.0)
+int urandlib_api_old(void);
+int urandlib_api_sameoffset(void);
+
unsigned short urand_read_with_sema_semaphore SEC(".probes");
-static __attribute__((noinline))
-void urandom_read(int fd, int count)
+static noinline void urandom_read(int fd, int count)
{
char buf[BUF_SIZE];
int i;
@@ -83,6 +90,10 @@ int main(int argc, char *argv[])
urandom_read(fd, count);
+ urandlib_api();
+ urandlib_api_old();
+ urandlib_api_sameoffset();
+
close(fd);
return 0;
}
diff --git a/tools/testing/selftests/bpf/urandom_read_lib1.c b/tools/testing/selftests/bpf/urandom_read_lib1.c
index 86186e24b740..8c1356d8b4ee 100644
--- a/tools/testing/selftests/bpf/urandom_read_lib1.c
+++ b/tools/testing/selftests/bpf/urandom_read_lib1.c
@@ -3,6 +3,9 @@
#define _SDT_HAS_SEMAPHORES 1
#include "sdt.h"
+#define SHARED 1
+#include "bpf/libbpf_internal.h"
+
#define SEC(name) __attribute__((section(name), used))
unsigned short urandlib_read_with_sema_semaphore SEC(".probes");
@@ -11,3 +14,22 @@ void urandlib_read_with_sema(int iter_num, int iter_cnt, int read_sz)
{
STAP_PROBE3(urandlib, read_with_sema, iter_num, iter_cnt, read_sz);
}
+
+COMPAT_VERSION(urandlib_api_v1, urandlib_api, LIBURANDOM_READ_1.0.0)
+int urandlib_api_v1(void)
+{
+ return 1;
+}
+
+DEFAULT_VERSION(urandlib_api_v2, urandlib_api, LIBURANDOM_READ_2.0.0)
+int urandlib_api_v2(void)
+{
+ return 2;
+}
+
+COMPAT_VERSION(urandlib_api_sameoffset, urandlib_api_sameoffset, LIBURANDOM_READ_1.0.0)
+DEFAULT_VERSION(urandlib_api_sameoffset, urandlib_api_sameoffset, LIBURANDOM_READ_2.0.0)
+int urandlib_api_sameoffset(void)
+{
+ return 3;
+}
diff --git a/tools/testing/selftests/bpf/xdp_features.c b/tools/testing/selftests/bpf/xdp_features.c
index b449788fbd39..595c79141cf3 100644
--- a/tools/testing/selftests/bpf/xdp_features.c
+++ b/tools/testing/selftests/bpf/xdp_features.c
@@ -360,9 +360,9 @@ static int recv_msg(int sockfd, void *buf, size_t bufsize, void *val,
static int dut_run(struct xdp_features *skel)
{
int flags = XDP_FLAGS_UPDATE_IF_NOEXIST | XDP_FLAGS_DRV_MODE;
- int state, err, *sockfd, ctrl_sockfd, echo_sockfd;
+ int state, err = 0, *sockfd, ctrl_sockfd, echo_sockfd;
struct sockaddr_storage ctrl_addr;
- pthread_t dut_thread;
+ pthread_t dut_thread = 0;
socklen_t addrlen;
sockfd = start_reuseport_server(AF_INET6, SOCK_STREAM, NULL,
diff --git a/tools/testing/selftests/bpf/xdp_hw_metadata.c b/tools/testing/selftests/bpf/xdp_hw_metadata.c
index 613321eb84c1..17c980138796 100644
--- a/tools/testing/selftests/bpf/xdp_hw_metadata.c
+++ b/tools/testing/selftests/bpf/xdp_hw_metadata.c
@@ -234,7 +234,7 @@ static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd, clockid_t
struct pollfd fds[rxq + 1];
__u64 comp_addr;
__u64 addr;
- __u32 idx;
+ __u32 idx = 0;
int ret;
int i;
diff --git a/tools/testing/selftests/bpf/xsk.c b/tools/testing/selftests/bpf/xsk.c
index d9fb2b730a2c..e574711eeb84 100644
--- a/tools/testing/selftests/bpf/xsk.c
+++ b/tools/testing/selftests/bpf/xsk.c
@@ -442,10 +442,9 @@ void xsk_clear_xskmap(struct bpf_map *map)
bpf_map_delete_elem(map_fd, &index);
}
-int xsk_update_xskmap(struct bpf_map *map, struct xsk_socket *xsk)
+int xsk_update_xskmap(struct bpf_map *map, struct xsk_socket *xsk, u32 index)
{
int map_fd, sock_fd;
- u32 index = 0;
map_fd = bpf_map__fd(map);
sock_fd = xsk_socket__fd(xsk);
diff --git a/tools/testing/selftests/bpf/xsk.h b/tools/testing/selftests/bpf/xsk.h
index d93200fdaa8d..771570bc3731 100644
--- a/tools/testing/selftests/bpf/xsk.h
+++ b/tools/testing/selftests/bpf/xsk.h
@@ -204,7 +204,7 @@ struct xsk_umem_config {
int xsk_attach_xdp_program(struct bpf_program *prog, int ifindex, u32 xdp_flags);
void xsk_detach_xdp_program(int ifindex, u32 xdp_flags);
-int xsk_update_xskmap(struct bpf_map *map, struct xsk_socket *xsk);
+int xsk_update_xskmap(struct bpf_map *map, struct xsk_socket *xsk, u32 index);
void xsk_clear_xskmap(struct bpf_map *map);
bool xsk_is_in_mode(u32 ifindex, int mode);
diff --git a/tools/testing/selftests/bpf/xsk_prereqs.sh b/tools/testing/selftests/bpf/xsk_prereqs.sh
index 29175682c44d..47c7b8064f38 100755
--- a/tools/testing/selftests/bpf/xsk_prereqs.sh
+++ b/tools/testing/selftests/bpf/xsk_prereqs.sh
@@ -83,9 +83,11 @@ exec_xskxceiver()
fi
./${XSKOBJ} -i ${VETH0} -i ${VETH1} ${ARGS}
-
retval=$?
- test_status $retval "${TEST_NAME}"
- statusList+=($retval)
- nameList+=(${TEST_NAME})
+
+ if [[ $list -ne 1 ]]; then
+ test_status $retval "${TEST_NAME}"
+ statusList+=($retval)
+ nameList+=(${TEST_NAME})
+ fi
}
diff --git a/tools/testing/selftests/bpf/xsk_xdp_common.h b/tools/testing/selftests/bpf/xsk_xdp_common.h
new file mode 100644
index 000000000000..5a6f36f07383
--- /dev/null
+++ b/tools/testing/selftests/bpf/xsk_xdp_common.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef XSK_XDP_COMMON_H_
+#define XSK_XDP_COMMON_H_
+
+#define MAX_SOCKETS 2
+
+struct xdp_info {
+ __u64 count;
+} __attribute__((aligned(32)));
+
+#endif /* XSK_XDP_COMMON_H_ */
diff --git a/tools/testing/selftests/bpf/xsk_xdp_metadata.h b/tools/testing/selftests/bpf/xsk_xdp_metadata.h
deleted file mode 100644
index 943133da378a..000000000000
--- a/tools/testing/selftests/bpf/xsk_xdp_metadata.h
+++ /dev/null
@@ -1,5 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-
-struct xdp_info {
- __u64 count;
-} __attribute__((aligned(32)));
diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c
index 2827f2d7cf30..591ca9637b23 100644
--- a/tools/testing/selftests/bpf/xskxceiver.c
+++ b/tools/testing/selftests/bpf/xskxceiver.c
@@ -80,6 +80,7 @@
#include <linux/if_ether.h>
#include <linux/mman.h>
#include <linux/netdev.h>
+#include <linux/bitmap.h>
#include <arpa/inet.h>
#include <net/if.h>
#include <locale.h>
@@ -102,10 +103,12 @@
#include <bpf/bpf.h>
#include <linux/filter.h>
#include "../kselftest.h"
-#include "xsk_xdp_metadata.h"
+#include "xsk_xdp_common.h"
-static const char *MAC1 = "\x00\x0A\x56\x9E\xEE\x62";
-static const char *MAC2 = "\x00\x0A\x56\x9E\xEE\x61";
+static bool opt_verbose;
+static bool opt_print_tests;
+static enum test_mode opt_mode = TEST_MODE_ALL;
+static u32 opt_run_test = RUN_ALL_TESTS;
static void __exit_with_error(int error, const char *file, const char *func, int line)
{
@@ -154,10 +157,10 @@ static void write_payload(void *dest, u32 pkt_nb, u32 start, u32 size)
ptr[i] = htonl(pkt_nb << 16 | (i + start));
}
-static void gen_eth_hdr(struct ifobject *ifobject, struct ethhdr *eth_hdr)
+static void gen_eth_hdr(struct xsk_socket_info *xsk, struct ethhdr *eth_hdr)
{
- memcpy(eth_hdr->h_dest, ifobject->dst_mac, ETH_ALEN);
- memcpy(eth_hdr->h_source, ifobject->src_mac, ETH_ALEN);
+ memcpy(eth_hdr->h_dest, xsk->dst_mac, ETH_ALEN);
+ memcpy(eth_hdr->h_source, xsk->src_mac, ETH_ALEN);
eth_hdr->h_proto = htons(ETH_P_LOOPBACK);
}
@@ -255,7 +258,7 @@ static int __xsk_configure_socket(struct xsk_socket_info *xsk, struct xsk_umem_i
cfg.bind_flags = ifobject->bind_flags;
if (shared)
cfg.bind_flags |= XDP_SHARED_UMEM;
- if (ifobject->pkt_stream && ifobject->mtu > MAX_ETH_PKT_SIZE)
+ if (ifobject->mtu > MAX_ETH_PKT_SIZE)
cfg.bind_flags |= XDP_USE_SG;
txr = ifobject->tx_on ? &xsk->tx : NULL;
@@ -310,19 +313,28 @@ static struct option long_options[] = {
{"interface", required_argument, 0, 'i'},
{"busy-poll", no_argument, 0, 'b'},
{"verbose", no_argument, 0, 'v'},
+ {"mode", required_argument, 0, 'm'},
+ {"list", no_argument, 0, 'l'},
+ {"test", required_argument, 0, 't'},
+ {"help", no_argument, 0, 'h'},
{0, 0, 0, 0}
};
-static void usage(const char *prog)
+static void print_usage(char **argv)
{
const char *str =
- " Usage: %s [OPTIONS]\n"
+ " Usage: xskxceiver [OPTIONS]\n"
" Options:\n"
" -i, --interface Use interface\n"
" -v, --verbose Verbose output\n"
- " -b, --busy-poll Enable busy poll\n";
+ " -b, --busy-poll Enable busy poll\n"
+ " -m, --mode Run only mode skb, drv, or zc\n"
+ " -l, --list List all available tests\n"
+ " -t, --test Run a specific test. Enter number from -l option.\n"
+ " -h, --help Display this help and exit\n";
- ksft_print_msg(str, prog);
+ ksft_print_msg(str, basename(argv[0]));
+ ksft_exit_xfail();
}
static bool validate_interface(struct ifobject *ifobj)
@@ -342,7 +354,7 @@ static void parse_command_line(struct ifobject *ifobj_tx, struct ifobject *ifobj
opterr = 0;
for (;;) {
- c = getopt_long(argc, argv, "i:vb", long_options, &option_index);
+ c = getopt_long(argc, argv, "i:vbm:lt:", long_options, &option_index);
if (c == -1)
break;
@@ -371,9 +383,28 @@ static void parse_command_line(struct ifobject *ifobj_tx, struct ifobject *ifobj
ifobj_tx->busy_poll = true;
ifobj_rx->busy_poll = true;
break;
+ case 'm':
+ if (!strncmp("skb", optarg, strlen(optarg)))
+ opt_mode = TEST_MODE_SKB;
+ else if (!strncmp("drv", optarg, strlen(optarg)))
+ opt_mode = TEST_MODE_DRV;
+ else if (!strncmp("zc", optarg, strlen(optarg)))
+ opt_mode = TEST_MODE_ZC;
+ else
+ print_usage(argv);
+ break;
+ case 'l':
+ opt_print_tests = true;
+ break;
+ case 't':
+ errno = 0;
+ opt_run_test = strtol(optarg, NULL, 0);
+ if (errno)
+ print_usage(argv);
+ break;
+ case 'h':
default:
- usage(basename(argv[0]));
- ksft_exit_xfail();
+ print_usage(argv);
}
}
}
@@ -396,11 +427,9 @@ static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx,
if (i == 0) {
ifobj->rx_on = false;
ifobj->tx_on = true;
- ifobj->pkt_stream = test->tx_pkt_stream_default;
} else {
ifobj->rx_on = true;
ifobj->tx_on = false;
- ifobj->pkt_stream = test->rx_pkt_stream_default;
}
memset(ifobj->umem, 0, sizeof(*ifobj->umem));
@@ -410,6 +439,15 @@ static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx,
for (j = 0; j < MAX_SOCKETS; j++) {
memset(&ifobj->xsk_arr[j], 0, sizeof(ifobj->xsk_arr[j]));
ifobj->xsk_arr[j].rxqsize = XSK_RING_CONS__DEFAULT_NUM_DESCS;
+ if (i == 0)
+ ifobj->xsk_arr[j].pkt_stream = test->tx_pkt_stream_default;
+ else
+ ifobj->xsk_arr[j].pkt_stream = test->rx_pkt_stream_default;
+
+ memcpy(ifobj->xsk_arr[j].src_mac, g_mac, ETH_ALEN);
+ memcpy(ifobj->xsk_arr[j].dst_mac, g_mac, ETH_ALEN);
+ ifobj->xsk_arr[j].src_mac[5] += ((j * 2) + 0);
+ ifobj->xsk_arr[j].dst_mac[5] += ((j * 2) + 1);
}
}
@@ -427,7 +465,8 @@ static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx,
}
static void test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx,
- struct ifobject *ifobj_rx, enum test_mode mode)
+ struct ifobject *ifobj_rx, enum test_mode mode,
+ const struct test_spec *test_to_run)
{
struct pkt_stream *tx_pkt_stream;
struct pkt_stream *rx_pkt_stream;
@@ -449,6 +488,8 @@ static void test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx,
ifobj->bind_flags |= XDP_COPY;
}
+ strncpy(test->name, test_to_run->name, MAX_TEST_NAME_SIZE);
+ test->test_func = test_to_run->test_func;
test->mode = mode;
__test_spec_init(test, ifobj_tx, ifobj_rx);
}
@@ -458,11 +499,6 @@ static void test_spec_reset(struct test_spec *test)
__test_spec_init(test, test->ifobj_tx, test->ifobj_rx);
}
-static void test_spec_set_name(struct test_spec *test, const char *name)
-{
- strncpy(test->name, name, MAX_TEST_NAME_SIZE);
-}
-
static void test_spec_set_xdp_prog(struct test_spec *test, struct bpf_program *xdp_prog_rx,
struct bpf_program *xdp_prog_tx, struct bpf_map *xskmap_rx,
struct bpf_map *xskmap_tx)
@@ -495,8 +531,10 @@ static int test_spec_set_mtu(struct test_spec *test, int mtu)
static void pkt_stream_reset(struct pkt_stream *pkt_stream)
{
- if (pkt_stream)
+ if (pkt_stream) {
pkt_stream->current_pkt_nb = 0;
+ pkt_stream->nb_rx_pkts = 0;
+ }
}
static struct pkt *pkt_stream_get_next_tx_pkt(struct pkt_stream *pkt_stream)
@@ -526,17 +564,17 @@ static void pkt_stream_delete(struct pkt_stream *pkt_stream)
static void pkt_stream_restore_default(struct test_spec *test)
{
- struct pkt_stream *tx_pkt_stream = test->ifobj_tx->pkt_stream;
- struct pkt_stream *rx_pkt_stream = test->ifobj_rx->pkt_stream;
+ struct pkt_stream *tx_pkt_stream = test->ifobj_tx->xsk->pkt_stream;
+ struct pkt_stream *rx_pkt_stream = test->ifobj_rx->xsk->pkt_stream;
if (tx_pkt_stream != test->tx_pkt_stream_default) {
- pkt_stream_delete(test->ifobj_tx->pkt_stream);
- test->ifobj_tx->pkt_stream = test->tx_pkt_stream_default;
+ pkt_stream_delete(test->ifobj_tx->xsk->pkt_stream);
+ test->ifobj_tx->xsk->pkt_stream = test->tx_pkt_stream_default;
}
if (rx_pkt_stream != test->rx_pkt_stream_default) {
- pkt_stream_delete(test->ifobj_rx->pkt_stream);
- test->ifobj_rx->pkt_stream = test->rx_pkt_stream_default;
+ pkt_stream_delete(test->ifobj_rx->xsk->pkt_stream);
+ test->ifobj_rx->xsk->pkt_stream = test->rx_pkt_stream_default;
}
}
@@ -596,14 +634,16 @@ static u32 pkt_nb_frags(u32 frame_size, struct pkt_stream *pkt_stream, struct pk
return nb_frags;
}
-static void pkt_set(struct xsk_umem_info *umem, struct pkt *pkt, int offset, u32 len)
+static void pkt_set(struct pkt_stream *pkt_stream, struct pkt *pkt, int offset, u32 len)
{
pkt->offset = offset;
pkt->len = len;
- if (len > MAX_ETH_JUMBO_SIZE)
+ if (len > MAX_ETH_JUMBO_SIZE) {
pkt->valid = false;
- else
+ } else {
pkt->valid = true;
+ pkt_stream->nb_valid_entries++;
+ }
}
static u32 pkt_get_buffer_len(struct xsk_umem_info *umem, u32 len)
@@ -611,7 +651,7 @@ static u32 pkt_get_buffer_len(struct xsk_umem_info *umem, u32 len)
return ceil_u32(len, umem->frame_size) * umem->frame_size;
}
-static struct pkt_stream *pkt_stream_generate(struct xsk_umem_info *umem, u32 nb_pkts, u32 pkt_len)
+static struct pkt_stream *__pkt_stream_generate(u32 nb_pkts, u32 pkt_len, u32 nb_start, u32 nb_off)
{
struct pkt_stream *pkt_stream;
u32 i;
@@ -625,41 +665,45 @@ static struct pkt_stream *pkt_stream_generate(struct xsk_umem_info *umem, u32 nb
for (i = 0; i < nb_pkts; i++) {
struct pkt *pkt = &pkt_stream->pkts[i];
- pkt_set(umem, pkt, 0, pkt_len);
- pkt->pkt_nb = i;
+ pkt_set(pkt_stream, pkt, 0, pkt_len);
+ pkt->pkt_nb = nb_start + i * nb_off;
}
return pkt_stream;
}
-static struct pkt_stream *pkt_stream_clone(struct xsk_umem_info *umem,
- struct pkt_stream *pkt_stream)
+static struct pkt_stream *pkt_stream_generate(u32 nb_pkts, u32 pkt_len)
{
- return pkt_stream_generate(umem, pkt_stream->nb_pkts, pkt_stream->pkts[0].len);
+ return __pkt_stream_generate(nb_pkts, pkt_len, 0, 1);
+}
+
+static struct pkt_stream *pkt_stream_clone(struct pkt_stream *pkt_stream)
+{
+ return pkt_stream_generate(pkt_stream->nb_pkts, pkt_stream->pkts[0].len);
}
static void pkt_stream_replace(struct test_spec *test, u32 nb_pkts, u32 pkt_len)
{
struct pkt_stream *pkt_stream;
- pkt_stream = pkt_stream_generate(test->ifobj_tx->umem, nb_pkts, pkt_len);
- test->ifobj_tx->pkt_stream = pkt_stream;
- pkt_stream = pkt_stream_generate(test->ifobj_rx->umem, nb_pkts, pkt_len);
- test->ifobj_rx->pkt_stream = pkt_stream;
+ pkt_stream = pkt_stream_generate(nb_pkts, pkt_len);
+ test->ifobj_tx->xsk->pkt_stream = pkt_stream;
+ pkt_stream = pkt_stream_generate(nb_pkts, pkt_len);
+ test->ifobj_rx->xsk->pkt_stream = pkt_stream;
}
static void __pkt_stream_replace_half(struct ifobject *ifobj, u32 pkt_len,
int offset)
{
- struct xsk_umem_info *umem = ifobj->umem;
struct pkt_stream *pkt_stream;
u32 i;
- pkt_stream = pkt_stream_clone(umem, ifobj->pkt_stream);
- for (i = 1; i < ifobj->pkt_stream->nb_pkts; i += 2)
- pkt_set(umem, &pkt_stream->pkts[i], offset, pkt_len);
+ pkt_stream = pkt_stream_clone(ifobj->xsk->pkt_stream);
+ for (i = 1; i < ifobj->xsk->pkt_stream->nb_pkts; i += 2)
+ pkt_set(pkt_stream, &pkt_stream->pkts[i], offset, pkt_len);
- ifobj->pkt_stream = pkt_stream;
+ ifobj->xsk->pkt_stream = pkt_stream;
+ pkt_stream->nb_valid_entries /= 2;
}
static void pkt_stream_replace_half(struct test_spec *test, u32 pkt_len, int offset)
@@ -670,15 +714,34 @@ static void pkt_stream_replace_half(struct test_spec *test, u32 pkt_len, int off
static void pkt_stream_receive_half(struct test_spec *test)
{
- struct xsk_umem_info *umem = test->ifobj_rx->umem;
- struct pkt_stream *pkt_stream = test->ifobj_tx->pkt_stream;
+ struct pkt_stream *pkt_stream = test->ifobj_tx->xsk->pkt_stream;
u32 i;
- test->ifobj_rx->pkt_stream = pkt_stream_generate(umem, pkt_stream->nb_pkts,
- pkt_stream->pkts[0].len);
- pkt_stream = test->ifobj_rx->pkt_stream;
+ test->ifobj_rx->xsk->pkt_stream = pkt_stream_generate(pkt_stream->nb_pkts,
+ pkt_stream->pkts[0].len);
+ pkt_stream = test->ifobj_rx->xsk->pkt_stream;
for (i = 1; i < pkt_stream->nb_pkts; i += 2)
pkt_stream->pkts[i].valid = false;
+
+ pkt_stream->nb_valid_entries /= 2;
+}
+
+static void pkt_stream_even_odd_sequence(struct test_spec *test)
+{
+ struct pkt_stream *pkt_stream;
+ u32 i;
+
+ for (i = 0; i < test->nb_sockets; i++) {
+ pkt_stream = test->ifobj_tx->xsk_arr[i].pkt_stream;
+ pkt_stream = __pkt_stream_generate(pkt_stream->nb_pkts / 2,
+ pkt_stream->pkts[0].len, i, 2);
+ test->ifobj_tx->xsk_arr[i].pkt_stream = pkt_stream;
+
+ pkt_stream = test->ifobj_rx->xsk_arr[i].pkt_stream;
+ pkt_stream = __pkt_stream_generate(pkt_stream->nb_pkts / 2,
+ pkt_stream->pkts[0].len, i, 2);
+ test->ifobj_rx->xsk_arr[i].pkt_stream = pkt_stream;
+ }
}
static u64 pkt_get_addr(struct pkt *pkt, struct xsk_umem_info *umem)
@@ -693,16 +756,16 @@ static void pkt_stream_cancel(struct pkt_stream *pkt_stream)
pkt_stream->current_pkt_nb--;
}
-static void pkt_generate(struct ifobject *ifobject, u64 addr, u32 len, u32 pkt_nb,
- u32 bytes_written)
+static void pkt_generate(struct xsk_socket_info *xsk, struct xsk_umem_info *umem, u64 addr, u32 len,
+ u32 pkt_nb, u32 bytes_written)
{
- void *data = xsk_umem__get_data(ifobject->umem->buffer, addr);
+ void *data = xsk_umem__get_data(umem->buffer, addr);
if (len < MIN_PKT_SIZE)
return;
if (!bytes_written) {
- gen_eth_hdr(ifobject, data);
+ gen_eth_hdr(xsk, data);
len -= PKT_HDR_SIZE;
data += PKT_HDR_SIZE;
@@ -747,8 +810,15 @@ static struct pkt_stream *__pkt_stream_generate_custom(struct ifobject *ifobj, s
len = 0;
}
+ print_verbose("offset: %d len: %u valid: %u options: %u pkt_nb: %u\n",
+ pkt->offset, pkt->len, pkt->valid, pkt->options, pkt->pkt_nb);
+
if (pkt->valid && pkt->len > pkt_stream->max_pkt_len)
pkt_stream->max_pkt_len = pkt->len;
+
+ if (pkt->valid)
+ pkt_stream->nb_valid_entries++;
+
pkt_nb++;
}
@@ -762,10 +832,10 @@ static void pkt_stream_generate_custom(struct test_spec *test, struct pkt *pkts,
struct pkt_stream *pkt_stream;
pkt_stream = __pkt_stream_generate_custom(test->ifobj_tx, pkts, nb_pkts, true);
- test->ifobj_tx->pkt_stream = pkt_stream;
+ test->ifobj_tx->xsk->pkt_stream = pkt_stream;
pkt_stream = __pkt_stream_generate_custom(test->ifobj_rx, pkts, nb_pkts, false);
- test->ifobj_rx->pkt_stream = pkt_stream;
+ test->ifobj_rx->xsk->pkt_stream = pkt_stream;
}
static void pkt_print_data(u32 *data, u32 cnt)
@@ -777,7 +847,7 @@ static void pkt_print_data(u32 *data, u32 cnt)
seqnum = ntohl(*data) & 0xffff;
pkt_nb = ntohl(*data) >> 16;
- fprintf(stdout, "%u:%u ", pkt_nb, seqnum);
+ ksft_print_msg("%u:%u ", pkt_nb, seqnum);
data++;
}
}
@@ -789,13 +859,13 @@ static void pkt_dump(void *pkt, u32 len, bool eth_header)
if (eth_header) {
/*extract L2 frame */
- fprintf(stdout, "DEBUG>> L2: dst mac: ");
+ ksft_print_msg("DEBUG>> L2: dst mac: ");
for (i = 0; i < ETH_ALEN; i++)
- fprintf(stdout, "%02X", ethhdr->h_dest[i]);
+ ksft_print_msg("%02X", ethhdr->h_dest[i]);
- fprintf(stdout, "\nDEBUG>> L2: src mac: ");
+ ksft_print_msg("\nDEBUG>> L2: src mac: ");
for (i = 0; i < ETH_ALEN; i++)
- fprintf(stdout, "%02X", ethhdr->h_source[i]);
+ ksft_print_msg("%02X", ethhdr->h_source[i]);
data = pkt + PKT_HDR_SIZE;
} else {
@@ -803,15 +873,15 @@ static void pkt_dump(void *pkt, u32 len, bool eth_header)
}
/*extract L5 frame */
- fprintf(stdout, "\nDEBUG>> L5: seqnum: ");
+ ksft_print_msg("\nDEBUG>> L5: seqnum: ");
pkt_print_data(data, PKT_DUMP_NB_TO_PRINT);
- fprintf(stdout, "....");
+ ksft_print_msg("....");
if (len > PKT_DUMP_NB_TO_PRINT * sizeof(u32)) {
- fprintf(stdout, "\n.... ");
+ ksft_print_msg("\n.... ");
pkt_print_data(data + len / sizeof(u32) - PKT_DUMP_NB_TO_PRINT,
PKT_DUMP_NB_TO_PRINT);
}
- fprintf(stdout, "\n---------------------------------------\n");
+ ksft_print_msg("\n---------------------------------------\n");
}
static bool is_offset_correct(struct xsk_umem_info *umem, struct pkt *pkt, u64 addr)
@@ -916,36 +986,42 @@ static bool is_pkt_valid(struct pkt *pkt, void *buffer, u64 addr, u32 len)
return true;
}
-static void kick_tx(struct xsk_socket_info *xsk)
+static int kick_tx(struct xsk_socket_info *xsk)
{
int ret;
ret = sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0);
if (ret >= 0)
- return;
+ return TEST_PASS;
if (errno == ENOBUFS || errno == EAGAIN || errno == EBUSY || errno == ENETDOWN) {
usleep(100);
- return;
+ return TEST_PASS;
}
- exit_with_error(errno);
+ return TEST_FAILURE;
}
-static void kick_rx(struct xsk_socket_info *xsk)
+static int kick_rx(struct xsk_socket_info *xsk)
{
int ret;
ret = recvfrom(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, NULL);
if (ret < 0)
- exit_with_error(errno);
+ return TEST_FAILURE;
+
+ return TEST_PASS;
}
static int complete_pkts(struct xsk_socket_info *xsk, int batch_size)
{
unsigned int rcvd;
u32 idx;
+ int ret;
- if (xsk_ring_prod__needs_wakeup(&xsk->tx))
- kick_tx(xsk);
+ if (xsk_ring_prod__needs_wakeup(&xsk->tx)) {
+ ret = kick_tx(xsk);
+ if (ret)
+ return TEST_FAILURE;
+ }
rcvd = xsk_ring_cons__peek(&xsk->umem->cq, batch_size, &idx);
if (rcvd) {
@@ -964,153 +1040,207 @@ static int complete_pkts(struct xsk_socket_info *xsk, int batch_size)
return TEST_PASS;
}
-static int receive_pkts(struct test_spec *test, struct pollfd *fds)
+static int __receive_pkts(struct test_spec *test, struct xsk_socket_info *xsk)
{
- struct timeval tv_end, tv_now, tv_timeout = {THREAD_TMOUT, 0};
- struct pkt_stream *pkt_stream = test->ifobj_rx->pkt_stream;
- struct xsk_socket_info *xsk = test->ifobj_rx->xsk;
+ u32 frags_processed = 0, nb_frags = 0, pkt_len = 0;
u32 idx_rx = 0, idx_fq = 0, rcvd, pkts_sent = 0;
+ struct pkt_stream *pkt_stream = xsk->pkt_stream;
struct ifobject *ifobj = test->ifobj_rx;
struct xsk_umem_info *umem = xsk->umem;
+ struct pollfd fds = { };
struct pkt *pkt;
+ u64 first_addr = 0;
int ret;
- ret = gettimeofday(&tv_now, NULL);
- if (ret)
- exit_with_error(errno);
- timeradd(&tv_now, &tv_timeout, &tv_end);
+ fds.fd = xsk_socket__fd(xsk->xsk);
+ fds.events = POLLIN;
- pkt = pkt_stream_get_next_rx_pkt(pkt_stream, &pkts_sent);
- while (pkt) {
- u32 frags_processed = 0, nb_frags = 0, pkt_len = 0;
- u64 first_addr;
+ ret = kick_rx(xsk);
+ if (ret)
+ return TEST_FAILURE;
- ret = gettimeofday(&tv_now, NULL);
- if (ret)
- exit_with_error(errno);
- if (timercmp(&tv_now, &tv_end, >)) {
- ksft_print_msg("ERROR: [%s] Receive loop timed out\n", __func__);
+ if (ifobj->use_poll) {
+ ret = poll(&fds, 1, POLL_TMOUT);
+ if (ret < 0)
return TEST_FAILURE;
- }
-
- kick_rx(xsk);
- if (ifobj->use_poll) {
- ret = poll(fds, 1, POLL_TMOUT);
- if (ret < 0)
- exit_with_error(errno);
- if (!ret) {
- if (!is_umem_valid(test->ifobj_tx))
- return TEST_PASS;
-
- ksft_print_msg("ERROR: [%s] Poll timed out\n", __func__);
- return TEST_FAILURE;
- }
+ if (!ret) {
+ if (!is_umem_valid(test->ifobj_tx))
+ return TEST_PASS;
- if (!(fds->revents & POLLIN))
- continue;
+ ksft_print_msg("ERROR: [%s] Poll timed out\n", __func__);
+ return TEST_CONTINUE;
}
- rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx);
- if (!rcvd)
- continue;
+ if (!(fds.revents & POLLIN))
+ return TEST_CONTINUE;
+ }
- if (ifobj->use_fill_ring) {
- ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq);
- while (ret != rcvd) {
+ rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx);
+ if (!rcvd)
+ return TEST_CONTINUE;
+
+ if (ifobj->use_fill_ring) {
+ ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq);
+ while (ret != rcvd) {
+ if (xsk_ring_prod__needs_wakeup(&umem->fq)) {
+ ret = poll(&fds, 1, POLL_TMOUT);
if (ret < 0)
- exit_with_error(-ret);
- if (xsk_ring_prod__needs_wakeup(&umem->fq)) {
- ret = poll(fds, 1, POLL_TMOUT);
- if (ret < 0)
- exit_with_error(errno);
- }
- ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq);
+ return TEST_FAILURE;
}
+ ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq);
}
+ }
- while (frags_processed < rcvd) {
- const struct xdp_desc *desc = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++);
- u64 addr = desc->addr, orig;
+ while (frags_processed < rcvd) {
+ const struct xdp_desc *desc = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++);
+ u64 addr = desc->addr, orig;
- orig = xsk_umem__extract_addr(addr);
- addr = xsk_umem__add_offset_to_addr(addr);
+ orig = xsk_umem__extract_addr(addr);
+ addr = xsk_umem__add_offset_to_addr(addr);
+ if (!nb_frags) {
+ pkt = pkt_stream_get_next_rx_pkt(pkt_stream, &pkts_sent);
if (!pkt) {
ksft_print_msg("[%s] received too many packets addr: %lx len %u\n",
__func__, addr, desc->len);
return TEST_FAILURE;
}
+ }
- if (!is_frag_valid(umem, addr, desc->len, pkt->pkt_nb, pkt_len) ||
- !is_offset_correct(umem, pkt, addr) ||
- (ifobj->use_metadata && !is_metadata_correct(pkt, umem->buffer, addr)))
- return TEST_FAILURE;
+ print_verbose("Rx: addr: %lx len: %u options: %u pkt_nb: %u valid: %u\n",
+ addr, desc->len, desc->options, pkt->pkt_nb, pkt->valid);
- if (!nb_frags++)
- first_addr = addr;
- frags_processed++;
- pkt_len += desc->len;
- if (ifobj->use_fill_ring)
- *xsk_ring_prod__fill_addr(&umem->fq, idx_fq++) = orig;
+ if (!is_frag_valid(umem, addr, desc->len, pkt->pkt_nb, pkt_len) ||
+ !is_offset_correct(umem, pkt, addr) || (ifobj->use_metadata &&
+ !is_metadata_correct(pkt, umem->buffer, addr)))
+ return TEST_FAILURE;
- if (pkt_continues(desc->options))
- continue;
+ if (!nb_frags++)
+ first_addr = addr;
+ frags_processed++;
+ pkt_len += desc->len;
+ if (ifobj->use_fill_ring)
+ *xsk_ring_prod__fill_addr(&umem->fq, idx_fq++) = orig;
- /* The complete packet has been received */
- if (!is_pkt_valid(pkt, umem->buffer, first_addr, pkt_len) ||
- !is_offset_correct(umem, pkt, addr))
- return TEST_FAILURE;
+ if (pkt_continues(desc->options))
+ continue;
- pkt = pkt_stream_get_next_rx_pkt(pkt_stream, &pkts_sent);
- nb_frags = 0;
- pkt_len = 0;
- }
+ /* The complete packet has been received */
+ if (!is_pkt_valid(pkt, umem->buffer, first_addr, pkt_len) ||
+ !is_offset_correct(umem, pkt, addr))
+ return TEST_FAILURE;
- if (nb_frags) {
- /* In the middle of a packet. Start over from beginning of packet. */
- idx_rx -= nb_frags;
- xsk_ring_cons__cancel(&xsk->rx, nb_frags);
- if (ifobj->use_fill_ring) {
- idx_fq -= nb_frags;
- xsk_ring_prod__cancel(&umem->fq, nb_frags);
- }
- frags_processed -= nb_frags;
+ pkt_stream->nb_rx_pkts++;
+ nb_frags = 0;
+ pkt_len = 0;
+ }
+
+ if (nb_frags) {
+ /* In the middle of a packet. Start over from beginning of packet. */
+ idx_rx -= nb_frags;
+ xsk_ring_cons__cancel(&xsk->rx, nb_frags);
+ if (ifobj->use_fill_ring) {
+ idx_fq -= nb_frags;
+ xsk_ring_prod__cancel(&umem->fq, nb_frags);
}
+ frags_processed -= nb_frags;
+ }
- if (ifobj->use_fill_ring)
- xsk_ring_prod__submit(&umem->fq, frags_processed);
- if (ifobj->release_rx)
- xsk_ring_cons__release(&xsk->rx, frags_processed);
+ if (ifobj->use_fill_ring)
+ xsk_ring_prod__submit(&umem->fq, frags_processed);
+ if (ifobj->release_rx)
+ xsk_ring_cons__release(&xsk->rx, frags_processed);
+
+ pthread_mutex_lock(&pacing_mutex);
+ pkts_in_flight -= pkts_sent;
+ pthread_mutex_unlock(&pacing_mutex);
+ pkts_sent = 0;
+
+return TEST_CONTINUE;
+}
+
+bool all_packets_received(struct test_spec *test, struct xsk_socket_info *xsk, u32 sock_num,
+ unsigned long *bitmap)
+{
+ struct pkt_stream *pkt_stream = xsk->pkt_stream;
+
+ if (!pkt_stream) {
+ __set_bit(sock_num, bitmap);
+ return false;
+ }
+
+ if (pkt_stream->nb_rx_pkts == pkt_stream->nb_valid_entries) {
+ __set_bit(sock_num, bitmap);
+ if (bitmap_full(bitmap, test->nb_sockets))
+ return true;
+ }
+
+ return false;
+}
+
+static int receive_pkts(struct test_spec *test)
+{
+ struct timeval tv_end, tv_now, tv_timeout = {THREAD_TMOUT, 0};
+ DECLARE_BITMAP(bitmap, test->nb_sockets);
+ struct xsk_socket_info *xsk;
+ u32 sock_num = 0;
+ int res, ret;
- pthread_mutex_lock(&pacing_mutex);
- pkts_in_flight -= pkts_sent;
- pthread_mutex_unlock(&pacing_mutex);
- pkts_sent = 0;
+ ret = gettimeofday(&tv_now, NULL);
+ if (ret)
+ exit_with_error(errno);
+
+ timeradd(&tv_now, &tv_timeout, &tv_end);
+
+ while (1) {
+ xsk = &test->ifobj_rx->xsk_arr[sock_num];
+
+ if ((all_packets_received(test, xsk, sock_num, bitmap)))
+ break;
+
+ res = __receive_pkts(test, xsk);
+ if (!(res == TEST_PASS || res == TEST_CONTINUE))
+ return res;
+
+ ret = gettimeofday(&tv_now, NULL);
+ if (ret)
+ exit_with_error(errno);
+
+ if (timercmp(&tv_now, &tv_end, >)) {
+ ksft_print_msg("ERROR: [%s] Receive loop timed out\n", __func__);
+ return TEST_FAILURE;
+ }
+ sock_num = (sock_num + 1) % test->nb_sockets;
}
return TEST_PASS;
}
-static int __send_pkts(struct ifobject *ifobject, struct pollfd *fds, bool timeout)
+static int __send_pkts(struct ifobject *ifobject, struct xsk_socket_info *xsk, bool timeout)
{
u32 i, idx = 0, valid_pkts = 0, valid_frags = 0, buffer_len;
- struct pkt_stream *pkt_stream = ifobject->pkt_stream;
- struct xsk_socket_info *xsk = ifobject->xsk;
+ struct pkt_stream *pkt_stream = xsk->pkt_stream;
struct xsk_umem_info *umem = ifobject->umem;
bool use_poll = ifobject->use_poll;
+ struct pollfd fds = { };
int ret;
buffer_len = pkt_get_buffer_len(umem, pkt_stream->max_pkt_len);
/* pkts_in_flight might be negative if many invalid packets are sent */
if (pkts_in_flight >= (int)((umem_size(umem) - BATCH_SIZE * buffer_len) / buffer_len)) {
- kick_tx(xsk);
+ ret = kick_tx(xsk);
+ if (ret)
+ return TEST_FAILURE;
return TEST_CONTINUE;
}
+ fds.fd = xsk_socket__fd(xsk->xsk);
+ fds.events = POLLOUT;
+
while (xsk_ring_prod__reserve(&xsk->tx, BATCH_SIZE, &idx) < BATCH_SIZE) {
if (use_poll) {
- ret = poll(fds, 1, POLL_TMOUT);
+ ret = poll(&fds, 1, POLL_TMOUT);
if (timeout) {
if (ret < 0) {
ksft_print_msg("ERROR: [%s] Poll error %d\n",
@@ -1161,10 +1291,13 @@ static int __send_pkts(struct ifobject *ifobject, struct pollfd *fds, bool timeo
tx_desc->options = 0;
}
if (pkt->valid)
- pkt_generate(ifobject, tx_desc->addr, tx_desc->len, pkt->pkt_nb,
+ pkt_generate(xsk, umem, tx_desc->addr, tx_desc->len, pkt->pkt_nb,
bytes_written);
bytes_written += tx_desc->len;
+ print_verbose("Tx addr: %llx len: %u options: %u pkt_nb: %u\n",
+ tx_desc->addr, tx_desc->len, tx_desc->options, pkt->pkt_nb);
+
if (nb_frags_left) {
i++;
if (pkt_stream->verbatim)
@@ -1186,7 +1319,7 @@ static int __send_pkts(struct ifobject *ifobject, struct pollfd *fds, bool timeo
xsk->outstanding_tx += valid_frags;
if (use_poll) {
- ret = poll(fds, 1, POLL_TMOUT);
+ ret = poll(&fds, 1, POLL_TMOUT);
if (ret <= 0) {
if (ret == 0 && timeout)
return TEST_PASS;
@@ -1207,33 +1340,67 @@ static int __send_pkts(struct ifobject *ifobject, struct pollfd *fds, bool timeo
return TEST_CONTINUE;
}
-static void wait_for_tx_completion(struct xsk_socket_info *xsk)
+static int wait_for_tx_completion(struct xsk_socket_info *xsk)
{
- while (xsk->outstanding_tx)
+ struct timeval tv_end, tv_now, tv_timeout = {THREAD_TMOUT, 0};
+ int ret;
+
+ ret = gettimeofday(&tv_now, NULL);
+ if (ret)
+ exit_with_error(errno);
+ timeradd(&tv_now, &tv_timeout, &tv_end);
+
+ while (xsk->outstanding_tx) {
+ ret = gettimeofday(&tv_now, NULL);
+ if (ret)
+ exit_with_error(errno);
+ if (timercmp(&tv_now, &tv_end, >)) {
+ ksft_print_msg("ERROR: [%s] Transmission loop timed out\n", __func__);
+ return TEST_FAILURE;
+ }
+
complete_pkts(xsk, BATCH_SIZE);
+ }
+
+ return TEST_PASS;
+}
+
+bool all_packets_sent(struct test_spec *test, unsigned long *bitmap)
+{
+ return bitmap_full(bitmap, test->nb_sockets);
}
static int send_pkts(struct test_spec *test, struct ifobject *ifobject)
{
- struct pkt_stream *pkt_stream = ifobject->pkt_stream;
bool timeout = !is_umem_valid(test->ifobj_rx);
- struct pollfd fds = { };
- u32 ret;
+ DECLARE_BITMAP(bitmap, test->nb_sockets);
+ u32 i, ret;
- fds.fd = xsk_socket__fd(ifobject->xsk->xsk);
- fds.events = POLLOUT;
+ while (!(all_packets_sent(test, bitmap))) {
+ for (i = 0; i < test->nb_sockets; i++) {
+ struct pkt_stream *pkt_stream;
- while (pkt_stream->current_pkt_nb < pkt_stream->nb_pkts) {
- ret = __send_pkts(ifobject, &fds, timeout);
- if (ret == TEST_CONTINUE && !test->fail)
- continue;
- if ((ret || test->fail) && !timeout)
- return TEST_FAILURE;
- if (ret == TEST_PASS && timeout)
- return ret;
+ pkt_stream = ifobject->xsk_arr[i].pkt_stream;
+ if (!pkt_stream || pkt_stream->current_pkt_nb >= pkt_stream->nb_pkts) {
+ __set_bit(i, bitmap);
+ continue;
+ }
+ ret = __send_pkts(ifobject, &ifobject->xsk_arr[i], timeout);
+ if (ret == TEST_CONTINUE && !test->fail)
+ continue;
+
+ if ((ret || test->fail) && !timeout)
+ return TEST_FAILURE;
+
+ if (ret == TEST_PASS && timeout)
+ return ret;
+
+ ret = wait_for_tx_completion(&ifobject->xsk_arr[i]);
+ if (ret)
+ return TEST_FAILURE;
+ }
}
- wait_for_tx_completion(ifobject->xsk);
return TEST_PASS;
}
@@ -1266,7 +1433,9 @@ static int validate_rx_dropped(struct ifobject *ifobject)
struct xdp_statistics stats;
int err;
- kick_rx(ifobject->xsk);
+ err = kick_rx(ifobject->xsk);
+ if (err)
+ return TEST_FAILURE;
err = get_xsk_stats(xsk, &stats);
if (err)
@@ -1278,8 +1447,8 @@ static int validate_rx_dropped(struct ifobject *ifobject)
* packet being invalid). Since the last packet may or may not have
* been dropped already, both outcomes must be allowed.
*/
- if (stats.rx_dropped == ifobject->pkt_stream->nb_pkts / 2 ||
- stats.rx_dropped == ifobject->pkt_stream->nb_pkts / 2 - 1)
+ if (stats.rx_dropped == ifobject->xsk->pkt_stream->nb_pkts / 2 ||
+ stats.rx_dropped == ifobject->xsk->pkt_stream->nb_pkts / 2 - 1)
return TEST_PASS;
return TEST_FAILURE;
@@ -1292,7 +1461,9 @@ static int validate_rx_full(struct ifobject *ifobject)
int err;
usleep(1000);
- kick_rx(ifobject->xsk);
+ err = kick_rx(ifobject->xsk);
+ if (err)
+ return TEST_FAILURE;
err = get_xsk_stats(xsk, &stats);
if (err)
@@ -1311,7 +1482,9 @@ static int validate_fill_empty(struct ifobject *ifobject)
int err;
usleep(1000);
- kick_rx(ifobject->xsk);
+ err = kick_rx(ifobject->xsk);
+ if (err)
+ return TEST_FAILURE;
err = get_xsk_stats(xsk, &stats);
if (err)
@@ -1339,9 +1512,10 @@ static int validate_tx_invalid_descs(struct ifobject *ifobject)
return TEST_FAILURE;
}
- if (stats.tx_invalid_descs != ifobject->pkt_stream->nb_pkts / 2) {
+ if (stats.tx_invalid_descs != ifobject->xsk->pkt_stream->nb_pkts / 2) {
ksft_print_msg("[%s] tx_invalid_descs incorrect. Got [%u] expected [%u]\n",
- __func__, stats.tx_invalid_descs, ifobject->pkt_stream->nb_pkts);
+ __func__, stats.tx_invalid_descs,
+ ifobject->xsk->pkt_stream->nb_pkts);
return TEST_FAILURE;
}
@@ -1433,6 +1607,7 @@ static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject)
LIBBPF_OPTS(bpf_xdp_query_opts, opts);
void *bufs;
int ret;
+ u32 i;
if (ifobject->umem->unaligned_mode)
mmap_flags |= MAP_HUGETLB | MAP_HUGE_2MB;
@@ -1455,11 +1630,14 @@ static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject)
if (!ifobject->rx_on)
return;
- xsk_populate_fill_ring(ifobject->umem, ifobject->pkt_stream, ifobject->use_fill_ring);
+ xsk_populate_fill_ring(ifobject->umem, ifobject->xsk->pkt_stream, ifobject->use_fill_ring);
- ret = xsk_update_xskmap(ifobject->xskmap, ifobject->xsk->xsk);
- if (ret)
- exit_with_error(errno);
+ for (i = 0; i < test->nb_sockets; i++) {
+ ifobject->xsk = &ifobject->xsk_arr[i];
+ ret = xsk_update_xskmap(ifobject->xskmap, ifobject->xsk->xsk, i);
+ if (ret)
+ exit_with_error(errno);
+ }
}
static void *worker_testapp_validate_tx(void *arg)
@@ -1475,8 +1653,6 @@ static void *worker_testapp_validate_tx(void *arg)
thread_common_ops_tx(test, ifobject);
}
- print_verbose("Sending %d packets on interface %s\n", ifobject->pkt_stream->nb_pkts,
- ifobject->ifname);
err = send_pkts(test, ifobject);
if (!err && ifobject->validation_func)
@@ -1491,26 +1667,23 @@ static void *worker_testapp_validate_rx(void *arg)
{
struct test_spec *test = (struct test_spec *)arg;
struct ifobject *ifobject = test->ifobj_rx;
- struct pollfd fds = { };
int err;
if (test->current_step == 1) {
thread_common_ops(test, ifobject);
} else {
xsk_clear_xskmap(ifobject->xskmap);
- err = xsk_update_xskmap(ifobject->xskmap, ifobject->xsk->xsk);
+ err = xsk_update_xskmap(ifobject->xskmap, ifobject->xsk->xsk, 0);
if (err) {
- printf("Error: Failed to update xskmap, error %s\n", strerror(-err));
+ ksft_print_msg("Error: Failed to update xskmap, error %s\n",
+ strerror(-err));
exit_with_error(-err);
}
}
- fds.fd = xsk_socket__fd(ifobject->xsk->xsk);
- fds.events = POLLIN;
-
pthread_barrier_wait(&barr);
- err = receive_pkts(test, &fds);
+ err = receive_pkts(test);
if (!err && ifobject->validation_func)
err = ifobject->validation_func(ifobject);
@@ -1564,7 +1737,7 @@ static void xsk_reattach_xdp(struct ifobject *ifobj, struct bpf_program *xdp_pro
xsk_detach_xdp_program(ifobj->ifindex, mode_to_xdp_flags(ifobj->mode));
err = xsk_attach_xdp_program(xdp_prog, ifobj->ifindex, mode_to_xdp_flags(mode));
if (err) {
- printf("Error attaching XDP program\n");
+ ksft_print_msg("Error attaching XDP program\n");
exit_with_error(-err);
}
@@ -1619,11 +1792,11 @@ static int __testapp_validate_traffic(struct test_spec *test, struct ifobject *i
if (ifobj2) {
if (pthread_barrier_init(&barr, NULL, 2))
exit_with_error(errno);
- pkt_stream_reset(ifobj2->pkt_stream);
+ pkt_stream_reset(ifobj2->xsk->pkt_stream);
}
test->current_step++;
- pkt_stream_reset(ifobj1->pkt_stream);
+ pkt_stream_reset(ifobj1->xsk->pkt_stream);
pkts_in_flight = 0;
signal(SIGUSR1, handler);
@@ -1647,9 +1820,15 @@ static int __testapp_validate_traffic(struct test_spec *test, struct ifobject *i
pthread_join(t0, NULL);
if (test->total_steps == test->current_step || test->fail) {
+ u32 i;
+
if (ifobj2)
- xsk_socket__delete(ifobj2->xsk->xsk);
- xsk_socket__delete(ifobj1->xsk->xsk);
+ for (i = 0; i < test->nb_sockets; i++)
+ xsk_socket__delete(ifobj2->xsk_arr[i].xsk);
+
+ for (i = 0; i < test->nb_sockets; i++)
+ xsk_socket__delete(ifobj1->xsk_arr[i].xsk);
+
testapp_clean_xsk_umem(ifobj1);
if (ifobj2 && !ifobj2->shared_umem)
testapp_clean_xsk_umem(ifobj2);
@@ -1682,7 +1861,6 @@ static int testapp_teardown(struct test_spec *test)
{
int i;
- test_spec_set_name(test, "TEARDOWN");
for (i = 0; i < MAX_TEARDOWN_ITER; i++) {
if (testapp_validate_traffic(test))
return TEST_FAILURE;
@@ -1704,18 +1882,17 @@ static void swap_directions(struct ifobject **ifobj1, struct ifobject **ifobj2)
*ifobj2 = tmp_ifobj;
}
-static int testapp_bidi(struct test_spec *test)
+static int testapp_bidirectional(struct test_spec *test)
{
int res;
- test_spec_set_name(test, "BIDIRECTIONAL");
test->ifobj_tx->rx_on = true;
test->ifobj_rx->tx_on = true;
test->total_steps = 2;
if (testapp_validate_traffic(test))
return TEST_FAILURE;
- print_verbose("Switching Tx/Rx vectors\n");
+ print_verbose("Switching Tx/Rx direction\n");
swap_directions(&test->ifobj_rx, &test->ifobj_tx);
res = __testapp_validate_traffic(test, test->ifobj_rx, test->ifobj_tx);
@@ -1723,42 +1900,44 @@ static int testapp_bidi(struct test_spec *test)
return res;
}
-static void swap_xsk_resources(struct ifobject *ifobj_tx, struct ifobject *ifobj_rx)
+static int swap_xsk_resources(struct test_spec *test)
{
int ret;
- xsk_socket__delete(ifobj_tx->xsk->xsk);
- xsk_socket__delete(ifobj_rx->xsk->xsk);
- ifobj_tx->xsk = &ifobj_tx->xsk_arr[1];
- ifobj_rx->xsk = &ifobj_rx->xsk_arr[1];
+ test->ifobj_tx->xsk_arr[0].pkt_stream = NULL;
+ test->ifobj_rx->xsk_arr[0].pkt_stream = NULL;
+ test->ifobj_tx->xsk_arr[1].pkt_stream = test->tx_pkt_stream_default;
+ test->ifobj_rx->xsk_arr[1].pkt_stream = test->rx_pkt_stream_default;
+ test->ifobj_tx->xsk = &test->ifobj_tx->xsk_arr[1];
+ test->ifobj_rx->xsk = &test->ifobj_rx->xsk_arr[1];
- ret = xsk_update_xskmap(ifobj_rx->xskmap, ifobj_rx->xsk->xsk);
+ ret = xsk_update_xskmap(test->ifobj_rx->xskmap, test->ifobj_rx->xsk->xsk, 0);
if (ret)
- exit_with_error(errno);
+ return TEST_FAILURE;
+
+ return TEST_PASS;
}
-static int testapp_bpf_res(struct test_spec *test)
+static int testapp_xdp_prog_cleanup(struct test_spec *test)
{
- test_spec_set_name(test, "BPF_RES");
test->total_steps = 2;
test->nb_sockets = 2;
if (testapp_validate_traffic(test))
return TEST_FAILURE;
- swap_xsk_resources(test->ifobj_tx, test->ifobj_rx);
+ if (swap_xsk_resources(test))
+ return TEST_FAILURE;
return testapp_validate_traffic(test);
}
static int testapp_headroom(struct test_spec *test)
{
- test_spec_set_name(test, "UMEM_HEADROOM");
test->ifobj_rx->umem->frame_headroom = UMEM_HEADROOM_TEST_SIZE;
return testapp_validate_traffic(test);
}
static int testapp_stats_rx_dropped(struct test_spec *test)
{
- test_spec_set_name(test, "STAT_RX_DROPPED");
if (test->mode == TEST_MODE_ZC) {
ksft_test_result_skip("Can not run RX_DROPPED test for ZC mode\n");
return TEST_SKIP;
@@ -1774,7 +1953,6 @@ static int testapp_stats_rx_dropped(struct test_spec *test)
static int testapp_stats_tx_invalid_descs(struct test_spec *test)
{
- test_spec_set_name(test, "STAT_TX_INVALID");
pkt_stream_replace_half(test, XSK_UMEM__INVALID_FRAME_SIZE, 0);
test->ifobj_tx->validation_func = validate_tx_invalid_descs;
return testapp_validate_traffic(test);
@@ -1782,10 +1960,8 @@ static int testapp_stats_tx_invalid_descs(struct test_spec *test)
static int testapp_stats_rx_full(struct test_spec *test)
{
- test_spec_set_name(test, "STAT_RX_FULL");
pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, MIN_PKT_SIZE);
- test->ifobj_rx->pkt_stream = pkt_stream_generate(test->ifobj_rx->umem,
- DEFAULT_UMEM_BUFFERS, MIN_PKT_SIZE);
+ test->ifobj_rx->xsk->pkt_stream = pkt_stream_generate(DEFAULT_UMEM_BUFFERS, MIN_PKT_SIZE);
test->ifobj_rx->xsk->rxqsize = DEFAULT_UMEM_BUFFERS;
test->ifobj_rx->release_rx = false;
@@ -1795,19 +1971,16 @@ static int testapp_stats_rx_full(struct test_spec *test)
static int testapp_stats_fill_empty(struct test_spec *test)
{
- test_spec_set_name(test, "STAT_RX_FILL_EMPTY");
pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, MIN_PKT_SIZE);
- test->ifobj_rx->pkt_stream = pkt_stream_generate(test->ifobj_rx->umem,
- DEFAULT_UMEM_BUFFERS, MIN_PKT_SIZE);
+ test->ifobj_rx->xsk->pkt_stream = pkt_stream_generate(DEFAULT_UMEM_BUFFERS, MIN_PKT_SIZE);
test->ifobj_rx->use_fill_ring = false;
test->ifobj_rx->validation_func = validate_fill_empty;
return testapp_validate_traffic(test);
}
-static int testapp_unaligned(struct test_spec *test)
+static int testapp_send_receive_unaligned(struct test_spec *test)
{
- test_spec_set_name(test, "UNALIGNED_MODE");
test->ifobj_tx->umem->unaligned_mode = true;
test->ifobj_rx->umem->unaligned_mode = true;
/* Let half of the packets straddle a 4K buffer boundary */
@@ -1816,9 +1989,8 @@ static int testapp_unaligned(struct test_spec *test)
return testapp_validate_traffic(test);
}
-static int testapp_unaligned_mb(struct test_spec *test)
+static int testapp_send_receive_unaligned_mb(struct test_spec *test)
{
- test_spec_set_name(test, "UNALIGNED_MODE_9K");
test->mtu = MAX_ETH_JUMBO_SIZE;
test->ifobj_tx->umem->unaligned_mode = true;
test->ifobj_rx->umem->unaligned_mode = true;
@@ -1834,9 +2006,8 @@ static int testapp_single_pkt(struct test_spec *test)
return testapp_validate_traffic(test);
}
-static int testapp_multi_buffer(struct test_spec *test)
+static int testapp_send_receive_mb(struct test_spec *test)
{
- test_spec_set_name(test, "RUN_TO_COMPLETION_9K_PACKETS");
test->mtu = MAX_ETH_JUMBO_SIZE;
pkt_stream_replace(test, DEFAULT_PKT_CNT, MAX_ETH_JUMBO_SIZE);
@@ -1933,7 +2104,6 @@ static int testapp_xdp_drop(struct test_spec *test)
struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs;
struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs;
- test_spec_set_name(test, "XDP_DROP_HALF");
test_spec_set_xdp_prog(test, skel_rx->progs.xsk_xdp_drop, skel_tx->progs.xsk_xdp_drop,
skel_rx->maps.xsk, skel_tx->maps.xsk);
@@ -1941,7 +2111,7 @@ static int testapp_xdp_drop(struct test_spec *test)
return testapp_validate_traffic(test);
}
-static int testapp_xdp_metadata_count(struct test_spec *test)
+static int testapp_xdp_metadata_copy(struct test_spec *test)
{
struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs;
struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs;
@@ -1955,19 +2125,38 @@ static int testapp_xdp_metadata_count(struct test_spec *test)
test->ifobj_rx->use_metadata = true;
data_map = bpf_object__find_map_by_name(skel_rx->obj, "xsk_xdp_.bss");
- if (!data_map || !bpf_map__is_internal(data_map))
- exit_with_error(ENOMEM);
+ if (!data_map || !bpf_map__is_internal(data_map)) {
+ ksft_print_msg("Error: could not find bss section of XDP program\n");
+ return TEST_FAILURE;
+ }
- if (bpf_map_update_elem(bpf_map__fd(data_map), &key, &count, BPF_ANY))
- exit_with_error(errno);
+ if (bpf_map_update_elem(bpf_map__fd(data_map), &key, &count, BPF_ANY)) {
+ ksft_print_msg("Error: could not update count element\n");
+ return TEST_FAILURE;
+ }
return testapp_validate_traffic(test);
}
-static int testapp_poll_txq_tmout(struct test_spec *test)
+static int testapp_xdp_shared_umem(struct test_spec *test)
{
- test_spec_set_name(test, "POLL_TXQ_FULL");
+ struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs;
+ struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs;
+
+ test->total_steps = 1;
+ test->nb_sockets = 2;
+
+ test_spec_set_xdp_prog(test, skel_rx->progs.xsk_xdp_shared_umem,
+ skel_tx->progs.xsk_xdp_shared_umem,
+ skel_rx->maps.xsk, skel_tx->maps.xsk);
+
+ pkt_stream_even_odd_sequence(test);
+
+ return testapp_validate_traffic(test);
+}
+static int testapp_poll_txq_tmout(struct test_spec *test)
+{
test->ifobj_tx->use_poll = true;
/* create invalid frame by set umem frame_size and pkt length equal to 2048 */
test->ifobj_tx->umem->frame_size = 2048;
@@ -1977,7 +2166,6 @@ static int testapp_poll_txq_tmout(struct test_spec *test)
static int testapp_poll_rxq_tmout(struct test_spec *test)
{
- test_spec_set_name(test, "POLL_RXQ_EMPTY");
test->ifobj_rx->use_poll = true;
return testapp_validate_traffic_single_thread(test, test->ifobj_rx);
}
@@ -1987,7 +2175,6 @@ static int testapp_too_many_frags(struct test_spec *test)
struct pkt pkts[2 * XSK_DESC__MAX_SKB_FRAGS + 2] = {};
u32 max_frags, i;
- test_spec_set_name(test, "TOO_MANY_FRAGS");
if (test->mode == TEST_MODE_ZC)
max_frags = test->ifobj_tx->xdp_zc_max_segs;
else
@@ -2054,20 +2241,16 @@ static bool hugepages_present(void)
return true;
}
-static void init_iface(struct ifobject *ifobj, const char *dst_mac, const char *src_mac,
- thread_func_t func_ptr)
+static void init_iface(struct ifobject *ifobj, thread_func_t func_ptr)
{
LIBBPF_OPTS(bpf_xdp_query_opts, query_opts);
int err;
- memcpy(ifobj->dst_mac, dst_mac, ETH_ALEN);
- memcpy(ifobj->src_mac, src_mac, ETH_ALEN);
-
ifobj->func_ptr = func_ptr;
err = xsk_load_xdp_programs(ifobj);
if (err) {
- printf("Error loading XDP program\n");
+ ksft_print_msg("Error loading XDP program\n");
exit_with_error(err);
}
@@ -2091,138 +2274,98 @@ static void init_iface(struct ifobject *ifobj, const char *dst_mac, const char *
}
}
-static void run_pkt_test(struct test_spec *test, enum test_mode mode, enum test_type type)
-{
- int ret = TEST_SKIP;
-
- switch (type) {
- case TEST_TYPE_STATS_RX_DROPPED:
- ret = testapp_stats_rx_dropped(test);
- break;
- case TEST_TYPE_STATS_TX_INVALID_DESCS:
- ret = testapp_stats_tx_invalid_descs(test);
- break;
- case TEST_TYPE_STATS_RX_FULL:
- ret = testapp_stats_rx_full(test);
- break;
- case TEST_TYPE_STATS_FILL_EMPTY:
- ret = testapp_stats_fill_empty(test);
- break;
- case TEST_TYPE_TEARDOWN:
- ret = testapp_teardown(test);
- break;
- case TEST_TYPE_BIDI:
- ret = testapp_bidi(test);
- break;
- case TEST_TYPE_BPF_RES:
- ret = testapp_bpf_res(test);
- break;
- case TEST_TYPE_RUN_TO_COMPLETION:
- test_spec_set_name(test, "RUN_TO_COMPLETION");
- ret = testapp_validate_traffic(test);
- break;
- case TEST_TYPE_RUN_TO_COMPLETION_MB:
- ret = testapp_multi_buffer(test);
- break;
- case TEST_TYPE_RUN_TO_COMPLETION_SINGLE_PKT:
- test_spec_set_name(test, "RUN_TO_COMPLETION_SINGLE_PKT");
- ret = testapp_single_pkt(test);
- break;
- case TEST_TYPE_RUN_TO_COMPLETION_2K_FRAME:
- test_spec_set_name(test, "RUN_TO_COMPLETION_2K_FRAME_SIZE");
- test->ifobj_tx->umem->frame_size = 2048;
- test->ifobj_rx->umem->frame_size = 2048;
- pkt_stream_replace(test, DEFAULT_PKT_CNT, MIN_PKT_SIZE);
- ret = testapp_validate_traffic(test);
- break;
- case TEST_TYPE_RX_POLL:
- test->ifobj_rx->use_poll = true;
- test_spec_set_name(test, "POLL_RX");
- ret = testapp_validate_traffic(test);
- break;
- case TEST_TYPE_TX_POLL:
- test->ifobj_tx->use_poll = true;
- test_spec_set_name(test, "POLL_TX");
- ret = testapp_validate_traffic(test);
- break;
- case TEST_TYPE_POLL_TXQ_TMOUT:
- ret = testapp_poll_txq_tmout(test);
- break;
- case TEST_TYPE_POLL_RXQ_TMOUT:
- ret = testapp_poll_rxq_tmout(test);
- break;
- case TEST_TYPE_ALIGNED_INV_DESC:
- test_spec_set_name(test, "ALIGNED_INV_DESC");
- ret = testapp_invalid_desc(test);
- break;
- case TEST_TYPE_ALIGNED_INV_DESC_2K_FRAME:
- test_spec_set_name(test, "ALIGNED_INV_DESC_2K_FRAME_SIZE");
- test->ifobj_tx->umem->frame_size = 2048;
- test->ifobj_rx->umem->frame_size = 2048;
- ret = testapp_invalid_desc(test);
- break;
- case TEST_TYPE_UNALIGNED_INV_DESC:
- test_spec_set_name(test, "UNALIGNED_INV_DESC");
- test->ifobj_tx->umem->unaligned_mode = true;
- test->ifobj_rx->umem->unaligned_mode = true;
- ret = testapp_invalid_desc(test);
- break;
- case TEST_TYPE_UNALIGNED_INV_DESC_4K1_FRAME: {
- u64 page_size, umem_size;
-
- test_spec_set_name(test, "UNALIGNED_INV_DESC_4K1_FRAME_SIZE");
- /* Odd frame size so the UMEM doesn't end near a page boundary. */
- test->ifobj_tx->umem->frame_size = 4001;
- test->ifobj_rx->umem->frame_size = 4001;
- test->ifobj_tx->umem->unaligned_mode = true;
- test->ifobj_rx->umem->unaligned_mode = true;
- /* This test exists to test descriptors that staddle the end of
- * the UMEM but not a page.
- */
- page_size = sysconf(_SC_PAGESIZE);
- umem_size = test->ifobj_tx->umem->num_frames * test->ifobj_tx->umem->frame_size;
- assert(umem_size % page_size > MIN_PKT_SIZE);
- assert(umem_size % page_size < page_size - MIN_PKT_SIZE);
- ret = testapp_invalid_desc(test);
- break;
- }
- case TEST_TYPE_ALIGNED_INV_DESC_MB:
- test_spec_set_name(test, "ALIGNED_INV_DESC_MULTI_BUFF");
- ret = testapp_invalid_desc_mb(test);
- break;
- case TEST_TYPE_UNALIGNED_INV_DESC_MB:
- test_spec_set_name(test, "UNALIGNED_INV_DESC_MULTI_BUFF");
- test->ifobj_tx->umem->unaligned_mode = true;
- test->ifobj_rx->umem->unaligned_mode = true;
- ret = testapp_invalid_desc_mb(test);
- break;
- case TEST_TYPE_UNALIGNED:
- ret = testapp_unaligned(test);
- break;
- case TEST_TYPE_UNALIGNED_MB:
- ret = testapp_unaligned_mb(test);
- break;
- case TEST_TYPE_HEADROOM:
- ret = testapp_headroom(test);
- break;
- case TEST_TYPE_XDP_DROP_HALF:
- ret = testapp_xdp_drop(test);
- break;
- case TEST_TYPE_XDP_METADATA_COUNT:
- test_spec_set_name(test, "XDP_METADATA_COUNT");
- ret = testapp_xdp_metadata_count(test);
- break;
- case TEST_TYPE_XDP_METADATA_COUNT_MB:
- test_spec_set_name(test, "XDP_METADATA_COUNT_MULTI_BUFF");
- test->mtu = MAX_ETH_JUMBO_SIZE;
- ret = testapp_xdp_metadata_count(test);
- break;
- case TEST_TYPE_TOO_MANY_FRAGS:
- ret = testapp_too_many_frags(test);
- break;
- default:
- break;
- }
+static int testapp_send_receive(struct test_spec *test)
+{
+ return testapp_validate_traffic(test);
+}
+
+static int testapp_send_receive_2k_frame(struct test_spec *test)
+{
+ test->ifobj_tx->umem->frame_size = 2048;
+ test->ifobj_rx->umem->frame_size = 2048;
+ pkt_stream_replace(test, DEFAULT_PKT_CNT, MIN_PKT_SIZE);
+ return testapp_validate_traffic(test);
+}
+
+static int testapp_poll_rx(struct test_spec *test)
+{
+ test->ifobj_rx->use_poll = true;
+ return testapp_validate_traffic(test);
+}
+
+static int testapp_poll_tx(struct test_spec *test)
+{
+ test->ifobj_tx->use_poll = true;
+ return testapp_validate_traffic(test);
+}
+
+static int testapp_aligned_inv_desc(struct test_spec *test)
+{
+ return testapp_invalid_desc(test);
+}
+
+static int testapp_aligned_inv_desc_2k_frame(struct test_spec *test)
+{
+ test->ifobj_tx->umem->frame_size = 2048;
+ test->ifobj_rx->umem->frame_size = 2048;
+ return testapp_invalid_desc(test);
+}
+
+static int testapp_unaligned_inv_desc(struct test_spec *test)
+{
+ test->ifobj_tx->umem->unaligned_mode = true;
+ test->ifobj_rx->umem->unaligned_mode = true;
+ return testapp_invalid_desc(test);
+}
+
+static int testapp_unaligned_inv_desc_4001_frame(struct test_spec *test)
+{
+ u64 page_size, umem_size;
+
+ /* Odd frame size so the UMEM doesn't end near a page boundary. */
+ test->ifobj_tx->umem->frame_size = 4001;
+ test->ifobj_rx->umem->frame_size = 4001;
+ test->ifobj_tx->umem->unaligned_mode = true;
+ test->ifobj_rx->umem->unaligned_mode = true;
+ /* This test exists to test descriptors that staddle the end of
+ * the UMEM but not a page.
+ */
+ page_size = sysconf(_SC_PAGESIZE);
+ umem_size = test->ifobj_tx->umem->num_frames * test->ifobj_tx->umem->frame_size;
+ assert(umem_size % page_size > MIN_PKT_SIZE);
+ assert(umem_size % page_size < page_size - MIN_PKT_SIZE);
+
+ return testapp_invalid_desc(test);
+}
+
+static int testapp_aligned_inv_desc_mb(struct test_spec *test)
+{
+ return testapp_invalid_desc_mb(test);
+}
+
+static int testapp_unaligned_inv_desc_mb(struct test_spec *test)
+{
+ test->ifobj_tx->umem->unaligned_mode = true;
+ test->ifobj_rx->umem->unaligned_mode = true;
+ return testapp_invalid_desc_mb(test);
+}
+
+static int testapp_xdp_metadata(struct test_spec *test)
+{
+ return testapp_xdp_metadata_copy(test);
+}
+
+static int testapp_xdp_metadata_mb(struct test_spec *test)
+{
+ test->mtu = MAX_ETH_JUMBO_SIZE;
+ return testapp_xdp_metadata_copy(test);
+}
+
+static void run_pkt_test(struct test_spec *test)
+{
+ int ret;
+
+ ret = test->test_func(test);
if (ret == TEST_PASS)
ksft_test_result_pass("PASS: %s %s%s\n", mode_string(test), busy_poll_string(test),
@@ -2290,13 +2433,56 @@ static bool is_xdp_supported(int ifindex)
return true;
}
+static const struct test_spec tests[] = {
+ {.name = "SEND_RECEIVE", .test_func = testapp_send_receive},
+ {.name = "SEND_RECEIVE_2K_FRAME", .test_func = testapp_send_receive_2k_frame},
+ {.name = "SEND_RECEIVE_SINGLE_PKT", .test_func = testapp_single_pkt},
+ {.name = "POLL_RX", .test_func = testapp_poll_rx},
+ {.name = "POLL_TX", .test_func = testapp_poll_tx},
+ {.name = "POLL_RXQ_FULL", .test_func = testapp_poll_rxq_tmout},
+ {.name = "POLL_TXQ_FULL", .test_func = testapp_poll_txq_tmout},
+ {.name = "SEND_RECEIVE_UNALIGNED", .test_func = testapp_send_receive_unaligned},
+ {.name = "ALIGNED_INV_DESC", .test_func = testapp_aligned_inv_desc},
+ {.name = "ALIGNED_INV_DESC_2K_FRAME_SIZE", .test_func = testapp_aligned_inv_desc_2k_frame},
+ {.name = "UNALIGNED_INV_DESC", .test_func = testapp_unaligned_inv_desc},
+ {.name = "UNALIGNED_INV_DESC_4001_FRAME_SIZE",
+ .test_func = testapp_unaligned_inv_desc_4001_frame},
+ {.name = "UMEM_HEADROOM", .test_func = testapp_headroom},
+ {.name = "TEARDOWN", .test_func = testapp_teardown},
+ {.name = "BIDIRECTIONAL", .test_func = testapp_bidirectional},
+ {.name = "STAT_RX_DROPPED", .test_func = testapp_stats_rx_dropped},
+ {.name = "STAT_TX_INVALID", .test_func = testapp_stats_tx_invalid_descs},
+ {.name = "STAT_RX_FULL", .test_func = testapp_stats_rx_full},
+ {.name = "STAT_FILL_EMPTY", .test_func = testapp_stats_fill_empty},
+ {.name = "XDP_PROG_CLEANUP", .test_func = testapp_xdp_prog_cleanup},
+ {.name = "XDP_DROP_HALF", .test_func = testapp_xdp_drop},
+ {.name = "XDP_SHARED_UMEM", .test_func = testapp_xdp_shared_umem},
+ {.name = "XDP_METADATA_COPY", .test_func = testapp_xdp_metadata},
+ {.name = "XDP_METADATA_COPY_MULTI_BUFF", .test_func = testapp_xdp_metadata_mb},
+ {.name = "SEND_RECEIVE_9K_PACKETS", .test_func = testapp_send_receive_mb},
+ {.name = "SEND_RECEIVE_UNALIGNED_9K_PACKETS",
+ .test_func = testapp_send_receive_unaligned_mb},
+ {.name = "ALIGNED_INV_DESC_MULTI_BUFF", .test_func = testapp_aligned_inv_desc_mb},
+ {.name = "UNALIGNED_INV_DESC_MULTI_BUFF", .test_func = testapp_unaligned_inv_desc_mb},
+ {.name = "TOO_MANY_FRAGS", .test_func = testapp_too_many_frags},
+};
+
+static void print_tests(void)
+{
+ u32 i;
+
+ printf("Tests:\n");
+ for (i = 0; i < ARRAY_SIZE(tests); i++)
+ printf("%u: %s\n", i, tests[i].name);
+}
+
int main(int argc, char **argv)
{
struct pkt_stream *rx_pkt_stream_default;
struct pkt_stream *tx_pkt_stream_default;
struct ifobject *ifobj_tx, *ifobj_rx;
+ u32 i, j, failed_tests = 0, nb_tests;
int modes = TEST_MODE_SKB + 1;
- u32 i, j, failed_tests = 0;
struct test_spec test;
bool shared_netdev;
@@ -2314,14 +2500,21 @@ int main(int argc, char **argv)
parse_command_line(ifobj_tx, ifobj_rx, argc, argv);
+ if (opt_print_tests) {
+ print_tests();
+ ksft_exit_xpass();
+ }
+ if (opt_run_test != RUN_ALL_TESTS && opt_run_test >= ARRAY_SIZE(tests)) {
+ ksft_print_msg("Error: test %u does not exist.\n", opt_run_test);
+ ksft_exit_xfail();
+ }
+
shared_netdev = (ifobj_tx->ifindex == ifobj_rx->ifindex);
ifobj_tx->shared_umem = shared_netdev;
ifobj_rx->shared_umem = shared_netdev;
- if (!validate_interface(ifobj_tx) || !validate_interface(ifobj_rx)) {
- usage(basename(argv[0]));
- ksft_exit_xfail();
- }
+ if (!validate_interface(ifobj_tx) || !validate_interface(ifobj_rx))
+ print_usage(argv);
if (is_xdp_supported(ifobj_tx->ifindex)) {
modes++;
@@ -2329,23 +2522,46 @@ int main(int argc, char **argv)
modes++;
}
- init_iface(ifobj_rx, MAC1, MAC2, worker_testapp_validate_rx);
- init_iface(ifobj_tx, MAC2, MAC1, worker_testapp_validate_tx);
+ init_iface(ifobj_rx, worker_testapp_validate_rx);
+ init_iface(ifobj_tx, worker_testapp_validate_tx);
- test_spec_init(&test, ifobj_tx, ifobj_rx, 0);
- tx_pkt_stream_default = pkt_stream_generate(ifobj_tx->umem, DEFAULT_PKT_CNT, MIN_PKT_SIZE);
- rx_pkt_stream_default = pkt_stream_generate(ifobj_rx->umem, DEFAULT_PKT_CNT, MIN_PKT_SIZE);
+ test_spec_init(&test, ifobj_tx, ifobj_rx, 0, &tests[0]);
+ tx_pkt_stream_default = pkt_stream_generate(DEFAULT_PKT_CNT, MIN_PKT_SIZE);
+ rx_pkt_stream_default = pkt_stream_generate(DEFAULT_PKT_CNT, MIN_PKT_SIZE);
if (!tx_pkt_stream_default || !rx_pkt_stream_default)
exit_with_error(ENOMEM);
test.tx_pkt_stream_default = tx_pkt_stream_default;
test.rx_pkt_stream_default = rx_pkt_stream_default;
- ksft_set_plan(modes * TEST_TYPE_MAX);
+ if (opt_run_test == RUN_ALL_TESTS)
+ nb_tests = ARRAY_SIZE(tests);
+ else
+ nb_tests = 1;
+ if (opt_mode == TEST_MODE_ALL) {
+ ksft_set_plan(modes * nb_tests);
+ } else {
+ if (opt_mode == TEST_MODE_DRV && modes <= TEST_MODE_DRV) {
+ ksft_print_msg("Error: XDP_DRV mode not supported.\n");
+ ksft_exit_xfail();
+ }
+ if (opt_mode == TEST_MODE_ZC && modes <= TEST_MODE_ZC) {
+ ksft_print_msg("Error: zero-copy mode not supported.\n");
+ ksft_exit_xfail();
+ }
+
+ ksft_set_plan(nb_tests);
+ }
for (i = 0; i < modes; i++) {
- for (j = 0; j < TEST_TYPE_MAX; j++) {
- test_spec_init(&test, ifobj_tx, ifobj_rx, i);
- run_pkt_test(&test, i, j);
+ if (opt_mode != TEST_MODE_ALL && i != opt_mode)
+ continue;
+
+ for (j = 0; j < ARRAY_SIZE(tests); j++) {
+ if (opt_run_test != RUN_ALL_TESTS && j != opt_run_test)
+ continue;
+
+ test_spec_init(&test, ifobj_tx, ifobj_rx, i, &tests[j]);
+ run_pkt_test(&test);
usleep(USLEEP_MAX);
if (test.fail)
diff --git a/tools/testing/selftests/bpf/xskxceiver.h b/tools/testing/selftests/bpf/xskxceiver.h
index 233b66cef64a..f174df2d693f 100644
--- a/tools/testing/selftests/bpf/xskxceiver.h
+++ b/tools/testing/selftests/bpf/xskxceiver.h
@@ -5,7 +5,10 @@
#ifndef XSKXCEIVER_H_
#define XSKXCEIVER_H_
+#include <limits.h>
+
#include "xsk_xdp_progs.skel.h"
+#include "xsk_xdp_common.h"
#ifndef SOL_XDP
#define SOL_XDP 283
@@ -33,8 +36,7 @@
#define TEST_SKIP 2
#define MAX_INTERFACES 2
#define MAX_INTERFACE_NAME_CHARS 16
-#define MAX_SOCKETS 2
-#define MAX_TEST_NAME_SIZE 32
+#define MAX_TEST_NAME_SIZE 48
#define MAX_TEARDOWN_ITER 10
#define PKT_HDR_SIZE (sizeof(struct ethhdr) + 2) /* Just to align the data in the packet */
#define MIN_PKT_SIZE 64
@@ -56,6 +58,8 @@
#define XSK_DESC__MAX_SKB_FRAGS 18
#define HUGEPAGE_SIZE (2 * 1024 * 1024)
#define PKT_DUMP_NB_TO_PRINT 16
+#define RUN_ALL_TESTS UINT_MAX
+#define NUM_MAC_ADDRESSES 4
#define print_verbose(x...) do { if (opt_verbose) ksft_print_msg(x); } while (0)
@@ -63,43 +67,9 @@ enum test_mode {
TEST_MODE_SKB,
TEST_MODE_DRV,
TEST_MODE_ZC,
- TEST_MODE_MAX
-};
-
-enum test_type {
- TEST_TYPE_RUN_TO_COMPLETION,
- TEST_TYPE_RUN_TO_COMPLETION_2K_FRAME,
- TEST_TYPE_RUN_TO_COMPLETION_SINGLE_PKT,
- TEST_TYPE_RX_POLL,
- TEST_TYPE_TX_POLL,
- TEST_TYPE_POLL_RXQ_TMOUT,
- TEST_TYPE_POLL_TXQ_TMOUT,
- TEST_TYPE_UNALIGNED,
- TEST_TYPE_ALIGNED_INV_DESC,
- TEST_TYPE_ALIGNED_INV_DESC_2K_FRAME,
- TEST_TYPE_UNALIGNED_INV_DESC,
- TEST_TYPE_UNALIGNED_INV_DESC_4K1_FRAME,
- TEST_TYPE_HEADROOM,
- TEST_TYPE_TEARDOWN,
- TEST_TYPE_BIDI,
- TEST_TYPE_STATS_RX_DROPPED,
- TEST_TYPE_STATS_TX_INVALID_DESCS,
- TEST_TYPE_STATS_RX_FULL,
- TEST_TYPE_STATS_FILL_EMPTY,
- TEST_TYPE_BPF_RES,
- TEST_TYPE_XDP_DROP_HALF,
- TEST_TYPE_XDP_METADATA_COUNT,
- TEST_TYPE_XDP_METADATA_COUNT_MB,
- TEST_TYPE_RUN_TO_COMPLETION_MB,
- TEST_TYPE_UNALIGNED_MB,
- TEST_TYPE_ALIGNED_INV_DESC_MB,
- TEST_TYPE_UNALIGNED_INV_DESC_MB,
- TEST_TYPE_TOO_MANY_FRAGS,
- TEST_TYPE_MAX
+ TEST_MODE_ALL
};
-static bool opt_verbose;
-
struct xsk_umem_info {
struct xsk_ring_prod fq;
struct xsk_ring_cons cq;
@@ -118,8 +88,11 @@ struct xsk_socket_info {
struct xsk_ring_prod tx;
struct xsk_umem_info *umem;
struct xsk_socket *xsk;
+ struct pkt_stream *pkt_stream;
u32 outstanding_tx;
u32 rxqsize;
+ u8 dst_mac[ETH_ALEN];
+ u8 src_mac[ETH_ALEN];
};
struct pkt {
@@ -135,12 +108,16 @@ struct pkt_stream {
u32 current_pkt_nb;
struct pkt *pkts;
u32 max_pkt_len;
+ u32 nb_rx_pkts;
+ u32 nb_valid_entries;
bool verbatim;
};
struct ifobject;
+struct test_spec;
typedef int (*validation_func_t)(struct ifobject *ifobj);
typedef void *(*thread_func_t)(void *arg);
+typedef int (*test_func_t)(struct test_spec *test);
struct ifobject {
char ifname[MAX_INTERFACE_NAME_CHARS];
@@ -149,7 +126,6 @@ struct ifobject {
struct xsk_umem_info *umem;
thread_func_t func_ptr;
validation_func_t validation_func;
- struct pkt_stream *pkt_stream;
struct xsk_xdp_progs *xdp_progs;
struct bpf_map *xskmap;
struct bpf_program *xdp_prog;
@@ -169,8 +145,6 @@ struct ifobject {
bool unaligned_supp;
bool multi_buff_supp;
bool multi_buff_zc_supp;
- u8 dst_mac[ETH_ALEN];
- u8 src_mac[ETH_ALEN];
};
struct test_spec {
@@ -182,6 +156,7 @@ struct test_spec {
struct bpf_program *xdp_prog_tx;
struct bpf_map *xskmap_rx;
struct bpf_map *xskmap_tx;
+ test_func_t test_func;
int mtu;
u16 total_steps;
u16 current_step;
@@ -196,4 +171,6 @@ pthread_mutex_t pacing_mutex = PTHREAD_MUTEX_INITIALIZER;
int pkts_in_flight;
+static const u8 g_mac[ETH_ALEN] = {0x55, 0x44, 0x33, 0x22, 0x11, 0x00};
+
#endif /* XSKXCEIVER_H_ */
diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh
index 488f4964365e..5f2b3f6c0d74 100755
--- a/tools/testing/selftests/net/rtnetlink.sh
+++ b/tools/testing/selftests/net/rtnetlink.sh
@@ -31,6 +31,9 @@ ALL_TESTS="
"
devdummy="test-dummy0"
+VERBOSE=0
+PAUSE=no
+PAUSE_ON_FAIL=no
# Kselftest framework requirement - SKIP code is 4.
ksft_skip=4
@@ -51,35 +54,102 @@ check_fail()
fi
}
+run_cmd_common()
+{
+ local cmd="$*"
+ local out
+ if [ "$VERBOSE" = "1" ]; then
+ echo "COMMAND: ${cmd}"
+ fi
+ out=$($cmd 2>&1)
+ rc=$?
+ if [ "$VERBOSE" = "1" -a -n "$out" ]; then
+ echo " $out"
+ fi
+ return $rc
+}
+
+run_cmd() {
+ run_cmd_common "$@"
+ rc=$?
+ check_err $rc
+ return $rc
+}
+run_cmd_fail()
+{
+ run_cmd_common "$@"
+ rc=$?
+ check_fail $rc
+ return $rc
+}
+
+run_cmd_grep_common()
+{
+ local find="$1"; shift
+ local cmd="$*"
+ local out
+ if [ "$VERBOSE" = "1" ]; then
+ echo "COMMAND: ${cmd} 2>&1 | grep -q '${find}'"
+ fi
+ out=$($cmd 2>&1 | grep -q "${find}" 2>&1)
+ return $?
+}
+
+run_cmd_grep() {
+ run_cmd_grep_common "$@"
+ rc=$?
+ check_err $rc
+ return $rc
+}
+
+run_cmd_grep_fail()
+{
+ run_cmd_grep_common "$@"
+ rc=$?
+ check_fail $rc
+ return $rc
+}
+
+end_test()
+{
+ echo "$*"
+ [ "${VERBOSE}" = "1" ] && echo
+
+ if [[ $ret -ne 0 ]] && [[ "${PAUSE_ON_FAIL}" = "yes" ]]; then
+ echo "Hit enter to continue"
+ read a
+ fi;
+
+ if [ "${PAUSE}" = "yes" ]; then
+ echo "Hit enter to continue"
+ read a
+ fi
+
+}
+
+
kci_add_dummy()
{
- ip link add name "$devdummy" type dummy
- check_err $?
- ip link set "$devdummy" up
- check_err $?
+ run_cmd ip link add name "$devdummy" type dummy
+ run_cmd ip link set "$devdummy" up
}
kci_del_dummy()
{
- ip link del dev "$devdummy"
- check_err $?
+ run_cmd ip link del dev "$devdummy"
}
kci_test_netconf()
{
dev="$1"
r=$ret
-
- ip netconf show dev "$dev" > /dev/null
- check_err $?
-
+ run_cmd ip netconf show dev "$dev"
for f in 4 6; do
- ip -$f netconf show dev "$dev" > /dev/null
- check_err $?
+ run_cmd ip -$f netconf show dev "$dev"
done
if [ $ret -ne 0 ] ;then
- echo "FAIL: ip netconf show $dev"
+ end_test "FAIL: ip netconf show $dev"
test $r -eq 0 && ret=0
return 1
fi
@@ -92,43 +162,27 @@ kci_test_bridge()
vlandev="testbr-vlan1"
local ret=0
- ip link add name "$devbr" type bridge
- check_err $?
-
- ip link set dev "$devdummy" master "$devbr"
- check_err $?
-
- ip link set "$devbr" up
- check_err $?
-
- ip link add link "$devbr" name "$vlandev" type vlan id 1
- check_err $?
- ip addr add dev "$vlandev" 10.200.7.23/30
- check_err $?
- ip -6 addr add dev "$vlandev" dead:42::1234/64
- check_err $?
- ip -d link > /dev/null
- check_err $?
- ip r s t all > /dev/null
- check_err $?
+ run_cmd ip link add name "$devbr" type bridge
+ run_cmd ip link set dev "$devdummy" master "$devbr"
+ run_cmd ip link set "$devbr" up
+ run_cmd ip link add link "$devbr" name "$vlandev" type vlan id 1
+ run_cmd ip addr add dev "$vlandev" 10.200.7.23/30
+ run_cmd ip -6 addr add dev "$vlandev" dead:42::1234/64
+ run_cmd ip -d link
+ run_cmd ip r s t all
for name in "$devbr" "$vlandev" "$devdummy" ; do
kci_test_netconf "$name"
done
-
- ip -6 addr del dev "$vlandev" dead:42::1234/64
- check_err $?
-
- ip link del dev "$vlandev"
- check_err $?
- ip link del dev "$devbr"
- check_err $?
+ run_cmd ip -6 addr del dev "$vlandev" dead:42::1234/64
+ run_cmd ip link del dev "$vlandev"
+ run_cmd ip link del dev "$devbr"
if [ $ret -ne 0 ];then
- echo "FAIL: bridge setup"
+ end_test "FAIL: bridge setup"
return 1
fi
- echo "PASS: bridge setup"
+ end_test "PASS: bridge setup"
}
@@ -139,34 +193,23 @@ kci_test_gre()
loc=10.0.0.1
local ret=0
- ip tunnel add $gredev mode gre remote $rem local $loc ttl 1
- check_err $?
- ip link set $gredev up
- check_err $?
- ip addr add 10.23.7.10 dev $gredev
- check_err $?
- ip route add 10.23.8.0/30 dev $gredev
- check_err $?
- ip addr add dev "$devdummy" 10.23.7.11/24
- check_err $?
- ip link > /dev/null
- check_err $?
- ip addr > /dev/null
- check_err $?
+ run_cmd ip tunnel add $gredev mode gre remote $rem local $loc ttl 1
+ run_cmd ip link set $gredev up
+ run_cmd ip addr add 10.23.7.10 dev $gredev
+ run_cmd ip route add 10.23.8.0/30 dev $gredev
+ run_cmd ip addr add dev "$devdummy" 10.23.7.11/24
+ run_cmd ip link
+ run_cmd ip addr
kci_test_netconf "$gredev"
-
- ip addr del dev "$devdummy" 10.23.7.11/24
- check_err $?
-
- ip link del $gredev
- check_err $?
+ run_cmd ip addr del dev "$devdummy" 10.23.7.11/24
+ run_cmd ip link del $gredev
if [ $ret -ne 0 ];then
- echo "FAIL: gre tunnel endpoint"
+ end_test "FAIL: gre tunnel endpoint"
return 1
fi
- echo "PASS: gre tunnel endpoint"
+ end_test "PASS: gre tunnel endpoint"
}
# tc uses rtnetlink too, for full tc testing
@@ -176,56 +219,40 @@ kci_test_tc()
dev=lo
local ret=0
- tc qdisc add dev "$dev" root handle 1: htb
- check_err $?
- tc class add dev "$dev" parent 1: classid 1:10 htb rate 1mbit
- check_err $?
- tc filter add dev "$dev" parent 1:0 prio 5 handle ffe: protocol ip u32 divisor 256
- check_err $?
- tc filter add dev "$dev" parent 1:0 prio 5 handle ffd: protocol ip u32 divisor 256
- check_err $?
- tc filter add dev "$dev" parent 1:0 prio 5 handle ffc: protocol ip u32 divisor 256
- check_err $?
- tc filter add dev "$dev" protocol ip parent 1: prio 5 handle ffe:2:3 u32 ht ffe:2: match ip src 10.0.0.3 flowid 1:10
- check_err $?
- tc filter add dev "$dev" protocol ip parent 1: prio 5 handle ffe:2:2 u32 ht ffe:2: match ip src 10.0.0.2 flowid 1:10
- check_err $?
- tc filter show dev "$dev" parent 1:0 > /dev/null
- check_err $?
- tc filter del dev "$dev" protocol ip parent 1: prio 5 handle ffe:2:3 u32
- check_err $?
- tc filter show dev "$dev" parent 1:0 > /dev/null
- check_err $?
- tc qdisc del dev "$dev" root handle 1: htb
- check_err $?
+ run_cmd tc qdisc add dev "$dev" root handle 1: htb
+ run_cmd tc class add dev "$dev" parent 1: classid 1:10 htb rate 1mbit
+ run_cmd tc filter add dev "$dev" parent 1:0 prio 5 handle ffe: protocol ip u32 divisor 256
+ run_cmd tc filter add dev "$dev" parent 1:0 prio 5 handle ffd: protocol ip u32 divisor 256
+ run_cmd tc filter add dev "$dev" parent 1:0 prio 5 handle ffc: protocol ip u32 divisor 256
+ run_cmd tc filter add dev "$dev" protocol ip parent 1: prio 5 handle ffe:2:3 u32 ht ffe:2: match ip src 10.0.0.3 flowid 1:10
+ run_cmd tc filter add dev "$dev" protocol ip parent 1: prio 5 handle ffe:2:2 u32 ht ffe:2: match ip src 10.0.0.2 flowid 1:10
+ run_cmd tc filter show dev "$dev" parent 1:0
+ run_cmd tc filter del dev "$dev" protocol ip parent 1: prio 5 handle ffe:2:3 u32
+ run_cmd tc filter show dev "$dev" parent 1:0
+ run_cmd tc qdisc del dev "$dev" root handle 1: htb
if [ $ret -ne 0 ];then
- echo "FAIL: tc htb hierarchy"
+ end_test "FAIL: tc htb hierarchy"
return 1
fi
- echo "PASS: tc htb hierarchy"
+ end_test "PASS: tc htb hierarchy"
}
kci_test_polrouting()
{
local ret=0
- ip rule add fwmark 1 lookup 100
- check_err $?
- ip route add local 0.0.0.0/0 dev lo table 100
- check_err $?
- ip r s t all > /dev/null
- check_err $?
- ip rule del fwmark 1 lookup 100
- check_err $?
- ip route del local 0.0.0.0/0 dev lo table 100
- check_err $?
+ run_cmd ip rule add fwmark 1 lookup 100
+ run_cmd ip route add local 0.0.0.0/0 dev lo table 100
+ run_cmd ip r s t all
+ run_cmd ip rule del fwmark 1 lookup 100
+ run_cmd ip route del local 0.0.0.0/0 dev lo table 100
if [ $ret -ne 0 ];then
- echo "FAIL: policy route test"
+ end_test "FAIL: policy route test"
return 1
fi
- echo "PASS: policy routing"
+ end_test "PASS: policy routing"
}
kci_test_route_get()
@@ -233,65 +260,51 @@ kci_test_route_get()
local hash_policy=$(sysctl -n net.ipv4.fib_multipath_hash_policy)
local ret=0
-
- ip route get 127.0.0.1 > /dev/null
- check_err $?
- ip route get 127.0.0.1 dev "$devdummy" > /dev/null
- check_err $?
- ip route get ::1 > /dev/null
- check_err $?
- ip route get fe80::1 dev "$devdummy" > /dev/null
- check_err $?
- ip route get 127.0.0.1 from 127.0.0.1 oif lo tos 0x10 mark 0x1 > /dev/null
- check_err $?
- ip route get ::1 from ::1 iif lo oif lo tos 0x10 mark 0x1 > /dev/null
- check_err $?
- ip addr add dev "$devdummy" 10.23.7.11/24
- check_err $?
- ip route get 10.23.7.11 from 10.23.7.12 iif "$devdummy" > /dev/null
- check_err $?
- ip route add 10.23.8.0/24 \
+ run_cmd ip route get 127.0.0.1
+ run_cmd ip route get 127.0.0.1 dev "$devdummy"
+ run_cmd ip route get ::1
+ run_cmd ip route get fe80::1 dev "$devdummy"
+ run_cmd ip route get 127.0.0.1 from 127.0.0.1 oif lo tos 0x10 mark 0x1
+ run_cmd ip route get ::1 from ::1 iif lo oif lo tos 0x10 mark 0x1
+ run_cmd ip addr add dev "$devdummy" 10.23.7.11/24
+ run_cmd ip route get 10.23.7.11 from 10.23.7.12 iif "$devdummy"
+ run_cmd ip route add 10.23.8.0/24 \
nexthop via 10.23.7.13 dev "$devdummy" \
nexthop via 10.23.7.14 dev "$devdummy"
- check_err $?
+
sysctl -wq net.ipv4.fib_multipath_hash_policy=0
- ip route get 10.23.8.11 > /dev/null
- check_err $?
+ run_cmd ip route get 10.23.8.11
sysctl -wq net.ipv4.fib_multipath_hash_policy=1
- ip route get 10.23.8.11 > /dev/null
- check_err $?
+ run_cmd ip route get 10.23.8.11
sysctl -wq net.ipv4.fib_multipath_hash_policy="$hash_policy"
- ip route del 10.23.8.0/24
- check_err $?
- ip addr del dev "$devdummy" 10.23.7.11/24
- check_err $?
+ run_cmd ip route del 10.23.8.0/24
+ run_cmd ip addr del dev "$devdummy" 10.23.7.11/24
+
if [ $ret -ne 0 ];then
- echo "FAIL: route get"
+ end_test "FAIL: route get"
return 1
fi
- echo "PASS: route get"
+ end_test "PASS: route get"
}
kci_test_addrlft()
{
for i in $(seq 10 100) ;do
lft=$(((RANDOM%3) + 1))
- ip addr add 10.23.11.$i/32 dev "$devdummy" preferred_lft $lft valid_lft $((lft+1))
- check_err $?
+ run_cmd ip addr add 10.23.11.$i/32 dev "$devdummy" preferred_lft $lft valid_lft $((lft+1))
done
sleep 5
-
- ip addr show dev "$devdummy" | grep "10.23.11."
+ run_cmd_grep "10.23.11." ip addr show dev "$devdummy"
if [ $? -eq 0 ]; then
- echo "FAIL: preferred_lft addresses remaining"
check_err 1
+ end_test "FAIL: preferred_lft addresses remaining"
return
fi
- echo "PASS: preferred_lft addresses have expired"
+ end_test "PASS: preferred_lft addresses have expired"
}
kci_test_promote_secondaries()
@@ -310,27 +323,17 @@ kci_test_promote_secondaries()
[ $promote -eq 0 ] && sysctl -q net.ipv4.conf.$devdummy.promote_secondaries=0
- echo "PASS: promote_secondaries complete"
+ end_test "PASS: promote_secondaries complete"
}
kci_test_addrlabel()
{
local ret=0
-
- ip addrlabel add prefix dead::/64 dev lo label 1
- check_err $?
-
- ip addrlabel list |grep -q "prefix dead::/64 dev lo label 1"
- check_err $?
-
- ip addrlabel del prefix dead::/64 dev lo label 1 2> /dev/null
- check_err $?
-
- ip addrlabel add prefix dead::/64 label 1 2> /dev/null
- check_err $?
-
- ip addrlabel del prefix dead::/64 label 1 2> /dev/null
- check_err $?
+ run_cmd ip addrlabel add prefix dead::/64 dev lo label 1
+ run_cmd_grep "prefix dead::/64 dev lo label 1" ip addrlabel list
+ run_cmd ip addrlabel del prefix dead::/64 dev lo label 1
+ run_cmd ip addrlabel add prefix dead::/64 label 1
+ run_cmd ip addrlabel del prefix dead::/64 label 1
# concurrent add/delete
for i in $(seq 1 1000); do
@@ -346,11 +349,11 @@ kci_test_addrlabel()
ip addrlabel del prefix 1c3::/64 label 12345 2>/dev/null
if [ $ret -ne 0 ];then
- echo "FAIL: ipv6 addrlabel"
+ end_test "FAIL: ipv6 addrlabel"
return 1
fi
- echo "PASS: ipv6 addrlabel"
+ end_test "PASS: ipv6 addrlabel"
}
kci_test_ifalias()
@@ -358,35 +361,28 @@ kci_test_ifalias()
local ret=0
namewant=$(uuidgen)
syspathname="/sys/class/net/$devdummy/ifalias"
-
- ip link set dev "$devdummy" alias "$namewant"
- check_err $?
+ run_cmd ip link set dev "$devdummy" alias "$namewant"
if [ $ret -ne 0 ]; then
- echo "FAIL: cannot set interface alias of $devdummy to $namewant"
+ end_test "FAIL: cannot set interface alias of $devdummy to $namewant"
return 1
fi
-
- ip link show "$devdummy" | grep -q "alias $namewant"
- check_err $?
+ run_cmd_grep "alias $namewant" ip link show "$devdummy"
if [ -r "$syspathname" ] ; then
read namehave < "$syspathname"
if [ "$namewant" != "$namehave" ]; then
- echo "FAIL: did set ifalias $namewant but got $namehave"
+ end_test "FAIL: did set ifalias $namewant but got $namehave"
return 1
fi
namewant=$(uuidgen)
echo "$namewant" > "$syspathname"
- ip link show "$devdummy" | grep -q "alias $namewant"
- check_err $?
+ run_cmd_grep "alias $namewant" ip link show "$devdummy"
# sysfs interface allows to delete alias again
echo "" > "$syspathname"
-
- ip link show "$devdummy" | grep -q "alias $namewant"
- check_fail $?
+ run_cmd_grep_fail "alias $namewant" ip link show "$devdummy"
for i in $(seq 1 100); do
uuidgen > "$syspathname" &
@@ -395,57 +391,48 @@ kci_test_ifalias()
wait
# re-add the alias -- kernel should free mem when dummy dev is removed
- ip link set dev "$devdummy" alias "$namewant"
- check_err $?
+ run_cmd ip link set dev "$devdummy" alias "$namewant"
+
fi
if [ $ret -ne 0 ]; then
- echo "FAIL: set interface alias $devdummy to $namewant"
+ end_test "FAIL: set interface alias $devdummy to $namewant"
return 1
fi
- echo "PASS: set ifalias $namewant for $devdummy"
+ end_test "PASS: set ifalias $namewant for $devdummy"
}
kci_test_vrf()
{
vrfname="test-vrf"
local ret=0
-
- ip link show type vrf 2>/dev/null
+ run_cmd ip link show type vrf
if [ $? -ne 0 ]; then
- echo "SKIP: vrf: iproute2 too old"
+ end_test "SKIP: vrf: iproute2 too old"
return $ksft_skip
fi
-
- ip link add "$vrfname" type vrf table 10
- check_err $?
+ run_cmd ip link add "$vrfname" type vrf table 10
if [ $ret -ne 0 ];then
- echo "FAIL: can't add vrf interface, skipping test"
+ end_test "FAIL: can't add vrf interface, skipping test"
return 0
fi
-
- ip -br link show type vrf | grep -q "$vrfname"
- check_err $?
+ run_cmd_grep "$vrfname" ip -br link show type vrf
if [ $ret -ne 0 ];then
- echo "FAIL: created vrf device not found"
+ end_test "FAIL: created vrf device not found"
return 1
fi
- ip link set dev "$vrfname" up
- check_err $?
-
- ip link set dev "$devdummy" master "$vrfname"
- check_err $?
- ip link del dev "$vrfname"
- check_err $?
+ run_cmd ip link set dev "$vrfname" up
+ run_cmd ip link set dev "$devdummy" master "$vrfname"
+ run_cmd ip link del dev "$vrfname"
if [ $ret -ne 0 ];then
- echo "FAIL: vrf"
+ end_test "FAIL: vrf"
return 1
fi
- echo "PASS: vrf"
+ end_test "PASS: vrf"
}
kci_test_encap_vxlan()
@@ -454,84 +441,44 @@ kci_test_encap_vxlan()
vxlan="test-vxlan0"
vlan="test-vlan0"
testns="$1"
-
- ip -netns "$testns" link add "$vxlan" type vxlan id 42 group 239.1.1.1 \
- dev "$devdummy" dstport 4789 2>/dev/null
+ run_cmd ip -netns "$testns" link add "$vxlan" type vxlan id 42 group 239.1.1.1 \
+ dev "$devdummy" dstport 4789
if [ $? -ne 0 ]; then
- echo "FAIL: can't add vxlan interface, skipping test"
+ end_test "FAIL: can't add vxlan interface, skipping test"
return 0
fi
- check_err $?
- ip -netns "$testns" addr add 10.2.11.49/24 dev "$vxlan"
- check_err $?
-
- ip -netns "$testns" link set up dev "$vxlan"
- check_err $?
-
- ip -netns "$testns" link add link "$vxlan" name "$vlan" type vlan id 1
- check_err $?
+ run_cmd ip -netns "$testns" addr add 10.2.11.49/24 dev "$vxlan"
+ run_cmd ip -netns "$testns" link set up dev "$vxlan"
+ run_cmd ip -netns "$testns" link add link "$vxlan" name "$vlan" type vlan id 1
# changelink testcases
- ip -netns "$testns" link set dev "$vxlan" type vxlan vni 43 2>/dev/null
- check_fail $?
-
- ip -netns "$testns" link set dev "$vxlan" type vxlan group ffe5::5 dev "$devdummy" 2>/dev/null
- check_fail $?
-
- ip -netns "$testns" link set dev "$vxlan" type vxlan ttl inherit 2>/dev/null
- check_fail $?
-
- ip -netns "$testns" link set dev "$vxlan" type vxlan ttl 64
- check_err $?
-
- ip -netns "$testns" link set dev "$vxlan" type vxlan nolearning
- check_err $?
-
- ip -netns "$testns" link set dev "$vxlan" type vxlan proxy 2>/dev/null
- check_fail $?
-
- ip -netns "$testns" link set dev "$vxlan" type vxlan norsc 2>/dev/null
- check_fail $?
-
- ip -netns "$testns" link set dev "$vxlan" type vxlan l2miss 2>/dev/null
- check_fail $?
-
- ip -netns "$testns" link set dev "$vxlan" type vxlan l3miss 2>/dev/null
- check_fail $?
-
- ip -netns "$testns" link set dev "$vxlan" type vxlan external 2>/dev/null
- check_fail $?
-
- ip -netns "$testns" link set dev "$vxlan" type vxlan udpcsum 2>/dev/null
- check_fail $?
-
- ip -netns "$testns" link set dev "$vxlan" type vxlan udp6zerocsumtx 2>/dev/null
- check_fail $?
-
- ip -netns "$testns" link set dev "$vxlan" type vxlan udp6zerocsumrx 2>/dev/null
- check_fail $?
-
- ip -netns "$testns" link set dev "$vxlan" type vxlan remcsumtx 2>/dev/null
- check_fail $?
-
- ip -netns "$testns" link set dev "$vxlan" type vxlan remcsumrx 2>/dev/null
- check_fail $?
-
- ip -netns "$testns" link set dev "$vxlan" type vxlan gbp 2>/dev/null
- check_fail $?
-
- ip -netns "$testns" link set dev "$vxlan" type vxlan gpe 2>/dev/null
- check_fail $?
-
- ip -netns "$testns" link del "$vxlan"
- check_err $?
+ run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan vni 43
+ run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan group ffe5::5 dev "$devdummy"
+ run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan ttl inherit
+
+ run_cmd ip -netns "$testns" link set dev "$vxlan" type vxlan ttl 64
+ run_cmd ip -netns "$testns" link set dev "$vxlan" type vxlan nolearning
+
+ run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan proxy
+ run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan norsc
+ run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan l2miss
+ run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan l3miss
+ run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan external
+ run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan udpcsum
+ run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan udp6zerocsumtx
+ run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan udp6zerocsumrx
+ run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan remcsumtx
+ run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan remcsumrx
+ run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan gbp
+ run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan gpe
+ run_cmd ip -netns "$testns" link del "$vxlan"
if [ $ret -ne 0 ]; then
- echo "FAIL: vxlan"
+ end_test "FAIL: vxlan"
return 1
fi
- echo "PASS: vxlan"
+ end_test "PASS: vxlan"
}
kci_test_encap_fou()
@@ -539,39 +486,32 @@ kci_test_encap_fou()
local ret=0
name="test-fou"
testns="$1"
-
- ip fou help 2>&1 |grep -q 'Usage: ip fou'
+ run_cmd_grep 'Usage: ip fou' ip fou help
if [ $? -ne 0 ];then
- echo "SKIP: fou: iproute2 too old"
+ end_test "SKIP: fou: iproute2 too old"
return $ksft_skip
fi
if ! /sbin/modprobe -q -n fou; then
- echo "SKIP: module fou is not found"
+ end_test "SKIP: module fou is not found"
return $ksft_skip
fi
/sbin/modprobe -q fou
- ip -netns "$testns" fou add port 7777 ipproto 47 2>/dev/null
+
+ run_cmd ip -netns "$testns" fou add port 7777 ipproto 47
if [ $? -ne 0 ];then
- echo "FAIL: can't add fou port 7777, skipping test"
+ end_test "FAIL: can't add fou port 7777, skipping test"
return 1
fi
-
- ip -netns "$testns" fou add port 8888 ipproto 4
- check_err $?
-
- ip -netns "$testns" fou del port 9999 2>/dev/null
- check_fail $?
-
- ip -netns "$testns" fou del port 7777
- check_err $?
-
+ run_cmd ip -netns "$testns" fou add port 8888 ipproto 4
+ run_cmd_fail ip -netns "$testns" fou del port 9999
+ run_cmd ip -netns "$testns" fou del port 7777
if [ $ret -ne 0 ]; then
- echo "FAIL: fou"
+ end_test "FAIL: fou"s
return 1
fi
- echo "PASS: fou"
+ end_test "PASS: fou"
}
# test various encap methods, use netns to avoid unwanted interference
@@ -579,25 +519,16 @@ kci_test_encap()
{
testns="testns"
local ret=0
-
- ip netns add "$testns"
+ run_cmd ip netns add "$testns"
if [ $? -ne 0 ]; then
- echo "SKIP encap tests: cannot add net namespace $testns"
+ end_test "SKIP encap tests: cannot add net namespace $testns"
return $ksft_skip
fi
-
- ip -netns "$testns" link set lo up
- check_err $?
-
- ip -netns "$testns" link add name "$devdummy" type dummy
- check_err $?
- ip -netns "$testns" link set "$devdummy" up
- check_err $?
-
- kci_test_encap_vxlan "$testns"
- check_err $?
- kci_test_encap_fou "$testns"
- check_err $?
+ run_cmd ip -netns "$testns" link set lo up
+ run_cmd ip -netns "$testns" link add name "$devdummy" type dummy
+ run_cmd ip -netns "$testns" link set "$devdummy" up
+ run_cmd kci_test_encap_vxlan "$testns"
+ run_cmd kci_test_encap_fou "$testns"
ip netns del "$testns"
return $ret
@@ -607,41 +538,28 @@ kci_test_macsec()
{
msname="test_macsec0"
local ret=0
-
- ip macsec help 2>&1 | grep -q "^Usage: ip macsec"
+ run_cmd_grep "^Usage: ip macsec" ip macsec help
if [ $? -ne 0 ]; then
- echo "SKIP: macsec: iproute2 too old"
+ end_test "SKIP: macsec: iproute2 too old"
return $ksft_skip
fi
-
- ip link add link "$devdummy" "$msname" type macsec port 42 encrypt on
- check_err $?
+ run_cmd ip link add link "$devdummy" "$msname" type macsec port 42 encrypt on
if [ $ret -ne 0 ];then
- echo "FAIL: can't add macsec interface, skipping test"
+ end_test "FAIL: can't add macsec interface, skipping test"
return 1
fi
-
- ip macsec add "$msname" tx sa 0 pn 1024 on key 01 12345678901234567890123456789012
- check_err $?
-
- ip macsec add "$msname" rx port 1234 address "1c:ed:de:ad:be:ef"
- check_err $?
-
- ip macsec add "$msname" rx port 1234 address "1c:ed:de:ad:be:ef" sa 0 pn 1 on key 00 0123456789abcdef0123456789abcdef
- check_err $?
-
- ip macsec show > /dev/null
- check_err $?
-
- ip link del dev "$msname"
- check_err $?
+ run_cmd ip macsec add "$msname" tx sa 0 pn 1024 on key 01 12345678901234567890123456789012
+ run_cmd ip macsec add "$msname" rx port 1234 address "1c:ed:de:ad:be:ef"
+ run_cmd ip macsec add "$msname" rx port 1234 address "1c:ed:de:ad:be:ef" sa 0 pn 1 on key 00 0123456789abcdef0123456789abcdef
+ run_cmd ip macsec show
+ run_cmd ip link del dev "$msname"
if [ $ret -ne 0 ];then
- echo "FAIL: macsec"
+ end_test "FAIL: macsec"
return 1
fi
- echo "PASS: macsec"
+ end_test "PASS: macsec"
}
kci_test_macsec_offload()
@@ -650,19 +568,18 @@ kci_test_macsec_offload()
sysfsnet=/sys/bus/netdevsim/devices/netdevsim0/net/
probed=false
local ret=0
-
- ip macsec help 2>&1 | grep -q "^Usage: ip macsec"
+ run_cmd_grep "^Usage: ip macsec" ip macsec help
if [ $? -ne 0 ]; then
- echo "SKIP: macsec: iproute2 too old"
+ end_test "SKIP: macsec: iproute2 too old"
return $ksft_skip
fi
# setup netdevsim since dummydev doesn't have offload support
if [ ! -w /sys/bus/netdevsim/new_device ] ; then
- modprobe -q netdevsim
- check_err $?
+ run_cmd modprobe -q netdevsim
+
if [ $ret -ne 0 ]; then
- echo "SKIP: macsec_offload can't load netdevsim"
+ end_test "SKIP: macsec_offload can't load netdevsim"
return $ksft_skip
fi
probed=true
@@ -675,43 +592,25 @@ kci_test_macsec_offload()
ip link set $dev up
if [ ! -d $sysfsd ] ; then
- echo "FAIL: macsec_offload can't create device $dev"
+ end_test "FAIL: macsec_offload can't create device $dev"
return 1
fi
-
- ethtool -k $dev | grep -q 'macsec-hw-offload: on'
+ run_cmd_grep 'macsec-hw-offload: on' ethtool -k $dev
if [ $? -eq 1 ] ; then
- echo "FAIL: macsec_offload netdevsim doesn't support MACsec offload"
+ end_test "FAIL: macsec_offload netdevsim doesn't support MACsec offload"
return 1
fi
-
- ip link add link $dev kci_macsec1 type macsec port 4 offload mac
- check_err $?
-
- ip link add link $dev kci_macsec2 type macsec address "aa:bb:cc:dd:ee:ff" port 5 offload mac
- check_err $?
-
- ip link add link $dev kci_macsec3 type macsec sci abbacdde01020304 offload mac
- check_err $?
-
- ip link add link $dev kci_macsec4 type macsec port 8 offload mac 2> /dev/null
- check_fail $?
+ run_cmd ip link add link $dev kci_macsec1 type macsec port 4 offload mac
+ run_cmd ip link add link $dev kci_macsec2 type macsec address "aa:bb:cc:dd:ee:ff" port 5 offload mac
+ run_cmd ip link add link $dev kci_macsec3 type macsec sci abbacdde01020304 offload mac
+ run_cmd_fail ip link add link $dev kci_macsec4 type macsec port 8 offload mac
msname=kci_macsec1
-
- ip macsec add "$msname" tx sa 0 pn 1024 on key 01 12345678901234567890123456789012
- check_err $?
-
- ip macsec add "$msname" rx port 1234 address "1c:ed:de:ad:be:ef"
- check_err $?
-
- ip macsec add "$msname" rx port 1234 address "1c:ed:de:ad:be:ef" sa 0 pn 1 on \
+ run_cmd ip macsec add "$msname" tx sa 0 pn 1024 on key 01 12345678901234567890123456789012
+ run_cmd ip macsec add "$msname" rx port 1234 address "1c:ed:de:ad:be:ef"
+ run_cmd ip macsec add "$msname" rx port 1234 address "1c:ed:de:ad:be:ef" sa 0 pn 1 on \
key 00 0123456789abcdef0123456789abcdef
- check_err $?
-
- ip macsec add "$msname" rx port 1235 address "1c:ed:de:ad:be:ef" 2> /dev/null
- check_fail $?
-
+ run_cmd_fail ip macsec add "$msname" rx port 1235 address "1c:ed:de:ad:be:ef"
# clean up any leftovers
for msdev in kci_macsec{1,2,3,4} ; do
ip link del $msdev 2> /dev/null
@@ -720,10 +619,10 @@ kci_test_macsec_offload()
$probed && rmmod netdevsim
if [ $ret -ne 0 ]; then
- echo "FAIL: macsec_offload"
+ end_test "FAIL: macsec_offload"
return 1
fi
- echo "PASS: macsec_offload"
+ end_test "PASS: macsec_offload"
}
#-------------------------------------------------------------------
@@ -755,8 +654,7 @@ kci_test_ipsec()
ip addr add $srcip dev $devdummy
# flush to be sure there's nothing configured
- ip x s flush ; ip x p flush
- check_err $?
+ run_cmd ip x s flush ; ip x p flush
# start the monitor in the background
tmpfile=`mktemp /var/run/ipsectestXXX`
@@ -764,72 +662,57 @@ kci_test_ipsec()
sleep 0.2
ipsecid="proto esp src $srcip dst $dstip spi 0x07"
- ip x s add $ipsecid \
+ run_cmd ip x s add $ipsecid \
mode transport reqid 0x07 replay-window 32 \
$algo sel src $srcip/24 dst $dstip/24
- check_err $?
- lines=`ip x s list | grep $srcip | grep $dstip | wc -l`
- test $lines -eq 2
- check_err $?
- ip x s count | grep -q "SAD count 1"
- check_err $?
+ lines=`ip x s list | grep $srcip | grep $dstip | wc -l`
+ run_cmd test $lines -eq 2
+ run_cmd_grep "SAD count 1" ip x s count
lines=`ip x s get $ipsecid | grep $srcip | grep $dstip | wc -l`
- test $lines -eq 2
- check_err $?
-
- ip x s delete $ipsecid
- check_err $?
+ run_cmd test $lines -eq 2
+ run_cmd ip x s delete $ipsecid
lines=`ip x s list | wc -l`
- test $lines -eq 0
- check_err $?
+ run_cmd test $lines -eq 0
ipsecsel="dir out src $srcip/24 dst $dstip/24"
- ip x p add $ipsecsel \
+ run_cmd ip x p add $ipsecsel \
tmpl proto esp src $srcip dst $dstip \
spi 0x07 mode transport reqid 0x07
- check_err $?
+
lines=`ip x p list | grep $srcip | grep $dstip | wc -l`
- test $lines -eq 2
- check_err $?
+ run_cmd test $lines -eq 2
- ip x p count | grep -q "SPD IN 0 OUT 1 FWD 0"
- check_err $?
+ run_cmd_grep "SPD IN 0 OUT 1 FWD 0" ip x p count
lines=`ip x p get $ipsecsel | grep $srcip | grep $dstip | wc -l`
- test $lines -eq 2
- check_err $?
+ run_cmd test $lines -eq 2
- ip x p delete $ipsecsel
- check_err $?
+ run_cmd ip x p delete $ipsecsel
lines=`ip x p list | wc -l`
- test $lines -eq 0
- check_err $?
+ run_cmd test $lines -eq 0
# check the monitor results
kill $mpid
lines=`wc -l $tmpfile | cut "-d " -f1`
- test $lines -eq 20
- check_err $?
+ run_cmd test $lines -eq 20
rm -rf $tmpfile
# clean up any leftovers
- ip x s flush
- check_err $?
- ip x p flush
- check_err $?
+ run_cmd ip x s flush
+ run_cmd ip x p flush
ip addr del $srcip/32 dev $devdummy
if [ $ret -ne 0 ]; then
- echo "FAIL: ipsec"
+ end_test "FAIL: ipsec"
return 1
fi
- echo "PASS: ipsec"
+ end_test "PASS: ipsec"
}
#-------------------------------------------------------------------
@@ -857,10 +740,9 @@ kci_test_ipsec_offload()
# setup netdevsim since dummydev doesn't have offload support
if [ ! -w /sys/bus/netdevsim/new_device ] ; then
- modprobe -q netdevsim
- check_err $?
+ run_cmd modprobe -q netdevsim
if [ $ret -ne 0 ]; then
- echo "SKIP: ipsec_offload can't load netdevsim"
+ end_test "SKIP: ipsec_offload can't load netdevsim"
return $ksft_skip
fi
probed=true
@@ -874,11 +756,11 @@ kci_test_ipsec_offload()
ip addr add $srcip dev $dev
ip link set $dev up
if [ ! -d $sysfsd ] ; then
- echo "FAIL: ipsec_offload can't create device $dev"
+ end_test "FAIL: ipsec_offload can't create device $dev"
return 1
fi
if [ ! -f $sysfsf ] ; then
- echo "FAIL: ipsec_offload netdevsim doesn't support IPsec offload"
+ end_test "FAIL: ipsec_offload netdevsim doesn't support IPsec offload"
return 1
fi
@@ -886,40 +768,39 @@ kci_test_ipsec_offload()
ip x s flush ; ip x p flush
# create offloaded SAs, both in and out
- ip x p add dir out src $srcip/24 dst $dstip/24 \
+ run_cmd ip x p add dir out src $srcip/24 dst $dstip/24 \
tmpl proto esp src $srcip dst $dstip spi 9 \
mode transport reqid 42
- check_err $?
- ip x p add dir in src $dstip/24 dst $srcip/24 \
+
+ run_cmd ip x p add dir in src $dstip/24 dst $srcip/24 \
tmpl proto esp src $dstip dst $srcip spi 9 \
mode transport reqid 42
- check_err $?
- ip x s add proto esp src $srcip dst $dstip spi 9 \
+ run_cmd ip x s add proto esp src $srcip dst $dstip spi 9 \
mode transport reqid 42 $algo sel src $srcip/24 dst $dstip/24 \
offload dev $dev dir out
- check_err $?
- ip x s add proto esp src $dstip dst $srcip spi 9 \
+
+ run_cmd ip x s add proto esp src $dstip dst $srcip spi 9 \
mode transport reqid 42 $algo sel src $dstip/24 dst $srcip/24 \
offload dev $dev dir in
- check_err $?
+
if [ $ret -ne 0 ]; then
- echo "FAIL: ipsec_offload can't create SA"
+ end_test "FAIL: ipsec_offload can't create SA"
return 1
fi
# does offload show up in ip output
lines=`ip x s list | grep -c "crypto offload parameters: dev $dev dir"`
if [ $lines -ne 2 ] ; then
- echo "FAIL: ipsec_offload SA offload missing from list output"
check_err 1
+ end_test "FAIL: ipsec_offload SA offload missing from list output"
fi
# use ping to exercise the Tx path
ping -I $dev -c 3 -W 1 -i 0 $dstip >/dev/null
# does driver have correct offload info
- diff $sysfsf - << EOF
+ run_cmd diff $sysfsf - << EOF
SA count=2 tx=3
sa[0] tx ipaddr=0x00000000 00000000 00000000 00000000
sa[0] spi=0x00000009 proto=0x32 salt=0x61626364 crypt=1
@@ -929,7 +810,7 @@ sa[1] spi=0x00000009 proto=0x32 salt=0x61626364 crypt=1
sa[1] key=0x34333231 38373635 32313039 36353433
EOF
if [ $? -ne 0 ] ; then
- echo "FAIL: ipsec_offload incorrect driver data"
+ end_test "FAIL: ipsec_offload incorrect driver data"
check_err 1
fi
@@ -938,8 +819,8 @@ EOF
ip x p flush
lines=`grep -c "SA count=0" $sysfsf`
if [ $lines -ne 1 ] ; then
- echo "FAIL: ipsec_offload SA not removed from driver"
check_err 1
+ end_test "FAIL: ipsec_offload SA not removed from driver"
fi
# clean up any leftovers
@@ -947,10 +828,10 @@ EOF
$probed && rmmod netdevsim
if [ $ret -ne 0 ]; then
- echo "FAIL: ipsec_offload"
+ end_test "FAIL: ipsec_offload"
return 1
fi
- echo "PASS: ipsec_offload"
+ end_test "PASS: ipsec_offload"
}
kci_test_gretap()
@@ -959,46 +840,38 @@ kci_test_gretap()
DEV_NS=gretap00
local ret=0
- ip netns add "$testns"
+ run_cmd ip netns add "$testns"
if [ $? -ne 0 ]; then
- echo "SKIP gretap tests: cannot add net namespace $testns"
+ end_test "SKIP gretap tests: cannot add net namespace $testns"
return $ksft_skip
fi
- ip link help gretap 2>&1 | grep -q "^Usage:"
+ run_cmd_grep "^Usage:" ip link help gretap
if [ $? -ne 0 ];then
- echo "SKIP: gretap: iproute2 too old"
+ end_test "SKIP: gretap: iproute2 too old"
ip netns del "$testns"
return $ksft_skip
fi
# test native tunnel
- ip -netns "$testns" link add dev "$DEV_NS" type gretap seq \
+ run_cmd ip -netns "$testns" link add dev "$DEV_NS" type gretap seq \
key 102 local 172.16.1.100 remote 172.16.1.200
- check_err $?
-
- ip -netns "$testns" addr add dev "$DEV_NS" 10.1.1.100/24
- check_err $?
- ip -netns "$testns" link set dev $DEV_NS up
- check_err $?
- ip -netns "$testns" link del "$DEV_NS"
- check_err $?
+ run_cmd ip -netns "$testns" addr add dev "$DEV_NS" 10.1.1.100/24
+ run_cmd ip -netns "$testns" link set dev $DEV_NS ups
+ run_cmd ip -netns "$testns" link del "$DEV_NS"
# test external mode
- ip -netns "$testns" link add dev "$DEV_NS" type gretap external
- check_err $?
-
- ip -netns "$testns" link del "$DEV_NS"
- check_err $?
+ run_cmd ip -netns "$testns" link add dev "$DEV_NS" type gretap external
+ run_cmd ip -netns "$testns" link del "$DEV_NS"
if [ $ret -ne 0 ]; then
- echo "FAIL: gretap"
+ end_test "FAIL: gretap"
ip netns del "$testns"
return 1
fi
- echo "PASS: gretap"
+ end_test "PASS: gretap"
ip netns del "$testns"
}
@@ -1009,46 +882,38 @@ kci_test_ip6gretap()
DEV_NS=ip6gretap00
local ret=0
- ip netns add "$testns"
+ run_cmd ip netns add "$testns"
if [ $? -ne 0 ]; then
- echo "SKIP ip6gretap tests: cannot add net namespace $testns"
+ end_test "SKIP ip6gretap tests: cannot add net namespace $testns"
return $ksft_skip
fi
- ip link help ip6gretap 2>&1 | grep -q "^Usage:"
+ run_cmd_grep "^Usage:" ip link help ip6gretap
if [ $? -ne 0 ];then
- echo "SKIP: ip6gretap: iproute2 too old"
+ end_test "SKIP: ip6gretap: iproute2 too old"
ip netns del "$testns"
return $ksft_skip
fi
# test native tunnel
- ip -netns "$testns" link add dev "$DEV_NS" type ip6gretap seq \
+ run_cmd ip -netns "$testns" link add dev "$DEV_NS" type ip6gretap seq \
key 102 local fc00:100::1 remote fc00:100::2
- check_err $?
- ip -netns "$testns" addr add dev "$DEV_NS" fc00:200::1/96
- check_err $?
- ip -netns "$testns" link set dev $DEV_NS up
- check_err $?
-
- ip -netns "$testns" link del "$DEV_NS"
- check_err $?
+ run_cmd ip -netns "$testns" addr add dev "$DEV_NS" fc00:200::1/96
+ run_cmd ip -netns "$testns" link set dev $DEV_NS up
+ run_cmd ip -netns "$testns" link del "$DEV_NS"
# test external mode
- ip -netns "$testns" link add dev "$DEV_NS" type ip6gretap external
- check_err $?
-
- ip -netns "$testns" link del "$DEV_NS"
- check_err $?
+ run_cmd ip -netns "$testns" link add dev "$DEV_NS" type ip6gretap external
+ run_cmd ip -netns "$testns" link del "$DEV_NS"
if [ $ret -ne 0 ]; then
- echo "FAIL: ip6gretap"
+ end_test "FAIL: ip6gretap"
ip netns del "$testns"
return 1
fi
- echo "PASS: ip6gretap"
+ end_test "PASS: ip6gretap"
ip netns del "$testns"
}
@@ -1058,62 +923,47 @@ kci_test_erspan()
testns="testns"
DEV_NS=erspan00
local ret=0
-
- ip link help erspan 2>&1 | grep -q "^Usage:"
+ run_cmd_grep "^Usage:" ip link help erspan
if [ $? -ne 0 ];then
- echo "SKIP: erspan: iproute2 too old"
+ end_test "SKIP: erspan: iproute2 too old"
return $ksft_skip
fi
-
- ip netns add "$testns"
+ run_cmd ip netns add "$testns"
if [ $? -ne 0 ]; then
- echo "SKIP erspan tests: cannot add net namespace $testns"
+ end_test "SKIP erspan tests: cannot add net namespace $testns"
return $ksft_skip
fi
# test native tunnel erspan v1
- ip -netns "$testns" link add dev "$DEV_NS" type erspan seq \
+ run_cmd ip -netns "$testns" link add dev "$DEV_NS" type erspan seq \
key 102 local 172.16.1.100 remote 172.16.1.200 \
erspan_ver 1 erspan 488
- check_err $?
- ip -netns "$testns" addr add dev "$DEV_NS" 10.1.1.100/24
- check_err $?
- ip -netns "$testns" link set dev $DEV_NS up
- check_err $?
-
- ip -netns "$testns" link del "$DEV_NS"
- check_err $?
+ run_cmd ip -netns "$testns" addr add dev "$DEV_NS" 10.1.1.100/24
+ run_cmd ip -netns "$testns" link set dev $DEV_NS up
+ run_cmd ip -netns "$testns" link del "$DEV_NS"
# test native tunnel erspan v2
- ip -netns "$testns" link add dev "$DEV_NS" type erspan seq \
+ run_cmd ip -netns "$testns" link add dev "$DEV_NS" type erspan seq \
key 102 local 172.16.1.100 remote 172.16.1.200 \
erspan_ver 2 erspan_dir ingress erspan_hwid 7
- check_err $?
-
- ip -netns "$testns" addr add dev "$DEV_NS" 10.1.1.100/24
- check_err $?
- ip -netns "$testns" link set dev $DEV_NS up
- check_err $?
- ip -netns "$testns" link del "$DEV_NS"
- check_err $?
+ run_cmd ip -netns "$testns" addr add dev "$DEV_NS" 10.1.1.100/24
+ run_cmd ip -netns "$testns" link set dev $DEV_NS up
+ run_cmd ip -netns "$testns" link del "$DEV_NS"
# test external mode
- ip -netns "$testns" link add dev "$DEV_NS" type erspan external
- check_err $?
-
- ip -netns "$testns" link del "$DEV_NS"
- check_err $?
+ run_cmd ip -netns "$testns" link add dev "$DEV_NS" type erspan external
+ run_cmd ip -netns "$testns" link del "$DEV_NS"
if [ $ret -ne 0 ]; then
- echo "FAIL: erspan"
+ end_test "FAIL: erspan"
ip netns del "$testns"
return 1
fi
- echo "PASS: erspan"
+ end_test "PASS: erspan"
ip netns del "$testns"
}
@@ -1123,63 +973,49 @@ kci_test_ip6erspan()
testns="testns"
DEV_NS=ip6erspan00
local ret=0
-
- ip link help ip6erspan 2>&1 | grep -q "^Usage:"
+ run_cmd_grep "^Usage:" ip link help ip6erspan
if [ $? -ne 0 ];then
- echo "SKIP: ip6erspan: iproute2 too old"
+ end_test "SKIP: ip6erspan: iproute2 too old"
return $ksft_skip
fi
-
- ip netns add "$testns"
+ run_cmd ip netns add "$testns"
if [ $? -ne 0 ]; then
- echo "SKIP ip6erspan tests: cannot add net namespace $testns"
+ end_test "SKIP ip6erspan tests: cannot add net namespace $testns"
return $ksft_skip
fi
# test native tunnel ip6erspan v1
- ip -netns "$testns" link add dev "$DEV_NS" type ip6erspan seq \
+ run_cmd ip -netns "$testns" link add dev "$DEV_NS" type ip6erspan seq \
key 102 local fc00:100::1 remote fc00:100::2 \
erspan_ver 1 erspan 488
- check_err $?
- ip -netns "$testns" addr add dev "$DEV_NS" 10.1.1.100/24
- check_err $?
- ip -netns "$testns" link set dev $DEV_NS up
- check_err $?
-
- ip -netns "$testns" link del "$DEV_NS"
- check_err $?
+ run_cmd ip -netns "$testns" addr add dev "$DEV_NS" 10.1.1.100/24
+ run_cmd ip -netns "$testns" link set dev $DEV_NS up
+ run_cmd ip -netns "$testns" link del "$DEV_NS"
# test native tunnel ip6erspan v2
- ip -netns "$testns" link add dev "$DEV_NS" type ip6erspan seq \
+ run_cmd ip -netns "$testns" link add dev "$DEV_NS" type ip6erspan seq \
key 102 local fc00:100::1 remote fc00:100::2 \
erspan_ver 2 erspan_dir ingress erspan_hwid 7
- check_err $?
- ip -netns "$testns" addr add dev "$DEV_NS" 10.1.1.100/24
- check_err $?
- ip -netns "$testns" link set dev $DEV_NS up
- check_err $?
-
- ip -netns "$testns" link del "$DEV_NS"
- check_err $?
+ run_cmd ip -netns "$testns" addr add dev "$DEV_NS" 10.1.1.100/24
+ run_cmd ip -netns "$testns" link set dev $DEV_NS up
+ run_cmd ip -netns "$testns" link del "$DEV_NS"
# test external mode
- ip -netns "$testns" link add dev "$DEV_NS" \
+ run_cmd ip -netns "$testns" link add dev "$DEV_NS" \
type ip6erspan external
- check_err $?
- ip -netns "$testns" link del "$DEV_NS"
- check_err $?
+ run_cmd ip -netns "$testns" link del "$DEV_NS"
if [ $ret -ne 0 ]; then
- echo "FAIL: ip6erspan"
+ end_test "FAIL: ip6erspan"
ip netns del "$testns"
return 1
fi
- echo "PASS: ip6erspan"
+ end_test "PASS: ip6erspan"
ip netns del "$testns"
}
@@ -1195,45 +1031,35 @@ kci_test_fdb_get()
dstip="10.0.2.3"
local ret=0
- bridge fdb help 2>&1 |grep -q 'bridge fdb get'
+ run_cmd_grep 'bridge fdb get' bridge fdb help
if [ $? -ne 0 ];then
- echo "SKIP: fdb get tests: iproute2 too old"
+ end_test "SKIP: fdb get tests: iproute2 too old"
return $ksft_skip
fi
- ip netns add testns
+ run_cmd ip netns add testns
if [ $? -ne 0 ]; then
- echo "SKIP fdb get tests: cannot add net namespace $testns"
+ end_test "SKIP fdb get tests: cannot add net namespace $testns"
return $ksft_skip
fi
-
- $IP link add "$vxlandev" type vxlan id 10 local $localip \
- dstport 4789 2>/dev/null
- check_err $?
- $IP link add name "$brdev" type bridge &>/dev/null
- check_err $?
- $IP link set dev "$vxlandev" master "$brdev" &>/dev/null
- check_err $?
- $BRIDGE fdb add $test_mac dev "$vxlandev" master &>/dev/null
- check_err $?
- $BRIDGE fdb add $test_mac dev "$vxlandev" dst $dstip self &>/dev/null
- check_err $?
-
- $BRIDGE fdb get $test_mac brport "$vxlandev" 2>/dev/null | grep -q "dev $vxlandev master $brdev"
- check_err $?
- $BRIDGE fdb get $test_mac br "$brdev" 2>/dev/null | grep -q "dev $vxlandev master $brdev"
- check_err $?
- $BRIDGE fdb get $test_mac dev "$vxlandev" self 2>/dev/null | grep -q "dev $vxlandev dst $dstip"
- check_err $?
+ run_cmd $IP link add "$vxlandev" type vxlan id 10 local $localip \
+ dstport 4789
+ run_cmd $IP link add name "$brdev" type bridge
+ run_cmd $IP link set dev "$vxlandev" master "$brdev"
+ run_cmd $BRIDGE fdb add $test_mac dev "$vxlandev" master
+ run_cmd $BRIDGE fdb add $test_mac dev "$vxlandev" dst $dstip self
+ run_cmd_grep "dev $vxlandev master $brdev" $BRIDGE fdb get $test_mac brport "$vxlandev"
+ run_cmd_grep "dev $vxlandev master $brdev" $BRIDGE fdb get $test_mac br "$brdev"
+ run_cmd_grep "dev $vxlandev dst $dstip" $BRIDGE fdb get $test_mac dev "$vxlandev" self
ip netns del testns &>/dev/null
if [ $ret -ne 0 ]; then
- echo "FAIL: bridge fdb get"
+ end_test "FAIL: bridge fdb get"
return 1
fi
- echo "PASS: bridge fdb get"
+ end_test "PASS: bridge fdb get"
}
kci_test_neigh_get()
@@ -1243,50 +1069,38 @@ kci_test_neigh_get()
dstip6=dead::2
local ret=0
- ip neigh help 2>&1 |grep -q 'ip neigh get'
+ run_cmd_grep 'ip neigh get' ip neigh help
if [ $? -ne 0 ];then
- echo "SKIP: fdb get tests: iproute2 too old"
+ end_test "SKIP: fdb get tests: iproute2 too old"
return $ksft_skip
fi
# ipv4
- ip neigh add $dstip lladdr $dstmac dev "$devdummy" > /dev/null
- check_err $?
- ip neigh get $dstip dev "$devdummy" 2> /dev/null | grep -q "$dstmac"
- check_err $?
- ip neigh del $dstip lladdr $dstmac dev "$devdummy" > /dev/null
- check_err $?
+ run_cmd ip neigh add $dstip lladdr $dstmac dev "$devdummy"
+ run_cmd_grep "$dstmac" ip neigh get $dstip dev "$devdummy"
+ run_cmd ip neigh del $dstip lladdr $dstmac dev "$devdummy"
# ipv4 proxy
- ip neigh add proxy $dstip dev "$devdummy" > /dev/null
- check_err $?
- ip neigh get proxy $dstip dev "$devdummy" 2>/dev/null | grep -q "$dstip"
- check_err $?
- ip neigh del proxy $dstip dev "$devdummy" > /dev/null
- check_err $?
+ run_cmd ip neigh add proxy $dstip dev "$devdummy"
+ run_cmd_grep "$dstip" ip neigh get proxy $dstip dev "$devdummy"
+ run_cmd ip neigh del proxy $dstip dev "$devdummy"
# ipv6
- ip neigh add $dstip6 lladdr $dstmac dev "$devdummy" > /dev/null
- check_err $?
- ip neigh get $dstip6 dev "$devdummy" 2> /dev/null | grep -q "$dstmac"
- check_err $?
- ip neigh del $dstip6 lladdr $dstmac dev "$devdummy" > /dev/null
- check_err $?
+ run_cmd ip neigh add $dstip6 lladdr $dstmac dev "$devdummy"
+ run_cmd_grep "$dstmac" ip neigh get $dstip6 dev "$devdummy"
+ run_cmd ip neigh del $dstip6 lladdr $dstmac dev "$devdummy"
# ipv6 proxy
- ip neigh add proxy $dstip6 dev "$devdummy" > /dev/null
- check_err $?
- ip neigh get proxy $dstip6 dev "$devdummy" 2>/dev/null | grep -q "$dstip6"
- check_err $?
- ip neigh del proxy $dstip6 dev "$devdummy" > /dev/null
- check_err $?
+ run_cmd ip neigh add proxy $dstip6 dev "$devdummy"
+ run_cmd_grep "$dstip6" ip neigh get proxy $dstip6 dev "$devdummy"
+ run_cmd ip neigh del proxy $dstip6 dev "$devdummy"
if [ $ret -ne 0 ];then
- echo "FAIL: neigh get"
+ end_test "FAIL: neigh get"
return 1
fi
- echo "PASS: neigh get"
+ end_test "PASS: neigh get"
}
kci_test_bridge_parent_id()
@@ -1296,10 +1110,9 @@ kci_test_bridge_parent_id()
probed=false
if [ ! -w /sys/bus/netdevsim/new_device ] ; then
- modprobe -q netdevsim
- check_err $?
+ run_cmd modprobe -q netdevsim
if [ $ret -ne 0 ]; then
- echo "SKIP: bridge_parent_id can't load netdevsim"
+ end_test "SKIP: bridge_parent_id can't load netdevsim"
return $ksft_skip
fi
probed=true
@@ -1312,13 +1125,11 @@ kci_test_bridge_parent_id()
udevadm settle
dev10=`ls ${sysfsnet}10/net/`
dev20=`ls ${sysfsnet}20/net/`
-
- ip link add name test-bond0 type bond mode 802.3ad
- ip link set dev $dev10 master test-bond0
- ip link set dev $dev20 master test-bond0
- ip link add name test-br0 type bridge
- ip link set dev test-bond0 master test-br0
- check_err $?
+ run_cmd ip link add name test-bond0 type bond mode 802.3ad
+ run_cmd ip link set dev $dev10 master test-bond0
+ run_cmd ip link set dev $dev20 master test-bond0
+ run_cmd ip link add name test-br0 type bridge
+ run_cmd ip link set dev test-bond0 master test-br0
# clean up any leftovers
ip link del dev test-br0
@@ -1328,10 +1139,10 @@ kci_test_bridge_parent_id()
$probed && rmmod netdevsim
if [ $ret -ne 0 ]; then
- echo "FAIL: bridge_parent_id"
+ end_test "FAIL: bridge_parent_id"
return 1
fi
- echo "PASS: bridge_parent_id"
+ end_test "PASS: bridge_parent_id"
}
address_get_proto()
@@ -1409,10 +1220,10 @@ do_test_address_proto()
ip address del dev "$devdummy" "$addr3"
if [ $ret -ne 0 ]; then
- echo "FAIL: address proto $what"
+ end_test "FAIL: address proto $what"
return 1
fi
- echo "PASS: address proto $what"
+ end_test "PASS: address proto $what"
}
kci_test_address_proto()
@@ -1435,7 +1246,7 @@ kci_test_rtnl()
kci_add_dummy
if [ $ret -ne 0 ];then
- echo "FAIL: cannot add dummy interface"
+ end_test "FAIL: cannot add dummy interface"
return 1
fi
@@ -1455,31 +1266,39 @@ usage: ${0##*/} OPTS
-t <test> Test(s) to run (default: all)
(options: $(echo $ALL_TESTS))
+ -v Verbose mode (show commands and output)
+ -P Pause after every test
+ -p Pause after every failing test before cleanup (for debugging)
EOF
}
#check for needed privileges
if [ "$(id -u)" -ne 0 ];then
- echo "SKIP: Need root privileges"
+ end_test "SKIP: Need root privileges"
exit $ksft_skip
fi
for x in ip tc;do
$x -Version 2>/dev/null >/dev/null
if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without the $x tool"
+ end_test "SKIP: Could not run test without the $x tool"
exit $ksft_skip
fi
done
-while getopts t:h o; do
+while getopts t:hvpP o; do
case $o in
t) TESTS=$OPTARG;;
+ v) VERBOSE=1;;
+ p) PAUSE_ON_FAIL=yes;;
+ P) PAUSE=yes;;
h) usage; exit 0;;
*) usage; exit 1;;
esac
done
+[ $PAUSE = "yes" ] && PAUSE_ON_FAIL="no"
+
kci_test_rtnl
exit $?
diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/fw.json b/tools/testing/selftests/tc-testing/tc-tests/filters/fw.json
index 5272049566d6..a4a83fb3e96f 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/filters/fw.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/filters/fw.json
@@ -1351,5 +1351,54 @@
"teardown": [
"$TC qdisc del dev $DEV1 ingress"
]
+ },
+ {
+ "id": "e470",
+ "name": "Try to delete class referenced by fw after a replace",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 parent root handle 10: drr",
+ "$TC class add dev $DEV1 parent root classid 1 drr",
+ "$TC filter add dev $DEV1 parent 10: handle 1 prio 1 fw classid 10:1 action ok",
+ "$TC filter replace dev $DEV1 parent 10: handle 1 prio 1 fw classid 10:1 action drop"
+ ],
+ "cmdUnderTest": "$TC class delete dev $DEV1 parent 10: classid 10:1",
+ "expExitCode": "2",
+ "verifyCmd": "$TC class show dev $DEV1",
+ "matchPattern": "class drr 10:1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 parent root drr"
+ ]
+ },
+ {
+ "id": "ec1a",
+ "name": "Replace fw classid with nil",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 parent root handle 10: drr",
+ "$TC class add dev $DEV1 parent root classid 1 drr",
+ "$TC filter add dev $DEV1 parent 10: handle 1 prio 1 fw classid 10:1 action ok"
+ ],
+ "cmdUnderTest": "$TC filter replace dev $DEV1 parent 10: handle 1 prio 1 fw action drop",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show dev $DEV1 parent 10:",
+ "matchPattern": "fw chain 0 handle 0x1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 parent root drr"
+ ]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/route.json b/tools/testing/selftests/tc-testing/tc-tests/filters/route.json
index 1f6f19f02997..8d8de8f65aef 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/filters/route.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/filters/route.json
@@ -177,5 +177,30 @@
"teardown": [
"$TC qdisc del dev $DEV1 ingress"
]
+ },
+ {
+ "id": "b042",
+ "name": "Try to delete class referenced by route after a replace",
+ "category": [
+ "filter",
+ "route"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 parent root handle 10: drr",
+ "$TC class add dev $DEV1 parent root classid 1 drr",
+ "$TC filter add dev $DEV1 parent 10: prio 1 route from 10 classid 10:1 action ok",
+ "$TC filter replace dev $DEV1 parent 10: prio 1 route from 5 classid 10:1 action drop"
+ ],
+ "cmdUnderTest": "$TC class delete dev $DEV1 parent 10: classid 10:1",
+ "expExitCode": "2",
+ "verifyCmd": "$TC class show dev $DEV1",
+ "matchPattern": "class drr 10:1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 parent root drr"
+ ]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json b/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json
index bd64a4bf11ab..ddc7c355be0a 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json
@@ -247,5 +247,30 @@
"teardown": [
"$TC qdisc del dev $DEV1 ingress"
]
+ },
+ {
+ "id": "0c37",
+ "name": "Try to delete class referenced by u32 after a replace",
+ "category": [
+ "filter",
+ "u32"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 parent root handle 10: drr",
+ "$TC class add dev $DEV1 parent root classid 1 drr",
+ "$TC filter add dev $DEV1 parent 10: prio 1 u32 match icmp type 1 0xff classid 10:1 action ok",
+ "$TC filter replace dev $DEV1 parent 10: prio 1 u32 match icmp type 1 0xff classid 10:1 action drop"
+ ],
+ "cmdUnderTest": "$TC class delete dev $DEV1 parent 10: classid 10:1",
+ "expExitCode": "2",
+ "verifyCmd": "$TC class show dev $DEV1",
+ "matchPattern": "class drr 10:1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 parent root drr"
+ ]
}
]
diff --git a/tools/testing/vsock/util.c b/tools/testing/vsock/util.c
index 01b636d3039a..6779d5008b27 100644
--- a/tools/testing/vsock/util.c
+++ b/tools/testing/vsock/util.c
@@ -211,102 +211,138 @@ int vsock_seqpacket_accept(unsigned int cid, unsigned int port,
return vsock_accept(cid, port, clientaddrp, SOCK_SEQPACKET);
}
-/* Transmit one byte and check the return value.
+/* Transmit bytes from a buffer and check the return value.
*
* expected_ret:
* <0 Negative errno (for testing errors)
* 0 End-of-file
- * 1 Success
+ * >0 Success (bytes successfully written)
*/
-void send_byte(int fd, int expected_ret, int flags)
+void send_buf(int fd, const void *buf, size_t len, int flags,
+ ssize_t expected_ret)
{
- const uint8_t byte = 'A';
- ssize_t nwritten;
+ ssize_t nwritten = 0;
+ ssize_t ret;
timeout_begin(TIMEOUT);
do {
- nwritten = send(fd, &byte, sizeof(byte), flags);
- timeout_check("write");
- } while (nwritten < 0 && errno == EINTR);
+ ret = send(fd, buf + nwritten, len - nwritten, flags);
+ timeout_check("send");
+
+ if (ret == 0 || (ret < 0 && errno != EINTR))
+ break;
+
+ nwritten += ret;
+ } while (nwritten < len);
timeout_end();
if (expected_ret < 0) {
- if (nwritten != -1) {
- fprintf(stderr, "bogus send(2) return value %zd\n",
- nwritten);
+ if (ret != -1) {
+ fprintf(stderr, "bogus send(2) return value %zd (expected %zd)\n",
+ ret, expected_ret);
exit(EXIT_FAILURE);
}
if (errno != -expected_ret) {
- perror("write");
+ perror("send");
exit(EXIT_FAILURE);
}
return;
}
- if (nwritten < 0) {
- perror("write");
+ if (ret < 0) {
+ perror("send");
exit(EXIT_FAILURE);
}
- if (nwritten == 0) {
- if (expected_ret == 0)
- return;
- fprintf(stderr, "unexpected EOF while sending byte\n");
- exit(EXIT_FAILURE);
- }
- if (nwritten != sizeof(byte)) {
- fprintf(stderr, "bogus send(2) return value %zd\n", nwritten);
+ if (nwritten != expected_ret) {
+ if (ret == 0)
+ fprintf(stderr, "unexpected EOF while sending bytes\n");
+
+ fprintf(stderr, "bogus send(2) bytes written %zd (expected %zd)\n",
+ nwritten, expected_ret);
exit(EXIT_FAILURE);
}
}
-/* Receive one byte and check the return value.
+/* Receive bytes in a buffer and check the return value.
*
* expected_ret:
* <0 Negative errno (for testing errors)
* 0 End-of-file
- * 1 Success
+ * >0 Success (bytes successfully read)
*/
-void recv_byte(int fd, int expected_ret, int flags)
+void recv_buf(int fd, void *buf, size_t len, int flags, ssize_t expected_ret)
{
- uint8_t byte;
- ssize_t nread;
+ ssize_t nread = 0;
+ ssize_t ret;
timeout_begin(TIMEOUT);
do {
- nread = recv(fd, &byte, sizeof(byte), flags);
- timeout_check("read");
- } while (nread < 0 && errno == EINTR);
+ ret = recv(fd, buf + nread, len - nread, flags);
+ timeout_check("recv");
+
+ if (ret == 0 || (ret < 0 && errno != EINTR))
+ break;
+
+ nread += ret;
+ } while (nread < len);
timeout_end();
if (expected_ret < 0) {
- if (nread != -1) {
- fprintf(stderr, "bogus recv(2) return value %zd\n",
- nread);
+ if (ret != -1) {
+ fprintf(stderr, "bogus recv(2) return value %zd (expected %zd)\n",
+ ret, expected_ret);
exit(EXIT_FAILURE);
}
if (errno != -expected_ret) {
- perror("read");
+ perror("recv");
exit(EXIT_FAILURE);
}
return;
}
- if (nread < 0) {
- perror("read");
+ if (ret < 0) {
+ perror("recv");
exit(EXIT_FAILURE);
}
- if (nread == 0) {
- if (expected_ret == 0)
- return;
- fprintf(stderr, "unexpected EOF while receiving byte\n");
- exit(EXIT_FAILURE);
- }
- if (nread != sizeof(byte)) {
- fprintf(stderr, "bogus recv(2) return value %zd\n", nread);
+ if (nread != expected_ret) {
+ if (ret == 0)
+ fprintf(stderr, "unexpected EOF while receiving bytes\n");
+
+ fprintf(stderr, "bogus recv(2) bytes read %zd (expected %zd)\n",
+ nread, expected_ret);
exit(EXIT_FAILURE);
}
+}
+
+/* Transmit one byte and check the return value.
+ *
+ * expected_ret:
+ * <0 Negative errno (for testing errors)
+ * 0 End-of-file
+ * 1 Success
+ */
+void send_byte(int fd, int expected_ret, int flags)
+{
+ const uint8_t byte = 'A';
+
+ send_buf(fd, &byte, sizeof(byte), flags, expected_ret);
+}
+
+/* Receive one byte and check the return value.
+ *
+ * expected_ret:
+ * <0 Negative errno (for testing errors)
+ * 0 End-of-file
+ * 1 Success
+ */
+void recv_byte(int fd, int expected_ret, int flags)
+{
+ uint8_t byte;
+
+ recv_buf(fd, &byte, sizeof(byte), flags, expected_ret);
+
if (byte != 'A') {
fprintf(stderr, "unexpected byte read %c\n", byte);
exit(EXIT_FAILURE);
diff --git a/tools/testing/vsock/util.h b/tools/testing/vsock/util.h
index fb99208a95ea..e5407677ce05 100644
--- a/tools/testing/vsock/util.h
+++ b/tools/testing/vsock/util.h
@@ -42,6 +42,9 @@ int vsock_stream_accept(unsigned int cid, unsigned int port,
int vsock_seqpacket_accept(unsigned int cid, unsigned int port,
struct sockaddr_vm *clientaddrp);
void vsock_wait_remote_close(int fd);
+void send_buf(int fd, const void *buf, size_t len, int flags,
+ ssize_t expected_ret);
+void recv_buf(int fd, void *buf, size_t len, int flags, ssize_t expected_ret);
void send_byte(int fd, int expected_ret, int flags);
void recv_byte(int fd, int expected_ret, int flags);
void run_tests(const struct test_case *test_cases,
diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c
index 90718c2fd4ea..da4cb819a183 100644
--- a/tools/testing/vsock/vsock_test.c
+++ b/tools/testing/vsock/vsock_test.c
@@ -19,6 +19,7 @@
#include <time.h>
#include <sys/mman.h>
#include <poll.h>
+#include <signal.h>
#include "timeout.h"
#include "control.h"
@@ -261,7 +262,6 @@ static void test_msg_peek_client(const struct test_opts *opts,
bool seqpacket)
{
unsigned char buf[MSG_PEEK_BUF_LEN];
- ssize_t send_size;
int fd;
int i;
@@ -280,17 +280,7 @@ static void test_msg_peek_client(const struct test_opts *opts,
control_expectln("SRVREADY");
- send_size = send(fd, buf, sizeof(buf), 0);
-
- if (send_size < 0) {
- perror("send");
- exit(EXIT_FAILURE);
- }
-
- if (send_size != sizeof(buf)) {
- fprintf(stderr, "Invalid send size %zi\n", send_size);
- exit(EXIT_FAILURE);
- }
+ send_buf(fd, buf, sizeof(buf), 0, sizeof(buf));
close(fd);
}
@@ -301,7 +291,6 @@ static void test_msg_peek_server(const struct test_opts *opts,
unsigned char buf_half[MSG_PEEK_BUF_LEN / 2];
unsigned char buf_normal[MSG_PEEK_BUF_LEN];
unsigned char buf_peek[MSG_PEEK_BUF_LEN];
- ssize_t res;
int fd;
if (seqpacket)
@@ -315,34 +304,16 @@ static void test_msg_peek_server(const struct test_opts *opts,
}
/* Peek from empty socket. */
- res = recv(fd, buf_peek, sizeof(buf_peek), MSG_PEEK | MSG_DONTWAIT);
- if (res != -1) {
- fprintf(stderr, "expected recv(2) failure, got %zi\n", res);
- exit(EXIT_FAILURE);
- }
-
- if (errno != EAGAIN) {
- perror("EAGAIN expected");
- exit(EXIT_FAILURE);
- }
+ recv_buf(fd, buf_peek, sizeof(buf_peek), MSG_PEEK | MSG_DONTWAIT,
+ -EAGAIN);
control_writeln("SRVREADY");
/* Peek part of data. */
- res = recv(fd, buf_half, sizeof(buf_half), MSG_PEEK);
- if (res != sizeof(buf_half)) {
- fprintf(stderr, "recv(2) + MSG_PEEK, expected %zu, got %zi\n",
- sizeof(buf_half), res);
- exit(EXIT_FAILURE);
- }
+ recv_buf(fd, buf_half, sizeof(buf_half), MSG_PEEK, sizeof(buf_half));
/* Peek whole data. */
- res = recv(fd, buf_peek, sizeof(buf_peek), MSG_PEEK);
- if (res != sizeof(buf_peek)) {
- fprintf(stderr, "recv(2) + MSG_PEEK, expected %zu, got %zi\n",
- sizeof(buf_peek), res);
- exit(EXIT_FAILURE);
- }
+ recv_buf(fd, buf_peek, sizeof(buf_peek), MSG_PEEK, sizeof(buf_peek));
/* Compare partial and full peek. */
if (memcmp(buf_half, buf_peek, sizeof(buf_half))) {
@@ -355,22 +326,11 @@ static void test_msg_peek_server(const struct test_opts *opts,
* so check it with MSG_PEEK. We must get length
* of the message.
*/
- res = recv(fd, buf_half, sizeof(buf_half), MSG_PEEK |
- MSG_TRUNC);
- if (res != sizeof(buf_peek)) {
- fprintf(stderr,
- "recv(2) + MSG_PEEK | MSG_TRUNC, exp %zu, got %zi\n",
- sizeof(buf_half), res);
- exit(EXIT_FAILURE);
- }
+ recv_buf(fd, buf_half, sizeof(buf_half), MSG_PEEK | MSG_TRUNC,
+ sizeof(buf_peek));
}
- res = recv(fd, buf_normal, sizeof(buf_normal), 0);
- if (res != sizeof(buf_normal)) {
- fprintf(stderr, "recv(2), expected %zu, got %zi\n",
- sizeof(buf_normal), res);
- exit(EXIT_FAILURE);
- }
+ recv_buf(fd, buf_normal, sizeof(buf_normal), 0, sizeof(buf_normal));
/* Compare full peek and normal read. */
if (memcmp(buf_peek, buf_normal, sizeof(buf_peek))) {
@@ -415,7 +375,6 @@ static void test_seqpacket_msg_bounds_client(const struct test_opts *opts)
msg_count = SOCK_BUF_SIZE / MAX_MSG_SIZE;
for (int i = 0; i < msg_count; i++) {
- ssize_t send_size;
size_t buf_size;
int flags;
void *buf;
@@ -443,17 +402,7 @@ static void test_seqpacket_msg_bounds_client(const struct test_opts *opts)
flags = 0;
}
- send_size = send(fd, buf, buf_size, flags);
-
- if (send_size < 0) {
- perror("send");
- exit(EXIT_FAILURE);
- }
-
- if (send_size != buf_size) {
- fprintf(stderr, "Invalid send size\n");
- exit(EXIT_FAILURE);
- }
+ send_buf(fd, buf, buf_size, flags, buf_size);
/*
* Hash sum is computed at both client and server in
@@ -554,10 +503,7 @@ static void test_seqpacket_msg_trunc_client(const struct test_opts *opts)
exit(EXIT_FAILURE);
}
- if (send(fd, buf, sizeof(buf), 0) != sizeof(buf)) {
- perror("send failed");
- exit(EXIT_FAILURE);
- }
+ send_buf(fd, buf, sizeof(buf), 0, sizeof(buf));
control_writeln("SENDDONE");
close(fd);
@@ -679,7 +625,6 @@ static void test_seqpacket_timeout_server(const struct test_opts *opts)
static void test_seqpacket_bigmsg_client(const struct test_opts *opts)
{
unsigned long sock_buf_size;
- ssize_t send_size;
socklen_t len;
void *data;
int fd;
@@ -706,18 +651,7 @@ static void test_seqpacket_bigmsg_client(const struct test_opts *opts)
exit(EXIT_FAILURE);
}
- send_size = send(fd, data, sock_buf_size, 0);
- if (send_size != -1) {
- fprintf(stderr, "expected 'send(2)' failure, got %zi\n",
- send_size);
- exit(EXIT_FAILURE);
- }
-
- if (errno != EMSGSIZE) {
- fprintf(stderr, "expected EMSGSIZE in 'errno', got %i\n",
- errno);
- exit(EXIT_FAILURE);
- }
+ send_buf(fd, data, sock_buf_size, 0, -EMSGSIZE);
control_writeln("CLISENT");
@@ -771,15 +705,9 @@ static void test_seqpacket_invalid_rec_buffer_client(const struct test_opts *opt
memset(buf1, BUF_PATTERN_1, buf_size);
memset(buf2, BUF_PATTERN_2, buf_size);
- if (send(fd, buf1, buf_size, 0) != buf_size) {
- perror("send failed");
- exit(EXIT_FAILURE);
- }
+ send_buf(fd, buf1, buf_size, 0, buf_size);
- if (send(fd, buf2, buf_size, 0) != buf_size) {
- perror("send failed");
- exit(EXIT_FAILURE);
- }
+ send_buf(fd, buf2, buf_size, 0, buf_size);
close(fd);
}
@@ -900,7 +828,6 @@ static void test_stream_poll_rcvlowat_client(const struct test_opts *opts)
unsigned long lowat_val = RCVLOWAT_BUF_SIZE;
char buf[RCVLOWAT_BUF_SIZE];
struct pollfd fds;
- ssize_t read_res;
short poll_flags;
int fd;
@@ -955,12 +882,7 @@ static void test_stream_poll_rcvlowat_client(const struct test_opts *opts)
/* Use MSG_DONTWAIT, if call is going to wait, EAGAIN
* will be returned.
*/
- read_res = recv(fd, buf, sizeof(buf), MSG_DONTWAIT);
- if (read_res != RCVLOWAT_BUF_SIZE) {
- fprintf(stderr, "Unexpected recv result %zi\n",
- read_res);
- exit(EXIT_FAILURE);
- }
+ recv_buf(fd, buf, sizeof(buf), MSG_DONTWAIT, RCVLOWAT_BUF_SIZE);
control_writeln("POLLDONE");
@@ -972,7 +894,7 @@ static void test_stream_poll_rcvlowat_client(const struct test_opts *opts)
static void test_inv_buf_client(const struct test_opts *opts, bool stream)
{
unsigned char data[INV_BUF_TEST_DATA_LEN] = {0};
- ssize_t ret;
+ ssize_t expected_ret;
int fd;
if (stream)
@@ -988,39 +910,18 @@ static void test_inv_buf_client(const struct test_opts *opts, bool stream)
control_expectln("SENDDONE");
/* Use invalid buffer here. */
- ret = recv(fd, NULL, sizeof(data), 0);
- if (ret != -1) {
- fprintf(stderr, "expected recv(2) failure, got %zi\n", ret);
- exit(EXIT_FAILURE);
- }
-
- if (errno != EFAULT) {
- fprintf(stderr, "unexpected recv(2) errno %d\n", errno);
- exit(EXIT_FAILURE);
- }
-
- ret = recv(fd, data, sizeof(data), MSG_DONTWAIT);
+ recv_buf(fd, NULL, sizeof(data), 0, -EFAULT);
if (stream) {
/* For SOCK_STREAM we must continue reading. */
- if (ret != sizeof(data)) {
- fprintf(stderr, "expected recv(2) success, got %zi\n", ret);
- exit(EXIT_FAILURE);
- }
- /* Don't check errno in case of success. */
+ expected_ret = sizeof(data);
} else {
/* For SOCK_SEQPACKET socket's queue must be empty. */
- if (ret != -1) {
- fprintf(stderr, "expected recv(2) failure, got %zi\n", ret);
- exit(EXIT_FAILURE);
- }
-
- if (errno != EAGAIN) {
- fprintf(stderr, "unexpected recv(2) errno %d\n", errno);
- exit(EXIT_FAILURE);
- }
+ expected_ret = -EAGAIN;
}
+ recv_buf(fd, data, sizeof(data), MSG_DONTWAIT, expected_ret);
+
control_writeln("DONE");
close(fd);
@@ -1029,7 +930,6 @@ static void test_inv_buf_client(const struct test_opts *opts, bool stream)
static void test_inv_buf_server(const struct test_opts *opts, bool stream)
{
unsigned char data[INV_BUF_TEST_DATA_LEN] = {0};
- ssize_t res;
int fd;
if (stream)
@@ -1042,11 +942,7 @@ static void test_inv_buf_server(const struct test_opts *opts, bool stream)
exit(EXIT_FAILURE);
}
- res = send(fd, data, sizeof(data), 0);
- if (res != sizeof(data)) {
- fprintf(stderr, "unexpected send(2) result %zi\n", res);
- exit(EXIT_FAILURE);
- }
+ send_buf(fd, data, sizeof(data), 0, sizeof(data));
control_writeln("SENDDONE");
@@ -1080,7 +976,6 @@ static void test_seqpacket_inv_buf_server(const struct test_opts *opts)
static void test_stream_virtio_skb_merge_client(const struct test_opts *opts)
{
- ssize_t res;
int fd;
fd = vsock_stream_connect(opts->peer_cid, 1234);
@@ -1090,22 +985,14 @@ static void test_stream_virtio_skb_merge_client(const struct test_opts *opts)
}
/* Send first skbuff. */
- res = send(fd, HELLO_STR, strlen(HELLO_STR), 0);
- if (res != strlen(HELLO_STR)) {
- fprintf(stderr, "unexpected send(2) result %zi\n", res);
- exit(EXIT_FAILURE);
- }
+ send_buf(fd, HELLO_STR, strlen(HELLO_STR), 0, strlen(HELLO_STR));
control_writeln("SEND0");
/* Peer reads part of first skbuff. */
control_expectln("REPLY0");
/* Send second skbuff, it will be appended to the first. */
- res = send(fd, WORLD_STR, strlen(WORLD_STR), 0);
- if (res != strlen(WORLD_STR)) {
- fprintf(stderr, "unexpected send(2) result %zi\n", res);
- exit(EXIT_FAILURE);
- }
+ send_buf(fd, WORLD_STR, strlen(WORLD_STR), 0, strlen(WORLD_STR));
control_writeln("SEND1");
/* Peer reads merged skbuff packet. */
@@ -1116,8 +1003,8 @@ static void test_stream_virtio_skb_merge_client(const struct test_opts *opts)
static void test_stream_virtio_skb_merge_server(const struct test_opts *opts)
{
+ size_t read = 0, to_read;
unsigned char buf[64];
- ssize_t res;
int fd;
fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL);
@@ -1129,26 +1016,21 @@ static void test_stream_virtio_skb_merge_server(const struct test_opts *opts)
control_expectln("SEND0");
/* Read skbuff partially. */
- res = recv(fd, buf, 2, 0);
- if (res != 2) {
- fprintf(stderr, "expected recv(2) returns 2 bytes, got %zi\n", res);
- exit(EXIT_FAILURE);
- }
+ to_read = 2;
+ recv_buf(fd, buf + read, to_read, 0, to_read);
+ read += to_read;
control_writeln("REPLY0");
control_expectln("SEND1");
- res = recv(fd, buf + 2, sizeof(buf) - 2, 0);
- if (res != 8) {
- fprintf(stderr, "expected recv(2) returns 8 bytes, got %zi\n", res);
- exit(EXIT_FAILURE);
- }
+ /* Read the rest of both buffers */
+ to_read = strlen(HELLO_STR WORLD_STR) - read;
+ recv_buf(fd, buf + read, to_read, 0, to_read);
+ read += to_read;
- res = recv(fd, buf, sizeof(buf) - 8 - 2, MSG_DONTWAIT);
- if (res != -1) {
- fprintf(stderr, "expected recv(2) failure, got %zi\n", res);
- exit(EXIT_FAILURE);
- }
+ /* No more bytes should be there */
+ to_read = sizeof(buf) - read;
+ recv_buf(fd, buf + read, to_read, MSG_DONTWAIT, -EAGAIN);
if (memcmp(buf, HELLO_STR WORLD_STR, strlen(HELLO_STR WORLD_STR))) {
fprintf(stderr, "pattern mismatch\n");
@@ -1170,6 +1052,133 @@ static void test_seqpacket_msg_peek_server(const struct test_opts *opts)
return test_msg_peek_server(opts, true);
}
+static sig_atomic_t have_sigpipe;
+
+static void sigpipe(int signo)
+{
+ have_sigpipe = 1;
+}
+
+static void test_stream_check_sigpipe(int fd)
+{
+ ssize_t res;
+
+ have_sigpipe = 0;
+
+ res = send(fd, "A", 1, 0);
+ if (res != -1) {
+ fprintf(stderr, "expected send(2) failure, got %zi\n", res);
+ exit(EXIT_FAILURE);
+ }
+
+ if (!have_sigpipe) {
+ fprintf(stderr, "SIGPIPE expected\n");
+ exit(EXIT_FAILURE);
+ }
+
+ have_sigpipe = 0;
+
+ res = send(fd, "A", 1, MSG_NOSIGNAL);
+ if (res != -1) {
+ fprintf(stderr, "expected send(2) failure, got %zi\n", res);
+ exit(EXIT_FAILURE);
+ }
+
+ if (have_sigpipe) {
+ fprintf(stderr, "SIGPIPE not expected\n");
+ exit(EXIT_FAILURE);
+ }
+}
+
+static void test_stream_shutwr_client(const struct test_opts *opts)
+{
+ int fd;
+
+ struct sigaction act = {
+ .sa_handler = sigpipe,
+ };
+
+ sigaction(SIGPIPE, &act, NULL);
+
+ fd = vsock_stream_connect(opts->peer_cid, 1234);
+ if (fd < 0) {
+ perror("connect");
+ exit(EXIT_FAILURE);
+ }
+
+ if (shutdown(fd, SHUT_WR)) {
+ perror("shutdown");
+ exit(EXIT_FAILURE);
+ }
+
+ test_stream_check_sigpipe(fd);
+
+ control_writeln("CLIENTDONE");
+
+ close(fd);
+}
+
+static void test_stream_shutwr_server(const struct test_opts *opts)
+{
+ int fd;
+
+ fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL);
+ if (fd < 0) {
+ perror("accept");
+ exit(EXIT_FAILURE);
+ }
+
+ control_expectln("CLIENTDONE");
+
+ close(fd);
+}
+
+static void test_stream_shutrd_client(const struct test_opts *opts)
+{
+ int fd;
+
+ struct sigaction act = {
+ .sa_handler = sigpipe,
+ };
+
+ sigaction(SIGPIPE, &act, NULL);
+
+ fd = vsock_stream_connect(opts->peer_cid, 1234);
+ if (fd < 0) {
+ perror("connect");
+ exit(EXIT_FAILURE);
+ }
+
+ control_expectln("SHUTRDDONE");
+
+ test_stream_check_sigpipe(fd);
+
+ control_writeln("CLIENTDONE");
+
+ close(fd);
+}
+
+static void test_stream_shutrd_server(const struct test_opts *opts)
+{
+ int fd;
+
+ fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL);
+ if (fd < 0) {
+ perror("accept");
+ exit(EXIT_FAILURE);
+ }
+
+ if (shutdown(fd, SHUT_RD)) {
+ perror("shutdown");
+ exit(EXIT_FAILURE);
+ }
+
+ control_writeln("SHUTRDDONE");
+ control_expectln("CLIENTDONE");
+
+ close(fd);
+}
+
static struct test_case test_cases[] = {
{
.name = "SOCK_STREAM connection reset",
@@ -1250,6 +1259,16 @@ static struct test_case test_cases[] = {
.run_client = test_seqpacket_msg_peek_client,
.run_server = test_seqpacket_msg_peek_server,
},
+ {
+ .name = "SOCK_STREAM SHUT_WR",
+ .run_client = test_stream_shutwr_client,
+ .run_server = test_stream_shutwr_server,
+ },
+ {
+ .name = "SOCK_STREAM SHUT_RD",
+ .run_client = test_stream_shutrd_client,
+ .run_server = test_stream_shutrd_server,
+ },
{},
};