Merge 'bpf-next 2023-10-16' into loongarch-next

LoongArch architecture changes for 6.7 (BPF CPU v4 support) depend on the bpf changes to fix conflictions in selftests and work, so merge them to create a base.
author: Huacai Chen <chenhuacai@loongson.cn> 2023-11-01 10:55:00 +0800
committer: Huacai Chen <chenhuacai@loongson.cn> 2023-11-01 10:55:00 +0800
commit: a6bdc082ad1c91d389a6ba0c7a1945818f732114 (patch)
tree: fa630701d5d2a8bc1ab8c4abf759663bbb81aeeb
parent: ffc253263a1375a65fa6c9f62a893e9767fbebfa (diff)
parent: 99c9991f4e5d77328187187d0c921a3b62bfa998 (diff)
558 files changed, 44893 insertions, 4883 deletions
diff --git a/Documentation/admin-guide/sysctl/net.rst b/Documentation/admin-guide/sysctl/net.rst
index 4877563241f3..c7525942f12c 100644
--- a/Documentation/admin-guide/sysctl/net.rst
+++ b/Documentation/admin-guide/sysctl/net.rst
@@ -71,6 +71,7 @@ two flavors of JITs, the newer eBPF JIT currently supported on:
   - s390x
   - riscv64
   - riscv32
+  - loongarch64
 
 And the older cBPF JIT supported on the following archs:
 
diff --git a/Documentation/bpf/libbpf/program_types.rst b/Documentation/bpf/libbpf/program_types.rst
index ad4d4d5eecb0..63bb88846e50 100644
--- a/Documentation/bpf/libbpf/program_types.rst
+++ b/Documentation/bpf/libbpf/program_types.rst
@@ -56,6 +56,16 @@ described in more detail in the footnotes.
 |                                           | ``BPF_CGROUP_UDP6_RECVMSG``            | ``cgroup/recvmsg6``              |           |
 +                                           +----------------------------------------+----------------------------------+-----------+
 |                                           | ``BPF_CGROUP_UDP6_SENDMSG``            | ``cgroup/sendmsg6``              |           |
+|                                           +----------------------------------------+----------------------------------+-----------+
+|                                           | ``BPF_CGROUP_UNIX_CONNECT``            | ``cgroup/connect_unix``          |           |
+|                                           +----------------------------------------+----------------------------------+-----------+
+|                                           | ``BPF_CGROUP_UNIX_SENDMSG``            | ``cgroup/sendmsg_unix``          |           |
+|                                           +----------------------------------------+----------------------------------+-----------+
+|                                           | ``BPF_CGROUP_UNIX_RECVMSG``            | ``cgroup/recvmsg_unix``          |           |
+|                                           +----------------------------------------+----------------------------------+-----------+
+|                                           | ``BPF_CGROUP_UNIX_GETPEERNAME``        | ``cgroup/getpeername_unix``      |           |
+|                                           +----------------------------------------+----------------------------------+-----------+
+|                                           | ``BPF_CGROUP_UNIX_GETSOCKNAME``        | ``cgroup/getsockname_unix``      |           |
 +-------------------------------------------+----------------------------------------+----------------------------------+-----------+
 | ``BPF_PROG_TYPE_CGROUP_SOCK``             | ``BPF_CGROUP_INET4_POST_BIND``         | ``cgroup/post_bind4``            |           |
 +                                           +----------------------------------------+----------------------------------+-----------+
diff --git a/Documentation/bpf/prog_flow_dissector.rst b/Documentation/bpf/prog_flow_dissector.rst
index 4d86780ab0f1..f24270b8b034 100644
--- a/Documentation/bpf/prog_flow_dissector.rst
+++ b/Documentation/bpf/prog_flow_dissector.rst
@@ -113,7 +113,7 @@ Flags
   used by ``eth_get_headlen`` to estimate length of all headers for GRO.
 * ``BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL`` - tells BPF flow dissector to
   stop parsing as soon as it reaches IPv6 flow label; used by
-  ``___skb_get_hash`` and ``__skb_get_hash_symmetric`` to get flow hash.
+  ``___skb_get_hash`` to get flow hash.
 * ``BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP`` - tells BPF flow dissector to stop
   parsing as soon as it reaches encapsulated headers; used by routing
   infrastructure.
diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,mt7622-wed.yaml b/Documentation/devicetree/bindings/arm/mediatek/mediatek,mt7622-wed.yaml
index 28ded09d72e3..e7720caf31b3 100644
--- a/Documentation/devicetree/bindings/arm/mediatek/mediatek,mt7622-wed.yaml
+++ b/Documentation/devicetree/bindings/arm/mediatek/mediatek,mt7622-wed.yaml
@@ -22,6 +22,7 @@ properties:
           - mediatek,mt7622-wed
           - mediatek,mt7981-wed
           - mediatek,mt7986-wed
+          - mediatek,mt7988-wed
       - const: syscon
 
   reg:
diff --git a/Documentation/devicetree/bindings/mfd/syscon.yaml b/Documentation/devicetree/bindings/mfd/syscon.yaml
index 8103154bbb52..c77d7b155a4c 100644
--- a/Documentation/devicetree/bindings/mfd/syscon.yaml
+++ b/Documentation/devicetree/bindings/mfd/syscon.yaml
@@ -49,6 +49,8 @@ properties:
               - hisilicon,peri-subctrl
               - hpe,gxp-sysreg
               - intel,lgm-syscon
+              - loongson,ls1b-syscon
+              - loongson,ls1c-syscon
               - marvell,armada-3700-usb2-host-misc
               - mediatek,mt8135-pctl-a-syscfg
               - mediatek,mt8135-pctl-b-syscfg
diff --git a/Documentation/devicetree/bindings/net/dsa/microchip,ksz.yaml b/Documentation/devicetree/bindings/net/dsa/microchip,ksz.yaml
index 03b5567be389..41014f5c01c4 100644
--- a/Documentation/devicetree/bindings/net/dsa/microchip,ksz.yaml
+++ b/Documentation/devicetree/bindings/net/dsa/microchip,ksz.yaml
@@ -49,6 +49,26 @@ properties:
       Set if the output SYNCLKO clock should be disabled. Do not mix with
       microchip,synclko-125.
 
+  microchip,io-drive-strength-microamp:
+    description:
+      IO Pad Drive Strength
+    enum: [8000, 16000]
+    default: 16000
+
+  microchip,hi-drive-strength-microamp:
+    description:
+      High Speed Drive Strength. Controls drive strength of GMII / RGMII /
+      MII / RMII (except TX_CLK/REFCLKI, COL and CRS) and CLKO_25_125 lines.
+    enum: [2000, 4000, 8000, 12000, 16000, 20000, 24000, 28000]
+    default: 24000
+
+  microchip,lo-drive-strength-microamp:
+    description:
+      Low Speed Drive Strength. Controls drive strength of TX_CLK / REFCLKI,
+      COL, CRS, LEDs, PME_N, NTRP_N, SDO and SDI/SDA/MDIO lines.
+    enum: [2000, 4000, 8000, 12000, 16000, 20000, 24000, 28000]
+    default: 8000
+
   interrupts:
     maxItems: 1
 
diff --git a/Documentation/devicetree/bindings/net/loongson,ls1b-gmac.yaml b/Documentation/devicetree/bindings/net/loongson,ls1b-gmac.yaml
new file mode 100644
index 000000000000..c4f3224bad38
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/loongson,ls1b-gmac.yaml
@@ -0,0 +1,114 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/loongson,ls1b-gmac.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Loongson-1B Gigabit Ethernet MAC Controller
+
+maintainers:
+  - Keguang Zhang <keguang.zhang@gmail.com>
+
+description: |
+  Loongson-1B Gigabit Ethernet MAC Controller is based on
+  Synopsys DesignWare MAC (version 3.50a).
+
+  Main features
+  - Dual 10/100/1000Mbps GMAC controllers
+  - Full-duplex operation (IEEE 802.3x flow control automatic transmission)
+  - Half-duplex operation (CSMA/CD Protocol and back-pressure support)
+  - RX Checksum Offload
+  - TX Checksum insertion
+  - MII interface
+  - RGMII interface
+
+select:
+  properties:
+    compatible:
+      contains:
+        enum:
+          - loongson,ls1b-gmac
+  required:
+    - compatible
+
+properties:
+  compatible:
+    items:
+      - enum:
+          - loongson,ls1b-gmac
+      - const: snps,dwmac-3.50a
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+  clock-names:
+    items:
+      - const: stmmaceth
+
+  interrupts:
+    maxItems: 1
+
+  interrupt-names:
+    items:
+      - const: macirq
+
+  loongson,ls1-syscon:
+    $ref: /schemas/types.yaml#/definitions/phandle
+    description:
+      Phandle to the syscon containing some extra configurations
+      including PHY interface mode.
+
+  phy-mode:
+    enum:
+      - mii
+      - rgmii-id
+
+required:
+  - compatible
+  - reg
+  - clocks
+  - clock-names
+  - interrupts
+  - interrupt-names
+  - loongson,ls1-syscon
+
+allOf:
+  - $ref: snps,dwmac.yaml#
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/loongson,ls1x-clk.h>
+    #include <dt-bindings/interrupt-controller/irq.h>
+
+    gmac0: ethernet@1fe10000 {
+        compatible = "loongson,ls1b-gmac", "snps,dwmac-3.50a";
+        reg = <0x1fe10000 0x10000>;
+
+        clocks = <&clkc LS1X_CLKID_AHB>;
+        clock-names = "stmmaceth";
+
+        interrupt-parent = <&intc1>;
+        interrupts = <2 IRQ_TYPE_LEVEL_HIGH>;
+        interrupt-names = "macirq";
+
+        loongson,ls1-syscon = <&syscon>;
+
+        phy-handle = <&phy0>;
+        phy-mode = "mii";
+        snps,pbl = <1>;
+
+        mdio {
+            #address-cells = <1>;
+            #size-cells = <0>;
+            compatible = "snps,dwmac-mdio";
+
+            phy0: ethernet-phy@0 {
+                reg = <0x0>;
+            };
+        };
+    };
diff --git a/Documentation/devicetree/bindings/net/loongson,ls1c-emac.yaml b/Documentation/devicetree/bindings/net/loongson,ls1c-emac.yaml
new file mode 100644
index 000000000000..99001b940b83
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/loongson,ls1c-emac.yaml
@@ -0,0 +1,113 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/loongson,ls1c-emac.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Loongson-1C Ethernet MAC Controller
+
+maintainers:
+  - Keguang Zhang <keguang.zhang@gmail.com>
+
+description: |
+  Loongson-1C Ethernet MAC Controller is based on
+  Synopsys DesignWare MAC (version 3.50a).
+
+  Main features
+  - 10/100Mbps
+  - Full-duplex operation (IEEE 802.3x flow control automatic transmission)
+  - Half-duplex operation (CSMA/CD Protocol and back-pressure support)
+  - IEEE 802.1Q VLAN tag detection for reception frames
+  - MII interface
+  - RMII interface
+
+select:
+  properties:
+    compatible:
+      contains:
+        enum:
+          - loongson,ls1c-emac
+  required:
+    - compatible
+
+properties:
+  compatible:
+    items:
+      - enum:
+          - loongson,ls1c-emac
+      - const: snps,dwmac-3.50a
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+  clock-names:
+    items:
+      - const: stmmaceth
+
+  interrupts:
+    maxItems: 1
+
+  interrupt-names:
+    items:
+      - const: macirq
+
+  loongson,ls1-syscon:
+    $ref: /schemas/types.yaml#/definitions/phandle
+    description:
+      Phandle to the syscon containing some extra configurations
+      including PHY interface mode.
+
+  phy-mode:
+    enum:
+      - mii
+      - rmii
+
+required:
+  - compatible
+  - reg
+  - clocks
+  - clock-names
+  - interrupts
+  - interrupt-names
+  - loongson,ls1-syscon
+
+allOf:
+  - $ref: snps,dwmac.yaml#
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/loongson,ls1x-clk.h>
+    #include <dt-bindings/interrupt-controller/irq.h>
+
+    emac: ethernet@1fe10000 {
+        compatible = "loongson,ls1c-emac", "snps,dwmac-3.50a";
+        reg = <0x1fe10000 0x10000>;
+
+        clocks = <&clkc LS1X_CLKID_AHB>;
+        clock-names = "stmmaceth";
+
+        interrupt-parent = <&intc1>;
+        interrupts = <2 IRQ_TYPE_LEVEL_HIGH>;
+        interrupt-names = "macirq";
+
+        loongson,ls1-syscon = <&syscon>;
+
+        phy-handle = <&phy0>;
+        phy-mode = "mii";
+        snps,pbl = <1>;
+
+        mdio {
+            #address-cells = <1>;
+            #size-cells = <0>;
+            compatible = "snps,dwmac-mdio";
+
+            phy0: ethernet-phy@13 {
+                reg = <0x13>;
+            };
+        };
+    };
diff --git a/Documentation/devicetree/bindings/net/snps,dwmac.yaml b/Documentation/devicetree/bindings/net/snps,dwmac.yaml
index ddf9522a5dc2..5c2769dc689a 100644
--- a/Documentation/devicetree/bindings/net/snps,dwmac.yaml
+++ b/Documentation/devicetree/bindings/net/snps,dwmac.yaml
@@ -394,6 +394,11 @@ properties:
               When a PFC frame is received with priorities matching the bitmask,
               the queue is blocked from transmitting for the pause time specified
               in the PFC frame.
+
+          snps,coe-unsupported:
+            type: boolean
+            description: TX checksum offload is unsupported by the TX queue.
+
         allOf:
           - if:
               required:
diff --git a/Documentation/devicetree/bindings/net/ti,icssg-prueth.yaml b/Documentation/devicetree/bindings/net/ti,icssg-prueth.yaml
index 311c570165f9..229c8f32019f 100644
--- a/Documentation/devicetree/bindings/net/ti,icssg-prueth.yaml
+++ b/Documentation/devicetree/bindings/net/ti,icssg-prueth.yaml
@@ -19,6 +19,7 @@ allOf:
 properties:
   compatible:
     enum:
+      - ti,am642-icssg-prueth  # for AM64x SoC family
       - ti,am654-icssg-prueth  # for AM65x SoC family
 
   sram:
@@ -106,6 +107,13 @@ properties:
               phandle to system controller node and register offset
               to ICSSG control register for RGMII transmit delay
 
+          ti,half-duplex-capable:
+            type: boolean
+            description:
+              Indicates that the PHY output pin COL is routed to ICSSG GPIO pin
+              (PRGx_PRU0/1_GPIO10) as input so that the ICSSG MII port is
+              capable of half duplex operations.
+
         required:
           - reg
     anyOf:
diff --git a/Documentation/devicetree/bindings/soc/mediatek/mediatek,mt7986-wo-ccif.yaml b/Documentation/devicetree/bindings/soc/mediatek/mediatek,mt7986-wo-ccif.yaml
index f0fa92b04b32..3b212f26abc5 100644
--- a/Documentation/devicetree/bindings/soc/mediatek/mediatek,mt7986-wo-ccif.yaml
+++ b/Documentation/devicetree/bindings/soc/mediatek/mediatek,mt7986-wo-ccif.yaml
@@ -20,6 +20,7 @@ properties:
     items:
       - enum:
           - mediatek,mt7986-wo-ccif
+          - mediatek,mt7988-wo-ccif
       - const: syscon
 
   reg:
diff --git a/Documentation/driver-api/dpll.rst b/Documentation/driver-api/dpll.rst
new file mode 100644
index 000000000000..bb52f1b8c0be
--- /dev/null
+++ b/Documentation/driver-api/dpll.rst
@@ -0,0 +1,497 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===============================
+The Linux kernel dpll subsystem
+===============================
+
+DPLL
+====
+
+PLL - Phase Locked Loop is an electronic circuit which syntonizes clock
+signal of a device with an external clock signal. Effectively enabling
+device to run on the same clock signal beat as provided on a PLL input.
+
+DPLL - Digital Phase Locked Loop is an integrated circuit which in
+addition to plain PLL behavior incorporates a digital phase detector
+and may have digital divider in the loop. As a result, the frequency on
+DPLL's input and output may be configurable.
+
+Subsystem
+=========
+
+The main purpose of dpll subsystem is to provide general interface
+to configure devices that use any kind of Digital PLL and could use
+different sources of input signal to synchronize to, as well as
+different types of outputs.
+The main interface is NETLINK_GENERIC based protocol with an event
+monitoring multicast group defined.
+
+Device object
+=============
+
+Single dpll device object means single Digital PLL circuit and bunch of
+connected pins.
+It reports the supported modes of operation and current status to the
+user in response to the `do` request of netlink command
+``DPLL_CMD_DEVICE_GET`` and list of dplls registered in the subsystem
+with `dump` netlink request of the same command.
+Changing the configuration of dpll device is done with `do` request of
+netlink ``DPLL_CMD_DEVICE_SET`` command.
+A device handle is ``DPLL_A_ID``, it shall be provided to get or set
+configuration of particular device in the system. It can be obtained
+with a ``DPLL_CMD_DEVICE_GET`` `dump` request or
+a ``DPLL_CMD_DEVICE_ID_GET`` `do` request, where the one must provide
+attributes that result in single device match.
+
+Pin object
+==========
+
+A pin is amorphic object which represents either input or output, it
+could be internal component of the device, as well as externally
+connected.
+The number of pins per dpll vary, but usually multiple pins shall be
+provided for a single dpll device.
+Pin's properties, capabilities and status is provided to the user in
+response to `do` request of netlink ``DPLL_CMD_PIN_GET`` command.
+It is also possible to list all the pins that were registered in the
+system with `dump` request of ``DPLL_CMD_PIN_GET`` command.
+Configuration of a pin can be changed by `do` request of netlink
+``DPLL_CMD_PIN_SET`` command.
+Pin handle is a ``DPLL_A_PIN_ID``, it shall be provided to get or set
+configuration of particular pin in the system. It can be obtained with
+``DPLL_CMD_PIN_GET`` `dump` request or ``DPLL_CMD_PIN_ID_GET`` `do`
+request, where user provides attributes that result in single pin match.
+
+Pin selection
+=============
+
+In general, selected pin (the one which signal is driving the dpll
+device) can be obtained from ``DPLL_A_PIN_STATE`` attribute, and only
+one pin shall be in ``DPLL_PIN_STATE_CONNECTED`` state for any dpll
+device.
+
+Pin selection can be done either manually or automatically, depending
+on hardware capabilities and active dpll device work mode
+(``DPLL_A_MODE`` attribute). The consequence is that there are
+differences for each mode in terms of available pin states, as well as
+for the states the user can request for a dpll device.
+
+In manual mode (``DPLL_MODE_MANUAL``) the user can request or receive
+one of following pin states:
+
+- ``DPLL_PIN_STATE_CONNECTED`` - the pin is used to drive dpll device
+- ``DPLL_PIN_STATE_DISCONNECTED`` - the pin is not used to drive dpll
+  device
+
+In automatic mode (``DPLL_MODE_AUTOMATIC``) the user can request or
+receive one of following pin states:
+
+- ``DPLL_PIN_STATE_SELECTABLE`` - the pin shall be considered as valid
+  input for automatic selection algorithm
+- ``DPLL_PIN_STATE_DISCONNECTED`` - the pin shall be not considered as
+  a valid input for automatic selection algorithm
+
+In automatic mode (``DPLL_MODE_AUTOMATIC``) the user can only receive
+pin state ``DPLL_PIN_STATE_CONNECTED`` once automatic selection
+algorithm locks a dpll device with one of the inputs.
+
+Shared pins
+===========
+
+A single pin object can be attached to multiple dpll devices.
+Then there are two groups of configuration knobs:
+
+1) Set on a pin - the configuration affects all dpll devices pin is
+   registered to (i.e., ``DPLL_A_PIN_FREQUENCY``),
+2) Set on a pin-dpll tuple - the configuration affects only selected
+   dpll device (i.e., ``DPLL_A_PIN_PRIO``, ``DPLL_A_PIN_STATE``,
+   ``DPLL_A_PIN_DIRECTION``).
+
+MUX-type pins
+=============
+
+A pin can be MUX-type, it aggregates child pins and serves as a pin
+multiplexer. One or more pins are registered with MUX-type instead of
+being directly registered to a dpll device.
+Pins registered with a MUX-type pin provide user with additional nested
+attribute ``DPLL_A_PIN_PARENT_PIN`` for each parent they were registered
+with.
+If a pin was registered with multiple parent pins, they behave like a
+multiple output multiplexer. In this case output of a
+``DPLL_CMD_PIN_GET`` would contain multiple pin-parent nested
+attributes with current state related to each parent, like:
+
+'pin': [{{
+  'clock-id': 282574471561216,
+  'module-name': 'ice',
+  'capabilities': 4,
+  'id': 13,
+  'parent-pin': [
+  {'parent-id': 2, 'state': 'connected'},
+  {'parent-id': 3, 'state': 'disconnected'}
+  ],
+  'type': 'synce-eth-port'
+  }}]
+
+Only one child pin can provide its signal to the parent MUX-type pin at
+a time, the selection is done by requesting change of a child pin state
+on desired parent, with the use of ``DPLL_A_PIN_PARENT`` nested
+attribute. Example of netlink `set state on parent pin` message format:
+
+  ========================== =============================================
+  ``DPLL_A_PIN_ID``          child pin id
+  ``DPLL_A_PIN_PARENT_PIN``  nested attribute for requesting configuration
+                             related to parent pin
+    ``DPLL_A_PIN_PARENT_ID`` parent pin id
+    ``DPLL_A_PIN_STATE``     requested pin state on parent
+  ========================== =============================================
+
+Pin priority
+============
+
+Some devices might offer a capability of automatic pin selection mode
+(enum value ``DPLL_MODE_AUTOMATIC`` of ``DPLL_A_MODE`` attribute).
+Usually, automatic selection is performed on the hardware level, which
+means only pins directly connected to the dpll can be used for automatic
+input pin selection.
+In automatic selection mode, the user cannot manually select a input
+pin for the device, instead the user shall provide all directly
+connected pins with a priority ``DPLL_A_PIN_PRIO``, the device would
+pick a highest priority valid signal and use it to control the DPLL
+device. Example of netlink `set priority on parent pin` message format:
+
+  ============================ =============================================
+  ``DPLL_A_PIN_ID``            configured pin id
+  ``DPLL_A_PIN_PARENT_DEVICE`` nested attribute for requesting configuration
+                               related to parent dpll device
+    ``DPLL_A_PIN_PARENT_ID``   parent dpll device id
+    ``DPLL_A_PIN_PRIO``        requested pin prio on parent dpll
+  ============================ =============================================
+
+Child pin of MUX-type pin is not capable of automatic input pin selection,
+in order to configure active input of a MUX-type pin, the user needs to
+request desired pin state of the child pin on the parent pin,
+as described in the ``MUX-type pins`` chapter.
+
+Configuration commands group
+============================
+
+Configuration commands are used to get information about registered
+dpll devices (and pins), as well as set configuration of device or pins.
+As dpll devices must be abstracted and reflect real hardware,
+there is no way to add new dpll device via netlink from user space and
+each device should be registered by its driver.
+
+All netlink commands require ``GENL_ADMIN_PERM``. This is to prevent
+any spamming/DoS from unauthorized userspace applications.
+
+List of netlink commands with possible attributes
+=================================================
+
+Constants identifying command types for dpll device uses a
+``DPLL_CMD_`` prefix and suffix according to command purpose.
+The dpll device related attributes use a ``DPLL_A_`` prefix and
+suffix according to attribute purpose.
+
+  ==================================== =================================
+  ``DPLL_CMD_DEVICE_ID_GET``           command to get device ID
+    ``DPLL_A_MODULE_NAME``             attr module name of registerer
+    ``DPLL_A_CLOCK_ID``                attr Unique Clock Identifier
+                                       (EUI-64), as defined by the
+                                       IEEE 1588 standard
+    ``DPLL_A_TYPE``                    attr type of dpll device
+  ==================================== =================================
+
+  ==================================== =================================
+  ``DPLL_CMD_DEVICE_GET``              command to get device info or
+                                       dump list of available devices
+    ``DPLL_A_ID``                      attr unique dpll device ID
+    ``DPLL_A_MODULE_NAME``             attr module name of registerer
+    ``DPLL_A_CLOCK_ID``                attr Unique Clock Identifier
+                                       (EUI-64), as defined by the
+                                       IEEE 1588 standard
+    ``DPLL_A_MODE``                    attr selection mode
+    ``DPLL_A_MODE_SUPPORTED``          attr available selection modes
+    ``DPLL_A_LOCK_STATUS``             attr dpll device lock status
+    ``DPLL_A_TEMP``                    attr device temperature info
+    ``DPLL_A_TYPE``                    attr type of dpll device
+  ==================================== =================================
+
+  ==================================== =================================
+  ``DPLL_CMD_DEVICE_SET``              command to set dpll device config
+    ``DPLL_A_ID``                      attr internal dpll device index
+    ``DPLL_A_MODE``                    attr selection mode to configure
+  ==================================== =================================
+
+Constants identifying command types for pins uses a
+``DPLL_CMD_PIN_`` prefix and suffix according to command purpose.
+The pin related attributes use a ``DPLL_A_PIN_`` prefix and suffix
+according to attribute purpose.
+
+  ==================================== =================================
+  ``DPLL_CMD_PIN_ID_GET``              command to get pin ID
+    ``DPLL_A_PIN_MODULE_NAME``         attr module name of registerer
+    ``DPLL_A_PIN_CLOCK_ID``            attr Unique Clock Identifier
+                                       (EUI-64), as defined by the
+                                       IEEE 1588 standard
+    ``DPLL_A_PIN_BOARD_LABEL``         attr pin board label provided
+                                       by registerer
+    ``DPLL_A_PIN_PANEL_LABEL``         attr pin panel label provided
+                                       by registerer
+    ``DPLL_A_PIN_PACKAGE_LABEL``       attr pin package label provided
+                                       by registerer
+    ``DPLL_A_PIN_TYPE``                attr type of a pin
+  ==================================== =================================
+
+  ==================================== ==================================
+  ``DPLL_CMD_PIN_GET``                 command to get pin info or dump
+                                       list of available pins
+    ``DPLL_A_PIN_ID``                  attr unique a pin ID
+    ``DPLL_A_PIN_MODULE_NAME``         attr module name of registerer
+    ``DPLL_A_PIN_CLOCK_ID``            attr Unique Clock Identifier
+                                       (EUI-64), as defined by the
+                                       IEEE 1588 standard
+    ``DPLL_A_PIN_BOARD_LABEL``         attr pin board label provided
+                                       by registerer
+    ``DPLL_A_PIN_PANEL_LABEL``         attr pin panel label provided
+                                       by registerer
+    ``DPLL_A_PIN_PACKAGE_LABEL``       attr pin package label provided
+                                       by registerer
+    ``DPLL_A_PIN_TYPE``                attr type of a pin
+    ``DPLL_A_PIN_FREQUENCY``           attr current frequency of a pin
+    ``DPLL_A_PIN_FREQUENCY_SUPPORTED`` nested attr provides supported
+                                       frequencies
+      ``DPLL_A_PIN_ANY_FREQUENCY_MIN`` attr minimum value of frequency
+      ``DPLL_A_PIN_ANY_FREQUENCY_MAX`` attr maximum value of frequency
+    ``DPLL_A_PIN_PARENT_DEVICE``       nested attr for each parent device
+                                       the pin is connected with
+      ``DPLL_A_PIN_PARENT_ID``         attr parent dpll device id
+      ``DPLL_A_PIN_PRIO``              attr priority of pin on the
+                                       dpll device
+      ``DPLL_A_PIN_STATE``             attr state of pin on the parent
+                                       dpll device
+     ``DPLL_A_PIN_DIRECTION``          attr direction of a pin on the
+                                       parent dpll device
+    ``DPLL_A_PIN_PARENT_PIN``          nested attr for each parent pin
+                                       the pin is connected with
+      ``DPLL_A_PIN_PARENT_ID``         attr parent pin id
+      ``DPLL_A_PIN_STATE``             attr state of pin on the parent
+                                       pin
+    ``DPLL_A_PIN_CAPABILITIES``        attr bitmask of pin capabilities
+  ==================================== ==================================
+
+  ==================================== =================================
+  ``DPLL_CMD_PIN_SET``                 command to set pins configuration
+    ``DPLL_A_PIN_ID``                  attr unique a pin ID
+    ``DPLL_A_PIN_FREQUENCY``           attr requested frequency of a pin
+    ``DPLL_A_PIN_PARENT_DEVICE``       nested attr for each parent dpll
+                                       device configuration request
+      ``DPLL_A_PIN_PARENT_ID``         attr parent dpll device id
+      ``DPLL_A_PIN_DIRECTION``         attr requested direction of a pin
+      ``DPLL_A_PIN_PRIO``              attr requested priority of pin on
+                                       the dpll device
+      ``DPLL_A_PIN_STATE``             attr requested state of pin on
+                                       the dpll device
+    ``DPLL_A_PIN_PARENT_PIN``          nested attr for each parent pin
+                                       configuration request
+      ``DPLL_A_PIN_PARENT_ID``         attr parent pin id
+      ``DPLL_A_PIN_STATE``             attr requested state of pin on
+                                       parent pin
+  ==================================== =================================
+
+Netlink dump requests
+=====================
+
+The ``DPLL_CMD_DEVICE_GET`` and ``DPLL_CMD_PIN_GET`` commands are
+capable of dump type netlink requests, in which case the response is in
+the same format as for their ``do`` request, but every device or pin
+registered in the system is returned.
+
+SET commands format
+===================
+
+``DPLL_CMD_DEVICE_SET`` - to target a dpll device, the user provides
+``DPLL_A_ID``, which is unique identifier of dpll device in the system,
+as well as parameter being configured (``DPLL_A_MODE``).
+
+``DPLL_CMD_PIN_SET`` - to target a pin user must provide a
+``DPLL_A_PIN_ID``, which is unique identifier of a pin in the system.
+Also configured pin parameters must be added.
+If ``DPLL_A_PIN_FREQUENCY`` is configured, this affects all the dpll
+devices that are connected with the pin, that is why frequency attribute
+shall not be enclosed in ``DPLL_A_PIN_PARENT_DEVICE``.
+Other attributes: ``DPLL_A_PIN_PRIO``, ``DPLL_A_PIN_STATE`` or
+``DPLL_A_PIN_DIRECTION`` must be enclosed in
+``DPLL_A_PIN_PARENT_DEVICE`` as their configuration relates to only one
+of parent dplls, targeted by ``DPLL_A_PIN_PARENT_ID`` attribute which is
+also required inside that nest.
+For MUX-type pins the ``DPLL_A_PIN_STATE`` attribute is configured in
+similar way, by enclosing required state in ``DPLL_A_PIN_PARENT_PIN``
+nested attribute and targeted parent pin id in ``DPLL_A_PIN_PARENT_ID``.
+
+In general, it is possible to configure multiple parameters at once, but
+internally each parameter change will be invoked separately, where order
+of configuration is not guaranteed by any means.
+
+Configuration pre-defined enums
+===============================
+
+.. kernel-doc:: include/uapi/linux/dpll.h
+
+Notifications
+=============
+
+dpll device can provide notifications regarding status changes of the
+device, i.e. lock status changes, input/output changes or other alarms.
+There is one multicast group that is used to notify user-space apps via
+netlink socket: ``DPLL_MCGRP_MONITOR``
+
+Notifications messages:
+
+  ============================== =====================================
+  ``DPLL_CMD_DEVICE_CREATE_NTF`` dpll device was created
+  ``DPLL_CMD_DEVICE_DELETE_NTF`` dpll device was deleted
+  ``DPLL_CMD_DEVICE_CHANGE_NTF`` dpll device has changed
+  ``DPLL_CMD_PIN_CREATE_NTF``    dpll pin was created
+  ``DPLL_CMD_PIN_DELETE_NTF``    dpll pin was deleted
+  ``DPLL_CMD_PIN_CHANGE_NTF``    dpll pin has changed
+  ============================== =====================================
+
+Events format is the same as for the corresponding get command.
+Format of ``DPLL_CMD_DEVICE_`` events is the same as response of
+``DPLL_CMD_DEVICE_GET``.
+Format of ``DPLL_CMD_PIN_`` events is same as response of
+``DPLL_CMD_PIN_GET``.
+
+Device driver implementation
+============================
+
+Device is allocated by dpll_device_get() call. Second call with the
+same arguments will not create new object but provides pointer to
+previously created device for given arguments, it also increases
+refcount of that object.
+Device is deallocated by dpll_device_put() call, which first
+decreases the refcount, once refcount is cleared the object is
+destroyed.
+
+Device should implement set of operations and register device via
+dpll_device_register() at which point it becomes available to the
+users. Multiple driver instances can obtain reference to it with
+dpll_device_get(), as well as register dpll device with their own
+ops and priv.
+
+The pins are allocated separately with dpll_pin_get(), it works
+similarly to dpll_device_get(). Function first creates object and then
+for each call with the same arguments only the object refcount
+increases. Also dpll_pin_put() works similarly to dpll_device_put().
+
+A pin can be registered with parent dpll device or parent pin, depending
+on hardware needs. Each registration requires registerer to provide set
+of pin callbacks, and private data pointer for calling them:
+
+- dpll_pin_register() - register pin with a dpll device,
+- dpll_pin_on_pin_register() - register pin with another MUX type pin.
+
+Notifications of adding or removing dpll devices are created within
+subsystem itself.
+Notifications about registering/deregistering pins are also invoked by
+the subsystem.
+Notifications about status changes either of dpll device or a pin are
+invoked in two ways:
+
+- after successful change was requested on dpll subsystem, the subsystem
+  calls corresponding notification,
+- requested by device driver with dpll_device_change_ntf() or
+  dpll_pin_change_ntf() when driver informs about the status change.
+
+The device driver using dpll interface is not required to implement all
+the callback operation. Nevertheless, there are few required to be
+implemented.
+Required dpll device level callback operations:
+
+- ``.mode_get``,
+- ``.lock_status_get``.
+
+Required pin level callback operations:
+
+- ``.state_on_dpll_get`` (pins registered with dpll device),
+- ``.state_on_pin_get`` (pins registered with parent pin),
+- ``.direction_get``.
+
+Every other operation handler is checked for existence and
+``-EOPNOTSUPP`` is returned in case of absence of specific handler.
+
+The simplest implementation is in the OCP TimeCard driver. The ops
+structures are defined like this:
+
+.. code-block:: c
+	static const struct dpll_device_ops dpll_ops = {
+		.lock_status_get = ptp_ocp_dpll_lock_status_get,
+		.mode_get = ptp_ocp_dpll_mode_get,
+		.mode_supported = ptp_ocp_dpll_mode_supported,
+	};
+
+	static const struct dpll_pin_ops dpll_pins_ops = {
+		.frequency_get = ptp_ocp_dpll_frequency_get,
+		.frequency_set = ptp_ocp_dpll_frequency_set,
+		.direction_get = ptp_ocp_dpll_direction_get,
+		.direction_set = ptp_ocp_dpll_direction_set,
+		.state_on_dpll_get = ptp_ocp_dpll_state_get,
+	};
+
+The registration part is then looks like this part:
+
+.. code-block:: c
+        clkid = pci_get_dsn(pdev);
+        bp->dpll = dpll_device_get(clkid, 0, THIS_MODULE);
+        if (IS_ERR(bp->dpll)) {
+                err = PTR_ERR(bp->dpll);
+                dev_err(&pdev->dev, "dpll_device_alloc failed\n");
+                goto out;
+        }
+
+        err = dpll_device_register(bp->dpll, DPLL_TYPE_PPS, &dpll_ops, bp);
+        if (err)
+                goto out;
+
+        for (i = 0; i < OCP_SMA_NUM; i++) {
+                bp->sma[i].dpll_pin = dpll_pin_get(clkid, i, THIS_MODULE, &bp->sma[i].dpll_prop);
+                if (IS_ERR(bp->sma[i].dpll_pin)) {
+                        err = PTR_ERR(bp->dpll);
+                        goto out_dpll;
+                }
+
+                err = dpll_pin_register(bp->dpll, bp->sma[i].dpll_pin, &dpll_pins_ops,
+                                        &bp->sma[i]);
+                if (err) {
+                        dpll_pin_put(bp->sma[i].dpll_pin);
+                        goto out_dpll;
+                }
+        }
+
+In the error path we have to rewind every allocation in the reverse order:
+
+.. code-block:: c
+        while (i) {
+                --i;
+                dpll_pin_unregister(bp->dpll, bp->sma[i].dpll_pin, &dpll_pins_ops, &bp->sma[i]);
+                dpll_pin_put(bp->sma[i].dpll_pin);
+        }
+        dpll_device_put(bp->dpll);
+
+More complex example can be found in Intel's ICE driver or nVidia's mlx5 driver.
+
+SyncE enablement
+================
+For SyncE enablement it is required to allow control over dpll device
+for a software application which monitors and configures the inputs of
+dpll device in response to current state of a dpll device and its
+inputs.
+In such scenario, dpll device input signal shall be also configurable
+to drive dpll with signal recovered from the PHY netdevice.
+This is done by exposing a pin to the netdevice - attaching pin to the
+netdevice itself with
+``netdev_dpll_pin_set(struct net_device *dev, struct dpll_pin *dpll_pin)``.
+Exposed pin id handle ``DPLL_A_PIN_ID`` is then identifiable by the user
+as it is attached to rtnetlink respond to get ``RTM_NEWLINK`` command in
+nested attribute ``IFLA_DPLL_PIN``.
diff --git a/Documentation/driver-api/index.rst b/Documentation/driver-api/index.rst
index 1e16a40da3ba..f549a68951d7 100644
--- a/Documentation/driver-api/index.rst
+++ b/Documentation/driver-api/index.rst
@@ -114,6 +114,7 @@ available subsections can be seen below.
    zorro
    hte/index
    wmi
+   dpll
 
 .. only::  subproject and html
 
diff --git a/Documentation/netlink/specs/dpll.yaml b/Documentation/netlink/specs/dpll.yaml
new file mode 100644
index 000000000000..8b86b28b47a6
--- /dev/null
+++ b/Documentation/netlink/specs/dpll.yaml
@@ -0,0 +1,488 @@
+# SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
+
+name: dpll
+
+doc: DPLL subsystem.
+
+definitions:
+  -
+    type: enum
+    name: mode
+    doc: |
+      working modes a dpll can support, differentiates if and how dpll selects
+      one of its inputs to syntonize with it, valid values for DPLL_A_MODE
+      attribute
+    entries:
+      -
+        name: manual
+        doc: input can be only selected by sending a request to dpll
+        value: 1
+      -
+        name: automatic
+        doc: highest prio input pin auto selected by dpll
+    render-max: true
+  -
+    type: enum
+    name: lock-status
+    doc: |
+      provides information of dpll device lock status, valid values for
+      DPLL_A_LOCK_STATUS attribute
+    entries:
+      -
+        name: unlocked
+        doc: |
+          dpll was not yet locked to any valid input (or forced by setting
+          DPLL_A_MODE to DPLL_MODE_DETACHED)
+        value: 1
+      -
+        name: locked
+        doc: |
+          dpll is locked to a valid signal, but no holdover available
+      -
+        name: locked-ho-acq
+        doc: |
+          dpll is locked and holdover acquired
+      -
+        name: holdover
+        doc: |
+          dpll is in holdover state - lost a valid lock or was forced
+          by disconnecting all the pins (latter possible only
+          when dpll lock-state was already DPLL_LOCK_STATUS_LOCKED_HO_ACQ,
+          if dpll lock-state was not DPLL_LOCK_STATUS_LOCKED_HO_ACQ, the
+          dpll's lock-state shall remain DPLL_LOCK_STATUS_UNLOCKED)
+    render-max: true
+  -
+    type: const
+    name: temp-divider
+    value: 1000
+    doc: |
+      temperature divider allowing userspace to calculate the
+      temperature as float with three digit decimal precision.
+      Value of (DPLL_A_TEMP / DPLL_TEMP_DIVIDER) is integer part of
+      temperature value.
+      Value of (DPLL_A_TEMP % DPLL_TEMP_DIVIDER) is fractional part of
+      temperature value.
+  -
+    type: enum
+    name: type
+    doc: type of dpll, valid values for DPLL_A_TYPE attribute
+    entries:
+      -
+        name: pps
+        doc: dpll produces Pulse-Per-Second signal
+        value: 1
+      -
+        name: eec
+        doc: dpll drives the Ethernet Equipment Clock
+    render-max: true
+  -
+    type: enum
+    name: pin-type
+    doc: |
+      defines possible types of a pin, valid values for DPLL_A_PIN_TYPE
+      attribute
+    entries:
+      -
+        name: mux
+        doc: aggregates another layer of selectable pins
+        value: 1
+      -
+        name: ext
+        doc: external input
+      -
+        name: synce-eth-port
+        doc: ethernet port PHY's recovered clock
+      -
+        name: int-oscillator
+        doc: device internal oscillator
+      -
+        name: gnss
+        doc: GNSS recovered clock
+    render-max: true
+  -
+    type: enum
+    name: pin-direction
+    doc: |
+      defines possible direction of a pin, valid values for
+      DPLL_A_PIN_DIRECTION attribute
+    entries:
+      -
+        name: input
+        doc: pin used as a input of a signal
+        value: 1
+      -
+        name: output
+        doc: pin used to output the signal
+    render-max: true
+  -
+    type: const
+    name: pin-frequency-1-hz
+    value: 1
+  -
+    type: const
+    name: pin-frequency-10-khz
+    value: 10000
+  -
+    type: const
+    name: pin-frequency-77_5-khz
+    value: 77500
+  -
+    type: const
+    name: pin-frequency-10-mhz
+    value: 10000000
+  -
+    type: enum
+    name: pin-state
+    doc: |
+      defines possible states of a pin, valid values for
+      DPLL_A_PIN_STATE attribute
+    entries:
+      -
+        name: connected
+        doc: pin connected, active input of phase locked loop
+        value: 1
+      -
+        name: disconnected
+        doc: pin disconnected, not considered as a valid input
+      -
+        name: selectable
+        doc: pin enabled for automatic input selection
+    render-max: true
+  -
+    type: flags
+    name: pin-capabilities
+    doc: |
+      defines possible capabilities of a pin, valid flags on
+      DPLL_A_PIN_CAPABILITIES attribute
+    entries:
+      -
+        name: direction-can-change
+        doc: pin direction can be changed
+      -
+        name: priority-can-change
+        doc: pin priority can be changed
+      -
+        name: state-can-change
+        doc: pin state can be changed
+
+attribute-sets:
+  -
+    name: dpll
+    enum-name: dpll_a
+    attributes:
+      -
+        name: id
+        type: u32
+      -
+        name: module-name
+        type: string
+      -
+        name: pad
+        type: pad
+      -
+        name: clock-id
+        type: u64
+      -
+        name: mode
+        type: u32
+        enum: mode
+      -
+        name: mode-supported
+        type: u32
+        enum: mode
+        multi-attr: true
+      -
+        name: lock-status
+        type: u32
+        enum: lock-status
+      -
+        name: temp
+        type: s32
+      -
+        name: type
+        type: u32
+        enum: type
+  -
+    name: pin
+    enum-name: dpll_a_pin
+    attributes:
+      -
+        name: id
+        type: u32
+      -
+        name: parent-id
+        type: u32
+      -
+        name: module-name
+        type: string
+      -
+        name: pad
+        type: pad
+      -
+        name: clock-id
+        type: u64
+      -
+        name: board-label
+        type: string
+      -
+        name: panel-label
+        type: string
+      -
+        name: package-label
+        type: string
+      -
+        name: type
+        type: u32
+        enum: pin-type
+      -
+        name: direction
+        type: u32
+        enum: pin-direction
+      -
+        name: frequency
+        type: u64
+      -
+        name: frequency-supported
+        type: nest
+        multi-attr: true
+        nested-attributes: frequency-range
+      -
+        name: frequency-min
+        type: u64
+      -
+        name: frequency-max
+        type: u64
+      -
+        name: prio
+        type: u32
+      -
+        name: state
+        type: u32
+        enum: pin-state
+      -
+        name: capabilities
+        type: u32
+      -
+        name: parent-device
+        type: nest
+        multi-attr: true
+        nested-attributes: pin-parent-device
+      -
+        name: parent-pin
+        type: nest
+        multi-attr: true
+        nested-attributes: pin-parent-pin
+  -
+    name: pin-parent-device
+    subset-of: pin
+    attributes:
+      -
+        name: parent-id
+        type: u32
+      -
+        name: direction
+        type: u32
+      -
+        name: prio
+        type: u32
+      -
+        name: state
+        type: u32
+  -
+    name: pin-parent-pin
+    subset-of: pin
+    attributes:
+      -
+        name: parent-id
+        type: u32
+      -
+        name: state
+        type: u32
+  -
+    name: frequency-range
+    subset-of: pin
+    attributes:
+      -
+        name: frequency-min
+        type: u64
+      -
+        name: frequency-max
+        type: u64
+
+operations:
+  enum-name: dpll_cmd
+  list:
+    -
+      name: device-id-get
+      doc: |
+        Get id of dpll device that matches given attributes
+      attribute-set: dpll
+      flags: [ admin-perm ]
+
+      do:
+        pre: dpll-lock-doit
+        post: dpll-unlock-doit
+        request:
+          attributes:
+            - module-name
+            - clock-id
+            - type
+        reply:
+          attributes:
+            - id
+
+    -
+      name: device-get
+      doc: |
+        Get list of DPLL devices (dump) or attributes of a single dpll device
+      attribute-set: dpll
+      flags: [ admin-perm ]
+
+      do:
+        pre: dpll-pre-doit
+        post: dpll-post-doit
+        request:
+          attributes:
+            - id
+        reply: &dev-attrs
+          attributes:
+            - id
+            - module-name
+            - mode
+            - mode-supported
+            - lock-status
+            - temp
+            - clock-id
+            - type
+
+      dump:
+        pre: dpll-lock-dumpit
+        post: dpll-unlock-dumpit
+        reply: *dev-attrs
+
+    -
+      name: device-set
+      doc: Set attributes for a DPLL device
+      attribute-set: dpll
+      flags: [ admin-perm ]
+
+      do:
+        pre: dpll-pre-doit
+        post: dpll-post-doit
+        request:
+          attributes:
+            - id
+    -
+      name: device-create-ntf
+      doc: Notification about device appearing
+      notify: device-get
+      mcgrp: monitor
+    -
+      name: device-delete-ntf
+      doc: Notification about device disappearing
+      notify: device-get
+      mcgrp: monitor
+    -
+      name: device-change-ntf
+      doc: Notification about device configuration being changed
+      notify: device-get
+      mcgrp: monitor
+    -
+      name: pin-id-get
+      doc: |
+        Get id of a pin that matches given attributes
+      attribute-set: pin
+      flags: [ admin-perm ]
+
+      do:
+        pre: dpll-lock-doit
+        post: dpll-unlock-doit
+        request:
+          attributes:
+            - module-name
+            - clock-id
+            - board-label
+            - panel-label
+            - package-label
+            - type
+        reply:
+          attributes:
+            - id
+
+    -
+      name: pin-get
+      doc: |
+        Get list of pins and its attributes.
+        - dump request without any attributes given - list all the pins in the
+          system
+        - dump request with target dpll - list all the pins registered with
+          a given dpll device
+        - do request with target dpll and target pin - single pin attributes
+      attribute-set: pin
+      flags: [ admin-perm ]
+
+      do:
+        pre: dpll-pin-pre-doit
+        post: dpll-pin-post-doit
+        request:
+          attributes:
+            - id
+        reply: &pin-attrs
+          attributes:
+            - id
+            - board-label
+            - panel-label
+            - package-label
+            - type
+            - frequency
+            - frequency-supported
+            - capabilities
+            - parent-device
+            - parent-pin
+
+      dump:
+        pre: dpll-lock-dumpit
+        post: dpll-unlock-dumpit
+        request:
+          attributes:
+            - id
+        reply: *pin-attrs
+
+    -
+      name: pin-set
+      doc: Set attributes of a target pin
+      attribute-set: pin
+      flags: [ admin-perm ]
+
+      do:
+        pre: dpll-pin-pre-doit
+        post: dpll-pin-post-doit
+        request:
+          attributes:
+            - id
+            - frequency
+            - direction
+            - prio
+            - state
+            - parent-device
+            - parent-pin
+    -
+      name: pin-create-ntf
+      doc: Notification about pin appearing
+      notify: pin-get
+      mcgrp: monitor
+    -
+      name: pin-delete-ntf
+      doc: Notification about pin disappearing
+      notify: pin-get
+      mcgrp: monitor
+    -
+      name: pin-change-ntf
+      doc: Notification about pin configuration being changed
+      notify: pin-get
+      mcgrp: monitor
+
+mcast-groups:
+  list:
+    -
+      name: monitor
diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml
index 1c7284fd535b..c46fcc78fc04 100644
--- a/Documentation/netlink/specs/netdev.yaml
+++ b/Documentation/netlink/specs/netdev.yaml
@@ -42,6 +42,19 @@ definitions:
         doc:
           This feature informs if netdev implements non-linear XDP buffer
           support in ndo_xdp_xmit callback.
+  -
+    type: flags
+    name: xdp-rx-metadata
+    render-max: true
+    entries:
+      -
+        name: timestamp
+        doc:
+          Device is capable of exposing receive HW timestamp via bpf_xdp_metadata_rx_timestamp().
+      -
+        name: hash
+        doc:
+          Device is capable of exposing receive packet hash via bpf_xdp_metadata_rx_hash().
 
 attribute-sets:
   -
@@ -68,6 +81,13 @@ attribute-sets:
         type: u32
         checks:
           min: 1
+      -
+        name: xdp-rx-metadata-features
+        doc: Bitmask of supported XDP receive metadata features.
+             See Documentation/networking/xdp-rx-metadata.rst for more details.
+        type: u64
+        enum: xdp-rx-metadata
+        enum-as-flags: true
 
 operations:
   list:
@@ -84,6 +104,7 @@ operations:
             - ifindex
             - xdp-features
             - xdp-zc-max-segs
+            - xdp-rx-metadata-features
       dump:
         reply: *dev-all
     -
diff --git a/Documentation/networking/device_drivers/ethernet/index.rst b/Documentation/networking/device_drivers/ethernet/index.rst
index 9827e816084b..43de285b8a92 100644
--- a/Documentation/networking/device_drivers/ethernet/index.rst
+++ b/Documentation/networking/device_drivers/ethernet/index.rst
@@ -32,6 +32,7 @@ Contents:
    intel/e1000
    intel/e1000e
    intel/fm10k
+   intel/idpf
    intel/igb
    intel/igbvf
    intel/ixgbe
diff --git a/Documentation/networking/device_drivers/ethernet/intel/idpf.rst b/Documentation/networking/device_drivers/ethernet/intel/idpf.rst
new file mode 100644
index 000000000000..adb16e2abd21
--- /dev/null
+++ b/Documentation/networking/device_drivers/ethernet/intel/idpf.rst
@@ -0,0 +1,160 @@
+.. SPDX-License-Identifier: GPL-2.0+
+
+==========================================================================
+idpf Linux* Base Driver for the Intel(R) Infrastructure Data Path Function
+==========================================================================
+
+Intel idpf Linux driver.
+Copyright(C) 2023 Intel Corporation.
+
+.. contents::
+
+The idpf driver serves as both the Physical Function (PF) and Virtual Function
+(VF) driver for the Intel(R) Infrastructure Data Path Function.
+
+Driver information can be obtained using ethtool, lspci, and ip.
+
+For questions related to hardware requirements, refer to the documentation
+supplied with your Intel adapter. All hardware requirements listed apply to use
+with Linux.
+
+
+Identifying Your Adapter
+========================
+For information on how to identify your adapter, and for the latest Intel
+network drivers, refer to the Intel Support website:
+http://www.intel.com/support
+
+
+Additional Features and Configurations
+======================================
+
+ethtool
+-------
+The driver utilizes the ethtool interface for driver configuration and
+diagnostics, as well as displaying statistical information. The latest ethtool
+version is required for this functionality. If you don't have one yet, you can
+obtain it at:
+https://kernel.org/pub/software/network/ethtool/
+
+
+Viewing Link Messages
+---------------------
+Link messages will not be displayed to the console if the distribution is
+restricting system messages. In order to see network driver link messages on
+your console, set dmesg to eight by entering the following::
+
+  # dmesg -n 8
+
+.. note::
+   This setting is not saved across reboots.
+
+
+Jumbo Frames
+------------
+Jumbo Frames support is enabled by changing the Maximum Transmission Unit (MTU)
+to a value larger than the default value of 1500.
+
+Use the ip command to increase the MTU size. For example, enter the following
+where <ethX> is the interface number::
+
+  # ip link set mtu 9000 dev <ethX>
+  # ip link set up dev <ethX>
+
+.. note::
+   The maximum MTU setting for jumbo frames is 9706. This corresponds to the
+   maximum jumbo frame size of 9728 bytes.
+
+.. note::
+   This driver will attempt to use multiple page sized buffers to receive
+   each jumbo packet. This should help to avoid buffer starvation issues when
+   allocating receive packets.
+
+.. note::
+   Packet loss may have a greater impact on throughput when you use jumbo
+   frames. If you observe a drop in performance after enabling jumbo frames,
+   enabling flow control may mitigate the issue.
+
+
+Performance Optimization
+========================
+Driver defaults are meant to fit a wide variety of workloads, but if further
+optimization is required, we recommend experimenting with the following
+settings.
+
+
+Interrupt Rate Limiting
+-----------------------
+This driver supports an adaptive interrupt throttle rate (ITR) mechanism that
+is tuned for general workloads. The user can customize the interrupt rate
+control for specific workloads, via ethtool, adjusting the number of
+microseconds between interrupts.
+
+To set the interrupt rate manually, you must disable adaptive mode::
+
+  # ethtool -C <ethX> adaptive-rx off adaptive-tx off
+
+For lower CPU utilization:
+ - Disable adaptive ITR and lower Rx and Tx interrupts. The examples below
+   affect every queue of the specified interface.
+
+ - Setting rx-usecs and tx-usecs to 80 will limit interrupts to about
+   12,500 interrupts per second per queue::
+
+     # ethtool -C <ethX> adaptive-rx off adaptive-tx off rx-usecs 80
+     tx-usecs 80
+
+For reduced latency:
+ - Disable adaptive ITR and ITR by setting rx-usecs and tx-usecs to 0
+   using ethtool::
+
+     # ethtool -C <ethX> adaptive-rx off adaptive-tx off rx-usecs 0
+     tx-usecs 0
+
+Per-queue interrupt rate settings:
+ - The following examples are for queues 1 and 3, but you can adjust other
+   queues.
+
+ - To disable Rx adaptive ITR and set static Rx ITR to 10 microseconds or
+   about 100,000 interrupts/second, for queues 1 and 3::
+
+     # ethtool --per-queue <ethX> queue_mask 0xa --coalesce adaptive-rx off
+     rx-usecs 10
+
+ - To show the current coalesce settings for queues 1 and 3::
+
+     # ethtool --per-queue <ethX> queue_mask 0xa --show-coalesce
+
+
+
+Virtualized Environments
+------------------------
+In addition to the other suggestions in this section, the following may be
+helpful to optimize performance in VMs.
+
+ - Using the appropriate mechanism (vcpupin) in the VM, pin the CPUs to
+   individual LCPUs, making sure to use a set of CPUs included in the
+   device's local_cpulist: /sys/class/net/<ethX>/device/local_cpulist.
+
+ - Configure as many Rx/Tx queues in the VM as available. (See the idpf driver
+   documentation for the number of queues supported.) For example::
+
+     # ethtool -L <virt_interface> rx <max> tx <max>
+
+
+Support
+=======
+For general information, go to the Intel support website at:
+http://www.intel.com/support/
+
+If an issue is identified with the released source code on a supported kernel
+with a supported adapter, email the specific information related to the issue
+to intel-wired-lan@lists.osuosl.org.
+
+
+Trademarks
+==========
+Intel is a trademark or registered trademark of Intel Corporation or its
+subsidiaries in the United States and/or other countries.
+
+* Other names and brands may be claimed as the property of others.
diff --git a/Documentation/networking/filter.rst b/Documentation/networking/filter.rst
index f69da5074860..7d8c5380492f 100644
--- a/Documentation/networking/filter.rst
+++ b/Documentation/networking/filter.rst
@@ -650,8 +650,8 @@ before a conversion to the new layout is being done behind the scenes!
 
 Currently, the classic BPF format is being used for JITing on most
 32-bit architectures, whereas x86-64, aarch64, s390x, powerpc64,
-sparc64, arm32, riscv64, riscv32 perform JIT compilation from eBPF
-instruction set.
+sparc64, arm32, riscv64, riscv32, loongarch64 perform JIT compilation
+from eBPF instruction set.
 
 Testing
 -------
diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst
index a66054d0763a..5bfa1837968c 100644
--- a/Documentation/networking/ip-sysctl.rst
+++ b/Documentation/networking/ip-sysctl.rst
@@ -745,6 +745,13 @@ tcp_comp_sack_nr - INTEGER
 
 	Default : 44
 
+tcp_backlog_ack_defer - BOOLEAN
+	If set, user thread processing socket backlog tries sending
+	one ACK for the whole queue. This helps to avoid potential
+	long latencies at end of a TCP socket syscall.
+
+	Default : true
+
 tcp_slow_start_after_idle - BOOLEAN
 	If set, provide RFC2861 behavior and time out the congestion
 	window after an idle period.  An idle period is defined at
diff --git a/Documentation/networking/xdp-rx-metadata.rst b/Documentation/networking/xdp-rx-metadata.rst
index 25ce72af81c2..205696780b78 100644
--- a/Documentation/networking/xdp-rx-metadata.rst
+++ b/Documentation/networking/xdp-rx-metadata.rst
@@ -105,6 +105,13 @@ bpf_tail_call
 Adding programs that access metadata kfuncs to the ``BPF_MAP_TYPE_PROG_ARRAY``
 is currently not supported.
 
+Supported Devices
+=================
+
+It is possible to query which kfunc the particular netdev implements via
+netlink. See ``xdp-rx-metadata-features`` attribute set in
+``Documentation/netlink/specs/netdev.yaml``.
+
 Example
 =======
 
diff --git a/MAINTAINERS b/MAINTAINERS
index dd5de540ec0b..80a46642a662 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3614,9 +3614,10 @@ F:	Documentation/devicetree/bindings/iio/accel/bosch,bma400.yaml
 F:	drivers/iio/accel/bma400*
 
 BPF JIT for ARM
-M:	Shubham Bansal <illusionist.neo@gmail.com>
+M:	Russell King <linux@armlinux.org.uk>
+M:	Puranjay Mohan <puranjay12@gmail.com>
 L:	bpf@vger.kernel.org
-S:	Odd Fixes
+S:	Maintained
 F:	arch/arm/net/
 
 BPF JIT for ARM64
@@ -4338,8 +4339,7 @@ F:	drivers/net/ethernet/broadcom/bcmsysport.*
 F:	drivers/net/ethernet/broadcom/unimac.h
 
 BROADCOM TG3 GIGABIT ETHERNET DRIVER
-M:	Siva Reddy Kallam <siva.kallam@broadcom.com>
-M:	Prashant Sreedharan <prashant@broadcom.com>
+M:	Pavan Chebbi <pavan.chebbi@broadcom.com>
 M:	Michael Chan <mchan@broadcom.com>
 L:	netdev@vger.kernel.org
 S:	Supported
@@ -6352,6 +6352,17 @@ F:	Documentation/networking/device_drivers/ethernet/freescale/dpaa2/switch-drive
 F:	drivers/net/ethernet/freescale/dpaa2/dpaa2-switch*
 F:	drivers/net/ethernet/freescale/dpaa2/dpsw*
 
+DPLL SUBSYSTEM
+M:	Vadim Fedorenko <vadim.fedorenko@linux.dev>
+M:	Arkadiusz Kubalewski <arkadiusz.kubalewski@intel.com>
+M:	Jiri Pirko <jiri@resnulli.us>
+L:	netdev@vger.kernel.org
+S:	Supported
+F:	Documentation/driver-api/dpll.rst
+F:	drivers/dpll/*
+F:	include/net/dpll.h
+F:	include/uapi/linux/dpll.h
+
 DRBD DRIVER
 M:	Philipp Reisner <philipp.reisner@linbit.com>
 M:	Lars Ellenberg <lars.ellenberg@linbit.com>
@@ -14347,9 +14358,11 @@ MIPS/LOONGSON1 ARCHITECTURE
 M:	Keguang Zhang <keguang.zhang@gmail.com>
 L:	linux-mips@vger.kernel.org
 S:	Maintained
+F:	Documentation/devicetree/bindings/*/loongson,ls1*.yaml
 F:	arch/mips/include/asm/mach-loongson32/
 F:	arch/mips/loongson32/
 F:	drivers/*/*loongson1*
+F:	drivers/net/ethernet/stmicro/stmmac/dwmac-loongson1.c
 
 MIPS/LOONGSON2EF ARCHITECTURE
 M:	Jiaxun Yang <jiaxun.yang@flygoat.com>
diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
index 6a1c9fca5260..1d672457d02f 100644
--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit_32.c
@@ -2,6 +2,7 @@
 /*
  * Just-In-Time compiler for eBPF filters on 32bit ARM
  *
+ * Copyright (c) 2023 Puranjay Mohan <puranjay12@gmail.com>
  * Copyright (c) 2017 Shubham Bansal <illusionist.neo@gmail.com>
  * Copyright (c) 2011 Mircea Gherzan <mgherzan@gmail.com>
  */
@@ -15,6 +16,7 @@
 #include <linux/string.h>
 #include <linux/slab.h>
 #include <linux/if_vlan.h>
+#include <linux/math64.h>
 
 #include <asm/cacheflush.h>
 #include <asm/hwcap.h>
@@ -228,6 +230,44 @@ static u32 jit_mod32(u32 dividend, u32 divisor)
 	return dividend % divisor;
 }
 
+static s32 jit_sdiv32(s32 dividend, s32 divisor)
+{
+	return dividend / divisor;
+}
+
+static s32 jit_smod32(s32 dividend, s32 divisor)
+{
+	return dividend % divisor;
+}
+
+/* Wrappers for 64-bit div/mod */
+static u64 jit_udiv64(u64 dividend, u64 divisor)
+{
+	return div64_u64(dividend, divisor);
+}
+
+static u64 jit_mod64(u64 dividend, u64 divisor)
+{
+	u64 rem;
+
+	div64_u64_rem(dividend, divisor, &rem);
+	return rem;
+}
+
+static s64 jit_sdiv64(s64 dividend, s64 divisor)
+{
+	return div64_s64(dividend, divisor);
+}
+
+static s64 jit_smod64(s64 dividend, s64 divisor)
+{
+	u64 q;
+
+	q = div64_s64(dividend, divisor);
+
+	return dividend - q * divisor;
+}
+
 static inline void _emit(int cond, u32 inst, struct jit_ctx *ctx)
 {
 	inst |= (cond << 28);
@@ -333,6 +373,9 @@ static u32 arm_bpf_ldst_imm8(u32 op, u8 rt, u8 rn, s16 imm8)
 #define ARM_LDRD_I(rt, rn, off)	arm_bpf_ldst_imm8(ARM_INST_LDRD_I, rt, rn, off)
 #define ARM_LDRH_I(rt, rn, off)	arm_bpf_ldst_imm8(ARM_INST_LDRH_I, rt, rn, off)
 
+#define ARM_LDRSH_I(rt, rn, off) arm_bpf_ldst_imm8(ARM_INST_LDRSH_I, rt, rn, off)
+#define ARM_LDRSB_I(rt, rn, off) arm_bpf_ldst_imm8(ARM_INST_LDRSB_I, rt, rn, off)
+
 #define ARM_STR_I(rt, rn, off)	arm_bpf_ldst_imm12(ARM_INST_STR_I, rt, rn, off)
 #define ARM_STRB_I(rt, rn, off)	arm_bpf_ldst_imm12(ARM_INST_STRB_I, rt, rn, off)
 #define ARM_STRD_I(rt, rn, off)	arm_bpf_ldst_imm8(ARM_INST_STRD_I, rt, rn, off)
@@ -474,17 +517,18 @@ static inline int epilogue_offset(const struct jit_ctx *ctx)
 	return to - from - 2;
 }
 
-static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op)
+static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op, u8 sign)
 {
 	const int exclude_mask = BIT(ARM_R0) | BIT(ARM_R1);
 	const s8 *tmp = bpf2a32[TMP_REG_1];
+	u32 dst;
 
 #if __LINUX_ARM_ARCH__ == 7
 	if (elf_hwcap & HWCAP_IDIVA) {
-		if (op == BPF_DIV)
-			emit(ARM_UDIV(rd, rm, rn), ctx);
-		else {
-			emit(ARM_UDIV(ARM_IP, rm, rn), ctx);
+		if (op == BPF_DIV) {
+			emit(sign ? ARM_SDIV(rd, rm, rn) : ARM_UDIV(rd, rm, rn), ctx);
+		} else {
+			emit(sign ? ARM_SDIV(ARM_IP, rm, rn) : ARM_UDIV(ARM_IP, rm, rn), ctx);
 			emit(ARM_MLS(rd, rn, ARM_IP, rm), ctx);
 		}
 		return;
@@ -512,8 +556,19 @@ static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op)
 	emit(ARM_PUSH(CALLER_MASK & ~exclude_mask), ctx);
 
 	/* Call appropriate function */
-	emit_mov_i(ARM_IP, op == BPF_DIV ?
-		   (u32)jit_udiv32 : (u32)jit_mod32, ctx);
+	if (sign) {
+		if (op == BPF_DIV)
+			dst = (u32)jit_sdiv32;
+		else
+			dst = (u32)jit_smod32;
+	} else {
+		if (op == BPF_DIV)
+			dst = (u32)jit_udiv32;
+		else
+			dst = (u32)jit_mod32;
+	}
+
+	emit_mov_i(ARM_IP, dst, ctx);
 	emit_blx_r(ARM_IP, ctx);
 
 	/* Restore caller-saved registers from stack */
@@ -530,6 +585,78 @@ static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op)
 		emit(ARM_MOV_R(ARM_R0, tmp[1]), ctx);
 }
 
+static inline void emit_udivmod64(const s8 *rd, const s8 *rm, const s8 *rn, struct jit_ctx *ctx,
+				  u8 op, u8 sign)
+{
+	u32 dst;
+
+	/* Push caller-saved registers on stack */
+	emit(ARM_PUSH(CALLER_MASK), ctx);
+
+	/*
+	 * As we are implementing 64-bit div/mod as function calls, We need to put the dividend in
+	 * R0-R1 and the divisor in R2-R3. As we have already pushed these registers on the stack,
+	 * we can recover them later after returning from the function call.
+	 */
+	if (rm[1] != ARM_R0 || rn[1] != ARM_R2) {
+		/*
+		 * Move Rm to {R1, R0} if it is not already there.
+		 */
+		if (rm[1] != ARM_R0) {
+			if (rn[1] == ARM_R0)
+				emit(ARM_PUSH(BIT(ARM_R0) | BIT(ARM_R1)), ctx);
+			emit(ARM_MOV_R(ARM_R1, rm[0]), ctx);
+			emit(ARM_MOV_R(ARM_R0, rm[1]), ctx);
+			if (rn[1] == ARM_R0) {
+				emit(ARM_POP(BIT(ARM_R2) | BIT(ARM_R3)), ctx);
+				goto cont;
+			}
+		}
+		/*
+		 * Move Rn to {R3, R2} if it is not already there.
+		 */
+		if (rn[1] != ARM_R2) {
+			emit(ARM_MOV_R(ARM_R3, rn[0]), ctx);
+			emit(ARM_MOV_R(ARM_R2, rn[1]), ctx);
+		}
+	}
+
+cont:
+
+	/* Call appropriate function */
+	if (sign) {
+		if (op == BPF_DIV)
+			dst = (u32)jit_sdiv64;
+		else
+			dst = (u32)jit_smod64;
+	} else {
+		if (op == BPF_DIV)
+			dst = (u32)jit_udiv64;
+		else
+			dst = (u32)jit_mod64;
+	}
+
+	emit_mov_i(ARM_IP, dst, ctx);
+	emit_blx_r(ARM_IP, ctx);
+
+	/* Save return value */
+	if (rd[1] != ARM_R0) {
+		emit(ARM_MOV_R(rd[0], ARM_R1), ctx);
+		emit(ARM_MOV_R(rd[1], ARM_R0), ctx);
+	}
+
+	/* Recover {R3, R2} and {R1, R0} from stack if they are not Rd */
+	if (rd[1] != ARM_R0 && rd[1] != ARM_R2) {
+		emit(ARM_POP(CALLER_MASK), ctx);
+	} else if (rd[1] != ARM_R0) {
+		emit(ARM_POP(BIT(ARM_R0) | BIT(ARM_R1)), ctx);
+		emit(ARM_ADD_I(ARM_SP, ARM_SP, 8), ctx);
+	} else {
+		emit(ARM_ADD_I(ARM_SP, ARM_SP, 8), ctx);
+		emit(ARM_POP(BIT(ARM_R2) | BIT(ARM_R3)), ctx);
+	}
+}
+
 /* Is the translated BPF register on stack? */
 static bool is_stacked(s8 reg)
 {
@@ -744,12 +871,16 @@ static inline void emit_a32_alu_r64(const bool is64, const s8 dst[],
 }
 
 /* dst = src (4 bytes)*/
-static inline void emit_a32_mov_r(const s8 dst, const s8 src,
+static inline void emit_a32_mov_r(const s8 dst, const s8 src, const u8 off,
 				  struct jit_ctx *ctx) {
 	const s8 *tmp = bpf2a32[TMP_REG_1];
 	s8 rt;
 
 	rt = arm_bpf_get_reg32(src, tmp[0], ctx);
+	if (off && off != 32) {
+		emit(ARM_LSL_I(rt, rt, 32 - off), ctx);
+		emit(ARM_ASR_I(rt, rt, 32 - off), ctx);
+	}
 	arm_bpf_put_reg32(dst, rt, ctx);
 }
 
@@ -758,15 +889,15 @@ static inline void emit_a32_mov_r64(const bool is64, const s8 dst[],
 				  const s8 src[],
 				  struct jit_ctx *ctx) {
 	if (!is64) {
-		emit_a32_mov_r(dst_lo, src_lo, ctx);
+		emit_a32_mov_r(dst_lo, src_lo, 0, ctx);
 		if (!ctx->prog->aux->verifier_zext)
 			/* Zero out high 4 bytes */
 			emit_a32_mov_i(dst_hi, 0, ctx);
 	} else if (__LINUX_ARM_ARCH__ < 6 &&
 		   ctx->cpu_architecture < CPU_ARCH_ARMv5TE) {
 		/* complete 8 byte move */
-		emit_a32_mov_r(dst_lo, src_lo, ctx);
-		emit_a32_mov_r(dst_hi, src_hi, ctx);
+		emit_a32_mov_r(dst_lo, src_lo, 0, ctx);
+		emit_a32_mov_r(dst_hi, src_hi, 0, ctx);
 	} else if (is_stacked(src_lo) && is_stacked(dst_lo)) {
 		const u8 *tmp = bpf2a32[TMP_REG_1];
 
@@ -782,6 +913,24 @@ static inline void emit_a32_mov_r64(const bool is64, const s8 dst[],
 	}
 }
 
+/* dst = (signed)src */
+static inline void emit_a32_movsx_r64(const bool is64, const u8 off, const s8 dst[], const s8 src[],
+				      struct jit_ctx *ctx) {
+	const s8 *tmp = bpf2a32[TMP_REG_1];
+	const s8 *rt;
+
+	rt = arm_bpf_get_reg64(dst, tmp, ctx);
+
+	emit_a32_mov_r(dst_lo, src_lo, off, ctx);
+	if (!is64) {
+		if (!ctx->prog->aux->verifier_zext)
+			/* Zero out high 4 bytes */
+			emit_a32_mov_i(dst_hi, 0, ctx);
+	} else {
+		emit(ARM_ASR_I(rt[0], rt[1], 31), ctx);
+	}
+}
+
 /* Shift operations */
 static inline void emit_a32_alu_i(const s8 dst, const u32 val,
 				struct jit_ctx *ctx, const u8 op) {
@@ -1026,6 +1175,24 @@ static bool is_ldst_imm(s16 off, const u8 size)
 	return -off_max <= off && off <= off_max;
 }
 
+static bool is_ldst_imm8(s16 off, const u8 size)
+{
+	s16 off_max = 0;
+
+	switch (size) {
+	case BPF_B:
+		off_max = 0xff;
+		break;
+	case BPF_W:
+		off_max = 0xfff;
+		break;
+	case BPF_H:
+		off_max = 0xff;
+		break;
+	}
+	return -off_max <= off && off <= off_max;
+}
+
 /* *(size *)(dst + off) = src */
 static inline void emit_str_r(const s8 dst, const s8 src[],
 			      s16 off, struct jit_ctx *ctx, const u8 sz){
@@ -1105,6 +1272,50 @@ static inline void emit_ldx_r(const s8 dst[], const s8 src,
 	arm_bpf_put_reg64(dst, rd, ctx);
 }
 
+/* dst = *(signed size*)(src + off) */
+static inline void emit_ldsx_r(const s8 dst[], const s8 src,
+			       s16 off, struct jit_ctx *ctx, const u8 sz){
+	const s8 *tmp = bpf2a32[TMP_REG_1];
+	const s8 *rd = is_stacked(dst_lo) ? tmp : dst;
+	s8 rm = src;
+	int add_off;
+
+	if (!is_ldst_imm8(off, sz)) {
+		/*
+		 * offset does not fit in the load/store immediate,
+		 * construct an ADD instruction to apply the offset.
+		 */
+		add_off = imm8m(off);
+		if (add_off > 0) {
+			emit(ARM_ADD_I(tmp[0], src, add_off), ctx);
+			rm = tmp[0];
+		} else {
+			emit_a32_mov_i(tmp[0], off, ctx);
+			emit(ARM_ADD_R(tmp[0], tmp[0], src), ctx);
+			rm = tmp[0];
+		}
+		off = 0;
+	}
+
+	switch (sz) {
+	case BPF_B:
+		/* Load a Byte with sign extension*/
+		emit(ARM_LDRSB_I(rd[1], rm, off), ctx);
+		break;
+	case BPF_H:
+		/* Load a HalfWord with sign extension*/
+		emit(ARM_LDRSH_I(rd[1], rm, off), ctx);
+		break;
+	case BPF_W:
+		/* Load a Word*/
+		emit(ARM_LDR_I(rd[1], rm, off), ctx);
+		break;
+	}
+	/* Carry the sign extension to upper 32 bits */
+	emit(ARM_ASR_I(rd[0], rd[1], 31), ctx);
+	arm_bpf_put_reg64(dst, rd, ctx);
+}
+
 /* Arithmatic Operation */
 static inline void emit_ar_r(const u8 rd, const u8 rt, const u8 rm,
 			     const u8 rn, struct jit_ctx *ctx, u8 op,
@@ -1385,7 +1596,10 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
 				emit_a32_mov_i(dst_hi, 0, ctx);
 				break;
 			}
-			emit_a32_mov_r64(is64, dst, src, ctx);
+			if (insn->off)
+				emit_a32_movsx_r64(is64, insn->off, dst, src, ctx);
+			else
+				emit_a32_mov_r64(is64, dst, src, ctx);
 			break;
 		case BPF_K:
 			/* Sign-extend immediate value to destination reg */
@@ -1461,7 +1675,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
 			rt = src_lo;
 			break;
 		}
-		emit_udivmod(rd_lo, rd_lo, rt, ctx, BPF_OP(code));
+		emit_udivmod(rd_lo, rd_lo, rt, ctx, BPF_OP(code), off);
 		arm_bpf_put_reg32(dst_lo, rd_lo, ctx);
 		if (!ctx->prog->aux->verifier_zext)
 			emit_a32_mov_i(dst_hi, 0, ctx);
@@ -1470,7 +1684,19 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
 	case BPF_ALU64 | BPF_DIV | BPF_X:
 	case BPF_ALU64 | BPF_MOD | BPF_K:
 	case BPF_ALU64 | BPF_MOD | BPF_X:
-		goto notyet;
+		rd = arm_bpf_get_reg64(dst, tmp2, ctx);
+		switch (BPF_SRC(code)) {
+		case BPF_X:
+			rs = arm_bpf_get_reg64(src, tmp, ctx);
+			break;
+		case BPF_K:
+			rs = tmp;
+			emit_a32_mov_se_i64(is64, rs, imm, ctx);
+			break;
+		}
+		emit_udivmod64(rd, rd, rs, ctx, BPF_OP(code), off);
+		arm_bpf_put_reg64(dst, rd, ctx);
+		break;
 	/* dst = dst << imm */
 	/* dst = dst >> imm */
 	/* dst = dst >> imm (signed) */
@@ -1545,10 +1771,12 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
 		break;
 	/* dst = htole(dst) */
 	/* dst = htobe(dst) */
-	case BPF_ALU | BPF_END | BPF_FROM_LE:
-	case BPF_ALU | BPF_END | BPF_FROM_BE:
+	case BPF_ALU | BPF_END | BPF_FROM_LE: /* also BPF_TO_LE */
+	case BPF_ALU | BPF_END | BPF_FROM_BE: /* also BPF_TO_BE */
+	/* dst = bswap(dst) */
+	case BPF_ALU64 | BPF_END | BPF_FROM_LE: /* also BPF_TO_LE */
 		rd = arm_bpf_get_reg64(dst, tmp, ctx);
-		if (BPF_SRC(code) == BPF_FROM_LE)
+		if (BPF_SRC(code) == BPF_FROM_LE && BPF_CLASS(code) != BPF_ALU64)
 			goto emit_bswap_uxt;
 		switch (imm) {
 		case 16:
@@ -1603,8 +1831,15 @@ exit:
 	case BPF_LDX | BPF_MEM | BPF_H:
 	case BPF_LDX | BPF_MEM | BPF_B:
 	case BPF_LDX | BPF_MEM | BPF_DW:
+	/* LDSX: dst = *(signed size *)(src + off) */
+	case BPF_LDX | BPF_MEMSX | BPF_B:
+	case BPF_LDX | BPF_MEMSX | BPF_H:
+	case BPF_LDX | BPF_MEMSX | BPF_W:
 		rn = arm_bpf_get_reg32(src_lo, tmp2[1], ctx);
-		emit_ldx_r(dst, rn, off, ctx, BPF_SIZE(code));
+		if (BPF_MODE(insn->code) == BPF_MEMSX)
+			emit_ldsx_r(dst, rn, off, ctx, BPF_SIZE(code));
+		else
+			emit_ldx_r(dst, rn, off, ctx, BPF_SIZE(code));
 		break;
 	/* speculation barrier */
 	case BPF_ST | BPF_NOSPEC:
@@ -1761,10 +1996,15 @@ go_jmp:
 		break;
 	/* JMP OFF */
 	case BPF_JMP | BPF_JA:
+	case BPF_JMP32 | BPF_JA:
 	{
-		if (off == 0)
+		if (BPF_CLASS(code) == BPF_JMP32 && imm != 0)
+			jmp_offset = bpf2a32_offset(i + imm, i, ctx);
+		else if (BPF_CLASS(code) == BPF_JMP && off != 0)
+			jmp_offset = bpf2a32_offset(i + off, i, ctx);
+		else
 			break;
-		jmp_offset = bpf2a32_offset(i+off, i, ctx);
+
 		check_imm24(jmp_offset);
 		emit(ARM_B(jmp_offset), ctx);
 		break;
diff --git a/arch/arm/net/bpf_jit_32.h b/arch/arm/net/bpf_jit_32.h
index e0b593a1498d..438f0e1f91a0 100644
--- a/arch/arm/net/bpf_jit_32.h
+++ b/arch/arm/net/bpf_jit_32.h
@@ -79,9 +79,11 @@
 #define ARM_INST_LDST__IMM12	0x00000fff
 #define ARM_INST_LDRB_I		0x05500000
 #define ARM_INST_LDRB_R		0x07d00000
+#define ARM_INST_LDRSB_I	0x015000d0
 #define ARM_INST_LDRD_I		0x014000d0
 #define ARM_INST_LDRH_I		0x015000b0
 #define ARM_INST_LDRH_R		0x019000b0
+#define ARM_INST_LDRSH_I	0x015000f0
 #define ARM_INST_LDR_I		0x05100000
 #define ARM_INST_LDR_R		0x07900000
 
@@ -137,6 +139,7 @@
 #define ARM_INST_TST_I		0x03100000
 
 #define ARM_INST_UDIV		0x0730f010
+#define ARM_INST_SDIV		0x0710f010
 
 #define ARM_INST_UMULL		0x00800090
 
@@ -265,6 +268,7 @@
 #define ARM_TST_I(rn, imm)	_AL3_I(ARM_INST_TST, 0, rn, imm)
 
 #define ARM_UDIV(rd, rn, rm)	(ARM_INST_UDIV | (rd) << 16 | (rn) | (rm) << 8)
+#define ARM_SDIV(rd, rn, rm)	(ARM_INST_SDIV | (rd) << 16 | (rn) | (rm) << 8)
 
 #define ARM_UMULL(rd_lo, rd_hi, rn, rm)	(ARM_INST_UMULL | (rd_hi) << 16 \
 					 | (rd_lo) << 12 | (rm) << 8 | rn)
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 150d1c6543f7..7d4af64e3982 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -288,7 +288,7 @@ static bool is_lsi_offset(int offset, int scale)
 static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf)
 {
 	const struct bpf_prog *prog = ctx->prog;
-	const bool is_main_prog = prog->aux->func_idx == 0;
+	const bool is_main_prog = !bpf_is_subprog(prog);
 	const u8 r6 = bpf2a64[BPF_REG_6];
 	const u8 r7 = bpf2a64[BPF_REG_7];
 	const u8 r8 = bpf2a64[BPF_REG_8];
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index e507692e51e7..bf06b7283f0c 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -556,7 +556,7 @@ static void bpf_jit_prologue(struct bpf_jit *jit, struct bpf_prog *fp,
 	EMIT6_PCREL_RILC(0xc0040000, 0, jit->prologue_plt);
 	jit->prologue_plt_ret = jit->prg;
 
-	if (fp->aux->func_idx == 0) {
+	if (!bpf_is_subprog(fp)) {
 		/* Initialize the tail call counter in the main program. */
 		/* xc STK_OFF_TCCNT(4,%r15),STK_OFF_TCCNT(%r15) */
 		_EMIT6(0xd703f000 | STK_OFF_TCCNT, 0xf000 | STK_OFF_TCCNT);
@@ -670,15 +670,18 @@ static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth)
 static int get_probe_mem_regno(const u8 *insn)
 {
 	/*
-	 * insn must point to llgc, llgh, llgf or lg, which have destination
-	 * register at the same position.
+	 * insn must point to llgc, llgh, llgf, lg, lgb, lgh or lgf, which have
+	 * destination register at the same position.
 	 */
-	if (insn[0] != 0xe3) /* common llgc, llgh, llgf and lg prefix */
+	if (insn[0] != 0xe3) /* common prefix */
 		return -1;
 	if (insn[5] != 0x90 && /* llgc */
 	    insn[5] != 0x91 && /* llgh */
 	    insn[5] != 0x16 && /* llgf */
-	    insn[5] != 0x04) /* lg */
+	    insn[5] != 0x04 && /* lg */
+	    insn[5] != 0x77 && /* lgb */
+	    insn[5] != 0x15 && /* lgh */
+	    insn[5] != 0x14) /* lgf */
 		return -1;
 	return insn[1] >> 4;
 }
@@ -776,6 +779,7 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
 				 int i, bool extra_pass, u32 stack_depth)
 {
 	struct bpf_insn *insn = &fp->insnsi[i];
+	s16 branch_oc_off = insn->off;
 	u32 dst_reg = insn->dst_reg;
 	u32 src_reg = insn->src_reg;
 	int last, insn_count = 1;
@@ -788,22 +792,55 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
 	int err;
 
 	if (BPF_CLASS(insn->code) == BPF_LDX &&
-	    BPF_MODE(insn->code) == BPF_PROBE_MEM)
+	    (BPF_MODE(insn->code) == BPF_PROBE_MEM ||
+	     BPF_MODE(insn->code) == BPF_PROBE_MEMSX))
 		probe_prg = jit->prg;
 
 	switch (insn->code) {
 	/*
 	 * BPF_MOV
 	 */
-	case BPF_ALU | BPF_MOV | BPF_X: /* dst = (u32) src */
-		/* llgfr %dst,%src */
-		EMIT4(0xb9160000, dst_reg, src_reg);
-		if (insn_is_zext(&insn[1]))
-			insn_count = 2;
+	case BPF_ALU | BPF_MOV | BPF_X:
+		switch (insn->off) {
+		case 0: /* DST = (u32) SRC */
+			/* llgfr %dst,%src */
+			EMIT4(0xb9160000, dst_reg, src_reg);
+			if (insn_is_zext(&insn[1]))
+				insn_count = 2;
+			break;
+		case 8: /* DST = (u32)(s8) SRC */
+			/* lbr %dst,%src */
+			EMIT4(0xb9260000, dst_reg, src_reg);
+			/* llgfr %dst,%dst */
+			EMIT4(0xb9160000, dst_reg, dst_reg);
+			break;
+		case 16: /* DST = (u32)(s16) SRC */
+			/* lhr %dst,%src */
+			EMIT4(0xb9270000, dst_reg, src_reg);
+			/* llgfr %dst,%dst */
+			EMIT4(0xb9160000, dst_reg, dst_reg);
+			break;
+		}
 		break;
-	case BPF_ALU64 | BPF_MOV | BPF_X: /* dst = src */
-		/* lgr %dst,%src */
-		EMIT4(0xb9040000, dst_reg, src_reg);
+	case BPF_ALU64 | BPF_MOV | BPF_X:
+		switch (insn->off) {
+		case 0: /* DST = SRC */
+			/* lgr %dst,%src */
+			EMIT4(0xb9040000, dst_reg, src_reg);
+			break;
+		case 8: /* DST = (s8) SRC */
+			/* lgbr %dst,%src */
+			EMIT4(0xb9060000, dst_reg, src_reg);
+			break;
+		case 16: /* DST = (s16) SRC */
+			/* lghr %dst,%src */
+			EMIT4(0xb9070000, dst_reg, src_reg);
+			break;
+		case 32: /* DST = (s32) SRC */
+			/* lgfr %dst,%src */
+			EMIT4(0xb9140000, dst_reg, src_reg);
+			break;
+		}
 		break;
 	case BPF_ALU | BPF_MOV | BPF_K: /* dst = (u32) imm */
 		/* llilf %dst,imm */
@@ -912,66 +949,115 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
 	/*
 	 * BPF_DIV / BPF_MOD
 	 */
-	case BPF_ALU | BPF_DIV | BPF_X: /* dst = (u32) dst / (u32) src */
-	case BPF_ALU | BPF_MOD | BPF_X: /* dst = (u32) dst % (u32) src */
+	case BPF_ALU | BPF_DIV | BPF_X:
+	case BPF_ALU | BPF_MOD | BPF_X:
 	{
 		int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0;
 
-		/* lhi %w0,0 */
-		EMIT4_IMM(0xa7080000, REG_W0, 0);
-		/* lr %w1,%dst */
-		EMIT2(0x1800, REG_W1, dst_reg);
-		/* dlr %w0,%src */
-		EMIT4(0xb9970000, REG_W0, src_reg);
+		switch (off) {
+		case 0: /* dst = (u32) dst {/,%} (u32) src */
+			/* xr %w0,%w0 */
+			EMIT2(0x1700, REG_W0, REG_W0);
+			/* lr %w1,%dst */
+			EMIT2(0x1800, REG_W1, dst_reg);
+			/* dlr %w0,%src */
+			EMIT4(0xb9970000, REG_W0, src_reg);
+			break;
+		case 1: /* dst = (u32) ((s32) dst {/,%} (s32) src) */
+			/* lgfr %r1,%dst */
+			EMIT4(0xb9140000, REG_W1, dst_reg);
+			/* dsgfr %r0,%src */
+			EMIT4(0xb91d0000, REG_W0, src_reg);
+			break;
+		}
 		/* llgfr %dst,%rc */
 		EMIT4(0xb9160000, dst_reg, rc_reg);
 		if (insn_is_zext(&insn[1]))
 			insn_count = 2;
 		break;
 	}
-	case BPF_ALU64 | BPF_DIV | BPF_X: /* dst = dst / src */
-	case BPF_ALU64 | BPF_MOD | BPF_X: /* dst = dst % src */
+	case BPF_ALU64 | BPF_DIV | BPF_X:
+	case BPF_ALU64 | BPF_MOD | BPF_X:
 	{
 		int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0;
 
-		/* lghi %w0,0 */
-		EMIT4_IMM(0xa7090000, REG_W0, 0);
-		/* lgr %w1,%dst */
-		EMIT4(0xb9040000, REG_W1, dst_reg);
-		/* dlgr %w0,%dst */
-		EMIT4(0xb9870000, REG_W0, src_reg);
+		switch (off) {
+		case 0: /* dst = dst {/,%} src */
+			/* lghi %w0,0 */
+			EMIT4_IMM(0xa7090000, REG_W0, 0);
+			/* lgr %w1,%dst */
+			EMIT4(0xb9040000, REG_W1, dst_reg);
+			/* dlgr %w0,%src */
+			EMIT4(0xb9870000, REG_W0, src_reg);
+			break;
+		case 1: /* dst = (s64) dst {/,%} (s64) src */
+			/* lgr %w1,%dst */
+			EMIT4(0xb9040000, REG_W1, dst_reg);
+			/* dsgr %w0,%src */
+			EMIT4(0xb90d0000, REG_W0, src_reg);
+			break;
+		}
 		/* lgr %dst,%rc */
 		EMIT4(0xb9040000, dst_reg, rc_reg);
 		break;
 	}
-	case BPF_ALU | BPF_DIV | BPF_K: /* dst = (u32) dst / (u32) imm */
-	case BPF_ALU | BPF_MOD | BPF_K: /* dst = (u32) dst % (u32) imm */
+	case BPF_ALU | BPF_DIV | BPF_K:
+	case BPF_ALU | BPF_MOD | BPF_K:
 	{
 		int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0;
 
 		if (imm == 1) {
 			if (BPF_OP(insn->code) == BPF_MOD)
-				/* lhgi %dst,0 */
+				/* lghi %dst,0 */
 				EMIT4_IMM(0xa7090000, dst_reg, 0);
 			else
 				EMIT_ZERO(dst_reg);
 			break;
 		}
-		/* lhi %w0,0 */
-		EMIT4_IMM(0xa7080000, REG_W0, 0);
-		/* lr %w1,%dst */
-		EMIT2(0x1800, REG_W1, dst_reg);
 		if (!is_first_pass(jit) && can_use_ldisp_for_lit32(jit)) {
-			/* dl %w0,<d(imm)>(%l) */
-			EMIT6_DISP_LH(0xe3000000, 0x0097, REG_W0, REG_0, REG_L,
-				      EMIT_CONST_U32(imm));
+			switch (off) {
+			case 0: /* dst = (u32) dst {/,%} (u32) imm */
+				/* xr %w0,%w0 */
+				EMIT2(0x1700, REG_W0, REG_W0);
+				/* lr %w1,%dst */
+				EMIT2(0x1800, REG_W1, dst_reg);
+				/* dl %w0,<d(imm)>(%l) */
+				EMIT6_DISP_LH(0xe3000000, 0x0097, REG_W0, REG_0,
+					      REG_L, EMIT_CONST_U32(imm));
+				break;
+			case 1: /* dst = (s32) dst {/,%} (s32) imm */
+				/* lgfr %r1,%dst */
+				EMIT4(0xb9140000, REG_W1, dst_reg);
+				/* dsgf %r0,<d(imm)>(%l) */
+				EMIT6_DISP_LH(0xe3000000, 0x001d, REG_W0, REG_0,
+					      REG_L, EMIT_CONST_U32(imm));
+				break;
+			}
 		} else {
-			/* lgfrl %dst,imm */
-			EMIT6_PCREL_RILB(0xc40c0000, dst_reg,
-					 _EMIT_CONST_U32(imm));
-			jit->seen |= SEEN_LITERAL;
-			/* dlr %w0,%dst */
-			EMIT4(0xb9970000, REG_W0, dst_reg);
+			switch (off) {
+			case 0: /* dst = (u32) dst {/,%} (u32) imm */
+				/* xr %w0,%w0 */
+				EMIT2(0x1700, REG_W0, REG_W0);
+				/* lr %w1,%dst */
+				EMIT2(0x1800, REG_W1, dst_reg);
+				/* lrl %dst,imm */
+				EMIT6_PCREL_RILB(0xc40d0000, dst_reg,
+						 _EMIT_CONST_U32(imm));
+				jit->seen |= SEEN_LITERAL;
+				/* dlr %w0,%dst */
+				EMIT4(0xb9970000, REG_W0, dst_reg);
+				break;
+			case 1: /* dst = (s32) dst {/,%} (s32) imm */
+				/* lgfr %w1,%dst */
+				EMIT4(0xb9140000, REG_W1, dst_reg);
+				/* lgfrl %dst,imm */
+				EMIT6_PCREL_RILB(0xc40c0000, dst_reg,
+						 _EMIT_CONST_U32(imm));
+				jit->seen |= SEEN_LITERAL;
+				/* dsgr %w0,%dst */
+				EMIT4(0xb90d0000, REG_W0, dst_reg);
+				break;
+			}
 		}
 		/* llgfr %dst,%rc */
 		EMIT4(0xb9160000, dst_reg, rc_reg);
@@ -979,8 +1065,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
 			insn_count = 2;
 		break;
 	}
-	case BPF_ALU64 | BPF_DIV | BPF_K: /* dst = dst / imm */
-	case BPF_ALU64 | BPF_MOD | BPF_K: /* dst = dst % imm */
+	case BPF_ALU64 | BPF_DIV | BPF_K:
+	case BPF_ALU64 | BPF_MOD | BPF_K:
 	{
 		int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0;
 
@@ -990,21 +1076,50 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
 				EMIT4_IMM(0xa7090000, dst_reg, 0);
 			break;
 		}
-		/* lghi %w0,0 */
-		EMIT4_IMM(0xa7090000, REG_W0, 0);
-		/* lgr %w1,%dst */
-		EMIT4(0xb9040000, REG_W1, dst_reg);
 		if (!is_first_pass(jit) && can_use_ldisp_for_lit64(jit)) {
-			/* dlg %w0,<d(imm)>(%l) */
-			EMIT6_DISP_LH(0xe3000000, 0x0087, REG_W0, REG_0, REG_L,
-				      EMIT_CONST_U64(imm));
+			switch (off) {
+			case 0: /* dst = dst {/,%} imm */
+				/* lghi %w0,0 */
+				EMIT4_IMM(0xa7090000, REG_W0, 0);
+				/* lgr %w1,%dst */
+				EMIT4(0xb9040000, REG_W1, dst_reg);
+				/* dlg %w0,<d(imm)>(%l) */
+				EMIT6_DISP_LH(0xe3000000, 0x0087, REG_W0, REG_0,
+					      REG_L, EMIT_CONST_U64(imm));
+				break;
+			case 1: /* dst = (s64) dst {/,%} (s64) imm */
+				/* lgr %w1,%dst */
+				EMIT4(0xb9040000, REG_W1, dst_reg);
+				/* dsg %w0,<d(imm)>(%l) */
+				EMIT6_DISP_LH(0xe3000000, 0x000d, REG_W0, REG_0,
+					      REG_L, EMIT_CONST_U64(imm));
+				break;
+			}
 		} else {
-			/* lgrl %dst,imm */
-			EMIT6_PCREL_RILB(0xc4080000, dst_reg,
-					 _EMIT_CONST_U64(imm));
-			jit->seen |= SEEN_LITERAL;
-			/* dlgr %w0,%dst */
-			EMIT4(0xb9870000, REG_W0, dst_reg);
+			switch (off) {
+			case 0: /* dst = dst {/,%} imm */
+				/* lghi %w0,0 */
+				EMIT4_IMM(0xa7090000, REG_W0, 0);
+				/* lgr %w1,%dst */
+				EMIT4(0xb9040000, REG_W1, dst_reg);
+				/* lgrl %dst,imm */
+				EMIT6_PCREL_RILB(0xc4080000, dst_reg,
+						 _EMIT_CONST_U64(imm));
+				jit->seen |= SEEN_LITERAL;
+				/* dlgr %w0,%dst */
+				EMIT4(0xb9870000, REG_W0, dst_reg);
+				break;
+			case 1: /* dst = (s64) dst {/,%} (s64) imm */
+				/* lgr %w1,%dst */
+				EMIT4(0xb9040000, REG_W1, dst_reg);
+				/* lgrl %dst,imm */
+				EMIT6_PCREL_RILB(0xc4080000, dst_reg,
+						 _EMIT_CONST_U64(imm));
+				jit->seen |= SEEN_LITERAL;
+				/* dsgr %w0,%dst */
+				EMIT4(0xb90d0000, REG_W0, dst_reg);
+				break;
+			}
 		}
 		/* lgr %dst,%rc */
 		EMIT4(0xb9040000, dst_reg, rc_reg);
@@ -1217,6 +1332,7 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
 		}
 		break;
 	case BPF_ALU | BPF_END | BPF_FROM_LE:
+	case BPF_ALU64 | BPF_END | BPF_FROM_LE:
 		switch (imm) {
 		case 16: /* dst = (u16) cpu_to_le16(dst) */
 			/* lrvr %dst,%dst */
@@ -1374,6 +1490,12 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
 		if (insn_is_zext(&insn[1]))
 			insn_count = 2;
 		break;
+	case BPF_LDX | BPF_MEMSX | BPF_B: /* dst = *(s8 *)(ul) (src + off) */
+	case BPF_LDX | BPF_PROBE_MEMSX | BPF_B:
+		/* lgb %dst,0(off,%src) */
+		EMIT6_DISP_LH(0xe3000000, 0x0077, dst_reg, src_reg, REG_0, off);
+		jit->seen |= SEEN_MEM;
+		break;
 	case BPF_LDX | BPF_MEM | BPF_H: /* dst = *(u16 *)(ul) (src + off) */
 	case BPF_LDX | BPF_PROBE_MEM | BPF_H:
 		/* llgh %dst,0(off,%src) */
@@ -1382,6 +1504,12 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
 		if (insn_is_zext(&insn[1]))
 			insn_count = 2;
 		break;
+	case BPF_LDX | BPF_MEMSX | BPF_H: /* dst = *(s16 *)(ul) (src + off) */
+	case BPF_LDX | BPF_PROBE_MEMSX | BPF_H:
+		/* lgh %dst,0(off,%src) */
+		EMIT6_DISP_LH(0xe3000000, 0x0015, dst_reg, src_reg, REG_0, off);
+		jit->seen |= SEEN_MEM;
+		break;
 	case BPF_LDX | BPF_MEM | BPF_W: /* dst = *(u32 *)(ul) (src + off) */
 	case BPF_LDX | BPF_PROBE_MEM | BPF_W:
 		/* llgf %dst,off(%src) */
@@ -1390,6 +1518,12 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
 		if (insn_is_zext(&insn[1]))
 			insn_count = 2;
 		break;
+	case BPF_LDX | BPF_MEMSX | BPF_W: /* dst = *(s32 *)(ul) (src + off) */
+	case BPF_LDX | BPF_PROBE_MEMSX | BPF_W:
+		/* lgf %dst,off(%src) */
+		jit->seen |= SEEN_MEM;
+		EMIT6_DISP_LH(0xe3000000, 0x0014, dst_reg, src_reg, REG_0, off);
+		break;
 	case BPF_LDX | BPF_MEM | BPF_DW: /* dst = *(u64 *)(ul) (src + off) */
 	case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
 		/* lg %dst,0(off,%src) */
@@ -1570,6 +1704,9 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
 	 * instruction itself (loop) and for BPF with offset 0 we
 	 * branch to the instruction behind the branch.
 	 */
+	case BPF_JMP32 | BPF_JA: /* if (true) */
+		branch_oc_off = imm;
+		fallthrough;
 	case BPF_JMP | BPF_JA: /* if (true) */
 		mask = 0xf000; /* j */
 		goto branch_oc;
@@ -1738,14 +1875,16 @@ branch_xu:
 		break;
 branch_oc:
 		if (!is_first_pass(jit) &&
-		    can_use_rel(jit, addrs[i + off + 1])) {
+		    can_use_rel(jit, addrs[i + branch_oc_off + 1])) {
 			/* brc mask,off */
 			EMIT4_PCREL_RIC(0xa7040000,
-					mask >> 12, addrs[i + off + 1]);
+					mask >> 12,
+					addrs[i + branch_oc_off + 1]);
 		} else {
 			/* brcl mask,off */
 			EMIT6_PCREL_RILC(0xc0040000,
-					 mask >> 12, addrs[i + off + 1]);
+					 mask >> 12,
+					 addrs[i + branch_oc_off + 1]);
 		}
 		break;
 	}
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index a5930042139d..8c10d9abc239 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -16,6 +16,9 @@
 #include <asm/set_memory.h>
 #include <asm/nospec-branch.h>
 #include <asm/text-patching.h>
+#include <asm/unwind.h>
+
+static bool all_callee_regs_used[4] = {true, true, true, true};
 
 static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
 {
@@ -255,6 +258,14 @@ struct jit_context {
 /* Number of bytes that will be skipped on tailcall */
 #define X86_TAIL_CALL_OFFSET	(11 + ENDBR_INSN_SIZE)
 
+static void push_r12(u8 **pprog)
+{
+	u8 *prog = *pprog;
+
+	EMIT2(0x41, 0x54);   /* push r12 */
+	*pprog = prog;
+}
+
 static void push_callee_regs(u8 **pprog, bool *callee_regs_used)
 {
 	u8 *prog = *pprog;
@@ -270,6 +281,14 @@ static void push_callee_regs(u8 **pprog, bool *callee_regs_used)
 	*pprog = prog;
 }
 
+static void pop_r12(u8 **pprog)
+{
+	u8 *prog = *pprog;
+
+	EMIT2(0x41, 0x5C);   /* pop r12 */
+	*pprog = prog;
+}
+
 static void pop_callee_regs(u8 **pprog, bool *callee_regs_used)
 {
 	u8 *prog = *pprog;
@@ -291,7 +310,8 @@ static void pop_callee_regs(u8 **pprog, bool *callee_regs_used)
  * while jumping to another program
  */
 static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf,
-			  bool tail_call_reachable, bool is_subprog)
+			  bool tail_call_reachable, bool is_subprog,
+			  bool is_exception_cb)
 {
 	u8 *prog = *pprog;
 
@@ -303,12 +323,30 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf,
 	prog += X86_PATCH_SIZE;
 	if (!ebpf_from_cbpf) {
 		if (tail_call_reachable && !is_subprog)
+			/* When it's the entry of the whole tailcall context,
+			 * zeroing rax means initialising tail_call_cnt.
+			 */
 			EMIT2(0x31, 0xC0); /* xor eax, eax */
 		else
+			/* Keep the same instruction layout. */
 			EMIT2(0x66, 0x90); /* nop2 */
 	}
-	EMIT1(0x55);             /* push rbp */
-	EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */
+	/* Exception callback receives FP as third parameter */
+	if (is_exception_cb) {
+		EMIT3(0x48, 0x89, 0xF4); /* mov rsp, rsi */
+		EMIT3(0x48, 0x89, 0xD5); /* mov rbp, rdx */
+		/* The main frame must have exception_boundary as true, so we
+		 * first restore those callee-saved regs from stack, before
+		 * reusing the stack frame.
+		 */
+		pop_callee_regs(&prog, all_callee_regs_used);
+		pop_r12(&prog);
+		/* Reset the stack frame. */
+		EMIT3(0x48, 0x89, 0xEC); /* mov rsp, rbp */
+	} else {
+		EMIT1(0x55);             /* push rbp */
+		EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */
+	}
 
 	/* X86_TAIL_CALL_OFFSET is here */
 	EMIT_ENDBR();
@@ -467,7 +505,8 @@ static void emit_return(u8 **pprog, u8 *ip)
  *   goto *(prog->bpf_func + prologue_size);
  * out:
  */
-static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used,
+static void emit_bpf_tail_call_indirect(struct bpf_prog *bpf_prog,
+					u8 **pprog, bool *callee_regs_used,
 					u32 stack_depth, u8 *ip,
 					struct jit_context *ctx)
 {
@@ -517,7 +556,12 @@ static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used,
 	offset = ctx->tail_call_indirect_label - (prog + 2 - start);
 	EMIT2(X86_JE, offset);                    /* je out */
 
-	pop_callee_regs(&prog, callee_regs_used);
+	if (bpf_prog->aux->exception_boundary) {
+		pop_callee_regs(&prog, all_callee_regs_used);
+		pop_r12(&prog);
+	} else {
+		pop_callee_regs(&prog, callee_regs_used);
+	}
 
 	EMIT1(0x58);                              /* pop rax */
 	if (stack_depth)
@@ -541,7 +585,8 @@ static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used,
 	*pprog = prog;
 }
 
-static void emit_bpf_tail_call_direct(struct bpf_jit_poke_descriptor *poke,
+static void emit_bpf_tail_call_direct(struct bpf_prog *bpf_prog,
+				      struct bpf_jit_poke_descriptor *poke,
 				      u8 **pprog, u8 *ip,
 				      bool *callee_regs_used, u32 stack_depth,
 				      struct jit_context *ctx)
@@ -570,7 +615,13 @@ static void emit_bpf_tail_call_direct(struct bpf_jit_poke_descriptor *poke,
 	emit_jump(&prog, (u8 *)poke->tailcall_target + X86_PATCH_SIZE,
 		  poke->tailcall_bypass);
 
-	pop_callee_regs(&prog, callee_regs_used);
+	if (bpf_prog->aux->exception_boundary) {
+		pop_callee_regs(&prog, all_callee_regs_used);
+		pop_r12(&prog);
+	} else {
+		pop_callee_regs(&prog, callee_regs_used);
+	}
+
 	EMIT1(0x58);                                  /* pop rax */
 	if (stack_depth)
 		EMIT3_off32(0x48, 0x81, 0xC4, round_up(stack_depth, 8));
@@ -1018,6 +1069,10 @@ static void emit_shiftx(u8 **pprog, u32 dst_reg, u8 src_reg, bool is64, u8 op)
 
 #define INSN_SZ_DIFF (((addrs[i] - addrs[i - 1]) - (prog - temp)))
 
+/* mov rax, qword ptr [rbp - rounded_stack_depth - 8] */
+#define RESTORE_TAIL_CALL_CNT(stack)				\
+	EMIT3_off32(0x48, 0x8B, 0x85, -round_up(stack, 8) - 8)
+
 static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image,
 		  int oldproglen, struct jit_context *ctx, bool jmp_padding)
 {
@@ -1041,8 +1096,20 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
 
 	emit_prologue(&prog, bpf_prog->aux->stack_depth,
 		      bpf_prog_was_classic(bpf_prog), tail_call_reachable,
-		      bpf_prog->aux->func_idx != 0);
-	push_callee_regs(&prog, callee_regs_used);
+		      bpf_is_subprog(bpf_prog), bpf_prog->aux->exception_cb);
+	/* Exception callback will clobber callee regs for its own use, and
+	 * restore the original callee regs from main prog's stack frame.
+	 */
+	if (bpf_prog->aux->exception_boundary) {
+		/* We also need to save r12, which is not mapped to any BPF
+		 * register, as we throw after entry into the kernel, which may
+		 * overwrite r12.
+		 */
+		push_r12(&prog);
+		push_callee_regs(&prog, all_callee_regs_used);
+	} else {
+		push_callee_regs(&prog, callee_regs_used);
+	}
 
 	ilen = prog - temp;
 	if (rw_image)
@@ -1623,9 +1690,7 @@ st:			if (is_imm8(insn->off))
 
 			func = (u8 *) __bpf_call_base + imm32;
 			if (tail_call_reachable) {
-				/* mov rax, qword ptr [rbp - rounded_stack_depth - 8] */
-				EMIT3_off32(0x48, 0x8B, 0x85,
-					    -round_up(bpf_prog->aux->stack_depth, 8) - 8);
+				RESTORE_TAIL_CALL_CNT(bpf_prog->aux->stack_depth);
 				if (!imm32)
 					return -EINVAL;
 				offs = 7 + x86_call_depth_emit_accounting(&prog, func);
@@ -1641,13 +1706,15 @@ st:			if (is_imm8(insn->off))
 
 		case BPF_JMP | BPF_TAIL_CALL:
 			if (imm32)
-				emit_bpf_tail_call_direct(&bpf_prog->aux->poke_tab[imm32 - 1],
+				emit_bpf_tail_call_direct(bpf_prog,
+							  &bpf_prog->aux->poke_tab[imm32 - 1],
 							  &prog, image + addrs[i - 1],
 							  callee_regs_used,
 							  bpf_prog->aux->stack_depth,
 							  ctx);
 			else
-				emit_bpf_tail_call_indirect(&prog,
+				emit_bpf_tail_call_indirect(bpf_prog,
+							    &prog,
 							    callee_regs_used,
 							    bpf_prog->aux->stack_depth,
 							    image + addrs[i - 1],
@@ -1900,7 +1967,12 @@ emit_jmp:
 			seen_exit = true;
 			/* Update cleanup_addr */
 			ctx->cleanup_addr = proglen;
-			pop_callee_regs(&prog, callee_regs_used);
+			if (bpf_prog->aux->exception_boundary) {
+				pop_callee_regs(&prog, all_callee_regs_used);
+				pop_r12(&prog);
+			} else {
+				pop_callee_regs(&prog, callee_regs_used);
+			}
 			EMIT1(0xC9);         /* leave */
 			emit_return(&prog, image + addrs[i - 1] + (prog - temp));
 			break;
@@ -2400,6 +2472,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
 	 *                     [ ...        ]
 	 *                     [ stack_arg2 ]
 	 * RBP - arg_stack_off [ stack_arg1 ]
+	 * RSP                 [ tail_call_cnt ] BPF_TRAMP_F_TAIL_CALL_CTX
 	 */
 
 	/* room for return value of orig_call or fentry prog */
@@ -2464,6 +2537,8 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
 	else
 		/* sub rsp, stack_size */
 		EMIT4(0x48, 0x83, 0xEC, stack_size);
+	if (flags & BPF_TRAMP_F_TAIL_CALL_CTX)
+		EMIT1(0x50);		/* push rax */
 	/* mov QWORD PTR [rbp - rbx_off], rbx */
 	emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_6, -rbx_off);
 
@@ -2516,9 +2591,15 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
 		restore_regs(m, &prog, regs_off);
 		save_args(m, &prog, arg_stack_off, true);
 
+		if (flags & BPF_TRAMP_F_TAIL_CALL_CTX)
+			/* Before calling the original function, restore the
+			 * tail_call_cnt from stack to rax.
+			 */
+			RESTORE_TAIL_CALL_CNT(stack_size);
+
 		if (flags & BPF_TRAMP_F_ORIG_STACK) {
-			emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, 8);
-			EMIT2(0xff, 0xd0); /* call *rax */
+			emit_ldx(&prog, BPF_DW, BPF_REG_6, BPF_REG_FP, 8);
+			EMIT2(0xff, 0xd3); /* call *rbx */
 		} else {
 			/* call original function */
 			if (emit_rsb_call(&prog, orig_call, prog)) {
@@ -2569,7 +2650,12 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
 			ret = -EINVAL;
 			goto cleanup;
 		}
-	}
+	} else if (flags & BPF_TRAMP_F_TAIL_CALL_CTX)
+		/* Before running the original function, restore the
+		 * tail_call_cnt from stack to rax.
+		 */
+		RESTORE_TAIL_CALL_CNT(stack_size);
+
 	/* restore return value of orig_call or fentry prog back into RAX */
 	if (save_ret)
 		emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, -8);
@@ -2913,3 +2999,29 @@ void bpf_jit_free(struct bpf_prog *prog)
 
 	bpf_prog_unlock_free(prog);
 }
+
+bool bpf_jit_supports_exceptions(void)
+{
+	/* We unwind through both kernel frames (starting from within bpf_throw
+	 * call) and BPF frames. Therefore we require ORC unwinder to be enabled
+	 * to walk kernel frames and reach BPF frames in the stack trace.
+	 */
+	return IS_ENABLED(CONFIG_UNWINDER_ORC);
+}
+
+void arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp), void *cookie)
+{
+#if defined(CONFIG_UNWINDER_ORC)
+	struct unwind_state state;
+	unsigned long addr;
+
+	for (unwind_start(&state, current, NULL, NULL); !unwind_done(&state);
+	     unwind_next_frame(&state)) {
+		addr = unwind_get_return_address(&state);
+		if (!addr || !consume_fn(cookie, (u64)addr, (u64)state.sp, (u64)state.bp))
+			break;
+	}
+	return;
+#endif
+	WARN(1, "verification of programs using bpf_throw should have failed\n");
+}
diff --git a/drivers/Kconfig b/drivers/Kconfig
index efb66e25fa2d..8ba3e8b9ad72 100644
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -243,4 +243,6 @@ source "drivers/hte/Kconfig"
 
 source "drivers/cdx/Kconfig"
 
+source "drivers/dpll/Kconfig"
+
 endmenu
diff --git a/drivers/Makefile b/drivers/Makefile
index 1bec7819a837..722d15be0eb7 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -197,5 +197,6 @@ obj-$(CONFIG_PECI)		+= peci/
 obj-$(CONFIG_HTE)		+= hte/
 obj-$(CONFIG_DRM_ACCEL)		+= accel/
 obj-$(CONFIG_CDX_BUS)		+= cdx/
+obj-$(CONFIG_DPLL)		+= dpll/
 
 obj-$(CONFIG_S390)		+= s390/
diff --git a/drivers/dpll/Kconfig b/drivers/dpll/Kconfig
new file mode 100644
index 000000000000..a4cae73f20d3
--- /dev/null
+++ b/drivers/dpll/Kconfig
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Generic DPLL drivers configuration
+#
+
+config DPLL
+  bool
diff --git a/drivers/dpll/Makefile b/drivers/dpll/Makefile
new file mode 100644
index 000000000000..2e5b27850110
--- /dev/null
+++ b/drivers/dpll/Makefile
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for DPLL drivers.
+#
+
+obj-$(CONFIG_DPLL)      += dpll.o
+dpll-y                  += dpll_core.o
+dpll-y                  += dpll_netlink.o
+dpll-y                  += dpll_nl.o
diff --git a/drivers/dpll/dpll_core.c b/drivers/dpll/dpll_core.c
new file mode 100644
index 000000000000..3568149b9562
--- /dev/null
+++ b/drivers/dpll/dpll_core.c
@@ -0,0 +1,798 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *  dpll_core.c - DPLL subsystem kernel-space interface implementation.
+ *
+ *  Copyright (c) 2023 Meta Platforms, Inc. and affiliates
+ *  Copyright (c) 2023 Intel Corporation.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+
+#include "dpll_core.h"
+#include "dpll_netlink.h"
+
+/* Mutex lock to protect DPLL subsystem devices and pins */
+DEFINE_MUTEX(dpll_lock);
+
+DEFINE_XARRAY_FLAGS(dpll_device_xa, XA_FLAGS_ALLOC);
+DEFINE_XARRAY_FLAGS(dpll_pin_xa, XA_FLAGS_ALLOC);
+
+static u32 dpll_xa_id;
+
+#define ASSERT_DPLL_REGISTERED(d)	\
+	WARN_ON_ONCE(!xa_get_mark(&dpll_device_xa, (d)->id, DPLL_REGISTERED))
+#define ASSERT_DPLL_NOT_REGISTERED(d)	\
+	WARN_ON_ONCE(xa_get_mark(&dpll_device_xa, (d)->id, DPLL_REGISTERED))
+#define ASSERT_PIN_REGISTERED(p)	\
+	WARN_ON_ONCE(!xa_get_mark(&dpll_pin_xa, (p)->id, DPLL_REGISTERED))
+
+struct dpll_device_registration {
+	struct list_head list;
+	const struct dpll_device_ops *ops;
+	void *priv;
+};
+
+struct dpll_pin_registration {
+	struct list_head list;
+	const struct dpll_pin_ops *ops;
+	void *priv;
+};
+
+struct dpll_device *dpll_device_get_by_id(int id)
+{
+	if (xa_get_mark(&dpll_device_xa, id, DPLL_REGISTERED))
+		return xa_load(&dpll_device_xa, id);
+
+	return NULL;
+}
+
+static struct dpll_pin_registration *
+dpll_pin_registration_find(struct dpll_pin_ref *ref,
+			   const struct dpll_pin_ops *ops, void *priv)
+{
+	struct dpll_pin_registration *reg;
+
+	list_for_each_entry(reg, &ref->registration_list, list) {
+		if (reg->ops == ops && reg->priv == priv)
+			return reg;
+	}
+	return NULL;
+}
+
+static int
+dpll_xa_ref_pin_add(struct xarray *xa_pins, struct dpll_pin *pin,
+		    const struct dpll_pin_ops *ops, void *priv)
+{
+	struct dpll_pin_registration *reg;
+	struct dpll_pin_ref *ref;
+	bool ref_exists = false;
+	unsigned long i;
+	int ret;
+
+	xa_for_each(xa_pins, i, ref) {
+		if (ref->pin != pin)
+			continue;
+		reg = dpll_pin_registration_find(ref, ops, priv);
+		if (reg) {
+			refcount_inc(&ref->refcount);
+			return 0;
+		}
+		ref_exists = true;
+		break;
+	}
+
+	if (!ref_exists) {
+		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
+		if (!ref)
+			return -ENOMEM;
+		ref->pin = pin;
+		INIT_LIST_HEAD(&ref->registration_list);
+		ret = xa_insert(xa_pins, pin->pin_idx, ref, GFP_KERNEL);
+		if (ret) {
+			kfree(ref);
+			return ret;
+		}
+		refcount_set(&ref->refcount, 1);
+	}
+
+	reg = kzalloc(sizeof(*reg), GFP_KERNEL);
+	if (!reg) {
+		if (!ref_exists) {
+			xa_erase(xa_pins, pin->pin_idx);
+			kfree(ref);
+		}
+		return -ENOMEM;
+	}
+	reg->ops = ops;
+	reg->priv = priv;
+	if (ref_exists)
+		refcount_inc(&ref->refcount);
+	list_add_tail(&reg->list, &ref->registration_list);
+
+	return 0;
+}
+
+static int dpll_xa_ref_pin_del(struct xarray *xa_pins, struct dpll_pin *pin,
+			       const struct dpll_pin_ops *ops, void *priv)
+{
+	struct dpll_pin_registration *reg;
+	struct dpll_pin_ref *ref;
+	unsigned long i;
+
+	xa_for_each(xa_pins, i, ref) {
+		if (ref->pin != pin)
+			continue;
+		reg = dpll_pin_registration_find(ref, ops, priv);
+		if (WARN_ON(!reg))
+			return -EINVAL;
+		if (refcount_dec_and_test(&ref->refcount)) {
+			list_del(&reg->list);
+			kfree(reg);
+			xa_erase(xa_pins, i);
+			WARN_ON(!list_empty(&ref->registration_list));
+			kfree(ref);
+		}
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+static int
+dpll_xa_ref_dpll_add(struct xarray *xa_dplls, struct dpll_device *dpll,
+		     const struct dpll_pin_ops *ops, void *priv)
+{
+	struct dpll_pin_registration *reg;
+	struct dpll_pin_ref *ref;
+	bool ref_exists = false;
+	unsigned long i;
+	int ret;
+
+	xa_for_each(xa_dplls, i, ref) {
+		if (ref->dpll != dpll)
+			continue;
+		reg = dpll_pin_registration_find(ref, ops, priv);
+		if (reg) {
+			refcount_inc(&ref->refcount);
+			return 0;
+		}
+		ref_exists = true;
+		break;
+	}
+
+	if (!ref_exists) {
+		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
+		if (!ref)
+			return -ENOMEM;
+		ref->dpll = dpll;
+		INIT_LIST_HEAD(&ref->registration_list);
+		ret = xa_insert(xa_dplls, dpll->id, ref, GFP_KERNEL);
+		if (ret) {
+			kfree(ref);
+			return ret;
+		}
+		refcount_set(&ref->refcount, 1);
+	}
+
+	reg = kzalloc(sizeof(*reg), GFP_KERNEL);
+	if (!reg) {
+		if (!ref_exists) {
+			xa_erase(xa_dplls, dpll->id);
+			kfree(ref);
+		}
+		return -ENOMEM;
+	}
+	reg->ops = ops;
+	reg->priv = priv;
+	if (ref_exists)
+		refcount_inc(&ref->refcount);
+	list_add_tail(&reg->list, &ref->registration_list);
+
+	return 0;
+}
+
+static void
+dpll_xa_ref_dpll_del(struct xarray *xa_dplls, struct dpll_device *dpll,
+		     const struct dpll_pin_ops *ops, void *priv)
+{
+	struct dpll_pin_registration *reg;
+	struct dpll_pin_ref *ref;
+	unsigned long i;
+
+	xa_for_each(xa_dplls, i, ref) {
+		if (ref->dpll != dpll)
+			continue;
+		reg = dpll_pin_registration_find(ref, ops, priv);
+		if (WARN_ON(!reg))
+			return;
+		if (refcount_dec_and_test(&ref->refcount)) {
+			list_del(&reg->list);
+			kfree(reg);
+			xa_erase(xa_dplls, i);
+			WARN_ON(!list_empty(&ref->registration_list));
+			kfree(ref);
+		}
+		return;
+	}
+}
+
+struct dpll_pin_ref *dpll_xa_ref_dpll_first(struct xarray *xa_refs)
+{
+	struct dpll_pin_ref *ref;
+	unsigned long i = 0;
+
+	ref = xa_find(xa_refs, &i, ULONG_MAX, XA_PRESENT);
+	WARN_ON(!ref);
+	return ref;
+}
+
+static struct dpll_device *
+dpll_device_alloc(const u64 clock_id, u32 device_idx, struct module *module)
+{
+	struct dpll_device *dpll;
+	int ret;
+
+	dpll = kzalloc(sizeof(*dpll), GFP_KERNEL);
+	if (!dpll)
+		return ERR_PTR(-ENOMEM);
+	refcount_set(&dpll->refcount, 1);
+	INIT_LIST_HEAD(&dpll->registration_list);
+	dpll->device_idx = device_idx;
+	dpll->clock_id = clock_id;
+	dpll->module = module;
+	ret = xa_alloc_cyclic(&dpll_device_xa, &dpll->id, dpll, xa_limit_32b,
+			      &dpll_xa_id, GFP_KERNEL);
+	if (ret < 0) {
+		kfree(dpll);
+		return ERR_PTR(ret);
+	}
+	xa_init_flags(&dpll->pin_refs, XA_FLAGS_ALLOC);
+
+	return dpll;
+}
+
+/**
+ * dpll_device_get - find existing or create new dpll device
+ * @clock_id: clock_id of creator
+ * @device_idx: idx given by device driver
+ * @module: reference to registering module
+ *
+ * Get existing object of a dpll device, unique for given arguments.
+ * Create new if doesn't exist yet.
+ *
+ * Context: Acquires a lock (dpll_lock)
+ * Return:
+ * * valid dpll_device struct pointer if succeeded
+ * * ERR_PTR(X) - error
+ */
+struct dpll_device *
+dpll_device_get(u64 clock_id, u32 device_idx, struct module *module)
+{
+	struct dpll_device *dpll, *ret = NULL;
+	unsigned long index;
+
+	mutex_lock(&dpll_lock);
+	xa_for_each(&dpll_device_xa, index, dpll) {
+		if (dpll->clock_id == clock_id &&
+		    dpll->device_idx == device_idx &&
+		    dpll->module == module) {
+			ret = dpll;
+			refcount_inc(&ret->refcount);
+			break;
+		}
+	}
+	if (!ret)
+		ret = dpll_device_alloc(clock_id, device_idx, module);
+	mutex_unlock(&dpll_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(dpll_device_get);
+
+/**
+ * dpll_device_put - decrease the refcount and free memory if possible
+ * @dpll: dpll_device struct pointer
+ *
+ * Context: Acquires a lock (dpll_lock)
+ * Drop reference for a dpll device, if all references are gone, delete
+ * dpll device object.
+ */
+void dpll_device_put(struct dpll_device *dpll)
+{
+	mutex_lock(&dpll_lock);
+	if (refcount_dec_and_test(&dpll->refcount)) {
+		ASSERT_DPLL_NOT_REGISTERED(dpll);
+		WARN_ON_ONCE(!xa_empty(&dpll->pin_refs));
+		xa_destroy(&dpll->pin_refs);
+		xa_erase(&dpll_device_xa, dpll->id);
+		WARN_ON(!list_empty(&dpll->registration_list));
+		kfree(dpll);
+	}
+	mutex_unlock(&dpll_lock);
+}
+EXPORT_SYMBOL_GPL(dpll_device_put);
+
+static struct dpll_device_registration *
+dpll_device_registration_find(struct dpll_device *dpll,
+			      const struct dpll_device_ops *ops, void *priv)
+{
+	struct dpll_device_registration *reg;
+
+	list_for_each_entry(reg, &dpll->registration_list, list) {
+		if (reg->ops == ops && reg->priv == priv)
+			return reg;
+	}
+	return NULL;
+}
+
+/**
+ * dpll_device_register - register the dpll device in the subsystem
+ * @dpll: pointer to a dpll
+ * @type: type of a dpll
+ * @ops: ops for a dpll device
+ * @priv: pointer to private information of owner
+ *
+ * Make dpll device available for user space.
+ *
+ * Context: Acquires a lock (dpll_lock)
+ * Return:
+ * * 0 on success
+ * * negative - error value
+ */
+int dpll_device_register(struct dpll_device *dpll, enum dpll_type type,
+			 const struct dpll_device_ops *ops, void *priv)
+{
+	struct dpll_device_registration *reg;
+	bool first_registration = false;
+
+	if (WARN_ON(!ops))
+		return -EINVAL;
+	if (WARN_ON(!ops->mode_get))
+		return -EINVAL;
+	if (WARN_ON(!ops->lock_status_get))
+		return -EINVAL;
+	if (WARN_ON(type < DPLL_TYPE_PPS || type > DPLL_TYPE_MAX))
+		return -EINVAL;
+
+	mutex_lock(&dpll_lock);
+	reg = dpll_device_registration_find(dpll, ops, priv);
+	if (reg) {
+		mutex_unlock(&dpll_lock);
+		return -EEXIST;
+	}
+
+	reg = kzalloc(sizeof(*reg), GFP_KERNEL);
+	if (!reg) {
+		mutex_unlock(&dpll_lock);
+		return -ENOMEM;
+	}
+	reg->ops = ops;
+	reg->priv = priv;
+	dpll->type = type;
+	first_registration = list_empty(&dpll->registration_list);
+	list_add_tail(&reg->list, &dpll->registration_list);
+	if (!first_registration) {
+		mutex_unlock(&dpll_lock);
+		return 0;
+	}
+
+	xa_set_mark(&dpll_device_xa, dpll->id, DPLL_REGISTERED);
+	dpll_device_create_ntf(dpll);
+	mutex_unlock(&dpll_lock);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(dpll_device_register);
+
+/**
+ * dpll_device_unregister - unregister dpll device
+ * @dpll: registered dpll pointer
+ * @ops: ops for a dpll device
+ * @priv: pointer to private information of owner
+ *
+ * Unregister device, make it unavailable for userspace.
+ * Note: It does not free the memory
+ * Context: Acquires a lock (dpll_lock)
+ */
+void dpll_device_unregister(struct dpll_device *dpll,
+			    const struct dpll_device_ops *ops, void *priv)
+{
+	struct dpll_device_registration *reg;
+
+	mutex_lock(&dpll_lock);
+	ASSERT_DPLL_REGISTERED(dpll);
+	dpll_device_delete_ntf(dpll);
+	reg = dpll_device_registration_find(dpll, ops, priv);
+	if (WARN_ON(!reg)) {
+		mutex_unlock(&dpll_lock);
+		return;
+	}
+	list_del(&reg->list);
+	kfree(reg);
+
+	if (!list_empty(&dpll->registration_list)) {
+		mutex_unlock(&dpll_lock);
+		return;
+	}
+	xa_clear_mark(&dpll_device_xa, dpll->id, DPLL_REGISTERED);
+	mutex_unlock(&dpll_lock);
+}
+EXPORT_SYMBOL_GPL(dpll_device_unregister);
+
+static struct dpll_pin *
+dpll_pin_alloc(u64 clock_id, u32 pin_idx, struct module *module,
+	       const struct dpll_pin_properties *prop)
+{
+	struct dpll_pin *pin;
+	int ret;
+
+	pin = kzalloc(sizeof(*pin), GFP_KERNEL);
+	if (!pin)
+		return ERR_PTR(-ENOMEM);
+	pin->pin_idx = pin_idx;
+	pin->clock_id = clock_id;
+	pin->module = module;
+	if (WARN_ON(prop->type < DPLL_PIN_TYPE_MUX ||
+		    prop->type > DPLL_PIN_TYPE_MAX)) {
+		ret = -EINVAL;
+		goto err;
+	}
+	pin->prop = prop;
+	refcount_set(&pin->refcount, 1);
+	xa_init_flags(&pin->dpll_refs, XA_FLAGS_ALLOC);
+	xa_init_flags(&pin->parent_refs, XA_FLAGS_ALLOC);
+	ret = xa_alloc(&dpll_pin_xa, &pin->id, pin, xa_limit_16b, GFP_KERNEL);
+	if (ret)
+		goto err;
+	return pin;
+err:
+	xa_destroy(&pin->dpll_refs);
+	xa_destroy(&pin->parent_refs);
+	kfree(pin);
+	return ERR_PTR(ret);
+}
+
+/**
+ * dpll_pin_get - find existing or create new dpll pin
+ * @clock_id: clock_id of creator
+ * @pin_idx: idx given by dev driver
+ * @module: reference to registering module
+ * @prop: dpll pin properties
+ *
+ * Get existing object of a pin (unique for given arguments) or create new
+ * if doesn't exist yet.
+ *
+ * Context: Acquires a lock (dpll_lock)
+ * Return:
+ * * valid allocated dpll_pin struct pointer if succeeded
+ * * ERR_PTR(X) - error
+ */
+struct dpll_pin *
+dpll_pin_get(u64 clock_id, u32 pin_idx, struct module *module,
+	     const struct dpll_pin_properties *prop)
+{
+	struct dpll_pin *pos, *ret = NULL;
+	unsigned long i;
+
+	mutex_lock(&dpll_lock);
+	xa_for_each(&dpll_pin_xa, i, pos) {
+		if (pos->clock_id == clock_id &&
+		    pos->pin_idx == pin_idx &&
+		    pos->module == module) {
+			ret = pos;
+			refcount_inc(&ret->refcount);
+			break;
+		}
+	}
+	if (!ret)
+		ret = dpll_pin_alloc(clock_id, pin_idx, module, prop);
+	mutex_unlock(&dpll_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(dpll_pin_get);
+
+/**
+ * dpll_pin_put - decrease the refcount and free memory if possible
+ * @pin: pointer to a pin to be put
+ *
+ * Drop reference for a pin, if all references are gone, delete pin object.
+ *
+ * Context: Acquires a lock (dpll_lock)
+ */
+void dpll_pin_put(struct dpll_pin *pin)
+{
+	mutex_lock(&dpll_lock);
+	if (refcount_dec_and_test(&pin->refcount)) {
+		xa_destroy(&pin->dpll_refs);
+		xa_destroy(&pin->parent_refs);
+		xa_erase(&dpll_pin_xa, pin->id);
+		kfree(pin);
+	}
+	mutex_unlock(&dpll_lock);
+}
+EXPORT_SYMBOL_GPL(dpll_pin_put);
+
+static int
+__dpll_pin_register(struct dpll_device *dpll, struct dpll_pin *pin,
+		    const struct dpll_pin_ops *ops, void *priv)
+{
+	int ret;
+
+	ret = dpll_xa_ref_pin_add(&dpll->pin_refs, pin, ops, priv);
+	if (ret)
+		return ret;
+	ret = dpll_xa_ref_dpll_add(&pin->dpll_refs, dpll, ops, priv);
+	if (ret)
+		goto ref_pin_del;
+	xa_set_mark(&dpll_pin_xa, pin->id, DPLL_REGISTERED);
+	dpll_pin_create_ntf(pin);
+
+	return ret;
+
+ref_pin_del:
+	dpll_xa_ref_pin_del(&dpll->pin_refs, pin, ops, priv);
+	return ret;
+}
+
+/**
+ * dpll_pin_register - register the dpll pin in the subsystem
+ * @dpll: pointer to a dpll
+ * @pin: pointer to a dpll pin
+ * @ops: ops for a dpll pin ops
+ * @priv: pointer to private information of owner
+ *
+ * Context: Acquires a lock (dpll_lock)
+ * Return:
+ * * 0 on success
+ * * negative - error value
+ */
+int
+dpll_pin_register(struct dpll_device *dpll, struct dpll_pin *pin,
+		  const struct dpll_pin_ops *ops, void *priv)
+{
+	int ret;
+
+	if (WARN_ON(!ops) ||
+	    WARN_ON(!ops->state_on_dpll_get) ||
+	    WARN_ON(!ops->direction_get))
+		return -EINVAL;
+	if (ASSERT_DPLL_REGISTERED(dpll))
+		return -EINVAL;
+
+	mutex_lock(&dpll_lock);
+	if (WARN_ON(!(dpll->module == pin->module &&
+		      dpll->clock_id == pin->clock_id)))
+		ret = -EINVAL;
+	else
+		ret = __dpll_pin_register(dpll, pin, ops, priv);
+	mutex_unlock(&dpll_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(dpll_pin_register);
+
+static void
+__dpll_pin_unregister(struct dpll_device *dpll, struct dpll_pin *pin,
+		      const struct dpll_pin_ops *ops, void *priv)
+{
+	dpll_xa_ref_pin_del(&dpll->pin_refs, pin, ops, priv);
+	dpll_xa_ref_dpll_del(&pin->dpll_refs, dpll, ops, priv);
+	if (xa_empty(&pin->dpll_refs))
+		xa_clear_mark(&dpll_pin_xa, pin->id, DPLL_REGISTERED);
+}
+
+/**
+ * dpll_pin_unregister - unregister dpll pin from dpll device
+ * @dpll: registered dpll pointer
+ * @pin: pointer to a pin
+ * @ops: ops for a dpll pin
+ * @priv: pointer to private information of owner
+ *
+ * Note: It does not free the memory
+ * Context: Acquires a lock (dpll_lock)
+ */
+void dpll_pin_unregister(struct dpll_device *dpll, struct dpll_pin *pin,
+			 const struct dpll_pin_ops *ops, void *priv)
+{
+	if (WARN_ON(xa_empty(&dpll->pin_refs)))
+		return;
+	if (WARN_ON(!xa_empty(&pin->parent_refs)))
+		return;
+
+	mutex_lock(&dpll_lock);
+	dpll_pin_delete_ntf(pin);
+	__dpll_pin_unregister(dpll, pin, ops, priv);
+	mutex_unlock(&dpll_lock);
+}
+EXPORT_SYMBOL_GPL(dpll_pin_unregister);
+
+/**
+ * dpll_pin_on_pin_register - register a pin with a parent pin
+ * @parent: pointer to a parent pin
+ * @pin: pointer to a pin
+ * @ops: ops for a dpll pin
+ * @priv: pointer to private information of owner
+ *
+ * Register a pin with a parent pin, create references between them and
+ * between newly registered pin and dplls connected with a parent pin.
+ *
+ * Context: Acquires a lock (dpll_lock)
+ * Return:
+ * * 0 on success
+ * * negative - error value
+ */
+int dpll_pin_on_pin_register(struct dpll_pin *parent, struct dpll_pin *pin,
+			     const struct dpll_pin_ops *ops, void *priv)
+{
+	struct dpll_pin_ref *ref;
+	unsigned long i, stop;
+	int ret;
+
+	if (WARN_ON(parent->prop->type != DPLL_PIN_TYPE_MUX))
+		return -EINVAL;
+
+	if (WARN_ON(!ops) ||
+	    WARN_ON(!ops->state_on_pin_get) ||
+	    WARN_ON(!ops->direction_get))
+		return -EINVAL;
+	if (ASSERT_PIN_REGISTERED(parent))
+		return -EINVAL;
+
+	mutex_lock(&dpll_lock);
+	ret = dpll_xa_ref_pin_add(&pin->parent_refs, parent, ops, priv);
+	if (ret)
+		goto unlock;
+	refcount_inc(&pin->refcount);
+	xa_for_each(&parent->dpll_refs, i, ref) {
+		ret = __dpll_pin_register(ref->dpll, pin, ops, priv);
+		if (ret) {
+			stop = i;
+			goto dpll_unregister;
+		}
+		dpll_pin_create_ntf(pin);
+	}
+	mutex_unlock(&dpll_lock);
+
+	return ret;
+
+dpll_unregister:
+	xa_for_each(&parent->dpll_refs, i, ref)
+		if (i < stop) {
+			__dpll_pin_unregister(ref->dpll, pin, ops, priv);
+			dpll_pin_delete_ntf(pin);
+		}
+	refcount_dec(&pin->refcount);
+	dpll_xa_ref_pin_del(&pin->parent_refs, parent, ops, priv);
+unlock:
+	mutex_unlock(&dpll_lock);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(dpll_pin_on_pin_register);
+
+/**
+ * dpll_pin_on_pin_unregister - unregister dpll pin from a parent pin
+ * @parent: pointer to a parent pin
+ * @pin: pointer to a pin
+ * @ops: ops for a dpll pin
+ * @priv: pointer to private information of owner
+ *
+ * Context: Acquires a lock (dpll_lock)
+ * Note: It does not free the memory
+ */
+void dpll_pin_on_pin_unregister(struct dpll_pin *parent, struct dpll_pin *pin,
+				const struct dpll_pin_ops *ops, void *priv)
+{
+	struct dpll_pin_ref *ref;
+	unsigned long i;
+
+	mutex_lock(&dpll_lock);
+	dpll_pin_delete_ntf(pin);
+	dpll_xa_ref_pin_del(&pin->parent_refs, parent, ops, priv);
+	refcount_dec(&pin->refcount);
+	xa_for_each(&pin->dpll_refs, i, ref)
+		__dpll_pin_unregister(ref->dpll, pin, ops, priv);
+	mutex_unlock(&dpll_lock);
+}
+EXPORT_SYMBOL_GPL(dpll_pin_on_pin_unregister);
+
+static struct dpll_device_registration *
+dpll_device_registration_first(struct dpll_device *dpll)
+{
+	struct dpll_device_registration *reg;
+
+	reg = list_first_entry_or_null((struct list_head *)&dpll->registration_list,
+				       struct dpll_device_registration, list);
+	WARN_ON(!reg);
+	return reg;
+}
+
+void *dpll_priv(struct dpll_device *dpll)
+{
+	struct dpll_device_registration *reg;
+
+	reg = dpll_device_registration_first(dpll);
+	return reg->priv;
+}
+
+const struct dpll_device_ops *dpll_device_ops(struct dpll_device *dpll)
+{
+	struct dpll_device_registration *reg;
+
+	reg = dpll_device_registration_first(dpll);
+	return reg->ops;
+}
+
+static struct dpll_pin_registration *
+dpll_pin_registration_first(struct dpll_pin_ref *ref)
+{
+	struct dpll_pin_registration *reg;
+
+	reg = list_first_entry_or_null(&ref->registration_list,
+				       struct dpll_pin_registration, list);
+	WARN_ON(!reg);
+	return reg;
+}
+
+void *dpll_pin_on_dpll_priv(struct dpll_device *dpll,
+			    struct dpll_pin *pin)
+{
+	struct dpll_pin_registration *reg;
+	struct dpll_pin_ref *ref;
+
+	ref = xa_load(&dpll->pin_refs, pin->pin_idx);
+	if (!ref)
+		return NULL;
+	reg = dpll_pin_registration_first(ref);
+	return reg->priv;
+}
+
+void *dpll_pin_on_pin_priv(struct dpll_pin *parent,
+			   struct dpll_pin *pin)
+{
+	struct dpll_pin_registration *reg;
+	struct dpll_pin_ref *ref;
+
+	ref = xa_load(&pin->parent_refs, parent->pin_idx);
+	if (!ref)
+		return NULL;
+	reg = dpll_pin_registration_first(ref);
+	return reg->priv;
+}
+
+const struct dpll_pin_ops *dpll_pin_ops(struct dpll_pin_ref *ref)
+{
+	struct dpll_pin_registration *reg;
+
+	reg = dpll_pin_registration_first(ref);
+	return reg->ops;
+}
+
+static int __init dpll_init(void)
+{
+	int ret;
+
+	ret = genl_register_family(&dpll_nl_family);
+	if (ret)
+		goto error;
+
+	return 0;
+
+error:
+	mutex_destroy(&dpll_lock);
+	return ret;
+}
+
+static void __exit dpll_exit(void)
+{
+	genl_unregister_family(&dpll_nl_family);
+	mutex_destroy(&dpll_lock);
+}
+
+subsys_initcall(dpll_init);
+module_exit(dpll_exit);
diff --git a/drivers/dpll/dpll_core.h b/drivers/dpll/dpll_core.h
new file mode 100644
index 000000000000..5585873c5c1b
--- /dev/null
+++ b/drivers/dpll/dpll_core.h
@@ -0,0 +1,89 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *  Copyright (c) 2023 Meta Platforms, Inc. and affiliates
+ *  Copyright (c) 2023 Intel and affiliates
+ */
+
+#ifndef __DPLL_CORE_H__
+#define __DPLL_CORE_H__
+
+#include <linux/dpll.h>
+#include <linux/list.h>
+#include <linux/refcount.h>
+#include "dpll_nl.h"
+
+#define DPLL_REGISTERED		XA_MARK_1
+
+/**
+ * struct dpll_device - stores DPLL device internal data
+ * @id:			unique id number for device given by dpll subsystem
+ * @device_idx:		id given by dev driver
+ * @clock_id:		unique identifier (clock_id) of a dpll
+ * @module:		module of creator
+ * @type:		type of a dpll
+ * @pin_refs:		stores pins registered within a dpll
+ * @refcount:		refcount
+ * @registration_list:	list of registered ops and priv data of dpll owners
+ **/
+struct dpll_device {
+	u32 id;
+	u32 device_idx;
+	u64 clock_id;
+	struct module *module;
+	enum dpll_type type;
+	struct xarray pin_refs;
+	refcount_t refcount;
+	struct list_head registration_list;
+};
+
+/**
+ * struct dpll_pin - structure for a dpll pin
+ * @id:			unique id number for pin given by dpll subsystem
+ * @pin_idx:		index of a pin given by dev driver
+ * @clock_id:		clock_id of creator
+ * @module:		module of creator
+ * @dpll_refs:		hold referencees to dplls pin was registered with
+ * @parent_refs:	hold references to parent pins pin was registered with
+ * @prop:		pointer to pin properties given by registerer
+ * @rclk_dev_name:	holds name of device when pin can recover clock from it
+ * @refcount:		refcount
+ **/
+struct dpll_pin {
+	u32 id;
+	u32 pin_idx;
+	u64 clock_id;
+	struct module *module;
+	struct xarray dpll_refs;
+	struct xarray parent_refs;
+	const struct dpll_pin_properties *prop;
+	refcount_t refcount;
+};
+
+/**
+ * struct dpll_pin_ref - structure for referencing either dpll or pins
+ * @dpll:		pointer to a dpll
+ * @pin:		pointer to a pin
+ * @registration_list:	list of ops and priv data registered with the ref
+ * @refcount:		refcount
+ **/
+struct dpll_pin_ref {
+	union {
+		struct dpll_device *dpll;
+		struct dpll_pin *pin;
+	};
+	struct list_head registration_list;
+	refcount_t refcount;
+};
+
+void *dpll_priv(struct dpll_device *dpll);
+void *dpll_pin_on_dpll_priv(struct dpll_device *dpll, struct dpll_pin *pin);
+void *dpll_pin_on_pin_priv(struct dpll_pin *parent, struct dpll_pin *pin);
+
+const struct dpll_device_ops *dpll_device_ops(struct dpll_device *dpll);
+struct dpll_device *dpll_device_get_by_id(int id);
+const struct dpll_pin_ops *dpll_pin_ops(struct dpll_pin_ref *ref);
+struct dpll_pin_ref *dpll_xa_ref_dpll_first(struct xarray *xa_refs);
+extern struct xarray dpll_device_xa;
+extern struct xarray dpll_pin_xa;
+extern struct mutex dpll_lock;
+#endif
diff --git a/drivers/dpll/dpll_netlink.c b/drivers/dpll/dpll_netlink.c
new file mode 100644
index 000000000000..e20daba6896a
--- /dev/null
+++ b/drivers/dpll/dpll_netlink.c
@@ -0,0 +1,1253 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Generic netlink for DPLL management framework
+ *
+ *  Copyright (c) 2023 Meta Platforms, Inc. and affiliates
+ *  Copyright (c) 2023 Intel and affiliates
+ *
+ */
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <net/genetlink.h>
+#include "dpll_core.h"
+#include "dpll_netlink.h"
+#include "dpll_nl.h"
+#include <uapi/linux/dpll.h>
+
+#define ASSERT_NOT_NULL(ptr)	(WARN_ON(!ptr))
+
+#define xa_for_each_marked_start(xa, index, entry, filter, start) \
+	for (index = start, entry = xa_find(xa, &index, ULONG_MAX, filter); \
+	     entry; entry = xa_find_after(xa, &index, ULONG_MAX, filter))
+
+struct dpll_dump_ctx {
+	unsigned long idx;
+};
+
+static struct dpll_dump_ctx *dpll_dump_context(struct netlink_callback *cb)
+{
+	return (struct dpll_dump_ctx *)cb->ctx;
+}
+
+static int
+dpll_msg_add_dev_handle(struct sk_buff *msg, struct dpll_device *dpll)
+{
+	if (nla_put_u32(msg, DPLL_A_ID, dpll->id))
+		return -EMSGSIZE;
+
+	return 0;
+}
+
+static int
+dpll_msg_add_dev_parent_handle(struct sk_buff *msg, u32 id)
+{
+	if (nla_put_u32(msg, DPLL_A_PIN_PARENT_ID, id))
+		return -EMSGSIZE;
+
+	return 0;
+}
+
+/**
+ * dpll_msg_pin_handle_size - get size of pin handle attribute for given pin
+ * @pin: pin pointer
+ *
+ * Return: byte size of pin handle attribute for given pin.
+ */
+size_t dpll_msg_pin_handle_size(struct dpll_pin *pin)
+{
+	return pin ? nla_total_size(4) : 0; /* DPLL_A_PIN_ID */
+}
+EXPORT_SYMBOL_GPL(dpll_msg_pin_handle_size);
+
+/**
+ * dpll_msg_add_pin_handle - attach pin handle attribute to a given message
+ * @msg: pointer to sk_buff message to attach a pin handle
+ * @pin: pin pointer
+ *
+ * Return:
+ * * 0 - success
+ * * -EMSGSIZE - no space in message to attach pin handle
+ */
+int dpll_msg_add_pin_handle(struct sk_buff *msg, struct dpll_pin *pin)
+{
+	if (!pin)
+		return 0;
+	if (nla_put_u32(msg, DPLL_A_PIN_ID, pin->id))
+		return -EMSGSIZE;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(dpll_msg_add_pin_handle);
+
+static int
+dpll_msg_add_mode(struct sk_buff *msg, struct dpll_device *dpll,
+		  struct netlink_ext_ack *extack)
+{
+	const struct dpll_device_ops *ops = dpll_device_ops(dpll);
+	enum dpll_mode mode;
+	int ret;
+
+	ret = ops->mode_get(dpll, dpll_priv(dpll), &mode, extack);
+	if (ret)
+		return ret;
+	if (nla_put_u32(msg, DPLL_A_MODE, mode))
+		return -EMSGSIZE;
+
+	return 0;
+}
+
+static int
+dpll_msg_add_mode_supported(struct sk_buff *msg, struct dpll_device *dpll,
+			    struct netlink_ext_ack *extack)
+{
+	const struct dpll_device_ops *ops = dpll_device_ops(dpll);
+	enum dpll_mode mode;
+
+	if (!ops->mode_supported)
+		return 0;
+	for (mode = DPLL_MODE_MANUAL; mode <= DPLL_MODE_MAX; mode++)
+		if (ops->mode_supported(dpll, dpll_priv(dpll), mode, extack))
+			if (nla_put_u32(msg, DPLL_A_MODE_SUPPORTED, mode))
+				return -EMSGSIZE;
+
+	return 0;
+}
+
+static int
+dpll_msg_add_lock_status(struct sk_buff *msg, struct dpll_device *dpll,
+			 struct netlink_ext_ack *extack)
+{
+	const struct dpll_device_ops *ops = dpll_device_ops(dpll);
+	enum dpll_lock_status status;
+	int ret;
+
+	ret = ops->lock_status_get(dpll, dpll_priv(dpll), &status, extack);
+	if (ret)
+		return ret;
+	if (nla_put_u32(msg, DPLL_A_LOCK_STATUS, status))
+		return -EMSGSIZE;
+
+	return 0;
+}
+
+static int
+dpll_msg_add_temp(struct sk_buff *msg, struct dpll_device *dpll,
+		  struct netlink_ext_ack *extack)
+{
+	const struct dpll_device_ops *ops = dpll_device_ops(dpll);
+	s32 temp;
+	int ret;
+
+	if (!ops->temp_get)
+		return 0;
+	ret = ops->temp_get(dpll, dpll_priv(dpll), &temp, extack);
+	if (ret)
+		return ret;
+	if (nla_put_s32(msg, DPLL_A_TEMP, temp))
+		return -EMSGSIZE;
+
+	return 0;
+}
+
+static int
+dpll_msg_add_pin_prio(struct sk_buff *msg, struct dpll_pin *pin,
+		      struct dpll_pin_ref *ref,
+		      struct netlink_ext_ack *extack)
+{
+	const struct dpll_pin_ops *ops = dpll_pin_ops(ref);
+	struct dpll_device *dpll = ref->dpll;
+	u32 prio;
+	int ret;
+
+	if (!ops->prio_get)
+		return 0;
+	ret = ops->prio_get(pin, dpll_pin_on_dpll_priv(dpll, pin), dpll,
+			    dpll_priv(dpll), &prio, extack);
+	if (ret)
+		return ret;
+	if (nla_put_u32(msg, DPLL_A_PIN_PRIO, prio))
+		return -EMSGSIZE;
+
+	return 0;
+}
+
+static int
+dpll_msg_add_pin_on_dpll_state(struct sk_buff *msg, struct dpll_pin *pin,
+			       struct dpll_pin_ref *ref,
+			       struct netlink_ext_ack *extack)
+{
+	const struct dpll_pin_ops *ops = dpll_pin_ops(ref);
+	struct dpll_device *dpll = ref->dpll;
+	enum dpll_pin_state state;
+	int ret;
+
+	if (!ops->state_on_dpll_get)
+		return 0;
+	ret = ops->state_on_dpll_get(pin, dpll_pin_on_dpll_priv(dpll, pin),
+				     dpll, dpll_priv(dpll), &state, extack);
+	if (ret)
+		return ret;
+	if (nla_put_u32(msg, DPLL_A_PIN_STATE, state))
+		return -EMSGSIZE;
+
+	return 0;
+}
+
+static int
+dpll_msg_add_pin_direction(struct sk_buff *msg, struct dpll_pin *pin,
+			   struct dpll_pin_ref *ref,
+			   struct netlink_ext_ack *extack)
+{
+	const struct dpll_pin_ops *ops = dpll_pin_ops(ref);
+	struct dpll_device *dpll = ref->dpll;
+	enum dpll_pin_direction direction;
+	int ret;
+
+	ret = ops->direction_get(pin, dpll_pin_on_dpll_priv(dpll, pin), dpll,
+				 dpll_priv(dpll), &direction, extack);
+	if (ret)
+		return ret;
+	if (nla_put_u32(msg, DPLL_A_PIN_DIRECTION, direction))
+		return -EMSGSIZE;
+
+	return 0;
+}
+
+static int
+dpll_msg_add_pin_freq(struct sk_buff *msg, struct dpll_pin *pin,
+		      struct dpll_pin_ref *ref, struct netlink_ext_ack *extack)
+{
+	const struct dpll_pin_ops *ops = dpll_pin_ops(ref);
+	struct dpll_device *dpll = ref->dpll;
+	struct nlattr *nest;
+	int fs, ret;
+	u64 freq;
+
+	if (!ops->frequency_get)
+		return 0;
+	ret = ops->frequency_get(pin, dpll_pin_on_dpll_priv(dpll, pin), dpll,
+				 dpll_priv(dpll), &freq, extack);
+	if (ret)
+		return ret;
+	if (nla_put_64bit(msg, DPLL_A_PIN_FREQUENCY, sizeof(freq), &freq,
+			  DPLL_A_PIN_PAD))
+		return -EMSGSIZE;
+	for (fs = 0; fs < pin->prop->freq_supported_num; fs++) {
+		nest = nla_nest_start(msg, DPLL_A_PIN_FREQUENCY_SUPPORTED);
+		if (!nest)
+			return -EMSGSIZE;
+		freq = pin->prop->freq_supported[fs].min;
+		if (nla_put_64bit(msg, DPLL_A_PIN_FREQUENCY_MIN, sizeof(freq),
+				  &freq, DPLL_A_PIN_PAD)) {
+			nla_nest_cancel(msg, nest);
+			return -EMSGSIZE;
+		}
+		freq = pin->prop->freq_supported[fs].max;
+		if (nla_put_64bit(msg, DPLL_A_PIN_FREQUENCY_MAX, sizeof(freq),
+				  &freq, DPLL_A_PIN_PAD)) {
+			nla_nest_cancel(msg, nest);
+			return -EMSGSIZE;
+		}
+		nla_nest_end(msg, nest);
+	}
+
+	return 0;
+}
+
+static bool dpll_pin_is_freq_supported(struct dpll_pin *pin, u32 freq)
+{
+	int fs;
+
+	for (fs = 0; fs < pin->prop->freq_supported_num; fs++)
+		if (freq >= pin->prop->freq_supported[fs].min &&
+		    freq <= pin->prop->freq_supported[fs].max)
+			return true;
+	return false;
+}
+
+static int
+dpll_msg_add_pin_parents(struct sk_buff *msg, struct dpll_pin *pin,
+			 struct dpll_pin_ref *dpll_ref,
+			 struct netlink_ext_ack *extack)
+{
+	enum dpll_pin_state state;
+	struct dpll_pin_ref *ref;
+	struct dpll_pin *ppin;
+	struct nlattr *nest;
+	unsigned long index;
+	int ret;
+
+	xa_for_each(&pin->parent_refs, index, ref) {
+		const struct dpll_pin_ops *ops = dpll_pin_ops(ref);
+		void *parent_priv;
+
+		ppin = ref->pin;
+		parent_priv = dpll_pin_on_dpll_priv(dpll_ref->dpll, ppin);
+		ret = ops->state_on_pin_get(pin,
+					    dpll_pin_on_pin_priv(ppin, pin),
+					    ppin, parent_priv, &state, extack);
+		if (ret)
+			return ret;
+		nest = nla_nest_start(msg, DPLL_A_PIN_PARENT_PIN);
+		if (!nest)
+			return -EMSGSIZE;
+		ret = dpll_msg_add_dev_parent_handle(msg, ppin->id);
+		if (ret)
+			goto nest_cancel;
+		if (nla_put_u32(msg, DPLL_A_PIN_STATE, state)) {
+			ret = -EMSGSIZE;
+			goto nest_cancel;
+		}
+		nla_nest_end(msg, nest);
+	}
+
+	return 0;
+
+nest_cancel:
+	nla_nest_cancel(msg, nest);
+	return ret;
+}
+
+static int
+dpll_msg_add_pin_dplls(struct sk_buff *msg, struct dpll_pin *pin,
+		       struct netlink_ext_ack *extack)
+{
+	struct dpll_pin_ref *ref;
+	struct nlattr *attr;
+	unsigned long index;
+	int ret;
+
+	xa_for_each(&pin->dpll_refs, index, ref) {
+		attr = nla_nest_start(msg, DPLL_A_PIN_PARENT_DEVICE);
+		if (!attr)
+			return -EMSGSIZE;
+		ret = dpll_msg_add_dev_parent_handle(msg, ref->dpll->id);
+		if (ret)
+			goto nest_cancel;
+		ret = dpll_msg_add_pin_on_dpll_state(msg, pin, ref, extack);
+		if (ret)
+			goto nest_cancel;
+		ret = dpll_msg_add_pin_prio(msg, pin, ref, extack);
+		if (ret)
+			goto nest_cancel;
+		ret = dpll_msg_add_pin_direction(msg, pin, ref, extack);
+		if (ret)
+			goto nest_cancel;
+		nla_nest_end(msg, attr);
+	}
+
+	return 0;
+
+nest_cancel:
+	nla_nest_end(msg, attr);
+	return ret;
+}
+
+static int
+dpll_cmd_pin_get_one(struct sk_buff *msg, struct dpll_pin *pin,
+		     struct netlink_ext_ack *extack)
+{
+	const struct dpll_pin_properties *prop = pin->prop;
+	struct dpll_pin_ref *ref;
+	int ret;
+
+	ref = dpll_xa_ref_dpll_first(&pin->dpll_refs);
+	ASSERT_NOT_NULL(ref);
+
+	ret = dpll_msg_add_pin_handle(msg, pin);
+	if (ret)
+		return ret;
+	if (nla_put_string(msg, DPLL_A_PIN_MODULE_NAME,
+			   module_name(pin->module)))
+		return -EMSGSIZE;
+	if (nla_put_64bit(msg, DPLL_A_PIN_CLOCK_ID, sizeof(pin->clock_id),
+			  &pin->clock_id, DPLL_A_PIN_PAD))
+		return -EMSGSIZE;
+	if (prop->board_label &&
+	    nla_put_string(msg, DPLL_A_PIN_BOARD_LABEL, prop->board_label))
+		return -EMSGSIZE;
+	if (prop->panel_label &&
+	    nla_put_string(msg, DPLL_A_PIN_PANEL_LABEL, prop->panel_label))
+		return -EMSGSIZE;
+	if (prop->package_label &&
+	    nla_put_string(msg, DPLL_A_PIN_PACKAGE_LABEL,
+			   prop->package_label))
+		return -EMSGSIZE;
+	if (nla_put_u32(msg, DPLL_A_PIN_TYPE, prop->type))
+		return -EMSGSIZE;
+	if (nla_put_u32(msg, DPLL_A_PIN_CAPABILITIES, prop->capabilities))
+		return -EMSGSIZE;
+	ret = dpll_msg_add_pin_freq(msg, pin, ref, extack);
+	if (ret)
+		return ret;
+	if (xa_empty(&pin->parent_refs))
+		ret = dpll_msg_add_pin_dplls(msg, pin, extack);
+	else
+		ret = dpll_msg_add_pin_parents(msg, pin, ref, extack);
+
+	return ret;
+}
+
+static int
+dpll_device_get_one(struct dpll_device *dpll, struct sk_buff *msg,
+		    struct netlink_ext_ack *extack)
+{
+	int ret;
+
+	ret = dpll_msg_add_dev_handle(msg, dpll);
+	if (ret)
+		return ret;
+	if (nla_put_string(msg, DPLL_A_MODULE_NAME, module_name(dpll->module)))
+		return -EMSGSIZE;
+	if (nla_put_64bit(msg, DPLL_A_CLOCK_ID, sizeof(dpll->clock_id),
+			  &dpll->clock_id, DPLL_A_PAD))
+		return -EMSGSIZE;
+	ret = dpll_msg_add_temp(msg, dpll, extack);
+	if (ret)
+		return ret;
+	ret = dpll_msg_add_lock_status(msg, dpll, extack);
+	if (ret)
+		return ret;
+	ret = dpll_msg_add_mode(msg, dpll, extack);
+	if (ret)
+		return ret;
+	ret = dpll_msg_add_mode_supported(msg, dpll, extack);
+	if (ret)
+		return ret;
+	if (nla_put_u32(msg, DPLL_A_TYPE, dpll->type))
+		return -EMSGSIZE;
+
+	return ret;
+}
+
+static int
+dpll_device_event_send(enum dpll_cmd event, struct dpll_device *dpll)
+{
+	struct sk_buff *msg;
+	int ret = -ENOMEM;
+	void *hdr;
+
+	if (WARN_ON(!xa_get_mark(&dpll_device_xa, dpll->id, DPLL_REGISTERED)))
+		return -ENODEV;
+	msg = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+	hdr = genlmsg_put(msg, 0, 0, &dpll_nl_family, 0, event);
+	if (!hdr)
+		goto err_free_msg;
+	ret = dpll_device_get_one(dpll, msg, NULL);
+	if (ret)
+		goto err_cancel_msg;
+	genlmsg_end(msg, hdr);
+	genlmsg_multicast(&dpll_nl_family, msg, 0, 0, GFP_KERNEL);
+
+	return 0;
+
+err_cancel_msg:
+	genlmsg_cancel(msg, hdr);
+err_free_msg:
+	nlmsg_free(msg);
+
+	return ret;
+}
+
+int dpll_device_create_ntf(struct dpll_device *dpll)
+{
+	return dpll_device_event_send(DPLL_CMD_DEVICE_CREATE_NTF, dpll);
+}
+
+int dpll_device_delete_ntf(struct dpll_device *dpll)
+{
+	return dpll_device_event_send(DPLL_CMD_DEVICE_DELETE_NTF, dpll);
+}
+
+static int
+__dpll_device_change_ntf(struct dpll_device *dpll)
+{
+	return dpll_device_event_send(DPLL_CMD_DEVICE_CHANGE_NTF, dpll);
+}
+
+/**
+ * dpll_device_change_ntf - notify that the dpll device has been changed
+ * @dpll: registered dpll pointer
+ *
+ * Context: acquires and holds a dpll_lock.
+ * Return: 0 if succeeds, error code otherwise.
+ */
+int dpll_device_change_ntf(struct dpll_device *dpll)
+{
+	int ret;
+
+	mutex_lock(&dpll_lock);
+	ret = __dpll_device_change_ntf(dpll);
+	mutex_unlock(&dpll_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(dpll_device_change_ntf);
+
+static int
+dpll_pin_event_send(enum dpll_cmd event, struct dpll_pin *pin)
+{
+	struct sk_buff *msg;
+	int ret = -ENOMEM;
+	void *hdr;
+
+	if (WARN_ON(!xa_get_mark(&dpll_pin_xa, pin->id, DPLL_REGISTERED)))
+		return -ENODEV;
+
+	msg = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	hdr = genlmsg_put(msg, 0, 0, &dpll_nl_family, 0, event);
+	if (!hdr)
+		goto err_free_msg;
+	ret = dpll_cmd_pin_get_one(msg, pin, NULL);
+	if (ret)
+		goto err_cancel_msg;
+	genlmsg_end(msg, hdr);
+	genlmsg_multicast(&dpll_nl_family, msg, 0, 0, GFP_KERNEL);
+
+	return 0;
+
+err_cancel_msg:
+	genlmsg_cancel(msg, hdr);
+err_free_msg:
+	nlmsg_free(msg);
+
+	return ret;
+}
+
+int dpll_pin_create_ntf(struct dpll_pin *pin)
+{
+	return dpll_pin_event_send(DPLL_CMD_PIN_CREATE_NTF, pin);
+}
+
+int dpll_pin_delete_ntf(struct dpll_pin *pin)
+{
+	return dpll_pin_event_send(DPLL_CMD_PIN_DELETE_NTF, pin);
+}
+
+static int __dpll_pin_change_ntf(struct dpll_pin *pin)
+{
+	return dpll_pin_event_send(DPLL_CMD_PIN_CHANGE_NTF, pin);
+}
+
+/**
+ * dpll_pin_change_ntf - notify that the pin has been changed
+ * @pin: registered pin pointer
+ *
+ * Context: acquires and holds a dpll_lock.
+ * Return: 0 if succeeds, error code otherwise.
+ */
+int dpll_pin_change_ntf(struct dpll_pin *pin)
+{
+	int ret;
+
+	mutex_lock(&dpll_lock);
+	ret = __dpll_pin_change_ntf(pin);
+	mutex_unlock(&dpll_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(dpll_pin_change_ntf);
+
+static int
+dpll_pin_freq_set(struct dpll_pin *pin, struct nlattr *a,
+		  struct netlink_ext_ack *extack)
+{
+	u64 freq = nla_get_u64(a);
+	struct dpll_pin_ref *ref;
+	unsigned long i;
+	int ret;
+
+	if (!dpll_pin_is_freq_supported(pin, freq)) {
+		NL_SET_ERR_MSG_ATTR(extack, a, "frequency is not supported by the device");
+		return -EINVAL;
+	}
+
+	xa_for_each(&pin->dpll_refs, i, ref) {
+		const struct dpll_pin_ops *ops = dpll_pin_ops(ref);
+		struct dpll_device *dpll = ref->dpll;
+
+		if (!ops->frequency_set)
+			return -EOPNOTSUPP;
+		ret = ops->frequency_set(pin, dpll_pin_on_dpll_priv(dpll, pin),
+					 dpll, dpll_priv(dpll), freq, extack);
+		if (ret)
+			return ret;
+	}
+	__dpll_pin_change_ntf(pin);
+
+	return 0;
+}
+
+static int
+dpll_pin_on_pin_state_set(struct dpll_pin *pin, u32 parent_idx,
+			  enum dpll_pin_state state,
+			  struct netlink_ext_ack *extack)
+{
+	struct dpll_pin_ref *parent_ref;
+	const struct dpll_pin_ops *ops;
+	struct dpll_pin_ref *dpll_ref;
+	void *pin_priv, *parent_priv;
+	struct dpll_pin *parent;
+	unsigned long i;
+	int ret;
+
+	if (!(DPLL_PIN_CAPABILITIES_STATE_CAN_CHANGE &
+	      pin->prop->capabilities)) {
+		NL_SET_ERR_MSG(extack, "state changing is not allowed");
+		return -EOPNOTSUPP;
+	}
+	parent = xa_load(&dpll_pin_xa, parent_idx);
+	if (!parent)
+		return -EINVAL;
+	parent_ref = xa_load(&pin->parent_refs, parent->pin_idx);
+	if (!parent_ref)
+		return -EINVAL;
+	xa_for_each(&parent->dpll_refs, i, dpll_ref) {
+		ops = dpll_pin_ops(parent_ref);
+		if (!ops->state_on_pin_set)
+			return -EOPNOTSUPP;
+		pin_priv = dpll_pin_on_pin_priv(parent, pin);
+		parent_priv = dpll_pin_on_dpll_priv(dpll_ref->dpll, parent);
+		ret = ops->state_on_pin_set(pin, pin_priv, parent, parent_priv,
+					    state, extack);
+		if (ret)
+			return ret;
+	}
+	__dpll_pin_change_ntf(pin);
+
+	return 0;
+}
+
+static int
+dpll_pin_state_set(struct dpll_device *dpll, struct dpll_pin *pin,
+		   enum dpll_pin_state state,
+		   struct netlink_ext_ack *extack)
+{
+	const struct dpll_pin_ops *ops;
+	struct dpll_pin_ref *ref;
+	int ret;
+
+	if (!(DPLL_PIN_CAPABILITIES_STATE_CAN_CHANGE &
+	      pin->prop->capabilities)) {
+		NL_SET_ERR_MSG(extack, "state changing is not allowed");
+		return -EOPNOTSUPP;
+	}
+	ref = xa_load(&pin->dpll_refs, dpll->id);
+	ASSERT_NOT_NULL(ref);
+	ops = dpll_pin_ops(ref);
+	if (!ops->state_on_dpll_set)
+		return -EOPNOTSUPP;
+	ret = ops->state_on_dpll_set(pin, dpll_pin_on_dpll_priv(dpll, pin),
+				     dpll, dpll_priv(dpll), state, extack);
+	if (ret)
+		return ret;
+	__dpll_pin_change_ntf(pin);
+
+	return 0;
+}
+
+static int
+dpll_pin_prio_set(struct dpll_device *dpll, struct dpll_pin *pin,
+		  u32 prio, struct netlink_ext_ack *extack)
+{
+	const struct dpll_pin_ops *ops;
+	struct dpll_pin_ref *ref;
+	int ret;
+
+	if (!(DPLL_PIN_CAPABILITIES_PRIORITY_CAN_CHANGE &
+	      pin->prop->capabilities)) {
+		NL_SET_ERR_MSG(extack, "prio changing is not allowed");
+		return -EOPNOTSUPP;
+	}
+	ref = xa_load(&pin->dpll_refs, dpll->id);
+	ASSERT_NOT_NULL(ref);
+	ops = dpll_pin_ops(ref);
+	if (!ops->prio_set)
+		return -EOPNOTSUPP;
+	ret = ops->prio_set(pin, dpll_pin_on_dpll_priv(dpll, pin), dpll,
+			    dpll_priv(dpll), prio, extack);
+	if (ret)
+		return ret;
+	__dpll_pin_change_ntf(pin);
+
+	return 0;
+}
+
+static int
+dpll_pin_direction_set(struct dpll_pin *pin, struct dpll_device *dpll,
+		       enum dpll_pin_direction direction,
+		       struct netlink_ext_ack *extack)
+{
+	const struct dpll_pin_ops *ops;
+	struct dpll_pin_ref *ref;
+	int ret;
+
+	if (!(DPLL_PIN_CAPABILITIES_DIRECTION_CAN_CHANGE &
+	      pin->prop->capabilities)) {
+		NL_SET_ERR_MSG(extack, "direction changing is not allowed");
+		return -EOPNOTSUPP;
+	}
+	ref = xa_load(&pin->dpll_refs, dpll->id);
+	ASSERT_NOT_NULL(ref);
+	ops = dpll_pin_ops(ref);
+	if (!ops->direction_set)
+		return -EOPNOTSUPP;
+	ret = ops->direction_set(pin, dpll_pin_on_dpll_priv(dpll, pin),
+				 dpll, dpll_priv(dpll), direction, extack);
+	if (ret)
+		return ret;
+	__dpll_pin_change_ntf(pin);
+
+	return 0;
+}
+
+static int
+dpll_pin_parent_device_set(struct dpll_pin *pin, struct nlattr *parent_nest,
+			   struct netlink_ext_ack *extack)
+{
+	struct nlattr *tb[DPLL_A_PIN_MAX + 1];
+	enum dpll_pin_direction direction;
+	enum dpll_pin_state state;
+	struct dpll_pin_ref *ref;
+	struct dpll_device *dpll;
+	u32 pdpll_idx, prio;
+	int ret;
+
+	nla_parse_nested(tb, DPLL_A_PIN_MAX, parent_nest,
+			 dpll_pin_parent_device_nl_policy, extack);
+	if (!tb[DPLL_A_PIN_PARENT_ID]) {
+		NL_SET_ERR_MSG(extack, "device parent id expected");
+		return -EINVAL;
+	}
+	pdpll_idx = nla_get_u32(tb[DPLL_A_PIN_PARENT_ID]);
+	dpll = xa_load(&dpll_device_xa, pdpll_idx);
+	if (!dpll) {
+		NL_SET_ERR_MSG(extack, "parent device not found");
+		return -EINVAL;
+	}
+	ref = xa_load(&pin->dpll_refs, dpll->id);
+	if (!ref) {
+		NL_SET_ERR_MSG(extack, "pin not connected to given parent device");
+		return -EINVAL;
+	}
+	if (tb[DPLL_A_PIN_STATE]) {
+		state = nla_get_u32(tb[DPLL_A_PIN_STATE]);
+		ret = dpll_pin_state_set(dpll, pin, state, extack);
+		if (ret)
+			return ret;
+	}
+	if (tb[DPLL_A_PIN_PRIO]) {
+		prio = nla_get_u32(tb[DPLL_A_PIN_PRIO]);
+		ret = dpll_pin_prio_set(dpll, pin, prio, extack);
+		if (ret)
+			return ret;
+	}
+	if (tb[DPLL_A_PIN_DIRECTION]) {
+		direction = nla_get_u32(tb[DPLL_A_PIN_DIRECTION]);
+		ret = dpll_pin_direction_set(pin, dpll, direction, extack);
+		if (ret)
+			return ret;
+	}
+	return 0;
+}
+
+static int
+dpll_pin_parent_pin_set(struct dpll_pin *pin, struct nlattr *parent_nest,
+			struct netlink_ext_ack *extack)
+{
+	struct nlattr *tb[DPLL_A_PIN_MAX + 1];
+	enum dpll_pin_state state;
+	u32 ppin_idx;
+	int ret;
+
+	nla_parse_nested(tb, DPLL_A_PIN_MAX, parent_nest,
+			 dpll_pin_parent_pin_nl_policy, extack);
+	if (!tb[DPLL_A_PIN_PARENT_ID]) {
+		NL_SET_ERR_MSG(extack, "device parent id expected");
+		return -EINVAL;
+	}
+	ppin_idx = nla_get_u32(tb[DPLL_A_PIN_PARENT_ID]);
+	state = nla_get_u32(tb[DPLL_A_PIN_STATE]);
+	ret = dpll_pin_on_pin_state_set(pin, ppin_idx, state, extack);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+static int
+dpll_pin_set_from_nlattr(struct dpll_pin *pin, struct genl_info *info)
+{
+	struct nlattr *a;
+	int rem, ret;
+
+	nla_for_each_attr(a, genlmsg_data(info->genlhdr),
+			  genlmsg_len(info->genlhdr), rem) {
+		switch (nla_type(a)) {
+		case DPLL_A_PIN_FREQUENCY:
+			ret = dpll_pin_freq_set(pin, a, info->extack);
+			if (ret)
+				return ret;
+			break;
+		case DPLL_A_PIN_PARENT_DEVICE:
+			ret = dpll_pin_parent_device_set(pin, a, info->extack);
+			if (ret)
+				return ret;
+			break;
+		case DPLL_A_PIN_PARENT_PIN:
+			ret = dpll_pin_parent_pin_set(pin, a, info->extack);
+			if (ret)
+				return ret;
+			break;
+		}
+	}
+
+	return 0;
+}
+
+static struct dpll_pin *
+dpll_pin_find(u64 clock_id, struct nlattr *mod_name_attr,
+	      enum dpll_pin_type type, struct nlattr *board_label,
+	      struct nlattr *panel_label, struct nlattr *package_label,
+	      struct netlink_ext_ack *extack)
+{
+	bool board_match, panel_match, package_match;
+	struct dpll_pin *pin_match = NULL, *pin;
+	const struct dpll_pin_properties *prop;
+	bool cid_match, mod_match, type_match;
+	unsigned long i;
+
+	xa_for_each_marked(&dpll_pin_xa, i, pin, DPLL_REGISTERED) {
+		prop = pin->prop;
+		cid_match = clock_id ? pin->clock_id == clock_id : true;
+		mod_match = mod_name_attr && module_name(pin->module) ?
+			!nla_strcmp(mod_name_attr,
+				    module_name(pin->module)) : true;
+		type_match = type ? prop->type == type : true;
+		board_match = board_label ? (prop->board_label ?
+			!nla_strcmp(board_label, prop->board_label) : false) :
+			true;
+		panel_match = panel_label ? (prop->panel_label ?
+			!nla_strcmp(panel_label, prop->panel_label) : false) :
+			true;
+		package_match = package_label ? (prop->package_label ?
+			!nla_strcmp(package_label, prop->package_label) :
+			false) : true;
+		if (cid_match && mod_match && type_match && board_match &&
+		    panel_match && package_match) {
+			if (pin_match) {
+				NL_SET_ERR_MSG(extack, "multiple matches");
+				return ERR_PTR(-EINVAL);
+			}
+			pin_match = pin;
+		}
+	}
+	if (!pin_match) {
+		NL_SET_ERR_MSG(extack, "not found");
+		return ERR_PTR(-ENODEV);
+	}
+	return pin_match;
+}
+
+static struct dpll_pin *dpll_pin_find_from_nlattr(struct genl_info *info)
+{
+	struct nlattr *attr, *mod_name_attr = NULL, *board_label_attr = NULL,
+		*panel_label_attr = NULL, *package_label_attr = NULL;
+	enum dpll_pin_type type = 0;
+	u64 clock_id = 0;
+	int rem = 0;
+
+	nla_for_each_attr(attr, genlmsg_data(info->genlhdr),
+			  genlmsg_len(info->genlhdr), rem) {
+		switch (nla_type(attr)) {
+		case DPLL_A_PIN_CLOCK_ID:
+			if (clock_id)
+				goto duplicated_attr;
+			clock_id = nla_get_u64(attr);
+			break;
+		case DPLL_A_PIN_MODULE_NAME:
+			if (mod_name_attr)
+				goto duplicated_attr;
+			mod_name_attr = attr;
+			break;
+		case DPLL_A_PIN_TYPE:
+			if (type)
+				goto duplicated_attr;
+			type = nla_get_u32(attr);
+		break;
+		case DPLL_A_PIN_BOARD_LABEL:
+			if (board_label_attr)
+				goto duplicated_attr;
+			board_label_attr = attr;
+		break;
+		case DPLL_A_PIN_PANEL_LABEL:
+			if (panel_label_attr)
+				goto duplicated_attr;
+			panel_label_attr = attr;
+		break;
+		case DPLL_A_PIN_PACKAGE_LABEL:
+			if (package_label_attr)
+				goto duplicated_attr;
+			package_label_attr = attr;
+		break;
+		default:
+			break;
+		}
+	}
+	if (!(clock_id  || mod_name_attr || board_label_attr ||
+	      panel_label_attr || package_label_attr)) {
+		NL_SET_ERR_MSG(info->extack, "missing attributes");
+		return ERR_PTR(-EINVAL);
+	}
+	return dpll_pin_find(clock_id, mod_name_attr, type, board_label_attr,
+			     panel_label_attr, package_label_attr,
+			     info->extack);
+duplicated_attr:
+	NL_SET_ERR_MSG(info->extack, "duplicated attribute");
+	return ERR_PTR(-EINVAL);
+}
+
+int dpll_nl_pin_id_get_doit(struct sk_buff *skb, struct genl_info *info)
+{
+	struct dpll_pin *pin;
+	struct sk_buff *msg;
+	struct nlattr *hdr;
+	int ret;
+
+	msg = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+	hdr = genlmsg_put_reply(msg, info, &dpll_nl_family, 0,
+				DPLL_CMD_PIN_ID_GET);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	pin = dpll_pin_find_from_nlattr(info);
+	if (!IS_ERR(pin)) {
+		ret = dpll_msg_add_pin_handle(msg, pin);
+		if (ret) {
+			nlmsg_free(msg);
+			return ret;
+		}
+	}
+	genlmsg_end(msg, hdr);
+
+	return genlmsg_reply(msg, info);
+}
+
+int dpll_nl_pin_get_doit(struct sk_buff *skb, struct genl_info *info)
+{
+	struct dpll_pin *pin = info->user_ptr[0];
+	struct sk_buff *msg;
+	struct nlattr *hdr;
+	int ret;
+
+	if (!pin)
+		return -ENODEV;
+	msg = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+	hdr = genlmsg_put_reply(msg, info, &dpll_nl_family, 0,
+				DPLL_CMD_PIN_GET);
+	if (!hdr)
+		return -EMSGSIZE;
+	ret = dpll_cmd_pin_get_one(msg, pin, info->extack);
+	if (ret) {
+		nlmsg_free(msg);
+		return ret;
+	}
+	genlmsg_end(msg, hdr);
+
+	return genlmsg_reply(msg, info);
+}
+
+int dpll_nl_pin_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct dpll_dump_ctx *ctx = dpll_dump_context(cb);
+	struct dpll_pin *pin;
+	struct nlattr *hdr;
+	unsigned long i;
+	int ret = 0;
+
+	xa_for_each_marked_start(&dpll_pin_xa, i, pin, DPLL_REGISTERED,
+				 ctx->idx) {
+		hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
+				  cb->nlh->nlmsg_seq,
+				  &dpll_nl_family, NLM_F_MULTI,
+				  DPLL_CMD_PIN_GET);
+		if (!hdr) {
+			ret = -EMSGSIZE;
+			break;
+		}
+		ret = dpll_cmd_pin_get_one(skb, pin, cb->extack);
+		if (ret) {
+			genlmsg_cancel(skb, hdr);
+			break;
+		}
+		genlmsg_end(skb, hdr);
+	}
+	if (ret == -EMSGSIZE) {
+		ctx->idx = i;
+		return skb->len;
+	}
+	return ret;
+}
+
+int dpll_nl_pin_set_doit(struct sk_buff *skb, struct genl_info *info)
+{
+	struct dpll_pin *pin = info->user_ptr[0];
+
+	return dpll_pin_set_from_nlattr(pin, info);
+}
+
+static struct dpll_device *
+dpll_device_find(u64 clock_id, struct nlattr *mod_name_attr,
+		 enum dpll_type type, struct netlink_ext_ack *extack)
+{
+	struct dpll_device *dpll_match = NULL, *dpll;
+	bool cid_match, mod_match, type_match;
+	unsigned long i;
+
+	xa_for_each_marked(&dpll_device_xa, i, dpll, DPLL_REGISTERED) {
+		cid_match = clock_id ? dpll->clock_id == clock_id : true;
+		mod_match = mod_name_attr ? (module_name(dpll->module) ?
+			!nla_strcmp(mod_name_attr,
+				    module_name(dpll->module)) : false) : true;
+		type_match = type ? dpll->type == type : true;
+		if (cid_match && mod_match && type_match) {
+			if (dpll_match) {
+				NL_SET_ERR_MSG(extack, "multiple matches");
+				return ERR_PTR(-EINVAL);
+			}
+			dpll_match = dpll;
+		}
+	}
+	if (!dpll_match) {
+		NL_SET_ERR_MSG(extack, "not found");
+		return ERR_PTR(-ENODEV);
+	}
+
+	return dpll_match;
+}
+
+static struct dpll_device *
+dpll_device_find_from_nlattr(struct genl_info *info)
+{
+	struct nlattr *attr, *mod_name_attr = NULL;
+	enum dpll_type type = 0;
+	u64 clock_id = 0;
+	int rem = 0;
+
+	nla_for_each_attr(attr, genlmsg_data(info->genlhdr),
+			  genlmsg_len(info->genlhdr), rem) {
+		switch (nla_type(attr)) {
+		case DPLL_A_CLOCK_ID:
+			if (clock_id)
+				goto duplicated_attr;
+			clock_id = nla_get_u64(attr);
+			break;
+		case DPLL_A_MODULE_NAME:
+			if (mod_name_attr)
+				goto duplicated_attr;
+			mod_name_attr = attr;
+			break;
+		case DPLL_A_TYPE:
+			if (type)
+				goto duplicated_attr;
+			type = nla_get_u32(attr);
+			break;
+		default:
+			break;
+		}
+	}
+	if (!clock_id && !mod_name_attr && !type) {
+		NL_SET_ERR_MSG(info->extack, "missing attributes");
+		return ERR_PTR(-EINVAL);
+	}
+	return dpll_device_find(clock_id, mod_name_attr, type, info->extack);
+duplicated_attr:
+	NL_SET_ERR_MSG(info->extack, "duplicated attribute");
+	return ERR_PTR(-EINVAL);
+}
+
+int dpll_nl_device_id_get_doit(struct sk_buff *skb, struct genl_info *info)
+{
+	struct dpll_device *dpll;
+	struct sk_buff *msg;
+	struct nlattr *hdr;
+	int ret;
+
+	msg = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+	hdr = genlmsg_put_reply(msg, info, &dpll_nl_family, 0,
+				DPLL_CMD_DEVICE_ID_GET);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	dpll = dpll_device_find_from_nlattr(info);
+	if (!IS_ERR(dpll)) {
+		ret = dpll_msg_add_dev_handle(msg, dpll);
+		if (ret) {
+			nlmsg_free(msg);
+			return ret;
+		}
+	}
+	genlmsg_end(msg, hdr);
+
+	return genlmsg_reply(msg, info);
+}
+
+int dpll_nl_device_get_doit(struct sk_buff *skb, struct genl_info *info)
+{
+	struct dpll_device *dpll = info->user_ptr[0];
+	struct sk_buff *msg;
+	struct nlattr *hdr;
+	int ret;
+
+	msg = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+	hdr = genlmsg_put_reply(msg, info, &dpll_nl_family, 0,
+				DPLL_CMD_DEVICE_GET);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	ret = dpll_device_get_one(dpll, msg, info->extack);
+	if (ret) {
+		nlmsg_free(msg);
+		return ret;
+	}
+	genlmsg_end(msg, hdr);
+
+	return genlmsg_reply(msg, info);
+}
+
+int dpll_nl_device_set_doit(struct sk_buff *skb, struct genl_info *info)
+{
+	/* placeholder for set command */
+	return 0;
+}
+
+int dpll_nl_device_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct dpll_dump_ctx *ctx = dpll_dump_context(cb);
+	struct dpll_device *dpll;
+	struct nlattr *hdr;
+	unsigned long i;
+	int ret = 0;
+
+	xa_for_each_marked_start(&dpll_device_xa, i, dpll, DPLL_REGISTERED,
+				 ctx->idx) {
+		hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
+				  cb->nlh->nlmsg_seq, &dpll_nl_family,
+				  NLM_F_MULTI, DPLL_CMD_DEVICE_GET);
+		if (!hdr) {
+			ret = -EMSGSIZE;
+			break;
+		}
+		ret = dpll_device_get_one(dpll, skb, cb->extack);
+		if (ret) {
+			genlmsg_cancel(skb, hdr);
+			break;
+		}
+		genlmsg_end(skb, hdr);
+	}
+	if (ret == -EMSGSIZE) {
+		ctx->idx = i;
+		return skb->len;
+	}
+	return ret;
+}
+
+int dpll_pre_doit(const struct genl_split_ops *ops, struct sk_buff *skb,
+		  struct genl_info *info)
+{
+	u32 id;
+
+	if (GENL_REQ_ATTR_CHECK(info, DPLL_A_ID))
+		return -EINVAL;
+
+	mutex_lock(&dpll_lock);
+	id = nla_get_u32(info->attrs[DPLL_A_ID]);
+	info->user_ptr[0] = dpll_device_get_by_id(id);
+	if (!info->user_ptr[0]) {
+		NL_SET_ERR_MSG(info->extack, "device not found");
+		goto unlock;
+	}
+	return 0;
+unlock:
+	mutex_unlock(&dpll_lock);
+	return -ENODEV;
+}
+
+void dpll_post_doit(const struct genl_split_ops *ops, struct sk_buff *skb,
+		    struct genl_info *info)
+{
+	mutex_unlock(&dpll_lock);
+}
+
+int
+dpll_lock_doit(const struct genl_split_ops *ops, struct sk_buff *skb,
+	       struct genl_info *info)
+{
+	mutex_lock(&dpll_lock);
+
+	return 0;
+}
+
+void
+dpll_unlock_doit(const struct genl_split_ops *ops, struct sk_buff *skb,
+		 struct genl_info *info)
+{
+	mutex_unlock(&dpll_lock);
+}
+
+int dpll_lock_dumpit(struct netlink_callback *cb)
+{
+	mutex_lock(&dpll_lock);
+
+	return 0;
+}
+
+int dpll_unlock_dumpit(struct netlink_callback *cb)
+{
+	mutex_unlock(&dpll_lock);
+
+	return 0;
+}
+
+int dpll_pin_pre_doit(const struct genl_split_ops *ops, struct sk_buff *skb,
+		      struct genl_info *info)
+{
+	int ret;
+
+	mutex_lock(&dpll_lock);
+	if (GENL_REQ_ATTR_CHECK(info, DPLL_A_PIN_ID)) {
+		ret = -EINVAL;
+		goto unlock_dev;
+	}
+	info->user_ptr[0] = xa_load(&dpll_pin_xa,
+				    nla_get_u32(info->attrs[DPLL_A_PIN_ID]));
+	if (!info->user_ptr[0]) {
+		NL_SET_ERR_MSG(info->extack, "pin not found");
+		ret = -ENODEV;
+		goto unlock_dev;
+	}
+
+	return 0;
+
+unlock_dev:
+	mutex_unlock(&dpll_lock);
+	return ret;
+}
+
+void dpll_pin_post_doit(const struct genl_split_ops *ops, struct sk_buff *skb,
+			struct genl_info *info)
+{
+	mutex_unlock(&dpll_lock);
+}
diff --git a/drivers/dpll/dpll_netlink.h b/drivers/dpll/dpll_netlink.h
new file mode 100644
index 000000000000..a9cfd55f57fc
--- /dev/null
+++ b/drivers/dpll/dpll_netlink.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *  Copyright (c) 2023 Meta Platforms, Inc. and affiliates
+ *  Copyright (c) 2023 Intel and affiliates
+ */
+
+int dpll_device_create_ntf(struct dpll_device *dpll);
+
+int dpll_device_delete_ntf(struct dpll_device *dpll);
+
+int dpll_pin_create_ntf(struct dpll_pin *pin);
+
+int dpll_pin_delete_ntf(struct dpll_pin *pin);
diff --git a/drivers/dpll/dpll_nl.c b/drivers/dpll/dpll_nl.c
new file mode 100644
index 000000000000..14064c8c783b
--- /dev/null
+++ b/drivers/dpll/dpll_nl.c
@@ -0,0 +1,162 @@
+// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
+/* Do not edit directly, auto-generated from: */
+/*	Documentation/netlink/specs/dpll.yaml */
+/* YNL-GEN kernel source */
+
+#include <net/netlink.h>
+#include <net/genetlink.h>
+
+#include "dpll_nl.h"
+
+#include <uapi/linux/dpll.h>
+
+/* Common nested types */
+const struct nla_policy dpll_pin_parent_device_nl_policy[DPLL_A_PIN_STATE + 1] = {
+	[DPLL_A_PIN_PARENT_ID] = { .type = NLA_U32, },
+	[DPLL_A_PIN_DIRECTION] = NLA_POLICY_RANGE(NLA_U32, 1, 2),
+	[DPLL_A_PIN_PRIO] = { .type = NLA_U32, },
+	[DPLL_A_PIN_STATE] = NLA_POLICY_RANGE(NLA_U32, 1, 3),
+};
+
+const struct nla_policy dpll_pin_parent_pin_nl_policy[DPLL_A_PIN_STATE + 1] = {
+	[DPLL_A_PIN_PARENT_ID] = { .type = NLA_U32, },
+	[DPLL_A_PIN_STATE] = NLA_POLICY_RANGE(NLA_U32, 1, 3),
+};
+
+/* DPLL_CMD_DEVICE_ID_GET - do */
+static const struct nla_policy dpll_device_id_get_nl_policy[DPLL_A_TYPE + 1] = {
+	[DPLL_A_MODULE_NAME] = { .type = NLA_NUL_STRING, },
+	[DPLL_A_CLOCK_ID] = { .type = NLA_U64, },
+	[DPLL_A_TYPE] = NLA_POLICY_RANGE(NLA_U32, 1, 2),
+};
+
+/* DPLL_CMD_DEVICE_GET - do */
+static const struct nla_policy dpll_device_get_nl_policy[DPLL_A_ID + 1] = {
+	[DPLL_A_ID] = { .type = NLA_U32, },
+};
+
+/* DPLL_CMD_DEVICE_SET - do */
+static const struct nla_policy dpll_device_set_nl_policy[DPLL_A_ID + 1] = {
+	[DPLL_A_ID] = { .type = NLA_U32, },
+};
+
+/* DPLL_CMD_PIN_ID_GET - do */
+static const struct nla_policy dpll_pin_id_get_nl_policy[DPLL_A_PIN_TYPE + 1] = {
+	[DPLL_A_PIN_MODULE_NAME] = { .type = NLA_NUL_STRING, },
+	[DPLL_A_PIN_CLOCK_ID] = { .type = NLA_U64, },
+	[DPLL_A_PIN_BOARD_LABEL] = { .type = NLA_NUL_STRING, },
+	[DPLL_A_PIN_PANEL_LABEL] = { .type = NLA_NUL_STRING, },
+	[DPLL_A_PIN_PACKAGE_LABEL] = { .type = NLA_NUL_STRING, },
+	[DPLL_A_PIN_TYPE] = NLA_POLICY_RANGE(NLA_U32, 1, 5),
+};
+
+/* DPLL_CMD_PIN_GET - do */
+static const struct nla_policy dpll_pin_get_do_nl_policy[DPLL_A_PIN_ID + 1] = {
+	[DPLL_A_PIN_ID] = { .type = NLA_U32, },
+};
+
+/* DPLL_CMD_PIN_GET - dump */
+static const struct nla_policy dpll_pin_get_dump_nl_policy[DPLL_A_PIN_ID + 1] = {
+	[DPLL_A_PIN_ID] = { .type = NLA_U32, },
+};
+
+/* DPLL_CMD_PIN_SET - do */
+static const struct nla_policy dpll_pin_set_nl_policy[DPLL_A_PIN_PARENT_PIN + 1] = {
+	[DPLL_A_PIN_ID] = { .type = NLA_U32, },
+	[DPLL_A_PIN_FREQUENCY] = { .type = NLA_U64, },
+	[DPLL_A_PIN_DIRECTION] = NLA_POLICY_RANGE(NLA_U32, 1, 2),
+	[DPLL_A_PIN_PRIO] = { .type = NLA_U32, },
+	[DPLL_A_PIN_STATE] = NLA_POLICY_RANGE(NLA_U32, 1, 3),
+	[DPLL_A_PIN_PARENT_DEVICE] = NLA_POLICY_NESTED(dpll_pin_parent_device_nl_policy),
+	[DPLL_A_PIN_PARENT_PIN] = NLA_POLICY_NESTED(dpll_pin_parent_pin_nl_policy),
+};
+
+/* Ops table for dpll */
+static const struct genl_split_ops dpll_nl_ops[] = {
+	{
+		.cmd		= DPLL_CMD_DEVICE_ID_GET,
+		.pre_doit	= dpll_lock_doit,
+		.doit		= dpll_nl_device_id_get_doit,
+		.post_doit	= dpll_unlock_doit,
+		.policy		= dpll_device_id_get_nl_policy,
+		.maxattr	= DPLL_A_TYPE,
+		.flags		= GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+	},
+	{
+		.cmd		= DPLL_CMD_DEVICE_GET,
+		.pre_doit	= dpll_pre_doit,
+		.doit		= dpll_nl_device_get_doit,
+		.post_doit	= dpll_post_doit,
+		.policy		= dpll_device_get_nl_policy,
+		.maxattr	= DPLL_A_ID,
+		.flags		= GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+	},
+	{
+		.cmd	= DPLL_CMD_DEVICE_GET,
+		.start	= dpll_lock_dumpit,
+		.dumpit	= dpll_nl_device_get_dumpit,
+		.done	= dpll_unlock_dumpit,
+		.flags	= GENL_ADMIN_PERM | GENL_CMD_CAP_DUMP,
+	},
+	{
+		.cmd		= DPLL_CMD_DEVICE_SET,
+		.pre_doit	= dpll_pre_doit,
+		.doit		= dpll_nl_device_set_doit,
+		.post_doit	= dpll_post_doit,
+		.policy		= dpll_device_set_nl_policy,
+		.maxattr	= DPLL_A_ID,
+		.flags		= GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+	},
+	{
+		.cmd		= DPLL_CMD_PIN_ID_GET,
+		.pre_doit	= dpll_lock_doit,
+		.doit		= dpll_nl_pin_id_get_doit,
+		.post_doit	= dpll_unlock_doit,
+		.policy		= dpll_pin_id_get_nl_policy,
+		.maxattr	= DPLL_A_PIN_TYPE,
+		.flags		= GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+	},
+	{
+		.cmd		= DPLL_CMD_PIN_GET,
+		.pre_doit	= dpll_pin_pre_doit,
+		.doit		= dpll_nl_pin_get_doit,
+		.post_doit	= dpll_pin_post_doit,
+		.policy		= dpll_pin_get_do_nl_policy,
+		.maxattr	= DPLL_A_PIN_ID,
+		.flags		= GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+	},
+	{
+		.cmd		= DPLL_CMD_PIN_GET,
+		.start		= dpll_lock_dumpit,
+		.dumpit		= dpll_nl_pin_get_dumpit,
+		.done		= dpll_unlock_dumpit,
+		.policy		= dpll_pin_get_dump_nl_policy,
+		.maxattr	= DPLL_A_PIN_ID,
+		.flags		= GENL_ADMIN_PERM | GENL_CMD_CAP_DUMP,
+	},
+	{
+		.cmd		= DPLL_CMD_PIN_SET,
+		.pre_doit	= dpll_pin_pre_doit,
+		.doit		= dpll_nl_pin_set_doit,
+		.post_doit	= dpll_pin_post_doit,
+		.policy		= dpll_pin_set_nl_policy,
+		.maxattr	= DPLL_A_PIN_PARENT_PIN,
+		.flags		= GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+	},
+};
+
+static const struct genl_multicast_group dpll_nl_mcgrps[] = {
+	[DPLL_NLGRP_MONITOR] = { "monitor", },
+};
+
+struct genl_family dpll_nl_family __ro_after_init = {
+	.name		= DPLL_FAMILY_NAME,
+	.version	= DPLL_FAMILY_VERSION,
+	.netnsok	= true,
+	.parallel_ops	= true,
+	.module		= THIS_MODULE,
+	.split_ops	= dpll_nl_ops,
+	.n_split_ops	= ARRAY_SIZE(dpll_nl_ops),
+	.mcgrps		= dpll_nl_mcgrps,
+	.n_mcgrps	= ARRAY_SIZE(dpll_nl_mcgrps),
+};
diff --git a/drivers/dpll/dpll_nl.h b/drivers/dpll/dpll_nl.h
new file mode 100644
index 000000000000..1f67aaed4742
--- /dev/null
+++ b/drivers/dpll/dpll_nl.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
+/* Do not edit directly, auto-generated from: */
+/*	Documentation/netlink/specs/dpll.yaml */
+/* YNL-GEN kernel header */
+
+#ifndef _LINUX_DPLL_GEN_H
+#define _LINUX_DPLL_GEN_H
+
+#include <net/netlink.h>
+#include <net/genetlink.h>
+
+#include <uapi/linux/dpll.h>
+
+/* Common nested types */
+extern const struct nla_policy dpll_pin_parent_device_nl_policy[DPLL_A_PIN_STATE + 1];
+extern const struct nla_policy dpll_pin_parent_pin_nl_policy[DPLL_A_PIN_STATE + 1];
+
+int dpll_lock_doit(const struct genl_split_ops *ops, struct sk_buff *skb,
+		   struct genl_info *info);
+int dpll_pre_doit(const struct genl_split_ops *ops, struct sk_buff *skb,
+		  struct genl_info *info);
+int dpll_pin_pre_doit(const struct genl_split_ops *ops, struct sk_buff *skb,
+		      struct genl_info *info);
+void
+dpll_unlock_doit(const struct genl_split_ops *ops, struct sk_buff *skb,
+		 struct genl_info *info);
+void
+dpll_post_doit(const struct genl_split_ops *ops, struct sk_buff *skb,
+	       struct genl_info *info);
+void
+dpll_pin_post_doit(const struct genl_split_ops *ops, struct sk_buff *skb,
+		   struct genl_info *info);
+int dpll_lock_dumpit(struct netlink_callback *cb);
+int dpll_unlock_dumpit(struct netlink_callback *cb);
+
+int dpll_nl_device_id_get_doit(struct sk_buff *skb, struct genl_info *info);
+int dpll_nl_device_get_doit(struct sk_buff *skb, struct genl_info *info);
+int dpll_nl_device_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb);
+int dpll_nl_device_set_doit(struct sk_buff *skb, struct genl_info *info);
+int dpll_nl_pin_id_get_doit(struct sk_buff *skb, struct genl_info *info);
+int dpll_nl_pin_get_doit(struct sk_buff *skb, struct genl_info *info);
+int dpll_nl_pin_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb);
+int dpll_nl_pin_set_doit(struct sk_buff *skb, struct genl_info *info);
+
+enum {
+	DPLL_NLGRP_MONITOR,
+};
+
+extern struct genl_family dpll_nl_family;
+
+#endif /* _LINUX_DPLL_GEN_H */
diff --git a/drivers/net/dsa/b53/b53_mmap.c b/drivers/net/dsa/b53/b53_mmap.c
index 5e39641ea887..3a89349dc918 100644
--- a/drivers/net/dsa/b53/b53_mmap.c
+++ b/drivers/net/dsa/b53/b53_mmap.c
@@ -324,14 +324,12 @@ static int b53_mmap_probe(struct platform_device *pdev)
 	return b53_switch_register(dev);
 }
 
-static int b53_mmap_remove(struct platform_device *pdev)
+static void b53_mmap_remove(struct platform_device *pdev)
 {
 	struct b53_device *dev = platform_get_drvdata(pdev);
 
 	if (dev)
 		b53_switch_remove(dev);
-
-	return 0;
 }
 
 static void b53_mmap_shutdown(struct platform_device *pdev)
@@ -372,7 +370,7 @@ MODULE_DEVICE_TABLE(of, b53_mmap_of_table);
 
 static struct platform_driver b53_mmap_driver = {
 	.probe = b53_mmap_probe,
-	.remove = b53_mmap_remove,
+	.remove_new = b53_mmap_remove,
 	.shutdown = b53_mmap_shutdown,
 	.driver = {
 		.name = "b53-switch",
diff --git a/drivers/net/dsa/b53/b53_srab.c b/drivers/net/dsa/b53/b53_srab.c
index bcb44034404d..f3f95332ff17 100644
--- a/drivers/net/dsa/b53/b53_srab.c
+++ b/drivers/net/dsa/b53/b53_srab.c
@@ -657,17 +657,15 @@ static int b53_srab_probe(struct platform_device *pdev)
 	return b53_switch_register(dev);
 }
 
-static int b53_srab_remove(struct platform_device *pdev)
+static void b53_srab_remove(struct platform_device *pdev)
 {
 	struct b53_device *dev = platform_get_drvdata(pdev);
 
 	if (!dev)
-		return 0;
+		return;
 
 	b53_srab_intr_set(dev->priv, false);
 	b53_switch_remove(dev);
-
-	return 0;
 }
 
 static void b53_srab_shutdown(struct platform_device *pdev)
@@ -684,7 +682,7 @@ static void b53_srab_shutdown(struct platform_device *pdev)
 
 static struct platform_driver b53_srab_driver = {
 	.probe = b53_srab_probe,
-	.remove = b53_srab_remove,
+	.remove_new = b53_srab_remove,
 	.shutdown = b53_srab_shutdown,
 	.driver = {
 		.name = "b53-srab-switch",
diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c
index cd1f240c90f3..326937e91f52 100644
--- a/drivers/net/dsa/bcm_sf2.c
+++ b/drivers/net/dsa/bcm_sf2.c
@@ -1543,12 +1543,12 @@ out_clk:
 	return ret;
 }
 
-static int bcm_sf2_sw_remove(struct platform_device *pdev)
+static void bcm_sf2_sw_remove(struct platform_device *pdev)
 {
 	struct bcm_sf2_priv *priv = platform_get_drvdata(pdev);
 
 	if (!priv)
-		return 0;
+		return;
 
 	priv->wol_ports_mask = 0;
 	/* Disable interrupts */
@@ -1560,8 +1560,6 @@ static int bcm_sf2_sw_remove(struct platform_device *pdev)
 	clk_disable_unprepare(priv->clk);
 	if (priv->type == BCM7278_DEVICE_ID)
 		reset_control_assert(priv->rcdev);
-
-	return 0;
 }
 
 static void bcm_sf2_sw_shutdown(struct platform_device *pdev)
@@ -1607,7 +1605,7 @@ static SIMPLE_DEV_PM_OPS(bcm_sf2_pm_ops,
 
 static struct platform_driver bcm_sf2_driver = {
 	.probe	= bcm_sf2_sw_probe,
-	.remove	= bcm_sf2_sw_remove,
+	.remove_new = bcm_sf2_sw_remove,
 	.shutdown = bcm_sf2_sw_shutdown,
 	.driver = {
 		.name = "brcm-sf2",
diff --git a/drivers/net/dsa/hirschmann/hellcreek.c b/drivers/net/dsa/hirschmann/hellcreek.c
index 11ef1d7ea229..beda1e9d350f 100644
--- a/drivers/net/dsa/hirschmann/hellcreek.c
+++ b/drivers/net/dsa/hirschmann/hellcreek.c
@@ -2060,18 +2060,16 @@ err_ptp_setup:
 	return ret;
 }
 
-static int hellcreek_remove(struct platform_device *pdev)
+static void hellcreek_remove(struct platform_device *pdev)
 {
 	struct hellcreek *hellcreek = platform_get_drvdata(pdev);
 
 	if (!hellcreek)
-		return 0;
+		return;
 
 	hellcreek_hwtstamp_free(hellcreek);
 	hellcreek_ptp_free(hellcreek);
 	dsa_unregister_switch(hellcreek->ds);
-
-	return 0;
 }
 
 static void hellcreek_shutdown(struct platform_device *pdev)
@@ -2107,7 +2105,7 @@ MODULE_DEVICE_TABLE(of, hellcreek_of_match);
 
 static struct platform_driver hellcreek_driver = {
 	.probe	= hellcreek_probe,
-	.remove = hellcreek_remove,
+	.remove_new = hellcreek_remove,
 	.shutdown = hellcreek_shutdown,
 	.driver = {
 		.name = "hellcreek",
diff --git a/drivers/net/dsa/lantiq_gswip.c b/drivers/net/dsa/lantiq_gswip.c
index 3c76a1a14aee..25abf2caf5fb 100644
--- a/drivers/net/dsa/lantiq_gswip.c
+++ b/drivers/net/dsa/lantiq_gswip.c
@@ -2207,13 +2207,13 @@ put_mdio_node:
 	return err;
 }
 
-static int gswip_remove(struct platform_device *pdev)
+static void gswip_remove(struct platform_device *pdev)
 {
 	struct gswip_priv *priv = platform_get_drvdata(pdev);
 	int i;
 
 	if (!priv)
-		return 0;
+		return;
 
 	/* disable the switch */
 	gswip_mdio_mask(priv, GSWIP_MDIO_GLOB_ENABLE, 0, GSWIP_MDIO_GLOB);
@@ -2228,8 +2228,6 @@ static int gswip_remove(struct platform_device *pdev)
 
 	for (i = 0; i < priv->num_gphy_fw; i++)
 		gswip_gphy_fw_remove(priv, &priv->gphy_fw[i]);
-
-	return 0;
 }
 
 static void gswip_shutdown(struct platform_device *pdev)
@@ -2266,7 +2264,7 @@ MODULE_DEVICE_TABLE(of, gswip_of_match);
 
 static struct platform_driver gswip_driver = {
 	.probe = gswip_probe,
-	.remove = gswip_remove,
+	.remove_new = gswip_remove,
 	.shutdown = gswip_shutdown,
 	.driver = {
 		.name = "gswip",
diff --git a/drivers/net/dsa/microchip/Makefile b/drivers/net/dsa/microchip/Makefile
index 48360cc9fc68..49459a50dbc8 100644
--- a/drivers/net/dsa/microchip/Makefile
+++ b/drivers/net/dsa/microchip/Makefile
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0-only
 obj-$(CONFIG_NET_DSA_MICROCHIP_KSZ_COMMON)	+= ksz_switch.o
 ksz_switch-objs := ksz_common.o
-ksz_switch-objs += ksz9477.o
+ksz_switch-objs += ksz9477.o ksz9477_acl.o ksz9477_tc_flower.o
 ksz_switch-objs += ksz8795.o
 ksz_switch-objs += lan937x_main.o
 
diff --git a/drivers/net/dsa/microchip/ksz8795_reg.h b/drivers/net/dsa/microchip/ksz8795_reg.h
index 7a57c6088f80..d33db4f86c64 100644
--- a/drivers/net/dsa/microchip/ksz8795_reg.h
+++ b/drivers/net/dsa/microchip/ksz8795_reg.h
@@ -442,20 +442,6 @@
 #define TOS_PRIO_M			KS_PRIO_M
 #define TOS_PRIO_S			KS_PRIO_S
 
-#define REG_SW_CTRL_20			0xA3
-
-#define SW_GMII_DRIVE_STRENGTH_S	4
-#define SW_DRIVE_STRENGTH_M		0x7
-#define SW_DRIVE_STRENGTH_2MA		0
-#define SW_DRIVE_STRENGTH_4MA		1
-#define SW_DRIVE_STRENGTH_8MA		2
-#define SW_DRIVE_STRENGTH_12MA		3
-#define SW_DRIVE_STRENGTH_16MA		4
-#define SW_DRIVE_STRENGTH_20MA		5
-#define SW_DRIVE_STRENGTH_24MA		6
-#define SW_DRIVE_STRENGTH_28MA		7
-#define SW_MII_DRIVE_STRENGTH_S		0
-
 #define REG_SW_CTRL_21			0xA4
 
 #define SW_IPV6_MLD_OPTION		BIT(3)
diff --git a/drivers/net/dsa/microchip/ksz9477.c b/drivers/net/dsa/microchip/ksz9477.c
index 83b7f2d5c1ea..9e63ed820c92 100644
--- a/drivers/net/dsa/microchip/ksz9477.c
+++ b/drivers/net/dsa/microchip/ksz9477.c
@@ -1004,6 +1004,8 @@ void ksz9477_port_setup(struct ksz_device *dev, int port, bool cpu_port)
 	/* clear pending interrupts */
 	if (dev->info->internal_phy[port])
 		ksz_pread16(dev, port, REG_PORT_PHY_INT_ENABLE, &data16);
+
+	ksz9477_port_acl_init(dev, port);
 }
 
 void ksz9477_config_cpu_port(struct dsa_switch *ds)
diff --git a/drivers/net/dsa/microchip/ksz9477.h b/drivers/net/dsa/microchip/ksz9477.h
index a6f425866a29..d7bbd9bd8893 100644
--- a/drivers/net/dsa/microchip/ksz9477.h
+++ b/drivers/net/dsa/microchip/ksz9477.h
@@ -57,4 +57,40 @@ int ksz9477_switch_init(struct ksz_device *dev);
 void ksz9477_switch_exit(struct ksz_device *dev);
 void ksz9477_port_queue_split(struct ksz_device *dev, int port);
 
+int ksz9477_port_acl_init(struct ksz_device *dev, int port);
+void ksz9477_port_acl_free(struct ksz_device *dev, int port);
+int ksz9477_cls_flower_add(struct dsa_switch *ds, int port,
+			   struct flow_cls_offload *cls, bool ingress);
+int ksz9477_cls_flower_del(struct dsa_switch *ds, int port,
+			   struct flow_cls_offload *cls, bool ingress);
+
+#define KSZ9477_ACL_ENTRY_SIZE		18
+#define KSZ9477_ACL_MAX_ENTRIES		16
+
+struct ksz9477_acl_entry {
+	u8 entry[KSZ9477_ACL_ENTRY_SIZE];
+	unsigned long cookie;
+	u32 prio;
+};
+
+struct ksz9477_acl_entries {
+	struct ksz9477_acl_entry entries[KSZ9477_ACL_MAX_ENTRIES];
+	int entries_count;
+};
+
+struct ksz9477_acl_priv {
+	struct ksz9477_acl_entries acles;
+};
+
+void ksz9477_acl_remove_entries(struct ksz_device *dev, int port,
+				struct ksz9477_acl_entries *acles,
+				unsigned long cookie);
+int ksz9477_acl_write_list(struct ksz_device *dev, int port);
+int ksz9477_sort_acl_entries(struct ksz_device *dev, int port);
+void ksz9477_acl_action_rule_cfg(u8 *entry, bool force_prio, u8 prio_val);
+void ksz9477_acl_processing_rule_set_action(u8 *entry, u8 action_idx);
+void ksz9477_acl_match_process_l2(struct ksz_device *dev, int port,
+				  u16 ethtype, u8 *src_mac, u8 *dst_mac,
+				  unsigned long cookie, u32 prio);
+
 #endif
diff --git a/drivers/net/dsa/microchip/ksz9477_acl.c b/drivers/net/dsa/microchip/ksz9477_acl.c
new file mode 100644
index 000000000000..06d74c19eb94
--- /dev/null
+++ b/drivers/net/dsa/microchip/ksz9477_acl.c
@@ -0,0 +1,1436 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2023 Pengutronix, Oleksij Rempel <kernel@pengutronix.de>
+
+/* Access Control List (ACL) structure:
+ *
+ * There are multiple groups of registers involved in ACL configuration:
+ *
+ * - Matching Rules: These registers define the criteria for matching incoming
+ *   packets based on their header information (Layer 2 MAC, Layer 3 IP, or
+ *   Layer 4 TCP/UDP). Different register settings are used depending on the
+ *   matching rule mode (MD) and the Enable (ENB) settings.
+ *
+ * - Action Rules: These registers define how the ACL should modify the packet's
+ *   priority, VLAN tag priority, and forwarding map once a matching rule has
+ *   been triggered. The settings vary depending on whether the matching rule is
+ *   in Count Mode (MD = 01 and ENB = 00) or not.
+ *
+ * - Processing Rules: These registers control the overall behavior of the ACL,
+ *   such as selecting which matching rule to apply first, enabling/disabling
+ *   specific rules, or specifying actions for matched packets.
+ *
+ * ACL Structure:
+ *                             +----------------------+
+ * +----------------------+    |    (optional)        |
+ * |    Matching Rules    |    |    Matching Rules    |
+ * |    (Layer 2, 3, 4)   |    |    (Layer 2, 3, 4)   |
+ * +----------------------+    +----------------------+
+ *             |                            |
+ *             \___________________________/
+ *                          v
+ *               +----------------------+
+ *               |   Processing Rules   |
+ *               | (action idx,         |
+ *               | matching rule set)   |
+ *               +----------------------+
+ *                          |
+ *                          v
+ *               +----------------------+
+ *               |    Action Rules      |
+ *               | (Modify Priority,    |
+ *               |  Forwarding Map,     |
+ *               |  VLAN tag, etc)      |
+ *               +----------------------+
+ */
+
+#include <linux/bitops.h>
+
+#include "ksz9477.h"
+#include "ksz9477_reg.h"
+#include "ksz_common.h"
+
+#define KSZ9477_PORT_ACL_0		0x600
+
+enum ksz9477_acl_port_access {
+	KSZ9477_ACL_PORT_ACCESS_0  = 0x00,
+	KSZ9477_ACL_PORT_ACCESS_1  = 0x01,
+	KSZ9477_ACL_PORT_ACCESS_2  = 0x02,
+	KSZ9477_ACL_PORT_ACCESS_3  = 0x03,
+	KSZ9477_ACL_PORT_ACCESS_4  = 0x04,
+	KSZ9477_ACL_PORT_ACCESS_5  = 0x05,
+	KSZ9477_ACL_PORT_ACCESS_6  = 0x06,
+	KSZ9477_ACL_PORT_ACCESS_7  = 0x07,
+	KSZ9477_ACL_PORT_ACCESS_8  = 0x08,
+	KSZ9477_ACL_PORT_ACCESS_9  = 0x09,
+	KSZ9477_ACL_PORT_ACCESS_A  = 0x0A,
+	KSZ9477_ACL_PORT_ACCESS_B  = 0x0B,
+	KSZ9477_ACL_PORT_ACCESS_C  = 0x0C,
+	KSZ9477_ACL_PORT_ACCESS_D  = 0x0D,
+	KSZ9477_ACL_PORT_ACCESS_E  = 0x0E,
+	KSZ9477_ACL_PORT_ACCESS_F  = 0x0F,
+	KSZ9477_ACL_PORT_ACCESS_10 = 0x10,
+	KSZ9477_ACL_PORT_ACCESS_11 = 0x11
+};
+
+#define KSZ9477_ACL_MD_MASK			GENMASK(5, 4)
+#define KSZ9477_ACL_MD_DISABLE			0
+#define KSZ9477_ACL_MD_L2_MAC			1
+#define KSZ9477_ACL_MD_L3_IP			2
+#define KSZ9477_ACL_MD_L4_TCP_UDP		3
+
+#define KSZ9477_ACL_ENB_MASK			GENMASK(3, 2)
+#define KSZ9477_ACL_ENB_L2_COUNTER		0
+#define KSZ9477_ACL_ENB_L2_TYPE			1
+#define KSZ9477_ACL_ENB_L2_MAC			2
+#define KSZ9477_ACL_ENB_L2_MAC_TYPE		3
+
+/* only IPv4 src or dst can be used with mask */
+#define KSZ9477_ACL_ENB_L3_IPV4_ADDR_MASK	1
+/* only IPv4 src and dst can be used without mask */
+#define KSZ9477_ACL_ENB_L3_IPV4_ADDR_SRC_DST	2
+
+#define KSZ9477_ACL_ENB_L4_IP_PROTO	        0
+#define KSZ9477_ACL_ENB_L4_TCP_SRC_DST_PORT	1
+#define KSZ9477_ACL_ENB_L4_UDP_SRC_DST_PORT	2
+#define KSZ9477_ACL_ENB_L4_TCP_SEQ_NUMBER	3
+
+#define KSZ9477_ACL_SD_SRC			BIT(1)
+#define KSZ9477_ACL_SD_DST			0
+#define KSZ9477_ACL_EQ_EQUAL			BIT(0)
+#define KSZ9477_ACL_EQ_NOT_EQUAL		0
+
+#define KSZ9477_ACL_PM_M			GENMASK(7, 6)
+#define KSZ9477_ACL_PM_DISABLE			0
+#define KSZ9477_ACL_PM_HIGHER			1
+#define KSZ9477_ACL_PM_LOWER			2
+#define KSZ9477_ACL_PM_REPLACE			3
+#define KSZ9477_ACL_P_M				GENMASK(5, 3)
+
+#define KSZ9477_PORT_ACL_CTRL_0			0x0612
+
+#define KSZ9477_ACL_WRITE_DONE			BIT(6)
+#define KSZ9477_ACL_READ_DONE			BIT(5)
+#define KSZ9477_ACL_WRITE			BIT(4)
+#define KSZ9477_ACL_INDEX_M			GENMASK(3, 0)
+
+/**
+ * ksz9477_dump_acl_index - Print the ACL entry at the specified index
+ *
+ * @dev: Pointer to the ksz9477 device structure.
+ * @acle: Pointer to the ACL entry array.
+ * @index: The index of the ACL entry to print.
+ *
+ * This function prints the details of an ACL entry, located at a particular
+ * index within the ksz9477 device's ACL table. It omits printing entries that
+ * are empty.
+ *
+ * Return: 1 if the entry is non-empty and printed, 0 otherwise.
+ */
+static int ksz9477_dump_acl_index(struct ksz_device *dev,
+				  struct ksz9477_acl_entry *acle, int index)
+{
+	bool empty = true;
+	char buf[64];
+	u8 *entry;
+	int i;
+
+	entry = &acle[index].entry[0];
+	for (i = 0; i <= KSZ9477_ACL_PORT_ACCESS_11; i++) {
+		if (entry[i])
+			empty = false;
+
+		sprintf(buf + (i * 3), "%02x ", entry[i]);
+	}
+
+	/* no need to print empty entries */
+	if (empty)
+		return 0;
+
+	dev_err(dev->dev, " Entry %02d, prio: %02d : %s", index,
+		acle[index].prio, buf);
+
+	return 1;
+}
+
+/**
+ * ksz9477_dump_acl - Print ACL entries
+ *
+ * @dev: Pointer to the device structure.
+ * @acle: Pointer to the ACL entry array.
+ */
+static void ksz9477_dump_acl(struct ksz_device *dev,
+			     struct ksz9477_acl_entry *acle)
+{
+	int count = 0;
+	int i;
+
+	for (i = 0; i < KSZ9477_ACL_MAX_ENTRIES; i++)
+		count += ksz9477_dump_acl_index(dev, acle, i);
+
+	if (count != KSZ9477_ACL_MAX_ENTRIES - 1)
+		dev_err(dev->dev, " Empty ACL entries were skipped\n");
+}
+
+/**
+ * ksz9477_acl_is_valid_matching_rule - Check if an ACL entry contains a valid
+ *					matching rule.
+ *
+ * @entry: Pointer to ACL entry buffer
+ *
+ * This function checks if the given ACL entry buffer contains a valid
+ * matching rule by inspecting the Mode (MD) and Enable (ENB) fields.
+ *
+ * Returns: True if it's a valid matching rule, false otherwise.
+ */
+static bool ksz9477_acl_is_valid_matching_rule(u8 *entry)
+{
+	u8 val1, md, enb;
+
+	val1 = entry[KSZ9477_ACL_PORT_ACCESS_1];
+
+	md = FIELD_GET(KSZ9477_ACL_MD_MASK, val1);
+	if (md == KSZ9477_ACL_MD_DISABLE)
+		return false;
+
+	if (md == KSZ9477_ACL_MD_L2_MAC) {
+		/* L2 counter is not support, so it is not valid rule for now */
+		enb = FIELD_GET(KSZ9477_ACL_ENB_MASK, val1);
+		if (enb == KSZ9477_ACL_ENB_L2_COUNTER)
+			return false;
+	}
+
+	return true;
+}
+
+/**
+ * ksz9477_acl_get_cont_entr - Get count of contiguous ACL entries and validate
+ *                             the matching rules.
+ * @dev: Pointer to the KSZ9477 device structure.
+ * @port: Port number.
+ * @index: Index of the starting ACL entry.
+ *
+ * Based on the KSZ9477 switch's Access Control List (ACL) system, the RuleSet
+ * in an ACL entry indicates which entries contain Matching rules linked to it.
+ * This RuleSet is represented by two registers: KSZ9477_ACL_PORT_ACCESS_E and
+ * KSZ9477_ACL_PORT_ACCESS_F. Each bit set in these registers corresponds to
+ * an entry containing a Matching rule for this RuleSet.
+ *
+ * For a single Matching rule linked, only one bit is set. However, when an
+ * entry links multiple Matching rules, forming what's termed a 'complex rule',
+ * multiple bits are set in these registers.
+ *
+ * This function checks that, for complex rules, the entries containing the
+ * linked Matching rules are contiguous in terms of their indices. It calculates
+ * and returns the number of these contiguous entries.
+ *
+ * Returns:
+ *    - 0 if the entry is empty and can be safely overwritten
+ *    - 1 if the entry represents a simple rule
+ *    - The number of contiguous entries if it is the root entry of a complex
+ *      rule
+ *    - -ENOTEMPTY if the entry is part of a complex rule but not the root
+ *      entry
+ *    - -EINVAL if the validation fails
+ */
+static int ksz9477_acl_get_cont_entr(struct ksz_device *dev, int port,
+				     int index)
+{
+	struct ksz9477_acl_priv *acl = dev->ports[port].acl_priv;
+	struct ksz9477_acl_entries *acles = &acl->acles;
+	int start_idx, end_idx, contiguous_count;
+	unsigned long val;
+	u8 vale, valf;
+	u8 *entry;
+	int i;
+
+	entry = &acles->entries[index].entry[0];
+	vale = entry[KSZ9477_ACL_PORT_ACCESS_E];
+	valf = entry[KSZ9477_ACL_PORT_ACCESS_F];
+
+	val = (vale << 8) | valf;
+
+	/* If no bits are set, return an appropriate value or error */
+	if (!val) {
+		if (ksz9477_acl_is_valid_matching_rule(entry)) {
+			/* Looks like we are about to corrupt some complex rule.
+			 * Do not print an error here, as this is a normal case
+			 * when we are trying to find a free or starting entry.
+			 */
+			dev_dbg(dev->dev, "ACL: entry %d starting with a valid matching rule, but no bits set in RuleSet\n",
+				index);
+			return -ENOTEMPTY;
+		}
+
+		/* This entry does not contain a valid matching rule */
+		return 0;
+	}
+
+	start_idx = find_first_bit((unsigned long *)&val, 16);
+	end_idx = find_last_bit((unsigned long *)&val, 16);
+
+	/* Calculate the contiguous count */
+	contiguous_count = end_idx - start_idx + 1;
+
+	/* Check if the number of bits set in val matches our calculated count */
+	if (contiguous_count != hweight16(val)) {
+		/* Probably we have a fragmented complex rule, which is not
+		 * supported by this driver.
+		 */
+		dev_err(dev->dev, "ACL: number of bits set in RuleSet does not match calculated count\n");
+		return -EINVAL;
+	}
+
+	/* loop over the contiguous entries and check for valid matching rules */
+	for (i = start_idx; i <= end_idx; i++) {
+		u8 *current_entry = &acles->entries[i].entry[0];
+
+		if (!ksz9477_acl_is_valid_matching_rule(current_entry)) {
+			/* we have something linked without a valid matching
+			 * rule. ACL table?
+			 */
+			dev_err(dev->dev, "ACL: entry %d does not contain a valid matching rule\n",
+				i);
+			return -EINVAL;
+		}
+
+		if (i > start_idx) {
+			vale = current_entry[KSZ9477_ACL_PORT_ACCESS_E];
+			valf = current_entry[KSZ9477_ACL_PORT_ACCESS_F];
+			/* Following entry should have empty linkage list */
+			if (vale || valf) {
+				dev_err(dev->dev, "ACL: entry %d has non-empty RuleSet linkage\n",
+					i);
+				return -EINVAL;
+			}
+		}
+	}
+
+	return contiguous_count;
+}
+
+/**
+ * ksz9477_acl_update_linkage - Update the RuleSet linkage for an ACL entry
+ *                              after a move operation.
+ *
+ * @dev: Pointer to the ksz_device.
+ * @entry:   Pointer to the ACL entry array.
+ * @old_idx: The original index of the ACL entry before moving.
+ * @new_idx: The new index of the ACL entry after moving.
+ *
+ * This function updates the RuleSet linkage bits for an ACL entry when
+ * it's moved from one position to another in the ACL table. The RuleSet
+ * linkage is represented by two 8-bit registers, which are combined
+ * into a 16-bit value for easier manipulation. The linkage bits are shifted
+ * based on the difference between the old and new index. If any bits are lost
+ * during the shift operation, an error is returned.
+ *
+ * Note: Fragmentation within a RuleSet is not supported. Hence, entries must
+ * be moved as complete blocks, maintaining the integrity of the RuleSet.
+ *
+ * Returns: 0 on success, or -EINVAL if any RuleSet linkage bits are lost
+ * during the move.
+ */
+static int ksz9477_acl_update_linkage(struct ksz_device *dev, u8 *entry,
+				      u16 old_idx, u16 new_idx)
+{
+	unsigned int original_bit_count;
+	unsigned long rule_linkage;
+	u8 vale, valf, val0;
+	int shift;
+
+	val0 = entry[KSZ9477_ACL_PORT_ACCESS_0];
+	vale = entry[KSZ9477_ACL_PORT_ACCESS_E];
+	valf = entry[KSZ9477_ACL_PORT_ACCESS_F];
+
+	/* Combine the two u8 values into one u16 for easier manipulation */
+	rule_linkage = (vale << 8) | valf;
+	original_bit_count = hweight16(rule_linkage);
+
+	/* Even if HW is able to handle fragmented RuleSet, we don't support it.
+	 * RuleSet is filled only for the first entry of the set.
+	 */
+	if (!rule_linkage)
+		return 0;
+
+	if (val0 != old_idx) {
+		dev_err(dev->dev, "ACL: entry %d has unexpected ActionRule linkage: %d\n",
+			old_idx, val0);
+		return -EINVAL;
+	}
+
+	val0 = new_idx;
+
+	/* Calculate the number of positions to shift */
+	shift = new_idx - old_idx;
+
+	/* Shift the RuleSet */
+	if (shift > 0)
+		rule_linkage <<= shift;
+	else
+		rule_linkage >>= -shift;
+
+	/* Check that no bits were lost in the process */
+	if (original_bit_count != hweight16(rule_linkage)) {
+		dev_err(dev->dev, "ACL RuleSet linkage bits lost during move\n");
+		return -EINVAL;
+	}
+
+	entry[KSZ9477_ACL_PORT_ACCESS_0] = val0;
+
+	/* Update the RuleSet bitfields in the entry */
+	entry[KSZ9477_ACL_PORT_ACCESS_E] = (rule_linkage >> 8) & 0xFF;
+	entry[KSZ9477_ACL_PORT_ACCESS_F] = rule_linkage & 0xFF;
+
+	return 0;
+}
+
+/**
+ * ksz9477_validate_and_get_src_count - Validate source and destination indices
+ *					and determine the source entry count.
+ * @dev: Pointer to the KSZ device structure.
+ * @port: Port number on the KSZ device where the ACL entries reside.
+ * @src_idx: Index of the starting ACL entry that needs to be validated.
+ * @dst_idx: Index of the destination where the source entries are intended to
+ *	     be moved.
+ * @src_count: Pointer to the variable that will hold the number of contiguous
+ *	     source entries if the validation passes.
+ * @dst_count: Pointer to the variable that will hold the number of contiguous
+ *	     destination entries if the validation passes.
+ *
+ * This function performs validation on the source and destination indices
+ * provided for ACL entries. It checks if the indices are within the valid
+ * range, and if the source entries are contiguous. Additionally, the function
+ * ensures that there's adequate space at the destination for the source entries
+ * and that the destination index isn't in the middle of a RuleSet. If all
+ * validations pass, the function returns the number of contiguous source and
+ * destination entries.
+ *
+ * Return: 0 on success, otherwise returns a negative error code if any
+ * validation check fails.
+ */
+static int ksz9477_validate_and_get_src_count(struct ksz_device *dev, int port,
+					      int src_idx, int dst_idx,
+					      int *src_count, int *dst_count)
+{
+	int ret;
+
+	if (src_idx >= KSZ9477_ACL_MAX_ENTRIES ||
+	    dst_idx >= KSZ9477_ACL_MAX_ENTRIES) {
+		dev_err(dev->dev, "ACL: invalid entry index\n");
+		return -EINVAL;
+	}
+
+	/* Nothing to do */
+	if (src_idx == dst_idx)
+		return 0;
+
+	/* Validate if the source entries are contiguous */
+	ret = ksz9477_acl_get_cont_entr(dev, port, src_idx);
+	if (ret < 0)
+		return ret;
+	*src_count = ret;
+
+	if (!*src_count) {
+		dev_err(dev->dev, "ACL: source entry is empty\n");
+		return -EINVAL;
+	}
+
+	if (dst_idx + *src_count >= KSZ9477_ACL_MAX_ENTRIES) {
+		dev_err(dev->dev, "ACL: Not enough space at the destination. Move operation will fail.\n");
+		return -EINVAL;
+	}
+
+	/* Validate if the destination entry is empty or not in the middle of
+	 * a RuleSet.
+	 */
+	ret = ksz9477_acl_get_cont_entr(dev, port, dst_idx);
+	if (ret < 0)
+		return ret;
+	*dst_count = ret;
+
+	return 0;
+}
+
+/**
+ * ksz9477_move_entries_downwards - Move a range of ACL entries downwards in
+ *				    the list.
+ * @dev: Pointer to the KSZ device structure.
+ * @acles: Pointer to the structure encapsulating all the ACL entries.
+ * @start_idx: Starting index of the entries to be relocated.
+ * @num_entries_to_move: Number of consecutive entries to be relocated.
+ * @end_idx: Destination index where the first entry should be situated post
+ *           relocation.
+ *
+ * This function is responsible for rearranging a specific block of ACL entries
+ * by shifting them downwards in the list based on the supplied source and
+ * destination indices. It ensures that the linkage between the ACL entries is
+ * maintained accurately after the relocation.
+ *
+ * Return: 0 on successful relocation of entries, otherwise returns a negative
+ * error code.
+ */
+static int ksz9477_move_entries_downwards(struct ksz_device *dev,
+					  struct ksz9477_acl_entries *acles,
+					  u16 start_idx,
+					  u16 num_entries_to_move,
+					  u16 end_idx)
+{
+	struct ksz9477_acl_entry *e;
+	int ret, i;
+
+	for (i = start_idx; i < end_idx; i++) {
+		e = &acles->entries[i];
+		*e = acles->entries[i + num_entries_to_move];
+
+		ret = ksz9477_acl_update_linkage(dev, &e->entry[0],
+						 i + num_entries_to_move, i);
+		if (ret < 0)
+			return ret;
+	}
+
+	return 0;
+}
+
+/**
+ * ksz9477_move_entries_upwards - Move a range of ACL entries upwards in the
+ *				  list.
+ * @dev: Pointer to the KSZ device structure.
+ * @acles: Pointer to the structure holding all the ACL entries.
+ * @start_idx: The starting index of the entries to be moved.
+ * @num_entries_to_move: Number of contiguous entries to be moved.
+ * @target_idx: The destination index where the first entry should be placed
+ *		after moving.
+ *
+ * This function rearranges a chunk of ACL entries by moving them upwards
+ * in the list based on the given source and destination indices. The reordering
+ * process preserves the linkage between entries by updating it accordingly.
+ *
+ * Return: 0 if the entries were successfully moved, otherwise a negative error
+ * code.
+ */
+static int ksz9477_move_entries_upwards(struct ksz_device *dev,
+					struct ksz9477_acl_entries *acles,
+					u16 start_idx, u16 num_entries_to_move,
+					u16 target_idx)
+{
+	struct ksz9477_acl_entry *e;
+	int ret, i, b;
+
+	for (i = start_idx; i > target_idx; i--) {
+		b = i + num_entries_to_move - 1;
+
+		e = &acles->entries[b];
+		*e = acles->entries[i - 1];
+
+		ret = ksz9477_acl_update_linkage(dev, &e->entry[0], i - 1, b);
+		if (ret < 0)
+			return ret;
+	}
+
+	return 0;
+}
+
+/**
+ * ksz9477_acl_move_entries - Move a block of contiguous ACL entries from a
+ *			      source to a destination index.
+ * @dev: Pointer to the KSZ9477 device structure.
+ * @port: Port number.
+ * @src_idx: Index of the starting source ACL entry.
+ * @dst_idx: Index of the starting destination ACL entry.
+ *
+ * This function aims to move a block of contiguous ACL entries from the source
+ * index to the destination index while ensuring the integrity and validity of
+ * the ACL table.
+ *
+ * In case of any errors during the adjustments or copying, the function will
+ * restore the ACL entries to their original state from the backup.
+ *
+ * Return: 0 if the move operation is successful. Returns -EINVAL for validation
+ * errors or other error codes based on specific failure conditions.
+ */
+static int ksz9477_acl_move_entries(struct ksz_device *dev, int port,
+				    u16 src_idx, u16 dst_idx)
+{
+	struct ksz9477_acl_entry buffer[KSZ9477_ACL_MAX_ENTRIES];
+	struct ksz9477_acl_priv *acl = dev->ports[port].acl_priv;
+	struct ksz9477_acl_entries *acles = &acl->acles;
+	int src_count, ret, dst_count;
+
+	ret = ksz9477_validate_and_get_src_count(dev, port, src_idx, dst_idx,
+						 &src_count, &dst_count);
+	if (ret)
+		return ret;
+
+	/* In case dst_index is greater than src_index, we need to adjust the
+	 * destination index to account for the entries that will be moved
+	 * downwards and the size of the entry located at dst_idx.
+	 */
+	if (dst_idx > src_idx)
+		dst_idx = dst_idx + dst_count - src_count;
+
+	/* Copy source block to buffer and update its linkage */
+	for (int i = 0; i < src_count; i++) {
+		buffer[i] = acles->entries[src_idx + i];
+		ret = ksz9477_acl_update_linkage(dev, &buffer[i].entry[0],
+						 src_idx + i, dst_idx + i);
+		if (ret < 0)
+			return ret;
+	}
+
+	/* Adjust other entries and their linkage based on destination */
+	if (dst_idx > src_idx) {
+		ret = ksz9477_move_entries_downwards(dev, acles, src_idx,
+						     src_count, dst_idx);
+	} else {
+		ret = ksz9477_move_entries_upwards(dev, acles, src_idx,
+						   src_count, dst_idx);
+	}
+	if (ret < 0)
+		return ret;
+
+	/* Copy buffer to destination block */
+	for (int i = 0; i < src_count; i++)
+		acles->entries[dst_idx + i] = buffer[i];
+
+	return 0;
+}
+
+/**
+ * ksz9477_get_next_block_start - Identify the starting index of the next ACL
+ *				  block.
+ * @dev: Pointer to the device structure.
+ * @port: The port number on which the ACL entries are being checked.
+ * @start: The starting index from which the search begins.
+ *
+ * This function looks for the next valid ACL block starting from the provided
+ * 'start' index and returns the beginning index of that block. If the block is
+ * invalid or if it reaches the end of the ACL entries without finding another
+ * block, it returns the maximum ACL entries count.
+ *
+ * Returns:
+ *  - The starting index of the next valid ACL block.
+ *  - KSZ9477_ACL_MAX_ENTRIES if no other valid blocks are found after 'start'.
+ *  - A negative error code if an error occurs while checking.
+ */
+static int ksz9477_get_next_block_start(struct ksz_device *dev, int port,
+					int start)
+{
+	int block_size;
+
+	for (int i = start; i < KSZ9477_ACL_MAX_ENTRIES;) {
+		block_size = ksz9477_acl_get_cont_entr(dev, port, i);
+		if (block_size < 0 && block_size != -ENOTEMPTY)
+			return block_size;
+
+		if (block_size > 0)
+			return i;
+
+		i++;
+	}
+	return KSZ9477_ACL_MAX_ENTRIES;
+}
+
+/**
+ * ksz9477_swap_acl_blocks - Swap two ACL blocks
+ * @dev: Pointer to the device structure.
+ * @port: The port number on which the ACL blocks are to be swapped.
+ * @i: The starting index of the first ACL block.
+ * @j: The starting index of the second ACL block.
+ *
+ * This function is used to swap two ACL blocks present at given indices. The
+ * main purpose is to aid in the sorting and reordering of ACL blocks based on
+ * certain criteria, e.g., priority. It checks the validity of the block at
+ * index 'i', ensuring it's not an empty block, and then proceeds to swap it
+ * with the block at index 'j'.
+ *
+ * Returns:
+ *  - 0 on successful swapping of blocks.
+ *  - -EINVAL if the block at index 'i' is empty.
+ *  - A negative error code if any other error occurs during the swap.
+ */
+static int ksz9477_swap_acl_blocks(struct ksz_device *dev, int port, int i,
+				   int j)
+{
+	int ret, current_block_size;
+
+	current_block_size = ksz9477_acl_get_cont_entr(dev, port, i);
+	if (current_block_size < 0)
+		return current_block_size;
+
+	if (!current_block_size) {
+		dev_err(dev->dev, "ACL: swapping empty entry %d\n", i);
+		return -EINVAL;
+	}
+
+	ret = ksz9477_acl_move_entries(dev, port, i, j);
+	if (ret)
+		return ret;
+
+	ret = ksz9477_acl_move_entries(dev, port, j - current_block_size, i);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+/**
+ * ksz9477_sort_acl_entr_no_back - Sort ACL entries for a given port based on
+ *			           priority without backing up entries.
+ * @dev: Pointer to the device structure.
+ * @port: The port number whose ACL entries need to be sorted.
+ *
+ * This function sorts ACL entries of the specified port using a variant of the
+ * bubble sort algorithm. It operates on blocks of ACL entries rather than
+ * individual entries. Each block's starting point is identified and then
+ * compared with subsequent blocks based on their priority. If the current
+ * block has a lower priority than the subsequent block, the two blocks are
+ * swapped.
+ *
+ * This is done in order to maintain an organized order of ACL entries based on
+ * priority, ensuring efficient and predictable ACL rule application.
+ *
+ * Returns:
+ *  - 0 on successful sorting of entries.
+ *  - A negative error code if any issue arises during sorting, e.g.,
+ *    if the function is unable to get the next block start.
+ */
+static int ksz9477_sort_acl_entr_no_back(struct ksz_device *dev, int port)
+{
+	struct ksz9477_acl_priv *acl = dev->ports[port].acl_priv;
+	struct ksz9477_acl_entries *acles = &acl->acles;
+	struct ksz9477_acl_entry *curr, *next;
+	int i, j, ret;
+
+	/* Bubble sort */
+	for (i = 0; i < KSZ9477_ACL_MAX_ENTRIES;) {
+		curr = &acles->entries[i];
+
+		j = ksz9477_get_next_block_start(dev, port, i + 1);
+		if (j < 0)
+			return j;
+
+		while (j < KSZ9477_ACL_MAX_ENTRIES) {
+			next = &acles->entries[j];
+
+			if (curr->prio > next->prio) {
+				ret = ksz9477_swap_acl_blocks(dev, port, i, j);
+				if (ret)
+					return ret;
+			}
+
+			j = ksz9477_get_next_block_start(dev, port, j + 1);
+			if (j < 0)
+				return j;
+		}
+
+		i = ksz9477_get_next_block_start(dev, port, i + 1);
+		if (i < 0)
+			return i;
+	}
+
+	return 0;
+}
+
+/**
+ * ksz9477_sort_acl_entries - Sort the ACL entries for a given port.
+ * @dev: Pointer to the KSZ device.
+ * @port: Port number.
+ *
+ * This function sorts the Access Control List (ACL) entries for a specified
+ * port. Before sorting, a backup of the original entries is created. If the
+ * sorting process fails, the function will log error messages displaying both
+ * the original and attempted sorted entries, and then restore the original
+ * entries from the backup.
+ *
+ * Return: 0 if the sorting succeeds, otherwise a negative error code.
+ */
+int ksz9477_sort_acl_entries(struct ksz_device *dev, int port)
+{
+	struct ksz9477_acl_entry backup[KSZ9477_ACL_MAX_ENTRIES];
+	struct ksz9477_acl_priv *acl = dev->ports[port].acl_priv;
+	struct ksz9477_acl_entries *acles = &acl->acles;
+	int ret;
+
+	/* create a backup of the ACL entries, if something goes wrong
+	 * we can restore the ACL entries.
+	 */
+	memcpy(backup, acles->entries, sizeof(backup));
+
+	ret = ksz9477_sort_acl_entr_no_back(dev, port);
+	if (ret) {
+		dev_err(dev->dev, "ACL: failed to sort entries for port %d\n",
+			port);
+		dev_err(dev->dev, "ACL dump before sorting:\n");
+		ksz9477_dump_acl(dev, backup);
+		dev_err(dev->dev, "ACL dump after sorting:\n");
+		ksz9477_dump_acl(dev, acles->entries);
+		/* Restore the original entries */
+		memcpy(acles->entries, backup, sizeof(backup));
+	}
+
+	return ret;
+}
+
+/**
+ * ksz9477_acl_wait_ready - Waits for the ACL operation to complete on a given
+ *			    port.
+ * @dev: The ksz_device instance.
+ * @port: The port number to wait for.
+ *
+ * This function checks if the ACL write or read operation is completed by
+ * polling the specified register.
+ *
+ * Returns: 0 if the operation is successful, or a negative error code if an
+ * error occurs.
+ */
+static int ksz9477_acl_wait_ready(struct ksz_device *dev, int port)
+{
+	unsigned int wr_mask = KSZ9477_ACL_WRITE_DONE | KSZ9477_ACL_READ_DONE;
+	unsigned int val, reg;
+	int ret;
+
+	reg = dev->dev_ops->get_port_addr(port, KSZ9477_PORT_ACL_CTRL_0);
+
+	ret = regmap_read_poll_timeout(dev->regmap[0], reg, val,
+				       (val & wr_mask) == wr_mask, 1000, 10000);
+	if (ret)
+		dev_err(dev->dev, "Failed to read/write ACL table\n");
+
+	return ret;
+}
+
+/**
+ * ksz9477_acl_entry_write - Writes an ACL entry to a given port at the
+ *			     specified index.
+ * @dev: The ksz_device instance.
+ * @port: The port number to write the ACL entry to.
+ * @entry: A pointer to the ACL entry data.
+ * @idx: The index at which to write the ACL entry.
+ *
+ * This function writes the provided ACL entry to the specified port at the
+ * given index.
+ *
+ * Returns: 0 if the operation is successful, or a negative error code if an
+ * error occurs.
+ */
+static int ksz9477_acl_entry_write(struct ksz_device *dev, int port, u8 *entry,
+				   int idx)
+{
+	int ret, i;
+	u8 val;
+
+	for (i = 0; i < KSZ9477_ACL_ENTRY_SIZE; i++) {
+		ret = ksz_pwrite8(dev, port, KSZ9477_PORT_ACL_0 + i, entry[i]);
+		if (ret) {
+			dev_err(dev->dev, "Failed to write ACL entry %d\n", i);
+			return ret;
+		}
+	}
+
+	/* write everything down */
+	val = FIELD_PREP(KSZ9477_ACL_INDEX_M, idx) | KSZ9477_ACL_WRITE;
+	ret = ksz_pwrite8(dev, port, KSZ9477_PORT_ACL_CTRL_0, val);
+	if (ret)
+		return ret;
+
+	/* wait until everything is written  */
+	return ksz9477_acl_wait_ready(dev, port);
+}
+
+/**
+ * ksz9477_acl_port_enable - Enables ACL functionality on a given port.
+ * @dev: The ksz_device instance.
+ * @port: The port number on which to enable ACL functionality.
+ *
+ * This function enables ACL functionality on the specified port by configuring
+ * the appropriate control registers. It returns 0 if the operation is
+ * successful, or a negative error code if an error occurs.
+ *
+ * 0xn801 - KSZ9477S 5.2.8.2 Port Priority Control Register
+ *        Bit 7 - Highest Priority
+ *        Bit 6 - OR'ed Priority
+ *        Bit 4 - MAC Address Priority Classification
+ *        Bit 3 - VLAN Priority Classification
+ *        Bit 2 - 802.1p Priority Classification
+ *        Bit 1 - Diffserv Priority Classification
+ *        Bit 0 - ACL Priority Classification
+ *
+ * Current driver implementation sets 802.1p priority classification by default.
+ * In this function we add ACL priority classification with OR'ed priority.
+ * According to testing, priority set by ACL will supersede the 802.1p priority.
+ *
+ * 0xn803 - KSZ9477S 5.2.8.4 Port Authentication Control Register
+ *        Bit 2 - Access Control List (ACL) Enable
+ *        Bits 1:0 - Authentication Mode
+ *                00 = Reserved
+ *                01 = Block Mode. Authentication is enabled. When ACL is
+ *                     enabled, all traffic that misses the ACL rules is
+ *                     blocked; otherwise ACL actions apply.
+ *                10 = Pass Mode. Authentication is disabled. When ACL is
+ *                     enabled, all traffic that misses the ACL rules is
+ *                     forwarded; otherwise ACL actions apply.
+ *                11 = Trap Mode. Authentication is enabled. All traffic is
+ *                     forwarded to the host port. When ACL is enabled, all
+ *                     traffic that misses the ACL rules is blocked; otherwise
+ *                     ACL actions apply.
+ *
+ * We are using Pass Mode int this function.
+ *
+ * Returns: 0 if the operation is successful, or a negative error code if an
+ * error occurs.
+ */
+static int ksz9477_acl_port_enable(struct ksz_device *dev, int port)
+{
+	int ret;
+
+	ret = ksz_prmw8(dev, port, P_PRIO_CTRL, 0, PORT_ACL_PRIO_ENABLE |
+			PORT_OR_PRIO);
+	if (ret)
+		return ret;
+
+	return ksz_pwrite8(dev, port, REG_PORT_MRI_AUTHEN_CTRL,
+			   PORT_ACL_ENABLE |
+			   FIELD_PREP(PORT_AUTHEN_MODE, PORT_AUTHEN_PASS));
+}
+
+/**
+ * ksz9477_acl_port_disable - Disables ACL functionality on a given port.
+ * @dev: The ksz_device instance.
+ * @port: The port number on which to disable ACL functionality.
+ *
+ * This function disables ACL functionality on the specified port by writing a
+ * value of 0 to the REG_PORT_MRI_AUTHEN_CTRL control register and remove
+ * PORT_ACL_PRIO_ENABLE bit from P_PRIO_CTRL register.
+ *
+ * Returns: 0 if the operation is successful, or a negative error code if an
+ * error occurs.
+ */
+static int ksz9477_acl_port_disable(struct ksz_device *dev, int port)
+{
+	int ret;
+
+	ret = ksz_prmw8(dev, port, P_PRIO_CTRL, PORT_ACL_PRIO_ENABLE, 0);
+	if (ret)
+		return ret;
+
+	return ksz_pwrite8(dev, port, REG_PORT_MRI_AUTHEN_CTRL, 0);
+}
+
+/**
+ * ksz9477_acl_write_list - Write a list of ACL entries to a given port.
+ * @dev: The ksz_device instance.
+ * @port: The port number on which to write ACL entries.
+ *
+ * This function enables ACL functionality on the specified port, writes a list
+ * of ACL entries to the port, and disables ACL functionality if there are no
+ * entries.
+ *
+ * Returns: 0 if the operation is successful, or a negative error code if an
+ * error occurs.
+ */
+int ksz9477_acl_write_list(struct ksz_device *dev, int port)
+{
+	struct ksz9477_acl_priv *acl = dev->ports[port].acl_priv;
+	struct ksz9477_acl_entries *acles = &acl->acles;
+	int ret, i;
+
+	/* ACL should be enabled before writing entries */
+	ret = ksz9477_acl_port_enable(dev, port);
+	if (ret)
+		return ret;
+
+	/* write all entries */
+	for (i = 0; i < ARRAY_SIZE(acles->entries); i++) {
+		u8 *entry = acles->entries[i].entry;
+
+		/* Check if entry was removed and should be zeroed.
+		 * If last fields of the entry are not zero, it means
+		 * it is removed locally but currently not synced with the HW.
+		 * So, we will write it down to the HW to remove it.
+		 */
+		if (i >= acles->entries_count &&
+		    entry[KSZ9477_ACL_PORT_ACCESS_10] == 0 &&
+		    entry[KSZ9477_ACL_PORT_ACCESS_11] == 0)
+			continue;
+
+		ret = ksz9477_acl_entry_write(dev, port, entry, i);
+		if (ret)
+			return ret;
+
+		/* now removed entry is clean on HW side, so it can
+		 * in the cache too
+		 */
+		if (i >= acles->entries_count &&
+		    entry[KSZ9477_ACL_PORT_ACCESS_10] != 0 &&
+		    entry[KSZ9477_ACL_PORT_ACCESS_11] != 0) {
+			entry[KSZ9477_ACL_PORT_ACCESS_10] = 0;
+			entry[KSZ9477_ACL_PORT_ACCESS_11] = 0;
+		}
+	}
+
+	if (!acles->entries_count)
+		return ksz9477_acl_port_disable(dev, port);
+
+	return 0;
+}
+
+/**
+ * ksz9477_acl_remove_entries - Remove ACL entries with a given cookie from a
+ *                              specified ksz9477_acl_entries structure.
+ * @dev: The ksz_device instance.
+ * @port: The port number on which to remove ACL entries.
+ * @acles: The ksz9477_acl_entries instance.
+ * @cookie: The cookie value to match for entry removal.
+ *
+ * This function iterates through the entries array, removing any entries with
+ * a matching cookie value. The remaining entries are then shifted down to fill
+ * the gap.
+ */
+void ksz9477_acl_remove_entries(struct ksz_device *dev, int port,
+				struct ksz9477_acl_entries *acles,
+				unsigned long cookie)
+{
+	int entries_count = acles->entries_count;
+	int ret, i, src_count;
+	int src_idx = -1;
+
+	if (!entries_count)
+		return;
+
+	/* Search for the first position with the cookie */
+	for (i = 0; i < entries_count; i++) {
+		if (acles->entries[i].cookie == cookie) {
+			src_idx = i;
+			break;
+		}
+	}
+
+	/* No entries with the matching cookie found */
+	if (src_idx == -1)
+		return;
+
+	/* Get the size of the cookie entry. We may have complex entries. */
+	src_count = ksz9477_acl_get_cont_entr(dev, port, src_idx);
+	if (src_count <= 0)
+		return;
+
+	/* Move all entries down to overwrite removed entry with the cookie */
+	ret = ksz9477_move_entries_downwards(dev, acles, src_idx,
+					     src_count,
+					     entries_count - src_count);
+	if (ret) {
+		dev_err(dev->dev, "Failed to move ACL entries down\n");
+		return;
+	}
+
+	/* Overwrite new empty places at the end of the list with zeros to make
+	 * sure not unexpected things will happen or no unexplored quirks will
+	 * come out.
+	 */
+	for (i = entries_count - src_count; i < entries_count; i++) {
+		struct ksz9477_acl_entry *entry = &acles->entries[i];
+
+		memset(entry, 0, sizeof(*entry));
+
+		/* Set all access bits to be able to write zeroed entry to HW */
+		entry->entry[KSZ9477_ACL_PORT_ACCESS_10] = 0xff;
+		entry->entry[KSZ9477_ACL_PORT_ACCESS_11] = 0xff;
+	}
+
+	/* Adjust the total entries count */
+	acles->entries_count -= src_count;
+}
+
+/**
+ * ksz9477_port_acl_init - Initialize the ACL for a specified port on a ksz
+ *			   device.
+ * @dev: The ksz_device instance.
+ * @port: The port number to initialize the ACL for.
+ *
+ * This function allocates memory for an acl structure, associates it with the
+ * specified port, and initializes the ACL entries to a default state. The
+ * entries are then written using the ksz9477_acl_write_list function, ensuring
+ * the ACL has a predictable initial hardware state.
+ *
+ * Returns: 0 on success, or an error code on failure.
+ */
+int ksz9477_port_acl_init(struct ksz_device *dev, int port)
+{
+	struct ksz9477_acl_entries *acles;
+	struct ksz9477_acl_priv *acl;
+	int ret, i;
+
+	acl = kzalloc(sizeof(*acl), GFP_KERNEL);
+	if (!acl)
+		return -ENOMEM;
+
+	dev->ports[port].acl_priv = acl;
+
+	acles = &acl->acles;
+	/* write all entries */
+	for (i = 0; i < ARRAY_SIZE(acles->entries); i++) {
+		u8 *entry = acles->entries[i].entry;
+
+		/* Set all access bits to be able to write zeroed
+		 * entry
+		 */
+		entry[KSZ9477_ACL_PORT_ACCESS_10] = 0xff;
+		entry[KSZ9477_ACL_PORT_ACCESS_11] = 0xff;
+	}
+
+	ret = ksz9477_acl_write_list(dev, port);
+	if (ret)
+		goto free_acl;
+
+	return 0;
+
+free_acl:
+	kfree(dev->ports[port].acl_priv);
+	dev->ports[port].acl_priv = NULL;
+
+	return ret;
+}
+
+/**
+ * ksz9477_port_acl_free - Free the ACL resources for a specified port on a ksz
+ *			   device.
+ * @dev: The ksz_device instance.
+ * @port: The port number to initialize the ACL for.
+ *
+ * This disables the ACL for the specified port and frees the associated memory,
+ */
+void ksz9477_port_acl_free(struct ksz_device *dev, int port)
+{
+	if (!dev->ports[port].acl_priv)
+		return;
+
+	ksz9477_acl_port_disable(dev, port);
+
+	kfree(dev->ports[port].acl_priv);
+	dev->ports[port].acl_priv = NULL;
+}
+
+/**
+ * ksz9477_acl_set_reg - Set entry[16] and entry[17] depending on the updated
+ *			   entry[]
+ * @entry: An array containing the entries
+ * @reg: The register of the entry that needs to be updated
+ * @value: The value to be assigned to the updated entry
+ *
+ * This function updates the entry[] array based on the provided register and
+ * value. It also sets entry[0x10] and entry[0x11] according to the ACL byte
+ * enable rules.
+ *
+ * 0x10 - Byte Enable [15:8]
+ *
+ * Each bit enables accessing one of the ACL bytes when a read or write is
+ * initiated by writing to the Port ACL Byte Enable LSB Register.
+ * Bit 0 applies to the Port ACL Access 7 Register
+ * Bit 1 applies to the Port ACL Access 6 Register, etc.
+ * Bit 7 applies to the Port ACL Access 0 Register
+ * 1 = Byte is selected for read/write
+ * 0 = Byte is not selected
+ *
+ * 0x11 - Byte Enable [7:0]
+ *
+ * Each bit enables accessing one of the ACL bytes when a read or write is
+ * initiated by writing to the Port ACL Byte Enable LSB Register.
+ * Bit 0 applies to the Port ACL Access F Register
+ * Bit 1 applies to the Port ACL Access E Register, etc.
+ * Bit 7 applies to the Port ACL Access 8 Register
+ * 1 = Byte is selected for read/write
+ * 0 = Byte is not selected
+ */
+static void ksz9477_acl_set_reg(u8 *entry, enum ksz9477_acl_port_access reg,
+				u8 value)
+{
+	if (reg >= KSZ9477_ACL_PORT_ACCESS_0 &&
+	    reg <= KSZ9477_ACL_PORT_ACCESS_7) {
+		entry[KSZ9477_ACL_PORT_ACCESS_10] |=
+				BIT(KSZ9477_ACL_PORT_ACCESS_7 - reg);
+	} else if (reg >= KSZ9477_ACL_PORT_ACCESS_8 &&
+		   reg <= KSZ9477_ACL_PORT_ACCESS_F) {
+		entry[KSZ9477_ACL_PORT_ACCESS_11] |=
+			BIT(KSZ9477_ACL_PORT_ACCESS_F - reg);
+	} else {
+		WARN_ON(1);
+		return;
+	}
+
+	entry[reg] = value;
+}
+
+/**
+ * ksz9477_acl_matching_rule_cfg_l2 - Configure an ACL filtering entry to match
+ *				      L2 types of Ethernet frames
+ * @entry: Pointer to ACL entry buffer
+ * @ethertype: Ethertype value
+ * @eth_addr: Pointer to Ethernet address
+ * @is_src: If true, match the source MAC address; if false, match the
+ *	    destination MAC address
+ *
+ * This function configures an Access Control List (ACL) filtering
+ * entry to match Layer 2 types of Ethernet frames based on the provided
+ * ethertype and Ethernet address. Additionally, it can match either the source
+ * or destination MAC address depending on the value of the is_src parameter.
+ *
+ * Register Descriptions for MD = 01 and ENB != 00 (Layer 2 MAC header
+ * filtering)
+ *
+ * 0x01 - Mode and Enable
+ *        Bits 5:4 - MD (Mode)
+ *                01 = Layer 2 MAC header or counter filtering
+ *        Bits 3:2 - ENB (Enable)
+ *                01 = Comparison is performed only on the TYPE value
+ *                10 = Comparison is performed only on the MAC Address value
+ *                11 = Both the MAC Address and TYPE are tested
+ *        Bit  1   - S/D (Source / Destination)
+ *                0 = Destination address
+ *                1 = Source address
+ *        Bit  0   - EQ (Equal / Not Equal)
+ *                0 = Not Equal produces true result
+ *                1 = Equal produces true result
+ *
+ * 0x02-0x07 - MAC Address
+ *        0x02 - MAC Address [47:40]
+ *        0x03 - MAC Address [39:32]
+ *        0x04 - MAC Address [31:24]
+ *        0x05 - MAC Address [23:16]
+ *        0x06 - MAC Address [15:8]
+ *        0x07 - MAC Address [7:0]
+ *
+ * 0x08-0x09 - EtherType
+ *        0x08 - EtherType [15:8]
+ *        0x09 - EtherType [7:0]
+ */
+static void ksz9477_acl_matching_rule_cfg_l2(u8 *entry, u16 ethertype,
+					     u8 *eth_addr, bool is_src)
+{
+	u8 enb = 0;
+	u8 val;
+
+	if (ethertype)
+		enb |= KSZ9477_ACL_ENB_L2_TYPE;
+	if (eth_addr)
+		enb |= KSZ9477_ACL_ENB_L2_MAC;
+
+	val = FIELD_PREP(KSZ9477_ACL_MD_MASK, KSZ9477_ACL_MD_L2_MAC) |
+	      FIELD_PREP(KSZ9477_ACL_ENB_MASK, enb) |
+	      FIELD_PREP(KSZ9477_ACL_SD_SRC, is_src) | KSZ9477_ACL_EQ_EQUAL;
+	ksz9477_acl_set_reg(entry, KSZ9477_ACL_PORT_ACCESS_1, val);
+
+	if (eth_addr) {
+		int i;
+
+		for (i = 0; i < ETH_ALEN; i++) {
+			ksz9477_acl_set_reg(entry,
+					    KSZ9477_ACL_PORT_ACCESS_2 + i,
+					    eth_addr[i]);
+		}
+	}
+
+	ksz9477_acl_set_reg(entry, KSZ9477_ACL_PORT_ACCESS_8, ethertype >> 8);
+	ksz9477_acl_set_reg(entry, KSZ9477_ACL_PORT_ACCESS_9, ethertype & 0xff);
+}
+
+/**
+ * ksz9477_acl_action_rule_cfg - Set action for an ACL entry
+ * @entry: Pointer to the ACL entry
+ * @force_prio: If true, force the priority value
+ * @prio_val: Priority value
+ *
+ * This function sets the action for the specified ACL entry. It prepares
+ * the priority mode and traffic class values and updates the entry's
+ * action registers accordingly. Currently, there is no port or VLAN PCP
+ * remapping.
+ *
+ * ACL Action Rule Parameters for Non-Count Modes (MD ≠ 01 or ENB ≠ 00)
+ *
+ * 0x0A - PM, P, RPE, RP[2:1]
+ *        Bits 7:6 - PM[1:0] - Priority Mode
+ *		00 = ACL does not specify the packet priority. Priority is
+ *		     determined by standard QoS functions.
+ *		01 = Change packet priority to P[2:0] if it is greater than QoS
+ *		     result.
+ *		10 = Change packet priority to P[2:0] if it is smaller than the
+ *		     QoS result.
+ *		11 = Always change packet priority to P[2:0].
+ *        Bits 5:3 - P[2:0] - Priority value
+ *        Bit  2   - RPE - Remark Priority Enable
+ *        Bits 1:0 - RP[2:1] - Remarked Priority value (bits 2:1)
+ *		0 = Disable priority remarking
+ *		1 = Enable priority remarking. VLAN tag priority (PCP) bits are
+ *		    replaced by RP[2:0].
+ *
+ * 0x0B - RP[0], MM
+ *        Bit  7   - RP[0] - Remarked Priority value (bit 0)
+ *        Bits 6:5 - MM[1:0] - Map Mode
+ *		00 = No forwarding remapping
+ *		01 = The forwarding map in FORWARD is OR'ed with the forwarding
+ *		     map from the Address Lookup Table.
+ *		10 = The forwarding map in FORWARD is AND'ed with the forwarding
+ *		     map from the Address Lookup Table.
+ *		11 = The forwarding map in FORWARD replaces the forwarding map
+ *		     from the Address Lookup Table.
+ * 0x0D - FORWARD[n:0]
+ *       Bits 7:0 - FORWARD[n:0] - Forwarding map. Bit 0 = port 1,
+ *		    bit 1 = port 2, etc.
+ *		1 = enable forwarding to this port
+ *		0 = do not forward to this port
+ */
+void ksz9477_acl_action_rule_cfg(u8 *entry, bool force_prio, u8 prio_val)
+{
+	u8 prio_mode, val;
+
+	if (force_prio)
+		prio_mode = KSZ9477_ACL_PM_REPLACE;
+	else
+		prio_mode = KSZ9477_ACL_PM_DISABLE;
+
+	val = FIELD_PREP(KSZ9477_ACL_PM_M, prio_mode) |
+	      FIELD_PREP(KSZ9477_ACL_P_M, prio_val);
+	ksz9477_acl_set_reg(entry, KSZ9477_ACL_PORT_ACCESS_A, val);
+
+	/* no port or VLAN PCP remapping for now */
+	ksz9477_acl_set_reg(entry, KSZ9477_ACL_PORT_ACCESS_B, 0);
+	ksz9477_acl_set_reg(entry, KSZ9477_ACL_PORT_ACCESS_D, 0);
+}
+
+/**
+ * ksz9477_acl_processing_rule_set_action - Set the action for the processing
+ *					    rule set.
+ * @entry: Pointer to the ACL entry
+ * @action_idx: Index of the action to be applied
+ *
+ * This function sets the action for the processing rule set by updating the
+ * appropriate register in the entry. There can be only one action per
+ * processing rule.
+ *
+ * Access Control List (ACL) Processing Rule Registers:
+ *
+ * 0x00 - First Rule Number (FRN)
+ *        Bits 3:0 - First Rule Number. Pointer to an Action rule entry.
+ */
+void ksz9477_acl_processing_rule_set_action(u8 *entry, u8 action_idx)
+{
+	ksz9477_acl_set_reg(entry, KSZ9477_ACL_PORT_ACCESS_0, action_idx);
+}
+
+/**
+ * ksz9477_acl_processing_rule_add_match - Add a matching rule to the rule set
+ * @entry: Pointer to the ACL entry
+ * @match_idx: Index of the matching rule to be added
+ *
+ * This function adds a matching rule to the rule set by updating the
+ * appropriate bits in the entry's rule set registers.
+ *
+ * Access Control List (ACL) Processing Rule Registers:
+ *
+ * 0x0E - RuleSet [15:8]
+ *        Bits 7:0 - RuleSet [15:8] Specifies a set of one or more Matching rule
+ *        entries. RuleSet has one bit for each of the 16 Matching rule entries.
+ *        If multiple Matching rules are selected, then all conditions will be
+ *	  AND'ed to produce a final match result.
+ *		0 = Matching rule not selected
+ *		1 = Matching rule selected
+ *
+ * 0x0F - RuleSet [7:0]
+ *        Bits 7:0 - RuleSet [7:0]
+ */
+static void ksz9477_acl_processing_rule_add_match(u8 *entry, u8 match_idx)
+{
+	u8 vale = entry[KSZ9477_ACL_PORT_ACCESS_E];
+	u8 valf = entry[KSZ9477_ACL_PORT_ACCESS_F];
+
+	if (match_idx < 8)
+		valf |= BIT(match_idx);
+	else
+		vale |= BIT(match_idx - 8);
+
+	ksz9477_acl_set_reg(entry, KSZ9477_ACL_PORT_ACCESS_E, vale);
+	ksz9477_acl_set_reg(entry, KSZ9477_ACL_PORT_ACCESS_F, valf);
+}
+
+/**
+ * ksz9477_acl_get_init_entry - Get a new uninitialized entry for a specified
+ *				port on a ksz_device.
+ * @dev: The ksz_device instance.
+ * @port: The port number to get the uninitialized entry for.
+ * @cookie: The cookie to associate with the entry.
+ * @prio: The priority to associate with the entry.
+ *
+ * This function retrieves the next available ACL entry for the specified port,
+ * clears all access flags, and associates it with the current cookie.
+ *
+ * Returns: A pointer to the new uninitialized ACL entry.
+ */
+static struct ksz9477_acl_entry *
+ksz9477_acl_get_init_entry(struct ksz_device *dev, int port,
+			   unsigned long cookie, u32 prio)
+{
+	struct ksz9477_acl_priv *acl = dev->ports[port].acl_priv;
+	struct ksz9477_acl_entries *acles = &acl->acles;
+	struct ksz9477_acl_entry *entry;
+
+	entry = &acles->entries[acles->entries_count];
+	entry->cookie = cookie;
+	entry->prio = prio;
+
+	/* clear all access flags */
+	entry->entry[KSZ9477_ACL_PORT_ACCESS_10] = 0;
+	entry->entry[KSZ9477_ACL_PORT_ACCESS_11] = 0;
+
+	return entry;
+}
+
+/**
+ * ksz9477_acl_match_process_l2 - Configure Layer 2 ACL matching rules and
+ *                                processing rules.
+ * @dev: Pointer to the ksz_device.
+ * @port: Port number.
+ * @ethtype: Ethernet type.
+ * @src_mac: Source MAC address.
+ * @dst_mac: Destination MAC address.
+ * @cookie: The cookie to associate with the entry.
+ * @prio: The priority of the entry.
+ *
+ * This function sets up matching and processing rules for Layer 2 ACLs.
+ * It takes into account that only one MAC per entry is supported.
+ */
+void ksz9477_acl_match_process_l2(struct ksz_device *dev, int port,
+				  u16 ethtype, u8 *src_mac, u8 *dst_mac,
+				  unsigned long cookie, u32 prio)
+{
+	struct ksz9477_acl_priv *acl = dev->ports[port].acl_priv;
+	struct ksz9477_acl_entries *acles = &acl->acles;
+	struct ksz9477_acl_entry *entry;
+
+	entry = ksz9477_acl_get_init_entry(dev, port, cookie, prio);
+
+	/* ACL supports only one MAC per entry */
+	if (src_mac && dst_mac) {
+		ksz9477_acl_matching_rule_cfg_l2(entry->entry, ethtype, src_mac,
+						 true);
+
+		/* Add both match entries to first processing rule */
+		ksz9477_acl_processing_rule_add_match(entry->entry,
+						      acles->entries_count);
+		acles->entries_count++;
+		ksz9477_acl_processing_rule_add_match(entry->entry,
+						      acles->entries_count);
+
+		entry = ksz9477_acl_get_init_entry(dev, port, cookie, prio);
+		ksz9477_acl_matching_rule_cfg_l2(entry->entry, 0, dst_mac,
+						 false);
+		acles->entries_count++;
+	} else {
+		u8 *mac = src_mac ? src_mac : dst_mac;
+		bool is_src = src_mac ? true : false;
+
+		ksz9477_acl_matching_rule_cfg_l2(entry->entry, ethtype, mac,
+						 is_src);
+		ksz9477_acl_processing_rule_add_match(entry->entry,
+						      acles->entries_count);
+		acles->entries_count++;
+	}
+}
diff --git a/drivers/net/dsa/microchip/ksz9477_reg.h b/drivers/net/dsa/microchip/ksz9477_reg.h
index cba3dba58bc3..504e085aab52 100644
--- a/drivers/net/dsa/microchip/ksz9477_reg.h
+++ b/drivers/net/dsa/microchip/ksz9477_reg.h
@@ -112,19 +112,6 @@
 
 #define REG_SW_IBA_SYNC__1		0x010C
 
-#define REG_SW_IO_STRENGTH__1		0x010D
-#define SW_DRIVE_STRENGTH_M		0x7
-#define SW_DRIVE_STRENGTH_2MA		0
-#define SW_DRIVE_STRENGTH_4MA		1
-#define SW_DRIVE_STRENGTH_8MA		2
-#define SW_DRIVE_STRENGTH_12MA		3
-#define SW_DRIVE_STRENGTH_16MA		4
-#define SW_DRIVE_STRENGTH_20MA		5
-#define SW_DRIVE_STRENGTH_24MA		6
-#define SW_DRIVE_STRENGTH_28MA		7
-#define SW_HI_SPEED_DRIVE_STRENGTH_S	4
-#define SW_LO_SPEED_DRIVE_STRENGTH_S	0
-
 #define REG_SW_IBA_STATUS__4		0x0110
 
 #define SW_IBA_REQ			BIT(31)
diff --git a/drivers/net/dsa/microchip/ksz9477_tc_flower.c b/drivers/net/dsa/microchip/ksz9477_tc_flower.c
new file mode 100644
index 000000000000..8b2f5be667e0
--- /dev/null
+++ b/drivers/net/dsa/microchip/ksz9477_tc_flower.c
@@ -0,0 +1,281 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2023 Pengutronix, Oleksij Rempel <kernel@pengutronix.de>
+
+#include "ksz9477.h"
+#include "ksz9477_reg.h"
+#include "ksz_common.h"
+
+#define ETHER_TYPE_FULL_MASK		cpu_to_be16(~0)
+#define KSZ9477_MAX_TC			7
+
+/**
+ * ksz9477_flower_parse_key_l2 - Parse Layer 2 key from flow rule and configure
+ *                               ACL entries accordingly.
+ * @dev: Pointer to the ksz_device.
+ * @port: Port number.
+ * @extack: Pointer to the netlink_ext_ack.
+ * @rule: Pointer to the flow_rule.
+ * @cookie: The cookie to associate with the entry.
+ * @prio: The priority of the entry.
+ *
+ * This function parses the Layer 2 key from the flow rule and configures
+ * the corresponding ACL entries. It checks for unsupported offloads and
+ * available entries before proceeding with the configuration.
+ *
+ * Returns: 0 on success or a negative error code on failure.
+ */
+static int ksz9477_flower_parse_key_l2(struct ksz_device *dev, int port,
+				       struct netlink_ext_ack *extack,
+				       struct flow_rule *rule,
+				       unsigned long cookie, u32 prio)
+{
+	struct ksz9477_acl_priv *acl = dev->ports[port].acl_priv;
+	struct flow_match_eth_addrs ematch;
+	struct ksz9477_acl_entries *acles;
+	int required_entries;
+	u8 *src_mac = NULL;
+	u8 *dst_mac = NULL;
+	u16 ethtype = 0;
+
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
+		struct flow_match_basic match;
+
+		flow_rule_match_basic(rule, &match);
+
+		if (match.key->n_proto) {
+			if (match.mask->n_proto != ETHER_TYPE_FULL_MASK) {
+				NL_SET_ERR_MSG_MOD(extack,
+						   "ethernet type mask must be a full mask");
+				return -EINVAL;
+			}
+
+			ethtype = be16_to_cpu(match.key->n_proto);
+		}
+	}
+
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
+		flow_rule_match_eth_addrs(rule, &ematch);
+
+		if (!is_zero_ether_addr(ematch.key->src)) {
+			if (!is_broadcast_ether_addr(ematch.mask->src))
+				goto not_full_mask_err;
+
+			src_mac = ematch.key->src;
+		}
+
+		if (!is_zero_ether_addr(ematch.key->dst)) {
+			if (!is_broadcast_ether_addr(ematch.mask->dst))
+				goto not_full_mask_err;
+
+			dst_mac = ematch.key->dst;
+		}
+	}
+
+	acles = &acl->acles;
+	/* ACL supports only one MAC per entry */
+	required_entries = src_mac && dst_mac ? 2 : 1;
+
+	/* Check if there are enough available entries */
+	if (acles->entries_count + required_entries > KSZ9477_ACL_MAX_ENTRIES) {
+		NL_SET_ERR_MSG_MOD(extack, "ACL entry limit reached");
+		return -EOPNOTSUPP;
+	}
+
+	ksz9477_acl_match_process_l2(dev, port, ethtype, src_mac, dst_mac,
+				     cookie, prio);
+
+	return 0;
+
+not_full_mask_err:
+	NL_SET_ERR_MSG_MOD(extack, "MAC address mask must be a full mask");
+	return -EOPNOTSUPP;
+}
+
+/**
+ * ksz9477_flower_parse_key - Parse flow rule keys for a specified port on a
+ *			      ksz_device.
+ * @dev: The ksz_device instance.
+ * @port: The port number to parse the flow rule keys for.
+ * @extack: The netlink extended ACK for reporting errors.
+ * @rule: The flow_rule to parse.
+ * @cookie: The cookie to associate with the entry.
+ * @prio: The priority of the entry.
+ *
+ * This function checks if the used keys in the flow rule are supported by
+ * the device and parses the L2 keys if they match. If unsupported keys are
+ * used, an error message is set in the extended ACK.
+ *
+ * Returns: 0 on success or a negative error code on failure.
+ */
+static int ksz9477_flower_parse_key(struct ksz_device *dev, int port,
+				    struct netlink_ext_ack *extack,
+				    struct flow_rule *rule,
+				    unsigned long cookie, u32 prio)
+{
+	struct flow_dissector *dissector = rule->match.dissector;
+	int ret;
+
+	if (dissector->used_keys &
+	    ~(BIT_ULL(FLOW_DISSECTOR_KEY_BASIC) |
+	      BIT_ULL(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
+	      BIT_ULL(FLOW_DISSECTOR_KEY_CONTROL))) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "Unsupported keys used");
+		return -EOPNOTSUPP;
+	}
+
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC) ||
+	    flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
+		ret = ksz9477_flower_parse_key_l2(dev, port, extack, rule,
+						  cookie, prio);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+/**
+ * ksz9477_flower_parse_action - Parse flow rule actions for a specified port
+ *				 on a ksz_device.
+ * @dev: The ksz_device instance.
+ * @port: The port number to parse the flow rule actions for.
+ * @extack: The netlink extended ACK for reporting errors.
+ * @cls: The flow_cls_offload instance containing the flow rule.
+ * @entry_idx: The index of the ACL entry to store the action.
+ *
+ * This function checks if the actions in the flow rule are supported by
+ * the device. Currently, only actions that change priorities are supported.
+ * If unsupported actions are encountered, an error message is set in the
+ * extended ACK.
+ *
+ * Returns: 0 on success or a negative error code on failure.
+ */
+static int ksz9477_flower_parse_action(struct ksz_device *dev, int port,
+				       struct netlink_ext_ack *extack,
+				       struct flow_cls_offload *cls,
+				       int entry_idx)
+{
+	struct flow_rule *rule = flow_cls_offload_flow_rule(cls);
+	struct ksz9477_acl_priv *acl = dev->ports[port].acl_priv;
+	const struct flow_action_entry *act;
+	struct ksz9477_acl_entry *entry;
+	bool prio_force = false;
+	u8 prio_val = 0;
+	int i;
+
+	if (TC_H_MIN(cls->classid)) {
+		NL_SET_ERR_MSG_MOD(extack, "hw_tc is not supported. Use: action skbedit prio");
+		return -EOPNOTSUPP;
+	}
+
+	flow_action_for_each(i, act, &rule->action) {
+		switch (act->id) {
+		case FLOW_ACTION_PRIORITY:
+			if (act->priority > KSZ9477_MAX_TC) {
+				NL_SET_ERR_MSG_MOD(extack, "Priority value is too high");
+				return -EOPNOTSUPP;
+			}
+			prio_force = true;
+			prio_val = act->priority;
+			break;
+		default:
+			NL_SET_ERR_MSG_MOD(extack, "action not supported");
+			return -EOPNOTSUPP;
+		}
+	}
+
+	/* pick entry to store action */
+	entry = &acl->acles.entries[entry_idx];
+
+	ksz9477_acl_action_rule_cfg(entry->entry, prio_force, prio_val);
+	ksz9477_acl_processing_rule_set_action(entry->entry, entry_idx);
+
+	return 0;
+}
+
+/**
+ * ksz9477_cls_flower_add - Add a flow classification rule for a specified port
+ *			    on a ksz_device.
+ * @ds: The DSA switch instance.
+ * @port: The port number to add the flow classification rule to.
+ * @cls: The flow_cls_offload instance containing the flow rule.
+ * @ingress: A flag indicating if the rule is applied on the ingress path.
+ *
+ * This function adds a flow classification rule for a specified port on a
+ * ksz_device. It checks if the ACL offloading is supported and parses the flow
+ * keys and actions. If the ACL is not supported, it returns an error. If there
+ * are unprocessed entries, it parses the action for the rule.
+ *
+ * Returns: 0 on success or a negative error code on failure.
+ */
+int ksz9477_cls_flower_add(struct dsa_switch *ds, int port,
+			   struct flow_cls_offload *cls, bool ingress)
+{
+	struct flow_rule *rule = flow_cls_offload_flow_rule(cls);
+	struct netlink_ext_ack *extack = cls->common.extack;
+	struct ksz_device *dev = ds->priv;
+	struct ksz9477_acl_priv *acl;
+	int action_entry_idx;
+	int ret;
+
+	acl = dev->ports[port].acl_priv;
+
+	if (!acl) {
+		NL_SET_ERR_MSG_MOD(extack, "ACL offloading is not supported");
+		return -EOPNOTSUPP;
+	}
+
+	/* A complex rule set can take multiple entries. Use first entry
+	 * to store the action.
+	 */
+	action_entry_idx = acl->acles.entries_count;
+
+	ret = ksz9477_flower_parse_key(dev, port, extack, rule, cls->cookie,
+				       cls->common.prio);
+	if (ret)
+		return ret;
+
+	ret = ksz9477_flower_parse_action(dev, port, extack, cls,
+					  action_entry_idx);
+	if (ret)
+		return ret;
+
+	ret = ksz9477_sort_acl_entries(dev, port);
+	if (ret)
+		return ret;
+
+	return ksz9477_acl_write_list(dev, port);
+}
+
+/**
+ * ksz9477_cls_flower_del - Remove a flow classification rule for a specified
+ *			    port on a ksz_device.
+ * @ds: The DSA switch instance.
+ * @port: The port number to remove the flow classification rule from.
+ * @cls: The flow_cls_offload instance containing the flow rule.
+ * @ingress: A flag indicating if the rule is applied on the ingress path.
+ *
+ * This function removes a flow classification rule for a specified port on a
+ * ksz_device. It checks if the ACL is initialized, and if not, returns an
+ * error. If the ACL is initialized, it removes entries with the specified
+ * cookie and rewrites the ACL list.
+ *
+ * Returns: 0 on success or a negative error code on failure.
+ */
+int ksz9477_cls_flower_del(struct dsa_switch *ds, int port,
+			   struct flow_cls_offload *cls, bool ingress)
+{
+	unsigned long cookie = cls->cookie;
+	struct ksz_device *dev = ds->priv;
+	struct ksz9477_acl_priv *acl;
+
+	acl = dev->ports[port].acl_priv;
+
+	if (!acl)
+		return -EOPNOTSUPP;
+
+	ksz9477_acl_remove_entries(dev, port, &acl->acles, cookie);
+
+	return ksz9477_acl_write_list(dev, port);
+}
diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c
index 42db7679c360..173ad8f04671 100644
--- a/drivers/net/dsa/microchip/ksz_common.c
+++ b/drivers/net/dsa/microchip/ksz_common.c
@@ -186,6 +186,72 @@ static const struct ksz_mib_names ksz9477_mib_names[] = {
 	{ 0x83, "tx_discards" },
 };
 
+struct ksz_driver_strength_prop {
+	const char *name;
+	int offset;
+	int value;
+};
+
+enum ksz_driver_strength_type {
+	KSZ_DRIVER_STRENGTH_HI,
+	KSZ_DRIVER_STRENGTH_LO,
+	KSZ_DRIVER_STRENGTH_IO,
+};
+
+/**
+ * struct ksz_drive_strength - drive strength mapping
+ * @reg_val:	register value
+ * @microamp:	microamp value
+ */
+struct ksz_drive_strength {
+	u32 reg_val;
+	u32 microamp;
+};
+
+/* ksz9477_drive_strengths - Drive strength mapping for KSZ9477 variants
+ *
+ * This values are not documented in KSZ9477 variants but confirmed by
+ * Microchip that KSZ9477, KSZ9567, KSZ8567, KSZ9897, KSZ9896, KSZ9563, KSZ9893
+ * and KSZ8563 are using same register (drive strength) settings like KSZ8795.
+ *
+ * Documentation in KSZ8795CLX provides more information with some
+ * recommendations:
+ * - for high speed signals
+ *   1. 4 mA or 8 mA is often used for MII, RMII, and SPI interface with using
+ *      2.5V or 3.3V VDDIO.
+ *   2. 12 mA or 16 mA is often used for MII, RMII, and SPI interface with
+ *      using 1.8V VDDIO.
+ *   3. 20 mA or 24 mA is often used for GMII/RGMII interface with using 2.5V
+ *      or 3.3V VDDIO.
+ *   4. 28 mA is often used for GMII/RGMII interface with using 1.8V VDDIO.
+ *   5. In same interface, the heavy loading should use higher one of the
+ *      drive current strength.
+ * - for low speed signals
+ *   1. 3.3V VDDIO, use either 4 mA or 8 mA.
+ *   2. 2.5V VDDIO, use either 8 mA or 12 mA.
+ *   3. 1.8V VDDIO, use either 12 mA or 16 mA.
+ *   4. If it is heavy loading, can use higher drive current strength.
+ */
+static const struct ksz_drive_strength ksz9477_drive_strengths[] = {
+	{ SW_DRIVE_STRENGTH_2MA,  2000 },
+	{ SW_DRIVE_STRENGTH_4MA,  4000 },
+	{ SW_DRIVE_STRENGTH_8MA,  8000 },
+	{ SW_DRIVE_STRENGTH_12MA, 12000 },
+	{ SW_DRIVE_STRENGTH_16MA, 16000 },
+	{ SW_DRIVE_STRENGTH_20MA, 20000 },
+	{ SW_DRIVE_STRENGTH_24MA, 24000 },
+	{ SW_DRIVE_STRENGTH_28MA, 28000 },
+};
+
+/* ksz8830_drive_strengths - Drive strength mapping for KSZ8830, KSZ8873, ..
+ *			     variants.
+ * This values are documented in KSZ8873 and KSZ8863 datasheets.
+ */
+static const struct ksz_drive_strength ksz8830_drive_strengths[] = {
+	{ 0,  8000 },
+	{ KSZ8873_DRIVE_STRENGTH_16MA, 16000 },
+};
+
 static const struct ksz_dev_ops ksz8_dev_ops = {
 	.setup = ksz8_setup,
 	.get_port_addr = ksz8_get_port_addr,
@@ -2498,8 +2564,7 @@ static int ksz_port_mdb_del(struct dsa_switch *ds, int port,
 	return dev->dev_ops->mdb_del(dev, port, mdb, db);
 }
 
-static int ksz_enable_port(struct dsa_switch *ds, int port,
-			   struct phy_device *phy)
+static int ksz_port_setup(struct dsa_switch *ds, int port)
 {
 	struct ksz_device *dev = ds->priv;
 
@@ -2562,6 +2627,23 @@ void ksz_port_stp_state_set(struct dsa_switch *ds, int port, u8 state)
 	ksz_update_port_member(dev, port);
 }
 
+static void ksz_port_teardown(struct dsa_switch *ds, int port)
+{
+	struct ksz_device *dev = ds->priv;
+
+	switch (dev->chip_id) {
+	case KSZ8563_CHIP_ID:
+	case KSZ9477_CHIP_ID:
+	case KSZ9563_CHIP_ID:
+	case KSZ9567_CHIP_ID:
+	case KSZ9893_CHIP_ID:
+	case KSZ9896_CHIP_ID:
+	case KSZ9897_CHIP_ID:
+		if (dsa_is_user_port(ds, port))
+			ksz9477_port_acl_free(dev, port);
+	}
+}
+
 static int ksz_port_pre_bridge_flags(struct dsa_switch *ds, int port,
 				     struct switchdev_brport_flags flags,
 				     struct netlink_ext_ack *extack)
@@ -3107,6 +3189,44 @@ static int ksz_switch_detect(struct ksz_device *dev)
 	return 0;
 }
 
+static int ksz_cls_flower_add(struct dsa_switch *ds, int port,
+			      struct flow_cls_offload *cls, bool ingress)
+{
+	struct ksz_device *dev = ds->priv;
+
+	switch (dev->chip_id) {
+	case KSZ8563_CHIP_ID:
+	case KSZ9477_CHIP_ID:
+	case KSZ9563_CHIP_ID:
+	case KSZ9567_CHIP_ID:
+	case KSZ9893_CHIP_ID:
+	case KSZ9896_CHIP_ID:
+	case KSZ9897_CHIP_ID:
+		return ksz9477_cls_flower_add(ds, port, cls, ingress);
+	}
+
+	return -EOPNOTSUPP;
+}
+
+static int ksz_cls_flower_del(struct dsa_switch *ds, int port,
+			      struct flow_cls_offload *cls, bool ingress)
+{
+	struct ksz_device *dev = ds->priv;
+
+	switch (dev->chip_id) {
+	case KSZ8563_CHIP_ID:
+	case KSZ9477_CHIP_ID:
+	case KSZ9563_CHIP_ID:
+	case KSZ9567_CHIP_ID:
+	case KSZ9893_CHIP_ID:
+	case KSZ9896_CHIP_ID:
+	case KSZ9897_CHIP_ID:
+		return ksz9477_cls_flower_del(ds, port, cls, ingress);
+	}
+
+	return -EOPNOTSUPP;
+}
+
 /* Bandwidth is calculated by idle slope/transmission speed. Then the Bandwidth
  * is converted to Hex-decimal using the successive multiplication method. On
  * every step, integer part is taken and decimal part is carry forwarded.
@@ -3431,7 +3551,7 @@ static const struct dsa_switch_ops ksz_switch_ops = {
 	.phylink_mac_config	= ksz_phylink_mac_config,
 	.phylink_mac_link_up	= ksz_phylink_mac_link_up,
 	.phylink_mac_link_down	= ksz_mac_link_down,
-	.port_enable		= ksz_enable_port,
+	.port_setup		= ksz_port_setup,
 	.set_ageing_time	= ksz_set_ageing_time,
 	.get_strings		= ksz_get_strings,
 	.get_ethtool_stats	= ksz_get_ethtool_stats,
@@ -3439,6 +3559,7 @@ static const struct dsa_switch_ops ksz_switch_ops = {
 	.port_bridge_join	= ksz_port_bridge_join,
 	.port_bridge_leave	= ksz_port_bridge_leave,
 	.port_stp_state_set	= ksz_port_stp_state_set,
+	.port_teardown		= ksz_port_teardown,
 	.port_pre_bridge_flags	= ksz_port_pre_bridge_flags,
 	.port_bridge_flags	= ksz_port_bridge_flags,
 	.port_fast_age		= ksz_port_fast_age,
@@ -3461,6 +3582,8 @@ static const struct dsa_switch_ops ksz_switch_ops = {
 	.port_hwtstamp_set	= ksz_hwtstamp_set,
 	.port_txtstamp		= ksz_port_txtstamp,
 	.port_rxtstamp		= ksz_port_rxtstamp,
+	.cls_flower_add		= ksz_cls_flower_add,
+	.cls_flower_del		= ksz_cls_flower_del,
 	.port_setup_tc		= ksz_setup_tc,
 	.get_mac_eee		= ksz_get_mac_eee,
 	.set_mac_eee		= ksz_set_mac_eee,
@@ -3530,6 +3653,245 @@ static void ksz_parse_rgmii_delay(struct ksz_device *dev, int port_num,
 	dev->ports[port_num].rgmii_tx_val = tx_delay;
 }
 
+/**
+ * ksz_drive_strength_to_reg() - Convert drive strength value to corresponding
+ *				 register value.
+ * @array:	The array of drive strength values to search.
+ * @array_size:	The size of the array.
+ * @microamp:	The drive strength value in microamp to be converted.
+ *
+ * This function searches the array of drive strength values for the given
+ * microamp value and returns the corresponding register value for that drive.
+ *
+ * Returns: If found, the corresponding register value for that drive strength
+ * is returned. Otherwise, -EINVAL is returned indicating an invalid value.
+ */
+static int ksz_drive_strength_to_reg(const struct ksz_drive_strength *array,
+				     size_t array_size, int microamp)
+{
+	int i;
+
+	for (i = 0; i < array_size; i++) {
+		if (array[i].microamp == microamp)
+			return array[i].reg_val;
+	}
+
+	return -EINVAL;
+}
+
+/**
+ * ksz_drive_strength_error() - Report invalid drive strength value
+ * @dev:	ksz device
+ * @array:	The array of drive strength values to search.
+ * @array_size:	The size of the array.
+ * @microamp:	Invalid drive strength value in microamp
+ *
+ * This function logs an error message when an unsupported drive strength value
+ * is detected. It lists out all the supported drive strength values for
+ * reference in the error message.
+ */
+static void ksz_drive_strength_error(struct ksz_device *dev,
+				     const struct ksz_drive_strength *array,
+				     size_t array_size, int microamp)
+{
+	char supported_values[100];
+	size_t remaining_size;
+	int added_len;
+	char *ptr;
+	int i;
+
+	remaining_size = sizeof(supported_values);
+	ptr = supported_values;
+
+	for (i = 0; i < array_size; i++) {
+		added_len = snprintf(ptr, remaining_size,
+				     i == 0 ? "%d" : ", %d", array[i].microamp);
+
+		if (added_len >= remaining_size)
+			break;
+
+		ptr += added_len;
+		remaining_size -= added_len;
+	}
+
+	dev_err(dev->dev, "Invalid drive strength %d, supported values are %s\n",
+		microamp, supported_values);
+}
+
+/**
+ * ksz9477_drive_strength_write() - Set the drive strength for specific KSZ9477
+ *				    chip variants.
+ * @dev:       ksz device
+ * @props:     Array of drive strength properties to be applied
+ * @num_props: Number of properties in the array
+ *
+ * This function configures the drive strength for various KSZ9477 chip variants
+ * based on the provided properties. It handles chip-specific nuances and
+ * ensures only valid drive strengths are written to the respective chip.
+ *
+ * Return: 0 on successful configuration, a negative error code on failure.
+ */
+static int ksz9477_drive_strength_write(struct ksz_device *dev,
+					struct ksz_driver_strength_prop *props,
+					int num_props)
+{
+	size_t array_size = ARRAY_SIZE(ksz9477_drive_strengths);
+	int i, ret, reg;
+	u8 mask = 0;
+	u8 val = 0;
+
+	if (props[KSZ_DRIVER_STRENGTH_IO].value != -1)
+		dev_warn(dev->dev, "%s is not supported by this chip variant\n",
+			 props[KSZ_DRIVER_STRENGTH_IO].name);
+
+	if (dev->chip_id == KSZ8795_CHIP_ID ||
+	    dev->chip_id == KSZ8794_CHIP_ID ||
+	    dev->chip_id == KSZ8765_CHIP_ID)
+		reg = KSZ8795_REG_SW_CTRL_20;
+	else
+		reg = KSZ9477_REG_SW_IO_STRENGTH;
+
+	for (i = 0; i < num_props; i++) {
+		if (props[i].value == -1)
+			continue;
+
+		ret = ksz_drive_strength_to_reg(ksz9477_drive_strengths,
+						array_size, props[i].value);
+		if (ret < 0) {
+			ksz_drive_strength_error(dev, ksz9477_drive_strengths,
+						 array_size, props[i].value);
+			return ret;
+		}
+
+		mask |= SW_DRIVE_STRENGTH_M << props[i].offset;
+		val |= ret << props[i].offset;
+	}
+
+	return ksz_rmw8(dev, reg, mask, val);
+}
+
+/**
+ * ksz8830_drive_strength_write() - Set the drive strength configuration for
+ *				    KSZ8830 compatible chip variants.
+ * @dev:       ksz device
+ * @props:     Array of drive strength properties to be set
+ * @num_props: Number of properties in the array
+ *
+ * This function applies the specified drive strength settings to KSZ8830 chip
+ * variants (KSZ8873, KSZ8863).
+ * It ensures the configurations align with what the chip variant supports and
+ * warns or errors out on unsupported settings.
+ *
+ * Return: 0 on success, error code otherwise
+ */
+static int ksz8830_drive_strength_write(struct ksz_device *dev,
+					struct ksz_driver_strength_prop *props,
+					int num_props)
+{
+	size_t array_size = ARRAY_SIZE(ksz8830_drive_strengths);
+	int microamp;
+	int i, ret;
+
+	for (i = 0; i < num_props; i++) {
+		if (props[i].value == -1 || i == KSZ_DRIVER_STRENGTH_IO)
+			continue;
+
+		dev_warn(dev->dev, "%s is not supported by this chip variant\n",
+			 props[i].name);
+	}
+
+	microamp = props[KSZ_DRIVER_STRENGTH_IO].value;
+	ret = ksz_drive_strength_to_reg(ksz8830_drive_strengths, array_size,
+					microamp);
+	if (ret < 0) {
+		ksz_drive_strength_error(dev, ksz8830_drive_strengths,
+					 array_size, microamp);
+		return ret;
+	}
+
+	return ksz_rmw8(dev, KSZ8873_REG_GLOBAL_CTRL_12,
+			KSZ8873_DRIVE_STRENGTH_16MA, ret);
+}
+
+/**
+ * ksz_parse_drive_strength() - Extract and apply drive strength configurations
+ *				from device tree properties.
+ * @dev:	ksz device
+ *
+ * This function reads the specified drive strength properties from the
+ * device tree, validates against the supported chip variants, and sets
+ * them accordingly. An error should be critical here, as the drive strength
+ * settings are crucial for EMI compliance.
+ *
+ * Return: 0 on success, error code otherwise
+ */
+static int ksz_parse_drive_strength(struct ksz_device *dev)
+{
+	struct ksz_driver_strength_prop of_props[] = {
+		[KSZ_DRIVER_STRENGTH_HI] = {
+			.name = "microchip,hi-drive-strength-microamp",
+			.offset = SW_HI_SPEED_DRIVE_STRENGTH_S,
+			.value = -1,
+		},
+		[KSZ_DRIVER_STRENGTH_LO] = {
+			.name = "microchip,lo-drive-strength-microamp",
+			.offset = SW_LO_SPEED_DRIVE_STRENGTH_S,
+			.value = -1,
+		},
+		[KSZ_DRIVER_STRENGTH_IO] = {
+			.name = "microchip,io-drive-strength-microamp",
+			.offset = 0, /* don't care */
+			.value = -1,
+		},
+	};
+	struct device_node *np = dev->dev->of_node;
+	bool have_any_prop = false;
+	int i, ret;
+
+	for (i = 0; i < ARRAY_SIZE(of_props); i++) {
+		ret = of_property_read_u32(np, of_props[i].name,
+					   &of_props[i].value);
+		if (ret && ret != -EINVAL)
+			dev_warn(dev->dev, "Failed to read %s\n",
+				 of_props[i].name);
+		if (ret)
+			continue;
+
+		have_any_prop = true;
+	}
+
+	if (!have_any_prop)
+		return 0;
+
+	switch (dev->chip_id) {
+	case KSZ8830_CHIP_ID:
+		return ksz8830_drive_strength_write(dev, of_props,
+						    ARRAY_SIZE(of_props));
+	case KSZ8795_CHIP_ID:
+	case KSZ8794_CHIP_ID:
+	case KSZ8765_CHIP_ID:
+	case KSZ8563_CHIP_ID:
+	case KSZ9477_CHIP_ID:
+	case KSZ9563_CHIP_ID:
+	case KSZ9567_CHIP_ID:
+	case KSZ9893_CHIP_ID:
+	case KSZ9896_CHIP_ID:
+	case KSZ9897_CHIP_ID:
+		return ksz9477_drive_strength_write(dev, of_props,
+						    ARRAY_SIZE(of_props));
+	default:
+		for (i = 0; i < ARRAY_SIZE(of_props); i++) {
+			if (of_props[i].value == -1)
+				continue;
+
+			dev_warn(dev->dev, "%s is not supported by this chip variant\n",
+				 of_props[i].name);
+		}
+	}
+
+	return 0;
+}
+
 int ksz_switch_register(struct ksz_device *dev)
 {
 	const struct ksz_chip_data *info;
@@ -3612,6 +3974,10 @@ int ksz_switch_register(struct ksz_device *dev)
 	for (port_num = 0; port_num < dev->info->port_cnt; ++port_num)
 		dev->ports[port_num].interface = PHY_INTERFACE_MODE_NA;
 	if (dev->dev->of_node) {
+		ret = ksz_parse_drive_strength(dev);
+		if (ret)
+			return ret;
+
 		ret = of_get_phy_mode(dev->dev->of_node, &interface);
 		if (ret == 0)
 			dev->compat_interface = interface;
diff --git a/drivers/net/dsa/microchip/ksz_common.h b/drivers/net/dsa/microchip/ksz_common.h
index a4de58847dea..d180c8a34e27 100644
--- a/drivers/net/dsa/microchip/ksz_common.h
+++ b/drivers/net/dsa/microchip/ksz_common.h
@@ -117,6 +117,7 @@ struct ksz_port {
 	u32 rgmii_tx_val;
 	u32 rgmii_rx_val;
 	struct ksz_device *ksz_dev;
+	void *acl_priv;
 	struct ksz_irq pirq;
 	u8 num;
 #if IS_ENABLED(CONFIG_NET_DSA_MICROCHIP_KSZ_PTP)
@@ -689,6 +690,26 @@ static inline int is_lan937x(struct ksz_device *dev)
 #define KSZ8_LEGAL_PACKET_SIZE		1518
 #define KSZ9477_MAX_FRAME_SIZE		9000
 
+#define KSZ8873_REG_GLOBAL_CTRL_12	0x0e
+/* Drive Strength of I/O Pad
+ * 0: 8mA, 1: 16mA
+ */
+#define KSZ8873_DRIVE_STRENGTH_16MA	BIT(6)
+
+#define KSZ8795_REG_SW_CTRL_20		0xa3
+#define KSZ9477_REG_SW_IO_STRENGTH	0x010d
+#define SW_DRIVE_STRENGTH_M		0x7
+#define SW_DRIVE_STRENGTH_2MA		0
+#define SW_DRIVE_STRENGTH_4MA		1
+#define SW_DRIVE_STRENGTH_8MA		2
+#define SW_DRIVE_STRENGTH_12MA		3
+#define SW_DRIVE_STRENGTH_16MA		4
+#define SW_DRIVE_STRENGTH_20MA		5
+#define SW_DRIVE_STRENGTH_24MA		6
+#define SW_DRIVE_STRENGTH_28MA		7
+#define SW_HI_SPEED_DRIVE_STRENGTH_S	4
+#define SW_LO_SPEED_DRIVE_STRENGTH_S	0
+
 #define KSZ9477_REG_PORT_OUT_RATE_0	0x0420
 #define KSZ9477_OUT_RATE_NO_LIMIT	0
 
diff --git a/drivers/net/dsa/mt7530-mmio.c b/drivers/net/dsa/mt7530-mmio.c
index 0a6a2fe34e64..b74a230a3f13 100644
--- a/drivers/net/dsa/mt7530-mmio.c
+++ b/drivers/net/dsa/mt7530-mmio.c
@@ -63,15 +63,12 @@ mt7988_probe(struct platform_device *pdev)
 	return dsa_register_switch(priv->ds);
 }
 
-static int
-mt7988_remove(struct platform_device *pdev)
+static void mt7988_remove(struct platform_device *pdev)
 {
 	struct mt7530_priv *priv = platform_get_drvdata(pdev);
 
 	if (priv)
 		mt7530_remove_common(priv);
-
-	return 0;
 }
 
 static void mt7988_shutdown(struct platform_device *pdev)
@@ -88,7 +85,7 @@ static void mt7988_shutdown(struct platform_device *pdev)
 
 static struct platform_driver mt7988_platform_driver = {
 	.probe  = mt7988_probe,
-	.remove = mt7988_remove,
+	.remove_new = mt7988_remove,
 	.shutdown = mt7988_shutdown,
 	.driver = {
 		.name = "mt7530-mmio",
diff --git a/drivers/net/dsa/mv88e6xxx/pcs-639x.c b/drivers/net/dsa/mv88e6xxx/pcs-639x.c
index ba373656bfe1..9a8429f5d09c 100644
--- a/drivers/net/dsa/mv88e6xxx/pcs-639x.c
+++ b/drivers/net/dsa/mv88e6xxx/pcs-639x.c
@@ -208,7 +208,7 @@ static void mv88e639x_sgmii_pcs_pre_config(struct phylink_pcs *pcs,
 
 static int mv88e6390_erratum_3_14(struct mv88e639x_pcs *mpcs)
 {
-	const int lanes[] = { MV88E6390_PORT9_LANE0, MV88E6390_PORT9_LANE1,
+	static const int lanes[] = { MV88E6390_PORT9_LANE0, MV88E6390_PORT9_LANE1,
 		MV88E6390_PORT9_LANE2, MV88E6390_PORT9_LANE3,
 		MV88E6390_PORT10_LANE0, MV88E6390_PORT10_LANE1,
 		MV88E6390_PORT10_LANE2, MV88E6390_PORT10_LANE3 };
diff --git a/drivers/net/dsa/ocelot/ocelot_ext.c b/drivers/net/dsa/ocelot/ocelot_ext.c
index c29bee5a5c48..22187d831c4b 100644
--- a/drivers/net/dsa/ocelot/ocelot_ext.c
+++ b/drivers/net/dsa/ocelot/ocelot_ext.c
@@ -115,19 +115,17 @@ err_free_felix:
 	return err;
 }
 
-static int ocelot_ext_remove(struct platform_device *pdev)
+static void ocelot_ext_remove(struct platform_device *pdev)
 {
 	struct felix *felix = dev_get_drvdata(&pdev->dev);
 
 	if (!felix)
-		return 0;
+		return;
 
 	dsa_unregister_switch(felix->ds);
 
 	kfree(felix->ds);
 	kfree(felix);
-
-	return 0;
 }
 
 static void ocelot_ext_shutdown(struct platform_device *pdev)
@@ -154,7 +152,7 @@ static struct platform_driver ocelot_ext_switch_driver = {
 		.of_match_table = ocelot_ext_switch_of_match,
 	},
 	.probe = ocelot_ext_probe,
-	.remove = ocelot_ext_remove,
+	.remove_new = ocelot_ext_remove,
 	.shutdown = ocelot_ext_shutdown,
 };
 module_platform_driver(ocelot_ext_switch_driver);
diff --git a/drivers/net/dsa/ocelot/seville_vsc9953.c b/drivers/net/dsa/ocelot/seville_vsc9953.c
index 8f912bda120b..049930da0521 100644
--- a/drivers/net/dsa/ocelot/seville_vsc9953.c
+++ b/drivers/net/dsa/ocelot/seville_vsc9953.c
@@ -1029,19 +1029,17 @@ err_alloc_felix:
 	return err;
 }
 
-static int seville_remove(struct platform_device *pdev)
+static void seville_remove(struct platform_device *pdev)
 {
 	struct felix *felix = platform_get_drvdata(pdev);
 
 	if (!felix)
-		return 0;
+		return;
 
 	dsa_unregister_switch(felix->ds);
 
 	kfree(felix->ds);
 	kfree(felix);
-
-	return 0;
 }
 
 static void seville_shutdown(struct platform_device *pdev)
@@ -1064,7 +1062,7 @@ MODULE_DEVICE_TABLE(of, seville_of_match);
 
 static struct platform_driver seville_vsc9953_driver = {
 	.probe		= seville_probe,
-	.remove		= seville_remove,
+	.remove_new	= seville_remove,
 	.shutdown	= seville_shutdown,
 	.driver = {
 		.name		= "mscc_seville",
diff --git a/drivers/net/dsa/realtek/realtek-smi.c b/drivers/net/dsa/realtek/realtek-smi.c
index ff13563059c5..bfd11591faf4 100644
--- a/drivers/net/dsa/realtek/realtek-smi.c
+++ b/drivers/net/dsa/realtek/realtek-smi.c
@@ -506,12 +506,12 @@ static int realtek_smi_probe(struct platform_device *pdev)
 	return 0;
 }
 
-static int realtek_smi_remove(struct platform_device *pdev)
+static void realtek_smi_remove(struct platform_device *pdev)
 {
 	struct realtek_priv *priv = platform_get_drvdata(pdev);
 
 	if (!priv)
-		return 0;
+		return;
 
 	dsa_unregister_switch(priv->ds);
 	if (priv->slave_mii_bus)
@@ -520,8 +520,6 @@ static int realtek_smi_remove(struct platform_device *pdev)
 	/* leave the device reset asserted */
 	if (priv->reset)
 		gpiod_set_value(priv->reset, 1);
-
-	return 0;
 }
 
 static void realtek_smi_shutdown(struct platform_device *pdev)
@@ -559,7 +557,7 @@ static struct platform_driver realtek_smi_driver = {
 		.of_match_table = realtek_smi_of_match,
 	},
 	.probe  = realtek_smi_probe,
-	.remove = realtek_smi_remove,
+	.remove_new = realtek_smi_remove,
 	.shutdown = realtek_smi_shutdown,
 };
 module_platform_driver(realtek_smi_driver);
diff --git a/drivers/net/dsa/realtek/rtl8366rb.c b/drivers/net/dsa/realtek/rtl8366rb.c
index 7868ef237f6c..b39b719a5b8f 100644
--- a/drivers/net/dsa/realtek/rtl8366rb.c
+++ b/drivers/net/dsa/realtek/rtl8366rb.c
@@ -95,12 +95,6 @@
 #define RTL8366RB_PAACR_RX_PAUSE	BIT(6)
 #define RTL8366RB_PAACR_AN		BIT(7)
 
-#define RTL8366RB_PAACR_CPU_PORT	(RTL8366RB_PAACR_SPEED_1000M | \
-					 RTL8366RB_PAACR_FULL_DUPLEX | \
-					 RTL8366RB_PAACR_LINK_UP | \
-					 RTL8366RB_PAACR_TX_PAUSE | \
-					 RTL8366RB_PAACR_RX_PAUSE)
-
 /* bits 0..7 = port 0, bits 8..15 = port 1 */
 #define RTL8366RB_PSTAT0		0x0014
 /* bits 0..7 = port 2, bits 8..15 = port 3 */
@@ -1081,29 +1075,61 @@ rtl8366rb_mac_link_up(struct dsa_switch *ds, int port, unsigned int mode,
 		      int speed, int duplex, bool tx_pause, bool rx_pause)
 {
 	struct realtek_priv *priv = ds->priv;
+	unsigned int val;
 	int ret;
 
+	/* Allow forcing the mode on the fixed CPU port, no autonegotiation.
+	 * We assume autonegotiation works on the PHY-facing ports.
+	 */
 	if (port != priv->cpu_port)
 		return;
 
 	dev_dbg(priv->dev, "MAC link up on CPU port (%d)\n", port);
 
-	/* Force the fixed CPU port into 1Gbit mode, no autonegotiation */
 	ret = regmap_update_bits(priv->map, RTL8366RB_MAC_FORCE_CTRL_REG,
 				 BIT(port), BIT(port));
 	if (ret) {
-		dev_err(priv->dev, "failed to force 1Gbit on CPU port\n");
+		dev_err(priv->dev, "failed to force CPU port\n");
 		return;
 	}
 
+	/* Conjure port config */
+	switch (speed) {
+	case SPEED_10:
+		val = RTL8366RB_PAACR_SPEED_10M;
+		break;
+	case SPEED_100:
+		val = RTL8366RB_PAACR_SPEED_100M;
+		break;
+	case SPEED_1000:
+		val = RTL8366RB_PAACR_SPEED_1000M;
+		break;
+	default:
+		val = RTL8366RB_PAACR_SPEED_1000M;
+		break;
+	}
+
+	if (duplex == DUPLEX_FULL)
+		val |= RTL8366RB_PAACR_FULL_DUPLEX;
+
+	if (tx_pause)
+		val |=  RTL8366RB_PAACR_TX_PAUSE;
+
+	if (rx_pause)
+		val |= RTL8366RB_PAACR_RX_PAUSE;
+
+	val |= RTL8366RB_PAACR_LINK_UP;
+
 	ret = regmap_update_bits(priv->map, RTL8366RB_PAACR2,
 				 0xFF00U,
-				 RTL8366RB_PAACR_CPU_PORT << 8);
+				 val << 8);
 	if (ret) {
 		dev_err(priv->dev, "failed to set PAACR on CPU port\n");
 		return;
 	}
 
+	dev_dbg(priv->dev, "set PAACR to %04x\n", val);
+
 	/* Enable the CPU port */
 	ret = regmap_update_bits(priv->map, RTL8366RB_PECR, BIT(port),
 				 0);
diff --git a/drivers/net/dsa/rzn1_a5psw.c b/drivers/net/dsa/rzn1_a5psw.c
index 2eda10b33f2e..10092ea85e46 100644
--- a/drivers/net/dsa/rzn1_a5psw.c
+++ b/drivers/net/dsa/rzn1_a5psw.c
@@ -1272,19 +1272,17 @@ free_pcs:
 	return ret;
 }
 
-static int a5psw_remove(struct platform_device *pdev)
+static void a5psw_remove(struct platform_device *pdev)
 {
 	struct a5psw *a5psw = platform_get_drvdata(pdev);
 
 	if (!a5psw)
-		return 0;
+		return;
 
 	dsa_unregister_switch(&a5psw->ds);
 	a5psw_pcs_free(a5psw);
 	clk_disable_unprepare(a5psw->hclk);
 	clk_disable_unprepare(a5psw->clk);
-
-	return 0;
 }
 
 static void a5psw_shutdown(struct platform_device *pdev)
@@ -1311,7 +1309,7 @@ static struct platform_driver a5psw_driver = {
 		.of_match_table = a5psw_of_mtable,
 	},
 	.probe = a5psw_probe,
-	.remove = a5psw_remove,
+	.remove_new = a5psw_remove,
 	.shutdown = a5psw_shutdown,
 };
 module_platform_driver(a5psw_driver);
diff --git a/drivers/net/dsa/sja1105/sja1105_clocking.c b/drivers/net/dsa/sja1105/sja1105_clocking.c
index e3699f76f6d7..08a3e7b96254 100644
--- a/drivers/net/dsa/sja1105/sja1105_clocking.c
+++ b/drivers/net/dsa/sja1105/sja1105_clocking.c
@@ -153,14 +153,14 @@ static int sja1105_cgu_mii_tx_clk_config(struct sja1105_private *priv,
 {
 	const struct sja1105_regs *regs = priv->info->regs;
 	struct sja1105_cgu_mii_ctrl mii_tx_clk;
-	const int mac_clk_sources[] = {
+	static const int mac_clk_sources[] = {
 		CLKSRC_MII0_TX_CLK,
 		CLKSRC_MII1_TX_CLK,
 		CLKSRC_MII2_TX_CLK,
 		CLKSRC_MII3_TX_CLK,
 		CLKSRC_MII4_TX_CLK,
 	};
-	const int phy_clk_sources[] = {
+	static const int phy_clk_sources[] = {
 		CLKSRC_IDIV0,
 		CLKSRC_IDIV1,
 		CLKSRC_IDIV2,
@@ -194,7 +194,7 @@ sja1105_cgu_mii_rx_clk_config(struct sja1105_private *priv, int port)
 	const struct sja1105_regs *regs = priv->info->regs;
 	struct sja1105_cgu_mii_ctrl mii_rx_clk;
 	u8 packed_buf[SJA1105_SIZE_CGU_CMD] = {0};
-	const int clk_sources[] = {
+	static const int clk_sources[] = {
 		CLKSRC_MII0_RX_CLK,
 		CLKSRC_MII1_RX_CLK,
 		CLKSRC_MII2_RX_CLK,
@@ -221,7 +221,7 @@ sja1105_cgu_mii_ext_tx_clk_config(struct sja1105_private *priv, int port)
 	const struct sja1105_regs *regs = priv->info->regs;
 	struct sja1105_cgu_mii_ctrl mii_ext_tx_clk;
 	u8 packed_buf[SJA1105_SIZE_CGU_CMD] = {0};
-	const int clk_sources[] = {
+	static const int clk_sources[] = {
 		CLKSRC_IDIV0,
 		CLKSRC_IDIV1,
 		CLKSRC_IDIV2,
@@ -248,7 +248,7 @@ sja1105_cgu_mii_ext_rx_clk_config(struct sja1105_private *priv, int port)
 	const struct sja1105_regs *regs = priv->info->regs;
 	struct sja1105_cgu_mii_ctrl mii_ext_rx_clk;
 	u8 packed_buf[SJA1105_SIZE_CGU_CMD] = {0};
-	const int clk_sources[] = {
+	static const int clk_sources[] = {
 		CLKSRC_IDIV0,
 		CLKSRC_IDIV1,
 		CLKSRC_IDIV2,
@@ -349,8 +349,13 @@ static int sja1105_cgu_rgmii_tx_clk_config(struct sja1105_private *priv,
 	if (speed == priv->info->port_speed[SJA1105_SPEED_1000MBPS]) {
 		clksrc = CLKSRC_PLL0;
 	} else {
-		int clk_sources[] = {CLKSRC_IDIV0, CLKSRC_IDIV1, CLKSRC_IDIV2,
-				     CLKSRC_IDIV3, CLKSRC_IDIV4};
+		static const int clk_sources[] = {
+			CLKSRC_IDIV0,
+			CLKSRC_IDIV1,
+			CLKSRC_IDIV2,
+			CLKSRC_IDIV3,
+			CLKSRC_IDIV4,
+		};
 		clksrc = clk_sources[port];
 	}
 
@@ -638,7 +643,7 @@ static int sja1105_cgu_rmii_ref_clk_config(struct sja1105_private *priv,
 	const struct sja1105_regs *regs = priv->info->regs;
 	struct sja1105_cgu_mii_ctrl ref_clk;
 	u8 packed_buf[SJA1105_SIZE_CGU_CMD] = {0};
-	const int clk_sources[] = {
+	static const int clk_sources[] = {
 		CLKSRC_MII0_TX_CLK,
 		CLKSRC_MII1_TX_CLK,
 		CLKSRC_MII2_TX_CLK,
diff --git a/drivers/net/dsa/vitesse-vsc73xx-platform.c b/drivers/net/dsa/vitesse-vsc73xx-platform.c
index bd4206e8f9af..755b7895a15a 100644
--- a/drivers/net/dsa/vitesse-vsc73xx-platform.c
+++ b/drivers/net/dsa/vitesse-vsc73xx-platform.c
@@ -112,16 +112,14 @@ static int vsc73xx_platform_probe(struct platform_device *pdev)
 	return vsc73xx_probe(&vsc_platform->vsc);
 }
 
-static int vsc73xx_platform_remove(struct platform_device *pdev)
+static void vsc73xx_platform_remove(struct platform_device *pdev)
 {
 	struct vsc73xx_platform *vsc_platform = platform_get_drvdata(pdev);
 
 	if (!vsc_platform)
-		return 0;
+		return;
 
 	vsc73xx_remove(&vsc_platform->vsc);
-
-	return 0;
 }
 
 static void vsc73xx_platform_shutdown(struct platform_device *pdev)
@@ -160,7 +158,7 @@ MODULE_DEVICE_TABLE(of, vsc73xx_of_match);
 
 static struct platform_driver vsc73xx_platform_driver = {
 	.probe = vsc73xx_platform_probe,
-	.remove = vsc73xx_platform_remove,
+	.remove_new = vsc73xx_platform_remove,
 	.shutdown = vsc73xx_platform_shutdown,
 	.driver = {
 		.name = "vsc73xx-platform",
diff --git a/drivers/net/ethernet/8390/ax88796.c b/drivers/net/ethernet/8390/ax88796.c
index af603256b724..2874680ef24d 100644
--- a/drivers/net/ethernet/8390/ax88796.c
+++ b/drivers/net/ethernet/8390/ax88796.c
@@ -811,7 +811,7 @@ static int ax_init_dev(struct net_device *dev)
 	return ret;
 }
 
-static int ax_remove(struct platform_device *pdev)
+static void ax_remove(struct platform_device *pdev)
 {
 	struct net_device *dev = platform_get_drvdata(pdev);
 	struct ei_device *ei_local = netdev_priv(dev);
@@ -832,8 +832,6 @@ static int ax_remove(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, NULL);
 	free_netdev(dev);
-
-	return 0;
 }
 
 /*
@@ -1011,7 +1009,7 @@ static struct platform_driver axdrv = {
 		.name		= "ax88796",
 	},
 	.probe		= ax_probe,
-	.remove		= ax_remove,
+	.remove_new	= ax_remove,
 	.suspend	= ax_suspend,
 	.resume		= ax_resume,
 };
diff --git a/drivers/net/ethernet/8390/mcf8390.c b/drivers/net/ethernet/8390/mcf8390.c
index 217838b28220..5a0fa995e643 100644
--- a/drivers/net/ethernet/8390/mcf8390.c
+++ b/drivers/net/ethernet/8390/mcf8390.c
@@ -441,7 +441,7 @@ static int mcf8390_probe(struct platform_device *pdev)
 	return 0;
 }
 
-static int mcf8390_remove(struct platform_device *pdev)
+static void mcf8390_remove(struct platform_device *pdev)
 {
 	struct net_device *dev = platform_get_drvdata(pdev);
 	struct resource *mem;
@@ -450,7 +450,6 @@ static int mcf8390_remove(struct platform_device *pdev)
 	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	release_mem_region(mem->start, resource_size(mem));
 	free_netdev(dev);
-	return 0;
 }
 
 static struct platform_driver mcf8390_drv = {
@@ -458,7 +457,7 @@ static struct platform_driver mcf8390_drv = {
 		.name	= "mcf8390",
 	},
 	.probe		= mcf8390_probe,
-	.remove		= mcf8390_remove,
+	.remove_new	= mcf8390_remove,
 };
 
 module_platform_driver(mcf8390_drv);
diff --git a/drivers/net/ethernet/8390/ne.c b/drivers/net/ethernet/8390/ne.c
index 7d89ec1cf273..350683a09d2e 100644
--- a/drivers/net/ethernet/8390/ne.c
+++ b/drivers/net/ethernet/8390/ne.c
@@ -823,7 +823,7 @@ static int __init ne_drv_probe(struct platform_device *pdev)
 	return 0;
 }
 
-static int ne_drv_remove(struct platform_device *pdev)
+static void ne_drv_remove(struct platform_device *pdev)
 {
 	struct net_device *dev = platform_get_drvdata(pdev);
 
@@ -842,7 +842,6 @@ static int ne_drv_remove(struct platform_device *pdev)
 		release_region(dev->base_addr, NE_IO_EXTENT);
 		free_netdev(dev);
 	}
-	return 0;
 }
 
 /* Remove unused devices or all if true. */
@@ -895,7 +894,7 @@ static int ne_drv_resume(struct platform_device *pdev)
 #endif
 
 static struct platform_driver ne_driver = {
-	.remove		= ne_drv_remove,
+	.remove_new	= ne_drv_remove,
 	.suspend	= ne_drv_suspend,
 	.resume		= ne_drv_resume,
 	.driver		= {
diff --git a/drivers/net/ethernet/actions/owl-emac.c b/drivers/net/ethernet/actions/owl-emac.c
index c6f8f852bff1..e03193da5874 100644
--- a/drivers/net/ethernet/actions/owl-emac.c
+++ b/drivers/net/ethernet/actions/owl-emac.c
@@ -1582,15 +1582,13 @@ static int owl_emac_probe(struct platform_device *pdev)
 	return 0;
 }
 
-static int owl_emac_remove(struct platform_device *pdev)
+static void owl_emac_remove(struct platform_device *pdev)
 {
 	struct owl_emac_priv *priv = platform_get_drvdata(pdev);
 
 	netif_napi_del(&priv->napi);
 	phy_disconnect(priv->netdev->phydev);
 	cancel_work_sync(&priv->mac_reset_task);
-
-	return 0;
 }
 
 static const struct of_device_id owl_emac_of_match[] = {
@@ -1609,7 +1607,7 @@ static struct platform_driver owl_emac_driver = {
 		.pm = &owl_emac_pm_ops,
 	},
 	.probe = owl_emac_probe,
-	.remove = owl_emac_remove,
+	.remove_new = owl_emac_remove,
 };
 module_platform_driver(owl_emac_driver);
 
diff --git a/drivers/net/ethernet/aeroflex/greth.c b/drivers/net/ethernet/aeroflex/greth.c
index 597a02c75d52..27af7746d645 100644
--- a/drivers/net/ethernet/aeroflex/greth.c
+++ b/drivers/net/ethernet/aeroflex/greth.c
@@ -1525,7 +1525,7 @@ error1:
 	return err;
 }
 
-static int greth_of_remove(struct platform_device *of_dev)
+static void greth_of_remove(struct platform_device *of_dev)
 {
 	struct net_device *ndev = platform_get_drvdata(of_dev);
 	struct greth_private *greth = netdev_priv(ndev);
@@ -1544,8 +1544,6 @@ static int greth_of_remove(struct platform_device *of_dev)
 	of_iounmap(&of_dev->resource[0], greth->regs, resource_size(&of_dev->resource[0]));
 
 	free_netdev(ndev);
-
-	return 0;
 }
 
 static const struct of_device_id greth_of_match[] = {
@@ -1566,7 +1564,7 @@ static struct platform_driver greth_of_driver = {
 		.of_match_table = greth_of_match,
 	},
 	.probe = greth_of_probe,
-	.remove = greth_of_remove,
+	.remove_new = greth_of_remove,
 };
 
 module_platform_driver(greth_of_driver);
diff --git a/drivers/net/ethernet/allwinner/sun4i-emac.c b/drivers/net/ethernet/allwinner/sun4i-emac.c
index a94c62956eed..d761c08fe5c1 100644
--- a/drivers/net/ethernet/allwinner/sun4i-emac.c
+++ b/drivers/net/ethernet/allwinner/sun4i-emac.c
@@ -1083,7 +1083,7 @@ out:
 	return ret;
 }
 
-static int emac_remove(struct platform_device *pdev)
+static void emac_remove(struct platform_device *pdev)
 {
 	struct net_device *ndev = platform_get_drvdata(pdev);
 	struct emac_board_info *db = netdev_priv(ndev);
@@ -1101,7 +1101,6 @@ static int emac_remove(struct platform_device *pdev)
 	free_netdev(ndev);
 
 	dev_dbg(&pdev->dev, "released and freed device\n");
-	return 0;
 }
 
 static int emac_suspend(struct platform_device *dev, pm_message_t state)
@@ -1143,7 +1142,7 @@ static struct platform_driver emac_driver = {
 		.of_match_table = emac_of_match,
 	},
 	.probe = emac_probe,
-	.remove = emac_remove,
+	.remove_new = emac_remove,
 	.suspend = emac_suspend,
 	.resume = emac_resume,
 };
diff --git a/drivers/net/ethernet/altera/altera_tse_main.c b/drivers/net/ethernet/altera/altera_tse_main.c
index 2e15800e5310..1b1799985d1d 100644
--- a/drivers/net/ethernet/altera/altera_tse_main.c
+++ b/drivers/net/ethernet/altera/altera_tse_main.c
@@ -1464,7 +1464,7 @@ err_free_netdev:
 
 /* Remove Altera TSE MAC device
  */
-static int altera_tse_remove(struct platform_device *pdev)
+static void altera_tse_remove(struct platform_device *pdev)
 {
 	struct net_device *ndev = platform_get_drvdata(pdev);
 	struct altera_tse_private *priv = netdev_priv(ndev);
@@ -1476,8 +1476,6 @@ static int altera_tse_remove(struct platform_device *pdev)
 	lynx_pcs_destroy(priv->pcs);
 
 	free_netdev(ndev);
-
-	return 0;
 }
 
 static const struct altera_dmaops altera_dtype_sgdma = {
@@ -1528,7 +1526,7 @@ MODULE_DEVICE_TABLE(of, altera_tse_ids);
 
 static struct platform_driver altera_tse_driver = {
 	.probe		= altera_tse_probe,
-	.remove		= altera_tse_remove,
+	.remove_new	= altera_tse_remove,
 	.suspend	= NULL,
 	.resume		= NULL,
 	.driver		= {
diff --git a/drivers/net/ethernet/amd/au1000_eth.c b/drivers/net/ethernet/amd/au1000_eth.c
index c5cec4e79489..85c978149bf6 100644
--- a/drivers/net/ethernet/amd/au1000_eth.c
+++ b/drivers/net/ethernet/amd/au1000_eth.c
@@ -1323,7 +1323,7 @@ out:
 	return err;
 }
 
-static int au1000_remove(struct platform_device *pdev)
+static void au1000_remove(struct platform_device *pdev)
 {
 	struct net_device *dev = platform_get_drvdata(pdev);
 	struct au1000_private *aup = netdev_priv(dev);
@@ -1359,13 +1359,11 @@ static int au1000_remove(struct platform_device *pdev)
 	release_mem_region(macen->start, resource_size(macen));
 
 	free_netdev(dev);
-
-	return 0;
 }
 
 static struct platform_driver au1000_eth_driver = {
 	.probe  = au1000_probe,
-	.remove = au1000_remove,
+	.remove_new = au1000_remove,
 	.driver = {
 		.name   = "au1000-eth",
 	},
diff --git a/drivers/net/ethernet/amd/pds_core/core.c b/drivers/net/ethernet/amd/pds_core/core.c
index 36f9b932b9e2..2a8643e167e1 100644
--- a/drivers/net/ethernet/amd/pds_core/core.c
+++ b/drivers/net/ethernet/amd/pds_core/core.c
@@ -445,12 +445,13 @@ int pdsc_setup(struct pdsc *pdsc, bool init)
 		goto err_out_teardown;
 
 	/* Set up the VIFs */
-	err = pdsc_viftypes_init(pdsc);
-	if (err)
-		goto err_out_teardown;
+	if (init) {
+		err = pdsc_viftypes_init(pdsc);
+		if (err)
+			goto err_out_teardown;
 
-	if (init)
 		pdsc_debugfs_add_viftype(pdsc);
+	}
 
 	clear_bit(PDSC_S_FW_DEAD, &pdsc->state);
 	return 0;
@@ -469,8 +470,10 @@ void pdsc_teardown(struct pdsc *pdsc, bool removing)
 	pdsc_qcq_free(pdsc, &pdsc->notifyqcq);
 	pdsc_qcq_free(pdsc, &pdsc->adminqcq);
 
-	kfree(pdsc->viftype_status);
-	pdsc->viftype_status = NULL;
+	if (removing) {
+		kfree(pdsc->viftype_status);
+		pdsc->viftype_status = NULL;
+	}
 
 	if (pdsc->intr_info) {
 		for (i = 0; i < pdsc->nintrs; i++)
@@ -512,7 +515,7 @@ void pdsc_stop(struct pdsc *pdsc)
 					   PDS_CORE_INTR_MASK_SET);
 }
 
-static void pdsc_fw_down(struct pdsc *pdsc)
+void pdsc_fw_down(struct pdsc *pdsc)
 {
 	union pds_core_notifyq_comp reset_event = {
 		.reset.ecode = cpu_to_le16(PDS_EVENT_RESET),
@@ -520,10 +523,13 @@ static void pdsc_fw_down(struct pdsc *pdsc)
 	};
 
 	if (test_and_set_bit(PDSC_S_FW_DEAD, &pdsc->state)) {
-		dev_err(pdsc->dev, "%s: already happening\n", __func__);
+		dev_warn(pdsc->dev, "%s: already happening\n", __func__);
 		return;
 	}
 
+	if (pdsc->pdev->is_virtfn)
+		return;
+
 	/* Notify clients of fw_down */
 	if (pdsc->fw_reporter)
 		devlink_health_report(pdsc->fw_reporter, "FW down reported", pdsc);
@@ -533,7 +539,7 @@ static void pdsc_fw_down(struct pdsc *pdsc)
 	pdsc_teardown(pdsc, PDSC_TEARDOWN_RECOVERY);
 }
 
-static void pdsc_fw_up(struct pdsc *pdsc)
+void pdsc_fw_up(struct pdsc *pdsc)
 {
 	union pds_core_notifyq_comp reset_event = {
 		.reset.ecode = cpu_to_le16(PDS_EVENT_RESET),
@@ -546,6 +552,11 @@ static void pdsc_fw_up(struct pdsc *pdsc)
 		return;
 	}
 
+	if (pdsc->pdev->is_virtfn) {
+		clear_bit(PDSC_S_FW_DEAD, &pdsc->state);
+		return;
+	}
+
 	err = pdsc_setup(pdsc, PDSC_SETUP_RECOVERY);
 	if (err)
 		goto err_out;
@@ -567,6 +578,18 @@ err_out:
 	pdsc_teardown(pdsc, PDSC_TEARDOWN_RECOVERY);
 }
 
+static void pdsc_check_pci_health(struct pdsc *pdsc)
+{
+	u8 fw_status = ioread8(&pdsc->info_regs->fw_status);
+
+	/* is PCI broken? */
+	if (fw_status != PDS_RC_BAD_PCI)
+		return;
+
+	pdsc_reset_prepare(pdsc->pdev);
+	pdsc_reset_done(pdsc->pdev);
+}
+
 void pdsc_health_thread(struct work_struct *work)
 {
 	struct pdsc *pdsc = container_of(work, struct pdsc, health_work);
@@ -593,6 +616,8 @@ void pdsc_health_thread(struct work_struct *work)
 			pdsc_fw_down(pdsc);
 	}
 
+	pdsc_check_pci_health(pdsc);
+
 	pdsc->fw_generation = pdsc->fw_status & PDS_CORE_FW_STS_F_GENERATION;
 
 out_unlock:
diff --git a/drivers/net/ethernet/amd/pds_core/core.h b/drivers/net/ethernet/amd/pds_core/core.h
index e545fafc4819..f3a7deda9972 100644
--- a/drivers/net/ethernet/amd/pds_core/core.h
+++ b/drivers/net/ethernet/amd/pds_core/core.h
@@ -283,6 +283,9 @@ int pdsc_devcmd_reset(struct pdsc *pdsc);
 int pdsc_dev_reinit(struct pdsc *pdsc);
 int pdsc_dev_init(struct pdsc *pdsc);
 
+void pdsc_reset_prepare(struct pci_dev *pdev);
+void pdsc_reset_done(struct pci_dev *pdev);
+
 int pdsc_intr_alloc(struct pdsc *pdsc, char *name,
 		    irq_handler_t handler, void *data);
 void pdsc_intr_free(struct pdsc *pdsc, int index);
@@ -309,4 +312,8 @@ irqreturn_t pdsc_adminq_isr(int irq, void *data);
 
 int pdsc_firmware_update(struct pdsc *pdsc, const struct firmware *fw,
 			 struct netlink_ext_ack *extack);
+
+void pdsc_fw_down(struct pdsc *pdsc);
+void pdsc_fw_up(struct pdsc *pdsc);
+
 #endif /* _PDSC_H_ */
diff --git a/drivers/net/ethernet/amd/pds_core/dev.c b/drivers/net/ethernet/amd/pds_core/dev.c
index f77cd9f5a2fd..7c1b965d61a9 100644
--- a/drivers/net/ethernet/amd/pds_core/dev.c
+++ b/drivers/net/ethernet/amd/pds_core/dev.c
@@ -42,6 +42,8 @@ int pdsc_err_to_errno(enum pds_core_status_code code)
 		return -ERANGE;
 	case PDS_RC_BAD_ADDR:
 		return -EFAULT;
+	case PDS_RC_BAD_PCI:
+		return -ENXIO;
 	case PDS_RC_EOPCODE:
 	case PDS_RC_EINTR:
 	case PDS_RC_DEV_CMD:
@@ -62,7 +64,7 @@ bool pdsc_is_fw_running(struct pdsc *pdsc)
 	/* Firmware is useful only if the running bit is set and
 	 * fw_status != 0xff (bad PCI read)
 	 */
-	return (pdsc->fw_status != 0xff) &&
+	return (pdsc->fw_status != PDS_RC_BAD_PCI) &&
 		(pdsc->fw_status & PDS_CORE_FW_STS_F_RUNNING);
 }
 
@@ -128,6 +130,7 @@ static int pdsc_devcmd_wait(struct pdsc *pdsc, u8 opcode, int max_seconds)
 	unsigned long max_wait;
 	unsigned long duration;
 	int timeout = 0;
+	bool running;
 	int done = 0;
 	int err = 0;
 	int status;
@@ -136,6 +139,10 @@ static int pdsc_devcmd_wait(struct pdsc *pdsc, u8 opcode, int max_seconds)
 	max_wait = start_time + (max_seconds * HZ);
 
 	while (!done && !timeout) {
+		running = pdsc_is_fw_running(pdsc);
+		if (!running)
+			break;
+
 		done = pdsc_devcmd_done(pdsc);
 		if (done)
 			break;
@@ -152,7 +159,7 @@ static int pdsc_devcmd_wait(struct pdsc *pdsc, u8 opcode, int max_seconds)
 		dev_dbg(dev, "DEVCMD %d %s after %ld secs\n",
 			opcode, pdsc_devcmd_str(opcode), duration / HZ);
 
-	if (!done || timeout) {
+	if ((!done || timeout) && running) {
 		dev_err(dev, "DEVCMD %d %s timeout, done %d timeout %d max_seconds=%d\n",
 			opcode, pdsc_devcmd_str(opcode), done, timeout,
 			max_seconds);
diff --git a/drivers/net/ethernet/amd/pds_core/main.c b/drivers/net/ethernet/amd/pds_core/main.c
index 3a45bf474a19..3080898d7b95 100644
--- a/drivers/net/ethernet/amd/pds_core/main.c
+++ b/drivers/net/ethernet/amd/pds_core/main.c
@@ -445,12 +445,62 @@ static void pdsc_remove(struct pci_dev *pdev)
 	devlink_free(dl);
 }
 
+void pdsc_reset_prepare(struct pci_dev *pdev)
+{
+	struct pdsc *pdsc = pci_get_drvdata(pdev);
+
+	pdsc_fw_down(pdsc);
+
+	pci_free_irq_vectors(pdev);
+	pdsc_unmap_bars(pdsc);
+	pci_release_regions(pdev);
+	pci_disable_device(pdev);
+}
+
+void pdsc_reset_done(struct pci_dev *pdev)
+{
+	struct pdsc *pdsc = pci_get_drvdata(pdev);
+	struct device *dev = pdsc->dev;
+	int err;
+
+	err = pci_enable_device(pdev);
+	if (err) {
+		dev_err(dev, "Cannot enable PCI device: %pe\n", ERR_PTR(err));
+		return;
+	}
+	pci_set_master(pdev);
+
+	if (!pdev->is_virtfn) {
+		pcie_print_link_status(pdsc->pdev);
+
+		err = pci_request_regions(pdsc->pdev, PDS_CORE_DRV_NAME);
+		if (err) {
+			dev_err(pdsc->dev, "Cannot request PCI regions: %pe\n",
+				ERR_PTR(err));
+			return;
+		}
+
+		err = pdsc_map_bars(pdsc);
+		if (err)
+			return;
+	}
+
+	pdsc_fw_up(pdsc);
+}
+
+static const struct pci_error_handlers pdsc_err_handler = {
+	/* FLR handling */
+	.reset_prepare      = pdsc_reset_prepare,
+	.reset_done         = pdsc_reset_done,
+};
+
 static struct pci_driver pdsc_driver = {
 	.name = PDS_CORE_DRV_NAME,
 	.id_table = pdsc_id_table,
 	.probe = pdsc_probe,
 	.remove = pdsc_remove,
 	.sriov_configure = pdsc_sriov_configure,
+	.err_handler = &pdsc_err_handler,
 };
 
 void *pdsc_get_pf_struct(struct pci_dev *vf_pdev)
diff --git a/drivers/net/ethernet/amd/sunlance.c b/drivers/net/ethernet/amd/sunlance.c
index 33bb539ad70a..c78706d21a6a 100644
--- a/drivers/net/ethernet/amd/sunlance.c
+++ b/drivers/net/ethernet/amd/sunlance.c
@@ -1487,7 +1487,7 @@ static int sunlance_sbus_probe(struct platform_device *op)
 	return err;
 }
 
-static int sunlance_sbus_remove(struct platform_device *op)
+static void sunlance_sbus_remove(struct platform_device *op)
 {
 	struct lance_private *lp = platform_get_drvdata(op);
 	struct net_device *net_dev = lp->dev;
@@ -1497,8 +1497,6 @@ static int sunlance_sbus_remove(struct platform_device *op)
 	lance_free_hwresources(lp);
 
 	free_netdev(net_dev);
-
-	return 0;
 }
 
 static const struct of_device_id sunlance_sbus_match[] = {
@@ -1516,7 +1514,7 @@ static struct platform_driver sunlance_sbus_driver = {
 		.of_match_table = sunlance_sbus_match,
 	},
 	.probe		= sunlance_sbus_probe,
-	.remove		= sunlance_sbus_remove,
+	.remove_new	= sunlance_sbus_remove,
 };
 
 module_platform_driver(sunlance_sbus_driver);
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-platform.c b/drivers/net/ethernet/amd/xgbe/xgbe-platform.c
index 4d790a89fe77..91842a5e161b 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-platform.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-platform.c
@@ -512,7 +512,7 @@ err_alloc:
 	return ret;
 }
 
-static int xgbe_platform_remove(struct platform_device *pdev)
+static void xgbe_platform_remove(struct platform_device *pdev)
 {
 	struct xgbe_prv_data *pdata = platform_get_drvdata(pdev);
 
@@ -521,8 +521,6 @@ static int xgbe_platform_remove(struct platform_device *pdev)
 	platform_device_put(pdata->phy_platdev);
 
 	xgbe_free_pdata(pdata);
-
-	return 0;
 }
 
 #ifdef CONFIG_PM_SLEEP
@@ -615,7 +613,7 @@ static struct platform_driver xgbe_driver = {
 		.pm = &xgbe_platform_pm_ops,
 	},
 	.probe = xgbe_platform_probe,
-	.remove = xgbe_platform_remove,
+	.remove_new = xgbe_platform_remove,
 };
 
 int xgbe_platform_init(void)
diff --git a/drivers/net/ethernet/apm/xgene-v2/main.c b/drivers/net/ethernet/apm/xgene-v2/main.c
index 379d19d18dbe..9e90c2381491 100644
--- a/drivers/net/ethernet/apm/xgene-v2/main.c
+++ b/drivers/net/ethernet/apm/xgene-v2/main.c
@@ -690,7 +690,7 @@ err:
 	return ret;
 }
 
-static int xge_remove(struct platform_device *pdev)
+static void xge_remove(struct platform_device *pdev)
 {
 	struct xge_pdata *pdata;
 	struct net_device *ndev;
@@ -706,8 +706,6 @@ static int xge_remove(struct platform_device *pdev)
 	xge_mdio_remove(ndev);
 	unregister_netdev(ndev);
 	free_netdev(ndev);
-
-	return 0;
 }
 
 static void xge_shutdown(struct platform_device *pdev)
@@ -736,7 +734,7 @@ static struct platform_driver xge_driver = {
 		   .acpi_match_table = ACPI_PTR(xge_acpi_match),
 	},
 	.probe = xge_probe,
-	.remove = xge_remove,
+	.remove_new = xge_remove,
 	.shutdown = xge_shutdown,
 };
 module_platform_driver(xge_driver);
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
index f3305c434c95..91b4ab31f233 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
@@ -2127,7 +2127,7 @@ err:
 	return ret;
 }
 
-static int xgene_enet_remove(struct platform_device *pdev)
+static void xgene_enet_remove(struct platform_device *pdev)
 {
 	struct xgene_enet_pdata *pdata;
 	struct net_device *ndev;
@@ -2149,8 +2149,6 @@ static int xgene_enet_remove(struct platform_device *pdev)
 	xgene_enet_delete_desc_rings(pdata);
 	pdata->port_ops->shutdown(pdata);
 	free_netdev(ndev);
-
-	return 0;
 }
 
 static void xgene_enet_shutdown(struct platform_device *pdev)
@@ -2174,7 +2172,7 @@ static struct platform_driver xgene_enet_driver = {
 		   .acpi_match_table = ACPI_PTR(xgene_enet_acpi_match),
 	},
 	.probe = xgene_enet_probe,
-	.remove = xgene_enet_remove,
+	.remove_new = xgene_enet_remove,
 	.shutdown = xgene_enet_shutdown,
 };
 
diff --git a/drivers/net/ethernet/apple/macmace.c b/drivers/net/ethernet/apple/macmace.c
index 8775c3234e91..766ab78256fe 100644
--- a/drivers/net/ethernet/apple/macmace.c
+++ b/drivers/net/ethernet/apple/macmace.c
@@ -739,7 +739,7 @@ MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("Macintosh MACE ethernet driver");
 MODULE_ALIAS("platform:macmace");
 
-static int mac_mace_device_remove(struct platform_device *pdev)
+static void mac_mace_device_remove(struct platform_device *pdev)
 {
 	struct net_device *dev = platform_get_drvdata(pdev);
 	struct mace_data *mp = netdev_priv(dev);
@@ -755,13 +755,11 @@ static int mac_mace_device_remove(struct platform_device *pdev)
 	                  mp->tx_ring, mp->tx_ring_phys);
 
 	free_netdev(dev);
-
-	return 0;
 }
 
 static struct platform_driver mac_mace_driver = {
 	.probe  = mace_probe,
-	.remove = mac_mace_device_remove,
+	.remove_new = mac_mace_device_remove,
 	.driver	= {
 		.name	= mac_mace_string,
 	},
diff --git a/drivers/net/ethernet/arc/emac_arc.c b/drivers/net/ethernet/arc/emac_arc.c
index ce3147e886a1..a3afddb23ee8 100644
--- a/drivers/net/ethernet/arc/emac_arc.c
+++ b/drivers/net/ethernet/arc/emac_arc.c
@@ -58,14 +58,12 @@ out_netdev:
 	return err;
 }
 
-static int emac_arc_remove(struct platform_device *pdev)
+static void emac_arc_remove(struct platform_device *pdev)
 {
 	struct net_device *ndev = platform_get_drvdata(pdev);
 
 	arc_emac_remove(ndev);
 	free_netdev(ndev);
-
-	return 0;
 }
 
 static const struct of_device_id emac_arc_dt_ids[] = {
@@ -76,7 +74,7 @@ MODULE_DEVICE_TABLE(of, emac_arc_dt_ids);
 
 static struct platform_driver emac_arc_driver = {
 	.probe = emac_arc_probe,
-	.remove = emac_arc_remove,
+	.remove_new = emac_arc_remove,
 	.driver = {
 		.name = DRV_NAME,
 		.of_match_table  = emac_arc_dt_ids,
diff --git a/drivers/net/ethernet/arc/emac_rockchip.c b/drivers/net/ethernet/arc/emac_rockchip.c
index 509101112279..493d6356c8ca 100644
--- a/drivers/net/ethernet/arc/emac_rockchip.c
+++ b/drivers/net/ethernet/arc/emac_rockchip.c
@@ -244,7 +244,7 @@ out_netdev:
 	return err;
 }
 
-static int emac_rockchip_remove(struct platform_device *pdev)
+static void emac_rockchip_remove(struct platform_device *pdev)
 {
 	struct net_device *ndev = platform_get_drvdata(pdev);
 	struct rockchip_priv_data *priv = netdev_priv(ndev);
@@ -260,12 +260,11 @@ static int emac_rockchip_remove(struct platform_device *pdev)
 		clk_disable_unprepare(priv->macclk);
 
 	free_netdev(ndev);
-	return 0;
 }
 
 static struct platform_driver emac_rockchip_driver = {
 	.probe = emac_rockchip_probe,
-	.remove = emac_rockchip_remove,
+	.remove_new = emac_rockchip_remove,
 	.driver = {
 		.name = DRV_NAME,
 		.of_match_table  = emac_rockchip_dt_ids,
diff --git a/drivers/net/ethernet/atheros/ag71xx.c b/drivers/net/ethernet/atheros/ag71xx.c
index 009e0b3066fa..0f2f400b5bc4 100644
--- a/drivers/net/ethernet/atheros/ag71xx.c
+++ b/drivers/net/ethernet/atheros/ag71xx.c
@@ -1968,21 +1968,19 @@ err_put_clk:
 	return err;
 }
 
-static int ag71xx_remove(struct platform_device *pdev)
+static void ag71xx_remove(struct platform_device *pdev)
 {
 	struct net_device *ndev = platform_get_drvdata(pdev);
 	struct ag71xx *ag;
 
 	if (!ndev)
-		return 0;
+		return;
 
 	ag = netdev_priv(ndev);
 	unregister_netdev(ndev);
 	ag71xx_mdio_remove(ag);
 	clk_disable_unprepare(ag->clk_eth);
 	platform_set_drvdata(pdev, NULL);
-
-	return 0;
 }
 
 static const u32 ar71xx_fifo_ar7100[] = {
@@ -2069,7 +2067,7 @@ static const struct of_device_id ag71xx_match[] = {
 
 static struct platform_driver ag71xx_driver = {
 	.probe		= ag71xx_probe,
-	.remove		= ag71xx_remove,
+	.remove_new	= ag71xx_remove,
 	.driver = {
 		.name	= "ag71xx",
 		.of_match_table = ag71xx_match,
diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c.h b/drivers/net/ethernet/atheros/atl1c/atl1c.h
index 43d821fe7a54..63ba64dbb731 100644
--- a/drivers/net/ethernet/atheros/atl1c/atl1c.h
+++ b/drivers/net/ethernet/atheros/atl1c/atl1c.h
@@ -504,15 +504,12 @@ struct atl1c_rrd_ring {
 	u16 next_to_use;
 	u16 next_to_clean;
 	struct napi_struct napi;
-	struct page *rx_page;
-	unsigned int rx_page_offset;
 };
 
 /* board specific private data structure */
 struct atl1c_adapter {
 	struct net_device   *netdev;
 	struct pci_dev      *pdev;
-	unsigned int	    rx_frag_size;
 	struct atl1c_hw        hw;
 	struct atl1c_hw_stats  hw_stats;
 	struct mii_if_info  mii;    /* MII interface info */
diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
index 940c5d1ff9cf..74b78164cf74 100644
--- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
+++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
@@ -483,15 +483,10 @@ static int atl1c_set_mac_addr(struct net_device *netdev, void *p)
 static void atl1c_set_rxbufsize(struct atl1c_adapter *adapter,
 				struct net_device *dev)
 {
-	unsigned int head_size;
 	int mtu = dev->mtu;
 
 	adapter->rx_buffer_len = mtu > AT_RX_BUF_SIZE ?
 		roundup(mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN, 8) : AT_RX_BUF_SIZE;
-
-	head_size = SKB_DATA_ALIGN(adapter->rx_buffer_len + NET_SKB_PAD + NET_IP_ALIGN) +
-		    SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
-	adapter->rx_frag_size = roundup_pow_of_two(head_size);
 }
 
 static netdev_features_t atl1c_fix_features(struct net_device *netdev,
@@ -964,7 +959,6 @@ static void atl1c_init_ring_ptrs(struct atl1c_adapter *adapter)
 static void atl1c_free_ring_resources(struct atl1c_adapter *adapter)
 {
 	struct pci_dev *pdev = adapter->pdev;
-	int i;
 
 	dma_free_coherent(&pdev->dev, adapter->ring_header.size,
 			  adapter->ring_header.desc, adapter->ring_header.dma);
@@ -977,12 +971,6 @@ static void atl1c_free_ring_resources(struct atl1c_adapter *adapter)
 		kfree(adapter->tpd_ring[0].buffer_info);
 		adapter->tpd_ring[0].buffer_info = NULL;
 	}
-	for (i = 0; i < adapter->rx_queue_count; ++i) {
-		if (adapter->rrd_ring[i].rx_page) {
-			put_page(adapter->rrd_ring[i].rx_page);
-			adapter->rrd_ring[i].rx_page = NULL;
-		}
-	}
 }
 
 /**
@@ -1754,48 +1742,11 @@ static inline void atl1c_rx_checksum(struct atl1c_adapter *adapter,
 	skb_checksum_none_assert(skb);
 }
 
-static struct sk_buff *atl1c_alloc_skb(struct atl1c_adapter *adapter,
-				       u32 queue, bool napi_mode)
-{
-	struct atl1c_rrd_ring *rrd_ring = &adapter->rrd_ring[queue];
-	struct sk_buff *skb;
-	struct page *page;
-
-	if (adapter->rx_frag_size > PAGE_SIZE) {
-		if (likely(napi_mode))
-			return napi_alloc_skb(&rrd_ring->napi,
-					      adapter->rx_buffer_len);
-		else
-			return netdev_alloc_skb_ip_align(adapter->netdev,
-							 adapter->rx_buffer_len);
-	}
-
-	page = rrd_ring->rx_page;
-	if (!page) {
-		page = alloc_page(GFP_ATOMIC);
-		if (unlikely(!page))
-			return NULL;
-		rrd_ring->rx_page = page;
-		rrd_ring->rx_page_offset = 0;
-	}
-
-	skb = build_skb(page_address(page) + rrd_ring->rx_page_offset,
-			adapter->rx_frag_size);
-	if (likely(skb)) {
-		skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
-		rrd_ring->rx_page_offset += adapter->rx_frag_size;
-		if (rrd_ring->rx_page_offset >= PAGE_SIZE)
-			rrd_ring->rx_page = NULL;
-		else
-			get_page(page);
-	}
-	return skb;
-}
-
 static int atl1c_alloc_rx_buffer(struct atl1c_adapter *adapter, u32 queue,
 				 bool napi_mode)
 {
 	struct atl1c_rfd_ring *rfd_ring = &adapter->rfd_ring[queue];
+	struct atl1c_rrd_ring *rrd_ring = &adapter->rrd_ring[queue];
 	struct pci_dev *pdev = adapter->pdev;
 	struct atl1c_buffer *buffer_info, *next_info;
 	struct sk_buff *skb;
@@ -1814,13 +1765,27 @@ static int atl1c_alloc_rx_buffer(struct atl1c_adapter *adapter, u32 queue,
 	while (next_info->flags & ATL1C_BUFFER_FREE) {
 		rfd_desc = ATL1C_RFD_DESC(rfd_ring, rfd_next_to_use);
 
-		skb = atl1c_alloc_skb(adapter, queue, napi_mode);
+		/* When DMA RX address is set to something like
+		 * 0x....fc0, it will be very likely to cause DMA
+		 * RFD overflow issue.
+		 *
+		 * To work around it, we apply rx skb with 64 bytes
+		 * longer space, and offset the address whenever
+		 * 0x....fc0 is detected.
+		 */
+		if (likely(napi_mode))
+			skb = napi_alloc_skb(&rrd_ring->napi, adapter->rx_buffer_len + 64);
+		else
+			skb = netdev_alloc_skb(adapter->netdev, adapter->rx_buffer_len + 64);
 		if (unlikely(!skb)) {
 			if (netif_msg_rx_err(adapter))
 				dev_warn(&pdev->dev, "alloc rx buffer failed\n");
 			break;
 		}
 
+		if (((unsigned long)skb->data & 0xfff) == 0xfc0)
+			skb_reserve(skb, 64);
+
 		/*
 		 * Make buffer alignment 2 beyond a 16 byte boundary
 		 * this will result in a 16 byte aligned IP header after
diff --git a/drivers/net/ethernet/broadcom/asp2/bcmasp.c b/drivers/net/ethernet/broadcom/asp2/bcmasp.c
index 41a6098eb0c2..29b04a274d07 100644
--- a/drivers/net/ethernet/broadcom/asp2/bcmasp.c
+++ b/drivers/net/ethernet/broadcom/asp2/bcmasp.c
@@ -1344,17 +1344,15 @@ of_put_exit:
 	return ret;
 }
 
-static int bcmasp_remove(struct platform_device *pdev)
+static void bcmasp_remove(struct platform_device *pdev)
 {
 	struct bcmasp_priv *priv = dev_get_drvdata(&pdev->dev);
 
 	if (!priv)
-		return 0;
+		return;
 
 	priv->destroy_wol(priv);
 	bcmasp_remove_intfs(priv);
-
-	return 0;
 }
 
 static void bcmasp_shutdown(struct platform_device *pdev)
@@ -1428,7 +1426,7 @@ static SIMPLE_DEV_PM_OPS(bcmasp_pm_ops,
 
 static struct platform_driver bcmasp_driver = {
 	.probe = bcmasp_probe,
-	.remove = bcmasp_remove,
+	.remove_new = bcmasp_remove,
 	.shutdown = bcmasp_shutdown,
 	.driver = {
 		.name = "brcm,asp-v2",
diff --git a/drivers/net/ethernet/broadcom/bcm4908_enet.c b/drivers/net/ethernet/broadcom/bcm4908_enet.c
index 33d86683af50..3e7c8671cd11 100644
--- a/drivers/net/ethernet/broadcom/bcm4908_enet.c
+++ b/drivers/net/ethernet/broadcom/bcm4908_enet.c
@@ -768,7 +768,7 @@ err_dma_free:
 	return err;
 }
 
-static int bcm4908_enet_remove(struct platform_device *pdev)
+static void bcm4908_enet_remove(struct platform_device *pdev)
 {
 	struct bcm4908_enet *enet = platform_get_drvdata(pdev);
 
@@ -776,8 +776,6 @@ static int bcm4908_enet_remove(struct platform_device *pdev)
 	netif_napi_del(&enet->rx_ring.napi);
 	netif_napi_del(&enet->tx_ring.napi);
 	bcm4908_enet_dma_free(enet);
-
-	return 0;
 }
 
 static const struct of_device_id bcm4908_enet_of_match[] = {
@@ -791,7 +789,7 @@ static struct platform_driver bcm4908_enet_driver = {
 		.of_match_table = bcm4908_enet_of_match,
 	},
 	.probe	= bcm4908_enet_probe,
-	.remove = bcm4908_enet_remove,
+	.remove_new = bcm4908_enet_remove,
 };
 module_platform_driver(bcm4908_enet_driver);
 
diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c
index a741070f1f9a..105afde5dbe1 100644
--- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c
+++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c
@@ -1902,7 +1902,7 @@ out:
 /*
  * exit func, stops hardware and unregisters netdevice
  */
-static int bcm_enet_remove(struct platform_device *pdev)
+static void bcm_enet_remove(struct platform_device *pdev)
 {
 	struct bcm_enet_priv *priv;
 	struct net_device *dev;
@@ -1932,12 +1932,11 @@ static int bcm_enet_remove(struct platform_device *pdev)
 	clk_disable_unprepare(priv->mac_clk);
 
 	free_netdev(dev);
-	return 0;
 }
 
 static struct platform_driver bcm63xx_enet_driver = {
 	.probe	= bcm_enet_probe,
-	.remove	= bcm_enet_remove,
+	.remove_new = bcm_enet_remove,
 	.driver	= {
 		.name	= "bcm63xx_enet",
 	},
@@ -2739,7 +2738,7 @@ out:
 
 
 /* exit func, stops hardware and unregisters netdevice */
-static int bcm_enetsw_remove(struct platform_device *pdev)
+static void bcm_enetsw_remove(struct platform_device *pdev)
 {
 	struct bcm_enet_priv *priv;
 	struct net_device *dev;
@@ -2752,12 +2751,11 @@ static int bcm_enetsw_remove(struct platform_device *pdev)
 	clk_disable_unprepare(priv->mac_clk);
 
 	free_netdev(dev);
-	return 0;
 }
 
 static struct platform_driver bcm63xx_enetsw_driver = {
 	.probe	= bcm_enetsw_probe,
-	.remove	= bcm_enetsw_remove,
+	.remove_new = bcm_enetsw_remove,
 	.driver	= {
 		.name	= "bcm63xx_enetsw",
 	},
diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c
index bf1611cce974..ab096795e805 100644
--- a/drivers/net/ethernet/broadcom/bcmsysport.c
+++ b/drivers/net/ethernet/broadcom/bcmsysport.c
@@ -2648,7 +2648,7 @@ err_free_netdev:
 	return ret;
 }
 
-static int bcm_sysport_remove(struct platform_device *pdev)
+static void bcm_sysport_remove(struct platform_device *pdev)
 {
 	struct net_device *dev = dev_get_drvdata(&pdev->dev);
 	struct bcm_sysport_priv *priv = netdev_priv(dev);
@@ -2663,8 +2663,6 @@ static int bcm_sysport_remove(struct platform_device *pdev)
 		of_phy_deregister_fixed_link(dn);
 	free_netdev(dev);
 	dev_set_drvdata(&pdev->dev, NULL);
-
-	return 0;
 }
 
 static int bcm_sysport_suspend_to_wol(struct bcm_sysport_priv *priv)
@@ -2901,7 +2899,7 @@ static SIMPLE_DEV_PM_OPS(bcm_sysport_pm_ops,
 
 static struct platform_driver bcm_sysport_driver = {
 	.probe	= bcm_sysport_probe,
-	.remove	= bcm_sysport_remove,
+	.remove_new = bcm_sysport_remove,
 	.driver =  {
 		.name = "brcm-systemport",
 		.of_match_table = bcm_sysport_of_match,
diff --git a/drivers/net/ethernet/broadcom/bgmac-platform.c b/drivers/net/ethernet/broadcom/bgmac-platform.c
index b4381cd41979..0b21fd5bd457 100644
--- a/drivers/net/ethernet/broadcom/bgmac-platform.c
+++ b/drivers/net/ethernet/broadcom/bgmac-platform.c
@@ -246,13 +246,11 @@ static int bgmac_probe(struct platform_device *pdev)
 	return bgmac_enet_probe(bgmac);
 }
 
-static int bgmac_remove(struct platform_device *pdev)
+static void bgmac_remove(struct platform_device *pdev)
 {
 	struct bgmac *bgmac = platform_get_drvdata(pdev);
 
 	bgmac_enet_remove(bgmac);
-
-	return 0;
 }
 
 #ifdef CONFIG_PM
@@ -296,7 +294,7 @@ static struct platform_driver bgmac_enet_driver = {
 		.pm = BGMAC_PM_OPS
 	},
 	.probe = bgmac_probe,
-	.remove = bgmac_remove,
+	.remove_new = bgmac_remove,
 };
 
 module_platform_driver(bgmac_enet_driver);
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index 24bade875ca6..91f3a7e78d65 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -4164,7 +4164,7 @@ err:
 	return err;
 }
 
-static int bcmgenet_remove(struct platform_device *pdev)
+static void bcmgenet_remove(struct platform_device *pdev)
 {
 	struct bcmgenet_priv *priv = dev_to_priv(&pdev->dev);
 
@@ -4172,8 +4172,6 @@ static int bcmgenet_remove(struct platform_device *pdev)
 	unregister_netdev(priv->dev);
 	bcmgenet_mii_exit(priv->dev);
 	free_netdev(priv->dev);
-
-	return 0;
 }
 
 static void bcmgenet_shutdown(struct platform_device *pdev)
@@ -4352,7 +4350,7 @@ MODULE_DEVICE_TABLE(acpi, genet_acpi_match);
 
 static struct platform_driver bcmgenet_driver = {
 	.probe	= bcmgenet_probe,
-	.remove	= bcmgenet_remove,
+	.remove_new = bcmgenet_remove,
 	.shutdown = bcmgenet_shutdown,
 	.driver	= {
 		.name	= "bcmgenet",
diff --git a/drivers/net/ethernet/broadcom/sb1250-mac.c b/drivers/net/ethernet/broadcom/sb1250-mac.c
index 3a6763c5e8b3..fcf8485f3446 100644
--- a/drivers/net/ethernet/broadcom/sb1250-mac.c
+++ b/drivers/net/ethernet/broadcom/sb1250-mac.c
@@ -2593,7 +2593,7 @@ out_out:
 	return err;
 }
 
-static int sbmac_remove(struct platform_device *pldev)
+static void sbmac_remove(struct platform_device *pldev)
 {
 	struct net_device *dev = platform_get_drvdata(pldev);
 	struct sbmac_softc *sc = netdev_priv(dev);
@@ -2604,13 +2604,11 @@ static int sbmac_remove(struct platform_device *pldev)
 	mdiobus_free(sc->mii_bus);
 	iounmap(sc->sbm_base);
 	free_netdev(dev);
-
-	return 0;
 }
 
 static struct platform_driver sbmac_driver = {
 	.probe = sbmac_probe,
-	.remove = sbmac_remove,
+	.remove_new = sbmac_remove,
 	.driver = {
 		.name = sbmac_string,
 	},
diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index b940dcd3ace6..cebae0f418f2 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -5156,7 +5156,7 @@ err_disable_clocks:
 	return err;
 }
 
-static int macb_remove(struct platform_device *pdev)
+static void macb_remove(struct platform_device *pdev)
 {
 	struct net_device *dev;
 	struct macb *bp;
@@ -5181,8 +5181,6 @@ static int macb_remove(struct platform_device *pdev)
 		phylink_destroy(bp->phylink);
 		free_netdev(dev);
 	}
-
-	return 0;
 }
 
 static int __maybe_unused macb_suspend(struct device *dev)
@@ -5398,7 +5396,7 @@ static const struct dev_pm_ops macb_pm_ops = {
 
 static struct platform_driver macb_driver = {
 	.probe		= macb_probe,
-	.remove		= macb_remove,
+	.remove_new	= macb_remove,
 	.driver		= {
 		.name		= "macb",
 		.of_match_table	= of_match_ptr(macb_dt_ids),
diff --git a/drivers/net/ethernet/calxeda/xgmac.c b/drivers/net/ethernet/calxeda/xgmac.c
index f4f87dfa9687..5e97f1e4e38e 100644
--- a/drivers/net/ethernet/calxeda/xgmac.c
+++ b/drivers/net/ethernet/calxeda/xgmac.c
@@ -1820,7 +1820,7 @@ err_alloc:
  * changes the link status, releases the DMA descriptor rings,
  * unregisters the MDIO bus and unmaps the allocated memory.
  */
-static int xgmac_remove(struct platform_device *pdev)
+static void xgmac_remove(struct platform_device *pdev)
 {
 	struct net_device *ndev = platform_get_drvdata(pdev);
 	struct xgmac_priv *priv = netdev_priv(ndev);
@@ -1840,8 +1840,6 @@ static int xgmac_remove(struct platform_device *pdev)
 	release_mem_region(res->start, resource_size(res));
 
 	free_netdev(ndev);
-
-	return 0;
 }
 
 #ifdef CONFIG_PM_SLEEP
@@ -1921,7 +1919,7 @@ static struct platform_driver xgmac_driver = {
 		.pm = &xgmac_pm_ops,
 	},
 	.probe = xgmac_probe,
-	.remove = xgmac_remove,
+	.remove_new = xgmac_remove,
 };
 
 module_platform_driver(xgmac_driver);
diff --git a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c
index edde0b8fa49c..007d4b06819e 100644
--- a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c
+++ b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c
@@ -1521,7 +1521,7 @@ err:
 	return result;
 }
 
-static int octeon_mgmt_remove(struct platform_device *pdev)
+static void octeon_mgmt_remove(struct platform_device *pdev)
 {
 	struct net_device *netdev = platform_get_drvdata(pdev);
 	struct octeon_mgmt *p = netdev_priv(netdev);
@@ -1529,7 +1529,6 @@ static int octeon_mgmt_remove(struct platform_device *pdev)
 	unregister_netdev(netdev);
 	of_node_put(p->phy_np);
 	free_netdev(netdev);
-	return 0;
 }
 
 static const struct of_device_id octeon_mgmt_match[] = {
@@ -1546,7 +1545,7 @@ static struct platform_driver octeon_mgmt_driver = {
 		.of_match_table = octeon_mgmt_match,
 	},
 	.probe		= octeon_mgmt_probe,
-	.remove		= octeon_mgmt_remove,
+	.remove_new	= octeon_mgmt_remove,
 };
 
 module_platform_driver(octeon_mgmt_driver);
diff --git a/drivers/net/ethernet/cirrus/cs89x0.c b/drivers/net/ethernet/cirrus/cs89x0.c
index d323c5c23521..0a21a10a791c 100644
--- a/drivers/net/ethernet/cirrus/cs89x0.c
+++ b/drivers/net/ethernet/cirrus/cs89x0.c
@@ -1879,7 +1879,7 @@ free:
 	return err;
 }
 
-static int cs89x0_platform_remove(struct platform_device *pdev)
+static void cs89x0_platform_remove(struct platform_device *pdev)
 {
 	struct net_device *dev = platform_get_drvdata(pdev);
 
@@ -1889,7 +1889,6 @@ static int cs89x0_platform_remove(struct platform_device *pdev)
 	 */
 	unregister_netdev(dev);
 	free_netdev(dev);
-	return 0;
 }
 
 static const struct of_device_id __maybe_unused cs89x0_match[] = {
@@ -1904,7 +1903,7 @@ static struct platform_driver cs89x0_driver = {
 		.name		= DRV_NAME,
 		.of_match_table	= of_match_ptr(cs89x0_match),
 	},
-	.remove	= cs89x0_platform_remove,
+	.remove_new = cs89x0_platform_remove,
 };
 
 module_platform_driver_probe(cs89x0_driver, cs89x0_platform_probe);
diff --git a/drivers/net/ethernet/cirrus/ep93xx_eth.c b/drivers/net/ethernet/cirrus/ep93xx_eth.c
index 8627ab19d470..1c2a540db13d 100644
--- a/drivers/net/ethernet/cirrus/ep93xx_eth.c
+++ b/drivers/net/ethernet/cirrus/ep93xx_eth.c
@@ -757,7 +757,7 @@ static struct net_device *ep93xx_dev_alloc(struct ep93xx_eth_data *data)
 }
 
 
-static int ep93xx_eth_remove(struct platform_device *pdev)
+static void ep93xx_eth_remove(struct platform_device *pdev)
 {
 	struct net_device *dev;
 	struct ep93xx_priv *ep;
@@ -765,7 +765,7 @@ static int ep93xx_eth_remove(struct platform_device *pdev)
 
 	dev = platform_get_drvdata(pdev);
 	if (dev == NULL)
-		return 0;
+		return;
 
 	ep = netdev_priv(dev);
 
@@ -782,8 +782,6 @@ static int ep93xx_eth_remove(struct platform_device *pdev)
 	}
 
 	free_netdev(dev);
-
-	return 0;
 }
 
 static int ep93xx_eth_probe(struct platform_device *pdev)
@@ -862,7 +860,7 @@ err_out:
 
 static struct platform_driver ep93xx_eth_driver = {
 	.probe		= ep93xx_eth_probe,
-	.remove		= ep93xx_eth_remove,
+	.remove_new	= ep93xx_eth_remove,
 	.driver		= {
 		.name	= "ep93xx-eth",
 	},
diff --git a/drivers/net/ethernet/cirrus/mac89x0.c b/drivers/net/ethernet/cirrus/mac89x0.c
index 21a70b1f0ac5..887876f35f10 100644
--- a/drivers/net/ethernet/cirrus/mac89x0.c
+++ b/drivers/net/ethernet/cirrus/mac89x0.c
@@ -556,19 +556,18 @@ static int set_mac_address(struct net_device *dev, void *addr)
 
 MODULE_LICENSE("GPL");
 
-static int mac89x0_device_remove(struct platform_device *pdev)
+static void mac89x0_device_remove(struct platform_device *pdev)
 {
 	struct net_device *dev = platform_get_drvdata(pdev);
 
 	unregister_netdev(dev);
 	nubus_writew(0, dev->base_addr + ADD_PORT);
 	free_netdev(dev);
-	return 0;
 }
 
 static struct platform_driver mac89x0_platform_driver = {
 	.probe = mac89x0_device_probe,
-	.remove = mac89x0_device_remove,
+	.remove_new = mac89x0_device_remove,
 	.driver = {
 		.name = "mac89x0",
 	},
diff --git a/drivers/net/ethernet/cortina/gemini.c b/drivers/net/ethernet/cortina/gemini.c
index a8b9d1a3e4d5..5423fe26b4ef 100644
--- a/drivers/net/ethernet/cortina/gemini.c
+++ b/drivers/net/ethernet/cortina/gemini.c
@@ -2518,13 +2518,11 @@ unprepare:
 	return ret;
 }
 
-static int gemini_ethernet_port_remove(struct platform_device *pdev)
+static void gemini_ethernet_port_remove(struct platform_device *pdev)
 {
 	struct gemini_ethernet_port *port = platform_get_drvdata(pdev);
 
 	gemini_port_remove(port);
-
-	return 0;
 }
 
 static const struct of_device_id gemini_ethernet_port_of_match[] = {
@@ -2541,7 +2539,7 @@ static struct platform_driver gemini_ethernet_port_driver = {
 		.of_match_table = gemini_ethernet_port_of_match,
 	},
 	.probe = gemini_ethernet_port_probe,
-	.remove = gemini_ethernet_port_remove,
+	.remove_new = gemini_ethernet_port_remove,
 };
 
 static int gemini_ethernet_probe(struct platform_device *pdev)
@@ -2583,14 +2581,12 @@ static int gemini_ethernet_probe(struct platform_device *pdev)
 	return devm_of_platform_populate(dev);
 }
 
-static int gemini_ethernet_remove(struct platform_device *pdev)
+static void gemini_ethernet_remove(struct platform_device *pdev)
 {
 	struct gemini_ethernet *geth = platform_get_drvdata(pdev);
 
 	geth_cleanup_freeq(geth);
 	geth->initialized = false;
-
-	return 0;
 }
 
 static const struct of_device_id gemini_ethernet_of_match[] = {
@@ -2607,7 +2603,7 @@ static struct platform_driver gemini_ethernet_driver = {
 		.of_match_table = gemini_ethernet_of_match,
 	},
 	.probe = gemini_ethernet_probe,
-	.remove = gemini_ethernet_remove,
+	.remove_new = gemini_ethernet_remove,
 };
 
 static int __init gemini_ethernet_module_init(void)
diff --git a/drivers/net/ethernet/davicom/dm9000.c b/drivers/net/ethernet/davicom/dm9000.c
index 05a89ab6766c..150cc94ae9f8 100644
--- a/drivers/net/ethernet/davicom/dm9000.c
+++ b/drivers/net/ethernet/davicom/dm9000.c
@@ -1770,8 +1770,7 @@ static const struct dev_pm_ops dm9000_drv_pm_ops = {
 	.resume		= dm9000_drv_resume,
 };
 
-static int
-dm9000_drv_remove(struct platform_device *pdev)
+static void dm9000_drv_remove(struct platform_device *pdev)
 {
 	struct net_device *ndev = platform_get_drvdata(pdev);
 	struct board_info *dm = to_dm9000_board(ndev);
@@ -1783,7 +1782,6 @@ dm9000_drv_remove(struct platform_device *pdev)
 		regulator_disable(dm->power_supply);
 
 	dev_dbg(&pdev->dev, "released and freed device\n");
-	return 0;
 }
 
 #ifdef CONFIG_OF
@@ -1801,7 +1799,7 @@ static struct platform_driver dm9000_driver = {
 		.of_match_table = of_match_ptr(dm9000_of_matches),
 	},
 	.probe   = dm9000_probe,
-	.remove  = dm9000_drv_remove,
+	.remove_new = dm9000_drv_remove,
 };
 
 module_platform_driver(dm9000_driver);
diff --git a/drivers/net/ethernet/dnet.c b/drivers/net/ethernet/dnet.c
index 151ca9573be9..2a18df3605f1 100644
--- a/drivers/net/ethernet/dnet.c
+++ b/drivers/net/ethernet/dnet.c
@@ -841,7 +841,7 @@ err_out_free_dev:
 	return err;
 }
 
-static int dnet_remove(struct platform_device *pdev)
+static void dnet_remove(struct platform_device *pdev)
 {
 
 	struct net_device *dev;
@@ -859,13 +859,11 @@ static int dnet_remove(struct platform_device *pdev)
 		free_irq(dev->irq, dev);
 		free_netdev(dev);
 	}
-
-	return 0;
 }
 
 static struct platform_driver dnet_driver = {
 	.probe		= dnet_probe,
-	.remove		= dnet_remove,
+	.remove_new	= dnet_remove,
 	.driver		= {
 		.name		= "dnet",
 	},
diff --git a/drivers/net/ethernet/engleder/tsnep_main.c b/drivers/net/ethernet/engleder/tsnep_main.c
index 8b992dc9bb52..77a3fcb821c8 100644
--- a/drivers/net/ethernet/engleder/tsnep_main.c
+++ b/drivers/net/ethernet/engleder/tsnep_main.c
@@ -2587,7 +2587,7 @@ mdio_init_failed:
 	return retval;
 }
 
-static int tsnep_remove(struct platform_device *pdev)
+static void tsnep_remove(struct platform_device *pdev)
 {
 	struct tsnep_adapter *adapter = platform_get_drvdata(pdev);
 
@@ -2603,8 +2603,6 @@ static int tsnep_remove(struct platform_device *pdev)
 		mdiobus_unregister(adapter->mdiobus);
 
 	tsnep_disable_irq(adapter, ECM_INT_ALL);
-
-	return 0;
 }
 
 static const struct of_device_id tsnep_of_match[] = {
@@ -2619,7 +2617,7 @@ static struct platform_driver tsnep_driver = {
 		.of_match_table = tsnep_of_match,
 	},
 	.probe = tsnep_probe,
-	.remove = tsnep_remove,
+	.remove_new = tsnep_remove,
 };
 module_platform_driver(tsnep_driver);
 
diff --git a/drivers/net/ethernet/ethoc.c b/drivers/net/ethernet/ethoc.c
index 95cbad198b4b..ad41c9019018 100644
--- a/drivers/net/ethernet/ethoc.c
+++ b/drivers/net/ethernet/ethoc.c
@@ -1254,7 +1254,7 @@ out:
  * ethoc_remove - shutdown OpenCores ethernet MAC
  * @pdev:	platform device
  */
-static int ethoc_remove(struct platform_device *pdev)
+static void ethoc_remove(struct platform_device *pdev)
 {
 	struct net_device *netdev = platform_get_drvdata(pdev);
 	struct ethoc *priv = netdev_priv(netdev);
@@ -1271,8 +1271,6 @@ static int ethoc_remove(struct platform_device *pdev)
 		unregister_netdev(netdev);
 		free_netdev(netdev);
 	}
-
-	return 0;
 }
 
 #ifdef CONFIG_PM
@@ -1298,7 +1296,7 @@ MODULE_DEVICE_TABLE(of, ethoc_match);
 
 static struct platform_driver ethoc_driver = {
 	.probe   = ethoc_probe,
-	.remove  = ethoc_remove,
+	.remove_new = ethoc_remove,
 	.suspend = ethoc_suspend,
 	.resume  = ethoc_resume,
 	.driver  = {
diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c
index 9135b918dd49..fddfd1dd5070 100644
--- a/drivers/net/ethernet/faraday/ftgmac100.c
+++ b/drivers/net/ethernet/faraday/ftgmac100.c
@@ -2012,7 +2012,7 @@ err_alloc_etherdev:
 	return err;
 }
 
-static int ftgmac100_remove(struct platform_device *pdev)
+static void ftgmac100_remove(struct platform_device *pdev)
 {
 	struct net_device *netdev;
 	struct ftgmac100 *priv;
@@ -2040,7 +2040,6 @@ static int ftgmac100_remove(struct platform_device *pdev)
 
 	netif_napi_del(&priv->napi);
 	free_netdev(netdev);
-	return 0;
 }
 
 static const struct of_device_id ftgmac100_of_match[] = {
@@ -2051,7 +2050,7 @@ MODULE_DEVICE_TABLE(of, ftgmac100_of_match);
 
 static struct platform_driver ftgmac100_driver = {
 	.probe	= ftgmac100_probe,
-	.remove	= ftgmac100_remove,
+	.remove_new = ftgmac100_remove,
 	.driver	= {
 		.name		= DRV_NAME,
 		.of_match_table	= ftgmac100_of_match,
diff --git a/drivers/net/ethernet/faraday/ftmac100.c b/drivers/net/ethernet/faraday/ftmac100.c
index 183069581bc0..003bc9a45c65 100644
--- a/drivers/net/ethernet/faraday/ftmac100.c
+++ b/drivers/net/ethernet/faraday/ftmac100.c
@@ -1219,7 +1219,7 @@ err_alloc_etherdev:
 	return err;
 }
 
-static int ftmac100_remove(struct platform_device *pdev)
+static void ftmac100_remove(struct platform_device *pdev)
 {
 	struct net_device *netdev;
 	struct ftmac100 *priv;
@@ -1234,7 +1234,6 @@ static int ftmac100_remove(struct platform_device *pdev)
 
 	netif_napi_del(&priv->napi);
 	free_netdev(netdev);
-	return 0;
 }
 
 static const struct of_device_id ftmac100_of_ids[] = {
@@ -1244,7 +1243,7 @@ static const struct of_device_id ftmac100_of_ids[] = {
 
 static struct platform_driver ftmac100_driver = {
 	.probe		= ftmac100_probe,
-	.remove		= ftmac100_remove,
+	.remove_new	= ftmac100_remove,
 	.driver		= {
 		.name	= DRV_NAME,
 		.of_match_table = ftmac100_of_ids
diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c
index 5704b5f57cd0..83b09dcfafc4 100644
--- a/drivers/net/ethernet/google/gve/gve_main.c
+++ b/drivers/net/ethernet/google/gve/gve_main.c
@@ -190,7 +190,7 @@ static int gve_alloc_stats_report(struct gve_priv *priv)
 	rx_stats_num = (GVE_RX_STATS_REPORT_NUM + NIC_RX_STATS_REPORT_NUM) *
 		       priv->rx_cfg.num_queues;
 	priv->stats_report_len = struct_size(priv->stats_report, stats,
-					     tx_stats_num + rx_stats_num);
+					     size_add(tx_stats_num, rx_stats_num));
 	priv->stats_report =
 		dma_alloc_coherent(&priv->pdev->dev, priv->stats_report_len,
 				   &priv->stats_report_bus, GFP_KERNEL);
diff --git a/drivers/net/ethernet/hisilicon/hip04_eth.c b/drivers/net/ethernet/hisilicon/hip04_eth.c
index ecf92a5d56bb..b91e7a06b97f 100644
--- a/drivers/net/ethernet/hisilicon/hip04_eth.c
+++ b/drivers/net/ethernet/hisilicon/hip04_eth.c
@@ -1021,7 +1021,7 @@ init_fail:
 	return ret;
 }
 
-static int hip04_remove(struct platform_device *pdev)
+static void hip04_remove(struct platform_device *pdev)
 {
 	struct net_device *ndev = platform_get_drvdata(pdev);
 	struct hip04_priv *priv = netdev_priv(ndev);
@@ -1035,8 +1035,6 @@ static int hip04_remove(struct platform_device *pdev)
 	of_node_put(priv->phy_node);
 	cancel_work_sync(&priv->tx_timeout_task);
 	free_netdev(ndev);
-
-	return 0;
 }
 
 static const struct of_device_id hip04_mac_match[] = {
@@ -1048,7 +1046,7 @@ MODULE_DEVICE_TABLE(of, hip04_mac_match);
 
 static struct platform_driver hip04_mac_driver = {
 	.probe	= hip04_mac_probe,
-	.remove	= hip04_remove,
+	.remove_new = hip04_remove,
 	.driver	= {
 		.name		= DRV_NAME,
 		.of_match_table	= hip04_mac_match,
diff --git a/drivers/net/ethernet/hisilicon/hisi_femac.c b/drivers/net/ethernet/hisilicon/hisi_femac.c
index cb7b0293fe85..2406263c9dd3 100644
--- a/drivers/net/ethernet/hisilicon/hisi_femac.c
+++ b/drivers/net/ethernet/hisilicon/hisi_femac.c
@@ -893,7 +893,7 @@ out_free_netdev:
 	return ret;
 }
 
-static int hisi_femac_drv_remove(struct platform_device *pdev)
+static void hisi_femac_drv_remove(struct platform_device *pdev)
 {
 	struct net_device *ndev = platform_get_drvdata(pdev);
 	struct hisi_femac_priv *priv = netdev_priv(ndev);
@@ -904,8 +904,6 @@ static int hisi_femac_drv_remove(struct platform_device *pdev)
 	phy_disconnect(ndev->phydev);
 	clk_disable_unprepare(priv->clk);
 	free_netdev(ndev);
-
-	return 0;
 }
 
 #ifdef CONFIG_PM
@@ -961,7 +959,7 @@ static struct platform_driver hisi_femac_driver = {
 		.of_match_table = hisi_femac_match,
 	},
 	.probe = hisi_femac_drv_probe,
-	.remove = hisi_femac_drv_remove,
+	.remove_new = hisi_femac_drv_remove,
 #ifdef CONFIG_PM
 	.suspend = hisi_femac_drv_suspend,
 	.resume = hisi_femac_drv_resume,
diff --git a/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c b/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c
index 26d22bb04b87..506fa3d8bbee 100644
--- a/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c
+++ b/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c
@@ -1282,7 +1282,7 @@ out_free_netdev:
 	return ret;
 }
 
-static int hix5hd2_dev_remove(struct platform_device *pdev)
+static void hix5hd2_dev_remove(struct platform_device *pdev)
 {
 	struct net_device *ndev = platform_get_drvdata(pdev);
 	struct hix5hd2_priv *priv = netdev_priv(ndev);
@@ -1298,8 +1298,6 @@ static int hix5hd2_dev_remove(struct platform_device *pdev)
 	of_node_put(priv->phy_node);
 	cancel_work_sync(&priv->tx_timeout_task);
 	free_netdev(ndev);
-
-	return 0;
 }
 
 static const struct of_device_id hix5hd2_of_match[] = {
@@ -1319,7 +1317,7 @@ static struct platform_driver hix5hd2_dev_driver = {
 		.of_match_table = hix5hd2_of_match,
 	},
 	.probe = hix5hd2_dev_probe,
-	.remove = hix5hd2_dev_remove,
+	.remove_new = hix5hd2_dev_remove,
 };
 
 module_platform_driver(hix5hd2_dev_driver);
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
index fcaf5132b865..1b67da1f6fa8 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
@@ -3007,7 +3007,7 @@ free_dev:
  * hns_dsaf_remove - remove dsaf dev
  * @pdev: dasf platform device
  */
-static int hns_dsaf_remove(struct platform_device *pdev)
+static void hns_dsaf_remove(struct platform_device *pdev)
 {
 	struct dsaf_device *dsaf_dev = dev_get_drvdata(&pdev->dev);
 
@@ -3020,8 +3020,6 @@ static int hns_dsaf_remove(struct platform_device *pdev)
 	hns_dsaf_free(dsaf_dev);
 
 	hns_dsaf_free_dev(dsaf_dev);
-
-	return 0;
 }
 
 static const struct of_device_id g_dsaf_match[] = {
@@ -3033,7 +3031,7 @@ MODULE_DEVICE_TABLE(of, g_dsaf_match);
 
 static struct platform_driver g_dsaf_driver = {
 	.probe = hns_dsaf_probe,
-	.remove = hns_dsaf_remove,
+	.remove_new = hns_dsaf_remove,
 	.driver = {
 		.name = DSAF_DRV_NAME,
 		.of_match_table = g_dsaf_match,
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
index 7cf10d1e2b31..0900abf5c508 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
@@ -2384,7 +2384,7 @@ out_read_prop_fail:
 	return ret;
 }
 
-static int hns_nic_dev_remove(struct platform_device *pdev)
+static void hns_nic_dev_remove(struct platform_device *pdev)
 {
 	struct net_device *ndev = platform_get_drvdata(pdev);
 	struct hns_nic_priv *priv = netdev_priv(ndev);
@@ -2413,7 +2413,6 @@ static int hns_nic_dev_remove(struct platform_device *pdev)
 	of_node_put(to_of_node(priv->fwnode));
 
 	free_netdev(ndev);
-	return 0;
 }
 
 static const struct of_device_id hns_enet_of_match[] = {
@@ -2431,7 +2430,7 @@ static struct platform_driver hns_nic_dev_driver = {
 		.acpi_match_table = ACPI_PTR(hns_enet_acpi_match),
 	},
 	.probe = hns_nic_dev_probe,
-	.remove = hns_nic_dev_remove,
+	.remove_new = hns_nic_dev_remove,
 };
 
 module_platform_driver(hns_nic_dev_driver);
diff --git a/drivers/net/ethernet/hisilicon/hns_mdio.c b/drivers/net/ethernet/hisilicon/hns_mdio.c
index 409a89d80220..ed73707176c1 100644
--- a/drivers/net/ethernet/hisilicon/hns_mdio.c
+++ b/drivers/net/ethernet/hisilicon/hns_mdio.c
@@ -610,7 +610,7 @@ static int hns_mdio_probe(struct platform_device *pdev)
  *
  * Return 0 on success, negative on failure
  */
-static int hns_mdio_remove(struct platform_device *pdev)
+static void hns_mdio_remove(struct platform_device *pdev)
 {
 	struct mii_bus *bus;
 
@@ -618,7 +618,6 @@ static int hns_mdio_remove(struct platform_device *pdev)
 
 	mdiobus_unregister(bus);
 	platform_set_drvdata(pdev, NULL);
-	return 0;
 }
 
 static const struct of_device_id hns_mdio_match[] = {
@@ -636,7 +635,7 @@ MODULE_DEVICE_TABLE(acpi, hns_mdio_acpi_match);
 
 static struct platform_driver hns_mdio_driver = {
 	.probe = hns_mdio_probe,
-	.remove = hns_mdio_remove,
+	.remove_new = hns_mdio_remove,
 	.driver = {
 		   .name = MDIO_DRV_NAME,
 		   .of_match_table = hns_mdio_match,
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_tx.c b/drivers/net/ethernet/huawei/hinic/hinic_tx.c
index ad47ac51a139..9b60966736db 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_tx.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_tx.c
@@ -861,7 +861,7 @@ int hinic_init_txq(struct hinic_txq *txq, struct hinic_sq *sq,
 	struct hinic_qp *qp = container_of(sq, struct hinic_qp, sq);
 	struct hinic_dev *nic_dev = netdev_priv(netdev);
 	struct hinic_hwdev *hwdev = nic_dev->hwdev;
-	int err, irqname_len;
+	int err;
 
 	txq->netdev = netdev;
 	txq->sq = sq;
@@ -882,15 +882,13 @@ int hinic_init_txq(struct hinic_txq *txq, struct hinic_sq *sq,
 		goto err_alloc_free_sges;
 	}
 
-	irqname_len = snprintf(NULL, 0, "%s_txq%d", netdev->name, qp->q_id) + 1;
-	txq->irq_name = devm_kzalloc(&netdev->dev, irqname_len, GFP_KERNEL);
+	txq->irq_name = devm_kasprintf(&netdev->dev, GFP_KERNEL, "%s_txq%d",
+				       netdev->name, qp->q_id);
 	if (!txq->irq_name) {
 		err = -ENOMEM;
 		goto err_alloc_irqname;
 	}
 
-	sprintf(txq->irq_name, "%s_txq%d", netdev->name, qp->q_id);
-
 	err = hinic_hwdev_hw_ci_addr_set(hwdev, sq, CI_UPDATE_NO_PENDING,
 					 CI_UPDATE_NO_COALESC);
 	if (err)
diff --git a/drivers/net/ethernet/i825xx/sni_82596.c b/drivers/net/ethernet/i825xx/sni_82596.c
index 54bb4d9a0d1e..813403c2628f 100644
--- a/drivers/net/ethernet/i825xx/sni_82596.c
+++ b/drivers/net/ethernet/i825xx/sni_82596.c
@@ -153,7 +153,7 @@ probe_failed_free_mpu:
 	return retval;
 }
 
-static int sni_82596_driver_remove(struct platform_device *pdev)
+static void sni_82596_driver_remove(struct platform_device *pdev)
 {
 	struct net_device *dev = platform_get_drvdata(pdev);
 	struct i596_private *lp = netdev_priv(dev);
@@ -164,12 +164,11 @@ static int sni_82596_driver_remove(struct platform_device *pdev)
 	iounmap(lp->ca);
 	iounmap(lp->mpu_port);
 	free_netdev (dev);
-	return 0;
 }
 
 static struct platform_driver sni_82596_driver = {
 	.probe	= sni_82596_probe,
-	.remove	= sni_82596_driver_remove,
+	.remove_new = sni_82596_driver_remove,
 	.driver	= {
 		.name	= sni_82596_string,
 	},
diff --git a/drivers/net/ethernet/ibm/ehea/ehea_main.c b/drivers/net/ethernet/ibm/ehea/ehea_main.c
index 0a56e9752464..251dedd55cfb 100644
--- a/drivers/net/ethernet/ibm/ehea/ehea_main.c
+++ b/drivers/net/ethernet/ibm/ehea/ehea_main.c
@@ -90,7 +90,7 @@ static struct ehea_bcmc_reg_array ehea_bcmc_regs;
 
 static int ehea_probe_adapter(struct platform_device *dev);
 
-static int ehea_remove(struct platform_device *dev);
+static void ehea_remove(struct platform_device *dev);
 
 static const struct of_device_id ehea_module_device_table[] = {
 	{
@@ -121,7 +121,7 @@ static struct platform_driver ehea_driver = {
 		.of_match_table = ehea_device_table,
 	},
 	.probe = ehea_probe_adapter,
-	.remove = ehea_remove,
+	.remove_new = ehea_remove,
 };
 
 void ehea_dump(void *adr, int len, char *msg)
@@ -3471,7 +3471,7 @@ out:
 	return ret;
 }
 
-static int ehea_remove(struct platform_device *dev)
+static void ehea_remove(struct platform_device *dev)
 {
 	struct ehea_adapter *adapter = platform_get_drvdata(dev);
 	int i;
@@ -3492,8 +3492,6 @@ static int ehea_remove(struct platform_device *dev)
 	list_del(&adapter->list);
 
 	ehea_update_firmware_handles();
-
-	return 0;
 }
 
 static int check_module_parm(void)
diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c
index 0c314bf97480..e6e47b1842ea 100644
--- a/drivers/net/ethernet/ibm/emac/core.c
+++ b/drivers/net/ethernet/ibm/emac/core.c
@@ -3253,7 +3253,7 @@ static int emac_probe(struct platform_device *ofdev)
 	return err;
 }
 
-static int emac_remove(struct platform_device *ofdev)
+static void emac_remove(struct platform_device *ofdev)
 {
 	struct emac_instance *dev = platform_get_drvdata(ofdev);
 
@@ -3290,8 +3290,6 @@ static int emac_remove(struct platform_device *ofdev)
 		irq_dispose_mapping(dev->emac_irq);
 
 	free_netdev(dev->ndev);
-
-	return 0;
 }
 
 /* XXX Features in here should be replaced by properties... */
@@ -3319,7 +3317,7 @@ static struct platform_driver emac_driver = {
 		.of_match_table = emac_match,
 	},
 	.probe = emac_probe,
-	.remove = emac_remove,
+	.remove_new = emac_remove,
 };
 
 static void __init emac_make_bootlist(void)
diff --git a/drivers/net/ethernet/ibm/emac/mal.c b/drivers/net/ethernet/ibm/emac/mal.c
index c3236b59e7e9..462646d1b817 100644
--- a/drivers/net/ethernet/ibm/emac/mal.c
+++ b/drivers/net/ethernet/ibm/emac/mal.c
@@ -711,7 +711,7 @@ static int mal_probe(struct platform_device *ofdev)
 	return err;
 }
 
-static int mal_remove(struct platform_device *ofdev)
+static void mal_remove(struct platform_device *ofdev)
 {
 	struct mal_instance *mal = platform_get_drvdata(ofdev);
 
@@ -740,8 +740,6 @@ static int mal_remove(struct platform_device *ofdev)
 			   NUM_RX_BUFF * mal->num_rx_chans), mal->bd_virt,
 			  mal->bd_dma);
 	kfree(mal);
-
-	return 0;
 }
 
 static const struct of_device_id mal_platform_match[] =
@@ -770,7 +768,7 @@ static struct platform_driver mal_of_driver = {
 		.of_match_table = mal_platform_match,
 	},
 	.probe = mal_probe,
-	.remove = mal_remove,
+	.remove_new = mal_remove,
 };
 
 int __init mal_init(void)
diff --git a/drivers/net/ethernet/ibm/emac/rgmii.c b/drivers/net/ethernet/ibm/emac/rgmii.c
index fd437f986edf..e1712fdc3c31 100644
--- a/drivers/net/ethernet/ibm/emac/rgmii.c
+++ b/drivers/net/ethernet/ibm/emac/rgmii.c
@@ -273,7 +273,7 @@ static int rgmii_probe(struct platform_device *ofdev)
 	return rc;
 }
 
-static int rgmii_remove(struct platform_device *ofdev)
+static void rgmii_remove(struct platform_device *ofdev)
 {
 	struct rgmii_instance *dev = platform_get_drvdata(ofdev);
 
@@ -281,8 +281,6 @@ static int rgmii_remove(struct platform_device *ofdev)
 
 	iounmap(dev->base);
 	kfree(dev);
-
-	return 0;
 }
 
 static const struct of_device_id rgmii_match[] =
@@ -302,7 +300,7 @@ static struct platform_driver rgmii_driver = {
 		.of_match_table = rgmii_match,
 	},
 	.probe = rgmii_probe,
-	.remove = rgmii_remove,
+	.remove_new = rgmii_remove,
 };
 
 int __init rgmii_init(void)
diff --git a/drivers/net/ethernet/ibm/emac/tah.c b/drivers/net/ethernet/ibm/emac/tah.c
index aae9a88d95d7..fa3488258ca2 100644
--- a/drivers/net/ethernet/ibm/emac/tah.c
+++ b/drivers/net/ethernet/ibm/emac/tah.c
@@ -130,7 +130,7 @@ static int tah_probe(struct platform_device *ofdev)
 	return rc;
 }
 
-static int tah_remove(struct platform_device *ofdev)
+static void tah_remove(struct platform_device *ofdev)
 {
 	struct tah_instance *dev = platform_get_drvdata(ofdev);
 
@@ -138,8 +138,6 @@ static int tah_remove(struct platform_device *ofdev)
 
 	iounmap(dev->base);
 	kfree(dev);
-
-	return 0;
 }
 
 static const struct of_device_id tah_match[] =
@@ -160,7 +158,7 @@ static struct platform_driver tah_driver = {
 		.of_match_table = tah_match,
 	},
 	.probe = tah_probe,
-	.remove = tah_remove,
+	.remove_new = tah_remove,
 };
 
 int __init tah_init(void)
diff --git a/drivers/net/ethernet/ibm/emac/zmii.c b/drivers/net/ethernet/ibm/emac/zmii.c
index 6337388ee5f4..26e86cdee2f6 100644
--- a/drivers/net/ethernet/ibm/emac/zmii.c
+++ b/drivers/net/ethernet/ibm/emac/zmii.c
@@ -278,7 +278,7 @@ static int zmii_probe(struct platform_device *ofdev)
 	return rc;
 }
 
-static int zmii_remove(struct platform_device *ofdev)
+static void zmii_remove(struct platform_device *ofdev)
 {
 	struct zmii_instance *dev = platform_get_drvdata(ofdev);
 
@@ -286,8 +286,6 @@ static int zmii_remove(struct platform_device *ofdev)
 
 	iounmap(dev->base);
 	kfree(dev);
-
-	return 0;
 }
 
 static const struct of_device_id zmii_match[] =
@@ -308,7 +306,7 @@ static struct platform_driver zmii_driver = {
 		.of_match_table = zmii_match,
 	},
 	.probe = zmii_probe,
-	.remove = zmii_remove,
+	.remove_new = zmii_remove,
 };
 
 int __init zmii_init(void)
diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig
index 9bc0a9519899..e6684f3cc0ce 100644
--- a/drivers/net/ethernet/intel/Kconfig
+++ b/drivers/net/ethernet/intel/Kconfig
@@ -284,6 +284,7 @@ config ICE
 	select DIMLIB
 	select NET_DEVLINK
 	select PLDMFW
+	select DPLL
 	help
 	  This driver supports Intel(R) Ethernet Connection E800 Series of
 	  devices.  For more information on how to identify your adapter, go
@@ -355,5 +356,17 @@ config IGC
 
 	  To compile this driver as a module, choose M here. The module
 	  will be called igc.
+config IDPF
+	tristate "Intel(R) Infrastructure Data Path Function Support"
+	depends on PCI_MSI
+	select DIMLIB
+	select PAGE_POOL
+	select PAGE_POOL_STATS
+	help
+	  This driver supports Intel(R) Infrastructure Data Path Function
+	  devices.
+
+	  To compile this driver as a module, choose M here. The module
+	  will be called idpf.
 
 endif # NET_VENDOR_INTEL
diff --git a/drivers/net/ethernet/intel/Makefile b/drivers/net/ethernet/intel/Makefile
index d80d04132073..dacb481ee5b1 100644
--- a/drivers/net/ethernet/intel/Makefile
+++ b/drivers/net/ethernet/intel/Makefile
@@ -15,3 +15,4 @@ obj-$(CONFIG_I40E) += i40e/
 obj-$(CONFIG_IAVF) += iavf/
 obj-$(CONFIG_FM10K) += fm10k/
 obj-$(CONFIG_ICE) += ice/
+obj-$(CONFIG_IDPF) += idpf/
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index de7fd43dc11c..00ca2b88165c 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -16320,11 +16320,15 @@ static void i40e_remove(struct pci_dev *pdev)
 			i40e_switch_branch_release(pf->veb[i]);
 	}
 
-	/* Now we can shutdown the PF's VSI, just before we kill
+	/* Now we can shutdown the PF's VSIs, just before we kill
 	 * adminq and hmc.
 	 */
-	if (pf->vsi[pf->lan_vsi])
-		i40e_vsi_release(pf->vsi[pf->lan_vsi]);
+	for (i = pf->num_alloc_vsi; i--;)
+		if (pf->vsi[i]) {
+			i40e_vsi_close(pf->vsi[i]);
+			i40e_vsi_release(pf->vsi[i]);
+			pf->vsi[i] = NULL;
+		}
 
 	i40e_cloud_filter_exit(pf);
 
diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h
index e110ba346185..44daf335e8c5 100644
--- a/drivers/net/ethernet/intel/iavf/iavf.h
+++ b/drivers/net/ethernet/intel/iavf/iavf.h
@@ -298,8 +298,6 @@ struct iavf_adapter {
 #define IAVF_FLAG_CLIENT_NEEDS_OPEN		BIT(10)
 #define IAVF_FLAG_CLIENT_NEEDS_CLOSE		BIT(11)
 #define IAVF_FLAG_CLIENT_NEEDS_L2_PARAMS	BIT(12)
-#define IAVF_FLAG_PROMISC_ON			BIT(13)
-#define IAVF_FLAG_ALLMULTI_ON			BIT(14)
 #define IAVF_FLAG_LEGACY_RX			BIT(15)
 #define IAVF_FLAG_REINIT_ITR_NEEDED		BIT(16)
 #define IAVF_FLAG_QUEUES_DISABLED		BIT(17)
@@ -325,10 +323,7 @@ struct iavf_adapter {
 #define IAVF_FLAG_AQ_SET_HENA			BIT_ULL(12)
 #define IAVF_FLAG_AQ_SET_RSS_KEY		BIT_ULL(13)
 #define IAVF_FLAG_AQ_SET_RSS_LUT		BIT_ULL(14)
-#define IAVF_FLAG_AQ_REQUEST_PROMISC		BIT_ULL(15)
-#define IAVF_FLAG_AQ_RELEASE_PROMISC		BIT_ULL(16)
-#define IAVF_FLAG_AQ_REQUEST_ALLMULTI		BIT_ULL(17)
-#define IAVF_FLAG_AQ_RELEASE_ALLMULTI		BIT_ULL(18)
+#define IAVF_FLAG_AQ_CONFIGURE_PROMISC_MODE	BIT_ULL(15)
 #define IAVF_FLAG_AQ_ENABLE_VLAN_STRIPPING	BIT_ULL(19)
 #define IAVF_FLAG_AQ_DISABLE_VLAN_STRIPPING	BIT_ULL(20)
 #define IAVF_FLAG_AQ_ENABLE_CHANNELS		BIT_ULL(21)
@@ -365,6 +360,12 @@ struct iavf_adapter {
 	(IAVF_EXTENDED_CAP_SEND_VLAN_V2 |		\
 	 IAVF_EXTENDED_CAP_RECV_VLAN_V2)
 
+	/* Lock to prevent possible clobbering of
+	 * current_netdev_promisc_flags
+	 */
+	spinlock_t current_netdev_promisc_flags_lock;
+	netdev_features_t current_netdev_promisc_flags;
+
 	/* OS defined structs */
 	struct net_device *netdev;
 	struct pci_dev *pdev;
@@ -405,6 +406,8 @@ struct iavf_adapter {
 			  VIRTCHNL_VF_OFFLOAD_VLAN)
 #define VLAN_V2_ALLOWED(_a) ((_a)->vf_res->vf_cap_flags & \
 			     VIRTCHNL_VF_OFFLOAD_VLAN_V2)
+#define CRC_OFFLOAD_ALLOWED(_a) ((_a)->vf_res->vf_cap_flags & \
+				 VIRTCHNL_VF_OFFLOAD_CRC)
 #define VLAN_V2_FILTERING_ALLOWED(_a) \
 	(VLAN_V2_ALLOWED((_a)) && \
 	 ((_a)->vlan_v2_caps.filtering.filtering_support.outer || \
@@ -551,7 +554,8 @@ void iavf_add_ether_addrs(struct iavf_adapter *adapter);
 void iavf_del_ether_addrs(struct iavf_adapter *adapter);
 void iavf_add_vlans(struct iavf_adapter *adapter);
 void iavf_del_vlans(struct iavf_adapter *adapter);
-void iavf_set_promiscuous(struct iavf_adapter *adapter, int flags);
+void iavf_set_promiscuous(struct iavf_adapter *adapter);
+bool iavf_promiscuous_mode_changed(struct iavf_adapter *adapter);
 void iavf_request_stats(struct iavf_adapter *adapter);
 int iavf_request_reset(struct iavf_adapter *adapter);
 void iavf_get_hena(struct iavf_adapter *adapter);
diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
index b3434dbc90d6..d47eae3a2d32 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_main.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
@@ -1187,6 +1187,16 @@ static int iavf_addr_unsync(struct net_device *netdev, const u8 *addr)
 }
 
 /**
+ * iavf_promiscuous_mode_changed - check if promiscuous mode bits changed
+ * @adapter: device specific adapter
+ */
+bool iavf_promiscuous_mode_changed(struct iavf_adapter *adapter)
+{
+	return (adapter->current_netdev_promisc_flags ^ adapter->netdev->flags) &
+		(IFF_PROMISC | IFF_ALLMULTI);
+}
+
+/**
  * iavf_set_rx_mode - NDO callback to set the netdev filters
  * @netdev: network interface device structure
  **/
@@ -1199,19 +1209,10 @@ static void iavf_set_rx_mode(struct net_device *netdev)
 	__dev_mc_sync(netdev, iavf_addr_sync, iavf_addr_unsync);
 	spin_unlock_bh(&adapter->mac_vlan_list_lock);
 
-	if (netdev->flags & IFF_PROMISC &&
-	    !(adapter->flags & IAVF_FLAG_PROMISC_ON))
-		adapter->aq_required |= IAVF_FLAG_AQ_REQUEST_PROMISC;
-	else if (!(netdev->flags & IFF_PROMISC) &&
-		 adapter->flags & IAVF_FLAG_PROMISC_ON)
-		adapter->aq_required |= IAVF_FLAG_AQ_RELEASE_PROMISC;
-
-	if (netdev->flags & IFF_ALLMULTI &&
-	    !(adapter->flags & IAVF_FLAG_ALLMULTI_ON))
-		adapter->aq_required |= IAVF_FLAG_AQ_REQUEST_ALLMULTI;
-	else if (!(netdev->flags & IFF_ALLMULTI) &&
-		 adapter->flags & IAVF_FLAG_ALLMULTI_ON)
-		adapter->aq_required |= IAVF_FLAG_AQ_RELEASE_ALLMULTI;
+	spin_lock_bh(&adapter->current_netdev_promisc_flags_lock);
+	if (iavf_promiscuous_mode_changed(adapter))
+		adapter->aq_required |= IAVF_FLAG_AQ_CONFIGURE_PROMISC_MODE;
+	spin_unlock_bh(&adapter->current_netdev_promisc_flags_lock);
 }
 
 /**
@@ -2162,19 +2163,8 @@ static int iavf_process_aq_command(struct iavf_adapter *adapter)
 		return 0;
 	}
 
-	if (adapter->aq_required & IAVF_FLAG_AQ_REQUEST_PROMISC) {
-		iavf_set_promiscuous(adapter, FLAG_VF_UNICAST_PROMISC |
-				       FLAG_VF_MULTICAST_PROMISC);
-		return 0;
-	}
-
-	if (adapter->aq_required & IAVF_FLAG_AQ_REQUEST_ALLMULTI) {
-		iavf_set_promiscuous(adapter, FLAG_VF_MULTICAST_PROMISC);
-		return 0;
-	}
-	if ((adapter->aq_required & IAVF_FLAG_AQ_RELEASE_PROMISC) ||
-	    (adapter->aq_required & IAVF_FLAG_AQ_RELEASE_ALLMULTI)) {
-		iavf_set_promiscuous(adapter, 0);
+	if (adapter->aq_required & IAVF_FLAG_AQ_CONFIGURE_PROMISC_MODE) {
+		iavf_set_promiscuous(adapter);
 		return 0;
 	}
 
@@ -4410,6 +4400,9 @@ static int iavf_set_features(struct net_device *netdev,
 	    (features & NETIF_VLAN_OFFLOAD_FEATURES))
 		iavf_set_vlan_offload_features(adapter, netdev->features,
 					       features);
+	if (CRC_OFFLOAD_ALLOWED(adapter) &&
+	    ((netdev->features & NETIF_F_RXFCS) ^ (features & NETIF_F_RXFCS)))
+		iavf_schedule_reset(adapter, IAVF_FLAG_RESET_NEEDED);
 
 	return 0;
 }
@@ -4531,6 +4524,9 @@ iavf_get_netdev_vlan_hw_features(struct iavf_adapter *adapter)
 		}
 	}
 
+	if (CRC_OFFLOAD_ALLOWED(adapter))
+		hw_features |= NETIF_F_RXFCS;
+
 	return hw_features;
 }
 
@@ -4695,6 +4691,55 @@ iavf_fix_netdev_vlan_features(struct iavf_adapter *adapter,
 }
 
 /**
+ * iavf_fix_strip_features - fix NETDEV CRC and VLAN strip features
+ * @adapter: board private structure
+ * @requested_features: stack requested NETDEV features
+ *
+ * Returns fixed-up features bits
+ **/
+static netdev_features_t
+iavf_fix_strip_features(struct iavf_adapter *adapter,
+			netdev_features_t requested_features)
+{
+	struct net_device *netdev = adapter->netdev;
+	bool crc_offload_req, is_vlan_strip;
+	netdev_features_t vlan_strip;
+	int num_non_zero_vlan;
+
+	crc_offload_req = CRC_OFFLOAD_ALLOWED(adapter) &&
+			  (requested_features & NETIF_F_RXFCS);
+	num_non_zero_vlan = iavf_get_num_vlans_added(adapter);
+	vlan_strip = (NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_STAG_RX);
+	is_vlan_strip = requested_features & vlan_strip;
+
+	if (!crc_offload_req)
+		return requested_features;
+
+	if (!num_non_zero_vlan && (netdev->features & vlan_strip) &&
+	    !(netdev->features & NETIF_F_RXFCS) && is_vlan_strip) {
+		requested_features &= ~vlan_strip;
+		netdev_info(netdev, "Disabling VLAN stripping as FCS/CRC stripping is also disabled and there is no VLAN configured\n");
+		return requested_features;
+	}
+
+	if ((netdev->features & NETIF_F_RXFCS) && is_vlan_strip) {
+		requested_features &= ~vlan_strip;
+		if (!(netdev->features & vlan_strip))
+			netdev_info(netdev, "To enable VLAN stripping, first need to enable FCS/CRC stripping");
+
+		return requested_features;
+	}
+
+	if (num_non_zero_vlan && is_vlan_strip &&
+	    !(netdev->features & NETIF_F_RXFCS)) {
+		requested_features &= ~NETIF_F_RXFCS;
+		netdev_info(netdev, "To disable FCS/CRC stripping, first need to disable VLAN stripping");
+	}
+
+	return requested_features;
+}
+
+/**
  * iavf_fix_features - fix up the netdev feature bits
  * @netdev: our net device
  * @features: desired feature bits
@@ -4706,7 +4751,9 @@ static netdev_features_t iavf_fix_features(struct net_device *netdev,
 {
 	struct iavf_adapter *adapter = netdev_priv(netdev);
 
-	return iavf_fix_netdev_vlan_features(adapter, features);
+	features = iavf_fix_netdev_vlan_features(adapter, features);
+
+	return iavf_fix_strip_features(adapter, features);
 }
 
 static const struct net_device_ops iavf_netdev_ops = {
@@ -4970,6 +5017,7 @@ static int iavf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	spin_lock_init(&adapter->cloud_filter_list_lock);
 	spin_lock_init(&adapter->fdir_fltr_lock);
 	spin_lock_init(&adapter->adv_rss_lock);
+	spin_lock_init(&adapter->current_netdev_promisc_flags_lock);
 
 	INIT_LIST_HEAD(&adapter->mac_filter_list);
 	INIT_LIST_HEAD(&adapter->vlan_filter_list);
diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
index f9727e9c3d63..8ce6389b5815 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
@@ -142,6 +142,7 @@ int iavf_send_vf_config_msg(struct iavf_adapter *adapter)
 	       VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2 |
 	       VIRTCHNL_VF_OFFLOAD_ENCAP |
 	       VIRTCHNL_VF_OFFLOAD_VLAN_V2 |
+	       VIRTCHNL_VF_OFFLOAD_CRC |
 	       VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM |
 	       VIRTCHNL_VF_OFFLOAD_REQ_QUEUES |
 	       VIRTCHNL_VF_OFFLOAD_ADQ |
@@ -312,6 +313,9 @@ void iavf_configure_queues(struct iavf_adapter *adapter)
 		vqpi->rxq.databuffer_size =
 			ALIGN(adapter->rx_rings[i].rx_buf_len,
 			      BIT_ULL(IAVF_RXQ_CTX_DBUFF_SHIFT));
+		if (CRC_OFFLOAD_ALLOWED(adapter))
+			vqpi->rxq.crc_disable = !!(adapter->netdev->features &
+						   NETIF_F_RXFCS);
 		vqpi++;
 	}
 
@@ -936,14 +940,14 @@ void iavf_del_vlans(struct iavf_adapter *adapter)
 /**
  * iavf_set_promiscuous
  * @adapter: adapter structure
- * @flags: bitmask to control unicast/multicast promiscuous.
  *
  * Request that the PF enable promiscuous mode for our VSI.
  **/
-void iavf_set_promiscuous(struct iavf_adapter *adapter, int flags)
+void iavf_set_promiscuous(struct iavf_adapter *adapter)
 {
+	struct net_device *netdev = adapter->netdev;
 	struct virtchnl_promisc_info vpi;
-	int promisc_all;
+	unsigned int flags;
 
 	if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
 		/* bail because we already have a command pending */
@@ -952,36 +956,57 @@ void iavf_set_promiscuous(struct iavf_adapter *adapter, int flags)
 		return;
 	}
 
-	promisc_all = FLAG_VF_UNICAST_PROMISC |
-		      FLAG_VF_MULTICAST_PROMISC;
-	if ((flags & promisc_all) == promisc_all) {
-		adapter->flags |= IAVF_FLAG_PROMISC_ON;
-		adapter->aq_required &= ~IAVF_FLAG_AQ_REQUEST_PROMISC;
-		dev_info(&adapter->pdev->dev, "Entering promiscuous mode\n");
-	}
+	/* prevent changes to promiscuous flags */
+	spin_lock_bh(&adapter->current_netdev_promisc_flags_lock);
 
-	if (flags & FLAG_VF_MULTICAST_PROMISC) {
-		adapter->flags |= IAVF_FLAG_ALLMULTI_ON;
-		adapter->aq_required &= ~IAVF_FLAG_AQ_REQUEST_ALLMULTI;
-		dev_info(&adapter->pdev->dev, "%s is entering multicast promiscuous mode\n",
-			 adapter->netdev->name);
+	/* sanity check to prevent duplicate AQ calls */
+	if (!iavf_promiscuous_mode_changed(adapter)) {
+		adapter->aq_required &= ~IAVF_FLAG_AQ_CONFIGURE_PROMISC_MODE;
+		dev_dbg(&adapter->pdev->dev, "No change in promiscuous mode\n");
+		/* allow changes to promiscuous flags */
+		spin_unlock_bh(&adapter->current_netdev_promisc_flags_lock);
+		return;
 	}
 
-	if (!flags) {
-		if (adapter->flags & IAVF_FLAG_PROMISC_ON) {
-			adapter->flags &= ~IAVF_FLAG_PROMISC_ON;
-			adapter->aq_required &= ~IAVF_FLAG_AQ_RELEASE_PROMISC;
-			dev_info(&adapter->pdev->dev, "Leaving promiscuous mode\n");
-		}
+	/* there are 2 bits, but only 3 states */
+	if (!(netdev->flags & IFF_PROMISC) &&
+	    netdev->flags & IFF_ALLMULTI) {
+		/* State 1  - only multicast promiscuous mode enabled
+		 * - !IFF_PROMISC && IFF_ALLMULTI
+		 */
+		flags = FLAG_VF_MULTICAST_PROMISC;
+		adapter->current_netdev_promisc_flags |= IFF_ALLMULTI;
+		adapter->current_netdev_promisc_flags &= ~IFF_PROMISC;
+		dev_info(&adapter->pdev->dev, "Entering multicast promiscuous mode\n");
+	} else if (!(netdev->flags & IFF_PROMISC) &&
+		   !(netdev->flags & IFF_ALLMULTI)) {
+		/* State 2 - unicast/multicast promiscuous mode disabled
+		 * - !IFF_PROMISC && !IFF_ALLMULTI
+		 */
+		flags = 0;
+		adapter->current_netdev_promisc_flags &=
+			~(IFF_PROMISC | IFF_ALLMULTI);
+		dev_info(&adapter->pdev->dev, "Leaving promiscuous mode\n");
+	} else {
+		/* State 3 - unicast/multicast promiscuous mode enabled
+		 * - IFF_PROMISC && IFF_ALLMULTI
+		 * - IFF_PROMISC && !IFF_ALLMULTI
+		 */
+		flags = FLAG_VF_UNICAST_PROMISC | FLAG_VF_MULTICAST_PROMISC;
+		adapter->current_netdev_promisc_flags |= IFF_PROMISC;
+		if (netdev->flags & IFF_ALLMULTI)
+			adapter->current_netdev_promisc_flags |= IFF_ALLMULTI;
+		else
+			adapter->current_netdev_promisc_flags &= ~IFF_ALLMULTI;
 
-		if (adapter->flags & IAVF_FLAG_ALLMULTI_ON) {
-			adapter->flags &= ~IAVF_FLAG_ALLMULTI_ON;
-			adapter->aq_required &= ~IAVF_FLAG_AQ_RELEASE_ALLMULTI;
-			dev_info(&adapter->pdev->dev, "%s is leaving multicast promiscuous mode\n",
-				 adapter->netdev->name);
-		}
+		dev_info(&adapter->pdev->dev, "Entering promiscuous mode\n");
 	}
 
+	adapter->aq_required &= ~IAVF_FLAG_AQ_CONFIGURE_PROMISC_MODE;
+
+	/* allow changes to promiscuous flags */
+	spin_unlock_bh(&adapter->current_netdev_promisc_flags_lock);
+
 	adapter->current_op = VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE;
 	vpi.vsi_id = adapter->vsi_res->vsi_id;
 	vpi.flags = flags;
diff --git a/drivers/net/ethernet/intel/ice/Makefile b/drivers/net/ethernet/intel/ice/Makefile
index 960277d78e09..00806ddf5bf0 100644
--- a/drivers/net/ethernet/intel/ice/Makefile
+++ b/drivers/net/ethernet/intel/ice/Makefile
@@ -34,7 +34,8 @@ ice-y := ice_main.o	\
 	 ice_lag.o	\
 	 ice_ethtool.o  \
 	 ice_repr.o	\
-	 ice_tc_lib.o
+	 ice_tc_lib.o	\
+	 ice_dpll.o
 ice-$(CONFIG_PCI_IOV) +=	\
 	ice_sriov.o		\
 	ice_virtchnl.o		\
diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index 5022b036ca4f..d30ae39c19f0 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -76,6 +76,7 @@
 #include "ice_vsi_vlan_ops.h"
 #include "ice_gnss.h"
 #include "ice_irq.h"
+#include "ice_dpll.h"
 
 #define ICE_BAR0		0
 #define ICE_REQ_DESC_MULTIPLE	32
@@ -195,10 +196,13 @@
 
 #define ice_pf_to_dev(pf) (&((pf)->pdev->dev))
 
+#define ice_pf_src_tmr_owned(pf) ((pf)->hw.func_caps.ts_func_info.src_tmr_owned)
+
 enum ice_feature {
 	ICE_F_DSCP,
-	ICE_F_PTP_EXTTS,
+	ICE_F_PHY_RCLK,
 	ICE_F_SMA_CTRL,
+	ICE_F_CGU,
 	ICE_F_GNSS,
 	ICE_F_ROCE_LAG,
 	ICE_F_SRIOV_LAG,
@@ -508,6 +512,7 @@ enum ice_pf_flags {
 	ICE_FLAG_UNPLUG_AUX_DEV,
 	ICE_FLAG_MTU_CHANGED,
 	ICE_FLAG_GNSS,			/* GNSS successfully initialized */
+	ICE_FLAG_DPLL,			/* SyncE/PTP dplls initialized */
 	ICE_PF_FLAGS_NBITS		/* must be last */
 };
 
@@ -640,6 +645,7 @@ struct ice_pf {
 #define ICE_VF_AGG_NODE_ID_START	65
 #define ICE_MAX_VF_AGG_NODES		32
 	struct ice_agg_node vf_agg_node[ICE_MAX_VF_AGG_NODES];
+	struct ice_dplls dplls;
 };
 
 extern struct workqueue_struct *ice_lag_wq;
diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
index 29f7a9852aec..24293f52f2d1 100644
--- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
@@ -1351,6 +1351,30 @@ struct ice_aqc_set_mac_lb {
 	u8 reserved[15];
 };
 
+/* Set PHY recovered clock output (direct 0x0630) */
+struct ice_aqc_set_phy_rec_clk_out {
+	u8 phy_output;
+	u8 port_num;
+#define ICE_AQC_SET_PHY_REC_CLK_OUT_CURR_PORT	0xFF
+	u8 flags;
+#define ICE_AQC_SET_PHY_REC_CLK_OUT_OUT_EN	BIT(0)
+	u8 rsvd;
+	__le32 freq;
+	u8 rsvd2[6];
+	__le16 node_handle;
+};
+
+/* Get PHY recovered clock output (direct 0x0631) */
+struct ice_aqc_get_phy_rec_clk_out {
+	u8 phy_output;
+	u8 port_num;
+#define ICE_AQC_GET_PHY_REC_CLK_OUT_CURR_PORT	0xFF
+	u8 flags;
+#define ICE_AQC_GET_PHY_REC_CLK_OUT_OUT_EN	BIT(0)
+	u8 rsvd[11];
+	__le16 node_handle;
+};
+
 struct ice_aqc_link_topo_params {
 	u8 lport_num;
 	u8 lport_num_valid;
@@ -1367,6 +1391,9 @@ struct ice_aqc_link_topo_params {
 #define ICE_AQC_LINK_TOPO_NODE_TYPE_CAGE	6
 #define ICE_AQC_LINK_TOPO_NODE_TYPE_MEZZ	7
 #define ICE_AQC_LINK_TOPO_NODE_TYPE_ID_EEPROM	8
+#define ICE_AQC_LINK_TOPO_NODE_TYPE_CLK_CTRL	9
+#define ICE_AQC_LINK_TOPO_NODE_TYPE_CLK_MUX	10
+#define ICE_AQC_LINK_TOPO_NODE_TYPE_GPS		11
 #define ICE_AQC_LINK_TOPO_NODE_CTX_S		4
 #define ICE_AQC_LINK_TOPO_NODE_CTX_M		\
 				(0xF << ICE_AQC_LINK_TOPO_NODE_CTX_S)
@@ -1403,8 +1430,13 @@ struct ice_aqc_link_topo_addr {
 struct ice_aqc_get_link_topo {
 	struct ice_aqc_link_topo_addr addr;
 	u8 node_part_num;
-#define ICE_AQC_GET_LINK_TOPO_NODE_NR_PCA9575	0x21
-#define ICE_AQC_GET_LINK_TOPO_NODE_NR_C827	0x31
+#define ICE_AQC_GET_LINK_TOPO_NODE_NR_PCA9575		0x21
+#define ICE_AQC_GET_LINK_TOPO_NODE_NR_ZL30632_80032	0x24
+#define ICE_AQC_GET_LINK_TOPO_NODE_NR_SI5383_5384	0x25
+#define ICE_AQC_GET_LINK_TOPO_NODE_NR_E822_PHY		0x30
+#define ICE_AQC_GET_LINK_TOPO_NODE_NR_C827		0x31
+#define ICE_AQC_GET_LINK_TOPO_NODE_NR_GEN_CLK_MUX	0x47
+#define ICE_AQC_GET_LINK_TOPO_NODE_NR_GEN_GPS		0x48
 	u8 rsvd[9];
 };
 
@@ -2125,6 +2157,193 @@ struct ice_aqc_get_pkg_info_resp {
 	struct ice_aqc_get_pkg_info pkg_info[];
 };
 
+/* Get CGU abilities command response data structure (indirect 0x0C61) */
+struct ice_aqc_get_cgu_abilities {
+	u8 num_inputs;
+	u8 num_outputs;
+	u8 pps_dpll_idx;
+	u8 eec_dpll_idx;
+	__le32 max_in_freq;
+	__le32 max_in_phase_adj;
+	__le32 max_out_freq;
+	__le32 max_out_phase_adj;
+	u8 cgu_part_num;
+	u8 rsvd[3];
+};
+
+/* Set CGU input config (direct 0x0C62) */
+struct ice_aqc_set_cgu_input_config {
+	u8 input_idx;
+	u8 flags1;
+#define ICE_AQC_SET_CGU_IN_CFG_FLG1_UPDATE_FREQ		BIT(6)
+#define ICE_AQC_SET_CGU_IN_CFG_FLG1_UPDATE_DELAY	BIT(7)
+	u8 flags2;
+#define ICE_AQC_SET_CGU_IN_CFG_FLG2_INPUT_EN		BIT(5)
+#define ICE_AQC_SET_CGU_IN_CFG_FLG2_ESYNC_EN		BIT(6)
+	u8 rsvd;
+	__le32 freq;
+	__le32 phase_delay;
+	u8 rsvd2[2];
+	__le16 node_handle;
+};
+
+/* Get CGU input config response descriptor structure (direct 0x0C63) */
+struct ice_aqc_get_cgu_input_config {
+	u8 input_idx;
+	u8 status;
+#define ICE_AQC_GET_CGU_IN_CFG_STATUS_LOS		BIT(0)
+#define ICE_AQC_GET_CGU_IN_CFG_STATUS_SCM_FAIL		BIT(1)
+#define ICE_AQC_GET_CGU_IN_CFG_STATUS_CFM_FAIL		BIT(2)
+#define ICE_AQC_GET_CGU_IN_CFG_STATUS_GST_FAIL		BIT(3)
+#define ICE_AQC_GET_CGU_IN_CFG_STATUS_PFM_FAIL		BIT(4)
+#define ICE_AQC_GET_CGU_IN_CFG_STATUS_ESYNC_FAIL	BIT(6)
+#define ICE_AQC_GET_CGU_IN_CFG_STATUS_ESYNC_CAP		BIT(7)
+	u8 type;
+#define ICE_AQC_GET_CGU_IN_CFG_TYPE_READ_ONLY		BIT(0)
+#define ICE_AQC_GET_CGU_IN_CFG_TYPE_GPS			BIT(4)
+#define ICE_AQC_GET_CGU_IN_CFG_TYPE_EXTERNAL		BIT(5)
+#define ICE_AQC_GET_CGU_IN_CFG_TYPE_PHY			BIT(6)
+	u8 flags1;
+#define ICE_AQC_GET_CGU_IN_CFG_FLG1_PHASE_DELAY_SUPP	BIT(0)
+#define ICE_AQC_GET_CGU_IN_CFG_FLG1_1PPS_SUPP		BIT(2)
+#define ICE_AQC_GET_CGU_IN_CFG_FLG1_10MHZ_SUPP		BIT(3)
+#define ICE_AQC_GET_CGU_IN_CFG_FLG1_ANYFREQ		BIT(7)
+	__le32 freq;
+	__le32 phase_delay;
+	u8 flags2;
+#define ICE_AQC_GET_CGU_IN_CFG_FLG2_INPUT_EN		BIT(5)
+#define ICE_AQC_GET_CGU_IN_CFG_FLG2_ESYNC_EN		BIT(6)
+	u8 rsvd[1];
+	__le16 node_handle;
+};
+
+/* Set CGU output config (direct 0x0C64) */
+struct ice_aqc_set_cgu_output_config {
+	u8 output_idx;
+	u8 flags;
+#define ICE_AQC_SET_CGU_OUT_CFG_OUT_EN		BIT(0)
+#define ICE_AQC_SET_CGU_OUT_CFG_ESYNC_EN	BIT(1)
+#define ICE_AQC_SET_CGU_OUT_CFG_UPDATE_FREQ     BIT(2)
+#define ICE_AQC_SET_CGU_OUT_CFG_UPDATE_PHASE    BIT(3)
+#define ICE_AQC_SET_CGU_OUT_CFG_UPDATE_SRC_SEL  BIT(4)
+	u8 src_sel;
+#define ICE_AQC_SET_CGU_OUT_CFG_DPLL_SRC_SEL    ICE_M(0x1F, 0)
+	u8 rsvd;
+	__le32 freq;
+	__le32 phase_delay;
+	u8 rsvd2[2];
+	__le16 node_handle;
+};
+
+/* Get CGU output config (direct 0x0C65) */
+struct ice_aqc_get_cgu_output_config {
+	u8 output_idx;
+	u8 flags;
+#define ICE_AQC_GET_CGU_OUT_CFG_OUT_EN		BIT(0)
+#define ICE_AQC_GET_CGU_OUT_CFG_ESYNC_EN	BIT(1)
+#define ICE_AQC_GET_CGU_OUT_CFG_ESYNC_ABILITY	BIT(2)
+	u8 src_sel;
+#define ICE_AQC_GET_CGU_OUT_CFG_DPLL_SRC_SEL_SHIFT	0
+#define ICE_AQC_GET_CGU_OUT_CFG_DPLL_SRC_SEL \
+	ICE_M(0x1F, ICE_AQC_GET_CGU_OUT_CFG_DPLL_SRC_SEL_SHIFT)
+#define ICE_AQC_GET_CGU_OUT_CFG_DPLL_MODE_SHIFT		5
+#define ICE_AQC_GET_CGU_OUT_CFG_DPLL_MODE \
+	ICE_M(0x7, ICE_AQC_GET_CGU_OUT_CFG_DPLL_MODE_SHIFT)
+	u8 rsvd;
+	__le32 freq;
+	__le32 src_freq;
+	u8 rsvd2[2];
+	__le16 node_handle;
+};
+
+/* Get CGU DPLL status (direct 0x0C66) */
+struct ice_aqc_get_cgu_dpll_status {
+	u8 dpll_num;
+	u8 ref_state;
+#define ICE_AQC_GET_CGU_DPLL_STATUS_REF_SW_LOS		BIT(0)
+#define ICE_AQC_GET_CGU_DPLL_STATUS_REF_SW_SCM		BIT(1)
+#define ICE_AQC_GET_CGU_DPLL_STATUS_REF_SW_CFM		BIT(2)
+#define ICE_AQC_GET_CGU_DPLL_STATUS_REF_SW_GST		BIT(3)
+#define ICE_AQC_GET_CGU_DPLL_STATUS_REF_SW_PFM		BIT(4)
+#define ICE_AQC_GET_CGU_DPLL_STATUS_FAST_LOCK_EN	BIT(5)
+#define ICE_AQC_GET_CGU_DPLL_STATUS_REF_SW_ESYNC	BIT(6)
+	u8 dpll_state;
+#define ICE_AQC_GET_CGU_DPLL_STATUS_STATE_LOCK		BIT(0)
+#define ICE_AQC_GET_CGU_DPLL_STATUS_STATE_HO		BIT(1)
+#define ICE_AQC_GET_CGU_DPLL_STATUS_STATE_HO_READY	BIT(2)
+#define ICE_AQC_GET_CGU_DPLL_STATUS_STATE_FLHIT		BIT(5)
+#define ICE_AQC_GET_CGU_DPLL_STATUS_STATE_PSLHIT	BIT(7)
+	u8 config;
+#define ICE_AQC_GET_CGU_DPLL_CONFIG_CLK_REF_SEL		ICE_M(0x1F, 0)
+#define ICE_AQC_GET_CGU_DPLL_CONFIG_MODE_SHIFT		5
+#define ICE_AQC_GET_CGU_DPLL_CONFIG_MODE		\
+	ICE_M(0x7, ICE_AQC_GET_CGU_DPLL_CONFIG_MODE_SHIFT)
+#define ICE_AQC_GET_CGU_DPLL_CONFIG_MODE_FREERUN	0
+#define ICE_AQC_GET_CGU_DPLL_CONFIG_MODE_AUTOMATIC	\
+	ICE_M(0x3, ICE_AQC_GET_CGU_DPLL_CONFIG_MODE_SHIFT)
+	__le32 phase_offset_h;
+	__le32 phase_offset_l;
+	u8 eec_mode;
+#define ICE_AQC_GET_CGU_DPLL_STATUS_EEC_MODE_1		0xA
+#define ICE_AQC_GET_CGU_DPLL_STATUS_EEC_MODE_2		0xB
+#define ICE_AQC_GET_CGU_DPLL_STATUS_EEC_MODE_UNKNOWN	0xF
+	u8 rsvd[1];
+	__le16 node_handle;
+};
+
+/* Set CGU DPLL config (direct 0x0C67) */
+struct ice_aqc_set_cgu_dpll_config {
+	u8 dpll_num;
+	u8 ref_state;
+#define ICE_AQC_SET_CGU_DPLL_CONFIG_REF_SW_LOS		BIT(0)
+#define ICE_AQC_SET_CGU_DPLL_CONFIG_REF_SW_SCM		BIT(1)
+#define ICE_AQC_SET_CGU_DPLL_CONFIG_REF_SW_CFM		BIT(2)
+#define ICE_AQC_SET_CGU_DPLL_CONFIG_REF_SW_GST		BIT(3)
+#define ICE_AQC_SET_CGU_DPLL_CONFIG_REF_SW_PFM		BIT(4)
+#define ICE_AQC_SET_CGU_DPLL_CONFIG_REF_FLOCK_EN	BIT(5)
+#define ICE_AQC_SET_CGU_DPLL_CONFIG_REF_SW_ESYNC	BIT(6)
+	u8 rsvd;
+	u8 config;
+#define ICE_AQC_SET_CGU_DPLL_CONFIG_CLK_REF_SEL		ICE_M(0x1F, 0)
+#define ICE_AQC_SET_CGU_DPLL_CONFIG_MODE_SHIFT		5
+#define ICE_AQC_SET_CGU_DPLL_CONFIG_MODE		\
+	ICE_M(0x7, ICE_AQC_SET_CGU_DPLL_CONFIG_MODE_SHIFT)
+#define ICE_AQC_SET_CGU_DPLL_CONFIG_MODE_FREERUN	0
+#define ICE_AQC_SET_CGU_DPLL_CONFIG_MODE_AUTOMATIC	\
+	ICE_M(0x3, ICE_AQC_SET_CGU_DPLL_CONFIG_MODE_SHIFT)
+	u8 rsvd2[8];
+	u8 eec_mode;
+	u8 rsvd3[1];
+	__le16 node_handle;
+};
+
+/* Set CGU reference priority (direct 0x0C68) */
+struct ice_aqc_set_cgu_ref_prio {
+	u8 dpll_num;
+	u8 ref_idx;
+	u8 ref_priority;
+	u8 rsvd[11];
+	__le16 node_handle;
+};
+
+/* Get CGU reference priority (direct 0x0C69) */
+struct ice_aqc_get_cgu_ref_prio {
+	u8 dpll_num;
+	u8 ref_idx;
+	u8 ref_priority; /* Valid only in response */
+	u8 rsvd[13];
+};
+
+/* Get CGU info (direct 0x0C6A) */
+struct ice_aqc_get_cgu_info {
+	__le32 cgu_id;
+	__le32 cgu_cfg_ver;
+	__le32 cgu_fw_ver;
+	u8 node_part_num;
+	u8 dev_rev;
+	__le16 node_handle;
+};
+
 /* Driver Shared Parameters (direct, 0x0C90) */
 struct ice_aqc_driver_shared_params {
 	u8 set_or_get_op;
@@ -2194,6 +2413,8 @@ struct ice_aq_desc {
 		struct ice_aqc_get_phy_caps get_phy;
 		struct ice_aqc_set_phy_cfg set_phy;
 		struct ice_aqc_restart_an restart_an;
+		struct ice_aqc_set_phy_rec_clk_out set_phy_rec_clk_out;
+		struct ice_aqc_get_phy_rec_clk_out get_phy_rec_clk_out;
 		struct ice_aqc_gpio read_write_gpio;
 		struct ice_aqc_sff_eeprom read_write_sff_param;
 		struct ice_aqc_set_port_id_led set_port_id_led;
@@ -2234,6 +2455,15 @@ struct ice_aq_desc {
 		struct ice_aqc_fw_logging fw_logging;
 		struct ice_aqc_get_clear_fw_log get_clear_fw_log;
 		struct ice_aqc_download_pkg download_pkg;
+		struct ice_aqc_set_cgu_input_config set_cgu_input_config;
+		struct ice_aqc_get_cgu_input_config get_cgu_input_config;
+		struct ice_aqc_set_cgu_output_config set_cgu_output_config;
+		struct ice_aqc_get_cgu_output_config get_cgu_output_config;
+		struct ice_aqc_get_cgu_dpll_status get_cgu_dpll_status;
+		struct ice_aqc_set_cgu_dpll_config set_cgu_dpll_config;
+		struct ice_aqc_set_cgu_ref_prio set_cgu_ref_prio;
+		struct ice_aqc_get_cgu_ref_prio get_cgu_ref_prio;
+		struct ice_aqc_get_cgu_info get_cgu_info;
 		struct ice_aqc_driver_shared_params drv_shared_params;
 		struct ice_aqc_set_mac_lb set_mac_lb;
 		struct ice_aqc_alloc_free_res_cmd sw_res_ctrl;
@@ -2358,6 +2588,8 @@ enum ice_adminq_opc {
 	ice_aqc_opc_get_link_status			= 0x0607,
 	ice_aqc_opc_set_event_mask			= 0x0613,
 	ice_aqc_opc_set_mac_lb				= 0x0620,
+	ice_aqc_opc_set_phy_rec_clk_out			= 0x0630,
+	ice_aqc_opc_get_phy_rec_clk_out			= 0x0631,
 	ice_aqc_opc_get_link_topo			= 0x06E0,
 	ice_aqc_opc_read_i2c				= 0x06E2,
 	ice_aqc_opc_write_i2c				= 0x06E3,
@@ -2413,6 +2645,18 @@ enum ice_adminq_opc {
 	ice_aqc_opc_update_pkg				= 0x0C42,
 	ice_aqc_opc_get_pkg_info_list			= 0x0C43,
 
+	/* 1588/SyncE commands/events */
+	ice_aqc_opc_get_cgu_abilities			= 0x0C61,
+	ice_aqc_opc_set_cgu_input_config		= 0x0C62,
+	ice_aqc_opc_get_cgu_input_config		= 0x0C63,
+	ice_aqc_opc_set_cgu_output_config		= 0x0C64,
+	ice_aqc_opc_get_cgu_output_config		= 0x0C65,
+	ice_aqc_opc_get_cgu_dpll_status			= 0x0C66,
+	ice_aqc_opc_set_cgu_dpll_config			= 0x0C67,
+	ice_aqc_opc_set_cgu_ref_prio			= 0x0C68,
+	ice_aqc_opc_get_cgu_ref_prio			= 0x0C69,
+	ice_aqc_opc_get_cgu_info			= 0x0C6A,
+
 	ice_aqc_opc_driver_shared_params		= 0x0C90,
 
 	/* Standalone Commands/Events */
diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
index 80deca45ab59..8f31ae449948 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.c
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -8,6 +8,7 @@
 #include "ice_ptp_hw.h"
 
 #define ICE_PF_RESET_WAIT_COUNT	300
+#define ICE_MAX_NETLIST_SIZE	10
 
 static const char * const ice_link_mode_str_low[] = {
 	[0] = "100BASE_TX",
@@ -436,6 +437,81 @@ ice_aq_get_link_topo_handle(struct ice_port_info *pi, u8 node_type,
 }
 
 /**
+ * ice_aq_get_netlist_node
+ * @hw: pointer to the hw struct
+ * @cmd: get_link_topo AQ structure
+ * @node_part_number: output node part number if node found
+ * @node_handle: output node handle parameter if node found
+ *
+ * Get netlist node handle.
+ */
+int
+ice_aq_get_netlist_node(struct ice_hw *hw, struct ice_aqc_get_link_topo *cmd,
+			u8 *node_part_number, u16 *node_handle)
+{
+	struct ice_aq_desc desc;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_link_topo);
+	desc.params.get_link_topo = *cmd;
+
+	if (ice_aq_send_cmd(hw, &desc, NULL, 0, NULL))
+		return -EINTR;
+
+	if (node_handle)
+		*node_handle =
+			le16_to_cpu(desc.params.get_link_topo.addr.handle);
+	if (node_part_number)
+		*node_part_number = desc.params.get_link_topo.node_part_num;
+
+	return 0;
+}
+
+/**
+ * ice_find_netlist_node
+ * @hw: pointer to the hw struct
+ * @node_type_ctx: type of netlist node to look for
+ * @node_part_number: node part number to look for
+ * @node_handle: output parameter if node found - optional
+ *
+ * Find and return the node handle for a given node type and part number in the
+ * netlist. When found ICE_SUCCESS is returned, ICE_ERR_DOES_NOT_EXIST
+ * otherwise. If node_handle provided, it would be set to found node handle.
+ */
+int
+ice_find_netlist_node(struct ice_hw *hw, u8 node_type_ctx, u8 node_part_number,
+		      u16 *node_handle)
+{
+	struct ice_aqc_get_link_topo cmd;
+	u8 rec_node_part_number;
+	u16 rec_node_handle;
+	u8 idx;
+
+	for (idx = 0; idx < ICE_MAX_NETLIST_SIZE; idx++) {
+		int status;
+
+		memset(&cmd, 0, sizeof(cmd));
+
+		cmd.addr.topo_params.node_type_ctx =
+			(node_type_ctx << ICE_AQC_LINK_TOPO_NODE_TYPE_S);
+		cmd.addr.topo_params.index = idx;
+
+		status = ice_aq_get_netlist_node(hw, &cmd,
+						 &rec_node_part_number,
+						 &rec_node_handle);
+		if (status)
+			return status;
+
+		if (rec_node_part_number == node_part_number) {
+			if (node_handle)
+				*node_handle = rec_node_handle;
+			return 0;
+		}
+	}
+
+	return -ENOTBLK;
+}
+
+/**
  * ice_is_media_cage_present
  * @pi: port information structure
  *
@@ -2655,33 +2731,6 @@ ice_parse_dev_caps(struct ice_hw *hw, struct ice_hw_dev_caps *dev_p,
 }
 
 /**
- * ice_aq_get_netlist_node
- * @hw: pointer to the hw struct
- * @cmd: get_link_topo AQ structure
- * @node_part_number: output node part number if node found
- * @node_handle: output node handle parameter if node found
- */
-static int
-ice_aq_get_netlist_node(struct ice_hw *hw, struct ice_aqc_get_link_topo *cmd,
-			u8 *node_part_number, u16 *node_handle)
-{
-	struct ice_aq_desc desc;
-
-	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_link_topo);
-	desc.params.get_link_topo = *cmd;
-
-	if (ice_aq_send_cmd(hw, &desc, NULL, 0, NULL))
-		return -EIO;
-
-	if (node_handle)
-		*node_handle = le16_to_cpu(desc.params.get_link_topo.addr.handle);
-	if (node_part_number)
-		*node_part_number = desc.params.get_link_topo.node_part_num;
-
-	return 0;
-}
-
-/**
  * ice_is_pf_c827 - check if pf contains c827 phy
  * @hw: pointer to the hw struct
  */
@@ -2716,6 +2765,21 @@ bool ice_is_pf_c827(struct ice_hw *hw)
 }
 
 /**
+ * ice_is_gps_in_netlist
+ * @hw: pointer to the hw struct
+ *
+ * Check if the GPS generic device is present in the netlist
+ */
+bool ice_is_gps_in_netlist(struct ice_hw *hw)
+{
+	if (ice_find_netlist_node(hw, ICE_AQC_LINK_TOPO_NODE_TYPE_GPS,
+				  ICE_AQC_GET_LINK_TOPO_NODE_NR_GEN_GPS, NULL))
+		return false;
+
+	return true;
+}
+
+/**
  * ice_aq_list_caps - query function/device capabilities
  * @hw: pointer to the HW struct
  * @buf: a buffer to hold the capabilities
@@ -4999,6 +5063,395 @@ ice_dis_vsi_rdma_qset(struct ice_port_info *pi, u16 count, u32 *qset_teid,
 }
 
 /**
+ * ice_aq_get_cgu_abilities - get cgu abilities
+ * @hw: pointer to the HW struct
+ * @abilities: CGU abilities
+ *
+ * Get CGU abilities (0x0C61)
+ * Return: 0 on success or negative value on failure.
+ */
+int
+ice_aq_get_cgu_abilities(struct ice_hw *hw,
+			 struct ice_aqc_get_cgu_abilities *abilities)
+{
+	struct ice_aq_desc desc;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_cgu_abilities);
+	return ice_aq_send_cmd(hw, &desc, abilities, sizeof(*abilities), NULL);
+}
+
+/**
+ * ice_aq_set_input_pin_cfg - set input pin config
+ * @hw: pointer to the HW struct
+ * @input_idx: Input index
+ * @flags1: Input flags
+ * @flags2: Input flags
+ * @freq: Frequency in Hz
+ * @phase_delay: Delay in ps
+ *
+ * Set CGU input config (0x0C62)
+ * Return: 0 on success or negative value on failure.
+ */
+int
+ice_aq_set_input_pin_cfg(struct ice_hw *hw, u8 input_idx, u8 flags1, u8 flags2,
+			 u32 freq, s32 phase_delay)
+{
+	struct ice_aqc_set_cgu_input_config *cmd;
+	struct ice_aq_desc desc;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_cgu_input_config);
+	cmd = &desc.params.set_cgu_input_config;
+	cmd->input_idx = input_idx;
+	cmd->flags1 = flags1;
+	cmd->flags2 = flags2;
+	cmd->freq = cpu_to_le32(freq);
+	cmd->phase_delay = cpu_to_le32(phase_delay);
+
+	return ice_aq_send_cmd(hw, &desc, NULL, 0, NULL);
+}
+
+/**
+ * ice_aq_get_input_pin_cfg - get input pin config
+ * @hw: pointer to the HW struct
+ * @input_idx: Input index
+ * @status: Pin status
+ * @type: Pin type
+ * @flags1: Input flags
+ * @flags2: Input flags
+ * @freq: Frequency in Hz
+ * @phase_delay: Delay in ps
+ *
+ * Get CGU input config (0x0C63)
+ * Return: 0 on success or negative value on failure.
+ */
+int
+ice_aq_get_input_pin_cfg(struct ice_hw *hw, u8 input_idx, u8 *status, u8 *type,
+			 u8 *flags1, u8 *flags2, u32 *freq, s32 *phase_delay)
+{
+	struct ice_aqc_get_cgu_input_config *cmd;
+	struct ice_aq_desc desc;
+	int ret;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_cgu_input_config);
+	cmd = &desc.params.get_cgu_input_config;
+	cmd->input_idx = input_idx;
+
+	ret = ice_aq_send_cmd(hw, &desc, NULL, 0, NULL);
+	if (!ret) {
+		if (status)
+			*status = cmd->status;
+		if (type)
+			*type = cmd->type;
+		if (flags1)
+			*flags1 = cmd->flags1;
+		if (flags2)
+			*flags2 = cmd->flags2;
+		if (freq)
+			*freq = le32_to_cpu(cmd->freq);
+		if (phase_delay)
+			*phase_delay = le32_to_cpu(cmd->phase_delay);
+	}
+
+	return ret;
+}
+
+/**
+ * ice_aq_set_output_pin_cfg - set output pin config
+ * @hw: pointer to the HW struct
+ * @output_idx: Output index
+ * @flags: Output flags
+ * @src_sel: Index of DPLL block
+ * @freq: Output frequency
+ * @phase_delay: Output phase compensation
+ *
+ * Set CGU output config (0x0C64)
+ * Return: 0 on success or negative value on failure.
+ */
+int
+ice_aq_set_output_pin_cfg(struct ice_hw *hw, u8 output_idx, u8 flags,
+			  u8 src_sel, u32 freq, s32 phase_delay)
+{
+	struct ice_aqc_set_cgu_output_config *cmd;
+	struct ice_aq_desc desc;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_cgu_output_config);
+	cmd = &desc.params.set_cgu_output_config;
+	cmd->output_idx = output_idx;
+	cmd->flags = flags;
+	cmd->src_sel = src_sel;
+	cmd->freq = cpu_to_le32(freq);
+	cmd->phase_delay = cpu_to_le32(phase_delay);
+
+	return ice_aq_send_cmd(hw, &desc, NULL, 0, NULL);
+}
+
+/**
+ * ice_aq_get_output_pin_cfg - get output pin config
+ * @hw: pointer to the HW struct
+ * @output_idx: Output index
+ * @flags: Output flags
+ * @src_sel: Internal DPLL source
+ * @freq: Output frequency
+ * @src_freq: Source frequency
+ *
+ * Get CGU output config (0x0C65)
+ * Return: 0 on success or negative value on failure.
+ */
+int
+ice_aq_get_output_pin_cfg(struct ice_hw *hw, u8 output_idx, u8 *flags,
+			  u8 *src_sel, u32 *freq, u32 *src_freq)
+{
+	struct ice_aqc_get_cgu_output_config *cmd;
+	struct ice_aq_desc desc;
+	int ret;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_cgu_output_config);
+	cmd = &desc.params.get_cgu_output_config;
+	cmd->output_idx = output_idx;
+
+	ret = ice_aq_send_cmd(hw, &desc, NULL, 0, NULL);
+	if (!ret) {
+		if (flags)
+			*flags = cmd->flags;
+		if (src_sel)
+			*src_sel = cmd->src_sel;
+		if (freq)
+			*freq = le32_to_cpu(cmd->freq);
+		if (src_freq)
+			*src_freq = le32_to_cpu(cmd->src_freq);
+	}
+
+	return ret;
+}
+
+/**
+ * ice_aq_get_cgu_dpll_status - get dpll status
+ * @hw: pointer to the HW struct
+ * @dpll_num: DPLL index
+ * @ref_state: Reference clock state
+ * @config: current DPLL config
+ * @dpll_state: current DPLL state
+ * @phase_offset: Phase offset in ns
+ * @eec_mode: EEC_mode
+ *
+ * Get CGU DPLL status (0x0C66)
+ * Return: 0 on success or negative value on failure.
+ */
+int
+ice_aq_get_cgu_dpll_status(struct ice_hw *hw, u8 dpll_num, u8 *ref_state,
+			   u8 *dpll_state, u8 *config, s64 *phase_offset,
+			   u8 *eec_mode)
+{
+	struct ice_aqc_get_cgu_dpll_status *cmd;
+	const s64 nsec_per_psec = 1000LL;
+	struct ice_aq_desc desc;
+	int status;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_cgu_dpll_status);
+	cmd = &desc.params.get_cgu_dpll_status;
+	cmd->dpll_num = dpll_num;
+
+	status = ice_aq_send_cmd(hw, &desc, NULL, 0, NULL);
+	if (!status) {
+		*ref_state = cmd->ref_state;
+		*dpll_state = cmd->dpll_state;
+		*config = cmd->config;
+		*phase_offset = le32_to_cpu(cmd->phase_offset_h);
+		*phase_offset <<= 32;
+		*phase_offset += le32_to_cpu(cmd->phase_offset_l);
+		*phase_offset = div64_s64(sign_extend64(*phase_offset, 47),
+					  nsec_per_psec);
+		*eec_mode = cmd->eec_mode;
+	}
+
+	return status;
+}
+
+/**
+ * ice_aq_set_cgu_dpll_config - set dpll config
+ * @hw: pointer to the HW struct
+ * @dpll_num: DPLL index
+ * @ref_state: Reference clock state
+ * @config: DPLL config
+ * @eec_mode: EEC mode
+ *
+ * Set CGU DPLL config (0x0C67)
+ * Return: 0 on success or negative value on failure.
+ */
+int
+ice_aq_set_cgu_dpll_config(struct ice_hw *hw, u8 dpll_num, u8 ref_state,
+			   u8 config, u8 eec_mode)
+{
+	struct ice_aqc_set_cgu_dpll_config *cmd;
+	struct ice_aq_desc desc;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_cgu_dpll_config);
+	cmd = &desc.params.set_cgu_dpll_config;
+	cmd->dpll_num = dpll_num;
+	cmd->ref_state = ref_state;
+	cmd->config = config;
+	cmd->eec_mode = eec_mode;
+
+	return ice_aq_send_cmd(hw, &desc, NULL, 0, NULL);
+}
+
+/**
+ * ice_aq_set_cgu_ref_prio - set input reference priority
+ * @hw: pointer to the HW struct
+ * @dpll_num: DPLL index
+ * @ref_idx: Reference pin index
+ * @ref_priority: Reference input priority
+ *
+ * Set CGU reference priority (0x0C68)
+ * Return: 0 on success or negative value on failure.
+ */
+int
+ice_aq_set_cgu_ref_prio(struct ice_hw *hw, u8 dpll_num, u8 ref_idx,
+			u8 ref_priority)
+{
+	struct ice_aqc_set_cgu_ref_prio *cmd;
+	struct ice_aq_desc desc;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_cgu_ref_prio);
+	cmd = &desc.params.set_cgu_ref_prio;
+	cmd->dpll_num = dpll_num;
+	cmd->ref_idx = ref_idx;
+	cmd->ref_priority = ref_priority;
+
+	return ice_aq_send_cmd(hw, &desc, NULL, 0, NULL);
+}
+
+/**
+ * ice_aq_get_cgu_ref_prio - get input reference priority
+ * @hw: pointer to the HW struct
+ * @dpll_num: DPLL index
+ * @ref_idx: Reference pin index
+ * @ref_prio: Reference input priority
+ *
+ * Get CGU reference priority (0x0C69)
+ * Return: 0 on success or negative value on failure.
+ */
+int
+ice_aq_get_cgu_ref_prio(struct ice_hw *hw, u8 dpll_num, u8 ref_idx,
+			u8 *ref_prio)
+{
+	struct ice_aqc_get_cgu_ref_prio *cmd;
+	struct ice_aq_desc desc;
+	int status;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_cgu_ref_prio);
+	cmd = &desc.params.get_cgu_ref_prio;
+	cmd->dpll_num = dpll_num;
+	cmd->ref_idx = ref_idx;
+
+	status = ice_aq_send_cmd(hw, &desc, NULL, 0, NULL);
+	if (!status)
+		*ref_prio = cmd->ref_priority;
+
+	return status;
+}
+
+/**
+ * ice_aq_get_cgu_info - get cgu info
+ * @hw: pointer to the HW struct
+ * @cgu_id: CGU ID
+ * @cgu_cfg_ver: CGU config version
+ * @cgu_fw_ver: CGU firmware version
+ *
+ * Get CGU info (0x0C6A)
+ * Return: 0 on success or negative value on failure.
+ */
+int
+ice_aq_get_cgu_info(struct ice_hw *hw, u32 *cgu_id, u32 *cgu_cfg_ver,
+		    u32 *cgu_fw_ver)
+{
+	struct ice_aqc_get_cgu_info *cmd;
+	struct ice_aq_desc desc;
+	int status;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_cgu_info);
+	cmd = &desc.params.get_cgu_info;
+
+	status = ice_aq_send_cmd(hw, &desc, NULL, 0, NULL);
+	if (!status) {
+		*cgu_id = le32_to_cpu(cmd->cgu_id);
+		*cgu_cfg_ver = le32_to_cpu(cmd->cgu_cfg_ver);
+		*cgu_fw_ver = le32_to_cpu(cmd->cgu_fw_ver);
+	}
+
+	return status;
+}
+
+/**
+ * ice_aq_set_phy_rec_clk_out - set RCLK phy out
+ * @hw: pointer to the HW struct
+ * @phy_output: PHY reference clock output pin
+ * @enable: GPIO state to be applied
+ * @freq: PHY output frequency
+ *
+ * Set phy recovered clock as reference (0x0630)
+ * Return: 0 on success or negative value on failure.
+ */
+int
+ice_aq_set_phy_rec_clk_out(struct ice_hw *hw, u8 phy_output, bool enable,
+			   u32 *freq)
+{
+	struct ice_aqc_set_phy_rec_clk_out *cmd;
+	struct ice_aq_desc desc;
+	int status;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_phy_rec_clk_out);
+	cmd = &desc.params.set_phy_rec_clk_out;
+	cmd->phy_output = phy_output;
+	cmd->port_num = ICE_AQC_SET_PHY_REC_CLK_OUT_CURR_PORT;
+	cmd->flags = enable & ICE_AQC_SET_PHY_REC_CLK_OUT_OUT_EN;
+	cmd->freq = cpu_to_le32(*freq);
+
+	status = ice_aq_send_cmd(hw, &desc, NULL, 0, NULL);
+	if (!status)
+		*freq = le32_to_cpu(cmd->freq);
+
+	return status;
+}
+
+/**
+ * ice_aq_get_phy_rec_clk_out - get phy recovered signal info
+ * @hw: pointer to the HW struct
+ * @phy_output: PHY reference clock output pin
+ * @port_num: Port number
+ * @flags: PHY flags
+ * @node_handle: PHY output frequency
+ *
+ * Get PHY recovered clock output info (0x0631)
+ * Return: 0 on success or negative value on failure.
+ */
+int
+ice_aq_get_phy_rec_clk_out(struct ice_hw *hw, u8 *phy_output, u8 *port_num,
+			   u8 *flags, u16 *node_handle)
+{
+	struct ice_aqc_get_phy_rec_clk_out *cmd;
+	struct ice_aq_desc desc;
+	int status;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_phy_rec_clk_out);
+	cmd = &desc.params.get_phy_rec_clk_out;
+	cmd->phy_output = *phy_output;
+
+	status = ice_aq_send_cmd(hw, &desc, NULL, 0, NULL);
+	if (!status) {
+		*phy_output = cmd->phy_output;
+		if (port_num)
+			*port_num = cmd->port_num;
+		if (flags)
+			*flags = cmd->flags;
+		if (node_handle)
+			*node_handle = le16_to_cpu(cmd->node_handle);
+	}
+
+	return status;
+}
+
+/**
  * ice_replay_pre_init - replay pre initialization
  * @hw: pointer to the HW struct
  *
diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h
index 226b81f97a92..47a75651ca38 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.h
+++ b/drivers/net/ethernet/intel/ice/ice_common.h
@@ -93,6 +93,13 @@ ice_aq_get_phy_caps(struct ice_port_info *pi, bool qual_mods, u8 report_mode,
 		    struct ice_aqc_get_phy_caps_data *caps,
 		    struct ice_sq_cd *cd);
 bool ice_is_pf_c827(struct ice_hw *hw);
+bool ice_is_gps_in_netlist(struct ice_hw *hw);
+int
+ice_find_netlist_node(struct ice_hw *hw, u8 node_type_ctx, u8 node_part_number,
+		      u16 *node_handle);
+int
+ice_aq_get_netlist_node(struct ice_hw *hw, struct ice_aqc_get_link_topo *cmd,
+			u8 *node_part_number, u16 *node_handle);
 int
 ice_aq_list_caps(struct ice_hw *hw, void *buf, u16 buf_size, u32 *cap_count,
 		 enum ice_adminq_opc opc, struct ice_sq_cd *cd);
@@ -196,6 +203,44 @@ void ice_output_fw_log(struct ice_hw *hw, struct ice_aq_desc *desc, void *buf);
 struct ice_q_ctx *
 ice_get_lan_q_ctx(struct ice_hw *hw, u16 vsi_handle, u8 tc, u16 q_handle);
 int ice_sbq_rw_reg(struct ice_hw *hw, struct ice_sbq_msg_input *in);
+int
+ice_aq_get_cgu_abilities(struct ice_hw *hw,
+			 struct ice_aqc_get_cgu_abilities *abilities);
+int
+ice_aq_set_input_pin_cfg(struct ice_hw *hw, u8 input_idx, u8 flags1, u8 flags2,
+			 u32 freq, s32 phase_delay);
+int
+ice_aq_get_input_pin_cfg(struct ice_hw *hw, u8 input_idx, u8 *status, u8 *type,
+			 u8 *flags1, u8 *flags2, u32 *freq, s32 *phase_delay);
+int
+ice_aq_set_output_pin_cfg(struct ice_hw *hw, u8 output_idx, u8 flags,
+			  u8 src_sel, u32 freq, s32 phase_delay);
+int
+ice_aq_get_output_pin_cfg(struct ice_hw *hw, u8 output_idx, u8 *flags,
+			  u8 *src_sel, u32 *freq, u32 *src_freq);
+int
+ice_aq_get_cgu_dpll_status(struct ice_hw *hw, u8 dpll_num, u8 *ref_state,
+			   u8 *dpll_state, u8 *config, s64 *phase_offset,
+			   u8 *eec_mode);
+int
+ice_aq_set_cgu_dpll_config(struct ice_hw *hw, u8 dpll_num, u8 ref_state,
+			   u8 config, u8 eec_mode);
+int
+ice_aq_set_cgu_ref_prio(struct ice_hw *hw, u8 dpll_num, u8 ref_idx,
+			u8 ref_priority);
+int
+ice_aq_get_cgu_ref_prio(struct ice_hw *hw, u8 dpll_num, u8 ref_idx,
+			u8 *ref_prio);
+int
+ice_aq_get_cgu_info(struct ice_hw *hw, u32 *cgu_id, u32 *cgu_cfg_ver,
+		    u32 *cgu_fw_ver);
+
+int
+ice_aq_set_phy_rec_clk_out(struct ice_hw *hw, u8 phy_output, bool enable,
+			   u32 *freq);
+int
+ice_aq_get_phy_rec_clk_out(struct ice_hw *hw, u8 *phy_output, u8 *port_num,
+			   u8 *flags, u16 *node_handle);
 void
 ice_stat_update40(struct ice_hw *hw, u32 reg, bool prev_stat_loaded,
 		  u64 *prev_stat, u64 *cur_stat);
diff --git a/drivers/net/ethernet/intel/ice/ice_dpll.c b/drivers/net/ethernet/intel/ice/ice_dpll.c
new file mode 100644
index 000000000000..1faee9cb944d
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_dpll.c
@@ -0,0 +1,1904 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2022, Intel Corporation. */
+
+#include "ice.h"
+#include "ice_lib.h"
+#include "ice_trace.h"
+#include <linux/dpll.h>
+
+#define ICE_CGU_STATE_ACQ_ERR_THRESHOLD		50
+#define ICE_DPLL_PIN_IDX_INVALID		0xff
+#define ICE_DPLL_RCLK_NUM_PER_PF		1
+
+/**
+ * enum ice_dpll_pin_type - enumerate ice pin types:
+ * @ICE_DPLL_PIN_INVALID: invalid pin type
+ * @ICE_DPLL_PIN_TYPE_INPUT: input pin
+ * @ICE_DPLL_PIN_TYPE_OUTPUT: output pin
+ * @ICE_DPLL_PIN_TYPE_RCLK_INPUT: recovery clock input pin
+ */
+enum ice_dpll_pin_type {
+	ICE_DPLL_PIN_INVALID,
+	ICE_DPLL_PIN_TYPE_INPUT,
+	ICE_DPLL_PIN_TYPE_OUTPUT,
+	ICE_DPLL_PIN_TYPE_RCLK_INPUT,
+};
+
+static const char * const pin_type_name[] = {
+	[ICE_DPLL_PIN_TYPE_INPUT] = "input",
+	[ICE_DPLL_PIN_TYPE_OUTPUT] = "output",
+	[ICE_DPLL_PIN_TYPE_RCLK_INPUT] = "rclk-input",
+};
+
+/**
+ * ice_dpll_pin_freq_set - set pin's frequency
+ * @pf: private board structure
+ * @pin: pointer to a pin
+ * @pin_type: type of pin being configured
+ * @freq: frequency to be set
+ * @extack: error reporting
+ *
+ * Set requested frequency on a pin.
+ *
+ * Context: Called under pf->dplls.lock
+ * Return:
+ * * 0 - success
+ * * negative - error on AQ or wrong pin type given
+ */
+static int
+ice_dpll_pin_freq_set(struct ice_pf *pf, struct ice_dpll_pin *pin,
+		      enum ice_dpll_pin_type pin_type, const u32 freq,
+		      struct netlink_ext_ack *extack)
+{
+	u8 flags;
+	int ret;
+
+	switch (pin_type) {
+	case ICE_DPLL_PIN_TYPE_INPUT:
+		flags = ICE_AQC_SET_CGU_IN_CFG_FLG1_UPDATE_FREQ;
+		ret = ice_aq_set_input_pin_cfg(&pf->hw, pin->idx, flags,
+					       pin->flags[0], freq, 0);
+		break;
+	case ICE_DPLL_PIN_TYPE_OUTPUT:
+		flags = ICE_AQC_SET_CGU_OUT_CFG_UPDATE_FREQ;
+		ret = ice_aq_set_output_pin_cfg(&pf->hw, pin->idx, flags,
+						0, freq, 0);
+		break;
+	default:
+		return -EINVAL;
+	}
+	if (ret) {
+		NL_SET_ERR_MSG_FMT(extack,
+				   "err:%d %s failed to set pin freq:%u on pin:%u\n",
+				   ret,
+				   ice_aq_str(pf->hw.adminq.sq_last_status),
+				   freq, pin->idx);
+		return ret;
+	}
+	pin->freq = freq;
+
+	return 0;
+}
+
+/**
+ * ice_dpll_frequency_set - wrapper for pin callback for set frequency
+ * @pin: pointer to a pin
+ * @pin_priv: private data pointer passed on pin registration
+ * @dpll: pointer to dpll
+ * @dpll_priv: private data pointer passed on dpll registration
+ * @frequency: frequency to be set
+ * @extack: error reporting
+ * @pin_type: type of pin being configured
+ *
+ * Wraps internal set frequency command on a pin.
+ *
+ * Context: Acquires pf->dplls.lock
+ * Return:
+ * * 0 - success
+ * * negative - error pin not found or couldn't set in hw
+ */
+static int
+ice_dpll_frequency_set(const struct dpll_pin *pin, void *pin_priv,
+		       const struct dpll_device *dpll, void *dpll_priv,
+		       const u32 frequency,
+		       struct netlink_ext_ack *extack,
+		       enum ice_dpll_pin_type pin_type)
+{
+	struct ice_dpll_pin *p = pin_priv;
+	struct ice_dpll *d = dpll_priv;
+	struct ice_pf *pf = d->pf;
+	int ret;
+
+	mutex_lock(&pf->dplls.lock);
+	ret = ice_dpll_pin_freq_set(pf, p, pin_type, frequency, extack);
+	mutex_unlock(&pf->dplls.lock);
+
+	return ret;
+}
+
+/**
+ * ice_dpll_input_frequency_set - input pin callback for set frequency
+ * @pin: pointer to a pin
+ * @pin_priv: private data pointer passed on pin registration
+ * @dpll: pointer to dpll
+ * @dpll_priv: private data pointer passed on dpll registration
+ * @frequency: frequency to be set
+ * @extack: error reporting
+ *
+ * Wraps internal set frequency command on a pin.
+ *
+ * Context: Calls a function which acquires pf->dplls.lock
+ * Return:
+ * * 0 - success
+ * * negative - error pin not found or couldn't set in hw
+ */
+static int
+ice_dpll_input_frequency_set(const struct dpll_pin *pin, void *pin_priv,
+			     const struct dpll_device *dpll, void *dpll_priv,
+			     u64 frequency, struct netlink_ext_ack *extack)
+{
+	return ice_dpll_frequency_set(pin, pin_priv, dpll, dpll_priv, frequency,
+				      extack, ICE_DPLL_PIN_TYPE_INPUT);
+}
+
+/**
+ * ice_dpll_output_frequency_set - output pin callback for set frequency
+ * @pin: pointer to a pin
+ * @pin_priv: private data pointer passed on pin registration
+ * @dpll: pointer to dpll
+ * @dpll_priv: private data pointer passed on dpll registration
+ * @frequency: frequency to be set
+ * @extack: error reporting
+ *
+ * Wraps internal set frequency command on a pin.
+ *
+ * Context: Calls a function which acquires pf->dplls.lock
+ * Return:
+ * * 0 - success
+ * * negative - error pin not found or couldn't set in hw
+ */
+static int
+ice_dpll_output_frequency_set(const struct dpll_pin *pin, void *pin_priv,
+			      const struct dpll_device *dpll, void *dpll_priv,
+			      u64 frequency, struct netlink_ext_ack *extack)
+{
+	return ice_dpll_frequency_set(pin, pin_priv, dpll, dpll_priv, frequency,
+				      extack, ICE_DPLL_PIN_TYPE_OUTPUT);
+}
+
+/**
+ * ice_dpll_frequency_get - wrapper for pin callback for get frequency
+ * @pin: pointer to a pin
+ * @pin_priv: private data pointer passed on pin registration
+ * @dpll: pointer to dpll
+ * @dpll_priv: private data pointer passed on dpll registration
+ * @frequency: on success holds pin's frequency
+ * @extack: error reporting
+ * @pin_type: type of pin being configured
+ *
+ * Wraps internal get frequency command of a pin.
+ *
+ * Context: Acquires pf->dplls.lock
+ * Return:
+ * * 0 - success
+ * * negative - error pin not found or couldn't get from hw
+ */
+static int
+ice_dpll_frequency_get(const struct dpll_pin *pin, void *pin_priv,
+		       const struct dpll_device *dpll, void *dpll_priv,
+		       u64 *frequency, struct netlink_ext_ack *extack,
+		       enum ice_dpll_pin_type pin_type)
+{
+	struct ice_dpll_pin *p = pin_priv;
+	struct ice_dpll *d = dpll_priv;
+	struct ice_pf *pf = d->pf;
+
+	mutex_lock(&pf->dplls.lock);
+	*frequency = p->freq;
+	mutex_unlock(&pf->dplls.lock);
+
+	return 0;
+}
+
+/**
+ * ice_dpll_input_frequency_get - input pin callback for get frequency
+ * @pin: pointer to a pin
+ * @pin_priv: private data pointer passed on pin registration
+ * @dpll: pointer to dpll
+ * @dpll_priv: private data pointer passed on dpll registration
+ * @frequency: on success holds pin's frequency
+ * @extack: error reporting
+ *
+ * Wraps internal get frequency command of a input pin.
+ *
+ * Context: Calls a function which acquires pf->dplls.lock
+ * Return:
+ * * 0 - success
+ * * negative - error pin not found or couldn't get from hw
+ */
+static int
+ice_dpll_input_frequency_get(const struct dpll_pin *pin, void *pin_priv,
+			     const struct dpll_device *dpll, void *dpll_priv,
+			     u64 *frequency, struct netlink_ext_ack *extack)
+{
+	return ice_dpll_frequency_get(pin, pin_priv, dpll, dpll_priv, frequency,
+				      extack, ICE_DPLL_PIN_TYPE_INPUT);
+}
+
+/**
+ * ice_dpll_output_frequency_get - output pin callback for get frequency
+ * @pin: pointer to a pin
+ * @pin_priv: private data pointer passed on pin registration
+ * @dpll: pointer to dpll
+ * @dpll_priv: private data pointer passed on dpll registration
+ * @frequency: on success holds pin's frequency
+ * @extack: error reporting
+ *
+ * Wraps internal get frequency command of a pin.
+ *
+ * Context: Calls a function which acquires pf->dplls.lock
+ * Return:
+ * * 0 - success
+ * * negative - error pin not found or couldn't get from hw
+ */
+static int
+ice_dpll_output_frequency_get(const struct dpll_pin *pin, void *pin_priv,
+			      const struct dpll_device *dpll, void *dpll_priv,
+			      u64 *frequency, struct netlink_ext_ack *extack)
+{
+	return ice_dpll_frequency_get(pin, pin_priv, dpll, dpll_priv, frequency,
+				      extack, ICE_DPLL_PIN_TYPE_OUTPUT);
+}
+
+/**
+ * ice_dpll_pin_enable - enable a pin on dplls
+ * @hw: board private hw structure
+ * @pin: pointer to a pin
+ * @pin_type: type of pin being enabled
+ * @extack: error reporting
+ *
+ * Enable a pin on both dplls. Store current state in pin->flags.
+ *
+ * Context: Called under pf->dplls.lock
+ * Return:
+ * * 0 - OK
+ * * negative - error
+ */
+static int
+ice_dpll_pin_enable(struct ice_hw *hw, struct ice_dpll_pin *pin,
+		    enum ice_dpll_pin_type pin_type,
+		    struct netlink_ext_ack *extack)
+{
+	u8 flags = 0;
+	int ret;
+
+	switch (pin_type) {
+	case ICE_DPLL_PIN_TYPE_INPUT:
+		if (pin->flags[0] & ICE_AQC_GET_CGU_IN_CFG_FLG2_ESYNC_EN)
+			flags |= ICE_AQC_SET_CGU_IN_CFG_FLG2_ESYNC_EN;
+		flags |= ICE_AQC_SET_CGU_IN_CFG_FLG2_INPUT_EN;
+		ret = ice_aq_set_input_pin_cfg(hw, pin->idx, 0, flags, 0, 0);
+		break;
+	case ICE_DPLL_PIN_TYPE_OUTPUT:
+		if (pin->flags[0] & ICE_AQC_GET_CGU_OUT_CFG_ESYNC_EN)
+			flags |= ICE_AQC_SET_CGU_OUT_CFG_ESYNC_EN;
+		flags |= ICE_AQC_SET_CGU_OUT_CFG_OUT_EN;
+		ret = ice_aq_set_output_pin_cfg(hw, pin->idx, flags, 0, 0, 0);
+		break;
+	default:
+		return -EINVAL;
+	}
+	if (ret)
+		NL_SET_ERR_MSG_FMT(extack,
+				   "err:%d %s failed to enable %s pin:%u\n",
+				   ret, ice_aq_str(hw->adminq.sq_last_status),
+				   pin_type_name[pin_type], pin->idx);
+
+	return ret;
+}
+
+/**
+ * ice_dpll_pin_disable - disable a pin on dplls
+ * @hw: board private hw structure
+ * @pin: pointer to a pin
+ * @pin_type: type of pin being disabled
+ * @extack: error reporting
+ *
+ * Disable a pin on both dplls. Store current state in pin->flags.
+ *
+ * Context: Called under pf->dplls.lock
+ * Return:
+ * * 0 - OK
+ * * negative - error
+ */
+static int
+ice_dpll_pin_disable(struct ice_hw *hw, struct ice_dpll_pin *pin,
+		     enum ice_dpll_pin_type pin_type,
+		     struct netlink_ext_ack *extack)
+{
+	u8 flags = 0;
+	int ret;
+
+	switch (pin_type) {
+	case ICE_DPLL_PIN_TYPE_INPUT:
+		if (pin->flags[0] & ICE_AQC_GET_CGU_IN_CFG_FLG2_ESYNC_EN)
+			flags |= ICE_AQC_SET_CGU_IN_CFG_FLG2_ESYNC_EN;
+		ret = ice_aq_set_input_pin_cfg(hw, pin->idx, 0, flags, 0, 0);
+		break;
+	case ICE_DPLL_PIN_TYPE_OUTPUT:
+		if (pin->flags[0] & ICE_AQC_GET_CGU_OUT_CFG_ESYNC_EN)
+			flags |= ICE_AQC_SET_CGU_OUT_CFG_ESYNC_EN;
+		ret = ice_aq_set_output_pin_cfg(hw, pin->idx, flags, 0, 0, 0);
+		break;
+	default:
+		return -EINVAL;
+	}
+	if (ret)
+		NL_SET_ERR_MSG_FMT(extack,
+				   "err:%d %s failed to disable %s pin:%u\n",
+				   ret, ice_aq_str(hw->adminq.sq_last_status),
+				   pin_type_name[pin_type], pin->idx);
+
+	return ret;
+}
+
+/**
+ * ice_dpll_pin_state_update - update pin's state
+ * @pf: private board struct
+ * @pin: structure with pin attributes to be updated
+ * @pin_type: type of pin being updated
+ * @extack: error reporting
+ *
+ * Determine pin current state and frequency, then update struct
+ * holding the pin info. For input pin states are separated for each
+ * dpll, for rclk pins states are separated for each parent.
+ *
+ * Context: Called under pf->dplls.lock
+ * Return:
+ * * 0 - OK
+ * * negative - error
+ */
+static int
+ice_dpll_pin_state_update(struct ice_pf *pf, struct ice_dpll_pin *pin,
+			  enum ice_dpll_pin_type pin_type,
+			  struct netlink_ext_ack *extack)
+{
+	u8 parent, port_num = ICE_AQC_SET_PHY_REC_CLK_OUT_CURR_PORT;
+	int ret;
+
+	switch (pin_type) {
+	case ICE_DPLL_PIN_TYPE_INPUT:
+		ret = ice_aq_get_input_pin_cfg(&pf->hw, pin->idx, NULL, NULL,
+					       NULL, &pin->flags[0],
+					       &pin->freq, NULL);
+		if (ret)
+			goto err;
+		if (ICE_AQC_GET_CGU_IN_CFG_FLG2_INPUT_EN & pin->flags[0]) {
+			if (pin->pin) {
+				pin->state[pf->dplls.eec.dpll_idx] =
+					pin->pin == pf->dplls.eec.active_input ?
+					DPLL_PIN_STATE_CONNECTED :
+					DPLL_PIN_STATE_SELECTABLE;
+				pin->state[pf->dplls.pps.dpll_idx] =
+					pin->pin == pf->dplls.pps.active_input ?
+					DPLL_PIN_STATE_CONNECTED :
+					DPLL_PIN_STATE_SELECTABLE;
+			} else {
+				pin->state[pf->dplls.eec.dpll_idx] =
+					DPLL_PIN_STATE_SELECTABLE;
+				pin->state[pf->dplls.pps.dpll_idx] =
+					DPLL_PIN_STATE_SELECTABLE;
+			}
+		} else {
+			pin->state[pf->dplls.eec.dpll_idx] =
+				DPLL_PIN_STATE_DISCONNECTED;
+			pin->state[pf->dplls.pps.dpll_idx] =
+				DPLL_PIN_STATE_DISCONNECTED;
+		}
+		break;
+	case ICE_DPLL_PIN_TYPE_OUTPUT:
+		ret = ice_aq_get_output_pin_cfg(&pf->hw, pin->idx,
+						&pin->flags[0], NULL,
+						&pin->freq, NULL);
+		if (ret)
+			goto err;
+		if (ICE_AQC_SET_CGU_OUT_CFG_OUT_EN & pin->flags[0])
+			pin->state[0] = DPLL_PIN_STATE_CONNECTED;
+		else
+			pin->state[0] = DPLL_PIN_STATE_DISCONNECTED;
+		break;
+	case ICE_DPLL_PIN_TYPE_RCLK_INPUT:
+		for (parent = 0; parent < pf->dplls.rclk.num_parents;
+		     parent++) {
+			u8 p = parent;
+
+			ret = ice_aq_get_phy_rec_clk_out(&pf->hw, &p,
+							 &port_num,
+							 &pin->flags[parent],
+							 NULL);
+			if (ret)
+				goto err;
+			if (ICE_AQC_GET_PHY_REC_CLK_OUT_OUT_EN &
+			    pin->flags[parent])
+				pin->state[parent] = DPLL_PIN_STATE_CONNECTED;
+			else
+				pin->state[parent] =
+					DPLL_PIN_STATE_DISCONNECTED;
+		}
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+err:
+	if (extack)
+		NL_SET_ERR_MSG_FMT(extack,
+				   "err:%d %s failed to update %s pin:%u\n",
+				   ret,
+				   ice_aq_str(pf->hw.adminq.sq_last_status),
+				   pin_type_name[pin_type], pin->idx);
+	else
+		dev_err_ratelimited(ice_pf_to_dev(pf),
+				    "err:%d %s failed to update %s pin:%u\n",
+				    ret,
+				    ice_aq_str(pf->hw.adminq.sq_last_status),
+				    pin_type_name[pin_type], pin->idx);
+	return ret;
+}
+
+/**
+ * ice_dpll_hw_input_prio_set - set input priority value in hardware
+ * @pf: board private structure
+ * @dpll: ice dpll pointer
+ * @pin: ice pin pointer
+ * @prio: priority value being set on a dpll
+ * @extack: error reporting
+ *
+ * Internal wrapper for setting the priority in the hardware.
+ *
+ * Context: Called under pf->dplls.lock
+ * Return:
+ * * 0 - success
+ * * negative - failure
+ */
+static int
+ice_dpll_hw_input_prio_set(struct ice_pf *pf, struct ice_dpll *dpll,
+			   struct ice_dpll_pin *pin, const u32 prio,
+			   struct netlink_ext_ack *extack)
+{
+	int ret;
+
+	ret = ice_aq_set_cgu_ref_prio(&pf->hw, dpll->dpll_idx, pin->idx,
+				      (u8)prio);
+	if (ret)
+		NL_SET_ERR_MSG_FMT(extack,
+				   "err:%d %s failed to set pin prio:%u on pin:%u\n",
+				   ret,
+				   ice_aq_str(pf->hw.adminq.sq_last_status),
+				   prio, pin->idx);
+	else
+		dpll->input_prio[pin->idx] = prio;
+
+	return ret;
+}
+
+/**
+ * ice_dpll_lock_status_get - get dpll lock status callback
+ * @dpll: registered dpll pointer
+ * @dpll_priv: private data pointer passed on dpll registration
+ * @status: on success holds dpll's lock status
+ * @extack: error reporting
+ *
+ * Dpll subsystem callback, provides dpll's lock status.
+ *
+ * Context: Acquires pf->dplls.lock
+ * Return:
+ * * 0 - success
+ * * negative - failure
+ */
+static int
+ice_dpll_lock_status_get(const struct dpll_device *dpll, void *dpll_priv,
+			 enum dpll_lock_status *status,
+			 struct netlink_ext_ack *extack)
+{
+	struct ice_dpll *d = dpll_priv;
+	struct ice_pf *pf = d->pf;
+
+	mutex_lock(&pf->dplls.lock);
+	*status = d->dpll_state;
+	mutex_unlock(&pf->dplls.lock);
+
+	return 0;
+}
+
+/**
+ * ice_dpll_mode_supported - check if dpll's working mode is supported
+ * @dpll: registered dpll pointer
+ * @dpll_priv: private data pointer passed on dpll registration
+ * @mode: mode to be checked for support
+ * @extack: error reporting
+ *
+ * Dpll subsystem callback. Provides information if working mode is supported
+ * by dpll.
+ *
+ * Return:
+ * * true - mode is supported
+ * * false - mode is not supported
+ */
+static bool ice_dpll_mode_supported(const struct dpll_device *dpll,
+				    void *dpll_priv,
+				    enum dpll_mode mode,
+				    struct netlink_ext_ack *extack)
+{
+	if (mode == DPLL_MODE_AUTOMATIC)
+		return true;
+
+	return false;
+}
+
+/**
+ * ice_dpll_mode_get - get dpll's working mode
+ * @dpll: registered dpll pointer
+ * @dpll_priv: private data pointer passed on dpll registration
+ * @mode: on success holds current working mode of dpll
+ * @extack: error reporting
+ *
+ * Dpll subsystem callback. Provides working mode of dpll.
+ *
+ * Context: Acquires pf->dplls.lock
+ * Return:
+ * * 0 - success
+ * * negative - failure
+ */
+static int ice_dpll_mode_get(const struct dpll_device *dpll, void *dpll_priv,
+			     enum dpll_mode *mode,
+			     struct netlink_ext_ack *extack)
+{
+	struct ice_dpll *d = dpll_priv;
+	struct ice_pf *pf = d->pf;
+
+	mutex_lock(&pf->dplls.lock);
+	*mode = d->mode;
+	mutex_unlock(&pf->dplls.lock);
+
+	return 0;
+}
+
+/**
+ * ice_dpll_pin_state_set - set pin's state on dpll
+ * @pin: pointer to a pin
+ * @pin_priv: private data pointer passed on pin registration
+ * @dpll: registered dpll pointer
+ * @dpll_priv: private data pointer passed on dpll registration
+ * @enable: if pin shalll be enabled
+ * @extack: error reporting
+ * @pin_type: type of a pin
+ *
+ * Set pin state on a pin.
+ *
+ * Context: Acquires pf->dplls.lock
+ * Return:
+ * * 0 - OK or no change required
+ * * negative - error
+ */
+static int
+ice_dpll_pin_state_set(const struct dpll_pin *pin, void *pin_priv,
+		       const struct dpll_device *dpll, void *dpll_priv,
+		       bool enable, struct netlink_ext_ack *extack,
+		       enum ice_dpll_pin_type pin_type)
+{
+	struct ice_dpll_pin *p = pin_priv;
+	struct ice_dpll *d = dpll_priv;
+	struct ice_pf *pf = d->pf;
+	int ret;
+
+	mutex_lock(&pf->dplls.lock);
+	if (enable)
+		ret = ice_dpll_pin_enable(&pf->hw, p, pin_type, extack);
+	else
+		ret = ice_dpll_pin_disable(&pf->hw, p, pin_type, extack);
+	if (!ret)
+		ret = ice_dpll_pin_state_update(pf, p, pin_type, extack);
+	mutex_unlock(&pf->dplls.lock);
+
+	return ret;
+}
+
+/**
+ * ice_dpll_output_state_set - enable/disable output pin on dpll device
+ * @pin: pointer to a pin
+ * @pin_priv: private data pointer passed on pin registration
+ * @dpll: dpll being configured
+ * @dpll_priv: private data pointer passed on dpll registration
+ * @state: state of pin to be set
+ * @extack: error reporting
+ *
+ * Dpll subsystem callback. Set given state on output type pin.
+ *
+ * Context: Calls a function which acquires pf->dplls.lock
+ * Return:
+ * * 0 - successfully enabled mode
+ * * negative - failed to enable mode
+ */
+static int
+ice_dpll_output_state_set(const struct dpll_pin *pin, void *pin_priv,
+			  const struct dpll_device *dpll, void *dpll_priv,
+			  enum dpll_pin_state state,
+			  struct netlink_ext_ack *extack)
+{
+	bool enable = state == DPLL_PIN_STATE_CONNECTED;
+
+	return ice_dpll_pin_state_set(pin, pin_priv, dpll, dpll_priv, enable,
+				      extack, ICE_DPLL_PIN_TYPE_OUTPUT);
+}
+
+/**
+ * ice_dpll_input_state_set - enable/disable input pin on dpll levice
+ * @pin: pointer to a pin
+ * @pin_priv: private data pointer passed on pin registration
+ * @dpll: dpll being configured
+ * @dpll_priv: private data pointer passed on dpll registration
+ * @state: state of pin to be set
+ * @extack: error reporting
+ *
+ * Dpll subsystem callback. Enables given mode on input type pin.
+ *
+ * Context: Calls a function which acquires pf->dplls.lock
+ * Return:
+ * * 0 - successfully enabled mode
+ * * negative - failed to enable mode
+ */
+static int
+ice_dpll_input_state_set(const struct dpll_pin *pin, void *pin_priv,
+			 const struct dpll_device *dpll, void *dpll_priv,
+			 enum dpll_pin_state state,
+			 struct netlink_ext_ack *extack)
+{
+	bool enable = state == DPLL_PIN_STATE_SELECTABLE;
+
+	return ice_dpll_pin_state_set(pin, pin_priv, dpll, dpll_priv, enable,
+				      extack, ICE_DPLL_PIN_TYPE_INPUT);
+}
+
+/**
+ * ice_dpll_pin_state_get - set pin's state on dpll
+ * @pin: pointer to a pin
+ * @pin_priv: private data pointer passed on pin registration
+ * @dpll: registered dpll pointer
+ * @dpll_priv: private data pointer passed on dpll registration
+ * @state: on success holds state of the pin
+ * @extack: error reporting
+ * @pin_type: type of questioned pin
+ *
+ * Determine pin state set it on a pin.
+ *
+ * Context: Acquires pf->dplls.lock
+ * Return:
+ * * 0 - success
+ * * negative - failed to get state
+ */
+static int
+ice_dpll_pin_state_get(const struct dpll_pin *pin, void *pin_priv,
+		       const struct dpll_device *dpll, void *dpll_priv,
+		       enum dpll_pin_state *state,
+		       struct netlink_ext_ack *extack,
+		       enum ice_dpll_pin_type pin_type)
+{
+	struct ice_dpll_pin *p = pin_priv;
+	struct ice_dpll *d = dpll_priv;
+	struct ice_pf *pf = d->pf;
+	int ret;
+
+	mutex_lock(&pf->dplls.lock);
+	ret = ice_dpll_pin_state_update(pf, p, pin_type, extack);
+	if (ret)
+		goto unlock;
+	if (pin_type == ICE_DPLL_PIN_TYPE_INPUT)
+		*state = p->state[d->dpll_idx];
+	else if (pin_type == ICE_DPLL_PIN_TYPE_OUTPUT)
+		*state = p->state[0];
+	ret = 0;
+unlock:
+	mutex_unlock(&pf->dplls.lock);
+
+	return ret;
+}
+
+/**
+ * ice_dpll_output_state_get - get output pin state on dpll device
+ * @pin: pointer to a pin
+ * @pin_priv: private data pointer passed on pin registration
+ * @dpll: registered dpll pointer
+ * @dpll_priv: private data pointer passed on dpll registration
+ * @state: on success holds state of the pin
+ * @extack: error reporting
+ *
+ * Dpll subsystem callback. Check state of a pin.
+ *
+ * Context: Calls a function which acquires pf->dplls.lock
+ * Return:
+ * * 0 - success
+ * * negative - failed to get state
+ */
+static int
+ice_dpll_output_state_get(const struct dpll_pin *pin, void *pin_priv,
+			  const struct dpll_device *dpll, void *dpll_priv,
+			  enum dpll_pin_state *state,
+			  struct netlink_ext_ack *extack)
+{
+	return ice_dpll_pin_state_get(pin, pin_priv, dpll, dpll_priv, state,
+				      extack, ICE_DPLL_PIN_TYPE_OUTPUT);
+}
+
+/**
+ * ice_dpll_input_state_get - get input pin state on dpll device
+ * @pin: pointer to a pin
+ * @pin_priv: private data pointer passed on pin registration
+ * @dpll: registered dpll pointer
+ * @dpll_priv: private data pointer passed on dpll registration
+ * @state: on success holds state of the pin
+ * @extack: error reporting
+ *
+ * Dpll subsystem callback. Check state of a input pin.
+ *
+ * Context: Calls a function which acquires pf->dplls.lock
+ * Return:
+ * * 0 - success
+ * * negative - failed to get state
+ */
+static int
+ice_dpll_input_state_get(const struct dpll_pin *pin, void *pin_priv,
+			 const struct dpll_device *dpll, void *dpll_priv,
+			 enum dpll_pin_state *state,
+			 struct netlink_ext_ack *extack)
+{
+	return ice_dpll_pin_state_get(pin, pin_priv, dpll, dpll_priv, state,
+				      extack, ICE_DPLL_PIN_TYPE_INPUT);
+}
+
+/**
+ * ice_dpll_input_prio_get - get dpll's input prio
+ * @pin: pointer to a pin
+ * @pin_priv: private data pointer passed on pin registration
+ * @dpll: registered dpll pointer
+ * @dpll_priv: private data pointer passed on dpll registration
+ * @prio: on success - returns input priority on dpll
+ * @extack: error reporting
+ *
+ * Dpll subsystem callback. Handler for getting priority of a input pin.
+ *
+ * Context: Acquires pf->dplls.lock
+ * Return:
+ * * 0 - success
+ * * negative - failure
+ */
+static int
+ice_dpll_input_prio_get(const struct dpll_pin *pin, void *pin_priv,
+			const struct dpll_device *dpll, void *dpll_priv,
+			u32 *prio, struct netlink_ext_ack *extack)
+{
+	struct ice_dpll_pin *p = pin_priv;
+	struct ice_dpll *d = dpll_priv;
+	struct ice_pf *pf = d->pf;
+
+	mutex_lock(&pf->dplls.lock);
+	*prio = d->input_prio[p->idx];
+	mutex_unlock(&pf->dplls.lock);
+
+	return 0;
+}
+
+/**
+ * ice_dpll_input_prio_set - set dpll input prio
+ * @pin: pointer to a pin
+ * @pin_priv: private data pointer passed on pin registration
+ * @dpll: registered dpll pointer
+ * @dpll_priv: private data pointer passed on dpll registration
+ * @prio: input priority to be set on dpll
+ * @extack: error reporting
+ *
+ * Dpll subsystem callback. Handler for setting priority of a input pin.
+ *
+ * Context: Acquires pf->dplls.lock
+ * Return:
+ * * 0 - success
+ * * negative - failure
+ */
+static int
+ice_dpll_input_prio_set(const struct dpll_pin *pin, void *pin_priv,
+			const struct dpll_device *dpll, void *dpll_priv,
+			u32 prio, struct netlink_ext_ack *extack)
+{
+	struct ice_dpll_pin *p = pin_priv;
+	struct ice_dpll *d = dpll_priv;
+	struct ice_pf *pf = d->pf;
+	int ret;
+
+	if (prio > ICE_DPLL_PRIO_MAX) {
+		NL_SET_ERR_MSG_FMT(extack, "prio out of supported range 0-%d",
+				   ICE_DPLL_PRIO_MAX);
+		return -EINVAL;
+	}
+
+	mutex_lock(&pf->dplls.lock);
+	ret = ice_dpll_hw_input_prio_set(pf, d, p, prio, extack);
+	mutex_unlock(&pf->dplls.lock);
+
+	return ret;
+}
+
+/**
+ * ice_dpll_input_direction - callback for get input pin direction
+ * @pin: pointer to a pin
+ * @pin_priv: private data pointer passed on pin registration
+ * @dpll: registered dpll pointer
+ * @dpll_priv: private data pointer passed on dpll registration
+ * @direction: holds input pin direction
+ * @extack: error reporting
+ *
+ * Dpll subsystem callback. Handler for getting direction of a input pin.
+ *
+ * Return:
+ * * 0 - success
+ */
+static int
+ice_dpll_input_direction(const struct dpll_pin *pin, void *pin_priv,
+			 const struct dpll_device *dpll, void *dpll_priv,
+			 enum dpll_pin_direction *direction,
+			 struct netlink_ext_ack *extack)
+{
+	*direction = DPLL_PIN_DIRECTION_INPUT;
+
+	return 0;
+}
+
+/**
+ * ice_dpll_output_direction - callback for get output pin direction
+ * @pin: pointer to a pin
+ * @pin_priv: private data pointer passed on pin registration
+ * @dpll: registered dpll pointer
+ * @dpll_priv: private data pointer passed on dpll registration
+ * @direction: holds output pin direction
+ * @extack: error reporting
+ *
+ * Dpll subsystem callback. Handler for getting direction of an output pin.
+ *
+ * Return:
+ * * 0 - success
+ */
+static int
+ice_dpll_output_direction(const struct dpll_pin *pin, void *pin_priv,
+			  const struct dpll_device *dpll, void *dpll_priv,
+			  enum dpll_pin_direction *direction,
+			  struct netlink_ext_ack *extack)
+{
+	*direction = DPLL_PIN_DIRECTION_OUTPUT;
+
+	return 0;
+}
+
+/**
+ * ice_dpll_rclk_state_on_pin_set - set a state on rclk pin
+ * @pin: pointer to a pin
+ * @pin_priv: private data pointer passed on pin registration
+ * @parent_pin: pin parent pointer
+ * @parent_pin_priv: parent private data pointer passed on pin registration
+ * @state: state to be set on pin
+ * @extack: error reporting
+ *
+ * Dpll subsystem callback, set a state of a rclk pin on a parent pin
+ *
+ * Context: Acquires pf->dplls.lock
+ * Return:
+ * * 0 - success
+ * * negative - failure
+ */
+static int
+ice_dpll_rclk_state_on_pin_set(const struct dpll_pin *pin, void *pin_priv,
+			       const struct dpll_pin *parent_pin,
+			       void *parent_pin_priv,
+			       enum dpll_pin_state state,
+			       struct netlink_ext_ack *extack)
+{
+	struct ice_dpll_pin *p = pin_priv, *parent = parent_pin_priv;
+	bool enable = state == DPLL_PIN_STATE_CONNECTED;
+	struct ice_pf *pf = p->pf;
+	int ret = -EINVAL;
+	u32 hw_idx;
+
+	mutex_lock(&pf->dplls.lock);
+	hw_idx = parent->idx - pf->dplls.base_rclk_idx;
+	if (hw_idx >= pf->dplls.num_inputs)
+		goto unlock;
+
+	if ((enable && p->state[hw_idx] == DPLL_PIN_STATE_CONNECTED) ||
+	    (!enable && p->state[hw_idx] == DPLL_PIN_STATE_DISCONNECTED)) {
+		NL_SET_ERR_MSG_FMT(extack,
+				   "pin:%u state:%u on parent:%u already set",
+				   p->idx, state, parent->idx);
+		goto unlock;
+	}
+	ret = ice_aq_set_phy_rec_clk_out(&pf->hw, hw_idx, enable,
+					 &p->freq);
+	if (ret)
+		NL_SET_ERR_MSG_FMT(extack,
+				   "err:%d %s failed to set pin state:%u for pin:%u on parent:%u\n",
+				   ret,
+				   ice_aq_str(pf->hw.adminq.sq_last_status),
+				   state, p->idx, parent->idx);
+unlock:
+	mutex_unlock(&pf->dplls.lock);
+
+	return ret;
+}
+
+/**
+ * ice_dpll_rclk_state_on_pin_get - get a state of rclk pin
+ * @pin: pointer to a pin
+ * @pin_priv: private data pointer passed on pin registration
+ * @parent_pin: pin parent pointer
+ * @parent_pin_priv: pin parent priv data pointer passed on pin registration
+ * @state: on success holds pin state on parent pin
+ * @extack: error reporting
+ *
+ * dpll subsystem callback, get a state of a recovered clock pin.
+ *
+ * Context: Acquires pf->dplls.lock
+ * Return:
+ * * 0 - success
+ * * negative - failure
+ */
+static int
+ice_dpll_rclk_state_on_pin_get(const struct dpll_pin *pin, void *pin_priv,
+			       const struct dpll_pin *parent_pin,
+			       void *parent_pin_priv,
+			       enum dpll_pin_state *state,
+			       struct netlink_ext_ack *extack)
+{
+	struct ice_dpll_pin *p = pin_priv, *parent = parent_pin_priv;
+	struct ice_pf *pf = p->pf;
+	int ret = -EINVAL;
+	u32 hw_idx;
+
+	mutex_lock(&pf->dplls.lock);
+	hw_idx = parent->idx - pf->dplls.base_rclk_idx;
+	if (hw_idx >= pf->dplls.num_inputs)
+		goto unlock;
+
+	ret = ice_dpll_pin_state_update(pf, p, ICE_DPLL_PIN_TYPE_RCLK_INPUT,
+					extack);
+	if (ret)
+		goto unlock;
+
+	*state = p->state[hw_idx];
+	ret = 0;
+unlock:
+	mutex_unlock(&pf->dplls.lock);
+
+	return ret;
+}
+
+static const struct dpll_pin_ops ice_dpll_rclk_ops = {
+	.state_on_pin_set = ice_dpll_rclk_state_on_pin_set,
+	.state_on_pin_get = ice_dpll_rclk_state_on_pin_get,
+	.direction_get = ice_dpll_input_direction,
+};
+
+static const struct dpll_pin_ops ice_dpll_input_ops = {
+	.frequency_get = ice_dpll_input_frequency_get,
+	.frequency_set = ice_dpll_input_frequency_set,
+	.state_on_dpll_get = ice_dpll_input_state_get,
+	.state_on_dpll_set = ice_dpll_input_state_set,
+	.prio_get = ice_dpll_input_prio_get,
+	.prio_set = ice_dpll_input_prio_set,
+	.direction_get = ice_dpll_input_direction,
+};
+
+static const struct dpll_pin_ops ice_dpll_output_ops = {
+	.frequency_get = ice_dpll_output_frequency_get,
+	.frequency_set = ice_dpll_output_frequency_set,
+	.state_on_dpll_get = ice_dpll_output_state_get,
+	.state_on_dpll_set = ice_dpll_output_state_set,
+	.direction_get = ice_dpll_output_direction,
+};
+
+static const struct dpll_device_ops ice_dpll_ops = {
+	.lock_status_get = ice_dpll_lock_status_get,
+	.mode_supported = ice_dpll_mode_supported,
+	.mode_get = ice_dpll_mode_get,
+};
+
+/**
+ * ice_generate_clock_id - generates unique clock_id for registering dpll.
+ * @pf: board private structure
+ *
+ * Generates unique (per board) clock_id for allocation and search of dpll
+ * devices in Linux dpll subsystem.
+ *
+ * Return: generated clock id for the board
+ */
+static u64 ice_generate_clock_id(struct ice_pf *pf)
+{
+	return pci_get_dsn(pf->pdev);
+}
+
+/**
+ * ice_dpll_notify_changes - notify dpll subsystem about changes
+ * @d: pointer do dpll
+ *
+ * Once change detected appropriate event is submitted to the dpll subsystem.
+ */
+static void ice_dpll_notify_changes(struct ice_dpll *d)
+{
+	if (d->prev_dpll_state != d->dpll_state) {
+		d->prev_dpll_state = d->dpll_state;
+		dpll_device_change_ntf(d->dpll);
+	}
+	if (d->prev_input != d->active_input) {
+		if (d->prev_input)
+			dpll_pin_change_ntf(d->prev_input);
+		d->prev_input = d->active_input;
+		if (d->active_input)
+			dpll_pin_change_ntf(d->active_input);
+	}
+}
+
+/**
+ * ice_dpll_update_state - update dpll state
+ * @pf: pf private structure
+ * @d: pointer to queried dpll device
+ * @init: if function called on initialization of ice dpll
+ *
+ * Poll current state of dpll from hw and update ice_dpll struct.
+ *
+ * Context: Called by kworker under pf->dplls.lock
+ * Return:
+ * * 0 - success
+ * * negative - AQ failure
+ */
+static int
+ice_dpll_update_state(struct ice_pf *pf, struct ice_dpll *d, bool init)
+{
+	struct ice_dpll_pin *p = NULL;
+	int ret;
+
+	ret = ice_get_cgu_state(&pf->hw, d->dpll_idx, d->prev_dpll_state,
+				&d->input_idx, &d->ref_state, &d->eec_mode,
+				&d->phase_shift, &d->dpll_state);
+
+	dev_dbg(ice_pf_to_dev(pf),
+		"update dpll=%d, prev_src_idx:%u, src_idx:%u, state:%d, prev:%d mode:%d\n",
+		d->dpll_idx, d->prev_input_idx, d->input_idx,
+		d->dpll_state, d->prev_dpll_state, d->mode);
+	if (ret) {
+		dev_err(ice_pf_to_dev(pf),
+			"update dpll=%d state failed, ret=%d %s\n",
+			d->dpll_idx, ret,
+			ice_aq_str(pf->hw.adminq.sq_last_status));
+		return ret;
+	}
+	if (init) {
+		if (d->dpll_state == DPLL_LOCK_STATUS_LOCKED ||
+		    d->dpll_state == DPLL_LOCK_STATUS_LOCKED_HO_ACQ)
+			d->active_input = pf->dplls.inputs[d->input_idx].pin;
+		p = &pf->dplls.inputs[d->input_idx];
+		return ice_dpll_pin_state_update(pf, p,
+						 ICE_DPLL_PIN_TYPE_INPUT, NULL);
+	}
+	if (d->dpll_state == DPLL_LOCK_STATUS_HOLDOVER ||
+	    d->dpll_state == DPLL_LOCK_STATUS_UNLOCKED) {
+		d->active_input = NULL;
+		if (d->input_idx != ICE_DPLL_PIN_IDX_INVALID)
+			p = &pf->dplls.inputs[d->input_idx];
+		d->prev_input_idx = ICE_DPLL_PIN_IDX_INVALID;
+		d->input_idx = ICE_DPLL_PIN_IDX_INVALID;
+		if (!p)
+			return 0;
+		ret = ice_dpll_pin_state_update(pf, p,
+						ICE_DPLL_PIN_TYPE_INPUT, NULL);
+	} else if (d->input_idx != d->prev_input_idx) {
+		if (d->prev_input_idx != ICE_DPLL_PIN_IDX_INVALID) {
+			p = &pf->dplls.inputs[d->prev_input_idx];
+			ice_dpll_pin_state_update(pf, p,
+						  ICE_DPLL_PIN_TYPE_INPUT,
+						  NULL);
+		}
+		if (d->input_idx != ICE_DPLL_PIN_IDX_INVALID) {
+			p = &pf->dplls.inputs[d->input_idx];
+			d->active_input = p->pin;
+			ice_dpll_pin_state_update(pf, p,
+						  ICE_DPLL_PIN_TYPE_INPUT,
+						  NULL);
+		}
+		d->prev_input_idx = d->input_idx;
+	}
+
+	return ret;
+}
+
+/**
+ * ice_dpll_periodic_work - DPLLs periodic worker
+ * @work: pointer to kthread_work structure
+ *
+ * DPLLs periodic worker is responsible for polling state of dpll.
+ * Context: Holds pf->dplls.lock
+ */
+static void ice_dpll_periodic_work(struct kthread_work *work)
+{
+	struct ice_dplls *d = container_of(work, struct ice_dplls, work.work);
+	struct ice_pf *pf = container_of(d, struct ice_pf, dplls);
+	struct ice_dpll *de = &pf->dplls.eec;
+	struct ice_dpll *dp = &pf->dplls.pps;
+	int ret;
+
+	mutex_lock(&pf->dplls.lock);
+	ret = ice_dpll_update_state(pf, de, false);
+	if (!ret)
+		ret = ice_dpll_update_state(pf, dp, false);
+	if (ret) {
+		d->cgu_state_acq_err_num++;
+		/* stop rescheduling this worker */
+		if (d->cgu_state_acq_err_num >
+		    ICE_CGU_STATE_ACQ_ERR_THRESHOLD) {
+			dev_err(ice_pf_to_dev(pf),
+				"EEC/PPS DPLLs periodic work disabled\n");
+			mutex_unlock(&pf->dplls.lock);
+			return;
+		}
+	}
+	mutex_unlock(&pf->dplls.lock);
+	ice_dpll_notify_changes(de);
+	ice_dpll_notify_changes(dp);
+
+	/* Run twice a second or reschedule if update failed */
+	kthread_queue_delayed_work(d->kworker, &d->work,
+				   ret ? msecs_to_jiffies(10) :
+				   msecs_to_jiffies(500));
+}
+
+/**
+ * ice_dpll_release_pins - release pins resources from dpll subsystem
+ * @pins: pointer to pins array
+ * @count: number of pins
+ *
+ * Release resources of given pins array in the dpll subsystem.
+ */
+static void ice_dpll_release_pins(struct ice_dpll_pin *pins, int count)
+{
+	int i;
+
+	for (i = 0; i < count; i++)
+		dpll_pin_put(pins[i].pin);
+}
+
+/**
+ * ice_dpll_get_pins - get pins from dpll subsystem
+ * @pf: board private structure
+ * @pins: pointer to pins array
+ * @start_idx: get starts from this pin idx value
+ * @count: number of pins
+ * @clock_id: clock_id of dpll device
+ *
+ * Get pins - allocate - in dpll subsystem, store them in pin field of given
+ * pins array.
+ *
+ * Return:
+ * * 0 - success
+ * * negative - allocation failure reason
+ */
+static int
+ice_dpll_get_pins(struct ice_pf *pf, struct ice_dpll_pin *pins,
+		  int start_idx, int count, u64 clock_id)
+{
+	int i, ret;
+
+	for (i = 0; i < count; i++) {
+		pins[i].pin = dpll_pin_get(clock_id, i + start_idx, THIS_MODULE,
+					   &pins[i].prop);
+		if (IS_ERR(pins[i].pin)) {
+			ret = PTR_ERR(pins[i].pin);
+			goto release_pins;
+		}
+	}
+
+	return 0;
+
+release_pins:
+	while (--i >= 0)
+		dpll_pin_put(pins[i].pin);
+	return ret;
+}
+
+/**
+ * ice_dpll_unregister_pins - unregister pins from a dpll
+ * @dpll: dpll device pointer
+ * @pins: pointer to pins array
+ * @ops: callback ops registered with the pins
+ * @count: number of pins
+ *
+ * Unregister pins of a given array of pins from given dpll device registered in
+ * dpll subsystem.
+ */
+static void
+ice_dpll_unregister_pins(struct dpll_device *dpll, struct ice_dpll_pin *pins,
+			 const struct dpll_pin_ops *ops, int count)
+{
+	int i;
+
+	for (i = 0; i < count; i++)
+		dpll_pin_unregister(dpll, pins[i].pin, ops, &pins[i]);
+}
+
+/**
+ * ice_dpll_register_pins - register pins with a dpll
+ * @dpll: dpll pointer to register pins with
+ * @pins: pointer to pins array
+ * @ops: callback ops registered with the pins
+ * @count: number of pins
+ *
+ * Register pins of a given array with given dpll in dpll subsystem.
+ *
+ * Return:
+ * * 0 - success
+ * * negative - registration failure reason
+ */
+static int
+ice_dpll_register_pins(struct dpll_device *dpll, struct ice_dpll_pin *pins,
+		       const struct dpll_pin_ops *ops, int count)
+{
+	int ret, i;
+
+	for (i = 0; i < count; i++) {
+		ret = dpll_pin_register(dpll, pins[i].pin, ops, &pins[i]);
+		if (ret)
+			goto unregister_pins;
+	}
+
+	return 0;
+
+unregister_pins:
+	while (--i >= 0)
+		dpll_pin_unregister(dpll, pins[i].pin, ops, &pins[i]);
+	return ret;
+}
+
+/**
+ * ice_dpll_deinit_direct_pins - deinitialize direct pins
+ * @cgu: if cgu is present and controlled by this NIC
+ * @pins: pointer to pins array
+ * @count: number of pins
+ * @ops: callback ops registered with the pins
+ * @first: dpll device pointer
+ * @second: dpll device pointer
+ *
+ * If cgu is owned unregister pins from given dplls.
+ * Release pins resources to the dpll subsystem.
+ */
+static void
+ice_dpll_deinit_direct_pins(bool cgu, struct ice_dpll_pin *pins, int count,
+			    const struct dpll_pin_ops *ops,
+			    struct dpll_device *first,
+			    struct dpll_device *second)
+{
+	if (cgu) {
+		ice_dpll_unregister_pins(first, pins, ops, count);
+		ice_dpll_unregister_pins(second, pins, ops, count);
+	}
+	ice_dpll_release_pins(pins, count);
+}
+
+/**
+ * ice_dpll_init_direct_pins - initialize direct pins
+ * @pf: board private structure
+ * @cgu: if cgu is present and controlled by this NIC
+ * @pins: pointer to pins array
+ * @start_idx: on which index shall allocation start in dpll subsystem
+ * @count: number of pins
+ * @ops: callback ops registered with the pins
+ * @first: dpll device pointer
+ * @second: dpll device pointer
+ *
+ * Allocate directly connected pins of a given array in dpll subsystem.
+ * If cgu is owned register allocated pins with given dplls.
+ *
+ * Return:
+ * * 0 - success
+ * * negative - registration failure reason
+ */
+static int
+ice_dpll_init_direct_pins(struct ice_pf *pf, bool cgu,
+			  struct ice_dpll_pin *pins, int start_idx, int count,
+			  const struct dpll_pin_ops *ops,
+			  struct dpll_device *first, struct dpll_device *second)
+{
+	int ret;
+
+	ret = ice_dpll_get_pins(pf, pins, start_idx, count, pf->dplls.clock_id);
+	if (ret)
+		return ret;
+	if (cgu) {
+		ret = ice_dpll_register_pins(first, pins, ops, count);
+		if (ret)
+			goto release_pins;
+		ret = ice_dpll_register_pins(second, pins, ops, count);
+		if (ret)
+			goto unregister_first;
+	}
+
+	return 0;
+
+unregister_first:
+	ice_dpll_unregister_pins(first, pins, ops, count);
+release_pins:
+	ice_dpll_release_pins(pins, count);
+	return ret;
+}
+
+/**
+ * ice_dpll_deinit_rclk_pin - release rclk pin resources
+ * @pf: board private structure
+ *
+ * Deregister rclk pin from parent pins and release resources in dpll subsystem.
+ */
+static void ice_dpll_deinit_rclk_pin(struct ice_pf *pf)
+{
+	struct ice_dpll_pin *rclk = &pf->dplls.rclk;
+	struct ice_vsi *vsi = ice_get_main_vsi(pf);
+	struct dpll_pin *parent;
+	int i;
+
+	for (i = 0; i < rclk->num_parents; i++) {
+		parent = pf->dplls.inputs[rclk->parent_idx[i]].pin;
+		if (!parent)
+			continue;
+		dpll_pin_on_pin_unregister(parent, rclk->pin,
+					   &ice_dpll_rclk_ops, rclk);
+	}
+	if (WARN_ON_ONCE(!vsi || !vsi->netdev))
+		return;
+	netdev_dpll_pin_clear(vsi->netdev);
+	dpll_pin_put(rclk->pin);
+}
+
+/**
+ * ice_dpll_init_rclk_pins - initialize recovered clock pin
+ * @pf: board private structure
+ * @pin: pin to register
+ * @start_idx: on which index shall allocation start in dpll subsystem
+ * @ops: callback ops registered with the pins
+ *
+ * Allocate resource for recovered clock pin in dpll subsystem. Register the
+ * pin with the parents it has in the info. Register pin with the pf's main vsi
+ * netdev.
+ *
+ * Return:
+ * * 0 - success
+ * * negative - registration failure reason
+ */
+static int
+ice_dpll_init_rclk_pins(struct ice_pf *pf, struct ice_dpll_pin *pin,
+			int start_idx, const struct dpll_pin_ops *ops)
+{
+	struct ice_vsi *vsi = ice_get_main_vsi(pf);
+	struct dpll_pin *parent;
+	int ret, i;
+
+	ret = ice_dpll_get_pins(pf, pin, start_idx, ICE_DPLL_RCLK_NUM_PER_PF,
+				pf->dplls.clock_id);
+	if (ret)
+		return ret;
+	for (i = 0; i < pf->dplls.rclk.num_parents; i++) {
+		parent = pf->dplls.inputs[pf->dplls.rclk.parent_idx[i]].pin;
+		if (!parent) {
+			ret = -ENODEV;
+			goto unregister_pins;
+		}
+		ret = dpll_pin_on_pin_register(parent, pf->dplls.rclk.pin,
+					       ops, &pf->dplls.rclk);
+		if (ret)
+			goto unregister_pins;
+	}
+	if (WARN_ON((!vsi || !vsi->netdev)))
+		return -EINVAL;
+	netdev_dpll_pin_set(vsi->netdev, pf->dplls.rclk.pin);
+
+	return 0;
+
+unregister_pins:
+	while (i) {
+		parent = pf->dplls.inputs[pf->dplls.rclk.parent_idx[--i]].pin;
+		dpll_pin_on_pin_unregister(parent, pf->dplls.rclk.pin,
+					   &ice_dpll_rclk_ops, &pf->dplls.rclk);
+	}
+	ice_dpll_release_pins(pin, ICE_DPLL_RCLK_NUM_PER_PF);
+	return ret;
+}
+
+/**
+ * ice_dpll_deinit_pins - deinitialize direct pins
+ * @pf: board private structure
+ * @cgu: if cgu is controlled by this pf
+ *
+ * If cgu is owned unregister directly connected pins from the dplls.
+ * Release resources of directly connected pins from the dpll subsystem.
+ */
+static void ice_dpll_deinit_pins(struct ice_pf *pf, bool cgu)
+{
+	struct ice_dpll_pin *outputs = pf->dplls.outputs;
+	struct ice_dpll_pin *inputs = pf->dplls.inputs;
+	int num_outputs = pf->dplls.num_outputs;
+	int num_inputs = pf->dplls.num_inputs;
+	struct ice_dplls *d = &pf->dplls;
+	struct ice_dpll *de = &d->eec;
+	struct ice_dpll *dp = &d->pps;
+
+	ice_dpll_deinit_rclk_pin(pf);
+	if (cgu) {
+		ice_dpll_unregister_pins(dp->dpll, inputs, &ice_dpll_input_ops,
+					 num_inputs);
+		ice_dpll_unregister_pins(de->dpll, inputs, &ice_dpll_input_ops,
+					 num_inputs);
+	}
+	ice_dpll_release_pins(inputs, num_inputs);
+	if (cgu) {
+		ice_dpll_unregister_pins(dp->dpll, outputs,
+					 &ice_dpll_output_ops, num_outputs);
+		ice_dpll_unregister_pins(de->dpll, outputs,
+					 &ice_dpll_output_ops, num_outputs);
+		ice_dpll_release_pins(outputs, num_outputs);
+	}
+}
+
+/**
+ * ice_dpll_init_pins - init pins and register pins with a dplls
+ * @pf: board private structure
+ * @cgu: if cgu is present and controlled by this NIC
+ *
+ * Initialize directly connected pf's pins within pf's dplls in a Linux dpll
+ * subsystem.
+ *
+ * Return:
+ * * 0 - success
+ * * negative - initialization failure reason
+ */
+static int ice_dpll_init_pins(struct ice_pf *pf, bool cgu)
+{
+	u32 rclk_idx;
+	int ret;
+
+	ret = ice_dpll_init_direct_pins(pf, cgu, pf->dplls.inputs, 0,
+					pf->dplls.num_inputs,
+					&ice_dpll_input_ops,
+					pf->dplls.eec.dpll, pf->dplls.pps.dpll);
+	if (ret)
+		return ret;
+	if (cgu) {
+		ret = ice_dpll_init_direct_pins(pf, cgu, pf->dplls.outputs,
+						pf->dplls.num_inputs,
+						pf->dplls.num_outputs,
+						&ice_dpll_output_ops,
+						pf->dplls.eec.dpll,
+						pf->dplls.pps.dpll);
+		if (ret)
+			goto deinit_inputs;
+	}
+	rclk_idx = pf->dplls.num_inputs + pf->dplls.num_outputs + pf->hw.pf_id;
+	ret = ice_dpll_init_rclk_pins(pf, &pf->dplls.rclk, rclk_idx,
+				      &ice_dpll_rclk_ops);
+	if (ret)
+		goto deinit_outputs;
+
+	return 0;
+deinit_outputs:
+	ice_dpll_deinit_direct_pins(cgu, pf->dplls.outputs,
+				    pf->dplls.num_outputs,
+				    &ice_dpll_output_ops, pf->dplls.pps.dpll,
+				    pf->dplls.eec.dpll);
+deinit_inputs:
+	ice_dpll_deinit_direct_pins(cgu, pf->dplls.inputs, pf->dplls.num_inputs,
+				    &ice_dpll_input_ops, pf->dplls.pps.dpll,
+				    pf->dplls.eec.dpll);
+	return ret;
+}
+
+/**
+ * ice_dpll_deinit_dpll - deinitialize dpll device
+ * @pf: board private structure
+ * @d: pointer to ice_dpll
+ * @cgu: if cgu is present and controlled by this NIC
+ *
+ * If cgu is owned unregister the dpll from dpll subsystem.
+ * Release resources of dpll device from dpll subsystem.
+ */
+static void
+ice_dpll_deinit_dpll(struct ice_pf *pf, struct ice_dpll *d, bool cgu)
+{
+	if (cgu)
+		dpll_device_unregister(d->dpll, &ice_dpll_ops, d);
+	dpll_device_put(d->dpll);
+}
+
+/**
+ * ice_dpll_init_dpll - initialize dpll device in dpll subsystem
+ * @pf: board private structure
+ * @d: dpll to be initialized
+ * @cgu: if cgu is present and controlled by this NIC
+ * @type: type of dpll being initialized
+ *
+ * Allocate dpll instance for this board in dpll subsystem, if cgu is controlled
+ * by this NIC, register dpll with the callback ops.
+ *
+ * Return:
+ * * 0 - success
+ * * negative - initialization failure reason
+ */
+static int
+ice_dpll_init_dpll(struct ice_pf *pf, struct ice_dpll *d, bool cgu,
+		   enum dpll_type type)
+{
+	u64 clock_id = pf->dplls.clock_id;
+	int ret;
+
+	d->dpll = dpll_device_get(clock_id, d->dpll_idx, THIS_MODULE);
+	if (IS_ERR(d->dpll)) {
+		ret = PTR_ERR(d->dpll);
+		dev_err(ice_pf_to_dev(pf),
+			"dpll_device_get failed (%p) err=%d\n", d, ret);
+		return ret;
+	}
+	d->pf = pf;
+	if (cgu) {
+		ret = dpll_device_register(d->dpll, type, &ice_dpll_ops, d);
+		if (ret) {
+			dpll_device_put(d->dpll);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * ice_dpll_deinit_worker - deinitialize dpll kworker
+ * @pf: board private structure
+ *
+ * Stop dpll's kworker, release it's resources.
+ */
+static void ice_dpll_deinit_worker(struct ice_pf *pf)
+{
+	struct ice_dplls *d = &pf->dplls;
+
+	kthread_cancel_delayed_work_sync(&d->work);
+	kthread_destroy_worker(d->kworker);
+}
+
+/**
+ * ice_dpll_init_worker - Initialize DPLLs periodic worker
+ * @pf: board private structure
+ *
+ * Create and start DPLLs periodic worker.
+ *
+ * Context: Shall be called after pf->dplls.lock is initialized.
+ * Return:
+ * * 0 - success
+ * * negative - create worker failure
+ */
+static int ice_dpll_init_worker(struct ice_pf *pf)
+{
+	struct ice_dplls *d = &pf->dplls;
+	struct kthread_worker *kworker;
+
+	ice_dpll_update_state(pf, &d->eec, true);
+	ice_dpll_update_state(pf, &d->pps, true);
+	kthread_init_delayed_work(&d->work, ice_dpll_periodic_work);
+	kworker = kthread_create_worker(0, "ice-dplls-%s",
+					dev_name(ice_pf_to_dev(pf)));
+	if (IS_ERR(kworker))
+		return PTR_ERR(kworker);
+	d->kworker = kworker;
+	d->cgu_state_acq_err_num = 0;
+	kthread_queue_delayed_work(d->kworker, &d->work, 0);
+
+	return 0;
+}
+
+/**
+ * ice_dpll_init_info_direct_pins - initializes direct pins info
+ * @pf: board private structure
+ * @pin_type: type of pins being initialized
+ *
+ * Init information for directly connected pins, cache them in pf's pins
+ * structures.
+ *
+ * Return:
+ * * 0 - success
+ * * negative - init failure reason
+ */
+static int
+ice_dpll_init_info_direct_pins(struct ice_pf *pf,
+			       enum ice_dpll_pin_type pin_type)
+{
+	struct ice_dpll *de = &pf->dplls.eec, *dp = &pf->dplls.pps;
+	int num_pins, i, ret = -EINVAL;
+	struct ice_hw *hw = &pf->hw;
+	struct ice_dpll_pin *pins;
+	u8 freq_supp_num;
+	bool input;
+
+	switch (pin_type) {
+	case ICE_DPLL_PIN_TYPE_INPUT:
+		pins = pf->dplls.inputs;
+		num_pins = pf->dplls.num_inputs;
+		input = true;
+		break;
+	case ICE_DPLL_PIN_TYPE_OUTPUT:
+		pins = pf->dplls.outputs;
+		num_pins = pf->dplls.num_outputs;
+		input = false;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	for (i = 0; i < num_pins; i++) {
+		pins[i].idx = i;
+		pins[i].prop.board_label = ice_cgu_get_pin_name(hw, i, input);
+		pins[i].prop.type = ice_cgu_get_pin_type(hw, i, input);
+		if (input) {
+			ret = ice_aq_get_cgu_ref_prio(hw, de->dpll_idx, i,
+						      &de->input_prio[i]);
+			if (ret)
+				return ret;
+			ret = ice_aq_get_cgu_ref_prio(hw, dp->dpll_idx, i,
+						      &dp->input_prio[i]);
+			if (ret)
+				return ret;
+			pins[i].prop.capabilities |=
+				DPLL_PIN_CAPABILITIES_PRIORITY_CAN_CHANGE;
+		}
+		pins[i].prop.capabilities |=
+			DPLL_PIN_CAPABILITIES_STATE_CAN_CHANGE;
+		ret = ice_dpll_pin_state_update(pf, &pins[i], pin_type, NULL);
+		if (ret)
+			return ret;
+		pins[i].prop.freq_supported =
+			ice_cgu_get_pin_freq_supp(hw, i, input, &freq_supp_num);
+		pins[i].prop.freq_supported_num = freq_supp_num;
+		pins[i].pf = pf;
+	}
+
+	return ret;
+}
+
+/**
+ * ice_dpll_init_info_rclk_pin - initializes rclk pin information
+ * @pf: board private structure
+ *
+ * Init information for rclk pin, cache them in pf->dplls.rclk.
+ *
+ * Return:
+ * * 0 - success
+ * * negative - init failure reason
+ */
+static int ice_dpll_init_info_rclk_pin(struct ice_pf *pf)
+{
+	struct ice_dpll_pin *pin = &pf->dplls.rclk;
+
+	pin->prop.type = DPLL_PIN_TYPE_SYNCE_ETH_PORT;
+	pin->prop.capabilities |= DPLL_PIN_CAPABILITIES_STATE_CAN_CHANGE;
+	pin->pf = pf;
+
+	return ice_dpll_pin_state_update(pf, pin,
+					 ICE_DPLL_PIN_TYPE_RCLK_INPUT, NULL);
+}
+
+/**
+ * ice_dpll_init_pins_info - init pins info wrapper
+ * @pf: board private structure
+ * @pin_type: type of pins being initialized
+ *
+ * Wraps functions for pin initialization.
+ *
+ * Return:
+ * * 0 - success
+ * * negative - init failure reason
+ */
+static int
+ice_dpll_init_pins_info(struct ice_pf *pf, enum ice_dpll_pin_type pin_type)
+{
+	switch (pin_type) {
+	case ICE_DPLL_PIN_TYPE_INPUT:
+	case ICE_DPLL_PIN_TYPE_OUTPUT:
+		return ice_dpll_init_info_direct_pins(pf, pin_type);
+	case ICE_DPLL_PIN_TYPE_RCLK_INPUT:
+		return ice_dpll_init_info_rclk_pin(pf);
+	default:
+		return -EINVAL;
+	}
+}
+
+/**
+ * ice_dpll_deinit_info - release memory allocated for pins info
+ * @pf: board private structure
+ *
+ * Release memory allocated for pins by ice_dpll_init_info function.
+ */
+static void ice_dpll_deinit_info(struct ice_pf *pf)
+{
+	kfree(pf->dplls.inputs);
+	kfree(pf->dplls.outputs);
+	kfree(pf->dplls.eec.input_prio);
+	kfree(pf->dplls.pps.input_prio);
+}
+
+/**
+ * ice_dpll_init_info - prepare pf's dpll information structure
+ * @pf: board private structure
+ * @cgu: if cgu is present and controlled by this NIC
+ *
+ * Acquire (from HW) and set basic dpll information (on pf->dplls struct).
+ *
+ * Return:
+ * * 0 - success
+ * * negative - init failure reason
+ */
+static int ice_dpll_init_info(struct ice_pf *pf, bool cgu)
+{
+	struct ice_aqc_get_cgu_abilities abilities;
+	struct ice_dpll *de = &pf->dplls.eec;
+	struct ice_dpll *dp = &pf->dplls.pps;
+	struct ice_dplls *d = &pf->dplls;
+	struct ice_hw *hw = &pf->hw;
+	int ret, alloc_size, i;
+
+	d->clock_id = ice_generate_clock_id(pf);
+	ret = ice_aq_get_cgu_abilities(hw, &abilities);
+	if (ret) {
+		dev_err(ice_pf_to_dev(pf),
+			"err:%d %s failed to read cgu abilities\n",
+			ret, ice_aq_str(hw->adminq.sq_last_status));
+		return ret;
+	}
+
+	de->dpll_idx = abilities.eec_dpll_idx;
+	dp->dpll_idx = abilities.pps_dpll_idx;
+	d->num_inputs = abilities.num_inputs;
+	d->num_outputs = abilities.num_outputs;
+	d->input_phase_adj_max = le32_to_cpu(abilities.max_in_phase_adj);
+	d->output_phase_adj_max = le32_to_cpu(abilities.max_out_phase_adj);
+
+	alloc_size = sizeof(*d->inputs) * d->num_inputs;
+	d->inputs = kzalloc(alloc_size, GFP_KERNEL);
+	if (!d->inputs)
+		return -ENOMEM;
+
+	alloc_size = sizeof(*de->input_prio) * d->num_inputs;
+	de->input_prio = kzalloc(alloc_size, GFP_KERNEL);
+	if (!de->input_prio)
+		return -ENOMEM;
+
+	dp->input_prio = kzalloc(alloc_size, GFP_KERNEL);
+	if (!dp->input_prio)
+		return -ENOMEM;
+
+	ret = ice_dpll_init_pins_info(pf, ICE_DPLL_PIN_TYPE_INPUT);
+	if (ret)
+		goto deinit_info;
+
+	if (cgu) {
+		alloc_size = sizeof(*d->outputs) * d->num_outputs;
+		d->outputs = kzalloc(alloc_size, GFP_KERNEL);
+		if (!d->outputs) {
+			ret = -ENOMEM;
+			goto deinit_info;
+		}
+
+		ret = ice_dpll_init_pins_info(pf, ICE_DPLL_PIN_TYPE_OUTPUT);
+		if (ret)
+			goto deinit_info;
+	}
+
+	ret = ice_get_cgu_rclk_pin_info(&pf->hw, &d->base_rclk_idx,
+					&pf->dplls.rclk.num_parents);
+	if (ret)
+		return ret;
+	for (i = 0; i < pf->dplls.rclk.num_parents; i++)
+		pf->dplls.rclk.parent_idx[i] = d->base_rclk_idx + i;
+	ret = ice_dpll_init_pins_info(pf, ICE_DPLL_PIN_TYPE_RCLK_INPUT);
+	if (ret)
+		return ret;
+	de->mode = DPLL_MODE_AUTOMATIC;
+	dp->mode = DPLL_MODE_AUTOMATIC;
+
+	dev_dbg(ice_pf_to_dev(pf),
+		"%s - success, inputs:%u, outputs:%u rclk-parents:%u\n",
+		__func__, d->num_inputs, d->num_outputs, d->rclk.num_parents);
+
+	return 0;
+
+deinit_info:
+	dev_err(ice_pf_to_dev(pf),
+		"%s - fail: d->inputs:%p, de->input_prio:%p, dp->input_prio:%p, d->outputs:%p\n",
+		__func__, d->inputs, de->input_prio,
+		dp->input_prio, d->outputs);
+	ice_dpll_deinit_info(pf);
+	return ret;
+}
+
+/**
+ * ice_dpll_deinit - Disable the driver/HW support for dpll subsystem
+ * the dpll device.
+ * @pf: board private structure
+ *
+ * Handles the cleanup work required after dpll initialization, freeing
+ * resources and unregistering the dpll, pin and all resources used for
+ * handling them.
+ *
+ * Context: Destroys pf->dplls.lock mutex. Call only if ICE_FLAG_DPLL was set.
+ */
+void ice_dpll_deinit(struct ice_pf *pf)
+{
+	bool cgu = ice_is_feature_supported(pf, ICE_F_CGU);
+
+	clear_bit(ICE_FLAG_DPLL, pf->flags);
+	if (cgu)
+		ice_dpll_deinit_worker(pf);
+
+	ice_dpll_deinit_pins(pf, cgu);
+	ice_dpll_deinit_dpll(pf, &pf->dplls.pps, cgu);
+	ice_dpll_deinit_dpll(pf, &pf->dplls.eec, cgu);
+	ice_dpll_deinit_info(pf);
+	mutex_destroy(&pf->dplls.lock);
+}
+
+/**
+ * ice_dpll_init - initialize support for dpll subsystem
+ * @pf: board private structure
+ *
+ * Set up the device dplls, register them and pins connected within Linux dpll
+ * subsystem. Allow userspace to obtain state of DPLL and handling of DPLL
+ * configuration requests.
+ *
+ * Context: Initializes pf->dplls.lock mutex.
+ */
+void ice_dpll_init(struct ice_pf *pf)
+{
+	bool cgu = ice_is_feature_supported(pf, ICE_F_CGU);
+	struct ice_dplls *d = &pf->dplls;
+	int err = 0;
+
+	err = ice_dpll_init_info(pf, cgu);
+	if (err)
+		goto err_exit;
+	err = ice_dpll_init_dpll(pf, &pf->dplls.eec, cgu, DPLL_TYPE_EEC);
+	if (err)
+		goto deinit_info;
+	err = ice_dpll_init_dpll(pf, &pf->dplls.pps, cgu, DPLL_TYPE_PPS);
+	if (err)
+		goto deinit_eec;
+	err = ice_dpll_init_pins(pf, cgu);
+	if (err)
+		goto deinit_pps;
+	mutex_init(&d->lock);
+	if (cgu) {
+		err = ice_dpll_init_worker(pf);
+		if (err)
+			goto deinit_pins;
+	}
+	set_bit(ICE_FLAG_DPLL, pf->flags);
+
+	return;
+
+deinit_pins:
+	ice_dpll_deinit_pins(pf, cgu);
+deinit_pps:
+	ice_dpll_deinit_dpll(pf, &pf->dplls.pps, cgu);
+deinit_eec:
+	ice_dpll_deinit_dpll(pf, &pf->dplls.eec, cgu);
+deinit_info:
+	ice_dpll_deinit_info(pf);
+err_exit:
+	mutex_destroy(&d->lock);
+	dev_warn(ice_pf_to_dev(pf), "DPLLs init failure err:%d\n", err);
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_dpll.h b/drivers/net/ethernet/intel/ice/ice_dpll.h
new file mode 100644
index 000000000000..9c524c4bdfd7
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_dpll.h
@@ -0,0 +1,104 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2022, Intel Corporation. */
+
+#ifndef _ICE_DPLL_H_
+#define _ICE_DPLL_H_
+
+#include "ice.h"
+
+#define ICE_DPLL_PRIO_MAX	0xF
+#define ICE_DPLL_RCLK_NUM_MAX	4
+
+/** ice_dpll_pin - store info about pins
+ * @pin: dpll pin structure
+ * @pf: pointer to pf, which has registered the dpll_pin
+ * @idx: ice pin private idx
+ * @num_parents: hols number of parent pins
+ * @parent_idx: hold indexes of parent pins
+ * @flags: pin flags returned from HW
+ * @state: state of a pin
+ * @prop: pin properties
+ * @freq: current frequency of a pin
+ */
+struct ice_dpll_pin {
+	struct dpll_pin *pin;
+	struct ice_pf *pf;
+	u8 idx;
+	u8 num_parents;
+	u8 parent_idx[ICE_DPLL_RCLK_NUM_MAX];
+	u8 flags[ICE_DPLL_RCLK_NUM_MAX];
+	u8 state[ICE_DPLL_RCLK_NUM_MAX];
+	struct dpll_pin_properties prop;
+	u32 freq;
+};
+
+/** ice_dpll - store info required for DPLL control
+ * @dpll: pointer to dpll dev
+ * @pf: pointer to pf, which has registered the dpll_device
+ * @dpll_idx: index of dpll on the NIC
+ * @input_idx: currently selected input index
+ * @prev_input_idx: previously selected input index
+ * @ref_state: state of dpll reference signals
+ * @eec_mode: eec_mode dpll is configured for
+ * @phase_shift: phase shift delay of a dpll
+ * @input_prio: priorities of each input
+ * @dpll_state: current dpll sync state
+ * @prev_dpll_state: last dpll sync state
+ * @active_input: pointer to active input pin
+ * @prev_input: pointer to previous active input pin
+ */
+struct ice_dpll {
+	struct dpll_device *dpll;
+	struct ice_pf *pf;
+	u8 dpll_idx;
+	u8 input_idx;
+	u8 prev_input_idx;
+	u8 ref_state;
+	u8 eec_mode;
+	s64 phase_shift;
+	u8 *input_prio;
+	enum dpll_lock_status dpll_state;
+	enum dpll_lock_status prev_dpll_state;
+	enum dpll_mode mode;
+	struct dpll_pin *active_input;
+	struct dpll_pin *prev_input;
+};
+
+/** ice_dplls - store info required for CCU (clock controlling unit)
+ * @kworker: periodic worker
+ * @work: periodic work
+ * @lock: locks access to configuration of a dpll
+ * @eec: pointer to EEC dpll dev
+ * @pps: pointer to PPS dpll dev
+ * @inputs: input pins pointer
+ * @outputs: output pins pointer
+ * @rclk: recovered pins pointer
+ * @num_inputs: number of input pins available on dpll
+ * @num_outputs: number of output pins available on dpll
+ * @cgu_state_acq_err_num: number of errors returned during periodic work
+ * @base_rclk_idx: idx of first pin used for clock revocery pins
+ * @clock_id: clock_id of dplls
+ */
+struct ice_dplls {
+	struct kthread_worker *kworker;
+	struct kthread_delayed_work work;
+	struct mutex lock;
+	struct ice_dpll eec;
+	struct ice_dpll pps;
+	struct ice_dpll_pin *inputs;
+	struct ice_dpll_pin *outputs;
+	struct ice_dpll_pin rclk;
+	u8 num_inputs;
+	u8 num_outputs;
+	int cgu_state_acq_err_num;
+	u8 base_rclk_idx;
+	u64 clock_id;
+	s32 input_phase_adj_max;
+	s32 output_phase_adj_max;
+};
+
+void ice_dpll_init(struct ice_pf *pf);
+
+void ice_dpll_deinit(struct ice_pf *pf);
+
+#endif
diff --git a/drivers/net/ethernet/intel/ice/ice_gnss.c b/drivers/net/ethernet/intel/ice/ice_gnss.c
index 75c9de675f20..c8ea1af51ad3 100644
--- a/drivers/net/ethernet/intel/ice/ice_gnss.c
+++ b/drivers/net/ethernet/intel/ice/ice_gnss.c
@@ -389,6 +389,9 @@ bool ice_gnss_is_gps_present(struct ice_hw *hw)
 	if (!hw->func_caps.ts_func_info.src_tmr_owned)
 		return false;
 
+	if (!ice_is_gps_in_netlist(hw))
+		return false;
+
 #if IS_ENABLED(CONFIG_PTP_1588_CLOCK)
 	if (ice_is_e810t(hw)) {
 		int err;
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index 73bbf06a76db..031abe311ea3 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -3990,13 +3990,21 @@ void ice_init_feature_support(struct ice_pf *pf)
 	case ICE_DEV_ID_E810C_BACKPLANE:
 	case ICE_DEV_ID_E810C_QSFP:
 	case ICE_DEV_ID_E810C_SFP:
+	case ICE_DEV_ID_E810_XXV_BACKPLANE:
+	case ICE_DEV_ID_E810_XXV_QSFP:
+	case ICE_DEV_ID_E810_XXV_SFP:
 		ice_set_feature_support(pf, ICE_F_DSCP);
-		ice_set_feature_support(pf, ICE_F_PTP_EXTTS);
-		if (ice_is_e810t(&pf->hw)) {
+		if (ice_is_phy_rclk_present(&pf->hw))
+			ice_set_feature_support(pf, ICE_F_PHY_RCLK);
+		/* If we don't own the timer - don't enable other caps */
+		if (!ice_pf_src_tmr_owned(pf))
+			break;
+		if (ice_is_cgu_present(&pf->hw))
+			ice_set_feature_support(pf, ICE_F_CGU);
+		if (ice_is_clock_mux_present_e810t(&pf->hw))
 			ice_set_feature_support(pf, ICE_F_SMA_CTRL);
-			if (ice_gnss_is_gps_present(&pf->hw))
-				ice_set_feature_support(pf, ICE_F_GNSS);
-		}
+		if (ice_gnss_is_gps_present(&pf->hw))
+			ice_set_feature_support(pf, ICE_F_GNSS);
 		break;
 	default:
 		break;
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 7784135160fd..4fd245f55b2d 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -3160,7 +3160,7 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data)
 
 		ena_mask &= ~PFINT_OICR_TSYN_EVNT_M;
 
-		if (hw->func_caps.ts_func_info.src_tmr_owned) {
+		if (ice_pf_src_tmr_owned(pf)) {
 			/* Save EVENTs from GLTSYN register */
 			pf->ptp.ext_ts_irq |= gltsyn_stat &
 					      (GLTSYN_STAT_EVENT0_M |
@@ -4666,6 +4666,10 @@ static void ice_init_features(struct ice_pf *pf)
 	if (ice_is_feature_supported(pf, ICE_F_GNSS))
 		ice_gnss_init(pf);
 
+	if (ice_is_feature_supported(pf, ICE_F_CGU) ||
+	    ice_is_feature_supported(pf, ICE_F_PHY_RCLK))
+		ice_dpll_init(pf);
+
 	/* Note: Flow director init failure is non-fatal to load */
 	if (ice_init_fdir(pf))
 		dev_err(dev, "could not initialize flow director\n");
@@ -4695,6 +4699,8 @@ static void ice_deinit_features(struct ice_pf *pf)
 		ice_gnss_exit(pf);
 	if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags))
 		ice_ptp_release(pf);
+	if (test_bit(ICE_FLAG_DPLL, pf->flags))
+		ice_dpll_deinit(pf);
 }
 
 static void ice_init_wakeup(struct ice_pf *pf)
diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c
index 81d96a40d5a7..05f922d3b316 100644
--- a/drivers/net/ethernet/intel/ice/ice_ptp.c
+++ b/drivers/net/ethernet/intel/ice/ice_ptp.c
@@ -436,7 +436,7 @@ static void ice_clear_ptp_clock_index(struct ice_pf *pf)
 	int err;
 
 	/* Do not clear the index if we don't own the timer */
-	if (!hw->func_caps.ts_func_info.src_tmr_owned)
+	if (!ice_pf_src_tmr_owned(pf))
 		return;
 
 	tmr_idx = hw->func_caps.ts_func_info.tmr_index_assoc;
@@ -1366,6 +1366,7 @@ out_unlock:
 void ice_ptp_link_change(struct ice_pf *pf, u8 port, bool linkup)
 {
 	struct ice_ptp_port *ptp_port;
+	struct ice_hw *hw = &pf->hw;
 
 	if (!test_bit(ICE_FLAG_PTP, pf->flags))
 		return;
@@ -1380,11 +1381,16 @@ void ice_ptp_link_change(struct ice_pf *pf, u8 port, bool linkup)
 	/* Update cached link status for this port immediately */
 	ptp_port->link_up = linkup;
 
-	/* E810 devices do not need to reconfigure the PHY */
-	if (ice_is_e810(&pf->hw))
+	switch (hw->phy_model) {
+	case ICE_PHY_E810:
+		/* Do not reconfigure E810 PHY */
 		return;
-
-	ice_ptp_port_phy_restart(ptp_port);
+	case ICE_PHY_E822:
+		ice_ptp_port_phy_restart(ptp_port);
+		return;
+	default:
+		dev_warn(ice_pf_to_dev(pf), "%s: Unknown PHY type\n", __func__);
+	}
 }
 
 /**
@@ -1976,21 +1982,32 @@ ice_ptp_get_syncdevicetime(ktime_t *device,
 	u32 hh_lock, hh_art_ctl;
 	int i;
 
-	/* Get the HW lock */
-	hh_lock = rd32(hw, PFHH_SEM + (PFTSYN_SEM_BYTES * hw->pf_id));
+#define MAX_HH_HW_LOCK_TRIES	5
+#define MAX_HH_CTL_LOCK_TRIES	100
+
+	for (i = 0; i < MAX_HH_HW_LOCK_TRIES; i++) {
+		/* Get the HW lock */
+		hh_lock = rd32(hw, PFHH_SEM + (PFTSYN_SEM_BYTES * hw->pf_id));
+		if (hh_lock & PFHH_SEM_BUSY_M) {
+			usleep_range(10000, 15000);
+			continue;
+		}
+		break;
+	}
 	if (hh_lock & PFHH_SEM_BUSY_M) {
 		dev_err(ice_pf_to_dev(pf), "PTP failed to get hh lock\n");
-		return -EFAULT;
+		return -EBUSY;
 	}
 
+	/* Program cmd to master timer */
+	ice_ptp_src_cmd(hw, ICE_PTP_READ_TIME);
+
 	/* Start the ART and device clock sync sequence */
 	hh_art_ctl = rd32(hw, GLHH_ART_CTL);
 	hh_art_ctl = hh_art_ctl | GLHH_ART_CTL_ACTIVE_M;
 	wr32(hw, GLHH_ART_CTL, hh_art_ctl);
 
-#define MAX_HH_LOCK_TRIES 100
-
-	for (i = 0; i < MAX_HH_LOCK_TRIES; i++) {
+	for (i = 0; i < MAX_HH_CTL_LOCK_TRIES; i++) {
 		/* Wait for sync to complete */
 		hh_art_ctl = rd32(hw, GLHH_ART_CTL);
 		if (hh_art_ctl & GLHH_ART_CTL_ACTIVE_M) {
@@ -2014,19 +2031,23 @@ ice_ptp_get_syncdevicetime(ktime_t *device,
 			break;
 		}
 	}
+
+	/* Clear the master timer */
+	ice_ptp_src_cmd(hw, ICE_PTP_NOP);
+
 	/* Release HW lock */
 	hh_lock = rd32(hw, PFHH_SEM + (PFTSYN_SEM_BYTES * hw->pf_id));
 	hh_lock = hh_lock & ~PFHH_SEM_BUSY_M;
 	wr32(hw, PFHH_SEM + (PFTSYN_SEM_BYTES * hw->pf_id), hh_lock);
 
-	if (i == MAX_HH_LOCK_TRIES)
+	if (i == MAX_HH_CTL_LOCK_TRIES)
 		return -ETIMEDOUT;
 
 	return 0;
 }
 
 /**
- * ice_ptp_getcrosststamp_e822 - Capture a device cross timestamp
+ * ice_ptp_getcrosststamp_e82x - Capture a device cross timestamp
  * @info: the driver's PTP info structure
  * @cts: The memory to fill the cross timestamp info
  *
@@ -2034,14 +2055,14 @@ ice_ptp_get_syncdevicetime(ktime_t *device,
  * clock. Fill the cross timestamp information and report it back to the
  * caller.
  *
- * This is only valid for E822 devices which have support for generating the
- * cross timestamp via PCIe PTM.
+ * This is only valid for E822 and E823 devices which have support for
+ * generating the cross timestamp via PCIe PTM.
  *
  * In order to correctly correlate the ART timestamp back to the TSC time, the
  * CPU must have X86_FEATURE_TSC_KNOWN_FREQ.
  */
 static int
-ice_ptp_getcrosststamp_e822(struct ptp_clock_info *info,
+ice_ptp_getcrosststamp_e82x(struct ptp_clock_info *info,
 			    struct system_device_crosststamp *cts)
 {
 	struct ice_pf *pf = ptp_info_to_pf(info);
@@ -2246,18 +2267,20 @@ ice_ptp_setup_sma_pins_e810t(struct ice_pf *pf, struct ptp_clock_info *info)
 static void
 ice_ptp_setup_pins_e810(struct ice_pf *pf, struct ptp_clock_info *info)
 {
-	info->n_per_out = N_PER_OUT_E810;
-
-	if (ice_is_feature_supported(pf, ICE_F_PTP_EXTTS))
-		info->n_ext_ts = N_EXT_TS_E810;
-
 	if (ice_is_feature_supported(pf, ICE_F_SMA_CTRL)) {
 		info->n_ext_ts = N_EXT_TS_E810;
+		info->n_per_out = N_PER_OUT_E810T;
 		info->n_pins = NUM_PTP_PINS_E810T;
 		info->verify = ice_verify_pin_e810t;
 
 		/* Complete setup of the SMA pins */
 		ice_ptp_setup_sma_pins_e810t(pf, info);
+	} else if (ice_is_e810t(&pf->hw)) {
+		info->n_ext_ts = N_EXT_TS_NO_SMA_E810T;
+		info->n_per_out = N_PER_OUT_NO_SMA_E810T;
+	} else {
+		info->n_per_out = N_PER_OUT_E810;
+		info->n_ext_ts = N_EXT_TS_E810;
 	}
 }
 
@@ -2275,22 +2298,22 @@ ice_ptp_setup_pins_e823(struct ice_pf *pf, struct ptp_clock_info *info)
 }
 
 /**
- * ice_ptp_set_funcs_e822 - Set specialized functions for E822 support
+ * ice_ptp_set_funcs_e82x - Set specialized functions for E82x support
  * @pf: Board private structure
  * @info: PTP info to fill
  *
- * Assign functions to the PTP capabiltiies structure for E822 devices.
+ * Assign functions to the PTP capabiltiies structure for E82x devices.
  * Functions which operate across all device families should be set directly
- * in ice_ptp_set_caps. Only add functions here which are distinct for E822
+ * in ice_ptp_set_caps. Only add functions here which are distinct for E82x
  * devices.
  */
 static void
-ice_ptp_set_funcs_e822(struct ice_pf *pf, struct ptp_clock_info *info)
+ice_ptp_set_funcs_e82x(struct ice_pf *pf, struct ptp_clock_info *info)
 {
 #ifdef CONFIG_ICE_HWTS
 	if (boot_cpu_has(X86_FEATURE_ART) &&
 	    boot_cpu_has(X86_FEATURE_TSC_KNOWN_FREQ))
-		info->getcrosststamp = ice_ptp_getcrosststamp_e822;
+		info->getcrosststamp = ice_ptp_getcrosststamp_e82x;
 #endif /* CONFIG_ICE_HWTS */
 }
 
@@ -2324,6 +2347,8 @@ ice_ptp_set_funcs_e810(struct ice_pf *pf, struct ptp_clock_info *info)
 static void
 ice_ptp_set_funcs_e823(struct ice_pf *pf, struct ptp_clock_info *info)
 {
+	ice_ptp_set_funcs_e82x(pf, info);
+
 	info->enable = ice_ptp_gpio_enable_e823;
 	ice_ptp_setup_pins_e823(pf, info);
 }
@@ -2351,7 +2376,7 @@ static void ice_ptp_set_caps(struct ice_pf *pf)
 	else if (ice_is_e823(&pf->hw))
 		ice_ptp_set_funcs_e823(pf, info);
 	else
-		ice_ptp_set_funcs_e822(pf, info);
+		ice_ptp_set_funcs_e82x(pf, info);
 }
 
 /**
@@ -2474,7 +2499,7 @@ void ice_ptp_reset(struct ice_pf *pf)
 	if (test_bit(ICE_PFR_REQ, pf->state))
 		goto pfr;
 
-	if (!hw->func_caps.ts_func_info.src_tmr_owned)
+	if (!ice_pf_src_tmr_owned(pf))
 		goto reset_ts;
 
 	err = ice_ptp_init_phc(hw);
@@ -2685,14 +2710,22 @@ static int ice_ptp_init_work(struct ice_pf *pf, struct ice_ptp *ptp)
  */
 static int ice_ptp_init_port(struct ice_pf *pf, struct ice_ptp_port *ptp_port)
 {
+	struct ice_hw *hw = &pf->hw;
+
 	mutex_init(&ptp_port->ps_lock);
 
-	if (ice_is_e810(&pf->hw))
+	switch (hw->phy_model) {
+	case ICE_PHY_E810:
 		return ice_ptp_init_tx_e810(pf, &ptp_port->tx);
+	case ICE_PHY_E822:
+		kthread_init_delayed_work(&ptp_port->ov_work,
+					  ice_ptp_wait_for_offsets);
 
-	kthread_init_delayed_work(&ptp_port->ov_work,
-				  ice_ptp_wait_for_offsets);
-	return ice_ptp_init_tx_e822(pf, &ptp_port->tx, ptp_port->port_num);
+		return ice_ptp_init_tx_e822(pf, &ptp_port->tx,
+					    ptp_port->port_num);
+	default:
+		return -ENODEV;
+	}
 }
 
 /**
@@ -2713,10 +2746,12 @@ void ice_ptp_init(struct ice_pf *pf)
 	struct ice_hw *hw = &pf->hw;
 	int err;
 
+	ice_ptp_init_phy_model(hw);
+
 	/* If this function owns the clock hardware, it must allocate and
 	 * configure the PTP clock device to represent it.
 	 */
-	if (hw->func_caps.ts_func_info.src_tmr_owned) {
+	if (ice_pf_src_tmr_owned(pf)) {
 		err = ice_ptp_init_owner(pf);
 		if (err)
 			goto err;
diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c
index f818dd215c05..f839f186797d 100644
--- a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c
+++ b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c
@@ -7,6 +7,132 @@
 #include "ice_ptp_consts.h"
 #include "ice_cgu_regs.h"
 
+static struct dpll_pin_frequency ice_cgu_pin_freq_common[] = {
+	DPLL_PIN_FREQUENCY_1PPS,
+	DPLL_PIN_FREQUENCY_10MHZ,
+};
+
+static struct dpll_pin_frequency ice_cgu_pin_freq_1_hz[] = {
+	DPLL_PIN_FREQUENCY_1PPS,
+};
+
+static struct dpll_pin_frequency ice_cgu_pin_freq_10_mhz[] = {
+	DPLL_PIN_FREQUENCY_10MHZ,
+};
+
+static const struct ice_cgu_pin_desc ice_e810t_sfp_cgu_inputs[] = {
+	{ "CVL-SDP22",	  ZL_REF0P, DPLL_PIN_TYPE_INT_OSCILLATOR,
+		ARRAY_SIZE(ice_cgu_pin_freq_common), ice_cgu_pin_freq_common },
+	{ "CVL-SDP20",	  ZL_REF0N, DPLL_PIN_TYPE_INT_OSCILLATOR,
+		ARRAY_SIZE(ice_cgu_pin_freq_common), ice_cgu_pin_freq_common },
+	{ "C827_0-RCLKA", ZL_REF1P, DPLL_PIN_TYPE_MUX, 0, },
+	{ "C827_0-RCLKB", ZL_REF1N, DPLL_PIN_TYPE_MUX, 0, },
+	{ "SMA1",	  ZL_REF3P, DPLL_PIN_TYPE_EXT,
+		ARRAY_SIZE(ice_cgu_pin_freq_common), ice_cgu_pin_freq_common },
+	{ "SMA2/U.FL2",	  ZL_REF3N, DPLL_PIN_TYPE_EXT,
+		ARRAY_SIZE(ice_cgu_pin_freq_common), ice_cgu_pin_freq_common },
+	{ "GNSS-1PPS",	  ZL_REF4P, DPLL_PIN_TYPE_GNSS,
+		ARRAY_SIZE(ice_cgu_pin_freq_1_hz), ice_cgu_pin_freq_1_hz },
+	{ "OCXO",	  ZL_REF4N, DPLL_PIN_TYPE_INT_OSCILLATOR, 0, },
+};
+
+static const struct ice_cgu_pin_desc ice_e810t_qsfp_cgu_inputs[] = {
+	{ "CVL-SDP22",	  ZL_REF0P, DPLL_PIN_TYPE_INT_OSCILLATOR,
+		ARRAY_SIZE(ice_cgu_pin_freq_common), ice_cgu_pin_freq_common },
+	{ "CVL-SDP20",	  ZL_REF0N, DPLL_PIN_TYPE_INT_OSCILLATOR,
+		ARRAY_SIZE(ice_cgu_pin_freq_common), ice_cgu_pin_freq_common },
+	{ "C827_0-RCLKA", ZL_REF1P, DPLL_PIN_TYPE_MUX, },
+	{ "C827_0-RCLKB", ZL_REF1N, DPLL_PIN_TYPE_MUX, },
+	{ "C827_1-RCLKA", ZL_REF2P, DPLL_PIN_TYPE_MUX, },
+	{ "C827_1-RCLKB", ZL_REF2N, DPLL_PIN_TYPE_MUX, },
+	{ "SMA1",	  ZL_REF3P, DPLL_PIN_TYPE_EXT,
+		ARRAY_SIZE(ice_cgu_pin_freq_common), ice_cgu_pin_freq_common },
+	{ "SMA2/U.FL2",	  ZL_REF3N, DPLL_PIN_TYPE_EXT,
+		ARRAY_SIZE(ice_cgu_pin_freq_common), ice_cgu_pin_freq_common },
+	{ "GNSS-1PPS",	  ZL_REF4P, DPLL_PIN_TYPE_GNSS,
+		ARRAY_SIZE(ice_cgu_pin_freq_1_hz), ice_cgu_pin_freq_1_hz },
+	{ "OCXO",	  ZL_REF4N, DPLL_PIN_TYPE_INT_OSCILLATOR, },
+};
+
+static const struct ice_cgu_pin_desc ice_e810t_sfp_cgu_outputs[] = {
+	{ "REF-SMA1",	    ZL_OUT0, DPLL_PIN_TYPE_EXT,
+		ARRAY_SIZE(ice_cgu_pin_freq_common), ice_cgu_pin_freq_common },
+	{ "REF-SMA2/U.FL2", ZL_OUT1, DPLL_PIN_TYPE_EXT,
+		ARRAY_SIZE(ice_cgu_pin_freq_common), ice_cgu_pin_freq_common },
+	{ "PHY-CLK",	    ZL_OUT2, DPLL_PIN_TYPE_SYNCE_ETH_PORT, },
+	{ "MAC-CLK",	    ZL_OUT3, DPLL_PIN_TYPE_SYNCE_ETH_PORT, },
+	{ "CVL-SDP21",	    ZL_OUT4, DPLL_PIN_TYPE_EXT,
+		ARRAY_SIZE(ice_cgu_pin_freq_1_hz), ice_cgu_pin_freq_1_hz },
+	{ "CVL-SDP23",	    ZL_OUT5, DPLL_PIN_TYPE_EXT,
+		ARRAY_SIZE(ice_cgu_pin_freq_1_hz), ice_cgu_pin_freq_1_hz },
+};
+
+static const struct ice_cgu_pin_desc ice_e810t_qsfp_cgu_outputs[] = {
+	{ "REF-SMA1",	    ZL_OUT0, DPLL_PIN_TYPE_EXT,
+		ARRAY_SIZE(ice_cgu_pin_freq_common), ice_cgu_pin_freq_common },
+	{ "REF-SMA2/U.FL2", ZL_OUT1, DPLL_PIN_TYPE_EXT,
+		ARRAY_SIZE(ice_cgu_pin_freq_common), ice_cgu_pin_freq_common },
+	{ "PHY-CLK",	    ZL_OUT2, DPLL_PIN_TYPE_SYNCE_ETH_PORT, 0 },
+	{ "PHY2-CLK",	    ZL_OUT3, DPLL_PIN_TYPE_SYNCE_ETH_PORT, 0 },
+	{ "MAC-CLK",	    ZL_OUT4, DPLL_PIN_TYPE_SYNCE_ETH_PORT, 0 },
+	{ "CVL-SDP21",	    ZL_OUT5, DPLL_PIN_TYPE_EXT,
+		ARRAY_SIZE(ice_cgu_pin_freq_1_hz), ice_cgu_pin_freq_1_hz },
+	{ "CVL-SDP23",	    ZL_OUT6, DPLL_PIN_TYPE_EXT,
+		ARRAY_SIZE(ice_cgu_pin_freq_1_hz), ice_cgu_pin_freq_1_hz },
+};
+
+static const struct ice_cgu_pin_desc ice_e823_si_cgu_inputs[] = {
+	{ "NONE",	  SI_REF0P, 0, 0 },
+	{ "NONE",	  SI_REF0N, 0, 0 },
+	{ "SYNCE0_DP",	  SI_REF1P, DPLL_PIN_TYPE_MUX, 0 },
+	{ "SYNCE0_DN",	  SI_REF1N, DPLL_PIN_TYPE_MUX, 0 },
+	{ "EXT_CLK_SYNC", SI_REF2P, DPLL_PIN_TYPE_EXT,
+		ARRAY_SIZE(ice_cgu_pin_freq_common), ice_cgu_pin_freq_common },
+	{ "NONE",	  SI_REF2N, 0, 0 },
+	{ "EXT_PPS_OUT",  SI_REF3,  DPLL_PIN_TYPE_EXT,
+		ARRAY_SIZE(ice_cgu_pin_freq_common), ice_cgu_pin_freq_common },
+	{ "INT_PPS_OUT",  SI_REF4,  DPLL_PIN_TYPE_EXT,
+		ARRAY_SIZE(ice_cgu_pin_freq_common), ice_cgu_pin_freq_common },
+};
+
+static const struct ice_cgu_pin_desc ice_e823_si_cgu_outputs[] = {
+	{ "1588-TIME_SYNC", SI_OUT0, DPLL_PIN_TYPE_EXT,
+		ARRAY_SIZE(ice_cgu_pin_freq_common), ice_cgu_pin_freq_common },
+	{ "PHY-CLK",	    SI_OUT1, DPLL_PIN_TYPE_SYNCE_ETH_PORT, 0 },
+	{ "10MHZ-SMA2",	    SI_OUT2, DPLL_PIN_TYPE_EXT,
+		ARRAY_SIZE(ice_cgu_pin_freq_10_mhz), ice_cgu_pin_freq_10_mhz },
+	{ "PPS-SMA1",	    SI_OUT3, DPLL_PIN_TYPE_EXT,
+		ARRAY_SIZE(ice_cgu_pin_freq_common), ice_cgu_pin_freq_common },
+};
+
+static const struct ice_cgu_pin_desc ice_e823_zl_cgu_inputs[] = {
+	{ "NONE",	  ZL_REF0P, 0, 0 },
+	{ "INT_PPS_OUT",  ZL_REF0N, DPLL_PIN_TYPE_EXT,
+		ARRAY_SIZE(ice_cgu_pin_freq_1_hz), ice_cgu_pin_freq_1_hz },
+	{ "SYNCE0_DP",	  ZL_REF1P, DPLL_PIN_TYPE_MUX, 0 },
+	{ "SYNCE0_DN",	  ZL_REF1N, DPLL_PIN_TYPE_MUX, 0 },
+	{ "NONE",	  ZL_REF2P, 0, 0 },
+	{ "NONE",	  ZL_REF2N, 0, 0 },
+	{ "EXT_CLK_SYNC", ZL_REF3P, DPLL_PIN_TYPE_EXT,
+		ARRAY_SIZE(ice_cgu_pin_freq_common), ice_cgu_pin_freq_common },
+	{ "NONE",	  ZL_REF3N, 0, 0 },
+	{ "EXT_PPS_OUT",  ZL_REF4P, DPLL_PIN_TYPE_EXT,
+		ARRAY_SIZE(ice_cgu_pin_freq_1_hz), ice_cgu_pin_freq_1_hz },
+	{ "OCXO",	  ZL_REF4N, DPLL_PIN_TYPE_INT_OSCILLATOR, 0 },
+};
+
+static const struct ice_cgu_pin_desc ice_e823_zl_cgu_outputs[] = {
+	{ "PPS-SMA1",	   ZL_OUT0, DPLL_PIN_TYPE_EXT,
+		ARRAY_SIZE(ice_cgu_pin_freq_1_hz), ice_cgu_pin_freq_1_hz },
+	{ "10MHZ-SMA2",	   ZL_OUT1, DPLL_PIN_TYPE_EXT,
+		ARRAY_SIZE(ice_cgu_pin_freq_10_mhz), ice_cgu_pin_freq_10_mhz },
+	{ "PHY-CLK",	   ZL_OUT2, DPLL_PIN_TYPE_SYNCE_ETH_PORT, 0 },
+	{ "1588-TIME_REF", ZL_OUT3, DPLL_PIN_TYPE_SYNCE_ETH_PORT, 0 },
+	{ "CPK-TIME_SYNC", ZL_OUT4, DPLL_PIN_TYPE_EXT,
+		ARRAY_SIZE(ice_cgu_pin_freq_common), ice_cgu_pin_freq_common },
+	{ "NONE",	   ZL_OUT5, 0, 0 },
+};
+
 /* Low level functions for interacting with and managing the device clock used
  * for the Precision Time Protocol.
  *
@@ -107,7 +233,7 @@ static u64 ice_ptp_read_src_incval(struct ice_hw *hw)
  *
  * Prepare the source timer for an upcoming timer sync command.
  */
-static void ice_ptp_src_cmd(struct ice_hw *hw, enum ice_ptp_tmr_cmd cmd)
+void ice_ptp_src_cmd(struct ice_hw *hw, enum ice_ptp_tmr_cmd cmd)
 {
 	u32 cmd_val;
 	u8 tmr_idx;
@@ -116,19 +242,19 @@ static void ice_ptp_src_cmd(struct ice_hw *hw, enum ice_ptp_tmr_cmd cmd)
 	cmd_val = tmr_idx << SEL_CPK_SRC;
 
 	switch (cmd) {
-	case INIT_TIME:
+	case ICE_PTP_INIT_TIME:
 		cmd_val |= GLTSYN_CMD_INIT_TIME;
 		break;
-	case INIT_INCVAL:
+	case ICE_PTP_INIT_INCVAL:
 		cmd_val |= GLTSYN_CMD_INIT_INCVAL;
 		break;
-	case ADJ_TIME:
+	case ICE_PTP_ADJ_TIME:
 		cmd_val |= GLTSYN_CMD_ADJ_TIME;
 		break;
-	case ADJ_TIME_AT_TIME:
+	case ICE_PTP_ADJ_TIME_AT_TIME:
 		cmd_val |= GLTSYN_CMD_ADJ_INIT_TIME;
 		break;
-	case READ_TIME:
+	case ICE_PTP_READ_TIME:
 		cmd_val |= GLTSYN_CMD_READ_TIME;
 		break;
 	case ICE_PTP_NOP:
@@ -168,9 +294,9 @@ ice_fill_phy_msg_e822(struct ice_sbq_msg_input *msg, u8 port, u16 offset)
 {
 	int phy_port, phy, quadtype;
 
-	phy_port = port % ICE_PORTS_PER_PHY;
-	phy = port / ICE_PORTS_PER_PHY;
-	quadtype = (port / ICE_PORTS_PER_QUAD) % ICE_NUM_QUAD_TYPE;
+	phy_port = port % ICE_PORTS_PER_PHY_E822;
+	phy = port / ICE_PORTS_PER_PHY_E822;
+	quadtype = (port / ICE_PORTS_PER_QUAD) % ICE_QUADS_PER_PHY_E822;
 
 	if (quadtype == 0) {
 		msg->msg_addr_low = P_Q0_L(P_0_BASE + offset, phy_port);
@@ -495,20 +621,25 @@ ice_write_64b_phy_reg_e822(struct ice_hw *hw, u8 port, u16 low_addr, u64 val)
  * Fill a message buffer for accessing a register in a quad shared between
  * multiple PHYs.
  */
-static void
+static int
 ice_fill_quad_msg_e822(struct ice_sbq_msg_input *msg, u8 quad, u16 offset)
 {
 	u32 addr;
 
+	if (quad >= ICE_MAX_QUAD)
+		return -EINVAL;
+
 	msg->dest_dev = rmn_0;
 
-	if ((quad % ICE_NUM_QUAD_TYPE) == 0)
+	if ((quad % ICE_QUADS_PER_PHY_E822) == 0)
 		addr = Q_0_BASE + offset;
 	else
 		addr = Q_1_BASE + offset;
 
 	msg->msg_addr_low = lower_16_bits(addr);
 	msg->msg_addr_high = upper_16_bits(addr);
+
+	return 0;
 }
 
 /**
@@ -527,10 +658,10 @@ ice_read_quad_reg_e822(struct ice_hw *hw, u8 quad, u16 offset, u32 *val)
 	struct ice_sbq_msg_input msg = {0};
 	int err;
 
-	if (quad >= ICE_MAX_QUAD)
-		return -EINVAL;
+	err = ice_fill_quad_msg_e822(&msg, quad, offset);
+	if (err)
+		return err;
 
-	ice_fill_quad_msg_e822(&msg, quad, offset);
 	msg.opcode = ice_sbq_msg_rd;
 
 	err = ice_sbq_rw_reg(hw, &msg);
@@ -561,10 +692,10 @@ ice_write_quad_reg_e822(struct ice_hw *hw, u8 quad, u16 offset, u32 val)
 	struct ice_sbq_msg_input msg = {0};
 	int err;
 
-	if (quad >= ICE_MAX_QUAD)
-		return -EINVAL;
+	err = ice_fill_quad_msg_e822(&msg, quad, offset);
+	if (err)
+		return err;
 
-	ice_fill_quad_msg_e822(&msg, quad, offset);
 	msg.opcode = ice_sbq_msg_wr;
 	msg.data = val;
 
@@ -628,29 +759,32 @@ ice_read_phy_tstamp_e822(struct ice_hw *hw, u8 quad, u8 idx, u64 *tstamp)
  * @quad: the quad to read from
  * @idx: the timestamp index to reset
  *
- * Clear a timestamp, resetting its valid bit, from the PHY quad block that is
- * shared between the internal PHYs on the E822 devices.
+ * Read the timestamp out of the quad to clear its timestamp status bit from
+ * the PHY quad block that is shared between the internal PHYs of the E822
+ * devices.
+ *
+ * Note that unlike E810, software cannot directly write to the quad memory
+ * bank registers. E822 relies on the ice_get_phy_tx_tstamp_ready() function
+ * to determine which timestamps are valid. Reading a timestamp auto-clears
+ * the valid bit.
+ *
+ * To directly clear the contents of the timestamp block entirely, discarding
+ * all timestamp data at once, software should instead use
+ * ice_ptp_reset_ts_memory_quad_e822().
+ *
+ * This function should only be called on an idx whose bit is set according to
+ * ice_get_phy_tx_tstamp_ready().
  */
 static int
 ice_clear_phy_tstamp_e822(struct ice_hw *hw, u8 quad, u8 idx)
 {
-	u16 lo_addr, hi_addr;
+	u64 unused_tstamp;
 	int err;
 
-	lo_addr = (u16)TS_L(Q_REG_TX_MEMORY_BANK_START, idx);
-	hi_addr = (u16)TS_H(Q_REG_TX_MEMORY_BANK_START, idx);
-
-	err = ice_write_quad_reg_e822(hw, quad, lo_addr, 0);
+	err = ice_read_phy_tstamp_e822(hw, quad, idx, &unused_tstamp);
 	if (err) {
-		ice_debug(hw, ICE_DBG_PTP, "Failed to clear low PTP timestamp register, err %d\n",
-			  err);
-		return err;
-	}
-
-	err = ice_write_quad_reg_e822(hw, quad, hi_addr, 0);
-	if (err) {
-		ice_debug(hw, ICE_DBG_PTP, "Failed to clear high PTP timestamp register, err %d\n",
-			  err);
+		ice_debug(hw, ICE_DBG_PTP, "Failed to read the timestamp register for quad %u, idx %u, err %d\n",
+			  quad, idx, err);
 		return err;
 	}
 
@@ -1025,7 +1159,7 @@ static int ice_ptp_init_phc_e822(struct ice_hw *hw)
  * @time: Time to initialize the PHY port clocks to
  *
  * Program the PHY port registers with a new initial time value. The port
- * clock will be initialized once the driver issues an INIT_TIME sync
+ * clock will be initialized once the driver issues an ICE_PTP_INIT_TIME sync
  * command. The time value is the upper 32 bits of the PHY timer, usually in
  * units of nominal nanoseconds.
  */
@@ -1074,7 +1208,7 @@ exit_err:
  *
  * Program the port for an atomic adjustment by writing the Tx and Rx timer
  * registers. The atomic adjustment won't be completed until the driver issues
- * an ADJ_TIME command.
+ * an ICE_PTP_ADJ_TIME command.
  *
  * Note that time is not in units of nanoseconds. It is in clock time
  * including the lower sub-nanosecond portion of the port timer.
@@ -1127,7 +1261,7 @@ exit_err:
  *
  * Prepare the PHY ports for an atomic time adjustment by programming the PHY
  * Tx and Rx port registers. The actual adjustment is completed by issuing an
- * ADJ_TIME or ADJ_TIME_AT_TIME sync command.
+ * ICE_PTP_ADJ_TIME or ICE_PTP_ADJ_TIME_AT_TIME sync command.
  */
 static int
 ice_ptp_prep_phy_adj_e822(struct ice_hw *hw, s32 adj)
@@ -1162,7 +1296,7 @@ ice_ptp_prep_phy_adj_e822(struct ice_hw *hw, s32 adj)
  *
  * Prepare each of the PHY ports for a new increment value by programming the
  * port's TIMETUS registers. The new increment value will be updated after
- * issuing an INIT_INCVAL command.
+ * issuing an ICE_PTP_INIT_INCVAL command.
  */
 static int
 ice_ptp_prep_phy_incval_e822(struct ice_hw *hw, u64 incval)
@@ -1248,19 +1382,19 @@ ice_ptp_write_port_cmd_e822(struct ice_hw *hw, u8 port, enum ice_ptp_tmr_cmd cmd
 	tmr_idx = ice_get_ptp_src_clock_index(hw);
 	cmd_val = tmr_idx << SEL_PHY_SRC;
 	switch (cmd) {
-	case INIT_TIME:
+	case ICE_PTP_INIT_TIME:
 		cmd_val |= PHY_CMD_INIT_TIME;
 		break;
-	case INIT_INCVAL:
+	case ICE_PTP_INIT_INCVAL:
 		cmd_val |= PHY_CMD_INIT_INCVAL;
 		break;
-	case ADJ_TIME:
+	case ICE_PTP_ADJ_TIME:
 		cmd_val |= PHY_CMD_ADJ_TIME;
 		break;
-	case READ_TIME:
+	case ICE_PTP_READ_TIME:
 		cmd_val |= PHY_CMD_READ_TIME;
 		break;
-	case ADJ_TIME_AT_TIME:
+	case ICE_PTP_ADJ_TIME_AT_TIME:
 		cmd_val |= PHY_CMD_ADJ_TIME_AT_TIME;
 		break;
 	case ICE_PTP_NOP:
@@ -2196,8 +2330,8 @@ int ice_phy_cfg_rx_offset_e822(struct ice_hw *hw, u8 port)
  * @phy_time: on return, the 64bit PHY timer value
  * @phc_time: on return, the lower 64bits of PHC time
  *
- * Issue a READ_TIME timer command to simultaneously capture the PHY and PHC
- * timer values.
+ * Issue a ICE_PTP_READ_TIME timer command to simultaneously capture the PHY
+ * and PHC timer values.
  */
 static int
 ice_read_phy_and_phc_time_e822(struct ice_hw *hw, u8 port, u64 *phy_time,
@@ -2210,15 +2344,15 @@ ice_read_phy_and_phc_time_e822(struct ice_hw *hw, u8 port, u64 *phy_time,
 
 	tmr_idx = ice_get_ptp_src_clock_index(hw);
 
-	/* Prepare the PHC timer for a READ_TIME capture command */
-	ice_ptp_src_cmd(hw, READ_TIME);
+	/* Prepare the PHC timer for a ICE_PTP_READ_TIME capture command */
+	ice_ptp_src_cmd(hw, ICE_PTP_READ_TIME);
 
-	/* Prepare the PHY timer for a READ_TIME capture command */
-	err = ice_ptp_one_port_cmd(hw, port, READ_TIME);
+	/* Prepare the PHY timer for a ICE_PTP_READ_TIME capture command */
+	err = ice_ptp_one_port_cmd(hw, port, ICE_PTP_READ_TIME);
 	if (err)
 		return err;
 
-	/* Issue the sync to start the READ_TIME capture */
+	/* Issue the sync to start the ICE_PTP_READ_TIME capture */
 	ice_ptp_exec_tmr_cmd(hw);
 
 	/* Read the captured PHC time from the shadow time registers */
@@ -2252,10 +2386,11 @@ ice_read_phy_and_phc_time_e822(struct ice_hw *hw, u8 port, u64 *phy_time,
  * @port: the PHY port to synchronize
  *
  * Perform an adjustment to ensure that the PHY and PHC timers are in sync.
- * This is done by issuing a READ_TIME command which triggers a simultaneous
- * read of the PHY timer and PHC timer. Then we use the difference to
- * calculate an appropriate 2s complement addition to add to the PHY timer in
- * order to ensure it reads the same value as the primary PHC timer.
+ * This is done by issuing a ICE_PTP_READ_TIME command which triggers a
+ * simultaneous read of the PHY timer and PHC timer. Then we use the
+ * difference to calculate an appropriate 2s complement addition to add
+ * to the PHY timer in order to ensure it reads the same value as the
+ * primary PHC timer.
  */
 static int ice_sync_phy_timer_e822(struct ice_hw *hw, u8 port)
 {
@@ -2285,7 +2420,7 @@ static int ice_sync_phy_timer_e822(struct ice_hw *hw, u8 port)
 	if (err)
 		goto err_unlock;
 
-	err = ice_ptp_one_port_cmd(hw, port, ADJ_TIME);
+	err = ice_ptp_one_port_cmd(hw, port, ICE_PTP_ADJ_TIME);
 	if (err)
 		goto err_unlock;
 
@@ -2408,7 +2543,7 @@ int ice_start_phy_timer_e822(struct ice_hw *hw, u8 port)
 	if (err)
 		return err;
 
-	err = ice_ptp_one_port_cmd(hw, port, INIT_INCVAL);
+	err = ice_ptp_one_port_cmd(hw, port, ICE_PTP_INIT_INCVAL);
 	if (err)
 		return err;
 
@@ -2436,7 +2571,7 @@ int ice_start_phy_timer_e822(struct ice_hw *hw, u8 port)
 	if (err)
 		return err;
 
-	err = ice_ptp_one_port_cmd(hw, port, INIT_INCVAL);
+	err = ice_ptp_one_port_cmd(hw, port, ICE_PTP_INIT_INCVAL);
 	if (err)
 		return err;
 
@@ -2685,28 +2820,39 @@ ice_read_phy_tstamp_e810(struct ice_hw *hw, u8 lport, u8 idx, u64 *tstamp)
  * @lport: the lport to read from
  * @idx: the timestamp index to reset
  *
- * Clear a timestamp, resetting its valid bit, from the timestamp block of the
- * external PHY on the E810 device.
+ * Read the timestamp and then forcibly overwrite its value to clear the valid
+ * bit from the timestamp block of the external PHY on the E810 device.
+ *
+ * This function should only be called on an idx whose bit is set according to
+ * ice_get_phy_tx_tstamp_ready().
  */
 static int ice_clear_phy_tstamp_e810(struct ice_hw *hw, u8 lport, u8 idx)
 {
 	u32 lo_addr, hi_addr;
+	u64 unused_tstamp;
 	int err;
 
+	err = ice_read_phy_tstamp_e810(hw, lport, idx, &unused_tstamp);
+	if (err) {
+		ice_debug(hw, ICE_DBG_PTP, "Failed to read the timestamp register for lport %u, idx %u, err %d\n",
+			  lport, idx, err);
+		return err;
+	}
+
 	lo_addr = TS_EXT(LOW_TX_MEMORY_BANK_START, lport, idx);
 	hi_addr = TS_EXT(HIGH_TX_MEMORY_BANK_START, lport, idx);
 
 	err = ice_write_phy_reg_e810(hw, lo_addr, 0);
 	if (err) {
-		ice_debug(hw, ICE_DBG_PTP, "Failed to clear low PTP timestamp register, err %d\n",
-			  err);
+		ice_debug(hw, ICE_DBG_PTP, "Failed to clear low PTP timestamp register for lport %u, idx %u, err %d\n",
+			  lport, idx, err);
 		return err;
 	}
 
 	err = ice_write_phy_reg_e810(hw, hi_addr, 0);
 	if (err) {
-		ice_debug(hw, ICE_DBG_PTP, "Failed to clear high PTP timestamp register, err %d\n",
-			  err);
+		ice_debug(hw, ICE_DBG_PTP, "Failed to clear high PTP timestamp register for lport %u, idx %u, err %d\n",
+			  lport, idx, err);
 		return err;
 	}
 
@@ -2757,7 +2903,7 @@ static int ice_ptp_init_phc_e810(struct ice_hw *hw)
  *
  * Program the PHY port ETH_GLTSYN_SHTIME registers in preparation setting the
  * initial clock time. The time will not actually be programmed until the
- * driver issues an INIT_TIME command.
+ * driver issues an ICE_PTP_INIT_TIME command.
  *
  * The time value is the upper 32 bits of the PHY timer, usually in units of
  * nominal nanoseconds.
@@ -2792,7 +2938,7 @@ static int ice_ptp_prep_phy_time_e810(struct ice_hw *hw, u32 time)
  *
  * Prepare the PHY port for an atomic adjustment by programming the PHY
  * ETH_GLTSYN_SHADJ_L and ETH_GLTSYN_SHADJ_H registers. The actual adjustment
- * is completed by issuing an ADJ_TIME sync command.
+ * is completed by issuing an ICE_PTP_ADJ_TIME sync command.
  *
  * The adjustment value only contains the portion used for the upper 32bits of
  * the PHY timer, usually in units of nominal nanoseconds. Negative
@@ -2832,7 +2978,7 @@ static int ice_ptp_prep_phy_adj_e810(struct ice_hw *hw, s32 adj)
  *
  * Prepare the PHY port for a new increment value by programming the PHY
  * ETH_GLTSYN_SHADJ_L and ETH_GLTSYN_SHADJ_H registers. The actual change is
- * completed by issuing an INIT_INCVAL command.
+ * completed by issuing an ICE_PTP_INIT_INCVAL command.
  */
 static int ice_ptp_prep_phy_incval_e810(struct ice_hw *hw, u64 incval)
 {
@@ -2875,19 +3021,19 @@ static int ice_ptp_port_cmd_e810(struct ice_hw *hw, enum ice_ptp_tmr_cmd cmd)
 	int err;
 
 	switch (cmd) {
-	case INIT_TIME:
+	case ICE_PTP_INIT_TIME:
 		cmd_val = GLTSYN_CMD_INIT_TIME;
 		break;
-	case INIT_INCVAL:
+	case ICE_PTP_INIT_INCVAL:
 		cmd_val = GLTSYN_CMD_INIT_INCVAL;
 		break;
-	case ADJ_TIME:
+	case ICE_PTP_ADJ_TIME:
 		cmd_val = GLTSYN_CMD_ADJ_TIME;
 		break;
-	case READ_TIME:
+	case ICE_PTP_READ_TIME:
 		cmd_val = GLTSYN_CMD_READ_TIME;
 		break;
-	case ADJ_TIME_AT_TIME:
+	case ICE_PTP_ADJ_TIME_AT_TIME:
 		cmd_val = GLTSYN_CMD_ADJ_INIT_TIME;
 		break;
 	case ICE_PTP_NOP:
@@ -3150,6 +3296,21 @@ void ice_ptp_unlock(struct ice_hw *hw)
 }
 
 /**
+ * ice_ptp_init_phy_model - Initialize hw->phy_model based on device type
+ * @hw: pointer to the HW structure
+ *
+ * Determine the PHY model for the device, and initialize hw->phy_model
+ * for use by other functions.
+ */
+void ice_ptp_init_phy_model(struct ice_hw *hw)
+{
+	if (ice_is_e810(hw))
+		hw->phy_model = ICE_PHY_E810;
+	else
+		hw->phy_model = ICE_PHY_E822;
+}
+
+/**
  * ice_ptp_tmr_cmd - Prepare and trigger a timer sync command
  * @hw: pointer to HW struct
  * @cmd: the command to issue
@@ -3167,10 +3328,17 @@ static int ice_ptp_tmr_cmd(struct ice_hw *hw, enum ice_ptp_tmr_cmd cmd)
 	ice_ptp_src_cmd(hw, cmd);
 
 	/* Next, prepare the ports */
-	if (ice_is_e810(hw))
+	switch (hw->phy_model) {
+	case ICE_PHY_E810:
 		err = ice_ptp_port_cmd_e810(hw, cmd);
-	else
+		break;
+	case ICE_PHY_E822:
 		err = ice_ptp_port_cmd_e822(hw, cmd);
+		break;
+	default:
+		err = -EOPNOTSUPP;
+	}
+
 	if (err) {
 		ice_debug(hw, ICE_DBG_PTP, "Failed to prepare PHY ports for timer command %u, err %d\n",
 			  cmd, err);
@@ -3212,14 +3380,21 @@ int ice_ptp_init_time(struct ice_hw *hw, u64 time)
 
 	/* PHY timers */
 	/* Fill Rx and Tx ports and send msg to PHY */
-	if (ice_is_e810(hw))
+	switch (hw->phy_model) {
+	case ICE_PHY_E810:
 		err = ice_ptp_prep_phy_time_e810(hw, time & 0xFFFFFFFF);
-	else
+		break;
+	case ICE_PHY_E822:
 		err = ice_ptp_prep_phy_time_e822(hw, time & 0xFFFFFFFF);
+		break;
+	default:
+		err = -EOPNOTSUPP;
+	}
+
 	if (err)
 		return err;
 
-	return ice_ptp_tmr_cmd(hw, INIT_TIME);
+	return ice_ptp_tmr_cmd(hw, ICE_PTP_INIT_TIME);
 }
 
 /**
@@ -3232,8 +3407,8 @@ int ice_ptp_init_time(struct ice_hw *hw, u64 time)
  *
  * 1) Write the increment value to the source timer shadow registers
  * 2) Write the increment value to the PHY timer shadow registers
- * 3) Issue an INIT_INCVAL timer command to synchronously switch both the
- *    source and port timers to the new increment value at the next clock
+ * 3) Issue an ICE_PTP_INIT_INCVAL timer command to synchronously switch both
+ *    the source and port timers to the new increment value at the next clock
  *    cycle.
  */
 int ice_ptp_write_incval(struct ice_hw *hw, u64 incval)
@@ -3247,14 +3422,21 @@ int ice_ptp_write_incval(struct ice_hw *hw, u64 incval)
 	wr32(hw, GLTSYN_SHADJ_L(tmr_idx), lower_32_bits(incval));
 	wr32(hw, GLTSYN_SHADJ_H(tmr_idx), upper_32_bits(incval));
 
-	if (ice_is_e810(hw))
+	switch (hw->phy_model) {
+	case ICE_PHY_E810:
 		err = ice_ptp_prep_phy_incval_e810(hw, incval);
-	else
+		break;
+	case ICE_PHY_E822:
 		err = ice_ptp_prep_phy_incval_e822(hw, incval);
+		break;
+	default:
+		err = -EOPNOTSUPP;
+	}
+
 	if (err)
 		return err;
 
-	return ice_ptp_tmr_cmd(hw, INIT_INCVAL);
+	return ice_ptp_tmr_cmd(hw, ICE_PTP_INIT_INCVAL);
 }
 
 /**
@@ -3288,8 +3470,8 @@ int ice_ptp_write_incval_locked(struct ice_hw *hw, u64 incval)
  *
  * 1) Write the adjustment to the source timer shadow registers
  * 2) Write the adjustment to the PHY timer shadow registers
- * 3) Issue an ADJ_TIME timer command to synchronously apply the adjustment to
- *    both the source and port timers at the next clock cycle.
+ * 3) Issue an ICE_PTP_ADJ_TIME timer command to synchronously apply the
+ *    adjustment to both the source and port timers at the next clock cycle.
  */
 int ice_ptp_adj_clock(struct ice_hw *hw, s32 adj)
 {
@@ -3299,21 +3481,28 @@ int ice_ptp_adj_clock(struct ice_hw *hw, s32 adj)
 	tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned;
 
 	/* Write the desired clock adjustment into the GLTSYN_SHADJ register.
-	 * For an ADJ_TIME command, this set of registers represents the value
-	 * to add to the clock time. It supports subtraction by interpreting
-	 * the value as a 2's complement integer.
+	 * For an ICE_PTP_ADJ_TIME command, this set of registers represents
+	 * the value to add to the clock time. It supports subtraction by
+	 * interpreting the value as a 2's complement integer.
 	 */
 	wr32(hw, GLTSYN_SHADJ_L(tmr_idx), 0);
 	wr32(hw, GLTSYN_SHADJ_H(tmr_idx), adj);
 
-	if (ice_is_e810(hw))
+	switch (hw->phy_model) {
+	case ICE_PHY_E810:
 		err = ice_ptp_prep_phy_adj_e810(hw, adj);
-	else
+		break;
+	case ICE_PHY_E822:
 		err = ice_ptp_prep_phy_adj_e822(hw, adj);
+		break;
+	default:
+		err = -EOPNOTSUPP;
+	}
+
 	if (err)
 		return err;
 
-	return ice_ptp_tmr_cmd(hw, ADJ_TIME);
+	return ice_ptp_tmr_cmd(hw, ICE_PTP_ADJ_TIME);
 }
 
 /**
@@ -3329,10 +3518,14 @@ int ice_ptp_adj_clock(struct ice_hw *hw, s32 adj)
  */
 int ice_read_phy_tstamp(struct ice_hw *hw, u8 block, u8 idx, u64 *tstamp)
 {
-	if (ice_is_e810(hw))
+	switch (hw->phy_model) {
+	case ICE_PHY_E810:
 		return ice_read_phy_tstamp_e810(hw, block, idx, tstamp);
-	else
+	case ICE_PHY_E822:
 		return ice_read_phy_tstamp_e822(hw, block, idx, tstamp);
+	default:
+		return -EOPNOTSUPP;
+	}
 }
 
 /**
@@ -3341,16 +3534,110 @@ int ice_read_phy_tstamp(struct ice_hw *hw, u8 block, u8 idx, u64 *tstamp)
  * @block: the block to read from
  * @idx: the timestamp index to reset
  *
- * Clear a timestamp, resetting its valid bit, from the timestamp block. For
- * E822 devices, the block is the quad to clear from. For E810 devices, the
- * block is the logical port to clear from.
+ * Clear a timestamp from the timestamp block, discarding its value without
+ * returning it. This resets the memory status bit for the timestamp index
+ * allowing it to be reused for another timestamp in the future.
+ *
+ * For E822 devices, the block number is the PHY quad to clear from. For E810
+ * devices, the block number is the logical port to clear from.
+ *
+ * This function must only be called on a timestamp index whose valid bit is
+ * set according to ice_get_phy_tx_tstamp_ready().
  */
 int ice_clear_phy_tstamp(struct ice_hw *hw, u8 block, u8 idx)
 {
-	if (ice_is_e810(hw))
+	switch (hw->phy_model) {
+	case ICE_PHY_E810:
 		return ice_clear_phy_tstamp_e810(hw, block, idx);
-	else
+	case ICE_PHY_E822:
 		return ice_clear_phy_tstamp_e822(hw, block, idx);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+/**
+ * ice_is_phy_rclk_present - check recovered clk presence
+ * @hw: pointer to the hw struct
+ *
+ * Check if the PHY Recovered Clock device is present in the netlist
+ * Return:
+ * * true - device found in netlist
+ * * false - device not found
+ */
+bool ice_is_phy_rclk_present(struct ice_hw *hw)
+{
+	if (ice_find_netlist_node(hw, ICE_AQC_LINK_TOPO_NODE_TYPE_CLK_CTRL,
+				  ICE_AQC_GET_LINK_TOPO_NODE_NR_C827, NULL) &&
+	    ice_find_netlist_node(hw, ICE_AQC_LINK_TOPO_NODE_TYPE_CLK_CTRL,
+				  ICE_AQC_GET_LINK_TOPO_NODE_NR_E822_PHY, NULL))
+		return false;
+
+	return true;
+}
+
+/**
+ * ice_is_clock_mux_present_e810t
+ * @hw: pointer to the hw struct
+ *
+ * Check if the Clock Multiplexer device is present in the netlist
+ * Return:
+ * * true - device found in netlist
+ * * false - device not found
+ */
+bool ice_is_clock_mux_present_e810t(struct ice_hw *hw)
+{
+	if (ice_find_netlist_node(hw, ICE_AQC_LINK_TOPO_NODE_TYPE_CLK_MUX,
+				  ICE_AQC_GET_LINK_TOPO_NODE_NR_GEN_CLK_MUX,
+				  NULL))
+		return false;
+
+	return true;
+}
+
+/**
+ * ice_get_pf_c827_idx - find and return the C827 index for the current pf
+ * @hw: pointer to the hw struct
+ * @idx: index of the found C827 PHY
+ * Return:
+ * * 0 - success
+ * * negative - failure
+ */
+int ice_get_pf_c827_idx(struct ice_hw *hw, u8 *idx)
+{
+	struct ice_aqc_get_link_topo cmd;
+	u8 node_part_number;
+	u16 node_handle;
+	int status;
+	u8 ctx;
+
+	if (hw->mac_type != ICE_MAC_E810)
+		return -ENODEV;
+
+	if (hw->device_id != ICE_DEV_ID_E810C_QSFP) {
+		*idx = C827_0;
+		return 0;
+	}
+
+	memset(&cmd, 0, sizeof(cmd));
+
+	ctx = ICE_AQC_LINK_TOPO_NODE_TYPE_PHY << ICE_AQC_LINK_TOPO_NODE_TYPE_S;
+	ctx |= ICE_AQC_LINK_TOPO_NODE_CTX_PORT << ICE_AQC_LINK_TOPO_NODE_CTX_S;
+	cmd.addr.topo_params.node_type_ctx = ctx;
+
+	status = ice_aq_get_netlist_node(hw, &cmd, &node_part_number,
+					 &node_handle);
+	if (status || node_part_number != ICE_AQC_GET_LINK_TOPO_NODE_NR_C827)
+		return -ENOENT;
+
+	if (node_handle == E810C_QSFP_C827_0_HANDLE)
+		*idx = C827_0;
+	else if (node_handle == E810C_QSFP_C827_1_HANDLE)
+		*idx = C827_1;
+	else
+		return -EIO;
+
+	return 0;
 }
 
 /**
@@ -3359,10 +3646,14 @@ int ice_clear_phy_tstamp(struct ice_hw *hw, u8 block, u8 idx)
  */
 void ice_ptp_reset_ts_memory(struct ice_hw *hw)
 {
-	if (ice_is_e810(hw))
+	switch (hw->phy_model) {
+	case ICE_PHY_E822:
+		ice_ptp_reset_ts_memory_e822(hw);
+		break;
+	case ICE_PHY_E810:
+	default:
 		return;
-
-	ice_ptp_reset_ts_memory_e822(hw);
+	}
 }
 
 /**
@@ -3381,10 +3672,14 @@ int ice_ptp_init_phc(struct ice_hw *hw)
 	/* Clear event err indications for auxiliary pins */
 	(void)rd32(hw, GLTSYN_STAT(src_idx));
 
-	if (ice_is_e810(hw))
+	switch (hw->phy_model) {
+	case ICE_PHY_E810:
 		return ice_ptp_init_phc_e810(hw);
-	else
+	case ICE_PHY_E822:
 		return ice_ptp_init_phc_e822(hw);
+	default:
+		return -EOPNOTSUPP;
+	}
 }
 
 /**
@@ -3400,10 +3695,335 @@ int ice_ptp_init_phc(struct ice_hw *hw)
  */
 int ice_get_phy_tx_tstamp_ready(struct ice_hw *hw, u8 block, u64 *tstamp_ready)
 {
-	if (ice_is_e810(hw))
+	switch (hw->phy_model) {
+	case ICE_PHY_E810:
 		return ice_get_phy_tx_tstamp_ready_e810(hw, block,
 							tstamp_ready);
-	else
+	case ICE_PHY_E822:
 		return ice_get_phy_tx_tstamp_ready_e822(hw, block,
 							tstamp_ready);
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+/**
+ * ice_is_cgu_present - check for CGU presence
+ * @hw: pointer to the hw struct
+ *
+ * Check if the Clock Generation Unit (CGU) device is present in the netlist
+ * Return:
+ * * true - cgu is present
+ * * false - cgu is not present
+ */
+bool ice_is_cgu_present(struct ice_hw *hw)
+{
+	if (!ice_find_netlist_node(hw, ICE_AQC_LINK_TOPO_NODE_TYPE_CLK_CTRL,
+				   ICE_AQC_GET_LINK_TOPO_NODE_NR_ZL30632_80032,
+				   NULL)) {
+		hw->cgu_part_number = ICE_AQC_GET_LINK_TOPO_NODE_NR_ZL30632_80032;
+		return true;
+	} else if (!ice_find_netlist_node(hw,
+					  ICE_AQC_LINK_TOPO_NODE_TYPE_CLK_CTRL,
+					  ICE_AQC_GET_LINK_TOPO_NODE_NR_SI5383_5384,
+					  NULL)) {
+		hw->cgu_part_number = ICE_AQC_GET_LINK_TOPO_NODE_NR_SI5383_5384;
+		return true;
+	}
+
+	return false;
+}
+
+/**
+ * ice_cgu_get_pin_desc_e823 - get pin description array
+ * @hw: pointer to the hw struct
+ * @input: if request is done against input or output pin
+ * @size: number of inputs/outputs
+ *
+ * Return: pointer to pin description array associated to given hw.
+ */
+static const struct ice_cgu_pin_desc *
+ice_cgu_get_pin_desc_e823(struct ice_hw *hw, bool input, int *size)
+{
+	static const struct ice_cgu_pin_desc *t;
+
+	if (hw->cgu_part_number ==
+	    ICE_AQC_GET_LINK_TOPO_NODE_NR_ZL30632_80032) {
+		if (input) {
+			t = ice_e823_zl_cgu_inputs;
+			*size = ARRAY_SIZE(ice_e823_zl_cgu_inputs);
+		} else {
+			t = ice_e823_zl_cgu_outputs;
+			*size = ARRAY_SIZE(ice_e823_zl_cgu_outputs);
+		}
+	} else if (hw->cgu_part_number ==
+		   ICE_AQC_GET_LINK_TOPO_NODE_NR_SI5383_5384) {
+		if (input) {
+			t = ice_e823_si_cgu_inputs;
+			*size = ARRAY_SIZE(ice_e823_si_cgu_inputs);
+		} else {
+			t = ice_e823_si_cgu_outputs;
+			*size = ARRAY_SIZE(ice_e823_si_cgu_outputs);
+		}
+	} else {
+		t = NULL;
+		*size = 0;
+	}
+
+	return t;
+}
+
+/**
+ * ice_cgu_get_pin_desc - get pin description array
+ * @hw: pointer to the hw struct
+ * @input: if request is done against input or output pins
+ * @size: size of array returned by function
+ *
+ * Return: pointer to pin description array associated to given hw.
+ */
+static const struct ice_cgu_pin_desc *
+ice_cgu_get_pin_desc(struct ice_hw *hw, bool input, int *size)
+{
+	const struct ice_cgu_pin_desc *t = NULL;
+
+	switch (hw->device_id) {
+	case ICE_DEV_ID_E810C_SFP:
+		if (input) {
+			t = ice_e810t_sfp_cgu_inputs;
+			*size = ARRAY_SIZE(ice_e810t_sfp_cgu_inputs);
+		} else {
+			t = ice_e810t_sfp_cgu_outputs;
+			*size = ARRAY_SIZE(ice_e810t_sfp_cgu_outputs);
+		}
+		break;
+	case ICE_DEV_ID_E810C_QSFP:
+		if (input) {
+			t = ice_e810t_qsfp_cgu_inputs;
+			*size = ARRAY_SIZE(ice_e810t_qsfp_cgu_inputs);
+		} else {
+			t = ice_e810t_qsfp_cgu_outputs;
+			*size = ARRAY_SIZE(ice_e810t_qsfp_cgu_outputs);
+		}
+		break;
+	case ICE_DEV_ID_E823L_10G_BASE_T:
+	case ICE_DEV_ID_E823L_1GBE:
+	case ICE_DEV_ID_E823L_BACKPLANE:
+	case ICE_DEV_ID_E823L_QSFP:
+	case ICE_DEV_ID_E823L_SFP:
+	case ICE_DEV_ID_E823C_10G_BASE_T:
+	case ICE_DEV_ID_E823C_BACKPLANE:
+	case ICE_DEV_ID_E823C_QSFP:
+	case ICE_DEV_ID_E823C_SFP:
+	case ICE_DEV_ID_E823C_SGMII:
+		t = ice_cgu_get_pin_desc_e823(hw, input, size);
+		break;
+	default:
+		break;
+	}
+
+	return t;
+}
+
+/**
+ * ice_cgu_get_pin_type - get pin's type
+ * @hw: pointer to the hw struct
+ * @pin: pin index
+ * @input: if request is done against input or output pin
+ *
+ * Return: type of a pin.
+ */
+enum dpll_pin_type ice_cgu_get_pin_type(struct ice_hw *hw, u8 pin, bool input)
+{
+	const struct ice_cgu_pin_desc *t;
+	int t_size;
+
+	t = ice_cgu_get_pin_desc(hw, input, &t_size);
+
+	if (!t)
+		return 0;
+
+	if (pin >= t_size)
+		return 0;
+
+	return t[pin].type;
+}
+
+/**
+ * ice_cgu_get_pin_freq_supp - get pin's supported frequency
+ * @hw: pointer to the hw struct
+ * @pin: pin index
+ * @input: if request is done against input or output pin
+ * @num: output number of supported frequencies
+ *
+ * Get frequency supported number and array of supported frequencies.
+ *
+ * Return: array of supported frequencies for given pin.
+ */
+struct dpll_pin_frequency *
+ice_cgu_get_pin_freq_supp(struct ice_hw *hw, u8 pin, bool input, u8 *num)
+{
+	const struct ice_cgu_pin_desc *t;
+	int t_size;
+
+	*num = 0;
+	t = ice_cgu_get_pin_desc(hw, input, &t_size);
+	if (!t)
+		return NULL;
+	if (pin >= t_size)
+		return NULL;
+	*num = t[pin].freq_supp_num;
+
+	return t[pin].freq_supp;
+}
+
+/**
+ * ice_cgu_get_pin_name - get pin's name
+ * @hw: pointer to the hw struct
+ * @pin: pin index
+ * @input: if request is done against input or output pin
+ *
+ * Return:
+ * * null terminated char array with name
+ * * NULL in case of failure
+ */
+const char *ice_cgu_get_pin_name(struct ice_hw *hw, u8 pin, bool input)
+{
+	const struct ice_cgu_pin_desc *t;
+	int t_size;
+
+	t = ice_cgu_get_pin_desc(hw, input, &t_size);
+
+	if (!t)
+		return NULL;
+
+	if (pin >= t_size)
+		return NULL;
+
+	return t[pin].name;
+}
+
+/**
+ * ice_get_cgu_state - get the state of the DPLL
+ * @hw: pointer to the hw struct
+ * @dpll_idx: Index of internal DPLL unit
+ * @last_dpll_state: last known state of DPLL
+ * @pin: pointer to a buffer for returning currently active pin
+ * @ref_state: reference clock state
+ * @eec_mode: eec mode of the DPLL
+ * @phase_offset: pointer to a buffer for returning phase offset
+ * @dpll_state: state of the DPLL (output)
+ *
+ * This function will read the state of the DPLL(dpll_idx). Non-null
+ * 'pin', 'ref_state', 'eec_mode' and 'phase_offset' parameters are used to
+ * retrieve currently active pin, state, mode and phase_offset respectively.
+ *
+ * Return: state of the DPLL
+ */
+int ice_get_cgu_state(struct ice_hw *hw, u8 dpll_idx,
+		      enum dpll_lock_status last_dpll_state, u8 *pin,
+		      u8 *ref_state, u8 *eec_mode, s64 *phase_offset,
+		      enum dpll_lock_status *dpll_state)
+{
+	u8 hw_ref_state, hw_dpll_state, hw_eec_mode, hw_config;
+	s64 hw_phase_offset;
+	int status;
+
+	status = ice_aq_get_cgu_dpll_status(hw, dpll_idx, &hw_ref_state,
+					    &hw_dpll_state, &hw_config,
+					    &hw_phase_offset, &hw_eec_mode);
+	if (status)
+		return status;
+
+	if (pin)
+		/* current ref pin in dpll_state_refsel_status_X register */
+		*pin = hw_config & ICE_AQC_GET_CGU_DPLL_CONFIG_CLK_REF_SEL;
+	if (phase_offset)
+		*phase_offset = hw_phase_offset;
+	if (ref_state)
+		*ref_state = hw_ref_state;
+	if (eec_mode)
+		*eec_mode = hw_eec_mode;
+	if (!dpll_state)
+		return 0;
+
+	/* According to ZL DPLL documentation, once state reach LOCKED_HO_ACQ
+	 * it would never return to FREERUN. This aligns to ITU-T G.781
+	 * Recommendation. We cannot report HOLDOVER as HO memory is cleared
+	 * while switching to another reference.
+	 * Only for situations where previous state was either: "LOCKED without
+	 * HO_ACQ" or "HOLDOVER" we actually back to FREERUN.
+	 */
+	if (hw_dpll_state & ICE_AQC_GET_CGU_DPLL_STATUS_STATE_LOCK) {
+		if (hw_dpll_state & ICE_AQC_GET_CGU_DPLL_STATUS_STATE_HO_READY)
+			*dpll_state = DPLL_LOCK_STATUS_LOCKED_HO_ACQ;
+		else
+			*dpll_state = DPLL_LOCK_STATUS_LOCKED;
+	} else if (last_dpll_state == DPLL_LOCK_STATUS_LOCKED_HO_ACQ ||
+		   last_dpll_state == DPLL_LOCK_STATUS_HOLDOVER) {
+		*dpll_state = DPLL_LOCK_STATUS_HOLDOVER;
+	} else {
+		*dpll_state = DPLL_LOCK_STATUS_UNLOCKED;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_get_cgu_rclk_pin_info - get info on available recovered clock pins
+ * @hw: pointer to the hw struct
+ * @base_idx: returns index of first recovered clock pin on device
+ * @pin_num: returns number of recovered clock pins available on device
+ *
+ * Based on hw provide caller info about recovery clock pins available on the
+ * board.
+ *
+ * Return:
+ * * 0 - success, information is valid
+ * * negative - failure, information is not valid
+ */
+int ice_get_cgu_rclk_pin_info(struct ice_hw *hw, u8 *base_idx, u8 *pin_num)
+{
+	u8 phy_idx;
+	int ret;
+
+	switch (hw->device_id) {
+	case ICE_DEV_ID_E810C_SFP:
+	case ICE_DEV_ID_E810C_QSFP:
+
+		ret = ice_get_pf_c827_idx(hw, &phy_idx);
+		if (ret)
+			return ret;
+		*base_idx = E810T_CGU_INPUT_C827(phy_idx, ICE_RCLKA_PIN);
+		*pin_num = ICE_E810_RCLK_PINS_NUM;
+		ret = 0;
+		break;
+	case ICE_DEV_ID_E823L_10G_BASE_T:
+	case ICE_DEV_ID_E823L_1GBE:
+	case ICE_DEV_ID_E823L_BACKPLANE:
+	case ICE_DEV_ID_E823L_QSFP:
+	case ICE_DEV_ID_E823L_SFP:
+	case ICE_DEV_ID_E823C_10G_BASE_T:
+	case ICE_DEV_ID_E823C_BACKPLANE:
+	case ICE_DEV_ID_E823C_QSFP:
+	case ICE_DEV_ID_E823C_SFP:
+	case ICE_DEV_ID_E823C_SGMII:
+		*pin_num = ICE_E822_RCLK_PINS_NUM;
+		ret = 0;
+		if (hw->cgu_part_number ==
+		    ICE_AQC_GET_LINK_TOPO_NODE_NR_ZL30632_80032)
+			*base_idx = ZL_REF1P;
+		else if (hw->cgu_part_number ==
+			 ICE_AQC_GET_LINK_TOPO_NODE_NR_SI5383_5384)
+			*base_idx = SI_REF1P;
+		else
+			ret = -ENODEV;
+
+		break;
+	default:
+		ret = -ENODEV;
+		break;
+	}
+
+	return ret;
 }
diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_hw.h b/drivers/net/ethernet/intel/ice/ice_ptp_hw.h
index 9aa10b0426fd..6f277e7b06b9 100644
--- a/drivers/net/ethernet/intel/ice/ice_ptp_hw.h
+++ b/drivers/net/ethernet/intel/ice/ice_ptp_hw.h
@@ -3,13 +3,14 @@
 
 #ifndef _ICE_PTP_HW_H_
 #define _ICE_PTP_HW_H_
+#include <linux/dpll.h>
 
 enum ice_ptp_tmr_cmd {
-	INIT_TIME,
-	INIT_INCVAL,
-	ADJ_TIME,
-	ADJ_TIME_AT_TIME,
-	READ_TIME,
+	ICE_PTP_INIT_TIME,
+	ICE_PTP_INIT_INCVAL,
+	ICE_PTP_ADJ_TIME,
+	ICE_PTP_ADJ_TIME_AT_TIME,
+	ICE_PTP_READ_TIME,
 	ICE_PTP_NOP,
 };
 
@@ -110,6 +111,77 @@ struct ice_cgu_pll_params_e822 {
 	u32 post_pll_div;
 };
 
+#define E810C_QSFP_C827_0_HANDLE	2
+#define E810C_QSFP_C827_1_HANDLE	3
+enum ice_e810_c827_idx {
+	C827_0,
+	C827_1
+};
+
+enum ice_phy_rclk_pins {
+	ICE_RCLKA_PIN = 0,		/* SCL pin */
+	ICE_RCLKB_PIN,			/* SDA pin */
+};
+
+#define ICE_E810_RCLK_PINS_NUM		(ICE_RCLKB_PIN + 1)
+#define ICE_E822_RCLK_PINS_NUM		(ICE_RCLKA_PIN + 1)
+#define E810T_CGU_INPUT_C827(_phy, _pin) ((_phy) * ICE_E810_RCLK_PINS_NUM + \
+					  (_pin) + ZL_REF1P)
+
+enum ice_zl_cgu_in_pins {
+	ZL_REF0P = 0,
+	ZL_REF0N,
+	ZL_REF1P,
+	ZL_REF1N,
+	ZL_REF2P,
+	ZL_REF2N,
+	ZL_REF3P,
+	ZL_REF3N,
+	ZL_REF4P,
+	ZL_REF4N,
+	NUM_ZL_CGU_INPUT_PINS
+};
+
+enum ice_zl_cgu_out_pins {
+	ZL_OUT0 = 0,
+	ZL_OUT1,
+	ZL_OUT2,
+	ZL_OUT3,
+	ZL_OUT4,
+	ZL_OUT5,
+	ZL_OUT6,
+	NUM_ZL_CGU_OUTPUT_PINS
+};
+
+enum ice_si_cgu_in_pins {
+	SI_REF0P = 0,
+	SI_REF0N,
+	SI_REF1P,
+	SI_REF1N,
+	SI_REF2P,
+	SI_REF2N,
+	SI_REF3,
+	SI_REF4,
+	NUM_SI_CGU_INPUT_PINS
+};
+
+enum ice_si_cgu_out_pins {
+	SI_OUT0 = 0,
+	SI_OUT1,
+	SI_OUT2,
+	SI_OUT3,
+	SI_OUT4,
+	NUM_SI_CGU_OUTPUT_PINS
+};
+
+struct ice_cgu_pin_desc {
+	char *name;
+	u8 index;
+	enum dpll_pin_type type;
+	u32 freq_supp_num;
+	struct dpll_pin_frequency *freq_supp;
+};
+
 extern const struct
 ice_cgu_pll_params_e822 e822_cgu_params[NUM_ICE_TIME_REF_FREQ];
 
@@ -131,6 +203,7 @@ extern const struct ice_vernier_info_e822 e822_vernier[NUM_ICE_PTP_LNK_SPD];
 u8 ice_get_ptp_src_clock_index(struct ice_hw *hw);
 bool ice_ptp_lock(struct ice_hw *hw);
 void ice_ptp_unlock(struct ice_hw *hw);
+void ice_ptp_src_cmd(struct ice_hw *hw, enum ice_ptp_tmr_cmd cmd);
 int ice_ptp_init_time(struct ice_hw *hw, u64 time);
 int ice_ptp_write_incval(struct ice_hw *hw, u64 incval);
 int ice_ptp_write_incval_locked(struct ice_hw *hw, u64 incval);
@@ -197,6 +270,22 @@ int ice_ptp_init_phy_e810(struct ice_hw *hw);
 int ice_read_sma_ctrl_e810t(struct ice_hw *hw, u8 *data);
 int ice_write_sma_ctrl_e810t(struct ice_hw *hw, u8 data);
 int ice_read_pca9575_reg_e810t(struct ice_hw *hw, u8 offset, u8 *data);
+bool ice_is_pca9575_present(struct ice_hw *hw);
+bool ice_is_phy_rclk_present(struct ice_hw *hw);
+bool ice_is_clock_mux_present_e810t(struct ice_hw *hw);
+int ice_get_pf_c827_idx(struct ice_hw *hw, u8 *idx);
+bool ice_is_cgu_present(struct ice_hw *hw);
+enum dpll_pin_type ice_cgu_get_pin_type(struct ice_hw *hw, u8 pin, bool input);
+struct dpll_pin_frequency *
+ice_cgu_get_pin_freq_supp(struct ice_hw *hw, u8 pin, bool input, u8 *num);
+const char *ice_cgu_get_pin_name(struct ice_hw *hw, u8 pin, bool input);
+int ice_get_cgu_state(struct ice_hw *hw, u8 dpll_idx,
+		      enum dpll_lock_status last_dpll_state, u8 *pin,
+		      u8 *ref_state, u8 *eec_mode, s64 *phase_offset,
+		      enum dpll_lock_status *dpll_state);
+int ice_get_cgu_rclk_pin_info(struct ice_hw *hw, u8 *base_idx, u8 *pin_num);
+
+void ice_ptp_init_phy_model(struct ice_hw *hw);
 
 #define PFTSYN_SEM_BYTES	4
 
diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h
index 5e353b0cbe6f..bb5d8b681bc2 100644
--- a/drivers/net/ethernet/intel/ice/ice_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_type.h
@@ -822,6 +822,13 @@ struct ice_mbx_data {
 	u16 async_watermark_val;
 };
 
+/* PHY model */
+enum ice_phy_model {
+	ICE_PHY_UNSUP = -1,
+	ICE_PHY_E810  = 1,
+	ICE_PHY_E822,
+};
+
 /* Port hardware description */
 struct ice_hw {
 	u8 __iomem *hw_addr;
@@ -843,6 +850,7 @@ struct ice_hw {
 	u8 revision_id;
 
 	u8 pf_id;		/* device profile info */
+	enum ice_phy_model phy_model;
 
 	u16 max_burst_size;	/* driver sets this value */
 
@@ -901,13 +909,13 @@ struct ice_hw {
 	/* INTRL granularity in 1 us */
 	u8 intrl_gran;
 
-#define ICE_PHY_PER_NAC		1
-#define ICE_MAX_QUAD		2
-#define ICE_NUM_QUAD_TYPE	2
-#define ICE_PORTS_PER_QUAD	4
-#define ICE_PHY_0_LAST_QUAD	1
-#define ICE_PORTS_PER_PHY	8
-#define ICE_NUM_EXTERNAL_PORTS		ICE_PORTS_PER_PHY
+#define ICE_PHY_PER_NAC_E822		1
+#define ICE_MAX_QUAD			2
+#define ICE_QUADS_PER_PHY_E822		2
+#define ICE_PORTS_PER_PHY_E822		8
+#define ICE_PORTS_PER_QUAD		4
+#define ICE_PORTS_PER_PHY_E810		4
+#define ICE_NUM_EXTERNAL_PORTS		(ICE_MAX_QUAD * ICE_PORTS_PER_QUAD)
 
 	/* Active package version (currently active) */
 	struct ice_pkg_ver active_pkg_ver;
@@ -965,6 +973,7 @@ struct ice_hw {
 	DECLARE_BITMAP(hw_ptype, ICE_FLOW_PTYPE_MAX);
 	u8 dvm_ena;
 	u16 io_expander_handle;
+	u8 cgu_part_number;
 };
 
 /* Statistics collected by each port, VSI, VEB, and S-channel */
diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.h b/drivers/net/ethernet/intel/ice/ice_vf_lib.h
index 48fea6fa0362..31a082e8a827 100644
--- a/drivers/net/ethernet/intel/ice/ice_vf_lib.h
+++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.h
@@ -123,6 +123,9 @@ struct ice_vf {
 	u8 num_req_qs;			/* num of queue pairs requested by VF */
 	u16 num_mac;
 	u16 num_vf_qs;			/* num of queue configured per VF */
+	u8 vlan_strip_ena;		/* Outer and Inner VLAN strip enable */
+#define ICE_INNER_VLAN_STRIP_ENA	BIT(0)
+#define ICE_OUTER_VLAN_STRIP_ENA	BIT(1)
 	struct ice_mdd_vf_events mdd_rx_events;
 	struct ice_mdd_vf_events mdd_tx_events;
 	DECLARE_BITMAP(opcodes_allowlist, VIRTCHNL_OP_MAX);
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.c b/drivers/net/ethernet/intel/ice/ice_virtchnl.c
index db97353efd06..01e88b6e43a1 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.c
@@ -486,6 +486,9 @@ static int ice_vc_get_vf_res_msg(struct ice_vf *vf, u8 *msg)
 	if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_REQ_QUEUES)
 		vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_REQ_QUEUES;
 
+	if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_CRC)
+		vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_CRC;
+
 	if (vf->driver_caps & VIRTCHNL_VF_CAP_ADV_LINK_SPEED)
 		vfres->vf_cap_flags |= VIRTCHNL_VF_CAP_ADV_LINK_SPEED;
 
@@ -1621,6 +1624,15 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg)
 	}
 
 	for (i = 0; i < qci->num_queue_pairs; i++) {
+		if (!qci->qpair[i].rxq.crc_disable)
+			continue;
+
+		if (!(vf->driver_caps & VIRTCHNL_VF_OFFLOAD_CRC) ||
+		    vf->vlan_strip_ena)
+			goto error_param;
+	}
+
+	for (i = 0; i < qci->num_queue_pairs; i++) {
 		qpi = &qci->qpair[i];
 		if (qpi->txq.vsi_id != qci->vsi_id ||
 		    qpi->rxq.vsi_id != qci->vsi_id ||
@@ -1666,6 +1678,13 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg)
 			vsi->rx_rings[i]->dma = qpi->rxq.dma_ring_addr;
 			vsi->rx_rings[i]->count = qpi->rxq.ring_len;
 
+			if (qpi->rxq.crc_disable)
+				vsi->rx_rings[q_idx]->flags |=
+					ICE_RX_FLAGS_CRC_STRIP_DIS;
+			else
+				vsi->rx_rings[q_idx]->flags &=
+					~ICE_RX_FLAGS_CRC_STRIP_DIS;
+
 			if (qpi->rxq.databuffer_size != 0 &&
 			    (qpi->rxq.databuffer_size > ((16 * 1024) - 128) ||
 			     qpi->rxq.databuffer_size < 1024))
@@ -2411,6 +2430,21 @@ static int ice_vc_remove_vlan_msg(struct ice_vf *vf, u8 *msg)
 }
 
 /**
+ * ice_vsi_is_rxq_crc_strip_dis - check if Rx queue CRC strip is disabled or not
+ * @vsi: pointer to the VF VSI info
+ */
+static bool ice_vsi_is_rxq_crc_strip_dis(struct ice_vsi *vsi)
+{
+	unsigned int i;
+
+	ice_for_each_alloc_rxq(vsi, i)
+		if (vsi->rx_rings[i]->flags & ICE_RX_FLAGS_CRC_STRIP_DIS)
+			return true;
+
+	return false;
+}
+
+/**
  * ice_vc_ena_vlan_stripping
  * @vf: pointer to the VF info
  *
@@ -2439,6 +2473,8 @@ static int ice_vc_ena_vlan_stripping(struct ice_vf *vf)
 
 	if (vsi->inner_vlan_ops.ena_stripping(vsi, ETH_P_8021Q))
 		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+	else
+		vf->vlan_strip_ena |= ICE_INNER_VLAN_STRIP_ENA;
 
 error_param:
 	return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_ENABLE_VLAN_STRIPPING,
@@ -2474,6 +2510,8 @@ static int ice_vc_dis_vlan_stripping(struct ice_vf *vf)
 
 	if (vsi->inner_vlan_ops.dis_stripping(vsi))
 		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+	else
+		vf->vlan_strip_ena &= ~ICE_INNER_VLAN_STRIP_ENA;
 
 error_param:
 	return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_DISABLE_VLAN_STRIPPING,
@@ -2651,6 +2689,8 @@ static int ice_vf_init_vlan_stripping(struct ice_vf *vf)
 {
 	struct ice_vsi *vsi = ice_get_vf_vsi(vf);
 
+	vf->vlan_strip_ena = 0;
+
 	if (!vsi)
 		return -EINVAL;
 
@@ -2660,10 +2700,16 @@ static int ice_vf_init_vlan_stripping(struct ice_vf *vf)
 	if (ice_vf_is_port_vlan_ena(vf) && !ice_is_dvm_ena(&vsi->back->hw))
 		return 0;
 
-	if (ice_vf_vlan_offload_ena(vf->driver_caps))
-		return vsi->inner_vlan_ops.ena_stripping(vsi, ETH_P_8021Q);
-	else
-		return vsi->inner_vlan_ops.dis_stripping(vsi);
+	if (ice_vf_vlan_offload_ena(vf->driver_caps)) {
+		int err;
+
+		err = vsi->inner_vlan_ops.ena_stripping(vsi, ETH_P_8021Q);
+		if (!err)
+			vf->vlan_strip_ena |= ICE_INNER_VLAN_STRIP_ENA;
+		return err;
+	}
+
+	return vsi->inner_vlan_ops.dis_stripping(vsi);
 }
 
 static u16 ice_vc_get_max_vlan_fltrs(struct ice_vf *vf)
@@ -3437,6 +3483,11 @@ static int ice_vc_ena_vlan_stripping_v2_msg(struct ice_vf *vf, u8 *msg)
 		goto out;
 	}
 
+	if (ice_vsi_is_rxq_crc_strip_dis(vsi)) {
+		v_ret = VIRTCHNL_STATUS_ERR_NOT_SUPPORTED;
+		goto out;
+	}
+
 	ethertype_setting = strip_msg->outer_ethertype_setting;
 	if (ethertype_setting) {
 		if (ice_vc_ena_vlan_offload(vsi,
@@ -3457,6 +3508,8 @@ static int ice_vc_ena_vlan_stripping_v2_msg(struct ice_vf *vf, u8 *msg)
 			 * enabled, is extracted in L2TAG1.
 			 */
 			ice_vsi_update_l2tsel(vsi, l2tsel);
+
+			vf->vlan_strip_ena |= ICE_OUTER_VLAN_STRIP_ENA;
 		}
 	}
 
@@ -3468,6 +3521,9 @@ static int ice_vc_ena_vlan_stripping_v2_msg(struct ice_vf *vf, u8 *msg)
 		goto out;
 	}
 
+	if (ethertype_setting)
+		vf->vlan_strip_ena |= ICE_INNER_VLAN_STRIP_ENA;
+
 out:
 	return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_ENABLE_VLAN_STRIPPING_V2,
 				     v_ret, NULL, 0);
@@ -3529,6 +3585,8 @@ static int ice_vc_dis_vlan_stripping_v2_msg(struct ice_vf *vf, u8 *msg)
 			 * in L2TAG1.
 			 */
 			ice_vsi_update_l2tsel(vsi, l2tsel);
+
+			vf->vlan_strip_ena &= ~ICE_OUTER_VLAN_STRIP_ENA;
 		}
 	}
 
@@ -3538,6 +3596,9 @@ static int ice_vc_dis_vlan_stripping_v2_msg(struct ice_vf *vf, u8 *msg)
 		goto out;
 	}
 
+	if (ethertype_setting)
+		vf->vlan_strip_ena &= ~ICE_INNER_VLAN_STRIP_ENA;
+
 out:
 	return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_DISABLE_VLAN_STRIPPING_V2,
 				     v_ret, NULL, 0);
diff --git a/drivers/net/ethernet/intel/idpf/Makefile b/drivers/net/ethernet/intel/idpf/Makefile
new file mode 100644
index 000000000000..6844ead2f3ac
--- /dev/null
+++ b/drivers/net/ethernet/intel/idpf/Makefile
@@ -0,0 +1,18 @@
+# SPDX-License-Identifier: GPL-2.0-only
+# Copyright (C) 2023 Intel Corporation
+
+# Makefile for Intel(R) Infrastructure Data Path Function Linux Driver
+
+obj-$(CONFIG_IDPF) += idpf.o
+
+idpf-y := \
+	idpf_controlq.o		\
+	idpf_controlq_setup.o	\
+	idpf_dev.o		\
+	idpf_ethtool.o		\
+	idpf_lib.o		\
+	idpf_main.o		\
+	idpf_singleq_txrx.o	\
+	idpf_txrx.o		\
+	idpf_virtchnl.o 	\
+	idpf_vf_dev.o
diff --git a/drivers/net/ethernet/intel/idpf/idpf.h b/drivers/net/ethernet/intel/idpf/idpf.h
new file mode 100644
index 000000000000..bee73353b56a
--- /dev/null
+++ b/drivers/net/ethernet/intel/idpf/idpf.h
@@ -0,0 +1,968 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright (C) 2023 Intel Corporation */
+
+#ifndef _IDPF_H_
+#define _IDPF_H_
+
+/* Forward declaration */
+struct idpf_adapter;
+struct idpf_vport;
+struct idpf_vport_max_q;
+
+#include <net/pkt_sched.h>
+#include <linux/aer.h>
+#include <linux/etherdevice.h>
+#include <linux/pci.h>
+#include <linux/bitfield.h>
+#include <linux/sctp.h>
+#include <linux/ethtool.h>
+#include <net/gro.h>
+#include <linux/dim.h>
+
+#include "virtchnl2.h"
+#include "idpf_lan_txrx.h"
+#include "idpf_txrx.h"
+#include "idpf_controlq.h"
+
+#define GETMAXVAL(num_bits)		GENMASK((num_bits) - 1, 0)
+
+#define IDPF_NO_FREE_SLOT		0xffff
+
+/* Default Mailbox settings */
+#define IDPF_NUM_FILTERS_PER_MSG	20
+#define IDPF_NUM_DFLT_MBX_Q		2	/* includes both TX and RX */
+#define IDPF_DFLT_MBX_Q_LEN		64
+#define IDPF_DFLT_MBX_ID		-1
+/* maximum number of times to try before resetting mailbox */
+#define IDPF_MB_MAX_ERR			20
+#define IDPF_NUM_CHUNKS_PER_MSG(struct_sz, chunk_sz)	\
+	((IDPF_CTLQ_MAX_BUF_LEN - (struct_sz)) / (chunk_sz))
+#define IDPF_WAIT_FOR_EVENT_TIMEO_MIN	2000
+#define IDPF_WAIT_FOR_EVENT_TIMEO	60000
+
+#define IDPF_MAX_WAIT			500
+
+/* available message levels */
+#define IDPF_AVAIL_NETIF_M (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK)
+
+#define IDPF_DIM_PROFILE_SLOTS  5
+
+#define IDPF_VIRTCHNL_VERSION_MAJOR VIRTCHNL2_VERSION_MAJOR_2
+#define IDPF_VIRTCHNL_VERSION_MINOR VIRTCHNL2_VERSION_MINOR_0
+
+/**
+ * struct idpf_mac_filter
+ * @list: list member field
+ * @macaddr: MAC address
+ * @remove: filter should be removed (virtchnl)
+ * @add: filter should be added (virtchnl)
+ */
+struct idpf_mac_filter {
+	struct list_head list;
+	u8 macaddr[ETH_ALEN];
+	bool remove;
+	bool add;
+};
+
+/**
+ * enum idpf_state - State machine to handle bring up
+ * @__IDPF_STARTUP: Start the state machine
+ * @__IDPF_VER_CHECK: Negotiate virtchnl version
+ * @__IDPF_GET_CAPS: Negotiate capabilities
+ * @__IDPF_INIT_SW: Init based on given capabilities
+ * @__IDPF_STATE_LAST: Must be last, used to determine size
+ */
+enum idpf_state {
+	__IDPF_STARTUP,
+	__IDPF_VER_CHECK,
+	__IDPF_GET_CAPS,
+	__IDPF_INIT_SW,
+	__IDPF_STATE_LAST,
+};
+
+/**
+ * enum idpf_flags - Hard reset causes.
+ * @IDPF_HR_FUNC_RESET: Hard reset when TxRx timeout
+ * @IDPF_HR_DRV_LOAD: Set on driver load for a clean HW
+ * @IDPF_HR_RESET_IN_PROG: Reset in progress
+ * @IDPF_REMOVE_IN_PROG: Driver remove in progress
+ * @IDPF_MB_INTR_MODE: Mailbox in interrupt mode
+ * @IDPF_FLAGS_NBITS: Must be last
+ */
+enum idpf_flags {
+	IDPF_HR_FUNC_RESET,
+	IDPF_HR_DRV_LOAD,
+	IDPF_HR_RESET_IN_PROG,
+	IDPF_REMOVE_IN_PROG,
+	IDPF_MB_INTR_MODE,
+	IDPF_FLAGS_NBITS,
+};
+
+/**
+ * enum idpf_cap_field - Offsets into capabilities struct for specific caps
+ * @IDPF_BASE_CAPS: generic base capabilities
+ * @IDPF_CSUM_CAPS: checksum offload capabilities
+ * @IDPF_SEG_CAPS: segmentation offload capabilities
+ * @IDPF_RSS_CAPS: RSS offload capabilities
+ * @IDPF_HSPLIT_CAPS: Header split capabilities
+ * @IDPF_RSC_CAPS: RSC offload capabilities
+ * @IDPF_OTHER_CAPS: miscellaneous offloads
+ *
+ * Used when checking for a specific capability flag since different capability
+ * sets are not mutually exclusive numerically, the caller must specify which
+ * type of capability they are checking for.
+ */
+enum idpf_cap_field {
+	IDPF_BASE_CAPS		= -1,
+	IDPF_CSUM_CAPS		= offsetof(struct virtchnl2_get_capabilities,
+					   csum_caps),
+	IDPF_SEG_CAPS		= offsetof(struct virtchnl2_get_capabilities,
+					   seg_caps),
+	IDPF_RSS_CAPS		= offsetof(struct virtchnl2_get_capabilities,
+					   rss_caps),
+	IDPF_HSPLIT_CAPS	= offsetof(struct virtchnl2_get_capabilities,
+					   hsplit_caps),
+	IDPF_RSC_CAPS		= offsetof(struct virtchnl2_get_capabilities,
+					   rsc_caps),
+	IDPF_OTHER_CAPS		= offsetof(struct virtchnl2_get_capabilities,
+					   other_caps),
+};
+
+/**
+ * enum idpf_vport_state - Current vport state
+ * @__IDPF_VPORT_DOWN: Vport is down
+ * @__IDPF_VPORT_UP: Vport is up
+ * @__IDPF_VPORT_STATE_LAST: Must be last, number of states
+ */
+enum idpf_vport_state {
+	__IDPF_VPORT_DOWN,
+	__IDPF_VPORT_UP,
+	__IDPF_VPORT_STATE_LAST,
+};
+
+/**
+ * struct idpf_netdev_priv - Struct to store vport back pointer
+ * @adapter: Adapter back pointer
+ * @vport: Vport back pointer
+ * @vport_id: Vport identifier
+ * @vport_idx: Relative vport index
+ * @state: See enum idpf_vport_state
+ * @netstats: Packet and byte stats
+ * @stats_lock: Lock to protect stats update
+ */
+struct idpf_netdev_priv {
+	struct idpf_adapter *adapter;
+	struct idpf_vport *vport;
+	u32 vport_id;
+	u16 vport_idx;
+	enum idpf_vport_state state;
+	struct rtnl_link_stats64 netstats;
+	spinlock_t stats_lock;
+};
+
+/**
+ * struct idpf_reset_reg - Reset register offsets/masks
+ * @rstat: Reset status register
+ * @rstat_m: Reset status mask
+ */
+struct idpf_reset_reg {
+	void __iomem *rstat;
+	u32 rstat_m;
+};
+
+/**
+ * struct idpf_vport_max_q - Queue limits
+ * @max_rxq: Maximum number of RX queues supported
+ * @max_txq: Maixmum number of TX queues supported
+ * @max_bufq: In splitq, maximum number of buffer queues supported
+ * @max_complq: In splitq, maximum number of completion queues supported
+ */
+struct idpf_vport_max_q {
+	u16 max_rxq;
+	u16 max_txq;
+	u16 max_bufq;
+	u16 max_complq;
+};
+
+/**
+ * struct idpf_reg_ops - Device specific register operation function pointers
+ * @ctlq_reg_init: Mailbox control queue register initialization
+ * @intr_reg_init: Traffic interrupt register initialization
+ * @mb_intr_reg_init: Mailbox interrupt register initialization
+ * @reset_reg_init: Reset register initialization
+ * @trigger_reset: Trigger a reset to occur
+ */
+struct idpf_reg_ops {
+	void (*ctlq_reg_init)(struct idpf_ctlq_create_info *cq);
+	int (*intr_reg_init)(struct idpf_vport *vport);
+	void (*mb_intr_reg_init)(struct idpf_adapter *adapter);
+	void (*reset_reg_init)(struct idpf_adapter *adapter);
+	void (*trigger_reset)(struct idpf_adapter *adapter,
+			      enum idpf_flags trig_cause);
+};
+
+/**
+ * struct idpf_dev_ops - Device specific operations
+ * @reg_ops: Register operations
+ */
+struct idpf_dev_ops {
+	struct idpf_reg_ops reg_ops;
+};
+
+/* These macros allow us to generate an enum and a matching char * array of
+ * stringified enums that are always in sync. Checkpatch issues a bogus warning
+ * about this being a complex macro; but it's wrong, these are never used as a
+ * statement and instead only used to define the enum and array.
+ */
+#define IDPF_FOREACH_VPORT_VC_STATE(STATE)	\
+	STATE(IDPF_VC_CREATE_VPORT)		\
+	STATE(IDPF_VC_CREATE_VPORT_ERR)		\
+	STATE(IDPF_VC_ENA_VPORT)		\
+	STATE(IDPF_VC_ENA_VPORT_ERR)		\
+	STATE(IDPF_VC_DIS_VPORT)		\
+	STATE(IDPF_VC_DIS_VPORT_ERR)		\
+	STATE(IDPF_VC_DESTROY_VPORT)		\
+	STATE(IDPF_VC_DESTROY_VPORT_ERR)	\
+	STATE(IDPF_VC_CONFIG_TXQ)		\
+	STATE(IDPF_VC_CONFIG_TXQ_ERR)		\
+	STATE(IDPF_VC_CONFIG_RXQ)		\
+	STATE(IDPF_VC_CONFIG_RXQ_ERR)		\
+	STATE(IDPF_VC_ENA_QUEUES)		\
+	STATE(IDPF_VC_ENA_QUEUES_ERR)		\
+	STATE(IDPF_VC_DIS_QUEUES)		\
+	STATE(IDPF_VC_DIS_QUEUES_ERR)		\
+	STATE(IDPF_VC_MAP_IRQ)			\
+	STATE(IDPF_VC_MAP_IRQ_ERR)		\
+	STATE(IDPF_VC_UNMAP_IRQ)		\
+	STATE(IDPF_VC_UNMAP_IRQ_ERR)		\
+	STATE(IDPF_VC_ADD_QUEUES)		\
+	STATE(IDPF_VC_ADD_QUEUES_ERR)		\
+	STATE(IDPF_VC_DEL_QUEUES)		\
+	STATE(IDPF_VC_DEL_QUEUES_ERR)		\
+	STATE(IDPF_VC_ALLOC_VECTORS)		\
+	STATE(IDPF_VC_ALLOC_VECTORS_ERR)	\
+	STATE(IDPF_VC_DEALLOC_VECTORS)		\
+	STATE(IDPF_VC_DEALLOC_VECTORS_ERR)	\
+	STATE(IDPF_VC_SET_SRIOV_VFS)		\
+	STATE(IDPF_VC_SET_SRIOV_VFS_ERR)	\
+	STATE(IDPF_VC_GET_RSS_LUT)		\
+	STATE(IDPF_VC_GET_RSS_LUT_ERR)		\
+	STATE(IDPF_VC_SET_RSS_LUT)		\
+	STATE(IDPF_VC_SET_RSS_LUT_ERR)		\
+	STATE(IDPF_VC_GET_RSS_KEY)		\
+	STATE(IDPF_VC_GET_RSS_KEY_ERR)		\
+	STATE(IDPF_VC_SET_RSS_KEY)		\
+	STATE(IDPF_VC_SET_RSS_KEY_ERR)		\
+	STATE(IDPF_VC_GET_STATS)		\
+	STATE(IDPF_VC_GET_STATS_ERR)		\
+	STATE(IDPF_VC_ADD_MAC_ADDR)		\
+	STATE(IDPF_VC_ADD_MAC_ADDR_ERR)		\
+	STATE(IDPF_VC_DEL_MAC_ADDR)		\
+	STATE(IDPF_VC_DEL_MAC_ADDR_ERR)		\
+	STATE(IDPF_VC_GET_PTYPE_INFO)		\
+	STATE(IDPF_VC_GET_PTYPE_INFO_ERR)	\
+	STATE(IDPF_VC_LOOPBACK_STATE)		\
+	STATE(IDPF_VC_LOOPBACK_STATE_ERR)	\
+	STATE(IDPF_VC_NBITS)
+
+#define IDPF_GEN_ENUM(ENUM) ENUM,
+#define IDPF_GEN_STRING(STRING) #STRING,
+
+enum idpf_vport_vc_state {
+	IDPF_FOREACH_VPORT_VC_STATE(IDPF_GEN_ENUM)
+};
+
+extern const char * const idpf_vport_vc_state_str[];
+
+/**
+ * enum idpf_vport_reset_cause - Vport soft reset causes
+ * @IDPF_SR_Q_CHANGE: Soft reset queue change
+ * @IDPF_SR_Q_DESC_CHANGE: Soft reset descriptor change
+ * @IDPF_SR_MTU_CHANGE: Soft reset MTU change
+ * @IDPF_SR_RSC_CHANGE: Soft reset RSC change
+ */
+enum idpf_vport_reset_cause {
+	IDPF_SR_Q_CHANGE,
+	IDPF_SR_Q_DESC_CHANGE,
+	IDPF_SR_MTU_CHANGE,
+	IDPF_SR_RSC_CHANGE,
+};
+
+/**
+ * enum idpf_vport_flags - Vport flags
+ * @IDPF_VPORT_DEL_QUEUES: To send delete queues message
+ * @IDPF_VPORT_SW_MARKER: Indicate TX pipe drain software marker packets
+ *			  processing is done
+ * @IDPF_VPORT_FLAGS_NBITS: Must be last
+ */
+enum idpf_vport_flags {
+	IDPF_VPORT_DEL_QUEUES,
+	IDPF_VPORT_SW_MARKER,
+	IDPF_VPORT_FLAGS_NBITS,
+};
+
+struct idpf_port_stats {
+	struct u64_stats_sync stats_sync;
+	u64_stats_t rx_hw_csum_err;
+	u64_stats_t rx_hsplit;
+	u64_stats_t rx_hsplit_hbo;
+	u64_stats_t rx_bad_descs;
+	u64_stats_t tx_linearize;
+	u64_stats_t tx_busy;
+	u64_stats_t tx_drops;
+	u64_stats_t tx_dma_map_errs;
+	struct virtchnl2_vport_stats vport_stats;
+};
+
+/**
+ * struct idpf_vport - Handle for netdevices and queue resources
+ * @num_txq: Number of allocated TX queues
+ * @num_complq: Number of allocated completion queues
+ * @txq_desc_count: TX queue descriptor count
+ * @complq_desc_count: Completion queue descriptor count
+ * @compln_clean_budget: Work budget for completion clean
+ * @num_txq_grp: Number of TX queue groups
+ * @txq_grps: Array of TX queue groups
+ * @txq_model: Split queue or single queue queuing model
+ * @txqs: Used only in hotpath to get to the right queue very fast
+ * @crc_enable: Enable CRC insertion offload
+ * @num_rxq: Number of allocated RX queues
+ * @num_bufq: Number of allocated buffer queues
+ * @rxq_desc_count: RX queue descriptor count. *MUST* have enough descriptors
+ *		    to complete all buffer descriptors for all buffer queues in
+ *		    the worst case.
+ * @num_bufqs_per_qgrp: Buffer queues per RX queue in a given grouping
+ * @bufq_desc_count: Buffer queue descriptor count
+ * @bufq_size: Size of buffers in ring (e.g. 2K, 4K, etc)
+ * @num_rxq_grp: Number of RX queues in a group
+ * @rxq_grps: Total number of RX groups. Number of groups * number of RX per
+ *	      group will yield total number of RX queues.
+ * @rxq_model: Splitq queue or single queue queuing model
+ * @rx_ptype_lkup: Lookup table for ptypes on RX
+ * @adapter: back pointer to associated adapter
+ * @netdev: Associated net_device. Each vport should have one and only one
+ *	    associated netdev.
+ * @flags: See enum idpf_vport_flags
+ * @vport_type: Default SRIOV, SIOV, etc.
+ * @vport_id: Device given vport identifier
+ * @idx: Software index in adapter vports struct
+ * @default_vport: Use this vport if one isn't specified
+ * @base_rxd: True if the driver should use base descriptors instead of flex
+ * @num_q_vectors: Number of IRQ vectors allocated
+ * @q_vectors: Array of queue vectors
+ * @q_vector_idxs: Starting index of queue vectors
+ * @max_mtu: device given max possible MTU
+ * @default_mac_addr: device will give a default MAC to use
+ * @rx_itr_profile: RX profiles for Dynamic Interrupt Moderation
+ * @tx_itr_profile: TX profiles for Dynamic Interrupt Moderation
+ * @port_stats: per port csum, header split, and other offload stats
+ * @link_up: True if link is up
+ * @link_speed_mbps: Link speed in mbps
+ * @vc_msg: Virtchnl message buffer
+ * @vc_state: Virtchnl message state
+ * @vchnl_wq: Wait queue for virtchnl messages
+ * @sw_marker_wq: workqueue for marker packets
+ * @vc_buf_lock: Lock to protect virtchnl buffer
+ */
+struct idpf_vport {
+	u16 num_txq;
+	u16 num_complq;
+	u32 txq_desc_count;
+	u32 complq_desc_count;
+	u32 compln_clean_budget;
+	u16 num_txq_grp;
+	struct idpf_txq_group *txq_grps;
+	u32 txq_model;
+	struct idpf_queue **txqs;
+	bool crc_enable;
+
+	u16 num_rxq;
+	u16 num_bufq;
+	u32 rxq_desc_count;
+	u8 num_bufqs_per_qgrp;
+	u32 bufq_desc_count[IDPF_MAX_BUFQS_PER_RXQ_GRP];
+	u32 bufq_size[IDPF_MAX_BUFQS_PER_RXQ_GRP];
+	u16 num_rxq_grp;
+	struct idpf_rxq_group *rxq_grps;
+	u32 rxq_model;
+	struct idpf_rx_ptype_decoded rx_ptype_lkup[IDPF_RX_MAX_PTYPE];
+
+	struct idpf_adapter *adapter;
+	struct net_device *netdev;
+	DECLARE_BITMAP(flags, IDPF_VPORT_FLAGS_NBITS);
+	u16 vport_type;
+	u32 vport_id;
+	u16 idx;
+	bool default_vport;
+	bool base_rxd;
+
+	u16 num_q_vectors;
+	struct idpf_q_vector *q_vectors;
+	u16 *q_vector_idxs;
+	u16 max_mtu;
+	u8 default_mac_addr[ETH_ALEN];
+	u16 rx_itr_profile[IDPF_DIM_PROFILE_SLOTS];
+	u16 tx_itr_profile[IDPF_DIM_PROFILE_SLOTS];
+	struct idpf_port_stats port_stats;
+
+	bool link_up;
+	u32 link_speed_mbps;
+
+	char vc_msg[IDPF_CTLQ_MAX_BUF_LEN];
+	DECLARE_BITMAP(vc_state, IDPF_VC_NBITS);
+
+	wait_queue_head_t vchnl_wq;
+	wait_queue_head_t sw_marker_wq;
+	struct mutex vc_buf_lock;
+};
+
+/**
+ * enum idpf_user_flags
+ * @__IDPF_PROMISC_UC: Unicast promiscuous mode
+ * @__IDPF_PROMISC_MC: Multicast promiscuous mode
+ * @__IDPF_USER_FLAGS_NBITS: Must be last
+ */
+enum idpf_user_flags {
+	__IDPF_PROMISC_UC = 32,
+	__IDPF_PROMISC_MC,
+
+	__IDPF_USER_FLAGS_NBITS,
+};
+
+/**
+ * struct idpf_rss_data - Associated RSS data
+ * @rss_key_size: Size of RSS hash key
+ * @rss_key: RSS hash key
+ * @rss_lut_size: Size of RSS lookup table
+ * @rss_lut: RSS lookup table
+ * @cached_lut: Used to restore previously init RSS lut
+ */
+struct idpf_rss_data {
+	u16 rss_key_size;
+	u8 *rss_key;
+	u16 rss_lut_size;
+	u32 *rss_lut;
+	u32 *cached_lut;
+};
+
+/**
+ * struct idpf_vport_user_config_data - User defined configuration values for
+ *					each vport.
+ * @rss_data: See struct idpf_rss_data
+ * @num_req_tx_qs: Number of user requested TX queues through ethtool
+ * @num_req_rx_qs: Number of user requested RX queues through ethtool
+ * @num_req_txq_desc: Number of user requested TX queue descriptors through
+ *		      ethtool
+ * @num_req_rxq_desc: Number of user requested RX queue descriptors through
+ *		      ethtool
+ * @user_flags: User toggled config flags
+ * @mac_filter_list: List of MAC filters
+ *
+ * Used to restore configuration after a reset as the vport will get wiped.
+ */
+struct idpf_vport_user_config_data {
+	struct idpf_rss_data rss_data;
+	u16 num_req_tx_qs;
+	u16 num_req_rx_qs;
+	u32 num_req_txq_desc;
+	u32 num_req_rxq_desc;
+	DECLARE_BITMAP(user_flags, __IDPF_USER_FLAGS_NBITS);
+	struct list_head mac_filter_list;
+};
+
+/**
+ * enum idpf_vport_config_flags - Vport config flags
+ * @IDPF_VPORT_REG_NETDEV: Register netdev
+ * @IDPF_VPORT_UP_REQUESTED: Set if interface up is requested on core reset
+ * @IDPF_VPORT_ADD_MAC_REQ: Asynchronous add ether address in flight
+ * @IDPF_VPORT_DEL_MAC_REQ: Asynchronous delete ether address in flight
+ * @IDPF_VPORT_CONFIG_FLAGS_NBITS: Must be last
+ */
+enum idpf_vport_config_flags {
+	IDPF_VPORT_REG_NETDEV,
+	IDPF_VPORT_UP_REQUESTED,
+	IDPF_VPORT_ADD_MAC_REQ,
+	IDPF_VPORT_DEL_MAC_REQ,
+	IDPF_VPORT_CONFIG_FLAGS_NBITS,
+};
+
+/**
+ * struct idpf_avail_queue_info
+ * @avail_rxq: Available RX queues
+ * @avail_txq: Available TX queues
+ * @avail_bufq: Available buffer queues
+ * @avail_complq: Available completion queues
+ *
+ * Maintain total queues available after allocating max queues to each vport.
+ */
+struct idpf_avail_queue_info {
+	u16 avail_rxq;
+	u16 avail_txq;
+	u16 avail_bufq;
+	u16 avail_complq;
+};
+
+/**
+ * struct idpf_vector_info - Utility structure to pass function arguments as a
+ *			     structure
+ * @num_req_vecs: Vectors required based on the number of queues updated by the
+ *		  user via ethtool
+ * @num_curr_vecs: Current number of vectors, must be >= @num_req_vecs
+ * @index: Relative starting index for vectors
+ * @default_vport: Vectors are for default vport
+ */
+struct idpf_vector_info {
+	u16 num_req_vecs;
+	u16 num_curr_vecs;
+	u16 index;
+	bool default_vport;
+};
+
+/**
+ * struct idpf_vector_lifo - Stack to maintain vector indexes used for vector
+ *			     distribution algorithm
+ * @top: Points to stack top i.e. next available vector index
+ * @base: Always points to start of the free pool
+ * @size: Total size of the vector stack
+ * @vec_idx: Array to store all the vector indexes
+ *
+ * Vector stack maintains all the relative vector indexes at the *adapter*
+ * level. This stack is divided into 2 parts, first one is called as 'default
+ * pool' and other one is called 'free pool'.  Vector distribution algorithm
+ * gives priority to default vports in a way that at least IDPF_MIN_Q_VEC
+ * vectors are allocated per default vport and the relative vector indexes for
+ * those are maintained in default pool. Free pool contains all the unallocated
+ * vector indexes which can be allocated on-demand basis. Mailbox vector index
+ * is maintained in the default pool of the stack.
+ */
+struct idpf_vector_lifo {
+	u16 top;
+	u16 base;
+	u16 size;
+	u16 *vec_idx;
+};
+
+/**
+ * struct idpf_vport_config - Vport configuration data
+ * @user_config: see struct idpf_vport_user_config_data
+ * @max_q: Maximum possible queues
+ * @req_qs_chunks: Queue chunk data for requested queues
+ * @mac_filter_list_lock: Lock to protect mac filters
+ * @flags: See enum idpf_vport_config_flags
+ */
+struct idpf_vport_config {
+	struct idpf_vport_user_config_data user_config;
+	struct idpf_vport_max_q max_q;
+	void *req_qs_chunks;
+	spinlock_t mac_filter_list_lock;
+	DECLARE_BITMAP(flags, IDPF_VPORT_CONFIG_FLAGS_NBITS);
+};
+
+/**
+ * struct idpf_adapter - Device data struct generated on probe
+ * @pdev: PCI device struct given on probe
+ * @virt_ver_maj: Virtchnl version major
+ * @virt_ver_min: Virtchnl version minor
+ * @msg_enable: Debug message level enabled
+ * @mb_wait_count: Number of times mailbox was attempted initialization
+ * @state: Init state machine
+ * @flags: See enum idpf_flags
+ * @reset_reg: See struct idpf_reset_reg
+ * @hw: Device access data
+ * @num_req_msix: Requested number of MSIX vectors
+ * @num_avail_msix: Available number of MSIX vectors
+ * @num_msix_entries: Number of entries in MSIX table
+ * @msix_entries: MSIX table
+ * @req_vec_chunks: Requested vector chunk data
+ * @mb_vector: Mailbox vector data
+ * @vector_stack: Stack to store the msix vector indexes
+ * @irq_mb_handler: Handler for hard interrupt for mailbox
+ * @tx_timeout_count: Number of TX timeouts that have occurred
+ * @avail_queues: Device given queue limits
+ * @vports: Array to store vports created by the driver
+ * @netdevs: Associated Vport netdevs
+ * @vport_params_reqd: Vport params requested
+ * @vport_params_recvd: Vport params received
+ * @vport_ids: Array of device given vport identifiers
+ * @vport_config: Vport config parameters
+ * @max_vports: Maximum vports that can be allocated
+ * @num_alloc_vports: Current number of vports allocated
+ * @next_vport: Next free slot in pf->vport[] - 0-based!
+ * @init_task: Initialization task
+ * @init_wq: Workqueue for initialization task
+ * @serv_task: Periodically recurring maintenance task
+ * @serv_wq: Workqueue for service task
+ * @mbx_task: Task to handle mailbox interrupts
+ * @mbx_wq: Workqueue for mailbox responses
+ * @vc_event_task: Task to handle out of band virtchnl event notifications
+ * @vc_event_wq: Workqueue for virtchnl events
+ * @stats_task: Periodic statistics retrieval task
+ * @stats_wq: Workqueue for statistics task
+ * @caps: Negotiated capabilities with device
+ * @vchnl_wq: Wait queue for virtchnl messages
+ * @vc_state: Virtchnl message state
+ * @vc_msg: Virtchnl message buffer
+ * @dev_ops: See idpf_dev_ops
+ * @num_vfs: Number of allocated VFs through sysfs. PF does not directly talk
+ *	     to VFs but is used to initialize them
+ * @crc_enable: Enable CRC insertion offload
+ * @req_tx_splitq: TX split or single queue model to request
+ * @req_rx_splitq: RX split or single queue model to request
+ * @vport_ctrl_lock: Lock to protect the vport control flow
+ * @vector_lock: Lock to protect vector distribution
+ * @queue_lock: Lock to protect queue distribution
+ * @vc_buf_lock: Lock to protect virtchnl buffer
+ */
+struct idpf_adapter {
+	struct pci_dev *pdev;
+	u32 virt_ver_maj;
+	u32 virt_ver_min;
+
+	u32 msg_enable;
+	u32 mb_wait_count;
+	enum idpf_state state;
+	DECLARE_BITMAP(flags, IDPF_FLAGS_NBITS);
+	struct idpf_reset_reg reset_reg;
+	struct idpf_hw hw;
+	u16 num_req_msix;
+	u16 num_avail_msix;
+	u16 num_msix_entries;
+	struct msix_entry *msix_entries;
+	struct virtchnl2_alloc_vectors *req_vec_chunks;
+	struct idpf_q_vector mb_vector;
+	struct idpf_vector_lifo vector_stack;
+	irqreturn_t (*irq_mb_handler)(int irq, void *data);
+
+	u32 tx_timeout_count;
+	struct idpf_avail_queue_info avail_queues;
+	struct idpf_vport **vports;
+	struct net_device **netdevs;
+	struct virtchnl2_create_vport **vport_params_reqd;
+	struct virtchnl2_create_vport **vport_params_recvd;
+	u32 *vport_ids;
+
+	struct idpf_vport_config **vport_config;
+	u16 max_vports;
+	u16 num_alloc_vports;
+	u16 next_vport;
+
+	struct delayed_work init_task;
+	struct workqueue_struct *init_wq;
+	struct delayed_work serv_task;
+	struct workqueue_struct *serv_wq;
+	struct delayed_work mbx_task;
+	struct workqueue_struct *mbx_wq;
+	struct delayed_work vc_event_task;
+	struct workqueue_struct *vc_event_wq;
+	struct delayed_work stats_task;
+	struct workqueue_struct *stats_wq;
+	struct virtchnl2_get_capabilities caps;
+
+	wait_queue_head_t vchnl_wq;
+	DECLARE_BITMAP(vc_state, IDPF_VC_NBITS);
+	char vc_msg[IDPF_CTLQ_MAX_BUF_LEN];
+	struct idpf_dev_ops dev_ops;
+	int num_vfs;
+	bool crc_enable;
+	bool req_tx_splitq;
+	bool req_rx_splitq;
+
+	struct mutex vport_ctrl_lock;
+	struct mutex vector_lock;
+	struct mutex queue_lock;
+	struct mutex vc_buf_lock;
+};
+
+/**
+ * idpf_is_queue_model_split - check if queue model is split
+ * @q_model: queue model single or split
+ *
+ * Returns true if queue model is split else false
+ */
+static inline int idpf_is_queue_model_split(u16 q_model)
+{
+	return q_model == VIRTCHNL2_QUEUE_MODEL_SPLIT;
+}
+
+#define idpf_is_cap_ena(adapter, field, flag) \
+	idpf_is_capability_ena(adapter, false, field, flag)
+#define idpf_is_cap_ena_all(adapter, field, flag) \
+	idpf_is_capability_ena(adapter, true, field, flag)
+
+bool idpf_is_capability_ena(struct idpf_adapter *adapter, bool all,
+			    enum idpf_cap_field field, u64 flag);
+
+#define IDPF_CAP_RSS (\
+	VIRTCHNL2_CAP_RSS_IPV4_TCP	|\
+	VIRTCHNL2_CAP_RSS_IPV4_TCP	|\
+	VIRTCHNL2_CAP_RSS_IPV4_UDP	|\
+	VIRTCHNL2_CAP_RSS_IPV4_SCTP	|\
+	VIRTCHNL2_CAP_RSS_IPV4_OTHER	|\
+	VIRTCHNL2_CAP_RSS_IPV6_TCP	|\
+	VIRTCHNL2_CAP_RSS_IPV6_TCP	|\
+	VIRTCHNL2_CAP_RSS_IPV6_UDP	|\
+	VIRTCHNL2_CAP_RSS_IPV6_SCTP	|\
+	VIRTCHNL2_CAP_RSS_IPV6_OTHER)
+
+#define IDPF_CAP_RSC (\
+	VIRTCHNL2_CAP_RSC_IPV4_TCP	|\
+	VIRTCHNL2_CAP_RSC_IPV6_TCP)
+
+#define IDPF_CAP_HSPLIT	(\
+	VIRTCHNL2_CAP_RX_HSPLIT_AT_L4V4	|\
+	VIRTCHNL2_CAP_RX_HSPLIT_AT_L4V6)
+
+#define IDPF_CAP_RX_CSUM_L4V4 (\
+	VIRTCHNL2_CAP_RX_CSUM_L4_IPV4_TCP	|\
+	VIRTCHNL2_CAP_RX_CSUM_L4_IPV4_UDP)
+
+#define IDPF_CAP_RX_CSUM_L4V6 (\
+	VIRTCHNL2_CAP_RX_CSUM_L4_IPV6_TCP	|\
+	VIRTCHNL2_CAP_RX_CSUM_L4_IPV6_UDP)
+
+#define IDPF_CAP_RX_CSUM (\
+	VIRTCHNL2_CAP_RX_CSUM_L3_IPV4		|\
+	VIRTCHNL2_CAP_RX_CSUM_L4_IPV4_TCP	|\
+	VIRTCHNL2_CAP_RX_CSUM_L4_IPV4_UDP	|\
+	VIRTCHNL2_CAP_RX_CSUM_L4_IPV6_TCP	|\
+	VIRTCHNL2_CAP_RX_CSUM_L4_IPV6_UDP)
+
+#define IDPF_CAP_SCTP_CSUM (\
+	VIRTCHNL2_CAP_TX_CSUM_L4_IPV4_SCTP	|\
+	VIRTCHNL2_CAP_TX_CSUM_L4_IPV6_SCTP	|\
+	VIRTCHNL2_CAP_RX_CSUM_L4_IPV4_SCTP	|\
+	VIRTCHNL2_CAP_RX_CSUM_L4_IPV6_SCTP)
+
+#define IDPF_CAP_TUNNEL_TX_CSUM (\
+	VIRTCHNL2_CAP_TX_CSUM_L3_SINGLE_TUNNEL	|\
+	VIRTCHNL2_CAP_TX_CSUM_L4_SINGLE_TUNNEL)
+
+/**
+ * idpf_get_reserved_vecs - Get reserved vectors
+ * @adapter: private data struct
+ */
+static inline u16 idpf_get_reserved_vecs(struct idpf_adapter *adapter)
+{
+	return le16_to_cpu(adapter->caps.num_allocated_vectors);
+}
+
+/**
+ * idpf_get_default_vports - Get default number of vports
+ * @adapter: private data struct
+ */
+static inline u16 idpf_get_default_vports(struct idpf_adapter *adapter)
+{
+	return le16_to_cpu(adapter->caps.default_num_vports);
+}
+
+/**
+ * idpf_get_max_vports - Get max number of vports
+ * @adapter: private data struct
+ */
+static inline u16 idpf_get_max_vports(struct idpf_adapter *adapter)
+{
+	return le16_to_cpu(adapter->caps.max_vports);
+}
+
+/**
+ * idpf_get_max_tx_bufs - Get max scatter-gather buffers supported by the device
+ * @adapter: private data struct
+ */
+static inline unsigned int idpf_get_max_tx_bufs(struct idpf_adapter *adapter)
+{
+	return adapter->caps.max_sg_bufs_per_tx_pkt;
+}
+
+/**
+ * idpf_get_min_tx_pkt_len - Get min packet length supported by the device
+ * @adapter: private data struct
+ */
+static inline u8 idpf_get_min_tx_pkt_len(struct idpf_adapter *adapter)
+{
+	u8 pkt_len = adapter->caps.min_sso_packet_len;
+
+	return pkt_len ? pkt_len : IDPF_TX_MIN_PKT_LEN;
+}
+
+/**
+ * idpf_get_reg_addr - Get BAR0 register address
+ * @adapter: private data struct
+ * @reg_offset: register offset value
+ *
+ * Based on the register offset, return the actual BAR0 register address
+ */
+static inline void __iomem *idpf_get_reg_addr(struct idpf_adapter *adapter,
+					      resource_size_t reg_offset)
+{
+	return (void __iomem *)(adapter->hw.hw_addr + reg_offset);
+}
+
+/**
+ * idpf_is_reset_detected - check if we were reset at some point
+ * @adapter: driver specific private structure
+ *
+ * Returns true if we are either in reset currently or were previously reset.
+ */
+static inline bool idpf_is_reset_detected(struct idpf_adapter *adapter)
+{
+	if (!adapter->hw.arq)
+		return true;
+
+	return !(readl(idpf_get_reg_addr(adapter, adapter->hw.arq->reg.len)) &
+		 adapter->hw.arq->reg.len_mask);
+}
+
+/**
+ * idpf_is_reset_in_prog - check if reset is in progress
+ * @adapter: driver specific private structure
+ *
+ * Returns true if hard reset is in progress, false otherwise
+ */
+static inline bool idpf_is_reset_in_prog(struct idpf_adapter *adapter)
+{
+	return (test_bit(IDPF_HR_RESET_IN_PROG, adapter->flags) ||
+		test_bit(IDPF_HR_FUNC_RESET, adapter->flags) ||
+		test_bit(IDPF_HR_DRV_LOAD, adapter->flags));
+}
+
+/**
+ * idpf_netdev_to_vport - get a vport handle from a netdev
+ * @netdev: network interface device structure
+ */
+static inline struct idpf_vport *idpf_netdev_to_vport(struct net_device *netdev)
+{
+	struct idpf_netdev_priv *np = netdev_priv(netdev);
+
+	return np->vport;
+}
+
+/**
+ * idpf_netdev_to_adapter - Get adapter handle from a netdev
+ * @netdev: Network interface device structure
+ */
+static inline struct idpf_adapter *idpf_netdev_to_adapter(struct net_device *netdev)
+{
+	struct idpf_netdev_priv *np = netdev_priv(netdev);
+
+	return np->adapter;
+}
+
+/**
+ * idpf_is_feature_ena - Determine if a particular feature is enabled
+ * @vport: Vport to check
+ * @feature: Netdev flag to check
+ *
+ * Returns true or false if a particular feature is enabled.
+ */
+static inline bool idpf_is_feature_ena(const struct idpf_vport *vport,
+				       netdev_features_t feature)
+{
+	return vport->netdev->features & feature;
+}
+
+/**
+ * idpf_get_max_tx_hdr_size -- get the size of tx header
+ * @adapter: Driver specific private structure
+ */
+static inline u16 idpf_get_max_tx_hdr_size(struct idpf_adapter *adapter)
+{
+	return le16_to_cpu(adapter->caps.max_tx_hdr_size);
+}
+
+/**
+ * idpf_vport_ctrl_lock - Acquire the vport control lock
+ * @netdev: Network interface device structure
+ *
+ * This lock should be used by non-datapath code to protect against vport
+ * destruction.
+ */
+static inline void idpf_vport_ctrl_lock(struct net_device *netdev)
+{
+	struct idpf_netdev_priv *np = netdev_priv(netdev);
+
+	mutex_lock(&np->adapter->vport_ctrl_lock);
+}
+
+/**
+ * idpf_vport_ctrl_unlock - Release the vport control lock
+ * @netdev: Network interface device structure
+ */
+static inline void idpf_vport_ctrl_unlock(struct net_device *netdev)
+{
+	struct idpf_netdev_priv *np = netdev_priv(netdev);
+
+	mutex_unlock(&np->adapter->vport_ctrl_lock);
+}
+
+void idpf_statistics_task(struct work_struct *work);
+void idpf_init_task(struct work_struct *work);
+void idpf_service_task(struct work_struct *work);
+void idpf_mbx_task(struct work_struct *work);
+void idpf_vc_event_task(struct work_struct *work);
+void idpf_dev_ops_init(struct idpf_adapter *adapter);
+void idpf_vf_dev_ops_init(struct idpf_adapter *adapter);
+int idpf_vport_adjust_qs(struct idpf_vport *vport);
+int idpf_init_dflt_mbx(struct idpf_adapter *adapter);
+void idpf_deinit_dflt_mbx(struct idpf_adapter *adapter);
+int idpf_vc_core_init(struct idpf_adapter *adapter);
+void idpf_vc_core_deinit(struct idpf_adapter *adapter);
+int idpf_intr_req(struct idpf_adapter *adapter);
+void idpf_intr_rel(struct idpf_adapter *adapter);
+int idpf_get_reg_intr_vecs(struct idpf_vport *vport,
+			   struct idpf_vec_regs *reg_vals);
+u16 idpf_get_max_tx_hdr_size(struct idpf_adapter *adapter);
+int idpf_send_delete_queues_msg(struct idpf_vport *vport);
+int idpf_send_add_queues_msg(const struct idpf_vport *vport, u16 num_tx_q,
+			     u16 num_complq, u16 num_rx_q, u16 num_rx_bufq);
+int idpf_initiate_soft_reset(struct idpf_vport *vport,
+			     enum idpf_vport_reset_cause reset_cause);
+int idpf_send_enable_vport_msg(struct idpf_vport *vport);
+int idpf_send_disable_vport_msg(struct idpf_vport *vport);
+int idpf_send_destroy_vport_msg(struct idpf_vport *vport);
+int idpf_send_get_rx_ptype_msg(struct idpf_vport *vport);
+int idpf_send_ena_dis_loopback_msg(struct idpf_vport *vport);
+int idpf_send_get_set_rss_key_msg(struct idpf_vport *vport, bool get);
+int idpf_send_get_set_rss_lut_msg(struct idpf_vport *vport, bool get);
+int idpf_send_dealloc_vectors_msg(struct idpf_adapter *adapter);
+int idpf_send_alloc_vectors_msg(struct idpf_adapter *adapter, u16 num_vectors);
+void idpf_deinit_task(struct idpf_adapter *adapter);
+int idpf_req_rel_vector_indexes(struct idpf_adapter *adapter,
+				u16 *q_vector_idxs,
+				struct idpf_vector_info *vec_info);
+int idpf_vport_alloc_vec_indexes(struct idpf_vport *vport);
+int idpf_send_get_stats_msg(struct idpf_vport *vport);
+int idpf_get_vec_ids(struct idpf_adapter *adapter,
+		     u16 *vecids, int num_vecids,
+		     struct virtchnl2_vector_chunks *chunks);
+int idpf_recv_mb_msg(struct idpf_adapter *adapter, u32 op,
+		     void *msg, int msg_size);
+int idpf_send_mb_msg(struct idpf_adapter *adapter, u32 op,
+		     u16 msg_size, u8 *msg);
+void idpf_set_ethtool_ops(struct net_device *netdev);
+int idpf_vport_alloc_max_qs(struct idpf_adapter *adapter,
+			    struct idpf_vport_max_q *max_q);
+void idpf_vport_dealloc_max_qs(struct idpf_adapter *adapter,
+			       struct idpf_vport_max_q *max_q);
+int idpf_add_del_mac_filters(struct idpf_vport *vport,
+			     struct idpf_netdev_priv *np,
+			     bool add, bool async);
+int idpf_set_promiscuous(struct idpf_adapter *adapter,
+			 struct idpf_vport_user_config_data *config_data,
+			 u32 vport_id);
+int idpf_send_disable_queues_msg(struct idpf_vport *vport);
+void idpf_vport_init(struct idpf_vport *vport, struct idpf_vport_max_q *max_q);
+u32 idpf_get_vport_id(struct idpf_vport *vport);
+int idpf_vport_queue_ids_init(struct idpf_vport *vport);
+int idpf_queue_reg_init(struct idpf_vport *vport);
+int idpf_send_config_queues_msg(struct idpf_vport *vport);
+int idpf_send_enable_queues_msg(struct idpf_vport *vport);
+int idpf_send_create_vport_msg(struct idpf_adapter *adapter,
+			       struct idpf_vport_max_q *max_q);
+int idpf_check_supported_desc_ids(struct idpf_vport *vport);
+void idpf_vport_intr_write_itr(struct idpf_q_vector *q_vector,
+			       u16 itr, bool tx);
+int idpf_send_map_unmap_queue_vector_msg(struct idpf_vport *vport, bool map);
+int idpf_send_set_sriov_vfs_msg(struct idpf_adapter *adapter, u16 num_vfs);
+int idpf_sriov_configure(struct pci_dev *pdev, int num_vfs);
+
+#endif /* !_IDPF_H_ */
diff --git a/drivers/net/ethernet/intel/idpf/idpf_controlq.c b/drivers/net/ethernet/intel/idpf/idpf_controlq.c
new file mode 100644
index 000000000000..c7f43d2fcd13
--- /dev/null
+++ b/drivers/net/ethernet/intel/idpf/idpf_controlq.c
@@ -0,0 +1,621 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2023 Intel Corporation */
+
+#include "idpf_controlq.h"
+
+/**
+ * idpf_ctlq_setup_regs - initialize control queue registers
+ * @cq: pointer to the specific control queue
+ * @q_create_info: structs containing info for each queue to be initialized
+ */
+static void idpf_ctlq_setup_regs(struct idpf_ctlq_info *cq,
+				 struct idpf_ctlq_create_info *q_create_info)
+{
+	/* set control queue registers in our local struct */
+	cq->reg.head = q_create_info->reg.head;
+	cq->reg.tail = q_create_info->reg.tail;
+	cq->reg.len = q_create_info->reg.len;
+	cq->reg.bah = q_create_info->reg.bah;
+	cq->reg.bal = q_create_info->reg.bal;
+	cq->reg.len_mask = q_create_info->reg.len_mask;
+	cq->reg.len_ena_mask = q_create_info->reg.len_ena_mask;
+	cq->reg.head_mask = q_create_info->reg.head_mask;
+}
+
+/**
+ * idpf_ctlq_init_regs - Initialize control queue registers
+ * @hw: pointer to hw struct
+ * @cq: pointer to the specific Control queue
+ * @is_rxq: true if receive control queue, false otherwise
+ *
+ * Initialize registers. The caller is expected to have already initialized the
+ * descriptor ring memory and buffer memory
+ */
+static void idpf_ctlq_init_regs(struct idpf_hw *hw, struct idpf_ctlq_info *cq,
+				bool is_rxq)
+{
+	/* Update tail to post pre-allocated buffers for rx queues */
+	if (is_rxq)
+		wr32(hw, cq->reg.tail, (u32)(cq->ring_size - 1));
+
+	/* For non-Mailbox control queues only TAIL need to be set */
+	if (cq->q_id != -1)
+		return;
+
+	/* Clear Head for both send or receive */
+	wr32(hw, cq->reg.head, 0);
+
+	/* set starting point */
+	wr32(hw, cq->reg.bal, lower_32_bits(cq->desc_ring.pa));
+	wr32(hw, cq->reg.bah, upper_32_bits(cq->desc_ring.pa));
+	wr32(hw, cq->reg.len, (cq->ring_size | cq->reg.len_ena_mask));
+}
+
+/**
+ * idpf_ctlq_init_rxq_bufs - populate receive queue descriptors with buf
+ * @cq: pointer to the specific Control queue
+ *
+ * Record the address of the receive queue DMA buffers in the descriptors.
+ * The buffers must have been previously allocated.
+ */
+static void idpf_ctlq_init_rxq_bufs(struct idpf_ctlq_info *cq)
+{
+	int i;
+
+	for (i = 0; i < cq->ring_size; i++) {
+		struct idpf_ctlq_desc *desc = IDPF_CTLQ_DESC(cq, i);
+		struct idpf_dma_mem *bi = cq->bi.rx_buff[i];
+
+		/* No buffer to post to descriptor, continue */
+		if (!bi)
+			continue;
+
+		desc->flags =
+			cpu_to_le16(IDPF_CTLQ_FLAG_BUF | IDPF_CTLQ_FLAG_RD);
+		desc->opcode = 0;
+		desc->datalen = cpu_to_le16(bi->size);
+		desc->ret_val = 0;
+		desc->v_opcode_dtype = 0;
+		desc->v_retval = 0;
+		desc->params.indirect.addr_high =
+			cpu_to_le32(upper_32_bits(bi->pa));
+		desc->params.indirect.addr_low =
+			cpu_to_le32(lower_32_bits(bi->pa));
+		desc->params.indirect.param0 = 0;
+		desc->params.indirect.sw_cookie = 0;
+		desc->params.indirect.v_flags = 0;
+	}
+}
+
+/**
+ * idpf_ctlq_shutdown - shutdown the CQ
+ * @hw: pointer to hw struct
+ * @cq: pointer to the specific Control queue
+ *
+ * The main shutdown routine for any controq queue
+ */
+static void idpf_ctlq_shutdown(struct idpf_hw *hw, struct idpf_ctlq_info *cq)
+{
+	mutex_lock(&cq->cq_lock);
+
+	/* free ring buffers and the ring itself */
+	idpf_ctlq_dealloc_ring_res(hw, cq);
+
+	/* Set ring_size to 0 to indicate uninitialized queue */
+	cq->ring_size = 0;
+
+	mutex_unlock(&cq->cq_lock);
+	mutex_destroy(&cq->cq_lock);
+}
+
+/**
+ * idpf_ctlq_add - add one control queue
+ * @hw: pointer to hardware struct
+ * @qinfo: info for queue to be created
+ * @cq_out: (output) double pointer to control queue to be created
+ *
+ * Allocate and initialize a control queue and add it to the control queue list.
+ * The cq parameter will be allocated/initialized and passed back to the caller
+ * if no errors occur.
+ *
+ * Note: idpf_ctlq_init must be called prior to any calls to idpf_ctlq_add
+ */
+int idpf_ctlq_add(struct idpf_hw *hw,
+		  struct idpf_ctlq_create_info *qinfo,
+		  struct idpf_ctlq_info **cq_out)
+{
+	struct idpf_ctlq_info *cq;
+	bool is_rxq = false;
+	int err;
+
+	cq = kzalloc(sizeof(*cq), GFP_KERNEL);
+	if (!cq)
+		return -ENOMEM;
+
+	cq->cq_type = qinfo->type;
+	cq->q_id = qinfo->id;
+	cq->buf_size = qinfo->buf_size;
+	cq->ring_size = qinfo->len;
+
+	cq->next_to_use = 0;
+	cq->next_to_clean = 0;
+	cq->next_to_post = cq->ring_size - 1;
+
+	switch (qinfo->type) {
+	case IDPF_CTLQ_TYPE_MAILBOX_RX:
+		is_rxq = true;
+		fallthrough;
+	case IDPF_CTLQ_TYPE_MAILBOX_TX:
+		err = idpf_ctlq_alloc_ring_res(hw, cq);
+		break;
+	default:
+		err = -EBADR;
+		break;
+	}
+
+	if (err)
+		goto init_free_q;
+
+	if (is_rxq) {
+		idpf_ctlq_init_rxq_bufs(cq);
+	} else {
+		/* Allocate the array of msg pointers for TX queues */
+		cq->bi.tx_msg = kcalloc(qinfo->len,
+					sizeof(struct idpf_ctlq_msg *),
+					GFP_KERNEL);
+		if (!cq->bi.tx_msg) {
+			err = -ENOMEM;
+			goto init_dealloc_q_mem;
+		}
+	}
+
+	idpf_ctlq_setup_regs(cq, qinfo);
+
+	idpf_ctlq_init_regs(hw, cq, is_rxq);
+
+	mutex_init(&cq->cq_lock);
+
+	list_add(&cq->cq_list, &hw->cq_list_head);
+
+	*cq_out = cq;
+
+	return 0;
+
+init_dealloc_q_mem:
+	/* free ring buffers and the ring itself */
+	idpf_ctlq_dealloc_ring_res(hw, cq);
+init_free_q:
+	kfree(cq);
+
+	return err;
+}
+
+/**
+ * idpf_ctlq_remove - deallocate and remove specified control queue
+ * @hw: pointer to hardware struct
+ * @cq: pointer to control queue to be removed
+ */
+void idpf_ctlq_remove(struct idpf_hw *hw,
+		      struct idpf_ctlq_info *cq)
+{
+	list_del(&cq->cq_list);
+	idpf_ctlq_shutdown(hw, cq);
+	kfree(cq);
+}
+
+/**
+ * idpf_ctlq_init - main initialization routine for all control queues
+ * @hw: pointer to hardware struct
+ * @num_q: number of queues to initialize
+ * @q_info: array of structs containing info for each queue to be initialized
+ *
+ * This initializes any number and any type of control queues. This is an all
+ * or nothing routine; if one fails, all previously allocated queues will be
+ * destroyed. This must be called prior to using the individual add/remove
+ * APIs.
+ */
+int idpf_ctlq_init(struct idpf_hw *hw, u8 num_q,
+		   struct idpf_ctlq_create_info *q_info)
+{
+	struct idpf_ctlq_info *cq, *tmp;
+	int err;
+	int i;
+
+	INIT_LIST_HEAD(&hw->cq_list_head);
+
+	for (i = 0; i < num_q; i++) {
+		struct idpf_ctlq_create_info *qinfo = q_info + i;
+
+		err = idpf_ctlq_add(hw, qinfo, &cq);
+		if (err)
+			goto init_destroy_qs;
+	}
+
+	return 0;
+
+init_destroy_qs:
+	list_for_each_entry_safe(cq, tmp, &hw->cq_list_head, cq_list)
+		idpf_ctlq_remove(hw, cq);
+
+	return err;
+}
+
+/**
+ * idpf_ctlq_deinit - destroy all control queues
+ * @hw: pointer to hw struct
+ */
+void idpf_ctlq_deinit(struct idpf_hw *hw)
+{
+	struct idpf_ctlq_info *cq, *tmp;
+
+	list_for_each_entry_safe(cq, tmp, &hw->cq_list_head, cq_list)
+		idpf_ctlq_remove(hw, cq);
+}
+
+/**
+ * idpf_ctlq_send - send command to Control Queue (CTQ)
+ * @hw: pointer to hw struct
+ * @cq: handle to control queue struct to send on
+ * @num_q_msg: number of messages to send on control queue
+ * @q_msg: pointer to array of queue messages to be sent
+ *
+ * The caller is expected to allocate DMAable buffers and pass them to the
+ * send routine via the q_msg struct / control queue specific data struct.
+ * The control queue will hold a reference to each send message until
+ * the completion for that message has been cleaned.
+ */
+int idpf_ctlq_send(struct idpf_hw *hw, struct idpf_ctlq_info *cq,
+		   u16 num_q_msg, struct idpf_ctlq_msg q_msg[])
+{
+	struct idpf_ctlq_desc *desc;
+	int num_desc_avail;
+	int err = 0;
+	int i;
+
+	mutex_lock(&cq->cq_lock);
+
+	/* Ensure there are enough descriptors to send all messages */
+	num_desc_avail = IDPF_CTLQ_DESC_UNUSED(cq);
+	if (num_desc_avail == 0 || num_desc_avail < num_q_msg) {
+		err = -ENOSPC;
+		goto err_unlock;
+	}
+
+	for (i = 0; i < num_q_msg; i++) {
+		struct idpf_ctlq_msg *msg = &q_msg[i];
+
+		desc = IDPF_CTLQ_DESC(cq, cq->next_to_use);
+
+		desc->opcode = cpu_to_le16(msg->opcode);
+		desc->pfid_vfid = cpu_to_le16(msg->func_id);
+
+		desc->v_opcode_dtype = cpu_to_le32(msg->cookie.mbx.chnl_opcode);
+		desc->v_retval = cpu_to_le32(msg->cookie.mbx.chnl_retval);
+
+		desc->flags = cpu_to_le16((msg->host_id & IDPF_HOST_ID_MASK) <<
+					  IDPF_CTLQ_FLAG_HOST_ID_S);
+		if (msg->data_len) {
+			struct idpf_dma_mem *buff = msg->ctx.indirect.payload;
+
+			desc->datalen |= cpu_to_le16(msg->data_len);
+			desc->flags |= cpu_to_le16(IDPF_CTLQ_FLAG_BUF);
+			desc->flags |= cpu_to_le16(IDPF_CTLQ_FLAG_RD);
+
+			/* Update the address values in the desc with the pa
+			 * value for respective buffer
+			 */
+			desc->params.indirect.addr_high =
+				cpu_to_le32(upper_32_bits(buff->pa));
+			desc->params.indirect.addr_low =
+				cpu_to_le32(lower_32_bits(buff->pa));
+
+			memcpy(&desc->params, msg->ctx.indirect.context,
+			       IDPF_INDIRECT_CTX_SIZE);
+		} else {
+			memcpy(&desc->params, msg->ctx.direct,
+			       IDPF_DIRECT_CTX_SIZE);
+		}
+
+		/* Store buffer info */
+		cq->bi.tx_msg[cq->next_to_use] = msg;
+
+		(cq->next_to_use)++;
+		if (cq->next_to_use == cq->ring_size)
+			cq->next_to_use = 0;
+	}
+
+	/* Force memory write to complete before letting hardware
+	 * know that there are new descriptors to fetch.
+	 */
+	dma_wmb();
+
+	wr32(hw, cq->reg.tail, cq->next_to_use);
+
+err_unlock:
+	mutex_unlock(&cq->cq_lock);
+
+	return err;
+}
+
+/**
+ * idpf_ctlq_clean_sq - reclaim send descriptors on HW write back for the
+ * requested queue
+ * @cq: pointer to the specific Control queue
+ * @clean_count: (input|output) number of descriptors to clean as input, and
+ * number of descriptors actually cleaned as output
+ * @msg_status: (output) pointer to msg pointer array to be populated; needs
+ * to be allocated by caller
+ *
+ * Returns an array of message pointers associated with the cleaned
+ * descriptors. The pointers are to the original ctlq_msgs sent on the cleaned
+ * descriptors.  The status will be returned for each; any messages that failed
+ * to send will have a non-zero status. The caller is expected to free original
+ * ctlq_msgs and free or reuse the DMA buffers.
+ */
+int idpf_ctlq_clean_sq(struct idpf_ctlq_info *cq, u16 *clean_count,
+		       struct idpf_ctlq_msg *msg_status[])
+{
+	struct idpf_ctlq_desc *desc;
+	u16 i, num_to_clean;
+	u16 ntc, desc_err;
+
+	if (*clean_count == 0)
+		return 0;
+	if (*clean_count > cq->ring_size)
+		return -EBADR;
+
+	mutex_lock(&cq->cq_lock);
+
+	ntc = cq->next_to_clean;
+
+	num_to_clean = *clean_count;
+
+	for (i = 0; i < num_to_clean; i++) {
+		/* Fetch next descriptor and check if marked as done */
+		desc = IDPF_CTLQ_DESC(cq, ntc);
+		if (!(le16_to_cpu(desc->flags) & IDPF_CTLQ_FLAG_DD))
+			break;
+
+		/* strip off FW internal code */
+		desc_err = le16_to_cpu(desc->ret_val) & 0xff;
+
+		msg_status[i] = cq->bi.tx_msg[ntc];
+		msg_status[i]->status = desc_err;
+
+		cq->bi.tx_msg[ntc] = NULL;
+
+		/* Zero out any stale data */
+		memset(desc, 0, sizeof(*desc));
+
+		ntc++;
+		if (ntc == cq->ring_size)
+			ntc = 0;
+	}
+
+	cq->next_to_clean = ntc;
+
+	mutex_unlock(&cq->cq_lock);
+
+	/* Return number of descriptors actually cleaned */
+	*clean_count = i;
+
+	return 0;
+}
+
+/**
+ * idpf_ctlq_post_rx_buffs - post buffers to descriptor ring
+ * @hw: pointer to hw struct
+ * @cq: pointer to control queue handle
+ * @buff_count: (input|output) input is number of buffers caller is trying to
+ * return; output is number of buffers that were not posted
+ * @buffs: array of pointers to dma mem structs to be given to hardware
+ *
+ * Caller uses this function to return DMA buffers to the descriptor ring after
+ * consuming them; buff_count will be the number of buffers.
+ *
+ * Note: this function needs to be called after a receive call even
+ * if there are no DMA buffers to be returned, i.e. buff_count = 0,
+ * buffs = NULL to support direct commands
+ */
+int idpf_ctlq_post_rx_buffs(struct idpf_hw *hw, struct idpf_ctlq_info *cq,
+			    u16 *buff_count, struct idpf_dma_mem **buffs)
+{
+	struct idpf_ctlq_desc *desc;
+	u16 ntp = cq->next_to_post;
+	bool buffs_avail = false;
+	u16 tbp = ntp + 1;
+	int i = 0;
+
+	if (*buff_count > cq->ring_size)
+		return -EBADR;
+
+	if (*buff_count > 0)
+		buffs_avail = true;
+
+	mutex_lock(&cq->cq_lock);
+
+	if (tbp >= cq->ring_size)
+		tbp = 0;
+
+	if (tbp == cq->next_to_clean)
+		/* Nothing to do */
+		goto post_buffs_out;
+
+	/* Post buffers for as many as provided or up until the last one used */
+	while (ntp != cq->next_to_clean) {
+		desc = IDPF_CTLQ_DESC(cq, ntp);
+
+		if (cq->bi.rx_buff[ntp])
+			goto fill_desc;
+		if (!buffs_avail) {
+			/* If the caller hasn't given us any buffers or
+			 * there are none left, search the ring itself
+			 * for an available buffer to move to this
+			 * entry starting at the next entry in the ring
+			 */
+			tbp = ntp + 1;
+
+			/* Wrap ring if necessary */
+			if (tbp >= cq->ring_size)
+				tbp = 0;
+
+			while (tbp != cq->next_to_clean) {
+				if (cq->bi.rx_buff[tbp]) {
+					cq->bi.rx_buff[ntp] =
+						cq->bi.rx_buff[tbp];
+					cq->bi.rx_buff[tbp] = NULL;
+
+					/* Found a buffer, no need to
+					 * search anymore
+					 */
+					break;
+				}
+
+				/* Wrap ring if necessary */
+				tbp++;
+				if (tbp >= cq->ring_size)
+					tbp = 0;
+			}
+
+			if (tbp == cq->next_to_clean)
+				goto post_buffs_out;
+		} else {
+			/* Give back pointer to DMA buffer */
+			cq->bi.rx_buff[ntp] = buffs[i];
+			i++;
+
+			if (i >= *buff_count)
+				buffs_avail = false;
+		}
+
+fill_desc:
+		desc->flags =
+			cpu_to_le16(IDPF_CTLQ_FLAG_BUF | IDPF_CTLQ_FLAG_RD);
+
+		/* Post buffers to descriptor */
+		desc->datalen = cpu_to_le16(cq->bi.rx_buff[ntp]->size);
+		desc->params.indirect.addr_high =
+			cpu_to_le32(upper_32_bits(cq->bi.rx_buff[ntp]->pa));
+		desc->params.indirect.addr_low =
+			cpu_to_le32(lower_32_bits(cq->bi.rx_buff[ntp]->pa));
+
+		ntp++;
+		if (ntp == cq->ring_size)
+			ntp = 0;
+	}
+
+post_buffs_out:
+	/* Only update tail if buffers were actually posted */
+	if (cq->next_to_post != ntp) {
+		if (ntp)
+			/* Update next_to_post to ntp - 1 since current ntp
+			 * will not have a buffer
+			 */
+			cq->next_to_post = ntp - 1;
+		else
+			/* Wrap to end of end ring since current ntp is 0 */
+			cq->next_to_post = cq->ring_size - 1;
+
+		wr32(hw, cq->reg.tail, cq->next_to_post);
+	}
+
+	mutex_unlock(&cq->cq_lock);
+
+	/* return the number of buffers that were not posted */
+	*buff_count = *buff_count - i;
+
+	return 0;
+}
+
+/**
+ * idpf_ctlq_recv - receive control queue message call back
+ * @cq: pointer to control queue handle to receive on
+ * @num_q_msg: (input|output) input number of messages that should be received;
+ * output number of messages actually received
+ * @q_msg: (output) array of received control queue messages on this q;
+ * needs to be pre-allocated by caller for as many messages as requested
+ *
+ * Called by interrupt handler or polling mechanism. Caller is expected
+ * to free buffers
+ */
+int idpf_ctlq_recv(struct idpf_ctlq_info *cq, u16 *num_q_msg,
+		   struct idpf_ctlq_msg *q_msg)
+{
+	u16 num_to_clean, ntc, flags;
+	struct idpf_ctlq_desc *desc;
+	int err = 0;
+	u16 i;
+
+	if (*num_q_msg == 0)
+		return 0;
+	else if (*num_q_msg > cq->ring_size)
+		return -EBADR;
+
+	/* take the lock before we start messing with the ring */
+	mutex_lock(&cq->cq_lock);
+
+	ntc = cq->next_to_clean;
+
+	num_to_clean = *num_q_msg;
+
+	for (i = 0; i < num_to_clean; i++) {
+		/* Fetch next descriptor and check if marked as done */
+		desc = IDPF_CTLQ_DESC(cq, ntc);
+		flags = le16_to_cpu(desc->flags);
+
+		if (!(flags & IDPF_CTLQ_FLAG_DD))
+			break;
+
+		q_msg[i].vmvf_type = (flags &
+				      (IDPF_CTLQ_FLAG_FTYPE_VM |
+				       IDPF_CTLQ_FLAG_FTYPE_PF)) >>
+				       IDPF_CTLQ_FLAG_FTYPE_S;
+
+		if (flags & IDPF_CTLQ_FLAG_ERR)
+			err  = -EBADMSG;
+
+		q_msg[i].cookie.mbx.chnl_opcode =
+				le32_to_cpu(desc->v_opcode_dtype);
+		q_msg[i].cookie.mbx.chnl_retval =
+				le32_to_cpu(desc->v_retval);
+
+		q_msg[i].opcode = le16_to_cpu(desc->opcode);
+		q_msg[i].data_len = le16_to_cpu(desc->datalen);
+		q_msg[i].status = le16_to_cpu(desc->ret_val);
+
+		if (desc->datalen) {
+			memcpy(q_msg[i].ctx.indirect.context,
+			       &desc->params.indirect, IDPF_INDIRECT_CTX_SIZE);
+
+			/* Assign pointer to dma buffer to ctlq_msg array
+			 * to be given to upper layer
+			 */
+			q_msg[i].ctx.indirect.payload = cq->bi.rx_buff[ntc];
+
+			/* Zero out pointer to DMA buffer info;
+			 * will be repopulated by post buffers API
+			 */
+			cq->bi.rx_buff[ntc] = NULL;
+		} else {
+			memcpy(q_msg[i].ctx.direct, desc->params.raw,
+			       IDPF_DIRECT_CTX_SIZE);
+		}
+
+		/* Zero out stale data in descriptor */
+		memset(desc, 0, sizeof(struct idpf_ctlq_desc));
+
+		ntc++;
+		if (ntc == cq->ring_size)
+			ntc = 0;
+	}
+
+	cq->next_to_clean = ntc;
+
+	mutex_unlock(&cq->cq_lock);
+
+	*num_q_msg = i;
+	if (*num_q_msg == 0)
+		err = -ENOMSG;
+
+	return err;
+}
diff --git a/drivers/net/ethernet/intel/idpf/idpf_controlq.h b/drivers/net/ethernet/intel/idpf/idpf_controlq.h
new file mode 100644
index 000000000000..c1aba09e9856
--- /dev/null
+++ b/drivers/net/ethernet/intel/idpf/idpf_controlq.h
@@ -0,0 +1,130 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright (C) 2023 Intel Corporation */
+
+#ifndef _IDPF_CONTROLQ_H_
+#define _IDPF_CONTROLQ_H_
+
+#include <linux/slab.h>
+
+#include "idpf_controlq_api.h"
+
+/* Maximum buffer length for all control queue types */
+#define IDPF_CTLQ_MAX_BUF_LEN	4096
+
+#define IDPF_CTLQ_DESC(R, i) \
+	(&(((struct idpf_ctlq_desc *)((R)->desc_ring.va))[i]))
+
+#define IDPF_CTLQ_DESC_UNUSED(R) \
+	((u16)((((R)->next_to_clean > (R)->next_to_use) ? 0 : (R)->ring_size) + \
+	       (R)->next_to_clean - (R)->next_to_use - 1))
+
+/* Control Queue default settings */
+#define IDPF_CTRL_SQ_CMD_TIMEOUT	250  /* msecs */
+
+struct idpf_ctlq_desc {
+	/* Control queue descriptor flags */
+	__le16 flags;
+	/* Control queue message opcode */
+	__le16 opcode;
+	__le16 datalen;		/* 0 for direct commands */
+	union {
+		__le16 ret_val;
+		__le16 pfid_vfid;
+#define IDPF_CTLQ_DESC_VF_ID_S	0
+#define IDPF_CTLQ_DESC_VF_ID_M	(0x7FF << IDPF_CTLQ_DESC_VF_ID_S)
+#define IDPF_CTLQ_DESC_PF_ID_S	11
+#define IDPF_CTLQ_DESC_PF_ID_M	(0x1F << IDPF_CTLQ_DESC_PF_ID_S)
+	};
+
+	/* Virtchnl message opcode and virtchnl descriptor type
+	 * v_opcode=[27:0], v_dtype=[31:28]
+	 */
+	__le32 v_opcode_dtype;
+	/* Virtchnl return value */
+	__le32 v_retval;
+	union {
+		struct {
+			__le32 param0;
+			__le32 param1;
+			__le32 param2;
+			__le32 param3;
+		} direct;
+		struct {
+			__le32 param0;
+			__le16 sw_cookie;
+			/* Virtchnl flags */
+			__le16 v_flags;
+			__le32 addr_high;
+			__le32 addr_low;
+		} indirect;
+		u8 raw[16];
+	} params;
+};
+
+/* Flags sub-structure
+ * |0  |1  |2  |3  |4  |5  |6  |7  |8  |9  |10 |11 |12 |13 |14 |15 |
+ * |DD |CMP|ERR|  * RSV *  |FTYPE  | *RSV* |RD |VFC|BUF|  HOST_ID  |
+ */
+/* command flags and offsets */
+#define IDPF_CTLQ_FLAG_DD_S		0
+#define IDPF_CTLQ_FLAG_CMP_S		1
+#define IDPF_CTLQ_FLAG_ERR_S		2
+#define IDPF_CTLQ_FLAG_FTYPE_S		6
+#define IDPF_CTLQ_FLAG_RD_S		10
+#define IDPF_CTLQ_FLAG_VFC_S		11
+#define IDPF_CTLQ_FLAG_BUF_S		12
+#define IDPF_CTLQ_FLAG_HOST_ID_S	13
+
+#define IDPF_CTLQ_FLAG_DD	BIT(IDPF_CTLQ_FLAG_DD_S)	/* 0x1	  */
+#define IDPF_CTLQ_FLAG_CMP	BIT(IDPF_CTLQ_FLAG_CMP_S)	/* 0x2	  */
+#define IDPF_CTLQ_FLAG_ERR	BIT(IDPF_CTLQ_FLAG_ERR_S)	/* 0x4	  */
+#define IDPF_CTLQ_FLAG_FTYPE_VM	BIT(IDPF_CTLQ_FLAG_FTYPE_S)	/* 0x40	  */
+#define IDPF_CTLQ_FLAG_FTYPE_PF	BIT(IDPF_CTLQ_FLAG_FTYPE_S + 1)	/* 0x80   */
+#define IDPF_CTLQ_FLAG_RD	BIT(IDPF_CTLQ_FLAG_RD_S)	/* 0x400  */
+#define IDPF_CTLQ_FLAG_VFC	BIT(IDPF_CTLQ_FLAG_VFC_S)	/* 0x800  */
+#define IDPF_CTLQ_FLAG_BUF	BIT(IDPF_CTLQ_FLAG_BUF_S)	/* 0x1000 */
+
+/* Host ID is a special field that has 3b and not a 1b flag */
+#define IDPF_CTLQ_FLAG_HOST_ID_M MAKE_MASK(0x7000UL, IDPF_CTLQ_FLAG_HOST_ID_S)
+
+struct idpf_mbxq_desc {
+	u8 pad[8];		/* CTLQ flags/opcode/len/retval fields */
+	u32 chnl_opcode;	/* avoid confusion with desc->opcode */
+	u32 chnl_retval;	/* ditto for desc->retval */
+	u32 pf_vf_id;		/* used by CP when sending to PF */
+};
+
+/* Define the driver hardware struct to replace other control structs as needed
+ * Align to ctlq_hw_info
+ */
+struct idpf_hw {
+	void __iomem *hw_addr;
+	resource_size_t hw_addr_len;
+
+	struct idpf_adapter *back;
+
+	/* control queue - send and receive */
+	struct idpf_ctlq_info *asq;
+	struct idpf_ctlq_info *arq;
+
+	/* pci info */
+	u16 device_id;
+	u16 vendor_id;
+	u16 subsystem_device_id;
+	u16 subsystem_vendor_id;
+	u8 revision_id;
+	bool adapter_stopped;
+
+	struct list_head cq_list_head;
+};
+
+int idpf_ctlq_alloc_ring_res(struct idpf_hw *hw,
+			     struct idpf_ctlq_info *cq);
+
+void idpf_ctlq_dealloc_ring_res(struct idpf_hw *hw, struct idpf_ctlq_info *cq);
+
+/* prototype for functions used for dynamic memory allocation */
+void *idpf_alloc_dma_mem(struct idpf_hw *hw, struct idpf_dma_mem *mem,
+			 u64 size);
+void idpf_free_dma_mem(struct idpf_hw *hw, struct idpf_dma_mem *mem);
+#endif /* _IDPF_CONTROLQ_H_ */
diff --git a/drivers/net/ethernet/intel/idpf/idpf_controlq_api.h b/drivers/net/ethernet/intel/idpf/idpf_controlq_api.h
new file mode 100644
index 000000000000..8dee098bbfb0
--- /dev/null
+++ b/drivers/net/ethernet/intel/idpf/idpf_controlq_api.h
@@ -0,0 +1,169 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright (C) 2023 Intel Corporation */
+
+#ifndef _IDPF_CONTROLQ_API_H_
+#define _IDPF_CONTROLQ_API_H_
+
+#include "idpf_mem.h"
+
+struct idpf_hw;
+
+/* Used for queue init, response and events */
+enum idpf_ctlq_type {
+	IDPF_CTLQ_TYPE_MAILBOX_TX	= 0,
+	IDPF_CTLQ_TYPE_MAILBOX_RX	= 1,
+	IDPF_CTLQ_TYPE_CONFIG_TX	= 2,
+	IDPF_CTLQ_TYPE_CONFIG_RX	= 3,
+	IDPF_CTLQ_TYPE_EVENT_RX		= 4,
+	IDPF_CTLQ_TYPE_RDMA_TX		= 5,
+	IDPF_CTLQ_TYPE_RDMA_RX		= 6,
+	IDPF_CTLQ_TYPE_RDMA_COMPL	= 7
+};
+
+/* Generic Control Queue Structures */
+struct idpf_ctlq_reg {
+	/* used for queue tracking */
+	u32 head;
+	u32 tail;
+	/* Below applies only to default mb (if present) */
+	u32 len;
+	u32 bah;
+	u32 bal;
+	u32 len_mask;
+	u32 len_ena_mask;
+	u32 head_mask;
+};
+
+/* Generic queue msg structure */
+struct idpf_ctlq_msg {
+	u8 vmvf_type; /* represents the source of the message on recv */
+#define IDPF_VMVF_TYPE_VF 0
+#define IDPF_VMVF_TYPE_VM 1
+#define IDPF_VMVF_TYPE_PF 2
+	u8 host_id;
+	/* 3b field used only when sending a message to CP - to be used in
+	 * combination with target func_id to route the message
+	 */
+#define IDPF_HOST_ID_MASK 0x7
+
+	u16 opcode;
+	u16 data_len;	/* data_len = 0 when no payload is attached */
+	union {
+		u16 func_id;	/* when sending a message */
+		u16 status;	/* when receiving a message */
+	};
+	union {
+		struct {
+			u32 chnl_opcode;
+			u32 chnl_retval;
+		} mbx;
+	} cookie;
+	union {
+#define IDPF_DIRECT_CTX_SIZE	16
+#define IDPF_INDIRECT_CTX_SIZE	8
+		/* 16 bytes of context can be provided or 8 bytes of context
+		 * plus the address of a DMA buffer
+		 */
+		u8 direct[IDPF_DIRECT_CTX_SIZE];
+		struct {
+			u8 context[IDPF_INDIRECT_CTX_SIZE];
+			struct idpf_dma_mem *payload;
+		} indirect;
+	} ctx;
+};
+
+/* Generic queue info structures */
+/* MB, CONFIG and EVENT q do not have extended info */
+struct idpf_ctlq_create_info {
+	enum idpf_ctlq_type type;
+	int id; /* absolute queue offset passed as input
+		 * -1 for default mailbox if present
+		 */
+	u16 len; /* Queue length passed as input */
+	u16 buf_size; /* buffer size passed as input */
+	u64 base_address; /* output, HPA of the Queue start  */
+	struct idpf_ctlq_reg reg; /* registers accessed by ctlqs */
+
+	int ext_info_size;
+	void *ext_info; /* Specific to q type */
+};
+
+/* Control Queue information */
+struct idpf_ctlq_info {
+	struct list_head cq_list;
+
+	enum idpf_ctlq_type cq_type;
+	int q_id;
+	struct mutex cq_lock;		/* control queue lock */
+	/* used for interrupt processing */
+	u16 next_to_use;
+	u16 next_to_clean;
+	u16 next_to_post;		/* starting descriptor to post buffers
+					 * to after recev
+					 */
+
+	struct idpf_dma_mem desc_ring;	/* descriptor ring memory
+					 * idpf_dma_mem is defined in OSdep.h
+					 */
+	union {
+		struct idpf_dma_mem **rx_buff;
+		struct idpf_ctlq_msg **tx_msg;
+	} bi;
+
+	u16 buf_size;			/* queue buffer size */
+	u16 ring_size;			/* Number of descriptors */
+	struct idpf_ctlq_reg reg;	/* registers accessed by ctlqs */
+};
+
+/**
+ * enum idpf_mbx_opc - PF/VF mailbox commands
+ * @idpf_mbq_opc_send_msg_to_cp: used by PF or VF to send a message to its CP
+ */
+enum idpf_mbx_opc {
+	idpf_mbq_opc_send_msg_to_cp		= 0x0801,
+};
+
+/* API supported for control queue management */
+/* Will init all required q including default mb.  "q_info" is an array of
+ * create_info structs equal to the number of control queues to be created.
+ */
+int idpf_ctlq_init(struct idpf_hw *hw, u8 num_q,
+		   struct idpf_ctlq_create_info *q_info);
+
+/* Allocate and initialize a single control queue, which will be added to the
+ * control queue list; returns a handle to the created control queue
+ */
+int idpf_ctlq_add(struct idpf_hw *hw,
+		  struct idpf_ctlq_create_info *qinfo,
+		  struct idpf_ctlq_info **cq);
+
+/* Deinitialize and deallocate a single control queue */
+void idpf_ctlq_remove(struct idpf_hw *hw,
+		      struct idpf_ctlq_info *cq);
+
+/* Sends messages to HW and will also free the buffer*/
+int idpf_ctlq_send(struct idpf_hw *hw,
+		   struct idpf_ctlq_info *cq,
+		   u16 num_q_msg,
+		   struct idpf_ctlq_msg q_msg[]);
+
+/* Receives messages and called by interrupt handler/polling
+ * initiated by app/process. Also caller is supposed to free the buffers
+ */
+int idpf_ctlq_recv(struct idpf_ctlq_info *cq, u16 *num_q_msg,
+		   struct idpf_ctlq_msg *q_msg);
+
+/* Reclaims send descriptors on HW write back */
+int idpf_ctlq_clean_sq(struct idpf_ctlq_info *cq, u16 *clean_count,
+		       struct idpf_ctlq_msg *msg_status[]);
+
+/* Indicate RX buffers are done being processed */
+int idpf_ctlq_post_rx_buffs(struct idpf_hw *hw,
+			    struct idpf_ctlq_info *cq,
+			    u16 *buff_count,
+			    struct idpf_dma_mem **buffs);
+
+/* Will destroy all q including the default mb */
+void idpf_ctlq_deinit(struct idpf_hw *hw);
+
+#endif /* _IDPF_CONTROLQ_API_H_ */
diff --git a/drivers/net/ethernet/intel/idpf/idpf_controlq_setup.c b/drivers/net/ethernet/intel/idpf/idpf_controlq_setup.c
new file mode 100644
index 000000000000..a942a6385d06
--- /dev/null
+++ b/drivers/net/ethernet/intel/idpf/idpf_controlq_setup.c
@@ -0,0 +1,171 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2023 Intel Corporation */
+
+#include "idpf_controlq.h"
+
+/**
+ * idpf_ctlq_alloc_desc_ring - Allocate Control Queue (CQ) rings
+ * @hw: pointer to hw struct
+ * @cq: pointer to the specific Control queue
+ */
+static int idpf_ctlq_alloc_desc_ring(struct idpf_hw *hw,
+				     struct idpf_ctlq_info *cq)
+{
+	size_t size = cq->ring_size * sizeof(struct idpf_ctlq_desc);
+
+	cq->desc_ring.va = idpf_alloc_dma_mem(hw, &cq->desc_ring, size);
+	if (!cq->desc_ring.va)
+		return -ENOMEM;
+
+	return 0;
+}
+
+/**
+ * idpf_ctlq_alloc_bufs - Allocate Control Queue (CQ) buffers
+ * @hw: pointer to hw struct
+ * @cq: pointer to the specific Control queue
+ *
+ * Allocate the buffer head for all control queues, and if it's a receive
+ * queue, allocate DMA buffers
+ */
+static int idpf_ctlq_alloc_bufs(struct idpf_hw *hw,
+				struct idpf_ctlq_info *cq)
+{
+	int i;
+
+	/* Do not allocate DMA buffers for transmit queues */
+	if (cq->cq_type == IDPF_CTLQ_TYPE_MAILBOX_TX)
+		return 0;
+
+	/* We'll be allocating the buffer info memory first, then we can
+	 * allocate the mapped buffers for the event processing
+	 */
+	cq->bi.rx_buff = kcalloc(cq->ring_size, sizeof(struct idpf_dma_mem *),
+				 GFP_KERNEL);
+	if (!cq->bi.rx_buff)
+		return -ENOMEM;
+
+	/* allocate the mapped buffers (except for the last one) */
+	for (i = 0; i < cq->ring_size - 1; i++) {
+		struct idpf_dma_mem *bi;
+		int num = 1; /* number of idpf_dma_mem to be allocated */
+
+		cq->bi.rx_buff[i] = kcalloc(num, sizeof(struct idpf_dma_mem),
+					    GFP_KERNEL);
+		if (!cq->bi.rx_buff[i])
+			goto unwind_alloc_cq_bufs;
+
+		bi = cq->bi.rx_buff[i];
+
+		bi->va = idpf_alloc_dma_mem(hw, bi, cq->buf_size);
+		if (!bi->va) {
+			/* unwind will not free the failed entry */
+			kfree(cq->bi.rx_buff[i]);
+			goto unwind_alloc_cq_bufs;
+		}
+	}
+
+	return 0;
+
+unwind_alloc_cq_bufs:
+	/* don't try to free the one that failed... */
+	i--;
+	for (; i >= 0; i--) {
+		idpf_free_dma_mem(hw, cq->bi.rx_buff[i]);
+		kfree(cq->bi.rx_buff[i]);
+	}
+	kfree(cq->bi.rx_buff);
+
+	return -ENOMEM;
+}
+
+/**
+ * idpf_ctlq_free_desc_ring - Free Control Queue (CQ) rings
+ * @hw: pointer to hw struct
+ * @cq: pointer to the specific Control queue
+ *
+ * This assumes the posted send buffers have already been cleaned
+ * and de-allocated
+ */
+static void idpf_ctlq_free_desc_ring(struct idpf_hw *hw,
+				     struct idpf_ctlq_info *cq)
+{
+	idpf_free_dma_mem(hw, &cq->desc_ring);
+}
+
+/**
+ * idpf_ctlq_free_bufs - Free CQ buffer info elements
+ * @hw: pointer to hw struct
+ * @cq: pointer to the specific Control queue
+ *
+ * Free the DMA buffers for RX queues, and DMA buffer header for both RX and TX
+ * queues.  The upper layers are expected to manage freeing of TX DMA buffers
+ */
+static void idpf_ctlq_free_bufs(struct idpf_hw *hw, struct idpf_ctlq_info *cq)
+{
+	void *bi;
+
+	if (cq->cq_type == IDPF_CTLQ_TYPE_MAILBOX_RX) {
+		int i;
+
+		/* free DMA buffers for rx queues*/
+		for (i = 0; i < cq->ring_size; i++) {
+			if (cq->bi.rx_buff[i]) {
+				idpf_free_dma_mem(hw, cq->bi.rx_buff[i]);
+				kfree(cq->bi.rx_buff[i]);
+			}
+		}
+
+		bi = (void *)cq->bi.rx_buff;
+	} else {
+		bi = (void *)cq->bi.tx_msg;
+	}
+
+	/* free the buffer header */
+	kfree(bi);
+}
+
+/**
+ * idpf_ctlq_dealloc_ring_res - Free memory allocated for control queue
+ * @hw: pointer to hw struct
+ * @cq: pointer to the specific Control queue
+ *
+ * Free the memory used by the ring, buffers and other related structures
+ */
+void idpf_ctlq_dealloc_ring_res(struct idpf_hw *hw, struct idpf_ctlq_info *cq)
+{
+	/* free ring buffers and the ring itself */
+	idpf_ctlq_free_bufs(hw, cq);
+	idpf_ctlq_free_desc_ring(hw, cq);
+}
+
+/**
+ * idpf_ctlq_alloc_ring_res - allocate memory for descriptor ring and bufs
+ * @hw: pointer to hw struct
+ * @cq: pointer to control queue struct
+ *
+ * Do *NOT* hold cq_lock when calling this as the memory allocation routines
+ * called are not going to be atomic context safe
+ */
+int idpf_ctlq_alloc_ring_res(struct idpf_hw *hw, struct idpf_ctlq_info *cq)
+{
+	int err;
+
+	/* allocate the ring memory */
+	err = idpf_ctlq_alloc_desc_ring(hw, cq);
+	if (err)
+		return err;
+
+	/* allocate buffers in the rings */
+	err = idpf_ctlq_alloc_bufs(hw, cq);
+	if (err)
+		goto idpf_init_cq_free_ring;
+
+	/* success! */
+	return 0;
+
+idpf_init_cq_free_ring:
+	idpf_free_dma_mem(hw, &cq->desc_ring);
+
+	return err;
+}
diff --git a/drivers/net/ethernet/intel/idpf/idpf_dev.c b/drivers/net/ethernet/intel/idpf/idpf_dev.c
new file mode 100644
index 000000000000..34ad1ac46b78
--- /dev/null
+++ b/drivers/net/ethernet/intel/idpf/idpf_dev.c
@@ -0,0 +1,165 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2023 Intel Corporation */
+
+#include "idpf.h"
+#include "idpf_lan_pf_regs.h"
+
+#define IDPF_PF_ITR_IDX_SPACING		0x4
+
+/**
+ * idpf_ctlq_reg_init - initialize default mailbox registers
+ * @cq: pointer to the array of create control queues
+ */
+static void idpf_ctlq_reg_init(struct idpf_ctlq_create_info *cq)
+{
+	int i;
+
+	for (i = 0; i < IDPF_NUM_DFLT_MBX_Q; i++) {
+		struct idpf_ctlq_create_info *ccq = cq + i;
+
+		switch (ccq->type) {
+		case IDPF_CTLQ_TYPE_MAILBOX_TX:
+			/* set head and tail registers in our local struct */
+			ccq->reg.head = PF_FW_ATQH;
+			ccq->reg.tail = PF_FW_ATQT;
+			ccq->reg.len = PF_FW_ATQLEN;
+			ccq->reg.bah = PF_FW_ATQBAH;
+			ccq->reg.bal = PF_FW_ATQBAL;
+			ccq->reg.len_mask = PF_FW_ATQLEN_ATQLEN_M;
+			ccq->reg.len_ena_mask = PF_FW_ATQLEN_ATQENABLE_M;
+			ccq->reg.head_mask = PF_FW_ATQH_ATQH_M;
+			break;
+		case IDPF_CTLQ_TYPE_MAILBOX_RX:
+			/* set head and tail registers in our local struct */
+			ccq->reg.head = PF_FW_ARQH;
+			ccq->reg.tail = PF_FW_ARQT;
+			ccq->reg.len = PF_FW_ARQLEN;
+			ccq->reg.bah = PF_FW_ARQBAH;
+			ccq->reg.bal = PF_FW_ARQBAL;
+			ccq->reg.len_mask = PF_FW_ARQLEN_ARQLEN_M;
+			ccq->reg.len_ena_mask = PF_FW_ARQLEN_ARQENABLE_M;
+			ccq->reg.head_mask = PF_FW_ARQH_ARQH_M;
+			break;
+		default:
+			break;
+		}
+	}
+}
+
+/**
+ * idpf_mb_intr_reg_init - Initialize mailbox interrupt register
+ * @adapter: adapter structure
+ */
+static void idpf_mb_intr_reg_init(struct idpf_adapter *adapter)
+{
+	struct idpf_intr_reg *intr = &adapter->mb_vector.intr_reg;
+	u32 dyn_ctl = le32_to_cpu(adapter->caps.mailbox_dyn_ctl);
+
+	intr->dyn_ctl = idpf_get_reg_addr(adapter, dyn_ctl);
+	intr->dyn_ctl_intena_m = PF_GLINT_DYN_CTL_INTENA_M;
+	intr->dyn_ctl_itridx_m = PF_GLINT_DYN_CTL_ITR_INDX_M;
+	intr->icr_ena = idpf_get_reg_addr(adapter, PF_INT_DIR_OICR_ENA);
+	intr->icr_ena_ctlq_m = PF_INT_DIR_OICR_ENA_M;
+}
+
+/**
+ * idpf_intr_reg_init - Initialize interrupt registers
+ * @vport: virtual port structure
+ */
+static int idpf_intr_reg_init(struct idpf_vport *vport)
+{
+	struct idpf_adapter *adapter = vport->adapter;
+	int num_vecs = vport->num_q_vectors;
+	struct idpf_vec_regs *reg_vals;
+	int num_regs, i, err = 0;
+	u32 rx_itr, tx_itr;
+	u16 total_vecs;
+
+	total_vecs = idpf_get_reserved_vecs(vport->adapter);
+	reg_vals = kcalloc(total_vecs, sizeof(struct idpf_vec_regs),
+			   GFP_KERNEL);
+	if (!reg_vals)
+		return -ENOMEM;
+
+	num_regs = idpf_get_reg_intr_vecs(vport, reg_vals);
+	if (num_regs < num_vecs) {
+		err = -EINVAL;
+		goto free_reg_vals;
+	}
+
+	for (i = 0; i < num_vecs; i++) {
+		struct idpf_q_vector *q_vector = &vport->q_vectors[i];
+		u16 vec_id = vport->q_vector_idxs[i] - IDPF_MBX_Q_VEC;
+		struct idpf_intr_reg *intr = &q_vector->intr_reg;
+		u32 spacing;
+
+		intr->dyn_ctl = idpf_get_reg_addr(adapter,
+						  reg_vals[vec_id].dyn_ctl_reg);
+		intr->dyn_ctl_intena_m = PF_GLINT_DYN_CTL_INTENA_M;
+		intr->dyn_ctl_itridx_s = PF_GLINT_DYN_CTL_ITR_INDX_S;
+		intr->dyn_ctl_intrvl_s = PF_GLINT_DYN_CTL_INTERVAL_S;
+
+		spacing = IDPF_ITR_IDX_SPACING(reg_vals[vec_id].itrn_index_spacing,
+					       IDPF_PF_ITR_IDX_SPACING);
+		rx_itr = PF_GLINT_ITR_ADDR(VIRTCHNL2_ITR_IDX_0,
+					   reg_vals[vec_id].itrn_reg,
+					   spacing);
+		tx_itr = PF_GLINT_ITR_ADDR(VIRTCHNL2_ITR_IDX_1,
+					   reg_vals[vec_id].itrn_reg,
+					   spacing);
+		intr->rx_itr = idpf_get_reg_addr(adapter, rx_itr);
+		intr->tx_itr = idpf_get_reg_addr(adapter, tx_itr);
+	}
+
+free_reg_vals:
+	kfree(reg_vals);
+
+	return err;
+}
+
+/**
+ * idpf_reset_reg_init - Initialize reset registers
+ * @adapter: Driver specific private structure
+ */
+static void idpf_reset_reg_init(struct idpf_adapter *adapter)
+{
+	adapter->reset_reg.rstat = idpf_get_reg_addr(adapter, PFGEN_RSTAT);
+	adapter->reset_reg.rstat_m = PFGEN_RSTAT_PFR_STATE_M;
+}
+
+/**
+ * idpf_trigger_reset - trigger reset
+ * @adapter: Driver specific private structure
+ * @trig_cause: Reason to trigger a reset
+ */
+static void idpf_trigger_reset(struct idpf_adapter *adapter,
+			       enum idpf_flags __always_unused trig_cause)
+{
+	u32 reset_reg;
+
+	reset_reg = readl(idpf_get_reg_addr(adapter, PFGEN_CTRL));
+	writel(reset_reg | PFGEN_CTRL_PFSWR,
+	       idpf_get_reg_addr(adapter, PFGEN_CTRL));
+}
+
+/**
+ * idpf_reg_ops_init - Initialize register API function pointers
+ * @adapter: Driver specific private structure
+ */
+static void idpf_reg_ops_init(struct idpf_adapter *adapter)
+{
+	adapter->dev_ops.reg_ops.ctlq_reg_init = idpf_ctlq_reg_init;
+	adapter->dev_ops.reg_ops.intr_reg_init = idpf_intr_reg_init;
+	adapter->dev_ops.reg_ops.mb_intr_reg_init = idpf_mb_intr_reg_init;
+	adapter->dev_ops.reg_ops.reset_reg_init = idpf_reset_reg_init;
+	adapter->dev_ops.reg_ops.trigger_reset = idpf_trigger_reset;
+}
+
+/**
+ * idpf_dev_ops_init - Initialize device API function pointers
+ * @adapter: Driver specific private structure
+ */
+void idpf_dev_ops_init(struct idpf_adapter *adapter)
+{
+	idpf_reg_ops_init(adapter);
+}
diff --git a/drivers/net/ethernet/intel/idpf/idpf_devids.h b/drivers/net/ethernet/intel/idpf/idpf_devids.h
new file mode 100644
index 000000000000..5154a52ae61c
--- /dev/null
+++ b/drivers/net/ethernet/intel/idpf/idpf_devids.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright (C) 2023 Intel Corporation */
+
+#ifndef _IDPF_DEVIDS_H_
+#define _IDPF_DEVIDS_H_
+
+#define IDPF_DEV_ID_PF			0x1452
+#define IDPF_DEV_ID_VF			0x145C
+
+#endif /* _IDPF_DEVIDS_H_ */
diff --git a/drivers/net/ethernet/intel/idpf/idpf_ethtool.c b/drivers/net/ethernet/intel/idpf/idpf_ethtool.c
new file mode 100644
index 000000000000..52ea38669f85
--- /dev/null
+++ b/drivers/net/ethernet/intel/idpf/idpf_ethtool.c
@@ -0,0 +1,1369 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2023 Intel Corporation */
+
+#include "idpf.h"
+
+/**
+ * idpf_get_rxnfc - command to get RX flow classification rules
+ * @netdev: network interface device structure
+ * @cmd: ethtool rxnfc command
+ * @rule_locs: pointer to store rule locations
+ *
+ * Returns Success if the command is supported.
+ */
+static int idpf_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd,
+			  u32 __always_unused *rule_locs)
+{
+	struct idpf_vport *vport;
+
+	idpf_vport_ctrl_lock(netdev);
+	vport = idpf_netdev_to_vport(netdev);
+
+	switch (cmd->cmd) {
+	case ETHTOOL_GRXRINGS:
+		cmd->data = vport->num_rxq;
+		idpf_vport_ctrl_unlock(netdev);
+
+		return 0;
+	default:
+		break;
+	}
+
+	idpf_vport_ctrl_unlock(netdev);
+
+	return -EOPNOTSUPP;
+}
+
+/**
+ * idpf_get_rxfh_key_size - get the RSS hash key size
+ * @netdev: network interface device structure
+ *
+ * Returns the key size on success, error value on failure.
+ */
+static u32 idpf_get_rxfh_key_size(struct net_device *netdev)
+{
+	struct idpf_netdev_priv *np = netdev_priv(netdev);
+	struct idpf_vport_user_config_data *user_config;
+
+	if (!idpf_is_cap_ena_all(np->adapter, IDPF_RSS_CAPS, IDPF_CAP_RSS))
+		return -EOPNOTSUPP;
+
+	user_config = &np->adapter->vport_config[np->vport_idx]->user_config;
+
+	return user_config->rss_data.rss_key_size;
+}
+
+/**
+ * idpf_get_rxfh_indir_size - get the rx flow hash indirection table size
+ * @netdev: network interface device structure
+ *
+ * Returns the table size on success, error value on failure.
+ */
+static u32 idpf_get_rxfh_indir_size(struct net_device *netdev)
+{
+	struct idpf_netdev_priv *np = netdev_priv(netdev);
+	struct idpf_vport_user_config_data *user_config;
+
+	if (!idpf_is_cap_ena_all(np->adapter, IDPF_RSS_CAPS, IDPF_CAP_RSS))
+		return -EOPNOTSUPP;
+
+	user_config = &np->adapter->vport_config[np->vport_idx]->user_config;
+
+	return user_config->rss_data.rss_lut_size;
+}
+
+/**
+ * idpf_get_rxfh - get the rx flow hash indirection table
+ * @netdev: network interface device structure
+ * @indir: indirection table
+ * @key: hash key
+ * @hfunc: hash function in use
+ *
+ * Reads the indirection table directly from the hardware. Always returns 0.
+ */
+static int idpf_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key,
+			 u8 *hfunc)
+{
+	struct idpf_netdev_priv *np = netdev_priv(netdev);
+	struct idpf_rss_data *rss_data;
+	struct idpf_adapter *adapter;
+	int err = 0;
+	u16 i;
+
+	idpf_vport_ctrl_lock(netdev);
+
+	adapter = np->adapter;
+
+	if (!idpf_is_cap_ena_all(adapter, IDPF_RSS_CAPS, IDPF_CAP_RSS)) {
+		err = -EOPNOTSUPP;
+		goto unlock_mutex;
+	}
+
+	rss_data = &adapter->vport_config[np->vport_idx]->user_config.rss_data;
+	if (np->state != __IDPF_VPORT_UP)
+		goto unlock_mutex;
+
+	if (hfunc)
+		*hfunc = ETH_RSS_HASH_TOP;
+
+	if (key)
+		memcpy(key, rss_data->rss_key, rss_data->rss_key_size);
+
+	if (indir) {
+		for (i = 0; i < rss_data->rss_lut_size; i++)
+			indir[i] = rss_data->rss_lut[i];
+	}
+
+unlock_mutex:
+	idpf_vport_ctrl_unlock(netdev);
+
+	return err;
+}
+
+/**
+ * idpf_set_rxfh - set the rx flow hash indirection table
+ * @netdev: network interface device structure
+ * @indir: indirection table
+ * @key: hash key
+ * @hfunc: hash function to use
+ *
+ * Returns -EINVAL if the table specifies an invalid queue id, otherwise
+ * returns 0 after programming the table.
+ */
+static int idpf_set_rxfh(struct net_device *netdev, const u32 *indir,
+			 const u8 *key, const u8 hfunc)
+{
+	struct idpf_netdev_priv *np = netdev_priv(netdev);
+	struct idpf_rss_data *rss_data;
+	struct idpf_adapter *adapter;
+	struct idpf_vport *vport;
+	int err = 0;
+	u16 lut;
+
+	idpf_vport_ctrl_lock(netdev);
+	vport = idpf_netdev_to_vport(netdev);
+
+	adapter = vport->adapter;
+
+	if (!idpf_is_cap_ena_all(adapter, IDPF_RSS_CAPS, IDPF_CAP_RSS)) {
+		err = -EOPNOTSUPP;
+		goto unlock_mutex;
+	}
+
+	rss_data = &adapter->vport_config[vport->idx]->user_config.rss_data;
+	if (np->state != __IDPF_VPORT_UP)
+		goto unlock_mutex;
+
+	if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP) {
+		err = -EOPNOTSUPP;
+		goto unlock_mutex;
+	}
+
+	if (key)
+		memcpy(rss_data->rss_key, key, rss_data->rss_key_size);
+
+	if (indir) {
+		for (lut = 0; lut < rss_data->rss_lut_size; lut++)
+			rss_data->rss_lut[lut] = indir[lut];
+	}
+
+	err = idpf_config_rss(vport);
+
+unlock_mutex:
+	idpf_vport_ctrl_unlock(netdev);
+
+	return err;
+}
+
+/**
+ * idpf_get_channels: get the number of channels supported by the device
+ * @netdev: network interface device structure
+ * @ch: channel information structure
+ *
+ * Report maximum of TX and RX. Report one extra channel to match our MailBox
+ * Queue.
+ */
+static void idpf_get_channels(struct net_device *netdev,
+			      struct ethtool_channels *ch)
+{
+	struct idpf_netdev_priv *np = netdev_priv(netdev);
+	struct idpf_vport_config *vport_config;
+	u16 num_txq, num_rxq;
+	u16 combined;
+
+	vport_config = np->adapter->vport_config[np->vport_idx];
+
+	num_txq = vport_config->user_config.num_req_tx_qs;
+	num_rxq = vport_config->user_config.num_req_rx_qs;
+
+	combined = min(num_txq, num_rxq);
+
+	/* Report maximum channels */
+	ch->max_combined = min_t(u16, vport_config->max_q.max_txq,
+				 vport_config->max_q.max_rxq);
+	ch->max_rx = vport_config->max_q.max_rxq;
+	ch->max_tx = vport_config->max_q.max_txq;
+
+	ch->max_other = IDPF_MAX_MBXQ;
+	ch->other_count = IDPF_MAX_MBXQ;
+
+	ch->combined_count = combined;
+	ch->rx_count = num_rxq - combined;
+	ch->tx_count = num_txq - combined;
+}
+
+/**
+ * idpf_set_channels: set the new channel count
+ * @netdev: network interface device structure
+ * @ch: channel information structure
+ *
+ * Negotiate a new number of channels with CP. Returns 0 on success, negative
+ * on failure.
+ */
+static int idpf_set_channels(struct net_device *netdev,
+			     struct ethtool_channels *ch)
+{
+	struct idpf_vport_config *vport_config;
+	u16 combined, num_txq, num_rxq;
+	unsigned int num_req_tx_q;
+	unsigned int num_req_rx_q;
+	struct idpf_vport *vport;
+	struct device *dev;
+	int err = 0;
+	u16 idx;
+
+	idpf_vport_ctrl_lock(netdev);
+	vport = idpf_netdev_to_vport(netdev);
+
+	idx = vport->idx;
+	vport_config = vport->adapter->vport_config[idx];
+
+	num_txq = vport_config->user_config.num_req_tx_qs;
+	num_rxq = vport_config->user_config.num_req_rx_qs;
+
+	combined = min(num_txq, num_rxq);
+
+	/* these checks are for cases where user didn't specify a particular
+	 * value on cmd line but we get non-zero value anyway via
+	 * get_channels(); look at ethtool.c in ethtool repository (the user
+	 * space part), particularly, do_schannels() routine
+	 */
+	if (ch->combined_count == combined)
+		ch->combined_count = 0;
+	if (ch->combined_count && ch->rx_count == num_rxq - combined)
+		ch->rx_count = 0;
+	if (ch->combined_count && ch->tx_count == num_txq - combined)
+		ch->tx_count = 0;
+
+	num_req_tx_q = ch->combined_count + ch->tx_count;
+	num_req_rx_q = ch->combined_count + ch->rx_count;
+
+	dev = &vport->adapter->pdev->dev;
+	/* It's possible to specify number of queues that exceeds max.
+	 * Stack checks max combined_count and max [tx|rx]_count but not the
+	 * max combined_count + [tx|rx]_count. These checks should catch that.
+	 */
+	if (num_req_tx_q > vport_config->max_q.max_txq) {
+		dev_info(dev, "Maximum TX queues is %d\n",
+			 vport_config->max_q.max_txq);
+		err = -EINVAL;
+		goto unlock_mutex;
+	}
+	if (num_req_rx_q > vport_config->max_q.max_rxq) {
+		dev_info(dev, "Maximum RX queues is %d\n",
+			 vport_config->max_q.max_rxq);
+		err = -EINVAL;
+		goto unlock_mutex;
+	}
+
+	if (num_req_tx_q == num_txq && num_req_rx_q == num_rxq)
+		goto unlock_mutex;
+
+	vport_config->user_config.num_req_tx_qs = num_req_tx_q;
+	vport_config->user_config.num_req_rx_qs = num_req_rx_q;
+
+	err = idpf_initiate_soft_reset(vport, IDPF_SR_Q_CHANGE);
+	if (err) {
+		/* roll back queue change */
+		vport_config->user_config.num_req_tx_qs = num_txq;
+		vport_config->user_config.num_req_rx_qs = num_rxq;
+	}
+
+unlock_mutex:
+	idpf_vport_ctrl_unlock(netdev);
+
+	return err;
+}
+
+/**
+ * idpf_get_ringparam - Get ring parameters
+ * @netdev: network interface device structure
+ * @ring: ethtool ringparam structure
+ * @kring: unused
+ * @ext_ack: unused
+ *
+ * Returns current ring parameters. TX and RX rings are reported separately,
+ * but the number of rings is not reported.
+ */
+static void idpf_get_ringparam(struct net_device *netdev,
+			       struct ethtool_ringparam *ring,
+			       struct kernel_ethtool_ringparam *kring,
+			       struct netlink_ext_ack *ext_ack)
+{
+	struct idpf_vport *vport;
+
+	idpf_vport_ctrl_lock(netdev);
+	vport = idpf_netdev_to_vport(netdev);
+
+	ring->rx_max_pending = IDPF_MAX_RXQ_DESC;
+	ring->tx_max_pending = IDPF_MAX_TXQ_DESC;
+	ring->rx_pending = vport->rxq_desc_count;
+	ring->tx_pending = vport->txq_desc_count;
+
+	idpf_vport_ctrl_unlock(netdev);
+}
+
+/**
+ * idpf_set_ringparam - Set ring parameters
+ * @netdev: network interface device structure
+ * @ring: ethtool ringparam structure
+ * @kring: unused
+ * @ext_ack: unused
+ *
+ * Sets ring parameters. TX and RX rings are controlled separately, but the
+ * number of rings is not specified, so all rings get the same settings.
+ */
+static int idpf_set_ringparam(struct net_device *netdev,
+			      struct ethtool_ringparam *ring,
+			      struct kernel_ethtool_ringparam *kring,
+			      struct netlink_ext_ack *ext_ack)
+{
+	struct idpf_vport_user_config_data *config_data;
+	u32 new_rx_count, new_tx_count;
+	struct idpf_vport *vport;
+	int i, err = 0;
+	u16 idx;
+
+	idpf_vport_ctrl_lock(netdev);
+	vport = idpf_netdev_to_vport(netdev);
+
+	idx = vport->idx;
+
+	if (ring->tx_pending < IDPF_MIN_TXQ_DESC) {
+		netdev_err(netdev, "Descriptors requested (Tx: %u) is less than min supported (%u)\n",
+			   ring->tx_pending,
+			   IDPF_MIN_TXQ_DESC);
+		err = -EINVAL;
+		goto unlock_mutex;
+	}
+
+	if (ring->rx_pending < IDPF_MIN_RXQ_DESC) {
+		netdev_err(netdev, "Descriptors requested (Rx: %u) is less than min supported (%u)\n",
+			   ring->rx_pending,
+			   IDPF_MIN_RXQ_DESC);
+		err = -EINVAL;
+		goto unlock_mutex;
+	}
+
+	new_rx_count = ALIGN(ring->rx_pending, IDPF_REQ_RXQ_DESC_MULTIPLE);
+	if (new_rx_count != ring->rx_pending)
+		netdev_info(netdev, "Requested Rx descriptor count rounded up to %u\n",
+			    new_rx_count);
+
+	new_tx_count = ALIGN(ring->tx_pending, IDPF_REQ_DESC_MULTIPLE);
+	if (new_tx_count != ring->tx_pending)
+		netdev_info(netdev, "Requested Tx descriptor count rounded up to %u\n",
+			    new_tx_count);
+
+	if (new_tx_count == vport->txq_desc_count &&
+	    new_rx_count == vport->rxq_desc_count)
+		goto unlock_mutex;
+
+	config_data = &vport->adapter->vport_config[idx]->user_config;
+	config_data->num_req_txq_desc = new_tx_count;
+	config_data->num_req_rxq_desc = new_rx_count;
+
+	/* Since we adjusted the RX completion queue count, the RX buffer queue
+	 * descriptor count needs to be adjusted as well
+	 */
+	for (i = 0; i < vport->num_bufqs_per_qgrp; i++)
+		vport->bufq_desc_count[i] =
+			IDPF_RX_BUFQ_DESC_COUNT(new_rx_count,
+						vport->num_bufqs_per_qgrp);
+
+	err = idpf_initiate_soft_reset(vport, IDPF_SR_Q_DESC_CHANGE);
+
+unlock_mutex:
+	idpf_vport_ctrl_unlock(netdev);
+
+	return err;
+}
+
+/**
+ * struct idpf_stats - definition for an ethtool statistic
+ * @stat_string: statistic name to display in ethtool -S output
+ * @sizeof_stat: the sizeof() the stat, must be no greater than sizeof(u64)
+ * @stat_offset: offsetof() the stat from a base pointer
+ *
+ * This structure defines a statistic to be added to the ethtool stats buffer.
+ * It defines a statistic as offset from a common base pointer. Stats should
+ * be defined in constant arrays using the IDPF_STAT macro, with every element
+ * of the array using the same _type for calculating the sizeof_stat and
+ * stat_offset.
+ *
+ * The @sizeof_stat is expected to be sizeof(u8), sizeof(u16), sizeof(u32) or
+ * sizeof(u64). Other sizes are not expected and will produce a WARN_ONCE from
+ * the idpf_add_ethtool_stat() helper function.
+ *
+ * The @stat_string is interpreted as a format string, allowing formatted
+ * values to be inserted while looping over multiple structures for a given
+ * statistics array. Thus, every statistic string in an array should have the
+ * same type and number of format specifiers, to be formatted by variadic
+ * arguments to the idpf_add_stat_string() helper function.
+ */
+struct idpf_stats {
+	char stat_string[ETH_GSTRING_LEN];
+	int sizeof_stat;
+	int stat_offset;
+};
+
+/* Helper macro to define an idpf_stat structure with proper size and type.
+ * Use this when defining constant statistics arrays. Note that @_type expects
+ * only a type name and is used multiple times.
+ */
+#define IDPF_STAT(_type, _name, _stat) { \
+	.stat_string = _name, \
+	.sizeof_stat = sizeof_field(_type, _stat), \
+	.stat_offset = offsetof(_type, _stat) \
+}
+
+/* Helper macro for defining some statistics related to queues */
+#define IDPF_QUEUE_STAT(_name, _stat) \
+	IDPF_STAT(struct idpf_queue, _name, _stat)
+
+/* Stats associated with a Tx queue */
+static const struct idpf_stats idpf_gstrings_tx_queue_stats[] = {
+	IDPF_QUEUE_STAT("pkts", q_stats.tx.packets),
+	IDPF_QUEUE_STAT("bytes", q_stats.tx.bytes),
+	IDPF_QUEUE_STAT("lso_pkts", q_stats.tx.lso_pkts),
+};
+
+/* Stats associated with an Rx queue */
+static const struct idpf_stats idpf_gstrings_rx_queue_stats[] = {
+	IDPF_QUEUE_STAT("pkts", q_stats.rx.packets),
+	IDPF_QUEUE_STAT("bytes", q_stats.rx.bytes),
+	IDPF_QUEUE_STAT("rx_gro_hw_pkts", q_stats.rx.rsc_pkts),
+};
+
+#define IDPF_TX_QUEUE_STATS_LEN		ARRAY_SIZE(idpf_gstrings_tx_queue_stats)
+#define IDPF_RX_QUEUE_STATS_LEN		ARRAY_SIZE(idpf_gstrings_rx_queue_stats)
+
+#define IDPF_PORT_STAT(_name, _stat) \
+	IDPF_STAT(struct idpf_vport,  _name, _stat)
+
+static const struct idpf_stats idpf_gstrings_port_stats[] = {
+	IDPF_PORT_STAT("rx-csum_errors", port_stats.rx_hw_csum_err),
+	IDPF_PORT_STAT("rx-hsplit", port_stats.rx_hsplit),
+	IDPF_PORT_STAT("rx-hsplit_hbo", port_stats.rx_hsplit_hbo),
+	IDPF_PORT_STAT("rx-bad_descs", port_stats.rx_bad_descs),
+	IDPF_PORT_STAT("tx-skb_drops", port_stats.tx_drops),
+	IDPF_PORT_STAT("tx-dma_map_errs", port_stats.tx_dma_map_errs),
+	IDPF_PORT_STAT("tx-linearized_pkts", port_stats.tx_linearize),
+	IDPF_PORT_STAT("tx-busy_events", port_stats.tx_busy),
+	IDPF_PORT_STAT("rx-unicast_pkts", port_stats.vport_stats.rx_unicast),
+	IDPF_PORT_STAT("rx-multicast_pkts", port_stats.vport_stats.rx_multicast),
+	IDPF_PORT_STAT("rx-broadcast_pkts", port_stats.vport_stats.rx_broadcast),
+	IDPF_PORT_STAT("rx-unknown_protocol", port_stats.vport_stats.rx_unknown_protocol),
+	IDPF_PORT_STAT("tx-unicast_pkts", port_stats.vport_stats.tx_unicast),
+	IDPF_PORT_STAT("tx-multicast_pkts", port_stats.vport_stats.tx_multicast),
+	IDPF_PORT_STAT("tx-broadcast_pkts", port_stats.vport_stats.tx_broadcast),
+};
+
+#define IDPF_PORT_STATS_LEN ARRAY_SIZE(idpf_gstrings_port_stats)
+
+/**
+ * __idpf_add_qstat_strings - copy stat strings into ethtool buffer
+ * @p: ethtool supplied buffer
+ * @stats: stat definitions array
+ * @size: size of the stats array
+ * @type: stat type
+ * @idx: stat index
+ *
+ * Format and copy the strings described by stats into the buffer pointed at
+ * by p.
+ */
+static void __idpf_add_qstat_strings(u8 **p, const struct idpf_stats *stats,
+				     const unsigned int size, const char *type,
+				     unsigned int idx)
+{
+	unsigned int i;
+
+	for (i = 0; i < size; i++)
+		ethtool_sprintf(p, "%s_q-%u_%s",
+				type, idx, stats[i].stat_string);
+}
+
+/**
+ * idpf_add_qstat_strings - Copy queue stat strings into ethtool buffer
+ * @p: ethtool supplied buffer
+ * @stats: stat definitions array
+ * @type: stat type
+ * @idx: stat idx
+ *
+ * Format and copy the strings described by the const static stats value into
+ * the buffer pointed at by p.
+ *
+ * The parameter @stats is evaluated twice, so parameters with side effects
+ * should be avoided. Additionally, stats must be an array such that
+ * ARRAY_SIZE can be called on it.
+ */
+#define idpf_add_qstat_strings(p, stats, type, idx) \
+	__idpf_add_qstat_strings(p, stats, ARRAY_SIZE(stats), type, idx)
+
+/**
+ * idpf_add_stat_strings - Copy port stat strings into ethtool buffer
+ * @p: ethtool buffer
+ * @stats: struct to copy from
+ * @size: size of stats array to copy from
+ */
+static void idpf_add_stat_strings(u8 **p, const struct idpf_stats *stats,
+				  const unsigned int size)
+{
+	unsigned int i;
+
+	for (i = 0; i < size; i++)
+		ethtool_sprintf(p, "%s", stats[i].stat_string);
+}
+
+/**
+ * idpf_get_stat_strings - Get stat strings
+ * @netdev: network interface device structure
+ * @data: buffer for string data
+ *
+ * Builds the statistics string table
+ */
+static void idpf_get_stat_strings(struct net_device *netdev, u8 *data)
+{
+	struct idpf_netdev_priv *np = netdev_priv(netdev);
+	struct idpf_vport_config *vport_config;
+	unsigned int i;
+
+	idpf_add_stat_strings(&data, idpf_gstrings_port_stats,
+			      IDPF_PORT_STATS_LEN);
+
+	vport_config = np->adapter->vport_config[np->vport_idx];
+	/* It's critical that we always report a constant number of strings and
+	 * that the strings are reported in the same order regardless of how
+	 * many queues are actually in use.
+	 */
+	for (i = 0; i < vport_config->max_q.max_txq; i++)
+		idpf_add_qstat_strings(&data, idpf_gstrings_tx_queue_stats,
+				       "tx", i);
+
+	for (i = 0; i < vport_config->max_q.max_rxq; i++)
+		idpf_add_qstat_strings(&data, idpf_gstrings_rx_queue_stats,
+				       "rx", i);
+
+	page_pool_ethtool_stats_get_strings(data);
+}
+
+/**
+ * idpf_get_strings - Get string set
+ * @netdev: network interface device structure
+ * @sset: id of string set
+ * @data: buffer for string data
+ *
+ * Builds string tables for various string sets
+ */
+static void idpf_get_strings(struct net_device *netdev, u32 sset, u8 *data)
+{
+	switch (sset) {
+	case ETH_SS_STATS:
+		idpf_get_stat_strings(netdev, data);
+		break;
+	default:
+		break;
+	}
+}
+
+/**
+ * idpf_get_sset_count - Get length of string set
+ * @netdev: network interface device structure
+ * @sset: id of string set
+ *
+ * Reports size of various string tables.
+ */
+static int idpf_get_sset_count(struct net_device *netdev, int sset)
+{
+	struct idpf_netdev_priv *np = netdev_priv(netdev);
+	struct idpf_vport_config *vport_config;
+	u16 max_txq, max_rxq;
+	unsigned int size;
+
+	if (sset != ETH_SS_STATS)
+		return -EINVAL;
+
+	vport_config = np->adapter->vport_config[np->vport_idx];
+	/* This size reported back here *must* be constant throughout the
+	 * lifecycle of the netdevice, i.e. we must report the maximum length
+	 * even for queues that don't technically exist.  This is due to the
+	 * fact that this userspace API uses three separate ioctl calls to get
+	 * stats data but has no way to communicate back to userspace when that
+	 * size has changed, which can typically happen as a result of changing
+	 * number of queues. If the number/order of stats change in the middle
+	 * of this call chain it will lead to userspace crashing/accessing bad
+	 * data through buffer under/overflow.
+	 */
+	max_txq = vport_config->max_q.max_txq;
+	max_rxq = vport_config->max_q.max_rxq;
+
+	size = IDPF_PORT_STATS_LEN + (IDPF_TX_QUEUE_STATS_LEN * max_txq) +
+	       (IDPF_RX_QUEUE_STATS_LEN * max_rxq);
+	size += page_pool_ethtool_stats_get_count();
+
+	return size;
+}
+
+/**
+ * idpf_add_one_ethtool_stat - copy the stat into the supplied buffer
+ * @data: location to store the stat value
+ * @pstat: old stat pointer to copy from
+ * @stat: the stat definition
+ *
+ * Copies the stat data defined by the pointer and stat structure pair into
+ * the memory supplied as data. If the pointer is null, data will be zero'd.
+ */
+static void idpf_add_one_ethtool_stat(u64 *data, void *pstat,
+				      const struct idpf_stats *stat)
+{
+	char *p;
+
+	if (!pstat) {
+		/* Ensure that the ethtool data buffer is zero'd for any stats
+		 * which don't have a valid pointer.
+		 */
+		*data = 0;
+		return;
+	}
+
+	p = (char *)pstat + stat->stat_offset;
+	switch (stat->sizeof_stat) {
+	case sizeof(u64):
+		*data = *((u64 *)p);
+		break;
+	case sizeof(u32):
+		*data = *((u32 *)p);
+		break;
+	case sizeof(u16):
+		*data = *((u16 *)p);
+		break;
+	case sizeof(u8):
+		*data = *((u8 *)p);
+		break;
+	default:
+		WARN_ONCE(1, "unexpected stat size for %s",
+			  stat->stat_string);
+		*data = 0;
+	}
+}
+
+/**
+ * idpf_add_queue_stats - copy queue statistics into supplied buffer
+ * @data: ethtool stats buffer
+ * @q: the queue to copy
+ *
+ * Queue statistics must be copied while protected by u64_stats_fetch_begin,
+ * so we can't directly use idpf_add_ethtool_stats. Assumes that queue stats
+ * are defined in idpf_gstrings_queue_stats. If the queue pointer is null,
+ * zero out the queue stat values and update the data pointer. Otherwise
+ * safely copy the stats from the queue into the supplied buffer and update
+ * the data pointer when finished.
+ *
+ * This function expects to be called while under rcu_read_lock().
+ */
+static void idpf_add_queue_stats(u64 **data, struct idpf_queue *q)
+{
+	const struct idpf_stats *stats;
+	unsigned int start;
+	unsigned int size;
+	unsigned int i;
+
+	if (q->q_type == VIRTCHNL2_QUEUE_TYPE_RX) {
+		size = IDPF_RX_QUEUE_STATS_LEN;
+		stats = idpf_gstrings_rx_queue_stats;
+	} else {
+		size = IDPF_TX_QUEUE_STATS_LEN;
+		stats = idpf_gstrings_tx_queue_stats;
+	}
+
+	/* To avoid invalid statistics values, ensure that we keep retrying
+	 * the copy until we get a consistent value according to
+	 * u64_stats_fetch_retry.
+	 */
+	do {
+		start = u64_stats_fetch_begin(&q->stats_sync);
+		for (i = 0; i < size; i++)
+			idpf_add_one_ethtool_stat(&(*data)[i], q, &stats[i]);
+	} while (u64_stats_fetch_retry(&q->stats_sync, start));
+
+	/* Once we successfully copy the stats in, update the data pointer */
+	*data += size;
+}
+
+/**
+ * idpf_add_empty_queue_stats - Add stats for a non-existent queue
+ * @data: pointer to data buffer
+ * @qtype: type of data queue
+ *
+ * We must report a constant length of stats back to userspace regardless of
+ * how many queues are actually in use because stats collection happens over
+ * three separate ioctls and there's no way to notify userspace the size
+ * changed between those calls. This adds empty to data to the stats since we
+ * don't have a real queue to refer to for this stats slot.
+ */
+static void idpf_add_empty_queue_stats(u64 **data, u16 qtype)
+{
+	unsigned int i;
+	int stats_len;
+
+	if (qtype == VIRTCHNL2_QUEUE_TYPE_RX)
+		stats_len = IDPF_RX_QUEUE_STATS_LEN;
+	else
+		stats_len = IDPF_TX_QUEUE_STATS_LEN;
+
+	for (i = 0; i < stats_len; i++)
+		(*data)[i] = 0;
+	*data += stats_len;
+}
+
+/**
+ * idpf_add_port_stats - Copy port stats into ethtool buffer
+ * @vport: virtual port struct
+ * @data: ethtool buffer to copy into
+ */
+static void idpf_add_port_stats(struct idpf_vport *vport, u64 **data)
+{
+	unsigned int size = IDPF_PORT_STATS_LEN;
+	unsigned int start;
+	unsigned int i;
+
+	do {
+		start = u64_stats_fetch_begin(&vport->port_stats.stats_sync);
+		for (i = 0; i < size; i++)
+			idpf_add_one_ethtool_stat(&(*data)[i], vport,
+						  &idpf_gstrings_port_stats[i]);
+	} while (u64_stats_fetch_retry(&vport->port_stats.stats_sync, start));
+
+	*data += size;
+}
+
+/**
+ * idpf_collect_queue_stats - accumulate various per queue stats
+ * into port level stats
+ * @vport: pointer to vport struct
+ **/
+static void idpf_collect_queue_stats(struct idpf_vport *vport)
+{
+	struct idpf_port_stats *pstats = &vport->port_stats;
+	int i, j;
+
+	/* zero out port stats since they're actually tracked in per
+	 * queue stats; this is only for reporting
+	 */
+	u64_stats_update_begin(&pstats->stats_sync);
+	u64_stats_set(&pstats->rx_hw_csum_err, 0);
+	u64_stats_set(&pstats->rx_hsplit, 0);
+	u64_stats_set(&pstats->rx_hsplit_hbo, 0);
+	u64_stats_set(&pstats->rx_bad_descs, 0);
+	u64_stats_set(&pstats->tx_linearize, 0);
+	u64_stats_set(&pstats->tx_busy, 0);
+	u64_stats_set(&pstats->tx_drops, 0);
+	u64_stats_set(&pstats->tx_dma_map_errs, 0);
+	u64_stats_update_end(&pstats->stats_sync);
+
+	for (i = 0; i < vport->num_rxq_grp; i++) {
+		struct idpf_rxq_group *rxq_grp = &vport->rxq_grps[i];
+		u16 num_rxq;
+
+		if (idpf_is_queue_model_split(vport->rxq_model))
+			num_rxq = rxq_grp->splitq.num_rxq_sets;
+		else
+			num_rxq = rxq_grp->singleq.num_rxq;
+
+		for (j = 0; j < num_rxq; j++) {
+			u64 hw_csum_err, hsplit, hsplit_hbo, bad_descs;
+			struct idpf_rx_queue_stats *stats;
+			struct idpf_queue *rxq;
+			unsigned int start;
+
+			if (idpf_is_queue_model_split(vport->rxq_model))
+				rxq = &rxq_grp->splitq.rxq_sets[j]->rxq;
+			else
+				rxq = rxq_grp->singleq.rxqs[j];
+
+			if (!rxq)
+				continue;
+
+			do {
+				start = u64_stats_fetch_begin(&rxq->stats_sync);
+
+				stats = &rxq->q_stats.rx;
+				hw_csum_err = u64_stats_read(&stats->hw_csum_err);
+				hsplit = u64_stats_read(&stats->hsplit_pkts);
+				hsplit_hbo = u64_stats_read(&stats->hsplit_buf_ovf);
+				bad_descs = u64_stats_read(&stats->bad_descs);
+			} while (u64_stats_fetch_retry(&rxq->stats_sync, start));
+
+			u64_stats_update_begin(&pstats->stats_sync);
+			u64_stats_add(&pstats->rx_hw_csum_err, hw_csum_err);
+			u64_stats_add(&pstats->rx_hsplit, hsplit);
+			u64_stats_add(&pstats->rx_hsplit_hbo, hsplit_hbo);
+			u64_stats_add(&pstats->rx_bad_descs, bad_descs);
+			u64_stats_update_end(&pstats->stats_sync);
+		}
+	}
+
+	for (i = 0; i < vport->num_txq_grp; i++) {
+		struct idpf_txq_group *txq_grp = &vport->txq_grps[i];
+
+		for (j = 0; j < txq_grp->num_txq; j++) {
+			u64 linearize, qbusy, skb_drops, dma_map_errs;
+			struct idpf_queue *txq = txq_grp->txqs[j];
+			struct idpf_tx_queue_stats *stats;
+			unsigned int start;
+
+			if (!txq)
+				continue;
+
+			do {
+				start = u64_stats_fetch_begin(&txq->stats_sync);
+
+				stats = &txq->q_stats.tx;
+				linearize = u64_stats_read(&stats->linearize);
+				qbusy = u64_stats_read(&stats->q_busy);
+				skb_drops = u64_stats_read(&stats->skb_drops);
+				dma_map_errs = u64_stats_read(&stats->dma_map_errs);
+			} while (u64_stats_fetch_retry(&txq->stats_sync, start));
+
+			u64_stats_update_begin(&pstats->stats_sync);
+			u64_stats_add(&pstats->tx_linearize, linearize);
+			u64_stats_add(&pstats->tx_busy, qbusy);
+			u64_stats_add(&pstats->tx_drops, skb_drops);
+			u64_stats_add(&pstats->tx_dma_map_errs, dma_map_errs);
+			u64_stats_update_end(&pstats->stats_sync);
+		}
+	}
+}
+
+/**
+ * idpf_get_ethtool_stats - report device statistics
+ * @netdev: network interface device structure
+ * @stats: ethtool statistics structure
+ * @data: pointer to data buffer
+ *
+ * All statistics are added to the data buffer as an array of u64.
+ */
+static void idpf_get_ethtool_stats(struct net_device *netdev,
+				   struct ethtool_stats __always_unused *stats,
+				   u64 *data)
+{
+	struct idpf_netdev_priv *np = netdev_priv(netdev);
+	struct idpf_vport_config *vport_config;
+	struct page_pool_stats pp_stats = { };
+	struct idpf_vport *vport;
+	unsigned int total = 0;
+	unsigned int i, j;
+	bool is_splitq;
+	u16 qtype;
+
+	idpf_vport_ctrl_lock(netdev);
+	vport = idpf_netdev_to_vport(netdev);
+
+	if (np->state != __IDPF_VPORT_UP) {
+		idpf_vport_ctrl_unlock(netdev);
+
+		return;
+	}
+
+	rcu_read_lock();
+
+	idpf_collect_queue_stats(vport);
+	idpf_add_port_stats(vport, &data);
+
+	for (i = 0; i < vport->num_txq_grp; i++) {
+		struct idpf_txq_group *txq_grp = &vport->txq_grps[i];
+
+		qtype = VIRTCHNL2_QUEUE_TYPE_TX;
+
+		for (j = 0; j < txq_grp->num_txq; j++, total++) {
+			struct idpf_queue *txq = txq_grp->txqs[j];
+
+			if (!txq)
+				idpf_add_empty_queue_stats(&data, qtype);
+			else
+				idpf_add_queue_stats(&data, txq);
+		}
+	}
+
+	vport_config = vport->adapter->vport_config[vport->idx];
+	/* It is critical we provide a constant number of stats back to
+	 * userspace regardless of how many queues are actually in use because
+	 * there is no way to inform userspace the size has changed between
+	 * ioctl calls. This will fill in any missing stats with zero.
+	 */
+	for (; total < vport_config->max_q.max_txq; total++)
+		idpf_add_empty_queue_stats(&data, VIRTCHNL2_QUEUE_TYPE_TX);
+	total = 0;
+
+	is_splitq = idpf_is_queue_model_split(vport->rxq_model);
+
+	for (i = 0; i < vport->num_rxq_grp; i++) {
+		struct idpf_rxq_group *rxq_grp = &vport->rxq_grps[i];
+		u16 num_rxq;
+
+		qtype = VIRTCHNL2_QUEUE_TYPE_RX;
+
+		if (is_splitq)
+			num_rxq = rxq_grp->splitq.num_rxq_sets;
+		else
+			num_rxq = rxq_grp->singleq.num_rxq;
+
+		for (j = 0; j < num_rxq; j++, total++) {
+			struct idpf_queue *rxq;
+
+			if (is_splitq)
+				rxq = &rxq_grp->splitq.rxq_sets[j]->rxq;
+			else
+				rxq = rxq_grp->singleq.rxqs[j];
+			if (!rxq)
+				idpf_add_empty_queue_stats(&data, qtype);
+			else
+				idpf_add_queue_stats(&data, rxq);
+
+			/* In splitq mode, don't get page pool stats here since
+			 * the pools are attached to the buffer queues
+			 */
+			if (is_splitq)
+				continue;
+
+			if (rxq)
+				page_pool_get_stats(rxq->pp, &pp_stats);
+		}
+	}
+
+	for (i = 0; i < vport->num_rxq_grp; i++) {
+		for (j = 0; j < vport->num_bufqs_per_qgrp; j++) {
+			struct idpf_queue *rxbufq =
+				&vport->rxq_grps[i].splitq.bufq_sets[j].bufq;
+
+			page_pool_get_stats(rxbufq->pp, &pp_stats);
+		}
+	}
+
+	for (; total < vport_config->max_q.max_rxq; total++)
+		idpf_add_empty_queue_stats(&data, VIRTCHNL2_QUEUE_TYPE_RX);
+
+	page_pool_ethtool_stats_get(data, &pp_stats);
+
+	rcu_read_unlock();
+
+	idpf_vport_ctrl_unlock(netdev);
+}
+
+/**
+ * idpf_find_rxq - find rxq from q index
+ * @vport: virtual port associated to queue
+ * @q_num: q index used to find queue
+ *
+ * returns pointer to rx queue
+ */
+static struct idpf_queue *idpf_find_rxq(struct idpf_vport *vport, int q_num)
+{
+	int q_grp, q_idx;
+
+	if (!idpf_is_queue_model_split(vport->rxq_model))
+		return vport->rxq_grps->singleq.rxqs[q_num];
+
+	q_grp = q_num / IDPF_DFLT_SPLITQ_RXQ_PER_GROUP;
+	q_idx = q_num % IDPF_DFLT_SPLITQ_RXQ_PER_GROUP;
+
+	return &vport->rxq_grps[q_grp].splitq.rxq_sets[q_idx]->rxq;
+}
+
+/**
+ * idpf_find_txq - find txq from q index
+ * @vport: virtual port associated to queue
+ * @q_num: q index used to find queue
+ *
+ * returns pointer to tx queue
+ */
+static struct idpf_queue *idpf_find_txq(struct idpf_vport *vport, int q_num)
+{
+	int q_grp;
+
+	if (!idpf_is_queue_model_split(vport->txq_model))
+		return vport->txqs[q_num];
+
+	q_grp = q_num / IDPF_DFLT_SPLITQ_TXQ_PER_GROUP;
+
+	return vport->txq_grps[q_grp].complq;
+}
+
+/**
+ * __idpf_get_q_coalesce - get ITR values for specific queue
+ * @ec: ethtool structure to fill with driver's coalesce settings
+ * @q: quuee of Rx or Tx
+ */
+static void __idpf_get_q_coalesce(struct ethtool_coalesce *ec,
+				  struct idpf_queue *q)
+{
+	if (q->q_type == VIRTCHNL2_QUEUE_TYPE_RX) {
+		ec->use_adaptive_rx_coalesce =
+				IDPF_ITR_IS_DYNAMIC(q->q_vector->rx_intr_mode);
+		ec->rx_coalesce_usecs = q->q_vector->rx_itr_value;
+	} else {
+		ec->use_adaptive_tx_coalesce =
+				IDPF_ITR_IS_DYNAMIC(q->q_vector->tx_intr_mode);
+		ec->tx_coalesce_usecs = q->q_vector->tx_itr_value;
+	}
+}
+
+/**
+ * idpf_get_q_coalesce - get ITR values for specific queue
+ * @netdev: pointer to the netdev associated with this query
+ * @ec: coalesce settings to program the device with
+ * @q_num: update ITR/INTRL (coalesce) settings for this queue number/index
+ *
+ * Return 0 on success, and negative on failure
+ */
+static int idpf_get_q_coalesce(struct net_device *netdev,
+			       struct ethtool_coalesce *ec,
+			       u32 q_num)
+{
+	struct idpf_netdev_priv *np = netdev_priv(netdev);
+	struct idpf_vport *vport;
+	int err = 0;
+
+	idpf_vport_ctrl_lock(netdev);
+	vport = idpf_netdev_to_vport(netdev);
+
+	if (np->state != __IDPF_VPORT_UP)
+		goto unlock_mutex;
+
+	if (q_num >= vport->num_rxq && q_num >= vport->num_txq) {
+		err = -EINVAL;
+		goto unlock_mutex;
+	}
+
+	if (q_num < vport->num_rxq)
+		__idpf_get_q_coalesce(ec, idpf_find_rxq(vport, q_num));
+
+	if (q_num < vport->num_txq)
+		__idpf_get_q_coalesce(ec, idpf_find_txq(vport, q_num));
+
+unlock_mutex:
+	idpf_vport_ctrl_unlock(netdev);
+
+	return err;
+}
+
+/**
+ * idpf_get_coalesce - get ITR values as requested by user
+ * @netdev: pointer to the netdev associated with this query
+ * @ec: coalesce settings to be filled
+ * @kec: unused
+ * @extack: unused
+ *
+ * Return 0 on success, and negative on failure
+ */
+static int idpf_get_coalesce(struct net_device *netdev,
+			     struct ethtool_coalesce *ec,
+			     struct kernel_ethtool_coalesce *kec,
+			     struct netlink_ext_ack *extack)
+{
+	/* Return coalesce based on queue number zero */
+	return idpf_get_q_coalesce(netdev, ec, 0);
+}
+
+/**
+ * idpf_get_per_q_coalesce - get ITR values as requested by user
+ * @netdev: pointer to the netdev associated with this query
+ * @q_num: queue for which the itr values has to retrieved
+ * @ec: coalesce settings to be filled
+ *
+ * Return 0 on success, and negative on failure
+ */
+
+static int idpf_get_per_q_coalesce(struct net_device *netdev, u32 q_num,
+				   struct ethtool_coalesce *ec)
+{
+	return idpf_get_q_coalesce(netdev, ec, q_num);
+}
+
+/**
+ * __idpf_set_q_coalesce - set ITR values for specific queue
+ * @ec: ethtool structure from user to update ITR settings
+ * @q: queue for which itr values has to be set
+ * @is_rxq: is queue type rx
+ *
+ * Returns 0 on success, negative otherwise.
+ */
+static int __idpf_set_q_coalesce(struct ethtool_coalesce *ec,
+				 struct idpf_queue *q, bool is_rxq)
+{
+	u32 use_adaptive_coalesce, coalesce_usecs;
+	struct idpf_q_vector *qv = q->q_vector;
+	bool is_dim_ena = false;
+	u16 itr_val;
+
+	if (is_rxq) {
+		is_dim_ena = IDPF_ITR_IS_DYNAMIC(qv->rx_intr_mode);
+		use_adaptive_coalesce = ec->use_adaptive_rx_coalesce;
+		coalesce_usecs = ec->rx_coalesce_usecs;
+		itr_val = qv->rx_itr_value;
+	} else {
+		is_dim_ena = IDPF_ITR_IS_DYNAMIC(qv->tx_intr_mode);
+		use_adaptive_coalesce = ec->use_adaptive_tx_coalesce;
+		coalesce_usecs = ec->tx_coalesce_usecs;
+		itr_val = qv->tx_itr_value;
+	}
+	if (coalesce_usecs != itr_val && use_adaptive_coalesce) {
+		netdev_err(q->vport->netdev, "Cannot set coalesce usecs if adaptive enabled\n");
+
+		return -EINVAL;
+	}
+
+	if (is_dim_ena && use_adaptive_coalesce)
+		return 0;
+
+	if (coalesce_usecs > IDPF_ITR_MAX) {
+		netdev_err(q->vport->netdev,
+			   "Invalid value, %d-usecs range is 0-%d\n",
+			   coalesce_usecs, IDPF_ITR_MAX);
+
+		return -EINVAL;
+	}
+
+	if (coalesce_usecs % 2) {
+		coalesce_usecs--;
+		netdev_info(q->vport->netdev,
+			    "HW only supports even ITR values, ITR rounded to %d\n",
+			    coalesce_usecs);
+	}
+
+	if (is_rxq) {
+		qv->rx_itr_value = coalesce_usecs;
+		if (use_adaptive_coalesce) {
+			qv->rx_intr_mode = IDPF_ITR_DYNAMIC;
+		} else {
+			qv->rx_intr_mode = !IDPF_ITR_DYNAMIC;
+			idpf_vport_intr_write_itr(qv, qv->rx_itr_value,
+						  false);
+		}
+	} else {
+		qv->tx_itr_value = coalesce_usecs;
+		if (use_adaptive_coalesce) {
+			qv->tx_intr_mode = IDPF_ITR_DYNAMIC;
+		} else {
+			qv->tx_intr_mode = !IDPF_ITR_DYNAMIC;
+			idpf_vport_intr_write_itr(qv, qv->tx_itr_value, true);
+		}
+	}
+
+	/* Update of static/dynamic itr will be taken care when interrupt is
+	 * fired
+	 */
+	return 0;
+}
+
+/**
+ * idpf_set_q_coalesce - set ITR values for specific queue
+ * @vport: vport associated to the queue that need updating
+ * @ec: coalesce settings to program the device with
+ * @q_num: update ITR/INTRL (coalesce) settings for this queue number/index
+ * @is_rxq: is queue type rx
+ *
+ * Return 0 on success, and negative on failure
+ */
+static int idpf_set_q_coalesce(struct idpf_vport *vport,
+			       struct ethtool_coalesce *ec,
+			       int q_num, bool is_rxq)
+{
+	struct idpf_queue *q;
+
+	q = is_rxq ? idpf_find_rxq(vport, q_num) : idpf_find_txq(vport, q_num);
+
+	if (q && __idpf_set_q_coalesce(ec, q, is_rxq))
+		return -EINVAL;
+
+	return 0;
+}
+
+/**
+ * idpf_set_coalesce - set ITR values as requested by user
+ * @netdev: pointer to the netdev associated with this query
+ * @ec: coalesce settings to program the device with
+ * @kec: unused
+ * @extack: unused
+ *
+ * Return 0 on success, and negative on failure
+ */
+static int idpf_set_coalesce(struct net_device *netdev,
+			     struct ethtool_coalesce *ec,
+			     struct kernel_ethtool_coalesce *kec,
+			     struct netlink_ext_ack *extack)
+{
+	struct idpf_netdev_priv *np = netdev_priv(netdev);
+	struct idpf_vport *vport;
+	int i, err = 0;
+
+	idpf_vport_ctrl_lock(netdev);
+	vport = idpf_netdev_to_vport(netdev);
+
+	if (np->state != __IDPF_VPORT_UP)
+		goto unlock_mutex;
+
+	for (i = 0; i < vport->num_txq; i++) {
+		err = idpf_set_q_coalesce(vport, ec, i, false);
+		if (err)
+			goto unlock_mutex;
+	}
+
+	for (i = 0; i < vport->num_rxq; i++) {
+		err = idpf_set_q_coalesce(vport, ec, i, true);
+		if (err)
+			goto unlock_mutex;
+	}
+
+unlock_mutex:
+	idpf_vport_ctrl_unlock(netdev);
+
+	return err;
+}
+
+/**
+ * idpf_set_per_q_coalesce - set ITR values as requested by user
+ * @netdev: pointer to the netdev associated with this query
+ * @q_num: queue for which the itr values has to be set
+ * @ec: coalesce settings to program the device with
+ *
+ * Return 0 on success, and negative on failure
+ */
+static int idpf_set_per_q_coalesce(struct net_device *netdev, u32 q_num,
+				   struct ethtool_coalesce *ec)
+{
+	struct idpf_vport *vport;
+	int err;
+
+	idpf_vport_ctrl_lock(netdev);
+	vport = idpf_netdev_to_vport(netdev);
+
+	err = idpf_set_q_coalesce(vport, ec, q_num, false);
+	if (err) {
+		idpf_vport_ctrl_unlock(netdev);
+
+		return err;
+	}
+
+	err = idpf_set_q_coalesce(vport, ec, q_num, true);
+
+	idpf_vport_ctrl_unlock(netdev);
+
+	return err;
+}
+
+/**
+ * idpf_get_msglevel - Get debug message level
+ * @netdev: network interface device structure
+ *
+ * Returns current debug message level.
+ */
+static u32 idpf_get_msglevel(struct net_device *netdev)
+{
+	struct idpf_adapter *adapter = idpf_netdev_to_adapter(netdev);
+
+	return adapter->msg_enable;
+}
+
+/**
+ * idpf_set_msglevel - Set debug message level
+ * @netdev: network interface device structure
+ * @data: message level
+ *
+ * Set current debug message level. Higher values cause the driver to
+ * be noisier.
+ */
+static void idpf_set_msglevel(struct net_device *netdev, u32 data)
+{
+	struct idpf_adapter *adapter = idpf_netdev_to_adapter(netdev);
+
+	adapter->msg_enable = data;
+}
+
+/**
+ * idpf_get_link_ksettings - Get Link Speed and Duplex settings
+ * @netdev: network interface device structure
+ * @cmd: ethtool command
+ *
+ * Reports speed/duplex settings.
+ **/
+static int idpf_get_link_ksettings(struct net_device *netdev,
+				   struct ethtool_link_ksettings *cmd)
+{
+	struct idpf_vport *vport;
+
+	idpf_vport_ctrl_lock(netdev);
+	vport = idpf_netdev_to_vport(netdev);
+
+	ethtool_link_ksettings_zero_link_mode(cmd, supported);
+	cmd->base.autoneg = AUTONEG_DISABLE;
+	cmd->base.port = PORT_NONE;
+	if (vport->link_up) {
+		cmd->base.duplex = DUPLEX_FULL;
+		cmd->base.speed = vport->link_speed_mbps;
+	} else {
+		cmd->base.duplex = DUPLEX_UNKNOWN;
+		cmd->base.speed = SPEED_UNKNOWN;
+	}
+
+	idpf_vport_ctrl_unlock(netdev);
+
+	return 0;
+}
+
+static const struct ethtool_ops idpf_ethtool_ops = {
+	.supported_coalesce_params = ETHTOOL_COALESCE_USECS |
+				     ETHTOOL_COALESCE_USE_ADAPTIVE,
+	.get_msglevel		= idpf_get_msglevel,
+	.set_msglevel		= idpf_set_msglevel,
+	.get_link		= ethtool_op_get_link,
+	.get_coalesce		= idpf_get_coalesce,
+	.set_coalesce		= idpf_set_coalesce,
+	.get_per_queue_coalesce = idpf_get_per_q_coalesce,
+	.set_per_queue_coalesce = idpf_set_per_q_coalesce,
+	.get_ethtool_stats	= idpf_get_ethtool_stats,
+	.get_strings		= idpf_get_strings,
+	.get_sset_count		= idpf_get_sset_count,
+	.get_channels		= idpf_get_channels,
+	.get_rxnfc		= idpf_get_rxnfc,
+	.get_rxfh_key_size	= idpf_get_rxfh_key_size,
+	.get_rxfh_indir_size	= idpf_get_rxfh_indir_size,
+	.get_rxfh		= idpf_get_rxfh,
+	.set_rxfh		= idpf_set_rxfh,
+	.set_channels		= idpf_set_channels,
+	.get_ringparam		= idpf_get_ringparam,
+	.set_ringparam		= idpf_set_ringparam,
+	.get_link_ksettings	= idpf_get_link_ksettings,
+};
+
+/**
+ * idpf_set_ethtool_ops - Initialize ethtool ops struct
+ * @netdev: network interface device structure
+ *
+ * Sets ethtool ops struct in our netdev so that ethtool can call
+ * our functions.
+ */
+void idpf_set_ethtool_ops(struct net_device *netdev)
+{
+	netdev->ethtool_ops = &idpf_ethtool_ops;
+}
diff --git a/drivers/net/ethernet/intel/idpf/idpf_lan_pf_regs.h b/drivers/net/ethernet/intel/idpf/idpf_lan_pf_regs.h
new file mode 100644
index 000000000000..24edb8a6ec2e
--- /dev/null
+++ b/drivers/net/ethernet/intel/idpf/idpf_lan_pf_regs.h
@@ -0,0 +1,124 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright (C) 2023 Intel Corporation */
+
+#ifndef _IDPF_LAN_PF_REGS_H_
+#define _IDPF_LAN_PF_REGS_H_
+
+/* Receive queues */
+#define PF_QRX_BASE			0x00000000
+#define PF_QRX_TAIL(_QRX)		(PF_QRX_BASE + (((_QRX) * 0x1000)))
+#define PF_QRX_BUFFQ_BASE		0x03000000
+#define PF_QRX_BUFFQ_TAIL(_QRX)		(PF_QRX_BUFFQ_BASE + (((_QRX) * 0x1000)))
+
+/* Transmit queues */
+#define PF_QTX_BASE			0x05000000
+#define PF_QTX_COMM_DBELL(_DBQM)	(PF_QTX_BASE + ((_DBQM) * 0x1000))
+
+/* Control(PF Mailbox) Queue */
+#define PF_FW_BASE			0x08400000
+
+#define PF_FW_ARQBAL			(PF_FW_BASE)
+#define PF_FW_ARQBAH			(PF_FW_BASE + 0x4)
+#define PF_FW_ARQLEN			(PF_FW_BASE + 0x8)
+#define PF_FW_ARQLEN_ARQLEN_S		0
+#define PF_FW_ARQLEN_ARQLEN_M		GENMASK(12, 0)
+#define PF_FW_ARQLEN_ARQVFE_S		28
+#define PF_FW_ARQLEN_ARQVFE_M		BIT(PF_FW_ARQLEN_ARQVFE_S)
+#define PF_FW_ARQLEN_ARQOVFL_S		29
+#define PF_FW_ARQLEN_ARQOVFL_M		BIT(PF_FW_ARQLEN_ARQOVFL_S)
+#define PF_FW_ARQLEN_ARQCRIT_S		30
+#define PF_FW_ARQLEN_ARQCRIT_M		BIT(PF_FW_ARQLEN_ARQCRIT_S)
+#define PF_FW_ARQLEN_ARQENABLE_S	31
+#define PF_FW_ARQLEN_ARQENABLE_M	BIT(PF_FW_ARQLEN_ARQENABLE_S)
+#define PF_FW_ARQH			(PF_FW_BASE + 0xC)
+#define PF_FW_ARQH_ARQH_S		0
+#define PF_FW_ARQH_ARQH_M		GENMASK(12, 0)
+#define PF_FW_ARQT			(PF_FW_BASE + 0x10)
+
+#define PF_FW_ATQBAL			(PF_FW_BASE + 0x14)
+#define PF_FW_ATQBAH			(PF_FW_BASE + 0x18)
+#define PF_FW_ATQLEN			(PF_FW_BASE + 0x1C)
+#define PF_FW_ATQLEN_ATQLEN_S		0
+#define PF_FW_ATQLEN_ATQLEN_M		GENMASK(9, 0)
+#define PF_FW_ATQLEN_ATQVFE_S		28
+#define PF_FW_ATQLEN_ATQVFE_M		BIT(PF_FW_ATQLEN_ATQVFE_S)
+#define PF_FW_ATQLEN_ATQOVFL_S		29
+#define PF_FW_ATQLEN_ATQOVFL_M		BIT(PF_FW_ATQLEN_ATQOVFL_S)
+#define PF_FW_ATQLEN_ATQCRIT_S		30
+#define PF_FW_ATQLEN_ATQCRIT_M		BIT(PF_FW_ATQLEN_ATQCRIT_S)
+#define PF_FW_ATQLEN_ATQENABLE_S	31
+#define PF_FW_ATQLEN_ATQENABLE_M	BIT(PF_FW_ATQLEN_ATQENABLE_S)
+#define PF_FW_ATQH			(PF_FW_BASE + 0x20)
+#define PF_FW_ATQH_ATQH_S		0
+#define PF_FW_ATQH_ATQH_M		GENMASK(9, 0)
+#define PF_FW_ATQT			(PF_FW_BASE + 0x24)
+
+/* Interrupts */
+#define PF_GLINT_BASE			0x08900000
+#define PF_GLINT_DYN_CTL(_INT)		(PF_GLINT_BASE + ((_INT) * 0x1000))
+#define PF_GLINT_DYN_CTL_INTENA_S	0
+#define PF_GLINT_DYN_CTL_INTENA_M	BIT(PF_GLINT_DYN_CTL_INTENA_S)
+#define PF_GLINT_DYN_CTL_CLEARPBA_S	1
+#define PF_GLINT_DYN_CTL_CLEARPBA_M	BIT(PF_GLINT_DYN_CTL_CLEARPBA_S)
+#define PF_GLINT_DYN_CTL_SWINT_TRIG_S	2
+#define PF_GLINT_DYN_CTL_SWINT_TRIG_M	BIT(PF_GLINT_DYN_CTL_SWINT_TRIG_S)
+#define PF_GLINT_DYN_CTL_ITR_INDX_S	3
+#define PF_GLINT_DYN_CTL_ITR_INDX_M	GENMASK(4, 3)
+#define PF_GLINT_DYN_CTL_INTERVAL_S	5
+#define PF_GLINT_DYN_CTL_INTERVAL_M	BIT(PF_GLINT_DYN_CTL_INTERVAL_S)
+#define PF_GLINT_DYN_CTL_SW_ITR_INDX_ENA_S	24
+#define PF_GLINT_DYN_CTL_SW_ITR_INDX_ENA_M BIT(PF_GLINT_DYN_CTL_SW_ITR_INDX_ENA_S)
+#define PF_GLINT_DYN_CTL_SW_ITR_INDX_S	25
+#define PF_GLINT_DYN_CTL_SW_ITR_INDX_M	BIT(PF_GLINT_DYN_CTL_SW_ITR_INDX_S)
+#define PF_GLINT_DYN_CTL_WB_ON_ITR_S	30
+#define PF_GLINT_DYN_CTL_WB_ON_ITR_M	BIT(PF_GLINT_DYN_CTL_WB_ON_ITR_S)
+#define PF_GLINT_DYN_CTL_INTENA_MSK_S	31
+#define PF_GLINT_DYN_CTL_INTENA_MSK_M	BIT(PF_GLINT_DYN_CTL_INTENA_MSK_S)
+/* _ITR is ITR index, _INT is interrupt index, _itrn_indx_spacing is
+ * spacing b/w itrn registers of the same vector.
+ */
+#define PF_GLINT_ITR_ADDR(_ITR, _reg_start, _itrn_indx_spacing)	\
+	((_reg_start) + ((_ITR) * (_itrn_indx_spacing)))
+/* For PF, itrn_indx_spacing is 4 and itrn_reg_spacing is 0x1000 */
+#define PF_GLINT_ITR(_ITR, _INT)	\
+	(PF_GLINT_BASE + (((_ITR) + 1) * 4) + ((_INT) * 0x1000))
+#define PF_GLINT_ITR_MAX_INDEX		2
+#define PF_GLINT_ITR_INTERVAL_S		0
+#define PF_GLINT_ITR_INTERVAL_M		GENMASK(11, 0)
+
+/* Generic registers */
+#define PF_INT_DIR_OICR_ENA		0x08406000
+#define PF_INT_DIR_OICR_ENA_S		0
+#define PF_INT_DIR_OICR_ENA_M		GENMASK(31, 0)
+#define PF_INT_DIR_OICR			0x08406004
+#define PF_INT_DIR_OICR_TSYN_EVNT	0
+#define PF_INT_DIR_OICR_PHY_TS_0	BIT(1)
+#define PF_INT_DIR_OICR_PHY_TS_1	BIT(2)
+#define PF_INT_DIR_OICR_CAUSE		0x08406008
+#define PF_INT_DIR_OICR_CAUSE_CAUSE_S	0
+#define PF_INT_DIR_OICR_CAUSE_CAUSE_M	GENMASK(31, 0)
+#define PF_INT_PBA_CLEAR		0x0840600C
+
+#define PF_FUNC_RID			0x08406010
+#define PF_FUNC_RID_FUNCTION_NUMBER_S	0
+#define PF_FUNC_RID_FUNCTION_NUMBER_M	GENMASK(2, 0)
+#define PF_FUNC_RID_DEVICE_NUMBER_S	3
+#define PF_FUNC_RID_DEVICE_NUMBER_M	GENMASK(7, 3)
+#define PF_FUNC_RID_BUS_NUMBER_S	8
+#define PF_FUNC_RID_BUS_NUMBER_M	GENMASK(15, 8)
+
+/* Reset registers */
+#define PFGEN_RTRIG			0x08407000
+#define PFGEN_RTRIG_CORER_S		0
+#define PFGEN_RTRIG_CORER_M		BIT(0)
+#define PFGEN_RTRIG_LINKR_S		1
+#define PFGEN_RTRIG_LINKR_M		BIT(1)
+#define PFGEN_RTRIG_IMCR_S		2
+#define PFGEN_RTRIG_IMCR_M		BIT(2)
+#define PFGEN_RSTAT			0x08407008 /* PFR Status */
+#define PFGEN_RSTAT_PFR_STATE_S		0
+#define PFGEN_RSTAT_PFR_STATE_M		GENMASK(1, 0)
+#define PFGEN_CTRL			0x0840700C
+#define PFGEN_CTRL_PFSWR		BIT(0)
+
+#endif
diff --git a/drivers/net/ethernet/intel/idpf/idpf_lan_txrx.h b/drivers/net/ethernet/intel/idpf/idpf_lan_txrx.h
new file mode 100644
index 000000000000..a5752dcab888
--- /dev/null
+++ b/drivers/net/ethernet/intel/idpf/idpf_lan_txrx.h
@@ -0,0 +1,293 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright (C) 2023 Intel Corporation */
+
+#ifndef _IDPF_LAN_TXRX_H_
+#define _IDPF_LAN_TXRX_H_
+
+enum idpf_rss_hash {
+	IDPF_HASH_INVALID			= 0,
+	/* Values 1 - 28 are reserved for future use */
+	IDPF_HASH_NONF_UNICAST_IPV4_UDP		= 29,
+	IDPF_HASH_NONF_MULTICAST_IPV4_UDP,
+	IDPF_HASH_NONF_IPV4_UDP,
+	IDPF_HASH_NONF_IPV4_TCP_SYN_NO_ACK,
+	IDPF_HASH_NONF_IPV4_TCP,
+	IDPF_HASH_NONF_IPV4_SCTP,
+	IDPF_HASH_NONF_IPV4_OTHER,
+	IDPF_HASH_FRAG_IPV4,
+	/* Values 37-38 are reserved */
+	IDPF_HASH_NONF_UNICAST_IPV6_UDP		= 39,
+	IDPF_HASH_NONF_MULTICAST_IPV6_UDP,
+	IDPF_HASH_NONF_IPV6_UDP,
+	IDPF_HASH_NONF_IPV6_TCP_SYN_NO_ACK,
+	IDPF_HASH_NONF_IPV6_TCP,
+	IDPF_HASH_NONF_IPV6_SCTP,
+	IDPF_HASH_NONF_IPV6_OTHER,
+	IDPF_HASH_FRAG_IPV6,
+	IDPF_HASH_NONF_RSVD47,
+	IDPF_HASH_NONF_FCOE_OX,
+	IDPF_HASH_NONF_FCOE_RX,
+	IDPF_HASH_NONF_FCOE_OTHER,
+	/* Values 51-62 are reserved */
+	IDPF_HASH_L2_PAYLOAD			= 63,
+
+	IDPF_HASH_MAX
+};
+
+/* Supported RSS offloads */
+#define IDPF_DEFAULT_RSS_HASH			\
+	(BIT_ULL(IDPF_HASH_NONF_IPV4_UDP) |	\
+	BIT_ULL(IDPF_HASH_NONF_IPV4_SCTP) |	\
+	BIT_ULL(IDPF_HASH_NONF_IPV4_TCP) |	\
+	BIT_ULL(IDPF_HASH_NONF_IPV4_OTHER) |	\
+	BIT_ULL(IDPF_HASH_FRAG_IPV4) |		\
+	BIT_ULL(IDPF_HASH_NONF_IPV6_UDP) |	\
+	BIT_ULL(IDPF_HASH_NONF_IPV6_TCP) |	\
+	BIT_ULL(IDPF_HASH_NONF_IPV6_SCTP) |	\
+	BIT_ULL(IDPF_HASH_NONF_IPV6_OTHER) |	\
+	BIT_ULL(IDPF_HASH_FRAG_IPV6) |		\
+	BIT_ULL(IDPF_HASH_L2_PAYLOAD))
+
+#define IDPF_DEFAULT_RSS_HASH_EXPANDED (IDPF_DEFAULT_RSS_HASH | \
+	BIT_ULL(IDPF_HASH_NONF_IPV4_TCP_SYN_NO_ACK) |		\
+	BIT_ULL(IDPF_HASH_NONF_UNICAST_IPV4_UDP) |		\
+	BIT_ULL(IDPF_HASH_NONF_MULTICAST_IPV4_UDP) |		\
+	BIT_ULL(IDPF_HASH_NONF_IPV6_TCP_SYN_NO_ACK) |		\
+	BIT_ULL(IDPF_HASH_NONF_UNICAST_IPV6_UDP) |		\
+	BIT_ULL(IDPF_HASH_NONF_MULTICAST_IPV6_UDP))
+
+/* For idpf_splitq_base_tx_compl_desc */
+#define IDPF_TXD_COMPLQ_GEN_S		15
+#define IDPF_TXD_COMPLQ_GEN_M		BIT_ULL(IDPF_TXD_COMPLQ_GEN_S)
+#define IDPF_TXD_COMPLQ_COMPL_TYPE_S	11
+#define IDPF_TXD_COMPLQ_COMPL_TYPE_M	GENMASK_ULL(13, 11)
+#define IDPF_TXD_COMPLQ_QID_S		0
+#define IDPF_TXD_COMPLQ_QID_M		GENMASK_ULL(9, 0)
+
+/* For base mode TX descriptors */
+
+#define IDPF_TXD_CTX_QW0_TUNN_L4T_CS_S	23
+#define IDPF_TXD_CTX_QW0_TUNN_L4T_CS_M	BIT_ULL(IDPF_TXD_CTX_QW0_TUNN_L4T_CS_S)
+#define IDPF_TXD_CTX_QW0_TUNN_DECTTL_S	19
+#define IDPF_TXD_CTX_QW0_TUNN_DECTTL_M	\
+	(0xFULL << IDPF_TXD_CTX_QW0_TUNN_DECTTL_S)
+#define IDPF_TXD_CTX_QW0_TUNN_NATLEN_S	12
+#define IDPF_TXD_CTX_QW0_TUNN_NATLEN_M	\
+	(0X7FULL << IDPF_TXD_CTX_QW0_TUNN_NATLEN_S)
+#define IDPF_TXD_CTX_QW0_TUNN_EIP_NOINC_S	11
+#define IDPF_TXD_CTX_QW0_TUNN_EIP_NOINC_M    \
+	BIT_ULL(IDPF_TXD_CTX_QW0_TUNN_EIP_NOINC_S)
+#define IDPF_TXD_CTX_EIP_NOINC_IPID_CONST	\
+	IDPF_TXD_CTX_QW0_TUNN_EIP_NOINC_M
+#define IDPF_TXD_CTX_QW0_TUNN_NATT_S	        9
+#define IDPF_TXD_CTX_QW0_TUNN_NATT_M	(0x3ULL << IDPF_TXD_CTX_QW0_TUNN_NATT_S)
+#define IDPF_TXD_CTX_UDP_TUNNELING	BIT_ULL(IDPF_TXD_CTX_QW0_TUNN_NATT_S)
+#define IDPF_TXD_CTX_GRE_TUNNELING	(0x2ULL << IDPF_TXD_CTX_QW0_TUNN_NATT_S)
+#define IDPF_TXD_CTX_QW0_TUNN_EXT_IPLEN_S	2
+#define IDPF_TXD_CTX_QW0_TUNN_EXT_IPLEN_M	\
+	(0x3FULL << IDPF_TXD_CTX_QW0_TUNN_EXT_IPLEN_S)
+#define IDPF_TXD_CTX_QW0_TUNN_EXT_IP_S	0
+#define IDPF_TXD_CTX_QW0_TUNN_EXT_IP_M	\
+	(0x3ULL << IDPF_TXD_CTX_QW0_TUNN_EXT_IP_S)
+
+#define IDPF_TXD_CTX_QW1_MSS_S		50
+#define IDPF_TXD_CTX_QW1_MSS_M		GENMASK_ULL(63, 50)
+#define IDPF_TXD_CTX_QW1_TSO_LEN_S	30
+#define IDPF_TXD_CTX_QW1_TSO_LEN_M	GENMASK_ULL(47, 30)
+#define IDPF_TXD_CTX_QW1_CMD_S		4
+#define IDPF_TXD_CTX_QW1_CMD_M		GENMASK_ULL(15, 4)
+#define IDPF_TXD_CTX_QW1_DTYPE_S	0
+#define IDPF_TXD_CTX_QW1_DTYPE_M	GENMASK_ULL(3, 0)
+#define IDPF_TXD_QW1_L2TAG1_S		48
+#define IDPF_TXD_QW1_L2TAG1_M		GENMASK_ULL(63, 48)
+#define IDPF_TXD_QW1_TX_BUF_SZ_S	34
+#define IDPF_TXD_QW1_TX_BUF_SZ_M	GENMASK_ULL(47, 34)
+#define IDPF_TXD_QW1_OFFSET_S		16
+#define IDPF_TXD_QW1_OFFSET_M		GENMASK_ULL(33, 16)
+#define IDPF_TXD_QW1_CMD_S		4
+#define IDPF_TXD_QW1_CMD_M		GENMASK_ULL(15, 4)
+#define IDPF_TXD_QW1_DTYPE_S		0
+#define IDPF_TXD_QW1_DTYPE_M		GENMASK_ULL(3, 0)
+
+/* TX Completion Descriptor Completion Types */
+#define IDPF_TXD_COMPLT_ITR_FLUSH	0
+/* Descriptor completion type 1 is reserved */
+#define IDPF_TXD_COMPLT_RS		2
+/* Descriptor completion type 3 is reserved */
+#define IDPF_TXD_COMPLT_RE		4
+#define IDPF_TXD_COMPLT_SW_MARKER	5
+
+enum idpf_tx_desc_dtype_value {
+	IDPF_TX_DESC_DTYPE_DATA				= 0,
+	IDPF_TX_DESC_DTYPE_CTX				= 1,
+	/* DTYPE 2 is reserved
+	 * DTYPE 3 is free for future use
+	 * DTYPE 4 is reserved
+	 */
+	IDPF_TX_DESC_DTYPE_FLEX_TSO_CTX			= 5,
+	/* DTYPE 6 is reserved */
+	IDPF_TX_DESC_DTYPE_FLEX_L2TAG1_L2TAG2		= 7,
+	/* DTYPE 8, 9 are free for future use
+	 * DTYPE 10 is reserved
+	 * DTYPE 11 is free for future use
+	 */
+	IDPF_TX_DESC_DTYPE_FLEX_FLOW_SCHE		= 12,
+	/* DTYPE 13, 14 are free for future use */
+
+	/* DESC_DONE - HW has completed write-back of descriptor */
+	IDPF_TX_DESC_DTYPE_DESC_DONE			= 15,
+};
+
+enum idpf_tx_ctx_desc_cmd_bits {
+	IDPF_TX_CTX_DESC_TSO		= 0x01,
+	IDPF_TX_CTX_DESC_TSYN		= 0x02,
+	IDPF_TX_CTX_DESC_IL2TAG2	= 0x04,
+	IDPF_TX_CTX_DESC_RSVD		= 0x08,
+	IDPF_TX_CTX_DESC_SWTCH_NOTAG	= 0x00,
+	IDPF_TX_CTX_DESC_SWTCH_UPLINK	= 0x10,
+	IDPF_TX_CTX_DESC_SWTCH_LOCAL	= 0x20,
+	IDPF_TX_CTX_DESC_SWTCH_VSI	= 0x30,
+	IDPF_TX_CTX_DESC_FILT_AU_EN	= 0x40,
+	IDPF_TX_CTX_DESC_FILT_AU_EVICT	= 0x80,
+	IDPF_TX_CTX_DESC_RSVD1		= 0xF00
+};
+
+enum idpf_tx_desc_len_fields {
+	/* Note: These are predefined bit offsets */
+	IDPF_TX_DESC_LEN_MACLEN_S	= 0, /* 7 BITS */
+	IDPF_TX_DESC_LEN_IPLEN_S	= 7, /* 7 BITS */
+	IDPF_TX_DESC_LEN_L4_LEN_S	= 14 /* 4 BITS */
+};
+
+enum idpf_tx_base_desc_cmd_bits {
+	IDPF_TX_DESC_CMD_EOP			= BIT(0),
+	IDPF_TX_DESC_CMD_RS			= BIT(1),
+	 /* only on VFs else RSVD */
+	IDPF_TX_DESC_CMD_ICRC			= BIT(2),
+	IDPF_TX_DESC_CMD_IL2TAG1		= BIT(3),
+	IDPF_TX_DESC_CMD_RSVD1			= BIT(4),
+	IDPF_TX_DESC_CMD_IIPT_IPV6		= BIT(5),
+	IDPF_TX_DESC_CMD_IIPT_IPV4		= BIT(6),
+	IDPF_TX_DESC_CMD_IIPT_IPV4_CSUM		= GENMASK(6, 5),
+	IDPF_TX_DESC_CMD_RSVD2			= BIT(7),
+	IDPF_TX_DESC_CMD_L4T_EOFT_TCP		= BIT(8),
+	IDPF_TX_DESC_CMD_L4T_EOFT_SCTP		= BIT(9),
+	IDPF_TX_DESC_CMD_L4T_EOFT_UDP		= GENMASK(9, 8),
+	IDPF_TX_DESC_CMD_RSVD3			= BIT(10),
+	IDPF_TX_DESC_CMD_RSVD4			= BIT(11),
+};
+
+/* Transmit descriptors  */
+/* splitq tx buf, singleq tx buf and singleq compl desc */
+struct idpf_base_tx_desc {
+	__le64 buf_addr; /* Address of descriptor's data buf */
+	__le64 qw1; /* type_cmd_offset_bsz_l2tag1 */
+}; /* read used with buffer queues */
+
+struct idpf_splitq_tx_compl_desc {
+	/* qid=[10:0] comptype=[13:11] rsvd=[14] gen=[15] */
+	__le16 qid_comptype_gen;
+	union {
+		__le16 q_head; /* Queue head */
+		__le16 compl_tag; /* Completion tag */
+	} q_head_compl_tag;
+	u8 ts[3];
+	u8 rsvd; /* Reserved */
+}; /* writeback used with completion queues */
+
+/* Context descriptors */
+struct idpf_base_tx_ctx_desc {
+	struct {
+		__le32 tunneling_params;
+		__le16 l2tag2;
+		__le16 rsvd1;
+	} qw0;
+	__le64 qw1; /* type_cmd_tlen_mss/rt_hint */
+};
+
+/* Common cmd field defines for all desc except Flex Flow Scheduler (0x0C) */
+enum idpf_tx_flex_desc_cmd_bits {
+	IDPF_TX_FLEX_DESC_CMD_EOP			= BIT(0),
+	IDPF_TX_FLEX_DESC_CMD_RS			= BIT(1),
+	IDPF_TX_FLEX_DESC_CMD_RE			= BIT(2),
+	IDPF_TX_FLEX_DESC_CMD_IL2TAG1			= BIT(3),
+	IDPF_TX_FLEX_DESC_CMD_DUMMY			= BIT(4),
+	IDPF_TX_FLEX_DESC_CMD_CS_EN			= BIT(5),
+	IDPF_TX_FLEX_DESC_CMD_FILT_AU_EN		= BIT(6),
+	IDPF_TX_FLEX_DESC_CMD_FILT_AU_EVICT		= BIT(7),
+};
+
+struct idpf_flex_tx_desc {
+	__le64 buf_addr;	/* Packet buffer address */
+	struct {
+#define IDPF_FLEX_TXD_QW1_DTYPE_S	0
+#define IDPF_FLEX_TXD_QW1_DTYPE_M	GENMASK(4, 0)
+#define IDPF_FLEX_TXD_QW1_CMD_S		5
+#define IDPF_FLEX_TXD_QW1_CMD_M		GENMASK(15, 5)
+		__le16 cmd_dtype;
+		/* DTYPE=IDPF_TX_DESC_DTYPE_FLEX_L2TAG1_L2TAG2 (0x07) */
+		struct {
+			__le16 l2tag1;
+			__le16 l2tag2;
+		} l2tags;
+		__le16 buf_size;
+	} qw1;
+};
+
+struct idpf_flex_tx_sched_desc {
+	__le64 buf_addr;	/* Packet buffer address */
+
+	/* DTYPE = IDPF_TX_DESC_DTYPE_FLEX_FLOW_SCHE_16B (0x0C) */
+	struct {
+		u8 cmd_dtype;
+#define IDPF_TXD_FLEX_FLOW_DTYPE_M	GENMASK(4, 0)
+#define IDPF_TXD_FLEX_FLOW_CMD_EOP	BIT(5)
+#define IDPF_TXD_FLEX_FLOW_CMD_CS_EN	BIT(6)
+#define IDPF_TXD_FLEX_FLOW_CMD_RE	BIT(7)
+
+		/* [23:23] Horizon Overflow bit, [22:0] timestamp */
+		u8 ts[3];
+#define IDPF_TXD_FLOW_SCH_HORIZON_OVERFLOW_M	BIT(7)
+
+		__le16 compl_tag;
+		__le16 rxr_bufsize;
+#define IDPF_TXD_FLEX_FLOW_RXR		BIT(14)
+#define IDPF_TXD_FLEX_FLOW_BUFSIZE_M	GENMASK(13, 0)
+	} qw1;
+};
+
+/* Common cmd fields for all flex context descriptors
+ * Note: these defines already account for the 5 bit dtype in the cmd_dtype
+ * field
+ */
+enum idpf_tx_flex_ctx_desc_cmd_bits {
+	IDPF_TX_FLEX_CTX_DESC_CMD_TSO			= BIT(5),
+	IDPF_TX_FLEX_CTX_DESC_CMD_TSYN_EN		= BIT(6),
+	IDPF_TX_FLEX_CTX_DESC_CMD_L2TAG2		= BIT(7),
+	IDPF_TX_FLEX_CTX_DESC_CMD_SWTCH_UPLNK		= BIT(9),
+	IDPF_TX_FLEX_CTX_DESC_CMD_SWTCH_LOCAL		= BIT(10),
+	IDPF_TX_FLEX_CTX_DESC_CMD_SWTCH_TARGETVSI	= GENMASK(10, 9),
+};
+
+/* Standard flex descriptor TSO context quad word */
+struct idpf_flex_tx_tso_ctx_qw {
+	__le32 flex_tlen;
+#define IDPF_TXD_FLEX_CTX_TLEN_M	GENMASK(17, 0)
+#define IDPF_TXD_FLEX_TSO_CTX_FLEX_S	24
+	__le16 mss_rt;
+#define IDPF_TXD_FLEX_CTX_MSS_RT_M	GENMASK(13, 0)
+	u8 hdr_len;
+	u8 flex;
+};
+
+struct idpf_flex_tx_ctx_desc {
+	/* DTYPE = IDPF_TX_DESC_DTYPE_FLEX_TSO_CTX (0x05) */
+	struct {
+		struct idpf_flex_tx_tso_ctx_qw qw0;
+		struct {
+			__le16 cmd_dtype;
+			u8 flex[6];
+		} qw1;
+	} tso;
+};
+#endif /* _IDPF_LAN_TXRX_H_ */
diff --git a/drivers/net/ethernet/intel/idpf/idpf_lan_vf_regs.h b/drivers/net/ethernet/intel/idpf/idpf_lan_vf_regs.h
new file mode 100644
index 000000000000..3d73b6c76863
--- /dev/null
+++ b/drivers/net/ethernet/intel/idpf/idpf_lan_vf_regs.h
@@ -0,0 +1,128 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright (C) 2023 Intel Corporation */
+
+#ifndef _IDPF_LAN_VF_REGS_H_
+#define _IDPF_LAN_VF_REGS_H_
+
+/* Reset */
+#define VFGEN_RSTAT			0x00008800
+#define VFGEN_RSTAT_VFR_STATE_S		0
+#define VFGEN_RSTAT_VFR_STATE_M		GENMASK(1, 0)
+
+/* Control(VF Mailbox) Queue */
+#define VF_BASE				0x00006000
+
+#define VF_ATQBAL			(VF_BASE + 0x1C00)
+#define VF_ATQBAH			(VF_BASE + 0x1800)
+#define VF_ATQLEN			(VF_BASE + 0x0800)
+#define VF_ATQLEN_ATQLEN_S		0
+#define VF_ATQLEN_ATQLEN_M		GENMASK(9, 0)
+#define VF_ATQLEN_ATQVFE_S		28
+#define VF_ATQLEN_ATQVFE_M		BIT(VF_ATQLEN_ATQVFE_S)
+#define VF_ATQLEN_ATQOVFL_S		29
+#define VF_ATQLEN_ATQOVFL_M		BIT(VF_ATQLEN_ATQOVFL_S)
+#define VF_ATQLEN_ATQCRIT_S		30
+#define VF_ATQLEN_ATQCRIT_M		BIT(VF_ATQLEN_ATQCRIT_S)
+#define VF_ATQLEN_ATQENABLE_S		31
+#define VF_ATQLEN_ATQENABLE_M		BIT(VF_ATQLEN_ATQENABLE_S)
+#define VF_ATQH				(VF_BASE + 0x0400)
+#define VF_ATQH_ATQH_S			0
+#define VF_ATQH_ATQH_M			GENMASK(9, 0)
+#define VF_ATQT				(VF_BASE + 0x2400)
+
+#define VF_ARQBAL			(VF_BASE + 0x0C00)
+#define VF_ARQBAH			(VF_BASE)
+#define VF_ARQLEN			(VF_BASE + 0x2000)
+#define VF_ARQLEN_ARQLEN_S		0
+#define VF_ARQLEN_ARQLEN_M		GENMASK(9, 0)
+#define VF_ARQLEN_ARQVFE_S		28
+#define VF_ARQLEN_ARQVFE_M		BIT(VF_ARQLEN_ARQVFE_S)
+#define VF_ARQLEN_ARQOVFL_S		29
+#define VF_ARQLEN_ARQOVFL_M		BIT(VF_ARQLEN_ARQOVFL_S)
+#define VF_ARQLEN_ARQCRIT_S		30
+#define VF_ARQLEN_ARQCRIT_M		BIT(VF_ARQLEN_ARQCRIT_S)
+#define VF_ARQLEN_ARQENABLE_S		31
+#define VF_ARQLEN_ARQENABLE_M		BIT(VF_ARQLEN_ARQENABLE_S)
+#define VF_ARQH				(VF_BASE + 0x1400)
+#define VF_ARQH_ARQH_S			0
+#define VF_ARQH_ARQH_M			GENMASK(12, 0)
+#define VF_ARQT				(VF_BASE + 0x1000)
+
+/* Transmit queues */
+#define VF_QTX_TAIL_BASE		0x00000000
+#define VF_QTX_TAIL(_QTX)		(VF_QTX_TAIL_BASE + (_QTX) * 0x4)
+#define VF_QTX_TAIL_EXT_BASE		0x00040000
+#define VF_QTX_TAIL_EXT(_QTX)		(VF_QTX_TAIL_EXT_BASE + ((_QTX) * 4))
+
+/* Receive queues */
+#define VF_QRX_TAIL_BASE		0x00002000
+#define VF_QRX_TAIL(_QRX)		(VF_QRX_TAIL_BASE + ((_QRX) * 4))
+#define VF_QRX_TAIL_EXT_BASE		0x00050000
+#define VF_QRX_TAIL_EXT(_QRX)		(VF_QRX_TAIL_EXT_BASE + ((_QRX) * 4))
+#define VF_QRXB_TAIL_BASE		0x00060000
+#define VF_QRXB_TAIL(_QRX)		(VF_QRXB_TAIL_BASE + ((_QRX) * 4))
+
+/* Interrupts */
+#define VF_INT_DYN_CTL0			0x00005C00
+#define VF_INT_DYN_CTL0_INTENA_S	0
+#define VF_INT_DYN_CTL0_INTENA_M	BIT(VF_INT_DYN_CTL0_INTENA_S)
+#define VF_INT_DYN_CTL0_ITR_INDX_S	3
+#define VF_INT_DYN_CTL0_ITR_INDX_M	GENMASK(4, 3)
+#define VF_INT_DYN_CTLN(_INT)		(0x00003800 + ((_INT) * 4))
+#define VF_INT_DYN_CTLN_EXT(_INT)	(0x00070000 + ((_INT) * 4))
+#define VF_INT_DYN_CTLN_INTENA_S	0
+#define VF_INT_DYN_CTLN_INTENA_M	BIT(VF_INT_DYN_CTLN_INTENA_S)
+#define VF_INT_DYN_CTLN_CLEARPBA_S	1
+#define VF_INT_DYN_CTLN_CLEARPBA_M	BIT(VF_INT_DYN_CTLN_CLEARPBA_S)
+#define VF_INT_DYN_CTLN_SWINT_TRIG_S	2
+#define VF_INT_DYN_CTLN_SWINT_TRIG_M	BIT(VF_INT_DYN_CTLN_SWINT_TRIG_S)
+#define VF_INT_DYN_CTLN_ITR_INDX_S	3
+#define VF_INT_DYN_CTLN_ITR_INDX_M	GENMASK(4, 3)
+#define VF_INT_DYN_CTLN_INTERVAL_S	5
+#define VF_INT_DYN_CTLN_INTERVAL_M	BIT(VF_INT_DYN_CTLN_INTERVAL_S)
+#define VF_INT_DYN_CTLN_SW_ITR_INDX_ENA_S 24
+#define VF_INT_DYN_CTLN_SW_ITR_INDX_ENA_M BIT(VF_INT_DYN_CTLN_SW_ITR_INDX_ENA_S)
+#define VF_INT_DYN_CTLN_SW_ITR_INDX_S	25
+#define VF_INT_DYN_CTLN_SW_ITR_INDX_M	BIT(VF_INT_DYN_CTLN_SW_ITR_INDX_S)
+#define VF_INT_DYN_CTLN_WB_ON_ITR_S	30
+#define VF_INT_DYN_CTLN_WB_ON_ITR_M	BIT(VF_INT_DYN_CTLN_WB_ON_ITR_S)
+#define VF_INT_DYN_CTLN_INTENA_MSK_S	31
+#define VF_INT_DYN_CTLN_INTENA_MSK_M	BIT(VF_INT_DYN_CTLN_INTENA_MSK_S)
+/* _ITR is ITR index, _INT is interrupt index, _itrn_indx_spacing is spacing
+ * b/w itrn registers of the same vector
+ */
+#define VF_INT_ITR0(_ITR)		(0x00004C00 + ((_ITR) * 4))
+#define VF_INT_ITRN_ADDR(_ITR, _reg_start, _itrn_indx_spacing)	\
+	((_reg_start) + ((_ITR) * (_itrn_indx_spacing)))
+/* For VF with 16 vector support, itrn_reg_spacing is 0x4, itrn_indx_spacing
+ * is 0x40 and base register offset is 0x00002800
+ */
+#define VF_INT_ITRN(_INT, _ITR)		\
+	(0x00002800 + ((_INT) * 4) + ((_ITR) * 0x40))
+/* For VF with 64 vector support, itrn_reg_spacing is 0x4, itrn_indx_spacing
+ * is 0x100 and base register offset is 0x00002C00
+ */
+#define VF_INT_ITRN_64(_INT, _ITR)	\
+	(0x00002C00 + ((_INT) * 4) + ((_ITR) * 0x100))
+/* For VF with 2k vector support, itrn_reg_spacing is 0x4, itrn_indx_spacing
+ * is 0x2000 and base register offset is 0x00072000
+ */
+#define VF_INT_ITRN_2K(_INT, _ITR)	\
+	(0x00072000 + ((_INT) * 4) + ((_ITR) * 0x2000))
+#define VF_INT_ITRN_MAX_INDEX		2
+#define VF_INT_ITRN_INTERVAL_S		0
+#define VF_INT_ITRN_INTERVAL_M		GENMASK(11, 0)
+#define VF_INT_PBA_CLEAR		0x00008900
+
+#define VF_INT_ICR0_ENA1		0x00005000
+#define VF_INT_ICR0_ENA1_ADMINQ_S	30
+#define VF_INT_ICR0_ENA1_ADMINQ_M	BIT(VF_INT_ICR0_ENA1_ADMINQ_S)
+#define VF_INT_ICR0_ENA1_RSVD_S		31
+#define VF_INT_ICR01			0x00004800
+#define VF_QF_HENA(_i)			(0x0000C400 + ((_i) * 4))
+#define VF_QF_HENA_MAX_INDX		1
+#define VF_QF_HKEY(_i)			(0x0000CC00 + ((_i) * 4))
+#define VF_QF_HKEY_MAX_INDX		12
+#define VF_QF_HLUT(_i)			(0x0000D000 + ((_i) * 4))
+#define VF_QF_HLUT_MAX_INDX		15
+#endif
diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c
new file mode 100644
index 000000000000..19809b0ddcd9
--- /dev/null
+++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c
@@ -0,0 +1,2379 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2023 Intel Corporation */
+
+#include "idpf.h"
+
+static const struct net_device_ops idpf_netdev_ops_splitq;
+static const struct net_device_ops idpf_netdev_ops_singleq;
+
+const char * const idpf_vport_vc_state_str[] = {
+	IDPF_FOREACH_VPORT_VC_STATE(IDPF_GEN_STRING)
+};
+
+/**
+ * idpf_init_vector_stack - Fill the MSIX vector stack with vector index
+ * @adapter: private data struct
+ *
+ * Return 0 on success, error on failure
+ */
+static int idpf_init_vector_stack(struct idpf_adapter *adapter)
+{
+	struct idpf_vector_lifo *stack;
+	u16 min_vec;
+	u32 i;
+
+	mutex_lock(&adapter->vector_lock);
+	min_vec = adapter->num_msix_entries - adapter->num_avail_msix;
+	stack = &adapter->vector_stack;
+	stack->size = adapter->num_msix_entries;
+	/* set the base and top to point at start of the 'free pool' to
+	 * distribute the unused vectors on-demand basis
+	 */
+	stack->base = min_vec;
+	stack->top = min_vec;
+
+	stack->vec_idx = kcalloc(stack->size, sizeof(u16), GFP_KERNEL);
+	if (!stack->vec_idx) {
+		mutex_unlock(&adapter->vector_lock);
+
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < stack->size; i++)
+		stack->vec_idx[i] = i;
+
+	mutex_unlock(&adapter->vector_lock);
+
+	return 0;
+}
+
+/**
+ * idpf_deinit_vector_stack - zero out the MSIX vector stack
+ * @adapter: private data struct
+ */
+static void idpf_deinit_vector_stack(struct idpf_adapter *adapter)
+{
+	struct idpf_vector_lifo *stack;
+
+	mutex_lock(&adapter->vector_lock);
+	stack = &adapter->vector_stack;
+	kfree(stack->vec_idx);
+	stack->vec_idx = NULL;
+	mutex_unlock(&adapter->vector_lock);
+}
+
+/**
+ * idpf_mb_intr_rel_irq - Free the IRQ association with the OS
+ * @adapter: adapter structure
+ *
+ * This will also disable interrupt mode and queue up mailbox task. Mailbox
+ * task will reschedule itself if not in interrupt mode.
+ */
+static void idpf_mb_intr_rel_irq(struct idpf_adapter *adapter)
+{
+	clear_bit(IDPF_MB_INTR_MODE, adapter->flags);
+	free_irq(adapter->msix_entries[0].vector, adapter);
+	queue_delayed_work(adapter->mbx_wq, &adapter->mbx_task, 0);
+}
+
+/**
+ * idpf_intr_rel - Release interrupt capabilities and free memory
+ * @adapter: adapter to disable interrupts on
+ */
+void idpf_intr_rel(struct idpf_adapter *adapter)
+{
+	int err;
+
+	if (!adapter->msix_entries)
+		return;
+
+	idpf_mb_intr_rel_irq(adapter);
+	pci_free_irq_vectors(adapter->pdev);
+
+	err = idpf_send_dealloc_vectors_msg(adapter);
+	if (err)
+		dev_err(&adapter->pdev->dev,
+			"Failed to deallocate vectors: %d\n", err);
+
+	idpf_deinit_vector_stack(adapter);
+	kfree(adapter->msix_entries);
+	adapter->msix_entries = NULL;
+}
+
+/**
+ * idpf_mb_intr_clean - Interrupt handler for the mailbox
+ * @irq: interrupt number
+ * @data: pointer to the adapter structure
+ */
+static irqreturn_t idpf_mb_intr_clean(int __always_unused irq, void *data)
+{
+	struct idpf_adapter *adapter = (struct idpf_adapter *)data;
+
+	queue_delayed_work(adapter->mbx_wq, &adapter->mbx_task, 0);
+
+	return IRQ_HANDLED;
+}
+
+/**
+ * idpf_mb_irq_enable - Enable MSIX interrupt for the mailbox
+ * @adapter: adapter to get the hardware address for register write
+ */
+static void idpf_mb_irq_enable(struct idpf_adapter *adapter)
+{
+	struct idpf_intr_reg *intr = &adapter->mb_vector.intr_reg;
+	u32 val;
+
+	val = intr->dyn_ctl_intena_m | intr->dyn_ctl_itridx_m;
+	writel(val, intr->dyn_ctl);
+	writel(intr->icr_ena_ctlq_m, intr->icr_ena);
+}
+
+/**
+ * idpf_mb_intr_req_irq - Request irq for the mailbox interrupt
+ * @adapter: adapter structure to pass to the mailbox irq handler
+ */
+static int idpf_mb_intr_req_irq(struct idpf_adapter *adapter)
+{
+	struct idpf_q_vector *mb_vector = &adapter->mb_vector;
+	int irq_num, mb_vidx = 0, err;
+
+	irq_num = adapter->msix_entries[mb_vidx].vector;
+	mb_vector->name = kasprintf(GFP_KERNEL, "%s-%s-%d",
+				    dev_driver_string(&adapter->pdev->dev),
+				    "Mailbox", mb_vidx);
+	err = request_irq(irq_num, adapter->irq_mb_handler, 0,
+			  mb_vector->name, adapter);
+	if (err) {
+		dev_err(&adapter->pdev->dev,
+			"IRQ request for mailbox failed, error: %d\n", err);
+
+		return err;
+	}
+
+	set_bit(IDPF_MB_INTR_MODE, adapter->flags);
+
+	return 0;
+}
+
+/**
+ * idpf_set_mb_vec_id - Set vector index for mailbox
+ * @adapter: adapter structure to access the vector chunks
+ *
+ * The first vector id in the requested vector chunks from the CP is for
+ * the mailbox
+ */
+static void idpf_set_mb_vec_id(struct idpf_adapter *adapter)
+{
+	if (adapter->req_vec_chunks)
+		adapter->mb_vector.v_idx =
+			le16_to_cpu(adapter->caps.mailbox_vector_id);
+	else
+		adapter->mb_vector.v_idx = 0;
+}
+
+/**
+ * idpf_mb_intr_init - Initialize the mailbox interrupt
+ * @adapter: adapter structure to store the mailbox vector
+ */
+static int idpf_mb_intr_init(struct idpf_adapter *adapter)
+{
+	adapter->dev_ops.reg_ops.mb_intr_reg_init(adapter);
+	adapter->irq_mb_handler = idpf_mb_intr_clean;
+
+	return idpf_mb_intr_req_irq(adapter);
+}
+
+/**
+ * idpf_vector_lifo_push - push MSIX vector index onto stack
+ * @adapter: private data struct
+ * @vec_idx: vector index to store
+ */
+static int idpf_vector_lifo_push(struct idpf_adapter *adapter, u16 vec_idx)
+{
+	struct idpf_vector_lifo *stack = &adapter->vector_stack;
+
+	lockdep_assert_held(&adapter->vector_lock);
+
+	if (stack->top == stack->base) {
+		dev_err(&adapter->pdev->dev, "Exceeded the vector stack limit: %d\n",
+			stack->top);
+		return -EINVAL;
+	}
+
+	stack->vec_idx[--stack->top] = vec_idx;
+
+	return 0;
+}
+
+/**
+ * idpf_vector_lifo_pop - pop MSIX vector index from stack
+ * @adapter: private data struct
+ */
+static int idpf_vector_lifo_pop(struct idpf_adapter *adapter)
+{
+	struct idpf_vector_lifo *stack = &adapter->vector_stack;
+
+	lockdep_assert_held(&adapter->vector_lock);
+
+	if (stack->top == stack->size) {
+		dev_err(&adapter->pdev->dev, "No interrupt vectors are available to distribute!\n");
+
+		return -EINVAL;
+	}
+
+	return stack->vec_idx[stack->top++];
+}
+
+/**
+ * idpf_vector_stash - Store the vector indexes onto the stack
+ * @adapter: private data struct
+ * @q_vector_idxs: vector index array
+ * @vec_info: info related to the number of vectors
+ *
+ * This function is a no-op if there are no vectors indexes to be stashed
+ */
+static void idpf_vector_stash(struct idpf_adapter *adapter, u16 *q_vector_idxs,
+			      struct idpf_vector_info *vec_info)
+{
+	int i, base = 0;
+	u16 vec_idx;
+
+	lockdep_assert_held(&adapter->vector_lock);
+
+	if (!vec_info->num_curr_vecs)
+		return;
+
+	/* For default vports, no need to stash vector allocated from the
+	 * default pool onto the stack
+	 */
+	if (vec_info->default_vport)
+		base = IDPF_MIN_Q_VEC;
+
+	for (i = vec_info->num_curr_vecs - 1; i >= base ; i--) {
+		vec_idx = q_vector_idxs[i];
+		idpf_vector_lifo_push(adapter, vec_idx);
+		adapter->num_avail_msix++;
+	}
+}
+
+/**
+ * idpf_req_rel_vector_indexes - Request or release MSIX vector indexes
+ * @adapter: driver specific private structure
+ * @q_vector_idxs: vector index array
+ * @vec_info: info related to the number of vectors
+ *
+ * This is the core function to distribute the MSIX vectors acquired from the
+ * OS. It expects the caller to pass the number of vectors required and
+ * also previously allocated. First, it stashes previously allocated vector
+ * indexes on to the stack and then figures out if it can allocate requested
+ * vectors. It can wait on acquiring the mutex lock. If the caller passes 0 as
+ * requested vectors, then this function just stashes the already allocated
+ * vectors and returns 0.
+ *
+ * Returns actual number of vectors allocated on success, error value on failure
+ * If 0 is returned, implies the stack has no vectors to allocate which is also
+ * a failure case for the caller
+ */
+int idpf_req_rel_vector_indexes(struct idpf_adapter *adapter,
+				u16 *q_vector_idxs,
+				struct idpf_vector_info *vec_info)
+{
+	u16 num_req_vecs, num_alloc_vecs = 0, max_vecs;
+	struct idpf_vector_lifo *stack;
+	int i, j, vecid;
+
+	mutex_lock(&adapter->vector_lock);
+	stack = &adapter->vector_stack;
+	num_req_vecs = vec_info->num_req_vecs;
+
+	/* Stash interrupt vector indexes onto the stack if required */
+	idpf_vector_stash(adapter, q_vector_idxs, vec_info);
+
+	if (!num_req_vecs)
+		goto rel_lock;
+
+	if (vec_info->default_vport) {
+		/* As IDPF_MIN_Q_VEC per default vport is put aside in the
+		 * default pool of the stack, use them for default vports
+		 */
+		j = vec_info->index * IDPF_MIN_Q_VEC + IDPF_MBX_Q_VEC;
+		for (i = 0; i < IDPF_MIN_Q_VEC; i++) {
+			q_vector_idxs[num_alloc_vecs++] = stack->vec_idx[j++];
+			num_req_vecs--;
+		}
+	}
+
+	/* Find if stack has enough vector to allocate */
+	max_vecs = min(adapter->num_avail_msix, num_req_vecs);
+
+	for (j = 0; j < max_vecs; j++) {
+		vecid = idpf_vector_lifo_pop(adapter);
+		q_vector_idxs[num_alloc_vecs++] = vecid;
+	}
+	adapter->num_avail_msix -= max_vecs;
+
+rel_lock:
+	mutex_unlock(&adapter->vector_lock);
+
+	return num_alloc_vecs;
+}
+
+/**
+ * idpf_intr_req - Request interrupt capabilities
+ * @adapter: adapter to enable interrupts on
+ *
+ * Returns 0 on success, negative on failure
+ */
+int idpf_intr_req(struct idpf_adapter *adapter)
+{
+	u16 default_vports = idpf_get_default_vports(adapter);
+	int num_q_vecs, total_vecs, num_vec_ids;
+	int min_vectors, v_actual, err;
+	unsigned int vector;
+	u16 *vecids;
+
+	total_vecs = idpf_get_reserved_vecs(adapter);
+	num_q_vecs = total_vecs - IDPF_MBX_Q_VEC;
+
+	err = idpf_send_alloc_vectors_msg(adapter, num_q_vecs);
+	if (err) {
+		dev_err(&adapter->pdev->dev,
+			"Failed to allocate %d vectors: %d\n", num_q_vecs, err);
+
+		return -EAGAIN;
+	}
+
+	min_vectors = IDPF_MBX_Q_VEC + IDPF_MIN_Q_VEC * default_vports;
+	v_actual = pci_alloc_irq_vectors(adapter->pdev, min_vectors,
+					 total_vecs, PCI_IRQ_MSIX);
+	if (v_actual < min_vectors) {
+		dev_err(&adapter->pdev->dev, "Failed to allocate MSIX vectors: %d\n",
+			v_actual);
+		err = -EAGAIN;
+		goto send_dealloc_vecs;
+	}
+
+	adapter->msix_entries = kcalloc(v_actual, sizeof(struct msix_entry),
+					GFP_KERNEL);
+
+	if (!adapter->msix_entries) {
+		err = -ENOMEM;
+		goto free_irq;
+	}
+
+	idpf_set_mb_vec_id(adapter);
+
+	vecids = kcalloc(total_vecs, sizeof(u16), GFP_KERNEL);
+	if (!vecids) {
+		err = -ENOMEM;
+		goto free_msix;
+	}
+
+	if (adapter->req_vec_chunks) {
+		struct virtchnl2_vector_chunks *vchunks;
+		struct virtchnl2_alloc_vectors *ac;
+
+		ac = adapter->req_vec_chunks;
+		vchunks = &ac->vchunks;
+
+		num_vec_ids = idpf_get_vec_ids(adapter, vecids, total_vecs,
+					       vchunks);
+		if (num_vec_ids < v_actual) {
+			err = -EINVAL;
+			goto free_vecids;
+		}
+	} else {
+		int i;
+
+		for (i = 0; i < v_actual; i++)
+			vecids[i] = i;
+	}
+
+	for (vector = 0; vector < v_actual; vector++) {
+		adapter->msix_entries[vector].entry = vecids[vector];
+		adapter->msix_entries[vector].vector =
+			pci_irq_vector(adapter->pdev, vector);
+	}
+
+	adapter->num_req_msix = total_vecs;
+	adapter->num_msix_entries = v_actual;
+	/* 'num_avail_msix' is used to distribute excess vectors to the vports
+	 * after considering the minimum vectors required per each default
+	 * vport
+	 */
+	adapter->num_avail_msix = v_actual - min_vectors;
+
+	/* Fill MSIX vector lifo stack with vector indexes */
+	err = idpf_init_vector_stack(adapter);
+	if (err)
+		goto free_vecids;
+
+	err = idpf_mb_intr_init(adapter);
+	if (err)
+		goto deinit_vec_stack;
+	idpf_mb_irq_enable(adapter);
+	kfree(vecids);
+
+	return 0;
+
+deinit_vec_stack:
+	idpf_deinit_vector_stack(adapter);
+free_vecids:
+	kfree(vecids);
+free_msix:
+	kfree(adapter->msix_entries);
+	adapter->msix_entries = NULL;
+free_irq:
+	pci_free_irq_vectors(adapter->pdev);
+send_dealloc_vecs:
+	idpf_send_dealloc_vectors_msg(adapter);
+
+	return err;
+}
+
+/**
+ * idpf_find_mac_filter - Search filter list for specific mac filter
+ * @vconfig: Vport config structure
+ * @macaddr: The MAC address
+ *
+ * Returns ptr to the filter object or NULL. Must be called while holding the
+ * mac_filter_list_lock.
+ **/
+static struct idpf_mac_filter *idpf_find_mac_filter(struct idpf_vport_config *vconfig,
+						    const u8 *macaddr)
+{
+	struct idpf_mac_filter *f;
+
+	if (!macaddr)
+		return NULL;
+
+	list_for_each_entry(f, &vconfig->user_config.mac_filter_list, list) {
+		if (ether_addr_equal(macaddr, f->macaddr))
+			return f;
+	}
+
+	return NULL;
+}
+
+/**
+ * __idpf_del_mac_filter - Delete a MAC filter from the filter list
+ * @vport_config: Vport config structure
+ * @macaddr: The MAC address
+ *
+ * Returns 0 on success, error value on failure
+ **/
+static int __idpf_del_mac_filter(struct idpf_vport_config *vport_config,
+				 const u8 *macaddr)
+{
+	struct idpf_mac_filter *f;
+
+	spin_lock_bh(&vport_config->mac_filter_list_lock);
+	f = idpf_find_mac_filter(vport_config, macaddr);
+	if (f) {
+		list_del(&f->list);
+		kfree(f);
+	}
+	spin_unlock_bh(&vport_config->mac_filter_list_lock);
+
+	return 0;
+}
+
+/**
+ * idpf_del_mac_filter - Delete a MAC filter from the filter list
+ * @vport: Main vport structure
+ * @np: Netdev private structure
+ * @macaddr: The MAC address
+ * @async: Don't wait for return message
+ *
+ * Removes filter from list and if interface is up, tells hardware about the
+ * removed filter.
+ **/
+static int idpf_del_mac_filter(struct idpf_vport *vport,
+			       struct idpf_netdev_priv *np,
+			       const u8 *macaddr, bool async)
+{
+	struct idpf_vport_config *vport_config;
+	struct idpf_mac_filter *f;
+
+	vport_config = np->adapter->vport_config[np->vport_idx];
+
+	spin_lock_bh(&vport_config->mac_filter_list_lock);
+	f = idpf_find_mac_filter(vport_config, macaddr);
+	if (f) {
+		f->remove = true;
+	} else {
+		spin_unlock_bh(&vport_config->mac_filter_list_lock);
+
+		return -EINVAL;
+	}
+	spin_unlock_bh(&vport_config->mac_filter_list_lock);
+
+	if (np->state == __IDPF_VPORT_UP) {
+		int err;
+
+		err = idpf_add_del_mac_filters(vport, np, false, async);
+		if (err)
+			return err;
+	}
+
+	return  __idpf_del_mac_filter(vport_config, macaddr);
+}
+
+/**
+ * __idpf_add_mac_filter - Add mac filter helper function
+ * @vport_config: Vport config structure
+ * @macaddr: Address to add
+ *
+ * Takes mac_filter_list_lock spinlock to add new filter to list.
+ */
+static int __idpf_add_mac_filter(struct idpf_vport_config *vport_config,
+				 const u8 *macaddr)
+{
+	struct idpf_mac_filter *f;
+
+	spin_lock_bh(&vport_config->mac_filter_list_lock);
+
+	f = idpf_find_mac_filter(vport_config, macaddr);
+	if (f) {
+		f->remove = false;
+		spin_unlock_bh(&vport_config->mac_filter_list_lock);
+
+		return 0;
+	}
+
+	f = kzalloc(sizeof(*f), GFP_ATOMIC);
+	if (!f) {
+		spin_unlock_bh(&vport_config->mac_filter_list_lock);
+
+		return -ENOMEM;
+	}
+
+	ether_addr_copy(f->macaddr, macaddr);
+	list_add_tail(&f->list, &vport_config->user_config.mac_filter_list);
+	f->add = true;
+
+	spin_unlock_bh(&vport_config->mac_filter_list_lock);
+
+	return 0;
+}
+
+/**
+ * idpf_add_mac_filter - Add a mac filter to the filter list
+ * @vport: Main vport structure
+ * @np: Netdev private structure
+ * @macaddr: The MAC address
+ * @async: Don't wait for return message
+ *
+ * Returns 0 on success or error on failure. If interface is up, we'll also
+ * send the virtchnl message to tell hardware about the filter.
+ **/
+static int idpf_add_mac_filter(struct idpf_vport *vport,
+			       struct idpf_netdev_priv *np,
+			       const u8 *macaddr, bool async)
+{
+	struct idpf_vport_config *vport_config;
+	int err;
+
+	vport_config = np->adapter->vport_config[np->vport_idx];
+	err = __idpf_add_mac_filter(vport_config, macaddr);
+	if (err)
+		return err;
+
+	if (np->state == __IDPF_VPORT_UP)
+		err = idpf_add_del_mac_filters(vport, np, true, async);
+
+	return err;
+}
+
+/**
+ * idpf_del_all_mac_filters - Delete all MAC filters in list
+ * @vport: main vport struct
+ *
+ * Takes mac_filter_list_lock spinlock.  Deletes all filters
+ */
+static void idpf_del_all_mac_filters(struct idpf_vport *vport)
+{
+	struct idpf_vport_config *vport_config;
+	struct idpf_mac_filter *f, *ftmp;
+
+	vport_config = vport->adapter->vport_config[vport->idx];
+	spin_lock_bh(&vport_config->mac_filter_list_lock);
+
+	list_for_each_entry_safe(f, ftmp, &vport_config->user_config.mac_filter_list,
+				 list) {
+		list_del(&f->list);
+		kfree(f);
+	}
+
+	spin_unlock_bh(&vport_config->mac_filter_list_lock);
+}
+
+/**
+ * idpf_restore_mac_filters - Re-add all MAC filters in list
+ * @vport: main vport struct
+ *
+ * Takes mac_filter_list_lock spinlock.  Sets add field to true for filters to
+ * resync filters back to HW.
+ */
+static void idpf_restore_mac_filters(struct idpf_vport *vport)
+{
+	struct idpf_vport_config *vport_config;
+	struct idpf_mac_filter *f;
+
+	vport_config = vport->adapter->vport_config[vport->idx];
+	spin_lock_bh(&vport_config->mac_filter_list_lock);
+
+	list_for_each_entry(f, &vport_config->user_config.mac_filter_list, list)
+		f->add = true;
+
+	spin_unlock_bh(&vport_config->mac_filter_list_lock);
+
+	idpf_add_del_mac_filters(vport, netdev_priv(vport->netdev),
+				 true, false);
+}
+
+/**
+ * idpf_remove_mac_filters - Remove all MAC filters in list
+ * @vport: main vport struct
+ *
+ * Takes mac_filter_list_lock spinlock. Sets remove field to true for filters
+ * to remove filters in HW.
+ */
+static void idpf_remove_mac_filters(struct idpf_vport *vport)
+{
+	struct idpf_vport_config *vport_config;
+	struct idpf_mac_filter *f;
+
+	vport_config = vport->adapter->vport_config[vport->idx];
+	spin_lock_bh(&vport_config->mac_filter_list_lock);
+
+	list_for_each_entry(f, &vport_config->user_config.mac_filter_list, list)
+		f->remove = true;
+
+	spin_unlock_bh(&vport_config->mac_filter_list_lock);
+
+	idpf_add_del_mac_filters(vport, netdev_priv(vport->netdev),
+				 false, false);
+}
+
+/**
+ * idpf_deinit_mac_addr - deinitialize mac address for vport
+ * @vport: main vport structure
+ */
+static void idpf_deinit_mac_addr(struct idpf_vport *vport)
+{
+	struct idpf_vport_config *vport_config;
+	struct idpf_mac_filter *f;
+
+	vport_config = vport->adapter->vport_config[vport->idx];
+
+	spin_lock_bh(&vport_config->mac_filter_list_lock);
+
+	f = idpf_find_mac_filter(vport_config, vport->default_mac_addr);
+	if (f) {
+		list_del(&f->list);
+		kfree(f);
+	}
+
+	spin_unlock_bh(&vport_config->mac_filter_list_lock);
+}
+
+/**
+ * idpf_init_mac_addr - initialize mac address for vport
+ * @vport: main vport structure
+ * @netdev: pointer to netdev struct associated with this vport
+ */
+static int idpf_init_mac_addr(struct idpf_vport *vport,
+			      struct net_device *netdev)
+{
+	struct idpf_netdev_priv *np = netdev_priv(netdev);
+	struct idpf_adapter *adapter = vport->adapter;
+	int err;
+
+	if (is_valid_ether_addr(vport->default_mac_addr)) {
+		eth_hw_addr_set(netdev, vport->default_mac_addr);
+		ether_addr_copy(netdev->perm_addr, vport->default_mac_addr);
+
+		return idpf_add_mac_filter(vport, np, vport->default_mac_addr,
+					   false);
+	}
+
+	if (!idpf_is_cap_ena(adapter, IDPF_OTHER_CAPS,
+			     VIRTCHNL2_CAP_MACFILTER)) {
+		dev_err(&adapter->pdev->dev,
+			"MAC address is not provided and capability is not set\n");
+
+		return -EINVAL;
+	}
+
+	eth_hw_addr_random(netdev);
+	err = idpf_add_mac_filter(vport, np, netdev->dev_addr, false);
+	if (err)
+		return err;
+
+	dev_info(&adapter->pdev->dev, "Invalid MAC address %pM, using random %pM\n",
+		 vport->default_mac_addr, netdev->dev_addr);
+	ether_addr_copy(vport->default_mac_addr, netdev->dev_addr);
+
+	return 0;
+}
+
+/**
+ * idpf_cfg_netdev - Allocate, configure and register a netdev
+ * @vport: main vport structure
+ *
+ * Returns 0 on success, negative value on failure.
+ */
+static int idpf_cfg_netdev(struct idpf_vport *vport)
+{
+	struct idpf_adapter *adapter = vport->adapter;
+	struct idpf_vport_config *vport_config;
+	netdev_features_t dflt_features;
+	netdev_features_t offloads = 0;
+	struct idpf_netdev_priv *np;
+	struct net_device *netdev;
+	u16 idx = vport->idx;
+	int err;
+
+	vport_config = adapter->vport_config[idx];
+
+	/* It's possible we already have a netdev allocated and registered for
+	 * this vport
+	 */
+	if (test_bit(IDPF_VPORT_REG_NETDEV, vport_config->flags)) {
+		netdev = adapter->netdevs[idx];
+		np = netdev_priv(netdev);
+		np->vport = vport;
+		np->vport_idx = vport->idx;
+		np->vport_id = vport->vport_id;
+		vport->netdev = netdev;
+
+		return idpf_init_mac_addr(vport, netdev);
+	}
+
+	netdev = alloc_etherdev_mqs(sizeof(struct idpf_netdev_priv),
+				    vport_config->max_q.max_txq,
+				    vport_config->max_q.max_rxq);
+	if (!netdev)
+		return -ENOMEM;
+
+	vport->netdev = netdev;
+	np = netdev_priv(netdev);
+	np->vport = vport;
+	np->adapter = adapter;
+	np->vport_idx = vport->idx;
+	np->vport_id = vport->vport_id;
+
+	spin_lock_init(&np->stats_lock);
+
+	err = idpf_init_mac_addr(vport, netdev);
+	if (err) {
+		free_netdev(vport->netdev);
+		vport->netdev = NULL;
+
+		return err;
+	}
+
+	/* assign netdev_ops */
+	if (idpf_is_queue_model_split(vport->txq_model))
+		netdev->netdev_ops = &idpf_netdev_ops_splitq;
+	else
+		netdev->netdev_ops = &idpf_netdev_ops_singleq;
+
+	/* setup watchdog timeout value to be 5 second */
+	netdev->watchdog_timeo = 5 * HZ;
+
+	/* configure default MTU size */
+	netdev->min_mtu = ETH_MIN_MTU;
+	netdev->max_mtu = vport->max_mtu;
+
+	dflt_features = NETIF_F_SG	|
+			NETIF_F_HIGHDMA;
+
+	if (idpf_is_cap_ena_all(adapter, IDPF_RSS_CAPS, IDPF_CAP_RSS))
+		dflt_features |= NETIF_F_RXHASH;
+	if (idpf_is_cap_ena_all(adapter, IDPF_CSUM_CAPS, IDPF_CAP_RX_CSUM_L4V4))
+		dflt_features |= NETIF_F_IP_CSUM;
+	if (idpf_is_cap_ena_all(adapter, IDPF_CSUM_CAPS, IDPF_CAP_RX_CSUM_L4V6))
+		dflt_features |= NETIF_F_IPV6_CSUM;
+	if (idpf_is_cap_ena(adapter, IDPF_CSUM_CAPS, IDPF_CAP_RX_CSUM))
+		dflt_features |= NETIF_F_RXCSUM;
+	if (idpf_is_cap_ena_all(adapter, IDPF_CSUM_CAPS, IDPF_CAP_SCTP_CSUM))
+		dflt_features |= NETIF_F_SCTP_CRC;
+
+	if (idpf_is_cap_ena(adapter, IDPF_SEG_CAPS, VIRTCHNL2_CAP_SEG_IPV4_TCP))
+		dflt_features |= NETIF_F_TSO;
+	if (idpf_is_cap_ena(adapter, IDPF_SEG_CAPS, VIRTCHNL2_CAP_SEG_IPV6_TCP))
+		dflt_features |= NETIF_F_TSO6;
+	if (idpf_is_cap_ena_all(adapter, IDPF_SEG_CAPS,
+				VIRTCHNL2_CAP_SEG_IPV4_UDP |
+				VIRTCHNL2_CAP_SEG_IPV6_UDP))
+		dflt_features |= NETIF_F_GSO_UDP_L4;
+	if (idpf_is_cap_ena_all(adapter, IDPF_RSC_CAPS, IDPF_CAP_RSC))
+		offloads |= NETIF_F_GRO_HW;
+	/* advertise to stack only if offloads for encapsulated packets is
+	 * supported
+	 */
+	if (idpf_is_cap_ena(vport->adapter, IDPF_SEG_CAPS,
+			    VIRTCHNL2_CAP_SEG_TX_SINGLE_TUNNEL)) {
+		offloads |= NETIF_F_GSO_UDP_TUNNEL	|
+			    NETIF_F_GSO_GRE		|
+			    NETIF_F_GSO_GRE_CSUM	|
+			    NETIF_F_GSO_PARTIAL		|
+			    NETIF_F_GSO_UDP_TUNNEL_CSUM	|
+			    NETIF_F_GSO_IPXIP4		|
+			    NETIF_F_GSO_IPXIP6		|
+			    0;
+
+		if (!idpf_is_cap_ena_all(vport->adapter, IDPF_CSUM_CAPS,
+					 IDPF_CAP_TUNNEL_TX_CSUM))
+			netdev->gso_partial_features |=
+				NETIF_F_GSO_UDP_TUNNEL_CSUM;
+
+		netdev->gso_partial_features |= NETIF_F_GSO_GRE_CSUM;
+		offloads |= NETIF_F_TSO_MANGLEID;
+	}
+	if (idpf_is_cap_ena(adapter, IDPF_OTHER_CAPS, VIRTCHNL2_CAP_LOOPBACK))
+		offloads |= NETIF_F_LOOPBACK;
+
+	netdev->features |= dflt_features;
+	netdev->hw_features |= dflt_features | offloads;
+	netdev->hw_enc_features |= dflt_features | offloads;
+	idpf_set_ethtool_ops(netdev);
+	SET_NETDEV_DEV(netdev, &adapter->pdev->dev);
+
+	/* carrier off on init to avoid Tx hangs */
+	netif_carrier_off(netdev);
+
+	/* make sure transmit queues start off as stopped */
+	netif_tx_stop_all_queues(netdev);
+
+	/* The vport can be arbitrarily released so we need to also track
+	 * netdevs in the adapter struct
+	 */
+	adapter->netdevs[idx] = netdev;
+
+	return 0;
+}
+
+/**
+ * idpf_get_free_slot - get the next non-NULL location index in array
+ * @adapter: adapter in which to look for a free vport slot
+ */
+static int idpf_get_free_slot(struct idpf_adapter *adapter)
+{
+	unsigned int i;
+
+	for (i = 0; i < adapter->max_vports; i++) {
+		if (!adapter->vports[i])
+			return i;
+	}
+
+	return IDPF_NO_FREE_SLOT;
+}
+
+/**
+ * idpf_remove_features - Turn off feature configs
+ * @vport: virtual port structure
+ */
+static void idpf_remove_features(struct idpf_vport *vport)
+{
+	struct idpf_adapter *adapter = vport->adapter;
+
+	if (idpf_is_cap_ena(adapter, IDPF_OTHER_CAPS, VIRTCHNL2_CAP_MACFILTER))
+		idpf_remove_mac_filters(vport);
+}
+
+/**
+ * idpf_vport_stop - Disable a vport
+ * @vport: vport to disable
+ */
+static void idpf_vport_stop(struct idpf_vport *vport)
+{
+	struct idpf_netdev_priv *np = netdev_priv(vport->netdev);
+
+	if (np->state <= __IDPF_VPORT_DOWN)
+		return;
+
+	netif_carrier_off(vport->netdev);
+	netif_tx_disable(vport->netdev);
+
+	idpf_send_disable_vport_msg(vport);
+	idpf_send_disable_queues_msg(vport);
+	idpf_send_map_unmap_queue_vector_msg(vport, false);
+	/* Normally we ask for queues in create_vport, but if the number of
+	 * initially requested queues have changed, for example via ethtool
+	 * set channels, we do delete queues and then add the queues back
+	 * instead of deleting and reallocating the vport.
+	 */
+	if (test_and_clear_bit(IDPF_VPORT_DEL_QUEUES, vport->flags))
+		idpf_send_delete_queues_msg(vport);
+
+	idpf_remove_features(vport);
+
+	vport->link_up = false;
+	idpf_vport_intr_deinit(vport);
+	idpf_vport_intr_rel(vport);
+	idpf_vport_queues_rel(vport);
+	np->state = __IDPF_VPORT_DOWN;
+}
+
+/**
+ * idpf_stop - Disables a network interface
+ * @netdev: network interface device structure
+ *
+ * The stop entry point is called when an interface is de-activated by the OS,
+ * and the netdevice enters the DOWN state.  The hardware is still under the
+ * driver's control, but the netdev interface is disabled.
+ *
+ * Returns success only - not allowed to fail
+ */
+static int idpf_stop(struct net_device *netdev)
+{
+	struct idpf_netdev_priv *np = netdev_priv(netdev);
+	struct idpf_vport *vport;
+
+	if (test_bit(IDPF_REMOVE_IN_PROG, np->adapter->flags))
+		return 0;
+
+	idpf_vport_ctrl_lock(netdev);
+	vport = idpf_netdev_to_vport(netdev);
+
+	idpf_vport_stop(vport);
+
+	idpf_vport_ctrl_unlock(netdev);
+
+	return 0;
+}
+
+/**
+ * idpf_decfg_netdev - Unregister the netdev
+ * @vport: vport for which netdev to be unregistered
+ */
+static void idpf_decfg_netdev(struct idpf_vport *vport)
+{
+	struct idpf_adapter *adapter = vport->adapter;
+
+	unregister_netdev(vport->netdev);
+	free_netdev(vport->netdev);
+	vport->netdev = NULL;
+
+	adapter->netdevs[vport->idx] = NULL;
+}
+
+/**
+ * idpf_vport_rel - Delete a vport and free its resources
+ * @vport: the vport being removed
+ */
+static void idpf_vport_rel(struct idpf_vport *vport)
+{
+	struct idpf_adapter *adapter = vport->adapter;
+	struct idpf_vport_config *vport_config;
+	struct idpf_vector_info vec_info;
+	struct idpf_rss_data *rss_data;
+	struct idpf_vport_max_q max_q;
+	u16 idx = vport->idx;
+	int i;
+
+	vport_config = adapter->vport_config[vport->idx];
+	idpf_deinit_rss(vport);
+	rss_data = &vport_config->user_config.rss_data;
+	kfree(rss_data->rss_key);
+	rss_data->rss_key = NULL;
+
+	idpf_send_destroy_vport_msg(vport);
+
+	/* Set all bits as we dont know on which vc_state the vport vhnl_wq
+	 * is waiting on and wakeup the virtchnl workqueue even if it is
+	 * waiting for the response as we are going down
+	 */
+	for (i = 0; i < IDPF_VC_NBITS; i++)
+		set_bit(i, vport->vc_state);
+	wake_up(&vport->vchnl_wq);
+
+	mutex_destroy(&vport->vc_buf_lock);
+
+	/* Clear all the bits */
+	for (i = 0; i < IDPF_VC_NBITS; i++)
+		clear_bit(i, vport->vc_state);
+
+	/* Release all max queues allocated to the adapter's pool */
+	max_q.max_rxq = vport_config->max_q.max_rxq;
+	max_q.max_txq = vport_config->max_q.max_txq;
+	max_q.max_bufq = vport_config->max_q.max_bufq;
+	max_q.max_complq = vport_config->max_q.max_complq;
+	idpf_vport_dealloc_max_qs(adapter, &max_q);
+
+	/* Release all the allocated vectors on the stack */
+	vec_info.num_req_vecs = 0;
+	vec_info.num_curr_vecs = vport->num_q_vectors;
+	vec_info.default_vport = vport->default_vport;
+
+	idpf_req_rel_vector_indexes(adapter, vport->q_vector_idxs, &vec_info);
+
+	kfree(vport->q_vector_idxs);
+	vport->q_vector_idxs = NULL;
+
+	kfree(adapter->vport_params_recvd[idx]);
+	adapter->vport_params_recvd[idx] = NULL;
+	kfree(adapter->vport_params_reqd[idx]);
+	adapter->vport_params_reqd[idx] = NULL;
+	if (adapter->vport_config[idx]) {
+		kfree(adapter->vport_config[idx]->req_qs_chunks);
+		adapter->vport_config[idx]->req_qs_chunks = NULL;
+	}
+	kfree(vport);
+	adapter->num_alloc_vports--;
+}
+
+/**
+ * idpf_vport_dealloc - cleanup and release a given vport
+ * @vport: pointer to idpf vport structure
+ *
+ * returns nothing
+ */
+static void idpf_vport_dealloc(struct idpf_vport *vport)
+{
+	struct idpf_adapter *adapter = vport->adapter;
+	unsigned int i = vport->idx;
+
+	idpf_deinit_mac_addr(vport);
+	idpf_vport_stop(vport);
+
+	if (!test_bit(IDPF_HR_RESET_IN_PROG, adapter->flags))
+		idpf_decfg_netdev(vport);
+	if (test_bit(IDPF_REMOVE_IN_PROG, adapter->flags))
+		idpf_del_all_mac_filters(vport);
+
+	if (adapter->netdevs[i]) {
+		struct idpf_netdev_priv *np = netdev_priv(adapter->netdevs[i]);
+
+		np->vport = NULL;
+	}
+
+	idpf_vport_rel(vport);
+
+	adapter->vports[i] = NULL;
+	adapter->next_vport = idpf_get_free_slot(adapter);
+}
+
+/**
+ * idpf_vport_alloc - Allocates the next available struct vport in the adapter
+ * @adapter: board private structure
+ * @max_q: vport max queue info
+ *
+ * returns a pointer to a vport on success, NULL on failure.
+ */
+static struct idpf_vport *idpf_vport_alloc(struct idpf_adapter *adapter,
+					   struct idpf_vport_max_q *max_q)
+{
+	struct idpf_rss_data *rss_data;
+	u16 idx = adapter->next_vport;
+	struct idpf_vport *vport;
+	u16 num_max_q;
+
+	if (idx == IDPF_NO_FREE_SLOT)
+		return NULL;
+
+	vport = kzalloc(sizeof(*vport), GFP_KERNEL);
+	if (!vport)
+		return vport;
+
+	if (!adapter->vport_config[idx]) {
+		struct idpf_vport_config *vport_config;
+
+		vport_config = kzalloc(sizeof(*vport_config), GFP_KERNEL);
+		if (!vport_config) {
+			kfree(vport);
+
+			return NULL;
+		}
+
+		adapter->vport_config[idx] = vport_config;
+	}
+
+	vport->idx = idx;
+	vport->adapter = adapter;
+	vport->compln_clean_budget = IDPF_TX_COMPLQ_CLEAN_BUDGET;
+	vport->default_vport = adapter->num_alloc_vports <
+			       idpf_get_default_vports(adapter);
+
+	num_max_q = max(max_q->max_txq, max_q->max_rxq);
+	vport->q_vector_idxs = kcalloc(num_max_q, sizeof(u16), GFP_KERNEL);
+	if (!vport->q_vector_idxs) {
+		kfree(vport);
+
+		return NULL;
+	}
+	idpf_vport_init(vport, max_q);
+
+	/* This alloc is done separate from the LUT because it's not strictly
+	 * dependent on how many queues we have. If we change number of queues
+	 * and soft reset we'll need a new LUT but the key can remain the same
+	 * for as long as the vport exists.
+	 */
+	rss_data = &adapter->vport_config[idx]->user_config.rss_data;
+	rss_data->rss_key = kzalloc(rss_data->rss_key_size, GFP_KERNEL);
+	if (!rss_data->rss_key) {
+		kfree(vport);
+
+		return NULL;
+	}
+	/* Initialize default rss key */
+	netdev_rss_key_fill((void *)rss_data->rss_key, rss_data->rss_key_size);
+
+	/* fill vport slot in the adapter struct */
+	adapter->vports[idx] = vport;
+	adapter->vport_ids[idx] = idpf_get_vport_id(vport);
+
+	adapter->num_alloc_vports++;
+	/* prepare adapter->next_vport for next use */
+	adapter->next_vport = idpf_get_free_slot(adapter);
+
+	return vport;
+}
+
+/**
+ * idpf_get_stats64 - get statistics for network device structure
+ * @netdev: network interface device structure
+ * @stats: main device statistics structure
+ */
+static void idpf_get_stats64(struct net_device *netdev,
+			     struct rtnl_link_stats64 *stats)
+{
+	struct idpf_netdev_priv *np = netdev_priv(netdev);
+
+	spin_lock_bh(&np->stats_lock);
+	*stats = np->netstats;
+	spin_unlock_bh(&np->stats_lock);
+}
+
+/**
+ * idpf_statistics_task - Delayed task to get statistics over mailbox
+ * @work: work_struct handle to our data
+ */
+void idpf_statistics_task(struct work_struct *work)
+{
+	struct idpf_adapter *adapter;
+	int i;
+
+	adapter = container_of(work, struct idpf_adapter, stats_task.work);
+
+	for (i = 0; i < adapter->max_vports; i++) {
+		struct idpf_vport *vport = adapter->vports[i];
+
+		if (vport && !test_bit(IDPF_HR_RESET_IN_PROG, adapter->flags))
+			idpf_send_get_stats_msg(vport);
+	}
+
+	queue_delayed_work(adapter->stats_wq, &adapter->stats_task,
+			   msecs_to_jiffies(10000));
+}
+
+/**
+ * idpf_mbx_task - Delayed task to handle mailbox responses
+ * @work: work_struct handle
+ */
+void idpf_mbx_task(struct work_struct *work)
+{
+	struct idpf_adapter *adapter;
+
+	adapter = container_of(work, struct idpf_adapter, mbx_task.work);
+
+	if (test_bit(IDPF_MB_INTR_MODE, adapter->flags))
+		idpf_mb_irq_enable(adapter);
+	else
+		queue_delayed_work(adapter->mbx_wq, &adapter->mbx_task,
+				   msecs_to_jiffies(300));
+
+	idpf_recv_mb_msg(adapter, VIRTCHNL2_OP_UNKNOWN, NULL, 0);
+}
+
+/**
+ * idpf_service_task - Delayed task for handling mailbox responses
+ * @work: work_struct handle to our data
+ *
+ */
+void idpf_service_task(struct work_struct *work)
+{
+	struct idpf_adapter *adapter;
+
+	adapter = container_of(work, struct idpf_adapter, serv_task.work);
+
+	if (idpf_is_reset_detected(adapter) &&
+	    !idpf_is_reset_in_prog(adapter) &&
+	    !test_bit(IDPF_REMOVE_IN_PROG, adapter->flags)) {
+		dev_info(&adapter->pdev->dev, "HW reset detected\n");
+		set_bit(IDPF_HR_FUNC_RESET, adapter->flags);
+		queue_delayed_work(adapter->vc_event_wq,
+				   &adapter->vc_event_task,
+				   msecs_to_jiffies(10));
+	}
+
+	queue_delayed_work(adapter->serv_wq, &adapter->serv_task,
+			   msecs_to_jiffies(300));
+}
+
+/**
+ * idpf_restore_features - Restore feature configs
+ * @vport: virtual port structure
+ */
+static void idpf_restore_features(struct idpf_vport *vport)
+{
+	struct idpf_adapter *adapter = vport->adapter;
+
+	if (idpf_is_cap_ena(adapter, IDPF_OTHER_CAPS, VIRTCHNL2_CAP_MACFILTER))
+		idpf_restore_mac_filters(vport);
+}
+
+/**
+ * idpf_set_real_num_queues - set number of queues for netdev
+ * @vport: virtual port structure
+ *
+ * Returns 0 on success, negative on failure.
+ */
+static int idpf_set_real_num_queues(struct idpf_vport *vport)
+{
+	int err;
+
+	err = netif_set_real_num_rx_queues(vport->netdev, vport->num_rxq);
+	if (err)
+		return err;
+
+	return netif_set_real_num_tx_queues(vport->netdev, vport->num_txq);
+}
+
+/**
+ * idpf_up_complete - Complete interface up sequence
+ * @vport: virtual port structure
+ *
+ * Returns 0 on success, negative on failure.
+ */
+static int idpf_up_complete(struct idpf_vport *vport)
+{
+	struct idpf_netdev_priv *np = netdev_priv(vport->netdev);
+
+	if (vport->link_up && !netif_carrier_ok(vport->netdev)) {
+		netif_carrier_on(vport->netdev);
+		netif_tx_start_all_queues(vport->netdev);
+	}
+
+	np->state = __IDPF_VPORT_UP;
+
+	return 0;
+}
+
+/**
+ * idpf_rx_init_buf_tail - Write initial buffer ring tail value
+ * @vport: virtual port struct
+ */
+static void idpf_rx_init_buf_tail(struct idpf_vport *vport)
+{
+	int i, j;
+
+	for (i = 0; i < vport->num_rxq_grp; i++) {
+		struct idpf_rxq_group *grp = &vport->rxq_grps[i];
+
+		if (idpf_is_queue_model_split(vport->rxq_model)) {
+			for (j = 0; j < vport->num_bufqs_per_qgrp; j++) {
+				struct idpf_queue *q =
+					&grp->splitq.bufq_sets[j].bufq;
+
+				writel(q->next_to_alloc, q->tail);
+			}
+		} else {
+			for (j = 0; j < grp->singleq.num_rxq; j++) {
+				struct idpf_queue *q =
+					grp->singleq.rxqs[j];
+
+				writel(q->next_to_alloc, q->tail);
+			}
+		}
+	}
+}
+
+/**
+ * idpf_vport_open - Bring up a vport
+ * @vport: vport to bring up
+ * @alloc_res: allocate queue resources
+ */
+static int idpf_vport_open(struct idpf_vport *vport, bool alloc_res)
+{
+	struct idpf_netdev_priv *np = netdev_priv(vport->netdev);
+	struct idpf_adapter *adapter = vport->adapter;
+	struct idpf_vport_config *vport_config;
+	int err;
+
+	if (np->state != __IDPF_VPORT_DOWN)
+		return -EBUSY;
+
+	/* we do not allow interface up just yet */
+	netif_carrier_off(vport->netdev);
+
+	if (alloc_res) {
+		err = idpf_vport_queues_alloc(vport);
+		if (err)
+			return err;
+	}
+
+	err = idpf_vport_intr_alloc(vport);
+	if (err) {
+		dev_err(&adapter->pdev->dev, "Failed to allocate interrupts for vport %u: %d\n",
+			vport->vport_id, err);
+		goto queues_rel;
+	}
+
+	err = idpf_vport_queue_ids_init(vport);
+	if (err) {
+		dev_err(&adapter->pdev->dev, "Failed to initialize queue ids for vport %u: %d\n",
+			vport->vport_id, err);
+		goto intr_rel;
+	}
+
+	err = idpf_vport_intr_init(vport);
+	if (err) {
+		dev_err(&adapter->pdev->dev, "Failed to initialize interrupts for vport %u: %d\n",
+			vport->vport_id, err);
+		goto intr_rel;
+	}
+
+	err = idpf_rx_bufs_init_all(vport);
+	if (err) {
+		dev_err(&adapter->pdev->dev, "Failed to initialize RX buffers for vport %u: %d\n",
+			vport->vport_id, err);
+		goto intr_rel;
+	}
+
+	err = idpf_queue_reg_init(vport);
+	if (err) {
+		dev_err(&adapter->pdev->dev, "Failed to initialize queue registers for vport %u: %d\n",
+			vport->vport_id, err);
+		goto intr_rel;
+	}
+
+	idpf_rx_init_buf_tail(vport);
+
+	err = idpf_send_config_queues_msg(vport);
+	if (err) {
+		dev_err(&adapter->pdev->dev, "Failed to configure queues for vport %u, %d\n",
+			vport->vport_id, err);
+		goto intr_deinit;
+	}
+
+	err = idpf_send_map_unmap_queue_vector_msg(vport, true);
+	if (err) {
+		dev_err(&adapter->pdev->dev, "Failed to map queue vectors for vport %u: %d\n",
+			vport->vport_id, err);
+		goto intr_deinit;
+	}
+
+	err = idpf_send_enable_queues_msg(vport);
+	if (err) {
+		dev_err(&adapter->pdev->dev, "Failed to enable queues for vport %u: %d\n",
+			vport->vport_id, err);
+		goto unmap_queue_vectors;
+	}
+
+	err = idpf_send_enable_vport_msg(vport);
+	if (err) {
+		dev_err(&adapter->pdev->dev, "Failed to enable vport %u: %d\n",
+			vport->vport_id, err);
+		err = -EAGAIN;
+		goto disable_queues;
+	}
+
+	idpf_restore_features(vport);
+
+	vport_config = adapter->vport_config[vport->idx];
+	if (vport_config->user_config.rss_data.rss_lut)
+		err = idpf_config_rss(vport);
+	else
+		err = idpf_init_rss(vport);
+	if (err) {
+		dev_err(&adapter->pdev->dev, "Failed to initialize RSS for vport %u: %d\n",
+			vport->vport_id, err);
+		goto disable_vport;
+	}
+
+	err = idpf_up_complete(vport);
+	if (err) {
+		dev_err(&adapter->pdev->dev, "Failed to complete interface up for vport %u: %d\n",
+			vport->vport_id, err);
+		goto deinit_rss;
+	}
+
+	return 0;
+
+deinit_rss:
+	idpf_deinit_rss(vport);
+disable_vport:
+	idpf_send_disable_vport_msg(vport);
+disable_queues:
+	idpf_send_disable_queues_msg(vport);
+unmap_queue_vectors:
+	idpf_send_map_unmap_queue_vector_msg(vport, false);
+intr_deinit:
+	idpf_vport_intr_deinit(vport);
+intr_rel:
+	idpf_vport_intr_rel(vport);
+queues_rel:
+	idpf_vport_queues_rel(vport);
+
+	return err;
+}
+
+/**
+ * idpf_init_task - Delayed initialization task
+ * @work: work_struct handle to our data
+ *
+ * Init task finishes up pending work started in probe. Due to the asynchronous
+ * nature in which the device communicates with hardware, we may have to wait
+ * several milliseconds to get a response.  Instead of busy polling in probe,
+ * pulling it out into a delayed work task prevents us from bogging down the
+ * whole system waiting for a response from hardware.
+ */
+void idpf_init_task(struct work_struct *work)
+{
+	struct idpf_vport_config *vport_config;
+	struct idpf_vport_max_q max_q;
+	struct idpf_adapter *adapter;
+	struct idpf_netdev_priv *np;
+	struct idpf_vport *vport;
+	u16 num_default_vports;
+	struct pci_dev *pdev;
+	bool default_vport;
+	int index, err;
+
+	adapter = container_of(work, struct idpf_adapter, init_task.work);
+
+	num_default_vports = idpf_get_default_vports(adapter);
+	if (adapter->num_alloc_vports < num_default_vports)
+		default_vport = true;
+	else
+		default_vport = false;
+
+	err = idpf_vport_alloc_max_qs(adapter, &max_q);
+	if (err)
+		goto unwind_vports;
+
+	err = idpf_send_create_vport_msg(adapter, &max_q);
+	if (err) {
+		idpf_vport_dealloc_max_qs(adapter, &max_q);
+		goto unwind_vports;
+	}
+
+	pdev = adapter->pdev;
+	vport = idpf_vport_alloc(adapter, &max_q);
+	if (!vport) {
+		err = -EFAULT;
+		dev_err(&pdev->dev, "failed to allocate vport: %d\n",
+			err);
+		idpf_vport_dealloc_max_qs(adapter, &max_q);
+		goto unwind_vports;
+	}
+
+	index = vport->idx;
+	vport_config = adapter->vport_config[index];
+
+	init_waitqueue_head(&vport->sw_marker_wq);
+	init_waitqueue_head(&vport->vchnl_wq);
+
+	mutex_init(&vport->vc_buf_lock);
+	spin_lock_init(&vport_config->mac_filter_list_lock);
+
+	INIT_LIST_HEAD(&vport_config->user_config.mac_filter_list);
+
+	err = idpf_check_supported_desc_ids(vport);
+	if (err) {
+		dev_err(&pdev->dev, "failed to get required descriptor ids\n");
+		goto cfg_netdev_err;
+	}
+
+	if (idpf_cfg_netdev(vport))
+		goto cfg_netdev_err;
+
+	err = idpf_send_get_rx_ptype_msg(vport);
+	if (err)
+		goto handle_err;
+
+	/* Once state is put into DOWN, driver is ready for dev_open */
+	np = netdev_priv(vport->netdev);
+	np->state = __IDPF_VPORT_DOWN;
+	if (test_and_clear_bit(IDPF_VPORT_UP_REQUESTED, vport_config->flags))
+		idpf_vport_open(vport, true);
+
+	/* Spawn and return 'idpf_init_task' work queue until all the
+	 * default vports are created
+	 */
+	if (adapter->num_alloc_vports < num_default_vports) {
+		queue_delayed_work(adapter->init_wq, &adapter->init_task,
+				   msecs_to_jiffies(5 * (adapter->pdev->devfn & 0x07)));
+
+		return;
+	}
+
+	for (index = 0; index < adapter->max_vports; index++) {
+		if (adapter->netdevs[index] &&
+		    !test_bit(IDPF_VPORT_REG_NETDEV,
+			      adapter->vport_config[index]->flags)) {
+			register_netdev(adapter->netdevs[index]);
+			set_bit(IDPF_VPORT_REG_NETDEV,
+				adapter->vport_config[index]->flags);
+		}
+	}
+
+	/* As all the required vports are created, clear the reset flag
+	 * unconditionally here in case we were in reset and the link was down.
+	 */
+	clear_bit(IDPF_HR_RESET_IN_PROG, adapter->flags);
+	/* Start the statistics task now */
+	queue_delayed_work(adapter->stats_wq, &adapter->stats_task,
+			   msecs_to_jiffies(10 * (pdev->devfn & 0x07)));
+
+	return;
+
+handle_err:
+	idpf_decfg_netdev(vport);
+cfg_netdev_err:
+	idpf_vport_rel(vport);
+	adapter->vports[index] = NULL;
+unwind_vports:
+	if (default_vport) {
+		for (index = 0; index < adapter->max_vports; index++) {
+			if (adapter->vports[index])
+				idpf_vport_dealloc(adapter->vports[index]);
+		}
+	}
+	clear_bit(IDPF_HR_RESET_IN_PROG, adapter->flags);
+}
+
+/**
+ * idpf_sriov_ena - Enable or change number of VFs
+ * @adapter: private data struct
+ * @num_vfs: number of VFs to allocate
+ */
+static int idpf_sriov_ena(struct idpf_adapter *adapter, int num_vfs)
+{
+	struct device *dev = &adapter->pdev->dev;
+	int err;
+
+	err = idpf_send_set_sriov_vfs_msg(adapter, num_vfs);
+	if (err) {
+		dev_err(dev, "Failed to allocate VFs: %d\n", err);
+
+		return err;
+	}
+
+	err = pci_enable_sriov(adapter->pdev, num_vfs);
+	if (err) {
+		idpf_send_set_sriov_vfs_msg(adapter, 0);
+		dev_err(dev, "Failed to enable SR-IOV: %d\n", err);
+
+		return err;
+	}
+
+	adapter->num_vfs = num_vfs;
+
+	return num_vfs;
+}
+
+/**
+ * idpf_sriov_configure - Configure the requested VFs
+ * @pdev: pointer to a pci_dev structure
+ * @num_vfs: number of vfs to allocate
+ *
+ * Enable or change the number of VFs. Called when the user updates the number
+ * of VFs in sysfs.
+ **/
+int idpf_sriov_configure(struct pci_dev *pdev, int num_vfs)
+{
+	struct idpf_adapter *adapter = pci_get_drvdata(pdev);
+
+	if (!idpf_is_cap_ena(adapter, IDPF_OTHER_CAPS, VIRTCHNL2_CAP_SRIOV)) {
+		dev_info(&pdev->dev, "SR-IOV is not supported on this device\n");
+
+		return -EOPNOTSUPP;
+	}
+
+	if (num_vfs)
+		return idpf_sriov_ena(adapter, num_vfs);
+
+	if (pci_vfs_assigned(pdev)) {
+		dev_warn(&pdev->dev, "Unable to free VFs because some are assigned to VMs\n");
+
+		return -EBUSY;
+	}
+
+	pci_disable_sriov(adapter->pdev);
+	idpf_send_set_sriov_vfs_msg(adapter, 0);
+	adapter->num_vfs = 0;
+
+	return 0;
+}
+
+/**
+ * idpf_deinit_task - Device deinit routine
+ * @adapter: Driver specific private structure
+ *
+ * Extended remove logic which will be used for
+ * hard reset as well
+ */
+void idpf_deinit_task(struct idpf_adapter *adapter)
+{
+	unsigned int i;
+
+	/* Wait until the init_task is done else this thread might release
+	 * the resources first and the other thread might end up in a bad state
+	 */
+	cancel_delayed_work_sync(&adapter->init_task);
+
+	if (!adapter->vports)
+		return;
+
+	cancel_delayed_work_sync(&adapter->stats_task);
+
+	for (i = 0; i < adapter->max_vports; i++) {
+		if (adapter->vports[i])
+			idpf_vport_dealloc(adapter->vports[i]);
+	}
+}
+
+/**
+ * idpf_check_reset_complete - check that reset is complete
+ * @hw: pointer to hw struct
+ * @reset_reg: struct with reset registers
+ *
+ * Returns 0 if device is ready to use, or -EBUSY if it's in reset.
+ **/
+static int idpf_check_reset_complete(struct idpf_hw *hw,
+				     struct idpf_reset_reg *reset_reg)
+{
+	struct idpf_adapter *adapter = hw->back;
+	int i;
+
+	for (i = 0; i < 2000; i++) {
+		u32 reg_val = readl(reset_reg->rstat);
+
+		/* 0xFFFFFFFF might be read if other side hasn't cleared the
+		 * register for us yet and 0xFFFFFFFF is not a valid value for
+		 * the register, so treat that as invalid.
+		 */
+		if (reg_val != 0xFFFFFFFF && (reg_val & reset_reg->rstat_m))
+			return 0;
+
+		usleep_range(5000, 10000);
+	}
+
+	dev_warn(&adapter->pdev->dev, "Device reset timeout!\n");
+	/* Clear the reset flag unconditionally here since the reset
+	 * technically isn't in progress anymore from the driver's perspective
+	 */
+	clear_bit(IDPF_HR_RESET_IN_PROG, adapter->flags);
+
+	return -EBUSY;
+}
+
+/**
+ * idpf_set_vport_state - Set the vport state to be after the reset
+ * @adapter: Driver specific private structure
+ */
+static void idpf_set_vport_state(struct idpf_adapter *adapter)
+{
+	u16 i;
+
+	for (i = 0; i < adapter->max_vports; i++) {
+		struct idpf_netdev_priv *np;
+
+		if (!adapter->netdevs[i])
+			continue;
+
+		np = netdev_priv(adapter->netdevs[i]);
+		if (np->state == __IDPF_VPORT_UP)
+			set_bit(IDPF_VPORT_UP_REQUESTED,
+				adapter->vport_config[i]->flags);
+	}
+}
+
+/**
+ * idpf_init_hard_reset - Initiate a hardware reset
+ * @adapter: Driver specific private structure
+ *
+ * Deallocate the vports and all the resources associated with them and
+ * reallocate. Also reinitialize the mailbox. Return 0 on success,
+ * negative on failure.
+ */
+static int idpf_init_hard_reset(struct idpf_adapter *adapter)
+{
+	struct idpf_reg_ops *reg_ops = &adapter->dev_ops.reg_ops;
+	struct device *dev = &adapter->pdev->dev;
+	struct net_device *netdev;
+	int err;
+	u16 i;
+
+	mutex_lock(&adapter->vport_ctrl_lock);
+
+	dev_info(dev, "Device HW Reset initiated\n");
+
+	/* Avoid TX hangs on reset */
+	for (i = 0; i < adapter->max_vports; i++) {
+		netdev = adapter->netdevs[i];
+		if (!netdev)
+			continue;
+
+		netif_carrier_off(netdev);
+		netif_tx_disable(netdev);
+	}
+
+	/* Prepare for reset */
+	if (test_and_clear_bit(IDPF_HR_DRV_LOAD, adapter->flags)) {
+		reg_ops->trigger_reset(adapter, IDPF_HR_DRV_LOAD);
+	} else if (test_and_clear_bit(IDPF_HR_FUNC_RESET, adapter->flags)) {
+		bool is_reset = idpf_is_reset_detected(adapter);
+
+		idpf_set_vport_state(adapter);
+		idpf_vc_core_deinit(adapter);
+		if (!is_reset)
+			reg_ops->trigger_reset(adapter, IDPF_HR_FUNC_RESET);
+		idpf_deinit_dflt_mbx(adapter);
+	} else {
+		dev_err(dev, "Unhandled hard reset cause\n");
+		err = -EBADRQC;
+		goto unlock_mutex;
+	}
+
+	/* Wait for reset to complete */
+	err = idpf_check_reset_complete(&adapter->hw, &adapter->reset_reg);
+	if (err) {
+		dev_err(dev, "The driver was unable to contact the device's firmware. Check that the FW is running. Driver state= 0x%x\n",
+			adapter->state);
+		goto unlock_mutex;
+	}
+
+	/* Reset is complete and so start building the driver resources again */
+	err = idpf_init_dflt_mbx(adapter);
+	if (err) {
+		dev_err(dev, "Failed to initialize default mailbox: %d\n", err);
+		goto unlock_mutex;
+	}
+
+	/* Initialize the state machine, also allocate memory and request
+	 * resources
+	 */
+	err = idpf_vc_core_init(adapter);
+	if (err) {
+		idpf_deinit_dflt_mbx(adapter);
+		goto unlock_mutex;
+	}
+
+	/* Wait till all the vports are initialized to release the reset lock,
+	 * else user space callbacks may access uninitialized vports
+	 */
+	while (test_bit(IDPF_HR_RESET_IN_PROG, adapter->flags))
+		msleep(100);
+
+unlock_mutex:
+	mutex_unlock(&adapter->vport_ctrl_lock);
+
+	return err;
+}
+
+/**
+ * idpf_vc_event_task - Handle virtchannel event logic
+ * @work: work queue struct
+ */
+void idpf_vc_event_task(struct work_struct *work)
+{
+	struct idpf_adapter *adapter;
+
+	adapter = container_of(work, struct idpf_adapter, vc_event_task.work);
+
+	if (test_bit(IDPF_REMOVE_IN_PROG, adapter->flags))
+		return;
+
+	if (test_bit(IDPF_HR_FUNC_RESET, adapter->flags) ||
+	    test_bit(IDPF_HR_DRV_LOAD, adapter->flags)) {
+		set_bit(IDPF_HR_RESET_IN_PROG, adapter->flags);
+		idpf_init_hard_reset(adapter);
+	}
+}
+
+/**
+ * idpf_initiate_soft_reset - Initiate a software reset
+ * @vport: virtual port data struct
+ * @reset_cause: reason for the soft reset
+ *
+ * Soft reset only reallocs vport queue resources. Returns 0 on success,
+ * negative on failure.
+ */
+int idpf_initiate_soft_reset(struct idpf_vport *vport,
+			     enum idpf_vport_reset_cause reset_cause)
+{
+	struct idpf_netdev_priv *np = netdev_priv(vport->netdev);
+	enum idpf_vport_state current_state = np->state;
+	struct idpf_adapter *adapter = vport->adapter;
+	struct idpf_vport *new_vport;
+	int err, i;
+
+	/* If the system is low on memory, we can end up in bad state if we
+	 * free all the memory for queue resources and try to allocate them
+	 * again. Instead, we can pre-allocate the new resources before doing
+	 * anything and bailing if the alloc fails.
+	 *
+	 * Make a clone of the existing vport to mimic its current
+	 * configuration, then modify the new structure with any requested
+	 * changes. Once the allocation of the new resources is done, stop the
+	 * existing vport and copy the configuration to the main vport. If an
+	 * error occurred, the existing vport will be untouched.
+	 *
+	 */
+	new_vport = kzalloc(sizeof(*vport), GFP_KERNEL);
+	if (!new_vport)
+		return -ENOMEM;
+
+	/* This purposely avoids copying the end of the struct because it
+	 * contains wait_queues and mutexes and other stuff we don't want to
+	 * mess with. Nothing below should use those variables from new_vport
+	 * and should instead always refer to them in vport if they need to.
+	 */
+	memcpy(new_vport, vport, offsetof(struct idpf_vport, vc_state));
+
+	/* Adjust resource parameters prior to reallocating resources */
+	switch (reset_cause) {
+	case IDPF_SR_Q_CHANGE:
+		err = idpf_vport_adjust_qs(new_vport);
+		if (err)
+			goto free_vport;
+		break;
+	case IDPF_SR_Q_DESC_CHANGE:
+		/* Update queue parameters before allocating resources */
+		idpf_vport_calc_num_q_desc(new_vport);
+		break;
+	case IDPF_SR_MTU_CHANGE:
+	case IDPF_SR_RSC_CHANGE:
+		break;
+	default:
+		dev_err(&adapter->pdev->dev, "Unhandled soft reset cause\n");
+		err = -EINVAL;
+		goto free_vport;
+	}
+
+	err = idpf_vport_queues_alloc(new_vport);
+	if (err)
+		goto free_vport;
+	if (current_state <= __IDPF_VPORT_DOWN) {
+		idpf_send_delete_queues_msg(vport);
+	} else {
+		set_bit(IDPF_VPORT_DEL_QUEUES, vport->flags);
+		idpf_vport_stop(vport);
+	}
+
+	idpf_deinit_rss(vport);
+	/* We're passing in vport here because we need its wait_queue
+	 * to send a message and it should be getting all the vport
+	 * config data out of the adapter but we need to be careful not
+	 * to add code to add_queues to change the vport config within
+	 * vport itself as it will be wiped with a memcpy later.
+	 */
+	err = idpf_send_add_queues_msg(vport, new_vport->num_txq,
+				       new_vport->num_complq,
+				       new_vport->num_rxq,
+				       new_vport->num_bufq);
+	if (err)
+		goto err_reset;
+
+	/* Same comment as above regarding avoiding copying the wait_queues and
+	 * mutexes applies here. We do not want to mess with those if possible.
+	 */
+	memcpy(vport, new_vport, offsetof(struct idpf_vport, vc_state));
+
+	/* Since idpf_vport_queues_alloc was called with new_port, the queue
+	 * back pointers are currently pointing to the local new_vport. Reset
+	 * the backpointers to the original vport here
+	 */
+	for (i = 0; i < vport->num_txq_grp; i++) {
+		struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i];
+		int j;
+
+		tx_qgrp->vport = vport;
+		for (j = 0; j < tx_qgrp->num_txq; j++)
+			tx_qgrp->txqs[j]->vport = vport;
+
+		if (idpf_is_queue_model_split(vport->txq_model))
+			tx_qgrp->complq->vport = vport;
+	}
+
+	for (i = 0; i < vport->num_rxq_grp; i++) {
+		struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i];
+		struct idpf_queue *q;
+		u16 num_rxq;
+		int j;
+
+		rx_qgrp->vport = vport;
+		for (j = 0; j < vport->num_bufqs_per_qgrp; j++)
+			rx_qgrp->splitq.bufq_sets[j].bufq.vport = vport;
+
+		if (idpf_is_queue_model_split(vport->rxq_model))
+			num_rxq = rx_qgrp->splitq.num_rxq_sets;
+		else
+			num_rxq = rx_qgrp->singleq.num_rxq;
+
+		for (j = 0; j < num_rxq; j++) {
+			if (idpf_is_queue_model_split(vport->rxq_model))
+				q = &rx_qgrp->splitq.rxq_sets[j]->rxq;
+			else
+				q = rx_qgrp->singleq.rxqs[j];
+			q->vport = vport;
+		}
+	}
+
+	if (reset_cause == IDPF_SR_Q_CHANGE)
+		idpf_vport_alloc_vec_indexes(vport);
+
+	err = idpf_set_real_num_queues(vport);
+	if (err)
+		goto err_reset;
+
+	if (current_state == __IDPF_VPORT_UP)
+		err = idpf_vport_open(vport, false);
+
+	kfree(new_vport);
+
+	return err;
+
+err_reset:
+	idpf_vport_queues_rel(new_vport);
+free_vport:
+	kfree(new_vport);
+
+	return err;
+}
+
+/**
+ * idpf_addr_sync - Callback for dev_(mc|uc)_sync to add address
+ * @netdev: the netdevice
+ * @addr: address to add
+ *
+ * Called by __dev_(mc|uc)_sync when an address needs to be added. We call
+ * __dev_(uc|mc)_sync from .set_rx_mode. Kernel takes addr_list_lock spinlock
+ * meaning we cannot sleep in this context. Due to this, we have to add the
+ * filter and send the virtchnl message asynchronously without waiting for the
+ * response from the other side. We won't know whether or not the operation
+ * actually succeeded until we get the message back.  Returns 0 on success,
+ * negative on failure.
+ */
+static int idpf_addr_sync(struct net_device *netdev, const u8 *addr)
+{
+	struct idpf_netdev_priv *np = netdev_priv(netdev);
+
+	return idpf_add_mac_filter(np->vport, np, addr, true);
+}
+
+/**
+ * idpf_addr_unsync - Callback for dev_(mc|uc)_sync to remove address
+ * @netdev: the netdevice
+ * @addr: address to add
+ *
+ * Called by __dev_(mc|uc)_sync when an address needs to be added. We call
+ * __dev_(uc|mc)_sync from .set_rx_mode. Kernel takes addr_list_lock spinlock
+ * meaning we cannot sleep in this context. Due to this we have to delete the
+ * filter and send the virtchnl message asynchronously without waiting for the
+ * return from the other side.  We won't know whether or not the operation
+ * actually succeeded until we get the message back. Returns 0 on success,
+ * negative on failure.
+ */
+static int idpf_addr_unsync(struct net_device *netdev, const u8 *addr)
+{
+	struct idpf_netdev_priv *np = netdev_priv(netdev);
+
+	/* Under some circumstances, we might receive a request to delete
+	 * our own device address from our uc list. Because we store the
+	 * device address in the VSI's MAC filter list, we need to ignore
+	 * such requests and not delete our device address from this list.
+	 */
+	if (ether_addr_equal(addr, netdev->dev_addr))
+		return 0;
+
+	idpf_del_mac_filter(np->vport, np, addr, true);
+
+	return 0;
+}
+
+/**
+ * idpf_set_rx_mode - NDO callback to set the netdev filters
+ * @netdev: network interface device structure
+ *
+ * Stack takes addr_list_lock spinlock before calling our .set_rx_mode.  We
+ * cannot sleep in this context.
+ */
+static void idpf_set_rx_mode(struct net_device *netdev)
+{
+	struct idpf_netdev_priv *np = netdev_priv(netdev);
+	struct idpf_vport_user_config_data *config_data;
+	struct idpf_adapter *adapter;
+	bool changed = false;
+	struct device *dev;
+	int err;
+
+	adapter = np->adapter;
+	dev = &adapter->pdev->dev;
+
+	if (idpf_is_cap_ena(adapter, IDPF_OTHER_CAPS, VIRTCHNL2_CAP_MACFILTER)) {
+		__dev_uc_sync(netdev, idpf_addr_sync, idpf_addr_unsync);
+		__dev_mc_sync(netdev, idpf_addr_sync, idpf_addr_unsync);
+	}
+
+	if (!idpf_is_cap_ena(adapter, IDPF_OTHER_CAPS, VIRTCHNL2_CAP_PROMISC))
+		return;
+
+	config_data = &adapter->vport_config[np->vport_idx]->user_config;
+	/* IFF_PROMISC enables both unicast and multicast promiscuous,
+	 * while IFF_ALLMULTI only enables multicast such that:
+	 *
+	 * promisc  + allmulti		= unicast | multicast
+	 * promisc  + !allmulti		= unicast | multicast
+	 * !promisc + allmulti		= multicast
+	 */
+	if ((netdev->flags & IFF_PROMISC) &&
+	    !test_and_set_bit(__IDPF_PROMISC_UC, config_data->user_flags)) {
+		changed = true;
+		dev_info(&adapter->pdev->dev, "Entering promiscuous mode\n");
+		if (!test_and_set_bit(__IDPF_PROMISC_MC, adapter->flags))
+			dev_info(dev, "Entering multicast promiscuous mode\n");
+	}
+
+	if (!(netdev->flags & IFF_PROMISC) &&
+	    test_and_clear_bit(__IDPF_PROMISC_UC, config_data->user_flags)) {
+		changed = true;
+		dev_info(dev, "Leaving promiscuous mode\n");
+	}
+
+	if (netdev->flags & IFF_ALLMULTI &&
+	    !test_and_set_bit(__IDPF_PROMISC_MC, config_data->user_flags)) {
+		changed = true;
+		dev_info(dev, "Entering multicast promiscuous mode\n");
+	}
+
+	if (!(netdev->flags & (IFF_ALLMULTI | IFF_PROMISC)) &&
+	    test_and_clear_bit(__IDPF_PROMISC_MC, config_data->user_flags)) {
+		changed = true;
+		dev_info(dev, "Leaving multicast promiscuous mode\n");
+	}
+
+	if (!changed)
+		return;
+
+	err = idpf_set_promiscuous(adapter, config_data, np->vport_id);
+	if (err)
+		dev_err(dev, "Failed to set promiscuous mode: %d\n", err);
+}
+
+/**
+ * idpf_vport_manage_rss_lut - disable/enable RSS
+ * @vport: the vport being changed
+ *
+ * In the event of disable request for RSS, this function will zero out RSS
+ * LUT, while in the event of enable request for RSS, it will reconfigure RSS
+ * LUT with the default LUT configuration.
+ */
+static int idpf_vport_manage_rss_lut(struct idpf_vport *vport)
+{
+	bool ena = idpf_is_feature_ena(vport, NETIF_F_RXHASH);
+	struct idpf_rss_data *rss_data;
+	u16 idx = vport->idx;
+	int lut_size;
+
+	rss_data = &vport->adapter->vport_config[idx]->user_config.rss_data;
+	lut_size = rss_data->rss_lut_size * sizeof(u32);
+
+	if (ena) {
+		/* This will contain the default or user configured LUT */
+		memcpy(rss_data->rss_lut, rss_data->cached_lut, lut_size);
+	} else {
+		/* Save a copy of the current LUT to be restored later if
+		 * requested.
+		 */
+		memcpy(rss_data->cached_lut, rss_data->rss_lut, lut_size);
+
+		/* Zero out the current LUT to disable */
+		memset(rss_data->rss_lut, 0, lut_size);
+	}
+
+	return idpf_config_rss(vport);
+}
+
+/**
+ * idpf_set_features - set the netdev feature flags
+ * @netdev: ptr to the netdev being adjusted
+ * @features: the feature set that the stack is suggesting
+ */
+static int idpf_set_features(struct net_device *netdev,
+			     netdev_features_t features)
+{
+	netdev_features_t changed = netdev->features ^ features;
+	struct idpf_adapter *adapter;
+	struct idpf_vport *vport;
+	int err = 0;
+
+	idpf_vport_ctrl_lock(netdev);
+	vport = idpf_netdev_to_vport(netdev);
+
+	adapter = vport->adapter;
+
+	if (idpf_is_reset_in_prog(adapter)) {
+		dev_err(&adapter->pdev->dev, "Device is resetting, changing netdev features temporarily unavailable.\n");
+		err = -EBUSY;
+		goto unlock_mutex;
+	}
+
+	if (changed & NETIF_F_RXHASH) {
+		netdev->features ^= NETIF_F_RXHASH;
+		err = idpf_vport_manage_rss_lut(vport);
+		if (err)
+			goto unlock_mutex;
+	}
+
+	if (changed & NETIF_F_GRO_HW) {
+		netdev->features ^= NETIF_F_GRO_HW;
+		err = idpf_initiate_soft_reset(vport, IDPF_SR_RSC_CHANGE);
+		if (err)
+			goto unlock_mutex;
+	}
+
+	if (changed & NETIF_F_LOOPBACK) {
+		netdev->features ^= NETIF_F_LOOPBACK;
+		err = idpf_send_ena_dis_loopback_msg(vport);
+	}
+
+unlock_mutex:
+	idpf_vport_ctrl_unlock(netdev);
+
+	return err;
+}
+
+/**
+ * idpf_open - Called when a network interface becomes active
+ * @netdev: network interface device structure
+ *
+ * The open entry point is called when a network interface is made
+ * active by the system (IFF_UP).  At this point all resources needed
+ * for transmit and receive operations are allocated, the interrupt
+ * handler is registered with the OS, the netdev watchdog is enabled,
+ * and the stack is notified that the interface is ready.
+ *
+ * Returns 0 on success, negative value on failure
+ */
+static int idpf_open(struct net_device *netdev)
+{
+	struct idpf_vport *vport;
+	int err;
+
+	idpf_vport_ctrl_lock(netdev);
+	vport = idpf_netdev_to_vport(netdev);
+
+	err = idpf_vport_open(vport, true);
+
+	idpf_vport_ctrl_unlock(netdev);
+
+	return err;
+}
+
+/**
+ * idpf_change_mtu - NDO callback to change the MTU
+ * @netdev: network interface device structure
+ * @new_mtu: new value for maximum frame size
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int idpf_change_mtu(struct net_device *netdev, int new_mtu)
+{
+	struct idpf_vport *vport;
+	int err;
+
+	idpf_vport_ctrl_lock(netdev);
+	vport = idpf_netdev_to_vport(netdev);
+
+	netdev->mtu = new_mtu;
+
+	err = idpf_initiate_soft_reset(vport, IDPF_SR_MTU_CHANGE);
+
+	idpf_vport_ctrl_unlock(netdev);
+
+	return err;
+}
+
+/**
+ * idpf_features_check - Validate packet conforms to limits
+ * @skb: skb buffer
+ * @netdev: This port's netdev
+ * @features: Offload features that the stack believes apply
+ */
+static netdev_features_t idpf_features_check(struct sk_buff *skb,
+					     struct net_device *netdev,
+					     netdev_features_t features)
+{
+	struct idpf_vport *vport = idpf_netdev_to_vport(netdev);
+	struct idpf_adapter *adapter = vport->adapter;
+	size_t len;
+
+	/* No point in doing any of this if neither checksum nor GSO are
+	 * being requested for this frame.  We can rule out both by just
+	 * checking for CHECKSUM_PARTIAL
+	 */
+	if (skb->ip_summed != CHECKSUM_PARTIAL)
+		return features;
+
+	/* We cannot support GSO if the MSS is going to be less than
+	 * 88 bytes. If it is then we need to drop support for GSO.
+	 */
+	if (skb_is_gso(skb) &&
+	    (skb_shinfo(skb)->gso_size < IDPF_TX_TSO_MIN_MSS))
+		features &= ~NETIF_F_GSO_MASK;
+
+	/* Ensure MACLEN is <= 126 bytes (63 words) and not an odd size */
+	len = skb_network_offset(skb);
+	if (unlikely(len & ~(126)))
+		goto unsupported;
+
+	len = skb_network_header_len(skb);
+	if (unlikely(len > idpf_get_max_tx_hdr_size(adapter)))
+		goto unsupported;
+
+	if (!skb->encapsulation)
+		return features;
+
+	/* L4TUNLEN can support 127 words */
+	len = skb_inner_network_header(skb) - skb_transport_header(skb);
+	if (unlikely(len & ~(127 * 2)))
+		goto unsupported;
+
+	/* IPLEN can support at most 127 dwords */
+	len = skb_inner_network_header_len(skb);
+	if (unlikely(len > idpf_get_max_tx_hdr_size(adapter)))
+		goto unsupported;
+
+	/* No need to validate L4LEN as TCP is the only protocol with a
+	 * a flexible value and we support all possible values supported
+	 * by TCP, which is at most 15 dwords
+	 */
+
+	return features;
+
+unsupported:
+	return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
+}
+
+/**
+ * idpf_set_mac - NDO callback to set port mac address
+ * @netdev: network interface device structure
+ * @p: pointer to an address structure
+ *
+ * Returns 0 on success, negative on failure
+ **/
+static int idpf_set_mac(struct net_device *netdev, void *p)
+{
+	struct idpf_netdev_priv *np = netdev_priv(netdev);
+	struct idpf_vport_config *vport_config;
+	struct sockaddr *addr = p;
+	struct idpf_vport *vport;
+	int err = 0;
+
+	idpf_vport_ctrl_lock(netdev);
+	vport = idpf_netdev_to_vport(netdev);
+
+	if (!idpf_is_cap_ena(vport->adapter, IDPF_OTHER_CAPS,
+			     VIRTCHNL2_CAP_MACFILTER)) {
+		dev_info(&vport->adapter->pdev->dev, "Setting MAC address is not supported\n");
+		err = -EOPNOTSUPP;
+		goto unlock_mutex;
+	}
+
+	if (!is_valid_ether_addr(addr->sa_data)) {
+		dev_info(&vport->adapter->pdev->dev, "Invalid MAC address: %pM\n",
+			 addr->sa_data);
+		err = -EADDRNOTAVAIL;
+		goto unlock_mutex;
+	}
+
+	if (ether_addr_equal(netdev->dev_addr, addr->sa_data))
+		goto unlock_mutex;
+
+	vport_config = vport->adapter->vport_config[vport->idx];
+	err = idpf_add_mac_filter(vport, np, addr->sa_data, false);
+	if (err) {
+		__idpf_del_mac_filter(vport_config, addr->sa_data);
+		goto unlock_mutex;
+	}
+
+	if (is_valid_ether_addr(vport->default_mac_addr))
+		idpf_del_mac_filter(vport, np, vport->default_mac_addr, false);
+
+	ether_addr_copy(vport->default_mac_addr, addr->sa_data);
+	eth_hw_addr_set(netdev, addr->sa_data);
+
+unlock_mutex:
+	idpf_vport_ctrl_unlock(netdev);
+
+	return err;
+}
+
+/**
+ * idpf_alloc_dma_mem - Allocate dma memory
+ * @hw: pointer to hw struct
+ * @mem: pointer to dma_mem struct
+ * @size: size of the memory to allocate
+ */
+void *idpf_alloc_dma_mem(struct idpf_hw *hw, struct idpf_dma_mem *mem, u64 size)
+{
+	struct idpf_adapter *adapter = hw->back;
+	size_t sz = ALIGN(size, 4096);
+
+	mem->va = dma_alloc_coherent(&adapter->pdev->dev, sz,
+				     &mem->pa, GFP_KERNEL);
+	mem->size = sz;
+
+	return mem->va;
+}
+
+/**
+ * idpf_free_dma_mem - Free the allocated dma memory
+ * @hw: pointer to hw struct
+ * @mem: pointer to dma_mem struct
+ */
+void idpf_free_dma_mem(struct idpf_hw *hw, struct idpf_dma_mem *mem)
+{
+	struct idpf_adapter *adapter = hw->back;
+
+	dma_free_coherent(&adapter->pdev->dev, mem->size,
+			  mem->va, mem->pa);
+	mem->size = 0;
+	mem->va = NULL;
+	mem->pa = 0;
+}
+
+static const struct net_device_ops idpf_netdev_ops_splitq = {
+	.ndo_open = idpf_open,
+	.ndo_stop = idpf_stop,
+	.ndo_start_xmit = idpf_tx_splitq_start,
+	.ndo_features_check = idpf_features_check,
+	.ndo_set_rx_mode = idpf_set_rx_mode,
+	.ndo_validate_addr = eth_validate_addr,
+	.ndo_set_mac_address = idpf_set_mac,
+	.ndo_change_mtu = idpf_change_mtu,
+	.ndo_get_stats64 = idpf_get_stats64,
+	.ndo_set_features = idpf_set_features,
+	.ndo_tx_timeout = idpf_tx_timeout,
+};
+
+static const struct net_device_ops idpf_netdev_ops_singleq = {
+	.ndo_open = idpf_open,
+	.ndo_stop = idpf_stop,
+	.ndo_start_xmit = idpf_tx_singleq_start,
+	.ndo_features_check = idpf_features_check,
+	.ndo_set_rx_mode = idpf_set_rx_mode,
+	.ndo_validate_addr = eth_validate_addr,
+	.ndo_set_mac_address = idpf_set_mac,
+	.ndo_change_mtu = idpf_change_mtu,
+	.ndo_get_stats64 = idpf_get_stats64,
+	.ndo_set_features = idpf_set_features,
+	.ndo_tx_timeout = idpf_tx_timeout,
+};
diff --git a/drivers/net/ethernet/intel/idpf/idpf_main.c b/drivers/net/ethernet/intel/idpf/idpf_main.c
new file mode 100644
index 000000000000..e1febc74cefd
--- /dev/null
+++ b/drivers/net/ethernet/intel/idpf/idpf_main.c
@@ -0,0 +1,279 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2023 Intel Corporation */
+
+#include "idpf.h"
+#include "idpf_devids.h"
+
+#define DRV_SUMMARY	"Intel(R) Infrastructure Data Path Function Linux Driver"
+
+MODULE_DESCRIPTION(DRV_SUMMARY);
+MODULE_LICENSE("GPL");
+
+/**
+ * idpf_remove - Device removal routine
+ * @pdev: PCI device information struct
+ */
+static void idpf_remove(struct pci_dev *pdev)
+{
+	struct idpf_adapter *adapter = pci_get_drvdata(pdev);
+	int i;
+
+	set_bit(IDPF_REMOVE_IN_PROG, adapter->flags);
+
+	/* Wait until vc_event_task is done to consider if any hard reset is
+	 * in progress else we may go ahead and release the resources but the
+	 * thread doing the hard reset might continue the init path and
+	 * end up in bad state.
+	 */
+	cancel_delayed_work_sync(&adapter->vc_event_task);
+	if (adapter->num_vfs)
+		idpf_sriov_configure(pdev, 0);
+
+	idpf_vc_core_deinit(adapter);
+	/* Be a good citizen and leave the device clean on exit */
+	adapter->dev_ops.reg_ops.trigger_reset(adapter, IDPF_HR_FUNC_RESET);
+	idpf_deinit_dflt_mbx(adapter);
+
+	if (!adapter->netdevs)
+		goto destroy_wqs;
+
+	/* There are some cases where it's possible to still have netdevs
+	 * registered with the stack at this point, e.g. if the driver detected
+	 * a HW reset and rmmod is called before it fully recovers. Unregister
+	 * any stale netdevs here.
+	 */
+	for (i = 0; i < adapter->max_vports; i++) {
+		if (!adapter->netdevs[i])
+			continue;
+		if (adapter->netdevs[i]->reg_state != NETREG_UNINITIALIZED)
+			unregister_netdev(adapter->netdevs[i]);
+		free_netdev(adapter->netdevs[i]);
+		adapter->netdevs[i] = NULL;
+	}
+
+destroy_wqs:
+	destroy_workqueue(adapter->init_wq);
+	destroy_workqueue(adapter->serv_wq);
+	destroy_workqueue(adapter->mbx_wq);
+	destroy_workqueue(adapter->stats_wq);
+	destroy_workqueue(adapter->vc_event_wq);
+
+	for (i = 0; i < adapter->max_vports; i++) {
+		kfree(adapter->vport_config[i]);
+		adapter->vport_config[i] = NULL;
+	}
+	kfree(adapter->vport_config);
+	adapter->vport_config = NULL;
+	kfree(adapter->netdevs);
+	adapter->netdevs = NULL;
+
+	mutex_destroy(&adapter->vport_ctrl_lock);
+	mutex_destroy(&adapter->vector_lock);
+	mutex_destroy(&adapter->queue_lock);
+	mutex_destroy(&adapter->vc_buf_lock);
+
+	pci_set_drvdata(pdev, NULL);
+	kfree(adapter);
+}
+
+/**
+ * idpf_shutdown - PCI callback for shutting down device
+ * @pdev: PCI device information struct
+ */
+static void idpf_shutdown(struct pci_dev *pdev)
+{
+	idpf_remove(pdev);
+
+	if (system_state == SYSTEM_POWER_OFF)
+		pci_set_power_state(pdev, PCI_D3hot);
+}
+
+/**
+ * idpf_cfg_hw - Initialize HW struct
+ * @adapter: adapter to setup hw struct for
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int idpf_cfg_hw(struct idpf_adapter *adapter)
+{
+	struct pci_dev *pdev = adapter->pdev;
+	struct idpf_hw *hw = &adapter->hw;
+
+	hw->hw_addr = pcim_iomap_table(pdev)[0];
+	if (!hw->hw_addr) {
+		pci_err(pdev, "failed to allocate PCI iomap table\n");
+
+		return -ENOMEM;
+	}
+
+	hw->back = adapter;
+
+	return 0;
+}
+
+/**
+ * idpf_probe - Device initialization routine
+ * @pdev: PCI device information struct
+ * @ent: entry in idpf_pci_tbl
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int idpf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+{
+	struct device *dev = &pdev->dev;
+	struct idpf_adapter *adapter;
+	int err;
+
+	adapter = kzalloc(sizeof(*adapter), GFP_KERNEL);
+	if (!adapter)
+		return -ENOMEM;
+
+	adapter->req_tx_splitq = true;
+	adapter->req_rx_splitq = true;
+
+	switch (ent->device) {
+	case IDPF_DEV_ID_PF:
+		idpf_dev_ops_init(adapter);
+		break;
+	case IDPF_DEV_ID_VF:
+		idpf_vf_dev_ops_init(adapter);
+		adapter->crc_enable = true;
+		break;
+	default:
+		err = -ENODEV;
+		dev_err(&pdev->dev, "Unexpected dev ID 0x%x in idpf probe\n",
+			ent->device);
+		goto err_free;
+	}
+
+	adapter->pdev = pdev;
+	err = pcim_enable_device(pdev);
+	if (err)
+		goto err_free;
+
+	err = pcim_iomap_regions(pdev, BIT(0), pci_name(pdev));
+	if (err) {
+		pci_err(pdev, "pcim_iomap_regions failed %pe\n", ERR_PTR(err));
+
+		goto err_free;
+	}
+
+	/* set up for high or low dma */
+	err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));
+	if (err) {
+		pci_err(pdev, "DMA configuration failed: %pe\n", ERR_PTR(err));
+
+		goto err_free;
+	}
+
+	pci_set_master(pdev);
+	pci_set_drvdata(pdev, adapter);
+
+	adapter->init_wq = alloc_workqueue("%s-%s-init", 0, 0,
+					   dev_driver_string(dev),
+					   dev_name(dev));
+	if (!adapter->init_wq) {
+		dev_err(dev, "Failed to allocate init workqueue\n");
+		err = -ENOMEM;
+		goto err_free;
+	}
+
+	adapter->serv_wq = alloc_workqueue("%s-%s-service", 0, 0,
+					   dev_driver_string(dev),
+					   dev_name(dev));
+	if (!adapter->serv_wq) {
+		dev_err(dev, "Failed to allocate service workqueue\n");
+		err = -ENOMEM;
+		goto err_serv_wq_alloc;
+	}
+
+	adapter->mbx_wq = alloc_workqueue("%s-%s-mbx", 0, 0,
+					  dev_driver_string(dev),
+					  dev_name(dev));
+	if (!adapter->mbx_wq) {
+		dev_err(dev, "Failed to allocate mailbox workqueue\n");
+		err = -ENOMEM;
+		goto err_mbx_wq_alloc;
+	}
+
+	adapter->stats_wq = alloc_workqueue("%s-%s-stats", 0, 0,
+					    dev_driver_string(dev),
+					    dev_name(dev));
+	if (!adapter->stats_wq) {
+		dev_err(dev, "Failed to allocate workqueue\n");
+		err = -ENOMEM;
+		goto err_stats_wq_alloc;
+	}
+
+	adapter->vc_event_wq = alloc_workqueue("%s-%s-vc_event", 0, 0,
+					       dev_driver_string(dev),
+					       dev_name(dev));
+	if (!adapter->vc_event_wq) {
+		dev_err(dev, "Failed to allocate virtchnl event workqueue\n");
+		err = -ENOMEM;
+		goto err_vc_event_wq_alloc;
+	}
+
+	/* setup msglvl */
+	adapter->msg_enable = netif_msg_init(-1, IDPF_AVAIL_NETIF_M);
+
+	err = idpf_cfg_hw(adapter);
+	if (err) {
+		dev_err(dev, "Failed to configure HW structure for adapter: %d\n",
+			err);
+		goto err_cfg_hw;
+	}
+
+	mutex_init(&adapter->vport_ctrl_lock);
+	mutex_init(&adapter->vector_lock);
+	mutex_init(&adapter->queue_lock);
+	mutex_init(&adapter->vc_buf_lock);
+
+	init_waitqueue_head(&adapter->vchnl_wq);
+
+	INIT_DELAYED_WORK(&adapter->init_task, idpf_init_task);
+	INIT_DELAYED_WORK(&adapter->serv_task, idpf_service_task);
+	INIT_DELAYED_WORK(&adapter->mbx_task, idpf_mbx_task);
+	INIT_DELAYED_WORK(&adapter->stats_task, idpf_statistics_task);
+	INIT_DELAYED_WORK(&adapter->vc_event_task, idpf_vc_event_task);
+
+	adapter->dev_ops.reg_ops.reset_reg_init(adapter);
+	set_bit(IDPF_HR_DRV_LOAD, adapter->flags);
+	queue_delayed_work(adapter->vc_event_wq, &adapter->vc_event_task,
+			   msecs_to_jiffies(10 * (pdev->devfn & 0x07)));
+
+	return 0;
+
+err_cfg_hw:
+	destroy_workqueue(adapter->vc_event_wq);
+err_vc_event_wq_alloc:
+	destroy_workqueue(adapter->stats_wq);
+err_stats_wq_alloc:
+	destroy_workqueue(adapter->mbx_wq);
+err_mbx_wq_alloc:
+	destroy_workqueue(adapter->serv_wq);
+err_serv_wq_alloc:
+	destroy_workqueue(adapter->init_wq);
+err_free:
+	kfree(adapter);
+	return err;
+}
+
+/* idpf_pci_tbl - PCI Dev idpf ID Table
+ */
+static const struct pci_device_id idpf_pci_tbl[] = {
+	{ PCI_VDEVICE(INTEL, IDPF_DEV_ID_PF)},
+	{ PCI_VDEVICE(INTEL, IDPF_DEV_ID_VF)},
+	{ /* Sentinel */ }
+};
+MODULE_DEVICE_TABLE(pci, idpf_pci_tbl);
+
+static struct pci_driver idpf_driver = {
+	.name			= KBUILD_MODNAME,
+	.id_table		= idpf_pci_tbl,
+	.probe			= idpf_probe,
+	.sriov_configure	= idpf_sriov_configure,
+	.remove			= idpf_remove,
+	.shutdown		= idpf_shutdown,
+};
+module_pci_driver(idpf_driver);
diff --git a/drivers/net/ethernet/intel/idpf/idpf_mem.h b/drivers/net/ethernet/intel/idpf/idpf_mem.h
new file mode 100644
index 000000000000..b21a04fccf0f
--- /dev/null
+++ b/drivers/net/ethernet/intel/idpf/idpf_mem.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright (C) 2023 Intel Corporation */
+
+#ifndef _IDPF_MEM_H_
+#define _IDPF_MEM_H_
+
+#include <linux/io.h>
+
+struct idpf_dma_mem {
+	void *va;
+	dma_addr_t pa;
+	size_t size;
+};
+
+#define wr32(a, reg, value)	writel((value), ((a)->hw_addr + (reg)))
+#define rd32(a, reg)		readl((a)->hw_addr + (reg))
+#define wr64(a, reg, value)	writeq((value), ((a)->hw_addr + (reg)))
+#define rd64(a, reg)		readq((a)->hw_addr + (reg))
+
+#endif /* _IDPF_MEM_H_ */
diff --git a/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c
new file mode 100644
index 000000000000..81288a17da2a
--- /dev/null
+++ b/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c
@@ -0,0 +1,1183 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2023 Intel Corporation */
+
+#include "idpf.h"
+
+/**
+ * idpf_tx_singleq_csum - Enable tx checksum offloads
+ * @skb: pointer to skb
+ * @off: pointer to struct that holds offload parameters
+ *
+ * Returns 0 or error (negative) if checksum offload cannot be executed, 1
+ * otherwise.
+ */
+static int idpf_tx_singleq_csum(struct sk_buff *skb,
+				struct idpf_tx_offload_params *off)
+{
+	u32 l4_len, l3_len, l2_len;
+	union {
+		struct iphdr *v4;
+		struct ipv6hdr *v6;
+		unsigned char *hdr;
+	} ip;
+	union {
+		struct tcphdr *tcp;
+		unsigned char *hdr;
+	} l4;
+	u32 offset, cmd = 0;
+	u8 l4_proto = 0;
+	__be16 frag_off;
+	bool is_tso;
+
+	if (skb->ip_summed != CHECKSUM_PARTIAL)
+		return 0;
+
+	ip.hdr = skb_network_header(skb);
+	l4.hdr = skb_transport_header(skb);
+
+	/* compute outer L2 header size */
+	l2_len = ip.hdr - skb->data;
+	offset = FIELD_PREP(0x3F << IDPF_TX_DESC_LEN_MACLEN_S, l2_len / 2);
+	is_tso = !!(off->tx_flags & IDPF_TX_FLAGS_TSO);
+	if (skb->encapsulation) {
+		u32 tunnel = 0;
+
+		/* define outer network header type */
+		if (off->tx_flags & IDPF_TX_FLAGS_IPV4) {
+			/* The stack computes the IP header already, the only
+			 * time we need the hardware to recompute it is in the
+			 * case of TSO.
+			 */
+			tunnel |= is_tso ?
+				  IDPF_TX_CTX_EXT_IP_IPV4 :
+				  IDPF_TX_CTX_EXT_IP_IPV4_NO_CSUM;
+
+			l4_proto = ip.v4->protocol;
+		} else if (off->tx_flags & IDPF_TX_FLAGS_IPV6) {
+			tunnel |= IDPF_TX_CTX_EXT_IP_IPV6;
+
+			l4_proto = ip.v6->nexthdr;
+			if (ipv6_ext_hdr(l4_proto))
+				ipv6_skip_exthdr(skb, skb_network_offset(skb) +
+						 sizeof(*ip.v6),
+						 &l4_proto, &frag_off);
+		}
+
+		/* define outer transport */
+		switch (l4_proto) {
+		case IPPROTO_UDP:
+			tunnel |= IDPF_TXD_CTX_UDP_TUNNELING;
+			break;
+		case IPPROTO_GRE:
+			tunnel |= IDPF_TXD_CTX_GRE_TUNNELING;
+			break;
+		case IPPROTO_IPIP:
+		case IPPROTO_IPV6:
+			l4.hdr = skb_inner_network_header(skb);
+			break;
+		default:
+			if (is_tso)
+				return -1;
+
+			skb_checksum_help(skb);
+
+			return 0;
+		}
+		off->tx_flags |= IDPF_TX_FLAGS_TUNNEL;
+
+		/* compute outer L3 header size */
+		tunnel |= FIELD_PREP(IDPF_TXD_CTX_QW0_TUNN_EXT_IPLEN_M,
+				     (l4.hdr - ip.hdr) / 4);
+
+		/* switch IP header pointer from outer to inner header */
+		ip.hdr = skb_inner_network_header(skb);
+
+		/* compute tunnel header size */
+		tunnel |= FIELD_PREP(IDPF_TXD_CTX_QW0_TUNN_NATLEN_M,
+				     (ip.hdr - l4.hdr) / 2);
+
+		/* indicate if we need to offload outer UDP header */
+		if (is_tso &&
+		    !(skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL) &&
+		    (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM))
+			tunnel |= IDPF_TXD_CTX_QW0_TUNN_L4T_CS_M;
+
+		/* record tunnel offload values */
+		off->cd_tunneling |= tunnel;
+
+		/* switch L4 header pointer from outer to inner */
+		l4.hdr = skb_inner_transport_header(skb);
+		l4_proto = 0;
+
+		/* reset type as we transition from outer to inner headers */
+		off->tx_flags &= ~(IDPF_TX_FLAGS_IPV4 | IDPF_TX_FLAGS_IPV6);
+		if (ip.v4->version == 4)
+			off->tx_flags |= IDPF_TX_FLAGS_IPV4;
+		if (ip.v6->version == 6)
+			off->tx_flags |= IDPF_TX_FLAGS_IPV6;
+	}
+
+	/* Enable IP checksum offloads */
+	if (off->tx_flags & IDPF_TX_FLAGS_IPV4) {
+		l4_proto = ip.v4->protocol;
+		/* See comment above regarding need for HW to recompute IP
+		 * header checksum in the case of TSO.
+		 */
+		if (is_tso)
+			cmd |= IDPF_TX_DESC_CMD_IIPT_IPV4_CSUM;
+		else
+			cmd |= IDPF_TX_DESC_CMD_IIPT_IPV4;
+
+	} else if (off->tx_flags & IDPF_TX_FLAGS_IPV6) {
+		cmd |= IDPF_TX_DESC_CMD_IIPT_IPV6;
+		l4_proto = ip.v6->nexthdr;
+		if (ipv6_ext_hdr(l4_proto))
+			ipv6_skip_exthdr(skb, skb_network_offset(skb) +
+					 sizeof(*ip.v6), &l4_proto,
+					 &frag_off);
+	} else {
+		return -1;
+	}
+
+	/* compute inner L3 header size */
+	l3_len = l4.hdr - ip.hdr;
+	offset |= (l3_len / 4) << IDPF_TX_DESC_LEN_IPLEN_S;
+
+	/* Enable L4 checksum offloads */
+	switch (l4_proto) {
+	case IPPROTO_TCP:
+		/* enable checksum offloads */
+		cmd |= IDPF_TX_DESC_CMD_L4T_EOFT_TCP;
+		l4_len = l4.tcp->doff;
+		break;
+	case IPPROTO_UDP:
+		/* enable UDP checksum offload */
+		cmd |= IDPF_TX_DESC_CMD_L4T_EOFT_UDP;
+		l4_len = sizeof(struct udphdr) >> 2;
+		break;
+	case IPPROTO_SCTP:
+		/* enable SCTP checksum offload */
+		cmd |= IDPF_TX_DESC_CMD_L4T_EOFT_SCTP;
+		l4_len = sizeof(struct sctphdr) >> 2;
+		break;
+	default:
+		if (is_tso)
+			return -1;
+
+		skb_checksum_help(skb);
+
+		return 0;
+	}
+
+	offset |= l4_len << IDPF_TX_DESC_LEN_L4_LEN_S;
+	off->td_cmd |= cmd;
+	off->hdr_offsets |= offset;
+
+	return 1;
+}
+
+/**
+ * idpf_tx_singleq_map - Build the Tx base descriptor
+ * @tx_q: queue to send buffer on
+ * @first: first buffer info buffer to use
+ * @offloads: pointer to struct that holds offload parameters
+ *
+ * This function loops over the skb data pointed to by *first
+ * and gets a physical address for each memory location and programs
+ * it and the length into the transmit base mode descriptor.
+ */
+static void idpf_tx_singleq_map(struct idpf_queue *tx_q,
+				struct idpf_tx_buf *first,
+				struct idpf_tx_offload_params *offloads)
+{
+	u32 offsets = offloads->hdr_offsets;
+	struct idpf_tx_buf *tx_buf = first;
+	struct idpf_base_tx_desc *tx_desc;
+	struct sk_buff *skb = first->skb;
+	u64 td_cmd = offloads->td_cmd;
+	unsigned int data_len, size;
+	u16 i = tx_q->next_to_use;
+	struct netdev_queue *nq;
+	skb_frag_t *frag;
+	dma_addr_t dma;
+	u64 td_tag = 0;
+
+	data_len = skb->data_len;
+	size = skb_headlen(skb);
+
+	tx_desc = IDPF_BASE_TX_DESC(tx_q, i);
+
+	dma = dma_map_single(tx_q->dev, skb->data, size, DMA_TO_DEVICE);
+
+	/* write each descriptor with CRC bit */
+	if (tx_q->vport->crc_enable)
+		td_cmd |= IDPF_TX_DESC_CMD_ICRC;
+
+	for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
+		unsigned int max_data = IDPF_TX_MAX_DESC_DATA_ALIGNED;
+
+		if (dma_mapping_error(tx_q->dev, dma))
+			return idpf_tx_dma_map_error(tx_q, skb, first, i);
+
+		/* record length, and DMA address */
+		dma_unmap_len_set(tx_buf, len, size);
+		dma_unmap_addr_set(tx_buf, dma, dma);
+
+		/* align size to end of page */
+		max_data += -dma & (IDPF_TX_MAX_READ_REQ_SIZE - 1);
+		tx_desc->buf_addr = cpu_to_le64(dma);
+
+		/* account for data chunks larger than the hardware
+		 * can handle
+		 */
+		while (unlikely(size > IDPF_TX_MAX_DESC_DATA)) {
+			tx_desc->qw1 = idpf_tx_singleq_build_ctob(td_cmd,
+								  offsets,
+								  max_data,
+								  td_tag);
+			tx_desc++;
+			i++;
+
+			if (i == tx_q->desc_count) {
+				tx_desc = IDPF_BASE_TX_DESC(tx_q, 0);
+				i = 0;
+			}
+
+			dma += max_data;
+			size -= max_data;
+
+			max_data = IDPF_TX_MAX_DESC_DATA_ALIGNED;
+			tx_desc->buf_addr = cpu_to_le64(dma);
+		}
+
+		if (!data_len)
+			break;
+
+		tx_desc->qw1 = idpf_tx_singleq_build_ctob(td_cmd, offsets,
+							  size, td_tag);
+		tx_desc++;
+		i++;
+
+		if (i == tx_q->desc_count) {
+			tx_desc = IDPF_BASE_TX_DESC(tx_q, 0);
+			i = 0;
+		}
+
+		size = skb_frag_size(frag);
+		data_len -= size;
+
+		dma = skb_frag_dma_map(tx_q->dev, frag, 0, size,
+				       DMA_TO_DEVICE);
+
+		tx_buf = &tx_q->tx_buf[i];
+	}
+
+	skb_tx_timestamp(first->skb);
+
+	/* write last descriptor with RS and EOP bits */
+	td_cmd |= (u64)(IDPF_TX_DESC_CMD_EOP | IDPF_TX_DESC_CMD_RS);
+
+	tx_desc->qw1 = idpf_tx_singleq_build_ctob(td_cmd, offsets,
+						  size, td_tag);
+
+	IDPF_SINGLEQ_BUMP_RING_IDX(tx_q, i);
+
+	/* set next_to_watch value indicating a packet is present */
+	first->next_to_watch = tx_desc;
+
+	nq = netdev_get_tx_queue(tx_q->vport->netdev, tx_q->idx);
+	netdev_tx_sent_queue(nq, first->bytecount);
+
+	idpf_tx_buf_hw_update(tx_q, i, netdev_xmit_more());
+}
+
+/**
+ * idpf_tx_singleq_get_ctx_desc - grab next desc and update buffer ring
+ * @txq: queue to put context descriptor on
+ *
+ * Since the TX buffer rings mimics the descriptor ring, update the tx buffer
+ * ring entry to reflect that this index is a context descriptor
+ */
+static struct idpf_base_tx_ctx_desc *
+idpf_tx_singleq_get_ctx_desc(struct idpf_queue *txq)
+{
+	struct idpf_base_tx_ctx_desc *ctx_desc;
+	int ntu = txq->next_to_use;
+
+	memset(&txq->tx_buf[ntu], 0, sizeof(struct idpf_tx_buf));
+	txq->tx_buf[ntu].ctx_entry = true;
+
+	ctx_desc = IDPF_BASE_TX_CTX_DESC(txq, ntu);
+
+	IDPF_SINGLEQ_BUMP_RING_IDX(txq, ntu);
+	txq->next_to_use = ntu;
+
+	return ctx_desc;
+}
+
+/**
+ * idpf_tx_singleq_build_ctx_desc - populate context descriptor
+ * @txq: queue to send buffer on
+ * @offload: offload parameter structure
+ **/
+static void idpf_tx_singleq_build_ctx_desc(struct idpf_queue *txq,
+					   struct idpf_tx_offload_params *offload)
+{
+	struct idpf_base_tx_ctx_desc *desc = idpf_tx_singleq_get_ctx_desc(txq);
+	u64 qw1 = (u64)IDPF_TX_DESC_DTYPE_CTX;
+
+	if (offload->tso_segs) {
+		qw1 |= IDPF_TX_CTX_DESC_TSO << IDPF_TXD_CTX_QW1_CMD_S;
+		qw1 |= ((u64)offload->tso_len << IDPF_TXD_CTX_QW1_TSO_LEN_S) &
+			IDPF_TXD_CTX_QW1_TSO_LEN_M;
+		qw1 |= ((u64)offload->mss << IDPF_TXD_CTX_QW1_MSS_S) &
+			IDPF_TXD_CTX_QW1_MSS_M;
+
+		u64_stats_update_begin(&txq->stats_sync);
+		u64_stats_inc(&txq->q_stats.tx.lso_pkts);
+		u64_stats_update_end(&txq->stats_sync);
+	}
+
+	desc->qw0.tunneling_params = cpu_to_le32(offload->cd_tunneling);
+
+	desc->qw0.l2tag2 = 0;
+	desc->qw0.rsvd1 = 0;
+	desc->qw1 = cpu_to_le64(qw1);
+}
+
+/**
+ * idpf_tx_singleq_frame - Sends buffer on Tx ring using base descriptors
+ * @skb: send buffer
+ * @tx_q: queue to send buffer on
+ *
+ * Returns NETDEV_TX_OK if sent, else an error code
+ */
+static netdev_tx_t idpf_tx_singleq_frame(struct sk_buff *skb,
+					 struct idpf_queue *tx_q)
+{
+	struct idpf_tx_offload_params offload = { };
+	struct idpf_tx_buf *first;
+	unsigned int count;
+	__be16 protocol;
+	int csum, tso;
+
+	count = idpf_tx_desc_count_required(tx_q, skb);
+	if (unlikely(!count))
+		return idpf_tx_drop_skb(tx_q, skb);
+
+	if (idpf_tx_maybe_stop_common(tx_q,
+				      count + IDPF_TX_DESCS_PER_CACHE_LINE +
+				      IDPF_TX_DESCS_FOR_CTX)) {
+		idpf_tx_buf_hw_update(tx_q, tx_q->next_to_use, false);
+
+		return NETDEV_TX_BUSY;
+	}
+
+	protocol = vlan_get_protocol(skb);
+	if (protocol == htons(ETH_P_IP))
+		offload.tx_flags |= IDPF_TX_FLAGS_IPV4;
+	else if (protocol == htons(ETH_P_IPV6))
+		offload.tx_flags |= IDPF_TX_FLAGS_IPV6;
+
+	tso = idpf_tso(skb, &offload);
+	if (tso < 0)
+		goto out_drop;
+
+	csum = idpf_tx_singleq_csum(skb, &offload);
+	if (csum < 0)
+		goto out_drop;
+
+	if (tso || offload.cd_tunneling)
+		idpf_tx_singleq_build_ctx_desc(tx_q, &offload);
+
+	/* record the location of the first descriptor for this packet */
+	first = &tx_q->tx_buf[tx_q->next_to_use];
+	first->skb = skb;
+
+	if (tso) {
+		first->gso_segs = offload.tso_segs;
+		first->bytecount = skb->len + ((first->gso_segs - 1) * offload.tso_hdr_len);
+	} else {
+		first->bytecount = max_t(unsigned int, skb->len, ETH_ZLEN);
+		first->gso_segs = 1;
+	}
+	idpf_tx_singleq_map(tx_q, first, &offload);
+
+	return NETDEV_TX_OK;
+
+out_drop:
+	return idpf_tx_drop_skb(tx_q, skb);
+}
+
+/**
+ * idpf_tx_singleq_start - Selects the right Tx queue to send buffer
+ * @skb: send buffer
+ * @netdev: network interface device structure
+ *
+ * Returns NETDEV_TX_OK if sent, else an error code
+ */
+netdev_tx_t idpf_tx_singleq_start(struct sk_buff *skb,
+				  struct net_device *netdev)
+{
+	struct idpf_vport *vport = idpf_netdev_to_vport(netdev);
+	struct idpf_queue *tx_q;
+
+	tx_q = vport->txqs[skb_get_queue_mapping(skb)];
+
+	/* hardware can't handle really short frames, hardware padding works
+	 * beyond this point
+	 */
+	if (skb_put_padto(skb, IDPF_TX_MIN_PKT_LEN)) {
+		idpf_tx_buf_hw_update(tx_q, tx_q->next_to_use, false);
+
+		return NETDEV_TX_OK;
+	}
+
+	return idpf_tx_singleq_frame(skb, tx_q);
+}
+
+/**
+ * idpf_tx_singleq_clean - Reclaim resources from queue
+ * @tx_q: Tx queue to clean
+ * @napi_budget: Used to determine if we are in netpoll
+ * @cleaned: returns number of packets cleaned
+ *
+ */
+static bool idpf_tx_singleq_clean(struct idpf_queue *tx_q, int napi_budget,
+				  int *cleaned)
+{
+	unsigned int budget = tx_q->vport->compln_clean_budget;
+	unsigned int total_bytes = 0, total_pkts = 0;
+	struct idpf_base_tx_desc *tx_desc;
+	s16 ntc = tx_q->next_to_clean;
+	struct idpf_netdev_priv *np;
+	struct idpf_tx_buf *tx_buf;
+	struct idpf_vport *vport;
+	struct netdev_queue *nq;
+	bool dont_wake;
+
+	tx_desc = IDPF_BASE_TX_DESC(tx_q, ntc);
+	tx_buf = &tx_q->tx_buf[ntc];
+	ntc -= tx_q->desc_count;
+
+	do {
+		struct idpf_base_tx_desc *eop_desc;
+
+		/* If this entry in the ring was used as a context descriptor,
+		 * it's corresponding entry in the buffer ring will indicate as
+		 * such. We can skip this descriptor since there is no buffer
+		 * to clean.
+		 */
+		if (tx_buf->ctx_entry) {
+			/* Clear this flag here to avoid stale flag values when
+			 * this buffer is used for actual data in the future.
+			 * There are cases where the tx_buf struct / the flags
+			 * field will not be cleared before being reused.
+			 */
+			tx_buf->ctx_entry = false;
+			goto fetch_next_txq_desc;
+		}
+
+		/* if next_to_watch is not set then no work pending */
+		eop_desc = (struct idpf_base_tx_desc *)tx_buf->next_to_watch;
+		if (!eop_desc)
+			break;
+
+		/* prevent any other reads prior to eop_desc */
+		smp_rmb();
+
+		/* if the descriptor isn't done, no work yet to do */
+		if (!(eop_desc->qw1 &
+		      cpu_to_le64(IDPF_TX_DESC_DTYPE_DESC_DONE)))
+			break;
+
+		/* clear next_to_watch to prevent false hangs */
+		tx_buf->next_to_watch = NULL;
+
+		/* update the statistics for this packet */
+		total_bytes += tx_buf->bytecount;
+		total_pkts += tx_buf->gso_segs;
+
+		napi_consume_skb(tx_buf->skb, napi_budget);
+
+		/* unmap skb header data */
+		dma_unmap_single(tx_q->dev,
+				 dma_unmap_addr(tx_buf, dma),
+				 dma_unmap_len(tx_buf, len),
+				 DMA_TO_DEVICE);
+
+		/* clear tx_buf data */
+		tx_buf->skb = NULL;
+		dma_unmap_len_set(tx_buf, len, 0);
+
+		/* unmap remaining buffers */
+		while (tx_desc != eop_desc) {
+			tx_buf++;
+			tx_desc++;
+			ntc++;
+			if (unlikely(!ntc)) {
+				ntc -= tx_q->desc_count;
+				tx_buf = tx_q->tx_buf;
+				tx_desc = IDPF_BASE_TX_DESC(tx_q, 0);
+			}
+
+			/* unmap any remaining paged data */
+			if (dma_unmap_len(tx_buf, len)) {
+				dma_unmap_page(tx_q->dev,
+					       dma_unmap_addr(tx_buf, dma),
+					       dma_unmap_len(tx_buf, len),
+					       DMA_TO_DEVICE);
+				dma_unmap_len_set(tx_buf, len, 0);
+			}
+		}
+
+		/* update budget only if we did something */
+		budget--;
+
+fetch_next_txq_desc:
+		tx_buf++;
+		tx_desc++;
+		ntc++;
+		if (unlikely(!ntc)) {
+			ntc -= tx_q->desc_count;
+			tx_buf = tx_q->tx_buf;
+			tx_desc = IDPF_BASE_TX_DESC(tx_q, 0);
+		}
+	} while (likely(budget));
+
+	ntc += tx_q->desc_count;
+	tx_q->next_to_clean = ntc;
+
+	*cleaned += total_pkts;
+
+	u64_stats_update_begin(&tx_q->stats_sync);
+	u64_stats_add(&tx_q->q_stats.tx.packets, total_pkts);
+	u64_stats_add(&tx_q->q_stats.tx.bytes, total_bytes);
+	u64_stats_update_end(&tx_q->stats_sync);
+
+	vport = tx_q->vport;
+	np = netdev_priv(vport->netdev);
+	nq = netdev_get_tx_queue(vport->netdev, tx_q->idx);
+
+	dont_wake = np->state != __IDPF_VPORT_UP ||
+		    !netif_carrier_ok(vport->netdev);
+	__netif_txq_completed_wake(nq, total_pkts, total_bytes,
+				   IDPF_DESC_UNUSED(tx_q), IDPF_TX_WAKE_THRESH,
+				   dont_wake);
+
+	return !!budget;
+}
+
+/**
+ * idpf_tx_singleq_clean_all - Clean all Tx queues
+ * @q_vec: queue vector
+ * @budget: Used to determine if we are in netpoll
+ * @cleaned: returns number of packets cleaned
+ *
+ * Returns false if clean is not complete else returns true
+ */
+static bool idpf_tx_singleq_clean_all(struct idpf_q_vector *q_vec, int budget,
+				      int *cleaned)
+{
+	u16 num_txq = q_vec->num_txq;
+	bool clean_complete = true;
+	int i, budget_per_q;
+
+	budget_per_q = num_txq ? max(budget / num_txq, 1) : 0;
+	for (i = 0; i < num_txq; i++) {
+		struct idpf_queue *q;
+
+		q = q_vec->tx[i];
+		clean_complete &= idpf_tx_singleq_clean(q, budget_per_q,
+							cleaned);
+	}
+
+	return clean_complete;
+}
+
+/**
+ * idpf_rx_singleq_test_staterr - tests bits in Rx descriptor
+ * status and error fields
+ * @rx_desc: pointer to receive descriptor (in le64 format)
+ * @stat_err_bits: value to mask
+ *
+ * This function does some fast chicanery in order to return the
+ * value of the mask which is really only used for boolean tests.
+ * The status_error_ptype_len doesn't need to be shifted because it begins
+ * at offset zero.
+ */
+static bool idpf_rx_singleq_test_staterr(const union virtchnl2_rx_desc *rx_desc,
+					 const u64 stat_err_bits)
+{
+	return !!(rx_desc->base_wb.qword1.status_error_ptype_len &
+		  cpu_to_le64(stat_err_bits));
+}
+
+/**
+ * idpf_rx_singleq_is_non_eop - process handling of non-EOP buffers
+ * @rxq: Rx ring being processed
+ * @rx_desc: Rx descriptor for current buffer
+ * @skb: Current socket buffer containing buffer in progress
+ * @ntc: next to clean
+ */
+static bool idpf_rx_singleq_is_non_eop(struct idpf_queue *rxq,
+				       union virtchnl2_rx_desc *rx_desc,
+				       struct sk_buff *skb, u16 ntc)
+{
+	/* if we are the last buffer then there is nothing else to do */
+	if (likely(idpf_rx_singleq_test_staterr(rx_desc, IDPF_RXD_EOF_SINGLEQ)))
+		return false;
+
+	return true;
+}
+
+/**
+ * idpf_rx_singleq_csum - Indicate in skb if checksum is good
+ * @rxq: Rx ring being processed
+ * @skb: skb currently being received and modified
+ * @csum_bits: checksum bits from descriptor
+ * @ptype: the packet type decoded by hardware
+ *
+ * skb->protocol must be set before this function is called
+ */
+static void idpf_rx_singleq_csum(struct idpf_queue *rxq, struct sk_buff *skb,
+				 struct idpf_rx_csum_decoded *csum_bits,
+				 u16 ptype)
+{
+	struct idpf_rx_ptype_decoded decoded;
+	bool ipv4, ipv6;
+
+	/* check if Rx checksum is enabled */
+	if (unlikely(!(rxq->vport->netdev->features & NETIF_F_RXCSUM)))
+		return;
+
+	/* check if HW has decoded the packet and checksum */
+	if (unlikely(!(csum_bits->l3l4p)))
+		return;
+
+	decoded = rxq->vport->rx_ptype_lkup[ptype];
+	if (unlikely(!(decoded.known && decoded.outer_ip)))
+		return;
+
+	ipv4 = IDPF_RX_PTYPE_TO_IPV(&decoded, IDPF_RX_PTYPE_OUTER_IPV4);
+	ipv6 = IDPF_RX_PTYPE_TO_IPV(&decoded, IDPF_RX_PTYPE_OUTER_IPV6);
+
+	/* Check if there were any checksum errors */
+	if (unlikely(ipv4 && (csum_bits->ipe || csum_bits->eipe)))
+		goto checksum_fail;
+
+	/* Device could not do any checksum offload for certain extension
+	 * headers as indicated by setting IPV6EXADD bit
+	 */
+	if (unlikely(ipv6 && csum_bits->ipv6exadd))
+		return;
+
+	/* check for L4 errors and handle packets that were not able to be
+	 * checksummed due to arrival speed
+	 */
+	if (unlikely(csum_bits->l4e))
+		goto checksum_fail;
+
+	if (unlikely(csum_bits->nat && csum_bits->eudpe))
+		goto checksum_fail;
+
+	/* Handle packets that were not able to be checksummed due to arrival
+	 * speed, in this case the stack can compute the csum.
+	 */
+	if (unlikely(csum_bits->pprs))
+		return;
+
+	/* If there is an outer header present that might contain a checksum
+	 * we need to bump the checksum level by 1 to reflect the fact that
+	 * we are indicating we validated the inner checksum.
+	 */
+	if (decoded.tunnel_type >= IDPF_RX_PTYPE_TUNNEL_IP_GRENAT)
+		skb->csum_level = 1;
+
+	/* Only report checksum unnecessary for ICMP, TCP, UDP, or SCTP */
+	switch (decoded.inner_prot) {
+	case IDPF_RX_PTYPE_INNER_PROT_ICMP:
+	case IDPF_RX_PTYPE_INNER_PROT_TCP:
+	case IDPF_RX_PTYPE_INNER_PROT_UDP:
+	case IDPF_RX_PTYPE_INNER_PROT_SCTP:
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+		return;
+	default:
+		return;
+	}
+
+checksum_fail:
+	u64_stats_update_begin(&rxq->stats_sync);
+	u64_stats_inc(&rxq->q_stats.rx.hw_csum_err);
+	u64_stats_update_end(&rxq->stats_sync);
+}
+
+/**
+ * idpf_rx_singleq_base_csum - Indicate in skb if hw indicated a good cksum
+ * @rx_q: Rx completion queue
+ * @skb: skb currently being received and modified
+ * @rx_desc: the receive descriptor
+ * @ptype: Rx packet type
+ *
+ * This function only operates on the VIRTCHNL2_RXDID_1_32B_BASE_M base 32byte
+ * descriptor writeback format.
+ **/
+static void idpf_rx_singleq_base_csum(struct idpf_queue *rx_q,
+				      struct sk_buff *skb,
+				      union virtchnl2_rx_desc *rx_desc,
+				      u16 ptype)
+{
+	struct idpf_rx_csum_decoded csum_bits;
+	u32 rx_error, rx_status;
+	u64 qword;
+
+	qword = le64_to_cpu(rx_desc->base_wb.qword1.status_error_ptype_len);
+
+	rx_status = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_QW1_STATUS_M, qword);
+	rx_error = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_QW1_ERROR_M, qword);
+
+	csum_bits.ipe = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_ERROR_IPE_M, rx_error);
+	csum_bits.eipe = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_ERROR_EIPE_M,
+				   rx_error);
+	csum_bits.l4e = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_ERROR_L4E_M, rx_error);
+	csum_bits.pprs = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_ERROR_PPRS_M,
+				   rx_error);
+	csum_bits.l3l4p = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_STATUS_L3L4P_M,
+				    rx_status);
+	csum_bits.ipv6exadd = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_STATUS_IPV6EXADD_M,
+					rx_status);
+	csum_bits.nat = 0;
+	csum_bits.eudpe = 0;
+
+	idpf_rx_singleq_csum(rx_q, skb, &csum_bits, ptype);
+}
+
+/**
+ * idpf_rx_singleq_flex_csum - Indicate in skb if hw indicated a good cksum
+ * @rx_q: Rx completion queue
+ * @skb: skb currently being received and modified
+ * @rx_desc: the receive descriptor
+ * @ptype: Rx packet type
+ *
+ * This function only operates on the VIRTCHNL2_RXDID_2_FLEX_SQ_NIC flexible
+ * descriptor writeback format.
+ **/
+static void idpf_rx_singleq_flex_csum(struct idpf_queue *rx_q,
+				      struct sk_buff *skb,
+				      union virtchnl2_rx_desc *rx_desc,
+				      u16 ptype)
+{
+	struct idpf_rx_csum_decoded csum_bits;
+	u16 rx_status0, rx_status1;
+
+	rx_status0 = le16_to_cpu(rx_desc->flex_nic_wb.status_error0);
+	rx_status1 = le16_to_cpu(rx_desc->flex_nic_wb.status_error1);
+
+	csum_bits.ipe = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_IPE_M,
+				  rx_status0);
+	csum_bits.eipe = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_EIPE_M,
+				   rx_status0);
+	csum_bits.l4e = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_L4E_M,
+				  rx_status0);
+	csum_bits.eudpe = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_M,
+				    rx_status0);
+	csum_bits.l3l4p = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_L3L4P_M,
+				    rx_status0);
+	csum_bits.ipv6exadd = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_IPV6EXADD_M,
+					rx_status0);
+	csum_bits.nat = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS1_NAT_M,
+				  rx_status1);
+	csum_bits.pprs = 0;
+
+	idpf_rx_singleq_csum(rx_q, skb, &csum_bits, ptype);
+}
+
+/**
+ * idpf_rx_singleq_base_hash - set the hash value in the skb
+ * @rx_q: Rx completion queue
+ * @skb: skb currently being received and modified
+ * @rx_desc: specific descriptor
+ * @decoded: Decoded Rx packet type related fields
+ *
+ * This function only operates on the VIRTCHNL2_RXDID_1_32B_BASE_M base 32byte
+ * descriptor writeback format.
+ **/
+static void idpf_rx_singleq_base_hash(struct idpf_queue *rx_q,
+				      struct sk_buff *skb,
+				      union virtchnl2_rx_desc *rx_desc,
+				      struct idpf_rx_ptype_decoded *decoded)
+{
+	u64 mask, qw1;
+
+	if (unlikely(!(rx_q->vport->netdev->features & NETIF_F_RXHASH)))
+		return;
+
+	mask = VIRTCHNL2_RX_BASE_DESC_FLTSTAT_RSS_HASH_M;
+	qw1 = le64_to_cpu(rx_desc->base_wb.qword1.status_error_ptype_len);
+
+	if (FIELD_GET(mask, qw1) == mask) {
+		u32 hash = le32_to_cpu(rx_desc->base_wb.qword0.hi_dword.rss);
+
+		skb_set_hash(skb, hash, idpf_ptype_to_htype(decoded));
+	}
+}
+
+/**
+ * idpf_rx_singleq_flex_hash - set the hash value in the skb
+ * @rx_q: Rx completion queue
+ * @skb: skb currently being received and modified
+ * @rx_desc: specific descriptor
+ * @decoded: Decoded Rx packet type related fields
+ *
+ * This function only operates on the VIRTCHNL2_RXDID_2_FLEX_SQ_NIC flexible
+ * descriptor writeback format.
+ **/
+static void idpf_rx_singleq_flex_hash(struct idpf_queue *rx_q,
+				      struct sk_buff *skb,
+				      union virtchnl2_rx_desc *rx_desc,
+				      struct idpf_rx_ptype_decoded *decoded)
+{
+	if (unlikely(!(rx_q->vport->netdev->features & NETIF_F_RXHASH)))
+		return;
+
+	if (FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_RSS_VALID_M,
+		      le16_to_cpu(rx_desc->flex_nic_wb.status_error0)))
+		skb_set_hash(skb, le32_to_cpu(rx_desc->flex_nic_wb.rss_hash),
+			     idpf_ptype_to_htype(decoded));
+}
+
+/**
+ * idpf_rx_singleq_process_skb_fields - Populate skb header fields from Rx
+ * descriptor
+ * @rx_q: Rx ring being processed
+ * @skb: pointer to current skb being populated
+ * @rx_desc: descriptor for skb
+ * @ptype: packet type
+ *
+ * This function checks the ring, descriptor, and packet information in
+ * order to populate the hash, checksum, VLAN, protocol, and
+ * other fields within the skb.
+ */
+static void idpf_rx_singleq_process_skb_fields(struct idpf_queue *rx_q,
+					       struct sk_buff *skb,
+					       union virtchnl2_rx_desc *rx_desc,
+					       u16 ptype)
+{
+	struct idpf_rx_ptype_decoded decoded =
+					rx_q->vport->rx_ptype_lkup[ptype];
+
+	/* modifies the skb - consumes the enet header */
+	skb->protocol = eth_type_trans(skb, rx_q->vport->netdev);
+
+	/* Check if we're using base mode descriptor IDs */
+	if (rx_q->rxdids == VIRTCHNL2_RXDID_1_32B_BASE_M) {
+		idpf_rx_singleq_base_hash(rx_q, skb, rx_desc, &decoded);
+		idpf_rx_singleq_base_csum(rx_q, skb, rx_desc, ptype);
+	} else {
+		idpf_rx_singleq_flex_hash(rx_q, skb, rx_desc, &decoded);
+		idpf_rx_singleq_flex_csum(rx_q, skb, rx_desc, ptype);
+	}
+}
+
+/**
+ * idpf_rx_singleq_buf_hw_alloc_all - Replace used receive buffers
+ * @rx_q: queue for which the hw buffers are allocated
+ * @cleaned_count: number of buffers to replace
+ *
+ * Returns false if all allocations were successful, true if any fail
+ */
+bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_queue *rx_q,
+				      u16 cleaned_count)
+{
+	struct virtchnl2_singleq_rx_buf_desc *desc;
+	u16 nta = rx_q->next_to_alloc;
+	struct idpf_rx_buf *buf;
+
+	if (!cleaned_count)
+		return false;
+
+	desc = IDPF_SINGLEQ_RX_BUF_DESC(rx_q, nta);
+	buf = &rx_q->rx_buf.buf[nta];
+
+	do {
+		dma_addr_t addr;
+
+		addr = idpf_alloc_page(rx_q->pp, buf, rx_q->rx_buf_size);
+		if (unlikely(addr == DMA_MAPPING_ERROR))
+			break;
+
+		/* Refresh the desc even if buffer_addrs didn't change
+		 * because each write-back erases this info.
+		 */
+		desc->pkt_addr = cpu_to_le64(addr);
+		desc->hdr_addr = 0;
+		desc++;
+
+		buf++;
+		nta++;
+		if (unlikely(nta == rx_q->desc_count)) {
+			desc = IDPF_SINGLEQ_RX_BUF_DESC(rx_q, 0);
+			buf = rx_q->rx_buf.buf;
+			nta = 0;
+		}
+
+		cleaned_count--;
+	} while (cleaned_count);
+
+	if (rx_q->next_to_alloc != nta) {
+		idpf_rx_buf_hw_update(rx_q, nta);
+		rx_q->next_to_alloc = nta;
+	}
+
+	return !!cleaned_count;
+}
+
+/**
+ * idpf_rx_singleq_extract_base_fields - Extract fields from the Rx descriptor
+ * @rx_q: Rx descriptor queue
+ * @rx_desc: the descriptor to process
+ * @fields: storage for extracted values
+ *
+ * Decode the Rx descriptor and extract relevant information including the
+ * size and Rx packet type.
+ *
+ * This function only operates on the VIRTCHNL2_RXDID_1_32B_BASE_M base 32byte
+ * descriptor writeback format.
+ */
+static void idpf_rx_singleq_extract_base_fields(struct idpf_queue *rx_q,
+						union virtchnl2_rx_desc *rx_desc,
+						struct idpf_rx_extracted *fields)
+{
+	u64 qword;
+
+	qword = le64_to_cpu(rx_desc->base_wb.qword1.status_error_ptype_len);
+
+	fields->size = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_QW1_LEN_PBUF_M, qword);
+	fields->rx_ptype = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_QW1_PTYPE_M, qword);
+}
+
+/**
+ * idpf_rx_singleq_extract_flex_fields - Extract fields from the Rx descriptor
+ * @rx_q: Rx descriptor queue
+ * @rx_desc: the descriptor to process
+ * @fields: storage for extracted values
+ *
+ * Decode the Rx descriptor and extract relevant information including the
+ * size and Rx packet type.
+ *
+ * This function only operates on the VIRTCHNL2_RXDID_2_FLEX_SQ_NIC flexible
+ * descriptor writeback format.
+ */
+static void idpf_rx_singleq_extract_flex_fields(struct idpf_queue *rx_q,
+						union virtchnl2_rx_desc *rx_desc,
+						struct idpf_rx_extracted *fields)
+{
+	fields->size = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_PKT_LEN_M,
+				 le16_to_cpu(rx_desc->flex_nic_wb.pkt_len));
+	fields->rx_ptype = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_PTYPE_M,
+				     le16_to_cpu(rx_desc->flex_nic_wb.ptype_flex_flags0));
+}
+
+/**
+ * idpf_rx_singleq_extract_fields - Extract fields from the Rx descriptor
+ * @rx_q: Rx descriptor queue
+ * @rx_desc: the descriptor to process
+ * @fields: storage for extracted values
+ *
+ */
+static void idpf_rx_singleq_extract_fields(struct idpf_queue *rx_q,
+					   union virtchnl2_rx_desc *rx_desc,
+					   struct idpf_rx_extracted *fields)
+{
+	if (rx_q->rxdids == VIRTCHNL2_RXDID_1_32B_BASE_M)
+		idpf_rx_singleq_extract_base_fields(rx_q, rx_desc, fields);
+	else
+		idpf_rx_singleq_extract_flex_fields(rx_q, rx_desc, fields);
+}
+
+/**
+ * idpf_rx_singleq_clean - Reclaim resources after receive completes
+ * @rx_q: rx queue to clean
+ * @budget: Total limit on number of packets to process
+ *
+ * Returns true if there's any budget left (e.g. the clean is finished)
+ */
+static int idpf_rx_singleq_clean(struct idpf_queue *rx_q, int budget)
+{
+	unsigned int total_rx_bytes = 0, total_rx_pkts = 0;
+	struct sk_buff *skb = rx_q->skb;
+	u16 ntc = rx_q->next_to_clean;
+	u16 cleaned_count = 0;
+	bool failure = false;
+
+	/* Process Rx packets bounded by budget */
+	while (likely(total_rx_pkts < (unsigned int)budget)) {
+		struct idpf_rx_extracted fields = { };
+		union virtchnl2_rx_desc *rx_desc;
+		struct idpf_rx_buf *rx_buf;
+
+		/* get the Rx desc from Rx queue based on 'next_to_clean' */
+		rx_desc = IDPF_RX_DESC(rx_q, ntc);
+
+		/* status_error_ptype_len will always be zero for unused
+		 * descriptors because it's cleared in cleanup, and overlaps
+		 * with hdr_addr which is always zero because packet split
+		 * isn't used, if the hardware wrote DD then the length will be
+		 * non-zero
+		 */
+#define IDPF_RXD_DD VIRTCHNL2_RX_BASE_DESC_STATUS_DD_M
+		if (!idpf_rx_singleq_test_staterr(rx_desc,
+						  IDPF_RXD_DD))
+			break;
+
+		/* This memory barrier is needed to keep us from reading
+		 * any other fields out of the rx_desc
+		 */
+		dma_rmb();
+
+		idpf_rx_singleq_extract_fields(rx_q, rx_desc, &fields);
+
+		rx_buf = &rx_q->rx_buf.buf[ntc];
+		if (!fields.size) {
+			idpf_rx_put_page(rx_buf);
+			goto skip_data;
+		}
+
+		idpf_rx_sync_for_cpu(rx_buf, fields.size);
+		skb = rx_q->skb;
+		if (skb)
+			idpf_rx_add_frag(rx_buf, skb, fields.size);
+		else
+			skb = idpf_rx_construct_skb(rx_q, rx_buf, fields.size);
+
+		/* exit if we failed to retrieve a buffer */
+		if (!skb)
+			break;
+
+skip_data:
+		IDPF_SINGLEQ_BUMP_RING_IDX(rx_q, ntc);
+
+		cleaned_count++;
+
+		/* skip if it is non EOP desc */
+		if (idpf_rx_singleq_is_non_eop(rx_q, rx_desc, skb, ntc))
+			continue;
+
+#define IDPF_RXD_ERR_S FIELD_PREP(VIRTCHNL2_RX_BASE_DESC_QW1_ERROR_M, \
+				  VIRTCHNL2_RX_BASE_DESC_ERROR_RXE_M)
+		if (unlikely(idpf_rx_singleq_test_staterr(rx_desc,
+							  IDPF_RXD_ERR_S))) {
+			dev_kfree_skb_any(skb);
+			skb = NULL;
+			continue;
+		}
+
+		/* pad skb if needed (to make valid ethernet frame) */
+		if (eth_skb_pad(skb)) {
+			skb = NULL;
+			continue;
+		}
+
+		/* probably a little skewed due to removing CRC */
+		total_rx_bytes += skb->len;
+
+		/* protocol */
+		idpf_rx_singleq_process_skb_fields(rx_q, skb,
+						   rx_desc, fields.rx_ptype);
+
+		/* send completed skb up the stack */
+		napi_gro_receive(&rx_q->q_vector->napi, skb);
+		skb = NULL;
+
+		/* update budget accounting */
+		total_rx_pkts++;
+	}
+
+	rx_q->skb = skb;
+
+	rx_q->next_to_clean = ntc;
+
+	if (cleaned_count)
+		failure = idpf_rx_singleq_buf_hw_alloc_all(rx_q, cleaned_count);
+
+	u64_stats_update_begin(&rx_q->stats_sync);
+	u64_stats_add(&rx_q->q_stats.rx.packets, total_rx_pkts);
+	u64_stats_add(&rx_q->q_stats.rx.bytes, total_rx_bytes);
+	u64_stats_update_end(&rx_q->stats_sync);
+
+	/* guarantee a trip back through this routine if there was a failure */
+	return failure ? budget : (int)total_rx_pkts;
+}
+
+/**
+ * idpf_rx_singleq_clean_all - Clean all Rx queues
+ * @q_vec: queue vector
+ * @budget: Used to determine if we are in netpoll
+ * @cleaned: returns number of packets cleaned
+ *
+ * Returns false if clean is not complete else returns true
+ */
+static bool idpf_rx_singleq_clean_all(struct idpf_q_vector *q_vec, int budget,
+				      int *cleaned)
+{
+	u16 num_rxq = q_vec->num_rxq;
+	bool clean_complete = true;
+	int budget_per_q, i;
+
+	/* We attempt to distribute budget to each Rx queue fairly, but don't
+	 * allow the budget to go below 1 because that would exit polling early.
+	 */
+	budget_per_q = num_rxq ? max(budget / num_rxq, 1) : 0;
+	for (i = 0; i < num_rxq; i++) {
+		struct idpf_queue *rxq = q_vec->rx[i];
+		int pkts_cleaned_per_q;
+
+		pkts_cleaned_per_q = idpf_rx_singleq_clean(rxq, budget_per_q);
+
+		/* if we clean as many as budgeted, we must not be done */
+		if (pkts_cleaned_per_q >= budget_per_q)
+			clean_complete = false;
+		*cleaned += pkts_cleaned_per_q;
+	}
+
+	return clean_complete;
+}
+
+/**
+ * idpf_vport_singleq_napi_poll - NAPI handler
+ * @napi: struct from which you get q_vector
+ * @budget: budget provided by stack
+ */
+int idpf_vport_singleq_napi_poll(struct napi_struct *napi, int budget)
+{
+	struct idpf_q_vector *q_vector =
+				container_of(napi, struct idpf_q_vector, napi);
+	bool clean_complete;
+	int work_done = 0;
+
+	/* Handle case where we are called by netpoll with a budget of 0 */
+	if (budget <= 0) {
+		idpf_tx_singleq_clean_all(q_vector, budget, &work_done);
+
+		return budget;
+	}
+
+	clean_complete = idpf_rx_singleq_clean_all(q_vector, budget,
+						   &work_done);
+	clean_complete &= idpf_tx_singleq_clean_all(q_vector, budget,
+						    &work_done);
+
+	/* If work not completed, return budget and polling will return */
+	if (!clean_complete)
+		return budget;
+
+	work_done = min_t(int, work_done, budget - 1);
+
+	/* Exit the polling mode, but don't re-enable interrupts if stack might
+	 * poll us due to busy-polling
+	 */
+	if (likely(napi_complete_done(napi, work_done)))
+		idpf_vport_intr_update_itr_ena_irq(q_vector);
+
+	return work_done;
+}
diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c
new file mode 100644
index 000000000000..6fa79898c42c
--- /dev/null
+++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c
@@ -0,0 +1,4289 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2023 Intel Corporation */
+
+#include "idpf.h"
+
+/**
+ * idpf_buf_lifo_push - push a buffer pointer onto stack
+ * @stack: pointer to stack struct
+ * @buf: pointer to buf to push
+ *
+ * Returns 0 on success, negative on failure
+ **/
+static int idpf_buf_lifo_push(struct idpf_buf_lifo *stack,
+			      struct idpf_tx_stash *buf)
+{
+	if (unlikely(stack->top == stack->size))
+		return -ENOSPC;
+
+	stack->bufs[stack->top++] = buf;
+
+	return 0;
+}
+
+/**
+ * idpf_buf_lifo_pop - pop a buffer pointer from stack
+ * @stack: pointer to stack struct
+ **/
+static struct idpf_tx_stash *idpf_buf_lifo_pop(struct idpf_buf_lifo *stack)
+{
+	if (unlikely(!stack->top))
+		return NULL;
+
+	return stack->bufs[--stack->top];
+}
+
+/**
+ * idpf_tx_timeout - Respond to a Tx Hang
+ * @netdev: network interface device structure
+ * @txqueue: TX queue
+ */
+void idpf_tx_timeout(struct net_device *netdev, unsigned int txqueue)
+{
+	struct idpf_adapter *adapter = idpf_netdev_to_adapter(netdev);
+
+	adapter->tx_timeout_count++;
+
+	netdev_err(netdev, "Detected Tx timeout: Count %d, Queue %d\n",
+		   adapter->tx_timeout_count, txqueue);
+	if (!idpf_is_reset_in_prog(adapter)) {
+		set_bit(IDPF_HR_FUNC_RESET, adapter->flags);
+		queue_delayed_work(adapter->vc_event_wq,
+				   &adapter->vc_event_task,
+				   msecs_to_jiffies(10));
+	}
+}
+
+/**
+ * idpf_tx_buf_rel - Release a Tx buffer
+ * @tx_q: the queue that owns the buffer
+ * @tx_buf: the buffer to free
+ */
+static void idpf_tx_buf_rel(struct idpf_queue *tx_q, struct idpf_tx_buf *tx_buf)
+{
+	if (tx_buf->skb) {
+		if (dma_unmap_len(tx_buf, len))
+			dma_unmap_single(tx_q->dev,
+					 dma_unmap_addr(tx_buf, dma),
+					 dma_unmap_len(tx_buf, len),
+					 DMA_TO_DEVICE);
+		dev_kfree_skb_any(tx_buf->skb);
+	} else if (dma_unmap_len(tx_buf, len)) {
+		dma_unmap_page(tx_q->dev,
+			       dma_unmap_addr(tx_buf, dma),
+			       dma_unmap_len(tx_buf, len),
+			       DMA_TO_DEVICE);
+	}
+
+	tx_buf->next_to_watch = NULL;
+	tx_buf->skb = NULL;
+	tx_buf->compl_tag = IDPF_SPLITQ_TX_INVAL_COMPL_TAG;
+	dma_unmap_len_set(tx_buf, len, 0);
+}
+
+/**
+ * idpf_tx_buf_rel_all - Free any empty Tx buffers
+ * @txq: queue to be cleaned
+ */
+static void idpf_tx_buf_rel_all(struct idpf_queue *txq)
+{
+	u16 i;
+
+	/* Buffers already cleared, nothing to do */
+	if (!txq->tx_buf)
+		return;
+
+	/* Free all the Tx buffer sk_buffs */
+	for (i = 0; i < txq->desc_count; i++)
+		idpf_tx_buf_rel(txq, &txq->tx_buf[i]);
+
+	kfree(txq->tx_buf);
+	txq->tx_buf = NULL;
+
+	if (!txq->buf_stack.bufs)
+		return;
+
+	for (i = 0; i < txq->buf_stack.size; i++)
+		kfree(txq->buf_stack.bufs[i]);
+
+	kfree(txq->buf_stack.bufs);
+	txq->buf_stack.bufs = NULL;
+}
+
+/**
+ * idpf_tx_desc_rel - Free Tx resources per queue
+ * @txq: Tx descriptor ring for a specific queue
+ * @bufq: buffer q or completion q
+ *
+ * Free all transmit software resources
+ */
+static void idpf_tx_desc_rel(struct idpf_queue *txq, bool bufq)
+{
+	if (bufq)
+		idpf_tx_buf_rel_all(txq);
+
+	if (!txq->desc_ring)
+		return;
+
+	dmam_free_coherent(txq->dev, txq->size, txq->desc_ring, txq->dma);
+	txq->desc_ring = NULL;
+	txq->next_to_alloc = 0;
+	txq->next_to_use = 0;
+	txq->next_to_clean = 0;
+}
+
+/**
+ * idpf_tx_desc_rel_all - Free Tx Resources for All Queues
+ * @vport: virtual port structure
+ *
+ * Free all transmit software resources
+ */
+static void idpf_tx_desc_rel_all(struct idpf_vport *vport)
+{
+	int i, j;
+
+	if (!vport->txq_grps)
+		return;
+
+	for (i = 0; i < vport->num_txq_grp; i++) {
+		struct idpf_txq_group *txq_grp = &vport->txq_grps[i];
+
+		for (j = 0; j < txq_grp->num_txq; j++)
+			idpf_tx_desc_rel(txq_grp->txqs[j], true);
+
+		if (idpf_is_queue_model_split(vport->txq_model))
+			idpf_tx_desc_rel(txq_grp->complq, false);
+	}
+}
+
+/**
+ * idpf_tx_buf_alloc_all - Allocate memory for all buffer resources
+ * @tx_q: queue for which the buffers are allocated
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int idpf_tx_buf_alloc_all(struct idpf_queue *tx_q)
+{
+	int buf_size;
+	int i;
+
+	/* Allocate book keeping buffers only. Buffers to be supplied to HW
+	 * are allocated by kernel network stack and received as part of skb
+	 */
+	buf_size = sizeof(struct idpf_tx_buf) * tx_q->desc_count;
+	tx_q->tx_buf = kzalloc(buf_size, GFP_KERNEL);
+	if (!tx_q->tx_buf)
+		return -ENOMEM;
+
+	/* Initialize tx_bufs with invalid completion tags */
+	for (i = 0; i < tx_q->desc_count; i++)
+		tx_q->tx_buf[i].compl_tag = IDPF_SPLITQ_TX_INVAL_COMPL_TAG;
+
+	/* Initialize tx buf stack for out-of-order completions if
+	 * flow scheduling offload is enabled
+	 */
+	tx_q->buf_stack.bufs =
+		kcalloc(tx_q->desc_count, sizeof(struct idpf_tx_stash *),
+			GFP_KERNEL);
+	if (!tx_q->buf_stack.bufs)
+		return -ENOMEM;
+
+	tx_q->buf_stack.size = tx_q->desc_count;
+	tx_q->buf_stack.top = tx_q->desc_count;
+
+	for (i = 0; i < tx_q->desc_count; i++) {
+		tx_q->buf_stack.bufs[i] = kzalloc(sizeof(*tx_q->buf_stack.bufs[i]),
+						  GFP_KERNEL);
+		if (!tx_q->buf_stack.bufs[i])
+			return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/**
+ * idpf_tx_desc_alloc - Allocate the Tx descriptors
+ * @tx_q: the tx ring to set up
+ * @bufq: buffer or completion queue
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int idpf_tx_desc_alloc(struct idpf_queue *tx_q, bool bufq)
+{
+	struct device *dev = tx_q->dev;
+	u32 desc_sz;
+	int err;
+
+	if (bufq) {
+		err = idpf_tx_buf_alloc_all(tx_q);
+		if (err)
+			goto err_alloc;
+
+		desc_sz = sizeof(struct idpf_base_tx_desc);
+	} else {
+		desc_sz = sizeof(struct idpf_splitq_tx_compl_desc);
+	}
+
+	tx_q->size = tx_q->desc_count * desc_sz;
+
+	/* Allocate descriptors also round up to nearest 4K */
+	tx_q->size = ALIGN(tx_q->size, 4096);
+	tx_q->desc_ring = dmam_alloc_coherent(dev, tx_q->size, &tx_q->dma,
+					      GFP_KERNEL);
+	if (!tx_q->desc_ring) {
+		dev_err(dev, "Unable to allocate memory for the Tx descriptor ring, size=%d\n",
+			tx_q->size);
+		err = -ENOMEM;
+		goto err_alloc;
+	}
+
+	tx_q->next_to_alloc = 0;
+	tx_q->next_to_use = 0;
+	tx_q->next_to_clean = 0;
+	set_bit(__IDPF_Q_GEN_CHK, tx_q->flags);
+
+	return 0;
+
+err_alloc:
+	idpf_tx_desc_rel(tx_q, bufq);
+
+	return err;
+}
+
+/**
+ * idpf_tx_desc_alloc_all - allocate all queues Tx resources
+ * @vport: virtual port private structure
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int idpf_tx_desc_alloc_all(struct idpf_vport *vport)
+{
+	struct device *dev = &vport->adapter->pdev->dev;
+	int err = 0;
+	int i, j;
+
+	/* Setup buffer queues. In single queue model buffer queues and
+	 * completion queues will be same
+	 */
+	for (i = 0; i < vport->num_txq_grp; i++) {
+		for (j = 0; j < vport->txq_grps[i].num_txq; j++) {
+			struct idpf_queue *txq = vport->txq_grps[i].txqs[j];
+			u8 gen_bits = 0;
+			u16 bufidx_mask;
+
+			err = idpf_tx_desc_alloc(txq, true);
+			if (err) {
+				dev_err(dev, "Allocation for Tx Queue %u failed\n",
+					i);
+				goto err_out;
+			}
+
+			if (!idpf_is_queue_model_split(vport->txq_model))
+				continue;
+
+			txq->compl_tag_cur_gen = 0;
+
+			/* Determine the number of bits in the bufid
+			 * mask and add one to get the start of the
+			 * generation bits
+			 */
+			bufidx_mask = txq->desc_count - 1;
+			while (bufidx_mask >> 1) {
+				txq->compl_tag_gen_s++;
+				bufidx_mask = bufidx_mask >> 1;
+			}
+			txq->compl_tag_gen_s++;
+
+			gen_bits = IDPF_TX_SPLITQ_COMPL_TAG_WIDTH -
+							txq->compl_tag_gen_s;
+			txq->compl_tag_gen_max = GETMAXVAL(gen_bits);
+
+			/* Set bufid mask based on location of first
+			 * gen bit; it cannot simply be the descriptor
+			 * ring size-1 since we can have size values
+			 * where not all of those bits are set.
+			 */
+			txq->compl_tag_bufid_m =
+				GETMAXVAL(txq->compl_tag_gen_s);
+		}
+
+		if (!idpf_is_queue_model_split(vport->txq_model))
+			continue;
+
+		/* Setup completion queues */
+		err = idpf_tx_desc_alloc(vport->txq_grps[i].complq, false);
+		if (err) {
+			dev_err(dev, "Allocation for Tx Completion Queue %u failed\n",
+				i);
+			goto err_out;
+		}
+	}
+
+err_out:
+	if (err)
+		idpf_tx_desc_rel_all(vport);
+
+	return err;
+}
+
+/**
+ * idpf_rx_page_rel - Release an rx buffer page
+ * @rxq: the queue that owns the buffer
+ * @rx_buf: the buffer to free
+ */
+static void idpf_rx_page_rel(struct idpf_queue *rxq, struct idpf_rx_buf *rx_buf)
+{
+	if (unlikely(!rx_buf->page))
+		return;
+
+	page_pool_put_full_page(rxq->pp, rx_buf->page, false);
+
+	rx_buf->page = NULL;
+	rx_buf->page_offset = 0;
+}
+
+/**
+ * idpf_rx_hdr_buf_rel_all - Release header buffer memory
+ * @rxq: queue to use
+ */
+static void idpf_rx_hdr_buf_rel_all(struct idpf_queue *rxq)
+{
+	struct idpf_adapter *adapter = rxq->vport->adapter;
+
+	dma_free_coherent(&adapter->pdev->dev,
+			  rxq->desc_count * IDPF_HDR_BUF_SIZE,
+			  rxq->rx_buf.hdr_buf_va,
+			  rxq->rx_buf.hdr_buf_pa);
+	rxq->rx_buf.hdr_buf_va = NULL;
+}
+
+/**
+ * idpf_rx_buf_rel_all - Free all Rx buffer resources for a queue
+ * @rxq: queue to be cleaned
+ */
+static void idpf_rx_buf_rel_all(struct idpf_queue *rxq)
+{
+	u16 i;
+
+	/* queue already cleared, nothing to do */
+	if (!rxq->rx_buf.buf)
+		return;
+
+	/* Free all the bufs allocated and given to hw on Rx queue */
+	for (i = 0; i < rxq->desc_count; i++)
+		idpf_rx_page_rel(rxq, &rxq->rx_buf.buf[i]);
+
+	if (rxq->rx_hsplit_en)
+		idpf_rx_hdr_buf_rel_all(rxq);
+
+	page_pool_destroy(rxq->pp);
+	rxq->pp = NULL;
+
+	kfree(rxq->rx_buf.buf);
+	rxq->rx_buf.buf = NULL;
+}
+
+/**
+ * idpf_rx_desc_rel - Free a specific Rx q resources
+ * @rxq: queue to clean the resources from
+ * @bufq: buffer q or completion q
+ * @q_model: single or split q model
+ *
+ * Free a specific rx queue resources
+ */
+static void idpf_rx_desc_rel(struct idpf_queue *rxq, bool bufq, s32 q_model)
+{
+	if (!rxq)
+		return;
+
+	if (!bufq && idpf_is_queue_model_split(q_model) && rxq->skb) {
+		dev_kfree_skb_any(rxq->skb);
+		rxq->skb = NULL;
+	}
+
+	if (bufq || !idpf_is_queue_model_split(q_model))
+		idpf_rx_buf_rel_all(rxq);
+
+	rxq->next_to_alloc = 0;
+	rxq->next_to_clean = 0;
+	rxq->next_to_use = 0;
+	if (!rxq->desc_ring)
+		return;
+
+	dmam_free_coherent(rxq->dev, rxq->size, rxq->desc_ring, rxq->dma);
+	rxq->desc_ring = NULL;
+}
+
+/**
+ * idpf_rx_desc_rel_all - Free Rx Resources for All Queues
+ * @vport: virtual port structure
+ *
+ * Free all rx queues resources
+ */
+static void idpf_rx_desc_rel_all(struct idpf_vport *vport)
+{
+	struct idpf_rxq_group *rx_qgrp;
+	u16 num_rxq;
+	int i, j;
+
+	if (!vport->rxq_grps)
+		return;
+
+	for (i = 0; i < vport->num_rxq_grp; i++) {
+		rx_qgrp = &vport->rxq_grps[i];
+
+		if (!idpf_is_queue_model_split(vport->rxq_model)) {
+			for (j = 0; j < rx_qgrp->singleq.num_rxq; j++)
+				idpf_rx_desc_rel(rx_qgrp->singleq.rxqs[j],
+						 false, vport->rxq_model);
+			continue;
+		}
+
+		num_rxq = rx_qgrp->splitq.num_rxq_sets;
+		for (j = 0; j < num_rxq; j++)
+			idpf_rx_desc_rel(&rx_qgrp->splitq.rxq_sets[j]->rxq,
+					 false, vport->rxq_model);
+
+		if (!rx_qgrp->splitq.bufq_sets)
+			continue;
+
+		for (j = 0; j < vport->num_bufqs_per_qgrp; j++) {
+			struct idpf_bufq_set *bufq_set =
+				&rx_qgrp->splitq.bufq_sets[j];
+
+			idpf_rx_desc_rel(&bufq_set->bufq, true,
+					 vport->rxq_model);
+		}
+	}
+}
+
+/**
+ * idpf_rx_buf_hw_update - Store the new tail and head values
+ * @rxq: queue to bump
+ * @val: new head index
+ */
+void idpf_rx_buf_hw_update(struct idpf_queue *rxq, u32 val)
+{
+	rxq->next_to_use = val;
+
+	if (unlikely(!rxq->tail))
+		return;
+
+	/* writel has an implicit memory barrier */
+	writel(val, rxq->tail);
+}
+
+/**
+ * idpf_rx_hdr_buf_alloc_all - Allocate memory for header buffers
+ * @rxq: ring to use
+ *
+ * Returns 0 on success, negative on failure.
+ */
+static int idpf_rx_hdr_buf_alloc_all(struct idpf_queue *rxq)
+{
+	struct idpf_adapter *adapter = rxq->vport->adapter;
+
+	rxq->rx_buf.hdr_buf_va =
+		dma_alloc_coherent(&adapter->pdev->dev,
+				   IDPF_HDR_BUF_SIZE * rxq->desc_count,
+				   &rxq->rx_buf.hdr_buf_pa,
+				   GFP_KERNEL);
+	if (!rxq->rx_buf.hdr_buf_va)
+		return -ENOMEM;
+
+	return 0;
+}
+
+/**
+ * idpf_rx_post_buf_refill - Post buffer id to refill queue
+ * @refillq: refill queue to post to
+ * @buf_id: buffer id to post
+ */
+static void idpf_rx_post_buf_refill(struct idpf_sw_queue *refillq, u16 buf_id)
+{
+	u16 nta = refillq->next_to_alloc;
+
+	/* store the buffer ID and the SW maintained GEN bit to the refillq */
+	refillq->ring[nta] =
+		((buf_id << IDPF_RX_BI_BUFID_S) & IDPF_RX_BI_BUFID_M) |
+		(!!(test_bit(__IDPF_Q_GEN_CHK, refillq->flags)) <<
+		 IDPF_RX_BI_GEN_S);
+
+	if (unlikely(++nta == refillq->desc_count)) {
+		nta = 0;
+		change_bit(__IDPF_Q_GEN_CHK, refillq->flags);
+	}
+	refillq->next_to_alloc = nta;
+}
+
+/**
+ * idpf_rx_post_buf_desc - Post buffer to bufq descriptor ring
+ * @bufq: buffer queue to post to
+ * @buf_id: buffer id to post
+ *
+ * Returns false if buffer could not be allocated, true otherwise.
+ */
+static bool idpf_rx_post_buf_desc(struct idpf_queue *bufq, u16 buf_id)
+{
+	struct virtchnl2_splitq_rx_buf_desc *splitq_rx_desc = NULL;
+	u16 nta = bufq->next_to_alloc;
+	struct idpf_rx_buf *buf;
+	dma_addr_t addr;
+
+	splitq_rx_desc = IDPF_SPLITQ_RX_BUF_DESC(bufq, nta);
+	buf = &bufq->rx_buf.buf[buf_id];
+
+	if (bufq->rx_hsplit_en) {
+		splitq_rx_desc->hdr_addr =
+			cpu_to_le64(bufq->rx_buf.hdr_buf_pa +
+				    (u32)buf_id * IDPF_HDR_BUF_SIZE);
+	}
+
+	addr = idpf_alloc_page(bufq->pp, buf, bufq->rx_buf_size);
+	if (unlikely(addr == DMA_MAPPING_ERROR))
+		return false;
+
+	splitq_rx_desc->pkt_addr = cpu_to_le64(addr);
+	splitq_rx_desc->qword0.buf_id = cpu_to_le16(buf_id);
+
+	nta++;
+	if (unlikely(nta == bufq->desc_count))
+		nta = 0;
+	bufq->next_to_alloc = nta;
+
+	return true;
+}
+
+/**
+ * idpf_rx_post_init_bufs - Post initial buffers to bufq
+ * @bufq: buffer queue to post working set to
+ * @working_set: number of buffers to put in working set
+ *
+ * Returns true if @working_set bufs were posted successfully, false otherwise.
+ */
+static bool idpf_rx_post_init_bufs(struct idpf_queue *bufq, u16 working_set)
+{
+	int i;
+
+	for (i = 0; i < working_set; i++) {
+		if (!idpf_rx_post_buf_desc(bufq, i))
+			return false;
+	}
+
+	idpf_rx_buf_hw_update(bufq,
+			      bufq->next_to_alloc & ~(bufq->rx_buf_stride - 1));
+
+	return true;
+}
+
+/**
+ * idpf_rx_create_page_pool - Create a page pool
+ * @rxbufq: RX queue to create page pool for
+ *
+ * Returns &page_pool on success, casted -errno on failure
+ */
+static struct page_pool *idpf_rx_create_page_pool(struct idpf_queue *rxbufq)
+{
+	struct page_pool_params pp = {
+		.flags		= PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV,
+		.order		= 0,
+		.pool_size	= rxbufq->desc_count,
+		.nid		= NUMA_NO_NODE,
+		.dev		= rxbufq->vport->netdev->dev.parent,
+		.max_len	= PAGE_SIZE,
+		.dma_dir	= DMA_FROM_DEVICE,
+		.offset		= 0,
+	};
+
+	if (rxbufq->rx_buf_size == IDPF_RX_BUF_2048)
+		pp.flags |= PP_FLAG_PAGE_FRAG;
+
+	return page_pool_create(&pp);
+}
+
+/**
+ * idpf_rx_buf_alloc_all - Allocate memory for all buffer resources
+ * @rxbufq: queue for which the buffers are allocated; equivalent to
+ * rxq when operating in singleq mode
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int idpf_rx_buf_alloc_all(struct idpf_queue *rxbufq)
+{
+	int err = 0;
+
+	/* Allocate book keeping buffers */
+	rxbufq->rx_buf.buf = kcalloc(rxbufq->desc_count,
+				     sizeof(struct idpf_rx_buf), GFP_KERNEL);
+	if (!rxbufq->rx_buf.buf) {
+		err = -ENOMEM;
+		goto rx_buf_alloc_all_out;
+	}
+
+	if (rxbufq->rx_hsplit_en) {
+		err = idpf_rx_hdr_buf_alloc_all(rxbufq);
+		if (err)
+			goto rx_buf_alloc_all_out;
+	}
+
+	/* Allocate buffers to be given to HW.	 */
+	if (idpf_is_queue_model_split(rxbufq->vport->rxq_model)) {
+		int working_set = IDPF_RX_BUFQ_WORKING_SET(rxbufq);
+
+		if (!idpf_rx_post_init_bufs(rxbufq, working_set))
+			err = -ENOMEM;
+	} else {
+		if (idpf_rx_singleq_buf_hw_alloc_all(rxbufq,
+						     rxbufq->desc_count - 1))
+			err = -ENOMEM;
+	}
+
+rx_buf_alloc_all_out:
+	if (err)
+		idpf_rx_buf_rel_all(rxbufq);
+
+	return err;
+}
+
+/**
+ * idpf_rx_bufs_init - Initialize page pool, allocate rx bufs, and post to HW
+ * @rxbufq: RX queue to create page pool for
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int idpf_rx_bufs_init(struct idpf_queue *rxbufq)
+{
+	struct page_pool *pool;
+
+	pool = idpf_rx_create_page_pool(rxbufq);
+	if (IS_ERR(pool))
+		return PTR_ERR(pool);
+
+	rxbufq->pp = pool;
+
+	return idpf_rx_buf_alloc_all(rxbufq);
+}
+
+/**
+ * idpf_rx_bufs_init_all - Initialize all RX bufs
+ * @vport: virtual port struct
+ *
+ * Returns 0 on success, negative on failure
+ */
+int idpf_rx_bufs_init_all(struct idpf_vport *vport)
+{
+	struct idpf_rxq_group *rx_qgrp;
+	struct idpf_queue *q;
+	int i, j, err;
+
+	for (i = 0; i < vport->num_rxq_grp; i++) {
+		rx_qgrp = &vport->rxq_grps[i];
+
+		/* Allocate bufs for the rxq itself in singleq */
+		if (!idpf_is_queue_model_split(vport->rxq_model)) {
+			int num_rxq = rx_qgrp->singleq.num_rxq;
+
+			for (j = 0; j < num_rxq; j++) {
+				q = rx_qgrp->singleq.rxqs[j];
+				err = idpf_rx_bufs_init(q);
+				if (err)
+					return err;
+			}
+
+			continue;
+		}
+
+		/* Otherwise, allocate bufs for the buffer queues */
+		for (j = 0; j < vport->num_bufqs_per_qgrp; j++) {
+			q = &rx_qgrp->splitq.bufq_sets[j].bufq;
+			err = idpf_rx_bufs_init(q);
+			if (err)
+				return err;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * idpf_rx_desc_alloc - Allocate queue Rx resources
+ * @rxq: Rx queue for which the resources are setup
+ * @bufq: buffer or completion queue
+ * @q_model: single or split queue model
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int idpf_rx_desc_alloc(struct idpf_queue *rxq, bool bufq, s32 q_model)
+{
+	struct device *dev = rxq->dev;
+
+	if (bufq)
+		rxq->size = rxq->desc_count *
+			sizeof(struct virtchnl2_splitq_rx_buf_desc);
+	else
+		rxq->size = rxq->desc_count *
+			sizeof(union virtchnl2_rx_desc);
+
+	/* Allocate descriptors and also round up to nearest 4K */
+	rxq->size = ALIGN(rxq->size, 4096);
+	rxq->desc_ring = dmam_alloc_coherent(dev, rxq->size,
+					     &rxq->dma, GFP_KERNEL);
+	if (!rxq->desc_ring) {
+		dev_err(dev, "Unable to allocate memory for the Rx descriptor ring, size=%d\n",
+			rxq->size);
+		return -ENOMEM;
+	}
+
+	rxq->next_to_alloc = 0;
+	rxq->next_to_clean = 0;
+	rxq->next_to_use = 0;
+	set_bit(__IDPF_Q_GEN_CHK, rxq->flags);
+
+	return 0;
+}
+
+/**
+ * idpf_rx_desc_alloc_all - allocate all RX queues resources
+ * @vport: virtual port structure
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int idpf_rx_desc_alloc_all(struct idpf_vport *vport)
+{
+	struct device *dev = &vport->adapter->pdev->dev;
+	struct idpf_rxq_group *rx_qgrp;
+	struct idpf_queue *q;
+	int i, j, err;
+	u16 num_rxq;
+
+	for (i = 0; i < vport->num_rxq_grp; i++) {
+		rx_qgrp = &vport->rxq_grps[i];
+		if (idpf_is_queue_model_split(vport->rxq_model))
+			num_rxq = rx_qgrp->splitq.num_rxq_sets;
+		else
+			num_rxq = rx_qgrp->singleq.num_rxq;
+
+		for (j = 0; j < num_rxq; j++) {
+			if (idpf_is_queue_model_split(vport->rxq_model))
+				q = &rx_qgrp->splitq.rxq_sets[j]->rxq;
+			else
+				q = rx_qgrp->singleq.rxqs[j];
+			err = idpf_rx_desc_alloc(q, false, vport->rxq_model);
+			if (err) {
+				dev_err(dev, "Memory allocation for Rx Queue %u failed\n",
+					i);
+				goto err_out;
+			}
+		}
+
+		if (!idpf_is_queue_model_split(vport->rxq_model))
+			continue;
+
+		for (j = 0; j < vport->num_bufqs_per_qgrp; j++) {
+			q = &rx_qgrp->splitq.bufq_sets[j].bufq;
+			err = idpf_rx_desc_alloc(q, true, vport->rxq_model);
+			if (err) {
+				dev_err(dev, "Memory allocation for Rx Buffer Queue %u failed\n",
+					i);
+				goto err_out;
+			}
+		}
+	}
+
+	return 0;
+
+err_out:
+	idpf_rx_desc_rel_all(vport);
+
+	return err;
+}
+
+/**
+ * idpf_txq_group_rel - Release all resources for txq groups
+ * @vport: vport to release txq groups on
+ */
+static void idpf_txq_group_rel(struct idpf_vport *vport)
+{
+	int i, j;
+
+	if (!vport->txq_grps)
+		return;
+
+	for (i = 0; i < vport->num_txq_grp; i++) {
+		struct idpf_txq_group *txq_grp = &vport->txq_grps[i];
+
+		for (j = 0; j < txq_grp->num_txq; j++) {
+			kfree(txq_grp->txqs[j]);
+			txq_grp->txqs[j] = NULL;
+		}
+		kfree(txq_grp->complq);
+		txq_grp->complq = NULL;
+	}
+	kfree(vport->txq_grps);
+	vport->txq_grps = NULL;
+}
+
+/**
+ * idpf_rxq_sw_queue_rel - Release software queue resources
+ * @rx_qgrp: rx queue group with software queues
+ */
+static void idpf_rxq_sw_queue_rel(struct idpf_rxq_group *rx_qgrp)
+{
+	int i, j;
+
+	for (i = 0; i < rx_qgrp->vport->num_bufqs_per_qgrp; i++) {
+		struct idpf_bufq_set *bufq_set = &rx_qgrp->splitq.bufq_sets[i];
+
+		for (j = 0; j < bufq_set->num_refillqs; j++) {
+			kfree(bufq_set->refillqs[j].ring);
+			bufq_set->refillqs[j].ring = NULL;
+		}
+		kfree(bufq_set->refillqs);
+		bufq_set->refillqs = NULL;
+	}
+}
+
+/**
+ * idpf_rxq_group_rel - Release all resources for rxq groups
+ * @vport: vport to release rxq groups on
+ */
+static void idpf_rxq_group_rel(struct idpf_vport *vport)
+{
+	int i;
+
+	if (!vport->rxq_grps)
+		return;
+
+	for (i = 0; i < vport->num_rxq_grp; i++) {
+		struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i];
+		u16 num_rxq;
+		int j;
+
+		if (idpf_is_queue_model_split(vport->rxq_model)) {
+			num_rxq = rx_qgrp->splitq.num_rxq_sets;
+			for (j = 0; j < num_rxq; j++) {
+				kfree(rx_qgrp->splitq.rxq_sets[j]);
+				rx_qgrp->splitq.rxq_sets[j] = NULL;
+			}
+
+			idpf_rxq_sw_queue_rel(rx_qgrp);
+			kfree(rx_qgrp->splitq.bufq_sets);
+			rx_qgrp->splitq.bufq_sets = NULL;
+		} else {
+			num_rxq = rx_qgrp->singleq.num_rxq;
+			for (j = 0; j < num_rxq; j++) {
+				kfree(rx_qgrp->singleq.rxqs[j]);
+				rx_qgrp->singleq.rxqs[j] = NULL;
+			}
+		}
+	}
+	kfree(vport->rxq_grps);
+	vport->rxq_grps = NULL;
+}
+
+/**
+ * idpf_vport_queue_grp_rel_all - Release all queue groups
+ * @vport: vport to release queue groups for
+ */
+static void idpf_vport_queue_grp_rel_all(struct idpf_vport *vport)
+{
+	idpf_txq_group_rel(vport);
+	idpf_rxq_group_rel(vport);
+}
+
+/**
+ * idpf_vport_queues_rel - Free memory for all queues
+ * @vport: virtual port
+ *
+ * Free the memory allocated for queues associated to a vport
+ */
+void idpf_vport_queues_rel(struct idpf_vport *vport)
+{
+	idpf_tx_desc_rel_all(vport);
+	idpf_rx_desc_rel_all(vport);
+	idpf_vport_queue_grp_rel_all(vport);
+
+	kfree(vport->txqs);
+	vport->txqs = NULL;
+}
+
+/**
+ * idpf_vport_init_fast_path_txqs - Initialize fast path txq array
+ * @vport: vport to init txqs on
+ *
+ * We get a queue index from skb->queue_mapping and we need a fast way to
+ * dereference the queue from queue groups.  This allows us to quickly pull a
+ * txq based on a queue index.
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int idpf_vport_init_fast_path_txqs(struct idpf_vport *vport)
+{
+	int i, j, k = 0;
+
+	vport->txqs = kcalloc(vport->num_txq, sizeof(struct idpf_queue *),
+			      GFP_KERNEL);
+
+	if (!vport->txqs)
+		return -ENOMEM;
+
+	for (i = 0; i < vport->num_txq_grp; i++) {
+		struct idpf_txq_group *tx_grp = &vport->txq_grps[i];
+
+		for (j = 0; j < tx_grp->num_txq; j++, k++) {
+			vport->txqs[k] = tx_grp->txqs[j];
+			vport->txqs[k]->idx = k;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * idpf_vport_init_num_qs - Initialize number of queues
+ * @vport: vport to initialize queues
+ * @vport_msg: data to be filled into vport
+ */
+void idpf_vport_init_num_qs(struct idpf_vport *vport,
+			    struct virtchnl2_create_vport *vport_msg)
+{
+	struct idpf_vport_user_config_data *config_data;
+	u16 idx = vport->idx;
+
+	config_data = &vport->adapter->vport_config[idx]->user_config;
+	vport->num_txq = le16_to_cpu(vport_msg->num_tx_q);
+	vport->num_rxq = le16_to_cpu(vport_msg->num_rx_q);
+	/* number of txqs and rxqs in config data will be zeros only in the
+	 * driver load path and we dont update them there after
+	 */
+	if (!config_data->num_req_tx_qs && !config_data->num_req_rx_qs) {
+		config_data->num_req_tx_qs = le16_to_cpu(vport_msg->num_tx_q);
+		config_data->num_req_rx_qs = le16_to_cpu(vport_msg->num_rx_q);
+	}
+
+	if (idpf_is_queue_model_split(vport->txq_model))
+		vport->num_complq = le16_to_cpu(vport_msg->num_tx_complq);
+	if (idpf_is_queue_model_split(vport->rxq_model))
+		vport->num_bufq = le16_to_cpu(vport_msg->num_rx_bufq);
+
+	/* Adjust number of buffer queues per Rx queue group. */
+	if (!idpf_is_queue_model_split(vport->rxq_model)) {
+		vport->num_bufqs_per_qgrp = 0;
+		vport->bufq_size[0] = IDPF_RX_BUF_2048;
+
+		return;
+	}
+
+	vport->num_bufqs_per_qgrp = IDPF_MAX_BUFQS_PER_RXQ_GRP;
+	/* Bufq[0] default buffer size is 4K
+	 * Bufq[1] default buffer size is 2K
+	 */
+	vport->bufq_size[0] = IDPF_RX_BUF_4096;
+	vport->bufq_size[1] = IDPF_RX_BUF_2048;
+}
+
+/**
+ * idpf_vport_calc_num_q_desc - Calculate number of queue groups
+ * @vport: vport to calculate q groups for
+ */
+void idpf_vport_calc_num_q_desc(struct idpf_vport *vport)
+{
+	struct idpf_vport_user_config_data *config_data;
+	int num_bufqs = vport->num_bufqs_per_qgrp;
+	u32 num_req_txq_desc, num_req_rxq_desc;
+	u16 idx = vport->idx;
+	int i;
+
+	config_data =  &vport->adapter->vport_config[idx]->user_config;
+	num_req_txq_desc = config_data->num_req_txq_desc;
+	num_req_rxq_desc = config_data->num_req_rxq_desc;
+
+	vport->complq_desc_count = 0;
+	if (num_req_txq_desc) {
+		vport->txq_desc_count = num_req_txq_desc;
+		if (idpf_is_queue_model_split(vport->txq_model)) {
+			vport->complq_desc_count = num_req_txq_desc;
+			if (vport->complq_desc_count < IDPF_MIN_TXQ_COMPLQ_DESC)
+				vport->complq_desc_count =
+					IDPF_MIN_TXQ_COMPLQ_DESC;
+		}
+	} else {
+		vport->txq_desc_count =	IDPF_DFLT_TX_Q_DESC_COUNT;
+		if (idpf_is_queue_model_split(vport->txq_model))
+			vport->complq_desc_count =
+				IDPF_DFLT_TX_COMPLQ_DESC_COUNT;
+	}
+
+	if (num_req_rxq_desc)
+		vport->rxq_desc_count = num_req_rxq_desc;
+	else
+		vport->rxq_desc_count = IDPF_DFLT_RX_Q_DESC_COUNT;
+
+	for (i = 0; i < num_bufqs; i++) {
+		if (!vport->bufq_desc_count[i])
+			vport->bufq_desc_count[i] =
+				IDPF_RX_BUFQ_DESC_COUNT(vport->rxq_desc_count,
+							num_bufqs);
+	}
+}
+
+/**
+ * idpf_vport_calc_total_qs - Calculate total number of queues
+ * @adapter: private data struct
+ * @vport_idx: vport idx to retrieve vport pointer
+ * @vport_msg: message to fill with data
+ * @max_q: vport max queue info
+ *
+ * Return 0 on success, error value on failure.
+ */
+int idpf_vport_calc_total_qs(struct idpf_adapter *adapter, u16 vport_idx,
+			     struct virtchnl2_create_vport *vport_msg,
+			     struct idpf_vport_max_q *max_q)
+{
+	int dflt_splitq_txq_grps = 0, dflt_singleq_txqs = 0;
+	int dflt_splitq_rxq_grps = 0, dflt_singleq_rxqs = 0;
+	u16 num_req_tx_qs = 0, num_req_rx_qs = 0;
+	struct idpf_vport_config *vport_config;
+	u16 num_txq_grps, num_rxq_grps;
+	u32 num_qs;
+
+	vport_config = adapter->vport_config[vport_idx];
+	if (vport_config) {
+		num_req_tx_qs = vport_config->user_config.num_req_tx_qs;
+		num_req_rx_qs = vport_config->user_config.num_req_rx_qs;
+	} else {
+		int num_cpus;
+
+		/* Restrict num of queues to cpus online as a default
+		 * configuration to give best performance. User can always
+		 * override to a max number of queues via ethtool.
+		 */
+		num_cpus = num_online_cpus();
+
+		dflt_splitq_txq_grps = min_t(int, max_q->max_txq, num_cpus);
+		dflt_singleq_txqs = min_t(int, max_q->max_txq, num_cpus);
+		dflt_splitq_rxq_grps = min_t(int, max_q->max_rxq, num_cpus);
+		dflt_singleq_rxqs = min_t(int, max_q->max_rxq, num_cpus);
+	}
+
+	if (idpf_is_queue_model_split(le16_to_cpu(vport_msg->txq_model))) {
+		num_txq_grps = num_req_tx_qs ? num_req_tx_qs : dflt_splitq_txq_grps;
+		vport_msg->num_tx_complq = cpu_to_le16(num_txq_grps *
+						       IDPF_COMPLQ_PER_GROUP);
+		vport_msg->num_tx_q = cpu_to_le16(num_txq_grps *
+						  IDPF_DFLT_SPLITQ_TXQ_PER_GROUP);
+	} else {
+		num_txq_grps = IDPF_DFLT_SINGLEQ_TX_Q_GROUPS;
+		num_qs = num_txq_grps * (num_req_tx_qs ? num_req_tx_qs :
+					 dflt_singleq_txqs);
+		vport_msg->num_tx_q = cpu_to_le16(num_qs);
+		vport_msg->num_tx_complq = 0;
+	}
+	if (idpf_is_queue_model_split(le16_to_cpu(vport_msg->rxq_model))) {
+		num_rxq_grps = num_req_rx_qs ? num_req_rx_qs : dflt_splitq_rxq_grps;
+		vport_msg->num_rx_bufq = cpu_to_le16(num_rxq_grps *
+						     IDPF_MAX_BUFQS_PER_RXQ_GRP);
+		vport_msg->num_rx_q = cpu_to_le16(num_rxq_grps *
+						  IDPF_DFLT_SPLITQ_RXQ_PER_GROUP);
+	} else {
+		num_rxq_grps = IDPF_DFLT_SINGLEQ_RX_Q_GROUPS;
+		num_qs = num_rxq_grps * (num_req_rx_qs ? num_req_rx_qs :
+					 dflt_singleq_rxqs);
+		vport_msg->num_rx_q = cpu_to_le16(num_qs);
+		vport_msg->num_rx_bufq = 0;
+	}
+
+	return 0;
+}
+
+/**
+ * idpf_vport_calc_num_q_groups - Calculate number of queue groups
+ * @vport: vport to calculate q groups for
+ */
+void idpf_vport_calc_num_q_groups(struct idpf_vport *vport)
+{
+	if (idpf_is_queue_model_split(vport->txq_model))
+		vport->num_txq_grp = vport->num_txq;
+	else
+		vport->num_txq_grp = IDPF_DFLT_SINGLEQ_TX_Q_GROUPS;
+
+	if (idpf_is_queue_model_split(vport->rxq_model))
+		vport->num_rxq_grp = vport->num_rxq;
+	else
+		vport->num_rxq_grp = IDPF_DFLT_SINGLEQ_RX_Q_GROUPS;
+}
+
+/**
+ * idpf_vport_calc_numq_per_grp - Calculate number of queues per group
+ * @vport: vport to calculate queues for
+ * @num_txq: return parameter for number of TX queues
+ * @num_rxq: return parameter for number of RX queues
+ */
+static void idpf_vport_calc_numq_per_grp(struct idpf_vport *vport,
+					 u16 *num_txq, u16 *num_rxq)
+{
+	if (idpf_is_queue_model_split(vport->txq_model))
+		*num_txq = IDPF_DFLT_SPLITQ_TXQ_PER_GROUP;
+	else
+		*num_txq = vport->num_txq;
+
+	if (idpf_is_queue_model_split(vport->rxq_model))
+		*num_rxq = IDPF_DFLT_SPLITQ_RXQ_PER_GROUP;
+	else
+		*num_rxq = vport->num_rxq;
+}
+
+/**
+ * idpf_rxq_set_descids - set the descids supported by this queue
+ * @vport: virtual port data structure
+ * @q: rx queue for which descids are set
+ *
+ */
+static void idpf_rxq_set_descids(struct idpf_vport *vport, struct idpf_queue *q)
+{
+	if (vport->rxq_model == VIRTCHNL2_QUEUE_MODEL_SPLIT) {
+		q->rxdids = VIRTCHNL2_RXDID_2_FLEX_SPLITQ_M;
+	} else {
+		if (vport->base_rxd)
+			q->rxdids = VIRTCHNL2_RXDID_1_32B_BASE_M;
+		else
+			q->rxdids = VIRTCHNL2_RXDID_2_FLEX_SQ_NIC_M;
+	}
+}
+
+/**
+ * idpf_txq_group_alloc - Allocate all txq group resources
+ * @vport: vport to allocate txq groups for
+ * @num_txq: number of txqs to allocate for each group
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int idpf_txq_group_alloc(struct idpf_vport *vport, u16 num_txq)
+{
+	int err, i;
+
+	vport->txq_grps = kcalloc(vport->num_txq_grp,
+				  sizeof(*vport->txq_grps), GFP_KERNEL);
+	if (!vport->txq_grps)
+		return -ENOMEM;
+
+	for (i = 0; i < vport->num_txq_grp; i++) {
+		struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i];
+		struct idpf_adapter *adapter = vport->adapter;
+		int j;
+
+		tx_qgrp->vport = vport;
+		tx_qgrp->num_txq = num_txq;
+
+		for (j = 0; j < tx_qgrp->num_txq; j++) {
+			tx_qgrp->txqs[j] = kzalloc(sizeof(*tx_qgrp->txqs[j]),
+						   GFP_KERNEL);
+			if (!tx_qgrp->txqs[j]) {
+				err = -ENOMEM;
+				goto err_alloc;
+			}
+		}
+
+		for (j = 0; j < tx_qgrp->num_txq; j++) {
+			struct idpf_queue *q = tx_qgrp->txqs[j];
+
+			q->dev = &adapter->pdev->dev;
+			q->desc_count = vport->txq_desc_count;
+			q->tx_max_bufs = idpf_get_max_tx_bufs(adapter);
+			q->tx_min_pkt_len = idpf_get_min_tx_pkt_len(adapter);
+			q->vport = vport;
+			q->txq_grp = tx_qgrp;
+			hash_init(q->sched_buf_hash);
+
+			if (!idpf_is_cap_ena(adapter, IDPF_OTHER_CAPS,
+					     VIRTCHNL2_CAP_SPLITQ_QSCHED))
+				set_bit(__IDPF_Q_FLOW_SCH_EN, q->flags);
+		}
+
+		if (!idpf_is_queue_model_split(vport->txq_model))
+			continue;
+
+		tx_qgrp->complq = kcalloc(IDPF_COMPLQ_PER_GROUP,
+					  sizeof(*tx_qgrp->complq),
+					  GFP_KERNEL);
+		if (!tx_qgrp->complq) {
+			err = -ENOMEM;
+			goto err_alloc;
+		}
+
+		tx_qgrp->complq->dev = &adapter->pdev->dev;
+		tx_qgrp->complq->desc_count = vport->complq_desc_count;
+		tx_qgrp->complq->vport = vport;
+		tx_qgrp->complq->txq_grp = tx_qgrp;
+	}
+
+	return 0;
+
+err_alloc:
+	idpf_txq_group_rel(vport);
+
+	return err;
+}
+
+/**
+ * idpf_rxq_group_alloc - Allocate all rxq group resources
+ * @vport: vport to allocate rxq groups for
+ * @num_rxq: number of rxqs to allocate for each group
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int idpf_rxq_group_alloc(struct idpf_vport *vport, u16 num_rxq)
+{
+	struct idpf_adapter *adapter = vport->adapter;
+	struct idpf_queue *q;
+	int i, k, err = 0;
+
+	vport->rxq_grps = kcalloc(vport->num_rxq_grp,
+				  sizeof(struct idpf_rxq_group), GFP_KERNEL);
+	if (!vport->rxq_grps)
+		return -ENOMEM;
+
+	for (i = 0; i < vport->num_rxq_grp; i++) {
+		struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i];
+		int j;
+
+		rx_qgrp->vport = vport;
+		if (!idpf_is_queue_model_split(vport->rxq_model)) {
+			rx_qgrp->singleq.num_rxq = num_rxq;
+			for (j = 0; j < num_rxq; j++) {
+				rx_qgrp->singleq.rxqs[j] =
+						kzalloc(sizeof(*rx_qgrp->singleq.rxqs[j]),
+							GFP_KERNEL);
+				if (!rx_qgrp->singleq.rxqs[j]) {
+					err = -ENOMEM;
+					goto err_alloc;
+				}
+			}
+			goto skip_splitq_rx_init;
+		}
+		rx_qgrp->splitq.num_rxq_sets = num_rxq;
+
+		for (j = 0; j < num_rxq; j++) {
+			rx_qgrp->splitq.rxq_sets[j] =
+				kzalloc(sizeof(struct idpf_rxq_set),
+					GFP_KERNEL);
+			if (!rx_qgrp->splitq.rxq_sets[j]) {
+				err = -ENOMEM;
+				goto err_alloc;
+			}
+		}
+
+		rx_qgrp->splitq.bufq_sets = kcalloc(vport->num_bufqs_per_qgrp,
+						    sizeof(struct idpf_bufq_set),
+						    GFP_KERNEL);
+		if (!rx_qgrp->splitq.bufq_sets) {
+			err = -ENOMEM;
+			goto err_alloc;
+		}
+
+		for (j = 0; j < vport->num_bufqs_per_qgrp; j++) {
+			struct idpf_bufq_set *bufq_set =
+				&rx_qgrp->splitq.bufq_sets[j];
+			int swq_size = sizeof(struct idpf_sw_queue);
+
+			q = &rx_qgrp->splitq.bufq_sets[j].bufq;
+			q->dev = &adapter->pdev->dev;
+			q->desc_count = vport->bufq_desc_count[j];
+			q->vport = vport;
+			q->rxq_grp = rx_qgrp;
+			q->idx = j;
+			q->rx_buf_size = vport->bufq_size[j];
+			q->rx_buffer_low_watermark = IDPF_LOW_WATERMARK;
+			q->rx_buf_stride = IDPF_RX_BUF_STRIDE;
+			if (idpf_is_cap_ena_all(adapter, IDPF_HSPLIT_CAPS,
+						IDPF_CAP_HSPLIT) &&
+			    idpf_is_queue_model_split(vport->rxq_model)) {
+				q->rx_hsplit_en = true;
+				q->rx_hbuf_size = IDPF_HDR_BUF_SIZE;
+			}
+
+			bufq_set->num_refillqs = num_rxq;
+			bufq_set->refillqs = kcalloc(num_rxq, swq_size,
+						     GFP_KERNEL);
+			if (!bufq_set->refillqs) {
+				err = -ENOMEM;
+				goto err_alloc;
+			}
+			for (k = 0; k < bufq_set->num_refillqs; k++) {
+				struct idpf_sw_queue *refillq =
+					&bufq_set->refillqs[k];
+
+				refillq->dev = &vport->adapter->pdev->dev;
+				refillq->desc_count =
+					vport->bufq_desc_count[j];
+				set_bit(__IDPF_Q_GEN_CHK, refillq->flags);
+				set_bit(__IDPF_RFLQ_GEN_CHK, refillq->flags);
+				refillq->ring = kcalloc(refillq->desc_count,
+							sizeof(u16),
+							GFP_KERNEL);
+				if (!refillq->ring) {
+					err = -ENOMEM;
+					goto err_alloc;
+				}
+			}
+		}
+
+skip_splitq_rx_init:
+		for (j = 0; j < num_rxq; j++) {
+			if (!idpf_is_queue_model_split(vport->rxq_model)) {
+				q = rx_qgrp->singleq.rxqs[j];
+				goto setup_rxq;
+			}
+			q = &rx_qgrp->splitq.rxq_sets[j]->rxq;
+			rx_qgrp->splitq.rxq_sets[j]->refillq0 =
+			      &rx_qgrp->splitq.bufq_sets[0].refillqs[j];
+			if (vport->num_bufqs_per_qgrp > IDPF_SINGLE_BUFQ_PER_RXQ_GRP)
+				rx_qgrp->splitq.rxq_sets[j]->refillq1 =
+				      &rx_qgrp->splitq.bufq_sets[1].refillqs[j];
+
+			if (idpf_is_cap_ena_all(adapter, IDPF_HSPLIT_CAPS,
+						IDPF_CAP_HSPLIT) &&
+			    idpf_is_queue_model_split(vport->rxq_model)) {
+				q->rx_hsplit_en = true;
+				q->rx_hbuf_size = IDPF_HDR_BUF_SIZE;
+			}
+
+setup_rxq:
+			q->dev = &adapter->pdev->dev;
+			q->desc_count = vport->rxq_desc_count;
+			q->vport = vport;
+			q->rxq_grp = rx_qgrp;
+			q->idx = (i * num_rxq) + j;
+			/* In splitq mode, RXQ buffer size should be
+			 * set to that of the first buffer queue
+			 * associated with this RXQ
+			 */
+			q->rx_buf_size = vport->bufq_size[0];
+			q->rx_buffer_low_watermark = IDPF_LOW_WATERMARK;
+			q->rx_max_pkt_size = vport->netdev->mtu +
+							IDPF_PACKET_HDR_PAD;
+			idpf_rxq_set_descids(vport, q);
+		}
+	}
+
+err_alloc:
+	if (err)
+		idpf_rxq_group_rel(vport);
+
+	return err;
+}
+
+/**
+ * idpf_vport_queue_grp_alloc_all - Allocate all queue groups/resources
+ * @vport: vport with qgrps to allocate
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int idpf_vport_queue_grp_alloc_all(struct idpf_vport *vport)
+{
+	u16 num_txq, num_rxq;
+	int err;
+
+	idpf_vport_calc_numq_per_grp(vport, &num_txq, &num_rxq);
+
+	err = idpf_txq_group_alloc(vport, num_txq);
+	if (err)
+		goto err_out;
+
+	err = idpf_rxq_group_alloc(vport, num_rxq);
+	if (err)
+		goto err_out;
+
+	return 0;
+
+err_out:
+	idpf_vport_queue_grp_rel_all(vport);
+
+	return err;
+}
+
+/**
+ * idpf_vport_queues_alloc - Allocate memory for all queues
+ * @vport: virtual port
+ *
+ * Allocate memory for queues associated with a vport.  Returns 0 on success,
+ * negative on failure.
+ */
+int idpf_vport_queues_alloc(struct idpf_vport *vport)
+{
+	int err;
+
+	err = idpf_vport_queue_grp_alloc_all(vport);
+	if (err)
+		goto err_out;
+
+	err = idpf_tx_desc_alloc_all(vport);
+	if (err)
+		goto err_out;
+
+	err = idpf_rx_desc_alloc_all(vport);
+	if (err)
+		goto err_out;
+
+	err = idpf_vport_init_fast_path_txqs(vport);
+	if (err)
+		goto err_out;
+
+	return 0;
+
+err_out:
+	idpf_vport_queues_rel(vport);
+
+	return err;
+}
+
+/**
+ * idpf_tx_handle_sw_marker - Handle queue marker packet
+ * @tx_q: tx queue to handle software marker
+ */
+static void idpf_tx_handle_sw_marker(struct idpf_queue *tx_q)
+{
+	struct idpf_vport *vport = tx_q->vport;
+	int i;
+
+	clear_bit(__IDPF_Q_SW_MARKER, tx_q->flags);
+	/* Hardware must write marker packets to all queues associated with
+	 * completion queues. So check if all queues received marker packets
+	 */
+	for (i = 0; i < vport->num_txq; i++)
+		/* If we're still waiting on any other TXQ marker completions,
+		 * just return now since we cannot wake up the marker_wq yet.
+		 */
+		if (test_bit(__IDPF_Q_SW_MARKER, vport->txqs[i]->flags))
+			return;
+
+	/* Drain complete */
+	set_bit(IDPF_VPORT_SW_MARKER, vport->flags);
+	wake_up(&vport->sw_marker_wq);
+}
+
+/**
+ * idpf_tx_splitq_clean_hdr - Clean TX buffer resources for header portion of
+ * packet
+ * @tx_q: tx queue to clean buffer from
+ * @tx_buf: buffer to be cleaned
+ * @cleaned: pointer to stats struct to track cleaned packets/bytes
+ * @napi_budget: Used to determine if we are in netpoll
+ */
+static void idpf_tx_splitq_clean_hdr(struct idpf_queue *tx_q,
+				     struct idpf_tx_buf *tx_buf,
+				     struct idpf_cleaned_stats *cleaned,
+				     int napi_budget)
+{
+	napi_consume_skb(tx_buf->skb, napi_budget);
+
+	if (dma_unmap_len(tx_buf, len)) {
+		dma_unmap_single(tx_q->dev,
+				 dma_unmap_addr(tx_buf, dma),
+				 dma_unmap_len(tx_buf, len),
+				 DMA_TO_DEVICE);
+
+		dma_unmap_len_set(tx_buf, len, 0);
+	}
+
+	/* clear tx_buf data */
+	tx_buf->skb = NULL;
+
+	cleaned->bytes += tx_buf->bytecount;
+	cleaned->packets += tx_buf->gso_segs;
+}
+
+/**
+ * idpf_tx_clean_stashed_bufs - clean bufs that were stored for
+ * out of order completions
+ * @txq: queue to clean
+ * @compl_tag: completion tag of packet to clean (from completion descriptor)
+ * @cleaned: pointer to stats struct to track cleaned packets/bytes
+ * @budget: Used to determine if we are in netpoll
+ */
+static void idpf_tx_clean_stashed_bufs(struct idpf_queue *txq, u16 compl_tag,
+				       struct idpf_cleaned_stats *cleaned,
+				       int budget)
+{
+	struct idpf_tx_stash *stash;
+	struct hlist_node *tmp_buf;
+
+	/* Buffer completion */
+	hash_for_each_possible_safe(txq->sched_buf_hash, stash, tmp_buf,
+				    hlist, compl_tag) {
+		if (unlikely(stash->buf.compl_tag != (int)compl_tag))
+			continue;
+
+		if (stash->buf.skb) {
+			idpf_tx_splitq_clean_hdr(txq, &stash->buf, cleaned,
+						 budget);
+		} else if (dma_unmap_len(&stash->buf, len)) {
+			dma_unmap_page(txq->dev,
+				       dma_unmap_addr(&stash->buf, dma),
+				       dma_unmap_len(&stash->buf, len),
+				       DMA_TO_DEVICE);
+			dma_unmap_len_set(&stash->buf, len, 0);
+		}
+
+		/* Push shadow buf back onto stack */
+		idpf_buf_lifo_push(&txq->buf_stack, stash);
+
+		hash_del(&stash->hlist);
+	}
+}
+
+/**
+ * idpf_stash_flow_sch_buffers - store buffer parameters info to be freed at a
+ * later time (only relevant for flow scheduling mode)
+ * @txq: Tx queue to clean
+ * @tx_buf: buffer to store
+ */
+static int idpf_stash_flow_sch_buffers(struct idpf_queue *txq,
+				       struct idpf_tx_buf *tx_buf)
+{
+	struct idpf_tx_stash *stash;
+
+	if (unlikely(!dma_unmap_addr(tx_buf, dma) &&
+		     !dma_unmap_len(tx_buf, len)))
+		return 0;
+
+	stash = idpf_buf_lifo_pop(&txq->buf_stack);
+	if (unlikely(!stash)) {
+		net_err_ratelimited("%s: No out-of-order TX buffers left!\n",
+				    txq->vport->netdev->name);
+
+		return -ENOMEM;
+	}
+
+	/* Store buffer params in shadow buffer */
+	stash->buf.skb = tx_buf->skb;
+	stash->buf.bytecount = tx_buf->bytecount;
+	stash->buf.gso_segs = tx_buf->gso_segs;
+	dma_unmap_addr_set(&stash->buf, dma, dma_unmap_addr(tx_buf, dma));
+	dma_unmap_len_set(&stash->buf, len, dma_unmap_len(tx_buf, len));
+	stash->buf.compl_tag = tx_buf->compl_tag;
+
+	/* Add buffer to buf_hash table to be freed later */
+	hash_add(txq->sched_buf_hash, &stash->hlist, stash->buf.compl_tag);
+
+	memset(tx_buf, 0, sizeof(struct idpf_tx_buf));
+
+	/* Reinitialize buf_id portion of tag */
+	tx_buf->compl_tag = IDPF_SPLITQ_TX_INVAL_COMPL_TAG;
+
+	return 0;
+}
+
+#define idpf_tx_splitq_clean_bump_ntc(txq, ntc, desc, buf)	\
+do {								\
+	(ntc)++;						\
+	if (unlikely(!(ntc))) {					\
+		ntc -= (txq)->desc_count;			\
+		buf = (txq)->tx_buf;				\
+		desc = IDPF_FLEX_TX_DESC(txq, 0);		\
+	} else {						\
+		(buf)++;					\
+		(desc)++;					\
+	}							\
+} while (0)
+
+/**
+ * idpf_tx_splitq_clean - Reclaim resources from buffer queue
+ * @tx_q: Tx queue to clean
+ * @end: queue index until which it should be cleaned
+ * @napi_budget: Used to determine if we are in netpoll
+ * @cleaned: pointer to stats struct to track cleaned packets/bytes
+ * @descs_only: true if queue is using flow-based scheduling and should
+ * not clean buffers at this time
+ *
+ * Cleans the queue descriptor ring. If the queue is using queue-based
+ * scheduling, the buffers will be cleaned as well. If the queue is using
+ * flow-based scheduling, only the descriptors are cleaned at this time.
+ * Separate packet completion events will be reported on the completion queue,
+ * and the buffers will be cleaned separately. The stats are not updated from
+ * this function when using flow-based scheduling.
+ */
+static void idpf_tx_splitq_clean(struct idpf_queue *tx_q, u16 end,
+				 int napi_budget,
+				 struct idpf_cleaned_stats *cleaned,
+				 bool descs_only)
+{
+	union idpf_tx_flex_desc *next_pending_desc = NULL;
+	union idpf_tx_flex_desc *tx_desc;
+	s16 ntc = tx_q->next_to_clean;
+	struct idpf_tx_buf *tx_buf;
+
+	tx_desc = IDPF_FLEX_TX_DESC(tx_q, ntc);
+	next_pending_desc = IDPF_FLEX_TX_DESC(tx_q, end);
+	tx_buf = &tx_q->tx_buf[ntc];
+	ntc -= tx_q->desc_count;
+
+	while (tx_desc != next_pending_desc) {
+		union idpf_tx_flex_desc *eop_desc;
+
+		/* If this entry in the ring was used as a context descriptor,
+		 * it's corresponding entry in the buffer ring will have an
+		 * invalid completion tag since no buffer was used.  We can
+		 * skip this descriptor since there is no buffer to clean.
+		 */
+		if (unlikely(tx_buf->compl_tag == IDPF_SPLITQ_TX_INVAL_COMPL_TAG))
+			goto fetch_next_txq_desc;
+
+		eop_desc = (union idpf_tx_flex_desc *)tx_buf->next_to_watch;
+
+		/* clear next_to_watch to prevent false hangs */
+		tx_buf->next_to_watch = NULL;
+
+		if (descs_only) {
+			if (idpf_stash_flow_sch_buffers(tx_q, tx_buf))
+				goto tx_splitq_clean_out;
+
+			while (tx_desc != eop_desc) {
+				idpf_tx_splitq_clean_bump_ntc(tx_q, ntc,
+							      tx_desc, tx_buf);
+
+				if (dma_unmap_len(tx_buf, len)) {
+					if (idpf_stash_flow_sch_buffers(tx_q,
+									tx_buf))
+						goto tx_splitq_clean_out;
+				}
+			}
+		} else {
+			idpf_tx_splitq_clean_hdr(tx_q, tx_buf, cleaned,
+						 napi_budget);
+
+			/* unmap remaining buffers */
+			while (tx_desc != eop_desc) {
+				idpf_tx_splitq_clean_bump_ntc(tx_q, ntc,
+							      tx_desc, tx_buf);
+
+				/* unmap any remaining paged data */
+				if (dma_unmap_len(tx_buf, len)) {
+					dma_unmap_page(tx_q->dev,
+						       dma_unmap_addr(tx_buf, dma),
+						       dma_unmap_len(tx_buf, len),
+						       DMA_TO_DEVICE);
+					dma_unmap_len_set(tx_buf, len, 0);
+				}
+			}
+		}
+
+fetch_next_txq_desc:
+		idpf_tx_splitq_clean_bump_ntc(tx_q, ntc, tx_desc, tx_buf);
+	}
+
+tx_splitq_clean_out:
+	ntc += tx_q->desc_count;
+	tx_q->next_to_clean = ntc;
+}
+
+#define idpf_tx_clean_buf_ring_bump_ntc(txq, ntc, buf)	\
+do {							\
+	(buf)++;					\
+	(ntc)++;					\
+	if (unlikely((ntc) == (txq)->desc_count)) {	\
+		buf = (txq)->tx_buf;			\
+		ntc = 0;				\
+	}						\
+} while (0)
+
+/**
+ * idpf_tx_clean_buf_ring - clean flow scheduling TX queue buffers
+ * @txq: queue to clean
+ * @compl_tag: completion tag of packet to clean (from completion descriptor)
+ * @cleaned: pointer to stats struct to track cleaned packets/bytes
+ * @budget: Used to determine if we are in netpoll
+ *
+ * Cleans all buffers associated with the input completion tag either from the
+ * TX buffer ring or from the hash table if the buffers were previously
+ * stashed. Returns the byte/segment count for the cleaned packet associated
+ * this completion tag.
+ */
+static bool idpf_tx_clean_buf_ring(struct idpf_queue *txq, u16 compl_tag,
+				   struct idpf_cleaned_stats *cleaned,
+				   int budget)
+{
+	u16 idx = compl_tag & txq->compl_tag_bufid_m;
+	struct idpf_tx_buf *tx_buf = NULL;
+	u16 ntc = txq->next_to_clean;
+	u16 num_descs_cleaned = 0;
+	u16 orig_idx = idx;
+
+	tx_buf = &txq->tx_buf[idx];
+
+	while (tx_buf->compl_tag == (int)compl_tag) {
+		if (tx_buf->skb) {
+			idpf_tx_splitq_clean_hdr(txq, tx_buf, cleaned, budget);
+		} else if (dma_unmap_len(tx_buf, len)) {
+			dma_unmap_page(txq->dev,
+				       dma_unmap_addr(tx_buf, dma),
+				       dma_unmap_len(tx_buf, len),
+				       DMA_TO_DEVICE);
+			dma_unmap_len_set(tx_buf, len, 0);
+		}
+
+		memset(tx_buf, 0, sizeof(struct idpf_tx_buf));
+		tx_buf->compl_tag = IDPF_SPLITQ_TX_INVAL_COMPL_TAG;
+
+		num_descs_cleaned++;
+		idpf_tx_clean_buf_ring_bump_ntc(txq, idx, tx_buf);
+	}
+
+	/* If we didn't clean anything on the ring for this completion, there's
+	 * nothing more to do.
+	 */
+	if (unlikely(!num_descs_cleaned))
+		return false;
+
+	/* Otherwise, if we did clean a packet on the ring directly, it's safe
+	 * to assume that the descriptors starting from the original
+	 * next_to_clean up until the previously cleaned packet can be reused.
+	 * Therefore, we will go back in the ring and stash any buffers still
+	 * in the ring into the hash table to be cleaned later.
+	 */
+	tx_buf = &txq->tx_buf[ntc];
+	while (tx_buf != &txq->tx_buf[orig_idx]) {
+		idpf_stash_flow_sch_buffers(txq, tx_buf);
+		idpf_tx_clean_buf_ring_bump_ntc(txq, ntc, tx_buf);
+	}
+
+	/* Finally, update next_to_clean to reflect the work that was just done
+	 * on the ring, if any. If the packet was only cleaned from the hash
+	 * table, the ring will not be impacted, therefore we should not touch
+	 * next_to_clean. The updated idx is used here
+	 */
+	txq->next_to_clean = idx;
+
+	return true;
+}
+
+/**
+ * idpf_tx_handle_rs_completion - clean a single packet and all of its buffers
+ * whether on the buffer ring or in the hash table
+ * @txq: Tx ring to clean
+ * @desc: pointer to completion queue descriptor to extract completion
+ * information from
+ * @cleaned: pointer to stats struct to track cleaned packets/bytes
+ * @budget: Used to determine if we are in netpoll
+ *
+ * Returns bytes/packets cleaned
+ */
+static void idpf_tx_handle_rs_completion(struct idpf_queue *txq,
+					 struct idpf_splitq_tx_compl_desc *desc,
+					 struct idpf_cleaned_stats *cleaned,
+					 int budget)
+{
+	u16 compl_tag;
+
+	if (!test_bit(__IDPF_Q_FLOW_SCH_EN, txq->flags)) {
+		u16 head = le16_to_cpu(desc->q_head_compl_tag.q_head);
+
+		return idpf_tx_splitq_clean(txq, head, budget, cleaned, false);
+	}
+
+	compl_tag = le16_to_cpu(desc->q_head_compl_tag.compl_tag);
+
+	/* If we didn't clean anything on the ring, this packet must be
+	 * in the hash table. Go clean it there.
+	 */
+	if (!idpf_tx_clean_buf_ring(txq, compl_tag, cleaned, budget))
+		idpf_tx_clean_stashed_bufs(txq, compl_tag, cleaned, budget);
+}
+
+/**
+ * idpf_tx_clean_complq - Reclaim resources on completion queue
+ * @complq: Tx ring to clean
+ * @budget: Used to determine if we are in netpoll
+ * @cleaned: returns number of packets cleaned
+ *
+ * Returns true if there's any budget left (e.g. the clean is finished)
+ */
+static bool idpf_tx_clean_complq(struct idpf_queue *complq, int budget,
+				 int *cleaned)
+{
+	struct idpf_splitq_tx_compl_desc *tx_desc;
+	struct idpf_vport *vport = complq->vport;
+	s16 ntc = complq->next_to_clean;
+	struct idpf_netdev_priv *np;
+	unsigned int complq_budget;
+	bool complq_ok = true;
+	int i;
+
+	complq_budget = vport->compln_clean_budget;
+	tx_desc = IDPF_SPLITQ_TX_COMPLQ_DESC(complq, ntc);
+	ntc -= complq->desc_count;
+
+	do {
+		struct idpf_cleaned_stats cleaned_stats = { };
+		struct idpf_queue *tx_q;
+		int rel_tx_qid;
+		u16 hw_head;
+		u8 ctype;	/* completion type */
+		u16 gen;
+
+		/* if the descriptor isn't done, no work yet to do */
+		gen = (le16_to_cpu(tx_desc->qid_comptype_gen) &
+		      IDPF_TXD_COMPLQ_GEN_M) >> IDPF_TXD_COMPLQ_GEN_S;
+		if (test_bit(__IDPF_Q_GEN_CHK, complq->flags) != gen)
+			break;
+
+		/* Find necessary info of TX queue to clean buffers */
+		rel_tx_qid = (le16_to_cpu(tx_desc->qid_comptype_gen) &
+			 IDPF_TXD_COMPLQ_QID_M) >> IDPF_TXD_COMPLQ_QID_S;
+		if (rel_tx_qid >= complq->txq_grp->num_txq ||
+		    !complq->txq_grp->txqs[rel_tx_qid]) {
+			dev_err(&complq->vport->adapter->pdev->dev,
+				"TxQ not found\n");
+			goto fetch_next_desc;
+		}
+		tx_q = complq->txq_grp->txqs[rel_tx_qid];
+
+		/* Determine completion type */
+		ctype = (le16_to_cpu(tx_desc->qid_comptype_gen) &
+			IDPF_TXD_COMPLQ_COMPL_TYPE_M) >>
+			IDPF_TXD_COMPLQ_COMPL_TYPE_S;
+		switch (ctype) {
+		case IDPF_TXD_COMPLT_RE:
+			hw_head = le16_to_cpu(tx_desc->q_head_compl_tag.q_head);
+
+			idpf_tx_splitq_clean(tx_q, hw_head, budget,
+					     &cleaned_stats, true);
+			break;
+		case IDPF_TXD_COMPLT_RS:
+			idpf_tx_handle_rs_completion(tx_q, tx_desc,
+						     &cleaned_stats, budget);
+			break;
+		case IDPF_TXD_COMPLT_SW_MARKER:
+			idpf_tx_handle_sw_marker(tx_q);
+			break;
+		default:
+			dev_err(&tx_q->vport->adapter->pdev->dev,
+				"Unknown TX completion type: %d\n",
+				ctype);
+			goto fetch_next_desc;
+		}
+
+		u64_stats_update_begin(&tx_q->stats_sync);
+		u64_stats_add(&tx_q->q_stats.tx.packets, cleaned_stats.packets);
+		u64_stats_add(&tx_q->q_stats.tx.bytes, cleaned_stats.bytes);
+		tx_q->cleaned_pkts += cleaned_stats.packets;
+		tx_q->cleaned_bytes += cleaned_stats.bytes;
+		complq->num_completions++;
+		u64_stats_update_end(&tx_q->stats_sync);
+
+fetch_next_desc:
+		tx_desc++;
+		ntc++;
+		if (unlikely(!ntc)) {
+			ntc -= complq->desc_count;
+			tx_desc = IDPF_SPLITQ_TX_COMPLQ_DESC(complq, 0);
+			change_bit(__IDPF_Q_GEN_CHK, complq->flags);
+		}
+
+		prefetch(tx_desc);
+
+		/* update budget accounting */
+		complq_budget--;
+	} while (likely(complq_budget));
+
+	/* Store the state of the complq to be used later in deciding if a
+	 * TXQ can be started again
+	 */
+	if (unlikely(IDPF_TX_COMPLQ_PENDING(complq->txq_grp) >
+		     IDPF_TX_COMPLQ_OVERFLOW_THRESH(complq)))
+		complq_ok = false;
+
+	np = netdev_priv(complq->vport->netdev);
+	for (i = 0; i < complq->txq_grp->num_txq; ++i) {
+		struct idpf_queue *tx_q = complq->txq_grp->txqs[i];
+		struct netdev_queue *nq;
+		bool dont_wake;
+
+		/* We didn't clean anything on this queue, move along */
+		if (!tx_q->cleaned_bytes)
+			continue;
+
+		*cleaned += tx_q->cleaned_pkts;
+
+		/* Update BQL */
+		nq = netdev_get_tx_queue(tx_q->vport->netdev, tx_q->idx);
+
+		dont_wake = !complq_ok || IDPF_TX_BUF_RSV_LOW(tx_q) ||
+			    np->state != __IDPF_VPORT_UP ||
+			    !netif_carrier_ok(tx_q->vport->netdev);
+		/* Check if the TXQ needs to and can be restarted */
+		__netif_txq_completed_wake(nq, tx_q->cleaned_pkts, tx_q->cleaned_bytes,
+					   IDPF_DESC_UNUSED(tx_q), IDPF_TX_WAKE_THRESH,
+					   dont_wake);
+
+		/* Reset cleaned stats for the next time this queue is
+		 * cleaned
+		 */
+		tx_q->cleaned_bytes = 0;
+		tx_q->cleaned_pkts = 0;
+	}
+
+	ntc += complq->desc_count;
+	complq->next_to_clean = ntc;
+
+	return !!complq_budget;
+}
+
+/**
+ * idpf_tx_splitq_build_ctb - populate command tag and size for queue
+ * based scheduling descriptors
+ * @desc: descriptor to populate
+ * @params: pointer to tx params struct
+ * @td_cmd: command to be filled in desc
+ * @size: size of buffer
+ */
+void idpf_tx_splitq_build_ctb(union idpf_tx_flex_desc *desc,
+			      struct idpf_tx_splitq_params *params,
+			      u16 td_cmd, u16 size)
+{
+	desc->q.qw1.cmd_dtype =
+		cpu_to_le16(params->dtype & IDPF_FLEX_TXD_QW1_DTYPE_M);
+	desc->q.qw1.cmd_dtype |=
+		cpu_to_le16((td_cmd << IDPF_FLEX_TXD_QW1_CMD_S) &
+			    IDPF_FLEX_TXD_QW1_CMD_M);
+	desc->q.qw1.buf_size = cpu_to_le16((u16)size);
+	desc->q.qw1.l2tags.l2tag1 = cpu_to_le16(params->td_tag);
+}
+
+/**
+ * idpf_tx_splitq_build_flow_desc - populate command tag and size for flow
+ * scheduling descriptors
+ * @desc: descriptor to populate
+ * @params: pointer to tx params struct
+ * @td_cmd: command to be filled in desc
+ * @size: size of buffer
+ */
+void idpf_tx_splitq_build_flow_desc(union idpf_tx_flex_desc *desc,
+				    struct idpf_tx_splitq_params *params,
+				    u16 td_cmd, u16 size)
+{
+	desc->flow.qw1.cmd_dtype = (u16)params->dtype | td_cmd;
+	desc->flow.qw1.rxr_bufsize = cpu_to_le16((u16)size);
+	desc->flow.qw1.compl_tag = cpu_to_le16(params->compl_tag);
+}
+
+/**
+ * idpf_tx_maybe_stop_common - 1st level check for common Tx stop conditions
+ * @tx_q: the queue to be checked
+ * @size: number of descriptors we want to assure is available
+ *
+ * Returns 0 if stop is not needed
+ */
+int idpf_tx_maybe_stop_common(struct idpf_queue *tx_q, unsigned int size)
+{
+	struct netdev_queue *nq;
+
+	if (likely(IDPF_DESC_UNUSED(tx_q) >= size))
+		return 0;
+
+	u64_stats_update_begin(&tx_q->stats_sync);
+	u64_stats_inc(&tx_q->q_stats.tx.q_busy);
+	u64_stats_update_end(&tx_q->stats_sync);
+
+	nq = netdev_get_tx_queue(tx_q->vport->netdev, tx_q->idx);
+
+	return netif_txq_maybe_stop(nq, IDPF_DESC_UNUSED(tx_q), size, size);
+}
+
+/**
+ * idpf_tx_maybe_stop_splitq - 1st level check for Tx splitq stop conditions
+ * @tx_q: the queue to be checked
+ * @descs_needed: number of descriptors required for this packet
+ *
+ * Returns 0 if stop is not needed
+ */
+static int idpf_tx_maybe_stop_splitq(struct idpf_queue *tx_q,
+				     unsigned int descs_needed)
+{
+	if (idpf_tx_maybe_stop_common(tx_q, descs_needed))
+		goto splitq_stop;
+
+	/* If there are too many outstanding completions expected on the
+	 * completion queue, stop the TX queue to give the device some time to
+	 * catch up
+	 */
+	if (unlikely(IDPF_TX_COMPLQ_PENDING(tx_q->txq_grp) >
+		     IDPF_TX_COMPLQ_OVERFLOW_THRESH(tx_q->txq_grp->complq)))
+		goto splitq_stop;
+
+	/* Also check for available book keeping buffers; if we are low, stop
+	 * the queue to wait for more completions
+	 */
+	if (unlikely(IDPF_TX_BUF_RSV_LOW(tx_q)))
+		goto splitq_stop;
+
+	return 0;
+
+splitq_stop:
+	u64_stats_update_begin(&tx_q->stats_sync);
+	u64_stats_inc(&tx_q->q_stats.tx.q_busy);
+	u64_stats_update_end(&tx_q->stats_sync);
+	netif_stop_subqueue(tx_q->vport->netdev, tx_q->idx);
+
+	return -EBUSY;
+}
+
+/**
+ * idpf_tx_buf_hw_update - Store the new tail value
+ * @tx_q: queue to bump
+ * @val: new tail index
+ * @xmit_more: more skb's pending
+ *
+ * The naming here is special in that 'hw' signals that this function is about
+ * to do a register write to update our queue status. We know this can only
+ * mean tail here as HW should be owning head for TX.
+ */
+void idpf_tx_buf_hw_update(struct idpf_queue *tx_q, u32 val,
+			   bool xmit_more)
+{
+	struct netdev_queue *nq;
+
+	nq = netdev_get_tx_queue(tx_q->vport->netdev, tx_q->idx);
+	tx_q->next_to_use = val;
+
+	idpf_tx_maybe_stop_common(tx_q, IDPF_TX_DESC_NEEDED);
+
+	/* Force memory writes to complete before letting h/w
+	 * know there are new descriptors to fetch.  (Only
+	 * applicable for weak-ordered memory model archs,
+	 * such as IA-64).
+	 */
+	wmb();
+
+	/* notify HW of packet */
+	if (netif_xmit_stopped(nq) || !xmit_more)
+		writel(val, tx_q->tail);
+}
+
+/**
+ * idpf_tx_desc_count_required - calculate number of Tx descriptors needed
+ * @txq: queue to send buffer on
+ * @skb: send buffer
+ *
+ * Returns number of data descriptors needed for this skb.
+ */
+unsigned int idpf_tx_desc_count_required(struct idpf_queue *txq,
+					 struct sk_buff *skb)
+{
+	const struct skb_shared_info *shinfo;
+	unsigned int count = 0, i;
+
+	count += !!skb_headlen(skb);
+
+	if (!skb_is_nonlinear(skb))
+		return count;
+
+	shinfo = skb_shinfo(skb);
+	for (i = 0; i < shinfo->nr_frags; i++) {
+		unsigned int size;
+
+		size = skb_frag_size(&shinfo->frags[i]);
+
+		/* We only need to use the idpf_size_to_txd_count check if the
+		 * fragment is going to span multiple descriptors,
+		 * i.e. size >= 16K.
+		 */
+		if (size >= SZ_16K)
+			count += idpf_size_to_txd_count(size);
+		else
+			count++;
+	}
+
+	if (idpf_chk_linearize(skb, txq->tx_max_bufs, count)) {
+		if (__skb_linearize(skb))
+			return 0;
+
+		count = idpf_size_to_txd_count(skb->len);
+		u64_stats_update_begin(&txq->stats_sync);
+		u64_stats_inc(&txq->q_stats.tx.linearize);
+		u64_stats_update_end(&txq->stats_sync);
+	}
+
+	return count;
+}
+
+/**
+ * idpf_tx_dma_map_error - handle TX DMA map errors
+ * @txq: queue to send buffer on
+ * @skb: send buffer
+ * @first: original first buffer info buffer for packet
+ * @idx: starting point on ring to unwind
+ */
+void idpf_tx_dma_map_error(struct idpf_queue *txq, struct sk_buff *skb,
+			   struct idpf_tx_buf *first, u16 idx)
+{
+	u64_stats_update_begin(&txq->stats_sync);
+	u64_stats_inc(&txq->q_stats.tx.dma_map_errs);
+	u64_stats_update_end(&txq->stats_sync);
+
+	/* clear dma mappings for failed tx_buf map */
+	for (;;) {
+		struct idpf_tx_buf *tx_buf;
+
+		tx_buf = &txq->tx_buf[idx];
+		idpf_tx_buf_rel(txq, tx_buf);
+		if (tx_buf == first)
+			break;
+		if (idx == 0)
+			idx = txq->desc_count;
+		idx--;
+	}
+
+	if (skb_is_gso(skb)) {
+		union idpf_tx_flex_desc *tx_desc;
+
+		/* If we failed a DMA mapping for a TSO packet, we will have
+		 * used one additional descriptor for a context
+		 * descriptor. Reset that here.
+		 */
+		tx_desc = IDPF_FLEX_TX_DESC(txq, idx);
+		memset(tx_desc, 0, sizeof(struct idpf_flex_tx_ctx_desc));
+		if (idx == 0)
+			idx = txq->desc_count;
+		idx--;
+	}
+
+	/* Update tail in case netdev_xmit_more was previously true */
+	idpf_tx_buf_hw_update(txq, idx, false);
+}
+
+/**
+ * idpf_tx_splitq_bump_ntu - adjust NTU and generation
+ * @txq: the tx ring to wrap
+ * @ntu: ring index to bump
+ */
+static unsigned int idpf_tx_splitq_bump_ntu(struct idpf_queue *txq, u16 ntu)
+{
+	ntu++;
+
+	if (ntu == txq->desc_count) {
+		ntu = 0;
+		txq->compl_tag_cur_gen = IDPF_TX_ADJ_COMPL_TAG_GEN(txq);
+	}
+
+	return ntu;
+}
+
+/**
+ * idpf_tx_splitq_map - Build the Tx flex descriptor
+ * @tx_q: queue to send buffer on
+ * @params: pointer to splitq params struct
+ * @first: first buffer info buffer to use
+ *
+ * This function loops over the skb data pointed to by *first
+ * and gets a physical address for each memory location and programs
+ * it and the length into the transmit flex descriptor.
+ */
+static void idpf_tx_splitq_map(struct idpf_queue *tx_q,
+			       struct idpf_tx_splitq_params *params,
+			       struct idpf_tx_buf *first)
+{
+	union idpf_tx_flex_desc *tx_desc;
+	unsigned int data_len, size;
+	struct idpf_tx_buf *tx_buf;
+	u16 i = tx_q->next_to_use;
+	struct netdev_queue *nq;
+	struct sk_buff *skb;
+	skb_frag_t *frag;
+	u16 td_cmd = 0;
+	dma_addr_t dma;
+
+	skb = first->skb;
+
+	td_cmd = params->offload.td_cmd;
+
+	data_len = skb->data_len;
+	size = skb_headlen(skb);
+
+	tx_desc = IDPF_FLEX_TX_DESC(tx_q, i);
+
+	dma = dma_map_single(tx_q->dev, skb->data, size, DMA_TO_DEVICE);
+
+	tx_buf = first;
+
+	params->compl_tag =
+		(tx_q->compl_tag_cur_gen << tx_q->compl_tag_gen_s) | i;
+
+	for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
+		unsigned int max_data = IDPF_TX_MAX_DESC_DATA_ALIGNED;
+
+		if (dma_mapping_error(tx_q->dev, dma))
+			return idpf_tx_dma_map_error(tx_q, skb, first, i);
+
+		tx_buf->compl_tag = params->compl_tag;
+
+		/* record length, and DMA address */
+		dma_unmap_len_set(tx_buf, len, size);
+		dma_unmap_addr_set(tx_buf, dma, dma);
+
+		/* buf_addr is in same location for both desc types */
+		tx_desc->q.buf_addr = cpu_to_le64(dma);
+
+		/* The stack can send us fragments that are too large for a
+		 * single descriptor i.e. frag size > 16K-1. We will need to
+		 * split the fragment across multiple descriptors in this case.
+		 * To adhere to HW alignment restrictions, the fragment needs
+		 * to be split such that the first chunk ends on a 4K boundary
+		 * and all subsequent chunks start on a 4K boundary. We still
+		 * want to send as much data as possible though, so our
+		 * intermediate descriptor chunk size will be 12K.
+		 *
+		 * For example, consider a 32K fragment mapped to DMA addr 2600.
+		 * ------------------------------------------------------------
+		 * |                    frag_size = 32K                       |
+		 * ------------------------------------------------------------
+		 * |2600		  |16384	    |28672
+		 *
+		 * 3 descriptors will be used for this fragment. The HW expects
+		 * the descriptors to contain the following:
+		 * ------------------------------------------------------------
+		 * | size = 13784         | size = 12K      | size = 6696     |
+		 * | dma = 2600           | dma = 16384     | dma = 28672     |
+		 * ------------------------------------------------------------
+		 *
+		 * We need to first adjust the max_data for the first chunk so
+		 * that it ends on a 4K boundary. By negating the value of the
+		 * DMA address and taking only the low order bits, we're
+		 * effectively calculating
+		 *	4K - (DMA addr lower order bits) =
+		 *				bytes to next boundary.
+		 *
+		 * Add that to our base aligned max_data (12K) and we have
+		 * our first chunk size. In the example above,
+		 *	13784 = 12K + (4096-2600)
+		 *
+		 * After guaranteeing the first chunk ends on a 4K boundary, we
+		 * will give the intermediate descriptors 12K chunks and
+		 * whatever is left to the final descriptor. This ensures that
+		 * all descriptors used for the remaining chunks of the
+		 * fragment start on a 4K boundary and we use as few
+		 * descriptors as possible.
+		 */
+		max_data += -dma & (IDPF_TX_MAX_READ_REQ_SIZE - 1);
+		while (unlikely(size > IDPF_TX_MAX_DESC_DATA)) {
+			idpf_tx_splitq_build_desc(tx_desc, params, td_cmd,
+						  max_data);
+
+			tx_desc++;
+			i++;
+
+			if (i == tx_q->desc_count) {
+				tx_desc = IDPF_FLEX_TX_DESC(tx_q, 0);
+				i = 0;
+				tx_q->compl_tag_cur_gen =
+					IDPF_TX_ADJ_COMPL_TAG_GEN(tx_q);
+			}
+
+			/* Since this packet has a buffer that is going to span
+			 * multiple descriptors, it's going to leave holes in
+			 * to the TX buffer ring. To ensure these holes do not
+			 * cause issues in the cleaning routines, we will clear
+			 * them of any stale data and assign them the same
+			 * completion tag as the current packet. Then when the
+			 * packet is being cleaned, the cleaning routines will
+			 * simply pass over these holes and finish cleaning the
+			 * rest of the packet.
+			 */
+			memset(&tx_q->tx_buf[i], 0, sizeof(struct idpf_tx_buf));
+			tx_q->tx_buf[i].compl_tag = params->compl_tag;
+
+			/* Adjust the DMA offset and the remaining size of the
+			 * fragment.  On the first iteration of this loop,
+			 * max_data will be >= 12K and <= 16K-1.  On any
+			 * subsequent iteration of this loop, max_data will
+			 * always be 12K.
+			 */
+			dma += max_data;
+			size -= max_data;
+
+			/* Reset max_data since remaining chunks will be 12K
+			 * at most
+			 */
+			max_data = IDPF_TX_MAX_DESC_DATA_ALIGNED;
+
+			/* buf_addr is in same location for both desc types */
+			tx_desc->q.buf_addr = cpu_to_le64(dma);
+		}
+
+		if (!data_len)
+			break;
+
+		idpf_tx_splitq_build_desc(tx_desc, params, td_cmd, size);
+		tx_desc++;
+		i++;
+
+		if (i == tx_q->desc_count) {
+			tx_desc = IDPF_FLEX_TX_DESC(tx_q, 0);
+			i = 0;
+			tx_q->compl_tag_cur_gen = IDPF_TX_ADJ_COMPL_TAG_GEN(tx_q);
+		}
+
+		size = skb_frag_size(frag);
+		data_len -= size;
+
+		dma = skb_frag_dma_map(tx_q->dev, frag, 0, size,
+				       DMA_TO_DEVICE);
+
+		tx_buf = &tx_q->tx_buf[i];
+	}
+
+	/* record SW timestamp if HW timestamp is not available */
+	skb_tx_timestamp(skb);
+
+	/* write last descriptor with RS and EOP bits */
+	td_cmd |= params->eop_cmd;
+	idpf_tx_splitq_build_desc(tx_desc, params, td_cmd, size);
+	i = idpf_tx_splitq_bump_ntu(tx_q, i);
+
+	/* set next_to_watch value indicating a packet is present */
+	first->next_to_watch = tx_desc;
+
+	tx_q->txq_grp->num_completions_pending++;
+
+	/* record bytecount for BQL */
+	nq = netdev_get_tx_queue(tx_q->vport->netdev, tx_q->idx);
+	netdev_tx_sent_queue(nq, first->bytecount);
+
+	idpf_tx_buf_hw_update(tx_q, i, netdev_xmit_more());
+}
+
+/**
+ * idpf_tso - computes mss and TSO length to prepare for TSO
+ * @skb: pointer to skb
+ * @off: pointer to struct that holds offload parameters
+ *
+ * Returns error (negative) if TSO was requested but cannot be applied to the
+ * given skb, 0 if TSO does not apply to the given skb, or 1 otherwise.
+ */
+int idpf_tso(struct sk_buff *skb, struct idpf_tx_offload_params *off)
+{
+	const struct skb_shared_info *shinfo = skb_shinfo(skb);
+	union {
+		struct iphdr *v4;
+		struct ipv6hdr *v6;
+		unsigned char *hdr;
+	} ip;
+	union {
+		struct tcphdr *tcp;
+		struct udphdr *udp;
+		unsigned char *hdr;
+	} l4;
+	u32 paylen, l4_start;
+	int err;
+
+	if (!shinfo->gso_size)
+		return 0;
+
+	err = skb_cow_head(skb, 0);
+	if (err < 0)
+		return err;
+
+	ip.hdr = skb_network_header(skb);
+	l4.hdr = skb_transport_header(skb);
+
+	/* initialize outer IP header fields */
+	if (ip.v4->version == 4) {
+		ip.v4->tot_len = 0;
+		ip.v4->check = 0;
+	} else if (ip.v6->version == 6) {
+		ip.v6->payload_len = 0;
+	}
+
+	l4_start = skb_transport_offset(skb);
+
+	/* remove payload length from checksum */
+	paylen = skb->len - l4_start;
+
+	switch (shinfo->gso_type & ~SKB_GSO_DODGY) {
+	case SKB_GSO_TCPV4:
+	case SKB_GSO_TCPV6:
+		csum_replace_by_diff(&l4.tcp->check,
+				     (__force __wsum)htonl(paylen));
+		off->tso_hdr_len = __tcp_hdrlen(l4.tcp) + l4_start;
+		break;
+	case SKB_GSO_UDP_L4:
+		csum_replace_by_diff(&l4.udp->check,
+				     (__force __wsum)htonl(paylen));
+		/* compute length of segmentation header */
+		off->tso_hdr_len = sizeof(struct udphdr) + l4_start;
+		l4.udp->len = htons(shinfo->gso_size + sizeof(struct udphdr));
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	off->tso_len = skb->len - off->tso_hdr_len;
+	off->mss = shinfo->gso_size;
+	off->tso_segs = shinfo->gso_segs;
+
+	off->tx_flags |= IDPF_TX_FLAGS_TSO;
+
+	return 1;
+}
+
+/**
+ * __idpf_chk_linearize - Check skb is not using too many buffers
+ * @skb: send buffer
+ * @max_bufs: maximum number of buffers
+ *
+ * For TSO we need to count the TSO header and segment payload separately.  As
+ * such we need to check cases where we have max_bufs-1 fragments or more as we
+ * can potentially require max_bufs+1 DMA transactions, 1 for the TSO header, 1
+ * for the segment payload in the first descriptor, and another max_buf-1 for
+ * the fragments.
+ */
+static bool __idpf_chk_linearize(struct sk_buff *skb, unsigned int max_bufs)
+{
+	const struct skb_shared_info *shinfo = skb_shinfo(skb);
+	const skb_frag_t *frag, *stale;
+	int nr_frags, sum;
+
+	/* no need to check if number of frags is less than max_bufs - 1 */
+	nr_frags = shinfo->nr_frags;
+	if (nr_frags < (max_bufs - 1))
+		return false;
+
+	/* We need to walk through the list and validate that each group
+	 * of max_bufs-2 fragments totals at least gso_size.
+	 */
+	nr_frags -= max_bufs - 2;
+	frag = &shinfo->frags[0];
+
+	/* Initialize size to the negative value of gso_size minus 1.  We use
+	 * this as the worst case scenario in which the frag ahead of us only
+	 * provides one byte which is why we are limited to max_bufs-2
+	 * descriptors for a single transmit as the header and previous
+	 * fragment are already consuming 2 descriptors.
+	 */
+	sum = 1 - shinfo->gso_size;
+
+	/* Add size of frags 0 through 4 to create our initial sum */
+	sum += skb_frag_size(frag++);
+	sum += skb_frag_size(frag++);
+	sum += skb_frag_size(frag++);
+	sum += skb_frag_size(frag++);
+	sum += skb_frag_size(frag++);
+
+	/* Walk through fragments adding latest fragment, testing it, and
+	 * then removing stale fragments from the sum.
+	 */
+	for (stale = &shinfo->frags[0];; stale++) {
+		int stale_size = skb_frag_size(stale);
+
+		sum += skb_frag_size(frag++);
+
+		/* The stale fragment may present us with a smaller
+		 * descriptor than the actual fragment size. To account
+		 * for that we need to remove all the data on the front and
+		 * figure out what the remainder would be in the last
+		 * descriptor associated with the fragment.
+		 */
+		if (stale_size > IDPF_TX_MAX_DESC_DATA) {
+			int align_pad = -(skb_frag_off(stale)) &
+					(IDPF_TX_MAX_READ_REQ_SIZE - 1);
+
+			sum -= align_pad;
+			stale_size -= align_pad;
+
+			do {
+				sum -= IDPF_TX_MAX_DESC_DATA_ALIGNED;
+				stale_size -= IDPF_TX_MAX_DESC_DATA_ALIGNED;
+			} while (stale_size > IDPF_TX_MAX_DESC_DATA);
+		}
+
+		/* if sum is negative we failed to make sufficient progress */
+		if (sum < 0)
+			return true;
+
+		if (!nr_frags--)
+			break;
+
+		sum -= stale_size;
+	}
+
+	return false;
+}
+
+/**
+ * idpf_chk_linearize - Check if skb exceeds max descriptors per packet
+ * @skb: send buffer
+ * @max_bufs: maximum scatter gather buffers for single packet
+ * @count: number of buffers this packet needs
+ *
+ * Make sure we don't exceed maximum scatter gather buffers for a single
+ * packet. We have to do some special checking around the boundary (max_bufs-1)
+ * if TSO is on since we need count the TSO header and payload separately.
+ * E.g.: a packet with 7 fragments can require 9 DMA transactions; 1 for TSO
+ * header, 1 for segment payload, and then 7 for the fragments.
+ */
+bool idpf_chk_linearize(struct sk_buff *skb, unsigned int max_bufs,
+			unsigned int count)
+{
+	if (likely(count < max_bufs))
+		return false;
+	if (skb_is_gso(skb))
+		return __idpf_chk_linearize(skb, max_bufs);
+
+	return count > max_bufs;
+}
+
+/**
+ * idpf_tx_splitq_get_ctx_desc - grab next desc and update buffer ring
+ * @txq: queue to put context descriptor on
+ *
+ * Since the TX buffer rings mimics the descriptor ring, update the tx buffer
+ * ring entry to reflect that this index is a context descriptor
+ */
+static struct idpf_flex_tx_ctx_desc *
+idpf_tx_splitq_get_ctx_desc(struct idpf_queue *txq)
+{
+	struct idpf_flex_tx_ctx_desc *desc;
+	int i = txq->next_to_use;
+
+	memset(&txq->tx_buf[i], 0, sizeof(struct idpf_tx_buf));
+	txq->tx_buf[i].compl_tag = IDPF_SPLITQ_TX_INVAL_COMPL_TAG;
+
+	/* grab the next descriptor */
+	desc = IDPF_FLEX_TX_CTX_DESC(txq, i);
+	txq->next_to_use = idpf_tx_splitq_bump_ntu(txq, i);
+
+	return desc;
+}
+
+/**
+ * idpf_tx_drop_skb - free the SKB and bump tail if necessary
+ * @tx_q: queue to send buffer on
+ * @skb: pointer to skb
+ */
+netdev_tx_t idpf_tx_drop_skb(struct idpf_queue *tx_q, struct sk_buff *skb)
+{
+	u64_stats_update_begin(&tx_q->stats_sync);
+	u64_stats_inc(&tx_q->q_stats.tx.skb_drops);
+	u64_stats_update_end(&tx_q->stats_sync);
+
+	idpf_tx_buf_hw_update(tx_q, tx_q->next_to_use, false);
+
+	dev_kfree_skb(skb);
+
+	return NETDEV_TX_OK;
+}
+
+/**
+ * idpf_tx_splitq_frame - Sends buffer on Tx ring using flex descriptors
+ * @skb: send buffer
+ * @tx_q: queue to send buffer on
+ *
+ * Returns NETDEV_TX_OK if sent, else an error code
+ */
+static netdev_tx_t idpf_tx_splitq_frame(struct sk_buff *skb,
+					struct idpf_queue *tx_q)
+{
+	struct idpf_tx_splitq_params tx_params = { };
+	struct idpf_tx_buf *first;
+	unsigned int count;
+	int tso;
+
+	count = idpf_tx_desc_count_required(tx_q, skb);
+	if (unlikely(!count))
+		return idpf_tx_drop_skb(tx_q, skb);
+
+	tso = idpf_tso(skb, &tx_params.offload);
+	if (unlikely(tso < 0))
+		return idpf_tx_drop_skb(tx_q, skb);
+
+	/* Check for splitq specific TX resources */
+	count += (IDPF_TX_DESCS_PER_CACHE_LINE + tso);
+	if (idpf_tx_maybe_stop_splitq(tx_q, count)) {
+		idpf_tx_buf_hw_update(tx_q, tx_q->next_to_use, false);
+
+		return NETDEV_TX_BUSY;
+	}
+
+	if (tso) {
+		/* If tso is needed, set up context desc */
+		struct idpf_flex_tx_ctx_desc *ctx_desc =
+			idpf_tx_splitq_get_ctx_desc(tx_q);
+
+		ctx_desc->tso.qw1.cmd_dtype =
+				cpu_to_le16(IDPF_TX_DESC_DTYPE_FLEX_TSO_CTX |
+					    IDPF_TX_FLEX_CTX_DESC_CMD_TSO);
+		ctx_desc->tso.qw0.flex_tlen =
+				cpu_to_le32(tx_params.offload.tso_len &
+					    IDPF_TXD_FLEX_CTX_TLEN_M);
+		ctx_desc->tso.qw0.mss_rt =
+				cpu_to_le16(tx_params.offload.mss &
+					    IDPF_TXD_FLEX_CTX_MSS_RT_M);
+		ctx_desc->tso.qw0.hdr_len = tx_params.offload.tso_hdr_len;
+
+		u64_stats_update_begin(&tx_q->stats_sync);
+		u64_stats_inc(&tx_q->q_stats.tx.lso_pkts);
+		u64_stats_update_end(&tx_q->stats_sync);
+	}
+
+	/* record the location of the first descriptor for this packet */
+	first = &tx_q->tx_buf[tx_q->next_to_use];
+	first->skb = skb;
+
+	if (tso) {
+		first->gso_segs = tx_params.offload.tso_segs;
+		first->bytecount = skb->len +
+			((first->gso_segs - 1) * tx_params.offload.tso_hdr_len);
+	} else {
+		first->gso_segs = 1;
+		first->bytecount = max_t(unsigned int, skb->len, ETH_ZLEN);
+	}
+
+	if (test_bit(__IDPF_Q_FLOW_SCH_EN, tx_q->flags)) {
+		tx_params.dtype = IDPF_TX_DESC_DTYPE_FLEX_FLOW_SCHE;
+		tx_params.eop_cmd = IDPF_TXD_FLEX_FLOW_CMD_EOP;
+		/* Set the RE bit to catch any packets that may have not been
+		 * stashed during RS completion cleaning. MIN_GAP is set to
+		 * MIN_RING size to ensure it will be set at least once each
+		 * time around the ring.
+		 */
+		if (!(tx_q->next_to_use % IDPF_TX_SPLITQ_RE_MIN_GAP)) {
+			tx_params.eop_cmd |= IDPF_TXD_FLEX_FLOW_CMD_RE;
+			tx_q->txq_grp->num_completions_pending++;
+		}
+
+		if (skb->ip_summed == CHECKSUM_PARTIAL)
+			tx_params.offload.td_cmd |= IDPF_TXD_FLEX_FLOW_CMD_CS_EN;
+
+	} else {
+		tx_params.dtype = IDPF_TX_DESC_DTYPE_FLEX_L2TAG1_L2TAG2;
+		tx_params.eop_cmd = IDPF_TXD_LAST_DESC_CMD;
+
+		if (skb->ip_summed == CHECKSUM_PARTIAL)
+			tx_params.offload.td_cmd |= IDPF_TX_FLEX_DESC_CMD_CS_EN;
+	}
+
+	idpf_tx_splitq_map(tx_q, &tx_params, first);
+
+	return NETDEV_TX_OK;
+}
+
+/**
+ * idpf_tx_splitq_start - Selects the right Tx queue to send buffer
+ * @skb: send buffer
+ * @netdev: network interface device structure
+ *
+ * Returns NETDEV_TX_OK if sent, else an error code
+ */
+netdev_tx_t idpf_tx_splitq_start(struct sk_buff *skb,
+				 struct net_device *netdev)
+{
+	struct idpf_vport *vport = idpf_netdev_to_vport(netdev);
+	struct idpf_queue *tx_q;
+
+	if (unlikely(skb_get_queue_mapping(skb) >= vport->num_txq)) {
+		dev_kfree_skb_any(skb);
+
+		return NETDEV_TX_OK;
+	}
+
+	tx_q = vport->txqs[skb_get_queue_mapping(skb)];
+
+	/* hardware can't handle really short frames, hardware padding works
+	 * beyond this point
+	 */
+	if (skb_put_padto(skb, tx_q->tx_min_pkt_len)) {
+		idpf_tx_buf_hw_update(tx_q, tx_q->next_to_use, false);
+
+		return NETDEV_TX_OK;
+	}
+
+	return idpf_tx_splitq_frame(skb, tx_q);
+}
+
+/**
+ * idpf_ptype_to_htype - get a hash type
+ * @decoded: Decoded Rx packet type related fields
+ *
+ * Returns appropriate hash type (such as PKT_HASH_TYPE_L2/L3/L4) to be used by
+ * skb_set_hash based on PTYPE as parsed by HW Rx pipeline and is part of
+ * Rx desc.
+ */
+enum pkt_hash_types idpf_ptype_to_htype(const struct idpf_rx_ptype_decoded *decoded)
+{
+	if (!decoded->known)
+		return PKT_HASH_TYPE_NONE;
+	if (decoded->payload_layer == IDPF_RX_PTYPE_PAYLOAD_LAYER_PAY2 &&
+	    decoded->inner_prot)
+		return PKT_HASH_TYPE_L4;
+	if (decoded->payload_layer == IDPF_RX_PTYPE_PAYLOAD_LAYER_PAY2 &&
+	    decoded->outer_ip)
+		return PKT_HASH_TYPE_L3;
+	if (decoded->outer_ip == IDPF_RX_PTYPE_OUTER_L2)
+		return PKT_HASH_TYPE_L2;
+
+	return PKT_HASH_TYPE_NONE;
+}
+
+/**
+ * idpf_rx_hash - set the hash value in the skb
+ * @rxq: Rx descriptor ring packet is being transacted on
+ * @skb: pointer to current skb being populated
+ * @rx_desc: Receive descriptor
+ * @decoded: Decoded Rx packet type related fields
+ */
+static void idpf_rx_hash(struct idpf_queue *rxq, struct sk_buff *skb,
+			 struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc,
+			 struct idpf_rx_ptype_decoded *decoded)
+{
+	u32 hash;
+
+	if (unlikely(!idpf_is_feature_ena(rxq->vport, NETIF_F_RXHASH)))
+		return;
+
+	hash = le16_to_cpu(rx_desc->hash1) |
+	       (rx_desc->ff2_mirrid_hash2.hash2 << 16) |
+	       (rx_desc->hash3 << 24);
+
+	skb_set_hash(skb, hash, idpf_ptype_to_htype(decoded));
+}
+
+/**
+ * idpf_rx_csum - Indicate in skb if checksum is good
+ * @rxq: Rx descriptor ring packet is being transacted on
+ * @skb: pointer to current skb being populated
+ * @csum_bits: checksum fields extracted from the descriptor
+ * @decoded: Decoded Rx packet type related fields
+ *
+ * skb->protocol must be set before this function is called
+ */
+static void idpf_rx_csum(struct idpf_queue *rxq, struct sk_buff *skb,
+			 struct idpf_rx_csum_decoded *csum_bits,
+			 struct idpf_rx_ptype_decoded *decoded)
+{
+	bool ipv4, ipv6;
+
+	/* check if Rx checksum is enabled */
+	if (unlikely(!idpf_is_feature_ena(rxq->vport, NETIF_F_RXCSUM)))
+		return;
+
+	/* check if HW has decoded the packet and checksum */
+	if (!(csum_bits->l3l4p))
+		return;
+
+	ipv4 = IDPF_RX_PTYPE_TO_IPV(decoded, IDPF_RX_PTYPE_OUTER_IPV4);
+	ipv6 = IDPF_RX_PTYPE_TO_IPV(decoded, IDPF_RX_PTYPE_OUTER_IPV6);
+
+	if (ipv4 && (csum_bits->ipe || csum_bits->eipe))
+		goto checksum_fail;
+
+	if (ipv6 && csum_bits->ipv6exadd)
+		return;
+
+	/* check for L4 errors and handle packets that were not able to be
+	 * checksummed
+	 */
+	if (csum_bits->l4e)
+		goto checksum_fail;
+
+	/* Only report checksum unnecessary for ICMP, TCP, UDP, or SCTP */
+	switch (decoded->inner_prot) {
+	case IDPF_RX_PTYPE_INNER_PROT_ICMP:
+	case IDPF_RX_PTYPE_INNER_PROT_TCP:
+	case IDPF_RX_PTYPE_INNER_PROT_UDP:
+		if (!csum_bits->raw_csum_inv) {
+			u16 csum = csum_bits->raw_csum;
+
+			skb->csum = csum_unfold((__force __sum16)~swab16(csum));
+			skb->ip_summed = CHECKSUM_COMPLETE;
+		} else {
+			skb->ip_summed = CHECKSUM_UNNECESSARY;
+		}
+		break;
+	case IDPF_RX_PTYPE_INNER_PROT_SCTP:
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+		break;
+	default:
+		break;
+	}
+
+	return;
+
+checksum_fail:
+	u64_stats_update_begin(&rxq->stats_sync);
+	u64_stats_inc(&rxq->q_stats.rx.hw_csum_err);
+	u64_stats_update_end(&rxq->stats_sync);
+}
+
+/**
+ * idpf_rx_splitq_extract_csum_bits - Extract checksum bits from descriptor
+ * @rx_desc: receive descriptor
+ * @csum: structure to extract checksum fields
+ *
+ **/
+static void idpf_rx_splitq_extract_csum_bits(struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc,
+					     struct idpf_rx_csum_decoded *csum)
+{
+	u8 qword0, qword1;
+
+	qword0 = rx_desc->status_err0_qw0;
+	qword1 = rx_desc->status_err0_qw1;
+
+	csum->ipe = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_IPE_M,
+			      qword1);
+	csum->eipe = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_EIPE_M,
+			       qword1);
+	csum->l4e = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_L4E_M,
+			      qword1);
+	csum->l3l4p = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_L3L4P_M,
+				qword1);
+	csum->ipv6exadd = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_IPV6EXADD_M,
+				    qword0);
+	csum->raw_csum_inv = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_ADV_RAW_CSUM_INV_M,
+				       le16_to_cpu(rx_desc->ptype_err_fflags0));
+	csum->raw_csum = le16_to_cpu(rx_desc->misc.raw_cs);
+}
+
+/**
+ * idpf_rx_rsc - Set the RSC fields in the skb
+ * @rxq : Rx descriptor ring packet is being transacted on
+ * @skb : pointer to current skb being populated
+ * @rx_desc: Receive descriptor
+ * @decoded: Decoded Rx packet type related fields
+ *
+ * Return 0 on success and error code on failure
+ *
+ * Populate the skb fields with the total number of RSC segments, RSC payload
+ * length and packet type.
+ */
+static int idpf_rx_rsc(struct idpf_queue *rxq, struct sk_buff *skb,
+		       struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc,
+		       struct idpf_rx_ptype_decoded *decoded)
+{
+	u16 rsc_segments, rsc_seg_len;
+	bool ipv4, ipv6;
+	int len;
+
+	if (unlikely(!decoded->outer_ip))
+		return -EINVAL;
+
+	rsc_seg_len = le16_to_cpu(rx_desc->misc.rscseglen);
+	if (unlikely(!rsc_seg_len))
+		return -EINVAL;
+
+	ipv4 = IDPF_RX_PTYPE_TO_IPV(decoded, IDPF_RX_PTYPE_OUTER_IPV4);
+	ipv6 = IDPF_RX_PTYPE_TO_IPV(decoded, IDPF_RX_PTYPE_OUTER_IPV6);
+
+	if (unlikely(!(ipv4 ^ ipv6)))
+		return -EINVAL;
+
+	rsc_segments = DIV_ROUND_UP(skb->data_len, rsc_seg_len);
+	if (unlikely(rsc_segments == 1))
+		return 0;
+
+	NAPI_GRO_CB(skb)->count = rsc_segments;
+	skb_shinfo(skb)->gso_size = rsc_seg_len;
+
+	skb_reset_network_header(skb);
+	len = skb->len - skb_transport_offset(skb);
+
+	if (ipv4) {
+		struct iphdr *ipv4h = ip_hdr(skb);
+
+		skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
+
+		/* Reset and set transport header offset in skb */
+		skb_set_transport_header(skb, sizeof(struct iphdr));
+
+		/* Compute the TCP pseudo header checksum*/
+		tcp_hdr(skb)->check =
+			~tcp_v4_check(len, ipv4h->saddr, ipv4h->daddr, 0);
+	} else {
+		struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+
+		skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
+		skb_set_transport_header(skb, sizeof(struct ipv6hdr));
+		tcp_hdr(skb)->check =
+			~tcp_v6_check(len, &ipv6h->saddr, &ipv6h->daddr, 0);
+	}
+
+	tcp_gro_complete(skb);
+
+	u64_stats_update_begin(&rxq->stats_sync);
+	u64_stats_inc(&rxq->q_stats.rx.rsc_pkts);
+	u64_stats_update_end(&rxq->stats_sync);
+
+	return 0;
+}
+
+/**
+ * idpf_rx_process_skb_fields - Populate skb header fields from Rx descriptor
+ * @rxq: Rx descriptor ring packet is being transacted on
+ * @skb: pointer to current skb being populated
+ * @rx_desc: Receive descriptor
+ *
+ * This function checks the ring, descriptor, and packet information in
+ * order to populate the hash, checksum, protocol, and
+ * other fields within the skb.
+ */
+static int idpf_rx_process_skb_fields(struct idpf_queue *rxq,
+				      struct sk_buff *skb,
+				      struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc)
+{
+	struct idpf_rx_csum_decoded csum_bits = { };
+	struct idpf_rx_ptype_decoded decoded;
+	u16 rx_ptype;
+
+	rx_ptype = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_ADV_PTYPE_M,
+			     le16_to_cpu(rx_desc->ptype_err_fflags0));
+
+	decoded = rxq->vport->rx_ptype_lkup[rx_ptype];
+	/* If we don't know the ptype we can't do anything else with it. Just
+	 * pass it up the stack as-is.
+	 */
+	if (!decoded.known)
+		return 0;
+
+	/* process RSS/hash */
+	idpf_rx_hash(rxq, skb, rx_desc, &decoded);
+
+	skb->protocol = eth_type_trans(skb, rxq->vport->netdev);
+
+	if (FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_ADV_RSC_M,
+		      le16_to_cpu(rx_desc->hdrlen_flags)))
+		return idpf_rx_rsc(rxq, skb, rx_desc, &decoded);
+
+	idpf_rx_splitq_extract_csum_bits(rx_desc, &csum_bits);
+	idpf_rx_csum(rxq, skb, &csum_bits, &decoded);
+
+	return 0;
+}
+
+/**
+ * idpf_rx_add_frag - Add contents of Rx buffer to sk_buff as a frag
+ * @rx_buf: buffer containing page to add
+ * @skb: sk_buff to place the data into
+ * @size: packet length from rx_desc
+ *
+ * This function will add the data contained in rx_buf->page to the skb.
+ * It will just attach the page as a frag to the skb.
+ * The function will then update the page offset.
+ */
+void idpf_rx_add_frag(struct idpf_rx_buf *rx_buf, struct sk_buff *skb,
+		      unsigned int size)
+{
+	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buf->page,
+			rx_buf->page_offset, size, rx_buf->truesize);
+
+	rx_buf->page = NULL;
+}
+
+/**
+ * idpf_rx_construct_skb - Allocate skb and populate it
+ * @rxq: Rx descriptor queue
+ * @rx_buf: Rx buffer to pull data from
+ * @size: the length of the packet
+ *
+ * This function allocates an skb. It then populates it with the page
+ * data from the current receive descriptor, taking care to set up the
+ * skb correctly.
+ */
+struct sk_buff *idpf_rx_construct_skb(struct idpf_queue *rxq,
+				      struct idpf_rx_buf *rx_buf,
+				      unsigned int size)
+{
+	unsigned int headlen;
+	struct sk_buff *skb;
+	void *va;
+
+	va = page_address(rx_buf->page) + rx_buf->page_offset;
+
+	/* prefetch first cache line of first page */
+	net_prefetch(va);
+	/* allocate a skb to store the frags */
+	skb = __napi_alloc_skb(&rxq->q_vector->napi, IDPF_RX_HDR_SIZE,
+			       GFP_ATOMIC);
+	if (unlikely(!skb)) {
+		idpf_rx_put_page(rx_buf);
+
+		return NULL;
+	}
+
+	skb_record_rx_queue(skb, rxq->idx);
+	skb_mark_for_recycle(skb);
+
+	/* Determine available headroom for copy */
+	headlen = size;
+	if (headlen > IDPF_RX_HDR_SIZE)
+		headlen = eth_get_headlen(skb->dev, va, IDPF_RX_HDR_SIZE);
+
+	/* align pull length to size of long to optimize memcpy performance */
+	memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long)));
+
+	/* if we exhaust the linear part then add what is left as a frag */
+	size -= headlen;
+	if (!size) {
+		idpf_rx_put_page(rx_buf);
+
+		return skb;
+	}
+
+	skb_add_rx_frag(skb, 0, rx_buf->page, rx_buf->page_offset + headlen,
+			size, rx_buf->truesize);
+
+	/* Since we're giving the page to the stack, clear our reference to it.
+	 * We'll get a new one during buffer posting.
+	 */
+	rx_buf->page = NULL;
+
+	return skb;
+}
+
+/**
+ * idpf_rx_hdr_construct_skb - Allocate skb and populate it from header buffer
+ * @rxq: Rx descriptor queue
+ * @va: Rx buffer to pull data from
+ * @size: the length of the packet
+ *
+ * This function allocates an skb. It then populates it with the page data from
+ * the current receive descriptor, taking care to set up the skb correctly.
+ * This specifically uses a header buffer to start building the skb.
+ */
+static struct sk_buff *idpf_rx_hdr_construct_skb(struct idpf_queue *rxq,
+						 const void *va,
+						 unsigned int size)
+{
+	struct sk_buff *skb;
+
+	/* allocate a skb to store the frags */
+	skb = __napi_alloc_skb(&rxq->q_vector->napi, size, GFP_ATOMIC);
+	if (unlikely(!skb))
+		return NULL;
+
+	skb_record_rx_queue(skb, rxq->idx);
+
+	memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long)));
+
+	/* More than likely, a payload fragment, which will use a page from
+	 * page_pool will be added to the SKB so mark it for recycle
+	 * preemptively. And if not, it's inconsequential.
+	 */
+	skb_mark_for_recycle(skb);
+
+	return skb;
+}
+
+/**
+ * idpf_rx_splitq_test_staterr - tests bits in Rx descriptor
+ * status and error fields
+ * @stat_err_field: field from descriptor to test bits in
+ * @stat_err_bits: value to mask
+ *
+ */
+static bool idpf_rx_splitq_test_staterr(const u8 stat_err_field,
+					const u8 stat_err_bits)
+{
+	return !!(stat_err_field & stat_err_bits);
+}
+
+/**
+ * idpf_rx_splitq_is_eop - process handling of EOP buffers
+ * @rx_desc: Rx descriptor for current buffer
+ *
+ * If the buffer is an EOP buffer, this function exits returning true,
+ * otherwise return false indicating that this is in fact a non-EOP buffer.
+ */
+static bool idpf_rx_splitq_is_eop(struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc)
+{
+	/* if we are the last buffer then there is nothing else to do */
+	return likely(idpf_rx_splitq_test_staterr(rx_desc->status_err0_qw1,
+						  IDPF_RXD_EOF_SPLITQ));
+}
+
+/**
+ * idpf_rx_splitq_clean - Clean completed descriptors from Rx queue
+ * @rxq: Rx descriptor queue to retrieve receive buffer queue
+ * @budget: Total limit on number of packets to process
+ *
+ * This function provides a "bounce buffer" approach to Rx interrupt
+ * processing. The advantage to this is that on systems that have
+ * expensive overhead for IOMMU access this provides a means of avoiding
+ * it by maintaining the mapping of the page to the system.
+ *
+ * Returns amount of work completed
+ */
+static int idpf_rx_splitq_clean(struct idpf_queue *rxq, int budget)
+{
+	int total_rx_bytes = 0, total_rx_pkts = 0;
+	struct idpf_queue *rx_bufq = NULL;
+	struct sk_buff *skb = rxq->skb;
+	u16 ntc = rxq->next_to_clean;
+
+	/* Process Rx packets bounded by budget */
+	while (likely(total_rx_pkts < budget)) {
+		struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc;
+		struct idpf_sw_queue *refillq = NULL;
+		struct idpf_rxq_set *rxq_set = NULL;
+		struct idpf_rx_buf *rx_buf = NULL;
+		union virtchnl2_rx_desc *desc;
+		unsigned int pkt_len = 0;
+		unsigned int hdr_len = 0;
+		u16 gen_id, buf_id = 0;
+		 /* Header buffer overflow only valid for header split */
+		bool hbo = false;
+		int bufq_id;
+		u8 rxdid;
+
+		/* get the Rx desc from Rx queue based on 'next_to_clean' */
+		desc = IDPF_RX_DESC(rxq, ntc);
+		rx_desc = (struct virtchnl2_rx_flex_desc_adv_nic_3 *)desc;
+
+		/* This memory barrier is needed to keep us from reading
+		 * any other fields out of the rx_desc
+		 */
+		dma_rmb();
+
+		/* if the descriptor isn't done, no work yet to do */
+		gen_id = le16_to_cpu(rx_desc->pktlen_gen_bufq_id);
+		gen_id = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_ADV_GEN_M, gen_id);
+
+		if (test_bit(__IDPF_Q_GEN_CHK, rxq->flags) != gen_id)
+			break;
+
+		rxdid = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_ADV_RXDID_M,
+				  rx_desc->rxdid_ucast);
+		if (rxdid != VIRTCHNL2_RXDID_2_FLEX_SPLITQ) {
+			IDPF_RX_BUMP_NTC(rxq, ntc);
+			u64_stats_update_begin(&rxq->stats_sync);
+			u64_stats_inc(&rxq->q_stats.rx.bad_descs);
+			u64_stats_update_end(&rxq->stats_sync);
+			continue;
+		}
+
+		pkt_len = le16_to_cpu(rx_desc->pktlen_gen_bufq_id);
+		pkt_len = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_ADV_LEN_PBUF_M,
+				    pkt_len);
+
+		hbo = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_HBO_M,
+				rx_desc->status_err0_qw1);
+
+		if (unlikely(hbo)) {
+			/* If a header buffer overflow, occurs, i.e. header is
+			 * too large to fit in the header split buffer, HW will
+			 * put the entire packet, including headers, in the
+			 * data/payload buffer.
+			 */
+			u64_stats_update_begin(&rxq->stats_sync);
+			u64_stats_inc(&rxq->q_stats.rx.hsplit_buf_ovf);
+			u64_stats_update_end(&rxq->stats_sync);
+			goto bypass_hsplit;
+		}
+
+		hdr_len = le16_to_cpu(rx_desc->hdrlen_flags);
+		hdr_len = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_ADV_LEN_HDR_M,
+				    hdr_len);
+
+bypass_hsplit:
+		bufq_id = le16_to_cpu(rx_desc->pktlen_gen_bufq_id);
+		bufq_id = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_ADV_BUFQ_ID_M,
+				    bufq_id);
+
+		rxq_set = container_of(rxq, struct idpf_rxq_set, rxq);
+		if (!bufq_id)
+			refillq = rxq_set->refillq0;
+		else
+			refillq = rxq_set->refillq1;
+
+		/* retrieve buffer from the rxq */
+		rx_bufq = &rxq->rxq_grp->splitq.bufq_sets[bufq_id].bufq;
+
+		buf_id = le16_to_cpu(rx_desc->buf_id);
+
+		rx_buf = &rx_bufq->rx_buf.buf[buf_id];
+
+		if (hdr_len) {
+			const void *va = (u8 *)rx_bufq->rx_buf.hdr_buf_va +
+						(u32)buf_id * IDPF_HDR_BUF_SIZE;
+
+			skb = idpf_rx_hdr_construct_skb(rxq, va, hdr_len);
+			u64_stats_update_begin(&rxq->stats_sync);
+			u64_stats_inc(&rxq->q_stats.rx.hsplit_pkts);
+			u64_stats_update_end(&rxq->stats_sync);
+		}
+
+		if (pkt_len) {
+			idpf_rx_sync_for_cpu(rx_buf, pkt_len);
+			if (skb)
+				idpf_rx_add_frag(rx_buf, skb, pkt_len);
+			else
+				skb = idpf_rx_construct_skb(rxq, rx_buf,
+							    pkt_len);
+		} else {
+			idpf_rx_put_page(rx_buf);
+		}
+
+		/* exit if we failed to retrieve a buffer */
+		if (!skb)
+			break;
+
+		idpf_rx_post_buf_refill(refillq, buf_id);
+
+		IDPF_RX_BUMP_NTC(rxq, ntc);
+		/* skip if it is non EOP desc */
+		if (!idpf_rx_splitq_is_eop(rx_desc))
+			continue;
+
+		/* pad skb if needed (to make valid ethernet frame) */
+		if (eth_skb_pad(skb)) {
+			skb = NULL;
+			continue;
+		}
+
+		/* probably a little skewed due to removing CRC */
+		total_rx_bytes += skb->len;
+
+		/* protocol */
+		if (unlikely(idpf_rx_process_skb_fields(rxq, skb, rx_desc))) {
+			dev_kfree_skb_any(skb);
+			skb = NULL;
+			continue;
+		}
+
+		/* send completed skb up the stack */
+		napi_gro_receive(&rxq->q_vector->napi, skb);
+		skb = NULL;
+
+		/* update budget accounting */
+		total_rx_pkts++;
+	}
+
+	rxq->next_to_clean = ntc;
+
+	rxq->skb = skb;
+	u64_stats_update_begin(&rxq->stats_sync);
+	u64_stats_add(&rxq->q_stats.rx.packets, total_rx_pkts);
+	u64_stats_add(&rxq->q_stats.rx.bytes, total_rx_bytes);
+	u64_stats_update_end(&rxq->stats_sync);
+
+	/* guarantee a trip back through this routine if there was a failure */
+	return total_rx_pkts;
+}
+
+/**
+ * idpf_rx_update_bufq_desc - Update buffer queue descriptor
+ * @bufq: Pointer to the buffer queue
+ * @refill_desc: SW Refill queue descriptor containing buffer ID
+ * @buf_desc: Buffer queue descriptor
+ *
+ * Return 0 on success and negative on failure.
+ */
+static int idpf_rx_update_bufq_desc(struct idpf_queue *bufq, u16 refill_desc,
+				    struct virtchnl2_splitq_rx_buf_desc *buf_desc)
+{
+	struct idpf_rx_buf *buf;
+	dma_addr_t addr;
+	u16 buf_id;
+
+	buf_id = FIELD_GET(IDPF_RX_BI_BUFID_M, refill_desc);
+
+	buf = &bufq->rx_buf.buf[buf_id];
+
+	addr = idpf_alloc_page(bufq->pp, buf, bufq->rx_buf_size);
+	if (unlikely(addr == DMA_MAPPING_ERROR))
+		return -ENOMEM;
+
+	buf_desc->pkt_addr = cpu_to_le64(addr);
+	buf_desc->qword0.buf_id = cpu_to_le16(buf_id);
+
+	if (!bufq->rx_hsplit_en)
+		return 0;
+
+	buf_desc->hdr_addr = cpu_to_le64(bufq->rx_buf.hdr_buf_pa +
+					 (u32)buf_id * IDPF_HDR_BUF_SIZE);
+
+	return 0;
+}
+
+/**
+ * idpf_rx_clean_refillq - Clean refill queue buffers
+ * @bufq: buffer queue to post buffers back to
+ * @refillq: refill queue to clean
+ *
+ * This function takes care of the buffer refill management
+ */
+static void idpf_rx_clean_refillq(struct idpf_queue *bufq,
+				  struct idpf_sw_queue *refillq)
+{
+	struct virtchnl2_splitq_rx_buf_desc *buf_desc;
+	u16 bufq_nta = bufq->next_to_alloc;
+	u16 ntc = refillq->next_to_clean;
+	int cleaned = 0;
+	u16 gen;
+
+	buf_desc = IDPF_SPLITQ_RX_BUF_DESC(bufq, bufq_nta);
+
+	/* make sure we stop at ring wrap in the unlikely case ring is full */
+	while (likely(cleaned < refillq->desc_count)) {
+		u16 refill_desc = IDPF_SPLITQ_RX_BI_DESC(refillq, ntc);
+		bool failure;
+
+		gen = FIELD_GET(IDPF_RX_BI_GEN_M, refill_desc);
+		if (test_bit(__IDPF_RFLQ_GEN_CHK, refillq->flags) != gen)
+			break;
+
+		failure = idpf_rx_update_bufq_desc(bufq, refill_desc,
+						   buf_desc);
+		if (failure)
+			break;
+
+		if (unlikely(++ntc == refillq->desc_count)) {
+			change_bit(__IDPF_RFLQ_GEN_CHK, refillq->flags);
+			ntc = 0;
+		}
+
+		if (unlikely(++bufq_nta == bufq->desc_count)) {
+			buf_desc = IDPF_SPLITQ_RX_BUF_DESC(bufq, 0);
+			bufq_nta = 0;
+		} else {
+			buf_desc++;
+		}
+
+		cleaned++;
+	}
+
+	if (!cleaned)
+		return;
+
+	/* We want to limit how many transactions on the bus we trigger with
+	 * tail writes so we only do it in strides. It's also important we
+	 * align the write to a multiple of 8 as required by HW.
+	 */
+	if (((bufq->next_to_use <= bufq_nta ? 0 : bufq->desc_count) +
+	    bufq_nta - bufq->next_to_use) >= IDPF_RX_BUF_POST_STRIDE)
+		idpf_rx_buf_hw_update(bufq, ALIGN_DOWN(bufq_nta,
+						       IDPF_RX_BUF_POST_STRIDE));
+
+	/* update next to alloc since we have filled the ring */
+	refillq->next_to_clean = ntc;
+	bufq->next_to_alloc = bufq_nta;
+}
+
+/**
+ * idpf_rx_clean_refillq_all - Clean all refill queues
+ * @bufq: buffer queue with refill queues
+ *
+ * Iterates through all refill queues assigned to the buffer queue assigned to
+ * this vector.  Returns true if clean is complete within budget, false
+ * otherwise.
+ */
+static void idpf_rx_clean_refillq_all(struct idpf_queue *bufq)
+{
+	struct idpf_bufq_set *bufq_set;
+	int i;
+
+	bufq_set = container_of(bufq, struct idpf_bufq_set, bufq);
+	for (i = 0; i < bufq_set->num_refillqs; i++)
+		idpf_rx_clean_refillq(bufq, &bufq_set->refillqs[i]);
+}
+
+/**
+ * idpf_vport_intr_clean_queues - MSIX mode Interrupt Handler
+ * @irq: interrupt number
+ * @data: pointer to a q_vector
+ *
+ */
+static irqreturn_t idpf_vport_intr_clean_queues(int __always_unused irq,
+						void *data)
+{
+	struct idpf_q_vector *q_vector = (struct idpf_q_vector *)data;
+
+	q_vector->total_events++;
+	napi_schedule(&q_vector->napi);
+
+	return IRQ_HANDLED;
+}
+
+/**
+ * idpf_vport_intr_napi_del_all - Unregister napi for all q_vectors in vport
+ * @vport: virtual port structure
+ *
+ */
+static void idpf_vport_intr_napi_del_all(struct idpf_vport *vport)
+{
+	u16 v_idx;
+
+	for (v_idx = 0; v_idx < vport->num_q_vectors; v_idx++)
+		netif_napi_del(&vport->q_vectors[v_idx].napi);
+}
+
+/**
+ * idpf_vport_intr_napi_dis_all - Disable NAPI for all q_vectors in the vport
+ * @vport: main vport structure
+ */
+static void idpf_vport_intr_napi_dis_all(struct idpf_vport *vport)
+{
+	int v_idx;
+
+	for (v_idx = 0; v_idx < vport->num_q_vectors; v_idx++)
+		napi_disable(&vport->q_vectors[v_idx].napi);
+}
+
+/**
+ * idpf_vport_intr_rel - Free memory allocated for interrupt vectors
+ * @vport: virtual port
+ *
+ * Free the memory allocated for interrupt vectors  associated to a vport
+ */
+void idpf_vport_intr_rel(struct idpf_vport *vport)
+{
+	int i, j, v_idx;
+
+	for (v_idx = 0; v_idx < vport->num_q_vectors; v_idx++) {
+		struct idpf_q_vector *q_vector = &vport->q_vectors[v_idx];
+
+		kfree(q_vector->bufq);
+		q_vector->bufq = NULL;
+		kfree(q_vector->tx);
+		q_vector->tx = NULL;
+		kfree(q_vector->rx);
+		q_vector->rx = NULL;
+	}
+
+	/* Clean up the mapping of queues to vectors */
+	for (i = 0; i < vport->num_rxq_grp; i++) {
+		struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i];
+
+		if (idpf_is_queue_model_split(vport->rxq_model))
+			for (j = 0; j < rx_qgrp->splitq.num_rxq_sets; j++)
+				rx_qgrp->splitq.rxq_sets[j]->rxq.q_vector = NULL;
+		else
+			for (j = 0; j < rx_qgrp->singleq.num_rxq; j++)
+				rx_qgrp->singleq.rxqs[j]->q_vector = NULL;
+	}
+
+	if (idpf_is_queue_model_split(vport->txq_model))
+		for (i = 0; i < vport->num_txq_grp; i++)
+			vport->txq_grps[i].complq->q_vector = NULL;
+	else
+		for (i = 0; i < vport->num_txq_grp; i++)
+			for (j = 0; j < vport->txq_grps[i].num_txq; j++)
+				vport->txq_grps[i].txqs[j]->q_vector = NULL;
+
+	kfree(vport->q_vectors);
+	vport->q_vectors = NULL;
+}
+
+/**
+ * idpf_vport_intr_rel_irq - Free the IRQ association with the OS
+ * @vport: main vport structure
+ */
+static void idpf_vport_intr_rel_irq(struct idpf_vport *vport)
+{
+	struct idpf_adapter *adapter = vport->adapter;
+	int vector;
+
+	for (vector = 0; vector < vport->num_q_vectors; vector++) {
+		struct idpf_q_vector *q_vector = &vport->q_vectors[vector];
+		int irq_num, vidx;
+
+		/* free only the irqs that were actually requested */
+		if (!q_vector)
+			continue;
+
+		vidx = vport->q_vector_idxs[vector];
+		irq_num = adapter->msix_entries[vidx].vector;
+
+		/* clear the affinity_mask in the IRQ descriptor */
+		irq_set_affinity_hint(irq_num, NULL);
+		free_irq(irq_num, q_vector);
+	}
+}
+
+/**
+ * idpf_vport_intr_dis_irq_all - Disable all interrupt
+ * @vport: main vport structure
+ */
+static void idpf_vport_intr_dis_irq_all(struct idpf_vport *vport)
+{
+	struct idpf_q_vector *q_vector = vport->q_vectors;
+	int q_idx;
+
+	for (q_idx = 0; q_idx < vport->num_q_vectors; q_idx++)
+		writel(0, q_vector[q_idx].intr_reg.dyn_ctl);
+}
+
+/**
+ * idpf_vport_intr_buildreg_itr - Enable default interrupt generation settings
+ * @q_vector: pointer to q_vector
+ * @type: itr index
+ * @itr: itr value
+ */
+static u32 idpf_vport_intr_buildreg_itr(struct idpf_q_vector *q_vector,
+					const int type, u16 itr)
+{
+	u32 itr_val;
+
+	itr &= IDPF_ITR_MASK;
+	/* Don't clear PBA because that can cause lost interrupts that
+	 * came in while we were cleaning/polling
+	 */
+	itr_val = q_vector->intr_reg.dyn_ctl_intena_m |
+		  (type << q_vector->intr_reg.dyn_ctl_itridx_s) |
+		  (itr << (q_vector->intr_reg.dyn_ctl_intrvl_s - 1));
+
+	return itr_val;
+}
+
+/**
+ * idpf_update_dim_sample - Update dim sample with packets and bytes
+ * @q_vector: the vector associated with the interrupt
+ * @dim_sample: dim sample to update
+ * @dim: dim instance structure
+ * @packets: total packets
+ * @bytes: total bytes
+ *
+ * Update the dim sample with the packets and bytes which are passed to this
+ * function. Set the dim state appropriately if the dim settings gets stale.
+ */
+static void idpf_update_dim_sample(struct idpf_q_vector *q_vector,
+				   struct dim_sample *dim_sample,
+				   struct dim *dim, u64 packets, u64 bytes)
+{
+	dim_update_sample(q_vector->total_events, packets, bytes, dim_sample);
+	dim_sample->comp_ctr = 0;
+
+	/* if dim settings get stale, like when not updated for 1 second or
+	 * longer, force it to start again. This addresses the frequent case
+	 * of an idle queue being switched to by the scheduler.
+	 */
+	if (ktime_ms_delta(dim_sample->time, dim->start_sample.time) >= HZ)
+		dim->state = DIM_START_MEASURE;
+}
+
+/**
+ * idpf_net_dim - Update net DIM algorithm
+ * @q_vector: the vector associated with the interrupt
+ *
+ * Create a DIM sample and notify net_dim() so that it can possibly decide
+ * a new ITR value based on incoming packets, bytes, and interrupts.
+ *
+ * This function is a no-op if the queue is not configured to dynamic ITR.
+ */
+static void idpf_net_dim(struct idpf_q_vector *q_vector)
+{
+	struct dim_sample dim_sample = { };
+	u64 packets, bytes;
+	u32 i;
+
+	if (!IDPF_ITR_IS_DYNAMIC(q_vector->tx_intr_mode))
+		goto check_rx_itr;
+
+	for (i = 0, packets = 0, bytes = 0; i < q_vector->num_txq; i++) {
+		struct idpf_queue *txq = q_vector->tx[i];
+		unsigned int start;
+
+		do {
+			start = u64_stats_fetch_begin(&txq->stats_sync);
+			packets += u64_stats_read(&txq->q_stats.tx.packets);
+			bytes += u64_stats_read(&txq->q_stats.tx.bytes);
+		} while (u64_stats_fetch_retry(&txq->stats_sync, start));
+	}
+
+	idpf_update_dim_sample(q_vector, &dim_sample, &q_vector->tx_dim,
+			       packets, bytes);
+	net_dim(&q_vector->tx_dim, dim_sample);
+
+check_rx_itr:
+	if (!IDPF_ITR_IS_DYNAMIC(q_vector->rx_intr_mode))
+		return;
+
+	for (i = 0, packets = 0, bytes = 0; i < q_vector->num_rxq; i++) {
+		struct idpf_queue *rxq = q_vector->rx[i];
+		unsigned int start;
+
+		do {
+			start = u64_stats_fetch_begin(&rxq->stats_sync);
+			packets += u64_stats_read(&rxq->q_stats.rx.packets);
+			bytes += u64_stats_read(&rxq->q_stats.rx.bytes);
+		} while (u64_stats_fetch_retry(&rxq->stats_sync, start));
+	}
+
+	idpf_update_dim_sample(q_vector, &dim_sample, &q_vector->rx_dim,
+			       packets, bytes);
+	net_dim(&q_vector->rx_dim, dim_sample);
+}
+
+/**
+ * idpf_vport_intr_update_itr_ena_irq - Update itr and re-enable MSIX interrupt
+ * @q_vector: q_vector for which itr is being updated and interrupt enabled
+ *
+ * Update the net_dim() algorithm and re-enable the interrupt associated with
+ * this vector.
+ */
+void idpf_vport_intr_update_itr_ena_irq(struct idpf_q_vector *q_vector)
+{
+	u32 intval;
+
+	/* net_dim() updates ITR out-of-band using a work item */
+	idpf_net_dim(q_vector);
+
+	intval = idpf_vport_intr_buildreg_itr(q_vector,
+					      IDPF_NO_ITR_UPDATE_IDX, 0);
+
+	writel(intval, q_vector->intr_reg.dyn_ctl);
+}
+
+/**
+ * idpf_vport_intr_req_irq - get MSI-X vectors from the OS for the vport
+ * @vport: main vport structure
+ * @basename: name for the vector
+ */
+static int idpf_vport_intr_req_irq(struct idpf_vport *vport, char *basename)
+{
+	struct idpf_adapter *adapter = vport->adapter;
+	int vector, err, irq_num, vidx;
+	const char *vec_name;
+
+	for (vector = 0; vector < vport->num_q_vectors; vector++) {
+		struct idpf_q_vector *q_vector = &vport->q_vectors[vector];
+
+		vidx = vport->q_vector_idxs[vector];
+		irq_num = adapter->msix_entries[vidx].vector;
+
+		if (q_vector->num_rxq && q_vector->num_txq)
+			vec_name = "TxRx";
+		else if (q_vector->num_rxq)
+			vec_name = "Rx";
+		else if (q_vector->num_txq)
+			vec_name = "Tx";
+		else
+			continue;
+
+		q_vector->name = kasprintf(GFP_KERNEL, "%s-%s-%d",
+					   basename, vec_name, vidx);
+
+		err = request_irq(irq_num, idpf_vport_intr_clean_queues, 0,
+				  q_vector->name, q_vector);
+		if (err) {
+			netdev_err(vport->netdev,
+				   "Request_irq failed, error: %d\n", err);
+			goto free_q_irqs;
+		}
+		/* assign the mask for this irq */
+		irq_set_affinity_hint(irq_num, &q_vector->affinity_mask);
+	}
+
+	return 0;
+
+free_q_irqs:
+	while (--vector >= 0) {
+		vidx = vport->q_vector_idxs[vector];
+		irq_num = adapter->msix_entries[vidx].vector;
+		free_irq(irq_num, &vport->q_vectors[vector]);
+	}
+
+	return err;
+}
+
+/**
+ * idpf_vport_intr_write_itr - Write ITR value to the ITR register
+ * @q_vector: q_vector structure
+ * @itr: Interrupt throttling rate
+ * @tx: Tx or Rx ITR
+ */
+void idpf_vport_intr_write_itr(struct idpf_q_vector *q_vector, u16 itr, bool tx)
+{
+	struct idpf_intr_reg *intr_reg;
+
+	if (tx && !q_vector->tx)
+		return;
+	else if (!tx && !q_vector->rx)
+		return;
+
+	intr_reg = &q_vector->intr_reg;
+	writel(ITR_REG_ALIGN(itr) >> IDPF_ITR_GRAN_S,
+	       tx ? intr_reg->tx_itr : intr_reg->rx_itr);
+}
+
+/**
+ * idpf_vport_intr_ena_irq_all - Enable IRQ for the given vport
+ * @vport: main vport structure
+ */
+static void idpf_vport_intr_ena_irq_all(struct idpf_vport *vport)
+{
+	bool dynamic;
+	int q_idx;
+	u16 itr;
+
+	for (q_idx = 0; q_idx < vport->num_q_vectors; q_idx++) {
+		struct idpf_q_vector *qv = &vport->q_vectors[q_idx];
+
+		/* Set the initial ITR values */
+		if (qv->num_txq) {
+			dynamic = IDPF_ITR_IS_DYNAMIC(qv->tx_intr_mode);
+			itr = vport->tx_itr_profile[qv->tx_dim.profile_ix];
+			idpf_vport_intr_write_itr(qv, dynamic ?
+						  itr : qv->tx_itr_value,
+						  true);
+		}
+
+		if (qv->num_rxq) {
+			dynamic = IDPF_ITR_IS_DYNAMIC(qv->rx_intr_mode);
+			itr = vport->rx_itr_profile[qv->rx_dim.profile_ix];
+			idpf_vport_intr_write_itr(qv, dynamic ?
+						  itr : qv->rx_itr_value,
+						  false);
+		}
+
+		if (qv->num_txq || qv->num_rxq)
+			idpf_vport_intr_update_itr_ena_irq(qv);
+	}
+}
+
+/**
+ * idpf_vport_intr_deinit - Release all vector associations for the vport
+ * @vport: main vport structure
+ */
+void idpf_vport_intr_deinit(struct idpf_vport *vport)
+{
+	idpf_vport_intr_napi_dis_all(vport);
+	idpf_vport_intr_napi_del_all(vport);
+	idpf_vport_intr_dis_irq_all(vport);
+	idpf_vport_intr_rel_irq(vport);
+}
+
+/**
+ * idpf_tx_dim_work - Call back from the stack
+ * @work: work queue structure
+ */
+static void idpf_tx_dim_work(struct work_struct *work)
+{
+	struct idpf_q_vector *q_vector;
+	struct idpf_vport *vport;
+	struct dim *dim;
+	u16 itr;
+
+	dim = container_of(work, struct dim, work);
+	q_vector = container_of(dim, struct idpf_q_vector, tx_dim);
+	vport = q_vector->vport;
+
+	if (dim->profile_ix >= ARRAY_SIZE(vport->tx_itr_profile))
+		dim->profile_ix = ARRAY_SIZE(vport->tx_itr_profile) - 1;
+
+	/* look up the values in our local table */
+	itr = vport->tx_itr_profile[dim->profile_ix];
+
+	idpf_vport_intr_write_itr(q_vector, itr, true);
+
+	dim->state = DIM_START_MEASURE;
+}
+
+/**
+ * idpf_rx_dim_work - Call back from the stack
+ * @work: work queue structure
+ */
+static void idpf_rx_dim_work(struct work_struct *work)
+{
+	struct idpf_q_vector *q_vector;
+	struct idpf_vport *vport;
+	struct dim *dim;
+	u16 itr;
+
+	dim = container_of(work, struct dim, work);
+	q_vector = container_of(dim, struct idpf_q_vector, rx_dim);
+	vport = q_vector->vport;
+
+	if (dim->profile_ix >= ARRAY_SIZE(vport->rx_itr_profile))
+		dim->profile_ix = ARRAY_SIZE(vport->rx_itr_profile) - 1;
+
+	/* look up the values in our local table */
+	itr = vport->rx_itr_profile[dim->profile_ix];
+
+	idpf_vport_intr_write_itr(q_vector, itr, false);
+
+	dim->state = DIM_START_MEASURE;
+}
+
+/**
+ * idpf_init_dim - Set up dynamic interrupt moderation
+ * @qv: q_vector structure
+ */
+static void idpf_init_dim(struct idpf_q_vector *qv)
+{
+	INIT_WORK(&qv->tx_dim.work, idpf_tx_dim_work);
+	qv->tx_dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
+	qv->tx_dim.profile_ix = IDPF_DIM_DEFAULT_PROFILE_IX;
+
+	INIT_WORK(&qv->rx_dim.work, idpf_rx_dim_work);
+	qv->rx_dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
+	qv->rx_dim.profile_ix = IDPF_DIM_DEFAULT_PROFILE_IX;
+}
+
+/**
+ * idpf_vport_intr_napi_ena_all - Enable NAPI for all q_vectors in the vport
+ * @vport: main vport structure
+ */
+static void idpf_vport_intr_napi_ena_all(struct idpf_vport *vport)
+{
+	int q_idx;
+
+	for (q_idx = 0; q_idx < vport->num_q_vectors; q_idx++) {
+		struct idpf_q_vector *q_vector = &vport->q_vectors[q_idx];
+
+		idpf_init_dim(q_vector);
+		napi_enable(&q_vector->napi);
+	}
+}
+
+/**
+ * idpf_tx_splitq_clean_all- Clean completion queues
+ * @q_vec: queue vector
+ * @budget: Used to determine if we are in netpoll
+ * @cleaned: returns number of packets cleaned
+ *
+ * Returns false if clean is not complete else returns true
+ */
+static bool idpf_tx_splitq_clean_all(struct idpf_q_vector *q_vec,
+				     int budget, int *cleaned)
+{
+	u16 num_txq = q_vec->num_txq;
+	bool clean_complete = true;
+	int i, budget_per_q;
+
+	if (unlikely(!num_txq))
+		return true;
+
+	budget_per_q = DIV_ROUND_UP(budget, num_txq);
+	for (i = 0; i < num_txq; i++)
+		clean_complete &= idpf_tx_clean_complq(q_vec->tx[i],
+						       budget_per_q, cleaned);
+
+	return clean_complete;
+}
+
+/**
+ * idpf_rx_splitq_clean_all- Clean completion queues
+ * @q_vec: queue vector
+ * @budget: Used to determine if we are in netpoll
+ * @cleaned: returns number of packets cleaned
+ *
+ * Returns false if clean is not complete else returns true
+ */
+static bool idpf_rx_splitq_clean_all(struct idpf_q_vector *q_vec, int budget,
+				     int *cleaned)
+{
+	u16 num_rxq = q_vec->num_rxq;
+	bool clean_complete = true;
+	int pkts_cleaned = 0;
+	int i, budget_per_q;
+
+	/* We attempt to distribute budget to each Rx queue fairly, but don't
+	 * allow the budget to go below 1 because that would exit polling early.
+	 */
+	budget_per_q = num_rxq ? max(budget / num_rxq, 1) : 0;
+	for (i = 0; i < num_rxq; i++) {
+		struct idpf_queue *rxq = q_vec->rx[i];
+		int pkts_cleaned_per_q;
+
+		pkts_cleaned_per_q = idpf_rx_splitq_clean(rxq, budget_per_q);
+		/* if we clean as many as budgeted, we must not be done */
+		if (pkts_cleaned_per_q >= budget_per_q)
+			clean_complete = false;
+		pkts_cleaned += pkts_cleaned_per_q;
+	}
+	*cleaned = pkts_cleaned;
+
+	for (i = 0; i < q_vec->num_bufq; i++)
+		idpf_rx_clean_refillq_all(q_vec->bufq[i]);
+
+	return clean_complete;
+}
+
+/**
+ * idpf_vport_splitq_napi_poll - NAPI handler
+ * @napi: struct from which you get q_vector
+ * @budget: budget provided by stack
+ */
+static int idpf_vport_splitq_napi_poll(struct napi_struct *napi, int budget)
+{
+	struct idpf_q_vector *q_vector =
+				container_of(napi, struct idpf_q_vector, napi);
+	bool clean_complete;
+	int work_done = 0;
+
+	/* Handle case where we are called by netpoll with a budget of 0 */
+	if (unlikely(!budget)) {
+		idpf_tx_splitq_clean_all(q_vector, budget, &work_done);
+
+		return 0;
+	}
+
+	clean_complete = idpf_rx_splitq_clean_all(q_vector, budget, &work_done);
+	clean_complete &= idpf_tx_splitq_clean_all(q_vector, budget, &work_done);
+
+	/* If work not completed, return budget and polling will return */
+	if (!clean_complete)
+		return budget;
+
+	work_done = min_t(int, work_done, budget - 1);
+
+	/* Exit the polling mode, but don't re-enable interrupts if stack might
+	 * poll us due to busy-polling
+	 */
+	if (likely(napi_complete_done(napi, work_done)))
+		idpf_vport_intr_update_itr_ena_irq(q_vector);
+
+	/* Switch to poll mode in the tear-down path after sending disable
+	 * queues virtchnl message, as the interrupts will be disabled after
+	 * that
+	 */
+	if (unlikely(q_vector->num_txq && test_bit(__IDPF_Q_POLL_MODE,
+						   q_vector->tx[0]->flags)))
+		return budget;
+	else
+		return work_done;
+}
+
+/**
+ * idpf_vport_intr_map_vector_to_qs - Map vectors to queues
+ * @vport: virtual port
+ *
+ * Mapping for vectors to queues
+ */
+static void idpf_vport_intr_map_vector_to_qs(struct idpf_vport *vport)
+{
+	u16 num_txq_grp = vport->num_txq_grp;
+	int i, j, qv_idx, bufq_vidx = 0;
+	struct idpf_rxq_group *rx_qgrp;
+	struct idpf_txq_group *tx_qgrp;
+	struct idpf_queue *q, *bufq;
+	u16 q_index;
+
+	for (i = 0, qv_idx = 0; i < vport->num_rxq_grp; i++) {
+		u16 num_rxq;
+
+		rx_qgrp = &vport->rxq_grps[i];
+		if (idpf_is_queue_model_split(vport->rxq_model))
+			num_rxq = rx_qgrp->splitq.num_rxq_sets;
+		else
+			num_rxq = rx_qgrp->singleq.num_rxq;
+
+		for (j = 0; j < num_rxq; j++) {
+			if (qv_idx >= vport->num_q_vectors)
+				qv_idx = 0;
+
+			if (idpf_is_queue_model_split(vport->rxq_model))
+				q = &rx_qgrp->splitq.rxq_sets[j]->rxq;
+			else
+				q = rx_qgrp->singleq.rxqs[j];
+			q->q_vector = &vport->q_vectors[qv_idx];
+			q_index = q->q_vector->num_rxq;
+			q->q_vector->rx[q_index] = q;
+			q->q_vector->num_rxq++;
+			qv_idx++;
+		}
+
+		if (idpf_is_queue_model_split(vport->rxq_model)) {
+			for (j = 0; j < vport->num_bufqs_per_qgrp; j++) {
+				bufq = &rx_qgrp->splitq.bufq_sets[j].bufq;
+				bufq->q_vector = &vport->q_vectors[bufq_vidx];
+				q_index = bufq->q_vector->num_bufq;
+				bufq->q_vector->bufq[q_index] = bufq;
+				bufq->q_vector->num_bufq++;
+			}
+			if (++bufq_vidx >= vport->num_q_vectors)
+				bufq_vidx = 0;
+		}
+	}
+
+	for (i = 0, qv_idx = 0; i < num_txq_grp; i++) {
+		u16 num_txq;
+
+		tx_qgrp = &vport->txq_grps[i];
+		num_txq = tx_qgrp->num_txq;
+
+		if (idpf_is_queue_model_split(vport->txq_model)) {
+			if (qv_idx >= vport->num_q_vectors)
+				qv_idx = 0;
+
+			q = tx_qgrp->complq;
+			q->q_vector = &vport->q_vectors[qv_idx];
+			q_index = q->q_vector->num_txq;
+			q->q_vector->tx[q_index] = q;
+			q->q_vector->num_txq++;
+			qv_idx++;
+		} else {
+			for (j = 0; j < num_txq; j++) {
+				if (qv_idx >= vport->num_q_vectors)
+					qv_idx = 0;
+
+				q = tx_qgrp->txqs[j];
+				q->q_vector = &vport->q_vectors[qv_idx];
+				q_index = q->q_vector->num_txq;
+				q->q_vector->tx[q_index] = q;
+				q->q_vector->num_txq++;
+
+				qv_idx++;
+			}
+		}
+	}
+}
+
+/**
+ * idpf_vport_intr_init_vec_idx - Initialize the vector indexes
+ * @vport: virtual port
+ *
+ * Initialize vector indexes with values returened over mailbox
+ */
+static int idpf_vport_intr_init_vec_idx(struct idpf_vport *vport)
+{
+	struct idpf_adapter *adapter = vport->adapter;
+	struct virtchnl2_alloc_vectors *ac;
+	u16 *vecids, total_vecs;
+	int i;
+
+	ac = adapter->req_vec_chunks;
+	if (!ac) {
+		for (i = 0; i < vport->num_q_vectors; i++)
+			vport->q_vectors[i].v_idx = vport->q_vector_idxs[i];
+
+		return 0;
+	}
+
+	total_vecs = idpf_get_reserved_vecs(adapter);
+	vecids = kcalloc(total_vecs, sizeof(u16), GFP_KERNEL);
+	if (!vecids)
+		return -ENOMEM;
+
+	idpf_get_vec_ids(adapter, vecids, total_vecs, &ac->vchunks);
+
+	for (i = 0; i < vport->num_q_vectors; i++)
+		vport->q_vectors[i].v_idx = vecids[vport->q_vector_idxs[i]];
+
+	kfree(vecids);
+
+	return 0;
+}
+
+/**
+ * idpf_vport_intr_napi_add_all- Register napi handler for all qvectors
+ * @vport: virtual port structure
+ */
+static void idpf_vport_intr_napi_add_all(struct idpf_vport *vport)
+{
+	int (*napi_poll)(struct napi_struct *napi, int budget);
+	u16 v_idx;
+
+	if (idpf_is_queue_model_split(vport->txq_model))
+		napi_poll = idpf_vport_splitq_napi_poll;
+	else
+		napi_poll = idpf_vport_singleq_napi_poll;
+
+	for (v_idx = 0; v_idx < vport->num_q_vectors; v_idx++) {
+		struct idpf_q_vector *q_vector = &vport->q_vectors[v_idx];
+
+		netif_napi_add(vport->netdev, &q_vector->napi, napi_poll);
+
+		/* only set affinity_mask if the CPU is online */
+		if (cpu_online(v_idx))
+			cpumask_set_cpu(v_idx, &q_vector->affinity_mask);
+	}
+}
+
+/**
+ * idpf_vport_intr_alloc - Allocate memory for interrupt vectors
+ * @vport: virtual port
+ *
+ * We allocate one q_vector per queue interrupt. If allocation fails we
+ * return -ENOMEM.
+ */
+int idpf_vport_intr_alloc(struct idpf_vport *vport)
+{
+	u16 txqs_per_vector, rxqs_per_vector, bufqs_per_vector;
+	struct idpf_q_vector *q_vector;
+	int v_idx, err;
+
+	vport->q_vectors = kcalloc(vport->num_q_vectors,
+				   sizeof(struct idpf_q_vector), GFP_KERNEL);
+	if (!vport->q_vectors)
+		return -ENOMEM;
+
+	txqs_per_vector = DIV_ROUND_UP(vport->num_txq, vport->num_q_vectors);
+	rxqs_per_vector = DIV_ROUND_UP(vport->num_rxq, vport->num_q_vectors);
+	bufqs_per_vector = vport->num_bufqs_per_qgrp *
+			   DIV_ROUND_UP(vport->num_rxq_grp,
+					vport->num_q_vectors);
+
+	for (v_idx = 0; v_idx < vport->num_q_vectors; v_idx++) {
+		q_vector = &vport->q_vectors[v_idx];
+		q_vector->vport = vport;
+
+		q_vector->tx_itr_value = IDPF_ITR_TX_DEF;
+		q_vector->tx_intr_mode = IDPF_ITR_DYNAMIC;
+		q_vector->tx_itr_idx = VIRTCHNL2_ITR_IDX_1;
+
+		q_vector->rx_itr_value = IDPF_ITR_RX_DEF;
+		q_vector->rx_intr_mode = IDPF_ITR_DYNAMIC;
+		q_vector->rx_itr_idx = VIRTCHNL2_ITR_IDX_0;
+
+		q_vector->tx = kcalloc(txqs_per_vector,
+				       sizeof(struct idpf_queue *),
+				       GFP_KERNEL);
+		if (!q_vector->tx) {
+			err = -ENOMEM;
+			goto error;
+		}
+
+		q_vector->rx = kcalloc(rxqs_per_vector,
+				       sizeof(struct idpf_queue *),
+				       GFP_KERNEL);
+		if (!q_vector->rx) {
+			err = -ENOMEM;
+			goto error;
+		}
+
+		if (!idpf_is_queue_model_split(vport->rxq_model))
+			continue;
+
+		q_vector->bufq = kcalloc(bufqs_per_vector,
+					 sizeof(struct idpf_queue *),
+					 GFP_KERNEL);
+		if (!q_vector->bufq) {
+			err = -ENOMEM;
+			goto error;
+		}
+	}
+
+	return 0;
+
+error:
+	idpf_vport_intr_rel(vport);
+
+	return err;
+}
+
+/**
+ * idpf_vport_intr_init - Setup all vectors for the given vport
+ * @vport: virtual port
+ *
+ * Returns 0 on success or negative on failure
+ */
+int idpf_vport_intr_init(struct idpf_vport *vport)
+{
+	char *int_name;
+	int err;
+
+	err = idpf_vport_intr_init_vec_idx(vport);
+	if (err)
+		return err;
+
+	idpf_vport_intr_map_vector_to_qs(vport);
+	idpf_vport_intr_napi_add_all(vport);
+	idpf_vport_intr_napi_ena_all(vport);
+
+	err = vport->adapter->dev_ops.reg_ops.intr_reg_init(vport);
+	if (err)
+		goto unroll_vectors_alloc;
+
+	int_name = kasprintf(GFP_KERNEL, "%s-%s",
+			     dev_driver_string(&vport->adapter->pdev->dev),
+			     vport->netdev->name);
+
+	err = idpf_vport_intr_req_irq(vport, int_name);
+	if (err)
+		goto unroll_vectors_alloc;
+
+	idpf_vport_intr_ena_irq_all(vport);
+
+	return 0;
+
+unroll_vectors_alloc:
+	idpf_vport_intr_napi_dis_all(vport);
+	idpf_vport_intr_napi_del_all(vport);
+
+	return err;
+}
+
+/**
+ * idpf_config_rss - Send virtchnl messages to configure RSS
+ * @vport: virtual port
+ *
+ * Return 0 on success, negative on failure
+ */
+int idpf_config_rss(struct idpf_vport *vport)
+{
+	int err;
+
+	err = idpf_send_get_set_rss_key_msg(vport, false);
+	if (err)
+		return err;
+
+	return idpf_send_get_set_rss_lut_msg(vport, false);
+}
+
+/**
+ * idpf_fill_dflt_rss_lut - Fill the indirection table with the default values
+ * @vport: virtual port structure
+ */
+static void idpf_fill_dflt_rss_lut(struct idpf_vport *vport)
+{
+	struct idpf_adapter *adapter = vport->adapter;
+	u16 num_active_rxq = vport->num_rxq;
+	struct idpf_rss_data *rss_data;
+	int i;
+
+	rss_data = &adapter->vport_config[vport->idx]->user_config.rss_data;
+
+	for (i = 0; i < rss_data->rss_lut_size; i++) {
+		rss_data->rss_lut[i] = i % num_active_rxq;
+		rss_data->cached_lut[i] = rss_data->rss_lut[i];
+	}
+}
+
+/**
+ * idpf_init_rss - Allocate and initialize RSS resources
+ * @vport: virtual port
+ *
+ * Return 0 on success, negative on failure
+ */
+int idpf_init_rss(struct idpf_vport *vport)
+{
+	struct idpf_adapter *adapter = vport->adapter;
+	struct idpf_rss_data *rss_data;
+	u32 lut_size;
+
+	rss_data = &adapter->vport_config[vport->idx]->user_config.rss_data;
+
+	lut_size = rss_data->rss_lut_size * sizeof(u32);
+	rss_data->rss_lut = kzalloc(lut_size, GFP_KERNEL);
+	if (!rss_data->rss_lut)
+		return -ENOMEM;
+
+	rss_data->cached_lut = kzalloc(lut_size, GFP_KERNEL);
+	if (!rss_data->cached_lut) {
+		kfree(rss_data->rss_lut);
+		rss_data->rss_lut = NULL;
+
+		return -ENOMEM;
+	}
+
+	/* Fill the default RSS lut values */
+	idpf_fill_dflt_rss_lut(vport);
+
+	return idpf_config_rss(vport);
+}
+
+/**
+ * idpf_deinit_rss - Release RSS resources
+ * @vport: virtual port
+ */
+void idpf_deinit_rss(struct idpf_vport *vport)
+{
+	struct idpf_adapter *adapter = vport->adapter;
+	struct idpf_rss_data *rss_data;
+
+	rss_data = &adapter->vport_config[vport->idx]->user_config.rss_data;
+	kfree(rss_data->cached_lut);
+	rss_data->cached_lut = NULL;
+	kfree(rss_data->rss_lut);
+	rss_data->rss_lut = NULL;
+}
diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.h b/drivers/net/ethernet/intel/idpf/idpf_txrx.h
new file mode 100644
index 000000000000..df76493faa75
--- /dev/null
+++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.h
@@ -0,0 +1,1023 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright (C) 2023 Intel Corporation */
+
+#ifndef _IDPF_TXRX_H_
+#define _IDPF_TXRX_H_
+
+#include <net/page_pool/helpers.h>
+#include <net/tcp.h>
+#include <net/netdev_queues.h>
+
+#define IDPF_LARGE_MAX_Q			256
+#define IDPF_MAX_Q				16
+#define IDPF_MIN_Q				2
+/* Mailbox Queue */
+#define IDPF_MAX_MBXQ				1
+
+#define IDPF_MIN_TXQ_DESC			64
+#define IDPF_MIN_RXQ_DESC			64
+#define IDPF_MIN_TXQ_COMPLQ_DESC		256
+#define IDPF_MAX_QIDS				256
+
+/* Number of descriptors in a queue should be a multiple of 32. RX queue
+ * descriptors alone should be a multiple of IDPF_REQ_RXQ_DESC_MULTIPLE
+ * to achieve BufQ descriptors aligned to 32
+ */
+#define IDPF_REQ_DESC_MULTIPLE			32
+#define IDPF_REQ_RXQ_DESC_MULTIPLE (IDPF_MAX_BUFQS_PER_RXQ_GRP * 32)
+#define IDPF_MIN_TX_DESC_NEEDED (MAX_SKB_FRAGS + 6)
+#define IDPF_TX_WAKE_THRESH ((u16)IDPF_MIN_TX_DESC_NEEDED * 2)
+
+#define IDPF_MAX_DESCS				8160
+#define IDPF_MAX_TXQ_DESC ALIGN_DOWN(IDPF_MAX_DESCS, IDPF_REQ_DESC_MULTIPLE)
+#define IDPF_MAX_RXQ_DESC ALIGN_DOWN(IDPF_MAX_DESCS, IDPF_REQ_RXQ_DESC_MULTIPLE)
+#define MIN_SUPPORT_TXDID (\
+	VIRTCHNL2_TXDID_FLEX_FLOW_SCHED |\
+	VIRTCHNL2_TXDID_FLEX_TSO_CTX)
+
+#define IDPF_DFLT_SINGLEQ_TX_Q_GROUPS		1
+#define IDPF_DFLT_SINGLEQ_RX_Q_GROUPS		1
+#define IDPF_DFLT_SINGLEQ_TXQ_PER_GROUP		4
+#define IDPF_DFLT_SINGLEQ_RXQ_PER_GROUP		4
+
+#define IDPF_COMPLQ_PER_GROUP			1
+#define IDPF_SINGLE_BUFQ_PER_RXQ_GRP		1
+#define IDPF_MAX_BUFQS_PER_RXQ_GRP		2
+#define IDPF_BUFQ2_ENA				1
+#define IDPF_NUMQ_PER_CHUNK			1
+
+#define IDPF_DFLT_SPLITQ_TXQ_PER_GROUP		1
+#define IDPF_DFLT_SPLITQ_RXQ_PER_GROUP		1
+
+/* Default vector sharing */
+#define IDPF_MBX_Q_VEC		1
+#define IDPF_MIN_Q_VEC		1
+
+#define IDPF_DFLT_TX_Q_DESC_COUNT		512
+#define IDPF_DFLT_TX_COMPLQ_DESC_COUNT		512
+#define IDPF_DFLT_RX_Q_DESC_COUNT		512
+
+/* IMPORTANT: We absolutely _cannot_ have more buffers in the system than a
+ * given RX completion queue has descriptors. This includes _ALL_ buffer
+ * queues. E.g.: If you have two buffer queues of 512 descriptors and buffers,
+ * you have a total of 1024 buffers so your RX queue _must_ have at least that
+ * many descriptors. This macro divides a given number of RX descriptors by
+ * number of buffer queues to calculate how many descriptors each buffer queue
+ * can have without overrunning the RX queue.
+ *
+ * If you give hardware more buffers than completion descriptors what will
+ * happen is that if hardware gets a chance to post more than ring wrap of
+ * descriptors before SW gets an interrupt and overwrites SW head, the gen bit
+ * in the descriptor will be wrong. Any overwritten descriptors' buffers will
+ * be gone forever and SW has no reasonable way to tell that this has happened.
+ * From SW perspective, when we finally get an interrupt, it looks like we're
+ * still waiting for descriptor to be done, stalling forever.
+ */
+#define IDPF_RX_BUFQ_DESC_COUNT(RXD, NUM_BUFQ)	((RXD) / (NUM_BUFQ))
+
+#define IDPF_RX_BUFQ_WORKING_SET(rxq)		((rxq)->desc_count - 1)
+
+#define IDPF_RX_BUMP_NTC(rxq, ntc)				\
+do {								\
+	if (unlikely(++(ntc) == (rxq)->desc_count)) {		\
+		ntc = 0;					\
+		change_bit(__IDPF_Q_GEN_CHK, (rxq)->flags);	\
+	}							\
+} while (0)
+
+#define IDPF_SINGLEQ_BUMP_RING_IDX(q, idx)			\
+do {								\
+	if (unlikely(++(idx) == (q)->desc_count))		\
+		idx = 0;					\
+} while (0)
+
+#define IDPF_RX_HDR_SIZE			256
+#define IDPF_RX_BUF_2048			2048
+#define IDPF_RX_BUF_4096			4096
+#define IDPF_RX_BUF_STRIDE			32
+#define IDPF_RX_BUF_POST_STRIDE			16
+#define IDPF_LOW_WATERMARK			64
+/* Size of header buffer specifically for header split */
+#define IDPF_HDR_BUF_SIZE			256
+#define IDPF_PACKET_HDR_PAD	\
+	(ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN * 2)
+#define IDPF_TX_TSO_MIN_MSS			88
+
+/* Minimum number of descriptors between 2 descriptors with the RE bit set;
+ * only relevant in flow scheduling mode
+ */
+#define IDPF_TX_SPLITQ_RE_MIN_GAP	64
+
+#define IDPF_RX_BI_BUFID_S		0
+#define IDPF_RX_BI_BUFID_M		GENMASK(14, 0)
+#define IDPF_RX_BI_GEN_S		15
+#define IDPF_RX_BI_GEN_M		BIT(IDPF_RX_BI_GEN_S)
+#define IDPF_RXD_EOF_SPLITQ		VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_EOF_M
+#define IDPF_RXD_EOF_SINGLEQ		VIRTCHNL2_RX_BASE_DESC_STATUS_EOF_M
+
+#define IDPF_SINGLEQ_RX_BUF_DESC(rxq, i)	\
+	(&(((struct virtchnl2_singleq_rx_buf_desc *)((rxq)->desc_ring))[i]))
+#define IDPF_SPLITQ_RX_BUF_DESC(rxq, i)	\
+	(&(((struct virtchnl2_splitq_rx_buf_desc *)((rxq)->desc_ring))[i]))
+#define IDPF_SPLITQ_RX_BI_DESC(rxq, i) ((((rxq)->ring))[i])
+
+#define IDPF_BASE_TX_DESC(txq, i)	\
+	(&(((struct idpf_base_tx_desc *)((txq)->desc_ring))[i]))
+#define IDPF_BASE_TX_CTX_DESC(txq, i) \
+	(&(((struct idpf_base_tx_ctx_desc *)((txq)->desc_ring))[i]))
+#define IDPF_SPLITQ_TX_COMPLQ_DESC(txcq, i)	\
+	(&(((struct idpf_splitq_tx_compl_desc *)((txcq)->desc_ring))[i]))
+
+#define IDPF_FLEX_TX_DESC(txq, i) \
+	(&(((union idpf_tx_flex_desc *)((txq)->desc_ring))[i]))
+#define IDPF_FLEX_TX_CTX_DESC(txq, i)	\
+	(&(((struct idpf_flex_tx_ctx_desc *)((txq)->desc_ring))[i]))
+
+#define IDPF_DESC_UNUSED(txq)     \
+	((((txq)->next_to_clean > (txq)->next_to_use) ? 0 : (txq)->desc_count) + \
+	(txq)->next_to_clean - (txq)->next_to_use - 1)
+
+#define IDPF_TX_BUF_RSV_UNUSED(txq)	((txq)->buf_stack.top)
+#define IDPF_TX_BUF_RSV_LOW(txq)	(IDPF_TX_BUF_RSV_UNUSED(txq) < \
+					 (txq)->desc_count >> 2)
+
+#define IDPF_TX_COMPLQ_OVERFLOW_THRESH(txcq)	((txcq)->desc_count >> 1)
+/* Determine the absolute number of completions pending, i.e. the number of
+ * completions that are expected to arrive on the TX completion queue.
+ */
+#define IDPF_TX_COMPLQ_PENDING(txq)	\
+	(((txq)->num_completions_pending >= (txq)->complq->num_completions ? \
+	0 : U64_MAX) + \
+	(txq)->num_completions_pending - (txq)->complq->num_completions)
+
+#define IDPF_TX_SPLITQ_COMPL_TAG_WIDTH	16
+#define IDPF_SPLITQ_TX_INVAL_COMPL_TAG	-1
+/* Adjust the generation for the completion tag and wrap if necessary */
+#define IDPF_TX_ADJ_COMPL_TAG_GEN(txq) \
+	((++(txq)->compl_tag_cur_gen) >= (txq)->compl_tag_gen_max ? \
+	0 : (txq)->compl_tag_cur_gen)
+
+#define IDPF_TXD_LAST_DESC_CMD (IDPF_TX_DESC_CMD_EOP | IDPF_TX_DESC_CMD_RS)
+
+#define IDPF_TX_FLAGS_TSO		BIT(0)
+#define IDPF_TX_FLAGS_IPV4		BIT(1)
+#define IDPF_TX_FLAGS_IPV6		BIT(2)
+#define IDPF_TX_FLAGS_TUNNEL		BIT(3)
+
+union idpf_tx_flex_desc {
+	struct idpf_flex_tx_desc q; /* queue based scheduling */
+	struct idpf_flex_tx_sched_desc flow; /* flow based scheduling */
+};
+
+/**
+ * struct idpf_tx_buf
+ * @next_to_watch: Next descriptor to clean
+ * @skb: Pointer to the skb
+ * @dma: DMA address
+ * @len: DMA length
+ * @bytecount: Number of bytes
+ * @gso_segs: Number of GSO segments
+ * @compl_tag: Splitq only, unique identifier for a buffer. Used to compare
+ *	       with completion tag returned in buffer completion event.
+ *	       Because the completion tag is expected to be the same in all
+ *	       data descriptors for a given packet, and a single packet can
+ *	       span multiple buffers, we need this field to track all
+ *	       buffers associated with this completion tag independently of
+ *	       the buf_id. The tag consists of a N bit buf_id and M upper
+ *	       order "generation bits". See compl_tag_bufid_m and
+ *	       compl_tag_gen_s in struct idpf_queue. We'll use a value of -1
+ *	       to indicate the tag is not valid.
+ * @ctx_entry: Singleq only. Used to indicate the corresponding entry
+ *	       in the descriptor ring was used for a context descriptor and
+ *	       this buffer entry should be skipped.
+ */
+struct idpf_tx_buf {
+	void *next_to_watch;
+	struct sk_buff *skb;
+	DEFINE_DMA_UNMAP_ADDR(dma);
+	DEFINE_DMA_UNMAP_LEN(len);
+	unsigned int bytecount;
+	unsigned short gso_segs;
+
+	union {
+		int compl_tag;
+
+		bool ctx_entry;
+	};
+};
+
+struct idpf_tx_stash {
+	struct hlist_node hlist;
+	struct idpf_tx_buf buf;
+};
+
+/**
+ * struct idpf_buf_lifo - LIFO for managing OOO completions
+ * @top: Used to know how many buffers are left
+ * @size: Total size of LIFO
+ * @bufs: Backing array
+ */
+struct idpf_buf_lifo {
+	u16 top;
+	u16 size;
+	struct idpf_tx_stash **bufs;
+};
+
+/**
+ * struct idpf_tx_offload_params - Offload parameters for a given packet
+ * @tx_flags: Feature flags enabled for this packet
+ * @hdr_offsets: Offset parameter for single queue model
+ * @cd_tunneling: Type of tunneling enabled for single queue model
+ * @tso_len: Total length of payload to segment
+ * @mss: Segment size
+ * @tso_segs: Number of segments to be sent
+ * @tso_hdr_len: Length of headers to be duplicated
+ * @td_cmd: Command field to be inserted into descriptor
+ */
+struct idpf_tx_offload_params {
+	u32 tx_flags;
+
+	u32 hdr_offsets;
+	u32 cd_tunneling;
+
+	u32 tso_len;
+	u16 mss;
+	u16 tso_segs;
+	u16 tso_hdr_len;
+
+	u16 td_cmd;
+};
+
+/**
+ * struct idpf_tx_splitq_params
+ * @dtype: General descriptor info
+ * @eop_cmd: Type of EOP
+ * @compl_tag: Associated tag for completion
+ * @td_tag: Descriptor tunneling tag
+ * @offload: Offload parameters
+ */
+struct idpf_tx_splitq_params {
+	enum idpf_tx_desc_dtype_value dtype;
+	u16 eop_cmd;
+	union {
+		u16 compl_tag;
+		u16 td_tag;
+	};
+
+	struct idpf_tx_offload_params offload;
+};
+
+enum idpf_tx_ctx_desc_eipt_offload {
+	IDPF_TX_CTX_EXT_IP_NONE         = 0x0,
+	IDPF_TX_CTX_EXT_IP_IPV6         = 0x1,
+	IDPF_TX_CTX_EXT_IP_IPV4_NO_CSUM = 0x2,
+	IDPF_TX_CTX_EXT_IP_IPV4         = 0x3
+};
+
+/* Checksum offload bits decoded from the receive descriptor. */
+struct idpf_rx_csum_decoded {
+	u32 l3l4p : 1;
+	u32 ipe : 1;
+	u32 eipe : 1;
+	u32 eudpe : 1;
+	u32 ipv6exadd : 1;
+	u32 l4e : 1;
+	u32 pprs : 1;
+	u32 nat : 1;
+	u32 raw_csum_inv : 1;
+	u32 raw_csum : 16;
+};
+
+struct idpf_rx_extracted {
+	unsigned int size;
+	u16 rx_ptype;
+};
+
+#define IDPF_TX_COMPLQ_CLEAN_BUDGET	256
+#define IDPF_TX_MIN_PKT_LEN		17
+#define IDPF_TX_DESCS_FOR_SKB_DATA_PTR	1
+#define IDPF_TX_DESCS_PER_CACHE_LINE	(L1_CACHE_BYTES / \
+					 sizeof(struct idpf_flex_tx_desc))
+#define IDPF_TX_DESCS_FOR_CTX		1
+/* TX descriptors needed, worst case */
+#define IDPF_TX_DESC_NEEDED (MAX_SKB_FRAGS + IDPF_TX_DESCS_FOR_CTX + \
+			     IDPF_TX_DESCS_PER_CACHE_LINE + \
+			     IDPF_TX_DESCS_FOR_SKB_DATA_PTR)
+
+/* The size limit for a transmit buffer in a descriptor is (16K - 1).
+ * In order to align with the read requests we will align the value to
+ * the nearest 4K which represents our maximum read request size.
+ */
+#define IDPF_TX_MAX_READ_REQ_SIZE	SZ_4K
+#define IDPF_TX_MAX_DESC_DATA		(SZ_16K - 1)
+#define IDPF_TX_MAX_DESC_DATA_ALIGNED \
+	ALIGN_DOWN(IDPF_TX_MAX_DESC_DATA, IDPF_TX_MAX_READ_REQ_SIZE)
+
+#define IDPF_RX_DMA_ATTR \
+	(DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING)
+#define IDPF_RX_DESC(rxq, i)	\
+	(&(((union virtchnl2_rx_desc *)((rxq)->desc_ring))[i]))
+
+struct idpf_rx_buf {
+	struct page *page;
+	unsigned int page_offset;
+	u16 truesize;
+};
+
+#define IDPF_RX_MAX_PTYPE_PROTO_IDS    32
+#define IDPF_RX_MAX_PTYPE_SZ	(sizeof(struct virtchnl2_ptype) + \
+				 (sizeof(u16) * IDPF_RX_MAX_PTYPE_PROTO_IDS))
+#define IDPF_RX_PTYPE_HDR_SZ	sizeof(struct virtchnl2_get_ptype_info)
+#define IDPF_RX_MAX_PTYPES_PER_BUF	\
+	DIV_ROUND_DOWN_ULL((IDPF_CTLQ_MAX_BUF_LEN - IDPF_RX_PTYPE_HDR_SZ), \
+			   IDPF_RX_MAX_PTYPE_SZ)
+
+#define IDPF_GET_PTYPE_SIZE(p) struct_size((p), proto_id, (p)->proto_id_count)
+
+#define IDPF_TUN_IP_GRE (\
+	IDPF_PTYPE_TUNNEL_IP |\
+	IDPF_PTYPE_TUNNEL_IP_GRENAT)
+
+#define IDPF_TUN_IP_GRE_MAC (\
+	IDPF_TUN_IP_GRE |\
+	IDPF_PTYPE_TUNNEL_IP_GRENAT_MAC)
+
+#define IDPF_RX_MAX_PTYPE	1024
+#define IDPF_RX_MAX_BASE_PTYPE	256
+#define IDPF_INVALID_PTYPE_ID	0xFFFF
+
+/* Packet type non-ip values */
+enum idpf_rx_ptype_l2 {
+	IDPF_RX_PTYPE_L2_RESERVED	= 0,
+	IDPF_RX_PTYPE_L2_MAC_PAY2	= 1,
+	IDPF_RX_PTYPE_L2_TIMESYNC_PAY2	= 2,
+	IDPF_RX_PTYPE_L2_FIP_PAY2	= 3,
+	IDPF_RX_PTYPE_L2_OUI_PAY2	= 4,
+	IDPF_RX_PTYPE_L2_MACCNTRL_PAY2	= 5,
+	IDPF_RX_PTYPE_L2_LLDP_PAY2	= 6,
+	IDPF_RX_PTYPE_L2_ECP_PAY2	= 7,
+	IDPF_RX_PTYPE_L2_EVB_PAY2	= 8,
+	IDPF_RX_PTYPE_L2_QCN_PAY2	= 9,
+	IDPF_RX_PTYPE_L2_EAPOL_PAY2	= 10,
+	IDPF_RX_PTYPE_L2_ARP		= 11,
+};
+
+enum idpf_rx_ptype_outer_ip {
+	IDPF_RX_PTYPE_OUTER_L2	= 0,
+	IDPF_RX_PTYPE_OUTER_IP	= 1,
+};
+
+#define IDPF_RX_PTYPE_TO_IPV(ptype, ipv)			\
+	(((ptype)->outer_ip == IDPF_RX_PTYPE_OUTER_IP) &&	\
+	 ((ptype)->outer_ip_ver == (ipv)))
+
+enum idpf_rx_ptype_outer_ip_ver {
+	IDPF_RX_PTYPE_OUTER_NONE	= 0,
+	IDPF_RX_PTYPE_OUTER_IPV4	= 1,
+	IDPF_RX_PTYPE_OUTER_IPV6	= 2,
+};
+
+enum idpf_rx_ptype_outer_fragmented {
+	IDPF_RX_PTYPE_NOT_FRAG	= 0,
+	IDPF_RX_PTYPE_FRAG	= 1,
+};
+
+enum idpf_rx_ptype_tunnel_type {
+	IDPF_RX_PTYPE_TUNNEL_NONE		= 0,
+	IDPF_RX_PTYPE_TUNNEL_IP_IP		= 1,
+	IDPF_RX_PTYPE_TUNNEL_IP_GRENAT		= 2,
+	IDPF_RX_PTYPE_TUNNEL_IP_GRENAT_MAC	= 3,
+	IDPF_RX_PTYPE_TUNNEL_IP_GRENAT_MAC_VLAN	= 4,
+};
+
+enum idpf_rx_ptype_tunnel_end_prot {
+	IDPF_RX_PTYPE_TUNNEL_END_NONE	= 0,
+	IDPF_RX_PTYPE_TUNNEL_END_IPV4	= 1,
+	IDPF_RX_PTYPE_TUNNEL_END_IPV6	= 2,
+};
+
+enum idpf_rx_ptype_inner_prot {
+	IDPF_RX_PTYPE_INNER_PROT_NONE		= 0,
+	IDPF_RX_PTYPE_INNER_PROT_UDP		= 1,
+	IDPF_RX_PTYPE_INNER_PROT_TCP		= 2,
+	IDPF_RX_PTYPE_INNER_PROT_SCTP		= 3,
+	IDPF_RX_PTYPE_INNER_PROT_ICMP		= 4,
+	IDPF_RX_PTYPE_INNER_PROT_TIMESYNC	= 5,
+};
+
+enum idpf_rx_ptype_payload_layer {
+	IDPF_RX_PTYPE_PAYLOAD_LAYER_NONE	= 0,
+	IDPF_RX_PTYPE_PAYLOAD_LAYER_PAY2	= 1,
+	IDPF_RX_PTYPE_PAYLOAD_LAYER_PAY3	= 2,
+	IDPF_RX_PTYPE_PAYLOAD_LAYER_PAY4	= 3,
+};
+
+enum idpf_tunnel_state {
+	IDPF_PTYPE_TUNNEL_IP                    = BIT(0),
+	IDPF_PTYPE_TUNNEL_IP_GRENAT             = BIT(1),
+	IDPF_PTYPE_TUNNEL_IP_GRENAT_MAC         = BIT(2),
+};
+
+struct idpf_ptype_state {
+	bool outer_ip;
+	bool outer_frag;
+	u8 tunnel_state;
+};
+
+struct idpf_rx_ptype_decoded {
+	u32 ptype:10;
+	u32 known:1;
+	u32 outer_ip:1;
+	u32 outer_ip_ver:2;
+	u32 outer_frag:1;
+	u32 tunnel_type:3;
+	u32 tunnel_end_prot:2;
+	u32 tunnel_end_frag:1;
+	u32 inner_prot:4;
+	u32 payload_layer:3;
+};
+
+/**
+ * enum idpf_queue_flags_t
+ * @__IDPF_Q_GEN_CHK: Queues operating in splitq mode use a generation bit to
+ *		      identify new descriptor writebacks on the ring. HW sets
+ *		      the gen bit to 1 on the first writeback of any given
+ *		      descriptor. After the ring wraps, HW sets the gen bit of
+ *		      those descriptors to 0, and continues flipping
+ *		      0->1 or 1->0 on each ring wrap. SW maintains its own
+ *		      gen bit to know what value will indicate writebacks on
+ *		      the next pass around the ring. E.g. it is initialized
+ *		      to 1 and knows that reading a gen bit of 1 in any
+ *		      descriptor on the initial pass of the ring indicates a
+ *		      writeback. It also flips on every ring wrap.
+ * @__IDPF_RFLQ_GEN_CHK: Refill queues are SW only, so Q_GEN acts as the HW bit
+ *			 and RFLGQ_GEN is the SW bit.
+ * @__IDPF_Q_FLOW_SCH_EN: Enable flow scheduling
+ * @__IDPF_Q_SW_MARKER: Used to indicate TX queue marker completions
+ * @__IDPF_Q_POLL_MODE: Enable poll mode
+ * @__IDPF_Q_FLAGS_NBITS: Must be last
+ */
+enum idpf_queue_flags_t {
+	__IDPF_Q_GEN_CHK,
+	__IDPF_RFLQ_GEN_CHK,
+	__IDPF_Q_FLOW_SCH_EN,
+	__IDPF_Q_SW_MARKER,
+	__IDPF_Q_POLL_MODE,
+
+	__IDPF_Q_FLAGS_NBITS,
+};
+
+/**
+ * struct idpf_vec_regs
+ * @dyn_ctl_reg: Dynamic control interrupt register offset
+ * @itrn_reg: Interrupt Throttling Rate register offset
+ * @itrn_index_spacing: Register spacing between ITR registers of the same
+ *			vector
+ */
+struct idpf_vec_regs {
+	u32 dyn_ctl_reg;
+	u32 itrn_reg;
+	u32 itrn_index_spacing;
+};
+
+/**
+ * struct idpf_intr_reg
+ * @dyn_ctl: Dynamic control interrupt register
+ * @dyn_ctl_intena_m: Mask for dyn_ctl interrupt enable
+ * @dyn_ctl_itridx_s: Register bit offset for ITR index
+ * @dyn_ctl_itridx_m: Mask for ITR index
+ * @dyn_ctl_intrvl_s: Register bit offset for ITR interval
+ * @rx_itr: RX ITR register
+ * @tx_itr: TX ITR register
+ * @icr_ena: Interrupt cause register offset
+ * @icr_ena_ctlq_m: Mask for ICR
+ */
+struct idpf_intr_reg {
+	void __iomem *dyn_ctl;
+	u32 dyn_ctl_intena_m;
+	u32 dyn_ctl_itridx_s;
+	u32 dyn_ctl_itridx_m;
+	u32 dyn_ctl_intrvl_s;
+	void __iomem *rx_itr;
+	void __iomem *tx_itr;
+	void __iomem *icr_ena;
+	u32 icr_ena_ctlq_m;
+};
+
+/**
+ * struct idpf_q_vector
+ * @vport: Vport back pointer
+ * @affinity_mask: CPU affinity mask
+ * @napi: napi handler
+ * @v_idx: Vector index
+ * @intr_reg: See struct idpf_intr_reg
+ * @num_txq: Number of TX queues
+ * @tx: Array of TX queues to service
+ * @tx_dim: Data for TX net_dim algorithm
+ * @tx_itr_value: TX interrupt throttling rate
+ * @tx_intr_mode: Dynamic ITR or not
+ * @tx_itr_idx: TX ITR index
+ * @num_rxq: Number of RX queues
+ * @rx: Array of RX queues to service
+ * @rx_dim: Data for RX net_dim algorithm
+ * @rx_itr_value: RX interrupt throttling rate
+ * @rx_intr_mode: Dynamic ITR or not
+ * @rx_itr_idx: RX ITR index
+ * @num_bufq: Number of buffer queues
+ * @bufq: Array of buffer queues to service
+ * @total_events: Number of interrupts processed
+ * @name: Queue vector name
+ */
+struct idpf_q_vector {
+	struct idpf_vport *vport;
+	cpumask_t affinity_mask;
+	struct napi_struct napi;
+	u16 v_idx;
+	struct idpf_intr_reg intr_reg;
+
+	u16 num_txq;
+	struct idpf_queue **tx;
+	struct dim tx_dim;
+	u16 tx_itr_value;
+	bool tx_intr_mode;
+	u32 tx_itr_idx;
+
+	u16 num_rxq;
+	struct idpf_queue **rx;
+	struct dim rx_dim;
+	u16 rx_itr_value;
+	bool rx_intr_mode;
+	u32 rx_itr_idx;
+
+	u16 num_bufq;
+	struct idpf_queue **bufq;
+
+	u16 total_events;
+	char *name;
+};
+
+struct idpf_rx_queue_stats {
+	u64_stats_t packets;
+	u64_stats_t bytes;
+	u64_stats_t rsc_pkts;
+	u64_stats_t hw_csum_err;
+	u64_stats_t hsplit_pkts;
+	u64_stats_t hsplit_buf_ovf;
+	u64_stats_t bad_descs;
+};
+
+struct idpf_tx_queue_stats {
+	u64_stats_t packets;
+	u64_stats_t bytes;
+	u64_stats_t lso_pkts;
+	u64_stats_t linearize;
+	u64_stats_t q_busy;
+	u64_stats_t skb_drops;
+	u64_stats_t dma_map_errs;
+};
+
+struct idpf_cleaned_stats {
+	u32 packets;
+	u32 bytes;
+};
+
+union idpf_queue_stats {
+	struct idpf_rx_queue_stats rx;
+	struct idpf_tx_queue_stats tx;
+};
+
+#define IDPF_ITR_DYNAMIC	1
+#define IDPF_ITR_MAX		0x1FE0
+#define IDPF_ITR_20K		0x0032
+#define IDPF_ITR_GRAN_S		1	/* Assume ITR granularity is 2us */
+#define IDPF_ITR_MASK		0x1FFE  /* ITR register value alignment mask */
+#define ITR_REG_ALIGN(setting)	((setting) & IDPF_ITR_MASK)
+#define IDPF_ITR_IS_DYNAMIC(itr_mode) (itr_mode)
+#define IDPF_ITR_TX_DEF		IDPF_ITR_20K
+#define IDPF_ITR_RX_DEF		IDPF_ITR_20K
+/* Index used for 'No ITR' update in DYN_CTL register */
+#define IDPF_NO_ITR_UPDATE_IDX	3
+#define IDPF_ITR_IDX_SPACING(spacing, dflt)	(spacing ? spacing : dflt)
+#define IDPF_DIM_DEFAULT_PROFILE_IX		1
+
+/**
+ * struct idpf_queue
+ * @dev: Device back pointer for DMA mapping
+ * @vport: Back pointer to associated vport
+ * @txq_grp: See struct idpf_txq_group
+ * @rxq_grp: See struct idpf_rxq_group
+ * @idx: For buffer queue, it is used as group id, either 0 or 1. On clean,
+ *	 buffer queue uses this index to determine which group of refill queues
+ *	 to clean.
+ *	 For TX queue, it is used as index to map between TX queue group and
+ *	 hot path TX pointers stored in vport. Used in both singleq/splitq.
+ *	 For RX queue, it is used to index to total RX queue across groups and
+ *	 used for skb reporting.
+ * @tail: Tail offset. Used for both queue models single and split. In splitq
+ *	  model relevant only for TX queue and RX queue.
+ * @tx_buf: See struct idpf_tx_buf
+ * @rx_buf: Struct with RX buffer related members
+ * @rx_buf.buf: See struct idpf_rx_buf
+ * @rx_buf.hdr_buf_pa: DMA handle
+ * @rx_buf.hdr_buf_va: Virtual address
+ * @pp: Page pool pointer
+ * @skb: Pointer to the skb
+ * @q_type: Queue type (TX, RX, TX completion, RX buffer)
+ * @q_id: Queue id
+ * @desc_count: Number of descriptors
+ * @next_to_use: Next descriptor to use. Relevant in both split & single txq
+ *		 and bufq.
+ * @next_to_clean: Next descriptor to clean. In split queue model, only
+ *		   relevant to TX completion queue and RX queue.
+ * @next_to_alloc: RX buffer to allocate at. Used only for RX. In splitq model
+ *		   only relevant to RX queue.
+ * @flags: See enum idpf_queue_flags_t
+ * @q_stats: See union idpf_queue_stats
+ * @stats_sync: See struct u64_stats_sync
+ * @cleaned_bytes: Splitq only, TXQ only: When a TX completion is received on
+ *		   the TX completion queue, it can be for any TXQ associated
+ *		   with that completion queue. This means we can clean up to
+ *		   N TXQs during a single call to clean the completion queue.
+ *		   cleaned_bytes|pkts tracks the clean stats per TXQ during
+ *		   that single call to clean the completion queue. By doing so,
+ *		   we can update BQL with aggregate cleaned stats for each TXQ
+ *		   only once at the end of the cleaning routine.
+ * @cleaned_pkts: Number of packets cleaned for the above said case
+ * @rx_hsplit_en: RX headsplit enable
+ * @rx_hbuf_size: Header buffer size
+ * @rx_buf_size: Buffer size
+ * @rx_max_pkt_size: RX max packet size
+ * @rx_buf_stride: RX buffer stride
+ * @rx_buffer_low_watermark: RX buffer low watermark
+ * @rxdids: Supported RX descriptor ids
+ * @q_vector: Backreference to associated vector
+ * @size: Length of descriptor ring in bytes
+ * @dma: Physical address of ring
+ * @desc_ring: Descriptor ring memory
+ * @tx_max_bufs: Max buffers that can be transmitted with scatter-gather
+ * @tx_min_pkt_len: Min supported packet length
+ * @num_completions: Only relevant for TX completion queue. It tracks the
+ *		     number of completions received to compare against the
+ *		     number of completions pending, as accumulated by the
+ *		     TX queues.
+ * @buf_stack: Stack of empty buffers to store buffer info for out of order
+ *	       buffer completions. See struct idpf_buf_lifo.
+ * @compl_tag_bufid_m: Completion tag buffer id mask
+ * @compl_tag_gen_s: Completion tag generation bit
+ *	The format of the completion tag will change based on the TXQ
+ *	descriptor ring size so that we can maintain roughly the same level
+ *	of "uniqueness" across all descriptor sizes. For example, if the
+ *	TXQ descriptor ring size is 64 (the minimum size supported), the
+ *	completion tag will be formatted as below:
+ *	15                 6 5         0
+ *	--------------------------------
+ *	|    GEN=0-1023     |IDX = 0-63|
+ *	--------------------------------
+ *
+ *	This gives us 64*1024 = 65536 possible unique values. Similarly, if
+ *	the TXQ descriptor ring size is 8160 (the maximum size supported),
+ *	the completion tag will be formatted as below:
+ *	15 13 12                       0
+ *	--------------------------------
+ *	|GEN |       IDX = 0-8159      |
+ *	--------------------------------
+ *
+ *	This gives us 8*8160 = 65280 possible unique values.
+ * @compl_tag_cur_gen: Used to keep track of current completion tag generation
+ * @compl_tag_gen_max: To determine when compl_tag_cur_gen should be reset
+ * @sched_buf_hash: Hash table to stores buffers
+ */
+struct idpf_queue {
+	struct device *dev;
+	struct idpf_vport *vport;
+	union {
+		struct idpf_txq_group *txq_grp;
+		struct idpf_rxq_group *rxq_grp;
+	};
+	u16 idx;
+	void __iomem *tail;
+	union {
+		struct idpf_tx_buf *tx_buf;
+		struct {
+			struct idpf_rx_buf *buf;
+			dma_addr_t hdr_buf_pa;
+			void *hdr_buf_va;
+		} rx_buf;
+	};
+	struct page_pool *pp;
+	struct sk_buff *skb;
+	u16 q_type;
+	u32 q_id;
+	u16 desc_count;
+
+	u16 next_to_use;
+	u16 next_to_clean;
+	u16 next_to_alloc;
+	DECLARE_BITMAP(flags, __IDPF_Q_FLAGS_NBITS);
+
+	union idpf_queue_stats q_stats;
+	struct u64_stats_sync stats_sync;
+
+	u32 cleaned_bytes;
+	u16 cleaned_pkts;
+
+	bool rx_hsplit_en;
+	u16 rx_hbuf_size;
+	u16 rx_buf_size;
+	u16 rx_max_pkt_size;
+	u16 rx_buf_stride;
+	u8 rx_buffer_low_watermark;
+	u64 rxdids;
+	struct idpf_q_vector *q_vector;
+	unsigned int size;
+	dma_addr_t dma;
+	void *desc_ring;
+
+	u16 tx_max_bufs;
+	u8 tx_min_pkt_len;
+
+	u32 num_completions;
+
+	struct idpf_buf_lifo buf_stack;
+
+	u16 compl_tag_bufid_m;
+	u16 compl_tag_gen_s;
+
+	u16 compl_tag_cur_gen;
+	u16 compl_tag_gen_max;
+
+	DECLARE_HASHTABLE(sched_buf_hash, 12);
+} ____cacheline_internodealigned_in_smp;
+
+/**
+ * struct idpf_sw_queue
+ * @next_to_clean: Next descriptor to clean
+ * @next_to_alloc: Buffer to allocate at
+ * @flags: See enum idpf_queue_flags_t
+ * @ring: Pointer to the ring
+ * @desc_count: Descriptor count
+ * @dev: Device back pointer for DMA mapping
+ *
+ * Software queues are used in splitq mode to manage buffers between rxq
+ * producer and the bufq consumer.  These are required in order to maintain a
+ * lockless buffer management system and are strictly software only constructs.
+ */
+struct idpf_sw_queue {
+	u16 next_to_clean;
+	u16 next_to_alloc;
+	DECLARE_BITMAP(flags, __IDPF_Q_FLAGS_NBITS);
+	u16 *ring;
+	u16 desc_count;
+	struct device *dev;
+} ____cacheline_internodealigned_in_smp;
+
+/**
+ * struct idpf_rxq_set
+ * @rxq: RX queue
+ * @refillq0: Pointer to refill queue 0
+ * @refillq1: Pointer to refill queue 1
+ *
+ * Splitq only.  idpf_rxq_set associates an rxq with at an array of refillqs.
+ * Each rxq needs a refillq to return used buffers back to the respective bufq.
+ * Bufqs then clean these refillqs for buffers to give to hardware.
+ */
+struct idpf_rxq_set {
+	struct idpf_queue rxq;
+	struct idpf_sw_queue *refillq0;
+	struct idpf_sw_queue *refillq1;
+};
+
+/**
+ * struct idpf_bufq_set
+ * @bufq: Buffer queue
+ * @num_refillqs: Number of refill queues. This is always equal to num_rxq_sets
+ *		  in idpf_rxq_group.
+ * @refillqs: Pointer to refill queues array.
+ *
+ * Splitq only. idpf_bufq_set associates a bufq to an array of refillqs.
+ * In this bufq_set, there will be one refillq for each rxq in this rxq_group.
+ * Used buffers received by rxqs will be put on refillqs which bufqs will
+ * clean to return new buffers back to hardware.
+ *
+ * Buffers needed by some number of rxqs associated in this rxq_group are
+ * managed by at most two bufqs (depending on performance configuration).
+ */
+struct idpf_bufq_set {
+	struct idpf_queue bufq;
+	int num_refillqs;
+	struct idpf_sw_queue *refillqs;
+};
+
+/**
+ * struct idpf_rxq_group
+ * @vport: Vport back pointer
+ * @singleq: Struct with single queue related members
+ * @singleq.num_rxq: Number of RX queues associated
+ * @singleq.rxqs: Array of RX queue pointers
+ * @splitq: Struct with split queue related members
+ * @splitq.num_rxq_sets: Number of RX queue sets
+ * @splitq.rxq_sets: Array of RX queue sets
+ * @splitq.bufq_sets: Buffer queue set pointer
+ *
+ * In singleq mode, an rxq_group is simply an array of rxqs.  In splitq, a
+ * rxq_group contains all the rxqs, bufqs and refillqs needed to
+ * manage buffers in splitq mode.
+ */
+struct idpf_rxq_group {
+	struct idpf_vport *vport;
+
+	union {
+		struct {
+			u16 num_rxq;
+			struct idpf_queue *rxqs[IDPF_LARGE_MAX_Q];
+		} singleq;
+		struct {
+			u16 num_rxq_sets;
+			struct idpf_rxq_set *rxq_sets[IDPF_LARGE_MAX_Q];
+			struct idpf_bufq_set *bufq_sets;
+		} splitq;
+	};
+};
+
+/**
+ * struct idpf_txq_group
+ * @vport: Vport back pointer
+ * @num_txq: Number of TX queues associated
+ * @txqs: Array of TX queue pointers
+ * @complq: Associated completion queue pointer, split queue only
+ * @num_completions_pending: Total number of completions pending for the
+ *			     completion queue, acculumated for all TX queues
+ *			     associated with that completion queue.
+ *
+ * Between singleq and splitq, a txq_group is largely the same except for the
+ * complq. In splitq a single complq is responsible for handling completions
+ * for some number of txqs associated in this txq_group.
+ */
+struct idpf_txq_group {
+	struct idpf_vport *vport;
+
+	u16 num_txq;
+	struct idpf_queue *txqs[IDPF_LARGE_MAX_Q];
+
+	struct idpf_queue *complq;
+
+	u32 num_completions_pending;
+};
+
+/**
+ * idpf_size_to_txd_count - Get number of descriptors needed for large Tx frag
+ * @size: transmit request size in bytes
+ *
+ * In the case where a large frag (>= 16K) needs to be split across multiple
+ * descriptors, we need to assume that we can have no more than 12K of data
+ * per descriptor due to hardware alignment restrictions (4K alignment).
+ */
+static inline u32 idpf_size_to_txd_count(unsigned int size)
+{
+	return DIV_ROUND_UP(size, IDPF_TX_MAX_DESC_DATA_ALIGNED);
+}
+
+/**
+ * idpf_tx_singleq_build_ctob - populate command tag offset and size
+ * @td_cmd: Command to be filled in desc
+ * @td_offset: Offset to be filled in desc
+ * @size: Size of the buffer
+ * @td_tag: td tag to be filled
+ *
+ * Returns the 64 bit value populated with the input parameters
+ */
+static inline __le64 idpf_tx_singleq_build_ctob(u64 td_cmd, u64 td_offset,
+						unsigned int size, u64 td_tag)
+{
+	return cpu_to_le64(IDPF_TX_DESC_DTYPE_DATA |
+			   (td_cmd << IDPF_TXD_QW1_CMD_S) |
+			   (td_offset << IDPF_TXD_QW1_OFFSET_S) |
+			   ((u64)size << IDPF_TXD_QW1_TX_BUF_SZ_S) |
+			   (td_tag << IDPF_TXD_QW1_L2TAG1_S));
+}
+
+void idpf_tx_splitq_build_ctb(union idpf_tx_flex_desc *desc,
+			      struct idpf_tx_splitq_params *params,
+			      u16 td_cmd, u16 size);
+void idpf_tx_splitq_build_flow_desc(union idpf_tx_flex_desc *desc,
+				    struct idpf_tx_splitq_params *params,
+				    u16 td_cmd, u16 size);
+/**
+ * idpf_tx_splitq_build_desc - determine which type of data descriptor to build
+ * @desc: descriptor to populate
+ * @params: pointer to tx params struct
+ * @td_cmd: command to be filled in desc
+ * @size: size of buffer
+ */
+static inline void idpf_tx_splitq_build_desc(union idpf_tx_flex_desc *desc,
+					     struct idpf_tx_splitq_params *params,
+					     u16 td_cmd, u16 size)
+{
+	if (params->dtype == IDPF_TX_DESC_DTYPE_FLEX_L2TAG1_L2TAG2)
+		idpf_tx_splitq_build_ctb(desc, params, td_cmd, size);
+	else
+		idpf_tx_splitq_build_flow_desc(desc, params, td_cmd, size);
+}
+
+/**
+ * idpf_alloc_page - Allocate a new RX buffer from the page pool
+ * @pool: page_pool to allocate from
+ * @buf: metadata struct to populate with page info
+ * @buf_size: 2K or 4K
+ *
+ * Returns &dma_addr_t to be passed to HW for Rx, %DMA_MAPPING_ERROR otherwise.
+ */
+static inline dma_addr_t idpf_alloc_page(struct page_pool *pool,
+					 struct idpf_rx_buf *buf,
+					 unsigned int buf_size)
+{
+	if (buf_size == IDPF_RX_BUF_2048)
+		buf->page = page_pool_dev_alloc_frag(pool, &buf->page_offset,
+						     buf_size);
+	else
+		buf->page = page_pool_dev_alloc_pages(pool);
+
+	if (!buf->page)
+		return DMA_MAPPING_ERROR;
+
+	buf->truesize = buf_size;
+
+	return page_pool_get_dma_addr(buf->page) + buf->page_offset +
+	       pool->p.offset;
+}
+
+/**
+ * idpf_rx_put_page - Return RX buffer page to pool
+ * @rx_buf: RX buffer metadata struct
+ */
+static inline void idpf_rx_put_page(struct idpf_rx_buf *rx_buf)
+{
+	page_pool_put_page(rx_buf->page->pp, rx_buf->page,
+			   rx_buf->truesize, true);
+	rx_buf->page = NULL;
+}
+
+/**
+ * idpf_rx_sync_for_cpu - Synchronize DMA buffer
+ * @rx_buf: RX buffer metadata struct
+ * @len: frame length from descriptor
+ */
+static inline void idpf_rx_sync_for_cpu(struct idpf_rx_buf *rx_buf, u32 len)
+{
+	struct page *page = rx_buf->page;
+	struct page_pool *pp = page->pp;
+
+	dma_sync_single_range_for_cpu(pp->p.dev,
+				      page_pool_get_dma_addr(page),
+				      rx_buf->page_offset + pp->p.offset, len,
+				      page_pool_get_dma_dir(pp));
+}
+
+int idpf_vport_singleq_napi_poll(struct napi_struct *napi, int budget);
+void idpf_vport_init_num_qs(struct idpf_vport *vport,
+			    struct virtchnl2_create_vport *vport_msg);
+void idpf_vport_calc_num_q_desc(struct idpf_vport *vport);
+int idpf_vport_calc_total_qs(struct idpf_adapter *adapter, u16 vport_index,
+			     struct virtchnl2_create_vport *vport_msg,
+			     struct idpf_vport_max_q *max_q);
+void idpf_vport_calc_num_q_groups(struct idpf_vport *vport);
+int idpf_vport_queues_alloc(struct idpf_vport *vport);
+void idpf_vport_queues_rel(struct idpf_vport *vport);
+void idpf_vport_intr_rel(struct idpf_vport *vport);
+int idpf_vport_intr_alloc(struct idpf_vport *vport);
+void idpf_vport_intr_update_itr_ena_irq(struct idpf_q_vector *q_vector);
+void idpf_vport_intr_deinit(struct idpf_vport *vport);
+int idpf_vport_intr_init(struct idpf_vport *vport);
+enum pkt_hash_types idpf_ptype_to_htype(const struct idpf_rx_ptype_decoded *decoded);
+int idpf_config_rss(struct idpf_vport *vport);
+int idpf_init_rss(struct idpf_vport *vport);
+void idpf_deinit_rss(struct idpf_vport *vport);
+int idpf_rx_bufs_init_all(struct idpf_vport *vport);
+void idpf_rx_add_frag(struct idpf_rx_buf *rx_buf, struct sk_buff *skb,
+		      unsigned int size);
+struct sk_buff *idpf_rx_construct_skb(struct idpf_queue *rxq,
+				      struct idpf_rx_buf *rx_buf,
+				      unsigned int size);
+bool idpf_init_rx_buf_hw_alloc(struct idpf_queue *rxq, struct idpf_rx_buf *buf);
+void idpf_rx_buf_hw_update(struct idpf_queue *rxq, u32 val);
+void idpf_tx_buf_hw_update(struct idpf_queue *tx_q, u32 val,
+			   bool xmit_more);
+unsigned int idpf_size_to_txd_count(unsigned int size);
+netdev_tx_t idpf_tx_drop_skb(struct idpf_queue *tx_q, struct sk_buff *skb);
+void idpf_tx_dma_map_error(struct idpf_queue *txq, struct sk_buff *skb,
+			   struct idpf_tx_buf *first, u16 ring_idx);
+unsigned int idpf_tx_desc_count_required(struct idpf_queue *txq,
+					 struct sk_buff *skb);
+bool idpf_chk_linearize(struct sk_buff *skb, unsigned int max_bufs,
+			unsigned int count);
+int idpf_tx_maybe_stop_common(struct idpf_queue *tx_q, unsigned int size);
+void idpf_tx_timeout(struct net_device *netdev, unsigned int txqueue);
+netdev_tx_t idpf_tx_splitq_start(struct sk_buff *skb,
+				 struct net_device *netdev);
+netdev_tx_t idpf_tx_singleq_start(struct sk_buff *skb,
+				  struct net_device *netdev);
+bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_queue *rxq,
+				      u16 cleaned_count);
+int idpf_tso(struct sk_buff *skb, struct idpf_tx_offload_params *off);
+
+#endif /* !_IDPF_TXRX_H_ */
diff --git a/drivers/net/ethernet/intel/idpf/idpf_vf_dev.c b/drivers/net/ethernet/intel/idpf/idpf_vf_dev.c
new file mode 100644
index 000000000000..8ade4e3a9fe1
--- /dev/null
+++ b/drivers/net/ethernet/intel/idpf/idpf_vf_dev.c
@@ -0,0 +1,163 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2023 Intel Corporation */
+
+#include "idpf.h"
+#include "idpf_lan_vf_regs.h"
+
+#define IDPF_VF_ITR_IDX_SPACING		0x40
+
+/**
+ * idpf_vf_ctlq_reg_init - initialize default mailbox registers
+ * @cq: pointer to the array of create control queues
+ */
+static void idpf_vf_ctlq_reg_init(struct idpf_ctlq_create_info *cq)
+{
+	int i;
+
+	for (i = 0; i < IDPF_NUM_DFLT_MBX_Q; i++) {
+		struct idpf_ctlq_create_info *ccq = cq + i;
+
+		switch (ccq->type) {
+		case IDPF_CTLQ_TYPE_MAILBOX_TX:
+			/* set head and tail registers in our local struct */
+			ccq->reg.head = VF_ATQH;
+			ccq->reg.tail = VF_ATQT;
+			ccq->reg.len = VF_ATQLEN;
+			ccq->reg.bah = VF_ATQBAH;
+			ccq->reg.bal = VF_ATQBAL;
+			ccq->reg.len_mask = VF_ATQLEN_ATQLEN_M;
+			ccq->reg.len_ena_mask = VF_ATQLEN_ATQENABLE_M;
+			ccq->reg.head_mask = VF_ATQH_ATQH_M;
+			break;
+		case IDPF_CTLQ_TYPE_MAILBOX_RX:
+			/* set head and tail registers in our local struct */
+			ccq->reg.head = VF_ARQH;
+			ccq->reg.tail = VF_ARQT;
+			ccq->reg.len = VF_ARQLEN;
+			ccq->reg.bah = VF_ARQBAH;
+			ccq->reg.bal = VF_ARQBAL;
+			ccq->reg.len_mask = VF_ARQLEN_ARQLEN_M;
+			ccq->reg.len_ena_mask = VF_ARQLEN_ARQENABLE_M;
+			ccq->reg.head_mask = VF_ARQH_ARQH_M;
+			break;
+		default:
+			break;
+		}
+	}
+}
+
+/**
+ * idpf_vf_mb_intr_reg_init - Initialize the mailbox register
+ * @adapter: adapter structure
+ */
+static void idpf_vf_mb_intr_reg_init(struct idpf_adapter *adapter)
+{
+	struct idpf_intr_reg *intr = &adapter->mb_vector.intr_reg;
+	u32 dyn_ctl = le32_to_cpu(adapter->caps.mailbox_dyn_ctl);
+
+	intr->dyn_ctl = idpf_get_reg_addr(adapter, dyn_ctl);
+	intr->dyn_ctl_intena_m = VF_INT_DYN_CTL0_INTENA_M;
+	intr->dyn_ctl_itridx_m = VF_INT_DYN_CTL0_ITR_INDX_M;
+	intr->icr_ena = idpf_get_reg_addr(adapter, VF_INT_ICR0_ENA1);
+	intr->icr_ena_ctlq_m = VF_INT_ICR0_ENA1_ADMINQ_M;
+}
+
+/**
+ * idpf_vf_intr_reg_init - Initialize interrupt registers
+ * @vport: virtual port structure
+ */
+static int idpf_vf_intr_reg_init(struct idpf_vport *vport)
+{
+	struct idpf_adapter *adapter = vport->adapter;
+	int num_vecs = vport->num_q_vectors;
+	struct idpf_vec_regs *reg_vals;
+	int num_regs, i, err = 0;
+	u32 rx_itr, tx_itr;
+	u16 total_vecs;
+
+	total_vecs = idpf_get_reserved_vecs(vport->adapter);
+	reg_vals = kcalloc(total_vecs, sizeof(struct idpf_vec_regs),
+			   GFP_KERNEL);
+	if (!reg_vals)
+		return -ENOMEM;
+
+	num_regs = idpf_get_reg_intr_vecs(vport, reg_vals);
+	if (num_regs < num_vecs) {
+		err = -EINVAL;
+		goto free_reg_vals;
+	}
+
+	for (i = 0; i < num_vecs; i++) {
+		struct idpf_q_vector *q_vector = &vport->q_vectors[i];
+		u16 vec_id = vport->q_vector_idxs[i] - IDPF_MBX_Q_VEC;
+		struct idpf_intr_reg *intr = &q_vector->intr_reg;
+		u32 spacing;
+
+		intr->dyn_ctl = idpf_get_reg_addr(adapter,
+						  reg_vals[vec_id].dyn_ctl_reg);
+		intr->dyn_ctl_intena_m = VF_INT_DYN_CTLN_INTENA_M;
+		intr->dyn_ctl_itridx_s = VF_INT_DYN_CTLN_ITR_INDX_S;
+
+		spacing = IDPF_ITR_IDX_SPACING(reg_vals[vec_id].itrn_index_spacing,
+					       IDPF_VF_ITR_IDX_SPACING);
+		rx_itr = VF_INT_ITRN_ADDR(VIRTCHNL2_ITR_IDX_0,
+					  reg_vals[vec_id].itrn_reg,
+					  spacing);
+		tx_itr = VF_INT_ITRN_ADDR(VIRTCHNL2_ITR_IDX_1,
+					  reg_vals[vec_id].itrn_reg,
+					  spacing);
+		intr->rx_itr = idpf_get_reg_addr(adapter, rx_itr);
+		intr->tx_itr = idpf_get_reg_addr(adapter, tx_itr);
+	}
+
+free_reg_vals:
+	kfree(reg_vals);
+
+	return err;
+}
+
+/**
+ * idpf_vf_reset_reg_init - Initialize reset registers
+ * @adapter: Driver specific private structure
+ */
+static void idpf_vf_reset_reg_init(struct idpf_adapter *adapter)
+{
+	adapter->reset_reg.rstat = idpf_get_reg_addr(adapter, VFGEN_RSTAT);
+	adapter->reset_reg.rstat_m = VFGEN_RSTAT_VFR_STATE_M;
+}
+
+/**
+ * idpf_vf_trigger_reset - trigger reset
+ * @adapter: Driver specific private structure
+ * @trig_cause: Reason to trigger a reset
+ */
+static void idpf_vf_trigger_reset(struct idpf_adapter *adapter,
+				  enum idpf_flags trig_cause)
+{
+	/* Do not send VIRTCHNL2_OP_RESET_VF message on driver unload */
+	if (trig_cause == IDPF_HR_FUNC_RESET &&
+	    !test_bit(IDPF_REMOVE_IN_PROG, adapter->flags))
+		idpf_send_mb_msg(adapter, VIRTCHNL2_OP_RESET_VF, 0, NULL);
+}
+
+/**
+ * idpf_vf_reg_ops_init - Initialize register API function pointers
+ * @adapter: Driver specific private structure
+ */
+static void idpf_vf_reg_ops_init(struct idpf_adapter *adapter)
+{
+	adapter->dev_ops.reg_ops.ctlq_reg_init = idpf_vf_ctlq_reg_init;
+	adapter->dev_ops.reg_ops.intr_reg_init = idpf_vf_intr_reg_init;
+	adapter->dev_ops.reg_ops.mb_intr_reg_init = idpf_vf_mb_intr_reg_init;
+	adapter->dev_ops.reg_ops.reset_reg_init = idpf_vf_reset_reg_init;
+	adapter->dev_ops.reg_ops.trigger_reset = idpf_vf_trigger_reset;
+}
+
+/**
+ * idpf_vf_dev_ops_init - Initialize device API function pointers
+ * @adapter: Driver specific private structure
+ */
+void idpf_vf_dev_ops_init(struct idpf_adapter *adapter)
+{
+	idpf_vf_reg_ops_init(adapter);
+}
diff --git a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c
new file mode 100644
index 000000000000..9bc85b2f1709
--- /dev/null
+++ b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c
@@ -0,0 +1,3791 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2023 Intel Corporation */
+
+#include "idpf.h"
+
+/**
+ * idpf_recv_event_msg - Receive virtchnl event message
+ * @vport: virtual port structure
+ * @ctlq_msg: message to copy from
+ *
+ * Receive virtchnl event message
+ */
+static void idpf_recv_event_msg(struct idpf_vport *vport,
+				struct idpf_ctlq_msg *ctlq_msg)
+{
+	struct idpf_netdev_priv *np = netdev_priv(vport->netdev);
+	struct virtchnl2_event *v2e;
+	bool link_status;
+	u32 event;
+
+	v2e = (struct virtchnl2_event *)ctlq_msg->ctx.indirect.payload->va;
+	event = le32_to_cpu(v2e->event);
+
+	switch (event) {
+	case VIRTCHNL2_EVENT_LINK_CHANGE:
+		vport->link_speed_mbps = le32_to_cpu(v2e->link_speed);
+		link_status = v2e->link_status;
+
+		if (vport->link_up == link_status)
+			break;
+
+		vport->link_up = link_status;
+		if (np->state == __IDPF_VPORT_UP) {
+			if (vport->link_up) {
+				netif_carrier_on(vport->netdev);
+				netif_tx_start_all_queues(vport->netdev);
+			} else {
+				netif_tx_stop_all_queues(vport->netdev);
+				netif_carrier_off(vport->netdev);
+			}
+		}
+		break;
+	default:
+		dev_err(&vport->adapter->pdev->dev,
+			"Unknown event %d from PF\n", event);
+		break;
+	}
+}
+
+/**
+ * idpf_mb_clean - Reclaim the send mailbox queue entries
+ * @adapter: Driver specific private structure
+ *
+ * Reclaim the send mailbox queue entries to be used to send further messages
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int idpf_mb_clean(struct idpf_adapter *adapter)
+{
+	u16 i, num_q_msg = IDPF_DFLT_MBX_Q_LEN;
+	struct idpf_ctlq_msg **q_msg;
+	struct idpf_dma_mem *dma_mem;
+	int err;
+
+	q_msg = kcalloc(num_q_msg, sizeof(struct idpf_ctlq_msg *), GFP_ATOMIC);
+	if (!q_msg)
+		return -ENOMEM;
+
+	err = idpf_ctlq_clean_sq(adapter->hw.asq, &num_q_msg, q_msg);
+	if (err)
+		goto err_kfree;
+
+	for (i = 0; i < num_q_msg; i++) {
+		if (!q_msg[i])
+			continue;
+		dma_mem = q_msg[i]->ctx.indirect.payload;
+		if (dma_mem)
+			dma_free_coherent(&adapter->pdev->dev, dma_mem->size,
+					  dma_mem->va, dma_mem->pa);
+		kfree(q_msg[i]);
+		kfree(dma_mem);
+	}
+
+err_kfree:
+	kfree(q_msg);
+
+	return err;
+}
+
+/**
+ * idpf_send_mb_msg - Send message over mailbox
+ * @adapter: Driver specific private structure
+ * @op: virtchnl opcode
+ * @msg_size: size of the payload
+ * @msg: pointer to buffer holding the payload
+ *
+ * Will prepare the control queue message and initiates the send api
+ *
+ * Returns 0 on success, negative on failure
+ */
+int idpf_send_mb_msg(struct idpf_adapter *adapter, u32 op,
+		     u16 msg_size, u8 *msg)
+{
+	struct idpf_ctlq_msg *ctlq_msg;
+	struct idpf_dma_mem *dma_mem;
+	int err;
+
+	/* If we are here and a reset is detected nothing much can be
+	 * done. This thread should silently abort and expected to
+	 * be corrected with a new run either by user or driver
+	 * flows after reset
+	 */
+	if (idpf_is_reset_detected(adapter))
+		return 0;
+
+	err = idpf_mb_clean(adapter);
+	if (err)
+		return err;
+
+	ctlq_msg = kzalloc(sizeof(*ctlq_msg), GFP_ATOMIC);
+	if (!ctlq_msg)
+		return -ENOMEM;
+
+	dma_mem = kzalloc(sizeof(*dma_mem), GFP_ATOMIC);
+	if (!dma_mem) {
+		err = -ENOMEM;
+		goto dma_mem_error;
+	}
+
+	ctlq_msg->opcode = idpf_mbq_opc_send_msg_to_cp;
+	ctlq_msg->func_id = 0;
+	ctlq_msg->data_len = msg_size;
+	ctlq_msg->cookie.mbx.chnl_opcode = op;
+	ctlq_msg->cookie.mbx.chnl_retval = 0;
+	dma_mem->size = IDPF_CTLQ_MAX_BUF_LEN;
+	dma_mem->va = dma_alloc_coherent(&adapter->pdev->dev, dma_mem->size,
+					 &dma_mem->pa, GFP_ATOMIC);
+	if (!dma_mem->va) {
+		err = -ENOMEM;
+		goto dma_alloc_error;
+	}
+	memcpy(dma_mem->va, msg, msg_size);
+	ctlq_msg->ctx.indirect.payload = dma_mem;
+
+	err = idpf_ctlq_send(&adapter->hw, adapter->hw.asq, 1, ctlq_msg);
+	if (err)
+		goto send_error;
+
+	return 0;
+
+send_error:
+	dma_free_coherent(&adapter->pdev->dev, dma_mem->size, dma_mem->va,
+			  dma_mem->pa);
+dma_alloc_error:
+	kfree(dma_mem);
+dma_mem_error:
+	kfree(ctlq_msg);
+
+	return err;
+}
+
+/**
+ * idpf_find_vport - Find vport pointer from control queue message
+ * @adapter: driver specific private structure
+ * @vport: address of vport pointer to copy the vport from adapters vport list
+ * @ctlq_msg: control queue message
+ *
+ * Return 0 on success, error value on failure. Also this function does check
+ * for the opcodes which expect to receive payload and return error value if
+ * it is not the case.
+ */
+static int idpf_find_vport(struct idpf_adapter *adapter,
+			   struct idpf_vport **vport,
+			   struct idpf_ctlq_msg *ctlq_msg)
+{
+	bool no_op = false, vid_found = false;
+	int i, err = 0;
+	char *vc_msg;
+	u32 v_id;
+
+	vc_msg = kcalloc(IDPF_CTLQ_MAX_BUF_LEN, sizeof(char), GFP_KERNEL);
+	if (!vc_msg)
+		return -ENOMEM;
+
+	if (ctlq_msg->data_len) {
+		size_t payload_size = ctlq_msg->ctx.indirect.payload->size;
+
+		if (!payload_size) {
+			dev_err(&adapter->pdev->dev, "Failed to receive payload buffer\n");
+			kfree(vc_msg);
+
+			return -EINVAL;
+		}
+
+		memcpy(vc_msg, ctlq_msg->ctx.indirect.payload->va,
+		       min_t(size_t, payload_size, IDPF_CTLQ_MAX_BUF_LEN));
+	}
+
+	switch (ctlq_msg->cookie.mbx.chnl_opcode) {
+	case VIRTCHNL2_OP_VERSION:
+	case VIRTCHNL2_OP_GET_CAPS:
+	case VIRTCHNL2_OP_CREATE_VPORT:
+	case VIRTCHNL2_OP_SET_SRIOV_VFS:
+	case VIRTCHNL2_OP_ALLOC_VECTORS:
+	case VIRTCHNL2_OP_DEALLOC_VECTORS:
+	case VIRTCHNL2_OP_GET_PTYPE_INFO:
+		goto free_vc_msg;
+	case VIRTCHNL2_OP_ENABLE_VPORT:
+	case VIRTCHNL2_OP_DISABLE_VPORT:
+	case VIRTCHNL2_OP_DESTROY_VPORT:
+		v_id = le32_to_cpu(((struct virtchnl2_vport *)vc_msg)->vport_id);
+		break;
+	case VIRTCHNL2_OP_CONFIG_TX_QUEUES:
+		v_id = le32_to_cpu(((struct virtchnl2_config_tx_queues *)vc_msg)->vport_id);
+		break;
+	case VIRTCHNL2_OP_CONFIG_RX_QUEUES:
+		v_id = le32_to_cpu(((struct virtchnl2_config_rx_queues *)vc_msg)->vport_id);
+		break;
+	case VIRTCHNL2_OP_ENABLE_QUEUES:
+	case VIRTCHNL2_OP_DISABLE_QUEUES:
+	case VIRTCHNL2_OP_DEL_QUEUES:
+		v_id = le32_to_cpu(((struct virtchnl2_del_ena_dis_queues *)vc_msg)->vport_id);
+		break;
+	case VIRTCHNL2_OP_ADD_QUEUES:
+		v_id = le32_to_cpu(((struct virtchnl2_add_queues *)vc_msg)->vport_id);
+		break;
+	case VIRTCHNL2_OP_MAP_QUEUE_VECTOR:
+	case VIRTCHNL2_OP_UNMAP_QUEUE_VECTOR:
+		v_id = le32_to_cpu(((struct virtchnl2_queue_vector_maps *)vc_msg)->vport_id);
+		break;
+	case VIRTCHNL2_OP_GET_STATS:
+		v_id = le32_to_cpu(((struct virtchnl2_vport_stats *)vc_msg)->vport_id);
+		break;
+	case VIRTCHNL2_OP_GET_RSS_LUT:
+	case VIRTCHNL2_OP_SET_RSS_LUT:
+		v_id = le32_to_cpu(((struct virtchnl2_rss_lut *)vc_msg)->vport_id);
+		break;
+	case VIRTCHNL2_OP_GET_RSS_KEY:
+	case VIRTCHNL2_OP_SET_RSS_KEY:
+		v_id = le32_to_cpu(((struct virtchnl2_rss_key *)vc_msg)->vport_id);
+		break;
+	case VIRTCHNL2_OP_EVENT:
+		v_id = le32_to_cpu(((struct virtchnl2_event *)vc_msg)->vport_id);
+		break;
+	case VIRTCHNL2_OP_LOOPBACK:
+		v_id = le32_to_cpu(((struct virtchnl2_loopback *)vc_msg)->vport_id);
+		break;
+	case VIRTCHNL2_OP_CONFIG_PROMISCUOUS_MODE:
+		v_id = le32_to_cpu(((struct virtchnl2_promisc_info *)vc_msg)->vport_id);
+		break;
+	case VIRTCHNL2_OP_ADD_MAC_ADDR:
+	case VIRTCHNL2_OP_DEL_MAC_ADDR:
+		v_id = le32_to_cpu(((struct virtchnl2_mac_addr_list *)vc_msg)->vport_id);
+		break;
+	default:
+		no_op = true;
+		break;
+	}
+
+	if (no_op)
+		goto free_vc_msg;
+
+	for (i = 0; i < idpf_get_max_vports(adapter); i++) {
+		if (adapter->vport_ids[i] == v_id) {
+			vid_found = true;
+			break;
+		}
+	}
+
+	if (vid_found)
+		*vport = adapter->vports[i];
+	else
+		err = -EINVAL;
+
+free_vc_msg:
+	kfree(vc_msg);
+
+	return err;
+}
+
+/**
+ * idpf_copy_data_to_vc_buf - Copy the virtchnl response data into the buffer.
+ * @adapter: driver specific private structure
+ * @vport: virtual port structure
+ * @ctlq_msg: msg to copy from
+ * @err_enum: err bit to set on error
+ *
+ * Copies the payload from ctlq_msg into virtchnl buffer. Returns 0 on success,
+ * negative on failure.
+ */
+static int idpf_copy_data_to_vc_buf(struct idpf_adapter *adapter,
+				    struct idpf_vport *vport,
+				    struct idpf_ctlq_msg *ctlq_msg,
+				    enum idpf_vport_vc_state err_enum)
+{
+	if (ctlq_msg->cookie.mbx.chnl_retval) {
+		if (vport)
+			set_bit(err_enum, vport->vc_state);
+		else
+			set_bit(err_enum, adapter->vc_state);
+
+		return -EINVAL;
+	}
+
+	if (vport)
+		memcpy(vport->vc_msg, ctlq_msg->ctx.indirect.payload->va,
+		       min_t(int, ctlq_msg->ctx.indirect.payload->size,
+			     IDPF_CTLQ_MAX_BUF_LEN));
+	else
+		memcpy(adapter->vc_msg, ctlq_msg->ctx.indirect.payload->va,
+		       min_t(int, ctlq_msg->ctx.indirect.payload->size,
+			     IDPF_CTLQ_MAX_BUF_LEN));
+
+	return 0;
+}
+
+/**
+ * idpf_recv_vchnl_op - helper function with common logic when handling the
+ * reception of VIRTCHNL OPs.
+ * @adapter: driver specific private structure
+ * @vport: virtual port structure
+ * @ctlq_msg: msg to copy from
+ * @state: state bit used on timeout check
+ * @err_state: err bit to set on error
+ */
+static void idpf_recv_vchnl_op(struct idpf_adapter *adapter,
+			       struct idpf_vport *vport,
+			       struct idpf_ctlq_msg *ctlq_msg,
+			       enum idpf_vport_vc_state state,
+			       enum idpf_vport_vc_state err_state)
+{
+	wait_queue_head_t *vchnl_wq;
+	int err;
+
+	if (vport)
+		vchnl_wq = &vport->vchnl_wq;
+	else
+		vchnl_wq = &adapter->vchnl_wq;
+
+	err = idpf_copy_data_to_vc_buf(adapter, vport, ctlq_msg, err_state);
+	if (wq_has_sleeper(vchnl_wq)) {
+		if (vport)
+			set_bit(state, vport->vc_state);
+		else
+			set_bit(state, adapter->vc_state);
+
+		wake_up(vchnl_wq);
+	} else {
+		if (!err) {
+			dev_warn(&adapter->pdev->dev, "opcode %d received without waiting thread\n",
+				 ctlq_msg->cookie.mbx.chnl_opcode);
+		} else {
+			/* Clear the errors since there is no sleeper to pass
+			 * them on
+			 */
+			if (vport)
+				clear_bit(err_state, vport->vc_state);
+			else
+				clear_bit(err_state, adapter->vc_state);
+		}
+	}
+}
+
+/**
+ * idpf_recv_mb_msg - Receive message over mailbox
+ * @adapter: Driver specific private structure
+ * @op: virtchannel operation code
+ * @msg: Received message holding buffer
+ * @msg_size: message size
+ *
+ * Will receive control queue message and posts the receive buffer. Returns 0
+ * on success and negative on failure.
+ */
+int idpf_recv_mb_msg(struct idpf_adapter *adapter, u32 op,
+		     void *msg, int msg_size)
+{
+	struct idpf_vport *vport = NULL;
+	struct idpf_ctlq_msg ctlq_msg;
+	struct idpf_dma_mem *dma_mem;
+	bool work_done = false;
+	int num_retry = 2000;
+	u16 num_q_msg;
+	int err;
+
+	while (1) {
+		struct idpf_vport_config *vport_config;
+		int payload_size = 0;
+
+		/* Try to get one message */
+		num_q_msg = 1;
+		dma_mem = NULL;
+		err = idpf_ctlq_recv(adapter->hw.arq, &num_q_msg, &ctlq_msg);
+		/* If no message then decide if we have to retry based on
+		 * opcode
+		 */
+		if (err || !num_q_msg) {
+			/* Increasing num_retry to consider the delayed
+			 * responses because of large number of VF's mailbox
+			 * messages. If the mailbox message is received from
+			 * the other side, we come out of the sleep cycle
+			 * immediately else we wait for more time.
+			 */
+			if (!op || !num_retry--)
+				break;
+			if (test_bit(IDPF_REMOVE_IN_PROG, adapter->flags)) {
+				err = -EIO;
+				break;
+			}
+			msleep(20);
+			continue;
+		}
+
+		/* If we are here a message is received. Check if we are looking
+		 * for a specific message based on opcode. If it is different
+		 * ignore and post buffers
+		 */
+		if (op && ctlq_msg.cookie.mbx.chnl_opcode != op)
+			goto post_buffs;
+
+		err = idpf_find_vport(adapter, &vport, &ctlq_msg);
+		if (err)
+			goto post_buffs;
+
+		if (ctlq_msg.data_len)
+			payload_size = ctlq_msg.ctx.indirect.payload->size;
+
+		/* All conditions are met. Either a message requested is
+		 * received or we received a message to be processed
+		 */
+		switch (ctlq_msg.cookie.mbx.chnl_opcode) {
+		case VIRTCHNL2_OP_VERSION:
+		case VIRTCHNL2_OP_GET_CAPS:
+			if (ctlq_msg.cookie.mbx.chnl_retval) {
+				dev_err(&adapter->pdev->dev, "Failure initializing, vc op: %u retval: %u\n",
+					ctlq_msg.cookie.mbx.chnl_opcode,
+					ctlq_msg.cookie.mbx.chnl_retval);
+				err = -EBADMSG;
+			} else if (msg) {
+				memcpy(msg, ctlq_msg.ctx.indirect.payload->va,
+				       min_t(int, payload_size, msg_size));
+			}
+			work_done = true;
+			break;
+		case VIRTCHNL2_OP_CREATE_VPORT:
+			idpf_recv_vchnl_op(adapter, NULL, &ctlq_msg,
+					   IDPF_VC_CREATE_VPORT,
+					   IDPF_VC_CREATE_VPORT_ERR);
+			break;
+		case VIRTCHNL2_OP_ENABLE_VPORT:
+			idpf_recv_vchnl_op(adapter, vport, &ctlq_msg,
+					   IDPF_VC_ENA_VPORT,
+					   IDPF_VC_ENA_VPORT_ERR);
+			break;
+		case VIRTCHNL2_OP_DISABLE_VPORT:
+			idpf_recv_vchnl_op(adapter, vport, &ctlq_msg,
+					   IDPF_VC_DIS_VPORT,
+					   IDPF_VC_DIS_VPORT_ERR);
+			break;
+		case VIRTCHNL2_OP_DESTROY_VPORT:
+			idpf_recv_vchnl_op(adapter, vport, &ctlq_msg,
+					   IDPF_VC_DESTROY_VPORT,
+					   IDPF_VC_DESTROY_VPORT_ERR);
+			break;
+		case VIRTCHNL2_OP_CONFIG_TX_QUEUES:
+			idpf_recv_vchnl_op(adapter, vport, &ctlq_msg,
+					   IDPF_VC_CONFIG_TXQ,
+					   IDPF_VC_CONFIG_TXQ_ERR);
+			break;
+		case VIRTCHNL2_OP_CONFIG_RX_QUEUES:
+			idpf_recv_vchnl_op(adapter, vport, &ctlq_msg,
+					   IDPF_VC_CONFIG_RXQ,
+					   IDPF_VC_CONFIG_RXQ_ERR);
+			break;
+		case VIRTCHNL2_OP_ENABLE_QUEUES:
+			idpf_recv_vchnl_op(adapter, vport, &ctlq_msg,
+					   IDPF_VC_ENA_QUEUES,
+					   IDPF_VC_ENA_QUEUES_ERR);
+			break;
+		case VIRTCHNL2_OP_DISABLE_QUEUES:
+			idpf_recv_vchnl_op(adapter, vport, &ctlq_msg,
+					   IDPF_VC_DIS_QUEUES,
+					   IDPF_VC_DIS_QUEUES_ERR);
+			break;
+		case VIRTCHNL2_OP_ADD_QUEUES:
+			idpf_recv_vchnl_op(adapter, vport, &ctlq_msg,
+					   IDPF_VC_ADD_QUEUES,
+					   IDPF_VC_ADD_QUEUES_ERR);
+			break;
+		case VIRTCHNL2_OP_DEL_QUEUES:
+			idpf_recv_vchnl_op(adapter, vport, &ctlq_msg,
+					   IDPF_VC_DEL_QUEUES,
+					   IDPF_VC_DEL_QUEUES_ERR);
+			break;
+		case VIRTCHNL2_OP_MAP_QUEUE_VECTOR:
+			idpf_recv_vchnl_op(adapter, vport, &ctlq_msg,
+					   IDPF_VC_MAP_IRQ,
+					   IDPF_VC_MAP_IRQ_ERR);
+			break;
+		case VIRTCHNL2_OP_UNMAP_QUEUE_VECTOR:
+			idpf_recv_vchnl_op(adapter, vport, &ctlq_msg,
+					   IDPF_VC_UNMAP_IRQ,
+					   IDPF_VC_UNMAP_IRQ_ERR);
+			break;
+		case VIRTCHNL2_OP_GET_STATS:
+			idpf_recv_vchnl_op(adapter, vport, &ctlq_msg,
+					   IDPF_VC_GET_STATS,
+					   IDPF_VC_GET_STATS_ERR);
+			break;
+		case VIRTCHNL2_OP_GET_RSS_LUT:
+			idpf_recv_vchnl_op(adapter, vport, &ctlq_msg,
+					   IDPF_VC_GET_RSS_LUT,
+					   IDPF_VC_GET_RSS_LUT_ERR);
+			break;
+		case VIRTCHNL2_OP_SET_RSS_LUT:
+			idpf_recv_vchnl_op(adapter, vport, &ctlq_msg,
+					   IDPF_VC_SET_RSS_LUT,
+					   IDPF_VC_SET_RSS_LUT_ERR);
+			break;
+		case VIRTCHNL2_OP_GET_RSS_KEY:
+			idpf_recv_vchnl_op(adapter, vport, &ctlq_msg,
+					   IDPF_VC_GET_RSS_KEY,
+					   IDPF_VC_GET_RSS_KEY_ERR);
+			break;
+		case VIRTCHNL2_OP_SET_RSS_KEY:
+			idpf_recv_vchnl_op(adapter, vport, &ctlq_msg,
+					   IDPF_VC_SET_RSS_KEY,
+					   IDPF_VC_SET_RSS_KEY_ERR);
+			break;
+		case VIRTCHNL2_OP_SET_SRIOV_VFS:
+			idpf_recv_vchnl_op(adapter, NULL, &ctlq_msg,
+					   IDPF_VC_SET_SRIOV_VFS,
+					   IDPF_VC_SET_SRIOV_VFS_ERR);
+			break;
+		case VIRTCHNL2_OP_ALLOC_VECTORS:
+			idpf_recv_vchnl_op(adapter, NULL, &ctlq_msg,
+					   IDPF_VC_ALLOC_VECTORS,
+					   IDPF_VC_ALLOC_VECTORS_ERR);
+			break;
+		case VIRTCHNL2_OP_DEALLOC_VECTORS:
+			idpf_recv_vchnl_op(adapter, NULL, &ctlq_msg,
+					   IDPF_VC_DEALLOC_VECTORS,
+					   IDPF_VC_DEALLOC_VECTORS_ERR);
+			break;
+		case VIRTCHNL2_OP_GET_PTYPE_INFO:
+			idpf_recv_vchnl_op(adapter, NULL, &ctlq_msg,
+					   IDPF_VC_GET_PTYPE_INFO,
+					   IDPF_VC_GET_PTYPE_INFO_ERR);
+			break;
+		case VIRTCHNL2_OP_LOOPBACK:
+			idpf_recv_vchnl_op(adapter, vport, &ctlq_msg,
+					   IDPF_VC_LOOPBACK_STATE,
+					   IDPF_VC_LOOPBACK_STATE_ERR);
+			break;
+		case VIRTCHNL2_OP_CONFIG_PROMISCUOUS_MODE:
+			/* This message can only be sent asynchronously. As
+			 * such we'll have lost the context in which it was
+			 * called and thus can only really report if it looks
+			 * like an error occurred. Don't bother setting ERR bit
+			 * or waking chnl_wq since no work queue will be waiting
+			 * to read the message.
+			 */
+			if (ctlq_msg.cookie.mbx.chnl_retval) {
+				dev_err(&adapter->pdev->dev, "Failed to set promiscuous mode: %d\n",
+					ctlq_msg.cookie.mbx.chnl_retval);
+			}
+			break;
+		case VIRTCHNL2_OP_ADD_MAC_ADDR:
+			vport_config = adapter->vport_config[vport->idx];
+			if (test_and_clear_bit(IDPF_VPORT_ADD_MAC_REQ,
+					       vport_config->flags)) {
+				/* Message was sent asynchronously. We don't
+				 * normally print errors here, instead
+				 * prefer to handle errors in the function
+				 * calling wait_for_event. However, if
+				 * asynchronous, the context in which the
+				 * message was sent is lost. We can't really do
+				 * anything about at it this point, but we
+				 * should at a minimum indicate that it looks
+				 * like something went wrong. Also don't bother
+				 * setting ERR bit or waking vchnl_wq since no
+				 * one will be waiting to read the async
+				 * message.
+				 */
+				if (ctlq_msg.cookie.mbx.chnl_retval)
+					dev_err(&adapter->pdev->dev, "Failed to add MAC address: %d\n",
+						ctlq_msg.cookie.mbx.chnl_retval);
+				break;
+			}
+			idpf_recv_vchnl_op(adapter, vport, &ctlq_msg,
+					   IDPF_VC_ADD_MAC_ADDR,
+					   IDPF_VC_ADD_MAC_ADDR_ERR);
+			break;
+		case VIRTCHNL2_OP_DEL_MAC_ADDR:
+			vport_config = adapter->vport_config[vport->idx];
+			if (test_and_clear_bit(IDPF_VPORT_DEL_MAC_REQ,
+					       vport_config->flags)) {
+				/* Message was sent asynchronously like the
+				 * VIRTCHNL2_OP_ADD_MAC_ADDR
+				 */
+				if (ctlq_msg.cookie.mbx.chnl_retval)
+					dev_err(&adapter->pdev->dev, "Failed to delete MAC address: %d\n",
+						ctlq_msg.cookie.mbx.chnl_retval);
+				break;
+			}
+			idpf_recv_vchnl_op(adapter, vport, &ctlq_msg,
+					   IDPF_VC_DEL_MAC_ADDR,
+					   IDPF_VC_DEL_MAC_ADDR_ERR);
+			break;
+		case VIRTCHNL2_OP_EVENT:
+			idpf_recv_event_msg(vport, &ctlq_msg);
+			break;
+		default:
+			dev_warn(&adapter->pdev->dev,
+				 "Unhandled virtchnl response %d\n",
+				 ctlq_msg.cookie.mbx.chnl_opcode);
+			break;
+		}
+
+post_buffs:
+		if (ctlq_msg.data_len)
+			dma_mem = ctlq_msg.ctx.indirect.payload;
+		else
+			num_q_msg = 0;
+
+		err = idpf_ctlq_post_rx_buffs(&adapter->hw, adapter->hw.arq,
+					      &num_q_msg, &dma_mem);
+		/* If post failed clear the only buffer we supplied */
+		if (err && dma_mem)
+			dma_free_coherent(&adapter->pdev->dev, dma_mem->size,
+					  dma_mem->va, dma_mem->pa);
+
+		/* Applies only if we are looking for a specific opcode */
+		if (work_done)
+			break;
+	}
+
+	return err;
+}
+
+/**
+ * __idpf_wait_for_event - wrapper function for wait on virtchannel response
+ * @adapter: Driver private data structure
+ * @vport: virtual port structure
+ * @state: check on state upon timeout
+ * @err_check: check if this specific error bit is set
+ * @timeout: Max time to wait
+ *
+ * Checks if state is set upon expiry of timeout.  Returns 0 on success,
+ * negative on failure.
+ */
+static int __idpf_wait_for_event(struct idpf_adapter *adapter,
+				 struct idpf_vport *vport,
+				 enum idpf_vport_vc_state state,
+				 enum idpf_vport_vc_state err_check,
+				 int timeout)
+{
+	int time_to_wait, num_waits;
+	wait_queue_head_t *vchnl_wq;
+	unsigned long *vc_state;
+
+	time_to_wait = ((timeout <= IDPF_MAX_WAIT) ? timeout : IDPF_MAX_WAIT);
+	num_waits = ((timeout <= IDPF_MAX_WAIT) ? 1 : timeout / IDPF_MAX_WAIT);
+
+	if (vport) {
+		vchnl_wq = &vport->vchnl_wq;
+		vc_state = vport->vc_state;
+	} else {
+		vchnl_wq = &adapter->vchnl_wq;
+		vc_state = adapter->vc_state;
+	}
+
+	while (num_waits) {
+		int event;
+
+		/* If we are here and a reset is detected do not wait but
+		 * return. Reset timing is out of drivers control. So
+		 * while we are cleaning resources as part of reset if the
+		 * underlying HW mailbox is gone, wait on mailbox messages
+		 * is not meaningful
+		 */
+		if (idpf_is_reset_detected(adapter))
+			return 0;
+
+		event = wait_event_timeout(*vchnl_wq,
+					   test_and_clear_bit(state, vc_state),
+					   msecs_to_jiffies(time_to_wait));
+		if (event) {
+			if (test_and_clear_bit(err_check, vc_state)) {
+				dev_err(&adapter->pdev->dev, "VC response error %s\n",
+					idpf_vport_vc_state_str[err_check]);
+
+				return -EINVAL;
+			}
+
+			return 0;
+		}
+		num_waits--;
+	}
+
+	/* Timeout occurred */
+	dev_err(&adapter->pdev->dev, "VC timeout, state = %s\n",
+		idpf_vport_vc_state_str[state]);
+
+	return -ETIMEDOUT;
+}
+
+/**
+ * idpf_min_wait_for_event - wait for virtchannel response
+ * @adapter: Driver private data structure
+ * @vport: virtual port structure
+ * @state: check on state upon timeout
+ * @err_check: check if this specific error bit is set
+ *
+ * Returns 0 on success, negative on failure.
+ */
+static int idpf_min_wait_for_event(struct idpf_adapter *adapter,
+				   struct idpf_vport *vport,
+				   enum idpf_vport_vc_state state,
+				   enum idpf_vport_vc_state err_check)
+{
+	return __idpf_wait_for_event(adapter, vport, state, err_check,
+				     IDPF_WAIT_FOR_EVENT_TIMEO_MIN);
+}
+
+/**
+ * idpf_wait_for_event - wait for virtchannel response
+ * @adapter: Driver private data structure
+ * @vport: virtual port structure
+ * @state: check on state upon timeout after 500ms
+ * @err_check: check if this specific error bit is set
+ *
+ * Returns 0 on success, negative on failure.
+ */
+static int idpf_wait_for_event(struct idpf_adapter *adapter,
+			       struct idpf_vport *vport,
+			       enum idpf_vport_vc_state state,
+			       enum idpf_vport_vc_state err_check)
+{
+	/* Increasing the timeout in __IDPF_INIT_SW flow to consider large
+	 * number of VF's mailbox message responses. When a message is received
+	 * on mailbox, this thread is woken up by the idpf_recv_mb_msg before
+	 * the timeout expires. Only in the error case i.e. if no message is
+	 * received on mailbox, we wait for the complete timeout which is
+	 * less likely to happen.
+	 */
+	return __idpf_wait_for_event(adapter, vport, state, err_check,
+				     IDPF_WAIT_FOR_EVENT_TIMEO);
+}
+
+/**
+ * idpf_wait_for_marker_event - wait for software marker response
+ * @vport: virtual port data structure
+ *
+ * Returns 0 success, negative on failure.
+ **/
+static int idpf_wait_for_marker_event(struct idpf_vport *vport)
+{
+	int event;
+	int i;
+
+	for (i = 0; i < vport->num_txq; i++)
+		set_bit(__IDPF_Q_SW_MARKER, vport->txqs[i]->flags);
+
+	event = wait_event_timeout(vport->sw_marker_wq,
+				   test_and_clear_bit(IDPF_VPORT_SW_MARKER,
+						      vport->flags),
+				   msecs_to_jiffies(500));
+
+	for (i = 0; i < vport->num_txq; i++)
+		clear_bit(__IDPF_Q_POLL_MODE, vport->txqs[i]->flags);
+
+	if (event)
+		return 0;
+
+	dev_warn(&vport->adapter->pdev->dev, "Failed to receive marker packets\n");
+
+	return -ETIMEDOUT;
+}
+
+/**
+ * idpf_send_ver_msg - send virtchnl version message
+ * @adapter: Driver specific private structure
+ *
+ * Send virtchnl version message.  Returns 0 on success, negative on failure.
+ */
+static int idpf_send_ver_msg(struct idpf_adapter *adapter)
+{
+	struct virtchnl2_version_info vvi;
+
+	if (adapter->virt_ver_maj) {
+		vvi.major = cpu_to_le32(adapter->virt_ver_maj);
+		vvi.minor = cpu_to_le32(adapter->virt_ver_min);
+	} else {
+		vvi.major = cpu_to_le32(IDPF_VIRTCHNL_VERSION_MAJOR);
+		vvi.minor = cpu_to_le32(IDPF_VIRTCHNL_VERSION_MINOR);
+	}
+
+	return idpf_send_mb_msg(adapter, VIRTCHNL2_OP_VERSION, sizeof(vvi),
+				(u8 *)&vvi);
+}
+
+/**
+ * idpf_recv_ver_msg - Receive virtchnl version message
+ * @adapter: Driver specific private structure
+ *
+ * Receive virtchnl version message. Returns 0 on success, -EAGAIN if we need
+ * to send version message again, otherwise negative on failure.
+ */
+static int idpf_recv_ver_msg(struct idpf_adapter *adapter)
+{
+	struct virtchnl2_version_info vvi;
+	u32 major, minor;
+	int err;
+
+	err = idpf_recv_mb_msg(adapter, VIRTCHNL2_OP_VERSION, &vvi,
+			       sizeof(vvi));
+	if (err)
+		return err;
+
+	major = le32_to_cpu(vvi.major);
+	minor = le32_to_cpu(vvi.minor);
+
+	if (major > IDPF_VIRTCHNL_VERSION_MAJOR) {
+		dev_warn(&adapter->pdev->dev,
+			 "Virtchnl major version (%d) greater than supported\n",
+			 major);
+
+		return -EINVAL;
+	}
+
+	if (major == IDPF_VIRTCHNL_VERSION_MAJOR &&
+	    minor > IDPF_VIRTCHNL_VERSION_MINOR)
+		dev_warn(&adapter->pdev->dev,
+			 "Virtchnl minor version (%d) didn't match\n", minor);
+
+	/* If we have a mismatch, resend version to update receiver on what
+	 * version we will use.
+	 */
+	if (!adapter->virt_ver_maj &&
+	    major != IDPF_VIRTCHNL_VERSION_MAJOR &&
+	    minor != IDPF_VIRTCHNL_VERSION_MINOR)
+		err = -EAGAIN;
+
+	adapter->virt_ver_maj = major;
+	adapter->virt_ver_min = minor;
+
+	return err;
+}
+
+/**
+ * idpf_send_get_caps_msg - Send virtchnl get capabilities message
+ * @adapter: Driver specific private structure
+ *
+ * Send virtchl get capabilities message. Returns 0 on success, negative on
+ * failure.
+ */
+static int idpf_send_get_caps_msg(struct idpf_adapter *adapter)
+{
+	struct virtchnl2_get_capabilities caps = { };
+
+	caps.csum_caps =
+		cpu_to_le32(VIRTCHNL2_CAP_TX_CSUM_L3_IPV4	|
+			    VIRTCHNL2_CAP_TX_CSUM_L4_IPV4_TCP	|
+			    VIRTCHNL2_CAP_TX_CSUM_L4_IPV4_UDP	|
+			    VIRTCHNL2_CAP_TX_CSUM_L4_IPV4_SCTP	|
+			    VIRTCHNL2_CAP_TX_CSUM_L4_IPV6_TCP	|
+			    VIRTCHNL2_CAP_TX_CSUM_L4_IPV6_UDP	|
+			    VIRTCHNL2_CAP_TX_CSUM_L4_IPV6_SCTP	|
+			    VIRTCHNL2_CAP_RX_CSUM_L3_IPV4	|
+			    VIRTCHNL2_CAP_RX_CSUM_L4_IPV4_TCP	|
+			    VIRTCHNL2_CAP_RX_CSUM_L4_IPV4_UDP	|
+			    VIRTCHNL2_CAP_RX_CSUM_L4_IPV4_SCTP	|
+			    VIRTCHNL2_CAP_RX_CSUM_L4_IPV6_TCP	|
+			    VIRTCHNL2_CAP_RX_CSUM_L4_IPV6_UDP	|
+			    VIRTCHNL2_CAP_RX_CSUM_L4_IPV6_SCTP	|
+			    VIRTCHNL2_CAP_TX_CSUM_L3_SINGLE_TUNNEL |
+			    VIRTCHNL2_CAP_RX_CSUM_L3_SINGLE_TUNNEL |
+			    VIRTCHNL2_CAP_TX_CSUM_L4_SINGLE_TUNNEL |
+			    VIRTCHNL2_CAP_RX_CSUM_L4_SINGLE_TUNNEL |
+			    VIRTCHNL2_CAP_RX_CSUM_GENERIC);
+
+	caps.seg_caps =
+		cpu_to_le32(VIRTCHNL2_CAP_SEG_IPV4_TCP		|
+			    VIRTCHNL2_CAP_SEG_IPV4_UDP		|
+			    VIRTCHNL2_CAP_SEG_IPV4_SCTP		|
+			    VIRTCHNL2_CAP_SEG_IPV6_TCP		|
+			    VIRTCHNL2_CAP_SEG_IPV6_UDP		|
+			    VIRTCHNL2_CAP_SEG_IPV6_SCTP		|
+			    VIRTCHNL2_CAP_SEG_TX_SINGLE_TUNNEL);
+
+	caps.rss_caps =
+		cpu_to_le64(VIRTCHNL2_CAP_RSS_IPV4_TCP		|
+			    VIRTCHNL2_CAP_RSS_IPV4_UDP		|
+			    VIRTCHNL2_CAP_RSS_IPV4_SCTP		|
+			    VIRTCHNL2_CAP_RSS_IPV4_OTHER	|
+			    VIRTCHNL2_CAP_RSS_IPV6_TCP		|
+			    VIRTCHNL2_CAP_RSS_IPV6_UDP		|
+			    VIRTCHNL2_CAP_RSS_IPV6_SCTP		|
+			    VIRTCHNL2_CAP_RSS_IPV6_OTHER);
+
+	caps.hsplit_caps =
+		cpu_to_le32(VIRTCHNL2_CAP_RX_HSPLIT_AT_L4V4	|
+			    VIRTCHNL2_CAP_RX_HSPLIT_AT_L4V6);
+
+	caps.rsc_caps =
+		cpu_to_le32(VIRTCHNL2_CAP_RSC_IPV4_TCP		|
+			    VIRTCHNL2_CAP_RSC_IPV6_TCP);
+
+	caps.other_caps =
+		cpu_to_le64(VIRTCHNL2_CAP_SRIOV			|
+			    VIRTCHNL2_CAP_MACFILTER		|
+			    VIRTCHNL2_CAP_SPLITQ_QSCHED		|
+			    VIRTCHNL2_CAP_PROMISC		|
+			    VIRTCHNL2_CAP_LOOPBACK);
+
+	return idpf_send_mb_msg(adapter, VIRTCHNL2_OP_GET_CAPS, sizeof(caps),
+				(u8 *)&caps);
+}
+
+/**
+ * idpf_recv_get_caps_msg - Receive virtchnl get capabilities message
+ * @adapter: Driver specific private structure
+ *
+ * Receive virtchnl get capabilities message. Returns 0 on success, negative on
+ * failure.
+ */
+static int idpf_recv_get_caps_msg(struct idpf_adapter *adapter)
+{
+	return idpf_recv_mb_msg(adapter, VIRTCHNL2_OP_GET_CAPS, &adapter->caps,
+				sizeof(struct virtchnl2_get_capabilities));
+}
+
+/**
+ * idpf_vport_alloc_max_qs - Allocate max queues for a vport
+ * @adapter: Driver specific private structure
+ * @max_q: vport max queue structure
+ */
+int idpf_vport_alloc_max_qs(struct idpf_adapter *adapter,
+			    struct idpf_vport_max_q *max_q)
+{
+	struct idpf_avail_queue_info *avail_queues = &adapter->avail_queues;
+	struct virtchnl2_get_capabilities *caps = &adapter->caps;
+	u16 default_vports = idpf_get_default_vports(adapter);
+	int max_rx_q, max_tx_q;
+
+	mutex_lock(&adapter->queue_lock);
+
+	max_rx_q = le16_to_cpu(caps->max_rx_q) / default_vports;
+	max_tx_q = le16_to_cpu(caps->max_tx_q) / default_vports;
+	if (adapter->num_alloc_vports < default_vports) {
+		max_q->max_rxq = min_t(u16, max_rx_q, IDPF_MAX_Q);
+		max_q->max_txq = min_t(u16, max_tx_q, IDPF_MAX_Q);
+	} else {
+		max_q->max_rxq = IDPF_MIN_Q;
+		max_q->max_txq = IDPF_MIN_Q;
+	}
+	max_q->max_bufq = max_q->max_rxq * IDPF_MAX_BUFQS_PER_RXQ_GRP;
+	max_q->max_complq = max_q->max_txq;
+
+	if (avail_queues->avail_rxq < max_q->max_rxq ||
+	    avail_queues->avail_txq < max_q->max_txq ||
+	    avail_queues->avail_bufq < max_q->max_bufq ||
+	    avail_queues->avail_complq < max_q->max_complq) {
+		mutex_unlock(&adapter->queue_lock);
+
+		return -EINVAL;
+	}
+
+	avail_queues->avail_rxq -= max_q->max_rxq;
+	avail_queues->avail_txq -= max_q->max_txq;
+	avail_queues->avail_bufq -= max_q->max_bufq;
+	avail_queues->avail_complq -= max_q->max_complq;
+
+	mutex_unlock(&adapter->queue_lock);
+
+	return 0;
+}
+
+/**
+ * idpf_vport_dealloc_max_qs - Deallocate max queues of a vport
+ * @adapter: Driver specific private structure
+ * @max_q: vport max queue structure
+ */
+void idpf_vport_dealloc_max_qs(struct idpf_adapter *adapter,
+			       struct idpf_vport_max_q *max_q)
+{
+	struct idpf_avail_queue_info *avail_queues;
+
+	mutex_lock(&adapter->queue_lock);
+	avail_queues = &adapter->avail_queues;
+
+	avail_queues->avail_rxq += max_q->max_rxq;
+	avail_queues->avail_txq += max_q->max_txq;
+	avail_queues->avail_bufq += max_q->max_bufq;
+	avail_queues->avail_complq += max_q->max_complq;
+
+	mutex_unlock(&adapter->queue_lock);
+}
+
+/**
+ * idpf_init_avail_queues - Initialize available queues on the device
+ * @adapter: Driver specific private structure
+ */
+static void idpf_init_avail_queues(struct idpf_adapter *adapter)
+{
+	struct idpf_avail_queue_info *avail_queues = &adapter->avail_queues;
+	struct virtchnl2_get_capabilities *caps = &adapter->caps;
+
+	avail_queues->avail_rxq = le16_to_cpu(caps->max_rx_q);
+	avail_queues->avail_txq = le16_to_cpu(caps->max_tx_q);
+	avail_queues->avail_bufq = le16_to_cpu(caps->max_rx_bufq);
+	avail_queues->avail_complq = le16_to_cpu(caps->max_tx_complq);
+}
+
+/**
+ * idpf_get_reg_intr_vecs - Get vector queue register offset
+ * @vport: virtual port structure
+ * @reg_vals: Register offsets to store in
+ *
+ * Returns number of registers that got populated
+ */
+int idpf_get_reg_intr_vecs(struct idpf_vport *vport,
+			   struct idpf_vec_regs *reg_vals)
+{
+	struct virtchnl2_vector_chunks *chunks;
+	struct idpf_vec_regs reg_val;
+	u16 num_vchunks, num_vec;
+	int num_regs = 0, i, j;
+
+	chunks = &vport->adapter->req_vec_chunks->vchunks;
+	num_vchunks = le16_to_cpu(chunks->num_vchunks);
+
+	for (j = 0; j < num_vchunks; j++) {
+		struct virtchnl2_vector_chunk *chunk;
+		u32 dynctl_reg_spacing;
+		u32 itrn_reg_spacing;
+
+		chunk = &chunks->vchunks[j];
+		num_vec = le16_to_cpu(chunk->num_vectors);
+		reg_val.dyn_ctl_reg = le32_to_cpu(chunk->dynctl_reg_start);
+		reg_val.itrn_reg = le32_to_cpu(chunk->itrn_reg_start);
+		reg_val.itrn_index_spacing = le32_to_cpu(chunk->itrn_index_spacing);
+
+		dynctl_reg_spacing = le32_to_cpu(chunk->dynctl_reg_spacing);
+		itrn_reg_spacing = le32_to_cpu(chunk->itrn_reg_spacing);
+
+		for (i = 0; i < num_vec; i++) {
+			reg_vals[num_regs].dyn_ctl_reg = reg_val.dyn_ctl_reg;
+			reg_vals[num_regs].itrn_reg = reg_val.itrn_reg;
+			reg_vals[num_regs].itrn_index_spacing =
+						reg_val.itrn_index_spacing;
+
+			reg_val.dyn_ctl_reg += dynctl_reg_spacing;
+			reg_val.itrn_reg += itrn_reg_spacing;
+			num_regs++;
+		}
+	}
+
+	return num_regs;
+}
+
+/**
+ * idpf_vport_get_q_reg - Get the queue registers for the vport
+ * @reg_vals: register values needing to be set
+ * @num_regs: amount we expect to fill
+ * @q_type: queue model
+ * @chunks: queue regs received over mailbox
+ *
+ * This function parses the queue register offsets from the queue register
+ * chunk information, with a specific queue type and stores it into the array
+ * passed as an argument. It returns the actual number of queue registers that
+ * are filled.
+ */
+static int idpf_vport_get_q_reg(u32 *reg_vals, int num_regs, u32 q_type,
+				struct virtchnl2_queue_reg_chunks *chunks)
+{
+	u16 num_chunks = le16_to_cpu(chunks->num_chunks);
+	int reg_filled = 0, i;
+	u32 reg_val;
+
+	while (num_chunks--) {
+		struct virtchnl2_queue_reg_chunk *chunk;
+		u16 num_q;
+
+		chunk = &chunks->chunks[num_chunks];
+		if (le32_to_cpu(chunk->type) != q_type)
+			continue;
+
+		num_q = le32_to_cpu(chunk->num_queues);
+		reg_val = le64_to_cpu(chunk->qtail_reg_start);
+		for (i = 0; i < num_q && reg_filled < num_regs ; i++) {
+			reg_vals[reg_filled++] = reg_val;
+			reg_val += le32_to_cpu(chunk->qtail_reg_spacing);
+		}
+	}
+
+	return reg_filled;
+}
+
+/**
+ * __idpf_queue_reg_init - initialize queue registers
+ * @vport: virtual port structure
+ * @reg_vals: registers we are initializing
+ * @num_regs: how many registers there are in total
+ * @q_type: queue model
+ *
+ * Return number of queues that are initialized
+ */
+static int __idpf_queue_reg_init(struct idpf_vport *vport, u32 *reg_vals,
+				 int num_regs, u32 q_type)
+{
+	struct idpf_adapter *adapter = vport->adapter;
+	struct idpf_queue *q;
+	int i, j, k = 0;
+
+	switch (q_type) {
+	case VIRTCHNL2_QUEUE_TYPE_TX:
+		for (i = 0; i < vport->num_txq_grp; i++) {
+			struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i];
+
+			for (j = 0; j < tx_qgrp->num_txq && k < num_regs; j++, k++)
+				tx_qgrp->txqs[j]->tail =
+					idpf_get_reg_addr(adapter, reg_vals[k]);
+		}
+		break;
+	case VIRTCHNL2_QUEUE_TYPE_RX:
+		for (i = 0; i < vport->num_rxq_grp; i++) {
+			struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i];
+			u16 num_rxq = rx_qgrp->singleq.num_rxq;
+
+			for (j = 0; j < num_rxq && k < num_regs; j++, k++) {
+				q = rx_qgrp->singleq.rxqs[j];
+				q->tail = idpf_get_reg_addr(adapter,
+							    reg_vals[k]);
+			}
+		}
+		break;
+	case VIRTCHNL2_QUEUE_TYPE_RX_BUFFER:
+		for (i = 0; i < vport->num_rxq_grp; i++) {
+			struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i];
+			u8 num_bufqs = vport->num_bufqs_per_qgrp;
+
+			for (j = 0; j < num_bufqs && k < num_regs; j++, k++) {
+				q = &rx_qgrp->splitq.bufq_sets[j].bufq;
+				q->tail = idpf_get_reg_addr(adapter,
+							    reg_vals[k]);
+			}
+		}
+		break;
+	default:
+		break;
+	}
+
+	return k;
+}
+
+/**
+ * idpf_queue_reg_init - initialize queue registers
+ * @vport: virtual port structure
+ *
+ * Return 0 on success, negative on failure
+ */
+int idpf_queue_reg_init(struct idpf_vport *vport)
+{
+	struct virtchnl2_create_vport *vport_params;
+	struct virtchnl2_queue_reg_chunks *chunks;
+	struct idpf_vport_config *vport_config;
+	u16 vport_idx = vport->idx;
+	int num_regs, ret = 0;
+	u32 *reg_vals;
+
+	/* We may never deal with more than 256 same type of queues */
+	reg_vals = kzalloc(sizeof(void *) * IDPF_LARGE_MAX_Q, GFP_KERNEL);
+	if (!reg_vals)
+		return -ENOMEM;
+
+	vport_config = vport->adapter->vport_config[vport_idx];
+	if (vport_config->req_qs_chunks) {
+		struct virtchnl2_add_queues *vc_aq =
+		  (struct virtchnl2_add_queues *)vport_config->req_qs_chunks;
+		chunks = &vc_aq->chunks;
+	} else {
+		vport_params = vport->adapter->vport_params_recvd[vport_idx];
+		chunks = &vport_params->chunks;
+	}
+
+	/* Initialize Tx queue tail register address */
+	num_regs = idpf_vport_get_q_reg(reg_vals, IDPF_LARGE_MAX_Q,
+					VIRTCHNL2_QUEUE_TYPE_TX,
+					chunks);
+	if (num_regs < vport->num_txq) {
+		ret = -EINVAL;
+		goto free_reg_vals;
+	}
+
+	num_regs = __idpf_queue_reg_init(vport, reg_vals, num_regs,
+					 VIRTCHNL2_QUEUE_TYPE_TX);
+	if (num_regs < vport->num_txq) {
+		ret = -EINVAL;
+		goto free_reg_vals;
+	}
+
+	/* Initialize Rx/buffer queue tail register address based on Rx queue
+	 * model
+	 */
+	if (idpf_is_queue_model_split(vport->rxq_model)) {
+		num_regs = idpf_vport_get_q_reg(reg_vals, IDPF_LARGE_MAX_Q,
+						VIRTCHNL2_QUEUE_TYPE_RX_BUFFER,
+						chunks);
+		if (num_regs < vport->num_bufq) {
+			ret = -EINVAL;
+			goto free_reg_vals;
+		}
+
+		num_regs = __idpf_queue_reg_init(vport, reg_vals, num_regs,
+						 VIRTCHNL2_QUEUE_TYPE_RX_BUFFER);
+		if (num_regs < vport->num_bufq) {
+			ret = -EINVAL;
+			goto free_reg_vals;
+		}
+	} else {
+		num_regs = idpf_vport_get_q_reg(reg_vals, IDPF_LARGE_MAX_Q,
+						VIRTCHNL2_QUEUE_TYPE_RX,
+						chunks);
+		if (num_regs < vport->num_rxq) {
+			ret = -EINVAL;
+			goto free_reg_vals;
+		}
+
+		num_regs = __idpf_queue_reg_init(vport, reg_vals, num_regs,
+						 VIRTCHNL2_QUEUE_TYPE_RX);
+		if (num_regs < vport->num_rxq) {
+			ret = -EINVAL;
+			goto free_reg_vals;
+		}
+	}
+
+free_reg_vals:
+	kfree(reg_vals);
+
+	return ret;
+}
+
+/**
+ * idpf_send_create_vport_msg - Send virtchnl create vport message
+ * @adapter: Driver specific private structure
+ * @max_q: vport max queue info
+ *
+ * send virtchnl creae vport message
+ *
+ * Returns 0 on success, negative on failure
+ */
+int idpf_send_create_vport_msg(struct idpf_adapter *adapter,
+			       struct idpf_vport_max_q *max_q)
+{
+	struct virtchnl2_create_vport *vport_msg;
+	u16 idx = adapter->next_vport;
+	int err, buf_size;
+
+	buf_size = sizeof(struct virtchnl2_create_vport);
+	if (!adapter->vport_params_reqd[idx]) {
+		adapter->vport_params_reqd[idx] = kzalloc(buf_size,
+							  GFP_KERNEL);
+		if (!adapter->vport_params_reqd[idx])
+			return -ENOMEM;
+	}
+
+	vport_msg = adapter->vport_params_reqd[idx];
+	vport_msg->vport_type = cpu_to_le16(VIRTCHNL2_VPORT_TYPE_DEFAULT);
+	vport_msg->vport_index = cpu_to_le16(idx);
+
+	if (adapter->req_tx_splitq)
+		vport_msg->txq_model = cpu_to_le16(VIRTCHNL2_QUEUE_MODEL_SPLIT);
+	else
+		vport_msg->txq_model = cpu_to_le16(VIRTCHNL2_QUEUE_MODEL_SINGLE);
+
+	if (adapter->req_rx_splitq)
+		vport_msg->rxq_model = cpu_to_le16(VIRTCHNL2_QUEUE_MODEL_SPLIT);
+	else
+		vport_msg->rxq_model = cpu_to_le16(VIRTCHNL2_QUEUE_MODEL_SINGLE);
+
+	err = idpf_vport_calc_total_qs(adapter, idx, vport_msg, max_q);
+	if (err) {
+		dev_err(&adapter->pdev->dev, "Enough queues are not available");
+
+		return err;
+	}
+
+	mutex_lock(&adapter->vc_buf_lock);
+
+	err = idpf_send_mb_msg(adapter, VIRTCHNL2_OP_CREATE_VPORT, buf_size,
+			       (u8 *)vport_msg);
+	if (err)
+		goto rel_lock;
+
+	err = idpf_wait_for_event(adapter, NULL, IDPF_VC_CREATE_VPORT,
+				  IDPF_VC_CREATE_VPORT_ERR);
+	if (err) {
+		dev_err(&adapter->pdev->dev, "Failed to receive create vport message");
+
+		goto rel_lock;
+	}
+
+	if (!adapter->vport_params_recvd[idx]) {
+		adapter->vport_params_recvd[idx] = kzalloc(IDPF_CTLQ_MAX_BUF_LEN,
+							   GFP_KERNEL);
+		if (!adapter->vport_params_recvd[idx]) {
+			err = -ENOMEM;
+			goto rel_lock;
+		}
+	}
+
+	vport_msg = adapter->vport_params_recvd[idx];
+	memcpy(vport_msg, adapter->vc_msg, IDPF_CTLQ_MAX_BUF_LEN);
+
+rel_lock:
+	mutex_unlock(&adapter->vc_buf_lock);
+
+	return err;
+}
+
+/**
+ * idpf_check_supported_desc_ids - Verify we have required descriptor support
+ * @vport: virtual port structure
+ *
+ * Return 0 on success, error on failure
+ */
+int idpf_check_supported_desc_ids(struct idpf_vport *vport)
+{
+	struct idpf_adapter *adapter = vport->adapter;
+	struct virtchnl2_create_vport *vport_msg;
+	u64 rx_desc_ids, tx_desc_ids;
+
+	vport_msg = adapter->vport_params_recvd[vport->idx];
+
+	rx_desc_ids = le64_to_cpu(vport_msg->rx_desc_ids);
+	tx_desc_ids = le64_to_cpu(vport_msg->tx_desc_ids);
+
+	if (vport->rxq_model == VIRTCHNL2_QUEUE_MODEL_SPLIT) {
+		if (!(rx_desc_ids & VIRTCHNL2_RXDID_2_FLEX_SPLITQ_M)) {
+			dev_info(&adapter->pdev->dev, "Minimum RX descriptor support not provided, using the default\n");
+			vport_msg->rx_desc_ids = cpu_to_le64(VIRTCHNL2_RXDID_2_FLEX_SPLITQ_M);
+		}
+	} else {
+		if (!(rx_desc_ids & VIRTCHNL2_RXDID_2_FLEX_SQ_NIC_M))
+			vport->base_rxd = true;
+	}
+
+	if (vport->txq_model != VIRTCHNL2_QUEUE_MODEL_SPLIT)
+		return 0;
+
+	if ((tx_desc_ids & MIN_SUPPORT_TXDID) != MIN_SUPPORT_TXDID) {
+		dev_info(&adapter->pdev->dev, "Minimum TX descriptor support not provided, using the default\n");
+		vport_msg->tx_desc_ids = cpu_to_le64(MIN_SUPPORT_TXDID);
+	}
+
+	return 0;
+}
+
+/**
+ * idpf_send_destroy_vport_msg - Send virtchnl destroy vport message
+ * @vport: virtual port data structure
+ *
+ * Send virtchnl destroy vport message.  Returns 0 on success, negative on
+ * failure.
+ */
+int idpf_send_destroy_vport_msg(struct idpf_vport *vport)
+{
+	struct idpf_adapter *adapter = vport->adapter;
+	struct virtchnl2_vport v_id;
+	int err;
+
+	v_id.vport_id = cpu_to_le32(vport->vport_id);
+
+	mutex_lock(&vport->vc_buf_lock);
+
+	err = idpf_send_mb_msg(adapter, VIRTCHNL2_OP_DESTROY_VPORT,
+			       sizeof(v_id), (u8 *)&v_id);
+	if (err)
+		goto rel_lock;
+
+	err = idpf_min_wait_for_event(adapter, vport, IDPF_VC_DESTROY_VPORT,
+				      IDPF_VC_DESTROY_VPORT_ERR);
+
+rel_lock:
+	mutex_unlock(&vport->vc_buf_lock);
+
+	return err;
+}
+
+/**
+ * idpf_send_enable_vport_msg - Send virtchnl enable vport message
+ * @vport: virtual port data structure
+ *
+ * Send enable vport virtchnl message.  Returns 0 on success, negative on
+ * failure.
+ */
+int idpf_send_enable_vport_msg(struct idpf_vport *vport)
+{
+	struct idpf_adapter *adapter = vport->adapter;
+	struct virtchnl2_vport v_id;
+	int err;
+
+	v_id.vport_id = cpu_to_le32(vport->vport_id);
+
+	mutex_lock(&vport->vc_buf_lock);
+
+	err = idpf_send_mb_msg(adapter, VIRTCHNL2_OP_ENABLE_VPORT,
+			       sizeof(v_id), (u8 *)&v_id);
+	if (err)
+		goto rel_lock;
+
+	err = idpf_wait_for_event(adapter, vport, IDPF_VC_ENA_VPORT,
+				  IDPF_VC_ENA_VPORT_ERR);
+
+rel_lock:
+	mutex_unlock(&vport->vc_buf_lock);
+
+	return err;
+}
+
+/**
+ * idpf_send_disable_vport_msg - Send virtchnl disable vport message
+ * @vport: virtual port data structure
+ *
+ * Send disable vport virtchnl message.  Returns 0 on success, negative on
+ * failure.
+ */
+int idpf_send_disable_vport_msg(struct idpf_vport *vport)
+{
+	struct idpf_adapter *adapter = vport->adapter;
+	struct virtchnl2_vport v_id;
+	int err;
+
+	v_id.vport_id = cpu_to_le32(vport->vport_id);
+
+	mutex_lock(&vport->vc_buf_lock);
+
+	err = idpf_send_mb_msg(adapter, VIRTCHNL2_OP_DISABLE_VPORT,
+			       sizeof(v_id), (u8 *)&v_id);
+	if (err)
+		goto rel_lock;
+
+	err = idpf_min_wait_for_event(adapter, vport, IDPF_VC_DIS_VPORT,
+				      IDPF_VC_DIS_VPORT_ERR);
+
+rel_lock:
+	mutex_unlock(&vport->vc_buf_lock);
+
+	return err;
+}
+
+/**
+ * idpf_send_config_tx_queues_msg - Send virtchnl config tx queues message
+ * @vport: virtual port data structure
+ *
+ * Send config tx queues virtchnl message. Returns 0 on success, negative on
+ * failure.
+ */
+static int idpf_send_config_tx_queues_msg(struct idpf_vport *vport)
+{
+	struct virtchnl2_config_tx_queues *ctq;
+	u32 config_sz, chunk_sz, buf_sz;
+	int totqs, num_msgs, num_chunks;
+	struct virtchnl2_txq_info *qi;
+	int err = 0, i, k = 0;
+
+	totqs = vport->num_txq + vport->num_complq;
+	qi = kcalloc(totqs, sizeof(struct virtchnl2_txq_info), GFP_KERNEL);
+	if (!qi)
+		return -ENOMEM;
+
+	/* Populate the queue info buffer with all queue context info */
+	for (i = 0; i < vport->num_txq_grp; i++) {
+		struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i];
+		int j;
+
+		for (j = 0; j < tx_qgrp->num_txq; j++, k++) {
+			qi[k].queue_id =
+				cpu_to_le32(tx_qgrp->txqs[j]->q_id);
+			qi[k].model =
+				cpu_to_le16(vport->txq_model);
+			qi[k].type =
+				cpu_to_le32(tx_qgrp->txqs[j]->q_type);
+			qi[k].ring_len =
+				cpu_to_le16(tx_qgrp->txqs[j]->desc_count);
+			qi[k].dma_ring_addr =
+				cpu_to_le64(tx_qgrp->txqs[j]->dma);
+			if (idpf_is_queue_model_split(vport->txq_model)) {
+				struct idpf_queue *q = tx_qgrp->txqs[j];
+
+				qi[k].tx_compl_queue_id =
+					cpu_to_le16(tx_qgrp->complq->q_id);
+				qi[k].relative_queue_id = cpu_to_le16(j);
+
+				if (test_bit(__IDPF_Q_FLOW_SCH_EN, q->flags))
+					qi[k].sched_mode =
+					cpu_to_le16(VIRTCHNL2_TXQ_SCHED_MODE_FLOW);
+				else
+					qi[k].sched_mode =
+					cpu_to_le16(VIRTCHNL2_TXQ_SCHED_MODE_QUEUE);
+			} else {
+				qi[k].sched_mode =
+					cpu_to_le16(VIRTCHNL2_TXQ_SCHED_MODE_QUEUE);
+			}
+		}
+
+		if (!idpf_is_queue_model_split(vport->txq_model))
+			continue;
+
+		qi[k].queue_id = cpu_to_le32(tx_qgrp->complq->q_id);
+		qi[k].model = cpu_to_le16(vport->txq_model);
+		qi[k].type = cpu_to_le32(tx_qgrp->complq->q_type);
+		qi[k].ring_len = cpu_to_le16(tx_qgrp->complq->desc_count);
+		qi[k].dma_ring_addr = cpu_to_le64(tx_qgrp->complq->dma);
+
+		k++;
+	}
+
+	/* Make sure accounting agrees */
+	if (k != totqs) {
+		err = -EINVAL;
+		goto error;
+	}
+
+	/* Chunk up the queue contexts into multiple messages to avoid
+	 * sending a control queue message buffer that is too large
+	 */
+	config_sz = sizeof(struct virtchnl2_config_tx_queues);
+	chunk_sz = sizeof(struct virtchnl2_txq_info);
+
+	num_chunks = min_t(u32, IDPF_NUM_CHUNKS_PER_MSG(config_sz, chunk_sz),
+			   totqs);
+	num_msgs = DIV_ROUND_UP(totqs, num_chunks);
+
+	buf_sz = struct_size(ctq, qinfo, num_chunks);
+	ctq = kzalloc(buf_sz, GFP_KERNEL);
+	if (!ctq) {
+		err = -ENOMEM;
+		goto error;
+	}
+
+	mutex_lock(&vport->vc_buf_lock);
+
+	for (i = 0, k = 0; i < num_msgs; i++) {
+		memset(ctq, 0, buf_sz);
+		ctq->vport_id = cpu_to_le32(vport->vport_id);
+		ctq->num_qinfo = cpu_to_le16(num_chunks);
+		memcpy(ctq->qinfo, &qi[k], chunk_sz * num_chunks);
+
+		err = idpf_send_mb_msg(vport->adapter,
+				       VIRTCHNL2_OP_CONFIG_TX_QUEUES,
+				       buf_sz, (u8 *)ctq);
+		if (err)
+			goto mbx_error;
+
+		err = idpf_wait_for_event(vport->adapter, vport,
+					  IDPF_VC_CONFIG_TXQ,
+					  IDPF_VC_CONFIG_TXQ_ERR);
+		if (err)
+			goto mbx_error;
+
+		k += num_chunks;
+		totqs -= num_chunks;
+		num_chunks = min(num_chunks, totqs);
+		/* Recalculate buffer size */
+		buf_sz = struct_size(ctq, qinfo, num_chunks);
+	}
+
+mbx_error:
+	mutex_unlock(&vport->vc_buf_lock);
+	kfree(ctq);
+error:
+	kfree(qi);
+
+	return err;
+}
+
+/**
+ * idpf_send_config_rx_queues_msg - Send virtchnl config rx queues message
+ * @vport: virtual port data structure
+ *
+ * Send config rx queues virtchnl message.  Returns 0 on success, negative on
+ * failure.
+ */
+static int idpf_send_config_rx_queues_msg(struct idpf_vport *vport)
+{
+	struct virtchnl2_config_rx_queues *crq;
+	u32 config_sz, chunk_sz, buf_sz;
+	int totqs, num_msgs, num_chunks;
+	struct virtchnl2_rxq_info *qi;
+	int err = 0, i, k = 0;
+
+	totqs = vport->num_rxq + vport->num_bufq;
+	qi = kcalloc(totqs, sizeof(struct virtchnl2_rxq_info), GFP_KERNEL);
+	if (!qi)
+		return -ENOMEM;
+
+	/* Populate the queue info buffer with all queue context info */
+	for (i = 0; i < vport->num_rxq_grp; i++) {
+		struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i];
+		u16 num_rxq;
+		int j;
+
+		if (!idpf_is_queue_model_split(vport->rxq_model))
+			goto setup_rxqs;
+
+		for (j = 0; j < vport->num_bufqs_per_qgrp; j++, k++) {
+			struct idpf_queue *bufq =
+				&rx_qgrp->splitq.bufq_sets[j].bufq;
+
+			qi[k].queue_id = cpu_to_le32(bufq->q_id);
+			qi[k].model = cpu_to_le16(vport->rxq_model);
+			qi[k].type = cpu_to_le32(bufq->q_type);
+			qi[k].desc_ids = cpu_to_le64(VIRTCHNL2_RXDID_2_FLEX_SPLITQ_M);
+			qi[k].ring_len = cpu_to_le16(bufq->desc_count);
+			qi[k].dma_ring_addr = cpu_to_le64(bufq->dma);
+			qi[k].data_buffer_size = cpu_to_le32(bufq->rx_buf_size);
+			qi[k].buffer_notif_stride = bufq->rx_buf_stride;
+			qi[k].rx_buffer_low_watermark =
+				cpu_to_le16(bufq->rx_buffer_low_watermark);
+			if (idpf_is_feature_ena(vport, NETIF_F_GRO_HW))
+				qi[k].qflags |= cpu_to_le16(VIRTCHNL2_RXQ_RSC);
+		}
+
+setup_rxqs:
+		if (idpf_is_queue_model_split(vport->rxq_model))
+			num_rxq = rx_qgrp->splitq.num_rxq_sets;
+		else
+			num_rxq = rx_qgrp->singleq.num_rxq;
+
+		for (j = 0; j < num_rxq; j++, k++) {
+			struct idpf_queue *rxq;
+
+			if (!idpf_is_queue_model_split(vport->rxq_model)) {
+				rxq = rx_qgrp->singleq.rxqs[j];
+				goto common_qi_fields;
+			}
+			rxq = &rx_qgrp->splitq.rxq_sets[j]->rxq;
+			qi[k].rx_bufq1_id =
+			  cpu_to_le16(rxq->rxq_grp->splitq.bufq_sets[0].bufq.q_id);
+			if (vport->num_bufqs_per_qgrp > IDPF_SINGLE_BUFQ_PER_RXQ_GRP) {
+				qi[k].bufq2_ena = IDPF_BUFQ2_ENA;
+				qi[k].rx_bufq2_id =
+				  cpu_to_le16(rxq->rxq_grp->splitq.bufq_sets[1].bufq.q_id);
+			}
+			qi[k].rx_buffer_low_watermark =
+				cpu_to_le16(rxq->rx_buffer_low_watermark);
+			if (idpf_is_feature_ena(vport, NETIF_F_GRO_HW))
+				qi[k].qflags |= cpu_to_le16(VIRTCHNL2_RXQ_RSC);
+
+common_qi_fields:
+			if (rxq->rx_hsplit_en) {
+				qi[k].qflags |=
+					cpu_to_le16(VIRTCHNL2_RXQ_HDR_SPLIT);
+				qi[k].hdr_buffer_size =
+					cpu_to_le16(rxq->rx_hbuf_size);
+			}
+			qi[k].queue_id = cpu_to_le32(rxq->q_id);
+			qi[k].model = cpu_to_le16(vport->rxq_model);
+			qi[k].type = cpu_to_le32(rxq->q_type);
+			qi[k].ring_len = cpu_to_le16(rxq->desc_count);
+			qi[k].dma_ring_addr = cpu_to_le64(rxq->dma);
+			qi[k].max_pkt_size = cpu_to_le32(rxq->rx_max_pkt_size);
+			qi[k].data_buffer_size = cpu_to_le32(rxq->rx_buf_size);
+			qi[k].qflags |=
+				cpu_to_le16(VIRTCHNL2_RX_DESC_SIZE_32BYTE);
+			qi[k].desc_ids = cpu_to_le64(rxq->rxdids);
+		}
+	}
+
+	/* Make sure accounting agrees */
+	if (k != totqs) {
+		err = -EINVAL;
+		goto error;
+	}
+
+	/* Chunk up the queue contexts into multiple messages to avoid
+	 * sending a control queue message buffer that is too large
+	 */
+	config_sz = sizeof(struct virtchnl2_config_rx_queues);
+	chunk_sz = sizeof(struct virtchnl2_rxq_info);
+
+	num_chunks = min_t(u32, IDPF_NUM_CHUNKS_PER_MSG(config_sz, chunk_sz),
+			   totqs);
+	num_msgs = DIV_ROUND_UP(totqs, num_chunks);
+
+	buf_sz = struct_size(crq, qinfo, num_chunks);
+	crq = kzalloc(buf_sz, GFP_KERNEL);
+	if (!crq) {
+		err = -ENOMEM;
+		goto error;
+	}
+
+	mutex_lock(&vport->vc_buf_lock);
+
+	for (i = 0, k = 0; i < num_msgs; i++) {
+		memset(crq, 0, buf_sz);
+		crq->vport_id = cpu_to_le32(vport->vport_id);
+		crq->num_qinfo = cpu_to_le16(num_chunks);
+		memcpy(crq->qinfo, &qi[k], chunk_sz * num_chunks);
+
+		err = idpf_send_mb_msg(vport->adapter,
+				       VIRTCHNL2_OP_CONFIG_RX_QUEUES,
+				       buf_sz, (u8 *)crq);
+		if (err)
+			goto mbx_error;
+
+		err = idpf_wait_for_event(vport->adapter, vport,
+					  IDPF_VC_CONFIG_RXQ,
+					  IDPF_VC_CONFIG_RXQ_ERR);
+		if (err)
+			goto mbx_error;
+
+		k += num_chunks;
+		totqs -= num_chunks;
+		num_chunks = min(num_chunks, totqs);
+		/* Recalculate buffer size */
+		buf_sz = struct_size(crq, qinfo, num_chunks);
+	}
+
+mbx_error:
+	mutex_unlock(&vport->vc_buf_lock);
+	kfree(crq);
+error:
+	kfree(qi);
+
+	return err;
+}
+
+/**
+ * idpf_send_ena_dis_queues_msg - Send virtchnl enable or disable
+ * queues message
+ * @vport: virtual port data structure
+ * @vc_op: virtchnl op code to send
+ *
+ * Send enable or disable queues virtchnl message. Returns 0 on success,
+ * negative on failure.
+ */
+static int idpf_send_ena_dis_queues_msg(struct idpf_vport *vport, u32 vc_op)
+{
+	u32 num_msgs, num_chunks, num_txq, num_rxq, num_q;
+	struct idpf_adapter *adapter = vport->adapter;
+	struct virtchnl2_del_ena_dis_queues *eq;
+	struct virtchnl2_queue_chunks *qcs;
+	struct virtchnl2_queue_chunk *qc;
+	u32 config_sz, chunk_sz, buf_sz;
+	int i, j, k = 0, err = 0;
+
+	/* validate virtchnl op */
+	switch (vc_op) {
+	case VIRTCHNL2_OP_ENABLE_QUEUES:
+	case VIRTCHNL2_OP_DISABLE_QUEUES:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	num_txq = vport->num_txq + vport->num_complq;
+	num_rxq = vport->num_rxq + vport->num_bufq;
+	num_q = num_txq + num_rxq;
+	buf_sz = sizeof(struct virtchnl2_queue_chunk) * num_q;
+	qc = kzalloc(buf_sz, GFP_KERNEL);
+	if (!qc)
+		return -ENOMEM;
+
+	for (i = 0; i < vport->num_txq_grp; i++) {
+		struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i];
+
+		for (j = 0; j < tx_qgrp->num_txq; j++, k++) {
+			qc[k].type = cpu_to_le32(tx_qgrp->txqs[j]->q_type);
+			qc[k].start_queue_id = cpu_to_le32(tx_qgrp->txqs[j]->q_id);
+			qc[k].num_queues = cpu_to_le32(IDPF_NUMQ_PER_CHUNK);
+		}
+	}
+	if (vport->num_txq != k) {
+		err = -EINVAL;
+		goto error;
+	}
+
+	if (!idpf_is_queue_model_split(vport->txq_model))
+		goto setup_rx;
+
+	for (i = 0; i < vport->num_txq_grp; i++, k++) {
+		struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i];
+
+		qc[k].type = cpu_to_le32(tx_qgrp->complq->q_type);
+		qc[k].start_queue_id = cpu_to_le32(tx_qgrp->complq->q_id);
+		qc[k].num_queues = cpu_to_le32(IDPF_NUMQ_PER_CHUNK);
+	}
+	if (vport->num_complq != (k - vport->num_txq)) {
+		err = -EINVAL;
+		goto error;
+	}
+
+setup_rx:
+	for (i = 0; i < vport->num_rxq_grp; i++) {
+		struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i];
+
+		if (idpf_is_queue_model_split(vport->rxq_model))
+			num_rxq = rx_qgrp->splitq.num_rxq_sets;
+		else
+			num_rxq = rx_qgrp->singleq.num_rxq;
+
+		for (j = 0; j < num_rxq; j++, k++) {
+			if (idpf_is_queue_model_split(vport->rxq_model)) {
+				qc[k].start_queue_id =
+				cpu_to_le32(rx_qgrp->splitq.rxq_sets[j]->rxq.q_id);
+				qc[k].type =
+				cpu_to_le32(rx_qgrp->splitq.rxq_sets[j]->rxq.q_type);
+			} else {
+				qc[k].start_queue_id =
+				cpu_to_le32(rx_qgrp->singleq.rxqs[j]->q_id);
+				qc[k].type =
+				cpu_to_le32(rx_qgrp->singleq.rxqs[j]->q_type);
+			}
+			qc[k].num_queues = cpu_to_le32(IDPF_NUMQ_PER_CHUNK);
+		}
+	}
+	if (vport->num_rxq != k - (vport->num_txq + vport->num_complq)) {
+		err = -EINVAL;
+		goto error;
+	}
+
+	if (!idpf_is_queue_model_split(vport->rxq_model))
+		goto send_msg;
+
+	for (i = 0; i < vport->num_rxq_grp; i++) {
+		struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i];
+
+		for (j = 0; j < vport->num_bufqs_per_qgrp; j++, k++) {
+			struct idpf_queue *q;
+
+			q = &rx_qgrp->splitq.bufq_sets[j].bufq;
+			qc[k].type = cpu_to_le32(q->q_type);
+			qc[k].start_queue_id = cpu_to_le32(q->q_id);
+			qc[k].num_queues = cpu_to_le32(IDPF_NUMQ_PER_CHUNK);
+		}
+	}
+	if (vport->num_bufq != k - (vport->num_txq +
+				    vport->num_complq +
+				    vport->num_rxq)) {
+		err = -EINVAL;
+		goto error;
+	}
+
+send_msg:
+	/* Chunk up the queue info into multiple messages */
+	config_sz = sizeof(struct virtchnl2_del_ena_dis_queues);
+	chunk_sz = sizeof(struct virtchnl2_queue_chunk);
+
+	num_chunks = min_t(u32, IDPF_NUM_CHUNKS_PER_MSG(config_sz, chunk_sz),
+			   num_q);
+	num_msgs = DIV_ROUND_UP(num_q, num_chunks);
+
+	buf_sz = struct_size(eq, chunks.chunks, num_chunks);
+	eq = kzalloc(buf_sz, GFP_KERNEL);
+	if (!eq) {
+		err = -ENOMEM;
+		goto error;
+	}
+
+	mutex_lock(&vport->vc_buf_lock);
+
+	for (i = 0, k = 0; i < num_msgs; i++) {
+		memset(eq, 0, buf_sz);
+		eq->vport_id = cpu_to_le32(vport->vport_id);
+		eq->chunks.num_chunks = cpu_to_le16(num_chunks);
+		qcs = &eq->chunks;
+		memcpy(qcs->chunks, &qc[k], chunk_sz * num_chunks);
+
+		err = idpf_send_mb_msg(adapter, vc_op, buf_sz, (u8 *)eq);
+		if (err)
+			goto mbx_error;
+
+		if (vc_op == VIRTCHNL2_OP_ENABLE_QUEUES)
+			err = idpf_wait_for_event(adapter, vport,
+						  IDPF_VC_ENA_QUEUES,
+						  IDPF_VC_ENA_QUEUES_ERR);
+		else
+			err = idpf_min_wait_for_event(adapter, vport,
+						      IDPF_VC_DIS_QUEUES,
+						      IDPF_VC_DIS_QUEUES_ERR);
+		if (err)
+			goto mbx_error;
+
+		k += num_chunks;
+		num_q -= num_chunks;
+		num_chunks = min(num_chunks, num_q);
+		/* Recalculate buffer size */
+		buf_sz = struct_size(eq, chunks.chunks, num_chunks);
+	}
+
+mbx_error:
+	mutex_unlock(&vport->vc_buf_lock);
+	kfree(eq);
+error:
+	kfree(qc);
+
+	return err;
+}
+
+/**
+ * idpf_send_map_unmap_queue_vector_msg - Send virtchnl map or unmap queue
+ * vector message
+ * @vport: virtual port data structure
+ * @map: true for map and false for unmap
+ *
+ * Send map or unmap queue vector virtchnl message.  Returns 0 on success,
+ * negative on failure.
+ */
+int idpf_send_map_unmap_queue_vector_msg(struct idpf_vport *vport, bool map)
+{
+	struct idpf_adapter *adapter = vport->adapter;
+	struct virtchnl2_queue_vector_maps *vqvm;
+	struct virtchnl2_queue_vector *vqv;
+	u32 config_sz, chunk_sz, buf_sz;
+	u32 num_msgs, num_chunks, num_q;
+	int i, j, k = 0, err = 0;
+
+	num_q = vport->num_txq + vport->num_rxq;
+
+	buf_sz = sizeof(struct virtchnl2_queue_vector) * num_q;
+	vqv = kzalloc(buf_sz, GFP_KERNEL);
+	if (!vqv)
+		return -ENOMEM;
+
+	for (i = 0; i < vport->num_txq_grp; i++) {
+		struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i];
+
+		for (j = 0; j < tx_qgrp->num_txq; j++, k++) {
+			vqv[k].queue_type = cpu_to_le32(tx_qgrp->txqs[j]->q_type);
+			vqv[k].queue_id = cpu_to_le32(tx_qgrp->txqs[j]->q_id);
+
+			if (idpf_is_queue_model_split(vport->txq_model)) {
+				vqv[k].vector_id =
+				cpu_to_le16(tx_qgrp->complq->q_vector->v_idx);
+				vqv[k].itr_idx =
+				cpu_to_le32(tx_qgrp->complq->q_vector->tx_itr_idx);
+			} else {
+				vqv[k].vector_id =
+				cpu_to_le16(tx_qgrp->txqs[j]->q_vector->v_idx);
+				vqv[k].itr_idx =
+				cpu_to_le32(tx_qgrp->txqs[j]->q_vector->tx_itr_idx);
+			}
+		}
+	}
+
+	if (vport->num_txq != k) {
+		err = -EINVAL;
+		goto error;
+	}
+
+	for (i = 0; i < vport->num_rxq_grp; i++) {
+		struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i];
+		u16 num_rxq;
+
+		if (idpf_is_queue_model_split(vport->rxq_model))
+			num_rxq = rx_qgrp->splitq.num_rxq_sets;
+		else
+			num_rxq = rx_qgrp->singleq.num_rxq;
+
+		for (j = 0; j < num_rxq; j++, k++) {
+			struct idpf_queue *rxq;
+
+			if (idpf_is_queue_model_split(vport->rxq_model))
+				rxq = &rx_qgrp->splitq.rxq_sets[j]->rxq;
+			else
+				rxq = rx_qgrp->singleq.rxqs[j];
+
+			vqv[k].queue_type = cpu_to_le32(rxq->q_type);
+			vqv[k].queue_id = cpu_to_le32(rxq->q_id);
+			vqv[k].vector_id = cpu_to_le16(rxq->q_vector->v_idx);
+			vqv[k].itr_idx = cpu_to_le32(rxq->q_vector->rx_itr_idx);
+		}
+	}
+
+	if (idpf_is_queue_model_split(vport->txq_model)) {
+		if (vport->num_rxq != k - vport->num_complq) {
+			err = -EINVAL;
+			goto error;
+		}
+	} else {
+		if (vport->num_rxq != k - vport->num_txq) {
+			err = -EINVAL;
+			goto error;
+		}
+	}
+
+	/* Chunk up the vector info into multiple messages */
+	config_sz = sizeof(struct virtchnl2_queue_vector_maps);
+	chunk_sz = sizeof(struct virtchnl2_queue_vector);
+
+	num_chunks = min_t(u32, IDPF_NUM_CHUNKS_PER_MSG(config_sz, chunk_sz),
+			   num_q);
+	num_msgs = DIV_ROUND_UP(num_q, num_chunks);
+
+	buf_sz = struct_size(vqvm, qv_maps, num_chunks);
+	vqvm = kzalloc(buf_sz, GFP_KERNEL);
+	if (!vqvm) {
+		err = -ENOMEM;
+		goto error;
+	}
+
+	mutex_lock(&vport->vc_buf_lock);
+
+	for (i = 0, k = 0; i < num_msgs; i++) {
+		memset(vqvm, 0, buf_sz);
+		vqvm->vport_id = cpu_to_le32(vport->vport_id);
+		vqvm->num_qv_maps = cpu_to_le16(num_chunks);
+		memcpy(vqvm->qv_maps, &vqv[k], chunk_sz * num_chunks);
+
+		if (map) {
+			err = idpf_send_mb_msg(adapter,
+					       VIRTCHNL2_OP_MAP_QUEUE_VECTOR,
+					       buf_sz, (u8 *)vqvm);
+			if (!err)
+				err = idpf_wait_for_event(adapter, vport,
+							  IDPF_VC_MAP_IRQ,
+							  IDPF_VC_MAP_IRQ_ERR);
+		} else {
+			err = idpf_send_mb_msg(adapter,
+					       VIRTCHNL2_OP_UNMAP_QUEUE_VECTOR,
+					       buf_sz, (u8 *)vqvm);
+			if (!err)
+				err =
+				idpf_min_wait_for_event(adapter, vport,
+							IDPF_VC_UNMAP_IRQ,
+							IDPF_VC_UNMAP_IRQ_ERR);
+		}
+		if (err)
+			goto mbx_error;
+
+		k += num_chunks;
+		num_q -= num_chunks;
+		num_chunks = min(num_chunks, num_q);
+		/* Recalculate buffer size */
+		buf_sz = struct_size(vqvm, qv_maps, num_chunks);
+	}
+
+mbx_error:
+	mutex_unlock(&vport->vc_buf_lock);
+	kfree(vqvm);
+error:
+	kfree(vqv);
+
+	return err;
+}
+
+/**
+ * idpf_send_enable_queues_msg - send enable queues virtchnl message
+ * @vport: Virtual port private data structure
+ *
+ * Will send enable queues virtchnl message.  Returns 0 on success, negative on
+ * failure.
+ */
+int idpf_send_enable_queues_msg(struct idpf_vport *vport)
+{
+	return idpf_send_ena_dis_queues_msg(vport, VIRTCHNL2_OP_ENABLE_QUEUES);
+}
+
+/**
+ * idpf_send_disable_queues_msg - send disable queues virtchnl message
+ * @vport: Virtual port private data structure
+ *
+ * Will send disable queues virtchnl message.  Returns 0 on success, negative
+ * on failure.
+ */
+int idpf_send_disable_queues_msg(struct idpf_vport *vport)
+{
+	int err, i;
+
+	err = idpf_send_ena_dis_queues_msg(vport, VIRTCHNL2_OP_DISABLE_QUEUES);
+	if (err)
+		return err;
+
+	/* switch to poll mode as interrupts will be disabled after disable
+	 * queues virtchnl message is sent
+	 */
+	for (i = 0; i < vport->num_txq; i++)
+		set_bit(__IDPF_Q_POLL_MODE, vport->txqs[i]->flags);
+
+	/* schedule the napi to receive all the marker packets */
+	for (i = 0; i < vport->num_q_vectors; i++)
+		napi_schedule(&vport->q_vectors[i].napi);
+
+	return idpf_wait_for_marker_event(vport);
+}
+
+/**
+ * idpf_convert_reg_to_queue_chunks - Copy queue chunk information to the right
+ * structure
+ * @dchunks: Destination chunks to store data to
+ * @schunks: Source chunks to copy data from
+ * @num_chunks: number of chunks to copy
+ */
+static void idpf_convert_reg_to_queue_chunks(struct virtchnl2_queue_chunk *dchunks,
+					     struct virtchnl2_queue_reg_chunk *schunks,
+					     u16 num_chunks)
+{
+	u16 i;
+
+	for (i = 0; i < num_chunks; i++) {
+		dchunks[i].type = schunks[i].type;
+		dchunks[i].start_queue_id = schunks[i].start_queue_id;
+		dchunks[i].num_queues = schunks[i].num_queues;
+	}
+}
+
+/**
+ * idpf_send_delete_queues_msg - send delete queues virtchnl message
+ * @vport: Virtual port private data structure
+ *
+ * Will send delete queues virtchnl message. Return 0 on success, negative on
+ * failure.
+ */
+int idpf_send_delete_queues_msg(struct idpf_vport *vport)
+{
+	struct idpf_adapter *adapter = vport->adapter;
+	struct virtchnl2_create_vport *vport_params;
+	struct virtchnl2_queue_reg_chunks *chunks;
+	struct virtchnl2_del_ena_dis_queues *eq;
+	struct idpf_vport_config *vport_config;
+	u16 vport_idx = vport->idx;
+	int buf_size, err;
+	u16 num_chunks;
+
+	vport_config = adapter->vport_config[vport_idx];
+	if (vport_config->req_qs_chunks) {
+		struct virtchnl2_add_queues *vc_aq =
+			(struct virtchnl2_add_queues *)vport_config->req_qs_chunks;
+		chunks = &vc_aq->chunks;
+	} else {
+		vport_params = adapter->vport_params_recvd[vport_idx];
+		chunks = &vport_params->chunks;
+	}
+
+	num_chunks = le16_to_cpu(chunks->num_chunks);
+	buf_size = struct_size(eq, chunks.chunks, num_chunks);
+
+	eq = kzalloc(buf_size, GFP_KERNEL);
+	if (!eq)
+		return -ENOMEM;
+
+	eq->vport_id = cpu_to_le32(vport->vport_id);
+	eq->chunks.num_chunks = cpu_to_le16(num_chunks);
+
+	idpf_convert_reg_to_queue_chunks(eq->chunks.chunks, chunks->chunks,
+					 num_chunks);
+
+	mutex_lock(&vport->vc_buf_lock);
+
+	err = idpf_send_mb_msg(adapter, VIRTCHNL2_OP_DEL_QUEUES,
+			       buf_size, (u8 *)eq);
+	if (err)
+		goto rel_lock;
+
+	err = idpf_min_wait_for_event(adapter, vport, IDPF_VC_DEL_QUEUES,
+				      IDPF_VC_DEL_QUEUES_ERR);
+
+rel_lock:
+	mutex_unlock(&vport->vc_buf_lock);
+	kfree(eq);
+
+	return err;
+}
+
+/**
+ * idpf_send_config_queues_msg - Send config queues virtchnl message
+ * @vport: Virtual port private data structure
+ *
+ * Will send config queues virtchnl message. Returns 0 on success, negative on
+ * failure.
+ */
+int idpf_send_config_queues_msg(struct idpf_vport *vport)
+{
+	int err;
+
+	err = idpf_send_config_tx_queues_msg(vport);
+	if (err)
+		return err;
+
+	return idpf_send_config_rx_queues_msg(vport);
+}
+
+/**
+ * idpf_send_add_queues_msg - Send virtchnl add queues message
+ * @vport: Virtual port private data structure
+ * @num_tx_q: number of transmit queues
+ * @num_complq: number of transmit completion queues
+ * @num_rx_q: number of receive queues
+ * @num_rx_bufq: number of receive buffer queues
+ *
+ * Returns 0 on success, negative on failure. vport _MUST_ be const here as
+ * we should not change any fields within vport itself in this function.
+ */
+int idpf_send_add_queues_msg(const struct idpf_vport *vport, u16 num_tx_q,
+			     u16 num_complq, u16 num_rx_q, u16 num_rx_bufq)
+{
+	struct idpf_adapter *adapter = vport->adapter;
+	struct idpf_vport_config *vport_config;
+	struct virtchnl2_add_queues aq = { };
+	struct virtchnl2_add_queues *vc_msg;
+	u16 vport_idx = vport->idx;
+	int size, err;
+
+	vport_config = adapter->vport_config[vport_idx];
+
+	aq.vport_id = cpu_to_le32(vport->vport_id);
+	aq.num_tx_q = cpu_to_le16(num_tx_q);
+	aq.num_tx_complq = cpu_to_le16(num_complq);
+	aq.num_rx_q = cpu_to_le16(num_rx_q);
+	aq.num_rx_bufq = cpu_to_le16(num_rx_bufq);
+
+	mutex_lock(&((struct idpf_vport *)vport)->vc_buf_lock);
+
+	err = idpf_send_mb_msg(adapter, VIRTCHNL2_OP_ADD_QUEUES,
+			       sizeof(struct virtchnl2_add_queues), (u8 *)&aq);
+	if (err)
+		goto rel_lock;
+
+	/* We want vport to be const to prevent incidental code changes making
+	 * changes to the vport config. We're making a special exception here
+	 * to discard const to use the virtchnl.
+	 */
+	err = idpf_wait_for_event(adapter, (struct idpf_vport *)vport,
+				  IDPF_VC_ADD_QUEUES, IDPF_VC_ADD_QUEUES_ERR);
+	if (err)
+		goto rel_lock;
+
+	kfree(vport_config->req_qs_chunks);
+	vport_config->req_qs_chunks = NULL;
+
+	vc_msg = (struct virtchnl2_add_queues *)vport->vc_msg;
+	/* compare vc_msg num queues with vport num queues */
+	if (le16_to_cpu(vc_msg->num_tx_q) != num_tx_q ||
+	    le16_to_cpu(vc_msg->num_rx_q) != num_rx_q ||
+	    le16_to_cpu(vc_msg->num_tx_complq) != num_complq ||
+	    le16_to_cpu(vc_msg->num_rx_bufq) != num_rx_bufq) {
+		err = -EINVAL;
+		goto rel_lock;
+	}
+
+	size = struct_size(vc_msg, chunks.chunks,
+			   le16_to_cpu(vc_msg->chunks.num_chunks));
+	vport_config->req_qs_chunks = kmemdup(vc_msg, size, GFP_KERNEL);
+	if (!vport_config->req_qs_chunks) {
+		err = -ENOMEM;
+		goto rel_lock;
+	}
+
+rel_lock:
+	mutex_unlock(&((struct idpf_vport *)vport)->vc_buf_lock);
+
+	return err;
+}
+
+/**
+ * idpf_send_alloc_vectors_msg - Send virtchnl alloc vectors message
+ * @adapter: Driver specific private structure
+ * @num_vectors: number of vectors to be allocated
+ *
+ * Returns 0 on success, negative on failure.
+ */
+int idpf_send_alloc_vectors_msg(struct idpf_adapter *adapter, u16 num_vectors)
+{
+	struct virtchnl2_alloc_vectors *alloc_vec, *rcvd_vec;
+	struct virtchnl2_alloc_vectors ac = { };
+	u16 num_vchunks;
+	int size, err;
+
+	ac.num_vectors = cpu_to_le16(num_vectors);
+
+	mutex_lock(&adapter->vc_buf_lock);
+
+	err = idpf_send_mb_msg(adapter, VIRTCHNL2_OP_ALLOC_VECTORS,
+			       sizeof(ac), (u8 *)&ac);
+	if (err)
+		goto rel_lock;
+
+	err = idpf_wait_for_event(adapter, NULL, IDPF_VC_ALLOC_VECTORS,
+				  IDPF_VC_ALLOC_VECTORS_ERR);
+	if (err)
+		goto rel_lock;
+
+	rcvd_vec = (struct virtchnl2_alloc_vectors *)adapter->vc_msg;
+	num_vchunks = le16_to_cpu(rcvd_vec->vchunks.num_vchunks);
+
+	size = struct_size(rcvd_vec, vchunks.vchunks, num_vchunks);
+	if (size > sizeof(adapter->vc_msg)) {
+		err = -EINVAL;
+		goto rel_lock;
+	}
+
+	kfree(adapter->req_vec_chunks);
+	adapter->req_vec_chunks = NULL;
+	adapter->req_vec_chunks = kmemdup(adapter->vc_msg, size, GFP_KERNEL);
+	if (!adapter->req_vec_chunks) {
+		err = -ENOMEM;
+		goto rel_lock;
+	}
+
+	alloc_vec = adapter->req_vec_chunks;
+	if (le16_to_cpu(alloc_vec->num_vectors) < num_vectors) {
+		kfree(adapter->req_vec_chunks);
+		adapter->req_vec_chunks = NULL;
+		err = -EINVAL;
+	}
+
+rel_lock:
+	mutex_unlock(&adapter->vc_buf_lock);
+
+	return err;
+}
+
+/**
+ * idpf_send_dealloc_vectors_msg - Send virtchnl de allocate vectors message
+ * @adapter: Driver specific private structure
+ *
+ * Returns 0 on success, negative on failure.
+ */
+int idpf_send_dealloc_vectors_msg(struct idpf_adapter *adapter)
+{
+	struct virtchnl2_alloc_vectors *ac = adapter->req_vec_chunks;
+	struct virtchnl2_vector_chunks *vcs = &ac->vchunks;
+	int buf_size, err;
+
+	buf_size = struct_size(vcs, vchunks, le16_to_cpu(vcs->num_vchunks));
+
+	mutex_lock(&adapter->vc_buf_lock);
+
+	err = idpf_send_mb_msg(adapter, VIRTCHNL2_OP_DEALLOC_VECTORS, buf_size,
+			       (u8 *)vcs);
+	if (err)
+		goto rel_lock;
+
+	err = idpf_min_wait_for_event(adapter, NULL, IDPF_VC_DEALLOC_VECTORS,
+				      IDPF_VC_DEALLOC_VECTORS_ERR);
+	if (err)
+		goto rel_lock;
+
+	kfree(adapter->req_vec_chunks);
+	adapter->req_vec_chunks = NULL;
+
+rel_lock:
+	mutex_unlock(&adapter->vc_buf_lock);
+
+	return err;
+}
+
+/**
+ * idpf_get_max_vfs - Get max number of vfs supported
+ * @adapter: Driver specific private structure
+ *
+ * Returns max number of VFs
+ */
+static int idpf_get_max_vfs(struct idpf_adapter *adapter)
+{
+	return le16_to_cpu(adapter->caps.max_sriov_vfs);
+}
+
+/**
+ * idpf_send_set_sriov_vfs_msg - Send virtchnl set sriov vfs message
+ * @adapter: Driver specific private structure
+ * @num_vfs: number of virtual functions to be created
+ *
+ * Returns 0 on success, negative on failure.
+ */
+int idpf_send_set_sriov_vfs_msg(struct idpf_adapter *adapter, u16 num_vfs)
+{
+	struct virtchnl2_sriov_vfs_info svi = { };
+	int err;
+
+	svi.num_vfs = cpu_to_le16(num_vfs);
+
+	mutex_lock(&adapter->vc_buf_lock);
+
+	err = idpf_send_mb_msg(adapter, VIRTCHNL2_OP_SET_SRIOV_VFS,
+			       sizeof(svi), (u8 *)&svi);
+	if (err)
+		goto rel_lock;
+
+	err = idpf_wait_for_event(adapter, NULL, IDPF_VC_SET_SRIOV_VFS,
+				  IDPF_VC_SET_SRIOV_VFS_ERR);
+
+rel_lock:
+	mutex_unlock(&adapter->vc_buf_lock);
+
+	return err;
+}
+
+/**
+ * idpf_send_get_stats_msg - Send virtchnl get statistics message
+ * @vport: vport to get stats for
+ *
+ * Returns 0 on success, negative on failure.
+ */
+int idpf_send_get_stats_msg(struct idpf_vport *vport)
+{
+	struct idpf_netdev_priv *np = netdev_priv(vport->netdev);
+	struct rtnl_link_stats64 *netstats = &np->netstats;
+	struct idpf_adapter *adapter = vport->adapter;
+	struct virtchnl2_vport_stats stats_msg = { };
+	struct virtchnl2_vport_stats *stats;
+	int err;
+
+	/* Don't send get_stats message if the link is down */
+	if (np->state <= __IDPF_VPORT_DOWN)
+		return 0;
+
+	stats_msg.vport_id = cpu_to_le32(vport->vport_id);
+
+	mutex_lock(&vport->vc_buf_lock);
+
+	err = idpf_send_mb_msg(adapter, VIRTCHNL2_OP_GET_STATS,
+			       sizeof(struct virtchnl2_vport_stats),
+			       (u8 *)&stats_msg);
+	if (err)
+		goto rel_lock;
+
+	err = idpf_wait_for_event(adapter, vport, IDPF_VC_GET_STATS,
+				  IDPF_VC_GET_STATS_ERR);
+	if (err)
+		goto rel_lock;
+
+	stats = (struct virtchnl2_vport_stats *)vport->vc_msg;
+
+	spin_lock_bh(&np->stats_lock);
+
+	netstats->rx_packets = le64_to_cpu(stats->rx_unicast) +
+			       le64_to_cpu(stats->rx_multicast) +
+			       le64_to_cpu(stats->rx_broadcast);
+	netstats->rx_bytes = le64_to_cpu(stats->rx_bytes);
+	netstats->rx_dropped = le64_to_cpu(stats->rx_discards);
+	netstats->rx_over_errors = le64_to_cpu(stats->rx_overflow_drop);
+	netstats->rx_length_errors = le64_to_cpu(stats->rx_invalid_frame_length);
+
+	netstats->tx_packets = le64_to_cpu(stats->tx_unicast) +
+			       le64_to_cpu(stats->tx_multicast) +
+			       le64_to_cpu(stats->tx_broadcast);
+	netstats->tx_bytes = le64_to_cpu(stats->tx_bytes);
+	netstats->tx_errors = le64_to_cpu(stats->tx_errors);
+	netstats->tx_dropped = le64_to_cpu(stats->tx_discards);
+
+	vport->port_stats.vport_stats = *stats;
+
+	spin_unlock_bh(&np->stats_lock);
+
+rel_lock:
+	mutex_unlock(&vport->vc_buf_lock);
+
+	return err;
+}
+
+/**
+ * idpf_send_get_set_rss_lut_msg - Send virtchnl get or set rss lut message
+ * @vport: virtual port data structure
+ * @get: flag to set or get rss look up table
+ *
+ * Returns 0 on success, negative on failure.
+ */
+int idpf_send_get_set_rss_lut_msg(struct idpf_vport *vport, bool get)
+{
+	struct idpf_adapter *adapter = vport->adapter;
+	struct virtchnl2_rss_lut *recv_rl;
+	struct idpf_rss_data *rss_data;
+	struct virtchnl2_rss_lut *rl;
+	int buf_size, lut_buf_size;
+	int i, err;
+
+	rss_data = &adapter->vport_config[vport->idx]->user_config.rss_data;
+	buf_size = struct_size(rl, lut, rss_data->rss_lut_size);
+	rl = kzalloc(buf_size, GFP_KERNEL);
+	if (!rl)
+		return -ENOMEM;
+
+	rl->vport_id = cpu_to_le32(vport->vport_id);
+	mutex_lock(&vport->vc_buf_lock);
+
+	if (!get) {
+		rl->lut_entries = cpu_to_le16(rss_data->rss_lut_size);
+		for (i = 0; i < rss_data->rss_lut_size; i++)
+			rl->lut[i] = cpu_to_le32(rss_data->rss_lut[i]);
+
+		err = idpf_send_mb_msg(adapter, VIRTCHNL2_OP_SET_RSS_LUT,
+				       buf_size, (u8 *)rl);
+		if (err)
+			goto free_mem;
+
+		err = idpf_wait_for_event(adapter, vport, IDPF_VC_SET_RSS_LUT,
+					  IDPF_VC_SET_RSS_LUT_ERR);
+
+		goto free_mem;
+	}
+
+	err = idpf_send_mb_msg(adapter, VIRTCHNL2_OP_GET_RSS_LUT,
+			       buf_size, (u8 *)rl);
+	if (err)
+		goto free_mem;
+
+	err = idpf_wait_for_event(adapter, vport, IDPF_VC_GET_RSS_LUT,
+				  IDPF_VC_GET_RSS_LUT_ERR);
+	if (err)
+		goto free_mem;
+
+	recv_rl = (struct virtchnl2_rss_lut *)vport->vc_msg;
+	if (rss_data->rss_lut_size == le16_to_cpu(recv_rl->lut_entries))
+		goto do_memcpy;
+
+	rss_data->rss_lut_size = le16_to_cpu(recv_rl->lut_entries);
+	kfree(rss_data->rss_lut);
+
+	lut_buf_size = rss_data->rss_lut_size * sizeof(u32);
+	rss_data->rss_lut = kzalloc(lut_buf_size, GFP_KERNEL);
+	if (!rss_data->rss_lut) {
+		rss_data->rss_lut_size = 0;
+		err = -ENOMEM;
+		goto free_mem;
+	}
+
+do_memcpy:
+	memcpy(rss_data->rss_lut, vport->vc_msg, rss_data->rss_lut_size);
+free_mem:
+	mutex_unlock(&vport->vc_buf_lock);
+	kfree(rl);
+
+	return err;
+}
+
+/**
+ * idpf_send_get_set_rss_key_msg - Send virtchnl get or set rss key message
+ * @vport: virtual port data structure
+ * @get: flag to set or get rss look up table
+ *
+ * Returns 0 on success, negative on failure
+ */
+int idpf_send_get_set_rss_key_msg(struct idpf_vport *vport, bool get)
+{
+	struct idpf_adapter *adapter = vport->adapter;
+	struct virtchnl2_rss_key *recv_rk;
+	struct idpf_rss_data *rss_data;
+	struct virtchnl2_rss_key *rk;
+	int i, buf_size, err;
+
+	rss_data = &adapter->vport_config[vport->idx]->user_config.rss_data;
+	buf_size = struct_size(rk, key_flex, rss_data->rss_key_size);
+	rk = kzalloc(buf_size, GFP_KERNEL);
+	if (!rk)
+		return -ENOMEM;
+
+	rk->vport_id = cpu_to_le32(vport->vport_id);
+	mutex_lock(&vport->vc_buf_lock);
+
+	if (get) {
+		err = idpf_send_mb_msg(adapter, VIRTCHNL2_OP_GET_RSS_KEY,
+				       buf_size, (u8 *)rk);
+		if (err)
+			goto error;
+
+		err = idpf_wait_for_event(adapter, vport, IDPF_VC_GET_RSS_KEY,
+					  IDPF_VC_GET_RSS_KEY_ERR);
+		if (err)
+			goto error;
+
+		recv_rk = (struct virtchnl2_rss_key *)vport->vc_msg;
+		if (rss_data->rss_key_size !=
+		    le16_to_cpu(recv_rk->key_len)) {
+			rss_data->rss_key_size =
+				min_t(u16, NETDEV_RSS_KEY_LEN,
+				      le16_to_cpu(recv_rk->key_len));
+			kfree(rss_data->rss_key);
+			rss_data->rss_key = kzalloc(rss_data->rss_key_size,
+						    GFP_KERNEL);
+			if (!rss_data->rss_key) {
+				rss_data->rss_key_size = 0;
+				err = -ENOMEM;
+				goto error;
+			}
+		}
+		memcpy(rss_data->rss_key, recv_rk->key_flex,
+		       rss_data->rss_key_size);
+	} else {
+		rk->key_len = cpu_to_le16(rss_data->rss_key_size);
+		for (i = 0; i < rss_data->rss_key_size; i++)
+			rk->key_flex[i] = rss_data->rss_key[i];
+
+		err = idpf_send_mb_msg(adapter, VIRTCHNL2_OP_SET_RSS_KEY,
+				       buf_size, (u8 *)rk);
+		if (err)
+			goto error;
+
+		err = idpf_wait_for_event(adapter, vport, IDPF_VC_SET_RSS_KEY,
+					  IDPF_VC_SET_RSS_KEY_ERR);
+	}
+
+error:
+	mutex_unlock(&vport->vc_buf_lock);
+	kfree(rk);
+
+	return err;
+}
+
+/**
+ * idpf_fill_ptype_lookup - Fill L3 specific fields in ptype lookup table
+ * @ptype: ptype lookup table
+ * @pstate: state machine for ptype lookup table
+ * @ipv4: ipv4 or ipv6
+ * @frag: fragmentation allowed
+ *
+ */
+static void idpf_fill_ptype_lookup(struct idpf_rx_ptype_decoded *ptype,
+				   struct idpf_ptype_state *pstate,
+				   bool ipv4, bool frag)
+{
+	if (!pstate->outer_ip || !pstate->outer_frag) {
+		ptype->outer_ip = IDPF_RX_PTYPE_OUTER_IP;
+		pstate->outer_ip = true;
+
+		if (ipv4)
+			ptype->outer_ip_ver = IDPF_RX_PTYPE_OUTER_IPV4;
+		else
+			ptype->outer_ip_ver = IDPF_RX_PTYPE_OUTER_IPV6;
+
+		if (frag) {
+			ptype->outer_frag = IDPF_RX_PTYPE_FRAG;
+			pstate->outer_frag = true;
+		}
+	} else {
+		ptype->tunnel_type = IDPF_RX_PTYPE_TUNNEL_IP_IP;
+		pstate->tunnel_state = IDPF_PTYPE_TUNNEL_IP;
+
+		if (ipv4)
+			ptype->tunnel_end_prot =
+					IDPF_RX_PTYPE_TUNNEL_END_IPV4;
+		else
+			ptype->tunnel_end_prot =
+					IDPF_RX_PTYPE_TUNNEL_END_IPV6;
+
+		if (frag)
+			ptype->tunnel_end_frag = IDPF_RX_PTYPE_FRAG;
+	}
+}
+
+/**
+ * idpf_send_get_rx_ptype_msg - Send virtchnl for ptype info
+ * @vport: virtual port data structure
+ *
+ * Returns 0 on success, negative on failure.
+ */
+int idpf_send_get_rx_ptype_msg(struct idpf_vport *vport)
+{
+	struct idpf_rx_ptype_decoded *ptype_lkup = vport->rx_ptype_lkup;
+	struct virtchnl2_get_ptype_info get_ptype_info;
+	int max_ptype, ptypes_recvd = 0, ptype_offset;
+	struct idpf_adapter *adapter = vport->adapter;
+	struct virtchnl2_get_ptype_info *ptype_info;
+	u16 next_ptype_id = 0;
+	int err = 0, i, j, k;
+
+	if (idpf_is_queue_model_split(vport->rxq_model))
+		max_ptype = IDPF_RX_MAX_PTYPE;
+	else
+		max_ptype = IDPF_RX_MAX_BASE_PTYPE;
+
+	memset(vport->rx_ptype_lkup, 0, sizeof(vport->rx_ptype_lkup));
+
+	ptype_info = kzalloc(IDPF_CTLQ_MAX_BUF_LEN, GFP_KERNEL);
+	if (!ptype_info)
+		return -ENOMEM;
+
+	mutex_lock(&adapter->vc_buf_lock);
+
+	while (next_ptype_id < max_ptype) {
+		get_ptype_info.start_ptype_id = cpu_to_le16(next_ptype_id);
+
+		if ((next_ptype_id + IDPF_RX_MAX_PTYPES_PER_BUF) > max_ptype)
+			get_ptype_info.num_ptypes =
+				cpu_to_le16(max_ptype - next_ptype_id);
+		else
+			get_ptype_info.num_ptypes =
+				cpu_to_le16(IDPF_RX_MAX_PTYPES_PER_BUF);
+
+		err = idpf_send_mb_msg(adapter, VIRTCHNL2_OP_GET_PTYPE_INFO,
+				       sizeof(struct virtchnl2_get_ptype_info),
+				       (u8 *)&get_ptype_info);
+		if (err)
+			goto vc_buf_unlock;
+
+		err = idpf_wait_for_event(adapter, NULL, IDPF_VC_GET_PTYPE_INFO,
+					  IDPF_VC_GET_PTYPE_INFO_ERR);
+		if (err)
+			goto vc_buf_unlock;
+
+		memcpy(ptype_info, adapter->vc_msg, IDPF_CTLQ_MAX_BUF_LEN);
+
+		ptypes_recvd += le16_to_cpu(ptype_info->num_ptypes);
+		if (ptypes_recvd > max_ptype) {
+			err = -EINVAL;
+			goto vc_buf_unlock;
+		}
+
+		next_ptype_id = le16_to_cpu(get_ptype_info.start_ptype_id) +
+				le16_to_cpu(get_ptype_info.num_ptypes);
+
+		ptype_offset = IDPF_RX_PTYPE_HDR_SZ;
+
+		for (i = 0; i < le16_to_cpu(ptype_info->num_ptypes); i++) {
+			struct idpf_ptype_state pstate = { };
+			struct virtchnl2_ptype *ptype;
+			u16 id;
+
+			ptype = (struct virtchnl2_ptype *)
+					((u8 *)ptype_info + ptype_offset);
+
+			ptype_offset += IDPF_GET_PTYPE_SIZE(ptype);
+			if (ptype_offset > IDPF_CTLQ_MAX_BUF_LEN) {
+				err = -EINVAL;
+				goto vc_buf_unlock;
+			}
+
+			/* 0xFFFF indicates end of ptypes */
+			if (le16_to_cpu(ptype->ptype_id_10) ==
+							IDPF_INVALID_PTYPE_ID) {
+				err = 0;
+				goto vc_buf_unlock;
+			}
+
+			if (idpf_is_queue_model_split(vport->rxq_model))
+				k = le16_to_cpu(ptype->ptype_id_10);
+			else
+				k = ptype->ptype_id_8;
+
+			if (ptype->proto_id_count)
+				ptype_lkup[k].known = 1;
+
+			for (j = 0; j < ptype->proto_id_count; j++) {
+				id = le16_to_cpu(ptype->proto_id[j]);
+				switch (id) {
+				case VIRTCHNL2_PROTO_HDR_GRE:
+					if (pstate.tunnel_state ==
+							IDPF_PTYPE_TUNNEL_IP) {
+						ptype_lkup[k].tunnel_type =
+						IDPF_RX_PTYPE_TUNNEL_IP_GRENAT;
+						pstate.tunnel_state |=
+						IDPF_PTYPE_TUNNEL_IP_GRENAT;
+					}
+					break;
+				case VIRTCHNL2_PROTO_HDR_MAC:
+					ptype_lkup[k].outer_ip =
+						IDPF_RX_PTYPE_OUTER_L2;
+					if (pstate.tunnel_state ==
+							IDPF_TUN_IP_GRE) {
+						ptype_lkup[k].tunnel_type =
+						IDPF_RX_PTYPE_TUNNEL_IP_GRENAT_MAC;
+						pstate.tunnel_state |=
+						IDPF_PTYPE_TUNNEL_IP_GRENAT_MAC;
+					}
+					break;
+				case VIRTCHNL2_PROTO_HDR_IPV4:
+					idpf_fill_ptype_lookup(&ptype_lkup[k],
+							       &pstate, true,
+							       false);
+					break;
+				case VIRTCHNL2_PROTO_HDR_IPV6:
+					idpf_fill_ptype_lookup(&ptype_lkup[k],
+							       &pstate, false,
+							       false);
+					break;
+				case VIRTCHNL2_PROTO_HDR_IPV4_FRAG:
+					idpf_fill_ptype_lookup(&ptype_lkup[k],
+							       &pstate, true,
+							       true);
+					break;
+				case VIRTCHNL2_PROTO_HDR_IPV6_FRAG:
+					idpf_fill_ptype_lookup(&ptype_lkup[k],
+							       &pstate, false,
+							       true);
+					break;
+				case VIRTCHNL2_PROTO_HDR_UDP:
+					ptype_lkup[k].inner_prot =
+					IDPF_RX_PTYPE_INNER_PROT_UDP;
+					break;
+				case VIRTCHNL2_PROTO_HDR_TCP:
+					ptype_lkup[k].inner_prot =
+					IDPF_RX_PTYPE_INNER_PROT_TCP;
+					break;
+				case VIRTCHNL2_PROTO_HDR_SCTP:
+					ptype_lkup[k].inner_prot =
+					IDPF_RX_PTYPE_INNER_PROT_SCTP;
+					break;
+				case VIRTCHNL2_PROTO_HDR_ICMP:
+					ptype_lkup[k].inner_prot =
+					IDPF_RX_PTYPE_INNER_PROT_ICMP;
+					break;
+				case VIRTCHNL2_PROTO_HDR_PAY:
+					ptype_lkup[k].payload_layer =
+						IDPF_RX_PTYPE_PAYLOAD_LAYER_PAY2;
+					break;
+				case VIRTCHNL2_PROTO_HDR_ICMPV6:
+				case VIRTCHNL2_PROTO_HDR_IPV6_EH:
+				case VIRTCHNL2_PROTO_HDR_PRE_MAC:
+				case VIRTCHNL2_PROTO_HDR_POST_MAC:
+				case VIRTCHNL2_PROTO_HDR_ETHERTYPE:
+				case VIRTCHNL2_PROTO_HDR_SVLAN:
+				case VIRTCHNL2_PROTO_HDR_CVLAN:
+				case VIRTCHNL2_PROTO_HDR_MPLS:
+				case VIRTCHNL2_PROTO_HDR_MMPLS:
+				case VIRTCHNL2_PROTO_HDR_PTP:
+				case VIRTCHNL2_PROTO_HDR_CTRL:
+				case VIRTCHNL2_PROTO_HDR_LLDP:
+				case VIRTCHNL2_PROTO_HDR_ARP:
+				case VIRTCHNL2_PROTO_HDR_ECP:
+				case VIRTCHNL2_PROTO_HDR_EAPOL:
+				case VIRTCHNL2_PROTO_HDR_PPPOD:
+				case VIRTCHNL2_PROTO_HDR_PPPOE:
+				case VIRTCHNL2_PROTO_HDR_IGMP:
+				case VIRTCHNL2_PROTO_HDR_AH:
+				case VIRTCHNL2_PROTO_HDR_ESP:
+				case VIRTCHNL2_PROTO_HDR_IKE:
+				case VIRTCHNL2_PROTO_HDR_NATT_KEEP:
+				case VIRTCHNL2_PROTO_HDR_L2TPV2:
+				case VIRTCHNL2_PROTO_HDR_L2TPV2_CONTROL:
+				case VIRTCHNL2_PROTO_HDR_L2TPV3:
+				case VIRTCHNL2_PROTO_HDR_GTP:
+				case VIRTCHNL2_PROTO_HDR_GTP_EH:
+				case VIRTCHNL2_PROTO_HDR_GTPCV2:
+				case VIRTCHNL2_PROTO_HDR_GTPC_TEID:
+				case VIRTCHNL2_PROTO_HDR_GTPU:
+				case VIRTCHNL2_PROTO_HDR_GTPU_UL:
+				case VIRTCHNL2_PROTO_HDR_GTPU_DL:
+				case VIRTCHNL2_PROTO_HDR_ECPRI:
+				case VIRTCHNL2_PROTO_HDR_VRRP:
+				case VIRTCHNL2_PROTO_HDR_OSPF:
+				case VIRTCHNL2_PROTO_HDR_TUN:
+				case VIRTCHNL2_PROTO_HDR_NVGRE:
+				case VIRTCHNL2_PROTO_HDR_VXLAN:
+				case VIRTCHNL2_PROTO_HDR_VXLAN_GPE:
+				case VIRTCHNL2_PROTO_HDR_GENEVE:
+				case VIRTCHNL2_PROTO_HDR_NSH:
+				case VIRTCHNL2_PROTO_HDR_QUIC:
+				case VIRTCHNL2_PROTO_HDR_PFCP:
+				case VIRTCHNL2_PROTO_HDR_PFCP_NODE:
+				case VIRTCHNL2_PROTO_HDR_PFCP_SESSION:
+				case VIRTCHNL2_PROTO_HDR_RTP:
+				case VIRTCHNL2_PROTO_HDR_NO_PROTO:
+					break;
+				default:
+					break;
+				}
+			}
+		}
+	}
+
+vc_buf_unlock:
+	mutex_unlock(&adapter->vc_buf_lock);
+	kfree(ptype_info);
+
+	return err;
+}
+
+/**
+ * idpf_send_ena_dis_loopback_msg - Send virtchnl enable/disable loopback
+ *				    message
+ * @vport: virtual port data structure
+ *
+ * Returns 0 on success, negative on failure.
+ */
+int idpf_send_ena_dis_loopback_msg(struct idpf_vport *vport)
+{
+	struct virtchnl2_loopback loopback;
+	int err;
+
+	loopback.vport_id = cpu_to_le32(vport->vport_id);
+	loopback.enable = idpf_is_feature_ena(vport, NETIF_F_LOOPBACK);
+
+	mutex_lock(&vport->vc_buf_lock);
+
+	err = idpf_send_mb_msg(vport->adapter, VIRTCHNL2_OP_LOOPBACK,
+			       sizeof(loopback), (u8 *)&loopback);
+	if (err)
+		goto rel_lock;
+
+	err = idpf_wait_for_event(vport->adapter, vport,
+				  IDPF_VC_LOOPBACK_STATE,
+				  IDPF_VC_LOOPBACK_STATE_ERR);
+
+rel_lock:
+	mutex_unlock(&vport->vc_buf_lock);
+
+	return err;
+}
+
+/**
+ * idpf_find_ctlq - Given a type and id, find ctlq info
+ * @hw: hardware struct
+ * @type: type of ctrlq to find
+ * @id: ctlq id to find
+ *
+ * Returns pointer to found ctlq info struct, NULL otherwise.
+ */
+static struct idpf_ctlq_info *idpf_find_ctlq(struct idpf_hw *hw,
+					     enum idpf_ctlq_type type, int id)
+{
+	struct idpf_ctlq_info *cq, *tmp;
+
+	list_for_each_entry_safe(cq, tmp, &hw->cq_list_head, cq_list)
+		if (cq->q_id == id && cq->cq_type == type)
+			return cq;
+
+	return NULL;
+}
+
+/**
+ * idpf_init_dflt_mbx - Setup default mailbox parameters and make request
+ * @adapter: adapter info struct
+ *
+ * Returns 0 on success, negative otherwise
+ */
+int idpf_init_dflt_mbx(struct idpf_adapter *adapter)
+{
+	struct idpf_ctlq_create_info ctlq_info[] = {
+		{
+			.type = IDPF_CTLQ_TYPE_MAILBOX_TX,
+			.id = IDPF_DFLT_MBX_ID,
+			.len = IDPF_DFLT_MBX_Q_LEN,
+			.buf_size = IDPF_CTLQ_MAX_BUF_LEN
+		},
+		{
+			.type = IDPF_CTLQ_TYPE_MAILBOX_RX,
+			.id = IDPF_DFLT_MBX_ID,
+			.len = IDPF_DFLT_MBX_Q_LEN,
+			.buf_size = IDPF_CTLQ_MAX_BUF_LEN
+		}
+	};
+	struct idpf_hw *hw = &adapter->hw;
+	int err;
+
+	adapter->dev_ops.reg_ops.ctlq_reg_init(ctlq_info);
+
+	err = idpf_ctlq_init(hw, IDPF_NUM_DFLT_MBX_Q, ctlq_info);
+	if (err)
+		return err;
+
+	hw->asq = idpf_find_ctlq(hw, IDPF_CTLQ_TYPE_MAILBOX_TX,
+				 IDPF_DFLT_MBX_ID);
+	hw->arq = idpf_find_ctlq(hw, IDPF_CTLQ_TYPE_MAILBOX_RX,
+				 IDPF_DFLT_MBX_ID);
+
+	if (!hw->asq || !hw->arq) {
+		idpf_ctlq_deinit(hw);
+
+		return -ENOENT;
+	}
+
+	adapter->state = __IDPF_STARTUP;
+
+	return 0;
+}
+
+/**
+ * idpf_deinit_dflt_mbx - Free up ctlqs setup
+ * @adapter: Driver specific private data structure
+ */
+void idpf_deinit_dflt_mbx(struct idpf_adapter *adapter)
+{
+	if (adapter->hw.arq && adapter->hw.asq) {
+		idpf_mb_clean(adapter);
+		idpf_ctlq_deinit(&adapter->hw);
+	}
+	adapter->hw.arq = NULL;
+	adapter->hw.asq = NULL;
+}
+
+/**
+ * idpf_vport_params_buf_rel - Release memory for MailBox resources
+ * @adapter: Driver specific private data structure
+ *
+ * Will release memory to hold the vport parameters received on MailBox
+ */
+static void idpf_vport_params_buf_rel(struct idpf_adapter *adapter)
+{
+	kfree(adapter->vport_params_recvd);
+	adapter->vport_params_recvd = NULL;
+	kfree(adapter->vport_params_reqd);
+	adapter->vport_params_reqd = NULL;
+	kfree(adapter->vport_ids);
+	adapter->vport_ids = NULL;
+}
+
+/**
+ * idpf_vport_params_buf_alloc - Allocate memory for MailBox resources
+ * @adapter: Driver specific private data structure
+ *
+ * Will alloc memory to hold the vport parameters received on MailBox
+ */
+static int idpf_vport_params_buf_alloc(struct idpf_adapter *adapter)
+{
+	u16 num_max_vports = idpf_get_max_vports(adapter);
+
+	adapter->vport_params_reqd = kcalloc(num_max_vports,
+					     sizeof(*adapter->vport_params_reqd),
+					     GFP_KERNEL);
+	if (!adapter->vport_params_reqd)
+		return -ENOMEM;
+
+	adapter->vport_params_recvd = kcalloc(num_max_vports,
+					      sizeof(*adapter->vport_params_recvd),
+					      GFP_KERNEL);
+	if (!adapter->vport_params_recvd)
+		goto err_mem;
+
+	adapter->vport_ids = kcalloc(num_max_vports, sizeof(u32), GFP_KERNEL);
+	if (!adapter->vport_ids)
+		goto err_mem;
+
+	if (adapter->vport_config)
+		return 0;
+
+	adapter->vport_config = kcalloc(num_max_vports,
+					sizeof(*adapter->vport_config),
+					GFP_KERNEL);
+	if (!adapter->vport_config)
+		goto err_mem;
+
+	return 0;
+
+err_mem:
+	idpf_vport_params_buf_rel(adapter);
+
+	return -ENOMEM;
+}
+
+/**
+ * idpf_vc_core_init - Initialize state machine and get driver specific
+ * resources
+ * @adapter: Driver specific private structure
+ *
+ * This function will initialize the state machine and request all necessary
+ * resources required by the device driver. Once the state machine is
+ * initialized, allocate memory to store vport specific information and also
+ * requests required interrupts.
+ *
+ * Returns 0 on success, -EAGAIN function will get called again,
+ * otherwise negative on failure.
+ */
+int idpf_vc_core_init(struct idpf_adapter *adapter)
+{
+	int task_delay = 30;
+	u16 num_max_vports;
+	int err = 0;
+
+	while (adapter->state != __IDPF_INIT_SW) {
+		switch (adapter->state) {
+		case __IDPF_STARTUP:
+			if (idpf_send_ver_msg(adapter))
+				goto init_failed;
+			adapter->state = __IDPF_VER_CHECK;
+			goto restart;
+		case __IDPF_VER_CHECK:
+			err = idpf_recv_ver_msg(adapter);
+			if (err == -EIO) {
+				return err;
+			} else if (err == -EAGAIN) {
+				adapter->state = __IDPF_STARTUP;
+				goto restart;
+			} else if (err) {
+				goto init_failed;
+			}
+			if (idpf_send_get_caps_msg(adapter))
+				goto init_failed;
+			adapter->state = __IDPF_GET_CAPS;
+			goto restart;
+		case __IDPF_GET_CAPS:
+			if (idpf_recv_get_caps_msg(adapter))
+				goto init_failed;
+			adapter->state = __IDPF_INIT_SW;
+			break;
+		default:
+			dev_err(&adapter->pdev->dev, "Device is in bad state: %d\n",
+				adapter->state);
+			goto init_failed;
+		}
+		break;
+restart:
+		/* Give enough time before proceeding further with
+		 * state machine
+		 */
+		msleep(task_delay);
+	}
+
+	pci_sriov_set_totalvfs(adapter->pdev, idpf_get_max_vfs(adapter));
+	num_max_vports = idpf_get_max_vports(adapter);
+	adapter->max_vports = num_max_vports;
+	adapter->vports = kcalloc(num_max_vports, sizeof(*adapter->vports),
+				  GFP_KERNEL);
+	if (!adapter->vports)
+		return -ENOMEM;
+
+	if (!adapter->netdevs) {
+		adapter->netdevs = kcalloc(num_max_vports,
+					   sizeof(struct net_device *),
+					   GFP_KERNEL);
+		if (!adapter->netdevs) {
+			err = -ENOMEM;
+			goto err_netdev_alloc;
+		}
+	}
+
+	err = idpf_vport_params_buf_alloc(adapter);
+	if (err) {
+		dev_err(&adapter->pdev->dev, "Failed to alloc vport params buffer: %d\n",
+			err);
+		goto err_netdev_alloc;
+	}
+
+	/* Start the mailbox task before requesting vectors. This will ensure
+	 * vector information response from mailbox is handled
+	 */
+	queue_delayed_work(adapter->mbx_wq, &adapter->mbx_task, 0);
+
+	queue_delayed_work(adapter->serv_wq, &adapter->serv_task,
+			   msecs_to_jiffies(5 * (adapter->pdev->devfn & 0x07)));
+
+	err = idpf_intr_req(adapter);
+	if (err) {
+		dev_err(&adapter->pdev->dev, "failed to enable interrupt vectors: %d\n",
+			err);
+		goto err_intr_req;
+	}
+
+	idpf_init_avail_queues(adapter);
+
+	/* Skew the delay for init tasks for each function based on fn number
+	 * to prevent every function from making the same call simultaneously.
+	 */
+	queue_delayed_work(adapter->init_wq, &adapter->init_task,
+			   msecs_to_jiffies(5 * (adapter->pdev->devfn & 0x07)));
+
+	goto no_err;
+
+err_intr_req:
+	cancel_delayed_work_sync(&adapter->serv_task);
+	idpf_vport_params_buf_rel(adapter);
+err_netdev_alloc:
+	kfree(adapter->vports);
+	adapter->vports = NULL;
+no_err:
+	return err;
+
+init_failed:
+	/* Don't retry if we're trying to go down, just bail. */
+	if (test_bit(IDPF_REMOVE_IN_PROG, adapter->flags))
+		return err;
+
+	if (++adapter->mb_wait_count > IDPF_MB_MAX_ERR) {
+		dev_err(&adapter->pdev->dev, "Failed to establish mailbox communications with hardware\n");
+
+		return -EFAULT;
+	}
+	/* If it reached here, it is possible that mailbox queue initialization
+	 * register writes might not have taken effect. Retry to initialize
+	 * the mailbox again
+	 */
+	adapter->state = __IDPF_STARTUP;
+	idpf_deinit_dflt_mbx(adapter);
+	set_bit(IDPF_HR_DRV_LOAD, adapter->flags);
+	queue_delayed_work(adapter->vc_event_wq, &adapter->vc_event_task,
+			   msecs_to_jiffies(task_delay));
+
+	return -EAGAIN;
+}
+
+/**
+ * idpf_vc_core_deinit - Device deinit routine
+ * @adapter: Driver specific private structure
+ *
+ */
+void idpf_vc_core_deinit(struct idpf_adapter *adapter)
+{
+	int i;
+
+	idpf_deinit_task(adapter);
+	idpf_intr_rel(adapter);
+	/* Set all bits as we dont know on which vc_state the vhnl_wq is
+	 * waiting on and wakeup the virtchnl workqueue even if it is waiting
+	 * for the response as we are going down
+	 */
+	for (i = 0; i < IDPF_VC_NBITS; i++)
+		set_bit(i, adapter->vc_state);
+	wake_up(&adapter->vchnl_wq);
+
+	cancel_delayed_work_sync(&adapter->serv_task);
+	cancel_delayed_work_sync(&adapter->mbx_task);
+
+	idpf_vport_params_buf_rel(adapter);
+
+	/* Clear all the bits */
+	for (i = 0; i < IDPF_VC_NBITS; i++)
+		clear_bit(i, adapter->vc_state);
+
+	kfree(adapter->vports);
+	adapter->vports = NULL;
+}
+
+/**
+ * idpf_vport_alloc_vec_indexes - Get relative vector indexes
+ * @vport: virtual port data struct
+ *
+ * This function requests the vector information required for the vport and
+ * stores the vector indexes received from the 'global vector distribution'
+ * in the vport's queue vectors array.
+ *
+ * Return 0 on success, error on failure
+ */
+int idpf_vport_alloc_vec_indexes(struct idpf_vport *vport)
+{
+	struct idpf_vector_info vec_info;
+	int num_alloc_vecs;
+
+	vec_info.num_curr_vecs = vport->num_q_vectors;
+	vec_info.num_req_vecs = max(vport->num_txq, vport->num_rxq);
+	vec_info.default_vport = vport->default_vport;
+	vec_info.index = vport->idx;
+
+	num_alloc_vecs = idpf_req_rel_vector_indexes(vport->adapter,
+						     vport->q_vector_idxs,
+						     &vec_info);
+	if (num_alloc_vecs <= 0) {
+		dev_err(&vport->adapter->pdev->dev, "Vector distribution failed: %d\n",
+			num_alloc_vecs);
+		return -EINVAL;
+	}
+
+	vport->num_q_vectors = num_alloc_vecs;
+
+	return 0;
+}
+
+/**
+ * idpf_vport_init - Initialize virtual port
+ * @vport: virtual port to be initialized
+ * @max_q: vport max queue info
+ *
+ * Will initialize vport with the info received through MB earlier
+ */
+void idpf_vport_init(struct idpf_vport *vport, struct idpf_vport_max_q *max_q)
+{
+	struct idpf_adapter *adapter = vport->adapter;
+	struct virtchnl2_create_vport *vport_msg;
+	struct idpf_vport_config *vport_config;
+	u16 tx_itr[] = {2, 8, 64, 128, 256};
+	u16 rx_itr[] = {2, 8, 32, 96, 128};
+	struct idpf_rss_data *rss_data;
+	u16 idx = vport->idx;
+
+	vport_config = adapter->vport_config[idx];
+	rss_data = &vport_config->user_config.rss_data;
+	vport_msg = adapter->vport_params_recvd[idx];
+
+	vport_config->max_q.max_txq = max_q->max_txq;
+	vport_config->max_q.max_rxq = max_q->max_rxq;
+	vport_config->max_q.max_complq = max_q->max_complq;
+	vport_config->max_q.max_bufq = max_q->max_bufq;
+
+	vport->txq_model = le16_to_cpu(vport_msg->txq_model);
+	vport->rxq_model = le16_to_cpu(vport_msg->rxq_model);
+	vport->vport_type = le16_to_cpu(vport_msg->vport_type);
+	vport->vport_id = le32_to_cpu(vport_msg->vport_id);
+
+	rss_data->rss_key_size = min_t(u16, NETDEV_RSS_KEY_LEN,
+				       le16_to_cpu(vport_msg->rss_key_size));
+	rss_data->rss_lut_size = le16_to_cpu(vport_msg->rss_lut_size);
+
+	ether_addr_copy(vport->default_mac_addr, vport_msg->default_mac_addr);
+	vport->max_mtu = le16_to_cpu(vport_msg->max_mtu) - IDPF_PACKET_HDR_PAD;
+
+	/* Initialize Tx and Rx profiles for Dynamic Interrupt Moderation */
+	memcpy(vport->rx_itr_profile, rx_itr, IDPF_DIM_PROFILE_SLOTS);
+	memcpy(vport->tx_itr_profile, tx_itr, IDPF_DIM_PROFILE_SLOTS);
+
+	idpf_vport_init_num_qs(vport, vport_msg);
+	idpf_vport_calc_num_q_desc(vport);
+	idpf_vport_calc_num_q_groups(vport);
+	idpf_vport_alloc_vec_indexes(vport);
+
+	vport->crc_enable = adapter->crc_enable;
+}
+
+/**
+ * idpf_get_vec_ids - Initialize vector id from Mailbox parameters
+ * @adapter: adapter structure to get the mailbox vector id
+ * @vecids: Array of vector ids
+ * @num_vecids: number of vector ids
+ * @chunks: vector ids received over mailbox
+ *
+ * Will initialize the mailbox vector id which is received from the
+ * get capabilities and data queue vector ids with ids received as
+ * mailbox parameters.
+ * Returns number of ids filled
+ */
+int idpf_get_vec_ids(struct idpf_adapter *adapter,
+		     u16 *vecids, int num_vecids,
+		     struct virtchnl2_vector_chunks *chunks)
+{
+	u16 num_chunks = le16_to_cpu(chunks->num_vchunks);
+	int num_vecid_filled = 0;
+	int i, j;
+
+	vecids[num_vecid_filled] = adapter->mb_vector.v_idx;
+	num_vecid_filled++;
+
+	for (j = 0; j < num_chunks; j++) {
+		struct virtchnl2_vector_chunk *chunk;
+		u16 start_vecid, num_vec;
+
+		chunk = &chunks->vchunks[j];
+		num_vec = le16_to_cpu(chunk->num_vectors);
+		start_vecid = le16_to_cpu(chunk->start_vector_id);
+
+		for (i = 0; i < num_vec; i++) {
+			if ((num_vecid_filled + i) < num_vecids) {
+				vecids[num_vecid_filled + i] = start_vecid;
+				start_vecid++;
+			} else {
+				break;
+			}
+		}
+		num_vecid_filled = num_vecid_filled + i;
+	}
+
+	return num_vecid_filled;
+}
+
+/**
+ * idpf_vport_get_queue_ids - Initialize queue id from Mailbox parameters
+ * @qids: Array of queue ids
+ * @num_qids: number of queue ids
+ * @q_type: queue model
+ * @chunks: queue ids received over mailbox
+ *
+ * Will initialize all queue ids with ids received as mailbox parameters
+ * Returns number of ids filled
+ */
+static int idpf_vport_get_queue_ids(u32 *qids, int num_qids, u16 q_type,
+				    struct virtchnl2_queue_reg_chunks *chunks)
+{
+	u16 num_chunks = le16_to_cpu(chunks->num_chunks);
+	u32 num_q_id_filled = 0, i;
+	u32 start_q_id, num_q;
+
+	while (num_chunks--) {
+		struct virtchnl2_queue_reg_chunk *chunk;
+
+		chunk = &chunks->chunks[num_chunks];
+		if (le32_to_cpu(chunk->type) != q_type)
+			continue;
+
+		num_q = le32_to_cpu(chunk->num_queues);
+		start_q_id = le32_to_cpu(chunk->start_queue_id);
+
+		for (i = 0; i < num_q; i++) {
+			if ((num_q_id_filled + i) < num_qids) {
+				qids[num_q_id_filled + i] = start_q_id;
+				start_q_id++;
+			} else {
+				break;
+			}
+		}
+		num_q_id_filled = num_q_id_filled + i;
+	}
+
+	return num_q_id_filled;
+}
+
+/**
+ * __idpf_vport_queue_ids_init - Initialize queue ids from Mailbox parameters
+ * @vport: virtual port for which the queues ids are initialized
+ * @qids: queue ids
+ * @num_qids: number of queue ids
+ * @q_type: type of queue
+ *
+ * Will initialize all queue ids with ids received as mailbox
+ * parameters. Returns number of queue ids initialized.
+ */
+static int __idpf_vport_queue_ids_init(struct idpf_vport *vport,
+				       const u32 *qids,
+				       int num_qids,
+				       u32 q_type)
+{
+	struct idpf_queue *q;
+	int i, j, k = 0;
+
+	switch (q_type) {
+	case VIRTCHNL2_QUEUE_TYPE_TX:
+		for (i = 0; i < vport->num_txq_grp; i++) {
+			struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i];
+
+			for (j = 0; j < tx_qgrp->num_txq && k < num_qids; j++, k++) {
+				tx_qgrp->txqs[j]->q_id = qids[k];
+				tx_qgrp->txqs[j]->q_type =
+					VIRTCHNL2_QUEUE_TYPE_TX;
+			}
+		}
+		break;
+	case VIRTCHNL2_QUEUE_TYPE_RX:
+		for (i = 0; i < vport->num_rxq_grp; i++) {
+			struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i];
+			u16 num_rxq;
+
+			if (idpf_is_queue_model_split(vport->rxq_model))
+				num_rxq = rx_qgrp->splitq.num_rxq_sets;
+			else
+				num_rxq = rx_qgrp->singleq.num_rxq;
+
+			for (j = 0; j < num_rxq && k < num_qids; j++, k++) {
+				if (idpf_is_queue_model_split(vport->rxq_model))
+					q = &rx_qgrp->splitq.rxq_sets[j]->rxq;
+				else
+					q = rx_qgrp->singleq.rxqs[j];
+				q->q_id = qids[k];
+				q->q_type = VIRTCHNL2_QUEUE_TYPE_RX;
+			}
+		}
+		break;
+	case VIRTCHNL2_QUEUE_TYPE_TX_COMPLETION:
+		for (i = 0; i < vport->num_txq_grp && k < num_qids; i++, k++) {
+			struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i];
+
+			tx_qgrp->complq->q_id = qids[k];
+			tx_qgrp->complq->q_type =
+				VIRTCHNL2_QUEUE_TYPE_TX_COMPLETION;
+		}
+		break;
+	case VIRTCHNL2_QUEUE_TYPE_RX_BUFFER:
+		for (i = 0; i < vport->num_rxq_grp; i++) {
+			struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i];
+			u8 num_bufqs = vport->num_bufqs_per_qgrp;
+
+			for (j = 0; j < num_bufqs && k < num_qids; j++, k++) {
+				q = &rx_qgrp->splitq.bufq_sets[j].bufq;
+				q->q_id = qids[k];
+				q->q_type = VIRTCHNL2_QUEUE_TYPE_RX_BUFFER;
+			}
+		}
+		break;
+	default:
+		break;
+	}
+
+	return k;
+}
+
+/**
+ * idpf_vport_queue_ids_init - Initialize queue ids from Mailbox parameters
+ * @vport: virtual port for which the queues ids are initialized
+ *
+ * Will initialize all queue ids with ids received as mailbox parameters.
+ * Returns 0 on success, negative if all the queues are not initialized.
+ */
+int idpf_vport_queue_ids_init(struct idpf_vport *vport)
+{
+	struct virtchnl2_create_vport *vport_params;
+	struct virtchnl2_queue_reg_chunks *chunks;
+	struct idpf_vport_config *vport_config;
+	u16 vport_idx = vport->idx;
+	int num_ids, err = 0;
+	u16 q_type;
+	u32 *qids;
+
+	vport_config = vport->adapter->vport_config[vport_idx];
+	if (vport_config->req_qs_chunks) {
+		struct virtchnl2_add_queues *vc_aq =
+			(struct virtchnl2_add_queues *)vport_config->req_qs_chunks;
+		chunks = &vc_aq->chunks;
+	} else {
+		vport_params = vport->adapter->vport_params_recvd[vport_idx];
+		chunks = &vport_params->chunks;
+	}
+
+	qids = kcalloc(IDPF_MAX_QIDS, sizeof(u32), GFP_KERNEL);
+	if (!qids)
+		return -ENOMEM;
+
+	num_ids = idpf_vport_get_queue_ids(qids, IDPF_MAX_QIDS,
+					   VIRTCHNL2_QUEUE_TYPE_TX,
+					   chunks);
+	if (num_ids < vport->num_txq) {
+		err = -EINVAL;
+		goto mem_rel;
+	}
+	num_ids = __idpf_vport_queue_ids_init(vport, qids, num_ids,
+					      VIRTCHNL2_QUEUE_TYPE_TX);
+	if (num_ids < vport->num_txq) {
+		err = -EINVAL;
+		goto mem_rel;
+	}
+
+	num_ids = idpf_vport_get_queue_ids(qids, IDPF_MAX_QIDS,
+					   VIRTCHNL2_QUEUE_TYPE_RX,
+					   chunks);
+	if (num_ids < vport->num_rxq) {
+		err = -EINVAL;
+		goto mem_rel;
+	}
+	num_ids = __idpf_vport_queue_ids_init(vport, qids, num_ids,
+					      VIRTCHNL2_QUEUE_TYPE_RX);
+	if (num_ids < vport->num_rxq) {
+		err = -EINVAL;
+		goto mem_rel;
+	}
+
+	if (!idpf_is_queue_model_split(vport->txq_model))
+		goto check_rxq;
+
+	q_type = VIRTCHNL2_QUEUE_TYPE_TX_COMPLETION;
+	num_ids = idpf_vport_get_queue_ids(qids, IDPF_MAX_QIDS, q_type, chunks);
+	if (num_ids < vport->num_complq) {
+		err = -EINVAL;
+		goto mem_rel;
+	}
+	num_ids = __idpf_vport_queue_ids_init(vport, qids, num_ids, q_type);
+	if (num_ids < vport->num_complq) {
+		err = -EINVAL;
+		goto mem_rel;
+	}
+
+check_rxq:
+	if (!idpf_is_queue_model_split(vport->rxq_model))
+		goto mem_rel;
+
+	q_type = VIRTCHNL2_QUEUE_TYPE_RX_BUFFER;
+	num_ids = idpf_vport_get_queue_ids(qids, IDPF_MAX_QIDS, q_type, chunks);
+	if (num_ids < vport->num_bufq) {
+		err = -EINVAL;
+		goto mem_rel;
+	}
+	num_ids = __idpf_vport_queue_ids_init(vport, qids, num_ids, q_type);
+	if (num_ids < vport->num_bufq)
+		err = -EINVAL;
+
+mem_rel:
+	kfree(qids);
+
+	return err;
+}
+
+/**
+ * idpf_vport_adjust_qs - Adjust to new requested queues
+ * @vport: virtual port data struct
+ *
+ * Renegotiate queues.  Returns 0 on success, negative on failure.
+ */
+int idpf_vport_adjust_qs(struct idpf_vport *vport)
+{
+	struct virtchnl2_create_vport vport_msg;
+	int err;
+
+	vport_msg.txq_model = cpu_to_le16(vport->txq_model);
+	vport_msg.rxq_model = cpu_to_le16(vport->rxq_model);
+	err = idpf_vport_calc_total_qs(vport->adapter, vport->idx, &vport_msg,
+				       NULL);
+	if (err)
+		return err;
+
+	idpf_vport_init_num_qs(vport, &vport_msg);
+	idpf_vport_calc_num_q_groups(vport);
+
+	return 0;
+}
+
+/**
+ * idpf_is_capability_ena - Default implementation of capability checking
+ * @adapter: Private data struct
+ * @all: all or one flag
+ * @field: caps field to check for flags
+ * @flag: flag to check
+ *
+ * Return true if all capabilities are supported, false otherwise
+ */
+bool idpf_is_capability_ena(struct idpf_adapter *adapter, bool all,
+			    enum idpf_cap_field field, u64 flag)
+{
+	u8 *caps = (u8 *)&adapter->caps;
+	u32 *cap_field;
+
+	if (!caps)
+		return false;
+
+	if (field == IDPF_BASE_CAPS)
+		return false;
+
+	cap_field = (u32 *)(caps + field);
+
+	if (all)
+		return (*cap_field & flag) == flag;
+	else
+		return !!(*cap_field & flag);
+}
+
+/**
+ * idpf_get_vport_id: Get vport id
+ * @vport: virtual port structure
+ *
+ * Return vport id from the adapter persistent data
+ */
+u32 idpf_get_vport_id(struct idpf_vport *vport)
+{
+	struct virtchnl2_create_vport *vport_msg;
+
+	vport_msg = vport->adapter->vport_params_recvd[vport->idx];
+
+	return le32_to_cpu(vport_msg->vport_id);
+}
+
+/**
+ * idpf_add_del_mac_filters - Add/del mac filters
+ * @vport: Virtual port data structure
+ * @np: Netdev private structure
+ * @add: Add or delete flag
+ * @async: Don't wait for return message
+ *
+ * Returns 0 on success, error on failure.
+ **/
+int idpf_add_del_mac_filters(struct idpf_vport *vport,
+			     struct idpf_netdev_priv *np,
+			     bool add, bool async)
+{
+	struct virtchnl2_mac_addr_list *ma_list = NULL;
+	struct idpf_adapter *adapter = np->adapter;
+	struct idpf_vport_config *vport_config;
+	enum idpf_vport_config_flags mac_flag;
+	struct pci_dev *pdev = adapter->pdev;
+	enum idpf_vport_vc_state vc, vc_err;
+	struct virtchnl2_mac_addr *mac_addr;
+	struct idpf_mac_filter *f, *tmp;
+	u32 num_msgs, total_filters = 0;
+	int i = 0, k, err = 0;
+	u32 vop;
+
+	vport_config = adapter->vport_config[np->vport_idx];
+	spin_lock_bh(&vport_config->mac_filter_list_lock);
+
+	/* Find the number of newly added filters */
+	list_for_each_entry(f, &vport_config->user_config.mac_filter_list,
+			    list) {
+		if (add && f->add)
+			total_filters++;
+		else if (!add && f->remove)
+			total_filters++;
+	}
+
+	if (!total_filters) {
+		spin_unlock_bh(&vport_config->mac_filter_list_lock);
+
+		return 0;
+	}
+
+	/* Fill all the new filters into virtchannel message */
+	mac_addr = kcalloc(total_filters, sizeof(struct virtchnl2_mac_addr),
+			   GFP_ATOMIC);
+	if (!mac_addr) {
+		err = -ENOMEM;
+		spin_unlock_bh(&vport_config->mac_filter_list_lock);
+		goto error;
+	}
+
+	list_for_each_entry_safe(f, tmp, &vport_config->user_config.mac_filter_list,
+				 list) {
+		if (add && f->add) {
+			ether_addr_copy(mac_addr[i].addr, f->macaddr);
+			i++;
+			f->add = false;
+			if (i == total_filters)
+				break;
+		}
+		if (!add && f->remove) {
+			ether_addr_copy(mac_addr[i].addr, f->macaddr);
+			i++;
+			f->remove = false;
+			if (i == total_filters)
+				break;
+		}
+	}
+
+	spin_unlock_bh(&vport_config->mac_filter_list_lock);
+
+	if (add) {
+		vop = VIRTCHNL2_OP_ADD_MAC_ADDR;
+		vc = IDPF_VC_ADD_MAC_ADDR;
+		vc_err = IDPF_VC_ADD_MAC_ADDR_ERR;
+		mac_flag = IDPF_VPORT_ADD_MAC_REQ;
+	} else {
+		vop = VIRTCHNL2_OP_DEL_MAC_ADDR;
+		vc = IDPF_VC_DEL_MAC_ADDR;
+		vc_err = IDPF_VC_DEL_MAC_ADDR_ERR;
+		mac_flag = IDPF_VPORT_DEL_MAC_REQ;
+	}
+
+	/* Chunk up the filters into multiple messages to avoid
+	 * sending a control queue message buffer that is too large
+	 */
+	num_msgs = DIV_ROUND_UP(total_filters, IDPF_NUM_FILTERS_PER_MSG);
+
+	if (!async)
+		mutex_lock(&vport->vc_buf_lock);
+
+	for (i = 0, k = 0; i < num_msgs; i++) {
+		u32 entries_size, buf_size, num_entries;
+
+		num_entries = min_t(u32, total_filters,
+				    IDPF_NUM_FILTERS_PER_MSG);
+		entries_size = sizeof(struct virtchnl2_mac_addr) * num_entries;
+		buf_size = struct_size(ma_list, mac_addr_list, num_entries);
+
+		if (!ma_list || num_entries != IDPF_NUM_FILTERS_PER_MSG) {
+			kfree(ma_list);
+			ma_list = kzalloc(buf_size, GFP_ATOMIC);
+			if (!ma_list) {
+				err = -ENOMEM;
+				goto list_prep_error;
+			}
+		} else {
+			memset(ma_list, 0, buf_size);
+		}
+
+		ma_list->vport_id = cpu_to_le32(np->vport_id);
+		ma_list->num_mac_addr = cpu_to_le16(num_entries);
+		memcpy(ma_list->mac_addr_list, &mac_addr[k], entries_size);
+
+		if (async)
+			set_bit(mac_flag, vport_config->flags);
+
+		err = idpf_send_mb_msg(adapter, vop, buf_size, (u8 *)ma_list);
+		if (err)
+			goto mbx_error;
+
+		if (!async) {
+			err = idpf_wait_for_event(adapter, vport, vc, vc_err);
+			if (err)
+				goto mbx_error;
+		}
+
+		k += num_entries;
+		total_filters -= num_entries;
+	}
+
+mbx_error:
+	if (!async)
+		mutex_unlock(&vport->vc_buf_lock);
+	kfree(ma_list);
+list_prep_error:
+	kfree(mac_addr);
+error:
+	if (err)
+		dev_err(&pdev->dev, "Failed to add or del mac filters %d", err);
+
+	return err;
+}
+
+/**
+ * idpf_set_promiscuous - set promiscuous and send message to mailbox
+ * @adapter: Driver specific private structure
+ * @config_data: Vport specific config data
+ * @vport_id: Vport identifier
+ *
+ * Request to enable promiscuous mode for the vport. Message is sent
+ * asynchronously and won't wait for response.  Returns 0 on success, negative
+ * on failure;
+ */
+int idpf_set_promiscuous(struct idpf_adapter *adapter,
+			 struct idpf_vport_user_config_data *config_data,
+			 u32 vport_id)
+{
+	struct virtchnl2_promisc_info vpi;
+	u16 flags = 0;
+	int err;
+
+	if (test_bit(__IDPF_PROMISC_UC, config_data->user_flags))
+		flags |= VIRTCHNL2_UNICAST_PROMISC;
+	if (test_bit(__IDPF_PROMISC_MC, config_data->user_flags))
+		flags |= VIRTCHNL2_MULTICAST_PROMISC;
+
+	vpi.vport_id = cpu_to_le32(vport_id);
+	vpi.flags = cpu_to_le16(flags);
+
+	err = idpf_send_mb_msg(adapter, VIRTCHNL2_OP_CONFIG_PROMISCUOUS_MODE,
+			       sizeof(struct virtchnl2_promisc_info),
+			       (u8 *)&vpi);
+
+	return err;
+}
diff --git a/drivers/net/ethernet/intel/idpf/virtchnl2.h b/drivers/net/ethernet/intel/idpf/virtchnl2.h
new file mode 100644
index 000000000000..07e72c72d156
--- /dev/null
+++ b/drivers/net/ethernet/intel/idpf/virtchnl2.h
@@ -0,0 +1,1273 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright (C) 2023 Intel Corporation */
+
+#ifndef _VIRTCHNL2_H_
+#define _VIRTCHNL2_H_
+
+/* All opcodes associated with virtchnl2 are prefixed with virtchnl2 or
+ * VIRTCHNL2. Any future opcodes, offloads/capabilities, structures,
+ * and defines must be prefixed with virtchnl2 or VIRTCHNL2 to avoid confusion.
+ *
+ * PF/VF uses the virtchnl2 interface defined in this header file to communicate
+ * with device Control Plane (CP). Driver and the CP may run on different
+ * platforms with different endianness. To avoid byte order discrepancies,
+ * all the structures in this header follow little-endian format.
+ *
+ * This is an interface definition file where existing enums and their values
+ * must remain unchanged over time, so we specify explicit values for all enums.
+ */
+
+#include "virtchnl2_lan_desc.h"
+
+/* This macro is used to generate compilation errors if a structure
+ * is not exactly the correct length.
+ */
+#define VIRTCHNL2_CHECK_STRUCT_LEN(n, X)	\
+	static_assert((n) == sizeof(struct X))
+
+/* New major set of opcodes introduced and so leaving room for
+ * old misc opcodes to be added in future. Also these opcodes may only
+ * be used if both the PF and VF have successfully negotiated the
+ * VIRTCHNL version as 2.0 during VIRTCHNL2_OP_VERSION exchange.
+ */
+enum virtchnl2_op {
+	VIRTCHNL2_OP_UNKNOWN			= 0,
+	VIRTCHNL2_OP_VERSION			= 1,
+	VIRTCHNL2_OP_GET_CAPS			= 500,
+	VIRTCHNL2_OP_CREATE_VPORT		= 501,
+	VIRTCHNL2_OP_DESTROY_VPORT		= 502,
+	VIRTCHNL2_OP_ENABLE_VPORT		= 503,
+	VIRTCHNL2_OP_DISABLE_VPORT		= 504,
+	VIRTCHNL2_OP_CONFIG_TX_QUEUES		= 505,
+	VIRTCHNL2_OP_CONFIG_RX_QUEUES		= 506,
+	VIRTCHNL2_OP_ENABLE_QUEUES		= 507,
+	VIRTCHNL2_OP_DISABLE_QUEUES		= 508,
+	VIRTCHNL2_OP_ADD_QUEUES			= 509,
+	VIRTCHNL2_OP_DEL_QUEUES			= 510,
+	VIRTCHNL2_OP_MAP_QUEUE_VECTOR		= 511,
+	VIRTCHNL2_OP_UNMAP_QUEUE_VECTOR		= 512,
+	VIRTCHNL2_OP_GET_RSS_KEY		= 513,
+	VIRTCHNL2_OP_SET_RSS_KEY		= 514,
+	VIRTCHNL2_OP_GET_RSS_LUT		= 515,
+	VIRTCHNL2_OP_SET_RSS_LUT		= 516,
+	VIRTCHNL2_OP_GET_RSS_HASH		= 517,
+	VIRTCHNL2_OP_SET_RSS_HASH		= 518,
+	VIRTCHNL2_OP_SET_SRIOV_VFS		= 519,
+	VIRTCHNL2_OP_ALLOC_VECTORS		= 520,
+	VIRTCHNL2_OP_DEALLOC_VECTORS		= 521,
+	VIRTCHNL2_OP_EVENT			= 522,
+	VIRTCHNL2_OP_GET_STATS			= 523,
+	VIRTCHNL2_OP_RESET_VF			= 524,
+	VIRTCHNL2_OP_GET_EDT_CAPS		= 525,
+	VIRTCHNL2_OP_GET_PTYPE_INFO		= 526,
+	/* Opcode 527 and 528 are reserved for VIRTCHNL2_OP_GET_PTYPE_ID and
+	 * VIRTCHNL2_OP_GET_PTYPE_INFO_RAW.
+	 * Opcodes 529, 530, 531, 532 and 533 are reserved.
+	 */
+	VIRTCHNL2_OP_LOOPBACK			= 534,
+	VIRTCHNL2_OP_ADD_MAC_ADDR		= 535,
+	VIRTCHNL2_OP_DEL_MAC_ADDR		= 536,
+	VIRTCHNL2_OP_CONFIG_PROMISCUOUS_MODE	= 537,
+};
+
+/**
+ * enum virtchnl2_vport_type - Type of virtual port.
+ * @VIRTCHNL2_VPORT_TYPE_DEFAULT: Default virtual port type.
+ */
+enum virtchnl2_vport_type {
+	VIRTCHNL2_VPORT_TYPE_DEFAULT		= 0,
+};
+
+/**
+ * enum virtchnl2_queue_model - Type of queue model.
+ * @VIRTCHNL2_QUEUE_MODEL_SINGLE: Single queue model.
+ * @VIRTCHNL2_QUEUE_MODEL_SPLIT: Split queue model.
+ *
+ * In the single queue model, the same transmit descriptor queue is used by
+ * software to post descriptors to hardware and by hardware to post completed
+ * descriptors to software.
+ * Likewise, the same receive descriptor queue is used by hardware to post
+ * completions to software and by software to post buffers to hardware.
+ *
+ * In the split queue model, hardware uses transmit completion queues to post
+ * descriptor/buffer completions to software, while software uses transmit
+ * descriptor queues to post descriptors to hardware.
+ * Likewise, hardware posts descriptor completions to the receive descriptor
+ * queue, while software uses receive buffer queues to post buffers to hardware.
+ */
+enum virtchnl2_queue_model {
+	VIRTCHNL2_QUEUE_MODEL_SINGLE		= 0,
+	VIRTCHNL2_QUEUE_MODEL_SPLIT		= 1,
+};
+
+/* Checksum offload capability flags */
+enum virtchnl2_cap_txrx_csum {
+	VIRTCHNL2_CAP_TX_CSUM_L3_IPV4		= BIT(0),
+	VIRTCHNL2_CAP_TX_CSUM_L4_IPV4_TCP	= BIT(1),
+	VIRTCHNL2_CAP_TX_CSUM_L4_IPV4_UDP	= BIT(2),
+	VIRTCHNL2_CAP_TX_CSUM_L4_IPV4_SCTP	= BIT(3),
+	VIRTCHNL2_CAP_TX_CSUM_L4_IPV6_TCP	= BIT(4),
+	VIRTCHNL2_CAP_TX_CSUM_L4_IPV6_UDP	= BIT(5),
+	VIRTCHNL2_CAP_TX_CSUM_L4_IPV6_SCTP	= BIT(6),
+	VIRTCHNL2_CAP_TX_CSUM_GENERIC		= BIT(7),
+	VIRTCHNL2_CAP_RX_CSUM_L3_IPV4		= BIT(8),
+	VIRTCHNL2_CAP_RX_CSUM_L4_IPV4_TCP	= BIT(9),
+	VIRTCHNL2_CAP_RX_CSUM_L4_IPV4_UDP	= BIT(10),
+	VIRTCHNL2_CAP_RX_CSUM_L4_IPV4_SCTP	= BIT(11),
+	VIRTCHNL2_CAP_RX_CSUM_L4_IPV6_TCP	= BIT(12),
+	VIRTCHNL2_CAP_RX_CSUM_L4_IPV6_UDP	= BIT(13),
+	VIRTCHNL2_CAP_RX_CSUM_L4_IPV6_SCTP	= BIT(14),
+	VIRTCHNL2_CAP_RX_CSUM_GENERIC		= BIT(15),
+	VIRTCHNL2_CAP_TX_CSUM_L3_SINGLE_TUNNEL	= BIT(16),
+	VIRTCHNL2_CAP_TX_CSUM_L3_DOUBLE_TUNNEL	= BIT(17),
+	VIRTCHNL2_CAP_RX_CSUM_L3_SINGLE_TUNNEL	= BIT(18),
+	VIRTCHNL2_CAP_RX_CSUM_L3_DOUBLE_TUNNEL	= BIT(19),
+	VIRTCHNL2_CAP_TX_CSUM_L4_SINGLE_TUNNEL	= BIT(20),
+	VIRTCHNL2_CAP_TX_CSUM_L4_DOUBLE_TUNNEL	= BIT(21),
+	VIRTCHNL2_CAP_RX_CSUM_L4_SINGLE_TUNNEL	= BIT(22),
+	VIRTCHNL2_CAP_RX_CSUM_L4_DOUBLE_TUNNEL	= BIT(23),
+};
+
+/* Segmentation offload capability flags */
+enum virtchnl2_cap_seg {
+	VIRTCHNL2_CAP_SEG_IPV4_TCP		= BIT(0),
+	VIRTCHNL2_CAP_SEG_IPV4_UDP		= BIT(1),
+	VIRTCHNL2_CAP_SEG_IPV4_SCTP		= BIT(2),
+	VIRTCHNL2_CAP_SEG_IPV6_TCP		= BIT(3),
+	VIRTCHNL2_CAP_SEG_IPV6_UDP		= BIT(4),
+	VIRTCHNL2_CAP_SEG_IPV6_SCTP		= BIT(5),
+	VIRTCHNL2_CAP_SEG_GENERIC		= BIT(6),
+	VIRTCHNL2_CAP_SEG_TX_SINGLE_TUNNEL	= BIT(7),
+	VIRTCHNL2_CAP_SEG_TX_DOUBLE_TUNNEL	= BIT(8),
+};
+
+/* Receive Side Scaling Flow type capability flags */
+enum virtchnl2_cap_rss {
+	VIRTCHNL2_CAP_RSS_IPV4_TCP		= BIT(0),
+	VIRTCHNL2_CAP_RSS_IPV4_UDP		= BIT(1),
+	VIRTCHNL2_CAP_RSS_IPV4_SCTP		= BIT(2),
+	VIRTCHNL2_CAP_RSS_IPV4_OTHER		= BIT(3),
+	VIRTCHNL2_CAP_RSS_IPV6_TCP		= BIT(4),
+	VIRTCHNL2_CAP_RSS_IPV6_UDP		= BIT(5),
+	VIRTCHNL2_CAP_RSS_IPV6_SCTP		= BIT(6),
+	VIRTCHNL2_CAP_RSS_IPV6_OTHER		= BIT(7),
+	VIRTCHNL2_CAP_RSS_IPV4_AH		= BIT(8),
+	VIRTCHNL2_CAP_RSS_IPV4_ESP		= BIT(9),
+	VIRTCHNL2_CAP_RSS_IPV4_AH_ESP		= BIT(10),
+	VIRTCHNL2_CAP_RSS_IPV6_AH		= BIT(11),
+	VIRTCHNL2_CAP_RSS_IPV6_ESP		= BIT(12),
+	VIRTCHNL2_CAP_RSS_IPV6_AH_ESP		= BIT(13),
+};
+
+/* Header split capability flags */
+enum virtchnl2_cap_rx_hsplit_at {
+	/* for prepended metadata  */
+	VIRTCHNL2_CAP_RX_HSPLIT_AT_L2		= BIT(0),
+	/* all VLANs go into header buffer */
+	VIRTCHNL2_CAP_RX_HSPLIT_AT_L3		= BIT(1),
+	VIRTCHNL2_CAP_RX_HSPLIT_AT_L4V4		= BIT(2),
+	VIRTCHNL2_CAP_RX_HSPLIT_AT_L4V6		= BIT(3),
+};
+
+/* Receive Side Coalescing offload capability flags */
+enum virtchnl2_cap_rsc {
+	VIRTCHNL2_CAP_RSC_IPV4_TCP		= BIT(0),
+	VIRTCHNL2_CAP_RSC_IPV4_SCTP		= BIT(1),
+	VIRTCHNL2_CAP_RSC_IPV6_TCP		= BIT(2),
+	VIRTCHNL2_CAP_RSC_IPV6_SCTP		= BIT(3),
+};
+
+/* Other capability flags */
+enum virtchnl2_cap_other {
+	VIRTCHNL2_CAP_RDMA			= BIT_ULL(0),
+	VIRTCHNL2_CAP_SRIOV			= BIT_ULL(1),
+	VIRTCHNL2_CAP_MACFILTER			= BIT_ULL(2),
+	VIRTCHNL2_CAP_FLOW_DIRECTOR		= BIT_ULL(3),
+	/* Queue based scheduling using split queue model */
+	VIRTCHNL2_CAP_SPLITQ_QSCHED		= BIT_ULL(4),
+	VIRTCHNL2_CAP_CRC			= BIT_ULL(5),
+	VIRTCHNL2_CAP_ADQ			= BIT_ULL(6),
+	VIRTCHNL2_CAP_WB_ON_ITR			= BIT_ULL(7),
+	VIRTCHNL2_CAP_PROMISC			= BIT_ULL(8),
+	VIRTCHNL2_CAP_LINK_SPEED		= BIT_ULL(9),
+	VIRTCHNL2_CAP_INLINE_IPSEC		= BIT_ULL(10),
+	VIRTCHNL2_CAP_LARGE_NUM_QUEUES		= BIT_ULL(11),
+	VIRTCHNL2_CAP_VLAN			= BIT_ULL(12),
+	VIRTCHNL2_CAP_PTP			= BIT_ULL(13),
+	/* EDT: Earliest Departure Time capability used for Timing Wheel */
+	VIRTCHNL2_CAP_EDT			= BIT_ULL(14),
+	VIRTCHNL2_CAP_ADV_RSS			= BIT_ULL(15),
+	VIRTCHNL2_CAP_FDIR			= BIT_ULL(16),
+	VIRTCHNL2_CAP_RX_FLEX_DESC		= BIT_ULL(17),
+	VIRTCHNL2_CAP_PTYPE			= BIT_ULL(18),
+	VIRTCHNL2_CAP_LOOPBACK			= BIT_ULL(19),
+	/* Other capability 20 is reserved */
+
+	/* this must be the last capability */
+	VIRTCHNL2_CAP_OEM			= BIT_ULL(63),
+};
+
+/* underlying device type */
+enum virtchl2_device_type {
+	VIRTCHNL2_MEV_DEVICE			= 0,
+};
+
+/**
+ * enum virtchnl2_txq_sched_mode - Transmit Queue Scheduling Modes.
+ * @VIRTCHNL2_TXQ_SCHED_MODE_QUEUE: Queue mode is the legacy mode i.e. inorder
+ *				    completions where descriptors and buffers
+ *				    are completed at the same time.
+ * @VIRTCHNL2_TXQ_SCHED_MODE_FLOW: Flow scheduling mode allows for out of order
+ *				   packet processing where descriptors are
+ *				   cleaned in order, but buffers can be
+ *				   completed out of order.
+ */
+enum virtchnl2_txq_sched_mode {
+	VIRTCHNL2_TXQ_SCHED_MODE_QUEUE		= 0,
+	VIRTCHNL2_TXQ_SCHED_MODE_FLOW		= 1,
+};
+
+/**
+ * enum virtchnl2_rxq_flags - Receive Queue Feature flags.
+ * @VIRTCHNL2_RXQ_RSC: Rx queue RSC flag.
+ * @VIRTCHNL2_RXQ_HDR_SPLIT: Rx queue header split flag.
+ * @VIRTCHNL2_RXQ_IMMEDIATE_WRITE_BACK: When set, packet descriptors are flushed
+ *					by hardware immediately after processing
+ *					each packet.
+ * @VIRTCHNL2_RX_DESC_SIZE_16BYTE: Rx queue 16 byte descriptor size.
+ * @VIRTCHNL2_RX_DESC_SIZE_32BYTE: Rx queue 32 byte descriptor size.
+ */
+enum virtchnl2_rxq_flags {
+	VIRTCHNL2_RXQ_RSC			= BIT(0),
+	VIRTCHNL2_RXQ_HDR_SPLIT			= BIT(1),
+	VIRTCHNL2_RXQ_IMMEDIATE_WRITE_BACK	= BIT(2),
+	VIRTCHNL2_RX_DESC_SIZE_16BYTE		= BIT(3),
+	VIRTCHNL2_RX_DESC_SIZE_32BYTE		= BIT(4),
+};
+
+/* Type of RSS algorithm */
+enum virtchnl2_rss_alg {
+	VIRTCHNL2_RSS_ALG_TOEPLITZ_ASYMMETRIC	= 0,
+	VIRTCHNL2_RSS_ALG_R_ASYMMETRIC		= 1,
+	VIRTCHNL2_RSS_ALG_TOEPLITZ_SYMMETRIC	= 2,
+	VIRTCHNL2_RSS_ALG_XOR_SYMMETRIC		= 3,
+};
+
+/* Type of event */
+enum virtchnl2_event_codes {
+	VIRTCHNL2_EVENT_UNKNOWN			= 0,
+	VIRTCHNL2_EVENT_LINK_CHANGE		= 1,
+	/* Event type 2, 3 are reserved */
+};
+
+/* Transmit and Receive queue types are valid in legacy as well as split queue
+ * models. With Split Queue model, 2 additional types are introduced -
+ * TX_COMPLETION and RX_BUFFER. In split queue model, receive  corresponds to
+ * the queue where hardware posts completions.
+ */
+enum virtchnl2_queue_type {
+	VIRTCHNL2_QUEUE_TYPE_TX			= 0,
+	VIRTCHNL2_QUEUE_TYPE_RX			= 1,
+	VIRTCHNL2_QUEUE_TYPE_TX_COMPLETION	= 2,
+	VIRTCHNL2_QUEUE_TYPE_RX_BUFFER		= 3,
+	VIRTCHNL2_QUEUE_TYPE_CONFIG_TX		= 4,
+	VIRTCHNL2_QUEUE_TYPE_CONFIG_RX		= 5,
+	/* Queue types 6, 7, 8, 9 are reserved */
+	VIRTCHNL2_QUEUE_TYPE_MBX_TX		= 10,
+	VIRTCHNL2_QUEUE_TYPE_MBX_RX		= 11,
+};
+
+/* Interrupt throttling rate index */
+enum virtchnl2_itr_idx {
+	VIRTCHNL2_ITR_IDX_0			= 0,
+	VIRTCHNL2_ITR_IDX_1			= 1,
+};
+
+/**
+ * enum virtchnl2_mac_addr_type - MAC address types.
+ * @VIRTCHNL2_MAC_ADDR_PRIMARY: PF/VF driver should set this type for the
+ *				primary/device unicast MAC address filter for
+ *				VIRTCHNL2_OP_ADD_MAC_ADDR and
+ *				VIRTCHNL2_OP_DEL_MAC_ADDR. This allows for the
+ *				underlying control plane function to accurately
+ *				track the MAC address and for VM/function reset.
+ *
+ * @VIRTCHNL2_MAC_ADDR_EXTRA: PF/VF driver should set this type for any extra
+ *			      unicast and/or multicast filters that are being
+ *			      added/deleted via VIRTCHNL2_OP_ADD_MAC_ADDR or
+ *			      VIRTCHNL2_OP_DEL_MAC_ADDR.
+ */
+enum virtchnl2_mac_addr_type {
+	VIRTCHNL2_MAC_ADDR_PRIMARY		= 1,
+	VIRTCHNL2_MAC_ADDR_EXTRA		= 2,
+};
+
+/* Flags used for promiscuous mode */
+enum virtchnl2_promisc_flags {
+	VIRTCHNL2_UNICAST_PROMISC		= BIT(0),
+	VIRTCHNL2_MULTICAST_PROMISC		= BIT(1),
+};
+
+/* Protocol header type within a packet segment. A segment consists of one or
+ * more protocol headers that make up a logical group of protocol headers. Each
+ * logical group of protocol headers encapsulates or is encapsulated using/by
+ * tunneling or encapsulation protocols for network virtualization.
+ */
+enum virtchnl2_proto_hdr_type {
+	/* VIRTCHNL2_PROTO_HDR_ANY is a mandatory protocol id */
+	VIRTCHNL2_PROTO_HDR_ANY			= 0,
+	VIRTCHNL2_PROTO_HDR_PRE_MAC		= 1,
+	/* VIRTCHNL2_PROTO_HDR_MAC is a mandatory protocol id */
+	VIRTCHNL2_PROTO_HDR_MAC			= 2,
+	VIRTCHNL2_PROTO_HDR_POST_MAC		= 3,
+	VIRTCHNL2_PROTO_HDR_ETHERTYPE		= 4,
+	VIRTCHNL2_PROTO_HDR_VLAN		= 5,
+	VIRTCHNL2_PROTO_HDR_SVLAN		= 6,
+	VIRTCHNL2_PROTO_HDR_CVLAN		= 7,
+	VIRTCHNL2_PROTO_HDR_MPLS		= 8,
+	VIRTCHNL2_PROTO_HDR_UMPLS		= 9,
+	VIRTCHNL2_PROTO_HDR_MMPLS		= 10,
+	VIRTCHNL2_PROTO_HDR_PTP			= 11,
+	VIRTCHNL2_PROTO_HDR_CTRL		= 12,
+	VIRTCHNL2_PROTO_HDR_LLDP		= 13,
+	VIRTCHNL2_PROTO_HDR_ARP			= 14,
+	VIRTCHNL2_PROTO_HDR_ECP			= 15,
+	VIRTCHNL2_PROTO_HDR_EAPOL		= 16,
+	VIRTCHNL2_PROTO_HDR_PPPOD		= 17,
+	VIRTCHNL2_PROTO_HDR_PPPOE		= 18,
+	/* VIRTCHNL2_PROTO_HDR_IPV4 is a mandatory protocol id */
+	VIRTCHNL2_PROTO_HDR_IPV4		= 19,
+	/* IPv4 and IPv6 Fragment header types are only associated to
+	 * VIRTCHNL2_PROTO_HDR_IPV4 and VIRTCHNL2_PROTO_HDR_IPV6 respectively,
+	 * cannot be used independently.
+	 */
+	/* VIRTCHNL2_PROTO_HDR_IPV4_FRAG is a mandatory protocol id */
+	VIRTCHNL2_PROTO_HDR_IPV4_FRAG		= 20,
+	/* VIRTCHNL2_PROTO_HDR_IPV6 is a mandatory protocol id */
+	VIRTCHNL2_PROTO_HDR_IPV6		= 21,
+	/* VIRTCHNL2_PROTO_HDR_IPV6_FRAG is a mandatory protocol id */
+	VIRTCHNL2_PROTO_HDR_IPV6_FRAG		= 22,
+	VIRTCHNL2_PROTO_HDR_IPV6_EH		= 23,
+	/* VIRTCHNL2_PROTO_HDR_UDP is a mandatory protocol id */
+	VIRTCHNL2_PROTO_HDR_UDP			= 24,
+	/* VIRTCHNL2_PROTO_HDR_TCP is a mandatory protocol id */
+	VIRTCHNL2_PROTO_HDR_TCP			= 25,
+	/* VIRTCHNL2_PROTO_HDR_SCTP is a mandatory protocol id */
+	VIRTCHNL2_PROTO_HDR_SCTP		= 26,
+	/* VIRTCHNL2_PROTO_HDR_ICMP is a mandatory protocol id */
+	VIRTCHNL2_PROTO_HDR_ICMP		= 27,
+	/* VIRTCHNL2_PROTO_HDR_ICMPV6 is a mandatory protocol id */
+	VIRTCHNL2_PROTO_HDR_ICMPV6		= 28,
+	VIRTCHNL2_PROTO_HDR_IGMP		= 29,
+	VIRTCHNL2_PROTO_HDR_AH			= 30,
+	VIRTCHNL2_PROTO_HDR_ESP			= 31,
+	VIRTCHNL2_PROTO_HDR_IKE			= 32,
+	VIRTCHNL2_PROTO_HDR_NATT_KEEP		= 33,
+	/* VIRTCHNL2_PROTO_HDR_PAY is a mandatory protocol id */
+	VIRTCHNL2_PROTO_HDR_PAY			= 34,
+	VIRTCHNL2_PROTO_HDR_L2TPV2		= 35,
+	VIRTCHNL2_PROTO_HDR_L2TPV2_CONTROL	= 36,
+	VIRTCHNL2_PROTO_HDR_L2TPV3		= 37,
+	VIRTCHNL2_PROTO_HDR_GTP			= 38,
+	VIRTCHNL2_PROTO_HDR_GTP_EH		= 39,
+	VIRTCHNL2_PROTO_HDR_GTPCV2		= 40,
+	VIRTCHNL2_PROTO_HDR_GTPC_TEID		= 41,
+	VIRTCHNL2_PROTO_HDR_GTPU		= 42,
+	VIRTCHNL2_PROTO_HDR_GTPU_UL		= 43,
+	VIRTCHNL2_PROTO_HDR_GTPU_DL		= 44,
+	VIRTCHNL2_PROTO_HDR_ECPRI		= 45,
+	VIRTCHNL2_PROTO_HDR_VRRP		= 46,
+	VIRTCHNL2_PROTO_HDR_OSPF		= 47,
+	/* VIRTCHNL2_PROTO_HDR_TUN is a mandatory protocol id */
+	VIRTCHNL2_PROTO_HDR_TUN			= 48,
+	VIRTCHNL2_PROTO_HDR_GRE			= 49,
+	VIRTCHNL2_PROTO_HDR_NVGRE		= 50,
+	VIRTCHNL2_PROTO_HDR_VXLAN		= 51,
+	VIRTCHNL2_PROTO_HDR_VXLAN_GPE		= 52,
+	VIRTCHNL2_PROTO_HDR_GENEVE		= 53,
+	VIRTCHNL2_PROTO_HDR_NSH			= 54,
+	VIRTCHNL2_PROTO_HDR_QUIC		= 55,
+	VIRTCHNL2_PROTO_HDR_PFCP		= 56,
+	VIRTCHNL2_PROTO_HDR_PFCP_NODE		= 57,
+	VIRTCHNL2_PROTO_HDR_PFCP_SESSION	= 58,
+	VIRTCHNL2_PROTO_HDR_RTP			= 59,
+	VIRTCHNL2_PROTO_HDR_ROCE		= 60,
+	VIRTCHNL2_PROTO_HDR_ROCEV1		= 61,
+	VIRTCHNL2_PROTO_HDR_ROCEV2		= 62,
+	/* Protocol ids up to 32767 are reserved.
+	 * 32768 - 65534 are used for user defined protocol ids.
+	 * VIRTCHNL2_PROTO_HDR_NO_PROTO is a mandatory protocol id.
+	 */
+	VIRTCHNL2_PROTO_HDR_NO_PROTO		= 65535,
+};
+
+enum virtchl2_version {
+	VIRTCHNL2_VERSION_MINOR_0		= 0,
+	VIRTCHNL2_VERSION_MAJOR_2		= 2,
+};
+
+/**
+ * struct virtchnl2_edt_caps - Get EDT granularity and time horizon.
+ * @tstamp_granularity_ns: Timestamp granularity in nanoseconds.
+ * @time_horizon_ns: Total time window in nanoseconds.
+ *
+ * Associated with VIRTCHNL2_OP_GET_EDT_CAPS.
+ */
+struct virtchnl2_edt_caps {
+	__le64 tstamp_granularity_ns;
+	__le64 time_horizon_ns;
+};
+VIRTCHNL2_CHECK_STRUCT_LEN(16, virtchnl2_edt_caps);
+
+/**
+ * struct virtchnl2_version_info - Version information.
+ * @major: Major version.
+ * @minor: Minor version.
+ *
+ * PF/VF posts its version number to the CP. CP responds with its version number
+ * in the same format, along with a return code.
+ * If there is a major version mismatch, then the PF/VF cannot operate.
+ * If there is a minor version mismatch, then the PF/VF can operate but should
+ * add a warning to the system log.
+ *
+ * This version opcode MUST always be specified as == 1, regardless of other
+ * changes in the API. The CP must always respond to this message without
+ * error regardless of version mismatch.
+ *
+ * Associated with VIRTCHNL2_OP_VERSION.
+ */
+struct virtchnl2_version_info {
+	__le32 major;
+	__le32 minor;
+};
+VIRTCHNL2_CHECK_STRUCT_LEN(8, virtchnl2_version_info);
+
+/**
+ * struct virtchnl2_get_capabilities - Capabilities info.
+ * @csum_caps: See enum virtchnl2_cap_txrx_csum.
+ * @seg_caps: See enum virtchnl2_cap_seg.
+ * @hsplit_caps: See enum virtchnl2_cap_rx_hsplit_at.
+ * @rsc_caps: See enum virtchnl2_cap_rsc.
+ * @rss_caps: See enum virtchnl2_cap_rss.
+ * @other_caps: See enum virtchnl2_cap_other.
+ * @mailbox_dyn_ctl: DYN_CTL register offset and vector id for mailbox
+ *		     provided by CP.
+ * @mailbox_vector_id: Mailbox vector id.
+ * @num_allocated_vectors: Maximum number of allocated vectors for the device.
+ * @max_rx_q: Maximum number of supported Rx queues.
+ * @max_tx_q: Maximum number of supported Tx queues.
+ * @max_rx_bufq: Maximum number of supported buffer queues.
+ * @max_tx_complq: Maximum number of supported completion queues.
+ * @max_sriov_vfs: The PF sends the maximum VFs it is requesting. The CP
+ *		   responds with the maximum VFs granted.
+ * @max_vports: Maximum number of vports that can be supported.
+ * @default_num_vports: Default number of vports driver should allocate on load.
+ * @max_tx_hdr_size: Max header length hardware can parse/checksum, in bytes.
+ * @max_sg_bufs_per_tx_pkt: Max number of scatter gather buffers that can be
+ *			    sent per transmit packet without needing to be
+ *			    linearized.
+ * @pad: Padding.
+ * @reserved: Reserved.
+ * @device_type: See enum virtchl2_device_type.
+ * @min_sso_packet_len: Min packet length supported by device for single
+ *			segment offload.
+ * @max_hdr_buf_per_lso: Max number of header buffers that can be used for
+ *			 an LSO.
+ * @pad1: Padding for future extensions.
+ *
+ * Dataplane driver sends this message to CP to negotiate capabilities and
+ * provides a virtchnl2_get_capabilities structure with its desired
+ * capabilities, max_sriov_vfs and num_allocated_vectors.
+ * CP responds with a virtchnl2_get_capabilities structure updated
+ * with allowed capabilities and the other fields as below.
+ * If PF sets max_sriov_vfs as 0, CP will respond with max number of VFs
+ * that can be created by this PF. For any other value 'n', CP responds
+ * with max_sriov_vfs set to min(n, x) where x is the max number of VFs
+ * allowed by CP's policy. max_sriov_vfs is not applicable for VFs.
+ * If dataplane driver sets num_allocated_vectors as 0, CP will respond with 1
+ * which is default vector associated with the default mailbox. For any other
+ * value 'n', CP responds with a value <= n based on the CP's policy of
+ * max number of vectors for a PF.
+ * CP will respond with the vector ID of mailbox allocated to the PF in
+ * mailbox_vector_id and the number of itr index registers in itr_idx_map.
+ * It also responds with default number of vports that the dataplane driver
+ * should comeup with in default_num_vports and maximum number of vports that
+ * can be supported in max_vports.
+ *
+ * Associated with VIRTCHNL2_OP_GET_CAPS.
+ */
+struct virtchnl2_get_capabilities {
+	__le32 csum_caps;
+	__le32 seg_caps;
+	__le32 hsplit_caps;
+	__le32 rsc_caps;
+	__le64 rss_caps;
+	__le64 other_caps;
+	__le32 mailbox_dyn_ctl;
+	__le16 mailbox_vector_id;
+	__le16 num_allocated_vectors;
+	__le16 max_rx_q;
+	__le16 max_tx_q;
+	__le16 max_rx_bufq;
+	__le16 max_tx_complq;
+	__le16 max_sriov_vfs;
+	__le16 max_vports;
+	__le16 default_num_vports;
+	__le16 max_tx_hdr_size;
+	u8 max_sg_bufs_per_tx_pkt;
+	u8 pad[3];
+	u8 reserved[4];
+	__le32 device_type;
+	u8 min_sso_packet_len;
+	u8 max_hdr_buf_per_lso;
+	u8 pad1[10];
+};
+VIRTCHNL2_CHECK_STRUCT_LEN(80, virtchnl2_get_capabilities);
+
+/**
+ * struct virtchnl2_queue_reg_chunk - Single queue chunk.
+ * @type: See enum virtchnl2_queue_type.
+ * @start_queue_id: Start Queue ID.
+ * @num_queues: Number of queues in the chunk.
+ * @pad: Padding.
+ * @qtail_reg_start: Queue tail register offset.
+ * @qtail_reg_spacing: Queue tail register spacing.
+ * @pad1: Padding for future extensions.
+ */
+struct virtchnl2_queue_reg_chunk {
+	__le32 type;
+	__le32 start_queue_id;
+	__le32 num_queues;
+	__le32 pad;
+	__le64 qtail_reg_start;
+	__le32 qtail_reg_spacing;
+	u8 pad1[4];
+};
+VIRTCHNL2_CHECK_STRUCT_LEN(32, virtchnl2_queue_reg_chunk);
+
+/**
+ * struct virtchnl2_queue_reg_chunks - Specify several chunks of contiguous
+ *				       queues.
+ * @num_chunks: Number of chunks.
+ * @pad: Padding.
+ * @chunks: Chunks of queue info.
+ */
+struct virtchnl2_queue_reg_chunks {
+	__le16 num_chunks;
+	u8 pad[6];
+	struct virtchnl2_queue_reg_chunk chunks[];
+};
+VIRTCHNL2_CHECK_STRUCT_LEN(8, virtchnl2_queue_reg_chunks);
+
+/**
+ * struct virtchnl2_create_vport - Create vport config info.
+ * @vport_type: See enum virtchnl2_vport_type.
+ * @txq_model: See virtchnl2_queue_model.
+ * @rxq_model: See virtchnl2_queue_model.
+ * @num_tx_q: Number of Tx queues.
+ * @num_tx_complq: Valid only if txq_model is split queue.
+ * @num_rx_q: Number of Rx queues.
+ * @num_rx_bufq: Valid only if rxq_model is split queue.
+ * @default_rx_q: Relative receive queue index to be used as default.
+ * @vport_index: Used to align PF and CP in case of default multiple vports,
+ *		 it is filled by the PF and CP returns the same value, to
+ *		 enable the driver to support multiple asynchronous parallel
+ *		 CREATE_VPORT requests and associate a response to a specific
+ *		 request.
+ * @max_mtu: Max MTU. CP populates this field on response.
+ * @vport_id: Vport id. CP populates this field on response.
+ * @default_mac_addr: Default MAC address.
+ * @pad: Padding.
+ * @rx_desc_ids: See VIRTCHNL2_RX_DESC_IDS definitions.
+ * @tx_desc_ids: See VIRTCHNL2_TX_DESC_IDS definitions.
+ * @pad1: Padding.
+ * @rss_algorithm: RSS algorithm.
+ * @rss_key_size: RSS key size.
+ * @rss_lut_size: RSS LUT size.
+ * @rx_split_pos: See enum virtchnl2_cap_rx_hsplit_at.
+ * @pad2: Padding.
+ * @chunks: Chunks of contiguous queues.
+ *
+ * PF sends this message to CP to create a vport by filling in required
+ * fields of virtchnl2_create_vport structure.
+ * CP responds with the updated virtchnl2_create_vport structure containing the
+ * necessary fields followed by chunks which in turn will have an array of
+ * num_chunks entries of virtchnl2_queue_chunk structures.
+ *
+ * Associated with VIRTCHNL2_OP_CREATE_VPORT.
+ */
+struct virtchnl2_create_vport {
+	__le16 vport_type;
+	__le16 txq_model;
+	__le16 rxq_model;
+	__le16 num_tx_q;
+	__le16 num_tx_complq;
+	__le16 num_rx_q;
+	__le16 num_rx_bufq;
+	__le16 default_rx_q;
+	__le16 vport_index;
+	/* CP populates the following fields on response */
+	__le16 max_mtu;
+	__le32 vport_id;
+	u8 default_mac_addr[ETH_ALEN];
+	__le16 pad;
+	__le64 rx_desc_ids;
+	__le64 tx_desc_ids;
+	u8 pad1[72];
+	__le32 rss_algorithm;
+	__le16 rss_key_size;
+	__le16 rss_lut_size;
+	__le32 rx_split_pos;
+	u8 pad2[20];
+	struct virtchnl2_queue_reg_chunks chunks;
+};
+VIRTCHNL2_CHECK_STRUCT_LEN(160, virtchnl2_create_vport);
+
+/**
+ * struct virtchnl2_vport - Vport ID info.
+ * @vport_id: Vport id.
+ * @pad: Padding for future extensions.
+ *
+ * PF sends this message to CP to destroy, enable or disable a vport by filling
+ * in the vport_id in virtchnl2_vport structure.
+ * CP responds with the status of the requested operation.
+ *
+ * Associated with VIRTCHNL2_OP_DESTROY_VPORT, VIRTCHNL2_OP_ENABLE_VPORT,
+ * VIRTCHNL2_OP_DISABLE_VPORT.
+ */
+struct virtchnl2_vport {
+	__le32 vport_id;
+	u8 pad[4];
+};
+VIRTCHNL2_CHECK_STRUCT_LEN(8, virtchnl2_vport);
+
+/**
+ * struct virtchnl2_txq_info - Transmit queue config info
+ * @dma_ring_addr: DMA address.
+ * @type: See enum virtchnl2_queue_type.
+ * @queue_id: Queue ID.
+ * @relative_queue_id: Valid only if queue model is split and type is transmit
+ *		       queue. Used in many to one mapping of transmit queues to
+ *		       completion queue.
+ * @model: See enum virtchnl2_queue_model.
+ * @sched_mode: See enum virtchnl2_txq_sched_mode.
+ * @qflags: TX queue feature flags.
+ * @ring_len: Ring length.
+ * @tx_compl_queue_id: Valid only if queue model is split and type is transmit
+ *		       queue.
+ * @peer_type: Valid only if queue type is VIRTCHNL2_QUEUE_TYPE_MAILBOX_TX
+ * @peer_rx_queue_id: Valid only if queue type is CONFIG_TX and used to deliver
+ *		      messages for the respective CONFIG_TX queue.
+ * @pad: Padding.
+ * @egress_pasid: Egress PASID info.
+ * @egress_hdr_pasid: Egress HDR passid.
+ * @egress_buf_pasid: Egress buf passid.
+ * @pad1: Padding for future extensions.
+ */
+struct virtchnl2_txq_info {
+	__le64 dma_ring_addr;
+	__le32 type;
+	__le32 queue_id;
+	__le16 relative_queue_id;
+	__le16 model;
+	__le16 sched_mode;
+	__le16 qflags;
+	__le16 ring_len;
+	__le16 tx_compl_queue_id;
+	__le16 peer_type;
+	__le16 peer_rx_queue_id;
+	u8 pad[4];
+	__le32 egress_pasid;
+	__le32 egress_hdr_pasid;
+	__le32 egress_buf_pasid;
+	u8 pad1[8];
+};
+VIRTCHNL2_CHECK_STRUCT_LEN(56, virtchnl2_txq_info);
+
+/**
+ * struct virtchnl2_config_tx_queues - TX queue config.
+ * @vport_id: Vport id.
+ * @num_qinfo: Number of virtchnl2_txq_info structs.
+ * @pad: Padding.
+ * @qinfo: Tx queues config info.
+ *
+ * PF sends this message to set up parameters for one or more transmit queues.
+ * This message contains an array of num_qinfo instances of virtchnl2_txq_info
+ * structures. CP configures requested queues and returns a status code. If
+ * num_qinfo specified is greater than the number of queues associated with the
+ * vport, an error is returned and no queues are configured.
+ *
+ * Associated with VIRTCHNL2_OP_CONFIG_TX_QUEUES.
+ */
+struct virtchnl2_config_tx_queues {
+	__le32 vport_id;
+	__le16 num_qinfo;
+	u8 pad[10];
+	struct virtchnl2_txq_info qinfo[];
+};
+VIRTCHNL2_CHECK_STRUCT_LEN(16, virtchnl2_config_tx_queues);
+
+/**
+ * struct virtchnl2_rxq_info - Receive queue config info.
+ * @desc_ids: See VIRTCHNL2_RX_DESC_IDS definitions.
+ * @dma_ring_addr: See VIRTCHNL2_RX_DESC_IDS definitions.
+ * @type: See enum virtchnl2_queue_type.
+ * @queue_id: Queue id.
+ * @model: See enum virtchnl2_queue_model.
+ * @hdr_buffer_size: Header buffer size.
+ * @data_buffer_size: Data buffer size.
+ * @max_pkt_size: Max packet size.
+ * @ring_len: Ring length.
+ * @buffer_notif_stride: Buffer notification stride in units of 32-descriptors.
+ *			 This field must be a power of 2.
+ * @pad: Padding.
+ * @dma_head_wb_addr: Applicable only for receive buffer queues.
+ * @qflags: Applicable only for receive completion queues.
+ *	    See enum virtchnl2_rxq_flags.
+ * @rx_buffer_low_watermark: Rx buffer low watermark.
+ * @rx_bufq1_id: Buffer queue index of the first buffer queue associated with
+ *		 the Rx queue. Valid only in split queue model.
+ * @rx_bufq2_id: Buffer queue index of the second buffer queue associated with
+ *		 the Rx queue. Valid only in split queue model.
+ * @bufq2_ena: It indicates if there is a second buffer, rx_bufq2_id is valid
+ *	       only if this field is set.
+ * @pad1: Padding.
+ * @ingress_pasid: Ingress PASID.
+ * @ingress_hdr_pasid: Ingress PASID header.
+ * @ingress_buf_pasid: Ingress PASID buffer.
+ * @pad2: Padding for future extensions.
+ */
+struct virtchnl2_rxq_info {
+	__le64 desc_ids;
+	__le64 dma_ring_addr;
+	__le32 type;
+	__le32 queue_id;
+	__le16 model;
+	__le16 hdr_buffer_size;
+	__le32 data_buffer_size;
+	__le32 max_pkt_size;
+	__le16 ring_len;
+	u8 buffer_notif_stride;
+	u8 pad;
+	__le64 dma_head_wb_addr;
+	__le16 qflags;
+	__le16 rx_buffer_low_watermark;
+	__le16 rx_bufq1_id;
+	__le16 rx_bufq2_id;
+	u8 bufq2_ena;
+	u8 pad1[3];
+	__le32 ingress_pasid;
+	__le32 ingress_hdr_pasid;
+	__le32 ingress_buf_pasid;
+	u8 pad2[16];
+};
+VIRTCHNL2_CHECK_STRUCT_LEN(88, virtchnl2_rxq_info);
+
+/**
+ * struct virtchnl2_config_rx_queues - Rx queues config.
+ * @vport_id: Vport id.
+ * @num_qinfo: Number of instances.
+ * @pad: Padding.
+ * @qinfo: Rx queues config info.
+ *
+ * PF sends this message to set up parameters for one or more receive queues.
+ * This message contains an array of num_qinfo instances of virtchnl2_rxq_info
+ * structures. CP configures requested queues and returns a status code.
+ * If the number of queues specified is greater than the number of queues
+ * associated with the vport, an error is returned and no queues are configured.
+ *
+ * Associated with VIRTCHNL2_OP_CONFIG_RX_QUEUES.
+ */
+struct virtchnl2_config_rx_queues {
+	__le32 vport_id;
+	__le16 num_qinfo;
+	u8 pad[18];
+	struct virtchnl2_rxq_info qinfo[];
+};
+VIRTCHNL2_CHECK_STRUCT_LEN(24, virtchnl2_config_rx_queues);
+
+/**
+ * struct virtchnl2_add_queues - data for VIRTCHNL2_OP_ADD_QUEUES.
+ * @vport_id: Vport id.
+ * @num_tx_q: Number of Tx qieues.
+ * @num_tx_complq: Number of Tx completion queues.
+ * @num_rx_q:  Number of Rx queues.
+ * @num_rx_bufq:  Number of Rx buffer queues.
+ * @pad: Padding.
+ * @chunks: Chunks of contiguous queues.
+ *
+ * PF sends this message to request additional transmit/receive queues beyond
+ * the ones that were assigned via CREATE_VPORT request. virtchnl2_add_queues
+ * structure is used to specify the number of each type of queues.
+ * CP responds with the same structure with the actual number of queues assigned
+ * followed by num_chunks of virtchnl2_queue_chunk structures.
+ *
+ * Associated with VIRTCHNL2_OP_ADD_QUEUES.
+ */
+struct virtchnl2_add_queues {
+	__le32 vport_id;
+	__le16 num_tx_q;
+	__le16 num_tx_complq;
+	__le16 num_rx_q;
+	__le16 num_rx_bufq;
+	u8 pad[4];
+	struct virtchnl2_queue_reg_chunks chunks;
+};
+VIRTCHNL2_CHECK_STRUCT_LEN(24, virtchnl2_add_queues);
+
+/**
+ * struct virtchnl2_vector_chunk - Structure to specify a chunk of contiguous
+ *				   interrupt vectors.
+ * @start_vector_id: Start vector id.
+ * @start_evv_id: Start EVV id.
+ * @num_vectors: Number of vectors.
+ * @pad: Padding.
+ * @dynctl_reg_start: DYN_CTL register offset.
+ * @dynctl_reg_spacing: register spacing between DYN_CTL registers of 2
+ *			consecutive vectors.
+ * @itrn_reg_start: ITRN register offset.
+ * @itrn_reg_spacing: Register spacing between dynctl registers of 2
+ *		      consecutive vectors.
+ * @itrn_index_spacing: Register spacing between itrn registers of the same
+ *			vector where n=0..2.
+ * @pad1: Padding for future extensions.
+ *
+ * Register offsets and spacing provided by CP.
+ * Dynamic control registers are used for enabling/disabling/re-enabling
+ * interrupts and updating interrupt rates in the hotpath. Any changes
+ * to interrupt rates in the dynamic control registers will be reflected
+ * in the interrupt throttling rate registers.
+ * itrn registers are used to update interrupt rates for specific
+ * interrupt indices without modifying the state of the interrupt.
+ */
+struct virtchnl2_vector_chunk {
+	__le16 start_vector_id;
+	__le16 start_evv_id;
+	__le16 num_vectors;
+	__le16 pad;
+	__le32 dynctl_reg_start;
+	__le32 dynctl_reg_spacing;
+	__le32 itrn_reg_start;
+	__le32 itrn_reg_spacing;
+	__le32 itrn_index_spacing;
+	u8 pad1[4];
+};
+VIRTCHNL2_CHECK_STRUCT_LEN(32, virtchnl2_vector_chunk);
+
+/**
+ * struct virtchnl2_vector_chunks - chunks of contiguous interrupt vectors.
+ * @num_vchunks: number of vector chunks.
+ * @pad: Padding.
+ * @vchunks: Chunks of contiguous vector info.
+ *
+ * PF sends virtchnl2_vector_chunks struct to specify the vectors it is giving
+ * away. CP performs requested action and returns status.
+ *
+ * Associated with VIRTCHNL2_OP_DEALLOC_VECTORS.
+ */
+struct virtchnl2_vector_chunks {
+	__le16 num_vchunks;
+	u8 pad[14];
+	struct virtchnl2_vector_chunk vchunks[];
+};
+VIRTCHNL2_CHECK_STRUCT_LEN(16, virtchnl2_vector_chunks);
+
+/**
+ * struct virtchnl2_alloc_vectors - vector allocation info.
+ * @num_vectors: Number of vectors.
+ * @pad: Padding.
+ * @vchunks: Chunks of contiguous vector info.
+ *
+ * PF sends this message to request additional interrupt vectors beyond the
+ * ones that were assigned via GET_CAPS request. virtchnl2_alloc_vectors
+ * structure is used to specify the number of vectors requested. CP responds
+ * with the same structure with the actual number of vectors assigned followed
+ * by virtchnl2_vector_chunks structure identifying the vector ids.
+ *
+ * Associated with VIRTCHNL2_OP_ALLOC_VECTORS.
+ */
+struct virtchnl2_alloc_vectors {
+	__le16 num_vectors;
+	u8 pad[14];
+	struct virtchnl2_vector_chunks vchunks;
+};
+VIRTCHNL2_CHECK_STRUCT_LEN(32, virtchnl2_alloc_vectors);
+
+/**
+ * struct virtchnl2_rss_lut - RSS LUT info.
+ * @vport_id: Vport id.
+ * @lut_entries_start: Start of LUT entries.
+ * @lut_entries: Number of LUT entrties.
+ * @pad: Padding.
+ * @lut: RSS lookup table.
+ *
+ * PF sends this message to get or set RSS lookup table. Only supported if
+ * both PF and CP drivers set the VIRTCHNL2_CAP_RSS bit during configuration
+ * negotiation.
+ *
+ * Associated with VIRTCHNL2_OP_GET_RSS_LUT and VIRTCHNL2_OP_SET_RSS_LUT.
+ */
+struct virtchnl2_rss_lut {
+	__le32 vport_id;
+	__le16 lut_entries_start;
+	__le16 lut_entries;
+	u8 pad[4];
+	__le32 lut[];
+};
+VIRTCHNL2_CHECK_STRUCT_LEN(12, virtchnl2_rss_lut);
+
+/**
+ * struct virtchnl2_rss_hash - RSS hash info.
+ * @ptype_groups: Packet type groups bitmap.
+ * @vport_id: Vport id.
+ * @pad: Padding for future extensions.
+ *
+ * PF sends these messages to get and set the hash filter enable bits for RSS.
+ * By default, the CP sets these to all possible traffic types that the
+ * hardware supports. The PF can query this value if it wants to change the
+ * traffic types that are hashed by the hardware.
+ * Only supported if both PF and CP drivers set the VIRTCHNL2_CAP_RSS bit
+ * during configuration negotiation.
+ *
+ * Associated with VIRTCHNL2_OP_GET_RSS_HASH and VIRTCHNL2_OP_SET_RSS_HASH
+ */
+struct virtchnl2_rss_hash {
+	__le64 ptype_groups;
+	__le32 vport_id;
+	u8 pad[4];
+};
+VIRTCHNL2_CHECK_STRUCT_LEN(16, virtchnl2_rss_hash);
+
+/**
+ * struct virtchnl2_sriov_vfs_info - VFs info.
+ * @num_vfs: Number of VFs.
+ * @pad: Padding for future extensions.
+ *
+ * This message is used to set number of SRIOV VFs to be created. The actual
+ * allocation of resources for the VFs in terms of vport, queues and interrupts
+ * is done by CP. When this call completes, the IDPF driver calls
+ * pci_enable_sriov to let the OS instantiate the SRIOV PCIE devices.
+ * The number of VFs set to 0 will destroy all the VFs of this function.
+ *
+ * Associated with VIRTCHNL2_OP_SET_SRIOV_VFS.
+ */
+struct virtchnl2_sriov_vfs_info {
+	__le16 num_vfs;
+	__le16 pad;
+};
+VIRTCHNL2_CHECK_STRUCT_LEN(4, virtchnl2_sriov_vfs_info);
+
+/**
+ * struct virtchnl2_ptype - Packet type info.
+ * @ptype_id_10: 10-bit packet type.
+ * @ptype_id_8: 8-bit packet type.
+ * @proto_id_count: Number of protocol ids the packet supports, maximum of 32
+ *		    protocol ids are supported.
+ * @pad: Padding.
+ * @proto_id: proto_id_count decides the allocation of protocol id array.
+ *	      See enum virtchnl2_proto_hdr_type.
+ *
+ * Based on the descriptor type the PF supports, CP fills ptype_id_10 or
+ * ptype_id_8 for flex and base descriptor respectively. If ptype_id_10 value
+ * is set to 0xFFFF, PF should consider this ptype as dummy one and it is the
+ * last ptype.
+ */
+struct virtchnl2_ptype {
+	__le16 ptype_id_10;
+	u8 ptype_id_8;
+	u8 proto_id_count;
+	__le16 pad;
+	__le16 proto_id[];
+};
+VIRTCHNL2_CHECK_STRUCT_LEN(6, virtchnl2_ptype);
+
+/**
+ * struct virtchnl2_get_ptype_info - Packet type info.
+ * @start_ptype_id: Starting ptype ID.
+ * @num_ptypes: Number of packet types from start_ptype_id.
+ * @pad: Padding for future extensions.
+ *
+ * The total number of supported packet types is based on the descriptor type.
+ * For the flex descriptor, it is 1024 (10-bit ptype), and for the base
+ * descriptor, it is 256 (8-bit ptype). Send this message to the CP by
+ * populating the 'start_ptype_id' and the 'num_ptypes'. CP responds with the
+ * 'start_ptype_id', 'num_ptypes', and the array of ptype (virtchnl2_ptype) that
+ * are added at the end of the 'virtchnl2_get_ptype_info' message (Note: There
+ * is no specific field for the ptypes but are added at the end of the
+ * ptype info message. PF/VF is expected to extract the ptypes accordingly.
+ * Reason for doing this is because compiler doesn't allow nested flexible
+ * array fields).
+ *
+ * If all the ptypes don't fit into one mailbox buffer, CP splits the
+ * ptype info into multiple messages, where each message will have its own
+ * 'start_ptype_id', 'num_ptypes', and the ptype array itself. When CP is done
+ * updating all the ptype information extracted from the package (the number of
+ * ptypes extracted might be less than what PF/VF expects), it will append a
+ * dummy ptype (which has 'ptype_id_10' of 'struct virtchnl2_ptype' as 0xFFFF)
+ * to the ptype array.
+ *
+ * PF/VF is expected to receive multiple VIRTCHNL2_OP_GET_PTYPE_INFO messages.
+ *
+ * Associated with VIRTCHNL2_OP_GET_PTYPE_INFO.
+ */
+struct virtchnl2_get_ptype_info {
+	__le16 start_ptype_id;
+	__le16 num_ptypes;
+	__le32 pad;
+};
+VIRTCHNL2_CHECK_STRUCT_LEN(8, virtchnl2_get_ptype_info);
+
+/**
+ * struct virtchnl2_vport_stats - Vport statistics.
+ * @vport_id: Vport id.
+ * @pad: Padding.
+ * @rx_bytes: Received bytes.
+ * @rx_unicast: Received unicast packets.
+ * @rx_multicast: Received multicast packets.
+ * @rx_broadcast: Received broadcast packets.
+ * @rx_discards: Discarded packets on receive.
+ * @rx_errors: Receive errors.
+ * @rx_unknown_protocol: Unlnown protocol.
+ * @tx_bytes: Transmitted bytes.
+ * @tx_unicast: Transmitted unicast packets.
+ * @tx_multicast: Transmitted multicast packets.
+ * @tx_broadcast: Transmitted broadcast packets.
+ * @tx_discards: Discarded packets on transmit.
+ * @tx_errors: Transmit errors.
+ * @rx_invalid_frame_length: Packets with invalid frame length.
+ * @rx_overflow_drop: Packets dropped on buffer overflow.
+ *
+ * PF/VF sends this message to CP to get the update stats by specifying the
+ * vport_id. CP responds with stats in struct virtchnl2_vport_stats.
+ *
+ * Associated with VIRTCHNL2_OP_GET_STATS.
+ */
+struct virtchnl2_vport_stats {
+	__le32 vport_id;
+	u8 pad[4];
+	__le64 rx_bytes;
+	__le64 rx_unicast;
+	__le64 rx_multicast;
+	__le64 rx_broadcast;
+	__le64 rx_discards;
+	__le64 rx_errors;
+	__le64 rx_unknown_protocol;
+	__le64 tx_bytes;
+	__le64 tx_unicast;
+	__le64 tx_multicast;
+	__le64 tx_broadcast;
+	__le64 tx_discards;
+	__le64 tx_errors;
+	__le64 rx_invalid_frame_length;
+	__le64 rx_overflow_drop;
+};
+VIRTCHNL2_CHECK_STRUCT_LEN(128, virtchnl2_vport_stats);
+
+/**
+ * struct virtchnl2_event - Event info.
+ * @event: Event opcode. See enum virtchnl2_event_codes.
+ * @link_speed: Link_speed provided in Mbps.
+ * @vport_id: Vport ID.
+ * @link_status: Link status.
+ * @pad: Padding.
+ * @reserved: Reserved.
+ *
+ * CP sends this message to inform the PF/VF driver of events that may affect
+ * it. No direct response is expected from the driver, though it may generate
+ * other messages in response to this one.
+ *
+ * Associated with VIRTCHNL2_OP_EVENT.
+ */
+struct virtchnl2_event {
+	__le32 event;
+	__le32 link_speed;
+	__le32 vport_id;
+	u8 link_status;
+	u8 pad;
+	__le16 reserved;
+};
+VIRTCHNL2_CHECK_STRUCT_LEN(16, virtchnl2_event);
+
+/**
+ * struct virtchnl2_rss_key - RSS key info.
+ * @vport_id: Vport id.
+ * @key_len: Length of RSS key.
+ * @pad: Padding.
+ * @key_flex: RSS hash key, packed bytes.
+ * PF/VF sends this message to get or set RSS key. Only supported if both
+ * PF/VF and CP drivers set the VIRTCHNL2_CAP_RSS bit during configuration
+ * negotiation.
+ *
+ * Associated with VIRTCHNL2_OP_GET_RSS_KEY and VIRTCHNL2_OP_SET_RSS_KEY.
+ */
+struct virtchnl2_rss_key {
+	__le32 vport_id;
+	__le16 key_len;
+	u8 pad;
+	__DECLARE_FLEX_ARRAY(u8, key_flex);
+};
+VIRTCHNL2_CHECK_STRUCT_LEN(8, virtchnl2_rss_key);
+
+/**
+ * struct virtchnl2_queue_chunk - chunk of contiguous queues
+ * @type: See enum virtchnl2_queue_type.
+ * @start_queue_id: Starting queue id.
+ * @num_queues: Number of queues.
+ * @pad: Padding for future extensions.
+ */
+struct virtchnl2_queue_chunk {
+	__le32 type;
+	__le32 start_queue_id;
+	__le32 num_queues;
+	u8 pad[4];
+};
+VIRTCHNL2_CHECK_STRUCT_LEN(16, virtchnl2_queue_chunk);
+
+/* struct virtchnl2_queue_chunks - chunks of contiguous queues
+ * @num_chunks: Number of chunks.
+ * @pad: Padding.
+ * @chunks: Chunks of contiguous queues info.
+ */
+struct virtchnl2_queue_chunks {
+	__le16 num_chunks;
+	u8 pad[6];
+	struct virtchnl2_queue_chunk chunks[];
+};
+VIRTCHNL2_CHECK_STRUCT_LEN(8, virtchnl2_queue_chunks);
+
+/**
+ * struct virtchnl2_del_ena_dis_queues - Enable/disable queues info.
+ * @vport_id: Vport id.
+ * @pad: Padding.
+ * @chunks: Chunks of contiguous queues info.
+ *
+ * PF sends these messages to enable, disable or delete queues specified in
+ * chunks. PF sends virtchnl2_del_ena_dis_queues struct to specify the queues
+ * to be enabled/disabled/deleted. Also applicable to single queue receive or
+ * transmit. CP performs requested action and returns status.
+ *
+ * Associated with VIRTCHNL2_OP_ENABLE_QUEUES, VIRTCHNL2_OP_DISABLE_QUEUES and
+ * VIRTCHNL2_OP_DISABLE_QUEUES.
+ */
+struct virtchnl2_del_ena_dis_queues {
+	__le32 vport_id;
+	u8 pad[4];
+	struct virtchnl2_queue_chunks chunks;
+};
+VIRTCHNL2_CHECK_STRUCT_LEN(16, virtchnl2_del_ena_dis_queues);
+
+/**
+ * struct virtchnl2_queue_vector - Queue to vector mapping.
+ * @queue_id: Queue id.
+ * @vector_id: Vector id.
+ * @pad: Padding.
+ * @itr_idx: See enum virtchnl2_itr_idx.
+ * @queue_type: See enum virtchnl2_queue_type.
+ * @pad1: Padding for future extensions.
+ */
+struct virtchnl2_queue_vector {
+	__le32 queue_id;
+	__le16 vector_id;
+	u8 pad[2];
+	__le32 itr_idx;
+	__le32 queue_type;
+	u8 pad1[8];
+};
+VIRTCHNL2_CHECK_STRUCT_LEN(24, virtchnl2_queue_vector);
+
+/**
+ * struct virtchnl2_queue_vector_maps - Map/unmap queues info.
+ * @vport_id: Vport id.
+ * @num_qv_maps: Number of queue vector maps.
+ * @pad: Padding.
+ * @qv_maps: Queue to vector maps.
+ *
+ * PF sends this message to map or unmap queues to vectors and interrupt
+ * throttling rate index registers. External data buffer contains
+ * virtchnl2_queue_vector_maps structure that contains num_qv_maps of
+ * virtchnl2_queue_vector structures. CP maps the requested queue vector maps
+ * after validating the queue and vector ids and returns a status code.
+ *
+ * Associated with VIRTCHNL2_OP_MAP_QUEUE_VECTOR and
+ * VIRTCHNL2_OP_UNMAP_QUEUE_VECTOR.
+ */
+struct virtchnl2_queue_vector_maps {
+	__le32 vport_id;
+	__le16 num_qv_maps;
+	u8 pad[10];
+	struct virtchnl2_queue_vector qv_maps[];
+};
+VIRTCHNL2_CHECK_STRUCT_LEN(16, virtchnl2_queue_vector_maps);
+
+/**
+ * struct virtchnl2_loopback - Loopback info.
+ * @vport_id: Vport id.
+ * @enable: Enable/disable.
+ * @pad: Padding for future extensions.
+ *
+ * PF/VF sends this message to transition to/from the loopback state. Setting
+ * the 'enable' to 1 enables the loopback state and setting 'enable' to 0
+ * disables it. CP configures the state to loopback and returns status.
+ *
+ * Associated with VIRTCHNL2_OP_LOOPBACK.
+ */
+struct virtchnl2_loopback {
+	__le32 vport_id;
+	u8 enable;
+	u8 pad[3];
+};
+VIRTCHNL2_CHECK_STRUCT_LEN(8, virtchnl2_loopback);
+
+/* struct virtchnl2_mac_addr - MAC address info.
+ * @addr: MAC address.
+ * @type: MAC type. See enum virtchnl2_mac_addr_type.
+ * @pad: Padding for future extensions.
+ */
+struct virtchnl2_mac_addr {
+	u8 addr[ETH_ALEN];
+	u8 type;
+	u8 pad;
+};
+VIRTCHNL2_CHECK_STRUCT_LEN(8, virtchnl2_mac_addr);
+
+/**
+ * struct virtchnl2_mac_addr_list - List of MAC addresses.
+ * @vport_id: Vport id.
+ * @num_mac_addr: Number of MAC addresses.
+ * @pad: Padding.
+ * @mac_addr_list: List with MAC address info.
+ *
+ * PF/VF driver uses this structure to send list of MAC addresses to be
+ * added/deleted to the CP where as CP performs the action and returns the
+ * status.
+ *
+ * Associated with VIRTCHNL2_OP_ADD_MAC_ADDR and VIRTCHNL2_OP_DEL_MAC_ADDR.
+ */
+struct virtchnl2_mac_addr_list {
+	__le32 vport_id;
+	__le16 num_mac_addr;
+	u8 pad[2];
+	struct virtchnl2_mac_addr mac_addr_list[];
+};
+VIRTCHNL2_CHECK_STRUCT_LEN(8, virtchnl2_mac_addr_list);
+
+/**
+ * struct virtchnl2_promisc_info - Promisc type info.
+ * @vport_id: Vport id.
+ * @flags: See enum virtchnl2_promisc_flags.
+ * @pad: Padding for future extensions.
+ *
+ * PF/VF sends vport id and flags to the CP where as CP performs the action
+ * and returns the status.
+ *
+ * Associated with VIRTCHNL2_OP_CONFIG_PROMISCUOUS_MODE.
+ */
+struct virtchnl2_promisc_info {
+	__le32 vport_id;
+	/* See VIRTCHNL2_PROMISC_FLAGS definitions */
+	__le16 flags;
+	u8 pad[2];
+};
+VIRTCHNL2_CHECK_STRUCT_LEN(8, virtchnl2_promisc_info);
+
+#endif /* _VIRTCHNL_2_H_ */
diff --git a/drivers/net/ethernet/intel/idpf/virtchnl2_lan_desc.h b/drivers/net/ethernet/intel/idpf/virtchnl2_lan_desc.h
new file mode 100644
index 000000000000..f1b577f1c452
--- /dev/null
+++ b/drivers/net/ethernet/intel/idpf/virtchnl2_lan_desc.h
@@ -0,0 +1,451 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright (C) 2023 Intel Corporation */
+
+#ifndef _VIRTCHNL2_LAN_DESC_H_
+#define _VIRTCHNL2_LAN_DESC_H_
+
+#include <linux/bits.h>
+
+/* This is an interface definition file where existing enums and their values
+ * must remain unchanged over time, so we specify explicit values for all enums.
+ */
+
+/* Transmit descriptor ID flags
+ */
+enum virtchnl2_tx_desc_ids {
+	VIRTCHNL2_TXDID_DATA				= BIT(0),
+	VIRTCHNL2_TXDID_CTX				= BIT(1),
+	/* TXDID bit 2 is reserved
+	 * TXDID bit 3 is free for future use
+	 * TXDID bit 4 is reserved
+	 */
+	VIRTCHNL2_TXDID_FLEX_TSO_CTX			= BIT(5),
+	/* TXDID bit 6 is reserved */
+	VIRTCHNL2_TXDID_FLEX_L2TAG1_L2TAG2		= BIT(7),
+	/* TXDID bits 8 and 9 are free for future use
+	 * TXDID bit 10 is reserved
+	 * TXDID bit 11 is free for future use
+	 */
+	VIRTCHNL2_TXDID_FLEX_FLOW_SCHED			= BIT(12),
+	/* TXDID bits 13 and 14 are free for future use */
+	VIRTCHNL2_TXDID_DESC_DONE			= BIT(15),
+};
+
+/* Receive descriptor IDs */
+enum virtchnl2_rx_desc_ids {
+	VIRTCHNL2_RXDID_1_32B_BASE	= 1,
+	/* FLEX_SQ_NIC and FLEX_SPLITQ share desc ids because they can be
+	 * differentiated based on queue model; e.g. single queue model can
+	 * only use FLEX_SQ_NIC and split queue model can only use FLEX_SPLITQ
+	 * for DID 2.
+	 */
+	VIRTCHNL2_RXDID_2_FLEX_SPLITQ	= 2,
+	VIRTCHNL2_RXDID_2_FLEX_SQ_NIC	= VIRTCHNL2_RXDID_2_FLEX_SPLITQ,
+	/* 3 through 6 are reserved */
+	VIRTCHNL2_RXDID_7_HW_RSVD	= 7,
+	/* 8 through 15 are free */
+};
+
+/* Receive descriptor ID bitmasks */
+#define VIRTCHNL2_RXDID_M(bit)			BIT_ULL(VIRTCHNL2_RXDID_##bit)
+
+enum virtchnl2_rx_desc_id_bitmasks {
+	VIRTCHNL2_RXDID_1_32B_BASE_M	= VIRTCHNL2_RXDID_M(1_32B_BASE),
+	VIRTCHNL2_RXDID_2_FLEX_SPLITQ_M	= VIRTCHNL2_RXDID_M(2_FLEX_SPLITQ),
+	VIRTCHNL2_RXDID_2_FLEX_SQ_NIC_M	= VIRTCHNL2_RXDID_M(2_FLEX_SQ_NIC),
+	VIRTCHNL2_RXDID_7_HW_RSVD_M	= VIRTCHNL2_RXDID_M(7_HW_RSVD),
+};
+
+/* For splitq virtchnl2_rx_flex_desc_adv_nic_3 desc members */
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_RXDID_M		GENMASK(3, 0)
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_UMBCAST_M		GENMASK(7, 6)
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_PTYPE_M		GENMASK(9, 0)
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_RAW_CSUM_INV_S	12
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_RAW_CSUM_INV_M	\
+	BIT_ULL(VIRTCHNL2_RX_FLEX_DESC_ADV_RAW_CSUM_INV_S)
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_FF0_M		GENMASK(15, 13)
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_LEN_PBUF_M		GENMASK(13, 0)
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_GEN_S		14
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_GEN_M		\
+	BIT_ULL(VIRTCHNL2_RX_FLEX_DESC_ADV_GEN_S)
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_BUFQ_ID_S		15
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_BUFQ_ID_M		\
+	BIT_ULL(VIRTCHNL2_RX_FLEX_DESC_ADV_BUFQ_ID_S)
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_LEN_HDR_M		GENMASK(9, 0)
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_RSC_S		10
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_RSC_M		\
+	BIT_ULL(VIRTCHNL2_RX_FLEX_DESC_ADV_RSC_S)
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_SPH_S		11
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_SPH_M		\
+	BIT_ULL(VIRTCHNL2_RX_FLEX_DESC_ADV_SPH_S)
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_FF1_S		12
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_FF1_M		GENMASK(14, 12)
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_MISS_S		15
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_MISS_M		\
+	BIT_ULL(VIRTCHNL2_RX_FLEX_DESC_ADV_MISS_S)
+
+/* Bitmasks for splitq virtchnl2_rx_flex_desc_adv_nic_3 */
+enum virtchl2_rx_flex_desc_adv_status_error_0_qw1_bits {
+	VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_DD_M			= BIT(0),
+	VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_EOF_M		= BIT(1),
+	VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_HBO_M		= BIT(2),
+	VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_L3L4P_M		= BIT(3),
+	VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_IPE_M		= BIT(4),
+	VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_L4E_M		= BIT(5),
+	VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_EIPE_M		= BIT(6),
+	VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_EUDPE_M		= BIT(7),
+};
+
+/* Bitmasks for splitq virtchnl2_rx_flex_desc_adv_nic_3 */
+enum virtchnl2_rx_flex_desc_adv_status_error_0_qw0_bits {
+	VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_LPBK_M		= BIT(0),
+	VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_IPV6EXADD_M		= BIT(1),
+	VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_RXE_M		= BIT(2),
+	VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_CRCP_M		= BIT(3),
+	VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_RSS_VALID_M		= BIT(4),
+	VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_L2TAG1P_M		= BIT(5),
+	VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XTRMD0_VALID_M	= BIT(6),
+	VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XTRMD1_VALID_M	= BIT(7),
+};
+
+/* Bitmasks for splitq virtchnl2_rx_flex_desc_adv_nic_3 */
+enum virtchnl2_rx_flex_desc_adv_status_error_1_bits {
+	VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS1_RSVD_M		= GENMASK(1, 0),
+	VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS1_ATRAEFAIL_M		= BIT(2),
+	VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS1_L2TAG2P_M		= BIT(3),
+	VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS1_XTRMD2_VALID_M	= BIT(4),
+	VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS1_XTRMD3_VALID_M	= BIT(5),
+	VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS1_XTRMD4_VALID_M	= BIT(6),
+	VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS1_XTRMD5_VALID_M	= BIT(7),
+};
+
+/* For singleq (flex) virtchnl2_rx_flex_desc fields
+ * For virtchnl2_rx_flex_desc.ptype_flex_flags0 member
+ */
+#define VIRTCHNL2_RX_FLEX_DESC_PTYPE_M				GENMASK(9, 0)
+
+/* For virtchnl2_rx_flex_desc.pkt_len member */
+#define VIRTCHNL2_RX_FLEX_DESC_PKT_LEN_M			GENMASK(13, 0)
+
+/* Bitmasks for singleq (flex) virtchnl2_rx_flex_desc */
+enum virtchnl2_rx_flex_desc_status_error_0_bits {
+	VIRTCHNL2_RX_FLEX_DESC_STATUS0_DD_M			= BIT(0),
+	VIRTCHNL2_RX_FLEX_DESC_STATUS0_EOF_M			= BIT(1),
+	VIRTCHNL2_RX_FLEX_DESC_STATUS0_HBO_M			= BIT(2),
+	VIRTCHNL2_RX_FLEX_DESC_STATUS0_L3L4P_M			= BIT(3),
+	VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_IPE_M		= BIT(4),
+	VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_L4E_M		= BIT(5),
+	VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_EIPE_M		= BIT(6),
+	VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_M		= BIT(7),
+	VIRTCHNL2_RX_FLEX_DESC_STATUS0_LPBK_M			= BIT(8),
+	VIRTCHNL2_RX_FLEX_DESC_STATUS0_IPV6EXADD_M		= BIT(9),
+	VIRTCHNL2_RX_FLEX_DESC_STATUS0_RXE_M			= BIT(10),
+	VIRTCHNL2_RX_FLEX_DESC_STATUS0_CRCP_M			= BIT(11),
+	VIRTCHNL2_RX_FLEX_DESC_STATUS0_RSS_VALID_M		= BIT(12),
+	VIRTCHNL2_RX_FLEX_DESC_STATUS0_L2TAG1P_M		= BIT(13),
+	VIRTCHNL2_RX_FLEX_DESC_STATUS0_XTRMD0_VALID_M		= BIT(14),
+	VIRTCHNL2_RX_FLEX_DESC_STATUS0_XTRMD1_VALID_M		= BIT(15),
+};
+
+/* Bitmasks for singleq (flex) virtchnl2_rx_flex_desc */
+enum virtchnl2_rx_flex_desc_status_error_1_bits {
+	VIRTCHNL2_RX_FLEX_DESC_STATUS1_CPM_M			= GENMASK(3, 0),
+	VIRTCHNL2_RX_FLEX_DESC_STATUS1_NAT_M			= BIT(4),
+	VIRTCHNL2_RX_FLEX_DESC_STATUS1_CRYPTO_M			= BIT(5),
+	/* [10:6] reserved */
+	VIRTCHNL2_RX_FLEX_DESC_STATUS1_L2TAG2P_M		= BIT(11),
+	VIRTCHNL2_RX_FLEX_DESC_STATUS1_XTRMD2_VALID_M		= BIT(12),
+	VIRTCHNL2_RX_FLEX_DESC_STATUS1_XTRMD3_VALID_M		= BIT(13),
+	VIRTCHNL2_RX_FLEX_DESC_STATUS1_XTRMD4_VALID_M		= BIT(14),
+	VIRTCHNL2_RX_FLEX_DESC_STATUS1_XTRMD5_VALID_M		= BIT(15),
+};
+
+/* For virtchnl2_rx_flex_desc.ts_low member */
+#define VIRTCHNL2_RX_FLEX_TSTAMP_VALID				BIT(0)
+
+/* For singleq (non flex) virtchnl2_singleq_base_rx_desc legacy desc members */
+#define VIRTCHNL2_RX_BASE_DESC_QW1_LEN_PBUF_M		GENMASK_ULL(51, 38)
+#define VIRTCHNL2_RX_BASE_DESC_QW1_PTYPE_M		GENMASK_ULL(37, 30)
+#define VIRTCHNL2_RX_BASE_DESC_QW1_ERROR_M		GENMASK_ULL(26, 19)
+#define VIRTCHNL2_RX_BASE_DESC_QW1_STATUS_M		GENMASK_ULL(18, 0)
+
+/* Bitmasks for singleq (base) virtchnl2_rx_base_desc */
+enum virtchnl2_rx_base_desc_status_bits {
+	VIRTCHNL2_RX_BASE_DESC_STATUS_DD_M		= BIT(0),
+	VIRTCHNL2_RX_BASE_DESC_STATUS_EOF_M		= BIT(1),
+	VIRTCHNL2_RX_BASE_DESC_STATUS_L2TAG1P_M		= BIT(2),
+	VIRTCHNL2_RX_BASE_DESC_STATUS_L3L4P_M		= BIT(3),
+	VIRTCHNL2_RX_BASE_DESC_STATUS_CRCP_M		= BIT(4),
+	VIRTCHNL2_RX_BASE_DESC_STATUS_RSVD_M		= GENMASK(7, 5),
+	VIRTCHNL2_RX_BASE_DESC_STATUS_EXT_UDP_0_M	= BIT(8),
+	VIRTCHNL2_RX_BASE_DESC_STATUS_UMBCAST_M		= GENMASK(10, 9),
+	VIRTCHNL2_RX_BASE_DESC_STATUS_FLM_M		= BIT(11),
+	VIRTCHNL2_RX_BASE_DESC_STATUS_FLTSTAT_M		= GENMASK(13, 12),
+	VIRTCHNL2_RX_BASE_DESC_STATUS_LPBK_M		= BIT(14),
+	VIRTCHNL2_RX_BASE_DESC_STATUS_IPV6EXADD_M	= BIT(15),
+	VIRTCHNL2_RX_BASE_DESC_STATUS_RSVD1_M		= GENMASK(17, 16),
+	VIRTCHNL2_RX_BASE_DESC_STATUS_INT_UDP_0_M	= BIT(18),
+};
+
+/* Bitmasks for singleq (base) virtchnl2_rx_base_desc */
+enum virtchnl2_rx_base_desc_error_bits {
+	VIRTCHNL2_RX_BASE_DESC_ERROR_RXE_M		= BIT(0),
+	VIRTCHNL2_RX_BASE_DESC_ERROR_ATRAEFAIL_M	= BIT(1),
+	VIRTCHNL2_RX_BASE_DESC_ERROR_HBO_M		= BIT(2),
+	VIRTCHNL2_RX_BASE_DESC_ERROR_L3L4E_M		= GENMASK(5, 3),
+	VIRTCHNL2_RX_BASE_DESC_ERROR_IPE_M		= BIT(3),
+	VIRTCHNL2_RX_BASE_DESC_ERROR_L4E_M		= BIT(4),
+	VIRTCHNL2_RX_BASE_DESC_ERROR_EIPE_M		= BIT(5),
+	VIRTCHNL2_RX_BASE_DESC_ERROR_OVERSIZE_M		= BIT(6),
+	VIRTCHNL2_RX_BASE_DESC_ERROR_PPRS_M		= BIT(7),
+};
+
+/* Bitmasks for singleq (base) virtchnl2_rx_base_desc */
+#define VIRTCHNL2_RX_BASE_DESC_FLTSTAT_RSS_HASH_M	GENMASK(13, 12)
+
+/**
+ * struct virtchnl2_splitq_rx_buf_desc - SplitQ RX buffer descriptor format
+ * @qword0: RX buffer struct.
+ * @qword0.buf_id: Buffer identifier.
+ * @qword0.rsvd0: Reserved.
+ * @qword0.rsvd1: Reserved.
+ * @pkt_addr: Packet buffer address.
+ * @hdr_addr: Header buffer address.
+ * @rsvd2: Rerserved.
+ *
+ * Receive Descriptors
+ * SplitQ buffer
+ * |                                       16|                   0|
+ * ----------------------------------------------------------------
+ * | RSV                                     | Buffer ID          |
+ * ----------------------------------------------------------------
+ * | Rx packet buffer address                                     |
+ * ----------------------------------------------------------------
+ * | Rx header buffer address                                     |
+ * ----------------------------------------------------------------
+ * | RSV                                                          |
+ * ----------------------------------------------------------------
+ * |                                                             0|
+ */
+struct virtchnl2_splitq_rx_buf_desc {
+	struct {
+		__le16  buf_id;
+		__le16  rsvd0;
+		__le32  rsvd1;
+	} qword0;
+	__le64  pkt_addr;
+	__le64  hdr_addr;
+	__le64  rsvd2;
+};
+
+/**
+ * struct virtchnl2_singleq_rx_buf_desc - SingleQ RX buffer descriptor format.
+ * @pkt_addr: Packet buffer address.
+ * @hdr_addr: Header buffer address.
+ * @rsvd1: Reserved.
+ * @rsvd2: Reserved.
+ *
+ * SingleQ buffer
+ * |                                                             0|
+ * ----------------------------------------------------------------
+ * | Rx packet buffer address                                     |
+ * ----------------------------------------------------------------
+ * | Rx header buffer address                                     |
+ * ----------------------------------------------------------------
+ * | RSV                                                          |
+ * ----------------------------------------------------------------
+ * | RSV                                                          |
+ * ----------------------------------------------------------------
+ * |                                                             0|
+ */
+struct virtchnl2_singleq_rx_buf_desc {
+	__le64  pkt_addr;
+	__le64  hdr_addr;
+	__le64  rsvd1;
+	__le64  rsvd2;
+};
+
+/**
+ * struct virtchnl2_singleq_base_rx_desc - RX descriptor writeback format.
+ * @qword0: First quad word struct.
+ * @qword0.lo_dword: Lower dual word struct.
+ * @qword0.lo_dword.mirroring_status: Mirrored packet status.
+ * @qword0.lo_dword.l2tag1: Stripped L2 tag from the received packet.
+ * @qword0.hi_dword: High dual word union.
+ * @qword0.hi_dword.rss: RSS hash.
+ * @qword0.hi_dword.fd_id: Flow director filter id.
+ * @qword1: Second quad word struct.
+ * @qword1.status_error_ptype_len: Status/error/PTYPE/length.
+ * @qword2: Third quad word struct.
+ * @qword2.ext_status: Extended status.
+ * @qword2.rsvd: Reserved.
+ * @qword2.l2tag2_1: Extracted L2 tag 2 from the packet.
+ * @qword2.l2tag2_2: Reserved.
+ * @qword3: Fourth quad word struct.
+ * @qword3.reserved: Reserved.
+ * @qword3.fd_id: Flow director filter id.
+ *
+ * Profile ID 0x1, SingleQ, base writeback format
+ */
+struct virtchnl2_singleq_base_rx_desc {
+	struct {
+		struct {
+			__le16 mirroring_status;
+			__le16 l2tag1;
+		} lo_dword;
+		union {
+			__le32 rss;
+			__le32 fd_id;
+		} hi_dword;
+	} qword0;
+	struct {
+		__le64 status_error_ptype_len;
+	} qword1;
+	struct {
+		__le16 ext_status;
+		__le16 rsvd;
+		__le16 l2tag2_1;
+		__le16 l2tag2_2;
+	} qword2;
+	struct {
+		__le32 reserved;
+		__le32 fd_id;
+	} qword3;
+};
+
+/**
+ * struct virtchnl2_rx_flex_desc_nic - RX descriptor writeback format.
+ *
+ * @rxdid: Descriptor builder profile id.
+ * @mir_id_umb_cast: umb_cast=[7:6], mirror=[5:0]
+ * @ptype_flex_flags0: ff0=[15:10], ptype=[9:0]
+ * @pkt_len: Packet length, [15:14] are reserved.
+ * @hdr_len_sph_flex_flags1: ff1/ext=[15:12], sph=[11], header=[10:0].
+ * @status_error0: Status/Error section 0.
+ * @l2tag1: Stripped L2 tag from the received packet
+ * @rss_hash: RSS hash.
+ * @status_error1: Status/Error section 1.
+ * @flexi_flags2: Flexible flags section 2.
+ * @ts_low: Lower word of timestamp value.
+ * @l2tag2_1st: First L2TAG2.
+ * @l2tag2_2nd: Second L2TAG2.
+ * @flow_id: Flow id.
+ * @flex_ts: Timestamp and flexible flow id union.
+ * @flex_ts.ts_high: Timestamp higher word of the timestamp value.
+ * @flex_ts.flex.rsvd: Reserved.
+ * @flex_ts.flex.flow_id_ipv6: IPv6 flow id.
+ *
+ * Profile ID 0x2, SingleQ, flex writeback format
+ */
+struct virtchnl2_rx_flex_desc_nic {
+	/* Qword 0 */
+	u8 rxdid;
+	u8 mir_id_umb_cast;
+	__le16 ptype_flex_flags0;
+	__le16 pkt_len;
+	__le16 hdr_len_sph_flex_flags1;
+	/* Qword 1 */
+	__le16 status_error0;
+	__le16 l2tag1;
+	__le32 rss_hash;
+	/* Qword 2 */
+	__le16 status_error1;
+	u8 flexi_flags2;
+	u8 ts_low;
+	__le16 l2tag2_1st;
+	__le16 l2tag2_2nd;
+	/* Qword 3 */
+	__le32 flow_id;
+	union {
+		struct {
+			__le16 rsvd;
+			__le16 flow_id_ipv6;
+		} flex;
+		__le32 ts_high;
+	} flex_ts;
+};
+
+/**
+ * struct virtchnl2_rx_flex_desc_adv_nic_3 - RX descriptor writeback format.
+ * @rxdid_ucast: ucast=[7:6], rsvd=[5:4], profile_id=[3:0].
+ * @status_err0_qw0: Status/Error section 0 in quad word 0.
+ * @ptype_err_fflags0: ff0=[15:12], udp_len_err=[11], ip_hdr_err=[10],
+ *		       ptype=[9:0].
+ * @pktlen_gen_bufq_id: bufq_id=[15] only in splitq, gen=[14] only in splitq,
+ *			plen=[13:0].
+ * @hdrlen_flags: miss_prepend=[15], trunc_mirr=[14], int_udp_0=[13],
+ *		  ext_udp0=[12], sph=[11] only in splitq, rsc=[10]
+ *		  only in splitq, header=[9:0].
+ * @status_err0_qw1: Status/Error section 0 in quad word 1.
+ * @status_err1: Status/Error section 1.
+ * @fflags1: Flexible flags section 1.
+ * @ts_low: Lower word of timestamp value.
+ * @buf_id: Buffer identifier. Only in splitq mode.
+ * @misc: Union.
+ * @misc.raw_cs: Raw checksum.
+ * @misc.l2tag1: Stripped L2 tag from the received packet
+ * @misc.rscseglen:
+ * @hash1: Lower bits of Rx hash value.
+ * @ff2_mirrid_hash2: Union.
+ * @ff2_mirrid_hash2.fflags2: Flexible flags section 2.
+ * @ff2_mirrid_hash2.mirrorid: Mirror id.
+ * @ff2_mirrid_hash2.rscseglen: RSC segment length.
+ * @hash3: Upper bits of Rx hash value.
+ * @l2tag2: Extracted L2 tag 2 from the packet.
+ * @fmd4: Flexible metadata container 4.
+ * @l2tag1: Stripped L2 tag from the received packet
+ * @fmd6: Flexible metadata container 6.
+ * @ts_high: Timestamp higher word of the timestamp value.
+ *
+ * Profile ID 0x2, SplitQ, flex writeback format
+ *
+ * Flex-field 0: BufferID
+ * Flex-field 1: Raw checksum/L2TAG1/RSC Seg Len (determined by HW)
+ * Flex-field 2: Hash[15:0]
+ * Flex-flags 2: Hash[23:16]
+ * Flex-field 3: L2TAG2
+ * Flex-field 5: L2TAG1
+ * Flex-field 7: Timestamp (upper 32 bits)
+ */
+struct virtchnl2_rx_flex_desc_adv_nic_3 {
+	/* Qword 0 */
+	u8 rxdid_ucast;
+	u8 status_err0_qw0;
+	__le16 ptype_err_fflags0;
+	__le16 pktlen_gen_bufq_id;
+	__le16 hdrlen_flags;
+	/* Qword 1 */
+	u8 status_err0_qw1;
+	u8 status_err1;
+	u8 fflags1;
+	u8 ts_low;
+	__le16 buf_id;
+	union {
+		__le16 raw_cs;
+		__le16 l2tag1;
+		__le16 rscseglen;
+	} misc;
+	/* Qword 2 */
+	__le16 hash1;
+	union {
+		u8 fflags2;
+		u8 mirrorid;
+		u8 hash2;
+	} ff2_mirrid_hash2;
+	u8 hash3;
+	__le16 l2tag2;
+	__le16 fmd4;
+	/* Qword 3 */
+	__le16 l2tag1;
+	__le16 fmd6;
+	__le32 ts_high;
+};
+
+/* Common union for accessing descriptor format structs */
+union virtchnl2_rx_desc {
+	struct virtchnl2_singleq_base_rx_desc		base_wb;
+	struct virtchnl2_rx_flex_desc_nic		flex_nic_wb;
+	struct virtchnl2_rx_flex_desc_adv_nic_3		flex_adv_nic_3_wb;
+};
+
+#endif /* _VIRTCHNL_LAN_DESC_H_ */
diff --git a/drivers/net/ethernet/korina.c b/drivers/net/ethernet/korina.c
index 5f6ae11212ae..81cf3361a1e5 100644
--- a/drivers/net/ethernet/korina.c
+++ b/drivers/net/ethernet/korina.c
@@ -1380,13 +1380,11 @@ static int korina_probe(struct platform_device *pdev)
 	return rc;
 }
 
-static int korina_remove(struct platform_device *pdev)
+static void korina_remove(struct platform_device *pdev)
 {
 	struct net_device *dev = platform_get_drvdata(pdev);
 
 	unregister_netdev(dev);
-
-	return 0;
 }
 
 #ifdef CONFIG_OF
@@ -1405,7 +1403,7 @@ static struct platform_driver korina_driver = {
 		.of_match_table = of_match_ptr(korina_match),
 	},
 	.probe = korina_probe,
-	.remove = korina_remove,
+	.remove_new = korina_remove,
 };
 
 module_platform_driver(korina_driver);
diff --git a/drivers/net/ethernet/lantiq_etop.c b/drivers/net/ethernet/lantiq_etop.c
index f5961bdcc480..1d5b7bb6380f 100644
--- a/drivers/net/ethernet/lantiq_etop.c
+++ b/drivers/net/ethernet/lantiq_etop.c
@@ -721,8 +721,7 @@ err_out:
 	return err;
 }
 
-static int
-ltq_etop_remove(struct platform_device *pdev)
+static void ltq_etop_remove(struct platform_device *pdev)
 {
 	struct net_device *dev = platform_get_drvdata(pdev);
 
@@ -732,11 +731,10 @@ ltq_etop_remove(struct platform_device *pdev)
 		ltq_etop_mdio_cleanup(dev);
 		unregister_netdev(dev);
 	}
-	return 0;
 }
 
 static struct platform_driver ltq_mii_driver = {
-	.remove = ltq_etop_remove,
+	.remove_new = ltq_etop_remove,
 	.driver = {
 		.name = "ltq_etop",
 	},
diff --git a/drivers/net/ethernet/lantiq_xrx200.c b/drivers/net/ethernet/lantiq_xrx200.c
index 8d646c7f8c82..8bd4def3622e 100644
--- a/drivers/net/ethernet/lantiq_xrx200.c
+++ b/drivers/net/ethernet/lantiq_xrx200.c
@@ -641,7 +641,7 @@ err_uninit_dma:
 	return err;
 }
 
-static int xrx200_remove(struct platform_device *pdev)
+static void xrx200_remove(struct platform_device *pdev)
 {
 	struct xrx200_priv *priv = platform_get_drvdata(pdev);
 	struct net_device *net_dev = priv->net_dev;
@@ -659,8 +659,6 @@ static int xrx200_remove(struct platform_device *pdev)
 
 	/* shut down hardware */
 	xrx200_hw_cleanup(priv);
-
-	return 0;
 }
 
 static const struct of_device_id xrx200_match[] = {
@@ -671,7 +669,7 @@ MODULE_DEVICE_TABLE(of, xrx200_match);
 
 static struct platform_driver xrx200_driver = {
 	.probe = xrx200_probe,
-	.remove = xrx200_remove,
+	.remove_new = xrx200_remove,
 	.driver = {
 		.name = "lantiq,xrx200-net",
 		.of_match_table = xrx200_match,
diff --git a/drivers/net/ethernet/litex/litex_liteeth.c b/drivers/net/ethernet/litex/litex_liteeth.c
index ffa96059079c..5182fe737c37 100644
--- a/drivers/net/ethernet/litex/litex_liteeth.c
+++ b/drivers/net/ethernet/litex/litex_liteeth.c
@@ -294,13 +294,11 @@ static int liteeth_probe(struct platform_device *pdev)
 	return 0;
 }
 
-static int liteeth_remove(struct platform_device *pdev)
+static void liteeth_remove(struct platform_device *pdev)
 {
 	struct net_device *netdev = platform_get_drvdata(pdev);
 
 	unregister_netdev(netdev);
-
-	return 0;
 }
 
 static const struct of_device_id liteeth_of_match[] = {
@@ -311,7 +309,7 @@ MODULE_DEVICE_TABLE(of, liteeth_of_match);
 
 static struct platform_driver liteeth_driver = {
 	.probe = liteeth_probe,
-	.remove = liteeth_remove,
+	.remove_new = liteeth_remove,
 	.driver = {
 		.name = DRV_NAME,
 		.of_match_table = liteeth_of_match,
diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c
index 3b129a1c3381..f0bdc06d253d 100644
--- a/drivers/net/ethernet/marvell/mv643xx_eth.c
+++ b/drivers/net/ethernet/marvell/mv643xx_eth.c
@@ -2892,19 +2892,18 @@ err_put_clk:
 	return ret;
 }
 
-static int mv643xx_eth_shared_remove(struct platform_device *pdev)
+static void mv643xx_eth_shared_remove(struct platform_device *pdev)
 {
 	struct mv643xx_eth_shared_private *msp = platform_get_drvdata(pdev);
 
 	mv643xx_eth_shared_of_remove();
 	if (!IS_ERR(msp->clk))
 		clk_disable_unprepare(msp->clk);
-	return 0;
 }
 
 static struct platform_driver mv643xx_eth_shared_driver = {
 	.probe		= mv643xx_eth_shared_probe,
-	.remove		= mv643xx_eth_shared_remove,
+	.remove_new	= mv643xx_eth_shared_remove,
 	.driver = {
 		.name	= MV643XX_ETH_SHARED_NAME,
 		.of_match_table = of_match_ptr(mv643xx_eth_shared_ids),
@@ -3279,7 +3278,7 @@ out:
 	return err;
 }
 
-static int mv643xx_eth_remove(struct platform_device *pdev)
+static void mv643xx_eth_remove(struct platform_device *pdev)
 {
 	struct mv643xx_eth_private *mp = platform_get_drvdata(pdev);
 	struct net_device *dev = mp->dev;
@@ -3293,8 +3292,6 @@ static int mv643xx_eth_remove(struct platform_device *pdev)
 		clk_disable_unprepare(mp->clk);
 
 	free_netdev(mp->dev);
-
-	return 0;
 }
 
 static void mv643xx_eth_shutdown(struct platform_device *pdev)
@@ -3311,7 +3308,7 @@ static void mv643xx_eth_shutdown(struct platform_device *pdev)
 
 static struct platform_driver mv643xx_eth_driver = {
 	.probe		= mv643xx_eth_probe,
-	.remove		= mv643xx_eth_remove,
+	.remove_new	= mv643xx_eth_remove,
 	.shutdown	= mv643xx_eth_shutdown,
 	.driver = {
 		.name	= MV643XX_ETH_NAME,
diff --git a/drivers/net/ethernet/marvell/mvmdio.c b/drivers/net/ethernet/marvell/mvmdio.c
index 674913184ebf..89f26402f8fb 100644
--- a/drivers/net/ethernet/marvell/mvmdio.c
+++ b/drivers/net/ethernet/marvell/mvmdio.c
@@ -388,7 +388,7 @@ out_clk:
 	return ret;
 }
 
-static int orion_mdio_remove(struct platform_device *pdev)
+static void orion_mdio_remove(struct platform_device *pdev)
 {
 	struct mii_bus *bus = platform_get_drvdata(pdev);
 	struct orion_mdio_dev *dev = bus->priv;
@@ -404,8 +404,6 @@ static int orion_mdio_remove(struct platform_device *pdev)
 		clk_disable_unprepare(dev->clk[i]);
 		clk_put(dev->clk[i]);
 	}
-
-	return 0;
 }
 
 static const struct of_device_id orion_mdio_match[] = {
@@ -426,7 +424,7 @@ MODULE_DEVICE_TABLE(acpi, orion_mdio_acpi_match);
 
 static struct platform_driver orion_mdio_driver = {
 	.probe = orion_mdio_probe,
-	.remove = orion_mdio_remove,
+	.remove_new = orion_mdio_remove,
 	.driver = {
 		.name = "orion-mdio",
 		.of_match_table = orion_mdio_match,
diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index d483b8c00ec0..61a430e5bc91 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -5725,7 +5725,7 @@ err_free_irq:
 }
 
 /* Device removal routine */
-static int mvneta_remove(struct platform_device *pdev)
+static void mvneta_remove(struct platform_device *pdev)
 {
 	struct net_device  *dev = platform_get_drvdata(pdev);
 	struct mvneta_port *pp = netdev_priv(dev);
@@ -5744,8 +5744,6 @@ static int mvneta_remove(struct platform_device *pdev)
 				       1 << pp->id);
 		mvneta_bm_put(pp->bm_priv);
 	}
-
-	return 0;
 }
 
 #ifdef CONFIG_PM_SLEEP
@@ -5871,7 +5869,7 @@ MODULE_DEVICE_TABLE(of, mvneta_match);
 
 static struct platform_driver mvneta_driver = {
 	.probe = mvneta_probe,
-	.remove = mvneta_remove,
+	.remove_new = mvneta_remove,
 	.driver = {
 		.name = MVNETA_DRIVER_NAME,
 		.of_match_table = mvneta_match,
diff --git a/drivers/net/ethernet/marvell/mvneta_bm.c b/drivers/net/ethernet/marvell/mvneta_bm.c
index 46c942ef2287..3f46a0fed048 100644
--- a/drivers/net/ethernet/marvell/mvneta_bm.c
+++ b/drivers/net/ethernet/marvell/mvneta_bm.c
@@ -457,7 +457,7 @@ err_clk:
 	return err;
 }
 
-static int mvneta_bm_remove(struct platform_device *pdev)
+static void mvneta_bm_remove(struct platform_device *pdev)
 {
 	struct mvneta_bm *priv = platform_get_drvdata(pdev);
 	u8 all_ports_map = 0xff;
@@ -475,8 +475,6 @@ static int mvneta_bm_remove(struct platform_device *pdev)
 	mvneta_bm_write(priv, MVNETA_BM_COMMAND_REG, MVNETA_BM_STOP_MASK);
 
 	clk_disable_unprepare(priv->clk);
-
-	return 0;
 }
 
 static const struct of_device_id mvneta_bm_match[] = {
@@ -487,7 +485,7 @@ MODULE_DEVICE_TABLE(of, mvneta_bm_match);
 
 static struct platform_driver mvneta_bm_driver = {
 	.probe = mvneta_bm_probe,
-	.remove = mvneta_bm_remove,
+	.remove_new = mvneta_bm_remove,
 	.driver = {
 		.name = MVNETA_BM_DRIVER_NAME,
 		.of_match_table = mvneta_bm_match,
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
index 21c3f9b015c8..463e89d3f17a 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
@@ -7662,7 +7662,7 @@ err_pp_clk:
 	return err;
 }
 
-static int mvpp2_remove(struct platform_device *pdev)
+static void mvpp2_remove(struct platform_device *pdev)
 {
 	struct mvpp2 *priv = platform_get_drvdata(pdev);
 	struct fwnode_handle *fwnode = pdev->dev.fwnode;
@@ -7700,15 +7700,13 @@ static int mvpp2_remove(struct platform_device *pdev)
 	}
 
 	if (is_acpi_node(port_fwnode))
-		return 0;
+		return;
 
 	clk_disable_unprepare(priv->axi_clk);
 	clk_disable_unprepare(priv->mg_core_clk);
 	clk_disable_unprepare(priv->mg_clk);
 	clk_disable_unprepare(priv->pp_clk);
 	clk_disable_unprepare(priv->gop_clk);
-
-	return 0;
 }
 
 static const struct of_device_id mvpp2_match[] = {
@@ -7734,7 +7732,7 @@ MODULE_DEVICE_TABLE(acpi, mvpp2_acpi_match);
 
 static struct platform_driver mvpp2_driver = {
 	.probe = mvpp2_probe,
-	.remove = mvpp2_remove,
+	.remove_new = mvpp2_remove,
 	.driver = {
 		.name = MVPP2_DRIVER_NAME,
 		.of_match_table = mvpp2_match,
diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_cn9k_pf.c b/drivers/net/ethernet/marvell/octeon_ep/octep_cn9k_pf.c
index 90c3a419932d..d4ee2454675b 100644
--- a/drivers/net/ethernet/marvell/octeon_ep/octep_cn9k_pf.c
+++ b/drivers/net/ethernet/marvell/octeon_ep/octep_cn9k_pf.c
@@ -16,9 +16,6 @@
 #define CTRL_MBOX_MAX_PF	128
 #define CTRL_MBOX_SZ		((size_t)(0x400000 / CTRL_MBOX_MAX_PF))
 
-#define FW_HB_INTERVAL_IN_SECS		1
-#define FW_HB_MISS_COUNT		10
-
 /* Names of Hardware non-queue generic interrupts */
 static char *cn93_non_ioq_msix_names[] = {
 	"epf_ire_rint",
@@ -250,12 +247,11 @@ static void octep_init_config_cn93_pf(struct octep_device *oct)
 		link = PCI_DEVFN(PCI_SLOT(oct->pdev->devfn), link);
 	}
 	conf->ctrl_mbox_cfg.barmem_addr = (void __iomem *)oct->mmio[2].hw_addr +
-					   (0x400000ull * 7) +
+					   CN93_PEM_BAR4_INDEX_OFFSET +
 					   (link * CTRL_MBOX_SZ);
 
-	conf->hb_interval = FW_HB_INTERVAL_IN_SECS;
-	conf->max_hb_miss_cnt = FW_HB_MISS_COUNT;
-
+	conf->fw_info.hb_interval = OCTEP_DEFAULT_FW_HB_INTERVAL;
+	conf->fw_info.hb_miss_count = OCTEP_DEFAULT_FW_HB_MISS_COUNT;
 }
 
 /* Setup registers for a hardware Tx Queue  */
@@ -373,34 +369,40 @@ static void octep_setup_mbox_regs_cn93_pf(struct octep_device *oct, int q_no)
 	mbox->mbox_read_reg = oct->mmio[0].hw_addr + CN93_SDP_R_MBOX_VF_PF_DATA(q_no);
 }
 
-/* Process non-ioq interrupts required to keep pf interface running.
- * OEI_RINT is needed for control mailbox
- */
-static bool octep_poll_non_ioq_interrupts_cn93_pf(struct octep_device *oct)
-{
-	bool handled = false;
-	u64 reg0;
-
-	/* Check for OEI INTR */
-	reg0 = octep_read_csr64(oct, CN93_SDP_EPF_OEI_RINT);
-	if (reg0) {
-		dev_info(&oct->pdev->dev,
-			 "Received OEI_RINT intr: 0x%llx\n",
-			 reg0);
-		octep_write_csr64(oct, CN93_SDP_EPF_OEI_RINT, reg0);
-		if (reg0 & CN93_SDP_EPF_OEI_RINT_DATA_BIT_MBOX)
+/* Poll OEI events like heartbeat */
+static void octep_poll_oei_cn93_pf(struct octep_device *oct)
+{
+	u64 reg;
+
+	reg = octep_read_csr64(oct, CN93_SDP_EPF_OEI_RINT);
+	if (reg) {
+		octep_write_csr64(oct, CN93_SDP_EPF_OEI_RINT, reg);
+		if (reg & CN93_SDP_EPF_OEI_RINT_DATA_BIT_MBOX)
 			queue_work(octep_wq, &oct->ctrl_mbox_task);
-		else if (reg0 & CN93_SDP_EPF_OEI_RINT_DATA_BIT_HBEAT)
+		else if (reg & CN93_SDP_EPF_OEI_RINT_DATA_BIT_HBEAT)
 			atomic_set(&oct->hb_miss_cnt, 0);
-
-		handled = true;
 	}
+}
+
+/* OEI interrupt handler */
+static irqreturn_t octep_oei_intr_handler_cn93_pf(void *dev)
+{
+	struct octep_device *oct = (struct octep_device *)dev;
+
+	octep_poll_oei_cn93_pf(oct);
+	return IRQ_HANDLED;
+}
 
-	return handled;
+/* Process non-ioq interrupts required to keep pf interface running.
+ * OEI_RINT is needed for control mailbox
+ */
+static void octep_poll_non_ioq_interrupts_cn93_pf(struct octep_device *oct)
+{
+	octep_poll_oei_cn93_pf(oct);
 }
 
-/* Interrupts handler for all non-queue generic interrupts. */
-static irqreturn_t octep_non_ioq_intr_handler_cn93_pf(void *dev)
+/* Interrupt handler for input ring error interrupts. */
+static irqreturn_t octep_ire_intr_handler_cn93_pf(void *dev)
 {
 	struct octep_device *oct = (struct octep_device *)dev;
 	struct pci_dev *pdev = oct->pdev;
@@ -425,8 +427,17 @@ static irqreturn_t octep_non_ioq_intr_handler_cn93_pf(void *dev)
 						  reg_val);
 			}
 		}
-		goto irq_handled;
 	}
+	return IRQ_HANDLED;
+}
+
+/* Interrupt handler for output ring error interrupts. */
+static irqreturn_t octep_ore_intr_handler_cn93_pf(void *dev)
+{
+	struct octep_device *oct = (struct octep_device *)dev;
+	struct pci_dev *pdev = oct->pdev;
+	u64 reg_val = 0;
+	int i = 0;
 
 	/* Check for ORERR INTR */
 	reg_val = octep_read_csr64(oct, CN93_SDP_EPF_ORERR_RINT);
@@ -444,9 +455,16 @@ static irqreturn_t octep_non_ioq_intr_handler_cn93_pf(void *dev)
 						  reg_val);
 			}
 		}
-
-		goto irq_handled;
 	}
+	return IRQ_HANDLED;
+}
+
+/* Interrupt handler for vf input ring error interrupts. */
+static irqreturn_t octep_vfire_intr_handler_cn93_pf(void *dev)
+{
+	struct octep_device *oct = (struct octep_device *)dev;
+	struct pci_dev *pdev = oct->pdev;
+	u64 reg_val = 0;
 
 	/* Check for VFIRE INTR */
 	reg_val = octep_read_csr64(oct, CN93_SDP_EPF_VFIRE_RINT(0));
@@ -454,8 +472,16 @@ static irqreturn_t octep_non_ioq_intr_handler_cn93_pf(void *dev)
 		dev_info(&pdev->dev,
 			 "Received VFIRE_RINT intr: 0x%llx\n", reg_val);
 		octep_write_csr64(oct, CN93_SDP_EPF_VFIRE_RINT(0), reg_val);
-		goto irq_handled;
 	}
+	return IRQ_HANDLED;
+}
+
+/* Interrupt handler for vf output ring error interrupts. */
+static irqreturn_t octep_vfore_intr_handler_cn93_pf(void *dev)
+{
+	struct octep_device *oct = (struct octep_device *)dev;
+	struct pci_dev *pdev = oct->pdev;
+	u64 reg_val = 0;
 
 	/* Check for VFORE INTR */
 	reg_val = octep_read_csr64(oct, CN93_SDP_EPF_VFORE_RINT(0));
@@ -463,19 +489,30 @@ static irqreturn_t octep_non_ioq_intr_handler_cn93_pf(void *dev)
 		dev_info(&pdev->dev,
 			 "Received VFORE_RINT intr: 0x%llx\n", reg_val);
 		octep_write_csr64(oct, CN93_SDP_EPF_VFORE_RINT(0), reg_val);
-		goto irq_handled;
 	}
+	return IRQ_HANDLED;
+}
 
-	/* Check for MBOX INTR and OEI INTR */
-	if (octep_poll_non_ioq_interrupts_cn93_pf(oct))
-		goto irq_handled;
+/* Interrupt handler for dpi dma related interrupts. */
+static irqreturn_t octep_dma_intr_handler_cn93_pf(void *dev)
+{
+	struct octep_device *oct = (struct octep_device *)dev;
+	u64 reg_val = 0;
 
 	/* Check for DMA INTR */
 	reg_val = octep_read_csr64(oct, CN93_SDP_EPF_DMA_RINT);
 	if (reg_val) {
 		octep_write_csr64(oct, CN93_SDP_EPF_DMA_RINT, reg_val);
-		goto irq_handled;
 	}
+	return IRQ_HANDLED;
+}
+
+/* Interrupt handler for dpi dma transaction error interrupts for VFs  */
+static irqreturn_t octep_dma_vf_intr_handler_cn93_pf(void *dev)
+{
+	struct octep_device *oct = (struct octep_device *)dev;
+	struct pci_dev *pdev = oct->pdev;
+	u64 reg_val = 0;
 
 	/* Check for DMA VF INTR */
 	reg_val = octep_read_csr64(oct, CN93_SDP_EPF_DMA_VF_RINT(0));
@@ -483,8 +520,16 @@ static irqreturn_t octep_non_ioq_intr_handler_cn93_pf(void *dev)
 		dev_info(&pdev->dev,
 			 "Received DMA_VF_RINT intr: 0x%llx\n", reg_val);
 		octep_write_csr64(oct, CN93_SDP_EPF_DMA_VF_RINT(0), reg_val);
-		goto irq_handled;
 	}
+	return IRQ_HANDLED;
+}
+
+/* Interrupt handler for pp transaction error interrupts for VFs  */
+static irqreturn_t octep_pp_vf_intr_handler_cn93_pf(void *dev)
+{
+	struct octep_device *oct = (struct octep_device *)dev;
+	struct pci_dev *pdev = oct->pdev;
+	u64 reg_val = 0;
 
 	/* Check for PPVF INTR */
 	reg_val = octep_read_csr64(oct, CN93_SDP_EPF_PP_VF_RINT(0));
@@ -492,8 +537,16 @@ static irqreturn_t octep_non_ioq_intr_handler_cn93_pf(void *dev)
 		dev_info(&pdev->dev,
 			 "Received PP_VF_RINT intr: 0x%llx\n", reg_val);
 		octep_write_csr64(oct, CN93_SDP_EPF_PP_VF_RINT(0), reg_val);
-		goto irq_handled;
 	}
+	return IRQ_HANDLED;
+}
+
+/* Interrupt handler for mac related interrupts. */
+static irqreturn_t octep_misc_intr_handler_cn93_pf(void *dev)
+{
+	struct octep_device *oct = (struct octep_device *)dev;
+	struct pci_dev *pdev = oct->pdev;
+	u64 reg_val = 0;
 
 	/* Check for MISC INTR */
 	reg_val = octep_read_csr64(oct, CN93_SDP_EPF_MISC_RINT);
@@ -501,11 +554,17 @@ static irqreturn_t octep_non_ioq_intr_handler_cn93_pf(void *dev)
 		dev_info(&pdev->dev,
 			 "Received MISC_RINT intr: 0x%llx\n", reg_val);
 		octep_write_csr64(oct, CN93_SDP_EPF_MISC_RINT, reg_val);
-		goto irq_handled;
 	}
+	return IRQ_HANDLED;
+}
+
+/* Interrupts handler for all reserved interrupts. */
+static irqreturn_t octep_rsvd_intr_handler_cn93_pf(void *dev)
+{
+	struct octep_device *oct = (struct octep_device *)dev;
+	struct pci_dev *pdev = oct->pdev;
 
 	dev_info(&pdev->dev, "Reserved interrupts raised; Ignore\n");
-irq_handled:
 	return IRQ_HANDLED;
 }
 
@@ -569,8 +628,15 @@ static void octep_enable_interrupts_cn93_pf(struct octep_device *oct)
 	octep_write_csr64(oct, CN93_SDP_EPF_IRERR_RINT_ENA_W1S, intr_mask);
 	octep_write_csr64(oct, CN93_SDP_EPF_ORERR_RINT_ENA_W1S, intr_mask);
 	octep_write_csr64(oct, CN93_SDP_EPF_OEI_RINT_ENA_W1S, -1ULL);
+
+	octep_write_csr64(oct, CN93_SDP_EPF_VFIRE_RINT_ENA_W1S(0), -1ULL);
+	octep_write_csr64(oct, CN93_SDP_EPF_VFORE_RINT_ENA_W1S(0), -1ULL);
+
 	octep_write_csr64(oct, CN93_SDP_EPF_MISC_RINT_ENA_W1S, intr_mask);
 	octep_write_csr64(oct, CN93_SDP_EPF_DMA_RINT_ENA_W1S, intr_mask);
+
+	octep_write_csr64(oct, CN93_SDP_EPF_DMA_VF_RINT_ENA_W1S(0), -1ULL);
+	octep_write_csr64(oct, CN93_SDP_EPF_PP_VF_RINT_ENA_W1S(0), -1ULL);
 }
 
 /* Disable all interrupts */
@@ -588,8 +654,15 @@ static void octep_disable_interrupts_cn93_pf(struct octep_device *oct)
 	octep_write_csr64(oct, CN93_SDP_EPF_IRERR_RINT_ENA_W1C, intr_mask);
 	octep_write_csr64(oct, CN93_SDP_EPF_ORERR_RINT_ENA_W1C, intr_mask);
 	octep_write_csr64(oct, CN93_SDP_EPF_OEI_RINT_ENA_W1C, -1ULL);
+
+	octep_write_csr64(oct, CN93_SDP_EPF_VFIRE_RINT_ENA_W1C(0), -1ULL);
+	octep_write_csr64(oct, CN93_SDP_EPF_VFORE_RINT_ENA_W1C(0), -1ULL);
+
 	octep_write_csr64(oct, CN93_SDP_EPF_MISC_RINT_ENA_W1C, intr_mask);
 	octep_write_csr64(oct, CN93_SDP_EPF_DMA_RINT_ENA_W1C, intr_mask);
+
+	octep_write_csr64(oct, CN93_SDP_EPF_DMA_VF_RINT_ENA_W1C(0), -1ULL);
+	octep_write_csr64(oct, CN93_SDP_EPF_PP_VF_RINT_ENA_W1C(0), -1ULL);
 }
 
 /* Get new Octeon Read Index: index of descriptor that Octeon reads next. */
@@ -722,7 +795,16 @@ void octep_device_setup_cn93_pf(struct octep_device *oct)
 	oct->hw_ops.setup_oq_regs = octep_setup_oq_regs_cn93_pf;
 	oct->hw_ops.setup_mbox_regs = octep_setup_mbox_regs_cn93_pf;
 
-	oct->hw_ops.non_ioq_intr_handler = octep_non_ioq_intr_handler_cn93_pf;
+	oct->hw_ops.oei_intr_handler = octep_oei_intr_handler_cn93_pf;
+	oct->hw_ops.ire_intr_handler = octep_ire_intr_handler_cn93_pf;
+	oct->hw_ops.ore_intr_handler = octep_ore_intr_handler_cn93_pf;
+	oct->hw_ops.vfire_intr_handler = octep_vfire_intr_handler_cn93_pf;
+	oct->hw_ops.vfore_intr_handler = octep_vfore_intr_handler_cn93_pf;
+	oct->hw_ops.dma_intr_handler = octep_dma_intr_handler_cn93_pf;
+	oct->hw_ops.dma_vf_intr_handler = octep_dma_vf_intr_handler_cn93_pf;
+	oct->hw_ops.pp_vf_intr_handler = octep_pp_vf_intr_handler_cn93_pf;
+	oct->hw_ops.misc_intr_handler = octep_misc_intr_handler_cn93_pf;
+	oct->hw_ops.rsvd_intr_handler = octep_rsvd_intr_handler_cn93_pf;
 	oct->hw_ops.ioq_intr_handler = octep_ioq_intr_handler_cn93_pf;
 	oct->hw_ops.soft_reset = octep_soft_reset_cn93_pf;
 	oct->hw_ops.reinit_regs = octep_reinit_regs_cn93_pf;
diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_config.h b/drivers/net/ethernet/marvell/octeon_ep/octep_config.h
index df7cd39d9fce..1622a6ebf036 100644
--- a/drivers/net/ethernet/marvell/octeon_ep/octep_config.h
+++ b/drivers/net/ethernet/marvell/octeon_ep/octep_config.h
@@ -49,6 +49,11 @@
 /* Default MTU */
 #define OCTEP_DEFAULT_MTU    1500
 
+/* pf heartbeat interval in milliseconds */
+#define OCTEP_DEFAULT_FW_HB_INTERVAL           1000
+/* pf heartbeat miss count */
+#define OCTEP_DEFAULT_FW_HB_MISS_COUNT         20
+
 /* Macros to get octeon config params */
 #define CFG_GET_IQ_CFG(cfg)             ((cfg)->iq)
 #define CFG_GET_IQ_NUM_DESC(cfg)        ((cfg)->iq.num_descs)
@@ -181,6 +186,16 @@ struct octep_ctrl_mbox_config {
 	void __iomem *barmem_addr;
 };
 
+/* Info from firmware */
+struct octep_fw_info {
+	/* interface pkind */
+	u16 pkind;
+	/* heartbeat interval in milliseconds */
+	u16 hb_interval;
+	/* heartbeat miss count */
+	u16 hb_miss_count;
+};
+
 /* Data Structure to hold configuration limits and active config */
 struct octep_config {
 	/* Input Queue attributes. */
@@ -201,10 +216,7 @@ struct octep_config {
 	/* ctrl mbox config */
 	struct octep_ctrl_mbox_config ctrl_mbox_cfg;
 
-	/* Configured maximum heartbeat miss count */
-	u32 max_hb_miss_cnt;
-
-	/* Configured firmware heartbeat interval in secs */
-	u32 hb_interval;
+	/* fw info */
+	struct octep_fw_info fw_info;
 };
 #endif /* _OCTEP_CONFIG_H_ */
diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_net.c b/drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_net.c
index 17bfd5cdf462..0594607a2585 100644
--- a/drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_net.c
+++ b/drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_net.c
@@ -26,7 +26,7 @@ static atomic_t ctrl_net_msg_id;
 
 /* Control plane version in which OCTEP_CTRL_NET_H2F_CMD was added */
 static const u32 octep_ctrl_net_h2f_cmd_versions[OCTEP_CTRL_NET_H2F_CMD_MAX] = {
-	[OCTEP_CTRL_NET_H2F_CMD_INVALID ... OCTEP_CTRL_NET_H2F_CMD_LINK_INFO] =
+	[OCTEP_CTRL_NET_H2F_CMD_INVALID ... OCTEP_CTRL_NET_H2F_CMD_GET_INFO] =
 	 OCTEP_CP_VERSION(1, 0, 0)
 };
 
@@ -353,6 +353,28 @@ void octep_ctrl_net_recv_fw_messages(struct octep_device *oct)
 	}
 }
 
+int octep_ctrl_net_get_info(struct octep_device *oct, int vfid,
+			    struct octep_fw_info *info)
+{
+	struct octep_ctrl_net_wait_data d = {0};
+	struct octep_ctrl_net_h2f_resp *resp;
+	struct octep_ctrl_net_h2f_req *req;
+	int err;
+
+	req = &d.data.req;
+	init_send_req(&d.msg, req, 0, vfid);
+	req->hdr.s.cmd = OCTEP_CTRL_NET_H2F_CMD_GET_INFO;
+	req->link_info.cmd = OCTEP_CTRL_NET_CMD_GET;
+	err = octep_send_mbox_req(oct, &d, true);
+	if (err < 0)
+		return err;
+
+	resp = &d.data.resp;
+	memcpy(info, &resp->info.fw_info, sizeof(struct octep_fw_info));
+
+	return 0;
+}
+
 int octep_ctrl_net_uninit(struct octep_device *oct)
 {
 	struct octep_ctrl_net_wait_data *pos, *n;
diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_net.h b/drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_net.h
index 1c2ef4ee31d9..b330f370131b 100644
--- a/drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_net.h
+++ b/drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_net.h
@@ -41,6 +41,7 @@ enum octep_ctrl_net_h2f_cmd {
 	OCTEP_CTRL_NET_H2F_CMD_LINK_STATUS,
 	OCTEP_CTRL_NET_H2F_CMD_RX_STATE,
 	OCTEP_CTRL_NET_H2F_CMD_LINK_INFO,
+	OCTEP_CTRL_NET_H2F_CMD_GET_INFO,
 	OCTEP_CTRL_NET_H2F_CMD_MAX
 };
 
@@ -161,6 +162,11 @@ struct octep_ctrl_net_h2f_resp_cmd_state {
 	u16 state;
 };
 
+/* get info request */
+struct octep_ctrl_net_h2f_resp_cmd_get_info {
+	struct octep_fw_info fw_info;
+};
+
 /* Host to fw response data */
 struct octep_ctrl_net_h2f_resp {
 	union octep_ctrl_net_resp_hdr hdr;
@@ -171,6 +177,7 @@ struct octep_ctrl_net_h2f_resp {
 		struct octep_ctrl_net_h2f_resp_cmd_state link;
 		struct octep_ctrl_net_h2f_resp_cmd_state rx;
 		struct octep_ctrl_net_link_info link_info;
+		struct octep_ctrl_net_h2f_resp_cmd_get_info info;
 	};
 } __packed;
 
@@ -330,6 +337,17 @@ int octep_ctrl_net_set_link_info(struct octep_device *oct,
  */
 void octep_ctrl_net_recv_fw_messages(struct octep_device *oct);
 
+/** Get info from firmware.
+ *
+ * @param oct: non-null pointer to struct octep_device.
+ * @param vfid: Index of virtual function.
+ * @param info: non-null pointer to struct octep_fw_info.
+ *
+ * return value: 0 on success, -errno on failure.
+ */
+int octep_ctrl_net_get_info(struct octep_device *oct, int vfid,
+			    struct octep_fw_info *info);
+
 /** Uninitialize data for ctrl net.
  *
  * @param oct: non-null pointer to struct octep_device.
diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c
index 5b46ca47c8e5..552970c7dec0 100644
--- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c
+++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c
@@ -155,18 +155,153 @@ static void octep_disable_msix(struct octep_device *oct)
 }
 
 /**
- * octep_non_ioq_intr_handler() - common handler for all generic interrupts.
+ * octep_oei_intr_handler() - common handler for output endpoint interrupts.
  *
  * @irq: Interrupt number.
  * @data: interrupt data.
  *
- * this is common handler for all non-queue (generic) interrupts.
+ * this is common handler for all output endpoint interrupts.
+ */
+static irqreturn_t octep_oei_intr_handler(int irq, void *data)
+{
+	struct octep_device *oct = data;
+
+	return oct->hw_ops.oei_intr_handler(oct);
+}
+
+/**
+ * octep_ire_intr_handler() - common handler for input ring error interrupts.
+ *
+ * @irq: Interrupt number.
+ * @data: interrupt data.
+ *
+ * this is common handler for input ring error interrupts.
+ */
+static irqreturn_t octep_ire_intr_handler(int irq, void *data)
+{
+	struct octep_device *oct = data;
+
+	return oct->hw_ops.ire_intr_handler(oct);
+}
+
+/**
+ * octep_ore_intr_handler() - common handler for output ring error interrupts.
+ *
+ * @irq: Interrupt number.
+ * @data: interrupt data.
+ *
+ * this is common handler for output ring error interrupts.
+ */
+static irqreturn_t octep_ore_intr_handler(int irq, void *data)
+{
+	struct octep_device *oct = data;
+
+	return oct->hw_ops.ore_intr_handler(oct);
+}
+
+/**
+ * octep_vfire_intr_handler() - common handler for vf input ring error interrupts.
+ *
+ * @irq: Interrupt number.
+ * @data: interrupt data.
+ *
+ * this is common handler for vf input ring error interrupts.
+ */
+static irqreturn_t octep_vfire_intr_handler(int irq, void *data)
+{
+	struct octep_device *oct = data;
+
+	return oct->hw_ops.vfire_intr_handler(oct);
+}
+
+/**
+ * octep_vfore_intr_handler() - common handler for vf output ring error interrupts.
+ *
+ * @irq: Interrupt number.
+ * @data: interrupt data.
+ *
+ * this is common handler for vf output ring error interrupts.
+ */
+static irqreturn_t octep_vfore_intr_handler(int irq, void *data)
+{
+	struct octep_device *oct = data;
+
+	return oct->hw_ops.vfore_intr_handler(oct);
+}
+
+/**
+ * octep_dma_intr_handler() - common handler for dpi dma related interrupts.
+ *
+ * @irq: Interrupt number.
+ * @data: interrupt data.
+ *
+ * this is common handler for dpi dma related interrupts.
+ */
+static irqreturn_t octep_dma_intr_handler(int irq, void *data)
+{
+	struct octep_device *oct = data;
+
+	return oct->hw_ops.dma_intr_handler(oct);
+}
+
+/**
+ * octep_dma_vf_intr_handler() - common handler for dpi dma transaction error interrupts for VFs.
+ *
+ * @irq: Interrupt number.
+ * @data: interrupt data.
+ *
+ * this is common handler for dpi dma transaction error interrupts for VFs.
  */
-static irqreturn_t octep_non_ioq_intr_handler(int irq, void *data)
+static irqreturn_t octep_dma_vf_intr_handler(int irq, void *data)
 {
 	struct octep_device *oct = data;
 
-	return oct->hw_ops.non_ioq_intr_handler(oct);
+	return oct->hw_ops.dma_vf_intr_handler(oct);
+}
+
+/**
+ * octep_pp_vf_intr_handler() - common handler for pp transaction error interrupts for VFs.
+ *
+ * @irq: Interrupt number.
+ * @data: interrupt data.
+ *
+ * this is common handler for pp transaction error interrupts for VFs.
+ */
+static irqreturn_t octep_pp_vf_intr_handler(int irq, void *data)
+{
+	struct octep_device *oct = data;
+
+	return oct->hw_ops.pp_vf_intr_handler(oct);
+}
+
+/**
+ * octep_misc_intr_handler() - common handler for mac related interrupts.
+ *
+ * @irq: Interrupt number.
+ * @data: interrupt data.
+ *
+ * this is common handler for mac related interrupts.
+ */
+static irqreturn_t octep_misc_intr_handler(int irq, void *data)
+{
+	struct octep_device *oct = data;
+
+	return oct->hw_ops.misc_intr_handler(oct);
+}
+
+/**
+ * octep_rsvd_intr_handler() - common handler for reserved interrupts (future use).
+ *
+ * @irq: Interrupt number.
+ * @data: interrupt data.
+ *
+ * this is common handler for all reserved interrupts.
+ */
+static irqreturn_t octep_rsvd_intr_handler(int irq, void *data)
+{
+	struct octep_device *oct = data;
+
+	return oct->hw_ops.rsvd_intr_handler(oct);
 }
 
 /**
@@ -222,9 +357,57 @@ static int octep_request_irqs(struct octep_device *oct)
 
 		snprintf(irq_name, OCTEP_MSIX_NAME_SIZE,
 			 "%s-%s", netdev->name, non_ioq_msix_names[i]);
-		ret = request_irq(msix_entry->vector,
-				  octep_non_ioq_intr_handler, 0,
-				  irq_name, oct);
+		if (!strncmp(non_ioq_msix_names[i], "epf_oei_rint",
+			     strlen("epf_oei_rint"))) {
+			ret = request_irq(msix_entry->vector,
+					  octep_oei_intr_handler, 0,
+					  irq_name, oct);
+		} else if (!strncmp(non_ioq_msix_names[i], "epf_ire_rint",
+			   strlen("epf_ire_rint"))) {
+			ret = request_irq(msix_entry->vector,
+					  octep_ire_intr_handler, 0,
+					  irq_name, oct);
+		} else if (!strncmp(non_ioq_msix_names[i], "epf_ore_rint",
+			   strlen("epf_ore_rint"))) {
+			ret = request_irq(msix_entry->vector,
+					  octep_ore_intr_handler, 0,
+					  irq_name, oct);
+		} else if (!strncmp(non_ioq_msix_names[i], "epf_vfire_rint",
+			   strlen("epf_vfire_rint"))) {
+			ret = request_irq(msix_entry->vector,
+					  octep_vfire_intr_handler, 0,
+					  irq_name, oct);
+		} else if (!strncmp(non_ioq_msix_names[i], "epf_vfore_rint",
+			   strlen("epf_vfore_rint"))) {
+			ret = request_irq(msix_entry->vector,
+					  octep_vfore_intr_handler, 0,
+					  irq_name, oct);
+		} else if (!strncmp(non_ioq_msix_names[i], "epf_dma_rint",
+			   strlen("epf_dma_rint"))) {
+			ret = request_irq(msix_entry->vector,
+					  octep_dma_intr_handler, 0,
+					  irq_name, oct);
+		} else if (!strncmp(non_ioq_msix_names[i], "epf_dma_vf_rint",
+			   strlen("epf_dma_vf_rint"))) {
+			ret = request_irq(msix_entry->vector,
+					  octep_dma_vf_intr_handler, 0,
+					  irq_name, oct);
+		} else if (!strncmp(non_ioq_msix_names[i], "epf_pp_vf_rint",
+			   strlen("epf_pp_vf_rint"))) {
+			ret = request_irq(msix_entry->vector,
+					  octep_pp_vf_intr_handler, 0,
+					  irq_name, oct);
+		} else if (!strncmp(non_ioq_msix_names[i], "epf_misc_rint",
+			   strlen("epf_misc_rint"))) {
+			ret = request_irq(msix_entry->vector,
+					  octep_misc_intr_handler, 0,
+					  irq_name, oct);
+		} else {
+			ret = request_irq(msix_entry->vector,
+					  octep_rsvd_intr_handler, 0,
+					  irq_name, oct);
+		}
+
 		if (ret) {
 			netdev_err(netdev,
 				   "request_irq failed for %s; err=%d",
@@ -917,9 +1100,9 @@ static void octep_hb_timeout_task(struct work_struct *work)
 	int miss_cnt;
 
 	miss_cnt = atomic_inc_return(&oct->hb_miss_cnt);
-	if (miss_cnt < oct->conf->max_hb_miss_cnt) {
+	if (miss_cnt < oct->conf->fw_info.hb_miss_count) {
 		queue_delayed_work(octep_wq, &oct->hb_task,
-				   msecs_to_jiffies(oct->conf->hb_interval * 1000));
+				   msecs_to_jiffies(oct->conf->fw_info.hb_interval));
 		return;
 	}
 
@@ -1012,8 +1195,7 @@ int octep_device_setup(struct octep_device *oct)
 
 	atomic_set(&oct->hb_miss_cnt, 0);
 	INIT_DELAYED_WORK(&oct->hb_task, octep_hb_timeout_task);
-	queue_delayed_work(octep_wq, &oct->hb_task,
-			   msecs_to_jiffies(oct->conf->hb_interval * 1000));
+
 	return 0;
 
 unsupported_dev:
@@ -1142,6 +1324,15 @@ static int octep_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 		dev_err(&pdev->dev, "Device setup failed\n");
 		goto err_octep_config;
 	}
+
+	octep_ctrl_net_get_info(octep_dev, OCTEP_CTRL_NET_INVALID_VFID,
+				&octep_dev->conf->fw_info);
+	dev_info(&octep_dev->pdev->dev, "Heartbeat interval %u msecs Heartbeat miss count %u\n",
+		 octep_dev->conf->fw_info.hb_interval,
+		 octep_dev->conf->fw_info.hb_miss_count);
+	queue_delayed_work(octep_wq, &octep_dev->hb_task,
+			   msecs_to_jiffies(octep_dev->conf->fw_info.hb_interval));
+
 	INIT_WORK(&octep_dev->tx_timeout_task, octep_tx_timeout_task);
 	INIT_WORK(&octep_dev->ctrl_mbox_task, octep_ctrl_mbox_task);
 	INIT_DELAYED_WORK(&octep_dev->intr_poll_task, octep_intr_poll_task);
diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.h b/drivers/net/ethernet/marvell/octeon_ep/octep_main.h
index e0907a719133..6df902ebb7f3 100644
--- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.h
+++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.h
@@ -65,7 +65,16 @@ struct octep_hw_ops {
 	void (*setup_oq_regs)(struct octep_device *oct, int q);
 	void (*setup_mbox_regs)(struct octep_device *oct, int mbox);
 
-	irqreturn_t (*non_ioq_intr_handler)(void *ioq_vector);
+	irqreturn_t (*oei_intr_handler)(void *ioq_vector);
+	irqreturn_t (*ire_intr_handler)(void *ioq_vector);
+	irqreturn_t (*ore_intr_handler)(void *ioq_vector);
+	irqreturn_t (*vfire_intr_handler)(void *ioq_vector);
+	irqreturn_t (*vfore_intr_handler)(void *ioq_vector);
+	irqreturn_t (*dma_intr_handler)(void *ioq_vector);
+	irqreturn_t (*dma_vf_intr_handler)(void *ioq_vector);
+	irqreturn_t (*pp_vf_intr_handler)(void *ioq_vector);
+	irqreturn_t (*misc_intr_handler)(void *ioq_vector);
+	irqreturn_t (*rsvd_intr_handler)(void *ioq_vector);
 	irqreturn_t (*ioq_intr_handler)(void *ioq_vector);
 	int (*soft_reset)(struct octep_device *oct);
 	void (*reinit_regs)(struct octep_device *oct);
@@ -73,7 +82,7 @@ struct octep_hw_ops {
 
 	void (*enable_interrupts)(struct octep_device *oct);
 	void (*disable_interrupts)(struct octep_device *oct);
-	bool (*poll_non_ioq_interrupts)(struct octep_device *oct);
+	void (*poll_non_ioq_interrupts)(struct octep_device *oct);
 
 	void (*enable_io_queues)(struct octep_device *oct);
 	void (*disable_io_queues)(struct octep_device *oct);
diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cn9k_pf.h b/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cn9k_pf.h
index b25c3093dc7b..0a43983e9101 100644
--- a/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cn9k_pf.h
+++ b/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cn9k_pf.h
@@ -370,4 +370,8 @@
 /* bit 1 for firmware heartbeat interrupt */
 #define CN93_SDP_EPF_OEI_RINT_DATA_BIT_HBEAT	BIT_ULL(1)
 
+#define CN93_PEM_BAR4_INDEX            7
+#define CN93_PEM_BAR4_INDEX_SIZE       0x400000ULL
+#define CN93_PEM_BAR4_INDEX_OFFSET     (CN93_PEM_BAR4_INDEX * CN93_PEM_BAR4_INDEX_SIZE)
+
 #endif /* _OCTEP_REGS_CN9K_PF_H_ */
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
index 6b5b06c2b4e9..c5bd7747fc0e 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
@@ -1574,7 +1574,7 @@ enum ptp_op {
 	PTP_OP_GET_CLOCK = 1,
 	PTP_OP_GET_TSTMP = 2,
 	PTP_OP_SET_THRESH = 3,
-	PTP_OP_EXTTS_ON = 4,
+	PTP_OP_PPS_ON = 4,
 	PTP_OP_ADJTIME = 5,
 	PTP_OP_SET_CLOCK = 6,
 };
@@ -1584,7 +1584,8 @@ struct ptp_req {
 	u8 op;
 	s64 scaled_ppm;
 	u64 thresh;
-	int extts_on;
+	u64 period;
+	int pps_on;
 	s64 delta;
 	u64 clk;
 };
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/ptp.c b/drivers/net/ethernet/marvell/octeontx2/af/ptp.c
index ffbd22797163..bcc96eed2481 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/ptp.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/ptp.c
@@ -46,6 +46,7 @@
 
 #define PTP_PPS_HI_INCR				0xF60ULL
 #define PTP_PPS_LO_INCR				0xF68ULL
+#define PTP_PPS_THRESH_LO			0xF50ULL
 #define PTP_PPS_THRESH_HI			0xF58ULL
 
 #define PTP_CLOCK_LO				0xF08ULL
@@ -411,29 +412,12 @@ void ptp_start(struct rvu *rvu, u64 sclk, u32 ext_clk_freq, u32 extts)
 	}
 
 	clock_cfg |= PTP_CLOCK_CFG_PTP_EN;
-	clock_cfg |= PTP_CLOCK_CFG_PPS_EN | PTP_CLOCK_CFG_PPS_INV;
 	writeq(clock_cfg, ptp->reg_base + PTP_CLOCK_CFG);
 	clock_cfg = readq(ptp->reg_base + PTP_CLOCK_CFG);
 	clock_cfg &= ~PTP_CLOCK_CFG_ATOMIC_OP_MASK;
 	clock_cfg |= (ATOMIC_SET << 26);
 	writeq(clock_cfg, ptp->reg_base + PTP_CLOCK_CFG);
 
-	/* Set 50% duty cycle for 1Hz output */
-	writeq(0x1dcd650000000000, ptp->reg_base + PTP_PPS_HI_INCR);
-	writeq(0x1dcd650000000000, ptp->reg_base + PTP_PPS_LO_INCR);
-	if (cn10k_ptp_errata(ptp)) {
-		/* The ptp_clock_hi rollsover to zero once clock cycle before it
-		 * reaches one second boundary. so, program the pps_lo_incr in
-		 * such a way that the pps threshold value comparison at one
-		 * second boundary will succeed and pps edge changes. After each
-		 * one second boundary, the hrtimer handler will be invoked and
-		 * reprograms the pps threshold value.
-		 */
-		ptp->clock_period = NSEC_PER_SEC / ptp->clock_rate;
-		writeq((0x1dcd6500ULL - ptp->clock_period) << 32,
-		       ptp->reg_base + PTP_PPS_LO_INCR);
-	}
-
 	if (cn10k_ptp_errata(ptp))
 		clock_comp = ptp_calc_adjusted_comp(ptp->clock_rate);
 	else
@@ -465,20 +449,68 @@ static int ptp_set_thresh(struct ptp *ptp, u64 thresh)
 	return 0;
 }
 
-static int ptp_extts_on(struct ptp *ptp, int on)
+static int ptp_config_hrtimer(struct ptp *ptp, int on)
 {
 	u64 ptp_clock_hi;
 
-	if (cn10k_ptp_errata(ptp)) {
-		if (on) {
-			ptp_clock_hi = readq(ptp->reg_base + PTP_CLOCK_HI);
-			ptp_hrtimer_start(ptp, (ktime_t)ptp_clock_hi);
-		} else {
-			if (hrtimer_active(&ptp->hrtimer))
-				hrtimer_cancel(&ptp->hrtimer);
+	if (on) {
+		ptp_clock_hi = readq(ptp->reg_base + PTP_CLOCK_HI);
+		ptp_hrtimer_start(ptp, (ktime_t)ptp_clock_hi);
+	} else {
+		if (hrtimer_active(&ptp->hrtimer))
+			hrtimer_cancel(&ptp->hrtimer);
+	}
+
+	return 0;
+}
+
+static int ptp_pps_on(struct ptp *ptp, int on, u64 period)
+{
+	u64 clock_cfg;
+
+	clock_cfg = readq(ptp->reg_base + PTP_CLOCK_CFG);
+	if (on) {
+		if (cn10k_ptp_errata(ptp) && period != NSEC_PER_SEC) {
+			dev_err(&ptp->pdev->dev, "Supports max period value as 1 second\n");
+			return -EINVAL;
 		}
+
+		if (period > (8 * NSEC_PER_SEC)) {
+			dev_err(&ptp->pdev->dev, "Supports max period as 8 seconds\n");
+			return -EINVAL;
+		}
+
+		clock_cfg |= PTP_CLOCK_CFG_PPS_EN | PTP_CLOCK_CFG_PPS_INV;
+		writeq(clock_cfg, ptp->reg_base + PTP_CLOCK_CFG);
+
+		writeq(0, ptp->reg_base + PTP_PPS_THRESH_HI);
+		writeq(0, ptp->reg_base + PTP_PPS_THRESH_LO);
+
+		/* Configure high/low phase time */
+		period = period / 2;
+		writeq(((u64)period << 32), ptp->reg_base + PTP_PPS_HI_INCR);
+		writeq(((u64)period << 32), ptp->reg_base + PTP_PPS_LO_INCR);
+	} else {
+		clock_cfg &= ~(PTP_CLOCK_CFG_PPS_EN | PTP_CLOCK_CFG_PPS_INV);
+		writeq(clock_cfg, ptp->reg_base + PTP_CLOCK_CFG);
 	}
 
+	if (on && cn10k_ptp_errata(ptp)) {
+		/* The ptp_clock_hi rollsover to zero once clock cycle before it
+		 * reaches one second boundary. so, program the pps_lo_incr in
+		 * such a way that the pps threshold value comparison at one
+		 * second boundary will succeed and pps edge changes. After each
+		 * one second boundary, the hrtimer handler will be invoked and
+		 * reprograms the pps threshold value.
+		 */
+		ptp->clock_period = NSEC_PER_SEC / ptp->clock_rate;
+		writeq((0x1dcd6500ULL - ptp->clock_period) << 32,
+		       ptp->reg_base + PTP_PPS_LO_INCR);
+	}
+
+	if (cn10k_ptp_errata(ptp))
+		ptp_config_hrtimer(ptp, on);
+
 	return 0;
 }
 
@@ -613,8 +645,8 @@ int rvu_mbox_handler_ptp_op(struct rvu *rvu, struct ptp_req *req,
 	case PTP_OP_SET_THRESH:
 		err = ptp_set_thresh(rvu->ptp, req->thresh);
 		break;
-	case PTP_OP_EXTTS_ON:
-		err = ptp_extts_on(rvu->ptp, req->extts_on);
+	case PTP_OP_PPS_ON:
+		err = ptp_pps_on(rvu->ptp, req->pps_on, req->period);
 		break;
 	case PTP_OP_ADJTIME:
 		ptp_atomic_adjtime(rvu->ptp, req->delta);
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.c
index 3a72b0793d4a..63130ba37e9d 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.c
@@ -175,7 +175,7 @@ static int ptp_set_thresh(struct otx2_ptp *ptp, u64 thresh)
 	return otx2_sync_mbox_msg(&ptp->nic->mbox);
 }
 
-static int ptp_extts_on(struct otx2_ptp *ptp, int on)
+static int ptp_pps_on(struct otx2_ptp *ptp, int on, u64 period)
 {
 	struct ptp_req *req;
 
@@ -186,8 +186,9 @@ static int ptp_extts_on(struct otx2_ptp *ptp, int on)
 	if (!req)
 		return -ENOMEM;
 
-	req->op = PTP_OP_EXTTS_ON;
-	req->extts_on = on;
+	req->op = PTP_OP_PPS_ON;
+	req->pps_on = on;
+	req->period = period;
 
 	return otx2_sync_mbox_msg(&ptp->nic->mbox);
 }
@@ -276,8 +277,8 @@ static int otx2_ptp_verify_pin(struct ptp_clock_info *ptp, unsigned int pin,
 	switch (func) {
 	case PTP_PF_NONE:
 	case PTP_PF_EXTTS:
-		break;
 	case PTP_PF_PEROUT:
+		break;
 	case PTP_PF_PHYSYNC:
 		return -1;
 	}
@@ -340,6 +341,7 @@ static int otx2_ptp_enable(struct ptp_clock_info *ptp_info,
 {
 	struct otx2_ptp *ptp = container_of(ptp_info, struct otx2_ptp,
 					    ptp_info);
+	u64 period = 0;
 	int pin;
 
 	if (!ptp->nic)
@@ -351,12 +353,24 @@ static int otx2_ptp_enable(struct ptp_clock_info *ptp_info,
 				   rq->extts.index);
 		if (pin < 0)
 			return -EBUSY;
-		if (on) {
-			ptp_extts_on(ptp, on);
+		if (on)
 			schedule_delayed_work(&ptp->extts_work, msecs_to_jiffies(200));
-		} else {
-			ptp_extts_on(ptp, on);
+		else
 			cancel_delayed_work_sync(&ptp->extts_work);
+
+		return 0;
+	case PTP_CLK_REQ_PEROUT:
+		if (rq->perout.flags)
+			return -EOPNOTSUPP;
+
+		if (rq->perout.index >= ptp_info->n_pins)
+			return -EINVAL;
+		if (on) {
+			period = rq->perout.period.sec * NSEC_PER_SEC +
+				 rq->perout.period.nsec;
+			ptp_pps_on(ptp, on, period);
+		} else {
+			ptp_pps_on(ptp, on, period);
 		}
 		return 0;
 	default:
@@ -411,6 +425,7 @@ int otx2_ptp_init(struct otx2_nic *pfvf)
 		.name           = "OcteonTX2 PTP",
 		.max_adj        = 1000000000ull,
 		.n_ext_ts       = 1,
+		.n_per_out      = 1,
 		.n_pins         = 1,
 		.pps            = 0,
 		.pin_config     = &ptp_ptr->extts_config,
diff --git a/drivers/net/ethernet/marvell/pxa168_eth.c b/drivers/net/ethernet/marvell/pxa168_eth.c
index d5691b6a2bc5..dd6ca2e4fd51 100644
--- a/drivers/net/ethernet/marvell/pxa168_eth.c
+++ b/drivers/net/ethernet/marvell/pxa168_eth.c
@@ -1528,7 +1528,7 @@ err_clk:
 	return err;
 }
 
-static int pxa168_eth_remove(struct platform_device *pdev)
+static void pxa168_eth_remove(struct platform_device *pdev)
 {
 	struct net_device *dev = platform_get_drvdata(pdev);
 	struct pxa168_eth_private *pep = netdev_priv(dev);
@@ -1547,7 +1547,6 @@ static int pxa168_eth_remove(struct platform_device *pdev)
 	mdiobus_free(pep->smi_bus);
 	unregister_netdev(dev);
 	free_netdev(dev);
-	return 0;
 }
 
 static void pxa168_eth_shutdown(struct platform_device *pdev)
@@ -1580,7 +1579,7 @@ MODULE_DEVICE_TABLE(of, pxa168_eth_of_match);
 
 static struct platform_driver pxa168_eth_driver = {
 	.probe = pxa168_eth_probe,
-	.remove = pxa168_eth_remove,
+	.remove_new = pxa168_eth_remove,
 	.shutdown = pxa168_eth_shutdown,
 	.resume = pxa168_eth_resume,
 	.suspend = pxa168_eth_suspend,
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 20afe79f380a..7669b446915a 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -197,6 +197,7 @@ static const struct mtk_reg_map mt7988_reg_map = {
 	.wdma_base = {
 		[0]		= 0x4800,
 		[1]		= 0x4c00,
+		[2]		= 0x5000,
 	},
 	.pse_iq_sta		= 0x0180,
 	.pse_oq_sta		= 0x01a0,
@@ -5002,7 +5003,7 @@ err_destroy_sgmii:
 	return err;
 }
 
-static int mtk_remove(struct platform_device *pdev)
+static void mtk_remove(struct platform_device *pdev)
 {
 	struct mtk_eth *eth = platform_get_drvdata(pdev);
 	struct mtk_mac *mac;
@@ -5024,8 +5025,6 @@ static int mtk_remove(struct platform_device *pdev)
 	netif_napi_del(&eth->rx_napi);
 	mtk_cleanup(eth);
 	mtk_mdio_cleanup(eth);
-
-	return 0;
 }
 
 static const struct mtk_soc_data mt2701_data = {
@@ -5226,7 +5225,7 @@ MODULE_DEVICE_TABLE(of, of_mtk_match);
 
 static struct platform_driver mtk_driver = {
 	.probe = mtk_probe,
-	.remove = mtk_remove,
+	.remove_new = mtk_remove,
 	.driver = {
 		.name = "mtk_soc_eth",
 		.of_match_table = of_mtk_match,
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
index 403219d987ef..9ae3b8a71d0e 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
@@ -1132,7 +1132,7 @@ struct mtk_reg_map {
 	u32	gdm1_cnt;
 	u32	gdma_to_ppe;
 	u32	ppe_base;
-	u32	wdma_base[2];
+	u32	wdma_base[3];
 	u32	pse_iq_sta;
 	u32	pse_oq_sta;
 };
diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.c b/drivers/net/ethernet/mediatek/mtk_ppe.c
index 86f32f486043..b2a5d9c3733d 100644
--- a/drivers/net/ethernet/mediatek/mtk_ppe.c
+++ b/drivers/net/ethernet/mediatek/mtk_ppe.c
@@ -425,7 +425,8 @@ int mtk_foe_entry_set_pppoe(struct mtk_eth *eth, struct mtk_foe_entry *entry,
 }
 
 int mtk_foe_entry_set_wdma(struct mtk_eth *eth, struct mtk_foe_entry *entry,
-			   int wdma_idx, int txq, int bss, int wcid)
+			   int wdma_idx, int txq, int bss, int wcid,
+			   bool amsdu_en)
 {
 	struct mtk_foe_mac_info *l2 = mtk_foe_entry_l2(eth, entry);
 	u32 *ib2 = mtk_foe_entry_ib2(eth, entry);
@@ -437,6 +438,7 @@ int mtk_foe_entry_set_wdma(struct mtk_eth *eth, struct mtk_foe_entry *entry,
 			 MTK_FOE_IB2_WDMA_WINFO_V2;
 		l2->w3info = FIELD_PREP(MTK_FOE_WINFO_WCID_V3, wcid) |
 			     FIELD_PREP(MTK_FOE_WINFO_BSS_V3, bss);
+		l2->amsdu = FIELD_PREP(MTK_FOE_WINFO_AMSDU_EN, amsdu_en);
 		break;
 	case 2:
 		*ib2 &= ~MTK_FOE_IB2_PORT_MG_V2;
diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.h b/drivers/net/ethernet/mediatek/mtk_ppe.h
index e3d0ec72bc69..691806bca372 100644
--- a/drivers/net/ethernet/mediatek/mtk_ppe.h
+++ b/drivers/net/ethernet/mediatek/mtk_ppe.h
@@ -88,13 +88,13 @@ enum {
 #define MTK_FOE_WINFO_BSS_V3		GENMASK(23, 16)
 #define MTK_FOE_WINFO_WCID_V3		GENMASK(15, 0)
 
-#define MTK_FOE_WINFO_PAO_USR_INFO	GENMASK(15, 0)
-#define MTK_FOE_WINFO_PAO_TID		GENMASK(19, 16)
-#define MTK_FOE_WINFO_PAO_IS_FIXEDRATE	BIT(20)
-#define MTK_FOE_WINFO_PAO_IS_PRIOR	BIT(21)
-#define MTK_FOE_WINFO_PAO_IS_SP		BIT(22)
-#define MTK_FOE_WINFO_PAO_HF		BIT(23)
-#define MTK_FOE_WINFO_PAO_AMSDU_EN	BIT(24)
+#define MTK_FOE_WINFO_AMSDU_USR_INFO	GENMASK(15, 0)
+#define MTK_FOE_WINFO_AMSDU_TID		GENMASK(19, 16)
+#define MTK_FOE_WINFO_AMSDU_IS_FIXEDRATE	BIT(20)
+#define MTK_FOE_WINFO_AMSDU_IS_PRIOR	BIT(21)
+#define MTK_FOE_WINFO_AMSDU_IS_SP	BIT(22)
+#define MTK_FOE_WINFO_AMSDU_HF		BIT(23)
+#define MTK_FOE_WINFO_AMSDU_EN		BIT(24)
 
 enum {
 	MTK_FOE_STATE_INVALID,
@@ -123,7 +123,7 @@ struct mtk_foe_mac_info {
 
 	/* netsys_v3 */
 	u32 w3info;
-	u32 wpao;
+	u32 amsdu;
 };
 
 /* software-only entry type */
@@ -392,7 +392,8 @@ int mtk_foe_entry_set_vlan(struct mtk_eth *eth, struct mtk_foe_entry *entry,
 int mtk_foe_entry_set_pppoe(struct mtk_eth *eth, struct mtk_foe_entry *entry,
 			    int sid);
 int mtk_foe_entry_set_wdma(struct mtk_eth *eth, struct mtk_foe_entry *entry,
-			   int wdma_idx, int txq, int bss, int wcid);
+			   int wdma_idx, int txq, int bss, int wcid,
+			   bool amsdu_en);
 int mtk_foe_entry_set_queue(struct mtk_eth *eth, struct mtk_foe_entry *entry,
 			    unsigned int queue);
 int mtk_foe_entry_commit(struct mtk_ppe *ppe, struct mtk_flow_entry *entry);
diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
index a4efbeb16208..e073d2b5542c 100644
--- a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
+++ b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
@@ -111,6 +111,7 @@ mtk_flow_get_wdma_info(struct net_device *dev, const u8 *addr, struct mtk_wdma_i
 	info->queue = path->mtk_wdma.queue;
 	info->bss = path->mtk_wdma.bss;
 	info->wcid = path->mtk_wdma.wcid;
+	info->amsdu = path->mtk_wdma.amsdu;
 
 	return 0;
 }
@@ -192,14 +193,17 @@ mtk_flow_set_output_device(struct mtk_eth *eth, struct mtk_foe_entry *foe,
 
 	if (mtk_flow_get_wdma_info(dev, dest_mac, &info) == 0) {
 		mtk_foe_entry_set_wdma(eth, foe, info.wdma_idx, info.queue,
-				       info.bss, info.wcid);
+				       info.bss, info.wcid, info.amsdu);
 		if (mtk_is_netsys_v2_or_greater(eth)) {
 			switch (info.wdma_idx) {
 			case 0:
-				pse_port = 8;
+				pse_port = PSE_WDMA0_PORT;
 				break;
 			case 1:
-				pse_port = 9;
+				pse_port = PSE_WDMA1_PORT;
+				break;
+			case 2:
+				pse_port = PSE_WDMA2_PORT;
 				break;
 			default:
 				return -EINVAL;
diff --git a/drivers/net/ethernet/mediatek/mtk_wed.c b/drivers/net/ethernet/mediatek/mtk_wed.c
index 94376aa2b34c..9a6744c0d458 100644
--- a/drivers/net/ethernet/mediatek/mtk_wed.c
+++ b/drivers/net/ethernet/mediatek/mtk_wed.c
@@ -17,17 +17,21 @@
 #include <net/flow_offload.h>
 #include <net/pkt_cls.h>
 #include "mtk_eth_soc.h"
-#include "mtk_wed_regs.h"
 #include "mtk_wed.h"
 #include "mtk_ppe.h"
 #include "mtk_wed_wo.h"
 
 #define MTK_PCIE_BASE(n)		(0x1a143000 + (n) * 0x2000)
 
-#define MTK_WED_PKT_SIZE		1900
+#define MTK_WED_PKT_SIZE		1920
 #define MTK_WED_BUF_SIZE		2048
+#define MTK_WED_PAGE_BUF_SIZE		128
 #define MTK_WED_BUF_PER_PAGE		(PAGE_SIZE / 2048)
+#define MTK_WED_RX_BUF_PER_PAGE		(PAGE_SIZE / MTK_WED_PAGE_BUF_SIZE)
 #define MTK_WED_RX_RING_SIZE		1536
+#define MTK_WED_RX_PG_BM_CNT		8192
+#define MTK_WED_AMSDU_BUF_SIZE		(PAGE_SIZE << 4)
+#define MTK_WED_AMSDU_NPAGES		32
 
 #define MTK_WED_TX_RING_SIZE		2048
 #define MTK_WED_WDMA_RING_SIZE		1024
@@ -41,7 +45,10 @@
 #define MTK_WED_RRO_QUE_CNT		8192
 #define MTK_WED_MIOD_ENTRY_CNT		128
 
-static struct mtk_wed_hw *hw_list[2];
+#define MTK_WED_TX_BM_DMA_SIZE		65536
+#define MTK_WED_TX_BM_PKT_CNT		32768
+
+static struct mtk_wed_hw *hw_list[3];
 static DEFINE_MUTEX(hw_lock);
 
 struct mtk_wed_flow_block_priv {
@@ -49,6 +56,39 @@ struct mtk_wed_flow_block_priv {
 	struct net_device *dev;
 };
 
+static const struct mtk_wed_soc_data mt7622_data = {
+	.regmap = {
+		.tx_bm_tkid		= 0x088,
+		.wpdma_rx_ring0		= 0x770,
+		.reset_idx_tx_mask	= GENMASK(3, 0),
+		.reset_idx_rx_mask	= GENMASK(17, 16),
+	},
+	.tx_ring_desc_size = sizeof(struct mtk_wdma_desc),
+	.wdma_desc_size = sizeof(struct mtk_wdma_desc),
+};
+
+static const struct mtk_wed_soc_data mt7986_data = {
+	.regmap = {
+		.tx_bm_tkid		= 0x0c8,
+		.wpdma_rx_ring0		= 0x770,
+		.reset_idx_tx_mask	= GENMASK(1, 0),
+		.reset_idx_rx_mask	= GENMASK(7, 6),
+	},
+	.tx_ring_desc_size = sizeof(struct mtk_wdma_desc),
+	.wdma_desc_size = 2 * sizeof(struct mtk_wdma_desc),
+};
+
+static const struct mtk_wed_soc_data mt7988_data = {
+	.regmap = {
+		.tx_bm_tkid		= 0x0c8,
+		.wpdma_rx_ring0		= 0x7d0,
+		.reset_idx_tx_mask	= GENMASK(1, 0),
+		.reset_idx_rx_mask	= GENMASK(7, 6),
+	},
+	.tx_ring_desc_size = sizeof(struct mtk_wed_bm_desc),
+	.wdma_desc_size = 2 * sizeof(struct mtk_wdma_desc),
+};
+
 static void
 wed_m32(struct mtk_wed_device *dev, u32 reg, u32 mask, u32 val)
 {
@@ -109,6 +149,90 @@ mtk_wdma_read_reset(struct mtk_wed_device *dev)
 	return wdma_r32(dev, MTK_WDMA_GLO_CFG);
 }
 
+static void
+mtk_wdma_v3_rx_reset(struct mtk_wed_device *dev)
+{
+	u32 status;
+
+	if (!mtk_wed_is_v3_or_greater(dev->hw))
+		return;
+
+	wdma_clr(dev, MTK_WDMA_PREF_TX_CFG, MTK_WDMA_PREF_TX_CFG_PREF_EN);
+	wdma_clr(dev, MTK_WDMA_PREF_RX_CFG, MTK_WDMA_PREF_RX_CFG_PREF_EN);
+
+	if (read_poll_timeout(wdma_r32, status,
+			      !(status & MTK_WDMA_PREF_TX_CFG_PREF_BUSY),
+			      0, 10000, false, dev, MTK_WDMA_PREF_TX_CFG))
+		dev_err(dev->hw->dev, "rx reset failed\n");
+
+	if (read_poll_timeout(wdma_r32, status,
+			      !(status & MTK_WDMA_PREF_RX_CFG_PREF_BUSY),
+			      0, 10000, false, dev, MTK_WDMA_PREF_RX_CFG))
+		dev_err(dev->hw->dev, "rx reset failed\n");
+
+	wdma_clr(dev, MTK_WDMA_WRBK_TX_CFG, MTK_WDMA_WRBK_TX_CFG_WRBK_EN);
+	wdma_clr(dev, MTK_WDMA_WRBK_RX_CFG, MTK_WDMA_WRBK_RX_CFG_WRBK_EN);
+
+	if (read_poll_timeout(wdma_r32, status,
+			      !(status & MTK_WDMA_WRBK_TX_CFG_WRBK_BUSY),
+			      0, 10000, false, dev, MTK_WDMA_WRBK_TX_CFG))
+		dev_err(dev->hw->dev, "rx reset failed\n");
+
+	if (read_poll_timeout(wdma_r32, status,
+			      !(status & MTK_WDMA_WRBK_RX_CFG_WRBK_BUSY),
+			      0, 10000, false, dev, MTK_WDMA_WRBK_RX_CFG))
+		dev_err(dev->hw->dev, "rx reset failed\n");
+
+	/* prefetch FIFO */
+	wdma_w32(dev, MTK_WDMA_PREF_RX_FIFO_CFG,
+		 MTK_WDMA_PREF_RX_FIFO_CFG_RING0_CLEAR |
+		 MTK_WDMA_PREF_RX_FIFO_CFG_RING1_CLEAR);
+	wdma_clr(dev, MTK_WDMA_PREF_RX_FIFO_CFG,
+		 MTK_WDMA_PREF_RX_FIFO_CFG_RING0_CLEAR |
+		 MTK_WDMA_PREF_RX_FIFO_CFG_RING1_CLEAR);
+
+	/* core FIFO */
+	wdma_w32(dev, MTK_WDMA_XDMA_RX_FIFO_CFG,
+		 MTK_WDMA_XDMA_RX_FIFO_CFG_RX_PAR_FIFO_CLEAR |
+		 MTK_WDMA_XDMA_RX_FIFO_CFG_RX_CMD_FIFO_CLEAR |
+		 MTK_WDMA_XDMA_RX_FIFO_CFG_RX_DMAD_FIFO_CLEAR |
+		 MTK_WDMA_XDMA_RX_FIFO_CFG_RX_ARR_FIFO_CLEAR |
+		 MTK_WDMA_XDMA_RX_FIFO_CFG_RX_LEN_FIFO_CLEAR |
+		 MTK_WDMA_XDMA_RX_FIFO_CFG_RX_WID_FIFO_CLEAR |
+		 MTK_WDMA_XDMA_RX_FIFO_CFG_RX_BID_FIFO_CLEAR);
+	wdma_clr(dev, MTK_WDMA_XDMA_RX_FIFO_CFG,
+		 MTK_WDMA_XDMA_RX_FIFO_CFG_RX_PAR_FIFO_CLEAR |
+		 MTK_WDMA_XDMA_RX_FIFO_CFG_RX_CMD_FIFO_CLEAR |
+		 MTK_WDMA_XDMA_RX_FIFO_CFG_RX_DMAD_FIFO_CLEAR |
+		 MTK_WDMA_XDMA_RX_FIFO_CFG_RX_ARR_FIFO_CLEAR |
+		 MTK_WDMA_XDMA_RX_FIFO_CFG_RX_LEN_FIFO_CLEAR |
+		 MTK_WDMA_XDMA_RX_FIFO_CFG_RX_WID_FIFO_CLEAR |
+		 MTK_WDMA_XDMA_RX_FIFO_CFG_RX_BID_FIFO_CLEAR);
+
+	/* writeback FIFO */
+	wdma_w32(dev, MTK_WDMA_WRBK_RX_FIFO_CFG(0),
+		 MTK_WDMA_WRBK_RX_FIFO_CFG_RING_CLEAR);
+	wdma_w32(dev, MTK_WDMA_WRBK_RX_FIFO_CFG(1),
+		 MTK_WDMA_WRBK_RX_FIFO_CFG_RING_CLEAR);
+
+	wdma_clr(dev, MTK_WDMA_WRBK_RX_FIFO_CFG(0),
+		 MTK_WDMA_WRBK_RX_FIFO_CFG_RING_CLEAR);
+	wdma_clr(dev, MTK_WDMA_WRBK_RX_FIFO_CFG(1),
+		 MTK_WDMA_WRBK_RX_FIFO_CFG_RING_CLEAR);
+
+	/* prefetch ring status */
+	wdma_w32(dev, MTK_WDMA_PREF_SIDX_CFG,
+		 MTK_WDMA_PREF_SIDX_CFG_RX_RING_CLEAR);
+	wdma_clr(dev, MTK_WDMA_PREF_SIDX_CFG,
+		 MTK_WDMA_PREF_SIDX_CFG_RX_RING_CLEAR);
+
+	/* writeback ring status */
+	wdma_w32(dev, MTK_WDMA_WRBK_SIDX_CFG,
+		 MTK_WDMA_WRBK_SIDX_CFG_RX_RING_CLEAR);
+	wdma_clr(dev, MTK_WDMA_WRBK_SIDX_CFG,
+		 MTK_WDMA_WRBK_SIDX_CFG_RX_RING_CLEAR);
+}
+
 static int
 mtk_wdma_rx_reset(struct mtk_wed_device *dev)
 {
@@ -121,6 +245,7 @@ mtk_wdma_rx_reset(struct mtk_wed_device *dev)
 	if (ret)
 		dev_err(dev->hw->dev, "rx reset failed\n");
 
+	mtk_wdma_v3_rx_reset(dev);
 	wdma_w32(dev, MTK_WDMA_RESET_IDX, MTK_WDMA_RESET_IDX_RX);
 	wdma_w32(dev, MTK_WDMA_RESET_IDX, 0);
 
@@ -135,6 +260,101 @@ mtk_wdma_rx_reset(struct mtk_wed_device *dev)
 	return ret;
 }
 
+static u32
+mtk_wed_check_busy(struct mtk_wed_device *dev, u32 reg, u32 mask)
+{
+	return !!(wed_r32(dev, reg) & mask);
+}
+
+static int
+mtk_wed_poll_busy(struct mtk_wed_device *dev, u32 reg, u32 mask)
+{
+	int sleep = 15000;
+	int timeout = 100 * sleep;
+	u32 val;
+
+	return read_poll_timeout(mtk_wed_check_busy, val, !val, sleep,
+				 timeout, false, dev, reg, mask);
+}
+
+static void
+mtk_wdma_v3_tx_reset(struct mtk_wed_device *dev)
+{
+	u32 status;
+
+	if (!mtk_wed_is_v3_or_greater(dev->hw))
+		return;
+
+	wdma_clr(dev, MTK_WDMA_PREF_TX_CFG, MTK_WDMA_PREF_TX_CFG_PREF_EN);
+	wdma_clr(dev, MTK_WDMA_PREF_RX_CFG, MTK_WDMA_PREF_RX_CFG_PREF_EN);
+
+	if (read_poll_timeout(wdma_r32, status,
+			      !(status & MTK_WDMA_PREF_TX_CFG_PREF_BUSY),
+			      0, 10000, false, dev, MTK_WDMA_PREF_TX_CFG))
+		dev_err(dev->hw->dev, "tx reset failed\n");
+
+	if (read_poll_timeout(wdma_r32, status,
+			      !(status & MTK_WDMA_PREF_RX_CFG_PREF_BUSY),
+			      0, 10000, false, dev, MTK_WDMA_PREF_RX_CFG))
+		dev_err(dev->hw->dev, "tx reset failed\n");
+
+	wdma_clr(dev, MTK_WDMA_WRBK_TX_CFG, MTK_WDMA_WRBK_TX_CFG_WRBK_EN);
+	wdma_clr(dev, MTK_WDMA_WRBK_RX_CFG, MTK_WDMA_WRBK_RX_CFG_WRBK_EN);
+
+	if (read_poll_timeout(wdma_r32, status,
+			      !(status & MTK_WDMA_WRBK_TX_CFG_WRBK_BUSY),
+			      0, 10000, false, dev, MTK_WDMA_WRBK_TX_CFG))
+		dev_err(dev->hw->dev, "tx reset failed\n");
+
+	if (read_poll_timeout(wdma_r32, status,
+			      !(status & MTK_WDMA_WRBK_RX_CFG_WRBK_BUSY),
+			      0, 10000, false, dev, MTK_WDMA_WRBK_RX_CFG))
+		dev_err(dev->hw->dev, "tx reset failed\n");
+
+	/* prefetch FIFO */
+	wdma_w32(dev, MTK_WDMA_PREF_TX_FIFO_CFG,
+		 MTK_WDMA_PREF_TX_FIFO_CFG_RING0_CLEAR |
+		 MTK_WDMA_PREF_TX_FIFO_CFG_RING1_CLEAR);
+	wdma_clr(dev, MTK_WDMA_PREF_TX_FIFO_CFG,
+		 MTK_WDMA_PREF_TX_FIFO_CFG_RING0_CLEAR |
+		 MTK_WDMA_PREF_TX_FIFO_CFG_RING1_CLEAR);
+
+	/* core FIFO */
+	wdma_w32(dev, MTK_WDMA_XDMA_TX_FIFO_CFG,
+		 MTK_WDMA_XDMA_TX_FIFO_CFG_TX_PAR_FIFO_CLEAR |
+		 MTK_WDMA_XDMA_TX_FIFO_CFG_TX_CMD_FIFO_CLEAR |
+		 MTK_WDMA_XDMA_TX_FIFO_CFG_TX_DMAD_FIFO_CLEAR |
+		 MTK_WDMA_XDMA_TX_FIFO_CFG_TX_ARR_FIFO_CLEAR);
+	wdma_clr(dev, MTK_WDMA_XDMA_TX_FIFO_CFG,
+		 MTK_WDMA_XDMA_TX_FIFO_CFG_TX_PAR_FIFO_CLEAR |
+		 MTK_WDMA_XDMA_TX_FIFO_CFG_TX_CMD_FIFO_CLEAR |
+		 MTK_WDMA_XDMA_TX_FIFO_CFG_TX_DMAD_FIFO_CLEAR |
+		 MTK_WDMA_XDMA_TX_FIFO_CFG_TX_ARR_FIFO_CLEAR);
+
+	/* writeback FIFO */
+	wdma_w32(dev, MTK_WDMA_WRBK_TX_FIFO_CFG(0),
+		 MTK_WDMA_WRBK_TX_FIFO_CFG_RING_CLEAR);
+	wdma_w32(dev, MTK_WDMA_WRBK_TX_FIFO_CFG(1),
+		 MTK_WDMA_WRBK_TX_FIFO_CFG_RING_CLEAR);
+
+	wdma_clr(dev, MTK_WDMA_WRBK_TX_FIFO_CFG(0),
+		 MTK_WDMA_WRBK_TX_FIFO_CFG_RING_CLEAR);
+	wdma_clr(dev, MTK_WDMA_WRBK_TX_FIFO_CFG(1),
+		 MTK_WDMA_WRBK_TX_FIFO_CFG_RING_CLEAR);
+
+	/* prefetch ring status */
+	wdma_w32(dev, MTK_WDMA_PREF_SIDX_CFG,
+		 MTK_WDMA_PREF_SIDX_CFG_TX_RING_CLEAR);
+	wdma_clr(dev, MTK_WDMA_PREF_SIDX_CFG,
+		 MTK_WDMA_PREF_SIDX_CFG_TX_RING_CLEAR);
+
+	/* writeback ring status */
+	wdma_w32(dev, MTK_WDMA_WRBK_SIDX_CFG,
+		 MTK_WDMA_WRBK_SIDX_CFG_TX_RING_CLEAR);
+	wdma_clr(dev, MTK_WDMA_WRBK_SIDX_CFG,
+		 MTK_WDMA_WRBK_SIDX_CFG_TX_RING_CLEAR);
+}
+
 static void
 mtk_wdma_tx_reset(struct mtk_wed_device *dev)
 {
@@ -146,6 +366,7 @@ mtk_wdma_tx_reset(struct mtk_wed_device *dev)
 			       !(status & mask), 0, 10000))
 		dev_err(dev->hw->dev, "tx reset failed\n");
 
+	mtk_wdma_v3_tx_reset(dev);
 	wdma_w32(dev, MTK_WDMA_RESET_IDX, MTK_WDMA_RESET_IDX_TX);
 	wdma_w32(dev, MTK_WDMA_RESET_IDX, 0);
 
@@ -278,7 +499,7 @@ mtk_wed_assign(struct mtk_wed_device *dev)
 		if (!hw->wed_dev)
 			goto out;
 
-		if (hw->version == 1)
+		if (mtk_wed_is_v1(hw))
 			return NULL;
 
 		/* MT7986 WED devices do not have any pcie slot restrictions */
@@ -298,35 +519,152 @@ out:
 }
 
 static int
+mtk_wed_amsdu_buffer_alloc(struct mtk_wed_device *dev)
+{
+	struct mtk_wed_hw *hw = dev->hw;
+	struct mtk_wed_amsdu *wed_amsdu;
+	int i;
+
+	if (!mtk_wed_is_v3_or_greater(hw))
+		return 0;
+
+	wed_amsdu = devm_kcalloc(hw->dev, MTK_WED_AMSDU_NPAGES,
+				 sizeof(*wed_amsdu), GFP_KERNEL);
+	if (!wed_amsdu)
+		return -ENOMEM;
+
+	for (i = 0; i < MTK_WED_AMSDU_NPAGES; i++) {
+		void *ptr;
+
+		/* each segment is 64K */
+		ptr = (void *)__get_free_pages(GFP_KERNEL | __GFP_NOWARN |
+					       __GFP_ZERO | __GFP_COMP |
+					       GFP_DMA32,
+					       get_order(MTK_WED_AMSDU_BUF_SIZE));
+		if (!ptr)
+			goto error;
+
+		wed_amsdu[i].txd = ptr;
+		wed_amsdu[i].txd_phy = dma_map_single(hw->dev, ptr,
+						      MTK_WED_AMSDU_BUF_SIZE,
+						      DMA_TO_DEVICE);
+		if (dma_mapping_error(hw->dev, wed_amsdu[i].txd_phy))
+			goto error;
+	}
+	dev->hw->wed_amsdu = wed_amsdu;
+
+	return 0;
+
+error:
+	for (i--; i >= 0; i--)
+		dma_unmap_single(hw->dev, wed_amsdu[i].txd_phy,
+				 MTK_WED_AMSDU_BUF_SIZE, DMA_TO_DEVICE);
+	return -ENOMEM;
+}
+
+static void
+mtk_wed_amsdu_free_buffer(struct mtk_wed_device *dev)
+{
+	struct mtk_wed_amsdu *wed_amsdu = dev->hw->wed_amsdu;
+	int i;
+
+	if (!wed_amsdu)
+		return;
+
+	for (i = 0; i < MTK_WED_AMSDU_NPAGES; i++) {
+		dma_unmap_single(dev->hw->dev, wed_amsdu[i].txd_phy,
+				 MTK_WED_AMSDU_BUF_SIZE, DMA_TO_DEVICE);
+		free_pages((unsigned long)wed_amsdu[i].txd,
+			   get_order(MTK_WED_AMSDU_BUF_SIZE));
+	}
+}
+
+static int
+mtk_wed_amsdu_init(struct mtk_wed_device *dev)
+{
+	struct mtk_wed_amsdu *wed_amsdu = dev->hw->wed_amsdu;
+	int i, ret;
+
+	if (!wed_amsdu)
+		return 0;
+
+	for (i = 0; i < MTK_WED_AMSDU_NPAGES; i++)
+		wed_w32(dev, MTK_WED_AMSDU_HIFTXD_BASE_L(i),
+			wed_amsdu[i].txd_phy);
+
+	/* init all sta parameter */
+	wed_w32(dev, MTK_WED_AMSDU_STA_INFO_INIT, MTK_WED_AMSDU_STA_RMVL |
+		MTK_WED_AMSDU_STA_WTBL_HDRT_MODE |
+		FIELD_PREP(MTK_WED_AMSDU_STA_MAX_AMSDU_LEN,
+			   dev->wlan.amsdu_max_len >> 8) |
+		FIELD_PREP(MTK_WED_AMSDU_STA_MAX_AMSDU_NUM,
+			   dev->wlan.amsdu_max_subframes));
+
+	wed_w32(dev, MTK_WED_AMSDU_STA_INFO, MTK_WED_AMSDU_STA_INFO_DO_INIT);
+
+	ret = mtk_wed_poll_busy(dev, MTK_WED_AMSDU_STA_INFO,
+				MTK_WED_AMSDU_STA_INFO_DO_INIT);
+	if (ret) {
+		dev_err(dev->hw->dev, "amsdu initialization failed\n");
+		return ret;
+	}
+
+	/* init partial amsdu offload txd src */
+	wed_set(dev, MTK_WED_AMSDU_HIFTXD_CFG,
+		FIELD_PREP(MTK_WED_AMSDU_HIFTXD_SRC, dev->hw->index));
+
+	/* init qmem */
+	wed_set(dev, MTK_WED_AMSDU_PSE, MTK_WED_AMSDU_PSE_RESET);
+	ret = mtk_wed_poll_busy(dev, MTK_WED_MON_AMSDU_QMEM_STS1, BIT(29));
+	if (ret) {
+		pr_info("%s: amsdu qmem initialization failed\n", __func__);
+		return ret;
+	}
+
+	/* eagle E1 PCIE1 tx ring 22 flow control issue */
+	if (dev->wlan.id == 0x7991)
+		wed_clr(dev, MTK_WED_AMSDU_FIFO, MTK_WED_AMSDU_IS_PRIOR0_RING);
+
+	wed_set(dev, MTK_WED_CTRL, MTK_WED_CTRL_TX_AMSDU_EN);
+
+	return 0;
+}
+
+static int
 mtk_wed_tx_buffer_alloc(struct mtk_wed_device *dev)
 {
-	struct mtk_wdma_desc *desc;
-	dma_addr_t desc_phys;
-	void **page_list;
+	u32 desc_size = dev->hw->soc->tx_ring_desc_size;
+	int i, page_idx = 0, n_pages, ring_size;
 	int token = dev->wlan.token_start;
-	int ring_size;
-	int n_pages;
-	int i, page_idx;
+	struct mtk_wed_buf *page_list;
+	dma_addr_t desc_phys;
+	void *desc_ptr;
 
-	ring_size = dev->wlan.nbuf & ~(MTK_WED_BUF_PER_PAGE - 1);
-	n_pages = ring_size / MTK_WED_BUF_PER_PAGE;
+	if (!mtk_wed_is_v3_or_greater(dev->hw)) {
+		ring_size = dev->wlan.nbuf & ~(MTK_WED_BUF_PER_PAGE - 1);
+		dev->tx_buf_ring.size = ring_size;
+	} else {
+		dev->tx_buf_ring.size = MTK_WED_TX_BM_DMA_SIZE;
+		ring_size = MTK_WED_TX_BM_PKT_CNT;
+	}
+	n_pages = dev->tx_buf_ring.size / MTK_WED_BUF_PER_PAGE;
 
 	page_list = kcalloc(n_pages, sizeof(*page_list), GFP_KERNEL);
 	if (!page_list)
 		return -ENOMEM;
 
-	dev->tx_buf_ring.size = ring_size;
 	dev->tx_buf_ring.pages = page_list;
 
-	desc = dma_alloc_coherent(dev->hw->dev, ring_size * sizeof(*desc),
-				  &desc_phys, GFP_KERNEL);
-	if (!desc)
+	desc_ptr = dma_alloc_coherent(dev->hw->dev,
+				      dev->tx_buf_ring.size * desc_size,
+				      &desc_phys, GFP_KERNEL);
+	if (!desc_ptr)
 		return -ENOMEM;
 
-	dev->tx_buf_ring.desc = desc;
+	dev->tx_buf_ring.desc = desc_ptr;
 	dev->tx_buf_ring.desc_phys = desc_phys;
 
-	for (i = 0, page_idx = 0; i < ring_size; i += MTK_WED_BUF_PER_PAGE) {
+	for (i = 0; i < ring_size; i += MTK_WED_BUF_PER_PAGE) {
 		dma_addr_t page_phys, buf_phys;
 		struct page *page;
 		void *buf;
@@ -343,7 +681,8 @@ mtk_wed_tx_buffer_alloc(struct mtk_wed_device *dev)
 			return -ENOMEM;
 		}
 
-		page_list[page_idx++] = page;
+		page_list[page_idx].p = page;
+		page_list[page_idx++].phy_addr = page_phys;
 		dma_sync_single_for_cpu(dev->hw->dev, page_phys, PAGE_SIZE,
 					DMA_BIDIRECTIONAL);
 
@@ -351,28 +690,31 @@ mtk_wed_tx_buffer_alloc(struct mtk_wed_device *dev)
 		buf_phys = page_phys;
 
 		for (s = 0; s < MTK_WED_BUF_PER_PAGE; s++) {
-			u32 txd_size;
-			u32 ctrl;
-
-			txd_size = dev->wlan.init_buf(buf, buf_phys, token++);
+			struct mtk_wdma_desc *desc = desc_ptr;
 
 			desc->buf0 = cpu_to_le32(buf_phys);
-			desc->buf1 = cpu_to_le32(buf_phys + txd_size);
-
-			if (dev->hw->version == 1)
-				ctrl = FIELD_PREP(MTK_WDMA_DESC_CTRL_LEN0, txd_size) |
-				       FIELD_PREP(MTK_WDMA_DESC_CTRL_LEN1,
-						  MTK_WED_BUF_SIZE - txd_size) |
-				       MTK_WDMA_DESC_CTRL_LAST_SEG1;
-			else
-				ctrl = FIELD_PREP(MTK_WDMA_DESC_CTRL_LEN0, txd_size) |
-				       FIELD_PREP(MTK_WDMA_DESC_CTRL_LEN1_V2,
-						  MTK_WED_BUF_SIZE - txd_size) |
-				       MTK_WDMA_DESC_CTRL_LAST_SEG0;
-			desc->ctrl = cpu_to_le32(ctrl);
-			desc->info = 0;
-			desc++;
-
+			if (!mtk_wed_is_v3_or_greater(dev->hw)) {
+				u32 txd_size, ctrl;
+
+				txd_size = dev->wlan.init_buf(buf, buf_phys,
+							      token++);
+				desc->buf1 = cpu_to_le32(buf_phys + txd_size);
+				ctrl = FIELD_PREP(MTK_WDMA_DESC_CTRL_LEN0, txd_size);
+				if (mtk_wed_is_v1(dev->hw))
+					ctrl |= MTK_WDMA_DESC_CTRL_LAST_SEG1 |
+						FIELD_PREP(MTK_WDMA_DESC_CTRL_LEN1,
+							   MTK_WED_BUF_SIZE - txd_size);
+				else
+					ctrl |= MTK_WDMA_DESC_CTRL_LAST_SEG0 |
+						FIELD_PREP(MTK_WDMA_DESC_CTRL_LEN1_V2,
+							   MTK_WED_BUF_SIZE - txd_size);
+				desc->ctrl = cpu_to_le32(ctrl);
+				desc->info = 0;
+			} else {
+				desc->ctrl = cpu_to_le32(token << 16);
+			}
+
+			desc_ptr += desc_size;
 			buf += MTK_WED_BUF_SIZE;
 			buf_phys += MTK_WED_BUF_SIZE;
 		}
@@ -387,42 +729,103 @@ mtk_wed_tx_buffer_alloc(struct mtk_wed_device *dev)
 static void
 mtk_wed_free_tx_buffer(struct mtk_wed_device *dev)
 {
-	struct mtk_wdma_desc *desc = dev->tx_buf_ring.desc;
-	void **page_list = dev->tx_buf_ring.pages;
-	int page_idx;
-	int i;
+	struct mtk_wed_buf *page_list = dev->tx_buf_ring.pages;
+	struct mtk_wed_hw *hw = dev->hw;
+	int i, page_idx = 0;
 
 	if (!page_list)
 		return;
 
-	if (!desc)
+	if (!dev->tx_buf_ring.desc)
 		goto free_pagelist;
 
-	for (i = 0, page_idx = 0; i < dev->tx_buf_ring.size;
-	     i += MTK_WED_BUF_PER_PAGE) {
-		void *page = page_list[page_idx++];
-		dma_addr_t buf_addr;
+	for (i = 0; i < dev->tx_buf_ring.size; i += MTK_WED_BUF_PER_PAGE) {
+		dma_addr_t page_phy = page_list[page_idx].phy_addr;
+		void *page = page_list[page_idx++].p;
 
 		if (!page)
 			break;
 
-		buf_addr = le32_to_cpu(desc[i].buf0);
-		dma_unmap_page(dev->hw->dev, buf_addr, PAGE_SIZE,
+		dma_unmap_page(dev->hw->dev, page_phy, PAGE_SIZE,
 			       DMA_BIDIRECTIONAL);
 		__free_page(page);
 	}
 
-	dma_free_coherent(dev->hw->dev, dev->tx_buf_ring.size * sizeof(*desc),
-			  desc, dev->tx_buf_ring.desc_phys);
+	dma_free_coherent(dev->hw->dev,
+			  dev->tx_buf_ring.size * hw->soc->tx_ring_desc_size,
+			  dev->tx_buf_ring.desc,
+			  dev->tx_buf_ring.desc_phys);
 
 free_pagelist:
 	kfree(page_list);
 }
 
 static int
+mtk_wed_hwrro_buffer_alloc(struct mtk_wed_device *dev)
+{
+	int n_pages = MTK_WED_RX_PG_BM_CNT / MTK_WED_RX_BUF_PER_PAGE;
+	struct mtk_wed_buf *page_list;
+	struct mtk_wed_bm_desc *desc;
+	dma_addr_t desc_phys;
+	int i, page_idx = 0;
+
+	if (!dev->wlan.hw_rro)
+		return 0;
+
+	page_list = kcalloc(n_pages, sizeof(*page_list), GFP_KERNEL);
+	if (!page_list)
+		return -ENOMEM;
+
+	dev->hw_rro.size = dev->wlan.rx_nbuf & ~(MTK_WED_BUF_PER_PAGE - 1);
+	dev->hw_rro.pages = page_list;
+	desc = dma_alloc_coherent(dev->hw->dev,
+				  dev->wlan.rx_nbuf * sizeof(*desc),
+				  &desc_phys, GFP_KERNEL);
+	if (!desc)
+		return -ENOMEM;
+
+	dev->hw_rro.desc = desc;
+	dev->hw_rro.desc_phys = desc_phys;
+
+	for (i = 0; i < MTK_WED_RX_PG_BM_CNT; i += MTK_WED_RX_BUF_PER_PAGE) {
+		dma_addr_t page_phys, buf_phys;
+		struct page *page;
+		int s;
+
+		page = __dev_alloc_page(GFP_KERNEL);
+		if (!page)
+			return -ENOMEM;
+
+		page_phys = dma_map_page(dev->hw->dev, page, 0, PAGE_SIZE,
+					 DMA_BIDIRECTIONAL);
+		if (dma_mapping_error(dev->hw->dev, page_phys)) {
+			__free_page(page);
+			return -ENOMEM;
+		}
+
+		page_list[page_idx].p = page;
+		page_list[page_idx++].phy_addr = page_phys;
+		dma_sync_single_for_cpu(dev->hw->dev, page_phys, PAGE_SIZE,
+					DMA_BIDIRECTIONAL);
+
+		buf_phys = page_phys;
+		for (s = 0; s < MTK_WED_RX_BUF_PER_PAGE; s++) {
+			desc->buf0 = cpu_to_le32(buf_phys);
+			buf_phys += MTK_WED_PAGE_BUF_SIZE;
+			desc++;
+		}
+
+		dma_sync_single_for_device(dev->hw->dev, page_phys, PAGE_SIZE,
+					   DMA_BIDIRECTIONAL);
+	}
+
+	return 0;
+}
+
+static int
 mtk_wed_rx_buffer_alloc(struct mtk_wed_device *dev)
 {
-	struct mtk_rxbm_desc *desc;
+	struct mtk_wed_bm_desc *desc;
 	dma_addr_t desc_phys;
 
 	dev->rx_buf_ring.size = dev->wlan.rx_nbuf;
@@ -436,13 +839,48 @@ mtk_wed_rx_buffer_alloc(struct mtk_wed_device *dev)
 	dev->rx_buf_ring.desc_phys = desc_phys;
 	dev->wlan.init_rx_buf(dev, dev->wlan.rx_npkt);
 
-	return 0;
+	return mtk_wed_hwrro_buffer_alloc(dev);
+}
+
+static void
+mtk_wed_hwrro_free_buffer(struct mtk_wed_device *dev)
+{
+	struct mtk_wed_buf *page_list = dev->hw_rro.pages;
+	struct mtk_wed_bm_desc *desc = dev->hw_rro.desc;
+	int i, page_idx = 0;
+
+	if (!dev->wlan.hw_rro)
+		return;
+
+	if (!page_list)
+		return;
+
+	if (!desc)
+		goto free_pagelist;
+
+	for (i = 0; i < MTK_WED_RX_PG_BM_CNT; i += MTK_WED_RX_BUF_PER_PAGE) {
+		dma_addr_t buf_addr = page_list[page_idx].phy_addr;
+		void *page = page_list[page_idx++].p;
+
+		if (!page)
+			break;
+
+		dma_unmap_page(dev->hw->dev, buf_addr, PAGE_SIZE,
+			       DMA_BIDIRECTIONAL);
+		__free_page(page);
+	}
+
+	dma_free_coherent(dev->hw->dev, dev->hw_rro.size * sizeof(*desc),
+			  desc, dev->hw_rro.desc_phys);
+
+free_pagelist:
+	kfree(page_list);
 }
 
 static void
 mtk_wed_free_rx_buffer(struct mtk_wed_device *dev)
 {
-	struct mtk_rxbm_desc *desc = dev->rx_buf_ring.desc;
+	struct mtk_wed_bm_desc *desc = dev->rx_buf_ring.desc;
 
 	if (!desc)
 		return;
@@ -450,6 +888,28 @@ mtk_wed_free_rx_buffer(struct mtk_wed_device *dev)
 	dev->wlan.release_rx_buf(dev);
 	dma_free_coherent(dev->hw->dev, dev->rx_buf_ring.size * sizeof(*desc),
 			  desc, dev->rx_buf_ring.desc_phys);
+
+	mtk_wed_hwrro_free_buffer(dev);
+}
+
+static void
+mtk_wed_hwrro_init(struct mtk_wed_device *dev)
+{
+	if (!mtk_wed_get_rx_capa(dev) || !dev->wlan.hw_rro)
+		return;
+
+	wed_set(dev, MTK_WED_RRO_PG_BM_RX_DMAM,
+		FIELD_PREP(MTK_WED_RRO_PG_BM_RX_SDL0, 128));
+
+	wed_w32(dev, MTK_WED_RRO_PG_BM_BASE, dev->hw_rro.desc_phys);
+
+	wed_w32(dev, MTK_WED_RRO_PG_BM_INIT_PTR,
+		MTK_WED_RRO_PG_BM_INIT_SW_TAIL_IDX |
+		FIELD_PREP(MTK_WED_RRO_PG_BM_SW_TAIL_IDX,
+			   MTK_WED_RX_PG_BM_CNT));
+
+	/* enable rx_page_bm to fetch dmad */
+	wed_set(dev, MTK_WED_CTRL, MTK_WED_CTRL_WED_RX_PG_BM_EN);
 }
 
 static void
@@ -463,6 +923,8 @@ mtk_wed_rx_buffer_hw_init(struct mtk_wed_device *dev)
 	wed_w32(dev, MTK_WED_RX_BM_DYN_ALLOC_TH,
 		FIELD_PREP(MTK_WED_RX_BM_DYN_ALLOC_TH_H, 0xffff));
 	wed_set(dev, MTK_WED_CTRL, MTK_WED_CTRL_WED_RX_BM_EN);
+
+	mtk_wed_hwrro_init(dev);
 }
 
 static void
@@ -498,13 +960,23 @@ mtk_wed_set_ext_int(struct mtk_wed_device *dev, bool en)
 {
 	u32 mask = MTK_WED_EXT_INT_STATUS_ERROR_MASK;
 
-	if (dev->hw->version == 1)
+	switch (dev->hw->version) {
+	case 1:
 		mask |= MTK_WED_EXT_INT_STATUS_TX_DRV_R_RESP_ERR;
-	else
+		break;
+	case 2:
 		mask |= MTK_WED_EXT_INT_STATUS_RX_FBUF_LO_TH |
 			MTK_WED_EXT_INT_STATUS_RX_FBUF_HI_TH |
 			MTK_WED_EXT_INT_STATUS_RX_DRV_COHERENT |
 			MTK_WED_EXT_INT_STATUS_TX_DMA_W_RESP_ERR;
+		break;
+	case 3:
+		mask = MTK_WED_EXT_INT_STATUS_RX_DRV_COHERENT |
+		       MTK_WED_EXT_INT_STATUS_TKID_WO_PYLD;
+		break;
+	default:
+		break;
+	}
 
 	if (!dev->hw->num_flows)
 		mask &= ~MTK_WED_EXT_INT_STATUS_TKID_WO_PYLD;
@@ -516,6 +988,9 @@ mtk_wed_set_ext_int(struct mtk_wed_device *dev, bool en)
 static void
 mtk_wed_set_512_support(struct mtk_wed_device *dev, bool enable)
 {
+	if (!mtk_wed_is_v2(dev->hw))
+		return;
+
 	if (enable) {
 		wed_w32(dev, MTK_WED_TXDP_CTRL, MTK_WED_TXDP_DW9_OVERWR);
 		wed_w32(dev, MTK_WED_TXP_DW1,
@@ -527,22 +1002,15 @@ mtk_wed_set_512_support(struct mtk_wed_device *dev, bool enable)
 	}
 }
 
-#define MTK_WFMDA_RX_DMA_EN	BIT(2)
-static void
-mtk_wed_check_wfdma_rx_fill(struct mtk_wed_device *dev, int idx)
+static int
+mtk_wed_check_wfdma_rx_fill(struct mtk_wed_device *dev,
+			    struct mtk_wed_ring *ring)
 {
-	u32 val;
 	int i;
 
-	if (!(dev->rx_ring[idx].flags & MTK_WED_RING_CONFIGURED))
-		return; /* queue is not configured by mt76 */
-
 	for (i = 0; i < 3; i++) {
-		u32 cur_idx;
+		u32 cur_idx = readl(ring->wpdma + MTK_WED_RING_OFS_CPU_IDX);
 
-		cur_idx = wed_r32(dev,
-				  MTK_WED_WPDMA_RING_RX_DATA(idx) +
-				  MTK_WED_RING_OFS_CPU_IDX);
 		if (cur_idx == MTK_WED_RX_RING_SIZE - 1)
 			break;
 
@@ -551,12 +1019,10 @@ mtk_wed_check_wfdma_rx_fill(struct mtk_wed_device *dev, int idx)
 
 	if (i == 3) {
 		dev_err(dev->hw->dev, "rx dma enable failed\n");
-		return;
+		return -ETIMEDOUT;
 	}
 
-	val = wifi_r32(dev, dev->wlan.wpdma_rx_glo - dev->wlan.phy_base) |
-	      MTK_WFMDA_RX_DMA_EN;
-	wifi_w32(dev, dev->wlan.wpdma_rx_glo - dev->wlan.phy_base, val);
+	return 0;
 }
 
 static void
@@ -577,7 +1043,7 @@ mtk_wed_dma_disable(struct mtk_wed_device *dev)
 		 MTK_WDMA_GLO_CFG_RX_INFO1_PRERES |
 		 MTK_WDMA_GLO_CFG_RX_INFO2_PRERES);
 
-	if (dev->hw->version == 1) {
+	if (mtk_wed_is_v1(dev->hw)) {
 		regmap_write(dev->hw->mirror, dev->hw->index * 4, 0);
 		wdma_clr(dev, MTK_WDMA_GLO_CFG,
 			 MTK_WDMA_GLO_CFG_RX_INFO3_PRERES);
@@ -590,6 +1056,14 @@ mtk_wed_dma_disable(struct mtk_wed_device *dev)
 			MTK_WED_WPDMA_RX_D_RX_DRV_EN);
 		wed_clr(dev, MTK_WED_WDMA_GLO_CFG,
 			MTK_WED_WDMA_GLO_CFG_TX_DDONE_CHK);
+
+		if (mtk_wed_is_v3_or_greater(dev->hw) &&
+		    mtk_wed_get_rx_capa(dev)) {
+			wdma_clr(dev, MTK_WDMA_PREF_TX_CFG,
+				 MTK_WDMA_PREF_TX_CFG_PREF_EN);
+			wdma_clr(dev, MTK_WDMA_PREF_RX_CFG,
+				 MTK_WDMA_PREF_RX_CFG_PREF_EN);
+		}
 	}
 
 	mtk_wed_set_512_support(dev, false);
@@ -606,7 +1080,7 @@ mtk_wed_stop(struct mtk_wed_device *dev)
 	wdma_w32(dev, MTK_WDMA_INT_GRP2, 0);
 	wed_w32(dev, MTK_WED_WPDMA_INT_MASK, 0);
 
-	if (dev->hw->version == 1)
+	if (!mtk_wed_get_rx_capa(dev))
 		return;
 
 	wed_w32(dev, MTK_WED_EXT_INT_MASK1, 0);
@@ -625,13 +1099,21 @@ mtk_wed_deinit(struct mtk_wed_device *dev)
 		MTK_WED_CTRL_WED_TX_BM_EN |
 		MTK_WED_CTRL_WED_TX_FREE_AGENT_EN);
 
-	if (dev->hw->version == 1)
+	if (mtk_wed_is_v1(dev->hw))
 		return;
 
 	wed_clr(dev, MTK_WED_CTRL,
 		MTK_WED_CTRL_RX_ROUTE_QM_EN |
 		MTK_WED_CTRL_WED_RX_BM_EN |
 		MTK_WED_CTRL_RX_RRO_QM_EN);
+
+	if (mtk_wed_is_v3_or_greater(dev->hw)) {
+		wed_clr(dev, MTK_WED_CTRL, MTK_WED_CTRL_TX_AMSDU_EN);
+		wed_clr(dev, MTK_WED_RESET, MTK_WED_RESET_TX_AMSDU);
+		wed_clr(dev, MTK_WED_PCIE_INT_CTRL,
+			MTK_WED_PCIE_INT_CTRL_MSK_EN_POLA |
+			MTK_WED_PCIE_INT_CTRL_MSK_IRQ_FILTER);
+	}
 }
 
 static void
@@ -643,6 +1125,7 @@ __mtk_wed_detach(struct mtk_wed_device *dev)
 
 	mtk_wdma_rx_reset(dev);
 	mtk_wed_reset(dev, MTK_WED_RESET_WED);
+	mtk_wed_amsdu_free_buffer(dev);
 	mtk_wed_free_tx_buffer(dev);
 	mtk_wed_free_tx_rings(dev);
 
@@ -681,21 +1164,37 @@ mtk_wed_detach(struct mtk_wed_device *dev)
 	mutex_unlock(&hw_lock);
 }
 
-#define PCIE_BASE_ADDR0		0x11280000
 static void
 mtk_wed_bus_init(struct mtk_wed_device *dev)
 {
 	switch (dev->wlan.bus_type) {
 	case MTK_WED_BUS_PCIE: {
 		struct device_node *np = dev->hw->eth->dev->of_node;
-		struct regmap *regs;
 
-		regs = syscon_regmap_lookup_by_phandle(np,
-						       "mediatek,wed-pcie");
-		if (IS_ERR(regs))
-			break;
+		if (mtk_wed_is_v2(dev->hw)) {
+			struct regmap *regs;
+
+			regs = syscon_regmap_lookup_by_phandle(np,
+							       "mediatek,wed-pcie");
+			if (IS_ERR(regs))
+				break;
 
-		regmap_update_bits(regs, 0, BIT(0), BIT(0));
+			regmap_update_bits(regs, 0, BIT(0), BIT(0));
+		}
+
+		if (dev->wlan.msi) {
+			wed_w32(dev, MTK_WED_PCIE_CFG_INTM,
+				dev->hw->pcie_base | 0xc08);
+			wed_w32(dev, MTK_WED_PCIE_CFG_BASE,
+				dev->hw->pcie_base | 0xc04);
+			wed_w32(dev, MTK_WED_PCIE_INT_TRIGGER, BIT(8));
+		} else {
+			wed_w32(dev, MTK_WED_PCIE_CFG_INTM,
+				dev->hw->pcie_base | 0x180);
+			wed_w32(dev, MTK_WED_PCIE_CFG_BASE,
+				dev->hw->pcie_base | 0x184);
+			wed_w32(dev, MTK_WED_PCIE_INT_TRIGGER, BIT(24));
+		}
 
 		wed_w32(dev, MTK_WED_PCIE_INT_CTRL,
 			FIELD_PREP(MTK_WED_PCIE_INT_CTRL_POLL_EN, 2));
@@ -703,19 +1202,9 @@ mtk_wed_bus_init(struct mtk_wed_device *dev)
 		/* pcie interrupt control: pola/source selection */
 		wed_set(dev, MTK_WED_PCIE_INT_CTRL,
 			MTK_WED_PCIE_INT_CTRL_MSK_EN_POLA |
-			FIELD_PREP(MTK_WED_PCIE_INT_CTRL_SRC_SEL, 1));
-		wed_r32(dev, MTK_WED_PCIE_INT_CTRL);
-
-		wed_w32(dev, MTK_WED_PCIE_CFG_INTM, PCIE_BASE_ADDR0 | 0x180);
-		wed_w32(dev, MTK_WED_PCIE_CFG_BASE, PCIE_BASE_ADDR0 | 0x184);
-
-		/* pcie interrupt status trigger register */
-		wed_w32(dev, MTK_WED_PCIE_INT_TRIGGER, BIT(24));
-		wed_r32(dev, MTK_WED_PCIE_INT_TRIGGER);
-
-		/* pola setting */
-		wed_set(dev, MTK_WED_PCIE_INT_CTRL,
-			MTK_WED_PCIE_INT_CTRL_MSK_EN_POLA);
+			MTK_WED_PCIE_INT_CTRL_MSK_IRQ_FILTER  |
+			FIELD_PREP(MTK_WED_PCIE_INT_CTRL_SRC_SEL,
+				   dev->hw->index));
 		break;
 	}
 	case MTK_WED_BUS_AXI:
@@ -731,38 +1220,55 @@ mtk_wed_bus_init(struct mtk_wed_device *dev)
 static void
 mtk_wed_set_wpdma(struct mtk_wed_device *dev)
 {
-	if (dev->hw->version == 1) {
-		wed_w32(dev, MTK_WED_WPDMA_CFG_BASE,  dev->wlan.wpdma_phys);
-	} else {
-		mtk_wed_bus_init(dev);
+	int i;
 
-		wed_w32(dev, MTK_WED_WPDMA_CFG_BASE, dev->wlan.wpdma_int);
-		wed_w32(dev, MTK_WED_WPDMA_CFG_INT_MASK, dev->wlan.wpdma_mask);
-		wed_w32(dev, MTK_WED_WPDMA_CFG_TX, dev->wlan.wpdma_tx);
-		wed_w32(dev, MTK_WED_WPDMA_CFG_TX_FREE, dev->wlan.wpdma_txfree);
-		wed_w32(dev, MTK_WED_WPDMA_RX_GLO_CFG, dev->wlan.wpdma_rx_glo);
-		wed_w32(dev, MTK_WED_WPDMA_RX_RING, dev->wlan.wpdma_rx);
+	if (mtk_wed_is_v1(dev->hw)) {
+		wed_w32(dev, MTK_WED_WPDMA_CFG_BASE,  dev->wlan.wpdma_phys);
+		return;
 	}
+
+	mtk_wed_bus_init(dev);
+
+	wed_w32(dev, MTK_WED_WPDMA_CFG_BASE, dev->wlan.wpdma_int);
+	wed_w32(dev, MTK_WED_WPDMA_CFG_INT_MASK, dev->wlan.wpdma_mask);
+	wed_w32(dev, MTK_WED_WPDMA_CFG_TX, dev->wlan.wpdma_tx);
+	wed_w32(dev, MTK_WED_WPDMA_CFG_TX_FREE, dev->wlan.wpdma_txfree);
+
+	if (!mtk_wed_get_rx_capa(dev))
+		return;
+
+	wed_w32(dev, MTK_WED_WPDMA_RX_GLO_CFG, dev->wlan.wpdma_rx_glo);
+	wed_w32(dev, dev->hw->soc->regmap.wpdma_rx_ring0, dev->wlan.wpdma_rx);
+
+	if (!dev->wlan.hw_rro)
+		return;
+
+	wed_w32(dev, MTK_WED_RRO_RX_D_CFG(0), dev->wlan.wpdma_rx_rro[0]);
+	wed_w32(dev, MTK_WED_RRO_RX_D_CFG(1), dev->wlan.wpdma_rx_rro[1]);
+	for (i = 0; i < MTK_WED_RX_PAGE_QUEUES; i++)
+		wed_w32(dev, MTK_WED_RRO_MSDU_PG_RING_CFG(i),
+			dev->wlan.wpdma_rx_pg + i * 0x10);
 }
 
 static void
 mtk_wed_hw_init_early(struct mtk_wed_device *dev)
 {
-	u32 mask, set;
+	u32 set = FIELD_PREP(MTK_WED_WDMA_GLO_CFG_BT_SIZE, 2);
+	u32 mask = MTK_WED_WDMA_GLO_CFG_BT_SIZE;
 
 	mtk_wed_deinit(dev);
 	mtk_wed_reset(dev, MTK_WED_RESET_WED);
 	mtk_wed_set_wpdma(dev);
 
-	mask = MTK_WED_WDMA_GLO_CFG_BT_SIZE |
-	       MTK_WED_WDMA_GLO_CFG_DYNAMIC_DMAD_RECYCLE |
-	       MTK_WED_WDMA_GLO_CFG_RX_DIS_FSM_AUTO_IDLE;
-	set = FIELD_PREP(MTK_WED_WDMA_GLO_CFG_BT_SIZE, 2) |
-	      MTK_WED_WDMA_GLO_CFG_DYNAMIC_SKIP_DMAD_PREP |
-	      MTK_WED_WDMA_GLO_CFG_IDLE_DMAD_SUPPLY;
+	if (!mtk_wed_is_v3_or_greater(dev->hw)) {
+		mask |= MTK_WED_WDMA_GLO_CFG_DYNAMIC_DMAD_RECYCLE |
+			MTK_WED_WDMA_GLO_CFG_RX_DIS_FSM_AUTO_IDLE;
+		set |= MTK_WED_WDMA_GLO_CFG_DYNAMIC_SKIP_DMAD_PREP |
+		       MTK_WED_WDMA_GLO_CFG_IDLE_DMAD_SUPPLY;
+	}
 	wed_m32(dev, MTK_WED_WDMA_GLO_CFG, mask, set);
 
-	if (dev->hw->version == 1) {
+	if (mtk_wed_is_v1(dev->hw)) {
 		u32 offset = dev->hw->index ? 0x04000400 : 0;
 
 		wdma_set(dev, MTK_WDMA_GLO_CFG,
@@ -907,11 +1413,18 @@ mtk_wed_route_qm_hw_init(struct mtk_wed_device *dev)
 	}
 
 	/* configure RX_ROUTE_QM */
-	wed_clr(dev, MTK_WED_RTQM_GLO_CFG, MTK_WED_RTQM_Q_RST);
-	wed_clr(dev, MTK_WED_RTQM_GLO_CFG, MTK_WED_RTQM_TXDMAD_FPORT);
-	wed_set(dev, MTK_WED_RTQM_GLO_CFG,
-		FIELD_PREP(MTK_WED_RTQM_TXDMAD_FPORT, 0x3 + dev->hw->index));
-	wed_clr(dev, MTK_WED_RTQM_GLO_CFG, MTK_WED_RTQM_Q_RST);
+	if (mtk_wed_is_v2(dev->hw)) {
+		wed_clr(dev, MTK_WED_RTQM_GLO_CFG, MTK_WED_RTQM_Q_RST);
+		wed_clr(dev, MTK_WED_RTQM_GLO_CFG, MTK_WED_RTQM_TXDMAD_FPORT);
+		wed_set(dev, MTK_WED_RTQM_GLO_CFG,
+			FIELD_PREP(MTK_WED_RTQM_TXDMAD_FPORT,
+				   0x3 + dev->hw->index));
+		wed_clr(dev, MTK_WED_RTQM_GLO_CFG, MTK_WED_RTQM_Q_RST);
+	} else {
+		wed_set(dev, MTK_WED_RTQM_ENQ_CFG0,
+			FIELD_PREP(MTK_WED_RTQM_ENQ_CFG_TXDMAD_FPORT,
+				   0x3 + dev->hw->index));
+	}
 	/* enable RX_ROUTE_QM */
 	wed_set(dev, MTK_WED_CTRL, MTK_WED_CTRL_RX_ROUTE_QM_EN);
 }
@@ -924,34 +1437,30 @@ mtk_wed_hw_init(struct mtk_wed_device *dev)
 
 	dev->init_done = true;
 	mtk_wed_set_ext_int(dev, false);
-	wed_w32(dev, MTK_WED_TX_BM_CTRL,
-		MTK_WED_TX_BM_CTRL_PAUSE |
-		FIELD_PREP(MTK_WED_TX_BM_CTRL_VLD_GRP_NUM,
-			   dev->tx_buf_ring.size / 128) |
-		FIELD_PREP(MTK_WED_TX_BM_CTRL_RSV_GRP_NUM,
-			   MTK_WED_TX_RING_SIZE / 256));
 
 	wed_w32(dev, MTK_WED_TX_BM_BASE, dev->tx_buf_ring.desc_phys);
-
 	wed_w32(dev, MTK_WED_TX_BM_BUF_LEN, MTK_WED_PKT_SIZE);
 
-	if (dev->hw->version == 1) {
-		wed_w32(dev, MTK_WED_TX_BM_TKID,
-			FIELD_PREP(MTK_WED_TX_BM_TKID_START,
-				   dev->wlan.token_start) |
-			FIELD_PREP(MTK_WED_TX_BM_TKID_END,
-				   dev->wlan.token_start +
-				   dev->wlan.nbuf - 1));
+	if (mtk_wed_is_v1(dev->hw)) {
+		wed_w32(dev, MTK_WED_TX_BM_CTRL,
+			MTK_WED_TX_BM_CTRL_PAUSE |
+			FIELD_PREP(MTK_WED_TX_BM_CTRL_VLD_GRP_NUM,
+				   dev->tx_buf_ring.size / 128) |
+			FIELD_PREP(MTK_WED_TX_BM_CTRL_RSV_GRP_NUM,
+				   MTK_WED_TX_RING_SIZE / 256));
 		wed_w32(dev, MTK_WED_TX_BM_DYN_THR,
 			FIELD_PREP(MTK_WED_TX_BM_DYN_THR_LO, 1) |
 			MTK_WED_TX_BM_DYN_THR_HI);
-	} else {
-		wed_w32(dev, MTK_WED_TX_BM_TKID_V2,
-			FIELD_PREP(MTK_WED_TX_BM_TKID_START,
-				   dev->wlan.token_start) |
-			FIELD_PREP(MTK_WED_TX_BM_TKID_END,
-				   dev->wlan.token_start +
-				   dev->wlan.nbuf - 1));
+	} else if (mtk_wed_is_v2(dev->hw)) {
+		wed_w32(dev, MTK_WED_TX_BM_CTRL,
+			MTK_WED_TX_BM_CTRL_PAUSE |
+			FIELD_PREP(MTK_WED_TX_BM_CTRL_VLD_GRP_NUM,
+				   dev->tx_buf_ring.size / 128) |
+			FIELD_PREP(MTK_WED_TX_BM_CTRL_RSV_GRP_NUM,
+				   MTK_WED_TX_RING_SIZE / 256));
+		wed_w32(dev, MTK_WED_TX_TKID_DYN_THR,
+			FIELD_PREP(MTK_WED_TX_TKID_DYN_THR_LO, 0) |
+			MTK_WED_TX_TKID_DYN_THR_HI);
 		wed_w32(dev, MTK_WED_TX_BM_DYN_THR,
 			FIELD_PREP(MTK_WED_TX_BM_DYN_THR_LO_V2, 0) |
 			MTK_WED_TX_BM_DYN_THR_HI_V2);
@@ -961,31 +1470,71 @@ mtk_wed_hw_init(struct mtk_wed_device *dev)
 				   dev->tx_buf_ring.size / 128) |
 			FIELD_PREP(MTK_WED_TX_TKID_CTRL_RSV_GRP_NUM,
 				   dev->tx_buf_ring.size / 128));
-		wed_w32(dev, MTK_WED_TX_TKID_DYN_THR,
-			FIELD_PREP(MTK_WED_TX_TKID_DYN_THR_LO, 0) |
-			MTK_WED_TX_TKID_DYN_THR_HI);
 	}
 
+	wed_w32(dev, dev->hw->soc->regmap.tx_bm_tkid,
+		FIELD_PREP(MTK_WED_TX_BM_TKID_START, dev->wlan.token_start) |
+		FIELD_PREP(MTK_WED_TX_BM_TKID_END,
+			   dev->wlan.token_start + dev->wlan.nbuf - 1));
+
 	mtk_wed_reset(dev, MTK_WED_RESET_TX_BM);
 
-	if (dev->hw->version == 1) {
+	if (mtk_wed_is_v3_or_greater(dev->hw)) {
+		/* switch to new bm architecture */
+		wed_clr(dev, MTK_WED_TX_BM_CTRL,
+			MTK_WED_TX_BM_CTRL_LEGACY_EN);
+
+		wed_w32(dev, MTK_WED_TX_TKID_CTRL,
+			MTK_WED_TX_TKID_CTRL_PAUSE |
+			FIELD_PREP(MTK_WED_TX_TKID_CTRL_VLD_GRP_NUM_V3,
+				   dev->wlan.nbuf / 128) |
+			FIELD_PREP(MTK_WED_TX_TKID_CTRL_RSV_GRP_NUM_V3,
+				   dev->wlan.nbuf / 128));
+		/* return SKBID + SDP back to bm */
+		wed_set(dev, MTK_WED_TX_TKID_CTRL,
+			MTK_WED_TX_TKID_CTRL_FREE_FORMAT);
+
+		wed_w32(dev, MTK_WED_TX_BM_INIT_PTR,
+			MTK_WED_TX_BM_PKT_CNT |
+			MTK_WED_TX_BM_INIT_SW_TAIL_IDX);
+	}
+
+	if (mtk_wed_is_v1(dev->hw)) {
 		wed_set(dev, MTK_WED_CTRL,
 			MTK_WED_CTRL_WED_TX_BM_EN |
 			MTK_WED_CTRL_WED_TX_FREE_AGENT_EN);
-	} else {
-		wed_clr(dev, MTK_WED_TX_TKID_CTRL, MTK_WED_TX_TKID_CTRL_PAUSE);
+	} else if (mtk_wed_get_rx_capa(dev)) {
 		/* rx hw init */
 		wed_w32(dev, MTK_WED_WPDMA_RX_D_RST_IDX,
 			MTK_WED_WPDMA_RX_D_RST_CRX_IDX |
 			MTK_WED_WPDMA_RX_D_RST_DRV_IDX);
 		wed_w32(dev, MTK_WED_WPDMA_RX_D_RST_IDX, 0);
 
+		/* reset prefetch index of ring */
+		wed_set(dev, MTK_WED_WPDMA_RX_D_PREF_RX0_SIDX,
+			MTK_WED_WPDMA_RX_D_PREF_SIDX_IDX_CLR);
+		wed_clr(dev, MTK_WED_WPDMA_RX_D_PREF_RX0_SIDX,
+			MTK_WED_WPDMA_RX_D_PREF_SIDX_IDX_CLR);
+
+		wed_set(dev, MTK_WED_WPDMA_RX_D_PREF_RX1_SIDX,
+			MTK_WED_WPDMA_RX_D_PREF_SIDX_IDX_CLR);
+		wed_clr(dev, MTK_WED_WPDMA_RX_D_PREF_RX1_SIDX,
+			MTK_WED_WPDMA_RX_D_PREF_SIDX_IDX_CLR);
+
+		/* reset prefetch FIFO of ring */
+		wed_set(dev, MTK_WED_WPDMA_RX_D_PREF_FIFO_CFG,
+			MTK_WED_WPDMA_RX_D_PREF_FIFO_CFG_R0_CLR |
+			MTK_WED_WPDMA_RX_D_PREF_FIFO_CFG_R1_CLR);
+		wed_w32(dev, MTK_WED_WPDMA_RX_D_PREF_FIFO_CFG, 0);
+
 		mtk_wed_rx_buffer_hw_init(dev);
 		mtk_wed_rro_hw_init(dev);
 		mtk_wed_route_qm_hw_init(dev);
 	}
 
 	wed_clr(dev, MTK_WED_TX_BM_CTRL, MTK_WED_TX_BM_CTRL_PAUSE);
+	if (!mtk_wed_is_v1(dev->hw))
+		wed_clr(dev, MTK_WED_TX_TKID_CTRL, MTK_WED_TX_TKID_CTRL_PAUSE);
 }
 
 static void
@@ -1008,23 +1557,6 @@ mtk_wed_ring_reset(struct mtk_wed_ring *ring, int size, bool tx)
 	}
 }
 
-static u32
-mtk_wed_check_busy(struct mtk_wed_device *dev, u32 reg, u32 mask)
-{
-	return !!(wed_r32(dev, reg) & mask);
-}
-
-static int
-mtk_wed_poll_busy(struct mtk_wed_device *dev, u32 reg, u32 mask)
-{
-	int sleep = 15000;
-	int timeout = 100 * sleep;
-	u32 val;
-
-	return read_poll_timeout(mtk_wed_check_busy, val, !val, sleep,
-				 timeout, false, dev, reg, mask);
-}
-
 static int
 mtk_wed_rx_reset(struct mtk_wed_device *dev)
 {
@@ -1038,13 +1570,33 @@ mtk_wed_rx_reset(struct mtk_wed_device *dev)
 	if (ret)
 		return ret;
 
+	if (dev->wlan.hw_rro) {
+		wed_clr(dev, MTK_WED_CTRL, MTK_WED_CTRL_WED_RX_IND_CMD_EN);
+		mtk_wed_poll_busy(dev, MTK_WED_RRO_RX_HW_STS,
+				  MTK_WED_RX_IND_CMD_BUSY);
+		mtk_wed_reset(dev, MTK_WED_RESET_RRO_RX_TO_PG);
+	}
+
 	wed_clr(dev, MTK_WED_WPDMA_RX_D_GLO_CFG, MTK_WED_WPDMA_RX_D_RX_DRV_EN);
 	ret = mtk_wed_poll_busy(dev, MTK_WED_WPDMA_RX_D_GLO_CFG,
 				MTK_WED_WPDMA_RX_D_RX_DRV_BUSY);
+	if (!ret && mtk_wed_is_v3_or_greater(dev->hw))
+		ret = mtk_wed_poll_busy(dev, MTK_WED_WPDMA_RX_D_PREF_CFG,
+					MTK_WED_WPDMA_RX_D_PREF_BUSY);
 	if (ret) {
 		mtk_wed_reset(dev, MTK_WED_RESET_WPDMA_INT_AGENT);
 		mtk_wed_reset(dev, MTK_WED_RESET_WPDMA_RX_D_DRV);
 	} else {
+		if (mtk_wed_is_v3_or_greater(dev->hw)) {
+			/* 1.a. disable prefetch HW */
+			wed_clr(dev, MTK_WED_WPDMA_RX_D_PREF_CFG,
+				MTK_WED_WPDMA_RX_D_PREF_EN);
+			mtk_wed_poll_busy(dev, MTK_WED_WPDMA_RX_D_PREF_CFG,
+					  MTK_WED_WPDMA_RX_D_PREF_BUSY);
+			wed_w32(dev, MTK_WED_WPDMA_RX_D_RST_IDX,
+				MTK_WED_WPDMA_RX_D_RST_DRV_IDX_ALL);
+		}
+
 		wed_w32(dev, MTK_WED_WPDMA_RX_D_RST_IDX,
 			MTK_WED_WPDMA_RX_D_RST_CRX_IDX |
 			MTK_WED_WPDMA_RX_D_RST_DRV_IDX);
@@ -1072,23 +1624,52 @@ mtk_wed_rx_reset(struct mtk_wed_device *dev)
 		wed_w32(dev, MTK_WED_RROQM_RST_IDX, 0);
 	}
 
+	if (dev->wlan.hw_rro) {
+		/* disable rro msdu page drv */
+		wed_clr(dev, MTK_WED_RRO_MSDU_PG_RING2_CFG,
+			MTK_WED_RRO_MSDU_PG_DRV_EN);
+
+		/* disable rro data drv */
+		wed_clr(dev, MTK_WED_RRO_RX_D_CFG(2), MTK_WED_RRO_RX_D_DRV_EN);
+
+		/* rro msdu page drv reset */
+		wed_w32(dev, MTK_WED_RRO_MSDU_PG_RING2_CFG,
+			MTK_WED_RRO_MSDU_PG_DRV_CLR);
+		mtk_wed_poll_busy(dev, MTK_WED_RRO_MSDU_PG_RING2_CFG,
+				  MTK_WED_RRO_MSDU_PG_DRV_CLR);
+
+		/* rro data drv reset */
+		wed_w32(dev, MTK_WED_RRO_RX_D_CFG(2),
+			MTK_WED_RRO_RX_D_DRV_CLR);
+		mtk_wed_poll_busy(dev, MTK_WED_RRO_RX_D_CFG(2),
+				  MTK_WED_RRO_RX_D_DRV_CLR);
+	}
+
 	/* reset route qm */
 	wed_clr(dev, MTK_WED_CTRL, MTK_WED_CTRL_RX_ROUTE_QM_EN);
 	ret = mtk_wed_poll_busy(dev, MTK_WED_CTRL,
 				MTK_WED_CTRL_RX_ROUTE_QM_BUSY);
-	if (ret)
+	if (ret) {
 		mtk_wed_reset(dev, MTK_WED_RESET_RX_ROUTE_QM);
-	else
-		wed_set(dev, MTK_WED_RTQM_GLO_CFG,
-			MTK_WED_RTQM_Q_RST);
+	} else if (mtk_wed_is_v3_or_greater(dev->hw)) {
+		wed_set(dev, MTK_WED_RTQM_RST, BIT(0));
+		wed_clr(dev, MTK_WED_RTQM_RST, BIT(0));
+		mtk_wed_reset(dev, MTK_WED_RESET_RX_ROUTE_QM);
+	} else {
+		wed_set(dev, MTK_WED_RTQM_GLO_CFG, MTK_WED_RTQM_Q_RST);
+	}
 
 	/* reset tx wdma */
 	mtk_wdma_tx_reset(dev);
 
 	/* reset tx wdma drv */
 	wed_clr(dev, MTK_WED_WDMA_GLO_CFG, MTK_WED_WDMA_GLO_CFG_TX_DRV_EN);
-	mtk_wed_poll_busy(dev, MTK_WED_CTRL,
-			  MTK_WED_CTRL_WDMA_INT_AGENT_BUSY);
+	if (mtk_wed_is_v3_or_greater(dev->hw))
+		mtk_wed_poll_busy(dev, MTK_WED_WPDMA_STATUS,
+				  MTK_WED_WPDMA_STATUS_TX_DRV);
+	else
+		mtk_wed_poll_busy(dev, MTK_WED_CTRL,
+				  MTK_WED_CTRL_WDMA_INT_AGENT_BUSY);
 	mtk_wed_reset(dev, MTK_WED_RESET_WDMA_TX_DRV);
 
 	/* reset wed rx dma */
@@ -1098,13 +1679,8 @@ mtk_wed_rx_reset(struct mtk_wed_device *dev)
 	if (ret) {
 		mtk_wed_reset(dev, MTK_WED_RESET_WED_RX_DMA);
 	} else {
-		struct mtk_eth *eth = dev->hw->eth;
-
-		if (mtk_is_netsys_v2_or_greater(eth))
-			wed_set(dev, MTK_WED_RESET_IDX,
-				MTK_WED_RESET_IDX_RX_V2);
-		else
-			wed_set(dev, MTK_WED_RESET_IDX, MTK_WED_RESET_IDX_RX);
+		wed_set(dev, MTK_WED_RESET_IDX,
+			dev->hw->soc->regmap.reset_idx_rx_mask);
 		wed_w32(dev, MTK_WED_RESET_IDX, 0);
 	}
 
@@ -1114,6 +1690,14 @@ mtk_wed_rx_reset(struct mtk_wed_device *dev)
 			  MTK_WED_CTRL_WED_RX_BM_BUSY);
 	mtk_wed_reset(dev, MTK_WED_RESET_RX_BM);
 
+	if (dev->wlan.hw_rro) {
+		wed_clr(dev, MTK_WED_CTRL, MTK_WED_CTRL_WED_RX_PG_BM_EN);
+		mtk_wed_poll_busy(dev, MTK_WED_CTRL,
+				  MTK_WED_CTRL_WED_RX_PG_BM_BUSY);
+		wed_set(dev, MTK_WED_RESET, MTK_WED_RESET_RX_PG_BM);
+		wed_clr(dev, MTK_WED_RESET, MTK_WED_RESET_RX_PG_BM);
+	}
+
 	/* wo change to enable state */
 	val = MTK_WED_WO_STATE_ENABLE;
 	ret = mtk_wed_mcu_send_msg(wo, MTK_WED_MODULE_ID_WO,
@@ -1131,6 +1715,7 @@ mtk_wed_rx_reset(struct mtk_wed_device *dev)
 				   false);
 	}
 	mtk_wed_free_rx_buffer(dev);
+	mtk_wed_hwrro_free_buffer(dev);
 
 	return 0;
 }
@@ -1157,21 +1742,48 @@ mtk_wed_reset_dma(struct mtk_wed_device *dev)
 	if (busy) {
 		mtk_wed_reset(dev, MTK_WED_RESET_WED_TX_DMA);
 	} else {
-		wed_w32(dev, MTK_WED_RESET_IDX, MTK_WED_RESET_IDX_TX);
+		wed_w32(dev, MTK_WED_RESET_IDX,
+			dev->hw->soc->regmap.reset_idx_tx_mask);
 		wed_w32(dev, MTK_WED_RESET_IDX, 0);
 	}
 
 	/* 2. reset WDMA rx DMA */
 	busy = !!mtk_wdma_rx_reset(dev);
-	wed_clr(dev, MTK_WED_WDMA_GLO_CFG, MTK_WED_WDMA_GLO_CFG_RX_DRV_EN);
+	if (mtk_wed_is_v3_or_greater(dev->hw)) {
+		val = MTK_WED_WDMA_GLO_CFG_RX_DIS_FSM_AUTO_IDLE |
+		      wed_r32(dev, MTK_WED_WDMA_GLO_CFG);
+		val &= ~MTK_WED_WDMA_GLO_CFG_RX_DRV_EN;
+		wed_w32(dev, MTK_WED_WDMA_GLO_CFG, val);
+	} else {
+		wed_clr(dev, MTK_WED_WDMA_GLO_CFG,
+			MTK_WED_WDMA_GLO_CFG_RX_DRV_EN);
+	}
+
 	if (!busy)
 		busy = mtk_wed_poll_busy(dev, MTK_WED_WDMA_GLO_CFG,
 					 MTK_WED_WDMA_GLO_CFG_RX_DRV_BUSY);
+	if (!busy && mtk_wed_is_v3_or_greater(dev->hw))
+		busy = mtk_wed_poll_busy(dev, MTK_WED_WDMA_RX_PREF_CFG,
+					 MTK_WED_WDMA_RX_PREF_BUSY);
 
 	if (busy) {
 		mtk_wed_reset(dev, MTK_WED_RESET_WDMA_INT_AGENT);
 		mtk_wed_reset(dev, MTK_WED_RESET_WDMA_RX_DRV);
 	} else {
+		if (mtk_wed_is_v3_or_greater(dev->hw)) {
+			/* 1.a. disable prefetch HW */
+			wed_clr(dev, MTK_WED_WDMA_RX_PREF_CFG,
+				MTK_WED_WDMA_RX_PREF_EN);
+			mtk_wed_poll_busy(dev, MTK_WED_WDMA_RX_PREF_CFG,
+					  MTK_WED_WDMA_RX_PREF_BUSY);
+			wed_clr(dev, MTK_WED_WDMA_RX_PREF_CFG,
+				MTK_WED_WDMA_RX_PREF_DDONE2_EN);
+
+			/* 2. Reset dma index */
+			wed_w32(dev, MTK_WED_WDMA_RESET_IDX,
+				MTK_WED_WDMA_RESET_IDX_RX_ALL);
+		}
+
 		wed_w32(dev, MTK_WED_WDMA_RESET_IDX,
 			MTK_WED_WDMA_RESET_IDX_RX | MTK_WED_WDMA_RESET_IDX_DRV);
 		wed_w32(dev, MTK_WED_WDMA_RESET_IDX, 0);
@@ -1187,8 +1799,13 @@ mtk_wed_reset_dma(struct mtk_wed_device *dev)
 	wed_clr(dev, MTK_WED_CTRL, MTK_WED_CTRL_WED_TX_FREE_AGENT_EN);
 
 	for (i = 0; i < 100; i++) {
-		val = wed_r32(dev, MTK_WED_TX_BM_INTF);
-		if (FIELD_GET(MTK_WED_TX_BM_INTF_TKFIFO_FDEP, val) == 0x40)
+		if (mtk_wed_is_v1(dev->hw))
+			val = FIELD_GET(MTK_WED_TX_BM_INTF_TKFIFO_FDEP,
+					wed_r32(dev, MTK_WED_TX_BM_INTF));
+		else
+			val = FIELD_GET(MTK_WED_TX_TKID_INTF_TKFIFO_FDEP,
+					wed_r32(dev, MTK_WED_TX_TKID_INTF));
+		if (val == 0x40)
 			break;
 	}
 
@@ -1210,6 +1827,8 @@ mtk_wed_reset_dma(struct mtk_wed_device *dev)
 		mtk_wed_reset(dev, MTK_WED_RESET_WPDMA_INT_AGENT);
 		mtk_wed_reset(dev, MTK_WED_RESET_WPDMA_TX_DRV);
 		mtk_wed_reset(dev, MTK_WED_RESET_WPDMA_RX_DRV);
+		if (mtk_wed_is_v3_or_greater(dev->hw))
+			wed_w32(dev, MTK_WED_RX1_CTRL2, 0);
 	} else {
 		wed_w32(dev, MTK_WED_WPDMA_RESET_IDX,
 			MTK_WED_WPDMA_RESET_IDX_TX |
@@ -1218,7 +1837,7 @@ mtk_wed_reset_dma(struct mtk_wed_device *dev)
 	}
 
 	dev->init_done = false;
-	if (dev->hw->version == 1)
+	if (mtk_wed_is_v1(dev->hw))
 		return;
 
 	if (!busy) {
@@ -1226,7 +1845,14 @@ mtk_wed_reset_dma(struct mtk_wed_device *dev)
 		wed_w32(dev, MTK_WED_RESET_IDX, 0);
 	}
 
-	mtk_wed_rx_reset(dev);
+	if (mtk_wed_is_v3_or_greater(dev->hw)) {
+		/* reset amsdu engine */
+		wed_clr(dev, MTK_WED_CTRL, MTK_WED_CTRL_TX_AMSDU_EN);
+		mtk_wed_reset(dev, MTK_WED_RESET_TX_AMSDU);
+	}
+
+	if (mtk_wed_get_rx_capa(dev))
+		mtk_wed_rx_reset(dev);
 }
 
 static int
@@ -1249,7 +1875,6 @@ static int
 mtk_wed_wdma_rx_ring_setup(struct mtk_wed_device *dev, int idx, int size,
 			   bool reset)
 {
-	u32 desc_size = sizeof(struct mtk_wdma_desc) * dev->hw->version;
 	struct mtk_wed_ring *wdma;
 
 	if (idx >= ARRAY_SIZE(dev->rx_wdma))
@@ -1257,7 +1882,7 @@ mtk_wed_wdma_rx_ring_setup(struct mtk_wed_device *dev, int idx, int size,
 
 	wdma = &dev->rx_wdma[idx];
 	if (!reset && mtk_wed_ring_alloc(dev, wdma, MTK_WED_WDMA_RING_SIZE,
-					 desc_size, true))
+					 dev->hw->soc->wdma_desc_size, true))
 		return -ENOMEM;
 
 	wdma_w32(dev, MTK_WDMA_RING_RX(idx) + MTK_WED_RING_OFS_BASE,
@@ -1278,7 +1903,6 @@ static int
 mtk_wed_wdma_tx_ring_setup(struct mtk_wed_device *dev, int idx, int size,
 			   bool reset)
 {
-	u32 desc_size = sizeof(struct mtk_wdma_desc) * dev->hw->version;
 	struct mtk_wed_ring *wdma;
 
 	if (idx >= ARRAY_SIZE(dev->tx_wdma))
@@ -1286,9 +1910,27 @@ mtk_wed_wdma_tx_ring_setup(struct mtk_wed_device *dev, int idx, int size,
 
 	wdma = &dev->tx_wdma[idx];
 	if (!reset && mtk_wed_ring_alloc(dev, wdma, MTK_WED_WDMA_RING_SIZE,
-					 desc_size, true))
+					 dev->hw->soc->wdma_desc_size, true))
 		return -ENOMEM;
 
+	if (mtk_wed_is_v3_or_greater(dev->hw)) {
+		struct mtk_wdma_desc *desc = wdma->desc;
+		int i;
+
+		for (i = 0; i < MTK_WED_WDMA_RING_SIZE; i++) {
+			desc->buf0 = 0;
+			desc->ctrl = cpu_to_le32(MTK_WDMA_DESC_CTRL_DMA_DONE);
+			desc->buf1 = 0;
+			desc->info = cpu_to_le32(MTK_WDMA_TXD0_DESC_INFO_DMA_DONE);
+			desc++;
+			desc->buf0 = 0;
+			desc->ctrl = cpu_to_le32(MTK_WDMA_DESC_CTRL_DMA_DONE);
+			desc->buf1 = 0;
+			desc->info = cpu_to_le32(MTK_WDMA_TXD1_DESC_INFO_DMA_DONE);
+			desc++;
+		}
+	}
+
 	wdma_w32(dev, MTK_WDMA_RING_TX(idx) + MTK_WED_RING_OFS_BASE,
 		 wdma->desc_phys);
 	wdma_w32(dev, MTK_WDMA_RING_TX(idx) + MTK_WED_RING_OFS_COUNT,
@@ -1344,7 +1986,7 @@ mtk_wed_configure_irq(struct mtk_wed_device *dev, u32 irq_mask)
 		MTK_WED_CTRL_WED_TX_BM_EN |
 		MTK_WED_CTRL_WED_TX_FREE_AGENT_EN);
 
-	if (dev->hw->version == 1) {
+	if (mtk_wed_is_v1(dev->hw)) {
 		wed_w32(dev, MTK_WED_PCIE_INT_TRIGGER,
 			MTK_WED_PCIE_INT_TRIGGER_STATUS);
 
@@ -1354,8 +1996,9 @@ mtk_wed_configure_irq(struct mtk_wed_device *dev, u32 irq_mask)
 
 		wed_clr(dev, MTK_WED_WDMA_INT_CTRL, wdma_mask);
 	} else {
-		wdma_mask |= FIELD_PREP(MTK_WDMA_INT_MASK_TX_DONE,
-					GENMASK(1, 0));
+		if (mtk_wed_is_v3_or_greater(dev->hw))
+			wed_set(dev, MTK_WED_CTRL, MTK_WED_CTRL_TX_TKID_ALI_EN);
+
 		/* initail tx interrupt trigger */
 		wed_w32(dev, MTK_WED_WPDMA_INT_CTRL_TX,
 			MTK_WED_WPDMA_INT_CTRL_TX0_DONE_EN |
@@ -1374,15 +2017,20 @@ mtk_wed_configure_irq(struct mtk_wed_device *dev, u32 irq_mask)
 			FIELD_PREP(MTK_WED_WPDMA_INT_CTRL_TX_FREE_DONE_TRIG,
 				   dev->wlan.txfree_tbit));
 
-		wed_w32(dev, MTK_WED_WPDMA_INT_CTRL_RX,
-			MTK_WED_WPDMA_INT_CTRL_RX0_EN |
-			MTK_WED_WPDMA_INT_CTRL_RX0_CLR |
-			MTK_WED_WPDMA_INT_CTRL_RX1_EN |
-			MTK_WED_WPDMA_INT_CTRL_RX1_CLR |
-			FIELD_PREP(MTK_WED_WPDMA_INT_CTRL_RX0_DONE_TRIG,
-				   dev->wlan.rx_tbit[0]) |
-			FIELD_PREP(MTK_WED_WPDMA_INT_CTRL_RX1_DONE_TRIG,
-				   dev->wlan.rx_tbit[1]));
+		if (mtk_wed_get_rx_capa(dev)) {
+			wed_w32(dev, MTK_WED_WPDMA_INT_CTRL_RX,
+				MTK_WED_WPDMA_INT_CTRL_RX0_EN |
+				MTK_WED_WPDMA_INT_CTRL_RX0_CLR |
+				MTK_WED_WPDMA_INT_CTRL_RX1_EN |
+				MTK_WED_WPDMA_INT_CTRL_RX1_CLR |
+				FIELD_PREP(MTK_WED_WPDMA_INT_CTRL_RX0_DONE_TRIG,
+					   dev->wlan.rx_tbit[0]) |
+				FIELD_PREP(MTK_WED_WPDMA_INT_CTRL_RX1_DONE_TRIG,
+					   dev->wlan.rx_tbit[1]));
+
+			wdma_mask |= FIELD_PREP(MTK_WDMA_INT_MASK_TX_DONE,
+						GENMASK(1, 0));
+		}
 
 		wed_w32(dev, MTK_WED_WDMA_INT_CLR, wdma_mask);
 		wed_set(dev, MTK_WED_WDMA_INT_CTRL,
@@ -1398,55 +2046,280 @@ mtk_wed_configure_irq(struct mtk_wed_device *dev, u32 irq_mask)
 	wed_w32(dev, MTK_WED_INT_MASK, irq_mask);
 }
 
+#define MTK_WFMDA_RX_DMA_EN	BIT(2)
 static void
 mtk_wed_dma_enable(struct mtk_wed_device *dev)
 {
-	wed_set(dev, MTK_WED_WPDMA_INT_CTRL, MTK_WED_WPDMA_INT_CTRL_SUBRT_ADV);
+	int i;
+
+	if (!mtk_wed_is_v3_or_greater(dev->hw)) {
+		wed_set(dev, MTK_WED_WPDMA_INT_CTRL,
+			MTK_WED_WPDMA_INT_CTRL_SUBRT_ADV);
+		wed_set(dev, MTK_WED_WPDMA_GLO_CFG,
+			MTK_WED_WPDMA_GLO_CFG_TX_DRV_EN |
+			MTK_WED_WPDMA_GLO_CFG_RX_DRV_EN);
+		wdma_set(dev, MTK_WDMA_GLO_CFG,
+			 MTK_WDMA_GLO_CFG_TX_DMA_EN |
+			 MTK_WDMA_GLO_CFG_RX_INFO1_PRERES |
+			 MTK_WDMA_GLO_CFG_RX_INFO2_PRERES);
+		wed_set(dev, MTK_WED_WPDMA_CTRL, MTK_WED_WPDMA_CTRL_SDL1_FIXED);
+	} else {
+		wed_set(dev, MTK_WED_WPDMA_GLO_CFG,
+			MTK_WED_WPDMA_GLO_CFG_TX_DRV_EN |
+			MTK_WED_WPDMA_GLO_CFG_RX_DRV_EN |
+			MTK_WED_WPDMA_GLO_CFG_RX_DDONE2_WR);
+		wdma_set(dev, MTK_WDMA_GLO_CFG, MTK_WDMA_GLO_CFG_TX_DMA_EN);
+	}
 
 	wed_set(dev, MTK_WED_GLO_CFG,
 		MTK_WED_GLO_CFG_TX_DMA_EN |
 		MTK_WED_GLO_CFG_RX_DMA_EN);
-	wed_set(dev, MTK_WED_WPDMA_GLO_CFG,
-		MTK_WED_WPDMA_GLO_CFG_TX_DRV_EN |
-		MTK_WED_WPDMA_GLO_CFG_RX_DRV_EN);
+
 	wed_set(dev, MTK_WED_WDMA_GLO_CFG,
 		MTK_WED_WDMA_GLO_CFG_RX_DRV_EN);
 
-	wdma_set(dev, MTK_WDMA_GLO_CFG,
-		 MTK_WDMA_GLO_CFG_TX_DMA_EN |
-		 MTK_WDMA_GLO_CFG_RX_INFO1_PRERES |
-		 MTK_WDMA_GLO_CFG_RX_INFO2_PRERES);
-
-	if (dev->hw->version == 1) {
+	if (mtk_wed_is_v1(dev->hw)) {
 		wdma_set(dev, MTK_WDMA_GLO_CFG,
 			 MTK_WDMA_GLO_CFG_RX_INFO3_PRERES);
-	} else {
-		int i;
+		return;
+	}
 
-		wed_set(dev, MTK_WED_WPDMA_CTRL,
-			MTK_WED_WPDMA_CTRL_SDL1_FIXED);
+	wed_set(dev, MTK_WED_WPDMA_GLO_CFG,
+		MTK_WED_WPDMA_GLO_CFG_RX_DRV_R0_PKT_PROC |
+		MTK_WED_WPDMA_GLO_CFG_RX_DRV_R0_CRX_SYNC);
 
-		wed_set(dev, MTK_WED_WDMA_GLO_CFG,
-			MTK_WED_WDMA_GLO_CFG_TX_DRV_EN |
-			MTK_WED_WDMA_GLO_CFG_TX_DDONE_CHK);
+	if (mtk_wed_is_v3_or_greater(dev->hw)) {
+		wed_set(dev, MTK_WED_WDMA_RX_PREF_CFG,
+			FIELD_PREP(MTK_WED_WDMA_RX_PREF_BURST_SIZE, 0x10) |
+			FIELD_PREP(MTK_WED_WDMA_RX_PREF_LOW_THRES, 0x8));
+		wed_clr(dev, MTK_WED_WDMA_RX_PREF_CFG,
+			MTK_WED_WDMA_RX_PREF_DDONE2_EN);
+		wed_set(dev, MTK_WED_WDMA_RX_PREF_CFG, MTK_WED_WDMA_RX_PREF_EN);
 
+		wed_clr(dev, MTK_WED_WPDMA_GLO_CFG,
+			MTK_WED_WPDMA_GLO_CFG_TX_DDONE_CHK_LAST);
 		wed_set(dev, MTK_WED_WPDMA_GLO_CFG,
-			MTK_WED_WPDMA_GLO_CFG_RX_DRV_R0_PKT_PROC |
-			MTK_WED_WPDMA_GLO_CFG_RX_DRV_R0_CRX_SYNC);
+			MTK_WED_WPDMA_GLO_CFG_TX_DDONE_CHK |
+			MTK_WED_WPDMA_GLO_CFG_RX_DRV_EVENT_PKT_FMT_CHK |
+			MTK_WED_WPDMA_GLO_CFG_RX_DRV_UNS_VER_FORCE_4);
 
-		wed_clr(dev, MTK_WED_WPDMA_GLO_CFG,
-			MTK_WED_WPDMA_GLO_CFG_TX_TKID_KEEP |
-			MTK_WED_WPDMA_GLO_CFG_TX_DMAD_DW3_PREV);
+		wdma_set(dev, MTK_WDMA_PREF_RX_CFG, MTK_WDMA_PREF_RX_CFG_PREF_EN);
+		wdma_set(dev, MTK_WDMA_WRBK_RX_CFG, MTK_WDMA_WRBK_RX_CFG_WRBK_EN);
+	}
 
-		wed_set(dev, MTK_WED_WPDMA_RX_D_GLO_CFG,
-			MTK_WED_WPDMA_RX_D_RX_DRV_EN |
-			FIELD_PREP(MTK_WED_WPDMA_RX_D_RXD_READ_LEN, 0x18) |
-			FIELD_PREP(MTK_WED_WPDMA_RX_D_INIT_PHASE_RXEN_SEL,
-				   0x2));
+	wed_clr(dev, MTK_WED_WPDMA_GLO_CFG,
+		MTK_WED_WPDMA_GLO_CFG_TX_TKID_KEEP |
+		MTK_WED_WPDMA_GLO_CFG_TX_DMAD_DW3_PREV);
+
+	if (!mtk_wed_get_rx_capa(dev))
+		return;
+
+	wed_set(dev, MTK_WED_WDMA_GLO_CFG,
+		MTK_WED_WDMA_GLO_CFG_TX_DRV_EN |
+		MTK_WED_WDMA_GLO_CFG_TX_DDONE_CHK);
+
+	wed_clr(dev, MTK_WED_WPDMA_RX_D_GLO_CFG, MTK_WED_WPDMA_RX_D_RXD_READ_LEN);
+	wed_set(dev, MTK_WED_WPDMA_RX_D_GLO_CFG,
+		MTK_WED_WPDMA_RX_D_RX_DRV_EN |
+		FIELD_PREP(MTK_WED_WPDMA_RX_D_RXD_READ_LEN, 0x18) |
+		FIELD_PREP(MTK_WED_WPDMA_RX_D_INIT_PHASE_RXEN_SEL, 0x2));
+
+	if (mtk_wed_is_v3_or_greater(dev->hw)) {
+		wed_set(dev, MTK_WED_WPDMA_RX_D_PREF_CFG,
+			MTK_WED_WPDMA_RX_D_PREF_EN |
+			FIELD_PREP(MTK_WED_WPDMA_RX_D_PREF_BURST_SIZE, 0x10) |
+			FIELD_PREP(MTK_WED_WPDMA_RX_D_PREF_LOW_THRES, 0x8));
+
+		wed_set(dev, MTK_WED_RRO_RX_D_CFG(2), MTK_WED_RRO_RX_D_DRV_EN);
+		wdma_set(dev, MTK_WDMA_PREF_TX_CFG, MTK_WDMA_PREF_TX_CFG_PREF_EN);
+		wdma_set(dev, MTK_WDMA_WRBK_TX_CFG, MTK_WDMA_WRBK_TX_CFG_WRBK_EN);
+	}
+
+	for (i = 0; i < MTK_WED_RX_QUEUES; i++) {
+		struct mtk_wed_ring *ring = &dev->rx_ring[i];
+		u32 val;
+
+		if (!(ring->flags & MTK_WED_RING_CONFIGURED))
+			continue; /* queue is not configured by mt76 */
+
+		if (mtk_wed_check_wfdma_rx_fill(dev, ring)) {
+			dev_err(dev->hw->dev,
+				"rx_ring(%d) dma enable failed\n", i);
+			continue;
+		}
+
+		val = wifi_r32(dev,
+			       dev->wlan.wpdma_rx_glo -
+			       dev->wlan.phy_base) | MTK_WFMDA_RX_DMA_EN;
+		wifi_w32(dev,
+			 dev->wlan.wpdma_rx_glo - dev->wlan.phy_base,
+			 val);
+	}
+}
+
+static void
+mtk_wed_start_hw_rro(struct mtk_wed_device *dev, u32 irq_mask, bool reset)
+{
+	int i;
+
+	wed_w32(dev, MTK_WED_WPDMA_INT_MASK, irq_mask);
+	wed_w32(dev, MTK_WED_INT_MASK, irq_mask);
+
+	if (!mtk_wed_get_rx_capa(dev) || !dev->wlan.hw_rro)
+		return;
+
+	if (reset) {
+		wed_set(dev, MTK_WED_RRO_MSDU_PG_RING2_CFG,
+			MTK_WED_RRO_MSDU_PG_DRV_EN);
+		return;
+	}
+
+	wed_set(dev, MTK_WED_RRO_RX_D_CFG(2), MTK_WED_RRO_MSDU_PG_DRV_CLR);
+	wed_w32(dev, MTK_WED_RRO_MSDU_PG_RING2_CFG,
+		MTK_WED_RRO_MSDU_PG_DRV_CLR);
+
+	wed_w32(dev, MTK_WED_WPDMA_INT_CTRL_RRO_RX,
+		MTK_WED_WPDMA_INT_CTRL_RRO_RX0_EN |
+		MTK_WED_WPDMA_INT_CTRL_RRO_RX0_CLR |
+		MTK_WED_WPDMA_INT_CTRL_RRO_RX1_EN |
+		MTK_WED_WPDMA_INT_CTRL_RRO_RX1_CLR |
+		FIELD_PREP(MTK_WED_WPDMA_INT_CTRL_RRO_RX0_DONE_TRIG,
+			   dev->wlan.rro_rx_tbit[0]) |
+		FIELD_PREP(MTK_WED_WPDMA_INT_CTRL_RRO_RX1_DONE_TRIG,
+			   dev->wlan.rro_rx_tbit[1]));
+
+	wed_w32(dev, MTK_WED_WPDMA_INT_CTRL_RRO_MSDU_PG,
+		MTK_WED_WPDMA_INT_CTRL_RRO_PG0_EN |
+		MTK_WED_WPDMA_INT_CTRL_RRO_PG0_CLR |
+		MTK_WED_WPDMA_INT_CTRL_RRO_PG1_EN |
+		MTK_WED_WPDMA_INT_CTRL_RRO_PG1_CLR |
+		MTK_WED_WPDMA_INT_CTRL_RRO_PG2_EN |
+		MTK_WED_WPDMA_INT_CTRL_RRO_PG2_CLR |
+		FIELD_PREP(MTK_WED_WPDMA_INT_CTRL_RRO_PG0_DONE_TRIG,
+			   dev->wlan.rx_pg_tbit[0]) |
+		FIELD_PREP(MTK_WED_WPDMA_INT_CTRL_RRO_PG1_DONE_TRIG,
+			   dev->wlan.rx_pg_tbit[1]) |
+		FIELD_PREP(MTK_WED_WPDMA_INT_CTRL_RRO_PG2_DONE_TRIG,
+			   dev->wlan.rx_pg_tbit[2]));
+
+	/* RRO_MSDU_PG_RING2_CFG1_FLD_DRV_EN should be enabled after
+	 * WM FWDL completed, otherwise RRO_MSDU_PG ring may broken
+	 */
+	wed_set(dev, MTK_WED_RRO_MSDU_PG_RING2_CFG,
+		MTK_WED_RRO_MSDU_PG_DRV_EN);
+
+	for (i = 0; i < MTK_WED_RX_QUEUES; i++) {
+		struct mtk_wed_ring *ring = &dev->rx_rro_ring[i];
+
+		if (!(ring->flags & MTK_WED_RING_CONFIGURED))
+			continue;
+
+		if (mtk_wed_check_wfdma_rx_fill(dev, ring))
+			dev_err(dev->hw->dev,
+				"rx_rro_ring(%d) initialization failed\n", i);
+	}
+
+	for (i = 0; i < MTK_WED_RX_PAGE_QUEUES; i++) {
+		struct mtk_wed_ring *ring = &dev->rx_page_ring[i];
+
+		if (!(ring->flags & MTK_WED_RING_CONFIGURED))
+			continue;
+
+		if (mtk_wed_check_wfdma_rx_fill(dev, ring))
+			dev_err(dev->hw->dev,
+				"rx_page_ring(%d) initialization failed\n", i);
+	}
+}
+
+static void
+mtk_wed_rro_rx_ring_setup(struct mtk_wed_device *dev, int idx,
+			  void __iomem *regs)
+{
+	struct mtk_wed_ring *ring = &dev->rx_rro_ring[idx];
+
+	ring->wpdma = regs;
+	wed_w32(dev, MTK_WED_RRO_RX_D_RX(idx) + MTK_WED_RING_OFS_BASE,
+		readl(regs));
+	wed_w32(dev, MTK_WED_RRO_RX_D_RX(idx) + MTK_WED_RING_OFS_COUNT,
+		readl(regs + MTK_WED_RING_OFS_COUNT));
+	ring->flags |= MTK_WED_RING_CONFIGURED;
+}
+
+static void
+mtk_wed_msdu_pg_rx_ring_setup(struct mtk_wed_device *dev, int idx, void __iomem *regs)
+{
+	struct mtk_wed_ring *ring = &dev->rx_page_ring[idx];
+
+	ring->wpdma = regs;
+	wed_w32(dev, MTK_WED_RRO_MSDU_PG_CTRL0(idx) + MTK_WED_RING_OFS_BASE,
+		readl(regs));
+	wed_w32(dev, MTK_WED_RRO_MSDU_PG_CTRL0(idx) + MTK_WED_RING_OFS_COUNT,
+		readl(regs + MTK_WED_RING_OFS_COUNT));
+	ring->flags |= MTK_WED_RING_CONFIGURED;
+}
+
+static int
+mtk_wed_ind_rx_ring_setup(struct mtk_wed_device *dev, void __iomem *regs)
+{
+	struct mtk_wed_ring *ring = &dev->ind_cmd_ring;
+	u32 val = readl(regs + MTK_WED_RING_OFS_COUNT);
+	int i, count = 0;
 
-		for (i = 0; i < MTK_WED_RX_QUEUES; i++)
-			mtk_wed_check_wfdma_rx_fill(dev, i);
+	ring->wpdma = regs;
+	wed_w32(dev, MTK_WED_IND_CMD_RX_CTRL1 + MTK_WED_RING_OFS_BASE,
+		readl(regs) & 0xfffffff0);
+
+	wed_w32(dev, MTK_WED_IND_CMD_RX_CTRL1 + MTK_WED_RING_OFS_COUNT,
+		readl(regs + MTK_WED_RING_OFS_COUNT));
+
+	/* ack sn cr */
+	wed_w32(dev, MTK_WED_RRO_CFG0, dev->wlan.phy_base +
+		dev->wlan.ind_cmd.ack_sn_addr);
+	wed_w32(dev, MTK_WED_RRO_CFG1,
+		FIELD_PREP(MTK_WED_RRO_CFG1_MAX_WIN_SZ,
+			   dev->wlan.ind_cmd.win_size) |
+		FIELD_PREP(MTK_WED_RRO_CFG1_PARTICL_SE_ID,
+			   dev->wlan.ind_cmd.particular_sid));
+
+	/* particular session addr element */
+	wed_w32(dev, MTK_WED_ADDR_ELEM_CFG0,
+		dev->wlan.ind_cmd.particular_se_phys);
+
+	for (i = 0; i < dev->wlan.ind_cmd.se_group_nums; i++) {
+		wed_w32(dev, MTK_WED_RADDR_ELEM_TBL_WDATA,
+			dev->wlan.ind_cmd.addr_elem_phys[i] >> 4);
+		wed_w32(dev, MTK_WED_ADDR_ELEM_TBL_CFG,
+			MTK_WED_ADDR_ELEM_TBL_WR | (i & 0x7f));
+
+		val = wed_r32(dev, MTK_WED_ADDR_ELEM_TBL_CFG);
+		while (!(val & MTK_WED_ADDR_ELEM_TBL_WR_RDY) && count++ < 100)
+			val = wed_r32(dev, MTK_WED_ADDR_ELEM_TBL_CFG);
+		if (count >= 100)
+			dev_err(dev->hw->dev,
+				"write ba session base failed\n");
+	}
+
+	/* pn check init */
+	for (i = 0; i < dev->wlan.ind_cmd.particular_sid; i++) {
+		wed_w32(dev, MTK_WED_PN_CHECK_WDATA_M,
+			MTK_WED_PN_CHECK_IS_FIRST);
+
+		wed_w32(dev, MTK_WED_PN_CHECK_CFG, MTK_WED_PN_CHECK_WR |
+			FIELD_PREP(MTK_WED_PN_CHECK_SE_ID, i));
+
+		count = 0;
+		val = wed_r32(dev, MTK_WED_PN_CHECK_CFG);
+		while (!(val & MTK_WED_PN_CHECK_WR_RDY) && count++ < 100)
+			val = wed_r32(dev, MTK_WED_PN_CHECK_CFG);
+		if (count >= 100)
+			dev_err(dev->hw->dev,
+				"session(%d) initialization failed\n", i);
 	}
+
+	wed_w32(dev, MTK_WED_RX_IND_CMD_CNT0, MTK_WED_RX_IND_CMD_DBG_CNT_EN);
+	wed_set(dev, MTK_WED_CTRL, MTK_WED_CTRL_WED_RX_IND_CMD_EN);
+
+	return 0;
 }
 
 static void
@@ -1466,14 +2339,14 @@ mtk_wed_start(struct mtk_wed_device *dev, u32 irq_mask)
 
 	mtk_wed_set_ext_int(dev, true);
 
-	if (dev->hw->version == 1) {
+	if (mtk_wed_is_v1(dev->hw)) {
 		u32 val = dev->wlan.wpdma_phys | MTK_PCIE_MIRROR_MAP_EN |
 			  FIELD_PREP(MTK_PCIE_MIRROR_MAP_WED_ID,
 				     dev->hw->index);
 
 		val |= BIT(0) | (BIT(1) * !!dev->hw->index);
 		regmap_write(dev->hw->mirror, dev->hw->index * 4, val);
-	} else {
+	} else if (mtk_wed_get_rx_capa(dev)) {
 		/* driver set mid ready and only once */
 		wed_w32(dev, MTK_WED_EXT_INT_MASK1,
 			MTK_WED_EXT_INT_STATUS_WPDMA_MID_RDY);
@@ -1483,12 +2356,18 @@ mtk_wed_start(struct mtk_wed_device *dev, u32 irq_mask)
 		wed_r32(dev, MTK_WED_EXT_INT_MASK1);
 		wed_r32(dev, MTK_WED_EXT_INT_MASK2);
 
+		if (mtk_wed_is_v3_or_greater(dev->hw)) {
+			wed_w32(dev, MTK_WED_EXT_INT_MASK3,
+				MTK_WED_EXT_INT_STATUS_WPDMA_MID_RDY);
+			wed_r32(dev, MTK_WED_EXT_INT_MASK3);
+		}
+
 		if (mtk_wed_rro_cfg(dev))
 			return;
-
 	}
 
 	mtk_wed_set_512_support(dev, dev->wlan.wcid_512);
+	mtk_wed_amsdu_init(dev);
 
 	mtk_wed_dma_enable(dev);
 	dev->running = true;
@@ -1535,6 +2414,7 @@ mtk_wed_attach(struct mtk_wed_device *dev)
 	dev->irq = hw->irq;
 	dev->wdma_idx = hw->index;
 	dev->version = hw->version;
+	dev->hw->pcie_base = mtk_wed_get_pcie_base(dev);
 
 	if (hw->eth->dma_dev == hw->eth->dev &&
 	    of_dma_is_coherent(hw->eth->dev->of_node))
@@ -1544,6 +2424,10 @@ mtk_wed_attach(struct mtk_wed_device *dev)
 	if (ret)
 		goto out;
 
+	ret = mtk_wed_amsdu_buffer_alloc(dev);
+	if (ret)
+		goto out;
+
 	if (mtk_wed_get_rx_capa(dev)) {
 		ret = mtk_wed_rro_alloc(dev);
 		if (ret)
@@ -1551,13 +2435,14 @@ mtk_wed_attach(struct mtk_wed_device *dev)
 	}
 
 	mtk_wed_hw_init_early(dev);
-	if (hw->version == 1) {
+	if (mtk_wed_is_v1(hw))
 		regmap_update_bits(hw->hifsys, HIFSYS_DMA_AG_MAP,
 				   BIT(hw->index), 0);
-	} else {
+	else
 		dev->rev_id = wed_r32(dev, MTK_WED_REV_ID);
+
+	if (mtk_wed_get_rx_capa(dev))
 		ret = mtk_wed_wo_init(hw);
-	}
 out:
 	if (ret) {
 		dev_err(dev->hw->dev, "failed to attach wed device\n");
@@ -1601,6 +2486,23 @@ mtk_wed_tx_ring_setup(struct mtk_wed_device *dev, int idx, void __iomem *regs,
 	ring->reg_base = MTK_WED_RING_TX(idx);
 	ring->wpdma = regs;
 
+	if (mtk_wed_is_v3_or_greater(dev->hw) && idx == 1) {
+		/* reset prefetch index */
+		wed_set(dev, MTK_WED_WDMA_RX_PREF_CFG,
+			MTK_WED_WDMA_RX_PREF_RX0_SIDX_CLR |
+			MTK_WED_WDMA_RX_PREF_RX1_SIDX_CLR);
+
+		wed_clr(dev, MTK_WED_WDMA_RX_PREF_CFG,
+			MTK_WED_WDMA_RX_PREF_RX0_SIDX_CLR |
+			MTK_WED_WDMA_RX_PREF_RX1_SIDX_CLR);
+
+		/* reset prefetch FIFO */
+		wed_w32(dev, MTK_WED_WDMA_RX_PREF_FIFO_CFG,
+			MTK_WED_WDMA_RX_PREF_FIFO_RX0_CLR |
+			MTK_WED_WDMA_RX_PREF_FIFO_RX1_CLR);
+		wed_w32(dev, MTK_WED_WDMA_RX_PREF_FIFO_CFG, 0);
+	}
+
 	/* WED -> WPDMA */
 	wpdma_tx_w32(dev, idx, MTK_WED_RING_OFS_BASE, ring->desc_phys);
 	wpdma_tx_w32(dev, idx, MTK_WED_RING_OFS_COUNT, MTK_WED_TX_RING_SIZE);
@@ -1619,7 +2521,7 @@ static int
 mtk_wed_txfree_ring_setup(struct mtk_wed_device *dev, void __iomem *regs)
 {
 	struct mtk_wed_ring *ring = &dev->txfree_ring;
-	int i, index = dev->hw->version == 1;
+	int i, index = mtk_wed_is_v1(dev->hw);
 
 	/*
 	 * For txfree event handling, the same DMA ring is shared between WED
@@ -1675,15 +2577,13 @@ mtk_wed_rx_ring_setup(struct mtk_wed_device *dev, int idx, void __iomem *regs,
 static u32
 mtk_wed_irq_get(struct mtk_wed_device *dev, u32 mask)
 {
-	u32 val, ext_mask = MTK_WED_EXT_INT_STATUS_ERROR_MASK;
+	u32 val, ext_mask;
 
-	if (dev->hw->version == 1)
-		ext_mask |= MTK_WED_EXT_INT_STATUS_TX_DRV_R_RESP_ERR;
+	if (mtk_wed_is_v3_or_greater(dev->hw))
+		ext_mask = MTK_WED_EXT_INT_STATUS_RX_DRV_COHERENT |
+			   MTK_WED_EXT_INT_STATUS_TKID_WO_PYLD;
 	else
-		ext_mask |= MTK_WED_EXT_INT_STATUS_RX_FBUF_LO_TH |
-			    MTK_WED_EXT_INT_STATUS_RX_FBUF_HI_TH |
-			    MTK_WED_EXT_INT_STATUS_RX_DRV_COHERENT |
-			    MTK_WED_EXT_INT_STATUS_TX_DMA_W_RESP_ERR;
+		ext_mask = MTK_WED_EXT_INT_STATUS_ERROR_MASK;
 
 	val = wed_r32(dev, MTK_WED_EXT_INT_STATUS);
 	wed_w32(dev, MTK_WED_EXT_INT_STATUS, val);
@@ -1713,19 +2613,20 @@ mtk_wed_irq_set_mask(struct mtk_wed_device *dev, u32 mask)
 int mtk_wed_flow_add(int index)
 {
 	struct mtk_wed_hw *hw = hw_list[index];
-	int ret;
+	int ret = 0;
 
-	if (!hw || !hw->wed_dev)
-		return -ENODEV;
+	mutex_lock(&hw_lock);
 
-	if (hw->num_flows) {
-		hw->num_flows++;
-		return 0;
+	if (!hw || !hw->wed_dev) {
+		ret = -ENODEV;
+		goto out;
 	}
 
-	mutex_lock(&hw_lock);
-	if (!hw->wed_dev) {
-		ret = -ENODEV;
+	if (!hw->wed_dev->wlan.offload_enable)
+		goto out;
+
+	if (hw->num_flows) {
+		hw->num_flows++;
 		goto out;
 	}
 
@@ -1744,14 +2645,15 @@ void mtk_wed_flow_remove(int index)
 {
 	struct mtk_wed_hw *hw = hw_list[index];
 
-	if (!hw)
-		return;
+	mutex_lock(&hw_lock);
 
-	if (--hw->num_flows)
-		return;
+	if (!hw || !hw->wed_dev)
+		goto out;
 
-	mutex_lock(&hw_lock);
-	if (!hw->wed_dev)
+	if (!hw->wed_dev->wlan.offload_disable)
+		goto out;
+
+	if (--hw->num_flows)
 		goto out;
 
 	hw->wed_dev->wlan.offload_disable(hw->wed_dev);
@@ -1842,7 +2744,7 @@ mtk_wed_setup_tc(struct mtk_wed_device *wed, struct net_device *dev,
 {
 	struct mtk_wed_hw *hw = wed->hw;
 
-	if (hw->version < 2)
+	if (mtk_wed_is_v1(hw))
 		return -EOPNOTSUPP;
 
 	switch (type) {
@@ -1874,6 +2776,10 @@ void mtk_wed_add_hw(struct device_node *np, struct mtk_eth *eth,
 		.detach = mtk_wed_detach,
 		.ppe_check = mtk_wed_ppe_check,
 		.setup_tc = mtk_wed_setup_tc,
+		.start_hw_rro = mtk_wed_start_hw_rro,
+		.rro_rx_ring_setup = mtk_wed_rro_rx_ring_setup,
+		.msdu_pg_rx_ring_setup = mtk_wed_msdu_pg_rx_ring_setup,
+		.ind_rx_ring_setup = mtk_wed_ind_rx_ring_setup,
 	};
 	struct device_node *eth_np = eth->dev->of_node;
 	struct platform_device *pdev;
@@ -1916,9 +2822,17 @@ void mtk_wed_add_hw(struct device_node *np, struct mtk_eth *eth,
 	hw->wdma = wdma;
 	hw->index = index;
 	hw->irq = irq;
-	hw->version = mtk_is_netsys_v1(eth) ? 1 : 2;
+	hw->version = eth->soc->version;
 
-	if (hw->version == 1) {
+	switch (hw->version) {
+	case 2:
+		hw->soc = &mt7986_data;
+		break;
+	case 3:
+		hw->soc = &mt7988_data;
+		break;
+	default:
+	case 1:
 		hw->mirror = syscon_regmap_lookup_by_phandle(eth_np,
 				"mediatek,pcie-mirror");
 		hw->hifsys = syscon_regmap_lookup_by_phandle(eth_np,
@@ -1932,6 +2846,8 @@ void mtk_wed_add_hw(struct device_node *np, struct mtk_eth *eth,
 			regmap_write(hw->mirror, 0, 0);
 			regmap_write(hw->mirror, 4, 0);
 		}
+		hw->soc = &mt7622_data;
+		break;
 	}
 
 	mtk_wed_hw_add_debugfs(hw);
diff --git a/drivers/net/ethernet/mediatek/mtk_wed.h b/drivers/net/ethernet/mediatek/mtk_wed.h
index 43ab77eaf683..c1f0479d7a71 100644
--- a/drivers/net/ethernet/mediatek/mtk_wed.h
+++ b/drivers/net/ethernet/mediatek/mtk_wed.h
@@ -9,10 +9,29 @@
 #include <linux/regmap.h>
 #include <linux/netdevice.h>
 
+#include "mtk_wed_regs.h"
+
 struct mtk_eth;
 struct mtk_wed_wo;
 
+struct mtk_wed_soc_data {
+	struct {
+		u32 tx_bm_tkid;
+		u32 wpdma_rx_ring0;
+		u32 reset_idx_tx_mask;
+		u32 reset_idx_rx_mask;
+	} regmap;
+	u32 tx_ring_desc_size;
+	u32 wdma_desc_size;
+};
+
+struct mtk_wed_amsdu {
+	void *txd;
+	dma_addr_t txd_phy;
+};
+
 struct mtk_wed_hw {
+	const struct mtk_wed_soc_data *soc;
 	struct device_node *node;
 	struct mtk_eth *eth;
 	struct regmap *regs;
@@ -24,6 +43,8 @@ struct mtk_wed_hw {
 	struct dentry *debugfs_dir;
 	struct mtk_wed_device *wed_dev;
 	struct mtk_wed_wo *wed_wo;
+	struct mtk_wed_amsdu *wed_amsdu;
+	u32 pcie_base;
 	u32 debugfs_reg;
 	u32 num_flows;
 	u8 version;
@@ -37,9 +58,30 @@ struct mtk_wdma_info {
 	u8 queue;
 	u16 wcid;
 	u8 bss;
+	u8 amsdu;
 };
 
 #ifdef CONFIG_NET_MEDIATEK_SOC_WED
+static inline bool mtk_wed_is_v1(struct mtk_wed_hw *hw)
+{
+	return hw->version == 1;
+}
+
+static inline bool mtk_wed_is_v2(struct mtk_wed_hw *hw)
+{
+	return hw->version == 2;
+}
+
+static inline bool mtk_wed_is_v3(struct mtk_wed_hw *hw)
+{
+	return hw->version == 3;
+}
+
+static inline bool mtk_wed_is_v3_or_greater(struct mtk_wed_hw *hw)
+{
+	return hw->version > 2;
+}
+
 static inline void
 wed_w32(struct mtk_wed_device *dev, u32 reg, u32 val)
 {
@@ -122,6 +164,21 @@ wpdma_txfree_w32(struct mtk_wed_device *dev, u32 reg, u32 val)
 	writel(val, dev->txfree_ring.wpdma + reg);
 }
 
+static inline u32 mtk_wed_get_pcie_base(struct mtk_wed_device *dev)
+{
+	if (!mtk_wed_is_v3_or_greater(dev->hw))
+		return MTK_WED_PCIE_BASE;
+
+	switch (dev->hw->index) {
+	case 1:
+		return MTK_WED_PCIE_BASE1;
+	case 2:
+		return MTK_WED_PCIE_BASE2;
+	default:
+		return MTK_WED_PCIE_BASE0;
+	}
+}
+
 void mtk_wed_add_hw(struct device_node *np, struct mtk_eth *eth,
 		    void __iomem *wdma, phys_addr_t wdma_phy,
 		    int index);
diff --git a/drivers/net/ethernet/mediatek/mtk_wed_debugfs.c b/drivers/net/ethernet/mediatek/mtk_wed_debugfs.c
index e24afeaea0da..781c691473e1 100644
--- a/drivers/net/ethernet/mediatek/mtk_wed_debugfs.c
+++ b/drivers/net/ethernet/mediatek/mtk_wed_debugfs.c
@@ -11,6 +11,7 @@ struct reg_dump {
 	u16 offset;
 	u8 type;
 	u8 base;
+	u32 mask;
 };
 
 enum {
@@ -25,6 +26,8 @@ enum {
 
 #define DUMP_STR(_str) { _str, 0, DUMP_TYPE_STRING }
 #define DUMP_REG(_reg, ...) { #_reg, MTK_##_reg, __VA_ARGS__ }
+#define DUMP_REG_MASK(_reg, _mask)	\
+	{ #_mask, MTK_##_reg, DUMP_TYPE_WED, 0, MTK_##_mask }
 #define DUMP_RING(_prefix, _base, ...)				\
 	{ _prefix " BASE", _base, __VA_ARGS__ },		\
 	{ _prefix " CNT",  _base + 0x4, __VA_ARGS__ },	\
@@ -32,6 +35,7 @@ enum {
 	{ _prefix " DIDX", _base + 0xc, __VA_ARGS__ }
 
 #define DUMP_WED(_reg) DUMP_REG(_reg, DUMP_TYPE_WED)
+#define DUMP_WED_MASK(_reg, _mask) DUMP_REG_MASK(_reg, _mask)
 #define DUMP_WED_RING(_base) DUMP_RING(#_base, MTK_##_base, DUMP_TYPE_WED)
 
 #define DUMP_WDMA(_reg) DUMP_REG(_reg, DUMP_TYPE_WDMA)
@@ -151,7 +155,7 @@ DEFINE_SHOW_ATTRIBUTE(wed_txinfo);
 static int
 wed_rxinfo_show(struct seq_file *s, void *data)
 {
-	static const struct reg_dump regs[] = {
+	static const struct reg_dump regs_common[] = {
 		DUMP_STR("WPDMA RX"),
 		DUMP_WPDMA_RX_RING(0),
 		DUMP_WPDMA_RX_RING(1),
@@ -169,7 +173,7 @@ wed_rxinfo_show(struct seq_file *s, void *data)
 		DUMP_WED_RING(WED_RING_RX_DATA(0)),
 		DUMP_WED_RING(WED_RING_RX_DATA(1)),
 
-		DUMP_STR("WED RRO"),
+		DUMP_STR("WED WO RRO"),
 		DUMP_WED_RRO_RING(WED_RROQM_MIOD_CTRL0),
 		DUMP_WED(WED_RROQM_MID_MIB),
 		DUMP_WED(WED_RROQM_MOD_MIB),
@@ -180,17 +184,6 @@ wed_rxinfo_show(struct seq_file *s, void *data)
 		DUMP_WED(WED_RROQM_FDBK_ANC_MIB),
 		DUMP_WED(WED_RROQM_FDBK_ANC2H_MIB),
 
-		DUMP_STR("WED Route QM"),
-		DUMP_WED(WED_RTQM_R2H_MIB(0)),
-		DUMP_WED(WED_RTQM_R2Q_MIB(0)),
-		DUMP_WED(WED_RTQM_Q2H_MIB(0)),
-		DUMP_WED(WED_RTQM_R2H_MIB(1)),
-		DUMP_WED(WED_RTQM_R2Q_MIB(1)),
-		DUMP_WED(WED_RTQM_Q2H_MIB(1)),
-		DUMP_WED(WED_RTQM_Q2N_MIB),
-		DUMP_WED(WED_RTQM_Q2B_MIB),
-		DUMP_WED(WED_RTQM_PFDBK_MIB),
-
 		DUMP_STR("WED WDMA TX"),
 		DUMP_WED(WED_WDMA_TX_MIB),
 		DUMP_WED_RING(WED_WDMA_RING_TX),
@@ -211,6 +204,287 @@ wed_rxinfo_show(struct seq_file *s, void *data)
 		DUMP_WED(WED_RX_BM_INTF),
 		DUMP_WED(WED_RX_BM_ERR_STS),
 	};
+	static const struct reg_dump regs_wed_v2[] = {
+		DUMP_STR("WED Route QM"),
+		DUMP_WED(WED_RTQM_R2H_MIB(0)),
+		DUMP_WED(WED_RTQM_R2Q_MIB(0)),
+		DUMP_WED(WED_RTQM_Q2H_MIB(0)),
+		DUMP_WED(WED_RTQM_R2H_MIB(1)),
+		DUMP_WED(WED_RTQM_R2Q_MIB(1)),
+		DUMP_WED(WED_RTQM_Q2H_MIB(1)),
+		DUMP_WED(WED_RTQM_Q2N_MIB),
+		DUMP_WED(WED_RTQM_Q2B_MIB),
+		DUMP_WED(WED_RTQM_PFDBK_MIB),
+	};
+	static const struct reg_dump regs_wed_v3[] = {
+		DUMP_STR("WED RX RRO DATA"),
+		DUMP_WED_RING(WED_RRO_RX_D_RX(0)),
+		DUMP_WED_RING(WED_RRO_RX_D_RX(1)),
+
+		DUMP_STR("WED RX MSDU PAGE"),
+		DUMP_WED_RING(WED_RRO_MSDU_PG_CTRL0(0)),
+		DUMP_WED_RING(WED_RRO_MSDU_PG_CTRL0(1)),
+		DUMP_WED_RING(WED_RRO_MSDU_PG_CTRL0(2)),
+
+		DUMP_STR("WED RX IND CMD"),
+		DUMP_WED(WED_IND_CMD_RX_CTRL1),
+		DUMP_WED_MASK(WED_IND_CMD_RX_CTRL2, WED_IND_CMD_MAX_CNT),
+		DUMP_WED_MASK(WED_IND_CMD_RX_CTRL0, WED_IND_CMD_PROC_IDX),
+		DUMP_WED_MASK(RRO_IND_CMD_SIGNATURE, RRO_IND_CMD_DMA_IDX),
+		DUMP_WED_MASK(WED_IND_CMD_RX_CTRL0, WED_IND_CMD_MAGIC_CNT),
+		DUMP_WED_MASK(RRO_IND_CMD_SIGNATURE, RRO_IND_CMD_MAGIC_CNT),
+		DUMP_WED_MASK(WED_IND_CMD_RX_CTRL0,
+			      WED_IND_CMD_PREFETCH_FREE_CNT),
+		DUMP_WED_MASK(WED_RRO_CFG1, WED_RRO_CFG1_PARTICL_SE_ID),
+
+		DUMP_STR("WED ADDR ELEM"),
+		DUMP_WED(WED_ADDR_ELEM_CFG0),
+		DUMP_WED_MASK(WED_ADDR_ELEM_CFG1,
+			      WED_ADDR_ELEM_PREFETCH_FREE_CNT),
+
+		DUMP_STR("WED Route QM"),
+		DUMP_WED(WED_RTQM_ENQ_I2Q_DMAD_CNT),
+		DUMP_WED(WED_RTQM_ENQ_I2N_DMAD_CNT),
+		DUMP_WED(WED_RTQM_ENQ_I2Q_PKT_CNT),
+		DUMP_WED(WED_RTQM_ENQ_I2N_PKT_CNT),
+		DUMP_WED(WED_RTQM_ENQ_USED_ENTRY_CNT),
+		DUMP_WED(WED_RTQM_ENQ_ERR_CNT),
+
+		DUMP_WED(WED_RTQM_DEQ_DMAD_CNT),
+		DUMP_WED(WED_RTQM_DEQ_Q2I_DMAD_CNT),
+		DUMP_WED(WED_RTQM_DEQ_PKT_CNT),
+		DUMP_WED(WED_RTQM_DEQ_Q2I_PKT_CNT),
+		DUMP_WED(WED_RTQM_DEQ_USED_PFDBK_CNT),
+		DUMP_WED(WED_RTQM_DEQ_ERR_CNT),
+	};
+	struct mtk_wed_hw *hw = s->private;
+	struct mtk_wed_device *dev = hw->wed_dev;
+
+	if (dev) {
+		dump_wed_regs(s, dev, regs_common, ARRAY_SIZE(regs_common));
+		if (mtk_wed_is_v2(hw))
+			dump_wed_regs(s, dev,
+				      regs_wed_v2, ARRAY_SIZE(regs_wed_v2));
+		else
+			dump_wed_regs(s, dev,
+				      regs_wed_v3, ARRAY_SIZE(regs_wed_v3));
+	}
+
+	return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(wed_rxinfo);
+
+static int
+wed_amsdu_show(struct seq_file *s, void *data)
+{
+	static const struct reg_dump regs[] = {
+		DUMP_STR("WED AMDSU INFO"),
+		DUMP_WED(WED_MON_AMSDU_FIFO_DMAD),
+
+		DUMP_STR("WED AMDSU ENG0 INFO"),
+		DUMP_WED(WED_MON_AMSDU_ENG_DMAD(0)),
+		DUMP_WED(WED_MON_AMSDU_ENG_QFPL(0)),
+		DUMP_WED(WED_MON_AMSDU_ENG_QENI(0)),
+		DUMP_WED(WED_MON_AMSDU_ENG_QENO(0)),
+		DUMP_WED(WED_MON_AMSDU_ENG_MERG(0)),
+		DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT8(0),
+			      WED_AMSDU_ENG_MAX_PL_CNT),
+		DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT8(0),
+			      WED_AMSDU_ENG_MAX_QGPP_CNT),
+		DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT9(0),
+			      WED_AMSDU_ENG_CUR_ENTRY),
+		DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT9(0),
+			      WED_AMSDU_ENG_MAX_BUF_MERGED),
+		DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT9(0),
+			      WED_AMSDU_ENG_MAX_MSDU_MERGED),
+
+		DUMP_STR("WED AMDSU ENG1 INFO"),
+		DUMP_WED(WED_MON_AMSDU_ENG_DMAD(1)),
+		DUMP_WED(WED_MON_AMSDU_ENG_QFPL(1)),
+		DUMP_WED(WED_MON_AMSDU_ENG_QENI(1)),
+		DUMP_WED(WED_MON_AMSDU_ENG_QENO(1)),
+		DUMP_WED(WED_MON_AMSDU_ENG_MERG(1)),
+		DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT8(1),
+			      WED_AMSDU_ENG_MAX_PL_CNT),
+		DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT8(1),
+			      WED_AMSDU_ENG_MAX_QGPP_CNT),
+		DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT9(1),
+			      WED_AMSDU_ENG_CUR_ENTRY),
+		DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT9(2),
+			      WED_AMSDU_ENG_MAX_BUF_MERGED),
+		DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT9(2),
+			      WED_AMSDU_ENG_MAX_MSDU_MERGED),
+
+		DUMP_STR("WED AMDSU ENG2 INFO"),
+		DUMP_WED(WED_MON_AMSDU_ENG_DMAD(2)),
+		DUMP_WED(WED_MON_AMSDU_ENG_QFPL(2)),
+		DUMP_WED(WED_MON_AMSDU_ENG_QENI(2)),
+		DUMP_WED(WED_MON_AMSDU_ENG_QENO(2)),
+		DUMP_WED(WED_MON_AMSDU_ENG_MERG(2)),
+		DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT8(2),
+			      WED_AMSDU_ENG_MAX_PL_CNT),
+		DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT8(2),
+			      WED_AMSDU_ENG_MAX_QGPP_CNT),
+		DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT9(2),
+			      WED_AMSDU_ENG_CUR_ENTRY),
+		DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT9(2),
+			      WED_AMSDU_ENG_MAX_BUF_MERGED),
+		DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT9(2),
+			      WED_AMSDU_ENG_MAX_MSDU_MERGED),
+
+		DUMP_STR("WED AMDSU ENG3 INFO"),
+		DUMP_WED(WED_MON_AMSDU_ENG_DMAD(3)),
+		DUMP_WED(WED_MON_AMSDU_ENG_QFPL(3)),
+		DUMP_WED(WED_MON_AMSDU_ENG_QENI(3)),
+		DUMP_WED(WED_MON_AMSDU_ENG_QENO(3)),
+		DUMP_WED(WED_MON_AMSDU_ENG_MERG(3)),
+		DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT8(3),
+			      WED_AMSDU_ENG_MAX_PL_CNT),
+		DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT8(3),
+			      WED_AMSDU_ENG_MAX_QGPP_CNT),
+		DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT9(3),
+			      WED_AMSDU_ENG_CUR_ENTRY),
+		DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT9(3),
+			      WED_AMSDU_ENG_MAX_BUF_MERGED),
+		DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT9(3),
+			      WED_AMSDU_ENG_MAX_MSDU_MERGED),
+
+		DUMP_STR("WED AMDSU ENG4 INFO"),
+		DUMP_WED(WED_MON_AMSDU_ENG_DMAD(4)),
+		DUMP_WED(WED_MON_AMSDU_ENG_QFPL(4)),
+		DUMP_WED(WED_MON_AMSDU_ENG_QENI(4)),
+		DUMP_WED(WED_MON_AMSDU_ENG_QENO(4)),
+		DUMP_WED(WED_MON_AMSDU_ENG_MERG(4)),
+		DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT8(4),
+			      WED_AMSDU_ENG_MAX_PL_CNT),
+		DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT8(4),
+			      WED_AMSDU_ENG_MAX_QGPP_CNT),
+		DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT9(4),
+			      WED_AMSDU_ENG_CUR_ENTRY),
+		DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT9(4),
+			      WED_AMSDU_ENG_MAX_BUF_MERGED),
+		DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT9(4),
+			      WED_AMSDU_ENG_MAX_MSDU_MERGED),
+
+		DUMP_STR("WED AMDSU ENG5 INFO"),
+		DUMP_WED(WED_MON_AMSDU_ENG_DMAD(5)),
+		DUMP_WED(WED_MON_AMSDU_ENG_QFPL(5)),
+		DUMP_WED(WED_MON_AMSDU_ENG_QENI(5)),
+		DUMP_WED(WED_MON_AMSDU_ENG_QENO(5)),
+		DUMP_WED(WED_MON_AMSDU_ENG_MERG(5)),
+		DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT8(5),
+			      WED_AMSDU_ENG_MAX_PL_CNT),
+		DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT8(5),
+			      WED_AMSDU_ENG_MAX_QGPP_CNT),
+		DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT9(5),
+			      WED_AMSDU_ENG_CUR_ENTRY),
+		DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT9(5),
+			      WED_AMSDU_ENG_MAX_BUF_MERGED),
+		DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT9(5),
+			      WED_AMSDU_ENG_MAX_MSDU_MERGED),
+
+		DUMP_STR("WED AMDSU ENG6 INFO"),
+		DUMP_WED(WED_MON_AMSDU_ENG_DMAD(6)),
+		DUMP_WED(WED_MON_AMSDU_ENG_QFPL(6)),
+		DUMP_WED(WED_MON_AMSDU_ENG_QENI(6)),
+		DUMP_WED(WED_MON_AMSDU_ENG_QENO(6)),
+		DUMP_WED(WED_MON_AMSDU_ENG_MERG(6)),
+		DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT8(6),
+			      WED_AMSDU_ENG_MAX_PL_CNT),
+		DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT8(6),
+			      WED_AMSDU_ENG_MAX_QGPP_CNT),
+		DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT9(6),
+			      WED_AMSDU_ENG_CUR_ENTRY),
+		DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT9(6),
+			      WED_AMSDU_ENG_MAX_BUF_MERGED),
+		DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT9(6),
+			      WED_AMSDU_ENG_MAX_MSDU_MERGED),
+
+		DUMP_STR("WED AMDSU ENG7 INFO"),
+		DUMP_WED(WED_MON_AMSDU_ENG_DMAD(7)),
+		DUMP_WED(WED_MON_AMSDU_ENG_QFPL(7)),
+		DUMP_WED(WED_MON_AMSDU_ENG_QENI(7)),
+		DUMP_WED(WED_MON_AMSDU_ENG_QENO(7)),
+		DUMP_WED(WED_MON_AMSDU_ENG_MERG(7)),
+		DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT8(7),
+			      WED_AMSDU_ENG_MAX_PL_CNT),
+		DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT8(7),
+			      WED_AMSDU_ENG_MAX_QGPP_CNT),
+		DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT9(7),
+			      WED_AMSDU_ENG_CUR_ENTRY),
+		DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT9(7),
+			      WED_AMSDU_ENG_MAX_BUF_MERGED),
+		DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT9(4),
+			      WED_AMSDU_ENG_MAX_MSDU_MERGED),
+
+		DUMP_STR("WED AMDSU ENG8 INFO"),
+		DUMP_WED(WED_MON_AMSDU_ENG_DMAD(8)),
+		DUMP_WED(WED_MON_AMSDU_ENG_QFPL(8)),
+		DUMP_WED(WED_MON_AMSDU_ENG_QENI(8)),
+		DUMP_WED(WED_MON_AMSDU_ENG_QENO(8)),
+		DUMP_WED(WED_MON_AMSDU_ENG_MERG(8)),
+		DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT8(8),
+			      WED_AMSDU_ENG_MAX_PL_CNT),
+		DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT8(8),
+			      WED_AMSDU_ENG_MAX_QGPP_CNT),
+		DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT9(8),
+			      WED_AMSDU_ENG_CUR_ENTRY),
+		DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT9(8),
+			      WED_AMSDU_ENG_MAX_BUF_MERGED),
+		DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT9(8),
+			      WED_AMSDU_ENG_MAX_MSDU_MERGED),
+
+		DUMP_STR("WED QMEM INFO"),
+		DUMP_WED_MASK(WED_MON_AMSDU_QMEM_CNT(0), WED_AMSDU_QMEM_FQ_CNT),
+		DUMP_WED_MASK(WED_MON_AMSDU_QMEM_CNT(0), WED_AMSDU_QMEM_SP_QCNT),
+		DUMP_WED_MASK(WED_MON_AMSDU_QMEM_CNT(1), WED_AMSDU_QMEM_TID0_QCNT),
+		DUMP_WED_MASK(WED_MON_AMSDU_QMEM_CNT(1), WED_AMSDU_QMEM_TID1_QCNT),
+		DUMP_WED_MASK(WED_MON_AMSDU_QMEM_CNT(2), WED_AMSDU_QMEM_TID2_QCNT),
+		DUMP_WED_MASK(WED_MON_AMSDU_QMEM_CNT(2), WED_AMSDU_QMEM_TID3_QCNT),
+		DUMP_WED_MASK(WED_MON_AMSDU_QMEM_CNT(3), WED_AMSDU_QMEM_TID4_QCNT),
+		DUMP_WED_MASK(WED_MON_AMSDU_QMEM_CNT(3), WED_AMSDU_QMEM_TID5_QCNT),
+		DUMP_WED_MASK(WED_MON_AMSDU_QMEM_CNT(4), WED_AMSDU_QMEM_TID6_QCNT),
+		DUMP_WED_MASK(WED_MON_AMSDU_QMEM_CNT(4), WED_AMSDU_QMEM_TID7_QCNT),
+
+		DUMP_STR("WED QMEM HEAD INFO"),
+		DUMP_WED_MASK(WED_MON_AMSDU_QMEM_PTR(0), WED_AMSDU_QMEM_FQ_HEAD),
+		DUMP_WED_MASK(WED_MON_AMSDU_QMEM_PTR(0), WED_AMSDU_QMEM_SP_QHEAD),
+		DUMP_WED_MASK(WED_MON_AMSDU_QMEM_PTR(1), WED_AMSDU_QMEM_TID0_QHEAD),
+		DUMP_WED_MASK(WED_MON_AMSDU_QMEM_PTR(1), WED_AMSDU_QMEM_TID1_QHEAD),
+		DUMP_WED_MASK(WED_MON_AMSDU_QMEM_PTR(2), WED_AMSDU_QMEM_TID2_QHEAD),
+		DUMP_WED_MASK(WED_MON_AMSDU_QMEM_PTR(2), WED_AMSDU_QMEM_TID3_QHEAD),
+		DUMP_WED_MASK(WED_MON_AMSDU_QMEM_PTR(3), WED_AMSDU_QMEM_TID4_QHEAD),
+		DUMP_WED_MASK(WED_MON_AMSDU_QMEM_PTR(3), WED_AMSDU_QMEM_TID5_QHEAD),
+		DUMP_WED_MASK(WED_MON_AMSDU_QMEM_PTR(4), WED_AMSDU_QMEM_TID6_QHEAD),
+		DUMP_WED_MASK(WED_MON_AMSDU_QMEM_PTR(4), WED_AMSDU_QMEM_TID7_QHEAD),
+
+		DUMP_STR("WED QMEM TAIL INFO"),
+		DUMP_WED_MASK(WED_MON_AMSDU_QMEM_PTR(5), WED_AMSDU_QMEM_FQ_TAIL),
+		DUMP_WED_MASK(WED_MON_AMSDU_QMEM_PTR(5), WED_AMSDU_QMEM_SP_QTAIL),
+		DUMP_WED_MASK(WED_MON_AMSDU_QMEM_PTR(6), WED_AMSDU_QMEM_TID0_QTAIL),
+		DUMP_WED_MASK(WED_MON_AMSDU_QMEM_PTR(6), WED_AMSDU_QMEM_TID1_QTAIL),
+		DUMP_WED_MASK(WED_MON_AMSDU_QMEM_PTR(7), WED_AMSDU_QMEM_TID2_QTAIL),
+		DUMP_WED_MASK(WED_MON_AMSDU_QMEM_PTR(7), WED_AMSDU_QMEM_TID3_QTAIL),
+		DUMP_WED_MASK(WED_MON_AMSDU_QMEM_PTR(8), WED_AMSDU_QMEM_TID4_QTAIL),
+		DUMP_WED_MASK(WED_MON_AMSDU_QMEM_PTR(8), WED_AMSDU_QMEM_TID5_QTAIL),
+		DUMP_WED_MASK(WED_MON_AMSDU_QMEM_PTR(9), WED_AMSDU_QMEM_TID6_QTAIL),
+		DUMP_WED_MASK(WED_MON_AMSDU_QMEM_PTR(9), WED_AMSDU_QMEM_TID7_QTAIL),
+
+		DUMP_STR("WED HIFTXD MSDU INFO"),
+		DUMP_WED(WED_MON_AMSDU_HIFTXD_FETCH_MSDU(1)),
+		DUMP_WED(WED_MON_AMSDU_HIFTXD_FETCH_MSDU(2)),
+		DUMP_WED(WED_MON_AMSDU_HIFTXD_FETCH_MSDU(3)),
+		DUMP_WED(WED_MON_AMSDU_HIFTXD_FETCH_MSDU(4)),
+		DUMP_WED(WED_MON_AMSDU_HIFTXD_FETCH_MSDU(5)),
+		DUMP_WED(WED_MON_AMSDU_HIFTXD_FETCH_MSDU(6)),
+		DUMP_WED(WED_MON_AMSDU_HIFTXD_FETCH_MSDU(7)),
+		DUMP_WED(WED_MON_AMSDU_HIFTXD_FETCH_MSDU(8)),
+		DUMP_WED(WED_MON_AMSDU_HIFTXD_FETCH_MSDU(9)),
+		DUMP_WED(WED_MON_AMSDU_HIFTXD_FETCH_MSDU(10)),
+		DUMP_WED(WED_MON_AMSDU_HIFTXD_FETCH_MSDU(11)),
+		DUMP_WED(WED_MON_AMSDU_HIFTXD_FETCH_MSDU(12)),
+		DUMP_WED(WED_MON_AMSDU_HIFTXD_FETCH_MSDU(13)),
+	};
 	struct mtk_wed_hw *hw = s->private;
 	struct mtk_wed_device *dev = hw->wed_dev;
 
@@ -219,7 +493,94 @@ wed_rxinfo_show(struct seq_file *s, void *data)
 
 	return 0;
 }
-DEFINE_SHOW_ATTRIBUTE(wed_rxinfo);
+DEFINE_SHOW_ATTRIBUTE(wed_amsdu);
+
+static int
+wed_rtqm_show(struct seq_file *s, void *data)
+{
+	static const struct reg_dump regs[] = {
+		DUMP_STR("WED Route QM IGRS0(N2H + Recycle)"),
+		DUMP_WED(WED_RTQM_IGRS0_I2HW_DMAD_CNT),
+		DUMP_WED(WED_RTQM_IGRS0_I2H_DMAD_CNT(0)),
+		DUMP_WED(WED_RTQM_IGRS0_I2H_DMAD_CNT(1)),
+		DUMP_WED(WED_RTQM_IGRS0_I2HW_PKT_CNT),
+		DUMP_WED(WED_RTQM_IGRS0_I2H_PKT_CNT(0)),
+		DUMP_WED(WED_RTQM_IGRS0_I2H_PKT_CNT(0)),
+		DUMP_WED(WED_RTQM_IGRS0_FDROP_CNT),
+
+		DUMP_STR("WED Route QM IGRS1(Legacy)"),
+		DUMP_WED(WED_RTQM_IGRS1_I2HW_DMAD_CNT),
+		DUMP_WED(WED_RTQM_IGRS1_I2H_DMAD_CNT(0)),
+		DUMP_WED(WED_RTQM_IGRS1_I2H_DMAD_CNT(1)),
+		DUMP_WED(WED_RTQM_IGRS1_I2HW_PKT_CNT),
+		DUMP_WED(WED_RTQM_IGRS1_I2H_PKT_CNT(0)),
+		DUMP_WED(WED_RTQM_IGRS1_I2H_PKT_CNT(1)),
+		DUMP_WED(WED_RTQM_IGRS1_FDROP_CNT),
+
+		DUMP_STR("WED Route QM IGRS2(RRO3.0)"),
+		DUMP_WED(WED_RTQM_IGRS2_I2HW_DMAD_CNT),
+		DUMP_WED(WED_RTQM_IGRS2_I2H_DMAD_CNT(0)),
+		DUMP_WED(WED_RTQM_IGRS2_I2H_DMAD_CNT(1)),
+		DUMP_WED(WED_RTQM_IGRS2_I2HW_PKT_CNT),
+		DUMP_WED(WED_RTQM_IGRS2_I2H_PKT_CNT(0)),
+		DUMP_WED(WED_RTQM_IGRS2_I2H_PKT_CNT(1)),
+		DUMP_WED(WED_RTQM_IGRS2_FDROP_CNT),
+
+		DUMP_STR("WED Route QM IGRS3(DEBUG)"),
+		DUMP_WED(WED_RTQM_IGRS2_I2HW_DMAD_CNT),
+		DUMP_WED(WED_RTQM_IGRS3_I2H_DMAD_CNT(0)),
+		DUMP_WED(WED_RTQM_IGRS3_I2H_DMAD_CNT(1)),
+		DUMP_WED(WED_RTQM_IGRS3_I2HW_PKT_CNT),
+		DUMP_WED(WED_RTQM_IGRS3_I2H_PKT_CNT(0)),
+		DUMP_WED(WED_RTQM_IGRS3_I2H_PKT_CNT(1)),
+		DUMP_WED(WED_RTQM_IGRS3_FDROP_CNT),
+	};
+	struct mtk_wed_hw *hw = s->private;
+	struct mtk_wed_device *dev = hw->wed_dev;
+
+	if (dev)
+		dump_wed_regs(s, dev, regs, ARRAY_SIZE(regs));
+
+	return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(wed_rtqm);
+
+static int
+wed_rro_show(struct seq_file *s, void *data)
+{
+	static const struct reg_dump regs[] = {
+		DUMP_STR("RRO/IND CMD CNT"),
+		DUMP_WED(WED_RX_IND_CMD_CNT(1)),
+		DUMP_WED(WED_RX_IND_CMD_CNT(2)),
+		DUMP_WED(WED_RX_IND_CMD_CNT(3)),
+		DUMP_WED(WED_RX_IND_CMD_CNT(4)),
+		DUMP_WED(WED_RX_IND_CMD_CNT(5)),
+		DUMP_WED(WED_RX_IND_CMD_CNT(6)),
+		DUMP_WED(WED_RX_IND_CMD_CNT(7)),
+		DUMP_WED(WED_RX_IND_CMD_CNT(8)),
+		DUMP_WED_MASK(WED_RX_IND_CMD_CNT(9),
+			      WED_IND_CMD_MAGIC_CNT_FAIL_CNT),
+
+		DUMP_WED(WED_RX_ADDR_ELEM_CNT(0)),
+		DUMP_WED_MASK(WED_RX_ADDR_ELEM_CNT(1),
+			      WED_ADDR_ELEM_SIG_FAIL_CNT),
+		DUMP_WED(WED_RX_MSDU_PG_CNT(1)),
+		DUMP_WED(WED_RX_MSDU_PG_CNT(2)),
+		DUMP_WED(WED_RX_MSDU_PG_CNT(3)),
+		DUMP_WED(WED_RX_MSDU_PG_CNT(4)),
+		DUMP_WED(WED_RX_MSDU_PG_CNT(5)),
+		DUMP_WED_MASK(WED_RX_PN_CHK_CNT,
+			      WED_PN_CHK_FAIL_CNT),
+	};
+	struct mtk_wed_hw *hw = s->private;
+	struct mtk_wed_device *dev = hw->wed_dev;
+
+	if (dev)
+		dump_wed_regs(s, dev, regs, ARRAY_SIZE(regs));
+
+	return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(wed_rro);
 
 static int
 mtk_wed_reg_set(void *data, u64 val)
@@ -261,7 +622,16 @@ void mtk_wed_hw_add_debugfs(struct mtk_wed_hw *hw)
 	debugfs_create_u32("regidx", 0600, dir, &hw->debugfs_reg);
 	debugfs_create_file_unsafe("regval", 0600, dir, hw, &fops_regval);
 	debugfs_create_file_unsafe("txinfo", 0400, dir, hw, &wed_txinfo_fops);
-	if (hw->version != 1)
+	if (!mtk_wed_is_v1(hw)) {
 		debugfs_create_file_unsafe("rxinfo", 0400, dir, hw,
 					   &wed_rxinfo_fops);
+		if (mtk_wed_is_v3_or_greater(hw)) {
+			debugfs_create_file_unsafe("amsdu", 0400, dir, hw,
+						   &wed_amsdu_fops);
+			debugfs_create_file_unsafe("rtqm", 0400, dir, hw,
+						   &wed_rtqm_fops);
+			debugfs_create_file_unsafe("rro", 0400, dir, hw,
+						   &wed_rro_fops);
+		}
+	}
 }
diff --git a/drivers/net/ethernet/mediatek/mtk_wed_mcu.c b/drivers/net/ethernet/mediatek/mtk_wed_mcu.c
index 071ed3dea860..65a78e274009 100644
--- a/drivers/net/ethernet/mediatek/mtk_wed_mcu.c
+++ b/drivers/net/ethernet/mediatek/mtk_wed_mcu.c
@@ -16,14 +16,30 @@
 #include "mtk_wed_wo.h"
 #include "mtk_wed.h"
 
+static struct mtk_wed_wo_memory_region mem_region[] = {
+	[MTK_WED_WO_REGION_EMI] = {
+		.name = "wo-emi",
+	},
+	[MTK_WED_WO_REGION_ILM] = {
+		.name = "wo-ilm",
+	},
+	[MTK_WED_WO_REGION_DATA] = {
+		.name = "wo-data",
+		.shared = true,
+	},
+	[MTK_WED_WO_REGION_BOOT] = {
+		.name = "wo-boot",
+	},
+};
+
 static u32 wo_r32(struct mtk_wed_wo *wo, u32 reg)
 {
-	return readl(wo->boot.addr + reg);
+	return readl(mem_region[MTK_WED_WO_REGION_BOOT].addr + reg);
 }
 
 static void wo_w32(struct mtk_wed_wo *wo, u32 reg, u32 val)
 {
-	writel(val, wo->boot.addr + reg);
+	writel(val, mem_region[MTK_WED_WO_REGION_BOOT].addr + reg);
 }
 
 static struct sk_buff *
@@ -68,6 +84,9 @@ mtk_wed_update_rx_stats(struct mtk_wed_device *wed, struct sk_buff *skb)
 	struct mtk_wed_wo_rx_stats *stats;
 	int i;
 
+	if (!wed->wlan.update_wo_rx_stats)
+		return;
+
 	if (count * sizeof(*stats) > skb->len - sizeof(u32))
 		return;
 
@@ -204,7 +223,7 @@ int mtk_wed_mcu_msg_update(struct mtk_wed_device *dev, int id, void *data,
 {
 	struct mtk_wed_wo *wo = dev->hw->wed_wo;
 
-	if (dev->hw->version == 1)
+	if (!mtk_wed_get_rx_capa(dev))
 		return 0;
 
 	if (WARN_ON(!wo))
@@ -215,19 +234,13 @@ int mtk_wed_mcu_msg_update(struct mtk_wed_device *dev, int id, void *data,
 }
 
 static int
-mtk_wed_get_memory_region(struct mtk_wed_wo *wo,
+mtk_wed_get_memory_region(struct mtk_wed_hw *hw, int index,
 			  struct mtk_wed_wo_memory_region *region)
 {
 	struct reserved_mem *rmem;
 	struct device_node *np;
-	int index;
-
-	index = of_property_match_string(wo->hw->node, "memory-region-names",
-					 region->name);
-	if (index < 0)
-		return index;
 
-	np = of_parse_phandle(wo->hw->node, "memory-region", index);
+	np = of_parse_phandle(hw->node, "memory-region", index);
 	if (!np)
 		return -ENODEV;
 
@@ -239,7 +252,7 @@ mtk_wed_get_memory_region(struct mtk_wed_wo *wo,
 
 	region->phy_addr = rmem->base;
 	region->size = rmem->size;
-	region->addr = devm_ioremap(wo->hw->dev, region->phy_addr, region->size);
+	region->addr = devm_ioremap(hw->dev, region->phy_addr, region->size);
 
 	return !region->addr ? -EINVAL : 0;
 }
@@ -252,6 +265,9 @@ mtk_wed_mcu_run_firmware(struct mtk_wed_wo *wo, const struct firmware *fw,
 	const struct mtk_wed_fw_trailer *trailer;
 	const struct mtk_wed_fw_region *fw_region;
 
+	if (!region->phy_addr || !region->size)
+		return 0;
+
 	trailer_ptr = fw->data + fw->size - sizeof(*trailer);
 	trailer = (const struct mtk_wed_fw_trailer *)trailer_ptr;
 	region_ptr = trailer_ptr - trailer->num_region * sizeof(*fw_region);
@@ -291,18 +307,6 @@ next:
 static int
 mtk_wed_mcu_load_firmware(struct mtk_wed_wo *wo)
 {
-	static struct mtk_wed_wo_memory_region mem_region[] = {
-		[MTK_WED_WO_REGION_EMI] = {
-			.name = "wo-emi",
-		},
-		[MTK_WED_WO_REGION_ILM] = {
-			.name = "wo-ilm",
-		},
-		[MTK_WED_WO_REGION_DATA] = {
-			.name = "wo-data",
-			.shared = true,
-		},
-	};
 	const struct mtk_wed_fw_trailer *trailer;
 	const struct firmware *fw;
 	const char *fw_name;
@@ -311,25 +315,38 @@ mtk_wed_mcu_load_firmware(struct mtk_wed_wo *wo)
 
 	/* load firmware region metadata */
 	for (i = 0; i < ARRAY_SIZE(mem_region); i++) {
-		ret = mtk_wed_get_memory_region(wo, &mem_region[i]);
+		int index = of_property_match_string(wo->hw->node,
+						     "memory-region-names",
+						     mem_region[i].name);
+		if (index < 0)
+			continue;
+
+		ret = mtk_wed_get_memory_region(wo->hw, index, &mem_region[i]);
 		if (ret)
 			return ret;
 	}
 
-	wo->boot.name = "wo-boot";
-	ret = mtk_wed_get_memory_region(wo, &wo->boot);
-	if (ret)
-		return ret;
-
 	/* set dummy cr */
 	wed_w32(wo->hw->wed_dev, MTK_WED_SCR0 + 4 * MTK_WED_DUMMY_CR_FWDL,
 		wo->hw->index + 1);
 
 	/* load firmware */
-	if (of_device_is_compatible(wo->hw->node, "mediatek,mt7981-wed"))
-		fw_name = MT7981_FIRMWARE_WO;
-	else
-		fw_name = wo->hw->index ? MT7986_FIRMWARE_WO1 : MT7986_FIRMWARE_WO0;
+	switch (wo->hw->version) {
+	case 2:
+		if (of_device_is_compatible(wo->hw->node,
+					    "mediatek,mt7981-wed"))
+			fw_name = MT7981_FIRMWARE_WO;
+		else
+			fw_name = wo->hw->index ? MT7986_FIRMWARE_WO1
+						: MT7986_FIRMWARE_WO0;
+		break;
+	case 3:
+		fw_name = wo->hw->index ? MT7988_FIRMWARE_WO1
+					: MT7988_FIRMWARE_WO0;
+		break;
+	default:
+		return -EINVAL;
+	}
 
 	ret = request_firmware(&fw, fw_name, wo->hw->dev);
 	if (ret)
@@ -350,15 +367,16 @@ mtk_wed_mcu_load_firmware(struct mtk_wed_wo *wo)
 	}
 
 	/* set the start address */
-	boot_cr = wo->hw->index ? MTK_WO_MCU_CFG_LS_WA_BOOT_ADDR_ADDR
-				: MTK_WO_MCU_CFG_LS_WM_BOOT_ADDR_ADDR;
+	if (!mtk_wed_is_v3_or_greater(wo->hw) && wo->hw->index)
+		boot_cr = MTK_WO_MCU_CFG_LS_WA_BOOT_ADDR_ADDR;
+	else
+		boot_cr = MTK_WO_MCU_CFG_LS_WM_BOOT_ADDR_ADDR;
 	wo_w32(wo, boot_cr, mem_region[MTK_WED_WO_REGION_EMI].phy_addr >> 16);
 	/* wo firmware reset */
 	wo_w32(wo, MTK_WO_MCU_CFG_LS_WF_MCCR_CLR_ADDR, 0xc00);
 
-	val = wo_r32(wo, MTK_WO_MCU_CFG_LS_WF_MCU_CFG_WM_WA_ADDR);
-	val |= wo->hw->index ? MTK_WO_MCU_CFG_LS_WF_WM_WA_WA_CPU_RSTB_MASK
-			     : MTK_WO_MCU_CFG_LS_WF_WM_WA_WM_CPU_RSTB_MASK;
+	val = wo_r32(wo, MTK_WO_MCU_CFG_LS_WF_MCU_CFG_WM_WA_ADDR) |
+	      MTK_WO_MCU_CFG_LS_WF_WM_WA_WM_CPU_RSTB_MASK;
 	wo_w32(wo, MTK_WO_MCU_CFG_LS_WF_MCU_CFG_WM_WA_ADDR, val);
 out:
 	release_firmware(fw);
@@ -393,3 +411,5 @@ int mtk_wed_mcu_init(struct mtk_wed_wo *wo)
 MODULE_FIRMWARE(MT7981_FIRMWARE_WO);
 MODULE_FIRMWARE(MT7986_FIRMWARE_WO0);
 MODULE_FIRMWARE(MT7986_FIRMWARE_WO1);
+MODULE_FIRMWARE(MT7988_FIRMWARE_WO0);
+MODULE_FIRMWARE(MT7988_FIRMWARE_WO1);
diff --git a/drivers/net/ethernet/mediatek/mtk_wed_regs.h b/drivers/net/ethernet/mediatek/mtk_wed_regs.h
index 47ea69feb3b2..c71190924816 100644
--- a/drivers/net/ethernet/mediatek/mtk_wed_regs.h
+++ b/drivers/net/ethernet/mediatek/mtk_wed_regs.h
@@ -13,6 +13,9 @@
 #define MTK_WDMA_DESC_CTRL_LAST_SEG0		BIT(30)
 #define MTK_WDMA_DESC_CTRL_DMA_DONE		BIT(31)
 
+#define MTK_WDMA_TXD0_DESC_INFO_DMA_DONE	BIT(29)
+#define MTK_WDMA_TXD1_DESC_INFO_DMA_DONE	BIT(31)
+
 struct mtk_wdma_desc {
 	__le32 buf0;
 	__le32 ctrl;
@@ -25,6 +28,8 @@ struct mtk_wdma_desc {
 #define MTK_WED_RESET					0x008
 #define MTK_WED_RESET_TX_BM				BIT(0)
 #define MTK_WED_RESET_RX_BM				BIT(1)
+#define MTK_WED_RESET_RX_PG_BM				BIT(2)
+#define MTK_WED_RESET_RRO_RX_TO_PG			BIT(3)
 #define MTK_WED_RESET_TX_FREE_AGENT			BIT(4)
 #define MTK_WED_RESET_WPDMA_TX_DRV			BIT(8)
 #define MTK_WED_RESET_WPDMA_RX_DRV			BIT(9)
@@ -37,6 +42,7 @@ struct mtk_wdma_desc {
 #define MTK_WED_RESET_WDMA_INT_AGENT			BIT(19)
 #define MTK_WED_RESET_RX_RRO_QM				BIT(20)
 #define MTK_WED_RESET_RX_ROUTE_QM			BIT(21)
+#define MTK_WED_RESET_TX_AMSDU				BIT(22)
 #define MTK_WED_RESET_WED				BIT(31)
 
 #define MTK_WED_CTRL					0x00c
@@ -44,6 +50,9 @@ struct mtk_wdma_desc {
 #define MTK_WED_CTRL_WPDMA_INT_AGENT_BUSY		BIT(1)
 #define MTK_WED_CTRL_WDMA_INT_AGENT_EN			BIT(2)
 #define MTK_WED_CTRL_WDMA_INT_AGENT_BUSY		BIT(3)
+#define MTK_WED_CTRL_WED_RX_IND_CMD_EN			BIT(5)
+#define MTK_WED_CTRL_WED_RX_PG_BM_EN			BIT(6)
+#define MTK_WED_CTRL_WED_RX_PG_BM_BUSY			BIT(7)
 #define MTK_WED_CTRL_WED_TX_BM_EN			BIT(8)
 #define MTK_WED_CTRL_WED_TX_BM_BUSY			BIT(9)
 #define MTK_WED_CTRL_WED_TX_FREE_AGENT_EN		BIT(10)
@@ -54,9 +63,14 @@ struct mtk_wdma_desc {
 #define MTK_WED_CTRL_RX_RRO_QM_BUSY			BIT(15)
 #define MTK_WED_CTRL_RX_ROUTE_QM_EN			BIT(16)
 #define MTK_WED_CTRL_RX_ROUTE_QM_BUSY			BIT(17)
+#define MTK_WED_CTRL_TX_TKID_ALI_EN			BIT(20)
+#define MTK_WED_CTRL_TX_TKID_ALI_BUSY			BIT(21)
+#define MTK_WED_CTRL_TX_AMSDU_EN			BIT(22)
+#define MTK_WED_CTRL_TX_AMSDU_BUSY			BIT(23)
 #define MTK_WED_CTRL_FINAL_DIDX_READ			BIT(24)
 #define MTK_WED_CTRL_ETH_DMAD_FMT			BIT(25)
 #define MTK_WED_CTRL_MIB_READ_CLEAR			BIT(28)
+#define MTK_WED_CTRL_FLD_MIB_RD_CLR			BIT(28)
 
 #define MTK_WED_EXT_INT_STATUS				0x020
 #define MTK_WED_EXT_INT_STATUS_TF_LEN_ERR		BIT(0)
@@ -64,8 +78,8 @@ struct mtk_wdma_desc {
 #define MTK_WED_EXT_INT_STATUS_TKID_TITO_INVALID	BIT(4)
 #define MTK_WED_EXT_INT_STATUS_TX_FBUF_LO_TH		BIT(8)
 #define MTK_WED_EXT_INT_STATUS_TX_FBUF_HI_TH		BIT(9)
-#define MTK_WED_EXT_INT_STATUS_RX_FBUF_LO_TH		BIT(12)
-#define MTK_WED_EXT_INT_STATUS_RX_FBUF_HI_TH		BIT(13)
+#define MTK_WED_EXT_INT_STATUS_RX_FBUF_LO_TH		BIT(10) /* wed v2 */
+#define MTK_WED_EXT_INT_STATUS_RX_FBUF_HI_TH		BIT(11) /* wed v2 */
 #define MTK_WED_EXT_INT_STATUS_RX_DRV_R_RESP_ERR	BIT(16)
 #define MTK_WED_EXT_INT_STATUS_RX_DRV_W_RESP_ERR	BIT(17)
 #define MTK_WED_EXT_INT_STATUS_RX_DRV_COHERENT		BIT(18)
@@ -89,19 +103,26 @@ struct mtk_wdma_desc {
 #define MTK_WED_EXT_INT_MASK				0x028
 #define MTK_WED_EXT_INT_MASK1				0x02c
 #define MTK_WED_EXT_INT_MASK2				0x030
+#define MTK_WED_EXT_INT_MASK3				0x034
 
 #define MTK_WED_STATUS					0x060
 #define MTK_WED_STATUS_TX				GENMASK(15, 8)
 
+#define MTK_WED_WPDMA_STATUS				0x068
+#define MTK_WED_WPDMA_STATUS_TX_DRV			GENMASK(15, 8)
+
 #define MTK_WED_TX_BM_CTRL				0x080
 #define MTK_WED_TX_BM_CTRL_VLD_GRP_NUM			GENMASK(6, 0)
 #define MTK_WED_TX_BM_CTRL_RSV_GRP_NUM			GENMASK(22, 16)
+#define MTK_WED_TX_BM_CTRL_LEGACY_EN			BIT(26)
+#define MTK_WED_TX_TKID_CTRL_FREE_FORMAT		BIT(27)
 #define MTK_WED_TX_BM_CTRL_PAUSE			BIT(28)
 
 #define MTK_WED_TX_BM_BASE				0x084
+#define MTK_WED_TX_BM_INIT_PTR				0x088
+#define MTK_WED_TX_BM_SW_TAIL_IDX			GENMASK(16, 0)
+#define MTK_WED_TX_BM_INIT_SW_TAIL_IDX			BIT(16)
 
-#define MTK_WED_TX_BM_TKID				0x088
-#define MTK_WED_TX_BM_TKID_V2				0x0c8
 #define MTK_WED_TX_BM_TKID_START			GENMASK(15, 0)
 #define MTK_WED_TX_BM_TKID_END				GENMASK(31, 16)
 
@@ -124,6 +145,12 @@ struct mtk_wdma_desc {
 #define MTK_WED_TX_TKID_CTRL_RSV_GRP_NUM		GENMASK(22, 16)
 #define MTK_WED_TX_TKID_CTRL_PAUSE			BIT(28)
 
+#define MTK_WED_TX_TKID_INTF				0x0dc
+#define MTK_WED_TX_TKID_INTF_TKFIFO_FDEP		GENMASK(25, 16)
+
+#define MTK_WED_TX_TKID_CTRL_VLD_GRP_NUM_V3		GENMASK(7, 0)
+#define MTK_WED_TX_TKID_CTRL_RSV_GRP_NUM_V3		GENMASK(23, 16)
+
 #define MTK_WED_TX_TKID_DYN_THR				0x0e0
 #define MTK_WED_TX_TKID_DYN_THR_LO			GENMASK(6, 0)
 #define MTK_WED_TX_TKID_DYN_THR_HI			GENMASK(22, 16)
@@ -160,9 +187,6 @@ struct mtk_wdma_desc {
 #define MTK_WED_GLO_CFG_RX_2B_OFFSET			BIT(31)
 
 #define MTK_WED_RESET_IDX				0x20c
-#define MTK_WED_RESET_IDX_TX				GENMASK(3, 0)
-#define MTK_WED_RESET_IDX_RX				GENMASK(17, 16)
-#define MTK_WED_RESET_IDX_RX_V2				GENMASK(7, 6)
 #define MTK_WED_RESET_WPDMA_IDX_RX			GENMASK(31, 30)
 
 #define MTK_WED_TX_MIB(_n)				(0x2a0 + (_n) * 4)
@@ -174,6 +198,7 @@ struct mtk_wdma_desc {
 #define MTK_WED_RING_RX_DATA(_n)			(0x420 + (_n) * 0x10)
 
 #define MTK_WED_SCR0					0x3c0
+#define MTK_WED_RX1_CTRL2				0x418
 #define MTK_WED_WPDMA_INT_TRIGGER			0x504
 #define MTK_WED_WPDMA_INT_TRIGGER_RX_DONE		BIT(1)
 #define MTK_WED_WPDMA_INT_TRIGGER_TX_DONE		GENMASK(5, 4)
@@ -204,12 +229,15 @@ struct mtk_wdma_desc {
 #define MTK_WED_WPDMA_GLO_CFG_RX_DRV_R1_PKT_PROC	BIT(5)
 #define MTK_WED_WPDMA_GLO_CFG_RX_DRV_R0_CRX_SYNC	BIT(6)
 #define MTK_WED_WPDMA_GLO_CFG_RX_DRV_R1_CRX_SYNC	BIT(7)
-#define MTK_WED_WPDMA_GLO_CFG_RX_DRV_EVENT_PKT_FMT_VER	GENMASK(18, 16)
+#define MTK_WED_WPDMA_GLO_CFG_RX_DRV_EVENT_PKT_FMT_VER	GENMASK(15, 12)
+#define MTK_WED_WPDMA_GLO_CFG_RX_DRV_UNS_VER_FORCE_4	BIT(18)
 #define MTK_WED_WPDMA_GLO_CFG_RX_DRV_UNSUPPORT_FMT	BIT(19)
-#define MTK_WED_WPDMA_GLO_CFG_RX_DRV_UEVENT_PKT_FMT_CHK BIT(20)
+#define MTK_WED_WPDMA_GLO_CFG_RX_DRV_EVENT_PKT_FMT_CHK	BIT(20)
 #define MTK_WED_WPDMA_GLO_CFG_RX_DDONE2_WR		BIT(21)
 #define MTK_WED_WPDMA_GLO_CFG_TX_TKID_KEEP		BIT(24)
+#define MTK_WED_WPDMA_GLO_CFG_TX_DDONE_CHK_LAST		BIT(25)
 #define MTK_WED_WPDMA_GLO_CFG_TX_DMAD_DW3_PREV		BIT(28)
+#define MTK_WED_WPDMA_GLO_CFG_TX_DDONE_CHK		BIT(30)
 
 #define MTK_WED_WPDMA_RESET_IDX				0x50c
 #define MTK_WED_WPDMA_RESET_IDX_TX			GENMASK(3, 0)
@@ -255,9 +283,10 @@ struct mtk_wdma_desc {
 #define MTK_WED_PCIE_INT_TRIGGER_STATUS			BIT(16)
 
 #define MTK_WED_PCIE_INT_CTRL				0x57c
-#define MTK_WED_PCIE_INT_CTRL_MSK_EN_POLA		BIT(20)
-#define MTK_WED_PCIE_INT_CTRL_SRC_SEL			GENMASK(17, 16)
 #define MTK_WED_PCIE_INT_CTRL_POLL_EN			GENMASK(13, 12)
+#define MTK_WED_PCIE_INT_CTRL_SRC_SEL			GENMASK(17, 16)
+#define MTK_WED_PCIE_INT_CTRL_MSK_EN_POLA		BIT(20)
+#define MTK_WED_PCIE_INT_CTRL_MSK_IRQ_FILTER		BIT(21)
 
 #define MTK_WED_WPDMA_CFG_BASE				0x580
 #define MTK_WED_WPDMA_CFG_INT_MASK			0x584
@@ -283,15 +312,30 @@ struct mtk_wdma_desc {
 
 #define MTK_WED_WPDMA_RX_D_RST_IDX			0x760
 #define MTK_WED_WPDMA_RX_D_RST_CRX_IDX			GENMASK(17, 16)
+#define MTK_WED_WPDMA_RX_D_RST_DRV_IDX_ALL		BIT(20)
 #define MTK_WED_WPDMA_RX_D_RST_DRV_IDX			GENMASK(25, 24)
 
 #define MTK_WED_WPDMA_RX_GLO_CFG			0x76c
-#define MTK_WED_WPDMA_RX_RING				0x770
 
 #define MTK_WED_WPDMA_RX_D_MIB(_n)			(0x774 + (_n) * 4)
 #define MTK_WED_WPDMA_RX_D_PROCESSED_MIB(_n)		(0x784 + (_n) * 4)
 #define MTK_WED_WPDMA_RX_D_COHERENT_MIB			0x78c
 
+#define MTK_WED_WPDMA_RX_D_PREF_CFG			0x7b4
+#define MTK_WED_WPDMA_RX_D_PREF_EN			BIT(0)
+#define MTK_WED_WPDMA_RX_D_PREF_BUSY			BIT(1)
+#define MTK_WED_WPDMA_RX_D_PREF_BURST_SIZE		GENMASK(12, 8)
+#define MTK_WED_WPDMA_RX_D_PREF_LOW_THRES		GENMASK(21, 16)
+
+#define MTK_WED_WPDMA_RX_D_PREF_RX0_SIDX		0x7b8
+#define MTK_WED_WPDMA_RX_D_PREF_SIDX_IDX_CLR		BIT(15)
+
+#define MTK_WED_WPDMA_RX_D_PREF_RX1_SIDX		0x7bc
+
+#define MTK_WED_WPDMA_RX_D_PREF_FIFO_CFG		0x7c0
+#define MTK_WED_WPDMA_RX_D_PREF_FIFO_CFG_R0_CLR		BIT(0)
+#define MTK_WED_WPDMA_RX_D_PREF_FIFO_CFG_R1_CLR		BIT(16)
+
 #define MTK_WED_WDMA_RING_TX				0x800
 
 #define MTK_WED_WDMA_TX_MIB				0x810
@@ -299,6 +343,20 @@ struct mtk_wdma_desc {
 #define MTK_WED_WDMA_RING_RX(_n)			(0x900 + (_n) * 0x10)
 #define MTK_WED_WDMA_RX_THRES(_n)			(0x940 + (_n) * 0x4)
 
+#define MTK_WED_WDMA_RX_PREF_CFG			0x950
+#define MTK_WED_WDMA_RX_PREF_EN				BIT(0)
+#define MTK_WED_WDMA_RX_PREF_BUSY			BIT(1)
+#define MTK_WED_WDMA_RX_PREF_BURST_SIZE			GENMASK(12, 8)
+#define MTK_WED_WDMA_RX_PREF_LOW_THRES			GENMASK(21, 16)
+#define MTK_WED_WDMA_RX_PREF_RX0_SIDX_CLR		BIT(24)
+#define MTK_WED_WDMA_RX_PREF_RX1_SIDX_CLR		BIT(25)
+#define MTK_WED_WDMA_RX_PREF_DDONE2_EN			BIT(26)
+#define MTK_WED_WDMA_RX_PREF_DDONE2_BUSY		BIT(27)
+
+#define MTK_WED_WDMA_RX_PREF_FIFO_CFG			0x95C
+#define MTK_WED_WDMA_RX_PREF_FIFO_RX0_CLR		BIT(0)
+#define MTK_WED_WDMA_RX_PREF_FIFO_RX1_CLR		BIT(16)
+
 #define MTK_WED_WDMA_GLO_CFG				0xa04
 #define MTK_WED_WDMA_GLO_CFG_TX_DRV_EN			BIT(0)
 #define MTK_WED_WDMA_GLO_CFG_TX_DDONE_CHK		BIT(1)
@@ -322,6 +380,7 @@ struct mtk_wdma_desc {
 
 #define MTK_WED_WDMA_RESET_IDX				0xa08
 #define MTK_WED_WDMA_RESET_IDX_RX			GENMASK(17, 16)
+#define MTK_WED_WDMA_RESET_IDX_RX_ALL			BIT(20)
 #define MTK_WED_WDMA_RESET_IDX_DRV			GENMASK(25, 24)
 
 #define MTK_WED_WDMA_INT_CLR				0xa24
@@ -331,6 +390,7 @@ struct mtk_wdma_desc {
 #define MTK_WED_WDMA_INT_TRIGGER_RX_DONE		GENMASK(17, 16)
 
 #define MTK_WED_WDMA_INT_CTRL				0xa2c
+#define MTK_WED_WDMA_INT_POLL_PRD			GENMASK(7, 0)
 #define MTK_WED_WDMA_INT_CTRL_POLL_SRC_SEL		GENMASK(17, 16)
 
 #define MTK_WED_WDMA_CFG_BASE				0xaa0
@@ -391,9 +451,62 @@ struct mtk_wdma_desc {
 #define MTK_WDMA_INT_MASK_RX_DELAY			BIT(30)
 #define MTK_WDMA_INT_MASK_RX_COHERENT			BIT(31)
 
+#define MTK_WDMA_XDMA_TX_FIFO_CFG			0x238
+#define MTK_WDMA_XDMA_TX_FIFO_CFG_TX_PAR_FIFO_CLEAR	BIT(0)
+#define MTK_WDMA_XDMA_TX_FIFO_CFG_TX_CMD_FIFO_CLEAR	BIT(4)
+#define MTK_WDMA_XDMA_TX_FIFO_CFG_TX_DMAD_FIFO_CLEAR	BIT(8)
+#define MTK_WDMA_XDMA_TX_FIFO_CFG_TX_ARR_FIFO_CLEAR	BIT(12)
+
+#define MTK_WDMA_XDMA_RX_FIFO_CFG			0x23c
+#define MTK_WDMA_XDMA_RX_FIFO_CFG_RX_PAR_FIFO_CLEAR	BIT(0)
+#define MTK_WDMA_XDMA_RX_FIFO_CFG_RX_CMD_FIFO_CLEAR	BIT(4)
+#define MTK_WDMA_XDMA_RX_FIFO_CFG_RX_DMAD_FIFO_CLEAR	BIT(8)
+#define MTK_WDMA_XDMA_RX_FIFO_CFG_RX_ARR_FIFO_CLEAR	BIT(12)
+#define MTK_WDMA_XDMA_RX_FIFO_CFG_RX_LEN_FIFO_CLEAR	BIT(15)
+#define MTK_WDMA_XDMA_RX_FIFO_CFG_RX_WID_FIFO_CLEAR	BIT(18)
+#define MTK_WDMA_XDMA_RX_FIFO_CFG_RX_BID_FIFO_CLEAR	BIT(21)
+
 #define MTK_WDMA_INT_GRP1				0x250
 #define MTK_WDMA_INT_GRP2				0x254
 
+#define MTK_WDMA_PREF_TX_CFG				0x2d0
+#define MTK_WDMA_PREF_TX_CFG_PREF_EN			BIT(0)
+#define MTK_WDMA_PREF_TX_CFG_PREF_BUSY			BIT(1)
+
+#define MTK_WDMA_PREF_RX_CFG				0x2dc
+#define MTK_WDMA_PREF_RX_CFG_PREF_EN			BIT(0)
+#define MTK_WDMA_PREF_RX_CFG_PREF_BUSY			BIT(1)
+
+#define MTK_WDMA_PREF_RX_FIFO_CFG			0x2e0
+#define MTK_WDMA_PREF_RX_FIFO_CFG_RING0_CLEAR		BIT(0)
+#define MTK_WDMA_PREF_RX_FIFO_CFG_RING1_CLEAR		BIT(16)
+
+#define MTK_WDMA_PREF_TX_FIFO_CFG			0x2d4
+#define MTK_WDMA_PREF_TX_FIFO_CFG_RING0_CLEAR		BIT(0)
+#define MTK_WDMA_PREF_TX_FIFO_CFG_RING1_CLEAR		BIT(16)
+
+#define MTK_WDMA_PREF_SIDX_CFG				0x2e4
+#define MTK_WDMA_PREF_SIDX_CFG_TX_RING_CLEAR		GENMASK(3, 0)
+#define MTK_WDMA_PREF_SIDX_CFG_RX_RING_CLEAR		GENMASK(5, 4)
+
+#define MTK_WDMA_WRBK_TX_CFG				0x300
+#define MTK_WDMA_WRBK_TX_CFG_WRBK_BUSY			BIT(0)
+#define MTK_WDMA_WRBK_TX_CFG_WRBK_EN			BIT(30)
+
+#define MTK_WDMA_WRBK_TX_FIFO_CFG(_n)			(0x304 + (_n) * 0x4)
+#define MTK_WDMA_WRBK_TX_FIFO_CFG_RING_CLEAR		BIT(0)
+
+#define MTK_WDMA_WRBK_RX_CFG				0x344
+#define MTK_WDMA_WRBK_RX_CFG_WRBK_BUSY			BIT(0)
+#define MTK_WDMA_WRBK_RX_CFG_WRBK_EN			BIT(30)
+
+#define MTK_WDMA_WRBK_RX_FIFO_CFG(_n)			(0x348 + (_n) * 0x4)
+#define MTK_WDMA_WRBK_RX_FIFO_CFG_RING_CLEAR		BIT(0)
+
+#define MTK_WDMA_WRBK_SIDX_CFG				0x388
+#define MTK_WDMA_WRBK_SIDX_CFG_TX_RING_CLEAR		GENMASK(3, 0)
+#define MTK_WDMA_WRBK_SIDX_CFG_RX_RING_CLEAR		GENMASK(5, 4)
+
 #define MTK_PCIE_MIRROR_MAP(n)				((n) ? 0x4 : 0x0)
 #define MTK_PCIE_MIRROR_MAP_EN				BIT(0)
 #define MTK_PCIE_MIRROR_MAP_WED_ID			BIT(1)
@@ -407,6 +520,32 @@ struct mtk_wdma_desc {
 #define MTK_WED_RTQM_Q_DBG_BYPASS			BIT(5)
 #define MTK_WED_RTQM_TXDMAD_FPORT			GENMASK(23, 20)
 
+#define MTK_WED_RTQM_RST				0xb04
+
+#define MTK_WED_RTQM_IGRS0_I2HW_DMAD_CNT		0xb1c
+#define MTK_WED_RTQM_IGRS0_I2H_DMAD_CNT(_n)		(0xb20 + (_n) * 0x4)
+#define	MTK_WED_RTQM_IGRS0_I2HW_PKT_CNT			0xb28
+#define MTK_WED_RTQM_IGRS0_I2H_PKT_CNT(_n)		(0xb2c + (_n) * 0x4)
+#define MTK_WED_RTQM_IGRS0_FDROP_CNT			0xb34
+
+#define MTK_WED_RTQM_IGRS1_I2HW_DMAD_CNT		0xb44
+#define MTK_WED_RTQM_IGRS1_I2H_DMAD_CNT(_n)		(0xb48 + (_n) * 0x4)
+#define MTK_WED_RTQM_IGRS1_I2HW_PKT_CNT			0xb50
+#define MTK_WED_RTQM_IGRS1_I2H_PKT_CNT(_n)		(0xb54 + (_n) * 0x4)
+#define MTK_WED_RTQM_IGRS1_FDROP_CNT			0xb5c
+
+#define MTK_WED_RTQM_IGRS2_I2HW_DMAD_CNT		0xb6c
+#define MTK_WED_RTQM_IGRS2_I2H_DMAD_CNT(_n)		(0xb70 + (_n) * 0x4)
+#define MTK_WED_RTQM_IGRS2_I2HW_PKT_CNT			0xb78
+#define MTK_WED_RTQM_IGRS2_I2H_PKT_CNT(_n)		(0xb7c + (_n) * 0x4)
+#define MTK_WED_RTQM_IGRS2_FDROP_CNT			0xb84
+
+#define MTK_WED_RTQM_IGRS3_I2HW_DMAD_CNT		0xb94
+#define MTK_WED_RTQM_IGRS3_I2H_DMAD_CNT(_n)		(0xb98 + (_n) * 0x4)
+#define MTK_WED_RTQM_IGRS3_I2HW_PKT_CNT			0xba0
+#define MTK_WED_RTQM_IGRS3_I2H_PKT_CNT(_n)		(0xba4 + (_n) * 0x4)
+#define MTK_WED_RTQM_IGRS3_FDROP_CNT			0xbac
+
 #define MTK_WED_RTQM_R2H_MIB(_n)			(0xb70 + (_n) * 0x4)
 #define MTK_WED_RTQM_R2Q_MIB(_n)			(0xb78 + (_n) * 0x4)
 #define MTK_WED_RTQM_Q2N_MIB				0xb80
@@ -415,6 +554,24 @@ struct mtk_wdma_desc {
 #define MTK_WED_RTQM_Q2B_MIB				0xb8c
 #define MTK_WED_RTQM_PFDBK_MIB				0xb90
 
+#define MTK_WED_RTQM_ENQ_CFG0				0xbb8
+#define MTK_WED_RTQM_ENQ_CFG_TXDMAD_FPORT		GENMASK(15, 12)
+
+#define MTK_WED_RTQM_FDROP_MIB				0xb84
+#define MTK_WED_RTQM_ENQ_I2Q_DMAD_CNT			0xbbc
+#define MTK_WED_RTQM_ENQ_I2N_DMAD_CNT			0xbc0
+#define MTK_WED_RTQM_ENQ_I2Q_PKT_CNT			0xbc4
+#define MTK_WED_RTQM_ENQ_I2N_PKT_CNT			0xbc8
+#define MTK_WED_RTQM_ENQ_USED_ENTRY_CNT			0xbcc
+#define MTK_WED_RTQM_ENQ_ERR_CNT			0xbd0
+
+#define MTK_WED_RTQM_DEQ_DMAD_CNT			0xbd8
+#define MTK_WED_RTQM_DEQ_Q2I_DMAD_CNT			0xbdc
+#define MTK_WED_RTQM_DEQ_PKT_CNT			0xbe0
+#define MTK_WED_RTQM_DEQ_Q2I_PKT_CNT			0xbe4
+#define MTK_WED_RTQM_DEQ_USED_PFDBK_CNT			0xbe8
+#define MTK_WED_RTQM_DEQ_ERR_CNT			0xbec
+
 #define MTK_WED_RROQM_GLO_CFG				0xc04
 #define MTK_WED_RROQM_RST_IDX				0xc08
 #define MTK_WED_RROQM_RST_IDX_MIOD			BIT(0)
@@ -464,7 +621,195 @@ struct mtk_wdma_desc {
 #define MTK_WED_RX_BM_INTF				0xd9c
 #define MTK_WED_RX_BM_ERR_STS				0xda8
 
+#define MTK_RRO_IND_CMD_SIGNATURE			0xe00
+#define MTK_RRO_IND_CMD_DMA_IDX				GENMASK(11, 0)
+#define MTK_RRO_IND_CMD_MAGIC_CNT			GENMASK(30, 28)
+
+#define MTK_WED_IND_CMD_RX_CTRL0			0xe04
+#define MTK_WED_IND_CMD_PROC_IDX			GENMASK(11, 0)
+#define MTK_WED_IND_CMD_PREFETCH_FREE_CNT		GENMASK(19, 16)
+#define MTK_WED_IND_CMD_MAGIC_CNT			GENMASK(30, 28)
+
+#define MTK_WED_IND_CMD_RX_CTRL1			0xe08
+#define MTK_WED_IND_CMD_RX_CTRL2			0xe0c
+#define MTK_WED_IND_CMD_MAX_CNT				GENMASK(11, 0)
+#define MTK_WED_IND_CMD_BASE_M				GENMASK(19, 16)
+
+#define MTK_WED_RRO_CFG0				0xe10
+#define MTK_WED_RRO_CFG1				0xe14
+#define MTK_WED_RRO_CFG1_MAX_WIN_SZ			GENMASK(31, 29)
+#define MTK_WED_RRO_CFG1_ACK_SN_BASE_M			GENMASK(19, 16)
+#define MTK_WED_RRO_CFG1_PARTICL_SE_ID			GENMASK(11, 0)
+
+#define MTK_WED_ADDR_ELEM_CFG0				0xe18
+#define MTK_WED_ADDR_ELEM_CFG1				0xe1c
+#define MTK_WED_ADDR_ELEM_PREFETCH_FREE_CNT		GENMASK(19, 16)
+
+#define MTK_WED_ADDR_ELEM_TBL_CFG			0xe20
+#define MTK_WED_ADDR_ELEM_TBL_OFFSET			GENMASK(6, 0)
+#define MTK_WED_ADDR_ELEM_TBL_RD_RDY			BIT(28)
+#define MTK_WED_ADDR_ELEM_TBL_WR_RDY			BIT(29)
+#define MTK_WED_ADDR_ELEM_TBL_RD			BIT(30)
+#define MTK_WED_ADDR_ELEM_TBL_WR			BIT(31)
+
+#define MTK_WED_RADDR_ELEM_TBL_WDATA			0xe24
+#define MTK_WED_RADDR_ELEM_TBL_RDATA			0xe28
+
+#define MTK_WED_PN_CHECK_CFG				0xe30
+#define MTK_WED_PN_CHECK_SE_ID				GENMASK(11, 0)
+#define MTK_WED_PN_CHECK_RD_RDY				BIT(28)
+#define MTK_WED_PN_CHECK_WR_RDY				BIT(29)
+#define MTK_WED_PN_CHECK_RD				BIT(30)
+#define MTK_WED_PN_CHECK_WR				BIT(31)
+
+#define MTK_WED_PN_CHECK_WDATA_M			0xe38
+#define MTK_WED_PN_CHECK_IS_FIRST			BIT(17)
+
+#define MTK_WED_RRO_MSDU_PG_RING_CFG(_n)		(0xe44 + (_n) * 0x8)
+
+#define MTK_WED_RRO_MSDU_PG_RING2_CFG			0xe58
+#define MTK_WED_RRO_MSDU_PG_DRV_CLR			BIT(26)
+#define MTK_WED_RRO_MSDU_PG_DRV_EN			BIT(31)
+
+#define MTK_WED_RRO_MSDU_PG_CTRL0(_n)			(0xe5c + (_n) * 0xc)
+#define MTK_WED_RRO_MSDU_PG_CTRL1(_n)			(0xe60 + (_n) * 0xc)
+#define MTK_WED_RRO_MSDU_PG_CTRL2(_n)			(0xe64 + (_n) * 0xc)
+
+#define MTK_WED_RRO_RX_D_RX(_n)				(0xe80 + (_n) * 0x10)
+
+#define MTK_WED_RRO_RX_MAGIC_CNT			BIT(13)
+
+#define MTK_WED_RRO_RX_D_CFG(_n)			(0xea0 + (_n) * 0x4)
+#define MTK_WED_RRO_RX_D_DRV_CLR			BIT(26)
+#define MTK_WED_RRO_RX_D_DRV_EN				BIT(31)
+
+#define MTK_WED_RRO_PG_BM_RX_DMAM			0xeb0
+#define MTK_WED_RRO_PG_BM_RX_SDL0			GENMASK(13, 0)
+
+#define MTK_WED_RRO_PG_BM_BASE				0xeb4
+#define MTK_WED_RRO_PG_BM_INIT_PTR			0xeb8
+#define MTK_WED_RRO_PG_BM_SW_TAIL_IDX			GENMASK(15, 0)
+#define MTK_WED_RRO_PG_BM_INIT_SW_TAIL_IDX		BIT(16)
+
+#define MTK_WED_WPDMA_INT_CTRL_RRO_RX			0xeec
+#define MTK_WED_WPDMA_INT_CTRL_RRO_RX0_EN		BIT(0)
+#define MTK_WED_WPDMA_INT_CTRL_RRO_RX0_CLR		BIT(1)
+#define MTK_WED_WPDMA_INT_CTRL_RRO_RX0_DONE_TRIG	GENMASK(6, 2)
+#define MTK_WED_WPDMA_INT_CTRL_RRO_RX1_EN		BIT(8)
+#define MTK_WED_WPDMA_INT_CTRL_RRO_RX1_CLR		BIT(9)
+#define MTK_WED_WPDMA_INT_CTRL_RRO_RX1_DONE_TRIG	GENMASK(14, 10)
+
+#define MTK_WED_WPDMA_INT_CTRL_RRO_MSDU_PG		0xef4
+#define MTK_WED_WPDMA_INT_CTRL_RRO_PG0_EN		BIT(0)
+#define MTK_WED_WPDMA_INT_CTRL_RRO_PG0_CLR		BIT(1)
+#define MTK_WED_WPDMA_INT_CTRL_RRO_PG0_DONE_TRIG	GENMASK(6, 2)
+#define MTK_WED_WPDMA_INT_CTRL_RRO_PG1_EN		BIT(8)
+#define MTK_WED_WPDMA_INT_CTRL_RRO_PG1_CLR		BIT(9)
+#define MTK_WED_WPDMA_INT_CTRL_RRO_PG1_DONE_TRIG	GENMASK(14, 10)
+#define MTK_WED_WPDMA_INT_CTRL_RRO_PG2_EN		BIT(16)
+#define MTK_WED_WPDMA_INT_CTRL_RRO_PG2_CLR		BIT(17)
+#define MTK_WED_WPDMA_INT_CTRL_RRO_PG2_DONE_TRIG	GENMASK(22, 18)
+
+#define MTK_WED_RRO_RX_HW_STS				0xf00
+#define MTK_WED_RX_IND_CMD_BUSY				GENMASK(31, 0)
+
+#define MTK_WED_RX_IND_CMD_CNT0				0xf20
+#define MTK_WED_RX_IND_CMD_DBG_CNT_EN			BIT(31)
+
+#define MTK_WED_RX_IND_CMD_CNT(_n)			(0xf20 + (_n) * 0x4)
+#define MTK_WED_IND_CMD_MAGIC_CNT_FAIL_CNT		GENMASK(15, 0)
+
+#define MTK_WED_RX_ADDR_ELEM_CNT(_n)			(0xf48 + (_n) * 0x4)
+#define MTK_WED_ADDR_ELEM_SIG_FAIL_CNT			GENMASK(15, 0)
+#define MTK_WED_ADDR_ELEM_FIRST_SIG_FAIL_CNT		GENMASK(31, 16)
+#define MTK_WED_ADDR_ELEM_ACKSN_CNT			GENMASK(27, 0)
+
+#define MTK_WED_RX_MSDU_PG_CNT(_n)			(0xf5c + (_n) * 0x4)
+
+#define MTK_WED_RX_PN_CHK_CNT				0xf70
+#define MTK_WED_PN_CHK_FAIL_CNT				GENMASK(15, 0)
+
 #define MTK_WED_WOCPU_VIEW_MIOD_BASE			0x8000
 #define MTK_WED_PCIE_INT_MASK				0x0
 
+#define MTK_WED_AMSDU_FIFO				0x1800
+#define MTK_WED_AMSDU_IS_PRIOR0_RING			BIT(10)
+
+#define MTK_WED_AMSDU_STA_INFO				0x01810
+#define MTK_WED_AMSDU_STA_INFO_DO_INIT			BIT(0)
+#define MTK_WED_AMSDU_STA_INFO_SET_INIT			BIT(1)
+
+#define MTK_WED_AMSDU_STA_INFO_INIT			0x01814
+#define MTK_WED_AMSDU_STA_WTBL_HDRT_MODE		BIT(0)
+#define MTK_WED_AMSDU_STA_RMVL				BIT(1)
+#define MTK_WED_AMSDU_STA_MAX_AMSDU_LEN			GENMASK(7, 2)
+#define MTK_WED_AMSDU_STA_MAX_AMSDU_NUM			GENMASK(11, 8)
+
+#define MTK_WED_AMSDU_HIFTXD_BASE_L(_n)			(0x1980 + (_n) * 0x4)
+
+#define MTK_WED_AMSDU_PSE				0x1910
+#define MTK_WED_AMSDU_PSE_RESET				BIT(16)
+
+#define MTK_WED_AMSDU_HIFTXD_CFG			0x1968
+#define MTK_WED_AMSDU_HIFTXD_SRC			GENMASK(16, 15)
+
+#define MTK_WED_MON_AMSDU_FIFO_DMAD			0x1a34
+
+#define MTK_WED_MON_AMSDU_ENG_DMAD(_n)			(0x1a80 + (_n) * 0x50)
+#define MTK_WED_MON_AMSDU_ENG_QFPL(_n)			(0x1a84 + (_n) * 0x50)
+#define MTK_WED_MON_AMSDU_ENG_QENI(_n)			(0x1a88 + (_n) * 0x50)
+#define MTK_WED_MON_AMSDU_ENG_QENO(_n)			(0x1a8c + (_n) * 0x50)
+#define MTK_WED_MON_AMSDU_ENG_MERG(_n)			(0x1a90 + (_n) * 0x50)
+
+#define MTK_WED_MON_AMSDU_ENG_CNT8(_n)			(0x1a94 + (_n) * 0x50)
+#define MTK_WED_AMSDU_ENG_MAX_QGPP_CNT			GENMASK(10, 0)
+#define MTK_WED_AMSDU_ENG_MAX_PL_CNT			GENMASK(27, 16)
+
+#define MTK_WED_MON_AMSDU_ENG_CNT9(_n)			(0x1a98 + (_n) * 0x50)
+#define MTK_WED_AMSDU_ENG_CUR_ENTRY			GENMASK(10, 0)
+#define MTK_WED_AMSDU_ENG_MAX_BUF_MERGED		GENMASK(20, 16)
+#define MTK_WED_AMSDU_ENG_MAX_MSDU_MERGED		GENMASK(28, 24)
+
+#define MTK_WED_MON_AMSDU_QMEM_STS1			0x1e04
+
+#define MTK_WED_MON_AMSDU_QMEM_CNT(_n)			(0x1e0c + (_n) * 0x4)
+#define MTK_WED_AMSDU_QMEM_FQ_CNT			GENMASK(27, 16)
+#define MTK_WED_AMSDU_QMEM_SP_QCNT			GENMASK(11, 0)
+#define MTK_WED_AMSDU_QMEM_TID0_QCNT			GENMASK(27, 16)
+#define MTK_WED_AMSDU_QMEM_TID1_QCNT			GENMASK(11, 0)
+#define MTK_WED_AMSDU_QMEM_TID2_QCNT			GENMASK(27, 16)
+#define MTK_WED_AMSDU_QMEM_TID3_QCNT			GENMASK(11, 0)
+#define MTK_WED_AMSDU_QMEM_TID4_QCNT			GENMASK(27, 16)
+#define MTK_WED_AMSDU_QMEM_TID5_QCNT			GENMASK(11, 0)
+#define MTK_WED_AMSDU_QMEM_TID6_QCNT			GENMASK(27, 16)
+#define MTK_WED_AMSDU_QMEM_TID7_QCNT			GENMASK(11, 0)
+
+#define MTK_WED_MON_AMSDU_QMEM_PTR(_n)			(0x1e20 + (_n) * 0x4)
+#define MTK_WED_AMSDU_QMEM_FQ_HEAD			GENMASK(27, 16)
+#define MTK_WED_AMSDU_QMEM_SP_QHEAD			GENMASK(11, 0)
+#define MTK_WED_AMSDU_QMEM_TID0_QHEAD			GENMASK(27, 16)
+#define MTK_WED_AMSDU_QMEM_TID1_QHEAD			GENMASK(11, 0)
+#define MTK_WED_AMSDU_QMEM_TID2_QHEAD			GENMASK(27, 16)
+#define MTK_WED_AMSDU_QMEM_TID3_QHEAD			GENMASK(11, 0)
+#define MTK_WED_AMSDU_QMEM_TID4_QHEAD			GENMASK(27, 16)
+#define MTK_WED_AMSDU_QMEM_TID5_QHEAD			GENMASK(11, 0)
+#define MTK_WED_AMSDU_QMEM_TID6_QHEAD			GENMASK(27, 16)
+#define MTK_WED_AMSDU_QMEM_TID7_QHEAD			GENMASK(11, 0)
+#define MTK_WED_AMSDU_QMEM_FQ_TAIL			GENMASK(27, 16)
+#define MTK_WED_AMSDU_QMEM_SP_QTAIL			GENMASK(11, 0)
+#define MTK_WED_AMSDU_QMEM_TID0_QTAIL			GENMASK(27, 16)
+#define MTK_WED_AMSDU_QMEM_TID1_QTAIL			GENMASK(11, 0)
+#define MTK_WED_AMSDU_QMEM_TID2_QTAIL			GENMASK(27, 16)
+#define MTK_WED_AMSDU_QMEM_TID3_QTAIL			GENMASK(11, 0)
+#define MTK_WED_AMSDU_QMEM_TID4_QTAIL			GENMASK(27, 16)
+#define MTK_WED_AMSDU_QMEM_TID5_QTAIL			GENMASK(11, 0)
+#define MTK_WED_AMSDU_QMEM_TID6_QTAIL			GENMASK(27, 16)
+#define MTK_WED_AMSDU_QMEM_TID7_QTAIL			GENMASK(11, 0)
+
+#define MTK_WED_MON_AMSDU_HIFTXD_FETCH_MSDU(_n)		(0x1ec4 + (_n) * 0x4)
+
+#define MTK_WED_PCIE_BASE			0x11280000
+#define MTK_WED_PCIE_BASE0			0x11300000
+#define MTK_WED_PCIE_BASE1			0x11310000
+#define MTK_WED_PCIE_BASE2			0x11290000
 #endif
diff --git a/drivers/net/ethernet/mediatek/mtk_wed_wo.h b/drivers/net/ethernet/mediatek/mtk_wed_wo.h
index 7a1a2a28f1ac..87a67fa3868d 100644
--- a/drivers/net/ethernet/mediatek/mtk_wed_wo.h
+++ b/drivers/net/ethernet/mediatek/mtk_wed_wo.h
@@ -91,6 +91,8 @@ enum mtk_wed_dummy_cr_idx {
 #define MT7981_FIRMWARE_WO	"mediatek/mt7981_wo.bin"
 #define MT7986_FIRMWARE_WO0	"mediatek/mt7986_wo_0.bin"
 #define MT7986_FIRMWARE_WO1	"mediatek/mt7986_wo_1.bin"
+#define MT7988_FIRMWARE_WO0	"mediatek/mt7988_wo_0.bin"
+#define MT7988_FIRMWARE_WO1	"mediatek/mt7988_wo_1.bin"
 
 #define MTK_WO_MCU_CFG_LS_BASE				0
 #define MTK_WO_MCU_CFG_LS_HW_VER_ADDR			(MTK_WO_MCU_CFG_LS_BASE + 0x000)
@@ -228,7 +230,6 @@ struct mtk_wed_wo_queue {
 
 struct mtk_wed_wo {
 	struct mtk_wed_hw *hw;
-	struct mtk_wed_wo_memory_region boot;
 
 	struct mtk_wed_wo_queue q_tx;
 	struct mtk_wed_wo_queue q_rx;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
index c4f4de82e29e..685335832a93 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
@@ -189,3 +189,11 @@ config MLX5_SF_MANAGER
 	port is managed through devlink.  A subfunction supports RDMA, netdevice
 	and vdpa device. It is similar to a SRIOV VF but it doesn't require
 	SRIOV support.
+
+config MLX5_DPLL
+	tristate "Mellanox 5th generation network adapters (ConnectX series) DPLL support"
+	depends on NETDEVICES && ETHERNET && PCI && MLX5_CORE
+	select DPLL
+	help
+	  DPLL support in Mellanox Technologies ConnectX NICs.
+
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 7e94caca4888..c44870b175f9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -128,3 +128,6 @@ mlx5_core-$(CONFIG_MLX5_SF) += sf/vhca_event.o sf/dev/dev.o sf/dev/driver.o irq_
 # SF manager
 #
 mlx5_core-$(CONFIG_MLX5_SF_MANAGER) += sf/cmd.o sf/hw_table.o sf/devlink.o
+
+obj-$(CONFIG_MLX5_DPLL) += mlx5_dpll.o
+mlx5_dpll-y :=	dpll.o
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
index 7909f378dc93..1fc03480c2ff 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
@@ -206,6 +206,19 @@ static bool is_ib_enabled(struct mlx5_core_dev *dev)
 	return err ? false : val.vbool;
 }
 
+static bool is_dpll_supported(struct mlx5_core_dev *dev)
+{
+	if (!IS_ENABLED(CONFIG_MLX5_DPLL))
+		return false;
+
+	if (!MLX5_CAP_MCAM_REG2(dev, synce_registers)) {
+		mlx5_core_warn(dev, "Missing SyncE capability\n");
+		return false;
+	}
+
+	return true;
+}
+
 enum {
 	MLX5_INTERFACE_PROTOCOL_ETH,
 	MLX5_INTERFACE_PROTOCOL_ETH_REP,
@@ -215,6 +228,8 @@ enum {
 	MLX5_INTERFACE_PROTOCOL_MPIB,
 
 	MLX5_INTERFACE_PROTOCOL_VNET,
+
+	MLX5_INTERFACE_PROTOCOL_DPLL,
 };
 
 static const struct mlx5_adev_device {
@@ -237,6 +252,8 @@ static const struct mlx5_adev_device {
 					   .is_supported = &is_ib_rep_supported },
 	[MLX5_INTERFACE_PROTOCOL_MPIB] = { .suffix = "multiport",
 					   .is_supported = &is_mp_supported },
+	[MLX5_INTERFACE_PROTOCOL_DPLL] = { .suffix = "dpll",
+					   .is_supported = &is_dpll_supported },
 };
 
 int mlx5_adev_idx_alloc(void)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
index af8460bb257b..3e064234f6fe 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
@@ -138,7 +138,6 @@ static int mlx5_devlink_reload_down(struct devlink *devlink, bool netns_change,
 {
 	struct mlx5_core_dev *dev = devlink_priv(devlink);
 	struct pci_dev *pdev = dev->pdev;
-	bool sf_dev_allocated;
 	int ret = 0;
 
 	if (mlx5_dev_is_lightweight(dev)) {
@@ -148,16 +147,6 @@ static int mlx5_devlink_reload_down(struct devlink *devlink, bool netns_change,
 		return 0;
 	}
 
-	sf_dev_allocated = mlx5_sf_dev_allocated(dev);
-	if (sf_dev_allocated) {
-		/* Reload results in deleting SF device which further results in
-		 * unregistering devlink instance while holding devlink_mutext.
-		 * Hence, do not support reload.
-		 */
-		NL_SET_ERR_MSG_MOD(extack, "reload is unsupported when SFs are allocated");
-		return -EOPNOTSUPP;
-	}
-
 	if (mlx5_lag_is_active(dev)) {
 		NL_SET_ERR_MSG_MOD(extack, "reload is unsupported in Lag mode");
 		return -EOPNOTSUPP;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dpll.c b/drivers/net/ethernet/mellanox/mlx5/core/dpll.c
new file mode 100644
index 000000000000..74f0c7867120
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/dpll.c
@@ -0,0 +1,432 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#include <linux/dpll.h>
+#include <linux/mlx5/driver.h>
+
+/* This structure represents a reference to DPLL, one is created
+ * per mdev instance.
+ */
+struct mlx5_dpll {
+	struct dpll_device *dpll;
+	struct dpll_pin *dpll_pin;
+	struct mlx5_core_dev *mdev;
+	struct workqueue_struct *wq;
+	struct delayed_work work;
+	struct {
+		bool valid;
+		enum dpll_lock_status lock_status;
+		enum dpll_pin_state pin_state;
+	} last;
+	struct notifier_block mdev_nb;
+	struct net_device *tracking_netdev;
+};
+
+static int mlx5_dpll_clock_id_get(struct mlx5_core_dev *mdev, u64 *clock_id)
+{
+	u32 out[MLX5_ST_SZ_DW(msecq_reg)] = {};
+	u32 in[MLX5_ST_SZ_DW(msecq_reg)] = {};
+	int err;
+
+	err = mlx5_core_access_reg(mdev, in, sizeof(in), out, sizeof(out),
+				   MLX5_REG_MSECQ, 0, 0);
+	if (err)
+		return err;
+	*clock_id = MLX5_GET64(msecq_reg, out, local_clock_identity);
+	return 0;
+}
+
+static int
+mlx5_dpll_synce_status_get(struct mlx5_core_dev *mdev,
+			   enum mlx5_msees_admin_status *admin_status,
+			   enum mlx5_msees_oper_status *oper_status,
+			   bool *ho_acq)
+{
+	u32 out[MLX5_ST_SZ_DW(msees_reg)] = {};
+	u32 in[MLX5_ST_SZ_DW(msees_reg)] = {};
+	int err;
+
+	err = mlx5_core_access_reg(mdev, in, sizeof(in), out, sizeof(out),
+				   MLX5_REG_MSEES, 0, 0);
+	if (err)
+		return err;
+	if (admin_status)
+		*admin_status = MLX5_GET(msees_reg, out, admin_status);
+	*oper_status = MLX5_GET(msees_reg, out, oper_status);
+	if (ho_acq)
+		*ho_acq = MLX5_GET(msees_reg, out, ho_acq);
+	return 0;
+}
+
+static int
+mlx5_dpll_synce_status_set(struct mlx5_core_dev *mdev,
+			   enum mlx5_msees_admin_status admin_status)
+{
+	u32 out[MLX5_ST_SZ_DW(msees_reg)] = {};
+	u32 in[MLX5_ST_SZ_DW(msees_reg)] = {};
+
+	MLX5_SET(msees_reg, in, field_select,
+		 MLX5_MSEES_FIELD_SELECT_ENABLE |
+		 MLX5_MSEES_FIELD_SELECT_ADMIN_STATUS);
+	MLX5_SET(msees_reg, in, admin_status, admin_status);
+	return mlx5_core_access_reg(mdev, in, sizeof(in), out, sizeof(out),
+				    MLX5_REG_MSEES, 0, 1);
+}
+
+static enum dpll_lock_status
+mlx5_dpll_lock_status_get(enum mlx5_msees_oper_status oper_status, bool ho_acq)
+{
+	switch (oper_status) {
+	case MLX5_MSEES_OPER_STATUS_SELF_TRACK:
+		fallthrough;
+	case MLX5_MSEES_OPER_STATUS_OTHER_TRACK:
+		return ho_acq ? DPLL_LOCK_STATUS_LOCKED_HO_ACQ :
+				DPLL_LOCK_STATUS_LOCKED;
+	case MLX5_MSEES_OPER_STATUS_HOLDOVER:
+		fallthrough;
+	case MLX5_MSEES_OPER_STATUS_FAIL_HOLDOVER:
+		return DPLL_LOCK_STATUS_HOLDOVER;
+	default:
+		return DPLL_LOCK_STATUS_UNLOCKED;
+	}
+}
+
+static enum dpll_pin_state
+mlx5_dpll_pin_state_get(enum mlx5_msees_admin_status admin_status,
+			enum mlx5_msees_oper_status oper_status)
+{
+	return (admin_status == MLX5_MSEES_ADMIN_STATUS_TRACK &&
+		(oper_status == MLX5_MSEES_OPER_STATUS_SELF_TRACK ||
+		 oper_status == MLX5_MSEES_OPER_STATUS_OTHER_TRACK)) ?
+	       DPLL_PIN_STATE_CONNECTED : DPLL_PIN_STATE_DISCONNECTED;
+}
+
+static int mlx5_dpll_device_lock_status_get(const struct dpll_device *dpll,
+					    void *priv,
+					    enum dpll_lock_status *status,
+					    struct netlink_ext_ack *extack)
+{
+	enum mlx5_msees_oper_status oper_status;
+	struct mlx5_dpll *mdpll = priv;
+	bool ho_acq;
+	int err;
+
+	err = mlx5_dpll_synce_status_get(mdpll->mdev, NULL,
+					 &oper_status, &ho_acq);
+	if (err)
+		return err;
+
+	*status = mlx5_dpll_lock_status_get(oper_status, ho_acq);
+	return 0;
+}
+
+static int mlx5_dpll_device_mode_get(const struct dpll_device *dpll,
+				     void *priv,
+				     u32 *mode, struct netlink_ext_ack *extack)
+{
+	*mode = DPLL_MODE_MANUAL;
+	return 0;
+}
+
+static bool mlx5_dpll_device_mode_supported(const struct dpll_device *dpll,
+					    void *priv,
+					    enum dpll_mode mode,
+					    struct netlink_ext_ack *extack)
+{
+	return mode == DPLL_MODE_MANUAL;
+}
+
+static const struct dpll_device_ops mlx5_dpll_device_ops = {
+	.lock_status_get = mlx5_dpll_device_lock_status_get,
+	.mode_get = mlx5_dpll_device_mode_get,
+	.mode_supported = mlx5_dpll_device_mode_supported,
+};
+
+static int mlx5_dpll_pin_direction_get(const struct dpll_pin *pin,
+				       void *pin_priv,
+				       const struct dpll_device *dpll,
+				       void *dpll_priv,
+				       enum dpll_pin_direction *direction,
+				       struct netlink_ext_ack *extack)
+{
+	*direction = DPLL_PIN_DIRECTION_INPUT;
+	return 0;
+}
+
+static int mlx5_dpll_state_on_dpll_get(const struct dpll_pin *pin,
+				       void *pin_priv,
+				       const struct dpll_device *dpll,
+				       void *dpll_priv,
+				       enum dpll_pin_state *state,
+				       struct netlink_ext_ack *extack)
+{
+	enum mlx5_msees_admin_status admin_status;
+	enum mlx5_msees_oper_status oper_status;
+	struct mlx5_dpll *mdpll = pin_priv;
+	int err;
+
+	err = mlx5_dpll_synce_status_get(mdpll->mdev, &admin_status,
+					 &oper_status, NULL);
+	if (err)
+		return err;
+	*state = mlx5_dpll_pin_state_get(admin_status, oper_status);
+	return 0;
+}
+
+static int mlx5_dpll_state_on_dpll_set(const struct dpll_pin *pin,
+				       void *pin_priv,
+				       const struct dpll_device *dpll,
+				       void *dpll_priv,
+				       enum dpll_pin_state state,
+				       struct netlink_ext_ack *extack)
+{
+	struct mlx5_dpll *mdpll = pin_priv;
+
+	return mlx5_dpll_synce_status_set(mdpll->mdev,
+					  state == DPLL_PIN_STATE_CONNECTED ?
+					  MLX5_MSEES_ADMIN_STATUS_TRACK :
+					  MLX5_MSEES_ADMIN_STATUS_FREE_RUNNING);
+}
+
+static const struct dpll_pin_ops mlx5_dpll_pins_ops = {
+	.direction_get = mlx5_dpll_pin_direction_get,
+	.state_on_dpll_get = mlx5_dpll_state_on_dpll_get,
+	.state_on_dpll_set = mlx5_dpll_state_on_dpll_set,
+};
+
+static const struct dpll_pin_properties mlx5_dpll_pin_properties = {
+	.type = DPLL_PIN_TYPE_SYNCE_ETH_PORT,
+	.capabilities = DPLL_PIN_CAPABILITIES_STATE_CAN_CHANGE,
+};
+
+#define MLX5_DPLL_PERIODIC_WORK_INTERVAL 500 /* ms */
+
+static void mlx5_dpll_periodic_work_queue(struct mlx5_dpll *mdpll)
+{
+	queue_delayed_work(mdpll->wq, &mdpll->work,
+			   msecs_to_jiffies(MLX5_DPLL_PERIODIC_WORK_INTERVAL));
+}
+
+static void mlx5_dpll_periodic_work(struct work_struct *work)
+{
+	struct mlx5_dpll *mdpll = container_of(work, struct mlx5_dpll,
+					       work.work);
+	enum mlx5_msees_admin_status admin_status;
+	enum mlx5_msees_oper_status oper_status;
+	enum dpll_lock_status lock_status;
+	enum dpll_pin_state pin_state;
+	bool ho_acq;
+	int err;
+
+	err = mlx5_dpll_synce_status_get(mdpll->mdev, &admin_status,
+					 &oper_status, &ho_acq);
+	if (err)
+		goto err_out;
+	lock_status = mlx5_dpll_lock_status_get(oper_status, ho_acq);
+	pin_state = mlx5_dpll_pin_state_get(admin_status, oper_status);
+
+	if (!mdpll->last.valid)
+		goto invalid_out;
+
+	if (mdpll->last.lock_status != lock_status)
+		dpll_device_change_ntf(mdpll->dpll);
+	if (mdpll->last.pin_state != pin_state)
+		dpll_pin_change_ntf(mdpll->dpll_pin);
+
+invalid_out:
+	mdpll->last.lock_status = lock_status;
+	mdpll->last.pin_state = pin_state;
+	mdpll->last.valid = true;
+err_out:
+	mlx5_dpll_periodic_work_queue(mdpll);
+}
+
+static void mlx5_dpll_netdev_dpll_pin_set(struct mlx5_dpll *mdpll,
+					  struct net_device *netdev)
+{
+	if (mdpll->tracking_netdev)
+		return;
+	netdev_dpll_pin_set(netdev, mdpll->dpll_pin);
+	mdpll->tracking_netdev = netdev;
+}
+
+static void mlx5_dpll_netdev_dpll_pin_clear(struct mlx5_dpll *mdpll)
+{
+	if (!mdpll->tracking_netdev)
+		return;
+	netdev_dpll_pin_clear(mdpll->tracking_netdev);
+	mdpll->tracking_netdev = NULL;
+}
+
+static int mlx5_dpll_mdev_notifier_event(struct notifier_block *nb,
+					 unsigned long event, void *data)
+{
+	struct mlx5_dpll *mdpll = container_of(nb, struct mlx5_dpll, mdev_nb);
+	struct net_device *netdev = data;
+
+	switch (event) {
+	case MLX5_DRIVER_EVENT_UPLINK_NETDEV:
+		if (netdev)
+			mlx5_dpll_netdev_dpll_pin_set(mdpll, netdev);
+		else
+			mlx5_dpll_netdev_dpll_pin_clear(mdpll);
+		break;
+	default:
+		return NOTIFY_DONE;
+	}
+
+	return NOTIFY_OK;
+}
+
+static void mlx5_dpll_mdev_netdev_track(struct mlx5_dpll *mdpll,
+					struct mlx5_core_dev *mdev)
+{
+	mdpll->mdev_nb.notifier_call = mlx5_dpll_mdev_notifier_event;
+	mlx5_blocking_notifier_register(mdev, &mdpll->mdev_nb);
+	mlx5_core_uplink_netdev_event_replay(mdev);
+}
+
+static void mlx5_dpll_mdev_netdev_untrack(struct mlx5_dpll *mdpll,
+					  struct mlx5_core_dev *mdev)
+{
+	mlx5_blocking_notifier_unregister(mdev, &mdpll->mdev_nb);
+	mlx5_dpll_netdev_dpll_pin_clear(mdpll);
+}
+
+static int mlx5_dpll_probe(struct auxiliary_device *adev,
+			   const struct auxiliary_device_id *id)
+{
+	struct mlx5_adev *edev = container_of(adev, struct mlx5_adev, adev);
+	struct mlx5_core_dev *mdev = edev->mdev;
+	struct mlx5_dpll *mdpll;
+	u64 clock_id;
+	int err;
+
+	err = mlx5_dpll_synce_status_set(mdev,
+					 MLX5_MSEES_ADMIN_STATUS_FREE_RUNNING);
+	if (err)
+		return err;
+
+	err = mlx5_dpll_clock_id_get(mdev, &clock_id);
+	if (err)
+		return err;
+
+	mdpll = kzalloc(sizeof(*mdpll), GFP_KERNEL);
+	if (!mdpll)
+		return -ENOMEM;
+	mdpll->mdev = mdev;
+	auxiliary_set_drvdata(adev, mdpll);
+
+	/* Multiple mdev instances might share one DPLL device. */
+	mdpll->dpll = dpll_device_get(clock_id, 0, THIS_MODULE);
+	if (IS_ERR(mdpll->dpll)) {
+		err = PTR_ERR(mdpll->dpll);
+		goto err_free_mdpll;
+	}
+
+	err = dpll_device_register(mdpll->dpll, DPLL_TYPE_EEC,
+				   &mlx5_dpll_device_ops, mdpll);
+	if (err)
+		goto err_put_dpll_device;
+
+	/* Multiple mdev instances might share one DPLL pin. */
+	mdpll->dpll_pin = dpll_pin_get(clock_id, mlx5_get_dev_index(mdev),
+				       THIS_MODULE, &mlx5_dpll_pin_properties);
+	if (IS_ERR(mdpll->dpll_pin)) {
+		err = PTR_ERR(mdpll->dpll_pin);
+		goto err_unregister_dpll_device;
+	}
+
+	err = dpll_pin_register(mdpll->dpll, mdpll->dpll_pin,
+				&mlx5_dpll_pins_ops, mdpll);
+	if (err)
+		goto err_put_dpll_pin;
+
+	mdpll->wq = create_singlethread_workqueue("mlx5_dpll");
+	if (!mdpll->wq) {
+		err = -ENOMEM;
+		goto err_unregister_dpll_pin;
+	}
+
+	mlx5_dpll_mdev_netdev_track(mdpll, mdev);
+
+	INIT_DELAYED_WORK(&mdpll->work, &mlx5_dpll_periodic_work);
+	mlx5_dpll_periodic_work_queue(mdpll);
+
+	return 0;
+
+err_unregister_dpll_pin:
+	dpll_pin_unregister(mdpll->dpll, mdpll->dpll_pin,
+			    &mlx5_dpll_pins_ops, mdpll);
+err_put_dpll_pin:
+	dpll_pin_put(mdpll->dpll_pin);
+err_unregister_dpll_device:
+	dpll_device_unregister(mdpll->dpll, &mlx5_dpll_device_ops, mdpll);
+err_put_dpll_device:
+	dpll_device_put(mdpll->dpll);
+err_free_mdpll:
+	kfree(mdpll);
+	return err;
+}
+
+static void mlx5_dpll_remove(struct auxiliary_device *adev)
+{
+	struct mlx5_dpll *mdpll = auxiliary_get_drvdata(adev);
+	struct mlx5_core_dev *mdev = mdpll->mdev;
+
+	cancel_delayed_work(&mdpll->work);
+	mlx5_dpll_mdev_netdev_untrack(mdpll, mdev);
+	destroy_workqueue(mdpll->wq);
+	dpll_pin_unregister(mdpll->dpll, mdpll->dpll_pin,
+			    &mlx5_dpll_pins_ops, mdpll);
+	dpll_pin_put(mdpll->dpll_pin);
+	dpll_device_unregister(mdpll->dpll, &mlx5_dpll_device_ops, mdpll);
+	dpll_device_put(mdpll->dpll);
+	kfree(mdpll);
+
+	mlx5_dpll_synce_status_set(mdev,
+				   MLX5_MSEES_ADMIN_STATUS_FREE_RUNNING);
+}
+
+static int mlx5_dpll_suspend(struct auxiliary_device *adev, pm_message_t state)
+{
+	return 0;
+}
+
+static int mlx5_dpll_resume(struct auxiliary_device *adev)
+{
+	return 0;
+}
+
+static const struct auxiliary_device_id mlx5_dpll_id_table[] = {
+	{ .name = MLX5_ADEV_NAME ".dpll", },
+	{},
+};
+
+MODULE_DEVICE_TABLE(auxiliary, mlx5_dpll_id_table);
+
+static struct auxiliary_driver mlx5_dpll_driver = {
+	.name = "dpll",
+	.probe = mlx5_dpll_probe,
+	.remove = mlx5_dpll_remove,
+	.suspend = mlx5_dpll_suspend,
+	.resume = mlx5_dpll_resume,
+	.id_table = mlx5_dpll_id_table,
+};
+
+static int __init mlx5_dpll_init(void)
+{
+	return auxiliary_driver_register(&mlx5_dpll_driver);
+}
+
+static void __exit mlx5_dpll_exit(void)
+{
+	auxiliary_driver_unregister(&mlx5_dpll_driver);
+}
+
+module_init(mlx5_dpll_init);
+module_exit(mlx5_dpll_exit);
+
+MODULE_AUTHOR("Jiri Pirko <jiri@nvidia.com>");
+MODULE_DESCRIPTION("Mellanox 5th generation network adapters (ConnectX series) DPLL driver");
+MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c
index c6b6e290fd79..0b1ac6e5c890 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c
@@ -12,11 +12,19 @@ struct mlx5e_dev *mlx5e_create_devlink(struct device *dev,
 {
 	struct mlx5e_dev *mlx5e_dev;
 	struct devlink *devlink;
+	int err;
 
 	devlink = devlink_alloc_ns(&mlx5e_devlink_ops, sizeof(*mlx5e_dev),
 				   devlink_net(priv_to_devlink(mdev)), dev);
 	if (!devlink)
 		return ERR_PTR(-ENOMEM);
+
+	err = devl_nested_devlink_set(priv_to_devlink(mdev), devlink);
+	if (err) {
+		devlink_free(devlink);
+		return ERR_PTR(err);
+	}
+
 	devlink_register(devlink);
 	return devlink_priv(devlink);
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 15561965d2af..d17c9c31b165 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1405,9 +1405,9 @@ err_irq_table:
 
 static void mlx5_unload(struct mlx5_core_dev *dev)
 {
+	mlx5_eswitch_disable(dev->priv.eswitch);
 	mlx5_devlink_traps_unregister(priv_to_devlink(dev));
 	mlx5_sf_dev_table_destroy(dev);
-	mlx5_eswitch_disable(dev->priv.eswitch);
 	mlx5_sriov_detach(dev);
 	mlx5_lag_remove_mdev(dev);
 	mlx5_ec_cleanup(dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.h b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.h
index 2a66a427ef15..b99131e95e37 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.h
@@ -19,6 +19,12 @@ struct mlx5_sf_dev {
 	u16 fn_id;
 };
 
+struct mlx5_sf_peer_devlink_event_ctx {
+	u16 fn_id;
+	struct devlink *devlink;
+	int err;
+};
+
 void mlx5_sf_dev_table_create(struct mlx5_core_dev *dev);
 void mlx5_sf_dev_table_destroy(struct mlx5_core_dev *dev);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c
index 8fe82f1191bb..169c2c68ed5c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c
@@ -8,6 +8,20 @@
 #include "dev.h"
 #include "devlink.h"
 
+static int mlx5_core_peer_devlink_set(struct mlx5_sf_dev *sf_dev, struct devlink *devlink)
+{
+	struct mlx5_sf_peer_devlink_event_ctx event_ctx = {
+		.fn_id = sf_dev->fn_id,
+		.devlink = devlink,
+	};
+	int ret;
+
+	ret = mlx5_blocking_notifier_call_chain(sf_dev->parent_mdev,
+						MLX5_DRIVER_EVENT_SF_PEER_DEVLINK,
+						&event_ctx);
+	return ret == NOTIFY_OK ? event_ctx.err : 0;
+}
+
 static int mlx5_sf_dev_probe(struct auxiliary_device *adev, const struct auxiliary_device_id *id)
 {
 	struct mlx5_sf_dev *sf_dev = container_of(adev, struct mlx5_sf_dev, adev);
@@ -54,9 +68,21 @@ static int mlx5_sf_dev_probe(struct auxiliary_device *adev, const struct auxilia
 		mlx5_core_warn(mdev, "mlx5_init_one err=%d\n", err);
 		goto init_one_err;
 	}
+
+	err = mlx5_core_peer_devlink_set(sf_dev, devlink);
+	if (err) {
+		mlx5_core_warn(mdev, "mlx5_core_peer_devlink_set err=%d\n", err);
+		goto peer_devlink_set_err;
+	}
+
 	devlink_register(devlink);
 	return 0;
 
+peer_devlink_set_err:
+	if (mlx5_dev_is_lightweight(sf_dev->mdev))
+		mlx5_uninit_one_light(sf_dev->mdev);
+	else
+		mlx5_uninit_one(sf_dev->mdev);
 init_one_err:
 	iounmap(mdev->iseg);
 remap_err:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c
index e34a8f88c518..964a5b1876f3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c
@@ -28,6 +28,7 @@ struct mlx5_sf_table {
 	struct mutex sf_state_lock; /* Serializes sf state among user cmds & vhca event handler. */
 	struct notifier_block esw_nb;
 	struct notifier_block vhca_nb;
+	struct notifier_block mdev_nb;
 };
 
 static struct mlx5_sf *
@@ -511,6 +512,35 @@ static int mlx5_sf_esw_event(struct notifier_block *nb, unsigned long event, voi
 	return 0;
 }
 
+static int mlx5_sf_mdev_event(struct notifier_block *nb, unsigned long event, void *data)
+{
+	struct mlx5_sf_table *table = container_of(nb, struct mlx5_sf_table, mdev_nb);
+	struct mlx5_sf_peer_devlink_event_ctx *event_ctx = data;
+	int ret = NOTIFY_DONE;
+	struct mlx5_sf *sf;
+
+	if (event != MLX5_DRIVER_EVENT_SF_PEER_DEVLINK)
+		return NOTIFY_DONE;
+
+	table = mlx5_sf_table_try_get(table->dev);
+	if (!table)
+		return NOTIFY_DONE;
+
+	mutex_lock(&table->sf_state_lock);
+	sf = mlx5_sf_lookup_by_function_id(table, event_ctx->fn_id);
+	if (!sf)
+		goto out;
+
+	event_ctx->err = devl_port_fn_devlink_set(&sf->dl_port.dl_port,
+						  event_ctx->devlink);
+
+	ret = NOTIFY_OK;
+out:
+	mutex_unlock(&table->sf_state_lock);
+	mlx5_sf_table_put(table);
+	return ret;
+}
+
 static bool mlx5_sf_table_supported(const struct mlx5_core_dev *dev)
 {
 	return dev->priv.eswitch && MLX5_ESWITCH_MANAGER(dev) &&
@@ -544,6 +574,9 @@ int mlx5_sf_table_init(struct mlx5_core_dev *dev)
 	if (err)
 		goto vhca_err;
 
+	table->mdev_nb.notifier_call = mlx5_sf_mdev_event;
+	mlx5_blocking_notifier_register(dev, &table->mdev_nb);
+
 	return 0;
 
 vhca_err:
@@ -562,6 +595,7 @@ void mlx5_sf_table_cleanup(struct mlx5_core_dev *dev)
 	if (!table)
 		return;
 
+	mlx5_blocking_notifier_unregister(dev, &table->mdev_nb);
 	mlx5_vhca_event_notifier_unregister(table->dev, &table->vhca_nb);
 	mlx5_esw_event_notifier_unregister(dev->priv.eswitch, &table->esw_nb);
 	WARN_ON(refcount_read(&table->refcount));
diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c
index 694de9513b9f..954ba0826c61 100644
--- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c
+++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c
@@ -471,7 +471,7 @@ out:
 	return err;
 }
 
-static int mlxbf_gige_remove(struct platform_device *pdev)
+static void mlxbf_gige_remove(struct platform_device *pdev)
 {
 	struct mlxbf_gige *priv = platform_get_drvdata(pdev);
 
@@ -479,8 +479,6 @@ static int mlxbf_gige_remove(struct platform_device *pdev)
 	phy_disconnect(priv->netdev->phydev);
 	mlxbf_gige_mdio_remove(priv);
 	platform_set_drvdata(pdev, NULL);
-
-	return 0;
 }
 
 static void mlxbf_gige_shutdown(struct platform_device *pdev)
@@ -499,7 +497,7 @@ MODULE_DEVICE_TABLE(acpi, mlxbf_gige_acpi_match);
 
 static struct platform_driver mlxbf_gige_driver = {
 	.probe = mlxbf_gige_probe,
-	.remove = mlxbf_gige_remove,
+	.remove_new = mlxbf_gige_remove,
 	.shutdown = mlxbf_gige_shutdown,
 	.driver = {
 		.name = KBUILD_MODNAME,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_linecard_dev.c b/drivers/net/ethernet/mellanox/mlxsw/core_linecard_dev.c
index af37e650a8ad..e8d6fe35bf36 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_linecard_dev.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_linecard_dev.c
@@ -132,6 +132,7 @@ static int mlxsw_linecard_bdev_probe(struct auxiliary_device *adev,
 	struct mlxsw_linecard *linecard = linecard_bdev->linecard;
 	struct mlxsw_linecard_dev *linecard_dev;
 	struct devlink *devlink;
+	int err;
 
 	devlink = devlink_alloc(&mlxsw_linecard_dev_devlink_ops,
 				sizeof(*linecard_dev), &adev->dev);
@@ -141,8 +142,12 @@ static int mlxsw_linecard_bdev_probe(struct auxiliary_device *adev,
 	linecard_dev->linecard = linecard_bdev->linecard;
 	linecard_bdev->linecard_dev = linecard_dev;
 
+	err = devlink_linecard_nested_dl_set(linecard->devlink_linecard, devlink);
+	if (err) {
+		devlink_free(devlink);
+		return err;
+	}
 	devlink_register(devlink);
-	devlink_linecard_nested_dl_set(linecard->devlink_linecard, devlink);
 	return 0;
 }
 
@@ -151,9 +156,7 @@ static void mlxsw_linecard_bdev_remove(struct auxiliary_device *adev)
 	struct mlxsw_linecard_bdev *linecard_bdev =
 			container_of(adev, struct mlxsw_linecard_bdev, adev);
 	struct devlink *devlink = priv_to_devlink(linecard_bdev->linecard_dev);
-	struct mlxsw_linecard *linecard = linecard_bdev->linecard;
 
-	devlink_linecard_nested_dl_set(linecard->devlink_linecard, NULL);
 	devlink_unregister(devlink);
 	devlink_free(devlink);
 }
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_bloom_filter.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_bloom_filter.c
index e2aced7ab454..95f63fcf4ba1 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_bloom_filter.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_bloom_filter.c
@@ -496,7 +496,7 @@ mlxsw_sp_acl_bf_init(struct mlxsw_sp *mlxsw_sp, unsigned int num_erp_banks)
 	 * is 2^ACL_MAX_BF_LOG
 	 */
 	bf_bank_size = 1 << MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_MAX_BF_LOG);
-	bf = kzalloc(struct_size(bf, refcnt, bf_bank_size * num_erp_banks),
+	bf = kzalloc(struct_size(bf, refcnt, size_mul(bf_bank_size, num_erp_banks)),
 		     GFP_KERNEL);
 	if (!bf)
 		return ERR_PTR(-ENOMEM);
diff --git a/drivers/net/ethernet/micrel/ks8842.c b/drivers/net/ethernet/micrel/ks8842.c
index c11b118dc415..ddd87ef71caf 100644
--- a/drivers/net/ethernet/micrel/ks8842.c
+++ b/drivers/net/ethernet/micrel/ks8842.c
@@ -1228,7 +1228,7 @@ err_mem_region:
 	return err;
 }
 
-static int ks8842_remove(struct platform_device *pdev)
+static void ks8842_remove(struct platform_device *pdev)
 {
 	struct net_device *netdev = platform_get_drvdata(pdev);
 	struct ks8842_adapter *adapter = netdev_priv(netdev);
@@ -1239,7 +1239,6 @@ static int ks8842_remove(struct platform_device *pdev)
 	iounmap(adapter->hw_addr);
 	free_netdev(netdev);
 	release_mem_region(iomem->start, resource_size(iomem));
-	return 0;
 }
 
 
@@ -1248,7 +1247,7 @@ static struct platform_driver ks8842_platform_driver = {
 		.name	= DRV_NAME,
 	},
 	.probe		= ks8842_probe,
-	.remove		= ks8842_remove,
+	.remove_new	= ks8842_remove,
 };
 
 module_platform_driver(ks8842_platform_driver);
diff --git a/drivers/net/ethernet/micrel/ks8851_par.c b/drivers/net/ethernet/micrel/ks8851_par.c
index 7f49042484bd..2a7f29854267 100644
--- a/drivers/net/ethernet/micrel/ks8851_par.c
+++ b/drivers/net/ethernet/micrel/ks8851_par.c
@@ -327,11 +327,9 @@ static int ks8851_probe_par(struct platform_device *pdev)
 	return ks8851_probe_common(netdev, dev, msg_enable);
 }
 
-static int ks8851_remove_par(struct platform_device *pdev)
+static void ks8851_remove_par(struct platform_device *pdev)
 {
 	ks8851_remove_common(&pdev->dev);
-
-	return 0;
 }
 
 static const struct of_device_id ks8851_match_table[] = {
@@ -347,7 +345,7 @@ static struct platform_driver ks8851_driver = {
 		.pm = &ks8851_pm_ops,
 	},
 	.probe = ks8851_probe_par,
-	.remove = ks8851_remove_par,
+	.remove_new = ks8851_remove_par,
 };
 module_platform_driver(ks8851_driver);
 
diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c
index c81cdeb4d4e7..f940895b14e8 100644
--- a/drivers/net/ethernet/microchip/lan743x_main.c
+++ b/drivers/net/ethernet/microchip/lan743x_main.c
@@ -1466,9 +1466,15 @@ static void lan743x_phy_link_status_change(struct net_device *netdev)
 static void lan743x_phy_close(struct lan743x_adapter *adapter)
 {
 	struct net_device *netdev = adapter->netdev;
+	struct phy_device *phydev = netdev->phydev;
 
 	phy_stop(netdev->phydev);
 	phy_disconnect(netdev->phydev);
+
+	/* using phydev here as phy_disconnect NULLs netdev->phydev */
+	if (phy_is_pseudo_fixed_link(phydev))
+		fixed_phy_unregister(phydev);
+
 }
 
 static void lan743x_phy_interface_select(struct lan743x_adapter *adapter)
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c
index 0d6e79af2410..8e4101628fbd 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c
@@ -1261,7 +1261,7 @@ cleanup_ports:
 	return err;
 }
 
-static int lan966x_remove(struct platform_device *pdev)
+static void lan966x_remove(struct platform_device *pdev)
 {
 	struct lan966x *lan966x = platform_get_drvdata(pdev);
 
@@ -1280,13 +1280,11 @@ static int lan966x_remove(struct platform_device *pdev)
 	lan966x_ptp_deinit(lan966x);
 
 	debugfs_remove_recursive(lan966x->debugfs_root);
-
-	return 0;
 }
 
 static struct platform_driver lan966x_driver = {
 	.probe = lan966x_probe,
-	.remove = lan966x_remove,
+	.remove_new = lan966x_remove,
 	.driver = {
 		.name = "lan966x-switch",
 		.of_match_table = lan966x_match,
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c
index dc9af480bfea..d1f7fc8b1b71 100644
--- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c
@@ -911,7 +911,7 @@ cleanup_pnode:
 	return err;
 }
 
-static int mchp_sparx5_remove(struct platform_device *pdev)
+static void mchp_sparx5_remove(struct platform_device *pdev)
 {
 	struct sparx5 *sparx5 = platform_get_drvdata(pdev);
 
@@ -931,8 +931,6 @@ static int mchp_sparx5_remove(struct platform_device *pdev)
 	/* Unregister netdevs */
 	sparx5_unregister_notifier_blocks(sparx5);
 	destroy_workqueue(sparx5->mact_queue);
-
-	return 0;
 }
 
 static const struct of_device_id mchp_sparx5_match[] = {
@@ -943,7 +941,7 @@ MODULE_DEVICE_TABLE(of, mchp_sparx5_match);
 
 static struct platform_driver mchp_sparx5_driver = {
 	.probe = mchp_sparx5_probe,
-	.remove = mchp_sparx5_remove,
+	.remove_new = mchp_sparx5_remove,
 	.driver = {
 		.name = "sparx5-switch",
 		.of_match_table = mchp_sparx5_match,
diff --git a/drivers/net/ethernet/moxa/moxart_ether.c b/drivers/net/ethernet/moxa/moxart_ether.c
index 3da99b62797d..96dc69e7141f 100644
--- a/drivers/net/ethernet/moxa/moxart_ether.c
+++ b/drivers/net/ethernet/moxa/moxart_ether.c
@@ -558,7 +558,7 @@ irq_map_fail:
 	return ret;
 }
 
-static int moxart_remove(struct platform_device *pdev)
+static void moxart_remove(struct platform_device *pdev)
 {
 	struct net_device *ndev = platform_get_drvdata(pdev);
 
@@ -566,8 +566,6 @@ static int moxart_remove(struct platform_device *pdev)
 	devm_free_irq(&pdev->dev, ndev->irq, ndev);
 	moxart_mac_free_memory(ndev);
 	free_netdev(ndev);
-
-	return 0;
 }
 
 static const struct of_device_id moxart_mac_match[] = {
@@ -578,7 +576,7 @@ MODULE_DEVICE_TABLE(of, moxart_mac_match);
 
 static struct platform_driver moxart_mac_driver = {
 	.probe	= moxart_mac_probe,
-	.remove	= moxart_remove,
+	.remove_new = moxart_remove,
 	.driver	= {
 		.name		= "moxart-ethernet",
 		.of_match_table	= moxart_mac_match,
diff --git a/drivers/net/ethernet/mscc/ocelot_vsc7514.c b/drivers/net/ethernet/mscc/ocelot_vsc7514.c
index 151b42465348..993212c3a7da 100644
--- a/drivers/net/ethernet/mscc/ocelot_vsc7514.c
+++ b/drivers/net/ethernet/mscc/ocelot_vsc7514.c
@@ -392,7 +392,7 @@ out_free_devlink:
 	return err;
 }
 
-static int mscc_ocelot_remove(struct platform_device *pdev)
+static void mscc_ocelot_remove(struct platform_device *pdev)
 {
 	struct ocelot *ocelot = platform_get_drvdata(pdev);
 
@@ -408,13 +408,11 @@ static int mscc_ocelot_remove(struct platform_device *pdev)
 	unregister_switchdev_notifier(&ocelot_switchdev_nb);
 	unregister_netdevice_notifier(&ocelot_netdevice_nb);
 	devlink_free(ocelot->devlink);
-
-	return 0;
 }
 
 static struct platform_driver mscc_ocelot_driver = {
 	.probe = mscc_ocelot_probe,
-	.remove = mscc_ocelot_remove,
+	.remove_new = mscc_ocelot_remove,
 	.driver = {
 		.name = "ocelot-switch",
 		.of_match_table = mscc_ocelot_match,
diff --git a/drivers/net/ethernet/natsemi/jazzsonic.c b/drivers/net/ethernet/natsemi/jazzsonic.c
index 3f371faeb6d0..2b6e097df28f 100644
--- a/drivers/net/ethernet/natsemi/jazzsonic.c
+++ b/drivers/net/ethernet/natsemi/jazzsonic.c
@@ -227,7 +227,7 @@ MODULE_ALIAS("platform:jazzsonic");
 
 #include "sonic.c"
 
-static int jazz_sonic_device_remove(struct platform_device *pdev)
+static void jazz_sonic_device_remove(struct platform_device *pdev)
 {
 	struct net_device *dev = platform_get_drvdata(pdev);
 	struct sonic_local* lp = netdev_priv(dev);
@@ -237,13 +237,11 @@ static int jazz_sonic_device_remove(struct platform_device *pdev)
 	                  lp->descriptors, lp->descriptors_laddr);
 	release_mem_region(dev->base_addr, SONIC_MEM_SIZE);
 	free_netdev(dev);
-
-	return 0;
 }
 
 static struct platform_driver jazz_sonic_driver = {
 	.probe	= jazz_sonic_probe,
-	.remove	= jazz_sonic_device_remove,
+	.remove_new = jazz_sonic_device_remove,
 	.driver	= {
 		.name	= jazz_sonic_string,
 	},
diff --git a/drivers/net/ethernet/natsemi/macsonic.c b/drivers/net/ethernet/natsemi/macsonic.c
index b16f7c830f9b..2fc63860dbdb 100644
--- a/drivers/net/ethernet/natsemi/macsonic.c
+++ b/drivers/net/ethernet/natsemi/macsonic.c
@@ -532,7 +532,7 @@ MODULE_ALIAS("platform:macsonic");
 
 #include "sonic.c"
 
-static int mac_sonic_platform_remove(struct platform_device *pdev)
+static void mac_sonic_platform_remove(struct platform_device *pdev)
 {
 	struct net_device *dev = platform_get_drvdata(pdev);
 	struct sonic_local* lp = netdev_priv(dev);
@@ -541,13 +541,11 @@ static int mac_sonic_platform_remove(struct platform_device *pdev)
 	dma_free_coherent(lp->device, SIZEOF_SONIC_DESC * SONIC_BUS_SCALE(lp->dma_bitmode),
 	                  lp->descriptors, lp->descriptors_laddr);
 	free_netdev(dev);
-
-	return 0;
 }
 
 static struct platform_driver mac_sonic_platform_driver = {
 	.probe  = mac_sonic_platform_probe,
-	.remove = mac_sonic_platform_remove,
+	.remove_new = mac_sonic_platform_remove,
 	.driver = {
 		.name = "macsonic",
 	},
diff --git a/drivers/net/ethernet/natsemi/xtsonic.c b/drivers/net/ethernet/natsemi/xtsonic.c
index 52fef34d43f9..8943e7244310 100644
--- a/drivers/net/ethernet/natsemi/xtsonic.c
+++ b/drivers/net/ethernet/natsemi/xtsonic.c
@@ -249,7 +249,7 @@ MODULE_DESCRIPTION("Xtensa XT2000 SONIC ethernet driver");
 
 #include "sonic.c"
 
-static int xtsonic_device_remove(struct platform_device *pdev)
+static void xtsonic_device_remove(struct platform_device *pdev)
 {
 	struct net_device *dev = platform_get_drvdata(pdev);
 	struct sonic_local *lp = netdev_priv(dev);
@@ -260,13 +260,11 @@ static int xtsonic_device_remove(struct platform_device *pdev)
 			  lp->descriptors, lp->descriptors_laddr);
 	release_region (dev->base_addr, SONIC_MEM_SIZE);
 	free_netdev(dev);
-
-	return 0;
 }
 
 static struct platform_driver xtsonic_driver = {
 	.probe = xtsonic_probe,
-	.remove = xtsonic_device_remove,
+	.remove_new = xtsonic_device_remove,
 	.driver = {
 		.name = xtsonic_string,
 	},
diff --git a/drivers/net/ethernet/ni/nixge.c b/drivers/net/ethernet/ni/nixge.c
index ba27bbc68f85..97f4798f4b42 100644
--- a/drivers/net/ethernet/ni/nixge.c
+++ b/drivers/net/ethernet/ni/nixge.c
@@ -1397,7 +1397,7 @@ free_netdev:
 	return err;
 }
 
-static int nixge_remove(struct platform_device *pdev)
+static void nixge_remove(struct platform_device *pdev)
 {
 	struct net_device *ndev = platform_get_drvdata(pdev);
 	struct nixge_priv *priv = netdev_priv(ndev);
@@ -1412,13 +1412,11 @@ static int nixge_remove(struct platform_device *pdev)
 		mdiobus_unregister(priv->mii_bus);
 
 	free_netdev(ndev);
-
-	return 0;
 }
 
 static struct platform_driver nixge_driver = {
 	.probe		= nixge_probe,
-	.remove		= nixge_remove,
+	.remove_new	= nixge_remove,
 	.driver		= {
 		.name		= "nixge",
 		.of_match_table	= nixge_dt_ids,
diff --git a/drivers/net/ethernet/nxp/lpc_eth.c b/drivers/net/ethernet/nxp/lpc_eth.c
index 1a4a272f4c5c..dd3e58a1319c 100644
--- a/drivers/net/ethernet/nxp/lpc_eth.c
+++ b/drivers/net/ethernet/nxp/lpc_eth.c
@@ -1417,7 +1417,7 @@ err_exit:
 	return ret;
 }
 
-static int lpc_eth_drv_remove(struct platform_device *pdev)
+static void lpc_eth_drv_remove(struct platform_device *pdev)
 {
 	struct net_device *ndev = platform_get_drvdata(pdev);
 	struct netdata_local *pldat = netdev_priv(ndev);
@@ -1436,8 +1436,6 @@ static int lpc_eth_drv_remove(struct platform_device *pdev)
 	clk_disable_unprepare(pldat->clk);
 	clk_put(pldat->clk);
 	free_netdev(ndev);
-
-	return 0;
 }
 
 #ifdef CONFIG_PM
@@ -1505,7 +1503,7 @@ MODULE_DEVICE_TABLE(of, lpc_eth_match);
 
 static struct platform_driver lpc_eth_driver = {
 	.probe		= lpc_eth_drv_probe,
-	.remove		= lpc_eth_drv_remove,
+	.remove_new	= lpc_eth_drv_remove,
 #ifdef CONFIG_PM
 	.suspend	= lpc_eth_drv_suspend,
 	.resume		= lpc_eth_drv_resume,
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.h b/drivers/net/ethernet/pensando/ionic/ionic_dev.h
index aae4131f146a..1dbc3cb50b1d 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_dev.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.h
@@ -7,6 +7,7 @@
 #include <linux/atomic.h>
 #include <linux/mutex.h>
 #include <linux/workqueue.h>
+#include <linux/skbuff.h>
 
 #include "ionic_if.h"
 #include "ionic_regs.h"
@@ -217,7 +218,7 @@ struct ionic_desc_info {
 	};
 	unsigned int bytes;
 	unsigned int nbufs;
-	struct ionic_buf_info bufs[IONIC_MAX_FRAGS];
+	struct ionic_buf_info bufs[MAX_SKB_FRAGS + 1];
 	ionic_desc_cb cb;
 	void *cb_arg;
 };
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
index 2c3e36b2dd7f..edc14730ce88 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
@@ -3831,6 +3831,18 @@ static void ionic_lif_queue_identify(struct ionic_lif *lif)
 			qtype, qti->max_sg_elems);
 		dev_dbg(ionic->dev, " qtype[%d].sg_desc_stride = %d\n",
 			qtype, qti->sg_desc_stride);
+
+		if (qti->max_sg_elems >= IONIC_MAX_FRAGS) {
+			qti->max_sg_elems = IONIC_MAX_FRAGS - 1;
+			dev_dbg(ionic->dev, "limiting qtype %d max_sg_elems to IONIC_MAX_FRAGS-1 %d\n",
+				qtype, qti->max_sg_elems);
+		}
+
+		if (qti->max_sg_elems > MAX_SKB_FRAGS) {
+			qti->max_sg_elems = MAX_SKB_FRAGS;
+			dev_dbg(ionic->dev, "limiting qtype %d max_sg_elems to MAX_SKB_FRAGS %d\n",
+				qtype, qti->max_sg_elems);
+		}
 	}
 }
 
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
index 44466e8c5d77..ccc1b1d407e4 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
@@ -1243,25 +1243,84 @@ static int ionic_tx(struct ionic_queue *q, struct sk_buff *skb)
 static int ionic_tx_descs_needed(struct ionic_queue *q, struct sk_buff *skb)
 {
 	struct ionic_tx_stats *stats = q_to_tx_stats(q);
+	bool too_many_frags = false;
+	skb_frag_t *frag;
+	int desc_bufs;
+	int chunk_len;
+	int frag_rem;
+	int tso_rem;
+	int seg_rem;
+	bool encap;
+	int hdrlen;
 	int ndescs;
 	int err;
 
 	/* Each desc is mss long max, so a descriptor for each gso_seg */
-	if (skb_is_gso(skb))
+	if (skb_is_gso(skb)) {
 		ndescs = skb_shinfo(skb)->gso_segs;
-	else
+	} else {
 		ndescs = 1;
+		if (skb_shinfo(skb)->nr_frags > q->max_sg_elems) {
+			too_many_frags = true;
+			goto linearize;
+		}
+	}
 
-	/* If non-TSO, just need 1 desc and nr_frags sg elems */
-	if (skb_shinfo(skb)->nr_frags <= q->max_sg_elems)
+	/* If non-TSO, or no frags to check, we're done */
+	if (!skb_is_gso(skb) || !skb_shinfo(skb)->nr_frags)
 		return ndescs;
 
-	/* Too many frags, so linearize */
-	err = skb_linearize(skb);
-	if (err)
-		return err;
+	/* We need to scan the skb to be sure that none of the MTU sized
+	 * packets in the TSO will require more sgs per descriptor than we
+	 * can support.  We loop through the frags, add up the lengths for
+	 * a packet, and count the number of sgs used per packet.
+	 */
+	tso_rem = skb->len;
+	frag = skb_shinfo(skb)->frags;
+	encap = skb->encapsulation;
+
+	/* start with just hdr in first part of first descriptor */
+	if (encap)
+		hdrlen = skb_inner_tcp_all_headers(skb);
+	else
+		hdrlen = skb_tcp_all_headers(skb);
+	seg_rem = min_t(int, tso_rem, hdrlen + skb_shinfo(skb)->gso_size);
+	frag_rem = hdrlen;
+
+	while (tso_rem > 0) {
+		desc_bufs = 0;
+		while (seg_rem > 0) {
+			desc_bufs++;
+
+			/* We add the +1 because we can take buffers for one
+			 * more than we have SGs: one for the initial desc data
+			 * in addition to the SG segments that might follow.
+			 */
+			if (desc_bufs > q->max_sg_elems + 1) {
+				too_many_frags = true;
+				goto linearize;
+			}
+
+			if (frag_rem == 0) {
+				frag_rem = skb_frag_size(frag);
+				frag++;
+			}
+			chunk_len = min(frag_rem, seg_rem);
+			frag_rem -= chunk_len;
+			tso_rem -= chunk_len;
+			seg_rem -= chunk_len;
+		}
+
+		seg_rem = min_t(int, tso_rem, skb_shinfo(skb)->gso_size);
+	}
 
-	stats->linearize++;
+linearize:
+	if (too_many_frags) {
+		err = skb_linearize(skb);
+		if (err)
+			return err;
+		stats->linearize++;
+	}
 
 	return ndescs;
 }
diff --git a/drivers/net/ethernet/qualcomm/emac/emac.c b/drivers/net/ethernet/qualcomm/emac/emac.c
index 19bb16daf4e7..3270df72541b 100644
--- a/drivers/net/ethernet/qualcomm/emac/emac.c
+++ b/drivers/net/ethernet/qualcomm/emac/emac.c
@@ -718,7 +718,7 @@ err_undo_netdev:
 	return ret;
 }
 
-static int emac_remove(struct platform_device *pdev)
+static void emac_remove(struct platform_device *pdev)
 {
 	struct net_device *netdev = dev_get_drvdata(&pdev->dev);
 	struct emac_adapter *adpt = netdev_priv(netdev);
@@ -742,8 +742,6 @@ static int emac_remove(struct platform_device *pdev)
 	iounmap(adpt->phy.base);
 
 	free_netdev(netdev);
-
-	return 0;
 }
 
 static void emac_shutdown(struct platform_device *pdev)
@@ -762,7 +760,7 @@ static void emac_shutdown(struct platform_device *pdev)
 
 static struct platform_driver emac_platform_driver = {
 	.probe	= emac_probe,
-	.remove	= emac_remove,
+	.remove_new = emac_remove,
 	.driver = {
 		.name		= "qcom-emac",
 		.of_match_table = emac_dt_match,
diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
index 0ef0b88b7145..c70cff80cc99 100644
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@ -2880,7 +2880,7 @@ out_release:
 	return error;
 }
 
-static int ravb_remove(struct platform_device *pdev)
+static void ravb_remove(struct platform_device *pdev)
 {
 	struct net_device *ndev = platform_get_drvdata(pdev);
 	struct ravb_private *priv = netdev_priv(ndev);
@@ -2907,8 +2907,6 @@ static int ravb_remove(struct platform_device *pdev)
 	reset_control_assert(priv->rstc);
 	free_netdev(ndev);
 	platform_set_drvdata(pdev, NULL);
-
-	return 0;
 }
 
 static int ravb_wol_setup(struct net_device *ndev)
@@ -3046,7 +3044,7 @@ static const struct dev_pm_ops ravb_dev_pm_ops = {
 
 static struct platform_driver ravb_driver = {
 	.probe		= ravb_probe,
-	.remove		= ravb_remove,
+	.remove_new	= ravb_remove,
 	.driver = {
 		.name	= "ravb",
 		.pm	= &ravb_dev_pm_ops,
diff --git a/drivers/net/ethernet/renesas/rswitch.c b/drivers/net/ethernet/renesas/rswitch.c
index 0fc0b6bea753..112e605f104a 100644
--- a/drivers/net/ethernet/renesas/rswitch.c
+++ b/drivers/net/ethernet/renesas/rswitch.c
@@ -1981,7 +1981,7 @@ static void rswitch_deinit(struct rswitch_private *priv)
 	rswitch_clock_disable(priv);
 }
 
-static int renesas_eth_sw_remove(struct platform_device *pdev)
+static void renesas_eth_sw_remove(struct platform_device *pdev)
 {
 	struct rswitch_private *priv = platform_get_drvdata(pdev);
 
@@ -1991,13 +1991,11 @@ static int renesas_eth_sw_remove(struct platform_device *pdev)
 	pm_runtime_disable(&pdev->dev);
 
 	platform_set_drvdata(pdev, NULL);
-
-	return 0;
 }
 
 static struct platform_driver renesas_eth_sw_driver_platform = {
 	.probe = renesas_eth_sw_probe,
-	.remove = renesas_eth_sw_remove,
+	.remove_new = renesas_eth_sw_remove,
 	.driver = {
 		.name = "renesas_eth_sw",
 		.of_match_table = renesas_eth_sw_of_table,
diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index 274ea16c0a1f..475e1e8c1d35 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -3431,7 +3431,7 @@ out_release:
 	return ret;
 }
 
-static int sh_eth_drv_remove(struct platform_device *pdev)
+static void sh_eth_drv_remove(struct platform_device *pdev)
 {
 	struct net_device *ndev = platform_get_drvdata(pdev);
 	struct sh_eth_private *mdp = netdev_priv(ndev);
@@ -3441,8 +3441,6 @@ static int sh_eth_drv_remove(struct platform_device *pdev)
 	sh_mdio_release(mdp);
 	pm_runtime_disable(&pdev->dev);
 	free_netdev(ndev);
-
-	return 0;
 }
 
 #ifdef CONFIG_PM
@@ -3562,7 +3560,7 @@ MODULE_DEVICE_TABLE(platform, sh_eth_id_table);
 
 static struct platform_driver sh_eth_driver = {
 	.probe = sh_eth_drv_probe,
-	.remove = sh_eth_drv_remove,
+	.remove_new = sh_eth_drv_remove,
 	.id_table = sh_eth_id_table,
 	.driver = {
 		   .name = CARDNAME,
diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_platform.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_platform.c
index fb59ff94509a..e6e130dbe1de 100644
--- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_platform.c
+++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_platform.c
@@ -169,13 +169,11 @@ err_out:
  * Description: this function calls the main to free the net resources
  * and calls the platforms hook and release the resources (e.g. mem).
  */
-static int sxgbe_platform_remove(struct platform_device *pdev)
+static void sxgbe_platform_remove(struct platform_device *pdev)
 {
 	struct net_device *ndev = platform_get_drvdata(pdev);
 
 	sxgbe_drv_remove(ndev);
-
-	return 0;
 }
 
 #ifdef CONFIG_PM
@@ -226,7 +224,7 @@ MODULE_DEVICE_TABLE(of, sxgbe_dt_ids);
 
 static struct platform_driver sxgbe_platform_driver = {
 	.probe	= sxgbe_platform_probe,
-	.remove	= sxgbe_platform_remove,
+	.remove_new = sxgbe_platform_remove,
 	.driver	= {
 		.name		= SXGBE_RESOURCE_NAME,
 		.pm		= &sxgbe_platform_pm_ops,
diff --git a/drivers/net/ethernet/seeq/sgiseeq.c b/drivers/net/ethernet/seeq/sgiseeq.c
index 96065dfc747b..76356dadf233 100644
--- a/drivers/net/ethernet/seeq/sgiseeq.c
+++ b/drivers/net/ethernet/seeq/sgiseeq.c
@@ -819,7 +819,7 @@ err_out:
 	return err;
 }
 
-static int sgiseeq_remove(struct platform_device *pdev)
+static void sgiseeq_remove(struct platform_device *pdev)
 {
 	struct net_device *dev = platform_get_drvdata(pdev);
 	struct sgiseeq_private *sp = netdev_priv(dev);
@@ -828,13 +828,11 @@ static int sgiseeq_remove(struct platform_device *pdev)
 	dma_free_noncoherent(&pdev->dev, sizeof(*sp->srings), sp->srings,
 		       sp->srings_dma, DMA_BIDIRECTIONAL);
 	free_netdev(dev);
-
-	return 0;
 }
 
 static struct platform_driver sgiseeq_driver = {
 	.probe	= sgiseeq_probe,
-	.remove	= sgiseeq_remove,
+	.remove_new = sgiseeq_remove,
 	.driver = {
 		.name	= "sgiseeq",
 	}
diff --git a/drivers/net/ethernet/sfc/ptp.c b/drivers/net/ethernet/sfc/ptp.c
index f54200f03e15..b04fdbb8aece 100644
--- a/drivers/net/ethernet/sfc/ptp.c
+++ b/drivers/net/ethernet/sfc/ptp.c
@@ -108,11 +108,17 @@
 #define	PTP_MIN_LENGTH		63
 
 #define PTP_ADDR_IPV4		0xe0000181	/* 224.0.1.129 */
-#define PTP_ADDR_IPV6		{0xff, 0x0e, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
-				0, 0x01, 0x81}	/* ff0e::181 */
+
+/* ff0e::181 */
+static const struct in6_addr ptp_addr_ipv6 = { { {
+	0xff, 0x0e, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x01, 0x81 } } };
+
+/* 01-1B-19-00-00-00 */
+static const u8 ptp_addr_ether[ETH_ALEN] __aligned(2) = {
+	0x01, 0x1b, 0x19, 0x00, 0x00, 0x00 };
+
 #define PTP_EVENT_PORT		319
 #define PTP_GENERAL_PORT	320
-#define PTP_ADDR_ETHER		{0x01, 0x1b, 0x19, 0, 0, 0} /* 01-1B-19-00-00-00 */
 
 /* Annoyingly the format of the version numbers are different between
  * versions 1 and 2 so it isn't possible to simply look for 1 or 2.
@@ -1296,7 +1302,7 @@ static int efx_ptp_insert_ipv4_filter(struct efx_nic *efx,
 
 static int efx_ptp_insert_ipv6_filter(struct efx_nic *efx,
 				      struct list_head *filter_list,
-				      struct in6_addr *addr, u16 port,
+				      const struct in6_addr *addr, u16 port,
 				      unsigned long expiry)
 {
 	struct efx_filter_spec spec;
@@ -1309,11 +1315,10 @@ static int efx_ptp_insert_ipv6_filter(struct efx_nic *efx,
 static int efx_ptp_insert_eth_multicast_filter(struct efx_nic *efx)
 {
 	struct efx_ptp_data *ptp = efx->ptp_data;
-	const u8 addr[ETH_ALEN] = PTP_ADDR_ETHER;
 	struct efx_filter_spec spec;
 
 	efx_ptp_init_filter(efx, &spec);
-	efx_filter_set_eth_local(&spec, EFX_FILTER_VID_UNSPEC, addr);
+	efx_filter_set_eth_local(&spec, EFX_FILTER_VID_UNSPEC, ptp_addr_ether);
 	spec.match_flags |= EFX_FILTER_MATCH_ETHER_TYPE;
 	spec.ether_type = htons(ETH_P_1588);
 	return efx_ptp_insert_filter(efx, &ptp->rxfilters_mcast, &spec, 0);
@@ -1346,15 +1351,13 @@ static int efx_ptp_insert_multicast_filters(struct efx_nic *efx)
 	 * PTP over IPv6 and Ethernet
 	 */
 	if (efx_ptp_use_mac_tx_timestamps(efx)) {
-		struct in6_addr ipv6_addr = {{PTP_ADDR_IPV6}};
-
 		rc = efx_ptp_insert_ipv6_filter(efx, &ptp->rxfilters_mcast,
-						&ipv6_addr, PTP_EVENT_PORT, 0);
+						&ptp_addr_ipv6, PTP_EVENT_PORT, 0);
 		if (rc < 0)
 			goto fail;
 
 		rc = efx_ptp_insert_ipv6_filter(efx, &ptp->rxfilters_mcast,
-						&ipv6_addr, PTP_GENERAL_PORT, 0);
+						&ptp_addr_ipv6, PTP_GENERAL_PORT, 0);
 		if (rc < 0)
 			goto fail;
 
@@ -1379,9 +1382,7 @@ static bool efx_ptp_valid_unicast_event_pkt(struct sk_buff *skb)
 			ip_hdr(skb)->protocol == IPPROTO_UDP &&
 			udp_hdr(skb)->source == htons(PTP_EVENT_PORT);
 	} else if (skb->protocol == htons(ETH_P_IPV6)) {
-		struct in6_addr mcast_addr = {{PTP_ADDR_IPV6}};
-
-		return !ipv6_addr_equal(&ipv6_hdr(skb)->daddr, &mcast_addr) &&
+		return !ipv6_addr_equal(&ipv6_hdr(skb)->daddr, &ptp_addr_ipv6) &&
 			ipv6_hdr(skb)->nexthdr == IPPROTO_UDP &&
 			udp_hdr(skb)->source == htons(PTP_EVENT_PORT);
 	}
diff --git a/drivers/net/ethernet/sgi/ioc3-eth.c b/drivers/net/ethernet/sgi/ioc3-eth.c
index 8fc3f5272fa7..98d0b561a057 100644
--- a/drivers/net/ethernet/sgi/ioc3-eth.c
+++ b/drivers/net/ethernet/sgi/ioc3-eth.c
@@ -962,7 +962,7 @@ out_free:
 	return err;
 }
 
-static int ioc3eth_remove(struct platform_device *pdev)
+static void ioc3eth_remove(struct platform_device *pdev)
 {
 	struct net_device *dev = platform_get_drvdata(pdev);
 	struct ioc3_private *ip = netdev_priv(dev);
@@ -973,8 +973,6 @@ static int ioc3eth_remove(struct platform_device *pdev)
 	unregister_netdev(dev);
 	del_timer_sync(&ip->ioc3_timer);
 	free_netdev(dev);
-
-	return 0;
 }
 
 
@@ -1275,7 +1273,7 @@ static void ioc3_set_multicast_list(struct net_device *dev)
 
 static struct platform_driver ioc3eth_driver = {
 	.probe  = ioc3eth_probe,
-	.remove = ioc3eth_remove,
+	.remove_new = ioc3eth_remove,
 	.driver = {
 		.name = "ioc3-eth",
 	}
diff --git a/drivers/net/ethernet/sgi/meth.c b/drivers/net/ethernet/sgi/meth.c
index 6d850ea2b94c..18b6f93d875e 100644
--- a/drivers/net/ethernet/sgi/meth.c
+++ b/drivers/net/ethernet/sgi/meth.c
@@ -854,19 +854,17 @@ static int meth_probe(struct platform_device *pdev)
 	return 0;
 }
 
-static int meth_remove(struct platform_device *pdev)
+static void meth_remove(struct platform_device *pdev)
 {
 	struct net_device *dev = platform_get_drvdata(pdev);
 
 	unregister_netdev(dev);
 	free_netdev(dev);
-
-	return 0;
 }
 
 static struct platform_driver meth_driver = {
 	.probe	= meth_probe,
-	.remove	= meth_remove,
+	.remove_new = meth_remove,
 	.driver = {
 		.name	= "meth",
 	}
diff --git a/drivers/net/ethernet/smsc/smc91x.c b/drivers/net/ethernet/smsc/smc91x.c
index 032eccf8eb42..758347616535 100644
--- a/drivers/net/ethernet/smsc/smc91x.c
+++ b/drivers/net/ethernet/smsc/smc91x.c
@@ -2411,7 +2411,7 @@ static int smc_drv_probe(struct platform_device *pdev)
 	return ret;
 }
 
-static int smc_drv_remove(struct platform_device *pdev)
+static void smc_drv_remove(struct platform_device *pdev)
 {
 	struct net_device *ndev = platform_get_drvdata(pdev);
 	struct smc_local *lp = netdev_priv(ndev);
@@ -2436,8 +2436,6 @@ static int smc_drv_remove(struct platform_device *pdev)
 	release_mem_region(res->start, SMC_IO_EXTENT);
 
 	free_netdev(ndev);
-
-	return 0;
 }
 
 static int smc_drv_suspend(struct device *dev)
@@ -2480,7 +2478,7 @@ static const struct dev_pm_ops smc_drv_pm_ops = {
 
 static struct platform_driver smc_driver = {
 	.probe		= smc_drv_probe,
-	.remove		= smc_drv_remove,
+	.remove_new	= smc_drv_remove,
 	.driver		= {
 		.name	= CARDNAME,
 		.pm	= &smc_drv_pm_ops,
diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c
index cb590db625e8..31cb7d0166f0 100644
--- a/drivers/net/ethernet/smsc/smsc911x.c
+++ b/drivers/net/ethernet/smsc/smsc911x.c
@@ -2314,7 +2314,7 @@ static int smsc911x_init(struct net_device *dev)
 	return 0;
 }
 
-static int smsc911x_drv_remove(struct platform_device *pdev)
+static void smsc911x_drv_remove(struct platform_device *pdev)
 {
 	struct net_device *dev;
 	struct smsc911x_data *pdata;
@@ -2348,8 +2348,6 @@ static int smsc911x_drv_remove(struct platform_device *pdev)
 	free_netdev(dev);
 
 	pm_runtime_disable(&pdev->dev);
-
-	return 0;
 }
 
 /* standard register acces */
@@ -2668,7 +2666,7 @@ MODULE_DEVICE_TABLE(acpi, smsc911x_acpi_match);
 
 static struct platform_driver smsc911x_driver = {
 	.probe = smsc911x_drv_probe,
-	.remove = smsc911x_drv_remove,
+	.remove_new = smsc911x_drv_remove,
 	.driver = {
 		.name	= SMSC_CHIPNAME,
 		.pm	= SMSC911X_PM_OPS,
diff --git a/drivers/net/ethernet/socionext/netsec.c b/drivers/net/ethernet/socionext/netsec.c
index f358ea003193..d9cfafb96085 100644
--- a/drivers/net/ethernet/socionext/netsec.c
+++ b/drivers/net/ethernet/socionext/netsec.c
@@ -2150,7 +2150,7 @@ free_ndev:
 	return ret;
 }
 
-static int netsec_remove(struct platform_device *pdev)
+static void netsec_remove(struct platform_device *pdev)
 {
 	struct netsec_priv *priv = platform_get_drvdata(pdev);
 
@@ -2162,8 +2162,6 @@ static int netsec_remove(struct platform_device *pdev)
 
 	pm_runtime_disable(&pdev->dev);
 	free_netdev(priv->ndev);
-
-	return 0;
 }
 
 #ifdef CONFIG_PM
@@ -2211,7 +2209,7 @@ MODULE_DEVICE_TABLE(acpi, netsec_acpi_ids);
 
 static struct platform_driver netsec_driver = {
 	.probe	= netsec_probe,
-	.remove	= netsec_remove,
+	.remove_new = netsec_remove,
 	.driver = {
 		.name = "netsec",
 		.pm = &netsec_pm_ops,
diff --git a/drivers/net/ethernet/socionext/sni_ave.c b/drivers/net/ethernet/socionext/sni_ave.c
index 4838d2383a43..eed24e67c5a6 100644
--- a/drivers/net/ethernet/socionext/sni_ave.c
+++ b/drivers/net/ethernet/socionext/sni_ave.c
@@ -1719,7 +1719,7 @@ out_del_napi:
 	return ret;
 }
 
-static int ave_remove(struct platform_device *pdev)
+static void ave_remove(struct platform_device *pdev)
 {
 	struct net_device *ndev = platform_get_drvdata(pdev);
 	struct ave_private *priv = netdev_priv(ndev);
@@ -1727,8 +1727,6 @@ static int ave_remove(struct platform_device *pdev)
 	unregister_netdev(ndev);
 	netif_napi_del(&priv->napi_rx);
 	netif_napi_del(&priv->napi_tx);
-
-	return 0;
 }
 
 #ifdef CONFIG_PM_SLEEP
@@ -1976,7 +1974,7 @@ MODULE_DEVICE_TABLE(of, of_ave_match);
 
 static struct platform_driver ave_driver = {
 	.probe  = ave_probe,
-	.remove = ave_remove,
+	.remove_new = ave_remove,
 	.driver	= {
 		.name = "ave",
 		.pm   = AVE_PM_OPS,
diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig
index 06c6871f8788..a2b9e289aa36 100644
--- a/drivers/net/ethernet/stmicro/stmmac/Kconfig
+++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig
@@ -239,6 +239,17 @@ config DWMAC_INTEL_PLAT
 	  the stmmac device driver. This driver is used for the Intel Keem Bay
 	  SoC.
 
+config DWMAC_LOONGSON1
+	tristate "Loongson1 GMAC support"
+	default MACH_LOONGSON32
+	depends on OF && (MACH_LOONGSON32 || COMPILE_TEST)
+	help
+	  Support for ethernet controller on Loongson1 SoC.
+
+	  This selects Loongson1 SoC glue layer support for the stmmac
+	  device driver. This driver is used for Loongson1-based boards
+	  like Loongson LS1B/LS1C.
+
 config DWMAC_TEGRA
 	tristate "NVIDIA Tegra MGBE support"
 	depends on ARCH_TEGRA || COMPILE_TEST
diff --git a/drivers/net/ethernet/stmicro/stmmac/Makefile b/drivers/net/ethernet/stmicro/stmmac/Makefile
index 5b57aee19267..80e598bd4255 100644
--- a/drivers/net/ethernet/stmicro/stmmac/Makefile
+++ b/drivers/net/ethernet/stmicro/stmmac/Makefile
@@ -29,6 +29,7 @@ obj-$(CONFIG_DWMAC_SUNXI)	+= dwmac-sunxi.o
 obj-$(CONFIG_DWMAC_SUN8I)	+= dwmac-sun8i.o
 obj-$(CONFIG_DWMAC_DWC_QOS_ETH)	+= dwmac-dwc-qos-eth.o
 obj-$(CONFIG_DWMAC_INTEL_PLAT)	+= dwmac-intel-plat.o
+obj-$(CONFIG_DWMAC_LOONGSON1)	+= dwmac-loongson1.o
 obj-$(CONFIG_DWMAC_GENERIC)	+= dwmac-generic.o
 obj-$(CONFIG_DWMAC_IMX8)	+= dwmac-imx.o
 obj-$(CONFIG_DWMAC_TEGRA)	+= dwmac-tegra.o
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-anarion.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-anarion.c
index 58a7f08e8d78..643ee6d8d4dd 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-anarion.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-anarion.c
@@ -115,7 +115,7 @@ static int anarion_dwmac_probe(struct platform_device *pdev)
 	if (IS_ERR(gmac))
 		return PTR_ERR(gmac);
 
-	plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac);
+	plat_dat = devm_stmmac_probe_config_dt(pdev, stmmac_res.mac);
 	if (IS_ERR(plat_dat))
 		return PTR_ERR(plat_dat);
 
@@ -124,13 +124,7 @@ static int anarion_dwmac_probe(struct platform_device *pdev)
 	anarion_gmac_init(pdev, gmac);
 	plat_dat->bsp_priv = gmac;
 
-	ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
-	if (ret) {
-		stmmac_remove_config_dt(pdev, plat_dat);
-		return ret;
-	}
-
-	return 0;
+	return stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
 }
 
 static const struct of_device_id anarion_dwmac_match[] = {
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c
index 61ebf36da13d..ec924c6c76c6 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c
@@ -435,15 +435,14 @@ static int dwc_eth_dwmac_probe(struct platform_device *pdev)
 	if (IS_ERR(stmmac_res.addr))
 		return PTR_ERR(stmmac_res.addr);
 
-	plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac);
+	plat_dat = devm_stmmac_probe_config_dt(pdev, stmmac_res.mac);
 	if (IS_ERR(plat_dat))
 		return PTR_ERR(plat_dat);
 
 	ret = data->probe(pdev, plat_dat, &stmmac_res);
 	if (ret < 0) {
 		dev_err_probe(&pdev->dev, ret, "failed to probe subdriver\n");
-
-		goto remove_config;
+		return ret;
 	}
 
 	ret = dwc_eth_dwmac_config_dt(pdev, plat_dat);
@@ -458,25 +457,17 @@ static int dwc_eth_dwmac_probe(struct platform_device *pdev)
 
 remove:
 	data->remove(pdev);
-remove_config:
-	stmmac_remove_config_dt(pdev, plat_dat);
 
 	return ret;
 }
 
 static void dwc_eth_dwmac_remove(struct platform_device *pdev)
 {
-	struct net_device *ndev = platform_get_drvdata(pdev);
-	struct stmmac_priv *priv = netdev_priv(ndev);
-	const struct dwc_eth_dwmac_data *data;
-
-	data = device_get_match_data(&pdev->dev);
+	const struct dwc_eth_dwmac_data *data = device_get_match_data(&pdev->dev);
 
 	stmmac_dvr_remove(&pdev->dev);
 
 	data->remove(pdev);
-
-	stmmac_remove_config_dt(pdev, priv->plat);
 }
 
 static const struct of_device_id dwc_eth_dwmac_match[] = {
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c
index 20fc455b3337..598eff926815 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c
@@ -27,7 +27,7 @@ static int dwmac_generic_probe(struct platform_device *pdev)
 		return ret;
 
 	if (pdev->dev.of_node) {
-		plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac);
+		plat_dat = devm_stmmac_probe_config_dt(pdev, stmmac_res.mac);
 		if (IS_ERR(plat_dat)) {
 			dev_err(&pdev->dev, "dt configuration failed\n");
 			return PTR_ERR(plat_dat);
@@ -46,17 +46,7 @@ static int dwmac_generic_probe(struct platform_device *pdev)
 		plat_dat->unicast_filter_entries = 1;
 	}
 
-	ret = stmmac_pltfr_probe(pdev, plat_dat, &stmmac_res);
-	if (ret)
-		goto err_remove_config_dt;
-
-	return 0;
-
-err_remove_config_dt:
-	if (pdev->dev.of_node)
-		stmmac_remove_config_dt(pdev, plat_dat);
-
-	return ret;
+	return devm_stmmac_pltfr_probe(pdev, plat_dat, &stmmac_res);
 }
 
 static const struct of_device_id dwmac_generic_match[] = {
@@ -77,7 +67,6 @@ MODULE_DEVICE_TABLE(of, dwmac_generic_match);
 
 static struct platform_driver dwmac_generic_driver = {
 	.probe  = dwmac_generic_probe,
-	.remove_new = stmmac_pltfr_remove,
 	.driver = {
 		.name           = STMMAC_RESOURCE_NAME,
 		.pm		= &stmmac_pltfr_pm_ops,
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c
index df34e34cc14f..8f730ada71f9 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c
@@ -331,15 +331,14 @@ static int imx_dwmac_probe(struct platform_device *pdev)
 	if (!dwmac)
 		return -ENOMEM;
 
-	plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac);
+	plat_dat = devm_stmmac_probe_config_dt(pdev, stmmac_res.mac);
 	if (IS_ERR(plat_dat))
 		return PTR_ERR(plat_dat);
 
 	data = of_device_get_match_data(&pdev->dev);
 	if (!data) {
 		dev_err(&pdev->dev, "failed to get match data\n");
-		ret = -EINVAL;
-		goto err_match_data;
+		return -EINVAL;
 	}
 
 	dwmac->ops = data;
@@ -348,7 +347,7 @@ static int imx_dwmac_probe(struct platform_device *pdev)
 	ret = imx_dwmac_parse_dt(dwmac, &pdev->dev);
 	if (ret) {
 		dev_err(&pdev->dev, "failed to parse OF data\n");
-		goto err_parse_dt;
+		return ret;
 	}
 
 	if (data->flags & STMMAC_FLAG_HWTSTAMP_CORRECT_LATENCY)
@@ -365,7 +364,7 @@ static int imx_dwmac_probe(struct platform_device *pdev)
 
 	ret = imx_dwmac_clks_config(dwmac, true);
 	if (ret)
-		goto err_clks_config;
+		return ret;
 
 	ret = imx_dwmac_init(pdev, dwmac);
 	if (ret)
@@ -385,10 +384,6 @@ err_drv_probe:
 	imx_dwmac_exit(pdev, plat_dat->bsp_priv);
 err_dwmac_init:
 	imx_dwmac_clks_config(dwmac, false);
-err_clks_config:
-err_parse_dt:
-err_match_data:
-	stmmac_remove_config_dt(pdev, plat_dat);
 	return ret;
 }
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c
index 0a20c3d24722..19c93b998fb3 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c
@@ -241,29 +241,25 @@ static int ingenic_mac_probe(struct platform_device *pdev)
 	if (ret)
 		return ret;
 
-	plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac);
+	plat_dat = devm_stmmac_probe_config_dt(pdev, stmmac_res.mac);
 	if (IS_ERR(plat_dat))
 		return PTR_ERR(plat_dat);
 
 	mac = devm_kzalloc(&pdev->dev, sizeof(*mac), GFP_KERNEL);
-	if (!mac) {
-		ret = -ENOMEM;
-		goto err_remove_config_dt;
-	}
+	if (!mac)
+		return -ENOMEM;
 
 	data = of_device_get_match_data(&pdev->dev);
 	if (!data) {
 		dev_err(&pdev->dev, "No of match data provided\n");
-		ret = -EINVAL;
-		goto err_remove_config_dt;
+		return -EINVAL;
 	}
 
 	/* Get MAC PHY control register */
 	mac->regmap = syscon_regmap_lookup_by_phandle(pdev->dev.of_node, "mode-reg");
 	if (IS_ERR(mac->regmap)) {
 		dev_err(&pdev->dev, "%s: Failed to get syscon regmap\n", __func__);
-		ret = PTR_ERR(mac->regmap);
-		goto err_remove_config_dt;
+		return PTR_ERR(mac->regmap);
 	}
 
 	if (!of_property_read_u32(pdev->dev.of_node, "tx-clk-delay-ps", &tx_delay_ps)) {
@@ -272,8 +268,7 @@ static int ingenic_mac_probe(struct platform_device *pdev)
 			mac->tx_delay = tx_delay_ps * 1000;
 		} else {
 			dev_err(&pdev->dev, "Invalid TX clock delay: %dps\n", tx_delay_ps);
-			ret = -EINVAL;
-			goto err_remove_config_dt;
+			return -EINVAL;
 		}
 	}
 
@@ -283,8 +278,7 @@ static int ingenic_mac_probe(struct platform_device *pdev)
 			mac->rx_delay = rx_delay_ps * 1000;
 		} else {
 			dev_err(&pdev->dev, "Invalid RX clock delay: %dps\n", rx_delay_ps);
-			ret = -EINVAL;
-			goto err_remove_config_dt;
+			return -EINVAL;
 		}
 	}
 
@@ -295,18 +289,9 @@ static int ingenic_mac_probe(struct platform_device *pdev)
 
 	ret = ingenic_mac_init(plat_dat);
 	if (ret)
-		goto err_remove_config_dt;
-
-	ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
-	if (ret)
-		goto err_remove_config_dt;
-
-	return 0;
-
-err_remove_config_dt:
-	stmmac_remove_config_dt(pdev, plat_dat);
+		return ret;
 
-	return ret;
+	return stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
 }
 
 #ifdef CONFIG_PM_SLEEP
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c
index d352a14f9d48..70edc5232379 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c
@@ -85,17 +85,15 @@ static int intel_eth_plat_probe(struct platform_device *pdev)
 	if (ret)
 		return ret;
 
-	plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac);
+	plat_dat = devm_stmmac_probe_config_dt(pdev, stmmac_res.mac);
 	if (IS_ERR(plat_dat)) {
 		dev_err(&pdev->dev, "dt configuration failed\n");
 		return PTR_ERR(plat_dat);
 	}
 
 	dwmac = devm_kzalloc(&pdev->dev, sizeof(*dwmac), GFP_KERNEL);
-	if (!dwmac) {
-		ret = -ENOMEM;
-		goto err_remove_config_dt;
-	}
+	if (!dwmac)
+		return -ENOMEM;
 
 	dwmac->dev = &pdev->dev;
 	dwmac->tx_clk = NULL;
@@ -110,10 +108,8 @@ static int intel_eth_plat_probe(struct platform_device *pdev)
 		/* Enable TX clock */
 		if (dwmac->data->tx_clk_en) {
 			dwmac->tx_clk = devm_clk_get(&pdev->dev, "tx_clk");
-			if (IS_ERR(dwmac->tx_clk)) {
-				ret = PTR_ERR(dwmac->tx_clk);
-				goto err_remove_config_dt;
-			}
+			if (IS_ERR(dwmac->tx_clk))
+				return PTR_ERR(dwmac->tx_clk);
 
 			clk_prepare_enable(dwmac->tx_clk);
 
@@ -126,7 +122,7 @@ static int intel_eth_plat_probe(struct platform_device *pdev)
 				if (ret) {
 					dev_err(&pdev->dev,
 						"Failed to set tx_clk\n");
-					goto err_remove_config_dt;
+					return ret;
 				}
 			}
 		}
@@ -140,7 +136,7 @@ static int intel_eth_plat_probe(struct platform_device *pdev)
 			if (ret) {
 				dev_err(&pdev->dev,
 					"Failed to set clk_ptp_ref\n");
-				goto err_remove_config_dt;
+				return ret;
 			}
 		}
 	}
@@ -158,15 +154,10 @@ static int intel_eth_plat_probe(struct platform_device *pdev)
 	ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
 	if (ret) {
 		clk_disable_unprepare(dwmac->tx_clk);
-		goto err_remove_config_dt;
+		return ret;
 	}
 
 	return 0;
-
-err_remove_config_dt:
-	stmmac_remove_config_dt(pdev, plat_dat);
-
-	return ret;
 }
 
 static void intel_eth_plat_remove(struct platform_device *pdev)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c
index 9b0200749109..281687d7083b 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c
@@ -384,22 +384,20 @@ static int ipq806x_gmac_probe(struct platform_device *pdev)
 	if (val)
 		return val;
 
-	plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac);
+	plat_dat = devm_stmmac_probe_config_dt(pdev, stmmac_res.mac);
 	if (IS_ERR(plat_dat))
 		return PTR_ERR(plat_dat);
 
 	gmac = devm_kzalloc(dev, sizeof(*gmac), GFP_KERNEL);
-	if (!gmac) {
-		err = -ENOMEM;
-		goto err_remove_config_dt;
-	}
+	if (!gmac)
+		return -ENOMEM;
 
 	gmac->pdev = pdev;
 
 	err = ipq806x_gmac_of_parse(gmac);
 	if (err) {
 		dev_err(dev, "device tree parsing error\n");
-		goto err_remove_config_dt;
+		return err;
 	}
 
 	regmap_write(gmac->qsgmii_csr, QSGMII_PCS_CAL_LCKDT_CTL,
@@ -459,11 +457,11 @@ static int ipq806x_gmac_probe(struct platform_device *pdev)
 	if (gmac->phy_mode == PHY_INTERFACE_MODE_SGMII) {
 		err = ipq806x_gmac_configure_qsgmii_params(gmac);
 		if (err)
-			goto err_remove_config_dt;
+			return err;
 
 		err = ipq806x_gmac_configure_qsgmii_pcs_speed(gmac);
 		if (err)
-			goto err_remove_config_dt;
+			return err;
 	}
 
 	plat_dat->has_gmac = true;
@@ -473,21 +471,12 @@ static int ipq806x_gmac_probe(struct platform_device *pdev)
 	plat_dat->tx_fifo_size = 8192;
 	plat_dat->rx_fifo_size = 8192;
 
-	err = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
-	if (err)
-		goto err_remove_config_dt;
-
-	return 0;
+	return stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
 
 err_unsupported_phy:
 	dev_err(&pdev->dev, "Unsupported PHY mode: \"%s\"\n",
 		phy_modes(gmac->phy_mode));
-	err = -EINVAL;
-
-err_remove_config_dt:
-	stmmac_remove_config_dt(pdev, plat_dat);
-
-	return err;
+	return -EINVAL;
 }
 
 static const struct of_device_id ipq806x_gmac_dwmac_match[] = {
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson1.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson1.c
new file mode 100644
index 000000000000..3e86810717d3
--- /dev/null
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson1.c
@@ -0,0 +1,209 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Loongson-1 DWMAC glue layer
+ *
+ * Copyright (C) 2011-2023 Keguang Zhang <keguang.zhang@gmail.com>
+ */
+
+#include <linux/mfd/syscon.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/phy.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+
+#include "stmmac.h"
+#include "stmmac_platform.h"
+
+#define LS1B_GMAC0_BASE		(0x1fe10000)
+#define LS1B_GMAC1_BASE		(0x1fe20000)
+
+/* Loongson-1 SYSCON Registers */
+#define LS1X_SYSCON0		(0x0)
+#define LS1X_SYSCON1		(0x4)
+
+/* Loongson-1B SYSCON Register Bits */
+#define GMAC1_USE_UART1		BIT(4)
+#define GMAC1_USE_UART0		BIT(3)
+
+#define GMAC1_SHUT		BIT(13)
+#define GMAC0_SHUT		BIT(12)
+
+#define GMAC1_USE_TXCLK		BIT(3)
+#define GMAC0_USE_TXCLK		BIT(2)
+#define GMAC1_USE_PWM23		BIT(1)
+#define GMAC0_USE_PWM01		BIT(0)
+
+/* Loongson-1C SYSCON Register Bits */
+#define GMAC_SHUT		BIT(6)
+
+#define PHY_INTF_SELI		GENMASK(30, 28)
+#define PHY_INTF_MII		FIELD_PREP(PHY_INTF_SELI, 0)
+#define PHY_INTF_RMII		FIELD_PREP(PHY_INTF_SELI, 4)
+
+struct ls1x_dwmac {
+	struct plat_stmmacenet_data *plat_dat;
+	struct regmap *regmap;
+};
+
+static int ls1b_dwmac_syscon_init(struct platform_device *pdev, void *priv)
+{
+	struct ls1x_dwmac *dwmac = priv;
+	struct plat_stmmacenet_data *plat = dwmac->plat_dat;
+	struct regmap *regmap = dwmac->regmap;
+	struct resource *res;
+	unsigned long reg_base;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		dev_err(&pdev->dev, "Could not get IO_MEM resources\n");
+		return -EINVAL;
+	}
+	reg_base = (unsigned long)res->start;
+
+	if (reg_base == LS1B_GMAC0_BASE) {
+		switch (plat->phy_interface) {
+		case PHY_INTERFACE_MODE_RGMII_ID:
+			regmap_update_bits(regmap, LS1X_SYSCON0,
+					   GMAC0_USE_TXCLK | GMAC0_USE_PWM01,
+					   0);
+			break;
+		case PHY_INTERFACE_MODE_MII:
+			regmap_update_bits(regmap, LS1X_SYSCON0,
+					   GMAC0_USE_TXCLK | GMAC0_USE_PWM01,
+					   GMAC0_USE_TXCLK | GMAC0_USE_PWM01);
+			break;
+		default:
+			dev_err(&pdev->dev, "Unsupported PHY mode %u\n",
+				plat->phy_interface);
+			return -EOPNOTSUPP;
+		}
+
+		regmap_update_bits(regmap, LS1X_SYSCON0, GMAC0_SHUT, 0);
+	} else if (reg_base == LS1B_GMAC1_BASE) {
+		regmap_update_bits(regmap, LS1X_SYSCON0,
+				   GMAC1_USE_UART1 | GMAC1_USE_UART0,
+				   GMAC1_USE_UART1 | GMAC1_USE_UART0);
+
+		switch (plat->phy_interface) {
+		case PHY_INTERFACE_MODE_RGMII_ID:
+			regmap_update_bits(regmap, LS1X_SYSCON1,
+					   GMAC1_USE_TXCLK | GMAC1_USE_PWM23,
+					   0);
+
+			break;
+		case PHY_INTERFACE_MODE_MII:
+			regmap_update_bits(regmap, LS1X_SYSCON1,
+					   GMAC1_USE_TXCLK | GMAC1_USE_PWM23,
+					   GMAC1_USE_TXCLK | GMAC1_USE_PWM23);
+			break;
+		default:
+			dev_err(&pdev->dev, "Unsupported PHY mode %u\n",
+				plat->phy_interface);
+			return -EOPNOTSUPP;
+		}
+
+		regmap_update_bits(regmap, LS1X_SYSCON1, GMAC1_SHUT, 0);
+	} else {
+		dev_err(&pdev->dev, "Invalid Ethernet MAC base address %lx",
+			reg_base);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int ls1c_dwmac_syscon_init(struct platform_device *pdev, void *priv)
+{
+	struct ls1x_dwmac *dwmac = priv;
+	struct plat_stmmacenet_data *plat = dwmac->plat_dat;
+	struct regmap *regmap = dwmac->regmap;
+
+	switch (plat->phy_interface) {
+	case PHY_INTERFACE_MODE_MII:
+		regmap_update_bits(regmap, LS1X_SYSCON1, PHY_INTF_SELI,
+				   PHY_INTF_MII);
+		break;
+	case PHY_INTERFACE_MODE_RMII:
+		regmap_update_bits(regmap, LS1X_SYSCON1, PHY_INTF_SELI,
+				   PHY_INTF_RMII);
+		break;
+	default:
+		dev_err(&pdev->dev, "Unsupported PHY-mode %u\n",
+			plat->phy_interface);
+		return -EOPNOTSUPP;
+	}
+
+	regmap_update_bits(regmap, LS1X_SYSCON0, GMAC0_SHUT, 0);
+
+	return 0;
+}
+
+static int ls1x_dwmac_probe(struct platform_device *pdev)
+{
+	struct plat_stmmacenet_data *plat_dat;
+	struct stmmac_resources stmmac_res;
+	struct regmap *regmap;
+	struct ls1x_dwmac *dwmac;
+	int (*init)(struct platform_device *pdev, void *priv);
+	int ret;
+
+	ret = stmmac_get_platform_resources(pdev, &stmmac_res);
+	if (ret)
+		return ret;
+
+	/* Probe syscon */
+	regmap = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
+						 "loongson,ls1-syscon");
+	if (IS_ERR(regmap))
+		return dev_err_probe(&pdev->dev, PTR_ERR(regmap),
+				     "Unable to find syscon\n");
+
+	init = of_device_get_match_data(&pdev->dev);
+	if (!init) {
+		dev_err(&pdev->dev, "No of match data provided\n");
+		return -EINVAL;
+	}
+
+	dwmac = devm_kzalloc(&pdev->dev, sizeof(*dwmac), GFP_KERNEL);
+	if (!dwmac)
+		return -ENOMEM;
+
+	plat_dat = devm_stmmac_probe_config_dt(pdev, stmmac_res.mac);
+	if (IS_ERR(plat_dat))
+		return dev_err_probe(&pdev->dev, PTR_ERR(plat_dat),
+				     "dt configuration failed\n");
+
+	plat_dat->bsp_priv = dwmac;
+	plat_dat->init = init;
+	dwmac->plat_dat = plat_dat;
+	dwmac->regmap = regmap;
+
+	return devm_stmmac_pltfr_probe(pdev, plat_dat, &stmmac_res);
+}
+
+static const struct of_device_id ls1x_dwmac_match[] = {
+	{
+		.compatible = "loongson,ls1b-gmac",
+		.data = &ls1b_dwmac_syscon_init,
+	},
+	{
+		.compatible = "loongson,ls1c-emac",
+		.data = &ls1c_dwmac_syscon_init,
+	},
+	{ }
+};
+MODULE_DEVICE_TABLE(of, ls1x_dwmac_match);
+
+static struct platform_driver ls1x_dwmac_driver = {
+	.probe = ls1x_dwmac_probe,
+	.driver = {
+		.name = "loongson1-dwmac",
+		.of_match_table = ls1x_dwmac_match,
+	},
+};
+module_platform_driver(ls1x_dwmac_driver);
+
+MODULE_AUTHOR("Keguang Zhang <keguang.zhang@gmail.com>");
+MODULE_DESCRIPTION("Loongson-1 DWMAC glue layer");
+MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c
index d0aa674ce705..4c810d8f5bea 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c
@@ -37,7 +37,7 @@ static int lpc18xx_dwmac_probe(struct platform_device *pdev)
 	if (ret)
 		return ret;
 
-	plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac);
+	plat_dat = devm_stmmac_probe_config_dt(pdev, stmmac_res.mac);
 	if (IS_ERR(plat_dat))
 		return PTR_ERR(plat_dat);
 
@@ -46,8 +46,7 @@ static int lpc18xx_dwmac_probe(struct platform_device *pdev)
 	reg = syscon_regmap_lookup_by_compatible("nxp,lpc1850-creg");
 	if (IS_ERR(reg)) {
 		dev_err(&pdev->dev, "syscon lookup failed\n");
-		ret = PTR_ERR(reg);
-		goto err_remove_config_dt;
+		return PTR_ERR(reg);
 	}
 
 	if (plat_dat->mac_interface == PHY_INTERFACE_MODE_MII) {
@@ -56,23 +55,13 @@ static int lpc18xx_dwmac_probe(struct platform_device *pdev)
 		ethmode = LPC18XX_CREG_CREG6_ETHMODE_RMII;
 	} else {
 		dev_err(&pdev->dev, "Only MII and RMII mode supported\n");
-		ret = -EINVAL;
-		goto err_remove_config_dt;
+		return -EINVAL;
 	}
 
 	regmap_update_bits(reg, LPC18XX_CREG_CREG6,
 			   LPC18XX_CREG_CREG6_ETHMODE_MASK, ethmode);
 
-	ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
-	if (ret)
-		goto err_remove_config_dt;
-
-	return 0;
-
-err_remove_config_dt:
-	stmmac_remove_config_dt(pdev, plat_dat);
-
-	return ret;
+	return stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
 }
 
 static const struct of_device_id lpc18xx_dwmac_match[] = {
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c
index cd796ec04132..2a9132d6d743 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c
@@ -656,7 +656,7 @@ static int mediatek_dwmac_probe(struct platform_device *pdev)
 	if (ret)
 		return ret;
 
-	plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac);
+	plat_dat = devm_stmmac_probe_config_dt(pdev, stmmac_res.mac);
 	if (IS_ERR(plat_dat))
 		return PTR_ERR(plat_dat);
 
@@ -665,7 +665,7 @@ static int mediatek_dwmac_probe(struct platform_device *pdev)
 
 	ret = mediatek_dwmac_clks_config(priv_plat, true);
 	if (ret)
-		goto err_remove_config_dt;
+		return ret;
 
 	ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
 	if (ret)
@@ -675,8 +675,6 @@ static int mediatek_dwmac_probe(struct platform_device *pdev)
 
 err_drv_probe:
 	mediatek_dwmac_clks_config(priv_plat, false);
-err_remove_config_dt:
-	stmmac_remove_config_dt(pdev, plat_dat);
 
 	return ret;
 }
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson.c
index 959f88c6da16..a16bfa9089ea 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson.c
@@ -52,35 +52,22 @@ static int meson6_dwmac_probe(struct platform_device *pdev)
 	if (ret)
 		return ret;
 
-	plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac);
+	plat_dat = devm_stmmac_probe_config_dt(pdev, stmmac_res.mac);
 	if (IS_ERR(plat_dat))
 		return PTR_ERR(plat_dat);
 
 	dwmac = devm_kzalloc(&pdev->dev, sizeof(*dwmac), GFP_KERNEL);
-	if (!dwmac) {
-		ret = -ENOMEM;
-		goto err_remove_config_dt;
-	}
+	if (!dwmac)
+		return -ENOMEM;
 
 	dwmac->reg = devm_platform_ioremap_resource(pdev, 1);
-	if (IS_ERR(dwmac->reg)) {
-		ret = PTR_ERR(dwmac->reg);
-		goto err_remove_config_dt;
-	}
+	if (IS_ERR(dwmac->reg))
+		return PTR_ERR(dwmac->reg);
 
 	plat_dat->bsp_priv = dwmac;
 	plat_dat->fix_mac_speed = meson6_dwmac_fix_mac_speed;
 
-	ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
-	if (ret)
-		goto err_remove_config_dt;
-
-	return 0;
-
-err_remove_config_dt:
-	stmmac_remove_config_dt(pdev, plat_dat);
-
-	return ret;
+	return stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
 }
 
 static const struct of_device_id meson6_dwmac_match[] = {
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
index 0b159dc0d5f6..b23944aa344e 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
@@ -400,33 +400,27 @@ static int meson8b_dwmac_probe(struct platform_device *pdev)
 	if (ret)
 		return ret;
 
-	plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac);
+	plat_dat = devm_stmmac_probe_config_dt(pdev, stmmac_res.mac);
 	if (IS_ERR(plat_dat))
 		return PTR_ERR(plat_dat);
 
 	dwmac = devm_kzalloc(&pdev->dev, sizeof(*dwmac), GFP_KERNEL);
-	if (!dwmac) {
-		ret = -ENOMEM;
-		goto err_remove_config_dt;
-	}
+	if (!dwmac)
+		return -ENOMEM;
 
 	dwmac->data = (const struct meson8b_dwmac_data *)
 		of_device_get_match_data(&pdev->dev);
-	if (!dwmac->data) {
-		ret = -EINVAL;
-		goto err_remove_config_dt;
-	}
+	if (!dwmac->data)
+		return -EINVAL;
 	dwmac->regs = devm_platform_ioremap_resource(pdev, 1);
-	if (IS_ERR(dwmac->regs)) {
-		ret = PTR_ERR(dwmac->regs);
-		goto err_remove_config_dt;
-	}
+	if (IS_ERR(dwmac->regs))
+		return PTR_ERR(dwmac->regs);
 
 	dwmac->dev = &pdev->dev;
 	ret = of_get_phy_mode(pdev->dev.of_node, &dwmac->phy_mode);
 	if (ret) {
 		dev_err(&pdev->dev, "missing phy-mode property\n");
-		goto err_remove_config_dt;
+		return ret;
 	}
 
 	/* use 2ns as fallback since this value was previously hardcoded */
@@ -448,53 +442,40 @@ static int meson8b_dwmac_probe(struct platform_device *pdev)
 		if (dwmac->rx_delay_ps > 3000 || dwmac->rx_delay_ps % 200) {
 			dev_err(dwmac->dev,
 				"The RGMII RX delay range is 0..3000ps in 200ps steps");
-			ret = -EINVAL;
-			goto err_remove_config_dt;
+			return -EINVAL;
 		}
 	} else {
 		if (dwmac->rx_delay_ps != 0 && dwmac->rx_delay_ps != 2000) {
 			dev_err(dwmac->dev,
 				"The only allowed RGMII RX delays values are: 0ps, 2000ps");
-			ret = -EINVAL;
-			goto err_remove_config_dt;
+			return -EINVAL;
 		}
 	}
 
 	dwmac->timing_adj_clk = devm_clk_get_optional(dwmac->dev,
 						      "timing-adjustment");
-	if (IS_ERR(dwmac->timing_adj_clk)) {
-		ret = PTR_ERR(dwmac->timing_adj_clk);
-		goto err_remove_config_dt;
-	}
+	if (IS_ERR(dwmac->timing_adj_clk))
+		return PTR_ERR(dwmac->timing_adj_clk);
 
 	ret = meson8b_init_rgmii_delays(dwmac);
 	if (ret)
-		goto err_remove_config_dt;
+		return ret;
 
 	ret = meson8b_init_rgmii_tx_clk(dwmac);
 	if (ret)
-		goto err_remove_config_dt;
+		return ret;
 
 	ret = dwmac->data->set_phy_mode(dwmac);
 	if (ret)
-		goto err_remove_config_dt;
+		return ret;
 
 	ret = meson8b_init_prg_eth(dwmac);
 	if (ret)
-		goto err_remove_config_dt;
+		return ret;
 
 	plat_dat->bsp_priv = dwmac;
 
-	ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
-	if (ret)
-		goto err_remove_config_dt;
-
-	return 0;
-
-err_remove_config_dt:
-	stmmac_remove_config_dt(pdev, plat_dat);
-
-	return ret;
+	return stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
 }
 
 static const struct meson8b_dwmac_data meson8b_dwmac_data = {
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
index d920a50dd16c..382e8de1255d 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
@@ -1824,7 +1824,7 @@ static int rk_gmac_probe(struct platform_device *pdev)
 	if (ret)
 		return ret;
 
-	plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac);
+	plat_dat = devm_stmmac_probe_config_dt(pdev, stmmac_res.mac);
 	if (IS_ERR(plat_dat))
 		return PTR_ERR(plat_dat);
 
@@ -1836,18 +1836,16 @@ static int rk_gmac_probe(struct platform_device *pdev)
 	plat_dat->fix_mac_speed = rk_fix_speed;
 
 	plat_dat->bsp_priv = rk_gmac_setup(pdev, plat_dat, data);
-	if (IS_ERR(plat_dat->bsp_priv)) {
-		ret = PTR_ERR(plat_dat->bsp_priv);
-		goto err_remove_config_dt;
-	}
+	if (IS_ERR(plat_dat->bsp_priv))
+		return PTR_ERR(plat_dat->bsp_priv);
 
 	ret = rk_gmac_clk_init(plat_dat);
 	if (ret)
-		goto err_remove_config_dt;
+		return ret;
 
 	ret = rk_gmac_powerup(plat_dat->bsp_priv);
 	if (ret)
-		goto err_remove_config_dt;
+		return ret;
 
 	ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
 	if (ret)
@@ -1857,8 +1855,6 @@ static int rk_gmac_probe(struct platform_device *pdev)
 
 err_gmac_powerdown:
 	rk_gmac_powerdown(plat_dat->bsp_priv);
-err_remove_config_dt:
-	stmmac_remove_config_dt(pdev, plat_dat);
 
 	return ret;
 }
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c
index 9bf102bbc6a0..ba2ce776bd4d 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c
@@ -400,21 +400,19 @@ static int socfpga_dwmac_probe(struct platform_device *pdev)
 	if (ret)
 		return ret;
 
-	plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac);
+	plat_dat = devm_stmmac_probe_config_dt(pdev, stmmac_res.mac);
 	if (IS_ERR(plat_dat))
 		return PTR_ERR(plat_dat);
 
 	dwmac = devm_kzalloc(dev, sizeof(*dwmac), GFP_KERNEL);
-	if (!dwmac) {
-		ret = -ENOMEM;
-		goto err_remove_config_dt;
-	}
+	if (!dwmac)
+		return -ENOMEM;
 
 	dwmac->stmmac_ocp_rst = devm_reset_control_get_optional(dev, "stmmaceth-ocp");
 	if (IS_ERR(dwmac->stmmac_ocp_rst)) {
 		ret = PTR_ERR(dwmac->stmmac_ocp_rst);
 		dev_err(dev, "error getting reset control of ocp %d\n", ret);
-		goto err_remove_config_dt;
+		return ret;
 	}
 
 	reset_control_deassert(dwmac->stmmac_ocp_rst);
@@ -422,7 +420,7 @@ static int socfpga_dwmac_probe(struct platform_device *pdev)
 	ret = socfpga_dwmac_parse_data(dwmac, dev);
 	if (ret) {
 		dev_err(dev, "Unable to parse OF data\n");
-		goto err_remove_config_dt;
+		return ret;
 	}
 
 	dwmac->ops = ops;
@@ -431,7 +429,7 @@ static int socfpga_dwmac_probe(struct platform_device *pdev)
 
 	ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
 	if (ret)
-		goto err_remove_config_dt;
+		return ret;
 
 	ndev = platform_get_drvdata(pdev);
 	stpriv = netdev_priv(ndev);
@@ -492,8 +490,6 @@ static int socfpga_dwmac_probe(struct platform_device *pdev)
 
 err_dvr_remove:
 	stmmac_dvr_remove(&pdev->dev);
-err_remove_config_dt:
-	stmmac_remove_config_dt(pdev, plat_dat);
 
 	return ret;
 }
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-starfive.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-starfive.c
index 9289bb87c3e3..5d630affb4d1 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-starfive.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-starfive.c
@@ -105,7 +105,7 @@ static int starfive_dwmac_probe(struct platform_device *pdev)
 		return dev_err_probe(&pdev->dev, err,
 				     "failed to get resources\n");
 
-	plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac);
+	plat_dat = devm_stmmac_probe_config_dt(pdev, stmmac_res.mac);
 	if (IS_ERR(plat_dat))
 		return dev_err_probe(&pdev->dev, PTR_ERR(plat_dat),
 				     "dt configuration failed\n");
@@ -141,13 +141,7 @@ static int starfive_dwmac_probe(struct platform_device *pdev)
 	if (err)
 		return err;
 
-	err = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
-	if (err) {
-		stmmac_remove_config_dt(pdev, plat_dat);
-		return err;
-	}
-
-	return 0;
+	return stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
 }
 
 static const struct of_device_id starfive_dwmac_match[] = {
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c
index 0d653bbb931b..4445cddc4cbe 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c
@@ -273,20 +273,18 @@ static int sti_dwmac_probe(struct platform_device *pdev)
 	if (ret)
 		return ret;
 
-	plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac);
+	plat_dat = devm_stmmac_probe_config_dt(pdev, stmmac_res.mac);
 	if (IS_ERR(plat_dat))
 		return PTR_ERR(plat_dat);
 
 	dwmac = devm_kzalloc(&pdev->dev, sizeof(*dwmac), GFP_KERNEL);
-	if (!dwmac) {
-		ret = -ENOMEM;
-		goto err_remove_config_dt;
-	}
+	if (!dwmac)
+		return -ENOMEM;
 
 	ret = sti_dwmac_parse_data(dwmac, pdev);
 	if (ret) {
 		dev_err(&pdev->dev, "Unable to parse OF data\n");
-		goto err_remove_config_dt;
+		return ret;
 	}
 
 	dwmac->fix_retime_src = data->fix_retime_src;
@@ -296,7 +294,7 @@ static int sti_dwmac_probe(struct platform_device *pdev)
 
 	ret = clk_prepare_enable(dwmac->clk);
 	if (ret)
-		goto err_remove_config_dt;
+		return ret;
 
 	ret = sti_dwmac_set_mode(dwmac);
 	if (ret)
@@ -310,8 +308,6 @@ static int sti_dwmac_probe(struct platform_device *pdev)
 
 disable_clk:
 	clk_disable_unprepare(dwmac->clk);
-err_remove_config_dt:
-	stmmac_remove_config_dt(pdev, plat_dat);
 
 	return ret;
 }
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c
index a0e276783e65..d8d3c729f219 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c
@@ -372,21 +372,18 @@ static int stm32_dwmac_probe(struct platform_device *pdev)
 	if (ret)
 		return ret;
 
-	plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac);
+	plat_dat = devm_stmmac_probe_config_dt(pdev, stmmac_res.mac);
 	if (IS_ERR(plat_dat))
 		return PTR_ERR(plat_dat);
 
 	dwmac = devm_kzalloc(&pdev->dev, sizeof(*dwmac), GFP_KERNEL);
-	if (!dwmac) {
-		ret = -ENOMEM;
-		goto err_remove_config_dt;
-	}
+	if (!dwmac)
+		return -ENOMEM;
 
 	data = of_device_get_match_data(&pdev->dev);
 	if (!data) {
 		dev_err(&pdev->dev, "no of match data provided\n");
-		ret = -EINVAL;
-		goto err_remove_config_dt;
+		return -EINVAL;
 	}
 
 	dwmac->ops = data;
@@ -395,14 +392,14 @@ static int stm32_dwmac_probe(struct platform_device *pdev)
 	ret = stm32_dwmac_parse_data(dwmac, &pdev->dev);
 	if (ret) {
 		dev_err(&pdev->dev, "Unable to parse OF data\n");
-		goto err_remove_config_dt;
+		return ret;
 	}
 
 	plat_dat->bsp_priv = dwmac;
 
 	ret = stm32_dwmac_init(plat_dat);
 	if (ret)
-		goto err_remove_config_dt;
+		return ret;
 
 	ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
 	if (ret)
@@ -412,8 +409,6 @@ static int stm32_dwmac_probe(struct platform_device *pdev)
 
 err_clk_disable:
 	stm32_dwmac_clk_disable(dwmac);
-err_remove_config_dt:
-	stmmac_remove_config_dt(pdev, plat_dat);
 
 	return ret;
 }
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
index 465ff1fd4785..137741b94122 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
@@ -1224,7 +1224,7 @@ static int sun8i_dwmac_probe(struct platform_device *pdev)
 	if (ret)
 		return -EINVAL;
 
-	plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac);
+	plat_dat = devm_stmmac_probe_config_dt(pdev, stmmac_res.mac);
 	if (IS_ERR(plat_dat))
 		return PTR_ERR(plat_dat);
 
@@ -1244,7 +1244,7 @@ static int sun8i_dwmac_probe(struct platform_device *pdev)
 
 	ret = sun8i_dwmac_set_syscon(&pdev->dev, plat_dat);
 	if (ret)
-		goto dwmac_deconfig;
+		return ret;
 
 	ret = sun8i_dwmac_init(pdev, plat_dat->bsp_priv);
 	if (ret)
@@ -1295,8 +1295,6 @@ dwmac_exit:
 	sun8i_dwmac_exit(pdev, gmac);
 dwmac_syscon:
 	sun8i_dwmac_unset_syscon(gmac);
-dwmac_deconfig:
-	stmmac_remove_config_dt(pdev, plat_dat);
 
 	return ret;
 }
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c
index beceeae579bf..2653a9f0958c 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c
@@ -108,36 +108,31 @@ static int sun7i_gmac_probe(struct platform_device *pdev)
 	if (ret)
 		return ret;
 
-	plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac);
+	plat_dat = devm_stmmac_probe_config_dt(pdev, stmmac_res.mac);
 	if (IS_ERR(plat_dat))
 		return PTR_ERR(plat_dat);
 
 	gmac = devm_kzalloc(dev, sizeof(*gmac), GFP_KERNEL);
-	if (!gmac) {
-		ret = -ENOMEM;
-		goto err_remove_config_dt;
-	}
+	if (!gmac)
+		return -ENOMEM;
 
 	ret = of_get_phy_mode(dev->of_node, &gmac->interface);
 	if (ret && ret != -ENODEV) {
 		dev_err(dev, "Can't get phy-mode\n");
-		goto err_remove_config_dt;
+		return ret;
 	}
 
 	gmac->tx_clk = devm_clk_get(dev, "allwinner_gmac_tx");
 	if (IS_ERR(gmac->tx_clk)) {
 		dev_err(dev, "could not get tx clock\n");
-		ret = PTR_ERR(gmac->tx_clk);
-		goto err_remove_config_dt;
+		return PTR_ERR(gmac->tx_clk);
 	}
 
 	/* Optional regulator for PHY */
 	gmac->regulator = devm_regulator_get_optional(dev, "phy");
 	if (IS_ERR(gmac->regulator)) {
-		if (PTR_ERR(gmac->regulator) == -EPROBE_DEFER) {
-			ret = -EPROBE_DEFER;
-			goto err_remove_config_dt;
-		}
+		if (PTR_ERR(gmac->regulator) == -EPROBE_DEFER)
+			return -EPROBE_DEFER;
 		dev_info(dev, "no regulator found\n");
 		gmac->regulator = NULL;
 	}
@@ -155,7 +150,7 @@ static int sun7i_gmac_probe(struct platform_device *pdev)
 
 	ret = sun7i_gmac_init(pdev, plat_dat->bsp_priv);
 	if (ret)
-		goto err_remove_config_dt;
+		return ret;
 
 	ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
 	if (ret)
@@ -165,8 +160,6 @@ static int sun7i_gmac_probe(struct platform_device *pdev)
 
 err_gmac_exit:
 	sun7i_gmac_exit(pdev, plat_dat->bsp_priv);
-err_remove_config_dt:
-	stmmac_remove_config_dt(pdev, plat_dat);
 
 	return ret;
 }
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-tegra.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-tegra.c
index e0f3cbd36852..362f85136c3e 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-tegra.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-tegra.c
@@ -284,7 +284,7 @@ static int tegra_mgbe_probe(struct platform_device *pdev)
 	if (err < 0)
 		goto disable_clks;
 
-	plat = stmmac_probe_config_dt(pdev, res.mac);
+	plat = devm_stmmac_probe_config_dt(pdev, res.mac);
 	if (IS_ERR(plat)) {
 		err = PTR_ERR(plat);
 		goto disable_clks;
@@ -303,7 +303,7 @@ static int tegra_mgbe_probe(struct platform_device *pdev)
 						   GFP_KERNEL);
 		if (!plat->mdio_bus_data) {
 			err = -ENOMEM;
-			goto remove;
+			goto disable_clks;
 		}
 	}
 
@@ -321,7 +321,7 @@ static int tegra_mgbe_probe(struct platform_device *pdev)
 				 500, 500 * 2000);
 	if (err < 0) {
 		dev_err(mgbe->dev, "timeout waiting for TX lane to become enabled\n");
-		goto remove;
+		goto disable_clks;
 	}
 
 	plat->serdes_powerup = mgbe_uphy_lane_bringup_serdes_up;
@@ -342,12 +342,10 @@ static int tegra_mgbe_probe(struct platform_device *pdev)
 
 	err = stmmac_dvr_probe(&pdev->dev, plat, &res);
 	if (err < 0)
-		goto remove;
+		goto disable_clks;
 
 	return 0;
 
-remove:
-	stmmac_remove_config_dt(pdev, plat);
 disable_clks:
 	clk_bulk_disable_unprepare(ARRAY_SIZE(mgbe_clks), mgbe->clks);
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c
index 22d113fb8e09..a5a5cfa989c6 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c
@@ -220,15 +220,13 @@ static int visconti_eth_dwmac_probe(struct platform_device *pdev)
 	if (ret)
 		return ret;
 
-	plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac);
+	plat_dat = devm_stmmac_probe_config_dt(pdev, stmmac_res.mac);
 	if (IS_ERR(plat_dat))
 		return PTR_ERR(plat_dat);
 
 	dwmac = devm_kzalloc(&pdev->dev, sizeof(*dwmac), GFP_KERNEL);
-	if (!dwmac) {
-		ret = -ENOMEM;
-		goto remove_config;
-	}
+	if (!dwmac)
+		return -ENOMEM;
 
 	spin_lock_init(&dwmac->lock);
 	dwmac->reg = stmmac_res.addr;
@@ -238,7 +236,7 @@ static int visconti_eth_dwmac_probe(struct platform_device *pdev)
 
 	ret = visconti_eth_clock_probe(pdev, plat_dat);
 	if (ret)
-		goto remove_config;
+		return ret;
 
 	visconti_eth_init_hw(pdev, plat_dat);
 
@@ -252,22 +250,14 @@ static int visconti_eth_dwmac_probe(struct platform_device *pdev)
 
 remove:
 	visconti_eth_clock_remove(pdev);
-remove_config:
-	stmmac_remove_config_dt(pdev, plat_dat);
 
 	return ret;
 }
 
 static void visconti_eth_dwmac_remove(struct platform_device *pdev)
 {
-	struct net_device *ndev = platform_get_drvdata(pdev);
-	struct stmmac_priv *priv = netdev_priv(ndev);
-
 	stmmac_pltfr_remove(pdev);
-
 	visconti_eth_clock_remove(pdev);
-
-	stmmac_remove_config_dt(pdev, priv->plat);
 }
 
 static const struct of_device_id visconti_eth_dwmac_match[] = {
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 5801f4d50f95..bd1249a9d7f2 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -4415,6 +4415,16 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 	WARN_ON(tx_q->tx_skbuff[first_entry]);
 
 	csum_insertion = (skb->ip_summed == CHECKSUM_PARTIAL);
+	/* DWMAC IPs can be synthesized to support tx coe only for a few tx
+	 * queues. In that case, checksum offloading for those queues that don't
+	 * support tx coe needs to fallback to software checksum calculation.
+	 */
+	if (csum_insertion &&
+	    priv->plat->tx_queues_cfg[queue].coe_unsupported) {
+		if (unlikely(skb_checksum_help(skb)))
+			goto dma_map_err;
+		csum_insertion = !csum_insertion;
+	}
 
 	if (likely(priv->extend_desc))
 		desc = (struct dma_desc *)(tx_q->dma_etx + entry);
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
index 2f0678f15fb7..1ffde555da47 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
@@ -276,6 +276,9 @@ static int stmmac_mtl_setup(struct platform_device *pdev,
 			plat->tx_queues_cfg[queue].use_prio = true;
 		}
 
+		plat->tx_queues_cfg[queue].coe_unsupported =
+			of_property_read_bool(q_node, "snps,coe-unsupported");
+
 		queue++;
 	}
 	if (queue != plat->tx_queues_to_use) {
@@ -385,6 +388,22 @@ static int stmmac_of_get_mac_mode(struct device_node *np)
 }
 
 /**
+ * stmmac_remove_config_dt - undo the effects of stmmac_probe_config_dt()
+ * @pdev: platform_device structure
+ * @plat: driver data platform structure
+ *
+ * Release resources claimed by stmmac_probe_config_dt().
+ */
+static void stmmac_remove_config_dt(struct platform_device *pdev,
+				    struct plat_stmmacenet_data *plat)
+{
+	clk_disable_unprepare(plat->stmmac_clk);
+	clk_disable_unprepare(plat->pclk);
+	of_node_put(plat->phy_node);
+	of_node_put(plat->mdio_node);
+}
+
+/**
  * stmmac_probe_config_dt - parse device-tree driver parameters
  * @pdev: platform_device structure
  * @mac: MAC address to use
@@ -392,7 +411,7 @@ static int stmmac_of_get_mac_mode(struct device_node *np)
  * this function is to read the driver parameters from device-tree and
  * set some private fields that will be used by the main at runtime.
  */
-struct plat_stmmacenet_data *
+static struct plat_stmmacenet_data *
 stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac)
 {
 	struct device_node *np = pdev->dev.of_node;
@@ -662,43 +681,14 @@ devm_stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac)
 
 	return plat;
 }
-
-/**
- * stmmac_remove_config_dt - undo the effects of stmmac_probe_config_dt()
- * @pdev: platform_device structure
- * @plat: driver data platform structure
- *
- * Release resources claimed by stmmac_probe_config_dt().
- */
-void stmmac_remove_config_dt(struct platform_device *pdev,
-			     struct plat_stmmacenet_data *plat)
-{
-	clk_disable_unprepare(plat->stmmac_clk);
-	clk_disable_unprepare(plat->pclk);
-	of_node_put(plat->phy_node);
-	of_node_put(plat->mdio_node);
-}
 #else
 struct plat_stmmacenet_data *
-stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac)
-{
-	return ERR_PTR(-EINVAL);
-}
-
-struct plat_stmmacenet_data *
 devm_stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac)
 {
 	return ERR_PTR(-EINVAL);
 }
-
-void stmmac_remove_config_dt(struct platform_device *pdev,
-			     struct plat_stmmacenet_data *plat)
-{
-}
 #endif /* CONFIG_OF */
-EXPORT_SYMBOL_GPL(stmmac_probe_config_dt);
 EXPORT_SYMBOL_GPL(devm_stmmac_probe_config_dt);
-EXPORT_SYMBOL_GPL(stmmac_remove_config_dt);
 
 int stmmac_get_platform_resources(struct platform_device *pdev,
 				  struct stmmac_resources *stmmac_res)
@@ -807,7 +797,7 @@ static void devm_stmmac_pltfr_remove(void *data)
 {
 	struct platform_device *pdev = data;
 
-	stmmac_pltfr_remove_no_dt(pdev);
+	stmmac_pltfr_remove(pdev);
 }
 
 /**
@@ -834,12 +824,12 @@ int devm_stmmac_pltfr_probe(struct platform_device *pdev,
 EXPORT_SYMBOL_GPL(devm_stmmac_pltfr_probe);
 
 /**
- * stmmac_pltfr_remove_no_dt
+ * stmmac_pltfr_remove
  * @pdev: pointer to the platform device
  * Description: This undoes the effects of stmmac_pltfr_probe() by removing the
  * driver and calling the platform's exit() callback.
  */
-void stmmac_pltfr_remove_no_dt(struct platform_device *pdev)
+void stmmac_pltfr_remove(struct platform_device *pdev)
 {
 	struct net_device *ndev = platform_get_drvdata(pdev);
 	struct stmmac_priv *priv = netdev_priv(ndev);
@@ -848,23 +838,6 @@ void stmmac_pltfr_remove_no_dt(struct platform_device *pdev)
 	stmmac_dvr_remove(&pdev->dev);
 	stmmac_pltfr_exit(pdev, plat);
 }
-EXPORT_SYMBOL_GPL(stmmac_pltfr_remove_no_dt);
-
-/**
- * stmmac_pltfr_remove
- * @pdev: platform device pointer
- * Description: this function calls the main to free the net resources
- * and calls the platforms hook and release the resources (e.g. mem).
- */
-void stmmac_pltfr_remove(struct platform_device *pdev)
-{
-	struct net_device *ndev = platform_get_drvdata(pdev);
-	struct stmmac_priv *priv = netdev_priv(ndev);
-	struct plat_stmmacenet_data *plat = priv->plat;
-
-	stmmac_pltfr_remove_no_dt(pdev);
-	stmmac_remove_config_dt(pdev, plat);
-}
 EXPORT_SYMBOL_GPL(stmmac_pltfr_remove);
 
 /**
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h
index c5565b2a70ac..bb6fc7e59aed 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h
@@ -12,11 +12,7 @@
 #include "stmmac.h"
 
 struct plat_stmmacenet_data *
-stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac);
-struct plat_stmmacenet_data *
 devm_stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac);
-void stmmac_remove_config_dt(struct platform_device *pdev,
-			     struct plat_stmmacenet_data *plat);
 
 int stmmac_get_platform_resources(struct platform_device *pdev,
 				  struct stmmac_resources *stmmac_res);
@@ -32,7 +28,6 @@ int stmmac_pltfr_probe(struct platform_device *pdev,
 int devm_stmmac_pltfr_probe(struct platform_device *pdev,
 			    struct plat_stmmacenet_data *plat,
 			    struct stmmac_resources *res);
-void stmmac_pltfr_remove_no_dt(struct platform_device *pdev);
 void stmmac_pltfr_remove(struct platform_device *pdev);
 extern const struct dev_pm_ops stmmac_pltfr_pm_ops;
 
diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c
index 011d74087f86..21431f43e4c2 100644
--- a/drivers/net/ethernet/sun/niu.c
+++ b/drivers/net/ethernet/sun/niu.c
@@ -10132,7 +10132,7 @@ err_out:
 	return err;
 }
 
-static int niu_of_remove(struct platform_device *op)
+static void niu_of_remove(struct platform_device *op)
 {
 	struct net_device *dev = platform_get_drvdata(op);
 
@@ -10165,7 +10165,6 @@ static int niu_of_remove(struct platform_device *op)
 
 		free_netdev(dev);
 	}
-	return 0;
 }
 
 static const struct of_device_id niu_match[] = {
@@ -10183,7 +10182,7 @@ static struct platform_driver niu_of_driver = {
 		.of_match_table = niu_match,
 	},
 	.probe		= niu_of_probe,
-	.remove		= niu_of_remove,
+	.remove_new	= niu_of_remove,
 };
 
 #endif /* CONFIG_SPARC64 */
diff --git a/drivers/net/ethernet/sun/sunbmac.c b/drivers/net/ethernet/sun/sunbmac.c
index cc34d92d2e3d..16c86b13c185 100644
--- a/drivers/net/ethernet/sun/sunbmac.c
+++ b/drivers/net/ethernet/sun/sunbmac.c
@@ -1234,7 +1234,7 @@ static int bigmac_sbus_probe(struct platform_device *op)
 	return bigmac_ether_init(op, qec_op);
 }
 
-static int bigmac_sbus_remove(struct platform_device *op)
+static void bigmac_sbus_remove(struct platform_device *op)
 {
 	struct bigmac *bp = platform_get_drvdata(op);
 	struct device *parent = op->dev.parent;
@@ -1255,8 +1255,6 @@ static int bigmac_sbus_remove(struct platform_device *op)
 			  bp->bblock_dvma);
 
 	free_netdev(net_dev);
-
-	return 0;
 }
 
 static const struct of_device_id bigmac_sbus_match[] = {
@@ -1274,7 +1272,7 @@ static struct platform_driver bigmac_sbus_driver = {
 		.of_match_table = bigmac_sbus_match,
 	},
 	.probe		= bigmac_sbus_probe,
-	.remove		= bigmac_sbus_remove,
+	.remove_new	= bigmac_sbus_remove,
 };
 
 module_platform_driver(bigmac_sbus_driver);
diff --git a/drivers/net/ethernet/sun/sunqe.c b/drivers/net/ethernet/sun/sunqe.c
index b37360f44972..aedd13c94225 100644
--- a/drivers/net/ethernet/sun/sunqe.c
+++ b/drivers/net/ethernet/sun/sunqe.c
@@ -933,7 +933,7 @@ static int qec_sbus_probe(struct platform_device *op)
 	return qec_ether_init(op);
 }
 
-static int qec_sbus_remove(struct platform_device *op)
+static void qec_sbus_remove(struct platform_device *op)
 {
 	struct sunqe *qp = platform_get_drvdata(op);
 	struct net_device *net_dev = qp->dev;
@@ -948,8 +948,6 @@ static int qec_sbus_remove(struct platform_device *op)
 			  qp->buffers, qp->buffers_dvma);
 
 	free_netdev(net_dev);
-
-	return 0;
 }
 
 static const struct of_device_id qec_sbus_match[] = {
@@ -967,7 +965,7 @@ static struct platform_driver qec_sbus_driver = {
 		.of_match_table = qec_sbus_match,
 	},
 	.probe		= qec_sbus_probe,
-	.remove		= qec_sbus_remove,
+	.remove_new	= qec_sbus_remove,
 };
 
 static int __init qec_init(void)
diff --git a/drivers/net/ethernet/sunplus/spl2sw_driver.c b/drivers/net/ethernet/sunplus/spl2sw_driver.c
index c499a14314f1..391a1bc7f446 100644
--- a/drivers/net/ethernet/sunplus/spl2sw_driver.c
+++ b/drivers/net/ethernet/sunplus/spl2sw_driver.c
@@ -511,7 +511,7 @@ out_clk_disable:
 	return ret;
 }
 
-static int spl2sw_remove(struct platform_device *pdev)
+static void spl2sw_remove(struct platform_device *pdev)
 {
 	struct spl2sw_common *comm;
 	int i;
@@ -538,8 +538,6 @@ static int spl2sw_remove(struct platform_device *pdev)
 	spl2sw_mdio_remove(comm);
 
 	clk_disable_unprepare(comm->clk);
-
-	return 0;
 }
 
 static const struct of_device_id spl2sw_of_match[] = {
@@ -551,7 +549,7 @@ MODULE_DEVICE_TABLE(of, spl2sw_of_match);
 
 static struct platform_driver spl2sw_driver = {
 	.probe = spl2sw_probe,
-	.remove = spl2sw_remove,
+	.remove_new = spl2sw_remove,
 	.driver = {
 		.name = "sp7021_emac",
 		.of_match_table = spl2sw_of_match,
diff --git a/drivers/net/ethernet/ti/cpmac.c b/drivers/net/ethernet/ti/cpmac.c
index 80eeeb463c4f..bc9b5d18427d 100644
--- a/drivers/net/ethernet/ti/cpmac.c
+++ b/drivers/net/ethernet/ti/cpmac.c
@@ -1151,14 +1151,12 @@ fail:
 	return rc;
 }
 
-static int cpmac_remove(struct platform_device *pdev)
+static void cpmac_remove(struct platform_device *pdev)
 {
 	struct net_device *dev = platform_get_drvdata(pdev);
 
 	unregister_netdev(dev);
 	free_netdev(dev);
-
-	return 0;
 }
 
 static struct platform_driver cpmac_driver = {
@@ -1166,7 +1164,7 @@ static struct platform_driver cpmac_driver = {
 		.name 	= "cpmac",
 	},
 	.probe 	= cpmac_probe,
-	.remove = cpmac_remove,
+	.remove_new = cpmac_remove,
 };
 
 int __init cpmac_init(void)
diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c
index 2eb9d5a32588..5d756df133eb 100644
--- a/drivers/net/ethernet/ti/davinci_emac.c
+++ b/drivers/net/ethernet/ti/davinci_emac.c
@@ -2002,7 +2002,7 @@ err_free_netdev:
  * Called when removing the device driver. We disable clock usage and release
  * the resources taken up by the driver and unregister network device
  */
-static int davinci_emac_remove(struct platform_device *pdev)
+static void davinci_emac_remove(struct platform_device *pdev)
 {
 	struct net_device *ndev = platform_get_drvdata(pdev);
 	struct emac_priv *priv = netdev_priv(ndev);
@@ -2022,8 +2022,6 @@ static int davinci_emac_remove(struct platform_device *pdev)
 	if (of_phy_is_fixed_link(np))
 		of_phy_deregister_fixed_link(np);
 	free_netdev(ndev);
-
-	return 0;
 }
 
 static int davinci_emac_suspend(struct device *dev)
@@ -2076,7 +2074,7 @@ static struct platform_driver davinci_emac_driver = {
 		.of_match_table = davinci_emac_of_match,
 	},
 	.probe = davinci_emac_probe,
-	.remove = davinci_emac_remove,
+	.remove_new = davinci_emac_remove,
 };
 
 /**
diff --git a/drivers/net/ethernet/ti/davinci_mdio.c b/drivers/net/ethernet/ti/davinci_mdio.c
index 89b6d23e9937..628c87dc1d28 100644
--- a/drivers/net/ethernet/ti/davinci_mdio.c
+++ b/drivers/net/ethernet/ti/davinci_mdio.c
@@ -673,7 +673,7 @@ bail_out:
 	return ret;
 }
 
-static int davinci_mdio_remove(struct platform_device *pdev)
+static void davinci_mdio_remove(struct platform_device *pdev)
 {
 	struct davinci_mdio_data *data = platform_get_drvdata(pdev);
 
@@ -686,8 +686,6 @@ static int davinci_mdio_remove(struct platform_device *pdev)
 
 	pm_runtime_dont_use_autosuspend(&pdev->dev);
 	pm_runtime_disable(&pdev->dev);
-
-	return 0;
 }
 
 #ifdef CONFIG_PM
@@ -766,7 +764,7 @@ static struct platform_driver davinci_mdio_driver = {
 		.of_match_table = of_match_ptr(davinci_mdio_of_mtable),
 	},
 	.probe = davinci_mdio_probe,
-	.remove = davinci_mdio_remove,
+	.remove_new = davinci_mdio_remove,
 };
 
 static int __init davinci_mdio_init(void)
diff --git a/drivers/net/ethernet/ti/icssg/icssg_config.c b/drivers/net/ethernet/ti/icssg/icssg_config.c
index b272361e378f..99de8a40ed60 100644
--- a/drivers/net/ethernet/ti/icssg/icssg_config.c
+++ b/drivers/net/ethernet/ti/icssg/icssg_config.c
@@ -433,6 +433,17 @@ int emac_set_port_state(struct prueth_emac *emac,
 	return ret;
 }
 
+void icssg_config_half_duplex(struct prueth_emac *emac)
+{
+	u32 val;
+
+	if (!emac->half_duplex)
+		return;
+
+	val = get_random_u32();
+	writel(val, emac->dram.va + HD_RAND_SEED_OFFSET);
+}
+
 void icssg_config_set_speed(struct prueth_emac *emac)
 {
 	u8 fw_speed;
@@ -453,5 +464,8 @@ void icssg_config_set_speed(struct prueth_emac *emac)
 		return;
 	}
 
+	if (emac->duplex == DUPLEX_HALF)
+		fw_speed |= FW_LINK_SPEED_HD;
+
 	writeb(fw_speed, emac->dram.va + PORT_LINK_SPEED_OFFSET);
 }
diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.c b/drivers/net/ethernet/ti/icssg/icssg_prueth.c
index 4914d0ef58e9..482ae82a99c1 100644
--- a/drivers/net/ethernet/ti/icssg/icssg_prueth.c
+++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.c
@@ -1029,6 +1029,8 @@ static void emac_adjust_link(struct net_device *ndev)
 		 * values
 		 */
 		if (emac->link) {
+			if (emac->duplex == DUPLEX_HALF)
+				icssg_config_half_duplex(emac);
 			/* Set the RGMII cfg for gig en and full duplex */
 			icssg_update_rgmii_cfg(prueth->miig_rt, emac);
 
@@ -1147,9 +1149,13 @@ static int emac_phy_connect(struct prueth_emac *emac)
 		return -ENODEV;
 	}
 
+	if (!emac->half_duplex) {
+		dev_dbg(prueth->dev, "half duplex mode is not supported\n");
+		phy_remove_link_mode(ndev->phydev, ETHTOOL_LINK_MODE_10baseT_Half_BIT);
+		phy_remove_link_mode(ndev->phydev, ETHTOOL_LINK_MODE_100baseT_Half_BIT);
+	}
+
 	/* remove unsupported modes */
-	phy_remove_link_mode(ndev->phydev, ETHTOOL_LINK_MODE_10baseT_Half_BIT);
-	phy_remove_link_mode(ndev->phydev, ETHTOOL_LINK_MODE_100baseT_Half_BIT);
 	phy_remove_link_mode(ndev->phydev, ETHTOOL_LINK_MODE_1000baseT_Half_BIT);
 	phy_remove_link_mode(ndev->phydev, ETHTOOL_LINK_MODE_Pause_BIT);
 	phy_remove_link_mode(ndev->phydev, ETHTOOL_LINK_MODE_Asym_Pause_BIT);
@@ -2113,6 +2119,10 @@ static int prueth_probe(struct platform_device *pdev)
 				      eth0_node->name);
 			goto exit_iep;
 		}
+
+		if (of_find_property(eth0_node, "ti,half-duplex-capable", NULL))
+			prueth->emac[PRUETH_MAC0]->half_duplex = 1;
+
 		prueth->emac[PRUETH_MAC0]->iep = prueth->iep0;
 	}
 
@@ -2124,6 +2134,9 @@ static int prueth_probe(struct platform_device *pdev)
 			goto netdev_exit;
 		}
 
+		if (of_find_property(eth1_node, "ti,half-duplex-capable", NULL))
+			prueth->emac[PRUETH_MAC1]->half_duplex = 1;
+
 		prueth->emac[PRUETH_MAC1]->iep = prueth->iep0;
 	}
 
@@ -2313,8 +2326,13 @@ static const struct prueth_pdata am654_icssg_pdata = {
 	.quirk_10m_link_issue = 1,
 };
 
+static const struct prueth_pdata am64x_icssg_pdata = {
+	.fdqring_mode = K3_RINGACC_RING_MODE_RING,
+};
+
 static const struct of_device_id prueth_dt_match[] = {
 	{ .compatible = "ti,am654-icssg-prueth", .data = &am654_icssg_pdata },
+	{ .compatible = "ti,am642-icssg-prueth", .data = &am64x_icssg_pdata },
 	{ /* sentinel */ }
 };
 MODULE_DEVICE_TABLE(of, prueth_dt_match);
diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.h b/drivers/net/ethernet/ti/icssg/icssg_prueth.h
index 3fe80a8758d3..8b6d6b497010 100644
--- a/drivers/net/ethernet/ti/icssg/icssg_prueth.h
+++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.h
@@ -145,6 +145,7 @@ struct prueth_emac {
 	struct icss_iep *iep;
 	unsigned int rx_ts_enabled : 1;
 	unsigned int tx_ts_enabled : 1;
+	unsigned int half_duplex : 1;
 
 	/* DMA related */
 	struct prueth_tx_chn tx_chns[PRUETH_MAX_TX_QUEUES];
@@ -271,6 +272,7 @@ int icssg_config(struct prueth *prueth, struct prueth_emac *emac,
 int emac_set_port_state(struct prueth_emac *emac,
 			enum icssg_port_state_cmd state);
 void icssg_config_set_speed(struct prueth_emac *emac);
+void icssg_config_half_duplex(struct prueth_emac *emac);
 
 /* Buffer queue helpers */
 int icssg_queue_pop(struct prueth *prueth, u8 queue);
diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c
index d829113c16ee..11b90e1da0c6 100644
--- a/drivers/net/ethernet/ti/netcp_core.c
+++ b/drivers/net/ethernet/ti/netcp_core.c
@@ -2228,7 +2228,7 @@ probe_quit:
 	return ret;
 }
 
-static int netcp_remove(struct platform_device *pdev)
+static void netcp_remove(struct platform_device *pdev)
 {
 	struct netcp_device *netcp_device = platform_get_drvdata(pdev);
 	struct netcp_intf *netcp_intf, *netcp_tmp;
@@ -2256,7 +2256,6 @@ static int netcp_remove(struct platform_device *pdev)
 	pm_runtime_put_sync(&pdev->dev);
 	pm_runtime_disable(&pdev->dev);
 	platform_set_drvdata(pdev, NULL);
-	return 0;
 }
 
 static const struct of_device_id of_match[] = {
@@ -2271,7 +2270,7 @@ static struct platform_driver netcp_driver = {
 		.of_match_table	= of_match,
 	},
 	.probe = netcp_probe,
-	.remove = netcp_remove,
+	.remove_new = netcp_remove,
 };
 module_platform_driver(netcp_driver);
 
diff --git a/drivers/net/ethernet/toshiba/spider_net.c b/drivers/net/ethernet/toshiba/spider_net.c
index 50d7eacfec58..87e67121477c 100644
--- a/drivers/net/ethernet/toshiba/spider_net.c
+++ b/drivers/net/ethernet/toshiba/spider_net.c
@@ -2332,7 +2332,7 @@ spider_net_alloc_card(void)
 	struct spider_net_card *card;
 
 	netdev = alloc_etherdev(struct_size(card, darray,
-					    tx_descriptors + rx_descriptors));
+					    size_add(tx_descriptors, rx_descriptors)));
 	if (!netdev)
 		return NULL;
 
diff --git a/drivers/net/ethernet/tundra/tsi108_eth.c b/drivers/net/ethernet/tundra/tsi108_eth.c
index d09d352e1c0a..554aff7c8f3b 100644
--- a/drivers/net/ethernet/tundra/tsi108_eth.c
+++ b/drivers/net/ethernet/tundra/tsi108_eth.c
@@ -1660,7 +1660,7 @@ static void tsi108_timed_checker(struct timer_list *t)
 	mod_timer(&data->timer, jiffies + CHECK_PHY_INTERVAL);
 }
 
-static int tsi108_ether_remove(struct platform_device *pdev)
+static void tsi108_ether_remove(struct platform_device *pdev)
 {
 	struct net_device *dev = platform_get_drvdata(pdev);
 	struct tsi108_prv_data *priv = netdev_priv(dev);
@@ -1670,15 +1670,13 @@ static int tsi108_ether_remove(struct platform_device *pdev)
 	iounmap(priv->regs);
 	iounmap(priv->phyregs);
 	free_netdev(dev);
-
-	return 0;
 }
 
 /* Structure for a device driver */
 
 static struct platform_driver tsi_eth_driver = {
 	.probe = tsi108_init_one,
-	.remove = tsi108_ether_remove,
+	.remove_new = tsi108_ether_remove,
 	.driver	= {
 		.name = "tsi-ethernet",
 	},
diff --git a/drivers/net/ethernet/via/via-rhine.c b/drivers/net/ethernet/via/via-rhine.c
index 3e09e5036490..e80c02948801 100644
--- a/drivers/net/ethernet/via/via-rhine.c
+++ b/drivers/net/ethernet/via/via-rhine.c
@@ -2443,7 +2443,7 @@ static void rhine_remove_one_pci(struct pci_dev *pdev)
 	pci_disable_device(pdev);
 }
 
-static int rhine_remove_one_platform(struct platform_device *pdev)
+static void rhine_remove_one_platform(struct platform_device *pdev)
 {
 	struct net_device *dev = platform_get_drvdata(pdev);
 	struct rhine_private *rp = netdev_priv(dev);
@@ -2453,8 +2453,6 @@ static int rhine_remove_one_platform(struct platform_device *pdev)
 	iounmap(rp->base);
 
 	free_netdev(dev);
-
-	return 0;
 }
 
 static void rhine_shutdown_pci(struct pci_dev *pdev)
@@ -2572,7 +2570,7 @@ static struct pci_driver rhine_driver_pci = {
 
 static struct platform_driver rhine_driver_platform = {
 	.probe		= rhine_init_one_platform,
-	.remove		= rhine_remove_one_platform,
+	.remove_new	= rhine_remove_one_platform,
 	.driver = {
 		.name	= DRV_NAME,
 		.of_match_table	= rhine_of_tbl,
diff --git a/drivers/net/ethernet/via/via-velocity.c b/drivers/net/ethernet/via/via-velocity.c
index 731f689412e6..1c6b2a9bba08 100644
--- a/drivers/net/ethernet/via/via-velocity.c
+++ b/drivers/net/ethernet/via/via-velocity.c
@@ -2957,11 +2957,9 @@ static int velocity_platform_probe(struct platform_device *pdev)
 	return velocity_probe(&pdev->dev, irq, info, BUS_PLATFORM);
 }
 
-static int velocity_platform_remove(struct platform_device *pdev)
+static void velocity_platform_remove(struct platform_device *pdev)
 {
 	velocity_remove(&pdev->dev);
-
-	return 0;
 }
 
 #ifdef CONFIG_PM_SLEEP
@@ -3249,7 +3247,7 @@ static struct pci_driver velocity_pci_driver = {
 
 static struct platform_driver velocity_platform_driver = {
 	.probe		= velocity_platform_probe,
-	.remove		= velocity_platform_remove,
+	.remove_new	= velocity_platform_remove,
 	.driver = {
 		.name = "via-velocity",
 		.of_match_table = velocity_of_ids,
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_hw.c b/drivers/net/ethernet/wangxun/libwx/wx_hw.c
index 85dc16faca54..f0063d569c80 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_hw.c
+++ b/drivers/net/ethernet/wangxun/libwx/wx_hw.c
@@ -12,6 +12,98 @@
 #include "wx_lib.h"
 #include "wx_hw.h"
 
+static int wx_phy_read_reg_mdi(struct mii_bus *bus, int phy_addr, int devnum, int regnum)
+{
+	struct wx *wx = bus->priv;
+	u32 command, val;
+	int ret;
+
+	/* setup and write the address cycle command */
+	command = WX_MSCA_RA(regnum) |
+		  WX_MSCA_PA(phy_addr) |
+		  WX_MSCA_DA(devnum);
+	wr32(wx, WX_MSCA, command);
+
+	command = WX_MSCC_CMD(WX_MSCA_CMD_READ) | WX_MSCC_BUSY;
+	if (wx->mac.type == wx_mac_em)
+		command |= WX_MDIO_CLK(6);
+	wr32(wx, WX_MSCC, command);
+
+	/* wait to complete */
+	ret = read_poll_timeout(rd32, val, !(val & WX_MSCC_BUSY), 1000,
+				100000, false, wx, WX_MSCC);
+	if (ret) {
+		wx_err(wx, "Mdio read c22 command did not complete.\n");
+		return ret;
+	}
+
+	return (u16)rd32(wx, WX_MSCC);
+}
+
+static int wx_phy_write_reg_mdi(struct mii_bus *bus, int phy_addr,
+				int devnum, int regnum, u16 value)
+{
+	struct wx *wx = bus->priv;
+	u32 command, val;
+	int ret;
+
+	/* setup and write the address cycle command */
+	command = WX_MSCA_RA(regnum) |
+		  WX_MSCA_PA(phy_addr) |
+		  WX_MSCA_DA(devnum);
+	wr32(wx, WX_MSCA, command);
+
+	command = value | WX_MSCC_CMD(WX_MSCA_CMD_WRITE) | WX_MSCC_BUSY;
+	if (wx->mac.type == wx_mac_em)
+		command |= WX_MDIO_CLK(6);
+	wr32(wx, WX_MSCC, command);
+
+	/* wait to complete */
+	ret = read_poll_timeout(rd32, val, !(val & WX_MSCC_BUSY), 1000,
+				100000, false, wx, WX_MSCC);
+	if (ret)
+		wx_err(wx, "Mdio write c22 command did not complete.\n");
+
+	return ret;
+}
+
+int wx_phy_read_reg_mdi_c22(struct mii_bus *bus, int phy_addr, int regnum)
+{
+	struct wx *wx = bus->priv;
+
+	wr32(wx, WX_MDIO_CLAUSE_SELECT, 0xF);
+	return wx_phy_read_reg_mdi(bus, phy_addr, 0, regnum);
+}
+EXPORT_SYMBOL(wx_phy_read_reg_mdi_c22);
+
+int wx_phy_write_reg_mdi_c22(struct mii_bus *bus, int phy_addr, int regnum, u16 value)
+{
+	struct wx *wx = bus->priv;
+
+	wr32(wx, WX_MDIO_CLAUSE_SELECT, 0xF);
+	return wx_phy_write_reg_mdi(bus, phy_addr, 0, regnum, value);
+}
+EXPORT_SYMBOL(wx_phy_write_reg_mdi_c22);
+
+int wx_phy_read_reg_mdi_c45(struct mii_bus *bus, int phy_addr, int devnum, int regnum)
+{
+	struct wx *wx = bus->priv;
+
+	wr32(wx, WX_MDIO_CLAUSE_SELECT, 0);
+	return wx_phy_read_reg_mdi(bus, phy_addr, devnum, regnum);
+}
+EXPORT_SYMBOL(wx_phy_read_reg_mdi_c45);
+
+int wx_phy_write_reg_mdi_c45(struct mii_bus *bus, int phy_addr,
+			     int devnum, int regnum, u16 value)
+{
+	struct wx *wx = bus->priv;
+
+	wr32(wx, WX_MDIO_CLAUSE_SELECT, 0);
+	return wx_phy_write_reg_mdi(bus, phy_addr, devnum, regnum, value);
+}
+EXPORT_SYMBOL(wx_phy_write_reg_mdi_c45);
+
 static void wx_intr_disable(struct wx *wx, u64 qmask)
 {
 	u32 mask;
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_hw.h b/drivers/net/ethernet/wangxun/libwx/wx_hw.h
index 0b3447bc6f2f..48d3ccabc272 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_hw.h
+++ b/drivers/net/ethernet/wangxun/libwx/wx_hw.h
@@ -4,6 +4,13 @@
 #ifndef _WX_HW_H_
 #define _WX_HW_H_
 
+#include <linux/phy.h>
+
+int wx_phy_read_reg_mdi_c22(struct mii_bus *bus, int phy_addr, int regnum);
+int wx_phy_write_reg_mdi_c22(struct mii_bus *bus, int phy_addr, int regnum, u16 value);
+int wx_phy_read_reg_mdi_c45(struct mii_bus *bus, int phy_addr, int devnum, int regnum);
+int wx_phy_write_reg_mdi_c45(struct mii_bus *bus, int phy_addr,
+			     int devnum, int regnum, u16 value);
 void wx_intr_enable(struct wx *wx, u64 qmask);
 void wx_irq_disable(struct wx *wx);
 int wx_check_flash_load(struct wx *wx, u32 check_bit);
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_type.h b/drivers/net/ethernet/wangxun/libwx/wx_type.h
index c5cbd177ef62..e3fc49284219 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_type.h
+++ b/drivers/net/ethernet/wangxun/libwx/wx_type.h
@@ -251,6 +251,7 @@ enum WX_MSCA_CMD_value {
 #define WX_MSCC_SADDR                BIT(18)
 #define WX_MSCC_BUSY                 BIT(22)
 #define WX_MDIO_CLK(v)               FIELD_PREP(GENMASK(21, 19), v)
+#define WX_MDIO_CLAUSE_SELECT        0x11220
 #define WX_MMC_CONTROL               0x11800
 #define WX_MMC_CONTROL_RSTONRD       BIT(2) /* reset on read */
 
diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_mdio.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_mdio.c
index 591f5b7b6da6..6302ecca71bb 100644
--- a/drivers/net/ethernet/wangxun/ngbe/ngbe_mdio.c
+++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_mdio.c
@@ -29,117 +29,6 @@ static int ngbe_phy_write_reg_internal(struct mii_bus *bus, int phy_addr, int re
 	return 0;
 }
 
-static int ngbe_phy_read_reg_mdi_c22(struct mii_bus *bus, int phy_addr, int regnum)
-{
-	u32 command, val, device_type = 0;
-	struct wx *wx = bus->priv;
-	int ret;
-
-	wr32(wx, NGBE_MDIO_CLAUSE_SELECT, 0xF);
-	/* setup and write the address cycle command */
-	command = WX_MSCA_RA(regnum) |
-		  WX_MSCA_PA(phy_addr) |
-		  WX_MSCA_DA(device_type);
-	wr32(wx, WX_MSCA, command);
-	command = WX_MSCC_CMD(WX_MSCA_CMD_READ) |
-		  WX_MSCC_BUSY |
-		  WX_MDIO_CLK(6);
-	wr32(wx, WX_MSCC, command);
-
-	/* wait to complete */
-	ret = read_poll_timeout(rd32, val, !(val & WX_MSCC_BUSY), 1000,
-				100000, false, wx, WX_MSCC);
-	if (ret) {
-		wx_err(wx, "Mdio read c22 command did not complete.\n");
-		return ret;
-	}
-
-	return (u16)rd32(wx, WX_MSCC);
-}
-
-static int ngbe_phy_write_reg_mdi_c22(struct mii_bus *bus, int phy_addr, int regnum, u16 value)
-{
-	u32 command, val, device_type = 0;
-	struct wx *wx = bus->priv;
-	int ret;
-
-	wr32(wx, NGBE_MDIO_CLAUSE_SELECT, 0xF);
-	/* setup and write the address cycle command */
-	command = WX_MSCA_RA(regnum) |
-		  WX_MSCA_PA(phy_addr) |
-		  WX_MSCA_DA(device_type);
-	wr32(wx, WX_MSCA, command);
-	command = value |
-		  WX_MSCC_CMD(WX_MSCA_CMD_WRITE) |
-		  WX_MSCC_BUSY |
-		  WX_MDIO_CLK(6);
-	wr32(wx, WX_MSCC, command);
-
-	/* wait to complete */
-	ret = read_poll_timeout(rd32, val, !(val & WX_MSCC_BUSY), 1000,
-				100000, false, wx, WX_MSCC);
-	if (ret)
-		wx_err(wx, "Mdio write c22 command did not complete.\n");
-
-	return ret;
-}
-
-static int ngbe_phy_read_reg_mdi_c45(struct mii_bus *bus, int phy_addr, int devnum, int regnum)
-{
-	struct wx *wx = bus->priv;
-	u32 val, command;
-	int ret;
-
-	wr32(wx, NGBE_MDIO_CLAUSE_SELECT, 0x0);
-	/* setup and write the address cycle command */
-	command = WX_MSCA_RA(regnum) |
-		  WX_MSCA_PA(phy_addr) |
-		  WX_MSCA_DA(devnum);
-	wr32(wx, WX_MSCA, command);
-	command = WX_MSCC_CMD(WX_MSCA_CMD_READ) |
-		  WX_MSCC_BUSY |
-		  WX_MDIO_CLK(6);
-	wr32(wx, WX_MSCC, command);
-
-	/* wait to complete */
-	ret = read_poll_timeout(rd32, val, !(val & WX_MSCC_BUSY), 1000,
-				100000, false, wx, WX_MSCC);
-	if (ret) {
-		wx_err(wx, "Mdio read c45 command did not complete.\n");
-		return ret;
-	}
-
-	return (u16)rd32(wx, WX_MSCC);
-}
-
-static int ngbe_phy_write_reg_mdi_c45(struct mii_bus *bus, int phy_addr,
-				      int devnum, int regnum, u16 value)
-{
-	struct wx *wx = bus->priv;
-	int ret, command;
-	u16 val;
-
-	wr32(wx, NGBE_MDIO_CLAUSE_SELECT, 0x0);
-	/* setup and write the address cycle command */
-	command = WX_MSCA_RA(regnum) |
-		  WX_MSCA_PA(phy_addr) |
-		  WX_MSCA_DA(devnum);
-	wr32(wx, WX_MSCA, command);
-	command = value |
-		  WX_MSCC_CMD(WX_MSCA_CMD_WRITE) |
-		  WX_MSCC_BUSY |
-		  WX_MDIO_CLK(6);
-	wr32(wx, WX_MSCC, command);
-
-	/* wait to complete */
-	ret = read_poll_timeout(rd32, val, !(val & WX_MSCC_BUSY), 1000,
-				100000, false, wx, WX_MSCC);
-	if (ret)
-		wx_err(wx, "Mdio write c45 command did not complete.\n");
-
-	return ret;
-}
-
 static int ngbe_phy_read_reg_c22(struct mii_bus *bus, int phy_addr, int regnum)
 {
 	struct wx *wx = bus->priv;
@@ -148,7 +37,7 @@ static int ngbe_phy_read_reg_c22(struct mii_bus *bus, int phy_addr, int regnum)
 	if (wx->mac_type == em_mac_type_mdi)
 		phy_data = ngbe_phy_read_reg_internal(bus, phy_addr, regnum);
 	else
-		phy_data = ngbe_phy_read_reg_mdi_c22(bus, phy_addr, regnum);
+		phy_data = wx_phy_read_reg_mdi_c22(bus, phy_addr, regnum);
 
 	return phy_data;
 }
@@ -162,7 +51,7 @@ static int ngbe_phy_write_reg_c22(struct mii_bus *bus, int phy_addr,
 	if (wx->mac_type == em_mac_type_mdi)
 		ret = ngbe_phy_write_reg_internal(bus, phy_addr, regnum, value);
 	else
-		ret = ngbe_phy_write_reg_mdi_c22(bus, phy_addr, regnum, value);
+		ret = wx_phy_write_reg_mdi_c22(bus, phy_addr, regnum, value);
 
 	return ret;
 }
@@ -262,8 +151,8 @@ int ngbe_mdio_init(struct wx *wx)
 	mii_bus->priv = wx;
 
 	if (wx->mac_type == em_mac_type_rgmii) {
-		mii_bus->read_c45 = ngbe_phy_read_reg_mdi_c45;
-		mii_bus->write_c45 = ngbe_phy_write_reg_mdi_c45;
+		mii_bus->read_c45 = wx_phy_read_reg_mdi_c45;
+		mii_bus->write_c45 = wx_phy_write_reg_mdi_c45;
 	}
 
 	snprintf(mii_bus->id, MII_BUS_ID_SIZE, "ngbe-%x", pci_dev_id(pdev));
diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h b/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h
index 72c8cd2d5575..ff754d69bdf6 100644
--- a/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h
+++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h
@@ -59,9 +59,6 @@
 #define NGBE_EEPROM_VERSION_L			0x1D
 #define NGBE_EEPROM_VERSION_H			0x1E
 
-/* Media-dependent registers. */
-#define NGBE_MDIO_CLAUSE_SELECT			0x11220
-
 /* GPIO Registers */
 #define NGBE_GPIO_DR				0x14800
 #define NGBE_GPIO_DDR				0x14804
diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c
index 4159c84035fd..b6c06adb8656 100644
--- a/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c
+++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c
@@ -647,58 +647,6 @@ static int txgbe_sfp_register(struct txgbe *txgbe)
 	return 0;
 }
 
-static int txgbe_phy_read(struct mii_bus *bus, int phy_addr,
-			  int devnum, int regnum)
-{
-	struct wx *wx = bus->priv;
-	u32 val, command;
-	int ret;
-
-	/* setup and write the address cycle command */
-	command = WX_MSCA_RA(regnum) |
-		  WX_MSCA_PA(phy_addr) |
-		  WX_MSCA_DA(devnum);
-	wr32(wx, WX_MSCA, command);
-
-	command = WX_MSCC_CMD(WX_MSCA_CMD_READ) | WX_MSCC_BUSY;
-	wr32(wx, WX_MSCC, command);
-
-	/* wait to complete */
-	ret = read_poll_timeout(rd32, val, !(val & WX_MSCC_BUSY), 1000,
-				100000, false, wx, WX_MSCC);
-	if (ret) {
-		wx_err(wx, "Mdio read c45 command did not complete.\n");
-		return ret;
-	}
-
-	return (u16)rd32(wx, WX_MSCC);
-}
-
-static int txgbe_phy_write(struct mii_bus *bus, int phy_addr,
-			   int devnum, int regnum, u16 value)
-{
-	struct wx *wx = bus->priv;
-	int ret, command;
-	u16 val;
-
-	/* setup and write the address cycle command */
-	command = WX_MSCA_RA(regnum) |
-		  WX_MSCA_PA(phy_addr) |
-		  WX_MSCA_DA(devnum);
-	wr32(wx, WX_MSCA, command);
-
-	command = value | WX_MSCC_CMD(WX_MSCA_CMD_WRITE) | WX_MSCC_BUSY;
-	wr32(wx, WX_MSCC, command);
-
-	/* wait to complete */
-	ret = read_poll_timeout(rd32, val, !(val & WX_MSCC_BUSY), 1000,
-				100000, false, wx, WX_MSCC);
-	if (ret)
-		wx_err(wx, "Mdio write c45 command did not complete.\n");
-
-	return ret;
-}
-
 static int txgbe_ext_phy_init(struct txgbe *txgbe)
 {
 	struct phy_device *phydev;
@@ -715,8 +663,8 @@ static int txgbe_ext_phy_init(struct txgbe *txgbe)
 		return -ENOMEM;
 
 	mii_bus->name = "txgbe_mii_bus";
-	mii_bus->read_c45 = &txgbe_phy_read;
-	mii_bus->write_c45 = &txgbe_phy_write;
+	mii_bus->read_c45 = &wx_phy_read_reg_mdi_c45;
+	mii_bus->write_c45 = &wx_phy_write_reg_mdi_c45;
 	mii_bus->parent = &pdev->dev;
 	mii_bus->phy_mask = GENMASK(31, 1);
 	mii_bus->priv = wx;
diff --git a/drivers/net/ethernet/wiznet/w5100.c b/drivers/net/ethernet/wiznet/w5100.c
index 634946e87e5f..341ee2f249fd 100644
--- a/drivers/net/ethernet/wiznet/w5100.c
+++ b/drivers/net/ethernet/wiznet/w5100.c
@@ -1062,11 +1062,9 @@ static int w5100_mmio_probe(struct platform_device *pdev)
 			   mac_addr, irq, data ? data->link_gpio : -EINVAL);
 }
 
-static int w5100_mmio_remove(struct platform_device *pdev)
+static void w5100_mmio_remove(struct platform_device *pdev)
 {
 	w5100_remove(&pdev->dev);
-
-	return 0;
 }
 
 void *w5100_ops_priv(const struct net_device *ndev)
@@ -1273,6 +1271,6 @@ static struct platform_driver w5100_mmio_driver = {
 		.pm	= &w5100_pm_ops,
 	},
 	.probe		= w5100_mmio_probe,
-	.remove		= w5100_mmio_remove,
+	.remove_new	= w5100_mmio_remove,
 };
 module_platform_driver(w5100_mmio_driver);
diff --git a/drivers/net/ethernet/wiznet/w5300.c b/drivers/net/ethernet/wiznet/w5300.c
index b0958fe8111e..3318b50a5911 100644
--- a/drivers/net/ethernet/wiznet/w5300.c
+++ b/drivers/net/ethernet/wiznet/w5300.c
@@ -627,7 +627,7 @@ err_register:
 	return err;
 }
 
-static int w5300_remove(struct platform_device *pdev)
+static void w5300_remove(struct platform_device *pdev)
 {
 	struct net_device *ndev = platform_get_drvdata(pdev);
 	struct w5300_priv *priv = netdev_priv(ndev);
@@ -639,7 +639,6 @@ static int w5300_remove(struct platform_device *pdev)
 
 	unregister_netdev(ndev);
 	free_netdev(ndev);
-	return 0;
 }
 
 #ifdef CONFIG_PM_SLEEP
@@ -683,7 +682,7 @@ static struct platform_driver w5300_driver = {
 		.pm	= &w5300_pm_ops,
 	},
 	.probe		= w5300_probe,
-	.remove		= w5300_remove,
+	.remove_new	= w5300_remove,
 };
 
 module_platform_driver(w5300_driver);
diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c
index 1444b855e7aa..9df39cf8b097 100644
--- a/drivers/net/ethernet/xilinx/ll_temac_main.c
+++ b/drivers/net/ethernet/xilinx/ll_temac_main.c
@@ -1626,7 +1626,7 @@ err_sysfs_create:
 	return rc;
 }
 
-static int temac_remove(struct platform_device *pdev)
+static void temac_remove(struct platform_device *pdev)
 {
 	struct net_device *ndev = platform_get_drvdata(pdev);
 	struct temac_local *lp = netdev_priv(ndev);
@@ -1636,7 +1636,6 @@ static int temac_remove(struct platform_device *pdev)
 	if (lp->phy_node)
 		of_node_put(lp->phy_node);
 	temac_mdio_teardown(lp);
-	return 0;
 }
 
 static const struct of_device_id temac_of_match[] = {
@@ -1650,7 +1649,7 @@ MODULE_DEVICE_TABLE(of, temac_of_match);
 
 static struct platform_driver temac_driver = {
 	.probe = temac_probe,
-	.remove = temac_remove,
+	.remove_new = temac_remove,
 	.driver = {
 		.name = "xilinx_temac",
 		.of_match_table = temac_of_match,
diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
index b7ec4dafae90..82d0d44b2b02 100644
--- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
+++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
@@ -2183,7 +2183,7 @@ free_netdev:
 	return ret;
 }
 
-static int axienet_remove(struct platform_device *pdev)
+static void axienet_remove(struct platform_device *pdev)
 {
 	struct net_device *ndev = platform_get_drvdata(pdev);
 	struct axienet_local *lp = netdev_priv(ndev);
@@ -2202,8 +2202,6 @@ static int axienet_remove(struct platform_device *pdev)
 	clk_disable_unprepare(lp->axi_clk);
 
 	free_netdev(ndev);
-
-	return 0;
 }
 
 static void axienet_shutdown(struct platform_device *pdev)
@@ -2256,7 +2254,7 @@ static DEFINE_SIMPLE_DEV_PM_OPS(axienet_pm_ops,
 
 static struct platform_driver axienet_driver = {
 	.probe = axienet_probe,
-	.remove = axienet_remove,
+	.remove_new = axienet_remove,
 	.shutdown = axienet_shutdown,
 	.driver = {
 		 .name = "xilinx_axienet",
diff --git a/drivers/net/ethernet/xilinx/xilinx_emaclite.c b/drivers/net/ethernet/xilinx/xilinx_emaclite.c
index b358ecc67227..32a502e7318b 100644
--- a/drivers/net/ethernet/xilinx/xilinx_emaclite.c
+++ b/drivers/net/ethernet/xilinx/xilinx_emaclite.c
@@ -1183,7 +1183,7 @@ error:
  *
  * Return:	0, always.
  */
-static int xemaclite_of_remove(struct platform_device *of_dev)
+static void xemaclite_of_remove(struct platform_device *of_dev)
 {
 	struct net_device *ndev = platform_get_drvdata(of_dev);
 
@@ -1202,8 +1202,6 @@ static int xemaclite_of_remove(struct platform_device *of_dev)
 	lp->phy_node = NULL;
 
 	free_netdev(ndev);
-
-	return 0;
 }
 
 #ifdef CONFIG_NET_POLL_CONTROLLER
@@ -1262,7 +1260,7 @@ static struct platform_driver xemaclite_of_driver = {
 		.of_match_table = xemaclite_of_match,
 	},
 	.probe		= xemaclite_of_probe,
-	.remove		= xemaclite_of_remove,
+	.remove_new	= xemaclite_of_remove,
 };
 
 module_platform_driver(xemaclite_of_driver);
diff --git a/drivers/net/ethernet/xscale/ixp4xx_eth.c b/drivers/net/ethernet/xscale/ixp4xx_eth.c
index 3b0c5f177447..b242aa61d8ab 100644
--- a/drivers/net/ethernet/xscale/ixp4xx_eth.c
+++ b/drivers/net/ethernet/xscale/ixp4xx_eth.c
@@ -1533,7 +1533,7 @@ err_free_mem:
 	return err;
 }
 
-static int ixp4xx_eth_remove(struct platform_device *pdev)
+static void ixp4xx_eth_remove(struct platform_device *pdev)
 {
 	struct net_device *ndev = platform_get_drvdata(pdev);
 	struct phy_device *phydev = ndev->phydev;
@@ -1544,7 +1544,6 @@ static int ixp4xx_eth_remove(struct platform_device *pdev)
 	ixp4xx_mdio_remove();
 	npe_port_tab[NPE_ID(port->id)] = NULL;
 	npe_release(port->npe);
-	return 0;
 }
 
 static const struct of_device_id ixp4xx_eth_of_match[] = {
@@ -1560,7 +1559,7 @@ static struct platform_driver ixp4xx_eth_driver = {
 		.of_match_table = of_match_ptr(ixp4xx_eth_of_match),
 	},
 	.probe		= ixp4xx_eth_probe,
-	.remove		= ixp4xx_eth_remove,
+	.remove_new	= ixp4xx_eth_remove,
 };
 module_platform_driver(ixp4xx_eth_driver);
 
diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c
index b22596b18ee8..b1919278e931 100644
--- a/drivers/net/gtp.c
+++ b/drivers/net/gtp.c
@@ -630,7 +630,7 @@ static void __gtp_encap_destroy(struct sock *sk)
 			gtp->sk0 = NULL;
 		else
 			gtp->sk1u = NULL;
-		udp_sk(sk)->encap_type = 0;
+		WRITE_ONCE(udp_sk(sk)->encap_type, 0);
 		rcu_assign_sk_user_data(sk, NULL);
 		release_sock(sk);
 		sock_put(sk);
@@ -682,7 +682,7 @@ static int gtp_encap_recv(struct sock *sk, struct sk_buff *skb)
 
 	netdev_dbg(gtp->dev, "encap_recv sk=%p\n", sk);
 
-	switch (udp_sk(sk)->encap_type) {
+	switch (READ_ONCE(udp_sk(sk)->encap_type)) {
 	case UDP_ENCAP_GTP0:
 		netdev_dbg(gtp->dev, "received GTP0 packet\n");
 		ret = gtp0_udp_encap_recv(gtp, skb);
diff --git a/drivers/net/mdio/mdio-aspeed.c b/drivers/net/mdio/mdio-aspeed.c
index c727103c8b05..70edeeb7771e 100644
--- a/drivers/net/mdio/mdio-aspeed.c
+++ b/drivers/net/mdio/mdio-aspeed.c
@@ -177,15 +177,13 @@ static int aspeed_mdio_probe(struct platform_device *pdev)
 	return 0;
 }
 
-static int aspeed_mdio_remove(struct platform_device *pdev)
+static void aspeed_mdio_remove(struct platform_device *pdev)
 {
 	struct mii_bus *bus = (struct mii_bus *)platform_get_drvdata(pdev);
 	struct aspeed_mdio *ctx = bus->priv;
 
 	reset_control_assert(ctx->reset);
 	mdiobus_unregister(bus);
-
-	return 0;
 }
 
 static const struct of_device_id aspeed_mdio_of_match[] = {
@@ -200,7 +198,7 @@ static struct platform_driver aspeed_mdio_driver = {
 		.of_match_table = aspeed_mdio_of_match,
 	},
 	.probe = aspeed_mdio_probe,
-	.remove = aspeed_mdio_remove,
+	.remove_new = aspeed_mdio_remove,
 };
 
 module_platform_driver(aspeed_mdio_driver);
diff --git a/drivers/net/mdio/mdio-bcm-iproc.c b/drivers/net/mdio/mdio-bcm-iproc.c
index 77fc970cdfde..5a2d26c6afdc 100644
--- a/drivers/net/mdio/mdio-bcm-iproc.c
+++ b/drivers/net/mdio/mdio-bcm-iproc.c
@@ -168,14 +168,12 @@ err_iproc_mdio:
 	return rc;
 }
 
-static int iproc_mdio_remove(struct platform_device *pdev)
+static void iproc_mdio_remove(struct platform_device *pdev)
 {
 	struct iproc_mdio_priv *priv = platform_get_drvdata(pdev);
 
 	mdiobus_unregister(priv->mii_bus);
 	mdiobus_free(priv->mii_bus);
-
-	return 0;
 }
 
 #ifdef CONFIG_PM_SLEEP
@@ -210,7 +208,7 @@ static struct platform_driver iproc_mdio_driver = {
 #endif
 	},
 	.probe = iproc_mdio_probe,
-	.remove = iproc_mdio_remove,
+	.remove_new = iproc_mdio_remove,
 };
 
 module_platform_driver(iproc_mdio_driver);
diff --git a/drivers/net/mdio/mdio-bcm-unimac.c b/drivers/net/mdio/mdio-bcm-unimac.c
index 6b26a0803696..e8cd8eef319b 100644
--- a/drivers/net/mdio/mdio-bcm-unimac.c
+++ b/drivers/net/mdio/mdio-bcm-unimac.c
@@ -296,15 +296,13 @@ out_clk_disable:
 	return ret;
 }
 
-static int unimac_mdio_remove(struct platform_device *pdev)
+static void unimac_mdio_remove(struct platform_device *pdev)
 {
 	struct unimac_mdio_priv *priv = platform_get_drvdata(pdev);
 
 	mdiobus_unregister(priv->mii_bus);
 	mdiobus_free(priv->mii_bus);
 	clk_disable_unprepare(priv->clk);
-
-	return 0;
 }
 
 static int __maybe_unused unimac_mdio_suspend(struct device *d)
@@ -353,7 +351,7 @@ static struct platform_driver unimac_mdio_driver = {
 		.pm = &unimac_mdio_pm_ops,
 	},
 	.probe	= unimac_mdio_probe,
-	.remove	= unimac_mdio_remove,
+	.remove_new = unimac_mdio_remove,
 };
 module_platform_driver(unimac_mdio_driver);
 
diff --git a/drivers/net/mdio/mdio-gpio.c b/drivers/net/mdio/mdio-gpio.c
index 0fb3c2de0845..897b88c50bbb 100644
--- a/drivers/net/mdio/mdio-gpio.c
+++ b/drivers/net/mdio/mdio-gpio.c
@@ -194,11 +194,9 @@ static int mdio_gpio_probe(struct platform_device *pdev)
 	return ret;
 }
 
-static int mdio_gpio_remove(struct platform_device *pdev)
+static void mdio_gpio_remove(struct platform_device *pdev)
 {
 	mdio_gpio_bus_destroy(&pdev->dev);
-
-	return 0;
 }
 
 static const struct of_device_id mdio_gpio_of_match[] = {
@@ -210,7 +208,7 @@ MODULE_DEVICE_TABLE(of, mdio_gpio_of_match);
 
 static struct platform_driver mdio_gpio_driver = {
 	.probe = mdio_gpio_probe,
-	.remove = mdio_gpio_remove,
+	.remove_new = mdio_gpio_remove,
 	.driver		= {
 		.name	= "mdio-gpio",
 		.of_match_table = mdio_gpio_of_match,
diff --git a/drivers/net/mdio/mdio-hisi-femac.c b/drivers/net/mdio/mdio-hisi-femac.c
index f231c2fbb1de..6703f626ee83 100644
--- a/drivers/net/mdio/mdio-hisi-femac.c
+++ b/drivers/net/mdio/mdio-hisi-femac.c
@@ -118,7 +118,7 @@ err_out_free_mdiobus:
 	return ret;
 }
 
-static int hisi_femac_mdio_remove(struct platform_device *pdev)
+static void hisi_femac_mdio_remove(struct platform_device *pdev)
 {
 	struct mii_bus *bus = platform_get_drvdata(pdev);
 	struct hisi_femac_mdio_data *data = bus->priv;
@@ -126,8 +126,6 @@ static int hisi_femac_mdio_remove(struct platform_device *pdev)
 	mdiobus_unregister(bus);
 	clk_disable_unprepare(data->clk);
 	mdiobus_free(bus);
-
-	return 0;
 }
 
 static const struct of_device_id hisi_femac_mdio_dt_ids[] = {
@@ -138,7 +136,7 @@ MODULE_DEVICE_TABLE(of, hisi_femac_mdio_dt_ids);
 
 static struct platform_driver hisi_femac_mdio_driver = {
 	.probe = hisi_femac_mdio_probe,
-	.remove = hisi_femac_mdio_remove,
+	.remove_new = hisi_femac_mdio_remove,
 	.driver = {
 		.name = "hisi-femac-mdio",
 		.of_match_table = hisi_femac_mdio_dt_ids,
diff --git a/drivers/net/mdio/mdio-ipq4019.c b/drivers/net/mdio/mdio-ipq4019.c
index 78b93de636f5..abd8b508ec16 100644
--- a/drivers/net/mdio/mdio-ipq4019.c
+++ b/drivers/net/mdio/mdio-ipq4019.c
@@ -278,13 +278,11 @@ static int ipq4019_mdio_probe(struct platform_device *pdev)
 	return 0;
 }
 
-static int ipq4019_mdio_remove(struct platform_device *pdev)
+static void ipq4019_mdio_remove(struct platform_device *pdev)
 {
 	struct mii_bus *bus = platform_get_drvdata(pdev);
 
 	mdiobus_unregister(bus);
-
-	return 0;
 }
 
 static const struct of_device_id ipq4019_mdio_dt_ids[] = {
@@ -296,7 +294,7 @@ MODULE_DEVICE_TABLE(of, ipq4019_mdio_dt_ids);
 
 static struct platform_driver ipq4019_mdio_driver = {
 	.probe = ipq4019_mdio_probe,
-	.remove = ipq4019_mdio_remove,
+	.remove_new = ipq4019_mdio_remove,
 	.driver = {
 		.name = "ipq4019-mdio",
 		.of_match_table = ipq4019_mdio_dt_ids,
diff --git a/drivers/net/mdio/mdio-ipq8064.c b/drivers/net/mdio/mdio-ipq8064.c
index fd9716960106..f71b6e1c66e4 100644
--- a/drivers/net/mdio/mdio-ipq8064.c
+++ b/drivers/net/mdio/mdio-ipq8064.c
@@ -147,14 +147,11 @@ ipq8064_mdio_probe(struct platform_device *pdev)
 	return 0;
 }
 
-static int
-ipq8064_mdio_remove(struct platform_device *pdev)
+static void ipq8064_mdio_remove(struct platform_device *pdev)
 {
 	struct mii_bus *bus = platform_get_drvdata(pdev);
 
 	mdiobus_unregister(bus);
-
-	return 0;
 }
 
 static const struct of_device_id ipq8064_mdio_dt_ids[] = {
@@ -165,7 +162,7 @@ MODULE_DEVICE_TABLE(of, ipq8064_mdio_dt_ids);
 
 static struct platform_driver ipq8064_mdio_driver = {
 	.probe = ipq8064_mdio_probe,
-	.remove = ipq8064_mdio_remove,
+	.remove_new = ipq8064_mdio_remove,
 	.driver = {
 		.name = "ipq8064-mdio",
 		.of_match_table = ipq8064_mdio_dt_ids,
diff --git a/drivers/net/mdio/mdio-moxart.c b/drivers/net/mdio/mdio-moxart.c
index f0cff584e176..d35af8cd7c4d 100644
--- a/drivers/net/mdio/mdio-moxart.c
+++ b/drivers/net/mdio/mdio-moxart.c
@@ -155,14 +155,12 @@ err_out_free_mdiobus:
 	return ret;
 }
 
-static int moxart_mdio_remove(struct platform_device *pdev)
+static void moxart_mdio_remove(struct platform_device *pdev)
 {
 	struct mii_bus *bus = platform_get_drvdata(pdev);
 
 	mdiobus_unregister(bus);
 	mdiobus_free(bus);
-
-	return 0;
 }
 
 static const struct of_device_id moxart_mdio_dt_ids[] = {
@@ -173,7 +171,7 @@ MODULE_DEVICE_TABLE(of, moxart_mdio_dt_ids);
 
 static struct platform_driver moxart_mdio_driver = {
 	.probe = moxart_mdio_probe,
-	.remove = moxart_mdio_remove,
+	.remove_new = moxart_mdio_remove,
 	.driver = {
 		.name = "moxart-mdio",
 		.of_match_table = moxart_mdio_dt_ids,
diff --git a/drivers/net/mdio/mdio-mscc-miim.c b/drivers/net/mdio/mdio-mscc-miim.c
index 1a1b95ae95fa..c29377c85307 100644
--- a/drivers/net/mdio/mdio-mscc-miim.c
+++ b/drivers/net/mdio/mdio-mscc-miim.c
@@ -335,15 +335,13 @@ out_disable_clk:
 	return ret;
 }
 
-static int mscc_miim_remove(struct platform_device *pdev)
+static void mscc_miim_remove(struct platform_device *pdev)
 {
 	struct mii_bus *bus = platform_get_drvdata(pdev);
 	struct mscc_miim_dev *miim = bus->priv;
 
 	clk_disable_unprepare(miim->clk);
 	mdiobus_unregister(bus);
-
-	return 0;
 }
 
 static const struct mscc_miim_info mscc_ocelot_miim_info = {
@@ -371,7 +369,7 @@ MODULE_DEVICE_TABLE(of, mscc_miim_match);
 
 static struct platform_driver mscc_miim_driver = {
 	.probe = mscc_miim_probe,
-	.remove = mscc_miim_remove,
+	.remove_new = mscc_miim_remove,
 	.driver = {
 		.name = "mscc-miim",
 		.of_match_table = mscc_miim_match,
diff --git a/drivers/net/mdio/mdio-mux-bcm-iproc.c b/drivers/net/mdio/mdio-mux-bcm-iproc.c
index 956d54846b62..a750bd4c77a0 100644
--- a/drivers/net/mdio/mdio-mux-bcm-iproc.c
+++ b/drivers/net/mdio/mdio-mux-bcm-iproc.c
@@ -287,15 +287,13 @@ out_clk:
 	return rc;
 }
 
-static int mdio_mux_iproc_remove(struct platform_device *pdev)
+static void mdio_mux_iproc_remove(struct platform_device *pdev)
 {
 	struct iproc_mdiomux_desc *md = platform_get_drvdata(pdev);
 
 	mdio_mux_uninit(md->mux_handle);
 	mdiobus_unregister(md->mii_bus);
 	clk_disable_unprepare(md->core_clk);
-
-	return 0;
 }
 
 #ifdef CONFIG_PM_SLEEP
@@ -342,7 +340,7 @@ static struct platform_driver mdiomux_iproc_driver = {
 		.pm		= &mdio_mux_iproc_pm_ops,
 	},
 	.probe		= mdio_mux_iproc_probe,
-	.remove		= mdio_mux_iproc_remove,
+	.remove_new	= mdio_mux_iproc_remove,
 };
 
 module_platform_driver(mdiomux_iproc_driver);
diff --git a/drivers/net/mdio/mdio-mux-bcm6368.c b/drivers/net/mdio/mdio-mux-bcm6368.c
index 8b444a8eb6b5..1b77e0e3e6e1 100644
--- a/drivers/net/mdio/mdio-mux-bcm6368.c
+++ b/drivers/net/mdio/mdio-mux-bcm6368.c
@@ -153,14 +153,12 @@ out_register:
 	return rc;
 }
 
-static int bcm6368_mdiomux_remove(struct platform_device *pdev)
+static void bcm6368_mdiomux_remove(struct platform_device *pdev)
 {
 	struct bcm6368_mdiomux_desc *md = platform_get_drvdata(pdev);
 
 	mdio_mux_uninit(md->mux_handle);
 	mdiobus_unregister(md->mii_bus);
-
-	return 0;
 }
 
 static const struct of_device_id bcm6368_mdiomux_ids[] = {
@@ -175,7 +173,7 @@ static struct platform_driver bcm6368_mdiomux_driver = {
 		.of_match_table = bcm6368_mdiomux_ids,
 	},
 	.probe	= bcm6368_mdiomux_probe,
-	.remove	= bcm6368_mdiomux_remove,
+	.remove_new = bcm6368_mdiomux_remove,
 };
 module_platform_driver(bcm6368_mdiomux_driver);
 
diff --git a/drivers/net/mdio/mdio-mux-gpio.c b/drivers/net/mdio/mdio-mux-gpio.c
index 3c7f16f06b45..38fb031f8979 100644
--- a/drivers/net/mdio/mdio-mux-gpio.c
+++ b/drivers/net/mdio/mdio-mux-gpio.c
@@ -62,11 +62,10 @@ static int mdio_mux_gpio_probe(struct platform_device *pdev)
 	return 0;
 }
 
-static int mdio_mux_gpio_remove(struct platform_device *pdev)
+static void mdio_mux_gpio_remove(struct platform_device *pdev)
 {
 	struct mdio_mux_gpio_state *s = dev_get_platdata(&pdev->dev);
 	mdio_mux_uninit(s->mux_handle);
-	return 0;
 }
 
 static const struct of_device_id mdio_mux_gpio_match[] = {
@@ -87,7 +86,7 @@ static struct platform_driver mdio_mux_gpio_driver = {
 		.of_match_table = mdio_mux_gpio_match,
 	},
 	.probe		= mdio_mux_gpio_probe,
-	.remove		= mdio_mux_gpio_remove,
+	.remove_new	= mdio_mux_gpio_remove,
 };
 
 module_platform_driver(mdio_mux_gpio_driver);
diff --git a/drivers/net/mdio/mdio-mux-meson-g12a.c b/drivers/net/mdio/mdio-mux-meson-g12a.c
index 910e5cf74e89..754b0f2cf15b 100644
--- a/drivers/net/mdio/mdio-mux-meson-g12a.c
+++ b/drivers/net/mdio/mdio-mux-meson-g12a.c
@@ -336,7 +336,7 @@ static int g12a_mdio_mux_probe(struct platform_device *pdev)
 	return ret;
 }
 
-static int g12a_mdio_mux_remove(struct platform_device *pdev)
+static void g12a_mdio_mux_remove(struct platform_device *pdev)
 {
 	struct g12a_mdio_mux *priv = platform_get_drvdata(pdev);
 
@@ -344,13 +344,11 @@ static int g12a_mdio_mux_remove(struct platform_device *pdev)
 
 	if (__clk_is_enabled(priv->pll))
 		clk_disable_unprepare(priv->pll);
-
-	return 0;
 }
 
 static struct platform_driver g12a_mdio_mux_driver = {
 	.probe		= g12a_mdio_mux_probe,
-	.remove		= g12a_mdio_mux_remove,
+	.remove_new	= g12a_mdio_mux_remove,
 	.driver		= {
 		.name	= "g12a-mdio_mux",
 		.of_match_table = g12a_mdio_mux_match,
diff --git a/drivers/net/mdio/mdio-mux-meson-gxl.c b/drivers/net/mdio/mdio-mux-meson-gxl.c
index 76188575ca1f..89554021b5cc 100644
--- a/drivers/net/mdio/mdio-mux-meson-gxl.c
+++ b/drivers/net/mdio/mdio-mux-meson-gxl.c
@@ -140,18 +140,16 @@ static int gxl_mdio_mux_probe(struct platform_device *pdev)
 	return ret;
 }
 
-static int gxl_mdio_mux_remove(struct platform_device *pdev)
+static void gxl_mdio_mux_remove(struct platform_device *pdev)
 {
 	struct gxl_mdio_mux *priv = platform_get_drvdata(pdev);
 
 	mdio_mux_uninit(priv->mux_handle);
-
-	return 0;
 }
 
 static struct platform_driver gxl_mdio_mux_driver = {
 	.probe		= gxl_mdio_mux_probe,
-	.remove		= gxl_mdio_mux_remove,
+	.remove_new	= gxl_mdio_mux_remove,
 	.driver		= {
 		.name	= "gxl-mdio-mux",
 		.of_match_table = gxl_mdio_mux_match,
diff --git a/drivers/net/mdio/mdio-mux-mmioreg.c b/drivers/net/mdio/mdio-mux-mmioreg.c
index 09af150ed774..de08419d0c98 100644
--- a/drivers/net/mdio/mdio-mux-mmioreg.c
+++ b/drivers/net/mdio/mdio-mux-mmioreg.c
@@ -169,13 +169,11 @@ static int mdio_mux_mmioreg_probe(struct platform_device *pdev)
 	return 0;
 }
 
-static int mdio_mux_mmioreg_remove(struct platform_device *pdev)
+static void mdio_mux_mmioreg_remove(struct platform_device *pdev)
 {
 	struct mdio_mux_mmioreg_state *s = dev_get_platdata(&pdev->dev);
 
 	mdio_mux_uninit(s->mux_handle);
-
-	return 0;
 }
 
 static const struct of_device_id mdio_mux_mmioreg_match[] = {
@@ -192,7 +190,7 @@ static struct platform_driver mdio_mux_mmioreg_driver = {
 		.of_match_table = mdio_mux_mmioreg_match,
 	},
 	.probe		= mdio_mux_mmioreg_probe,
-	.remove		= mdio_mux_mmioreg_remove,
+	.remove_new	= mdio_mux_mmioreg_remove,
 };
 
 module_platform_driver(mdio_mux_mmioreg_driver);
diff --git a/drivers/net/mdio/mdio-mux-multiplexer.c b/drivers/net/mdio/mdio-mux-multiplexer.c
index bfa5af577b0a..569b13383191 100644
--- a/drivers/net/mdio/mdio-mux-multiplexer.c
+++ b/drivers/net/mdio/mdio-mux-multiplexer.c
@@ -85,7 +85,7 @@ static int mdio_mux_multiplexer_probe(struct platform_device *pdev)
 	return ret;
 }
 
-static int mdio_mux_multiplexer_remove(struct platform_device *pdev)
+static void mdio_mux_multiplexer_remove(struct platform_device *pdev)
 {
 	struct mdio_mux_multiplexer_state *s = platform_get_drvdata(pdev);
 
@@ -93,8 +93,6 @@ static int mdio_mux_multiplexer_remove(struct platform_device *pdev)
 
 	if (s->do_deselect)
 		mux_control_deselect(s->muxc);
-
-	return 0;
 }
 
 static const struct of_device_id mdio_mux_multiplexer_match[] = {
@@ -109,7 +107,7 @@ static struct platform_driver mdio_mux_multiplexer_driver = {
 		.of_match_table	= mdio_mux_multiplexer_match,
 	},
 	.probe		= mdio_mux_multiplexer_probe,
-	.remove		= mdio_mux_multiplexer_remove,
+	.remove_new	= mdio_mux_multiplexer_remove,
 };
 
 module_platform_driver(mdio_mux_multiplexer_driver);
diff --git a/drivers/net/mdio/mdio-octeon.c b/drivers/net/mdio/mdio-octeon.c
index 7c65c547d377..037a38cfed56 100644
--- a/drivers/net/mdio/mdio-octeon.c
+++ b/drivers/net/mdio/mdio-octeon.c
@@ -78,7 +78,7 @@ fail_register:
 	return err;
 }
 
-static int octeon_mdiobus_remove(struct platform_device *pdev)
+static void octeon_mdiobus_remove(struct platform_device *pdev)
 {
 	struct cavium_mdiobus *bus;
 	union cvmx_smix_en smi_en;
@@ -88,7 +88,6 @@ static int octeon_mdiobus_remove(struct platform_device *pdev)
 	mdiobus_unregister(bus->mii_bus);
 	smi_en.u64 = 0;
 	oct_mdio_writeq(smi_en.u64, bus->register_base + SMI_EN);
-	return 0;
 }
 
 static const struct of_device_id octeon_mdiobus_match[] = {
@@ -105,7 +104,7 @@ static struct platform_driver octeon_mdiobus_driver = {
 		.of_match_table = octeon_mdiobus_match,
 	},
 	.probe		= octeon_mdiobus_probe,
-	.remove		= octeon_mdiobus_remove,
+	.remove_new	= octeon_mdiobus_remove,
 };
 
 module_platform_driver(octeon_mdiobus_driver);
diff --git a/drivers/net/mdio/mdio-sun4i.c b/drivers/net/mdio/mdio-sun4i.c
index f798de3276dc..4511bcc73b36 100644
--- a/drivers/net/mdio/mdio-sun4i.c
+++ b/drivers/net/mdio/mdio-sun4i.c
@@ -142,7 +142,7 @@ err_out_free_mdiobus:
 	return ret;
 }
 
-static int sun4i_mdio_remove(struct platform_device *pdev)
+static void sun4i_mdio_remove(struct platform_device *pdev)
 {
 	struct mii_bus *bus = platform_get_drvdata(pdev);
 	struct sun4i_mdio_data *data = bus->priv;
@@ -151,8 +151,6 @@ static int sun4i_mdio_remove(struct platform_device *pdev)
 	if (data->regulator)
 		regulator_disable(data->regulator);
 	mdiobus_free(bus);
-
-	return 0;
 }
 
 static const struct of_device_id sun4i_mdio_dt_ids[] = {
@@ -166,7 +164,7 @@ MODULE_DEVICE_TABLE(of, sun4i_mdio_dt_ids);
 
 static struct platform_driver sun4i_mdio_driver = {
 	.probe = sun4i_mdio_probe,
-	.remove = sun4i_mdio_remove,
+	.remove_new = sun4i_mdio_remove,
 	.driver = {
 		.name = "sun4i-mdio",
 		.of_match_table = sun4i_mdio_dt_ids,
diff --git a/drivers/net/mdio/mdio-xgene.c b/drivers/net/mdio/mdio-xgene.c
index 1190a793555a..7909d7caf45c 100644
--- a/drivers/net/mdio/mdio-xgene.c
+++ b/drivers/net/mdio/mdio-xgene.c
@@ -432,7 +432,7 @@ out_clk:
 	return ret;
 }
 
-static int xgene_mdio_remove(struct platform_device *pdev)
+static void xgene_mdio_remove(struct platform_device *pdev)
 {
 	struct xgene_mdio_pdata *pdata = platform_get_drvdata(pdev);
 	struct mii_bus *mdio_bus = pdata->mdio_bus;
@@ -443,8 +443,6 @@ static int xgene_mdio_remove(struct platform_device *pdev)
 
 	if (dev->of_node)
 		clk_disable_unprepare(pdata->clk);
-
-	return 0;
 }
 
 static struct platform_driver xgene_mdio_driver = {
@@ -454,7 +452,7 @@ static struct platform_driver xgene_mdio_driver = {
 		.acpi_match_table = ACPI_PTR(xgene_mdio_acpi_match),
 	},
 	.probe = xgene_mdio_probe,
-	.remove = xgene_mdio_remove,
+	.remove_new = xgene_mdio_remove,
 };
 
 module_platform_driver(xgene_mdio_driver);
diff --git a/drivers/net/phy/ax88796b.c b/drivers/net/phy/ax88796b.c
index 0f1e617a26c9..eb74a8cf8df1 100644
--- a/drivers/net/phy/ax88796b.c
+++ b/drivers/net/phy/ax88796b.c
@@ -90,7 +90,7 @@ static void asix_ax88772a_link_change_notify(struct phy_device *phydev)
 	 */
 	if (phydev->state == PHY_NOLINK) {
 		phy_init_hw(phydev);
-		phy_start_aneg(phydev);
+		_phy_start_aneg(phydev);
 	}
 }
 
diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index df54c137c5f5..a5fa077650e8 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -981,7 +981,7 @@ static int phy_check_link_status(struct phy_device *phydev)
  *   If the PHYCONTROL Layer is operating, we change the state to
  *   reflect the beginning of Auto-negotiation or forcing.
  */
-static int _phy_start_aneg(struct phy_device *phydev)
+int _phy_start_aneg(struct phy_device *phydev)
 {
 	int err;
 
@@ -1002,6 +1002,7 @@ static int _phy_start_aneg(struct phy_device *phydev)
 
 	return err;
 }
+EXPORT_SYMBOL(_phy_start_aneg);
 
 /**
  * phy_start_aneg - start auto-negotiation for this PHY device
@@ -1231,9 +1232,7 @@ static void phy_error_precise(struct phy_device *phydev,
 			      const void *func, int err)
 {
 	WARN(1, "%pS: returned: %d\n", func, err);
-	mutex_lock(&phydev->lock);
 	phy_process_error(phydev);
-	mutex_unlock(&phydev->lock);
 }
 
 /**
@@ -1355,6 +1354,113 @@ void phy_free_interrupt(struct phy_device *phydev)
 }
 EXPORT_SYMBOL(phy_free_interrupt);
 
+enum phy_state_work {
+	PHY_STATE_WORK_NONE,
+	PHY_STATE_WORK_ANEG,
+	PHY_STATE_WORK_SUSPEND,
+};
+
+static enum phy_state_work _phy_state_machine(struct phy_device *phydev)
+{
+	enum phy_state_work state_work = PHY_STATE_WORK_NONE;
+	struct net_device *dev = phydev->attached_dev;
+	enum phy_state old_state = phydev->state;
+	const void *func = NULL;
+	bool finished = false;
+	int err = 0;
+
+	switch (phydev->state) {
+	case PHY_DOWN:
+	case PHY_READY:
+		break;
+	case PHY_UP:
+		state_work = PHY_STATE_WORK_ANEG;
+		break;
+	case PHY_NOLINK:
+	case PHY_RUNNING:
+		err = phy_check_link_status(phydev);
+		func = &phy_check_link_status;
+		break;
+	case PHY_CABLETEST:
+		err = phydev->drv->cable_test_get_status(phydev, &finished);
+		if (err) {
+			phy_abort_cable_test(phydev);
+			netif_testing_off(dev);
+			state_work = PHY_STATE_WORK_ANEG;
+			phydev->state = PHY_UP;
+			break;
+		}
+
+		if (finished) {
+			ethnl_cable_test_finished(phydev);
+			netif_testing_off(dev);
+			state_work = PHY_STATE_WORK_ANEG;
+			phydev->state = PHY_UP;
+		}
+		break;
+	case PHY_HALTED:
+	case PHY_ERROR:
+		if (phydev->link) {
+			phydev->link = 0;
+			phy_link_down(phydev);
+		}
+		state_work = PHY_STATE_WORK_SUSPEND;
+		break;
+	}
+
+	if (state_work == PHY_STATE_WORK_ANEG) {
+		err = _phy_start_aneg(phydev);
+		func = &_phy_start_aneg;
+	}
+
+	if (err == -ENODEV)
+		return state_work;
+
+	if (err < 0)
+		phy_error_precise(phydev, func, err);
+
+	phy_process_state_change(phydev, old_state);
+
+	/* Only re-schedule a PHY state machine change if we are polling the
+	 * PHY, if PHY_MAC_INTERRUPT is set, then we will be moving
+	 * between states from phy_mac_interrupt().
+	 *
+	 * In state PHY_HALTED the PHY gets suspended, so rescheduling the
+	 * state machine would be pointless and possibly error prone when
+	 * called from phy_disconnect() synchronously.
+	 */
+	if (phy_polling_mode(phydev) && phy_is_started(phydev))
+		phy_queue_state_machine(phydev, PHY_STATE_TIME);
+
+	return state_work;
+}
+
+/* unlocked part of the PHY state machine */
+static void _phy_state_machine_post_work(struct phy_device *phydev,
+					 enum phy_state_work state_work)
+{
+	if (state_work == PHY_STATE_WORK_SUSPEND)
+		phy_suspend(phydev);
+}
+
+/**
+ * phy_state_machine - Handle the state machine
+ * @work: work_struct that describes the work to be done
+ */
+void phy_state_machine(struct work_struct *work)
+{
+	struct delayed_work *dwork = to_delayed_work(work);
+	struct phy_device *phydev =
+			container_of(dwork, struct phy_device, state_queue);
+	enum phy_state_work state_work;
+
+	mutex_lock(&phydev->lock);
+	state_work = _phy_state_machine(phydev);
+	mutex_unlock(&phydev->lock);
+
+	_phy_state_machine_post_work(phydev, state_work);
+}
+
 /**
  * phy_stop - Bring down the PHY link, and stop checking the status
  * @phydev: target phy_device struct
@@ -1362,6 +1468,7 @@ EXPORT_SYMBOL(phy_free_interrupt);
 void phy_stop(struct phy_device *phydev)
 {
 	struct net_device *dev = phydev->attached_dev;
+	enum phy_state_work state_work;
 	enum phy_state old_state;
 
 	if (!phy_is_started(phydev) && phydev->state != PHY_DOWN &&
@@ -1385,9 +1492,10 @@ void phy_stop(struct phy_device *phydev)
 	phydev->state = PHY_HALTED;
 	phy_process_state_change(phydev, old_state);
 
+	state_work = _phy_state_machine(phydev);
 	mutex_unlock(&phydev->lock);
 
-	phy_state_machine(&phydev->state_queue.work);
+	_phy_state_machine_post_work(phydev, state_work);
 	phy_stop_machine(phydev);
 
 	/* Cannot call flush_scheduled_work() here as desired because
@@ -1432,97 +1540,6 @@ out:
 EXPORT_SYMBOL(phy_start);
 
 /**
- * phy_state_machine - Handle the state machine
- * @work: work_struct that describes the work to be done
- */
-void phy_state_machine(struct work_struct *work)
-{
-	struct delayed_work *dwork = to_delayed_work(work);
-	struct phy_device *phydev =
-			container_of(dwork, struct phy_device, state_queue);
-	struct net_device *dev = phydev->attached_dev;
-	bool needs_aneg = false, do_suspend = false;
-	enum phy_state old_state;
-	const void *func = NULL;
-	bool finished = false;
-	int err = 0;
-
-	mutex_lock(&phydev->lock);
-
-	old_state = phydev->state;
-
-	switch (phydev->state) {
-	case PHY_DOWN:
-	case PHY_READY:
-		break;
-	case PHY_UP:
-		needs_aneg = true;
-
-		break;
-	case PHY_NOLINK:
-	case PHY_RUNNING:
-		err = phy_check_link_status(phydev);
-		func = &phy_check_link_status;
-		break;
-	case PHY_CABLETEST:
-		err = phydev->drv->cable_test_get_status(phydev, &finished);
-		if (err) {
-			phy_abort_cable_test(phydev);
-			netif_testing_off(dev);
-			needs_aneg = true;
-			phydev->state = PHY_UP;
-			break;
-		}
-
-		if (finished) {
-			ethnl_cable_test_finished(phydev);
-			netif_testing_off(dev);
-			needs_aneg = true;
-			phydev->state = PHY_UP;
-		}
-		break;
-	case PHY_HALTED:
-	case PHY_ERROR:
-		if (phydev->link) {
-			phydev->link = 0;
-			phy_link_down(phydev);
-		}
-		do_suspend = true;
-		break;
-	}
-
-	mutex_unlock(&phydev->lock);
-
-	if (needs_aneg) {
-		err = phy_start_aneg(phydev);
-		func = &phy_start_aneg;
-	} else if (do_suspend) {
-		phy_suspend(phydev);
-	}
-
-	if (err == -ENODEV)
-		return;
-
-	if (err < 0)
-		phy_error_precise(phydev, func, err);
-
-	phy_process_state_change(phydev, old_state);
-
-	/* Only re-schedule a PHY state machine change if we are polling the
-	 * PHY, if PHY_MAC_INTERRUPT is set, then we will be moving
-	 * between states from phy_mac_interrupt().
-	 *
-	 * In state PHY_HALTED the PHY gets suspended, so rescheduling the
-	 * state machine would be pointless and possibly error prone when
-	 * called from phy_disconnect() synchronously.
-	 */
-	mutex_lock(&phydev->lock);
-	if (phy_polling_mode(phydev) && phy_is_started(phydev))
-		phy_queue_state_machine(phydev, PHY_STATE_TIME);
-	mutex_unlock(&phydev->lock);
-}
-
-/**
  * phy_mac_interrupt - MAC says the link has changed
  * @phydev: phy_device struct with changed link
  *
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c b/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c
index fc7ace638ce8..e7d8e03f826f 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c
@@ -591,7 +591,7 @@ static void mt7915_mmio_wed_release_rx_buf(struct mtk_wed_device *wed)
 
 static u32 mt7915_mmio_wed_init_rx_buf(struct mtk_wed_device *wed, int size)
 {
-	struct mtk_rxbm_desc *desc = wed->rx_buf_ring.desc;
+	struct mtk_wed_bm_desc *desc = wed->rx_buf_ring.desc;
 	struct mt76_txwi_cache *t = NULL;
 	struct mt7915_dev *dev;
 	struct mt76_queue *q;
diff --git a/drivers/net/wwan/wwan_core.c b/drivers/net/wwan/wwan_core.c
index 284ab1f56391..87df60916960 100644
--- a/drivers/net/wwan/wwan_core.c
+++ b/drivers/net/wwan/wwan_core.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /* Copyright (c) 2021, Linaro Ltd <loic.poulain@linaro.org> */
 
+#include <linux/bitmap.h>
 #include <linux/err.h>
 #include <linux/errno.h>
 #include <linux/debugfs.h>
@@ -395,7 +396,7 @@ static int __wwan_port_dev_assign_name(struct wwan_port *port, const char *fmt)
 	char buf[0x20];
 	int id;
 
-	idmap = (unsigned long *)get_zeroed_page(GFP_KERNEL);
+	idmap = bitmap_zalloc(max_ports, GFP_KERNEL);
 	if (!idmap)
 		return -ENOMEM;
 
@@ -414,7 +415,7 @@ static int __wwan_port_dev_assign_name(struct wwan_port *port, const char *fmt)
 
 	/* Allocate unique id */
 	id = find_first_zero_bit(idmap, max_ports);
-	free_page((unsigned long)idmap);
+	bitmap_free(idmap);
 
 	snprintf(buf, sizeof(buf), fmt, id);	/* Name generation */
 
diff --git a/drivers/ptp/Kconfig b/drivers/ptp/Kconfig
index ed9d97a032f1..5dd5f188e14f 100644
--- a/drivers/ptp/Kconfig
+++ b/drivers/ptp/Kconfig
@@ -188,6 +188,7 @@ config PTP_1588_CLOCK_OCP
 	depends on COMMON_CLK
 	select NET_DEVLINK
 	select CRC16
+	select DPLL
 	help
 	  This driver adds support for an OpenCompute time card.
 
diff --git a/drivers/ptp/ptp_ocp.c b/drivers/ptp/ptp_ocp.c
index a7a6947ab4bc..41eaffcae462 100644
--- a/drivers/ptp/ptp_ocp.c
+++ b/drivers/ptp/ptp_ocp.c
@@ -23,6 +23,7 @@
 #include <linux/mtd/mtd.h>
 #include <linux/nvmem-consumer.h>
 #include <linux/crc16.h>
+#include <linux/dpll.h>
 
 #define PCI_VENDOR_ID_FACEBOOK			0x1d9b
 #define PCI_DEVICE_ID_FACEBOOK_TIMECARD		0x0400
@@ -260,12 +261,21 @@ enum ptp_ocp_sma_mode {
 	SMA_MODE_OUT,
 };
 
+static struct dpll_pin_frequency ptp_ocp_sma_freq[] = {
+	DPLL_PIN_FREQUENCY_1PPS,
+	DPLL_PIN_FREQUENCY_10MHZ,
+	DPLL_PIN_FREQUENCY_IRIG_B,
+	DPLL_PIN_FREQUENCY_DCF77,
+};
+
 struct ptp_ocp_sma_connector {
 	enum	ptp_ocp_sma_mode mode;
 	bool	fixed_fcn;
 	bool	fixed_dir;
 	bool	disabled;
 	u8	default_fcn;
+	struct dpll_pin		   *dpll_pin;
+	struct dpll_pin_properties dpll_prop;
 };
 
 struct ocp_attr_group {
@@ -294,6 +304,7 @@ struct ptp_ocp_serial_port {
 
 #define OCP_BOARD_ID_LEN		13
 #define OCP_SERIAL_LEN			6
+#define OCP_SMA_NUM			4
 
 struct ptp_ocp {
 	struct pci_dev		*pdev;
@@ -331,7 +342,9 @@ struct ptp_ocp {
 	const struct attribute_group **attr_group;
 	const struct ptp_ocp_eeprom_map *eeprom_map;
 	struct dentry		*debug_root;
+	bool			sync;
 	time64_t		gnss_lost;
+	struct delayed_work	sync_work;
 	int			id;
 	int			n_irqs;
 	struct ptp_ocp_serial_port	gnss_port;
@@ -350,8 +363,9 @@ struct ptp_ocp {
 	u32			ts_window_adjust;
 	u64			fw_cap;
 	struct ptp_ocp_signal	signal[4];
-	struct ptp_ocp_sma_connector sma[4];
+	struct ptp_ocp_sma_connector sma[OCP_SMA_NUM];
 	const struct ocp_sma_op *sma_op;
+	struct dpll_device *dpll;
 };
 
 #define OCP_REQ_TIMESTAMP	BIT(0)
@@ -835,6 +849,7 @@ static DEFINE_IDR(ptp_ocp_idr);
 struct ocp_selector {
 	const char *name;
 	int value;
+	u64 frequency;
 };
 
 static const struct ocp_selector ptp_ocp_clock[] = {
@@ -855,31 +870,31 @@ static const struct ocp_selector ptp_ocp_clock[] = {
 #define SMA_SELECT_MASK		GENMASK(14, 0)
 
 static const struct ocp_selector ptp_ocp_sma_in[] = {
-	{ .name = "10Mhz",	.value = 0x0000 },
-	{ .name = "PPS1",	.value = 0x0001 },
-	{ .name = "PPS2",	.value = 0x0002 },
-	{ .name = "TS1",	.value = 0x0004 },
-	{ .name = "TS2",	.value = 0x0008 },
-	{ .name = "IRIG",	.value = 0x0010 },
-	{ .name = "DCF",	.value = 0x0020 },
-	{ .name = "TS3",	.value = 0x0040 },
-	{ .name = "TS4",	.value = 0x0080 },
-	{ .name = "FREQ1",	.value = 0x0100 },
-	{ .name = "FREQ2",	.value = 0x0200 },
-	{ .name = "FREQ3",	.value = 0x0400 },
-	{ .name = "FREQ4",	.value = 0x0800 },
-	{ .name = "None",	.value = SMA_DISABLE },
+	{ .name = "10Mhz",  .value = 0x0000,      .frequency = 10000000 },
+	{ .name = "PPS1",   .value = 0x0001,      .frequency = 1 },
+	{ .name = "PPS2",   .value = 0x0002,      .frequency = 1 },
+	{ .name = "TS1",    .value = 0x0004,      .frequency = 0 },
+	{ .name = "TS2",    .value = 0x0008,      .frequency = 0 },
+	{ .name = "IRIG",   .value = 0x0010,      .frequency = 10000 },
+	{ .name = "DCF",    .value = 0x0020,      .frequency = 77500 },
+	{ .name = "TS3",    .value = 0x0040,      .frequency = 0 },
+	{ .name = "TS4",    .value = 0x0080,      .frequency = 0 },
+	{ .name = "FREQ1",  .value = 0x0100,      .frequency = 0 },
+	{ .name = "FREQ2",  .value = 0x0200,      .frequency = 0 },
+	{ .name = "FREQ3",  .value = 0x0400,      .frequency = 0 },
+	{ .name = "FREQ4",  .value = 0x0800,      .frequency = 0 },
+	{ .name = "None",   .value = SMA_DISABLE, .frequency = 0 },
 	{ }
 };
 
 static const struct ocp_selector ptp_ocp_sma_out[] = {
-	{ .name = "10Mhz",	.value = 0x0000 },
-	{ .name = "PHC",	.value = 0x0001 },
-	{ .name = "MAC",	.value = 0x0002 },
-	{ .name = "GNSS1",	.value = 0x0004 },
-	{ .name = "GNSS2",	.value = 0x0008 },
-	{ .name = "IRIG",	.value = 0x0010 },
-	{ .name = "DCF",	.value = 0x0020 },
+	{ .name = "10Mhz",	.value = 0x0000,  .frequency = 10000000 },
+	{ .name = "PHC",	.value = 0x0001,  .frequency = 1 },
+	{ .name = "MAC",	.value = 0x0002,  .frequency = 1 },
+	{ .name = "GNSS1",	.value = 0x0004,  .frequency = 1 },
+	{ .name = "GNSS2",	.value = 0x0008,  .frequency = 1 },
+	{ .name = "IRIG",	.value = 0x0010,  .frequency = 10000 },
+	{ .name = "DCF",	.value = 0x0020,  .frequency = 77000 },
 	{ .name = "GEN1",	.value = 0x0040 },
 	{ .name = "GEN2",	.value = 0x0080 },
 	{ .name = "GEN3",	.value = 0x0100 },
@@ -890,15 +905,15 @@ static const struct ocp_selector ptp_ocp_sma_out[] = {
 };
 
 static const struct ocp_selector ptp_ocp_art_sma_in[] = {
-	{ .name = "PPS1",	.value = 0x0001 },
-	{ .name = "10Mhz",	.value = 0x0008 },
+	{ .name = "PPS1",	.value = 0x0001,  .frequency = 1 },
+	{ .name = "10Mhz",	.value = 0x0008,  .frequency = 1000000 },
 	{ }
 };
 
 static const struct ocp_selector ptp_ocp_art_sma_out[] = {
-	{ .name = "PHC",	.value = 0x0002 },
-	{ .name = "GNSS",	.value = 0x0004 },
-	{ .name = "10Mhz",	.value = 0x0010 },
+	{ .name = "PHC",	.value = 0x0002,  .frequency = 1 },
+	{ .name = "GNSS",	.value = 0x0004,  .frequency = 1 },
+	{ .name = "10Mhz",	.value = 0x0010,  .frequency = 10000000 },
 	{ }
 };
 
@@ -1351,7 +1366,6 @@ static int
 ptp_ocp_init_clock(struct ptp_ocp *bp)
 {
 	struct timespec64 ts;
-	bool sync;
 	u32 ctrl;
 
 	ctrl = OCP_CTRL_ENABLE;
@@ -1375,8 +1389,8 @@ ptp_ocp_init_clock(struct ptp_ocp *bp)
 
 	ptp_ocp_estimate_pci_timing(bp);
 
-	sync = ioread32(&bp->reg->status) & OCP_STATUS_IN_SYNC;
-	if (!sync) {
+	bp->sync = ioread32(&bp->reg->status) & OCP_STATUS_IN_SYNC;
+	if (!bp->sync) {
 		ktime_get_clocktai_ts64(&ts);
 		ptp_ocp_settime(&bp->ptp_info, &ts);
 	}
@@ -2289,22 +2303,35 @@ ptp_ocp_sma_fb_set_inputs(struct ptp_ocp *bp, int sma_nr, u32 val)
 static void
 ptp_ocp_sma_fb_init(struct ptp_ocp *bp)
 {
+	struct dpll_pin_properties prop = {
+		.board_label = NULL,
+		.type = DPLL_PIN_TYPE_EXT,
+		.capabilities = DPLL_PIN_CAPABILITIES_DIRECTION_CAN_CHANGE,
+		.freq_supported_num = ARRAY_SIZE(ptp_ocp_sma_freq),
+		.freq_supported = ptp_ocp_sma_freq,
+
+	};
 	u32 reg;
 	int i;
 
 	/* defaults */
+	for (i = 0; i < OCP_SMA_NUM; i++) {
+		bp->sma[i].default_fcn = i & 1;
+		bp->sma[i].dpll_prop = prop;
+		bp->sma[i].dpll_prop.board_label =
+			bp->ptp_info.pin_config[i].name;
+	}
 	bp->sma[0].mode = SMA_MODE_IN;
 	bp->sma[1].mode = SMA_MODE_IN;
 	bp->sma[2].mode = SMA_MODE_OUT;
 	bp->sma[3].mode = SMA_MODE_OUT;
-	for (i = 0; i < 4; i++)
-		bp->sma[i].default_fcn = i & 1;
-
 	/* If no SMA1 map, the pin functions and directions are fixed. */
 	if (!bp->sma_map1) {
-		for (i = 0; i < 4; i++) {
+		for (i = 0; i < OCP_SMA_NUM; i++) {
 			bp->sma[i].fixed_fcn = true;
 			bp->sma[i].fixed_dir = true;
+			bp->sma[1].dpll_prop.capabilities &=
+				~DPLL_PIN_CAPABILITIES_DIRECTION_CAN_CHANGE;
 		}
 		return;
 	}
@@ -2314,7 +2341,7 @@ ptp_ocp_sma_fb_init(struct ptp_ocp *bp)
 	 */
 	reg = ioread32(&bp->sma_map2->gpio2);
 	if (reg == 0xffffffff) {
-		for (i = 0; i < 4; i++)
+		for (i = 0; i < OCP_SMA_NUM; i++)
 			bp->sma[i].fixed_dir = true;
 	} else {
 		reg = ioread32(&bp->sma_map1->gpio1);
@@ -2336,7 +2363,7 @@ static const struct ocp_sma_op ocp_fb_sma_op = {
 };
 
 static int
-ptp_ocp_fb_set_pins(struct ptp_ocp *bp)
+ptp_ocp_set_pins(struct ptp_ocp *bp)
 {
 	struct ptp_pin_desc *config;
 	int i;
@@ -2403,16 +2430,16 @@ ptp_ocp_fb_board_init(struct ptp_ocp *bp, struct ocp_resource *r)
 
 	ptp_ocp_tod_init(bp);
 	ptp_ocp_nmea_out_init(bp);
-	ptp_ocp_sma_init(bp);
 	ptp_ocp_signal_init(bp);
 
 	err = ptp_ocp_attr_group_add(bp, fb_timecard_groups);
 	if (err)
 		return err;
 
-	err = ptp_ocp_fb_set_pins(bp);
+	err = ptp_ocp_set_pins(bp);
 	if (err)
 		return err;
+	ptp_ocp_sma_init(bp);
 
 	return ptp_ocp_init_clock(bp);
 }
@@ -2452,6 +2479,14 @@ ptp_ocp_register_resources(struct ptp_ocp *bp, kernel_ulong_t driver_data)
 static void
 ptp_ocp_art_sma_init(struct ptp_ocp *bp)
 {
+	struct dpll_pin_properties prop = {
+		.board_label = NULL,
+		.type = DPLL_PIN_TYPE_EXT,
+		.capabilities = 0,
+		.freq_supported_num = ARRAY_SIZE(ptp_ocp_sma_freq),
+		.freq_supported = ptp_ocp_sma_freq,
+
+	};
 	u32 reg;
 	int i;
 
@@ -2466,16 +2501,16 @@ ptp_ocp_art_sma_init(struct ptp_ocp *bp)
 	bp->sma[2].default_fcn = 0x10;	/* OUT: 10Mhz */
 	bp->sma[3].default_fcn = 0x02;	/* OUT: PHC */
 
-	/* If no SMA map, the pin functions and directions are fixed. */
-	if (!bp->art_sma) {
-		for (i = 0; i < 4; i++) {
+	for (i = 0; i < OCP_SMA_NUM; i++) {
+		/* If no SMA map, the pin functions and directions are fixed. */
+		bp->sma[i].dpll_prop = prop;
+		bp->sma[i].dpll_prop.board_label =
+			bp->ptp_info.pin_config[i].name;
+		if (!bp->art_sma) {
 			bp->sma[i].fixed_fcn = true;
 			bp->sma[i].fixed_dir = true;
+			continue;
 		}
-		return;
-	}
-
-	for (i = 0; i < 4; i++) {
 		reg = ioread32(&bp->art_sma->map[i].gpio);
 
 		switch (reg & 0xff) {
@@ -2486,9 +2521,13 @@ ptp_ocp_art_sma_init(struct ptp_ocp *bp)
 		case 1:
 		case 8:
 			bp->sma[i].mode = SMA_MODE_IN;
+			bp->sma[i].dpll_prop.capabilities =
+				DPLL_PIN_CAPABILITIES_DIRECTION_CAN_CHANGE;
 			break;
 		default:
 			bp->sma[i].mode = SMA_MODE_OUT;
+			bp->sma[i].dpll_prop.capabilities =
+				DPLL_PIN_CAPABILITIES_DIRECTION_CAN_CHANGE;
 			break;
 		}
 	}
@@ -2555,6 +2594,9 @@ ptp_ocp_art_board_init(struct ptp_ocp *bp, struct ocp_resource *r)
 	/* Enable MAC serial port during initialisation */
 	iowrite32(1, &bp->board_config->mro50_serial_activate);
 
+	err = ptp_ocp_set_pins(bp);
+	if (err)
+		return err;
 	ptp_ocp_sma_init(bp);
 
 	err = ptp_ocp_attr_group_add(bp, art_timecard_groups);
@@ -2696,16 +2738,9 @@ sma4_show(struct device *dev, struct device_attribute *attr, char *buf)
 }
 
 static int
-ptp_ocp_sma_store(struct ptp_ocp *bp, const char *buf, int sma_nr)
+ptp_ocp_sma_store_val(struct ptp_ocp *bp, int val, enum ptp_ocp_sma_mode mode, int sma_nr)
 {
 	struct ptp_ocp_sma_connector *sma = &bp->sma[sma_nr - 1];
-	enum ptp_ocp_sma_mode mode;
-	int val;
-
-	mode = sma->mode;
-	val = sma_parse_inputs(bp->sma_op->tbl, buf, &mode);
-	if (val < 0)
-		return val;
 
 	if (sma->fixed_dir && (mode != sma->mode || val & SMA_DISABLE))
 		return -EOPNOTSUPP;
@@ -2740,6 +2775,20 @@ ptp_ocp_sma_store(struct ptp_ocp *bp, const char *buf, int sma_nr)
 	return val;
 }
 
+static int
+ptp_ocp_sma_store(struct ptp_ocp *bp, const char *buf, int sma_nr)
+{
+	struct ptp_ocp_sma_connector *sma = &bp->sma[sma_nr - 1];
+	enum ptp_ocp_sma_mode mode;
+	int val;
+
+	mode = sma->mode;
+	val = sma_parse_inputs(bp->sma_op->tbl, buf, &mode);
+	if (val < 0)
+		return val;
+	return ptp_ocp_sma_store_val(bp, val, mode, sma_nr);
+}
+
 static ssize_t
 sma1_store(struct device *dev, struct device_attribute *attr,
 	   const char *buf, size_t count)
@@ -3834,9 +3883,8 @@ ptp_ocp_summary_show(struct seq_file *s, void *data)
 		strcpy(buf, "unknown");
 		break;
 	}
-	val = ioread32(&bp->reg->status);
 	seq_printf(s, "%7s: %s, state: %s\n", "PHC src", buf,
-		   val & OCP_STATUS_IN_SYNC ? "sync" : "unsynced");
+		   bp->sync ? "sync" : "unsynced");
 
 	if (!ptp_ocp_gettimex(&bp->ptp_info, &ts, &sts)) {
 		struct timespec64 sys_ts;
@@ -4067,7 +4115,6 @@ ptp_ocp_phc_info(struct ptp_ocp *bp)
 {
 	struct timespec64 ts;
 	u32 version, select;
-	bool sync;
 
 	version = ioread32(&bp->reg->version);
 	select = ioread32(&bp->reg->select);
@@ -4076,11 +4123,10 @@ ptp_ocp_phc_info(struct ptp_ocp *bp)
 		 ptp_ocp_select_name_from_val(ptp_ocp_clock, select >> 16),
 		 ptp_clock_index(bp->ptp));
 
-	sync = ioread32(&bp->reg->status) & OCP_STATUS_IN_SYNC;
 	if (!ptp_ocp_gettimex(&bp->ptp_info, &ts, NULL))
 		dev_info(&bp->pdev->dev, "Time: %lld.%ld, %s\n",
 			 ts.tv_sec, ts.tv_nsec,
-			 sync ? "in-sync" : "UNSYNCED");
+			 bp->sync ? "in-sync" : "UNSYNCED");
 }
 
 static void
@@ -4177,12 +4223,168 @@ ptp_ocp_detach(struct ptp_ocp *bp)
 	device_unregister(&bp->dev);
 }
 
+static int ptp_ocp_dpll_lock_status_get(const struct dpll_device *dpll,
+					void *priv,
+					enum dpll_lock_status *status,
+					struct netlink_ext_ack *extack)
+{
+	struct ptp_ocp *bp = priv;
+
+	*status = bp->sync ? DPLL_LOCK_STATUS_LOCKED : DPLL_LOCK_STATUS_UNLOCKED;
+
+	return 0;
+}
+
+static int ptp_ocp_dpll_state_get(const struct dpll_pin *pin, void *pin_priv,
+				  const struct dpll_device *dpll, void *priv,
+				  enum dpll_pin_state *state,
+				  struct netlink_ext_ack *extack)
+{
+	struct ptp_ocp *bp = priv;
+	int idx;
+
+	if (bp->pps_select) {
+		idx = ioread32(&bp->pps_select->gpio1);
+		*state = (&bp->sma[idx] == pin_priv) ? DPLL_PIN_STATE_CONNECTED :
+						      DPLL_PIN_STATE_SELECTABLE;
+		return 0;
+	}
+	NL_SET_ERR_MSG(extack, "pin selection is not supported on current HW");
+	return -EINVAL;
+}
+
+static int ptp_ocp_dpll_mode_get(const struct dpll_device *dpll, void *priv,
+				 u32 *mode, struct netlink_ext_ack *extack)
+{
+	*mode = DPLL_MODE_AUTOMATIC;
+	return 0;
+}
+
+static bool ptp_ocp_dpll_mode_supported(const struct dpll_device *dpll,
+					void *priv, const enum dpll_mode mode,
+					struct netlink_ext_ack *extack)
+{
+	return mode == DPLL_MODE_AUTOMATIC;
+}
+
+static int ptp_ocp_dpll_direction_get(const struct dpll_pin *pin,
+				      void *pin_priv,
+				      const struct dpll_device *dpll,
+				      void *priv,
+				      enum dpll_pin_direction *direction,
+				      struct netlink_ext_ack *extack)
+{
+	struct ptp_ocp_sma_connector *sma = pin_priv;
+
+	*direction = sma->mode == SMA_MODE_IN ?
+				  DPLL_PIN_DIRECTION_INPUT :
+				  DPLL_PIN_DIRECTION_OUTPUT;
+	return 0;
+}
+
+static int ptp_ocp_dpll_direction_set(const struct dpll_pin *pin,
+				      void *pin_priv,
+				      const struct dpll_device *dpll,
+				      void *dpll_priv,
+				      enum dpll_pin_direction direction,
+				      struct netlink_ext_ack *extack)
+{
+	struct ptp_ocp_sma_connector *sma = pin_priv;
+	struct ptp_ocp *bp = dpll_priv;
+	enum ptp_ocp_sma_mode mode;
+	int sma_nr = (sma - bp->sma);
+
+	if (sma->fixed_dir)
+		return -EOPNOTSUPP;
+	mode = direction == DPLL_PIN_DIRECTION_INPUT ?
+			    SMA_MODE_IN : SMA_MODE_OUT;
+	return ptp_ocp_sma_store_val(bp, 0, mode, sma_nr);
+}
+
+static int ptp_ocp_dpll_frequency_set(const struct dpll_pin *pin,
+				      void *pin_priv,
+				      const struct dpll_device *dpll,
+				      void *dpll_priv, u64 frequency,
+				      struct netlink_ext_ack *extack)
+{
+	struct ptp_ocp_sma_connector *sma = pin_priv;
+	struct ptp_ocp *bp = dpll_priv;
+	const struct ocp_selector *tbl;
+	int sma_nr = (sma - bp->sma);
+	int i;
+
+	if (sma->fixed_fcn)
+		return -EOPNOTSUPP;
+
+	tbl = bp->sma_op->tbl[sma->mode];
+	for (i = 0; tbl[i].name; i++)
+		if (tbl[i].frequency == frequency)
+			return ptp_ocp_sma_store_val(bp, i, sma->mode, sma_nr);
+	return -EINVAL;
+}
+
+static int ptp_ocp_dpll_frequency_get(const struct dpll_pin *pin,
+				      void *pin_priv,
+				      const struct dpll_device *dpll,
+				      void *dpll_priv, u64 *frequency,
+				      struct netlink_ext_ack *extack)
+{
+	struct ptp_ocp_sma_connector *sma = pin_priv;
+	struct ptp_ocp *bp = dpll_priv;
+	const struct ocp_selector *tbl;
+	int sma_nr = (sma - bp->sma);
+	u32 val;
+	int i;
+
+	val = bp->sma_op->get(bp, sma_nr);
+	tbl = bp->sma_op->tbl[sma->mode];
+	for (i = 0; tbl[i].name; i++)
+		if (val == tbl[i].value) {
+			*frequency = tbl[i].frequency;
+			return 0;
+		}
+
+	return -EINVAL;
+}
+
+static const struct dpll_device_ops dpll_ops = {
+	.lock_status_get = ptp_ocp_dpll_lock_status_get,
+	.mode_get = ptp_ocp_dpll_mode_get,
+	.mode_supported = ptp_ocp_dpll_mode_supported,
+};
+
+static const struct dpll_pin_ops dpll_pins_ops = {
+	.frequency_get = ptp_ocp_dpll_frequency_get,
+	.frequency_set = ptp_ocp_dpll_frequency_set,
+	.direction_get = ptp_ocp_dpll_direction_get,
+	.direction_set = ptp_ocp_dpll_direction_set,
+	.state_on_dpll_get = ptp_ocp_dpll_state_get,
+};
+
+static void
+ptp_ocp_sync_work(struct work_struct *work)
+{
+	struct ptp_ocp *bp;
+	bool sync;
+
+	bp = container_of(work, struct ptp_ocp, sync_work.work);
+	sync = !!(ioread32(&bp->reg->status) & OCP_STATUS_IN_SYNC);
+
+	if (bp->sync != sync)
+		dpll_device_change_ntf(bp->dpll);
+
+	bp->sync = sync;
+
+	queue_delayed_work(system_power_efficient_wq, &bp->sync_work, HZ);
+}
+
 static int
 ptp_ocp_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
 	struct devlink *devlink;
 	struct ptp_ocp *bp;
-	int err;
+	int err, i;
+	u64 clkid;
 
 	devlink = devlink_alloc(&ptp_ocp_devlink_ops, sizeof(*bp), &pdev->dev);
 	if (!devlink) {
@@ -4201,6 +4403,8 @@ ptp_ocp_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	if (err)
 		goto out_disable;
 
+	INIT_DELAYED_WORK(&bp->sync_work, ptp_ocp_sync_work);
+
 	/* compat mode.
 	 * Older FPGA firmware only returns 2 irq's.
 	 * allow this - if not all of the IRQ's are returned, skip the
@@ -4232,8 +4436,43 @@ ptp_ocp_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 
 	ptp_ocp_info(bp);
 	devlink_register(devlink);
-	return 0;
 
+	clkid = pci_get_dsn(pdev);
+	bp->dpll = dpll_device_get(clkid, 0, THIS_MODULE);
+	if (IS_ERR(bp->dpll)) {
+		err = PTR_ERR(bp->dpll);
+		dev_err(&pdev->dev, "dpll_device_alloc failed\n");
+		goto out;
+	}
+
+	err = dpll_device_register(bp->dpll, DPLL_TYPE_PPS, &dpll_ops, bp);
+	if (err)
+		goto out;
+
+	for (i = 0; i < OCP_SMA_NUM; i++) {
+		bp->sma[i].dpll_pin = dpll_pin_get(clkid, i, THIS_MODULE, &bp->sma[i].dpll_prop);
+		if (IS_ERR(bp->sma[i].dpll_pin)) {
+			err = PTR_ERR(bp->dpll);
+			goto out_dpll;
+		}
+
+		err = dpll_pin_register(bp->dpll, bp->sma[i].dpll_pin, &dpll_pins_ops,
+					&bp->sma[i]);
+		if (err) {
+			dpll_pin_put(bp->sma[i].dpll_pin);
+			goto out_dpll;
+		}
+	}
+	queue_delayed_work(system_power_efficient_wq, &bp->sync_work, HZ);
+
+	return 0;
+out_dpll:
+	while (i) {
+		--i;
+		dpll_pin_unregister(bp->dpll, bp->sma[i].dpll_pin, &dpll_pins_ops, &bp->sma[i]);
+		dpll_pin_put(bp->sma[i].dpll_pin);
+	}
+	dpll_device_put(bp->dpll);
 out:
 	ptp_ocp_detach(bp);
 out_disable:
@@ -4248,7 +4487,17 @@ ptp_ocp_remove(struct pci_dev *pdev)
 {
 	struct ptp_ocp *bp = pci_get_drvdata(pdev);
 	struct devlink *devlink = priv_to_devlink(bp);
+	int i;
 
+	cancel_delayed_work_sync(&bp->sync_work);
+	for (i = 0; i < OCP_SMA_NUM; i++) {
+		if (bp->sma[i].dpll_pin) {
+			dpll_pin_unregister(bp->dpll, bp->sma[i].dpll_pin, &dpll_pins_ops, bp);
+			dpll_pin_put(bp->sma[i].dpll_pin);
+		}
+	}
+	dpll_device_unregister(bp->dpll, &dpll_ops, bp);
+	dpll_device_put(bp->dpll);
 	devlink_unregister(devlink);
 	ptp_ocp_detach(bp);
 	pci_disable_device(pdev);
diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
index 817d377a3f36..83711aad855c 100644
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c
@@ -114,6 +114,7 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
 		struct sk_buff *skb;
 		unsigned out, in;
 		size_t nbytes;
+		u32 offset;
 		int head;
 
 		skb = virtio_vsock_skb_dequeue(&vsock->send_pkt_queue);
@@ -156,7 +157,8 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
 		}
 
 		iov_iter_init(&iov_iter, ITER_DEST, &vq->iov[out], in, iov_len);
-		payload_len = skb->len;
+		offset = VIRTIO_VSOCK_SKB_CB(skb)->offset;
+		payload_len = skb->len - offset;
 		hdr = virtio_vsock_hdr(skb);
 
 		/* If the packet is greater than the space available in the
@@ -197,8 +199,10 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
 			break;
 		}
 
-		nbytes = copy_to_iter(skb->data, payload_len, &iov_iter);
-		if (nbytes != payload_len) {
+		if (skb_copy_datagram_iter(skb,
+					   offset,
+					   &iov_iter,
+					   payload_len)) {
 			kfree_skb(skb);
 			vq_err(vq, "Faulted on copying pkt buf\n");
 			break;
@@ -212,13 +216,13 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
 		vhost_add_used(vq, head, sizeof(*hdr) + payload_len);
 		added = true;
 
-		skb_pull(skb, payload_len);
+		VIRTIO_VSOCK_SKB_CB(skb)->offset += payload_len;
 		total_len += payload_len;
 
 		/* If we didn't send all the payload we can requeue the packet
 		 * to send it with the next available buffer.
 		 */
-		if (skb->len > 0) {
+		if (VIRTIO_VSOCK_SKB_CB(skb)->offset < skb->len) {
 			hdr->flags |= cpu_to_le32(flags_to_restore);
 
 			/* We are queueing the same skb to handle
diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h
index d0807ad43f93..dd71d3009771 100644
--- a/include/linux/avf/virtchnl.h
+++ b/include/linux/avf/virtchnl.h
@@ -240,6 +240,7 @@ VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_vsi_resource);
 #define VIRTCHNL_VF_OFFLOAD_REQ_QUEUES		BIT(6)
 /* used to negotiate communicating link speeds in Mbps */
 #define VIRTCHNL_VF_CAP_ADV_LINK_SPEED		BIT(7)
+#define  VIRTCHNL_VF_OFFLOAD_CRC		BIT(10)
 #define VIRTCHNL_VF_OFFLOAD_VLAN_V2		BIT(15)
 #define VIRTCHNL_VF_OFFLOAD_VLAN		BIT(16)
 #define VIRTCHNL_VF_OFFLOAD_RX_POLLING		BIT(17)
@@ -295,7 +296,13 @@ VIRTCHNL_CHECK_STRUCT_LEN(24, virtchnl_txq_info);
 /* VIRTCHNL_OP_CONFIG_RX_QUEUE
  * VF sends this message to set up parameters for one RX queue.
  * External data buffer contains one instance of virtchnl_rxq_info.
- * PF configures requested queue and returns a status code.
+ * PF configures requested queue and returns a status code. The
+ * crc_disable flag disables CRC stripping on the VF. Setting
+ * the crc_disable flag to 1 will disable CRC stripping for each
+ * queue in the VF where the flag is set. The VIRTCHNL_VF_OFFLOAD_CRC
+ * offload must have been set prior to sending this info or the PF
+ * will ignore the request. This flag should be set the same for
+ * all of the queues for a VF.
  */
 
 /* Rx queue config info */
@@ -307,7 +314,7 @@ struct virtchnl_rxq_info {
 	u16 splithdr_enabled; /* deprecated with AVF 1.0 */
 	u32 databuffer_size;
 	u32 max_pkt_size;
-	u8 pad0;
+	u8 crc_disable;
 	u8 rxdid;
 	u8 pad1[2];
 	u64 dma_ring_addr;
diff --git a/include/linux/bpf-cgroup-defs.h b/include/linux/bpf-cgroup-defs.h
index 7b121bd780eb..0985221d5478 100644
--- a/include/linux/bpf-cgroup-defs.h
+++ b/include/linux/bpf-cgroup-defs.h
@@ -28,19 +28,24 @@ enum cgroup_bpf_attach_type {
 	CGROUP_INET6_BIND,
 	CGROUP_INET4_CONNECT,
 	CGROUP_INET6_CONNECT,
+	CGROUP_UNIX_CONNECT,
 	CGROUP_INET4_POST_BIND,
 	CGROUP_INET6_POST_BIND,
 	CGROUP_UDP4_SENDMSG,
 	CGROUP_UDP6_SENDMSG,
+	CGROUP_UNIX_SENDMSG,
 	CGROUP_SYSCTL,
 	CGROUP_UDP4_RECVMSG,
 	CGROUP_UDP6_RECVMSG,
+	CGROUP_UNIX_RECVMSG,
 	CGROUP_GETSOCKOPT,
 	CGROUP_SETSOCKOPT,
 	CGROUP_INET4_GETPEERNAME,
 	CGROUP_INET6_GETPEERNAME,
+	CGROUP_UNIX_GETPEERNAME,
 	CGROUP_INET4_GETSOCKNAME,
 	CGROUP_INET6_GETSOCKNAME,
+	CGROUP_UNIX_GETSOCKNAME,
 	CGROUP_INET_SOCK_RELEASE,
 	CGROUP_LSM_START,
 	CGROUP_LSM_END = CGROUP_LSM_START + CGROUP_LSM_NUM - 1,
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 8506690dbb9c..98b8cea904fe 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -48,19 +48,24 @@ to_cgroup_bpf_attach_type(enum bpf_attach_type attach_type)
 	CGROUP_ATYPE(CGROUP_INET6_BIND);
 	CGROUP_ATYPE(CGROUP_INET4_CONNECT);
 	CGROUP_ATYPE(CGROUP_INET6_CONNECT);
+	CGROUP_ATYPE(CGROUP_UNIX_CONNECT);
 	CGROUP_ATYPE(CGROUP_INET4_POST_BIND);
 	CGROUP_ATYPE(CGROUP_INET6_POST_BIND);
 	CGROUP_ATYPE(CGROUP_UDP4_SENDMSG);
 	CGROUP_ATYPE(CGROUP_UDP6_SENDMSG);
+	CGROUP_ATYPE(CGROUP_UNIX_SENDMSG);
 	CGROUP_ATYPE(CGROUP_SYSCTL);
 	CGROUP_ATYPE(CGROUP_UDP4_RECVMSG);
 	CGROUP_ATYPE(CGROUP_UDP6_RECVMSG);
+	CGROUP_ATYPE(CGROUP_UNIX_RECVMSG);
 	CGROUP_ATYPE(CGROUP_GETSOCKOPT);
 	CGROUP_ATYPE(CGROUP_SETSOCKOPT);
 	CGROUP_ATYPE(CGROUP_INET4_GETPEERNAME);
 	CGROUP_ATYPE(CGROUP_INET6_GETPEERNAME);
+	CGROUP_ATYPE(CGROUP_UNIX_GETPEERNAME);
 	CGROUP_ATYPE(CGROUP_INET4_GETSOCKNAME);
 	CGROUP_ATYPE(CGROUP_INET6_GETSOCKNAME);
+	CGROUP_ATYPE(CGROUP_UNIX_GETSOCKNAME);
 	CGROUP_ATYPE(CGROUP_INET_SOCK_RELEASE);
 	default:
 		return CGROUP_BPF_ATTACH_TYPE_INVALID;
@@ -120,6 +125,7 @@ int __cgroup_bpf_run_filter_sk(struct sock *sk,
 
 int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
 				      struct sockaddr *uaddr,
+				      int *uaddrlen,
 				      enum cgroup_bpf_attach_type atype,
 				      void *t_ctx,
 				      u32 *flags);
@@ -230,22 +236,22 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk,
 #define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk)				       \
 	BPF_CGROUP_RUN_SK_PROG(sk, CGROUP_INET6_POST_BIND)
 
-#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, atype)				       \
+#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, uaddrlen, atype)		       \
 ({									       \
 	int __ret = 0;							       \
 	if (cgroup_bpf_enabled(atype))					       \
-		__ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, atype,     \
-							  NULL, NULL);	       \
+		__ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, uaddrlen, \
+							  atype, NULL, NULL);  \
 	__ret;								       \
 })
 
-#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, atype, t_ctx)		       \
+#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, atype, t_ctx)	       \
 ({									       \
 	int __ret = 0;							       \
 	if (cgroup_bpf_enabled(atype))	{				       \
 		lock_sock(sk);						       \
-		__ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, atype,     \
-							  t_ctx, NULL);	       \
+		__ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, uaddrlen, \
+							  atype, t_ctx, NULL); \
 		release_sock(sk);					       \
 	}								       \
 	__ret;								       \
@@ -256,14 +262,14 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk,
  * (at bit position 0) is to indicate CAP_NET_BIND_SERVICE capability check
  * should be bypassed (BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE).
  */
-#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, atype, bind_flags)	       \
+#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, uaddrlen, atype, bind_flags) \
 ({									       \
 	u32 __flags = 0;						       \
 	int __ret = 0;							       \
 	if (cgroup_bpf_enabled(atype))	{				       \
 		lock_sock(sk);						       \
-		__ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, atype,     \
-							  NULL, &__flags);     \
+		__ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, uaddrlen, \
+							  atype, NULL, &__flags); \
 		release_sock(sk);					       \
 		if (__flags & BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE)	       \
 			*bind_flags |= BIND_NO_CAP_NET_BIND_SERVICE;	       \
@@ -276,29 +282,38 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk,
 	  cgroup_bpf_enabled(CGROUP_INET6_CONNECT)) &&		       \
 	 (sk)->sk_prot->pre_connect)
 
-#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr)			       \
-	BPF_CGROUP_RUN_SA_PROG(sk, uaddr, CGROUP_INET4_CONNECT)
+#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr, uaddrlen)			\
+	BPF_CGROUP_RUN_SA_PROG(sk, uaddr, uaddrlen, CGROUP_INET4_CONNECT)
 
-#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr)			       \
-	BPF_CGROUP_RUN_SA_PROG(sk, uaddr, CGROUP_INET6_CONNECT)
+#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr, uaddrlen)			\
+	BPF_CGROUP_RUN_SA_PROG(sk, uaddr, uaddrlen, CGROUP_INET6_CONNECT)
 
-#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr)		       \
-	BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_INET4_CONNECT, NULL)
+#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr, uaddrlen)		\
+	BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_INET4_CONNECT, NULL)
 
-#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr)		       \
-	BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_INET6_CONNECT, NULL)
+#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr, uaddrlen)		\
+	BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_INET6_CONNECT, NULL)
 
-#define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, t_ctx)		       \
-	BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_UDP4_SENDMSG, t_ctx)
+#define BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, uaddr, uaddrlen)		\
+	BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UNIX_CONNECT, NULL)
 
-#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, t_ctx)		       \
-	BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_UDP6_SENDMSG, t_ctx)
+#define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx)	\
+	BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP4_SENDMSG, t_ctx)
 
-#define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr)			\
-	BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_UDP4_RECVMSG, NULL)
+#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx)	\
+	BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP6_SENDMSG, t_ctx)
 
-#define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr)			\
-	BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_UDP6_RECVMSG, NULL)
+#define BPF_CGROUP_RUN_PROG_UNIX_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx)	\
+	BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UNIX_SENDMSG, t_ctx)
+
+#define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr, uaddrlen)		\
+	BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP4_RECVMSG, NULL)
+
+#define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr, uaddrlen)		\
+	BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP6_RECVMSG, NULL)
+
+#define BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk, uaddr, uaddrlen)		\
+	BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UNIX_RECVMSG, NULL)
 
 /* The SOCK_OPS"_SK" macro should be used when sock_ops->sk is not a
  * fullsock and its parent fullsock cannot be traced by
@@ -477,24 +492,27 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map,
 }
 
 #define cgroup_bpf_enabled(atype) (0)
-#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, atype, t_ctx) ({ 0; })
-#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, atype) ({ 0; })
+#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, atype, t_ctx) ({ 0; })
+#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, uaddrlen, atype) ({ 0; })
 #define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (0)
 #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_INET_SOCK_RELEASE(sk) ({ 0; })
-#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, atype, flags) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, uaddrlen, atype, flags) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk) ({ 0; })
-#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) ({ 0; })
-#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr) ({ 0; })
-#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr) ({ 0; })
-#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr) ({ 0; })
-#define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, t_ctx) ({ 0; })
-#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, t_ctx) ({ 0; })
-#define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr) ({ 0; })
-#define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr, uaddrlen) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr, uaddrlen) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr, uaddrlen) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr, uaddrlen) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, uaddr, uaddrlen) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_UNIX_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr, uaddrlen) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr, uaddrlen) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk, uaddr, uaddrlen) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(atype, major, minor, access) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_SYSCTL(head,table,write,buf,count,pos) ({ 0; })
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 49f8b691496c..d3c51a507508 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -55,8 +55,8 @@ struct cgroup;
 extern struct idr btf_idr;
 extern spinlock_t btf_idr_lock;
 extern struct kobject *btf_kobj;
-extern struct bpf_mem_alloc bpf_global_ma;
-extern bool bpf_global_ma_set;
+extern struct bpf_mem_alloc bpf_global_ma, bpf_global_percpu_ma;
+extern bool bpf_global_ma_set, bpf_global_percpu_ma_set;
 
 typedef u64 (*bpf_callback_t)(u64, u64, u64, u64, u64);
 typedef int (*bpf_iter_init_seq_priv_t)(void *private_data,
@@ -180,14 +180,15 @@ enum btf_field_type {
 	BPF_TIMER      = (1 << 1),
 	BPF_KPTR_UNREF = (1 << 2),
 	BPF_KPTR_REF   = (1 << 3),
-	BPF_KPTR       = BPF_KPTR_UNREF | BPF_KPTR_REF,
-	BPF_LIST_HEAD  = (1 << 4),
-	BPF_LIST_NODE  = (1 << 5),
-	BPF_RB_ROOT    = (1 << 6),
-	BPF_RB_NODE    = (1 << 7),
+	BPF_KPTR_PERCPU = (1 << 4),
+	BPF_KPTR       = BPF_KPTR_UNREF | BPF_KPTR_REF | BPF_KPTR_PERCPU,
+	BPF_LIST_HEAD  = (1 << 5),
+	BPF_LIST_NODE  = (1 << 6),
+	BPF_RB_ROOT    = (1 << 7),
+	BPF_RB_NODE    = (1 << 8),
 	BPF_GRAPH_NODE_OR_ROOT = BPF_LIST_NODE | BPF_LIST_HEAD |
 				 BPF_RB_NODE | BPF_RB_ROOT,
-	BPF_REFCOUNT   = (1 << 8),
+	BPF_REFCOUNT   = (1 << 9),
 };
 
 typedef void (*btf_dtor_kfunc_t)(void *);
@@ -300,6 +301,8 @@ static inline const char *btf_field_type_name(enum btf_field_type type)
 	case BPF_KPTR_UNREF:
 	case BPF_KPTR_REF:
 		return "kptr";
+	case BPF_KPTR_PERCPU:
+		return "percpu_kptr";
 	case BPF_LIST_HEAD:
 		return "bpf_list_head";
 	case BPF_LIST_NODE:
@@ -325,6 +328,7 @@ static inline u32 btf_field_type_size(enum btf_field_type type)
 		return sizeof(struct bpf_timer);
 	case BPF_KPTR_UNREF:
 	case BPF_KPTR_REF:
+	case BPF_KPTR_PERCPU:
 		return sizeof(u64);
 	case BPF_LIST_HEAD:
 		return sizeof(struct bpf_list_head);
@@ -351,6 +355,7 @@ static inline u32 btf_field_type_align(enum btf_field_type type)
 		return __alignof__(struct bpf_timer);
 	case BPF_KPTR_UNREF:
 	case BPF_KPTR_REF:
+	case BPF_KPTR_PERCPU:
 		return __alignof__(u64);
 	case BPF_LIST_HEAD:
 		return __alignof__(struct bpf_list_head);
@@ -389,6 +394,7 @@ static inline void bpf_obj_init_field(const struct btf_field *field, void *addr)
 	case BPF_TIMER:
 	case BPF_KPTR_UNREF:
 	case BPF_KPTR_REF:
+	case BPF_KPTR_PERCPU:
 		break;
 	default:
 		WARN_ON_ONCE(1);
@@ -1029,6 +1035,11 @@ struct btf_func_model {
  */
 #define BPF_TRAMP_F_SHARE_IPMODIFY	BIT(6)
 
+/* Indicate that current trampoline is in a tail call context. Then, it has to
+ * cache and restore tail_call_cnt to avoid infinite tail call loop.
+ */
+#define BPF_TRAMP_F_TAIL_CALL_CTX	BIT(7)
+
 /* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50
  * bytes on x86.
  */
@@ -1378,6 +1389,7 @@ struct bpf_prog_aux {
 	u32 stack_depth;
 	u32 id;
 	u32 func_cnt; /* used by non-func prog as the number of func progs */
+	u32 real_func_cnt; /* includes hidden progs, only used for JIT and freeing progs */
 	u32 func_idx; /* 0 for non-func prog, the index in func array for func prog */
 	u32 attach_btf_id; /* in-kernel BTF type id to attach to */
 	u32 ctx_arg_info_size;
@@ -1398,6 +1410,8 @@ struct bpf_prog_aux {
 	bool sleepable;
 	bool tail_call_reachable;
 	bool xdp_has_frags;
+	bool exception_cb;
+	bool exception_boundary;
 	/* BTF_KIND_FUNC_PROTO for valid attach_btf_id */
 	const struct btf_type *attach_func_proto;
 	/* function name for valid attach_btf_id */
@@ -1420,6 +1434,7 @@ struct bpf_prog_aux {
 	int cgroup_atype; /* enum cgroup_bpf_attach_type */
 	struct bpf_map *cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE];
 	char name[BPF_OBJ_NAME_LEN];
+	unsigned int (*bpf_exception_cb)(u64 cookie, u64 sp, u64 bp);
 #ifdef CONFIG_SECURITY
 	void *security;
 #endif
@@ -2149,12 +2164,12 @@ static inline bool bpf_allow_uninit_stack(void)
 
 static inline bool bpf_bypass_spec_v1(void)
 {
-	return perfmon_capable();
+	return cpu_mitigations_off() || perfmon_capable();
 }
 
 static inline bool bpf_bypass_spec_v4(void)
 {
-	return perfmon_capable();
+	return cpu_mitigations_off() || perfmon_capable();
 }
 
 int bpf_map_new_fd(struct bpf_map *map, int flags);
@@ -2407,9 +2422,11 @@ int btf_check_subprog_arg_match(struct bpf_verifier_env *env, int subprog,
 int btf_check_subprog_call(struct bpf_verifier_env *env, int subprog,
 			   struct bpf_reg_state *regs);
 int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog,
-			  struct bpf_reg_state *reg);
+			  struct bpf_reg_state *reg, bool is_ex_cb);
 int btf_check_type_match(struct bpf_verifier_log *log, const struct bpf_prog *prog,
 			 struct btf *btf, const struct btf_type *t);
+const char *btf_find_decl_tag_value(const struct btf *btf, const struct btf_type *pt,
+				    int comp_idx, const char *tag_key);
 
 struct bpf_prog *bpf_prog_by_id(u32 id);
 struct bpf_link *bpf_link_by_id(u32 id);
@@ -2905,6 +2922,22 @@ static inline int sock_map_bpf_prog_query(const union bpf_attr *attr,
 #endif /* CONFIG_BPF_SYSCALL */
 #endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */
 
+static __always_inline void
+bpf_prog_inc_misses_counters(const struct bpf_prog_array *array)
+{
+	const struct bpf_prog_array_item *item;
+	struct bpf_prog *prog;
+
+	if (unlikely(!array))
+		return;
+
+	item = &array->items[0];
+	while ((prog = READ_ONCE(item->prog))) {
+		bpf_prog_inc_misses_counter(prog);
+		item++;
+	}
+}
+
 #if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL)
 void bpf_sk_reuseport_detach(struct sock *sk);
 int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key,
@@ -3183,4 +3216,9 @@ static inline gfp_t bpf_memcg_flags(gfp_t flags)
 	return flags;
 }
 
+static inline bool bpf_is_subprog(const struct bpf_prog *prog)
+{
+	return prog->aux->func_idx != 0;
+}
+
 #endif /* _LINUX_BPF_H */
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index b6e58dab8e27..94ec766432f5 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -300,6 +300,7 @@ struct bpf_func_state {
 	bool in_callback_fn;
 	struct tnum callback_ret_range;
 	bool in_async_callback_fn;
+	bool in_exception_callback_fn;
 
 	/* The following fields should be last. See copy_func_state() */
 	int acquired_refs;
@@ -480,6 +481,7 @@ struct bpf_insn_aux_data {
 	bool zext_dst; /* this insn zero extends dst reg */
 	bool storage_get_func_atomic; /* bpf_*_storage_get() with atomic memory alloc */
 	bool is_iter_next; /* bpf_iter_<type>_next() kfunc call */
+	bool call_with_percpu_alloc_ptr; /* {this,per}_cpu_ptr() with prog percpu alloc */
 	u8 alu_state; /* used in combination with alu_limit */
 
 	/* below fields are initialized once */
@@ -540,7 +542,9 @@ struct bpf_subprog_info {
 	bool has_tail_call;
 	bool tail_call_reachable;
 	bool has_ld_abs;
+	bool is_cb;
 	bool is_async_cb;
+	bool is_exception_cb;
 };
 
 struct bpf_verifier_env;
@@ -587,6 +591,8 @@ struct bpf_verifier_env {
 	u32 used_map_cnt;		/* number of used maps */
 	u32 used_btf_cnt;		/* number of used BTF objects */
 	u32 id_gen;			/* used to generate unique reg IDs */
+	u32 hidden_subprog_cnt;		/* number of hidden subprogs */
+	int exception_callback_subprog;
 	bool explore_alu_limits;
 	bool allow_ptr_leaks;
 	bool allow_uninit_stack;
@@ -594,10 +600,11 @@ struct bpf_verifier_env {
 	bool bypass_spec_v1;
 	bool bypass_spec_v4;
 	bool seen_direct_write;
+	bool seen_exception;
 	struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */
 	const struct bpf_line_info *prev_linfo;
 	struct bpf_verifier_log log;
-	struct bpf_subprog_info subprog_info[BPF_MAX_SUBPROGS + 1];
+	struct bpf_subprog_info subprog_info[BPF_MAX_SUBPROGS + 2]; /* max + 2 for the fake and exception subprogs */
 	union {
 		struct bpf_idmap idmap_scratch;
 		struct bpf_idset idset_scratch;
diff --git a/include/linux/ceph/mon_client.h b/include/linux/ceph/mon_client.h
index b658961156a0..7a9a40163c0f 100644
--- a/include/linux/ceph/mon_client.h
+++ b/include/linux/ceph/mon_client.h
@@ -19,7 +19,7 @@ struct ceph_monmap {
 	struct ceph_fsid fsid;
 	u32 epoch;
 	u32 num_mon;
-	struct ceph_entity_inst mon_inst[];
+	struct ceph_entity_inst mon_inst[] __counted_by(num_mon);
 };
 
 struct ceph_mon_client;
diff --git a/include/linux/dpll.h b/include/linux/dpll.h
new file mode 100644
index 000000000000..bbc480cd2932
--- /dev/null
+++ b/include/linux/dpll.h
@@ -0,0 +1,152 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *  Copyright (c) 2023 Meta Platforms, Inc. and affiliates
+ *  Copyright (c) 2023 Intel and affiliates
+ */
+
+#ifndef __DPLL_H__
+#define __DPLL_H__
+
+#include <uapi/linux/dpll.h>
+#include <linux/device.h>
+#include <linux/netlink.h>
+
+struct dpll_device;
+struct dpll_pin;
+
+struct dpll_device_ops {
+	int (*mode_get)(const struct dpll_device *dpll, void *dpll_priv,
+			enum dpll_mode *mode, struct netlink_ext_ack *extack);
+	bool (*mode_supported)(const struct dpll_device *dpll, void *dpll_priv,
+			       const enum dpll_mode mode,
+			       struct netlink_ext_ack *extack);
+	int (*lock_status_get)(const struct dpll_device *dpll, void *dpll_priv,
+			       enum dpll_lock_status *status,
+			       struct netlink_ext_ack *extack);
+	int (*temp_get)(const struct dpll_device *dpll, void *dpll_priv,
+			s32 *temp, struct netlink_ext_ack *extack);
+};
+
+struct dpll_pin_ops {
+	int (*frequency_set)(const struct dpll_pin *pin, void *pin_priv,
+			     const struct dpll_device *dpll, void *dpll_priv,
+			     const u64 frequency,
+			     struct netlink_ext_ack *extack);
+	int (*frequency_get)(const struct dpll_pin *pin, void *pin_priv,
+			     const struct dpll_device *dpll, void *dpll_priv,
+			     u64 *frequency, struct netlink_ext_ack *extack);
+	int (*direction_set)(const struct dpll_pin *pin, void *pin_priv,
+			     const struct dpll_device *dpll, void *dpll_priv,
+			     const enum dpll_pin_direction direction,
+			     struct netlink_ext_ack *extack);
+	int (*direction_get)(const struct dpll_pin *pin, void *pin_priv,
+			     const struct dpll_device *dpll, void *dpll_priv,
+			     enum dpll_pin_direction *direction,
+			     struct netlink_ext_ack *extack);
+	int (*state_on_pin_get)(const struct dpll_pin *pin, void *pin_priv,
+				const struct dpll_pin *parent_pin,
+				void *parent_pin_priv,
+				enum dpll_pin_state *state,
+				struct netlink_ext_ack *extack);
+	int (*state_on_dpll_get)(const struct dpll_pin *pin, void *pin_priv,
+				 const struct dpll_device *dpll,
+				 void *dpll_priv, enum dpll_pin_state *state,
+				 struct netlink_ext_ack *extack);
+	int (*state_on_pin_set)(const struct dpll_pin *pin, void *pin_priv,
+				const struct dpll_pin *parent_pin,
+				void *parent_pin_priv,
+				const enum dpll_pin_state state,
+				struct netlink_ext_ack *extack);
+	int (*state_on_dpll_set)(const struct dpll_pin *pin, void *pin_priv,
+				 const struct dpll_device *dpll,
+				 void *dpll_priv,
+				 const enum dpll_pin_state state,
+				 struct netlink_ext_ack *extack);
+	int (*prio_get)(const struct dpll_pin *pin,  void *pin_priv,
+			const struct dpll_device *dpll,  void *dpll_priv,
+			u32 *prio, struct netlink_ext_ack *extack);
+	int (*prio_set)(const struct dpll_pin *pin, void *pin_priv,
+			const struct dpll_device *dpll, void *dpll_priv,
+			const u32 prio, struct netlink_ext_ack *extack);
+};
+
+struct dpll_pin_frequency {
+	u64 min;
+	u64 max;
+};
+
+#define DPLL_PIN_FREQUENCY_RANGE(_min, _max)	\
+	{					\
+		.min = _min,			\
+		.max = _max,			\
+	}
+
+#define DPLL_PIN_FREQUENCY(_val) DPLL_PIN_FREQUENCY_RANGE(_val, _val)
+#define DPLL_PIN_FREQUENCY_1PPS \
+	DPLL_PIN_FREQUENCY(DPLL_PIN_FREQUENCY_1_HZ)
+#define DPLL_PIN_FREQUENCY_10MHZ \
+	DPLL_PIN_FREQUENCY(DPLL_PIN_FREQUENCY_10_MHZ)
+#define DPLL_PIN_FREQUENCY_IRIG_B \
+	DPLL_PIN_FREQUENCY(DPLL_PIN_FREQUENCY_10_KHZ)
+#define DPLL_PIN_FREQUENCY_DCF77 \
+	DPLL_PIN_FREQUENCY(DPLL_PIN_FREQUENCY_77_5_KHZ)
+
+struct dpll_pin_properties {
+	const char *board_label;
+	const char *panel_label;
+	const char *package_label;
+	enum dpll_pin_type type;
+	unsigned long capabilities;
+	u32 freq_supported_num;
+	struct dpll_pin_frequency *freq_supported;
+};
+
+#if IS_ENABLED(CONFIG_DPLL)
+size_t dpll_msg_pin_handle_size(struct dpll_pin *pin);
+int dpll_msg_add_pin_handle(struct sk_buff *msg, struct dpll_pin *pin);
+#else
+static inline size_t dpll_msg_pin_handle_size(struct dpll_pin *pin)
+{
+	return 0;
+}
+
+static inline int dpll_msg_add_pin_handle(struct sk_buff *msg, struct dpll_pin *pin)
+{
+	return 0;
+}
+#endif
+
+struct dpll_device *
+dpll_device_get(u64 clock_id, u32 dev_driver_id, struct module *module);
+
+void dpll_device_put(struct dpll_device *dpll);
+
+int dpll_device_register(struct dpll_device *dpll, enum dpll_type type,
+			 const struct dpll_device_ops *ops, void *priv);
+
+void dpll_device_unregister(struct dpll_device *dpll,
+			    const struct dpll_device_ops *ops, void *priv);
+
+struct dpll_pin *
+dpll_pin_get(u64 clock_id, u32 dev_driver_id, struct module *module,
+	     const struct dpll_pin_properties *prop);
+
+int dpll_pin_register(struct dpll_device *dpll, struct dpll_pin *pin,
+		      const struct dpll_pin_ops *ops, void *priv);
+
+void dpll_pin_unregister(struct dpll_device *dpll, struct dpll_pin *pin,
+			 const struct dpll_pin_ops *ops, void *priv);
+
+void dpll_pin_put(struct dpll_pin *pin);
+
+int dpll_pin_on_pin_register(struct dpll_pin *parent, struct dpll_pin *pin,
+			     const struct dpll_pin_ops *ops, void *priv);
+
+void dpll_pin_on_pin_unregister(struct dpll_pin *parent, struct dpll_pin *pin,
+				const struct dpll_pin_ops *ops, void *priv);
+
+int dpll_device_change_ntf(struct dpll_device *dpll);
+
+int dpll_pin_change_ntf(struct dpll_pin *pin);
+
+#endif
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 761af6b3cf2b..bcd2bc15ff56 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -117,21 +117,25 @@ struct ctl_table_header;
 
 /* ALU ops on immediates, bpf_add|sub|...: dst_reg += imm32 */
 
-#define BPF_ALU64_IMM(OP, DST, IMM)				\
+#define BPF_ALU64_IMM_OFF(OP, DST, IMM, OFF)			\
 	((struct bpf_insn) {					\
 		.code  = BPF_ALU64 | BPF_OP(OP) | BPF_K,	\
 		.dst_reg = DST,					\
 		.src_reg = 0,					\
-		.off   = 0,					\
+		.off   = OFF,					\
 		.imm   = IMM })
+#define BPF_ALU64_IMM(OP, DST, IMM)				\
+	BPF_ALU64_IMM_OFF(OP, DST, IMM, 0)
 
-#define BPF_ALU32_IMM(OP, DST, IMM)				\
+#define BPF_ALU32_IMM_OFF(OP, DST, IMM, OFF)			\
 	((struct bpf_insn) {					\
 		.code  = BPF_ALU | BPF_OP(OP) | BPF_K,		\
 		.dst_reg = DST,					\
 		.src_reg = 0,					\
-		.off   = 0,					\
+		.off   = OFF,					\
 		.imm   = IMM })
+#define BPF_ALU32_IMM(OP, DST, IMM)				\
+	BPF_ALU32_IMM_OFF(OP, DST, IMM, 0)
 
 /* Endianess conversion, cpu_to_{l,b}e(), {l,b}e_to_cpu() */
 
@@ -143,6 +147,16 @@ struct ctl_table_header;
 		.off   = 0,					\
 		.imm   = LEN })
 
+/* Byte Swap, bswap16/32/64 */
+
+#define BPF_BSWAP(DST, LEN)					\
+	((struct bpf_insn) {					\
+		.code  = BPF_ALU64 | BPF_END | BPF_SRC(BPF_TO_LE),	\
+		.dst_reg = DST,					\
+		.src_reg = 0,					\
+		.off   = 0,					\
+		.imm   = LEN })
+
 /* Short form of mov, dst_reg = src_reg */
 
 #define BPF_MOV64_REG(DST, SRC)					\
@@ -179,6 +193,24 @@ struct ctl_table_header;
 		.off   = 0,					\
 		.imm   = IMM })
 
+/* Short form of movsx, dst_reg = (s8,s16,s32)src_reg */
+
+#define BPF_MOVSX64_REG(DST, SRC, OFF)				\
+	((struct bpf_insn) {					\
+		.code  = BPF_ALU64 | BPF_MOV | BPF_X,		\
+		.dst_reg = DST,					\
+		.src_reg = SRC,					\
+		.off   = OFF,					\
+		.imm   = 0 })
+
+#define BPF_MOVSX32_REG(DST, SRC, OFF)				\
+	((struct bpf_insn) {					\
+		.code  = BPF_ALU | BPF_MOV | BPF_X,		\
+		.dst_reg = DST,					\
+		.src_reg = SRC,					\
+		.off   = OFF,					\
+		.imm   = 0 })
+
 /* Special form of mov32, used for doing explicit zero extension on dst. */
 #define BPF_ZEXT_REG(DST)					\
 	((struct bpf_insn) {					\
@@ -263,6 +295,16 @@ static inline bool insn_is_zext(const struct bpf_insn *insn)
 		.off   = OFF,					\
 		.imm   = 0 })
 
+/* Memory load, dst_reg = *(signed size *) (src_reg + off16) */
+
+#define BPF_LDX_MEMSX(SIZE, DST, SRC, OFF)			\
+	((struct bpf_insn) {					\
+		.code  = BPF_LDX | BPF_SIZE(SIZE) | BPF_MEMSX,	\
+		.dst_reg = DST,					\
+		.src_reg = SRC,					\
+		.off   = OFF,					\
+		.imm   = 0 })
+
 /* Memory store, *(uint *) (dst_reg + off16) = src_reg */
 
 #define BPF_STX_MEM(SIZE, DST, SRC, OFF)			\
@@ -694,7 +736,7 @@ static inline void bpf_compute_and_save_data_end(
 	cb->data_end  = skb->data + skb_headlen(skb);
 }
 
-/* Restore data saved by bpf_compute_data_pointers(). */
+/* Restore data saved by bpf_compute_and_save_data_end(). */
 static inline void bpf_restore_data_end(
 	struct sk_buff *skb, void *saved_data_end)
 {
@@ -912,6 +954,8 @@ bool bpf_jit_needs_zext(void);
 bool bpf_jit_supports_subprog_tailcalls(void);
 bool bpf_jit_supports_kfunc_call(void);
 bool bpf_jit_supports_far_kfunc_call(void);
+bool bpf_jit_supports_exceptions(void);
+void arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp), void *cookie);
 bool bpf_helper_changes_pkt_data(void *func);
 
 static inline bool bpf_dump_raw_ok(const struct cred *cred)
@@ -1127,6 +1171,7 @@ const char *__bpf_address_lookup(unsigned long addr, unsigned long *size,
 bool is_bpf_text_address(unsigned long addr);
 int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
 		    char *sym);
+struct bpf_prog *bpf_prog_ksym_find(unsigned long addr);
 
 static inline const char *
 bpf_address_lookup(unsigned long addr, unsigned long *size,
@@ -1194,6 +1239,11 @@ static inline int bpf_get_kallsym(unsigned int symnum, unsigned long *value,
 	return -ERANGE;
 }
 
+static inline struct bpf_prog *bpf_prog_ksym_find(unsigned long addr)
+{
+	return NULL;
+}
+
 static inline const char *
 bpf_address_lookup(unsigned long addr, unsigned long *size,
 		   unsigned long *off, char **modname, char *sym)
@@ -1285,6 +1335,7 @@ struct bpf_sock_addr_kern {
 	 */
 	u64 tmp_reg;
 	void *t_ctx;	/* Attach type specific context. */
+	u32 uaddrlen;
 };
 
 struct bpf_sock_ops_kern {
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index af8a771a053c..e400ff757f13 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -213,28 +213,9 @@ struct ipv6_pinfo {
 	__be32			flow_label;
 	__u32			frag_size;
 
-	/*
-	 * Packed in 16bits.
-	 * Omit one shift by putting the signed field at MSB.
-	 */
-#if defined(__BIG_ENDIAN_BITFIELD)
-	__s16			hop_limit:9;
-	__u16			__unused_1:7;
-#else
-	__u16			__unused_1:7;
-	__s16			hop_limit:9;
-#endif
+	s16			hop_limit;
+	u8			mcast_hops;
 
-#if defined(__BIG_ENDIAN_BITFIELD)
-	/* Packed in 16bits. */
-	__s16			mcast_hops:9;
-	__u16			__unused_2:6,
-				mc_loop:1;
-#else
-	__u16			mc_loop:1,
-				__unused_2:6;
-	__s16			mcast_hops:9;
-#endif
 	int			ucast_oif;
 	int			mcast_oif;
 
@@ -262,21 +243,11 @@ struct ipv6_pinfo {
 	} rxopt;
 
 	/* sockopt flags */
-	__u16			recverr:1,
-	                        sndflow:1,
-				repflow:1,
-				pmtudisc:3,
-				padding:1,	/* 1 bit hole */
-				srcprefs:3,	/* 001: prefer temporary address
+	__u8			srcprefs;	/* 001: prefer temporary address
 						 * 010: prefer public address
 						 * 100: prefer care-of address
 						 */
-				dontfrag:1,
-				autoflowlabel:1,
-				autoflowlabel_set:1,
-				mc_all:1,
-				recverr_rfc4884:1,
-				rtalert_isolate:1;
+	__u8			pmtudisc;
 	__u8			min_hopcount;
 	__u8			tclass;
 	__be32			rcv_flowinfo;
@@ -293,6 +264,18 @@ struct ipv6_pinfo {
 	struct inet6_cork	cork;
 };
 
+/* We currently use available bits from inet_sk(sk)->inet_flags,
+ * this could change in the future.
+ */
+#define inet6_test_bit(nr, sk)			\
+	test_bit(INET_FLAGS_##nr, &inet_sk(sk)->inet_flags)
+#define inet6_set_bit(nr, sk)			\
+	set_bit(INET_FLAGS_##nr, &inet_sk(sk)->inet_flags)
+#define inet6_clear_bit(nr, sk)			\
+	clear_bit(INET_FLAGS_##nr, &inet_sk(sk)->inet_flags)
+#define inet6_assign_bit(nr, sk, val)		\
+	assign_bit(INET_FLAGS_##nr, &inet_sk(sk)->inet_flags, val)
+
 /* WARNING: don't change the layout of the members in {raw,udp,tcp}6_sock! */
 struct raw6_sock {
 	/* inet_sock has to be the first member of raw6_sock */
diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index 71fa9a40fb5a..72cb693b075b 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -285,8 +285,10 @@ static inline bool kasan_check_byte(const void *address)
 
 #if defined(CONFIG_KASAN) && defined(CONFIG_KASAN_STACK)
 void kasan_unpoison_task_stack(struct task_struct *task);
+asmlinkage void kasan_unpoison_task_stack_below(const void *watermark);
 #else
 static inline void kasan_unpoison_task_stack(struct task_struct *task) {}
+static inline void kasan_unpoison_task_stack_below(const void *watermark) {}
 #endif
 
 #ifdef CONFIG_KASAN_GENERIC
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 4d5be378fa8c..8fbe22de16ef 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -366,6 +366,7 @@ enum mlx5_driver_event {
 	MLX5_DRIVER_EVENT_UPLINK_NETDEV,
 	MLX5_DRIVER_EVENT_MACSEC_SA_ADDED,
 	MLX5_DRIVER_EVENT_MACSEC_SA_DELETED,
+	MLX5_DRIVER_EVENT_SF_PEER_DEVLINK,
 };
 
 enum {
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 3033bbaeac81..92434814c855 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -155,6 +155,8 @@ enum {
 	MLX5_REG_MCC		 = 0x9062,
 	MLX5_REG_MCDA		 = 0x9063,
 	MLX5_REG_MCAM		 = 0x907f,
+	MLX5_REG_MSECQ		 = 0x9155,
+	MLX5_REG_MSEES		 = 0x9156,
 	MLX5_REG_MIRC		 = 0x9162,
 	MLX5_REG_SBCAM		 = 0xB01F,
 	MLX5_REG_RESOURCE_DUMP   = 0xC000,
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index fc3db401f8a2..dd8421d021cf 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -10176,7 +10176,9 @@ struct mlx5_ifc_mcam_access_reg_bits2 {
 	u8         mirc[0x1];
 	u8         regs_97_to_96[0x2];
 
-	u8         regs_95_to_64[0x20];
+	u8         regs_95_to_87[0x09];
+	u8         synce_registers[0x2];
+	u8         regs_84_to_64[0x15];
 
 	u8         regs_63_to_32[0x20];
 
@@ -12549,4 +12551,59 @@ struct mlx5_ifc_modify_page_track_obj_in_bits {
 	struct mlx5_ifc_page_track_bits obj_context;
 };
 
+struct mlx5_ifc_msecq_reg_bits {
+	u8         reserved_at_0[0x20];
+
+	u8         reserved_at_20[0x12];
+	u8         network_option[0x2];
+	u8         local_ssm_code[0x4];
+	u8         local_enhanced_ssm_code[0x8];
+
+	u8         local_clock_identity[0x40];
+
+	u8         reserved_at_80[0x180];
+};
+
+enum {
+	MLX5_MSEES_FIELD_SELECT_ENABLE			= BIT(0),
+	MLX5_MSEES_FIELD_SELECT_ADMIN_STATUS		= BIT(1),
+	MLX5_MSEES_FIELD_SELECT_ADMIN_FREQ_MEASURE	= BIT(2),
+};
+
+enum mlx5_msees_admin_status {
+	MLX5_MSEES_ADMIN_STATUS_FREE_RUNNING		= 0x0,
+	MLX5_MSEES_ADMIN_STATUS_TRACK			= 0x1,
+};
+
+enum mlx5_msees_oper_status {
+	MLX5_MSEES_OPER_STATUS_FREE_RUNNING		= 0x0,
+	MLX5_MSEES_OPER_STATUS_SELF_TRACK		= 0x1,
+	MLX5_MSEES_OPER_STATUS_OTHER_TRACK		= 0x2,
+	MLX5_MSEES_OPER_STATUS_HOLDOVER			= 0x3,
+	MLX5_MSEES_OPER_STATUS_FAIL_HOLDOVER		= 0x4,
+	MLX5_MSEES_OPER_STATUS_FAIL_FREE_RUNNING	= 0x5,
+};
+
+struct mlx5_ifc_msees_reg_bits {
+	u8         reserved_at_0[0x8];
+	u8         local_port[0x8];
+	u8         pnat[0x2];
+	u8         lp_msb[0x2];
+	u8         reserved_at_14[0xc];
+
+	u8         field_select[0x20];
+
+	u8         admin_status[0x4];
+	u8         oper_status[0x4];
+	u8         ho_acq[0x1];
+	u8         reserved_at_49[0xc];
+	u8         admin_freq_measure[0x1];
+	u8         oper_freq_measure[0x1];
+	u8         failure_reason[0x9];
+
+	u8         frequency_diff[0x20];
+
+	u8         reserved_at_80[0x180];
+};
+
 #endif /* MLX5_IFC_H */
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 0896aaa91dd7..7e520c14eb8c 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -79,6 +79,8 @@ struct xdp_buff;
 struct xdp_frame;
 struct xdp_metadata_ops;
 struct xdp_md;
+/* DPLL specific */
+struct dpll_pin;
 
 typedef u32 xdp_features_t;
 
@@ -917,6 +919,7 @@ struct net_device_path {
 			u8 queue;
 			u16 wcid;
 			u8 bss;
+			u8 amsdu;
 		} mtk_wdma;
 	};
 };
@@ -2049,6 +2052,9 @@ enum netdev_ml_priv_type {
  *			SET_NETDEV_DEVLINK_PORT macro. This pointer is static
  *			during the time netdevice is registered.
  *
+ *	@dpll_pin: Pointer to the SyncE source pin of a DPLL subsystem,
+ *		   where the clock is recovered.
+ *
  *	FIXME: cleanup struct net_device such that network protocol info
  *	moves out.
  */
@@ -2405,6 +2411,10 @@ struct net_device {
 	struct rtnl_hw_stats64	*offload_xstats_l3;
 
 	struct devlink_port	*devlink_port;
+
+#if IS_ENABLED(CONFIG_DPLL)
+	struct dpll_pin		*dpll_pin;
+#endif
 };
 #define to_net_dev(d) container_of(d, struct net_device, dev)
 
@@ -3940,6 +3950,18 @@ int dev_get_mac_address(struct sockaddr *sa, struct net *net, char *dev_name);
 int dev_get_port_parent_id(struct net_device *dev,
 			   struct netdev_phys_item_id *ppid, bool recurse);
 bool netdev_port_same_parent_id(struct net_device *a, struct net_device *b);
+void netdev_dpll_pin_set(struct net_device *dev, struct dpll_pin *dpll_pin);
+void netdev_dpll_pin_clear(struct net_device *dev);
+
+static inline struct dpll_pin *netdev_dpll_pin(const struct net_device *dev)
+{
+#if IS_ENABLED(CONFIG_DPLL)
+	return dev->dpll_pin;
+#else
+	return NULL;
+#endif
+}
+
 struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev, bool *again);
 struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 				    struct netdev_queue *txq, int *ret);
diff --git a/include/linux/pds/pds_core_if.h b/include/linux/pds/pds_core_if.h
index e838a2b90440..17a87c1a55d7 100644
--- a/include/linux/pds/pds_core_if.h
+++ b/include/linux/pds/pds_core_if.h
@@ -79,6 +79,7 @@ enum pds_core_status_code {
 	PDS_RC_EVFID	= 31,	/* VF ID does not exist */
 	PDS_RC_BAD_FW	= 32,	/* FW file is invalid or corrupted */
 	PDS_RC_ECLIENT	= 33,   /* No such client id */
+	PDS_RC_BAD_PCI	= 255,  /* Broken PCI when reading status */
 };
 
 /**
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 1351b802ffcf..3cc52826f18e 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -1736,6 +1736,7 @@ void phy_detach(struct phy_device *phydev);
 void phy_start(struct phy_device *phydev);
 void phy_stop(struct phy_device *phydev);
 int phy_config_aneg(struct phy_device *phydev);
+int _phy_start_aneg(struct phy_device *phydev);
 int phy_start_aneg(struct phy_device *phydev);
 int phy_aneg_done(struct phy_device *phydev);
 int phy_speed_down(struct phy_device *phydev, bool sync);
diff --git a/include/linux/soc/mediatek/mtk_wed.h b/include/linux/soc/mediatek/mtk_wed.h
index b2b28180dff7..a476648858a6 100644
--- a/include/linux/soc/mediatek/mtk_wed.h
+++ b/include/linux/soc/mediatek/mtk_wed.h
@@ -10,6 +10,7 @@
 
 #define MTK_WED_TX_QUEUES		2
 #define MTK_WED_RX_QUEUES		2
+#define MTK_WED_RX_PAGE_QUEUES		3
 
 #define WED_WO_STA_REC			0x6
 
@@ -45,7 +46,7 @@ enum mtk_wed_wo_cmd {
 	MTK_WED_WO_CMD_WED_END
 };
 
-struct mtk_rxbm_desc {
+struct mtk_wed_bm_desc {
 	__le32 buf0;
 	__le32 token;
 } __packed __aligned(4);
@@ -76,6 +77,11 @@ struct mtk_wed_wo_rx_stats {
 	__le32 rx_drop_cnt;
 };
 
+struct mtk_wed_buf {
+	void *p;
+	dma_addr_t phy_addr;
+};
+
 struct mtk_wed_device {
 #ifdef CONFIG_NET_MEDIATEK_SOC_WED
 	const struct mtk_wed_ops *ops;
@@ -94,17 +100,20 @@ struct mtk_wed_device {
 	struct mtk_wed_ring txfree_ring;
 	struct mtk_wed_ring tx_wdma[MTK_WED_TX_QUEUES];
 	struct mtk_wed_ring rx_wdma[MTK_WED_RX_QUEUES];
+	struct mtk_wed_ring rx_rro_ring[MTK_WED_RX_QUEUES];
+	struct mtk_wed_ring rx_page_ring[MTK_WED_RX_PAGE_QUEUES];
+	struct mtk_wed_ring ind_cmd_ring;
 
 	struct {
 		int size;
-		void **pages;
+		struct mtk_wed_buf *pages;
 		struct mtk_wdma_desc *desc;
 		dma_addr_t desc_phys;
 	} tx_buf_ring;
 
 	struct {
 		int size;
-		struct mtk_rxbm_desc *desc;
+		struct mtk_wed_bm_desc *desc;
 		dma_addr_t desc_phys;
 	} rx_buf_ring;
 
@@ -114,6 +123,13 @@ struct mtk_wed_device {
 		dma_addr_t fdbk_phys;
 	} rro;
 
+	struct {
+		int size;
+		struct mtk_wed_buf *pages;
+		struct mtk_wed_bm_desc *desc;
+		dma_addr_t desc_phys;
+	} hw_rro;
+
 	/* filled by driver: */
 	struct {
 		union {
@@ -123,6 +139,7 @@ struct mtk_wed_device {
 		enum mtk_wed_bus_tye bus_type;
 		void __iomem *base;
 		u32 phy_base;
+		u32 id;
 
 		u32 wpdma_phys;
 		u32 wpdma_int;
@@ -131,18 +148,35 @@ struct mtk_wed_device {
 		u32 wpdma_txfree;
 		u32 wpdma_rx_glo;
 		u32 wpdma_rx;
+		u32 wpdma_rx_rro[MTK_WED_RX_QUEUES];
+		u32 wpdma_rx_pg;
 
 		bool wcid_512;
+		bool hw_rro;
+		bool msi;
 
 		u16 token_start;
 		unsigned int nbuf;
 		unsigned int rx_nbuf;
 		unsigned int rx_npkt;
 		unsigned int rx_size;
+		unsigned int amsdu_max_len;
 
 		u8 tx_tbit[MTK_WED_TX_QUEUES];
 		u8 rx_tbit[MTK_WED_RX_QUEUES];
+		u8 rro_rx_tbit[MTK_WED_RX_QUEUES];
+		u8 rx_pg_tbit[MTK_WED_RX_PAGE_QUEUES];
 		u8 txfree_tbit;
+		u8 amsdu_max_subframes;
+
+		struct {
+			u8 se_group_nums;
+			u16 win_size;
+			u16 particular_sid;
+			u32 ack_sn_addr;
+			dma_addr_t particular_se_phys;
+			dma_addr_t addr_elem_phys[1024];
+		} ind_cmd;
 
 		u32 (*init_buf)(void *ptr, dma_addr_t phys, int token_id);
 		int (*offload_enable)(struct mtk_wed_device *wed);
@@ -182,6 +216,14 @@ struct mtk_wed_ops {
 	void (*irq_set_mask)(struct mtk_wed_device *dev, u32 mask);
 	int (*setup_tc)(struct mtk_wed_device *wed, struct net_device *dev,
 			enum tc_setup_type type, void *type_data);
+	void (*start_hw_rro)(struct mtk_wed_device *dev, u32 irq_mask,
+			     bool reset);
+	void (*rro_rx_ring_setup)(struct mtk_wed_device *dev, int ring,
+				  void __iomem *regs);
+	void (*msdu_pg_rx_ring_setup)(struct mtk_wed_device *dev, int ring,
+				      void __iomem *regs);
+	int (*ind_rx_ring_setup)(struct mtk_wed_device *dev,
+				 void __iomem *regs);
 };
 
 extern const struct mtk_wed_ops __rcu *mtk_soc_wed_ops;
@@ -206,16 +248,27 @@ mtk_wed_device_attach(struct mtk_wed_device *dev)
 	return ret;
 }
 
-static inline bool
-mtk_wed_get_rx_capa(struct mtk_wed_device *dev)
+static inline bool mtk_wed_get_rx_capa(struct mtk_wed_device *dev)
 {
 #ifdef CONFIG_NET_MEDIATEK_SOC_WED
+	if (dev->version == 3)
+		return dev->wlan.hw_rro;
+
 	return dev->version != 1;
 #else
 	return false;
 #endif
 }
 
+static inline bool mtk_wed_is_amsdu_supported(struct mtk_wed_device *dev)
+{
+#ifdef CONFIG_NET_MEDIATEK_SOC_WED
+	return dev->version == 3;
+#else
+	return false;
+#endif
+}
+
 #ifdef CONFIG_NET_MEDIATEK_SOC_WED
 #define mtk_wed_device_active(_dev) !!(_dev)->ops
 #define mtk_wed_device_detach(_dev) (_dev)->ops->detach(_dev)
@@ -242,6 +295,15 @@ mtk_wed_get_rx_capa(struct mtk_wed_device *dev)
 #define mtk_wed_device_dma_reset(_dev) (_dev)->ops->reset_dma(_dev)
 #define mtk_wed_device_setup_tc(_dev, _netdev, _type, _type_data) \
 	(_dev)->ops->setup_tc(_dev, _netdev, _type, _type_data)
+#define mtk_wed_device_start_hw_rro(_dev, _mask, _reset) \
+	(_dev)->ops->start_hw_rro(_dev, _mask, _reset)
+#define mtk_wed_device_rro_rx_ring_setup(_dev, _ring, _regs) \
+	(_dev)->ops->rro_rx_ring_setup(_dev, _ring, _regs)
+#define mtk_wed_device_msdu_pg_rx_ring_setup(_dev, _ring, _regs) \
+	(_dev)->ops->msdu_pg_rx_ring_setup(_dev, _ring, _regs)
+#define mtk_wed_device_ind_rx_ring_setup(_dev, _regs) \
+	(_dev)->ops->ind_rx_ring_setup(_dev, _regs)
+
 #else
 static inline bool mtk_wed_device_active(struct mtk_wed_device *dev)
 {
@@ -261,6 +323,10 @@ static inline bool mtk_wed_device_active(struct mtk_wed_device *dev)
 #define mtk_wed_device_stop(_dev) do {} while (0)
 #define mtk_wed_device_dma_reset(_dev) do {} while (0)
 #define mtk_wed_device_setup_tc(_dev, _netdev, _type, _type_data) -EOPNOTSUPP
+#define mtk_wed_device_start_hw_rro(_dev, _mask, _reset) do {} while (0)
+#define mtk_wed_device_rro_rx_ring_setup(_dev, _ring, _regs) -ENODEV
+#define mtk_wed_device_msdu_pg_rx_ring_setup(_dev, _ring, _regs) -ENODEV
+#define mtk_wed_device_ind_rx_ring_setup(_dev, _regs) -ENODEV
 #endif
 
 #endif
diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index ce89cc3e4913..c0079a7574ae 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -139,6 +139,7 @@ struct stmmac_rxq_cfg {
 
 struct stmmac_txq_cfg {
 	u32 weight;
+	bool coe_unsupported;
 	u8 mode_to_use;
 	/* Credit Base Shaper parameters */
 	u32 send_slope;
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 3c5efeeb024f..e15452df9804 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -377,6 +377,14 @@ struct tcp_sock {
 				 * Total data bytes retransmitted
 				 */
 	u32	total_retrans;	/* Total retransmits for entire connection */
+	u32	rto_stamp;	/* Start time (ms) of last CA_Loss recovery */
+	u16	total_rto;	/* Total number of RTO timeouts, including
+				 * SYN/SYN-ACK and recurring timeouts.
+				 */
+	u16	total_rto_recoveries;	/* Total number of RTO recoveries,
+					 * including any unfinished recovery.
+					 */
+	u32	total_rto_time;	/* ms spent in (completed) RTO recoveries. */
 
 	u32	urg_seq;	/* Seq of received urgent pointer */
 	unsigned int		keepalive_time;	  /* time before keep alive takes place */
@@ -463,15 +471,17 @@ enum tsq_enum {
 	TCP_MTU_REDUCED_DEFERRED,  /* tcp_v{4|6}_err() could not call
 				    * tcp_v{4|6}_mtu_reduced()
 				    */
+	TCP_ACK_DEFERRED,	   /* TX pure ack is deferred */
 };
 
 enum tsq_flags {
-	TSQF_THROTTLED			= (1UL << TSQ_THROTTLED),
-	TSQF_QUEUED			= (1UL << TSQ_QUEUED),
-	TCPF_TSQ_DEFERRED		= (1UL << TCP_TSQ_DEFERRED),
-	TCPF_WRITE_TIMER_DEFERRED	= (1UL << TCP_WRITE_TIMER_DEFERRED),
-	TCPF_DELACK_TIMER_DEFERRED	= (1UL << TCP_DELACK_TIMER_DEFERRED),
-	TCPF_MTU_REDUCED_DEFERRED	= (1UL << TCP_MTU_REDUCED_DEFERRED),
+	TSQF_THROTTLED			= BIT(TSQ_THROTTLED),
+	TSQF_QUEUED			= BIT(TSQ_QUEUED),
+	TCPF_TSQ_DEFERRED		= BIT(TCP_TSQ_DEFERRED),
+	TCPF_WRITE_TIMER_DEFERRED	= BIT(TCP_WRITE_TIMER_DEFERRED),
+	TCPF_DELACK_TIMER_DEFERRED	= BIT(TCP_DELACK_TIMER_DEFERRED),
+	TCPF_MTU_REDUCED_DEFERRED	= BIT(TCP_MTU_REDUCED_DEFERRED),
+	TCPF_ACK_DEFERRED		= BIT(TCP_ACK_DEFERRED),
 };
 
 #define tcp_sk(ptr) container_of_const(ptr, struct tcp_sock, inet_conn.icsk_inet.sk)
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 21ae37e49319..5eb88a66eb68 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -761,7 +761,8 @@ struct bpf_raw_event_map *bpf_get_raw_tracepoint(const char *name);
 void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp);
 int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,
 			    u32 *fd_type, const char **buf,
-			    u64 *probe_offset, u64 *probe_addr);
+			    u64 *probe_offset, u64 *probe_addr,
+			    unsigned long *missed);
 int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
 int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
 #else
@@ -801,7 +802,7 @@ static inline void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp)
 static inline int bpf_get_perf_event_info(const struct perf_event *event,
 					  u32 *prog_id, u32 *fd_type,
 					  const char **buf, u64 *probe_offset,
-					  u64 *probe_addr)
+					  u64 *probe_addr, unsigned long *missed)
 {
 	return -EOPNOTSUPP;
 }
@@ -877,6 +878,7 @@ extern void perf_kprobe_destroy(struct perf_event *event);
 extern int bpf_get_kprobe_info(const struct perf_event *event,
 			       u32 *fd_type, const char **symbol,
 			       u64 *probe_offset, u64 *probe_addr,
+			       unsigned long *missed,
 			       bool perf_type_tracepoint);
 #endif
 #ifdef CONFIG_UPROBE_EVENTS
diff --git a/include/linux/udp.h b/include/linux/udp.h
index 43c1fb2d2c21..d04188714dca 100644
--- a/include/linux/udp.h
+++ b/include/linux/udp.h
@@ -32,25 +32,30 @@ static inline u32 udp_hashfn(const struct net *net, u32 num, u32 mask)
 	return (num + net_hash_mix(net)) & mask;
 }
 
+enum {
+	UDP_FLAGS_CORK,		/* Cork is required */
+	UDP_FLAGS_NO_CHECK6_TX, /* Send zero UDP6 checksums on TX? */
+	UDP_FLAGS_NO_CHECK6_RX, /* Allow zero UDP6 checksums on RX? */
+	UDP_FLAGS_GRO_ENABLED,	/* Request GRO aggregation */
+	UDP_FLAGS_ACCEPT_FRAGLIST,
+	UDP_FLAGS_ACCEPT_L4,
+	UDP_FLAGS_ENCAP_ENABLED, /* This socket enabled encap */
+	UDP_FLAGS_UDPLITE_SEND_CC, /* set via udplite setsockopt */
+	UDP_FLAGS_UDPLITE_RECV_CC, /* set via udplite setsockopt */
+};
+
 struct udp_sock {
 	/* inet_sock has to be the first member */
 	struct inet_sock inet;
 #define udp_port_hash		inet.sk.__sk_common.skc_u16hashes[0]
 #define udp_portaddr_hash	inet.sk.__sk_common.skc_u16hashes[1]
 #define udp_portaddr_node	inet.sk.__sk_common.skc_portaddr_node
+
+	unsigned long	 udp_flags;
+
 	int		 pending;	/* Any pending frames ? */
-	unsigned int	 corkflag;	/* Cork is required */
 	__u8		 encap_type;	/* Is this an Encapsulation socket? */
-	unsigned char	 no_check6_tx:1,/* Send zero UDP6 checksums on TX? */
-			 no_check6_rx:1,/* Allow zero UDP6 checksums on RX? */
-			 encap_enabled:1, /* This socket enabled encap
-					   * processing; UDP tunnels and
-					   * different encapsulation layer set
-					   * this
-					   */
-			 gro_enabled:1,	/* Request GRO aggregation */
-			 accept_udp_l4:1,
-			 accept_udp_fraglist:1;
+
 	/*
 	 * Following member retains the information to create a UDP header
 	 * when the socket is uncorked.
@@ -62,12 +67,6 @@ struct udp_sock {
 	 */
 	__u16		 pcslen;
 	__u16		 pcrlen;
-/* indicator bits used by pcflag: */
-#define UDPLITE_BIT      0x1  		/* set by udplite proto init function */
-#define UDPLITE_SEND_CC  0x2  		/* set via udplite setsockopt         */
-#define UDPLITE_RECV_CC  0x4		/* set via udplite setsocktopt        */
-	__u8		 pcflag;        /* marks socket as UDP-Lite if > 0    */
-	__u8		 unused[3];
 	/*
 	 * For encapsulation sockets.
 	 */
@@ -95,28 +94,39 @@ struct udp_sock {
 	int		forward_threshold;
 };
 
+#define udp_test_bit(nr, sk)			\
+	test_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags)
+#define udp_set_bit(nr, sk)			\
+	set_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags)
+#define udp_test_and_set_bit(nr, sk)		\
+	test_and_set_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags)
+#define udp_clear_bit(nr, sk)			\
+	clear_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags)
+#define udp_assign_bit(nr, sk, val)		\
+	assign_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags, val)
+
 #define UDP_MAX_SEGMENTS	(1 << 6UL)
 
 #define udp_sk(ptr) container_of_const(ptr, struct udp_sock, inet.sk)
 
 static inline void udp_set_no_check6_tx(struct sock *sk, bool val)
 {
-	udp_sk(sk)->no_check6_tx = val;
+	udp_assign_bit(NO_CHECK6_TX, sk, val);
 }
 
 static inline void udp_set_no_check6_rx(struct sock *sk, bool val)
 {
-	udp_sk(sk)->no_check6_rx = val;
+	udp_assign_bit(NO_CHECK6_RX, sk, val);
 }
 
-static inline bool udp_get_no_check6_tx(struct sock *sk)
+static inline bool udp_get_no_check6_tx(const struct sock *sk)
 {
-	return udp_sk(sk)->no_check6_tx;
+	return udp_test_bit(NO_CHECK6_TX, sk);
 }
 
-static inline bool udp_get_no_check6_rx(struct sock *sk)
+static inline bool udp_get_no_check6_rx(const struct sock *sk)
 {
-	return udp_sk(sk)->no_check6_rx;
+	return udp_test_bit(NO_CHECK6_RX, sk);
 }
 
 static inline void udp_cmsg_recv(struct msghdr *msg, struct sock *sk,
@@ -135,10 +145,12 @@ static inline bool udp_unexpected_gso(struct sock *sk, struct sk_buff *skb)
 	if (!skb_is_gso(skb))
 		return false;
 
-	if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4 && !udp_sk(sk)->accept_udp_l4)
+	if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4 &&
+	    !udp_test_bit(ACCEPT_L4, sk))
 		return true;
 
-	if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST && !udp_sk(sk)->accept_udp_fraglist)
+	if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST &&
+	    !udp_test_bit(ACCEPT_FRAGLIST, sk))
 		return true;
 
 	return false;
@@ -146,8 +158,8 @@ static inline bool udp_unexpected_gso(struct sock *sk, struct sk_buff *skb)
 
 static inline void udp_allow_gso(struct sock *sk)
 {
-	udp_sk(sk)->accept_udp_l4 = 1;
-	udp_sk(sk)->accept_udp_fraglist = 1;
+	udp_set_bit(ACCEPT_L4, sk);
+	udp_set_bit(ACCEPT_FRAGLIST, sk);
 }
 
 #define udp_portaddr_for_each_entry(__sk, list) \
diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h
index c58453699ee9..ebb3ce63d64d 100644
--- a/include/linux/virtio_vsock.h
+++ b/include/linux/virtio_vsock.h
@@ -12,6 +12,7 @@
 struct virtio_vsock_skb_cb {
 	bool reply;
 	bool tap_delivered;
+	u32 offset;
 };
 
 #define VIRTIO_VSOCK_SKB_CB(skb) ((struct virtio_vsock_skb_cb *)((skb)->cb))
@@ -159,6 +160,15 @@ struct virtio_transport {
 
 	/* Takes ownership of the packet */
 	int (*send_pkt)(struct sk_buff *skb);
+
+	/* Used in MSG_ZEROCOPY mode. Checks, that provided data
+	 * (number of buffers) could be transmitted with zerocopy
+	 * mode. If this callback is not implemented for the current
+	 * transport - this means that this transport doesn't need
+	 * extra checks and can perform zerocopy transmission by
+	 * default.
+	 */
+	bool (*can_msgzerocopy)(int bufs_num);
 };
 
 ssize_t
diff --git a/include/net/devlink.h b/include/net/devlink.h
index 29fd1b4ee654..fad8e36e3d98 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -150,6 +150,7 @@ struct devlink_port {
 
 	struct devlink_rate *devlink_rate;
 	struct devlink_linecard *linecard;
+	u32 rel_index;
 };
 
 struct devlink_port_new_attrs {
@@ -1697,6 +1698,8 @@ void devlink_port_attrs_pci_vf_set(struct devlink_port *devlink_port, u32 contro
 void devlink_port_attrs_pci_sf_set(struct devlink_port *devlink_port,
 				   u32 controller, u16 pf, u32 sf,
 				   bool external);
+int devl_port_fn_devlink_set(struct devlink_port *devlink_port,
+			     struct devlink *fn_devlink);
 struct devlink_rate *
 devl_rate_node_create(struct devlink *devlink, void *priv, char *node_name,
 		      struct devlink_rate *parent);
@@ -1717,8 +1720,8 @@ void devlink_linecard_provision_clear(struct devlink_linecard *linecard);
 void devlink_linecard_provision_fail(struct devlink_linecard *linecard);
 void devlink_linecard_activate(struct devlink_linecard *linecard);
 void devlink_linecard_deactivate(struct devlink_linecard *linecard);
-void devlink_linecard_nested_dl_set(struct devlink_linecard *linecard,
-				    struct devlink *nested_devlink);
+int devlink_linecard_nested_dl_set(struct devlink_linecard *linecard,
+				   struct devlink *nested_devlink);
 int devl_sb_register(struct devlink *devlink, unsigned int sb_index,
 		     u32 size, u16 ingress_pools_count,
 		     u16 egress_pools_count, u16 ingress_tc_count,
@@ -1918,6 +1921,8 @@ devlink_health_reporter_state_update(struct devlink_health_reporter *reporter,
 void
 devlink_health_reporter_recovery_done(struct devlink_health_reporter *reporter);
 
+int devl_nested_devlink_set(struct devlink *devlink,
+			    struct devlink *nested_devlink);
 bool devlink_is_reload_failed(const struct devlink *devlink);
 void devlink_remote_reload_actions_performed(struct devlink *devlink,
 					     enum devlink_reload_limit limit,
diff --git a/include/net/dst.h b/include/net/dst.h
index 78884429deed..f8b8599a0600 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -392,10 +392,10 @@ static inline int dst_discard(struct sk_buff *skb)
 {
 	return dst_discard_out(&init_net, skb->sk, skb);
 }
-void *dst_alloc(struct dst_ops *ops, struct net_device *dev, int initial_ref,
+void *dst_alloc(struct dst_ops *ops, struct net_device *dev,
 		int initial_obsolete, unsigned short flags);
 void dst_init(struct dst_entry *dst, struct dst_ops *ops,
-	      struct net_device *dev, int initial_ref, int initial_obsolete,
+	      struct net_device *dev, int initial_obsolete,
 	      unsigned short flags);
 struct dst_entry *dst_destroy(struct dst_entry *dst);
 void dst_dev_put(struct dst_entry *dst);
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index 2de0e4d4a027..98e11958cdff 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -268,6 +268,16 @@ enum {
 	INET_FLAGS_NODEFRAG	= 17,
 	INET_FLAGS_BIND_ADDRESS_NO_PORT = 18,
 	INET_FLAGS_DEFER_CONNECT = 19,
+	INET_FLAGS_MC6_LOOP	= 20,
+	INET_FLAGS_RECVERR6_RFC4884 = 21,
+	INET_FLAGS_MC6_ALL	= 22,
+	INET_FLAGS_AUTOFLOWLABEL_SET = 23,
+	INET_FLAGS_AUTOFLOWLABEL = 24,
+	INET_FLAGS_DONTFRAG	= 25,
+	INET_FLAGS_RECVERR6	= 26,
+	INET_FLAGS_REPFLOW	= 27,
+	INET_FLAGS_RTALERT_ISOLATE = 28,
+	INET_FLAGS_SNDFLOW	= 29,
 };
 
 /* cmsg flags for inet */
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index b32539bb0fb0..28b065790261 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -53,13 +53,12 @@ struct route_info {
  */
 static inline int rt6_srcprefs2flags(unsigned int srcprefs)
 {
-	/* No need to bitmask because srcprefs have only 3 bits. */
-	return srcprefs << 3;
+	return (srcprefs & IPV6_PREFER_SRC_MASK) << 3;
 }
 
 static inline unsigned int rt6_flags2srcprefs(int flags)
 {
-	return (flags >> 3) & 7;
+	return (flags >> 3) & IPV6_PREFER_SRC_MASK;
 }
 
 static inline bool rt6_need_strict(const struct in6_addr *daddr)
@@ -266,7 +265,7 @@ static inline unsigned int ip6_skb_dst_mtu(const struct sk_buff *skb)
 	const struct dst_entry *dst = skb_dst(skb);
 	unsigned int mtu;
 
-	if (np && np->pmtudisc >= IPV6_PMTUDISC_PROBE) {
+	if (np && READ_ONCE(np->pmtudisc) >= IPV6_PMTUDISC_PROBE) {
 		mtu = READ_ONCE(dst->dev->mtu);
 		mtu -= lwtunnel_headroom(dst->lwtstate, mtu);
 	} else {
@@ -277,14 +276,18 @@ static inline unsigned int ip6_skb_dst_mtu(const struct sk_buff *skb)
 
 static inline bool ip6_sk_accept_pmtu(const struct sock *sk)
 {
-	return inet6_sk(sk)->pmtudisc != IPV6_PMTUDISC_INTERFACE &&
-	       inet6_sk(sk)->pmtudisc != IPV6_PMTUDISC_OMIT;
+	u8 pmtudisc = READ_ONCE(inet6_sk(sk)->pmtudisc);
+
+	return pmtudisc != IPV6_PMTUDISC_INTERFACE &&
+	       pmtudisc != IPV6_PMTUDISC_OMIT;
 }
 
 static inline bool ip6_sk_ignore_df(const struct sock *sk)
 {
-	return inet6_sk(sk)->pmtudisc < IPV6_PMTUDISC_DO ||
-	       inet6_sk(sk)->pmtudisc == IPV6_PMTUDISC_OMIT;
+	u8 pmtudisc = READ_ONCE(inet6_sk(sk)->pmtudisc);
+
+	return pmtudisc < IPV6_PMTUDISC_DO ||
+	       pmtudisc == IPV6_PMTUDISC_OMIT;
 }
 
 static inline const struct in6_addr *rt6_nexthop(const struct rt6_info *rt,
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index c6932d1a3fa8..b3444c8a6f74 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -373,12 +373,12 @@ static inline void ipcm6_init(struct ipcm6_cookie *ipc6)
 }
 
 static inline void ipcm6_init_sk(struct ipcm6_cookie *ipc6,
-				 const struct ipv6_pinfo *np)
+				 const struct sock *sk)
 {
 	*ipc6 = (struct ipcm6_cookie) {
 		.hlimit = -1,
-		.tclass = np->tclass,
-		.dontfrag = np->dontfrag,
+		.tclass = inet6_sk(sk)->tclass,
+		.dontfrag = inet6_test_bit(DONTFRAG, sk),
 	};
 }
 
@@ -428,7 +428,7 @@ int ipv6_flowlabel_opt_get(struct sock *sk, struct in6_flowlabel_req *freq,
 			   int flags);
 int ip6_flowlabel_init(void);
 void ip6_flowlabel_cleanup(void);
-bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np);
+bool ip6_autoflowlabel(struct net *net, const struct sock *sk);
 
 static inline void fl6_sock_release(struct ip6_flowlabel *fl)
 {
@@ -914,9 +914,9 @@ static inline int ip6_sk_dst_hoplimit(struct ipv6_pinfo *np, struct flowi6 *fl6,
 	int hlimit;
 
 	if (ipv6_addr_is_multicast(&fl6->daddr))
-		hlimit = np->mcast_hops;
+		hlimit = READ_ONCE(np->mcast_hops);
 	else
-		hlimit = np->hop_limit;
+		hlimit = READ_ONCE(np->hop_limit);
 	if (hlimit < 0)
 		hlimit = ip6_dst_hoplimit(dst);
 	return hlimit;
@@ -1303,15 +1303,16 @@ static inline int ip6_sock_set_v6only(struct sock *sk)
 
 static inline void ip6_sock_set_recverr(struct sock *sk)
 {
-	lock_sock(sk);
-	inet6_sk(sk)->recverr = true;
-	release_sock(sk);
+	inet6_set_bit(RECVERR6, sk);
 }
 
-static inline int __ip6_sock_set_addr_preferences(struct sock *sk, int val)
+#define IPV6_PREFER_SRC_MASK (IPV6_PREFER_SRC_TMP | IPV6_PREFER_SRC_PUBLIC | \
+			      IPV6_PREFER_SRC_COA)
+
+static inline int ip6_sock_set_addr_preferences(struct sock *sk, int val)
 {
+	unsigned int prefmask = ~IPV6_PREFER_SRC_MASK;
 	unsigned int pref = 0;
-	unsigned int prefmask = ~0;
 
 	/* check PUBLIC/TMP/PUBTMP_DEFAULT conflicts */
 	switch (val & (IPV6_PREFER_SRC_PUBLIC |
@@ -1361,20 +1362,11 @@ static inline int __ip6_sock_set_addr_preferences(struct sock *sk, int val)
 		return -EINVAL;
 	}
 
-	inet6_sk(sk)->srcprefs = (inet6_sk(sk)->srcprefs & prefmask) | pref;
+	WRITE_ONCE(inet6_sk(sk)->srcprefs,
+		   (READ_ONCE(inet6_sk(sk)->srcprefs) & prefmask) | pref);
 	return 0;
 }
 
-static inline int ip6_sock_set_addr_preferences(struct sock *sk, int val)
-{
-	int ret;
-
-	lock_sock(sk);
-	ret = __ip6_sock_set_addr_preferences(sk, val);
-	release_sock(sk);
-	return ret;
-}
-
 static inline void ip6_sock_set_recvpktinfo(struct sock *sk)
 {
 	lock_sock(sk);
diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h
index c48186bf4737..21da31e1dff5 100644
--- a/include/net/ipv6_stubs.h
+++ b/include/net/ipv6_stubs.h
@@ -85,6 +85,11 @@ struct ipv6_bpf_stub {
 			       sockptr_t optval, unsigned int optlen);
 	int (*ipv6_getsockopt)(struct sock *sk, int level, int optname,
 			       sockptr_t optval, sockptr_t optlen);
+	int (*ipv6_dev_get_saddr)(struct net *net,
+				  const struct net_device *dst_dev,
+				  const struct in6_addr *daddr,
+				  unsigned int prefs,
+				  struct in6_addr *saddr);
 };
 extern const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly;
 
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 7a41c4791536..d96d05b08819 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -132,6 +132,7 @@ struct netns_ipv4 {
 	u8 sysctl_tcp_syncookies;
 	u8 sysctl_tcp_migrate_req;
 	u8 sysctl_tcp_comp_sack_nr;
+	u8 sysctl_tcp_backlog_ack_defer;
 	int sysctl_tcp_reordering;
 	u8 sysctl_tcp_retries1;
 	u8 sysctl_tcp_retries2;
diff --git a/include/net/sock.h b/include/net/sock.h
index 92f7ea62a915..3f2ecef884f8 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1821,12 +1821,11 @@ static inline bool sock_owned_by_user_nocheck(const struct sock *sk)
 
 static inline void sock_release_ownership(struct sock *sk)
 {
-	if (sock_owned_by_user_nocheck(sk)) {
-		sk->sk_lock.owned = 0;
+	DEBUG_NET_WARN_ON_ONCE(!sock_owned_by_user_nocheck(sk));
+	sk->sk_lock.owned = 0;
 
-		/* The sk_lock has mutex_unlock() semantics: */
-		mutex_release(&sk->sk_lock.dep_map, _RET_IP_);
-	}
+	/* The sk_lock has mutex_unlock() semantics: */
+	mutex_release(&sk->sk_lock.dep_map, _RET_IP_);
 }
 
 /* no reclassification while locks are held */
@@ -2237,7 +2236,7 @@ static inline void sock_confirm_neigh(struct sk_buff *skb, struct neighbour *n)
 	}
 }
 
-bool sk_mc_loop(struct sock *sk);
+bool sk_mc_loop(const struct sock *sk);
 
 static inline bool sk_can_gso(const struct sock *sk)
 {
diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h
index 0ca9b7a11baf..29251c3519cf 100644
--- a/include/net/udp_tunnel.h
+++ b/include/net/udp_tunnel.h
@@ -174,16 +174,13 @@ static inline int udp_tunnel_handle_offloads(struct sk_buff *skb, bool udp_csum)
 }
 #endif
 
-static inline void udp_tunnel_encap_enable(struct socket *sock)
+static inline void udp_tunnel_encap_enable(struct sock *sk)
 {
-	struct udp_sock *up = udp_sk(sock->sk);
-
-	if (up->encap_enabled)
+	if (udp_test_and_set_bit(ENCAP_ENABLED, sk))
 		return;
 
-	up->encap_enabled = 1;
 #if IS_ENABLED(CONFIG_IPV6)
-	if (sock->sk->sk_family == PF_INET6)
+	if (READ_ONCE(sk->sk_family) == PF_INET6)
 		ipv6_stub->udpv6_encap_enable();
 #endif
 	udp_encap_enable();
diff --git a/include/net/udplite.h b/include/net/udplite.h
index bd33ff2b8f42..786919d29f8d 100644
--- a/include/net/udplite.h
+++ b/include/net/udplite.h
@@ -66,14 +66,18 @@ static inline int udplite_checksum_init(struct sk_buff *skb, struct udphdr *uh)
 /* Fast-path computation of checksum. Socket may not be locked. */
 static inline __wsum udplite_csum(struct sk_buff *skb)
 {
-	const struct udp_sock *up = udp_sk(skb->sk);
 	const int off = skb_transport_offset(skb);
+	const struct sock *sk = skb->sk;
 	int len = skb->len - off;
 
-	if ((up->pcflag & UDPLITE_SEND_CC) && up->pcslen < len) {
-		if (0 < up->pcslen)
-			len = up->pcslen;
-		udp_hdr(skb)->len = htons(up->pcslen);
+	if (udp_test_bit(UDPLITE_SEND_CC, sk)) {
+		u16 pcslen = READ_ONCE(udp_sk(sk)->pcslen);
+
+		if (pcslen < len) {
+			if (pcslen > 0)
+				len = pcslen;
+			udp_hdr(skb)->len = htons(pcslen);
+		}
 	}
 	skb->ip_summed = CHECKSUM_NONE;     /* no HW support for checksumming */
 
diff --git a/include/net/xdp.h b/include/net/xdp.h
index de08c8e0d134..349c36fb5fd8 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -383,14 +383,25 @@ void xdp_attachment_setup(struct xdp_attachment_info *info,
 
 #define DEV_MAP_BULK_SIZE XDP_BULK_QUEUE_SIZE
 
+/* Define the relationship between xdp-rx-metadata kfunc and
+ * various other entities:
+ * - xdp_rx_metadata enum
+ * - netdev netlink enum (Documentation/netlink/specs/netdev.yaml)
+ * - kfunc name
+ * - xdp_metadata_ops field
+ */
 #define XDP_METADATA_KFUNC_xxx	\
 	XDP_METADATA_KFUNC(XDP_METADATA_KFUNC_RX_TIMESTAMP, \
-			   bpf_xdp_metadata_rx_timestamp) \
+			   NETDEV_XDP_RX_METADATA_TIMESTAMP, \
+			   bpf_xdp_metadata_rx_timestamp, \
+			   xmo_rx_timestamp) \
 	XDP_METADATA_KFUNC(XDP_METADATA_KFUNC_RX_HASH, \
-			   bpf_xdp_metadata_rx_hash) \
+			   NETDEV_XDP_RX_METADATA_HASH, \
+			   bpf_xdp_metadata_rx_hash, \
+			   xmo_rx_hash) \
 
-enum {
-#define XDP_METADATA_KFUNC(name, _) name,
+enum xdp_rx_metadata {
+#define XDP_METADATA_KFUNC(name, _, __, ___) name,
 XDP_METADATA_KFUNC_xxx
 #undef XDP_METADATA_KFUNC
 MAX_XDP_METADATA_KFUNC,
diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index 1617af380162..69b472604b86 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -14,6 +14,8 @@
 #include <linux/mm.h>
 #include <net/sock.h>
 
+#define XDP_UMEM_SG_FLAG (1 << 1)
+
 struct net_device;
 struct xsk_queue;
 struct xdp_buff;
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 363c7d510554..98d7aa78adda 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -2166,7 +2166,7 @@ static inline bool xfrm6_local_dontfrag(const struct sock *sk)
 
 	proto = sk->sk_protocol;
 	if (proto == IPPROTO_UDP || proto == IPPROTO_RAW)
-		return inet6_sk(sk)->dontfrag;
+		return inet6_test_bit(DONTFRAG, sk);
 
 	return false;
 }
diff --git a/include/trace/events/vsock_virtio_transport_common.h b/include/trace/events/vsock_virtio_transport_common.h
index d0b3f0ea9ba1..f1ebe36787c3 100644
--- a/include/trace/events/vsock_virtio_transport_common.h
+++ b/include/trace/events/vsock_virtio_transport_common.h
@@ -43,7 +43,8 @@ TRACE_EVENT(virtio_transport_alloc_pkt,
 		 __u32 len,
 		 __u16 type,
 		 __u16 op,
-		 __u32 flags
+		 __u32 flags,
+		 bool zcopy
 	),
 	TP_ARGS(
 		src_cid, src_port,
@@ -51,7 +52,8 @@ TRACE_EVENT(virtio_transport_alloc_pkt,
 		len,
 		type,
 		op,
-		flags
+		flags,
+		zcopy
 	),
 	TP_STRUCT__entry(
 		__field(__u32, src_cid)
@@ -62,6 +64,7 @@ TRACE_EVENT(virtio_transport_alloc_pkt,
 		__field(__u16, type)
 		__field(__u16, op)
 		__field(__u32, flags)
+		__field(bool, zcopy)
 	),
 	TP_fast_assign(
 		__entry->src_cid = src_cid;
@@ -72,14 +75,15 @@ TRACE_EVENT(virtio_transport_alloc_pkt,
 		__entry->type = type;
 		__entry->op = op;
 		__entry->flags = flags;
+		__entry->zcopy = zcopy;
 	),
-	TP_printk("%u:%u -> %u:%u len=%u type=%s op=%s flags=%#x",
+	TP_printk("%u:%u -> %u:%u len=%u type=%s op=%s flags=%#x zcopy=%s",
 		  __entry->src_cid, __entry->src_port,
 		  __entry->dst_cid, __entry->dst_port,
 		  __entry->len,
 		  show_type(__entry->type),
 		  show_op(__entry->op),
-		  __entry->flags)
+		  __entry->flags, __entry->zcopy ? "true" : "false")
 );
 
 TRACE_EVENT(virtio_transport_recv_pkt,
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 0448700890f7..7ba61b75bc0e 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -932,7 +932,14 @@ enum bpf_map_type {
 	 */
 	BPF_MAP_TYPE_CGROUP_STORAGE = BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED,
 	BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
-	BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
+	BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE_DEPRECATED,
+	/* BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE is available to bpf programs
+	 * attaching to a cgroup. The new mechanism (BPF_MAP_TYPE_CGRP_STORAGE +
+	 * local percpu kptr) supports all BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE
+	 * functionality and more. So mark * BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE
+	 * deprecated.
+	 */
+	BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE = BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE_DEPRECATED,
 	BPF_MAP_TYPE_QUEUE,
 	BPF_MAP_TYPE_STACK,
 	BPF_MAP_TYPE_SK_STORAGE,
@@ -1040,6 +1047,11 @@ enum bpf_attach_type {
 	BPF_TCX_INGRESS,
 	BPF_TCX_EGRESS,
 	BPF_TRACE_UPROBE_MULTI,
+	BPF_CGROUP_UNIX_CONNECT,
+	BPF_CGROUP_UNIX_SENDMSG,
+	BPF_CGROUP_UNIX_RECVMSG,
+	BPF_CGROUP_UNIX_GETPEERNAME,
+	BPF_CGROUP_UNIX_GETSOCKNAME,
 	__MAX_BPF_ATTACH_TYPE
 };
 
@@ -2697,8 +2709,8 @@ union bpf_attr {
  * 		*bpf_socket* should be one of the following:
  *
  * 		* **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**.
- * 		* **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**
- * 		  and **BPF_CGROUP_INET6_CONNECT**.
+ *		* **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**,
+ *		  **BPF_CGROUP_INET6_CONNECT** and **BPF_CGROUP_UNIX_CONNECT**.
  *
  * 		This helper actually implements a subset of **setsockopt()**.
  * 		It supports the following *level*\ s:
@@ -2936,8 +2948,8 @@ union bpf_attr {
  * 		*bpf_socket* should be one of the following:
  *
  * 		* **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**.
- * 		* **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**
- * 		  and **BPF_CGROUP_INET6_CONNECT**.
+ *		* **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**,
+ *		  **BPF_CGROUP_INET6_CONNECT** and **BPF_CGROUP_UNIX_CONNECT**.
  *
  * 		This helper actually implements a subset of **getsockopt()**.
  * 		It supports the same set of *optname*\ s that is supported by
@@ -3257,6 +3269,11 @@ union bpf_attr {
  *			and *params*->smac will not be set as output. A common
  *			use case is to call **bpf_redirect_neigh**\ () after
  *			doing **bpf_fib_lookup**\ ().
+ *		**BPF_FIB_LOOKUP_SRC**
+ *			Derive and set source IP addr in *params*->ipv{4,6}_src
+ *			for the nexthop. If the src addr cannot be derived,
+ *			**BPF_FIB_LKUP_RET_NO_SRC_ADDR** is returned. In this
+ *			case, *params*->dmac and *params*->smac are not set either.
  *
  *		*ctx* is either **struct xdp_md** for XDP programs or
  *		**struct sk_buff** tc cls_act programs.
@@ -5089,6 +5106,8 @@ union bpf_attr {
  *		**BPF_F_TIMER_ABS**
  *			Start the timer in absolute expire value instead of the
  *			default relative one.
+ *		**BPF_F_TIMER_CPU_PIN**
+ *			Timer will be pinned to the CPU of the caller.
  *
  *	Return
  *		0 on success.
@@ -6525,6 +6544,7 @@ struct bpf_link_info {
 			__aligned_u64 addrs;
 			__u32 count; /* in/out: kprobe_multi function count */
 			__u32 flags;
+			__u64 missed;
 		} kprobe_multi;
 		struct {
 			__u32 type; /* enum bpf_perf_event_type */
@@ -6540,6 +6560,7 @@ struct bpf_link_info {
 					__u32 name_len;
 					__u32 offset; /* offset from func_name */
 					__u64 addr;
+					__u64 missed;
 				} kprobe; /* BPF_PERF_EVENT_KPROBE, BPF_PERF_EVENT_KRETPROBE */
 				struct {
 					__aligned_u64 tp_name;   /* in/out */
@@ -6953,6 +6974,7 @@ enum {
 	BPF_FIB_LOOKUP_OUTPUT  = (1U << 1),
 	BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2),
 	BPF_FIB_LOOKUP_TBID    = (1U << 3),
+	BPF_FIB_LOOKUP_SRC     = (1U << 4),
 };
 
 enum {
@@ -6965,6 +6987,7 @@ enum {
 	BPF_FIB_LKUP_RET_UNSUPP_LWT,   /* fwd requires encapsulation */
 	BPF_FIB_LKUP_RET_NO_NEIGH,     /* no neighbor entry for nh */
 	BPF_FIB_LKUP_RET_FRAG_NEEDED,  /* fragmentation required to fwd */
+	BPF_FIB_LKUP_RET_NO_SRC_ADDR,  /* failed to derive IP src addr */
 };
 
 struct bpf_fib_lookup {
@@ -6999,6 +7022,9 @@ struct bpf_fib_lookup {
 		__u32	rt_metric;
 	};
 
+	/* input: source address to consider for lookup
+	 * output: source address result from lookup
+	 */
 	union {
 		__be32		ipv4_src;
 		__u32		ipv6_src[4];  /* in6_addr; network order */
@@ -7300,9 +7326,11 @@ struct bpf_core_relo {
  * Flags to control bpf_timer_start() behaviour.
  *     - BPF_F_TIMER_ABS: Timeout passed is absolute time, by default it is
  *       relative to current time.
+ *     - BPF_F_TIMER_CPU_PIN: Timer will be pinned to the CPU of the caller.
  */
 enum {
 	BPF_F_TIMER_ABS = (1ULL << 0),
+	BPF_F_TIMER_CPU_PIN = (1ULL << 1),
 };
 
 /* BPF numbers iterator state */
diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index 03875e078be8..cd4b82458d1b 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -680,6 +680,7 @@ enum devlink_port_function_attr {
 	DEVLINK_PORT_FN_ATTR_STATE,	/* u8 */
 	DEVLINK_PORT_FN_ATTR_OPSTATE,	/* u8 */
 	DEVLINK_PORT_FN_ATTR_CAPS,	/* bitfield32 */
+	DEVLINK_PORT_FN_ATTR_DEVLINK,	/* nested */
 
 	__DEVLINK_PORT_FUNCTION_ATTR_MAX,
 	DEVLINK_PORT_FUNCTION_ATTR_MAX = __DEVLINK_PORT_FUNCTION_ATTR_MAX - 1
diff --git a/include/uapi/linux/dpll.h b/include/uapi/linux/dpll.h
new file mode 100644
index 000000000000..20ef0718f8dc
--- /dev/null
+++ b/include/uapi/linux/dpll.h
@@ -0,0 +1,201 @@
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
+/* Do not edit directly, auto-generated from: */
+/*	Documentation/netlink/specs/dpll.yaml */
+/* YNL-GEN uapi header */
+
+#ifndef _UAPI_LINUX_DPLL_H
+#define _UAPI_LINUX_DPLL_H
+
+#define DPLL_FAMILY_NAME	"dpll"
+#define DPLL_FAMILY_VERSION	1
+
+/**
+ * enum dpll_mode - working modes a dpll can support, differentiates if and how
+ *   dpll selects one of its inputs to syntonize with it, valid values for
+ *   DPLL_A_MODE attribute
+ * @DPLL_MODE_MANUAL: input can be only selected by sending a request to dpll
+ * @DPLL_MODE_AUTOMATIC: highest prio input pin auto selected by dpll
+ */
+enum dpll_mode {
+	DPLL_MODE_MANUAL = 1,
+	DPLL_MODE_AUTOMATIC,
+
+	/* private: */
+	__DPLL_MODE_MAX,
+	DPLL_MODE_MAX = (__DPLL_MODE_MAX - 1)
+};
+
+/**
+ * enum dpll_lock_status - provides information of dpll device lock status,
+ *   valid values for DPLL_A_LOCK_STATUS attribute
+ * @DPLL_LOCK_STATUS_UNLOCKED: dpll was not yet locked to any valid input (or
+ *   forced by setting DPLL_A_MODE to DPLL_MODE_DETACHED)
+ * @DPLL_LOCK_STATUS_LOCKED: dpll is locked to a valid signal, but no holdover
+ *   available
+ * @DPLL_LOCK_STATUS_LOCKED_HO_ACQ: dpll is locked and holdover acquired
+ * @DPLL_LOCK_STATUS_HOLDOVER: dpll is in holdover state - lost a valid lock or
+ *   was forced by disconnecting all the pins (latter possible only when dpll
+ *   lock-state was already DPLL_LOCK_STATUS_LOCKED_HO_ACQ, if dpll lock-state
+ *   was not DPLL_LOCK_STATUS_LOCKED_HO_ACQ, the dpll's lock-state shall remain
+ *   DPLL_LOCK_STATUS_UNLOCKED)
+ */
+enum dpll_lock_status {
+	DPLL_LOCK_STATUS_UNLOCKED = 1,
+	DPLL_LOCK_STATUS_LOCKED,
+	DPLL_LOCK_STATUS_LOCKED_HO_ACQ,
+	DPLL_LOCK_STATUS_HOLDOVER,
+
+	/* private: */
+	__DPLL_LOCK_STATUS_MAX,
+	DPLL_LOCK_STATUS_MAX = (__DPLL_LOCK_STATUS_MAX - 1)
+};
+
+#define DPLL_TEMP_DIVIDER	1000
+
+/**
+ * enum dpll_type - type of dpll, valid values for DPLL_A_TYPE attribute
+ * @DPLL_TYPE_PPS: dpll produces Pulse-Per-Second signal
+ * @DPLL_TYPE_EEC: dpll drives the Ethernet Equipment Clock
+ */
+enum dpll_type {
+	DPLL_TYPE_PPS = 1,
+	DPLL_TYPE_EEC,
+
+	/* private: */
+	__DPLL_TYPE_MAX,
+	DPLL_TYPE_MAX = (__DPLL_TYPE_MAX - 1)
+};
+
+/**
+ * enum dpll_pin_type - defines possible types of a pin, valid values for
+ *   DPLL_A_PIN_TYPE attribute
+ * @DPLL_PIN_TYPE_MUX: aggregates another layer of selectable pins
+ * @DPLL_PIN_TYPE_EXT: external input
+ * @DPLL_PIN_TYPE_SYNCE_ETH_PORT: ethernet port PHY's recovered clock
+ * @DPLL_PIN_TYPE_INT_OSCILLATOR: device internal oscillator
+ * @DPLL_PIN_TYPE_GNSS: GNSS recovered clock
+ */
+enum dpll_pin_type {
+	DPLL_PIN_TYPE_MUX = 1,
+	DPLL_PIN_TYPE_EXT,
+	DPLL_PIN_TYPE_SYNCE_ETH_PORT,
+	DPLL_PIN_TYPE_INT_OSCILLATOR,
+	DPLL_PIN_TYPE_GNSS,
+
+	/* private: */
+	__DPLL_PIN_TYPE_MAX,
+	DPLL_PIN_TYPE_MAX = (__DPLL_PIN_TYPE_MAX - 1)
+};
+
+/**
+ * enum dpll_pin_direction - defines possible direction of a pin, valid values
+ *   for DPLL_A_PIN_DIRECTION attribute
+ * @DPLL_PIN_DIRECTION_INPUT: pin used as a input of a signal
+ * @DPLL_PIN_DIRECTION_OUTPUT: pin used to output the signal
+ */
+enum dpll_pin_direction {
+	DPLL_PIN_DIRECTION_INPUT = 1,
+	DPLL_PIN_DIRECTION_OUTPUT,
+
+	/* private: */
+	__DPLL_PIN_DIRECTION_MAX,
+	DPLL_PIN_DIRECTION_MAX = (__DPLL_PIN_DIRECTION_MAX - 1)
+};
+
+#define DPLL_PIN_FREQUENCY_1_HZ		1
+#define DPLL_PIN_FREQUENCY_10_KHZ	10000
+#define DPLL_PIN_FREQUENCY_77_5_KHZ	77500
+#define DPLL_PIN_FREQUENCY_10_MHZ	10000000
+
+/**
+ * enum dpll_pin_state - defines possible states of a pin, valid values for
+ *   DPLL_A_PIN_STATE attribute
+ * @DPLL_PIN_STATE_CONNECTED: pin connected, active input of phase locked loop
+ * @DPLL_PIN_STATE_DISCONNECTED: pin disconnected, not considered as a valid
+ *   input
+ * @DPLL_PIN_STATE_SELECTABLE: pin enabled for automatic input selection
+ */
+enum dpll_pin_state {
+	DPLL_PIN_STATE_CONNECTED = 1,
+	DPLL_PIN_STATE_DISCONNECTED,
+	DPLL_PIN_STATE_SELECTABLE,
+
+	/* private: */
+	__DPLL_PIN_STATE_MAX,
+	DPLL_PIN_STATE_MAX = (__DPLL_PIN_STATE_MAX - 1)
+};
+
+/**
+ * enum dpll_pin_capabilities - defines possible capabilities of a pin, valid
+ *   flags on DPLL_A_PIN_CAPABILITIES attribute
+ * @DPLL_PIN_CAPABILITIES_DIRECTION_CAN_CHANGE: pin direction can be changed
+ * @DPLL_PIN_CAPABILITIES_PRIORITY_CAN_CHANGE: pin priority can be changed
+ * @DPLL_PIN_CAPABILITIES_STATE_CAN_CHANGE: pin state can be changed
+ */
+enum dpll_pin_capabilities {
+	DPLL_PIN_CAPABILITIES_DIRECTION_CAN_CHANGE = 1,
+	DPLL_PIN_CAPABILITIES_PRIORITY_CAN_CHANGE = 2,
+	DPLL_PIN_CAPABILITIES_STATE_CAN_CHANGE = 4,
+};
+
+enum dpll_a {
+	DPLL_A_ID = 1,
+	DPLL_A_MODULE_NAME,
+	DPLL_A_PAD,
+	DPLL_A_CLOCK_ID,
+	DPLL_A_MODE,
+	DPLL_A_MODE_SUPPORTED,
+	DPLL_A_LOCK_STATUS,
+	DPLL_A_TEMP,
+	DPLL_A_TYPE,
+
+	__DPLL_A_MAX,
+	DPLL_A_MAX = (__DPLL_A_MAX - 1)
+};
+
+enum dpll_a_pin {
+	DPLL_A_PIN_ID = 1,
+	DPLL_A_PIN_PARENT_ID,
+	DPLL_A_PIN_MODULE_NAME,
+	DPLL_A_PIN_PAD,
+	DPLL_A_PIN_CLOCK_ID,
+	DPLL_A_PIN_BOARD_LABEL,
+	DPLL_A_PIN_PANEL_LABEL,
+	DPLL_A_PIN_PACKAGE_LABEL,
+	DPLL_A_PIN_TYPE,
+	DPLL_A_PIN_DIRECTION,
+	DPLL_A_PIN_FREQUENCY,
+	DPLL_A_PIN_FREQUENCY_SUPPORTED,
+	DPLL_A_PIN_FREQUENCY_MIN,
+	DPLL_A_PIN_FREQUENCY_MAX,
+	DPLL_A_PIN_PRIO,
+	DPLL_A_PIN_STATE,
+	DPLL_A_PIN_CAPABILITIES,
+	DPLL_A_PIN_PARENT_DEVICE,
+	DPLL_A_PIN_PARENT_PIN,
+
+	__DPLL_A_PIN_MAX,
+	DPLL_A_PIN_MAX = (__DPLL_A_PIN_MAX - 1)
+};
+
+enum dpll_cmd {
+	DPLL_CMD_DEVICE_ID_GET = 1,
+	DPLL_CMD_DEVICE_GET,
+	DPLL_CMD_DEVICE_SET,
+	DPLL_CMD_DEVICE_CREATE_NTF,
+	DPLL_CMD_DEVICE_DELETE_NTF,
+	DPLL_CMD_DEVICE_CHANGE_NTF,
+	DPLL_CMD_PIN_ID_GET,
+	DPLL_CMD_PIN_GET,
+	DPLL_CMD_PIN_SET,
+	DPLL_CMD_PIN_CREATE_NTF,
+	DPLL_CMD_PIN_DELETE_NTF,
+	DPLL_CMD_PIN_CHANGE_NTF,
+
+	__DPLL_CMD_MAX,
+	DPLL_CMD_MAX = (__DPLL_CMD_MAX - 1)
+};
+
+#define DPLL_MCGRP_MONITOR	"monitor"
+
+#endif /* _UAPI_LINUX_DPLL_H */
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index ce3117df9cec..fac351a93aed 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -376,7 +376,7 @@ enum {
 
 	IFLA_GSO_IPV4_MAX_SIZE,
 	IFLA_GRO_IPV4_MAX_SIZE,
-
+	IFLA_DPLL_PIN,
 	__IFLA_MAX
 };
 
diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h
index c1634b95c223..2943a151d4f1 100644
--- a/include/uapi/linux/netdev.h
+++ b/include/uapi/linux/netdev.h
@@ -38,11 +38,27 @@ enum netdev_xdp_act {
 	NETDEV_XDP_ACT_MASK = 127,
 };
 
+/**
+ * enum netdev_xdp_rx_metadata
+ * @NETDEV_XDP_RX_METADATA_TIMESTAMP: Device is capable of exposing receive HW
+ *   timestamp via bpf_xdp_metadata_rx_timestamp().
+ * @NETDEV_XDP_RX_METADATA_HASH: Device is capable of exposing receive packet
+ *   hash via bpf_xdp_metadata_rx_hash().
+ */
+enum netdev_xdp_rx_metadata {
+	NETDEV_XDP_RX_METADATA_TIMESTAMP = 1,
+	NETDEV_XDP_RX_METADATA_HASH = 2,
+
+	/* private: */
+	NETDEV_XDP_RX_METADATA_MASK = 3,
+};
+
 enum {
 	NETDEV_A_DEV_IFINDEX = 1,
 	NETDEV_A_DEV_PAD,
 	NETDEV_A_DEV_XDP_FEATURES,
 	NETDEV_A_DEV_XDP_ZC_MAX_SEGS,
+	NETDEV_A_DEV_XDP_RX_METADATA_FEATURES,
 
 	__NETDEV_A_DEV_MAX,
 	NETDEV_A_DEV_MAX = (__NETDEV_A_DEV_MAX - 1)
diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
index 879eeb0a084b..d1d08da6331a 100644
--- a/include/uapi/linux/tcp.h
+++ b/include/uapi/linux/tcp.h
@@ -289,6 +289,18 @@ struct tcp_info {
 				      */
 
 	__u32   tcpi_rehash;         /* PLB or timeout triggered rehash attempts */
+
+	__u16	tcpi_total_rto;	/* Total number of RTO timeouts, including
+				 * SYN/SYN-ACK and recurring timeouts.
+				 */
+	__u16	tcpi_total_rto_recoveries;	/* Total number of RTO
+						 * recoveries, including any
+						 * unfinished recovery.
+						 */
+	__u32	tcpi_total_rto_time;	/* Total time spent in RTO recoveries
+					 * in milliseconds, including any
+					 * unfinished recovery.
+					 */
 };
 
 /* netlink attributes types for SCM_TIMESTAMPING_OPT_STATS */
diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
index 96856f130cbf..833faa04461b 100644
--- a/kernel/bpf/bpf_iter.c
+++ b/kernel/bpf/bpf_iter.c
@@ -793,8 +793,6 @@ __bpf_kfunc int bpf_iter_num_new(struct bpf_iter_num *it, int start, int end)
 	BUILD_BUG_ON(sizeof(struct bpf_iter_num_kern) != sizeof(struct bpf_iter_num));
 	BUILD_BUG_ON(__alignof__(struct bpf_iter_num_kern) != __alignof__(struct bpf_iter_num));
 
-	BTF_TYPE_EMIT(struct btf_iter_num);
-
 	/* start == end is legit, it's an empty range and we'll just get NULL
 	 * on first (and any subsequent) bpf_iter_num_next() call
 	 */
diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
index fdc3e8705a3c..db6176fb64dc 100644
--- a/kernel/bpf/bpf_struct_ops.c
+++ b/kernel/bpf/bpf_struct_ops.c
@@ -615,7 +615,10 @@ static void __bpf_struct_ops_map_free(struct bpf_map *map)
 	if (st_map->links)
 		bpf_struct_ops_map_put_progs(st_map);
 	bpf_map_area_free(st_map->links);
-	bpf_jit_free_exec(st_map->image);
+	if (st_map->image) {
+		bpf_jit_free_exec(st_map->image);
+		bpf_jit_uncharge_modmem(PAGE_SIZE);
+	}
 	bpf_map_area_free(st_map->uvalue);
 	bpf_map_area_free(st_map);
 }
@@ -657,6 +660,7 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
 	struct bpf_struct_ops_map *st_map;
 	const struct btf_type *t, *vt;
 	struct bpf_map *map;
+	int ret;
 
 	st_ops = bpf_struct_ops_find_value(attr->btf_vmlinux_value_type_id);
 	if (!st_ops)
@@ -681,12 +685,27 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
 	st_map->st_ops = st_ops;
 	map = &st_map->map;
 
+	ret = bpf_jit_charge_modmem(PAGE_SIZE);
+	if (ret) {
+		__bpf_struct_ops_map_free(map);
+		return ERR_PTR(ret);
+	}
+
+	st_map->image = bpf_jit_alloc_exec(PAGE_SIZE);
+	if (!st_map->image) {
+		/* __bpf_struct_ops_map_free() uses st_map->image as flag
+		 * for "charged or not". In this case, we need to unchange
+		 * here.
+		 */
+		bpf_jit_uncharge_modmem(PAGE_SIZE);
+		__bpf_struct_ops_map_free(map);
+		return ERR_PTR(-ENOMEM);
+	}
 	st_map->uvalue = bpf_map_area_alloc(vt->size, NUMA_NO_NODE);
 	st_map->links =
 		bpf_map_area_alloc(btf_type_vlen(t) * sizeof(struct bpf_links *),
 				   NUMA_NO_NODE);
-	st_map->image = bpf_jit_alloc_exec(PAGE_SIZE);
-	if (!st_map->uvalue || !st_map->links || !st_map->image) {
+	if (!st_map->uvalue || !st_map->links) {
 		__bpf_struct_ops_map_free(map);
 		return ERR_PTR(-ENOMEM);
 	}
@@ -907,4 +926,3 @@ err_out:
 	kfree(link);
 	return err;
 }
-
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 8090d7fb11ef..15d71d2986d3 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -3293,6 +3293,8 @@ static int btf_find_kptr(const struct btf *btf, const struct btf_type *t,
 		type = BPF_KPTR_UNREF;
 	else if (!strcmp("kptr", __btf_name_by_offset(btf, t->name_off)))
 		type = BPF_KPTR_REF;
+	else if (!strcmp("percpu_kptr", __btf_name_by_offset(btf, t->name_off)))
+		type = BPF_KPTR_PERCPU;
 	else
 		return -EINVAL;
 
@@ -3308,10 +3310,10 @@ static int btf_find_kptr(const struct btf *btf, const struct btf_type *t,
 	return BTF_FIELD_FOUND;
 }
 
-static const char *btf_find_decl_tag_value(const struct btf *btf,
-					   const struct btf_type *pt,
-					   int comp_idx, const char *tag_key)
+const char *btf_find_decl_tag_value(const struct btf *btf, const struct btf_type *pt,
+				    int comp_idx, const char *tag_key)
 {
+	const char *value = NULL;
 	int i;
 
 	for (i = 1; i < btf_nr_types(btf); i++) {
@@ -3325,9 +3327,14 @@ static const char *btf_find_decl_tag_value(const struct btf *btf,
 			continue;
 		if (strncmp(__btf_name_by_offset(btf, t->name_off), tag_key, len))
 			continue;
-		return __btf_name_by_offset(btf, t->name_off) + len;
+		/* Prevent duplicate entries for same type */
+		if (value)
+			return ERR_PTR(-EEXIST);
+		value = __btf_name_by_offset(btf, t->name_off) + len;
 	}
-	return NULL;
+	if (!value)
+		return ERR_PTR(-ENOENT);
+	return value;
 }
 
 static int
@@ -3345,7 +3352,7 @@ btf_find_graph_root(const struct btf *btf, const struct btf_type *pt,
 	if (t->size != sz)
 		return BTF_FIELD_IGNORE;
 	value_type = btf_find_decl_tag_value(btf, pt, comp_idx, "contains:");
-	if (!value_type)
+	if (IS_ERR(value_type))
 		return -EINVAL;
 	node_field_name = strstr(value_type, ":");
 	if (!node_field_name)
@@ -3457,6 +3464,7 @@ static int btf_find_struct_field(const struct btf *btf,
 			break;
 		case BPF_KPTR_UNREF:
 		case BPF_KPTR_REF:
+		case BPF_KPTR_PERCPU:
 			ret = btf_find_kptr(btf, member_type, off, sz,
 					    idx < info_cnt ? &info[idx] : &tmp);
 			if (ret < 0)
@@ -3523,6 +3531,7 @@ static int btf_find_datasec_var(const struct btf *btf, const struct btf_type *t,
 			break;
 		case BPF_KPTR_UNREF:
 		case BPF_KPTR_REF:
+		case BPF_KPTR_PERCPU:
 			ret = btf_find_kptr(btf, var_type, off, sz,
 					    idx < info_cnt ? &info[idx] : &tmp);
 			if (ret < 0)
@@ -3783,6 +3792,7 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type
 			break;
 		case BPF_KPTR_UNREF:
 		case BPF_KPTR_REF:
+		case BPF_KPTR_PERCPU:
 			ret = btf_parse_kptr(btf, &rec->fields[i], &info_arr[i]);
 			if (ret < 0)
 				goto end;
@@ -6949,7 +6959,7 @@ int btf_check_subprog_call(struct bpf_verifier_env *env, int subprog,
  * (either PTR_TO_CTX or SCALAR_VALUE).
  */
 int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog,
-			  struct bpf_reg_state *regs)
+			  struct bpf_reg_state *regs, bool is_ex_cb)
 {
 	struct bpf_verifier_log *log = &env->log;
 	struct bpf_prog *prog = env->prog;
@@ -7006,7 +7016,7 @@ int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog,
 			tname, nargs, MAX_BPF_FUNC_REG_ARGS);
 		return -EINVAL;
 	}
-	/* check that function returns int */
+	/* check that function returns int, exception cb also requires this */
 	t = btf_type_by_id(btf, t->type);
 	while (btf_type_is_modifier(t))
 		t = btf_type_by_id(btf, t->type);
@@ -7055,6 +7065,14 @@ int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog,
 			i, btf_type_str(t), tname);
 		return -EINVAL;
 	}
+	/* We have already ensured that the callback returns an integer, just
+	 * like all global subprogs. We need to determine it only has a single
+	 * scalar argument.
+	 */
+	if (is_ex_cb && (nargs != 1 || regs[BPF_REG_1].type != SCALAR_VALUE)) {
+		bpf_log(log, "exception cb only supports single integer argument\n");
+		return -EINVAL;
+	}
 	return 0;
 }
 
@@ -7832,6 +7850,7 @@ static int bpf_prog_type_to_kfunc_hook(enum bpf_prog_type prog_type)
 	case BPF_PROG_TYPE_SYSCALL:
 		return BTF_KFUNC_HOOK_SYSCALL;
 	case BPF_PROG_TYPE_CGROUP_SKB:
+	case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
 		return BTF_KFUNC_HOOK_CGROUP_SKB;
 	case BPF_PROG_TYPE_SCHED_ACT:
 		return BTF_KFUNC_HOOK_SCHED_ACT;
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 03b3d4492980..74ad2215e1ba 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -1450,18 +1450,22 @@ EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
  *                                       provided by user sockaddr
  * @sk: sock struct that will use sockaddr
  * @uaddr: sockaddr struct provided by user
+ * @uaddrlen: Pointer to the size of the sockaddr struct provided by user. It is
+ *            read-only for AF_INET[6] uaddr but can be modified for AF_UNIX
+ *            uaddr.
  * @atype: The type of program to be executed
  * @t_ctx: Pointer to attach type specific context
  * @flags: Pointer to u32 which contains higher bits of BPF program
  *         return value (OR'ed together).
  *
- * socket is expected to be of type INET or INET6.
+ * socket is expected to be of type INET, INET6 or UNIX.
  *
  * This function will return %-EPERM if an attached program is found and
  * returned value != 1 during execution. In all other cases, 0 is returned.
  */
 int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
 				      struct sockaddr *uaddr,
+				      int *uaddrlen,
 				      enum cgroup_bpf_attach_type atype,
 				      void *t_ctx,
 				      u32 *flags)
@@ -1473,21 +1477,31 @@ int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
 	};
 	struct sockaddr_storage unspec;
 	struct cgroup *cgrp;
+	int ret;
 
 	/* Check socket family since not all sockets represent network
 	 * endpoint (e.g. AF_UNIX).
 	 */
-	if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6)
+	if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6 &&
+	    sk->sk_family != AF_UNIX)
 		return 0;
 
 	if (!ctx.uaddr) {
 		memset(&unspec, 0, sizeof(unspec));
 		ctx.uaddr = (struct sockaddr *)&unspec;
+		ctx.uaddrlen = 0;
+	} else {
+		ctx.uaddrlen = *uaddrlen;
 	}
 
 	cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
-	return bpf_prog_run_array_cg(&cgrp->bpf, atype, &ctx, bpf_prog_run,
-				     0, flags);
+	ret = bpf_prog_run_array_cg(&cgrp->bpf, atype, &ctx, bpf_prog_run,
+				    0, flags);
+
+	if (!ret && uaddr)
+		*uaddrlen = ctx.uaddrlen;
+
+	return ret;
 }
 EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_addr);
 
@@ -2520,10 +2534,13 @@ cgroup_common_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		case BPF_CGROUP_SOCK_OPS:
 		case BPF_CGROUP_UDP4_RECVMSG:
 		case BPF_CGROUP_UDP6_RECVMSG:
+		case BPF_CGROUP_UNIX_RECVMSG:
 		case BPF_CGROUP_INET4_GETPEERNAME:
 		case BPF_CGROUP_INET6_GETPEERNAME:
+		case BPF_CGROUP_UNIX_GETPEERNAME:
 		case BPF_CGROUP_INET4_GETSOCKNAME:
 		case BPF_CGROUP_INET6_GETSOCKNAME:
+		case BPF_CGROUP_UNIX_GETSOCKNAME:
 			return NULL;
 		default:
 			return &bpf_get_retval_proto;
@@ -2535,10 +2552,13 @@ cgroup_common_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		case BPF_CGROUP_SOCK_OPS:
 		case BPF_CGROUP_UDP4_RECVMSG:
 		case BPF_CGROUP_UDP6_RECVMSG:
+		case BPF_CGROUP_UNIX_RECVMSG:
 		case BPF_CGROUP_INET4_GETPEERNAME:
 		case BPF_CGROUP_INET6_GETPEERNAME:
+		case BPF_CGROUP_UNIX_GETPEERNAME:
 		case BPF_CGROUP_INET4_GETSOCKNAME:
 		case BPF_CGROUP_INET6_GETSOCKNAME:
+		case BPF_CGROUP_UNIX_GETSOCKNAME:
 			return NULL;
 		default:
 			return &bpf_set_retval_proto;
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 4e3ce0542e31..08626b519ce2 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -64,8 +64,8 @@
 #define OFF	insn->off
 #define IMM	insn->imm
 
-struct bpf_mem_alloc bpf_global_ma;
-bool bpf_global_ma_set;
+struct bpf_mem_alloc bpf_global_ma, bpf_global_percpu_ma;
+bool bpf_global_ma_set, bpf_global_percpu_ma_set;
 
 /* No hurry in this branch
  *
@@ -212,7 +212,7 @@ void bpf_prog_fill_jited_linfo(struct bpf_prog *prog,
 	const struct bpf_line_info *linfo;
 	void **jited_linfo;
 
-	if (!prog->aux->jited_linfo)
+	if (!prog->aux->jited_linfo || prog->aux->func_idx > prog->aux->func_cnt)
 		/* Userspace did not provide linfo */
 		return;
 
@@ -539,7 +539,7 @@ static void bpf_prog_kallsyms_del_subprogs(struct bpf_prog *fp)
 {
 	int i;
 
-	for (i = 0; i < fp->aux->func_cnt; i++)
+	for (i = 0; i < fp->aux->real_func_cnt; i++)
 		bpf_prog_kallsyms_del(fp->aux->func[i]);
 }
 
@@ -589,7 +589,7 @@ bpf_prog_ksym_set_name(struct bpf_prog *prog)
 	sym  = bin2hex(sym, prog->tag, sizeof(prog->tag));
 
 	/* prog->aux->name will be ignored if full btf name is available */
-	if (prog->aux->func_info_cnt) {
+	if (prog->aux->func_info_cnt && prog->aux->func_idx < prog->aux->func_info_cnt) {
 		type = btf_type_by_id(prog->aux->btf,
 				      prog->aux->func_info[prog->aux->func_idx].type_id);
 		func_name = btf_name_by_offset(prog->aux->btf, type->name_off);
@@ -623,7 +623,11 @@ static __always_inline int bpf_tree_comp(void *key, struct latch_tree_node *n)
 
 	if (val < ksym->start)
 		return -1;
-	if (val >= ksym->end)
+	/* Ensure that we detect return addresses as part of the program, when
+	 * the final instruction is a call for a program part of the stack
+	 * trace. Therefore, do val > ksym->end instead of val >= ksym->end.
+	 */
+	if (val > ksym->end)
 		return  1;
 
 	return 0;
@@ -733,7 +737,7 @@ bool is_bpf_text_address(unsigned long addr)
 	return ret;
 }
 
-static struct bpf_prog *bpf_prog_ksym_find(unsigned long addr)
+struct bpf_prog *bpf_prog_ksym_find(unsigned long addr)
 {
 	struct bpf_ksym *ksym = bpf_ksym_find(addr);
 
@@ -1208,7 +1212,7 @@ int bpf_jit_get_func_addr(const struct bpf_prog *prog,
 		if (!extra_pass)
 			addr = NULL;
 		else if (prog->aux->func &&
-			 off >= 0 && off < prog->aux->func_cnt)
+			 off >= 0 && off < prog->aux->real_func_cnt)
 			addr = (u8 *)prog->aux->func[off]->bpf_func;
 		else
 			return -EINVAL;
@@ -2721,7 +2725,7 @@ static void bpf_prog_free_deferred(struct work_struct *work)
 #endif
 	if (aux->dst_trampoline)
 		bpf_trampoline_put(aux->dst_trampoline);
-	for (i = 0; i < aux->func_cnt; i++) {
+	for (i = 0; i < aux->real_func_cnt; i++) {
 		/* We can just unlink the subprog poke descriptor table as
 		 * it was originally linked to the main program and is also
 		 * released along with it.
@@ -2729,7 +2733,7 @@ static void bpf_prog_free_deferred(struct work_struct *work)
 		aux->func[i]->aux->poke_tab = NULL;
 		bpf_jit_free(aux->func[i]);
 	}
-	if (aux->func_cnt) {
+	if (aux->real_func_cnt) {
 		kfree(aux->func);
 		bpf_prog_unlock_free(aux->prog);
 	} else {
@@ -2914,6 +2918,15 @@ int __weak bpf_arch_text_invalidate(void *dst, size_t len)
 	return -ENOTSUPP;
 }
 
+bool __weak bpf_jit_supports_exceptions(void)
+{
+	return false;
+}
+
+void __weak arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp), void *cookie)
+{
+}
+
 #ifdef CONFIG_BPF_SYSCALL
 static int __init bpf_global_ma_init(void)
 {
@@ -2921,7 +2934,9 @@ static int __init bpf_global_ma_init(void)
 
 	ret = bpf_mem_alloc_init(&bpf_global_ma, 0, false);
 	bpf_global_ma_set = !ret;
-	return ret;
+	ret = bpf_mem_alloc_init(&bpf_global_percpu_ma, 0, true);
+	bpf_global_percpu_ma_set = !ret;
+	return !bpf_global_ma_set || !bpf_global_percpu_ma_set;
 }
 late_initcall(bpf_global_ma_init);
 #endif
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 8bd3812fb8df..62a53ebfedf9 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -22,6 +22,7 @@
 #include <linux/security.h>
 #include <linux/btf_ids.h>
 #include <linux/bpf_mem_alloc.h>
+#include <linux/kasan.h>
 
 #include "../../lib/kstrtox.h"
 
@@ -1271,7 +1272,7 @@ BPF_CALL_3(bpf_timer_start, struct bpf_timer_kern *, timer, u64, nsecs, u64, fla
 
 	if (in_nmi())
 		return -EOPNOTSUPP;
-	if (flags > BPF_F_TIMER_ABS)
+	if (flags & ~(BPF_F_TIMER_ABS | BPF_F_TIMER_CPU_PIN))
 		return -EINVAL;
 	__bpf_spin_lock_irqsave(&timer->lock);
 	t = timer->timer;
@@ -1285,6 +1286,9 @@ BPF_CALL_3(bpf_timer_start, struct bpf_timer_kern *, timer, u64, nsecs, u64, fla
 	else
 		mode = HRTIMER_MODE_REL_SOFT;
 
+	if (flags & BPF_F_TIMER_CPU_PIN)
+		mode |= HRTIMER_MODE_PINNED;
+
 	hrtimer_start(&t->timer, ns_to_ktime(nsecs), mode);
 out:
 	__bpf_spin_unlock_irqrestore(&timer->lock);
@@ -1902,6 +1906,14 @@ __bpf_kfunc void *bpf_obj_new_impl(u64 local_type_id__k, void *meta__ign)
 	return p;
 }
 
+__bpf_kfunc void *bpf_percpu_obj_new_impl(u64 local_type_id__k, void *meta__ign)
+{
+	u64 size = local_type_id__k;
+
+	/* The verifier has ensured that meta__ign must be NULL */
+	return bpf_mem_alloc(&bpf_global_percpu_ma, size);
+}
+
 /* Must be called under migrate_disable(), as required by bpf_mem_free */
 void __bpf_obj_drop_impl(void *p, const struct btf_record *rec)
 {
@@ -1930,6 +1942,12 @@ __bpf_kfunc void bpf_obj_drop_impl(void *p__alloc, void *meta__ign)
 	__bpf_obj_drop_impl(p, meta ? meta->record : NULL);
 }
 
+__bpf_kfunc void bpf_percpu_obj_drop_impl(void *p__alloc, void *meta__ign)
+{
+	/* The verifier has ensured that meta__ign must be NULL */
+	bpf_mem_free_rcu(&bpf_global_percpu_ma, p__alloc);
+}
+
 __bpf_kfunc void *bpf_refcount_acquire_impl(void *p__refcounted_kptr, void *meta__ign)
 {
 	struct btf_struct_meta *meta = meta__ign;
@@ -2435,6 +2453,49 @@ __bpf_kfunc void bpf_rcu_read_unlock(void)
 	rcu_read_unlock();
 }
 
+struct bpf_throw_ctx {
+	struct bpf_prog_aux *aux;
+	u64 sp;
+	u64 bp;
+	int cnt;
+};
+
+static bool bpf_stack_walker(void *cookie, u64 ip, u64 sp, u64 bp)
+{
+	struct bpf_throw_ctx *ctx = cookie;
+	struct bpf_prog *prog;
+
+	if (!is_bpf_text_address(ip))
+		return !ctx->cnt;
+	prog = bpf_prog_ksym_find(ip);
+	ctx->cnt++;
+	if (bpf_is_subprog(prog))
+		return true;
+	ctx->aux = prog->aux;
+	ctx->sp = sp;
+	ctx->bp = bp;
+	return false;
+}
+
+__bpf_kfunc void bpf_throw(u64 cookie)
+{
+	struct bpf_throw_ctx ctx = {};
+
+	arch_bpf_stack_walk(bpf_stack_walker, &ctx);
+	WARN_ON_ONCE(!ctx.aux);
+	if (ctx.aux)
+		WARN_ON_ONCE(!ctx.aux->exception_boundary);
+	WARN_ON_ONCE(!ctx.bp);
+	WARN_ON_ONCE(!ctx.cnt);
+	/* Prevent KASAN false positives for CONFIG_KASAN_STACK by unpoisoning
+	 * deeper stack depths than ctx.sp as we do not return from bpf_throw,
+	 * which skips compiler generated instrumentation to do the same.
+	 */
+	kasan_unpoison_task_stack_below((void *)(long)ctx.sp);
+	ctx.aux->bpf_exception_cb(cookie, ctx.sp, ctx.bp);
+	WARN(1, "A call to BPF exception callback should never return\n");
+}
+
 __diag_pop();
 
 BTF_SET8_START(generic_btf_ids)
@@ -2442,7 +2503,9 @@ BTF_SET8_START(generic_btf_ids)
 BTF_ID_FLAGS(func, crash_kexec, KF_DESTRUCTIVE)
 #endif
 BTF_ID_FLAGS(func, bpf_obj_new_impl, KF_ACQUIRE | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_percpu_obj_new_impl, KF_ACQUIRE | KF_RET_NULL)
 BTF_ID_FLAGS(func, bpf_obj_drop_impl, KF_RELEASE)
+BTF_ID_FLAGS(func, bpf_percpu_obj_drop_impl, KF_RELEASE)
 BTF_ID_FLAGS(func, bpf_refcount_acquire_impl, KF_ACQUIRE | KF_RET_NULL)
 BTF_ID_FLAGS(func, bpf_list_push_front_impl)
 BTF_ID_FLAGS(func, bpf_list_push_back_impl)
@@ -2462,6 +2525,7 @@ BTF_ID_FLAGS(func, bpf_cgroup_from_id, KF_ACQUIRE | KF_RET_NULL)
 BTF_ID_FLAGS(func, bpf_task_under_cgroup, KF_RCU)
 #endif
 BTF_ID_FLAGS(func, bpf_task_from_pid, KF_ACQUIRE | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_throw)
 BTF_SET8_END(generic_btf_ids)
 
 static const struct btf_kfunc_id_set generic_kfunc_set = {
@@ -2488,6 +2552,9 @@ BTF_ID_FLAGS(func, bpf_dynptr_slice_rdwr, KF_RET_NULL)
 BTF_ID_FLAGS(func, bpf_iter_num_new, KF_ITER_NEW)
 BTF_ID_FLAGS(func, bpf_iter_num_next, KF_ITER_NEXT | KF_RET_NULL)
 BTF_ID_FLAGS(func, bpf_iter_num_destroy, KF_ITER_DESTROY)
+BTF_ID_FLAGS(func, bpf_iter_task_vma_new, KF_ITER_NEW | KF_RCU)
+BTF_ID_FLAGS(func, bpf_iter_task_vma_next, KF_ITER_NEXT | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_iter_task_vma_destroy, KF_ITER_DESTROY)
 BTF_ID_FLAGS(func, bpf_dynptr_adjust)
 BTF_ID_FLAGS(func, bpf_dynptr_is_null)
 BTF_ID_FLAGS(func, bpf_dynptr_is_rdonly)
diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c
index d93ddac283d4..39ea316c55e7 100644
--- a/kernel/bpf/memalloc.c
+++ b/kernel/bpf/memalloc.c
@@ -526,15 +526,16 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu)
 	struct bpf_mem_cache *c, __percpu *pc;
 	struct obj_cgroup *objcg = NULL;
 
+	/* room for llist_node and per-cpu pointer */
+	if (percpu)
+		percpu_size = LLIST_NODE_SZ + sizeof(void *);
+
 	if (size) {
 		pc = __alloc_percpu_gfp(sizeof(*pc), 8, GFP_KERNEL);
 		if (!pc)
 			return -ENOMEM;
 
-		if (percpu)
-			/* room for llist_node and per-cpu pointer */
-			percpu_size = LLIST_NODE_SZ + sizeof(void *);
-		else
+		if (!percpu)
 			size += LLIST_NODE_SZ; /* room for llist_node */
 		unit_size = size;
 
@@ -555,10 +556,6 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu)
 		return 0;
 	}
 
-	/* size == 0 && percpu is an invalid combination */
-	if (WARN_ON_ONCE(percpu))
-		return -EINVAL;
-
 	pcc = __alloc_percpu_gfp(sizeof(*cc), 8, GFP_KERNEL);
 	if (!pcc)
 		return -ENOMEM;
@@ -572,6 +569,7 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu)
 			c = &cc->cache[i];
 			c->unit_size = sizes[i];
 			c->objcg = objcg;
+			c->percpu_size = percpu_size;
 			c->tgt = c;
 
 			init_refill_work(c);
@@ -782,12 +780,17 @@ static void notrace *unit_alloc(struct bpf_mem_cache *c)
 		}
 	}
 	local_dec(&c->active);
-	local_irq_restore(flags);
 
 	WARN_ON(cnt < 0);
 
 	if (cnt < c->low_watermark)
 		irq_work_raise(c);
+	/* Enable IRQ after the enqueue of irq work completes, so irq work
+	 * will run after IRQ is enabled and free_llist may be refilled by
+	 * irq work before other task preempts current task.
+	 */
+	local_irq_restore(flags);
+
 	return llnode;
 }
 
@@ -823,11 +826,16 @@ static void notrace unit_free(struct bpf_mem_cache *c, void *ptr)
 		llist_add(llnode, &c->free_llist_extra);
 	}
 	local_dec(&c->active);
-	local_irq_restore(flags);
 
 	if (cnt > c->high_watermark)
 		/* free few objects from current cpu into global kmalloc pool */
 		irq_work_raise(c);
+	/* Enable IRQ after irq_work_raise() completes, otherwise when current
+	 * task is preempted by task which does unit_alloc(), unit_alloc() may
+	 * return NULL unexpectedly because irq work is already pending but can
+	 * not been triggered and free_llist can not be refilled timely.
+	 */
+	local_irq_restore(flags);
 }
 
 static void notrace unit_free_rcu(struct bpf_mem_cache *c, void *ptr)
@@ -845,10 +853,10 @@ static void notrace unit_free_rcu(struct bpf_mem_cache *c, void *ptr)
 		llist_add(llnode, &c->free_llist_extra_rcu);
 	}
 	local_dec(&c->active);
-	local_irq_restore(flags);
 
 	if (!atomic_read(&c->call_rcu_in_progress))
 		irq_work_raise(c);
+	local_irq_restore(flags);
 }
 
 /* Called from BPF program or from sys_bpf syscall.
diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
index 87d6693d8233..1a4fec330eaa 100644
--- a/kernel/bpf/offload.c
+++ b/kernel/bpf/offload.c
@@ -234,7 +234,14 @@ int bpf_prog_dev_bound_init(struct bpf_prog *prog, union bpf_attr *attr)
 	    attr->prog_type != BPF_PROG_TYPE_XDP)
 		return -EINVAL;
 
-	if (attr->prog_flags & ~BPF_F_XDP_DEV_BOUND_ONLY)
+	if (attr->prog_flags & ~(BPF_F_XDP_DEV_BOUND_ONLY | BPF_F_XDP_HAS_FRAGS))
+		return -EINVAL;
+
+	/* Frags are allowed only if program is dev-bound-only, but not
+	 * if it is requesting bpf offload.
+	 */
+	if (attr->prog_flags & BPF_F_XDP_HAS_FRAGS &&
+	    !(attr->prog_flags & BPF_F_XDP_DEV_BOUND_ONLY))
 		return -EINVAL;
 
 	if (attr->prog_type == BPF_PROG_TYPE_SCHED_CLS &&
@@ -847,10 +854,11 @@ void *bpf_dev_bound_resolve_kfunc(struct bpf_prog *prog, u32 func_id)
 	if (!ops)
 		goto out;
 
-	if (func_id == bpf_xdp_metadata_kfunc_id(XDP_METADATA_KFUNC_RX_TIMESTAMP))
-		p = ops->xmo_rx_timestamp;
-	else if (func_id == bpf_xdp_metadata_kfunc_id(XDP_METADATA_KFUNC_RX_HASH))
-		p = ops->xmo_rx_hash;
+#define XDP_METADATA_KFUNC(name, _, __, xmo) \
+	if (func_id == bpf_xdp_metadata_kfunc_id(name)) p = ops->xmo;
+	XDP_METADATA_KFUNC_xxx
+#undef XDP_METADATA_KFUNC
+
 out:
 	up_read(&bpf_devs_lock);
 
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index 458bb80b14d5..d6b277482085 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -28,7 +28,7 @@ struct bpf_stack_map {
 	void *elems;
 	struct pcpu_freelist freelist;
 	u32 n_buckets;
-	struct stack_map_bucket *buckets[];
+	struct stack_map_bucket *buckets[] __counted_by(n_buckets);
 };
 
 static inline bool stack_map_use_build_id(struct bpf_map *map)
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index d77b2f8b9364..341f8cb4405c 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -514,6 +514,7 @@ void btf_record_free(struct btf_record *rec)
 		switch (rec->fields[i].type) {
 		case BPF_KPTR_UNREF:
 		case BPF_KPTR_REF:
+		case BPF_KPTR_PERCPU:
 			if (rec->fields[i].kptr.module)
 				module_put(rec->fields[i].kptr.module);
 			btf_put(rec->fields[i].kptr.btf);
@@ -560,6 +561,7 @@ struct btf_record *btf_record_dup(const struct btf_record *rec)
 		switch (fields[i].type) {
 		case BPF_KPTR_UNREF:
 		case BPF_KPTR_REF:
+		case BPF_KPTR_PERCPU:
 			btf_get(fields[i].kptr.btf);
 			if (fields[i].kptr.module && !try_module_get(fields[i].kptr.module)) {
 				ret = -ENXIO;
@@ -650,6 +652,7 @@ void bpf_obj_free_fields(const struct btf_record *rec, void *obj)
 			WRITE_ONCE(*(u64 *)field_ptr, 0);
 			break;
 		case BPF_KPTR_REF:
+		case BPF_KPTR_PERCPU:
 			xchgd_field = (void *)xchg((unsigned long *)field_ptr, 0);
 			if (!xchgd_field)
 				break;
@@ -1045,6 +1048,7 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
 				break;
 			case BPF_KPTR_UNREF:
 			case BPF_KPTR_REF:
+			case BPF_KPTR_PERCPU:
 			case BPF_REFCOUNT:
 				if (map->map_type != BPF_MAP_TYPE_HASH &&
 				    map->map_type != BPF_MAP_TYPE_PERCPU_HASH &&
@@ -2442,14 +2446,19 @@ bpf_prog_load_check_attach(enum bpf_prog_type prog_type,
 		case BPF_CGROUP_INET6_BIND:
 		case BPF_CGROUP_INET4_CONNECT:
 		case BPF_CGROUP_INET6_CONNECT:
+		case BPF_CGROUP_UNIX_CONNECT:
 		case BPF_CGROUP_INET4_GETPEERNAME:
 		case BPF_CGROUP_INET6_GETPEERNAME:
+		case BPF_CGROUP_UNIX_GETPEERNAME:
 		case BPF_CGROUP_INET4_GETSOCKNAME:
 		case BPF_CGROUP_INET6_GETSOCKNAME:
+		case BPF_CGROUP_UNIX_GETSOCKNAME:
 		case BPF_CGROUP_UDP4_SENDMSG:
 		case BPF_CGROUP_UDP6_SENDMSG:
+		case BPF_CGROUP_UNIX_SENDMSG:
 		case BPF_CGROUP_UDP4_RECVMSG:
 		case BPF_CGROUP_UDP6_RECVMSG:
+		case BPF_CGROUP_UNIX_RECVMSG:
 			return 0;
 		default:
 			return -EINVAL;
@@ -2745,7 +2754,7 @@ free_used_maps:
 	 * period before we can tear down JIT memory since symbols
 	 * are already exposed under kallsyms.
 	 */
-	__bpf_prog_put_noref(prog, prog->aux->func_cnt);
+	__bpf_prog_put_noref(prog, prog->aux->real_func_cnt);
 	return err;
 free_prog_sec:
 	free_uid(prog->aux->user);
@@ -3370,7 +3379,7 @@ static void bpf_perf_link_dealloc(struct bpf_link *link)
 static int bpf_perf_link_fill_common(const struct perf_event *event,
 				     char __user *uname, u32 ulen,
 				     u64 *probe_offset, u64 *probe_addr,
-				     u32 *fd_type)
+				     u32 *fd_type, unsigned long *missed)
 {
 	const char *buf;
 	u32 prog_id;
@@ -3381,7 +3390,7 @@ static int bpf_perf_link_fill_common(const struct perf_event *event,
 		return -EINVAL;
 
 	err = bpf_get_perf_event_info(event, &prog_id, fd_type, &buf,
-				      probe_offset, probe_addr);
+				      probe_offset, probe_addr, missed);
 	if (err)
 		return err;
 	if (!uname)
@@ -3404,6 +3413,7 @@ static int bpf_perf_link_fill_common(const struct perf_event *event,
 static int bpf_perf_link_fill_kprobe(const struct perf_event *event,
 				     struct bpf_link_info *info)
 {
+	unsigned long missed;
 	char __user *uname;
 	u64 addr, offset;
 	u32 ulen, type;
@@ -3412,7 +3422,7 @@ static int bpf_perf_link_fill_kprobe(const struct perf_event *event,
 	uname = u64_to_user_ptr(info->perf_event.kprobe.func_name);
 	ulen = info->perf_event.kprobe.name_len;
 	err = bpf_perf_link_fill_common(event, uname, ulen, &offset, &addr,
-					&type);
+					&type, &missed);
 	if (err)
 		return err;
 	if (type == BPF_FD_TYPE_KRETPROBE)
@@ -3421,6 +3431,7 @@ static int bpf_perf_link_fill_kprobe(const struct perf_event *event,
 		info->perf_event.type = BPF_PERF_EVENT_KPROBE;
 
 	info->perf_event.kprobe.offset = offset;
+	info->perf_event.kprobe.missed = missed;
 	if (!kallsyms_show_value(current_cred()))
 		addr = 0;
 	info->perf_event.kprobe.addr = addr;
@@ -3440,7 +3451,7 @@ static int bpf_perf_link_fill_uprobe(const struct perf_event *event,
 	uname = u64_to_user_ptr(info->perf_event.uprobe.file_name);
 	ulen = info->perf_event.uprobe.name_len;
 	err = bpf_perf_link_fill_common(event, uname, ulen, &offset, &addr,
-					&type);
+					&type, NULL);
 	if (err)
 		return err;
 
@@ -3476,7 +3487,7 @@ static int bpf_perf_link_fill_tracepoint(const struct perf_event *event,
 	uname = u64_to_user_ptr(info->perf_event.tracepoint.tp_name);
 	ulen = info->perf_event.tracepoint.name_len;
 	info->perf_event.type = BPF_PERF_EVENT_TRACEPOINT;
-	return bpf_perf_link_fill_common(event, uname, ulen, NULL, NULL, NULL);
+	return bpf_perf_link_fill_common(event, uname, ulen, NULL, NULL, NULL, NULL);
 }
 
 static int bpf_perf_link_fill_perf_event(const struct perf_event *event,
@@ -3672,14 +3683,19 @@ attach_type_to_prog_type(enum bpf_attach_type attach_type)
 	case BPF_CGROUP_INET6_BIND:
 	case BPF_CGROUP_INET4_CONNECT:
 	case BPF_CGROUP_INET6_CONNECT:
+	case BPF_CGROUP_UNIX_CONNECT:
 	case BPF_CGROUP_INET4_GETPEERNAME:
 	case BPF_CGROUP_INET6_GETPEERNAME:
+	case BPF_CGROUP_UNIX_GETPEERNAME:
 	case BPF_CGROUP_INET4_GETSOCKNAME:
 	case BPF_CGROUP_INET6_GETSOCKNAME:
+	case BPF_CGROUP_UNIX_GETSOCKNAME:
 	case BPF_CGROUP_UDP4_SENDMSG:
 	case BPF_CGROUP_UDP6_SENDMSG:
+	case BPF_CGROUP_UNIX_SENDMSG:
 	case BPF_CGROUP_UDP4_RECVMSG:
 	case BPF_CGROUP_UDP6_RECVMSG:
+	case BPF_CGROUP_UNIX_RECVMSG:
 		return BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
 	case BPF_CGROUP_SOCK_OPS:
 		return BPF_PROG_TYPE_SOCK_OPS;
@@ -3945,14 +3961,19 @@ static int bpf_prog_query(const union bpf_attr *attr,
 	case BPF_CGROUP_INET6_POST_BIND:
 	case BPF_CGROUP_INET4_CONNECT:
 	case BPF_CGROUP_INET6_CONNECT:
+	case BPF_CGROUP_UNIX_CONNECT:
 	case BPF_CGROUP_INET4_GETPEERNAME:
 	case BPF_CGROUP_INET6_GETPEERNAME:
+	case BPF_CGROUP_UNIX_GETPEERNAME:
 	case BPF_CGROUP_INET4_GETSOCKNAME:
 	case BPF_CGROUP_INET6_GETSOCKNAME:
+	case BPF_CGROUP_UNIX_GETSOCKNAME:
 	case BPF_CGROUP_UDP4_SENDMSG:
 	case BPF_CGROUP_UDP6_SENDMSG:
+	case BPF_CGROUP_UNIX_SENDMSG:
 	case BPF_CGROUP_UDP4_RECVMSG:
 	case BPF_CGROUP_UDP6_RECVMSG:
+	case BPF_CGROUP_UNIX_RECVMSG:
 	case BPF_CGROUP_SOCK_OPS:
 	case BPF_CGROUP_DEVICE:
 	case BPF_CGROUP_SYSCTL:
@@ -4818,7 +4839,7 @@ static int bpf_task_fd_query(const union bpf_attr *attr,
 
 		err = bpf_get_perf_event_info(event, &prog_id, &fd_type,
 					      &buf, &probe_offset,
-					      &probe_addr);
+					      &probe_addr, NULL);
 		if (!err)
 			err = bpf_task_fd_query_copy(attr, uattr, prog_id,
 						     fd_type, buf,
diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c
index c4ab9d6cdbe9..fef17628341f 100644
--- a/kernel/bpf/task_iter.c
+++ b/kernel/bpf/task_iter.c
@@ -7,7 +7,9 @@
 #include <linux/fs.h>
 #include <linux/fdtable.h>
 #include <linux/filter.h>
+#include <linux/bpf_mem_alloc.h>
 #include <linux/btf_ids.h>
+#include <linux/mm_types.h>
 #include "mmap_unlock_work.h"
 
 static const char * const iter_task_type_names[] = {
@@ -35,16 +37,13 @@ static struct task_struct *task_group_seq_get_next(struct bpf_iter_seq_task_comm
 						   u32 *tid,
 						   bool skip_if_dup_files)
 {
-	struct task_struct *task, *next_task;
+	struct task_struct *task;
 	struct pid *pid;
-	u32 saved_tid;
+	u32 next_tid;
 
 	if (!*tid) {
 		/* The first time, the iterator calls this function. */
 		pid = find_pid_ns(common->pid, common->ns);
-		if (!pid)
-			return NULL;
-
 		task = get_pid_task(pid, PIDTYPE_TGID);
 		if (!task)
 			return NULL;
@@ -66,44 +65,27 @@ static struct task_struct *task_group_seq_get_next(struct bpf_iter_seq_task_comm
 		return task;
 	}
 
-	pid = find_pid_ns(common->pid_visiting, common->ns);
-	if (!pid)
-		return NULL;
-
-	task = get_pid_task(pid, PIDTYPE_PID);
+	task = find_task_by_pid_ns(common->pid_visiting, common->ns);
 	if (!task)
 		return NULL;
 
 retry:
-	if (!pid_alive(task)) {
-		put_task_struct(task);
-		return NULL;
-	}
-
-	next_task = next_thread(task);
-	put_task_struct(task);
-	if (!next_task)
-		return NULL;
+	task = next_thread(task);
 
-	saved_tid = *tid;
-	*tid = __task_pid_nr_ns(next_task, PIDTYPE_PID, common->ns);
-	if (!*tid || *tid == common->pid) {
+	next_tid = __task_pid_nr_ns(task, PIDTYPE_PID, common->ns);
+	if (!next_tid || next_tid == common->pid) {
 		/* Run out of tasks of a process.  The tasks of a
 		 * thread_group are linked as circular linked list.
 		 */
-		*tid = saved_tid;
 		return NULL;
 	}
 
-	get_task_struct(next_task);
-	common->pid_visiting = *tid;
-
-	if (skip_if_dup_files && task->files == task->group_leader->files) {
-		task = next_task;
+	if (skip_if_dup_files && task->files == task->group_leader->files)
 		goto retry;
-	}
 
-	return next_task;
+	*tid = common->pid_visiting = next_tid;
+	get_task_struct(task);
+	return task;
 }
 
 static struct task_struct *task_seq_get_next(struct bpf_iter_seq_task_common *common,
@@ -823,6 +805,95 @@ const struct bpf_func_proto bpf_find_vma_proto = {
 	.arg5_type	= ARG_ANYTHING,
 };
 
+struct bpf_iter_task_vma_kern_data {
+	struct task_struct *task;
+	struct mm_struct *mm;
+	struct mmap_unlock_irq_work *work;
+	struct vma_iterator vmi;
+};
+
+struct bpf_iter_task_vma {
+	/* opaque iterator state; having __u64 here allows to preserve correct
+	 * alignment requirements in vmlinux.h, generated from BTF
+	 */
+	__u64 __opaque[1];
+} __attribute__((aligned(8)));
+
+/* Non-opaque version of bpf_iter_task_vma */
+struct bpf_iter_task_vma_kern {
+	struct bpf_iter_task_vma_kern_data *data;
+} __attribute__((aligned(8)));
+
+__diag_push();
+__diag_ignore_all("-Wmissing-prototypes",
+		  "Global functions as their definitions will be in vmlinux BTF");
+
+__bpf_kfunc int bpf_iter_task_vma_new(struct bpf_iter_task_vma *it,
+				      struct task_struct *task, u64 addr)
+{
+	struct bpf_iter_task_vma_kern *kit = (void *)it;
+	bool irq_work_busy = false;
+	int err;
+
+	BUILD_BUG_ON(sizeof(struct bpf_iter_task_vma_kern) != sizeof(struct bpf_iter_task_vma));
+	BUILD_BUG_ON(__alignof__(struct bpf_iter_task_vma_kern) != __alignof__(struct bpf_iter_task_vma));
+
+	/* is_iter_reg_valid_uninit guarantees that kit hasn't been initialized
+	 * before, so non-NULL kit->data doesn't point to previously
+	 * bpf_mem_alloc'd bpf_iter_task_vma_kern_data
+	 */
+	kit->data = bpf_mem_alloc(&bpf_global_ma, sizeof(struct bpf_iter_task_vma_kern_data));
+	if (!kit->data)
+		return -ENOMEM;
+
+	kit->data->task = get_task_struct(task);
+	kit->data->mm = task->mm;
+	if (!kit->data->mm) {
+		err = -ENOENT;
+		goto err_cleanup_iter;
+	}
+
+	/* kit->data->work == NULL is valid after bpf_mmap_unlock_get_irq_work */
+	irq_work_busy = bpf_mmap_unlock_get_irq_work(&kit->data->work);
+	if (irq_work_busy || !mmap_read_trylock(kit->data->mm)) {
+		err = -EBUSY;
+		goto err_cleanup_iter;
+	}
+
+	vma_iter_init(&kit->data->vmi, kit->data->mm, addr);
+	return 0;
+
+err_cleanup_iter:
+	if (kit->data->task)
+		put_task_struct(kit->data->task);
+	bpf_mem_free(&bpf_global_ma, kit->data);
+	/* NULL kit->data signals failed bpf_iter_task_vma initialization */
+	kit->data = NULL;
+	return err;
+}
+
+__bpf_kfunc struct vm_area_struct *bpf_iter_task_vma_next(struct bpf_iter_task_vma *it)
+{
+	struct bpf_iter_task_vma_kern *kit = (void *)it;
+
+	if (!kit->data) /* bpf_iter_task_vma_new failed */
+		return NULL;
+	return vma_next(&kit->data->vmi);
+}
+
+__bpf_kfunc void bpf_iter_task_vma_destroy(struct bpf_iter_task_vma *it)
+{
+	struct bpf_iter_task_vma_kern *kit = (void *)it;
+
+	if (kit->data) {
+		bpf_mmap_unlock_mm(kit->data->work, kit->data->mm);
+		put_task_struct(kit->data->task);
+		bpf_mem_free(&bpf_global_ma, kit->data);
+	}
+}
+
+__diag_pop();
+
 DEFINE_PER_CPU(struct mmap_unlock_irq_work, mmap_unlock_work);
 
 static void do_mmap_read_unlock(struct irq_work *entry)
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index 53ff50cac61e..e97aeda3a86b 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -415,8 +415,8 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mut
 		goto out;
 	}
 
-	/* clear all bits except SHARE_IPMODIFY */
-	tr->flags &= BPF_TRAMP_F_SHARE_IPMODIFY;
+	/* clear all bits except SHARE_IPMODIFY and TAIL_CALL_CTX */
+	tr->flags &= (BPF_TRAMP_F_SHARE_IPMODIFY | BPF_TRAMP_F_TAIL_CALL_CTX);
 
 	if (tlinks[BPF_TRAMP_FEXIT].nr_links ||
 	    tlinks[BPF_TRAMP_MODIFY_RETURN].nr_links) {
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 873ade146f3d..bb58987e4844 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -304,7 +304,7 @@ struct bpf_kfunc_call_arg_meta {
 	/* arg_{btf,btf_id,owning_ref} are used by kfunc-specific handling,
 	 * generally to pass info about user-defined local kptr types to later
 	 * verification logic
-	 *   bpf_obj_drop
+	 *   bpf_obj_drop/bpf_percpu_obj_drop
 	 *     Record the local kptr type to be drop'd
 	 *   bpf_refcount_acquire (via KF_ARG_PTR_TO_REFCOUNTED_KPTR arg type)
 	 *     Record the local kptr type to be refcount_incr'd and use
@@ -543,6 +543,7 @@ static bool is_dynptr_ref_function(enum bpf_func_id func_id)
 }
 
 static bool is_callback_calling_kfunc(u32 btf_id);
+static bool is_bpf_throw_kfunc(struct bpf_insn *insn);
 
 static bool is_callback_calling_function(enum bpf_func_id func_id)
 {
@@ -1341,6 +1342,50 @@ static void scrub_spilled_slot(u8 *stype)
 		*stype = STACK_MISC;
 }
 
+static void print_scalar_ranges(struct bpf_verifier_env *env,
+				const struct bpf_reg_state *reg,
+				const char **sep)
+{
+	struct {
+		const char *name;
+		u64 val;
+		bool omit;
+	} minmaxs[] = {
+		{"smin",   reg->smin_value,         reg->smin_value == S64_MIN},
+		{"smax",   reg->smax_value,         reg->smax_value == S64_MAX},
+		{"umin",   reg->umin_value,         reg->umin_value == 0},
+		{"umax",   reg->umax_value,         reg->umax_value == U64_MAX},
+		{"smin32", (s64)reg->s32_min_value, reg->s32_min_value == S32_MIN},
+		{"smax32", (s64)reg->s32_max_value, reg->s32_max_value == S32_MAX},
+		{"umin32", reg->u32_min_value,      reg->u32_min_value == 0},
+		{"umax32", reg->u32_max_value,      reg->u32_max_value == U32_MAX},
+	}, *m1, *m2, *mend = &minmaxs[ARRAY_SIZE(minmaxs)];
+	bool neg1, neg2;
+
+	for (m1 = &minmaxs[0]; m1 < mend; m1++) {
+		if (m1->omit)
+			continue;
+
+		neg1 = m1->name[0] == 's' && (s64)m1->val < 0;
+
+		verbose(env, "%s%s=", *sep, m1->name);
+		*sep = ",";
+
+		for (m2 = m1 + 2; m2 < mend; m2 += 2) {
+			if (m2->omit || m2->val != m1->val)
+				continue;
+			/* don't mix negatives with positives */
+			neg2 = m2->name[0] == 's' && (s64)m2->val < 0;
+			if (neg2 != neg1)
+				continue;
+			m2->omit = true;
+			verbose(env, "%s=", m2->name);
+		}
+
+		verbose(env, m1->name[0] == 's' ? "%lld" : "%llu", m1->val);
+	}
+}
+
 static void print_verifier_state(struct bpf_verifier_env *env,
 				 const struct bpf_func_state *state,
 				 bool print_all)
@@ -1404,34 +1449,13 @@ static void print_verifier_state(struct bpf_verifier_env *env,
 				 */
 				verbose_a("imm=%llx", reg->var_off.value);
 			} else {
-				if (reg->smin_value != reg->umin_value &&
-				    reg->smin_value != S64_MIN)
-					verbose_a("smin=%lld", (long long)reg->smin_value);
-				if (reg->smax_value != reg->umax_value &&
-				    reg->smax_value != S64_MAX)
-					verbose_a("smax=%lld", (long long)reg->smax_value);
-				if (reg->umin_value != 0)
-					verbose_a("umin=%llu", (unsigned long long)reg->umin_value);
-				if (reg->umax_value != U64_MAX)
-					verbose_a("umax=%llu", (unsigned long long)reg->umax_value);
+				print_scalar_ranges(env, reg, &sep);
 				if (!tnum_is_unknown(reg->var_off)) {
 					char tn_buf[48];
 
 					tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
 					verbose_a("var_off=%s", tn_buf);
 				}
-				if (reg->s32_min_value != reg->smin_value &&
-				    reg->s32_min_value != S32_MIN)
-					verbose_a("s32_min=%d", (int)(reg->s32_min_value));
-				if (reg->s32_max_value != reg->smax_value &&
-				    reg->s32_max_value != S32_MAX)
-					verbose_a("s32_max=%d", (int)(reg->s32_max_value));
-				if (reg->u32_min_value != reg->umin_value &&
-				    reg->u32_min_value != U32_MIN)
-					verbose_a("u32_min=%d", (int)(reg->u32_min_value));
-				if (reg->u32_max_value != reg->umax_value &&
-				    reg->u32_max_value != U32_MAX)
-					verbose_a("u32_max=%d", (int)(reg->u32_max_value));
 			}
 #undef verbose_a
 
@@ -1515,7 +1539,8 @@ static void print_verifier_state(struct bpf_verifier_env *env,
 	if (state->in_async_callback_fn)
 		verbose(env, " async_cb");
 	verbose(env, "\n");
-	mark_verifier_state_clean(env);
+	if (!print_all)
+		mark_verifier_state_clean(env);
 }
 
 static inline u32 vlog_alignment(u32 pos)
@@ -1748,7 +1773,9 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state,
 		return -ENOMEM;
 	dst_state->jmp_history_cnt = src->jmp_history_cnt;
 
-	/* if dst has more stack frames then src frame, free them */
+	/* if dst has more stack frames then src frame, free them, this is also
+	 * necessary in case of exceptional exits using bpf_throw.
+	 */
 	for (i = src->curframe + 1; i <= dst_state->curframe; i++) {
 		free_func_state(dst_state->frame[i]);
 		dst_state->frame[i] = NULL;
@@ -2454,6 +2481,68 @@ static int add_subprog(struct bpf_verifier_env *env, int off)
 	return env->subprog_cnt - 1;
 }
 
+static int bpf_find_exception_callback_insn_off(struct bpf_verifier_env *env)
+{
+	struct bpf_prog_aux *aux = env->prog->aux;
+	struct btf *btf = aux->btf;
+	const struct btf_type *t;
+	u32 main_btf_id, id;
+	const char *name;
+	int ret, i;
+
+	/* Non-zero func_info_cnt implies valid btf */
+	if (!aux->func_info_cnt)
+		return 0;
+	main_btf_id = aux->func_info[0].type_id;
+
+	t = btf_type_by_id(btf, main_btf_id);
+	if (!t) {
+		verbose(env, "invalid btf id for main subprog in func_info\n");
+		return -EINVAL;
+	}
+
+	name = btf_find_decl_tag_value(btf, t, -1, "exception_callback:");
+	if (IS_ERR(name)) {
+		ret = PTR_ERR(name);
+		/* If there is no tag present, there is no exception callback */
+		if (ret == -ENOENT)
+			ret = 0;
+		else if (ret == -EEXIST)
+			verbose(env, "multiple exception callback tags for main subprog\n");
+		return ret;
+	}
+
+	ret = btf_find_by_name_kind(btf, name, BTF_KIND_FUNC);
+	if (ret < 0) {
+		verbose(env, "exception callback '%s' could not be found in BTF\n", name);
+		return ret;
+	}
+	id = ret;
+	t = btf_type_by_id(btf, id);
+	if (btf_func_linkage(t) != BTF_FUNC_GLOBAL) {
+		verbose(env, "exception callback '%s' must have global linkage\n", name);
+		return -EINVAL;
+	}
+	ret = 0;
+	for (i = 0; i < aux->func_info_cnt; i++) {
+		if (aux->func_info[i].type_id != id)
+			continue;
+		ret = aux->func_info[i].insn_off;
+		/* Further func_info and subprog checks will also happen
+		 * later, so assume this is the right insn_off for now.
+		 */
+		if (!ret) {
+			verbose(env, "invalid exception callback insn_off in func_info: 0\n");
+			ret = -EINVAL;
+		}
+	}
+	if (!ret) {
+		verbose(env, "exception callback type id not found in func_info\n");
+		ret = -EINVAL;
+	}
+	return ret;
+}
+
 #define MAX_KFUNC_DESCS 256
 #define MAX_KFUNC_BTFS	256
 
@@ -2793,8 +2882,8 @@ bpf_jit_find_kfunc_model(const struct bpf_prog *prog,
 static int add_subprog_and_kfunc(struct bpf_verifier_env *env)
 {
 	struct bpf_subprog_info *subprog = env->subprog_info;
+	int i, ret, insn_cnt = env->prog->len, ex_cb_insn;
 	struct bpf_insn *insn = env->prog->insnsi;
-	int i, ret, insn_cnt = env->prog->len;
 
 	/* Add entry function. */
 	ret = add_subprog(env, 0);
@@ -2820,6 +2909,26 @@ static int add_subprog_and_kfunc(struct bpf_verifier_env *env)
 			return ret;
 	}
 
+	ret = bpf_find_exception_callback_insn_off(env);
+	if (ret < 0)
+		return ret;
+	ex_cb_insn = ret;
+
+	/* If ex_cb_insn > 0, this means that the main program has a subprog
+	 * marked using BTF decl tag to serve as the exception callback.
+	 */
+	if (ex_cb_insn) {
+		ret = add_subprog(env, ex_cb_insn);
+		if (ret < 0)
+			return ret;
+		for (i = 1; i < env->subprog_cnt; i++) {
+			if (env->subprog_info[i].start != ex_cb_insn)
+				continue;
+			env->exception_callback_subprog = i;
+			break;
+		}
+	}
+
 	/* Add a fake 'exit' subprog which could simplify subprog iteration
 	 * logic. 'subprog_cnt' should not be increased.
 	 */
@@ -2868,7 +2977,7 @@ next:
 		if (i == subprog_end - 1) {
 			/* to avoid fall-through from one subprog into another
 			 * the last insn of the subprog should be either exit
-			 * or unconditional jump back
+			 * or unconditional jump back or bpf_throw call
 			 */
 			if (code != (BPF_JMP | BPF_EXIT) &&
 			    code != (BPF_JMP32 | BPF_JA) &&
@@ -3029,7 +3138,7 @@ static bool is_reg64(struct bpf_verifier_env *env, struct bpf_insn *insn,
 
 	if (class == BPF_LDX) {
 		if (t != SRC_OP)
-			return BPF_SIZE(code) == BPF_DW;
+			return BPF_SIZE(code) == BPF_DW || BPF_MODE(code) == BPF_MEMSX;
 		/* LDX source must be ptr. */
 		return true;
 	}
@@ -4999,6 +5108,8 @@ static int map_kptr_match_type(struct bpf_verifier_env *env,
 			perm_flags |= PTR_UNTRUSTED;
 	} else {
 		perm_flags = PTR_MAYBE_NULL | MEM_ALLOC;
+		if (kptr_field->type == BPF_KPTR_PERCPU)
+			perm_flags |= MEM_PERCPU;
 	}
 
 	if (base_type(reg->type) != PTR_TO_BTF_ID || (type_flag(reg->type) & ~perm_flags))
@@ -5042,7 +5153,7 @@ static int map_kptr_match_type(struct bpf_verifier_env *env,
 	 */
 	if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, reg->off,
 				  kptr_field->kptr.btf, kptr_field->kptr.btf_id,
-				  kptr_field->type == BPF_KPTR_REF))
+				  kptr_field->type != BPF_KPTR_UNREF))
 		goto bad_type;
 	return 0;
 bad_type:
@@ -5086,7 +5197,18 @@ static bool rcu_safe_kptr(const struct btf_field *field)
 {
 	const struct btf_field_kptr *kptr = &field->kptr;
 
-	return field->type == BPF_KPTR_REF && rcu_protected_object(kptr->btf, kptr->btf_id);
+	return field->type == BPF_KPTR_PERCPU ||
+	       (field->type == BPF_KPTR_REF && rcu_protected_object(kptr->btf, kptr->btf_id));
+}
+
+static u32 btf_ld_kptr_type(struct bpf_verifier_env *env, struct btf_field *kptr_field)
+{
+	if (rcu_safe_kptr(kptr_field) && in_rcu_cs(env)) {
+		if (kptr_field->type != BPF_KPTR_PERCPU)
+			return PTR_MAYBE_NULL | MEM_RCU;
+		return PTR_MAYBE_NULL | MEM_RCU | MEM_PERCPU;
+	}
+	return PTR_MAYBE_NULL | PTR_UNTRUSTED;
 }
 
 static int check_map_kptr_access(struct bpf_verifier_env *env, u32 regno,
@@ -5112,7 +5234,8 @@ static int check_map_kptr_access(struct bpf_verifier_env *env, u32 regno,
 	/* We only allow loading referenced kptr, since it will be marked as
 	 * untrusted, similar to unreferenced kptr.
 	 */
-	if (class != BPF_LDX && kptr_field->type == BPF_KPTR_REF) {
+	if (class != BPF_LDX &&
+	    (kptr_field->type == BPF_KPTR_REF || kptr_field->type == BPF_KPTR_PERCPU)) {
 		verbose(env, "store to referenced kptr disallowed\n");
 		return -EACCES;
 	}
@@ -5123,10 +5246,7 @@ static int check_map_kptr_access(struct bpf_verifier_env *env, u32 regno,
 		 * value from map as PTR_TO_BTF_ID, with the correct type.
 		 */
 		mark_btf_ld_reg(env, cur_regs(env), value_regno, PTR_TO_BTF_ID, kptr_field->kptr.btf,
-				kptr_field->kptr.btf_id,
-				rcu_safe_kptr(kptr_field) && in_rcu_cs(env) ?
-				PTR_MAYBE_NULL | MEM_RCU :
-				PTR_MAYBE_NULL | PTR_UNTRUSTED);
+				kptr_field->kptr.btf_id, btf_ld_kptr_type(env, kptr_field));
 		/* For mark_ptr_or_null_reg */
 		val_reg->id = ++env->id_gen;
 	} else if (class == BPF_STX) {
@@ -5180,6 +5300,7 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno,
 			switch (field->type) {
 			case BPF_KPTR_UNREF:
 			case BPF_KPTR_REF:
+			case BPF_KPTR_PERCPU:
 				if (src != ACCESS_DIRECT) {
 					verbose(env, "kptr cannot be accessed indirectly by helper\n");
 					return -EACCES;
@@ -5647,6 +5768,27 @@ continue_func:
 	for (; i < subprog_end; i++) {
 		int next_insn, sidx;
 
+		if (bpf_pseudo_kfunc_call(insn + i) && !insn[i].off) {
+			bool err = false;
+
+			if (!is_bpf_throw_kfunc(insn + i))
+				continue;
+			if (subprog[idx].is_cb)
+				err = true;
+			for (int c = 0; c < frame && !err; c++) {
+				if (subprog[ret_prog[c]].is_cb) {
+					err = true;
+					break;
+				}
+			}
+			if (!err)
+				continue;
+			verbose(env,
+				"bpf_throw kfunc (insn %d) cannot be called from callback subprog %d\n",
+				i, idx);
+			return -EINVAL;
+		}
+
 		if (!bpf_pseudo_call(insn + i) && !bpf_pseudo_func(insn + i))
 			continue;
 		/* remember insn and function to return to */
@@ -5669,6 +5811,10 @@ continue_func:
 			/* async callbacks don't increase bpf prog stack size unless called directly */
 			if (!bpf_pseudo_call(insn + i))
 				continue;
+			if (subprog[sidx].is_exception_cb) {
+				verbose(env, "insn %d cannot call exception cb directly\n", i);
+				return -EINVAL;
+			}
 		}
 		i = next_insn;
 		idx = sidx;
@@ -5690,8 +5836,13 @@ continue_func:
 	 * tail call counter throughout bpf2bpf calls combined with tailcalls
 	 */
 	if (tail_call_reachable)
-		for (j = 0; j < frame; j++)
+		for (j = 0; j < frame; j++) {
+			if (subprog[ret_prog[j]].is_exception_cb) {
+				verbose(env, "cannot tail call within exception cb\n");
+				return -EINVAL;
+			}
 			subprog[ret_prog[j]].tail_call_reachable = true;
+		}
 	if (subprog[0].tail_call_reachable)
 		env->prog->aux->tail_call_reachable = true;
 
@@ -6207,7 +6358,7 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
 		}
 
 		if (type_is_alloc(reg->type) && !type_is_non_owning_ref(reg->type) &&
-		    !reg->ref_obj_id) {
+		    !(reg->type & MEM_RCU) && !reg->ref_obj_id) {
 			verbose(env, "verifier internal error: ref_obj_id for allocated object must be non-zero\n");
 			return -EFAULT;
 		}
@@ -7318,7 +7469,7 @@ static int process_kptr_func(struct bpf_verifier_env *env, int regno,
 		verbose(env, "off=%d doesn't point to kptr\n", kptr_off);
 		return -EACCES;
 	}
-	if (kptr_field->type != BPF_KPTR_REF) {
+	if (kptr_field->type != BPF_KPTR_REF && kptr_field->type != BPF_KPTR_PERCPU) {
 		verbose(env, "off=%d kptr isn't referenced kptr\n", kptr_off);
 		return -EACCES;
 	}
@@ -7751,6 +7902,7 @@ static const struct bpf_reg_types btf_ptr_types = {
 static const struct bpf_reg_types percpu_btf_ptr_types = {
 	.types = {
 		PTR_TO_BTF_ID | MEM_PERCPU,
+		PTR_TO_BTF_ID | MEM_PERCPU | MEM_RCU,
 		PTR_TO_BTF_ID | MEM_PERCPU | PTR_TRUSTED,
 	}
 };
@@ -7829,8 +7981,10 @@ static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
 	if (base_type(arg_type) == ARG_PTR_TO_MEM)
 		type &= ~DYNPTR_TYPE_FLAG_MASK;
 
-	if (meta->func_id == BPF_FUNC_kptr_xchg && type_is_alloc(type))
+	if (meta->func_id == BPF_FUNC_kptr_xchg && type_is_alloc(type)) {
 		type &= ~MEM_ALLOC;
+		type &= ~MEM_PERCPU;
+	}
 
 	for (i = 0; i < ARRAY_SIZE(compatible->types); i++) {
 		expected = compatible->types[i];
@@ -7913,6 +8067,7 @@ found:
 		break;
 	}
 	case PTR_TO_BTF_ID | MEM_ALLOC:
+	case PTR_TO_BTF_ID | MEM_PERCPU | MEM_ALLOC:
 		if (meta->func_id != BPF_FUNC_spin_lock && meta->func_id != BPF_FUNC_spin_unlock &&
 		    meta->func_id != BPF_FUNC_kptr_xchg) {
 			verbose(env, "verifier internal error: unimplemented handling of MEM_ALLOC\n");
@@ -7924,6 +8079,7 @@ found:
 		}
 		break;
 	case PTR_TO_BTF_ID | MEM_PERCPU:
+	case PTR_TO_BTF_ID | MEM_PERCPU | MEM_RCU:
 	case PTR_TO_BTF_ID | MEM_PERCPU | PTR_TRUSTED:
 		/* Handled by helper specific checks */
 		break;
@@ -8900,6 +9056,7 @@ static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn
 	 * callbacks
 	 */
 	if (set_callee_state_cb != set_callee_state) {
+		env->subprog_info[subprog].is_cb = true;
 		if (bpf_pseudo_kfunc_call(insn) &&
 		    !is_callback_calling_kfunc(insn->imm)) {
 			verbose(env, "verifier bug: kfunc %s#%d not marked as callback-calling\n",
@@ -9289,7 +9446,8 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
 		verbose(env, "to caller at %d:\n", *insn_idx);
 		print_verifier_state(env, caller, true);
 	}
-	/* clear everything in the callee */
+	/* clear everything in the callee. In case of exceptional exits using
+	 * bpf_throw, this will be done by copy_verifier_state for extra frames. */
 	free_func_state(callee);
 	state->frame[state->curframe--] = NULL;
 	return 0;
@@ -9413,17 +9571,17 @@ record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
 	return 0;
 }
 
-static int check_reference_leak(struct bpf_verifier_env *env)
+static int check_reference_leak(struct bpf_verifier_env *env, bool exception_exit)
 {
 	struct bpf_func_state *state = cur_func(env);
 	bool refs_lingering = false;
 	int i;
 
-	if (state->frameno && !state->in_callback_fn)
+	if (!exception_exit && state->frameno && !state->in_callback_fn)
 		return 0;
 
 	for (i = 0; i < state->acquired_refs; i++) {
-		if (state->in_callback_fn && state->refs[i].callback_ref != state->frameno)
+		if (!exception_exit && state->in_callback_fn && state->refs[i].callback_ref != state->frameno)
 			continue;
 		verbose(env, "Unreleased reference id=%d alloc_insn=%d\n",
 			state->refs[i].id, state->refs[i].insn_idx);
@@ -9530,6 +9688,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
 			     int *insn_idx_p)
 {
 	enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
+	bool returns_cpu_specific_alloc_ptr = false;
 	const struct bpf_func_proto *fn = NULL;
 	enum bpf_return_type ret_type;
 	enum bpf_type_flag ret_flag;
@@ -9640,6 +9799,26 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
 				return -EFAULT;
 			}
 			err = unmark_stack_slots_dynptr(env, &regs[meta.release_regno]);
+		} else if (func_id == BPF_FUNC_kptr_xchg && meta.ref_obj_id) {
+			u32 ref_obj_id = meta.ref_obj_id;
+			bool in_rcu = in_rcu_cs(env);
+			struct bpf_func_state *state;
+			struct bpf_reg_state *reg;
+
+			err = release_reference_state(cur_func(env), ref_obj_id);
+			if (!err) {
+				bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
+					if (reg->ref_obj_id == ref_obj_id) {
+						if (in_rcu && (reg->type & MEM_ALLOC) && (reg->type & MEM_PERCPU)) {
+							reg->ref_obj_id = 0;
+							reg->type &= ~MEM_ALLOC;
+							reg->type |= MEM_RCU;
+						} else {
+							mark_reg_invalid(env, reg);
+						}
+					}
+				}));
+			}
 		} else if (meta.ref_obj_id) {
 			err = release_reference(env, meta.ref_obj_id);
 		} else if (register_is_null(&regs[meta.release_regno])) {
@@ -9657,7 +9836,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
 
 	switch (func_id) {
 	case BPF_FUNC_tail_call:
-		err = check_reference_leak(env);
+		err = check_reference_leak(env, false);
 		if (err) {
 			verbose(env, "tail_call would lead to reference leak\n");
 			return err;
@@ -9768,6 +9947,23 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
 
 		break;
 	}
+	case BPF_FUNC_per_cpu_ptr:
+	case BPF_FUNC_this_cpu_ptr:
+	{
+		struct bpf_reg_state *reg = &regs[BPF_REG_1];
+		const struct btf_type *type;
+
+		if (reg->type & MEM_RCU) {
+			type = btf_type_by_id(reg->btf, reg->btf_id);
+			if (!type || !btf_type_is_struct(type)) {
+				verbose(env, "Helper has invalid btf/btf_id in R1\n");
+				return -EFAULT;
+			}
+			returns_cpu_specific_alloc_ptr = true;
+			env->insn_aux_data[insn_idx].call_with_percpu_alloc_ptr = true;
+		}
+		break;
+	}
 	case BPF_FUNC_user_ringbuf_drain:
 		err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
 					set_user_ringbuf_callback_state);
@@ -9857,14 +10053,18 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
 			regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag;
 			regs[BPF_REG_0].mem_size = tsize;
 		} else {
-			/* MEM_RDONLY may be carried from ret_flag, but it
-			 * doesn't apply on PTR_TO_BTF_ID. Fold it, otherwise
-			 * it will confuse the check of PTR_TO_BTF_ID in
-			 * check_mem_access().
-			 */
-			ret_flag &= ~MEM_RDONLY;
+			if (returns_cpu_specific_alloc_ptr) {
+				regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC | MEM_RCU;
+			} else {
+				/* MEM_RDONLY may be carried from ret_flag, but it
+				 * doesn't apply on PTR_TO_BTF_ID. Fold it, otherwise
+				 * it will confuse the check of PTR_TO_BTF_ID in
+				 * check_mem_access().
+				 */
+				ret_flag &= ~MEM_RDONLY;
+				regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag;
+			}
 
-			regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag;
 			regs[BPF_REG_0].btf = meta.ret_btf;
 			regs[BPF_REG_0].btf_id = meta.ret_btf_id;
 		}
@@ -9880,8 +10080,11 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
 		if (func_id == BPF_FUNC_kptr_xchg) {
 			ret_btf = meta.kptr_field->kptr.btf;
 			ret_btf_id = meta.kptr_field->kptr.btf_id;
-			if (!btf_is_kernel(ret_btf))
+			if (!btf_is_kernel(ret_btf)) {
 				regs[BPF_REG_0].type |= MEM_ALLOC;
+				if (meta.kptr_field->type == BPF_KPTR_PERCPU)
+					regs[BPF_REG_0].type |= MEM_PERCPU;
+			}
 		} else {
 			if (fn->ret_btf_id == BPF_PTR_POISON) {
 				verbose(env, "verifier internal error:");
@@ -10266,6 +10469,9 @@ enum special_kfunc_type {
 	KF_bpf_dynptr_slice,
 	KF_bpf_dynptr_slice_rdwr,
 	KF_bpf_dynptr_clone,
+	KF_bpf_percpu_obj_new_impl,
+	KF_bpf_percpu_obj_drop_impl,
+	KF_bpf_throw,
 };
 
 BTF_SET_START(special_kfunc_set)
@@ -10286,6 +10492,9 @@ BTF_ID(func, bpf_dynptr_from_xdp)
 BTF_ID(func, bpf_dynptr_slice)
 BTF_ID(func, bpf_dynptr_slice_rdwr)
 BTF_ID(func, bpf_dynptr_clone)
+BTF_ID(func, bpf_percpu_obj_new_impl)
+BTF_ID(func, bpf_percpu_obj_drop_impl)
+BTF_ID(func, bpf_throw)
 BTF_SET_END(special_kfunc_set)
 
 BTF_ID_LIST(special_kfunc_list)
@@ -10308,6 +10517,9 @@ BTF_ID(func, bpf_dynptr_from_xdp)
 BTF_ID(func, bpf_dynptr_slice)
 BTF_ID(func, bpf_dynptr_slice_rdwr)
 BTF_ID(func, bpf_dynptr_clone)
+BTF_ID(func, bpf_percpu_obj_new_impl)
+BTF_ID(func, bpf_percpu_obj_drop_impl)
+BTF_ID(func, bpf_throw)
 
 static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta)
 {
@@ -10625,6 +10837,12 @@ static bool is_callback_calling_kfunc(u32 btf_id)
 	return btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl];
 }
 
+static bool is_bpf_throw_kfunc(struct bpf_insn *insn)
+{
+	return bpf_pseudo_kfunc_call(insn) && insn->off == 0 &&
+	       insn->imm == special_kfunc_list[KF_bpf_throw];
+}
+
 static bool is_rbtree_lock_required_kfunc(u32 btf_id)
 {
 	return is_bpf_rbtree_api_kfunc(btf_id);
@@ -11002,7 +11220,17 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
 			}
 			break;
 		case KF_ARG_PTR_TO_ALLOC_BTF_ID:
-			if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
+			if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC)) {
+				if (meta->func_id != special_kfunc_list[KF_bpf_obj_drop_impl]) {
+					verbose(env, "arg#%d expected for bpf_obj_drop_impl()\n", i);
+					return -EINVAL;
+				}
+			} else if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC | MEM_PERCPU)) {
+				if (meta->func_id != special_kfunc_list[KF_bpf_percpu_obj_drop_impl]) {
+					verbose(env, "arg#%d expected for bpf_percpu_obj_drop_impl()\n", i);
+					return -EINVAL;
+				}
+			} else {
 				verbose(env, "arg#%d expected pointer to allocated object\n", i);
 				return -EINVAL;
 			}
@@ -11010,8 +11238,7 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
 				verbose(env, "allocated object must be referenced\n");
 				return -EINVAL;
 			}
-			if (meta->btf == btf_vmlinux &&
-			    meta->func_id == special_kfunc_list[KF_bpf_obj_drop_impl]) {
+			if (meta->btf == btf_vmlinux) {
 				meta->arg_btf = reg->btf;
 				meta->arg_btf_id = reg->btf_id;
 			}
@@ -11202,6 +11429,10 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
 			break;
 		}
 		case KF_ARG_PTR_TO_CALLBACK:
+			if (reg->type != PTR_TO_FUNC) {
+				verbose(env, "arg%d expected pointer to func\n", i);
+				return -EINVAL;
+			}
 			meta->subprogno = reg->subprogno;
 			break;
 		case KF_ARG_PTR_TO_REFCOUNTED_KPTR:
@@ -11280,6 +11511,8 @@ static int fetch_kfunc_meta(struct bpf_verifier_env *env,
 	return 0;
 }
 
+static int check_return_code(struct bpf_verifier_env *env, int regno);
+
 static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 			    int *insn_idx_p)
 {
@@ -11401,6 +11634,24 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 		}
 	}
 
+	if (meta.func_id == special_kfunc_list[KF_bpf_throw]) {
+		if (!bpf_jit_supports_exceptions()) {
+			verbose(env, "JIT does not support calling kfunc %s#%d\n",
+				func_name, meta.func_id);
+			return -ENOTSUPP;
+		}
+		env->seen_exception = true;
+
+		/* In the case of the default callback, the cookie value passed
+		 * to bpf_throw becomes the return value of the program.
+		 */
+		if (!env->exception_callback_subprog) {
+			err = check_return_code(env, BPF_REG_1);
+			if (err < 0)
+				return err;
+		}
+	}
+
 	for (i = 0; i < CALLER_SAVED_REGS; i++)
 		mark_reg_not_init(env, regs, caller_saved[i]);
 
@@ -11411,6 +11662,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 		/* Only exception is bpf_obj_new_impl */
 		if (meta.btf != btf_vmlinux ||
 		    (meta.func_id != special_kfunc_list[KF_bpf_obj_new_impl] &&
+		     meta.func_id != special_kfunc_list[KF_bpf_percpu_obj_new_impl] &&
 		     meta.func_id != special_kfunc_list[KF_bpf_refcount_acquire_impl])) {
 			verbose(env, "acquire kernel function does not return PTR_TO_BTF_ID\n");
 			return -EINVAL;
@@ -11424,11 +11676,16 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 		ptr_type = btf_type_skip_modifiers(desc_btf, t->type, &ptr_type_id);
 
 		if (meta.btf == btf_vmlinux && btf_id_set_contains(&special_kfunc_set, meta.func_id)) {
-			if (meta.func_id == special_kfunc_list[KF_bpf_obj_new_impl]) {
+			if (meta.func_id == special_kfunc_list[KF_bpf_obj_new_impl] ||
+			    meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) {
+				struct btf_struct_meta *struct_meta;
 				struct btf *ret_btf;
 				u32 ret_btf_id;
 
-				if (unlikely(!bpf_global_ma_set))
+				if (meta.func_id == special_kfunc_list[KF_bpf_obj_new_impl] && !bpf_global_ma_set)
+					return -ENOMEM;
+
+				if (meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl] && !bpf_global_percpu_ma_set)
 					return -ENOMEM;
 
 				if (((u64)(u32)meta.arg_constant.value) != meta.arg_constant.value) {
@@ -11441,24 +11698,38 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 
 				/* This may be NULL due to user not supplying a BTF */
 				if (!ret_btf) {
-					verbose(env, "bpf_obj_new requires prog BTF\n");
+					verbose(env, "bpf_obj_new/bpf_percpu_obj_new requires prog BTF\n");
 					return -EINVAL;
 				}
 
 				ret_t = btf_type_by_id(ret_btf, ret_btf_id);
 				if (!ret_t || !__btf_type_is_struct(ret_t)) {
-					verbose(env, "bpf_obj_new type ID argument must be of a struct\n");
+					verbose(env, "bpf_obj_new/bpf_percpu_obj_new type ID argument must be of a struct\n");
 					return -EINVAL;
 				}
 
+				struct_meta = btf_find_struct_meta(ret_btf, ret_btf_id);
+				if (meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) {
+					if (!__btf_type_is_scalar_struct(env, ret_btf, ret_t, 0)) {
+						verbose(env, "bpf_percpu_obj_new type ID argument must be of a struct of scalars\n");
+						return -EINVAL;
+					}
+
+					if (struct_meta) {
+						verbose(env, "bpf_percpu_obj_new type ID argument must not contain special fields\n");
+						return -EINVAL;
+					}
+				}
+
 				mark_reg_known_zero(env, regs, BPF_REG_0);
 				regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC;
 				regs[BPF_REG_0].btf = ret_btf;
 				regs[BPF_REG_0].btf_id = ret_btf_id;
+				if (meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl])
+					regs[BPF_REG_0].type |= MEM_PERCPU;
 
 				insn_aux->obj_new_size = ret_t->size;
-				insn_aux->kptr_struct_meta =
-					btf_find_struct_meta(ret_btf, ret_btf_id);
+				insn_aux->kptr_struct_meta = struct_meta;
 			} else if (meta.func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl]) {
 				mark_reg_known_zero(env, regs, BPF_REG_0);
 				regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC;
@@ -11595,7 +11866,8 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 			regs[BPF_REG_0].id = ++env->id_gen;
 	} else if (btf_type_is_void(t)) {
 		if (meta.btf == btf_vmlinux && btf_id_set_contains(&special_kfunc_set, meta.func_id)) {
-			if (meta.func_id == special_kfunc_list[KF_bpf_obj_drop_impl]) {
+			if (meta.func_id == special_kfunc_list[KF_bpf_obj_drop_impl] ||
+			    meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_drop_impl]) {
 				insn_aux->kptr_struct_meta =
 					btf_find_struct_meta(meta.arg_btf,
 							     meta.arg_btf_id);
@@ -14135,6 +14407,8 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
 		    !sanitize_speculative_path(env, insn, *insn_idx + 1,
 					       *insn_idx))
 			return -EFAULT;
+		if (env->log.level & BPF_LOG_LEVEL)
+			print_insn_state(env, this_branch->frame[this_branch->curframe]);
 		*insn_idx += insn->off;
 		return 0;
 	} else if (pred == 0) {
@@ -14147,6 +14421,8 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
 					       *insn_idx + insn->off + 1,
 					       *insn_idx))
 			return -EFAULT;
+		if (env->log.level & BPF_LOG_LEVEL)
+			print_insn_state(env, this_branch->frame[this_branch->curframe]);
 		return 0;
 	}
 
@@ -14425,7 +14701,7 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
 	 * gen_ld_abs() may terminate the program at runtime, leading to
 	 * reference leak.
 	 */
-	err = check_reference_leak(env);
+	err = check_reference_leak(env, false);
 	if (err) {
 		verbose(env, "BPF_LD_[ABS|IND] cannot be mixed with socket references\n");
 		return err;
@@ -14474,7 +14750,7 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
 	return 0;
 }
 
-static int check_return_code(struct bpf_verifier_env *env)
+static int check_return_code(struct bpf_verifier_env *env, int regno)
 {
 	struct tnum enforce_attach_type_range = tnum_unknown;
 	const struct bpf_prog *prog = env->prog;
@@ -14486,7 +14762,7 @@ static int check_return_code(struct bpf_verifier_env *env)
 	const bool is_subprog = frame->subprogno;
 
 	/* LSM and struct_ops func-ptr's return type could be "void" */
-	if (!is_subprog) {
+	if (!is_subprog || frame->in_exception_callback_fn) {
 		switch (prog_type) {
 		case BPF_PROG_TYPE_LSM:
 			if (prog->expected_attach_type == BPF_LSM_CGROUP)
@@ -14508,22 +14784,22 @@ static int check_return_code(struct bpf_verifier_env *env)
 	 * of bpf_exit, which means that program wrote
 	 * something into it earlier
 	 */
-	err = check_reg_arg(env, BPF_REG_0, SRC_OP);
+	err = check_reg_arg(env, regno, SRC_OP);
 	if (err)
 		return err;
 
-	if (is_pointer_value(env, BPF_REG_0)) {
-		verbose(env, "R0 leaks addr as return value\n");
+	if (is_pointer_value(env, regno)) {
+		verbose(env, "R%d leaks addr as return value\n", regno);
 		return -EACCES;
 	}
 
-	reg = cur_regs(env) + BPF_REG_0;
+	reg = cur_regs(env) + regno;
 
 	if (frame->in_async_callback_fn) {
 		/* enforce return zero from async callbacks like timer */
 		if (reg->type != SCALAR_VALUE) {
-			verbose(env, "In async callback the register R0 is not a known value (%s)\n",
-				reg_type_str(env, reg->type));
+			verbose(env, "In async callback the register R%d is not a known value (%s)\n",
+				regno, reg_type_str(env, reg->type));
 			return -EINVAL;
 		}
 
@@ -14534,10 +14810,10 @@ static int check_return_code(struct bpf_verifier_env *env)
 		return 0;
 	}
 
-	if (is_subprog) {
+	if (is_subprog && !frame->in_exception_callback_fn) {
 		if (reg->type != SCALAR_VALUE) {
-			verbose(env, "At subprogram exit the register R0 is not a scalar value (%s)\n",
-				reg_type_str(env, reg->type));
+			verbose(env, "At subprogram exit the register R%d is not a scalar value (%s)\n",
+				regno, reg_type_str(env, reg->type));
 			return -EINVAL;
 		}
 		return 0;
@@ -14547,10 +14823,13 @@ static int check_return_code(struct bpf_verifier_env *env)
 	case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
 		if (env->prog->expected_attach_type == BPF_CGROUP_UDP4_RECVMSG ||
 		    env->prog->expected_attach_type == BPF_CGROUP_UDP6_RECVMSG ||
+		    env->prog->expected_attach_type == BPF_CGROUP_UNIX_RECVMSG ||
 		    env->prog->expected_attach_type == BPF_CGROUP_INET4_GETPEERNAME ||
 		    env->prog->expected_attach_type == BPF_CGROUP_INET6_GETPEERNAME ||
+		    env->prog->expected_attach_type == BPF_CGROUP_UNIX_GETPEERNAME ||
 		    env->prog->expected_attach_type == BPF_CGROUP_INET4_GETSOCKNAME ||
-		    env->prog->expected_attach_type == BPF_CGROUP_INET6_GETSOCKNAME)
+		    env->prog->expected_attach_type == BPF_CGROUP_INET6_GETSOCKNAME ||
+		    env->prog->expected_attach_type == BPF_CGROUP_UNIX_GETSOCKNAME)
 			range = tnum_range(1, 1);
 		if (env->prog->expected_attach_type == BPF_CGROUP_INET4_BIND ||
 		    env->prog->expected_attach_type == BPF_CGROUP_INET6_BIND)
@@ -14619,8 +14898,8 @@ static int check_return_code(struct bpf_verifier_env *env)
 	}
 
 	if (reg->type != SCALAR_VALUE) {
-		verbose(env, "At program exit the register R0 is not a known value (%s)\n",
-			reg_type_str(env, reg->type));
+		verbose(env, "At program exit the register R%d is not a known value (%s)\n",
+			regno, reg_type_str(env, reg->type));
 		return -EINVAL;
 	}
 
@@ -14891,8 +15170,8 @@ static int check_cfg(struct bpf_verifier_env *env)
 {
 	int insn_cnt = env->prog->len;
 	int *insn_stack, *insn_state;
-	int ret = 0;
-	int i;
+	int ex_insn_beg, i, ret = 0;
+	bool ex_done = false;
 
 	insn_state = env->cfg.insn_state = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
 	if (!insn_state)
@@ -14908,6 +15187,7 @@ static int check_cfg(struct bpf_verifier_env *env)
 	insn_stack[0] = 0; /* 0 is the first instruction */
 	env->cfg.cur_stack = 1;
 
+walk_cfg:
 	while (env->cfg.cur_stack > 0) {
 		int t = insn_stack[env->cfg.cur_stack - 1];
 
@@ -14934,6 +15214,16 @@ static int check_cfg(struct bpf_verifier_env *env)
 		goto err_free;
 	}
 
+	if (env->exception_callback_subprog && !ex_done) {
+		ex_insn_beg = env->subprog_info[env->exception_callback_subprog].start;
+
+		insn_state[ex_insn_beg] = DISCOVERED;
+		insn_stack[0] = ex_insn_beg;
+		env->cfg.cur_stack = 1;
+		ex_done = true;
+		goto walk_cfg;
+	}
+
 	for (i = 0; i < insn_cnt; i++) {
 		if (insn_state[i] != EXPLORED) {
 			verbose(env, "unreachable insn %d\n", i);
@@ -14971,20 +15261,18 @@ static int check_abnormal_return(struct bpf_verifier_env *env)
 #define MIN_BPF_FUNCINFO_SIZE	8
 #define MAX_FUNCINFO_REC_SIZE	252
 
-static int check_btf_func(struct bpf_verifier_env *env,
-			  const union bpf_attr *attr,
-			  bpfptr_t uattr)
+static int check_btf_func_early(struct bpf_verifier_env *env,
+				const union bpf_attr *attr,
+				bpfptr_t uattr)
 {
-	const struct btf_type *type, *func_proto, *ret_type;
-	u32 i, nfuncs, urec_size, min_size;
 	u32 krec_size = sizeof(struct bpf_func_info);
+	const struct btf_type *type, *func_proto;
+	u32 i, nfuncs, urec_size, min_size;
 	struct bpf_func_info *krecord;
-	struct bpf_func_info_aux *info_aux = NULL;
 	struct bpf_prog *prog;
 	const struct btf *btf;
-	bpfptr_t urecord;
 	u32 prev_offset = 0;
-	bool scalar_return;
+	bpfptr_t urecord;
 	int ret = -ENOMEM;
 
 	nfuncs = attr->func_info_cnt;
@@ -14994,11 +15282,6 @@ static int check_btf_func(struct bpf_verifier_env *env,
 		return 0;
 	}
 
-	if (nfuncs != env->subprog_cnt) {
-		verbose(env, "number of funcs in func_info doesn't match number of subprogs\n");
-		return -EINVAL;
-	}
-
 	urec_size = attr->func_info_rec_size;
 	if (urec_size < MIN_BPF_FUNCINFO_SIZE ||
 	    urec_size > MAX_FUNCINFO_REC_SIZE ||
@@ -15016,9 +15299,6 @@ static int check_btf_func(struct bpf_verifier_env *env,
 	krecord = kvcalloc(nfuncs, krec_size, GFP_KERNEL | __GFP_NOWARN);
 	if (!krecord)
 		return -ENOMEM;
-	info_aux = kcalloc(nfuncs, sizeof(*info_aux), GFP_KERNEL | __GFP_NOWARN);
-	if (!info_aux)
-		goto err_free;
 
 	for (i = 0; i < nfuncs; i++) {
 		ret = bpf_check_uarg_tail_zero(urecord, krec_size, urec_size);
@@ -15057,11 +15337,6 @@ static int check_btf_func(struct bpf_verifier_env *env,
 			goto err_free;
 		}
 
-		if (env->subprog_info[i].start != krecord[i].insn_off) {
-			verbose(env, "func_info BTF section doesn't match subprog layout in BPF program\n");
-			goto err_free;
-		}
-
 		/* check type_id */
 		type = btf_type_by_id(btf, krecord[i].type_id);
 		if (!type || !btf_type_is_func(type)) {
@@ -15069,12 +15344,77 @@ static int check_btf_func(struct bpf_verifier_env *env,
 				krecord[i].type_id);
 			goto err_free;
 		}
-		info_aux[i].linkage = BTF_INFO_VLEN(type->info);
 
 		func_proto = btf_type_by_id(btf, type->type);
 		if (unlikely(!func_proto || !btf_type_is_func_proto(func_proto)))
 			/* btf_func_check() already verified it during BTF load */
 			goto err_free;
+
+		prev_offset = krecord[i].insn_off;
+		bpfptr_add(&urecord, urec_size);
+	}
+
+	prog->aux->func_info = krecord;
+	prog->aux->func_info_cnt = nfuncs;
+	return 0;
+
+err_free:
+	kvfree(krecord);
+	return ret;
+}
+
+static int check_btf_func(struct bpf_verifier_env *env,
+			  const union bpf_attr *attr,
+			  bpfptr_t uattr)
+{
+	const struct btf_type *type, *func_proto, *ret_type;
+	u32 i, nfuncs, urec_size;
+	struct bpf_func_info *krecord;
+	struct bpf_func_info_aux *info_aux = NULL;
+	struct bpf_prog *prog;
+	const struct btf *btf;
+	bpfptr_t urecord;
+	bool scalar_return;
+	int ret = -ENOMEM;
+
+	nfuncs = attr->func_info_cnt;
+	if (!nfuncs) {
+		if (check_abnormal_return(env))
+			return -EINVAL;
+		return 0;
+	}
+	if (nfuncs != env->subprog_cnt) {
+		verbose(env, "number of funcs in func_info doesn't match number of subprogs\n");
+		return -EINVAL;
+	}
+
+	urec_size = attr->func_info_rec_size;
+
+	prog = env->prog;
+	btf = prog->aux->btf;
+
+	urecord = make_bpfptr(attr->func_info, uattr.is_kernel);
+
+	krecord = prog->aux->func_info;
+	info_aux = kcalloc(nfuncs, sizeof(*info_aux), GFP_KERNEL | __GFP_NOWARN);
+	if (!info_aux)
+		return -ENOMEM;
+
+	for (i = 0; i < nfuncs; i++) {
+		/* check insn_off */
+		ret = -EINVAL;
+
+		if (env->subprog_info[i].start != krecord[i].insn_off) {
+			verbose(env, "func_info BTF section doesn't match subprog layout in BPF program\n");
+			goto err_free;
+		}
+
+		/* Already checked type_id */
+		type = btf_type_by_id(btf, krecord[i].type_id);
+		info_aux[i].linkage = BTF_INFO_VLEN(type->info);
+		/* Already checked func_proto */
+		func_proto = btf_type_by_id(btf, type->type);
+
 		ret_type = btf_type_skip_modifiers(btf, func_proto->type, NULL);
 		scalar_return =
 			btf_type_is_small_int(ret_type) || btf_is_any_enum(ret_type);
@@ -15087,17 +15427,13 @@ static int check_btf_func(struct bpf_verifier_env *env,
 			goto err_free;
 		}
 
-		prev_offset = krecord[i].insn_off;
 		bpfptr_add(&urecord, urec_size);
 	}
 
-	prog->aux->func_info = krecord;
-	prog->aux->func_info_cnt = nfuncs;
 	prog->aux->func_info_aux = info_aux;
 	return 0;
 
 err_free:
-	kvfree(krecord);
 	kfree(info_aux);
 	return ret;
 }
@@ -15110,7 +15446,8 @@ static void adjust_btf_func(struct bpf_verifier_env *env)
 	if (!aux->func_info)
 		return;
 
-	for (i = 0; i < env->subprog_cnt; i++)
+	/* func_info is not available for hidden subprogs */
+	for (i = 0; i < env->subprog_cnt - env->hidden_subprog_cnt; i++)
 		aux->func_info[i].insn_off = env->subprog_info[i].start;
 }
 
@@ -15314,9 +15651,9 @@ static int check_core_relo(struct bpf_verifier_env *env,
 	return err;
 }
 
-static int check_btf_info(struct bpf_verifier_env *env,
-			  const union bpf_attr *attr,
-			  bpfptr_t uattr)
+static int check_btf_info_early(struct bpf_verifier_env *env,
+				const union bpf_attr *attr,
+				bpfptr_t uattr)
 {
 	struct btf *btf;
 	int err;
@@ -15336,6 +15673,24 @@ static int check_btf_info(struct bpf_verifier_env *env,
 	}
 	env->prog->aux->btf = btf;
 
+	err = check_btf_func_early(env, attr, uattr);
+	if (err)
+		return err;
+	return 0;
+}
+
+static int check_btf_info(struct bpf_verifier_env *env,
+			  const union bpf_attr *attr,
+			  bpfptr_t uattr)
+{
+	int err;
+
+	if (!attr->func_info_cnt && !attr->line_info_cnt) {
+		if (check_abnormal_return(env))
+			return -EINVAL;
+		return 0;
+	}
+
 	err = check_btf_func(env, attr, uattr);
 	if (err)
 		return err;
@@ -16438,6 +16793,7 @@ static int do_check(struct bpf_verifier_env *env)
 	int prev_insn_idx = -1;
 
 	for (;;) {
+		bool exception_exit = false;
 		struct bpf_insn *insn;
 		u8 class;
 		int err;
@@ -16652,12 +17008,17 @@ static int do_check(struct bpf_verifier_env *env)
 						return -EINVAL;
 					}
 				}
-				if (insn->src_reg == BPF_PSEUDO_CALL)
+				if (insn->src_reg == BPF_PSEUDO_CALL) {
 					err = check_func_call(env, insn, &env->insn_idx);
-				else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL)
+				} else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
 					err = check_kfunc_call(env, insn, &env->insn_idx);
-				else
+					if (!err && is_bpf_throw_kfunc(insn)) {
+						exception_exit = true;
+						goto process_bpf_exit_full;
+					}
+				} else {
 					err = check_helper_call(env, insn, &env->insn_idx);
+				}
 				if (err)
 					return err;
 
@@ -16687,7 +17048,7 @@ static int do_check(struct bpf_verifier_env *env)
 					verbose(env, "BPF_EXIT uses reserved fields\n");
 					return -EINVAL;
 				}
-
+process_bpf_exit_full:
 				if (env->cur_state->active_lock.ptr &&
 				    !in_rbtree_lock_required_cb(env)) {
 					verbose(env, "bpf_spin_unlock is missing\n");
@@ -16706,10 +17067,23 @@ static int do_check(struct bpf_verifier_env *env)
 				 * function, for which reference_state must
 				 * match caller reference state when it exits.
 				 */
-				err = check_reference_leak(env);
+				err = check_reference_leak(env, exception_exit);
 				if (err)
 					return err;
 
+				/* The side effect of the prepare_func_exit
+				 * which is being skipped is that it frees
+				 * bpf_func_state. Typically, process_bpf_exit
+				 * will only be hit with outermost exit.
+				 * copy_verifier_state in pop_stack will handle
+				 * freeing of any extra bpf_func_state left over
+				 * from not processing all nested function
+				 * exits. We also skip return code checks as
+				 * they are not needed for exceptional exits.
+				 */
+				if (exception_exit)
+					goto process_bpf_exit;
+
 				if (state->curframe) {
 					/* exit from nested function */
 					err = prepare_func_exit(env, &env->insn_idx);
@@ -16719,7 +17093,7 @@ static int do_check(struct bpf_verifier_env *env)
 					continue;
 				}
 
-				err = check_return_code(env);
+				err = check_return_code(env, BPF_REG_0);
 				if (err)
 					return err;
 process_bpf_exit:
@@ -18012,6 +18386,9 @@ static int jit_subprogs(struct bpf_verifier_env *env)
 		}
 		func[i]->aux->num_exentries = num_exentries;
 		func[i]->aux->tail_call_reachable = env->subprog_info[i].tail_call_reachable;
+		func[i]->aux->exception_cb = env->subprog_info[i].is_exception_cb;
+		if (!i)
+			func[i]->aux->exception_boundary = env->seen_exception;
 		func[i] = bpf_int_jit_compile(func[i]);
 		if (!func[i]->jited) {
 			err = -ENOTSUPP;
@@ -18051,7 +18428,8 @@ static int jit_subprogs(struct bpf_verifier_env *env)
 		 * the call instruction, as an index for this list
 		 */
 		func[i]->aux->func = func;
-		func[i]->aux->func_cnt = env->subprog_cnt;
+		func[i]->aux->func_cnt = env->subprog_cnt - env->hidden_subprog_cnt;
+		func[i]->aux->real_func_cnt = env->subprog_cnt;
 	}
 	for (i = 0; i < env->subprog_cnt; i++) {
 		old_bpf_func = func[i]->bpf_func;
@@ -18097,7 +18475,10 @@ static int jit_subprogs(struct bpf_verifier_env *env)
 	prog->aux->extable = func[0]->aux->extable;
 	prog->aux->num_exentries = func[0]->aux->num_exentries;
 	prog->aux->func = func;
-	prog->aux->func_cnt = env->subprog_cnt;
+	prog->aux->func_cnt = env->subprog_cnt - env->hidden_subprog_cnt;
+	prog->aux->real_func_cnt = env->subprog_cnt;
+	prog->aux->bpf_exception_cb = (void *)func[env->exception_callback_subprog]->bpf_func;
+	prog->aux->exception_boundary = func[0]->aux->exception_boundary;
 	bpf_prog_jit_attempt_done(prog);
 	return 0;
 out_free:
@@ -18264,21 +18645,35 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 		insn->imm = BPF_CALL_IMM(desc->addr);
 	if (insn->off)
 		return 0;
-	if (desc->func_id == special_kfunc_list[KF_bpf_obj_new_impl]) {
+	if (desc->func_id == special_kfunc_list[KF_bpf_obj_new_impl] ||
+	    desc->func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) {
 		struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
 		struct bpf_insn addr[2] = { BPF_LD_IMM64(BPF_REG_2, (long)kptr_struct_meta) };
 		u64 obj_new_size = env->insn_aux_data[insn_idx].obj_new_size;
 
+		if (desc->func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl] && kptr_struct_meta) {
+			verbose(env, "verifier internal error: NULL kptr_struct_meta expected at insn_idx %d\n",
+				insn_idx);
+			return -EFAULT;
+		}
+
 		insn_buf[0] = BPF_MOV64_IMM(BPF_REG_1, obj_new_size);
 		insn_buf[1] = addr[0];
 		insn_buf[2] = addr[1];
 		insn_buf[3] = *insn;
 		*cnt = 4;
 	} else if (desc->func_id == special_kfunc_list[KF_bpf_obj_drop_impl] ||
+		   desc->func_id == special_kfunc_list[KF_bpf_percpu_obj_drop_impl] ||
 		   desc->func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl]) {
 		struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
 		struct bpf_insn addr[2] = { BPF_LD_IMM64(BPF_REG_2, (long)kptr_struct_meta) };
 
+		if (desc->func_id == special_kfunc_list[KF_bpf_percpu_obj_drop_impl] && kptr_struct_meta) {
+			verbose(env, "verifier internal error: NULL kptr_struct_meta expected at insn_idx %d\n",
+				insn_idx);
+			return -EFAULT;
+		}
+
 		if (desc->func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl] &&
 		    !kptr_struct_meta) {
 			verbose(env, "verifier internal error: kptr_struct_meta expected at insn_idx %d\n",
@@ -18319,6 +18714,33 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 	return 0;
 }
 
+/* The function requires that first instruction in 'patch' is insnsi[prog->len - 1] */
+static int add_hidden_subprog(struct bpf_verifier_env *env, struct bpf_insn *patch, int len)
+{
+	struct bpf_subprog_info *info = env->subprog_info;
+	int cnt = env->subprog_cnt;
+	struct bpf_prog *prog;
+
+	/* We only reserve one slot for hidden subprogs in subprog_info. */
+	if (env->hidden_subprog_cnt) {
+		verbose(env, "verifier internal error: only one hidden subprog supported\n");
+		return -EFAULT;
+	}
+	/* We're not patching any existing instruction, just appending the new
+	 * ones for the hidden subprog. Hence all of the adjustment operations
+	 * in bpf_patch_insn_data are no-ops.
+	 */
+	prog = bpf_patch_insn_data(env, env->prog->len - 1, patch, len);
+	if (!prog)
+		return -ENOMEM;
+	env->prog = prog;
+	info[cnt + 1].start = info[cnt].start;
+	info[cnt].start = prog->len - len + 1;
+	env->subprog_cnt++;
+	env->hidden_subprog_cnt++;
+	return 0;
+}
+
 /* Do various post-verification rewrites in a single program pass.
  * These rewrites simplify JIT and interpreter implementations.
  */
@@ -18337,6 +18759,26 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
 	struct bpf_map *map_ptr;
 	int i, ret, cnt, delta = 0;
 
+	if (env->seen_exception && !env->exception_callback_subprog) {
+		struct bpf_insn patch[] = {
+			env->prog->insnsi[insn_cnt - 1],
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+			BPF_EXIT_INSN(),
+		};
+
+		ret = add_hidden_subprog(env, patch, ARRAY_SIZE(patch));
+		if (ret < 0)
+			return ret;
+		prog = env->prog;
+		insn = prog->insnsi;
+
+		env->exception_callback_subprog = env->subprog_cnt - 1;
+		/* Don't update insn_cnt, as add_hidden_subprog always appends insns */
+		env->subprog_info[env->exception_callback_subprog].is_cb = true;
+		env->subprog_info[env->exception_callback_subprog].is_async_cb = true;
+		env->subprog_info[env->exception_callback_subprog].is_exception_cb = true;
+	}
+
 	for (i = 0; i < insn_cnt; i++, insn++) {
 		/* Make divide-by-zero exceptions impossible. */
 		if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) ||
@@ -18606,6 +19048,25 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
 			goto patch_call_imm;
 		}
 
+		/* bpf_per_cpu_ptr() and bpf_this_cpu_ptr() */
+		if (env->insn_aux_data[i + delta].call_with_percpu_alloc_ptr) {
+			/* patch with 'r1 = *(u64 *)(r1 + 0)' since for percpu data,
+			 * bpf_mem_alloc() returns a ptr to the percpu data ptr.
+			 */
+			insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, 0);
+			insn_buf[1] = *insn;
+			cnt = 2;
+
+			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+			if (!new_prog)
+				return -ENOMEM;
+
+			delta += cnt - 1;
+			env->prog = prog = new_prog;
+			insn = new_prog->insnsi + i + delta;
+			goto patch_call_imm;
+		}
+
 		/* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup
 		 * and other inlining handlers are currently limited to 64 bit
 		 * only.
@@ -19015,7 +19476,7 @@ static void free_states(struct bpf_verifier_env *env)
 	}
 }
 
-static int do_check_common(struct bpf_verifier_env *env, int subprog)
+static int do_check_common(struct bpf_verifier_env *env, int subprog, bool is_ex_cb)
 {
 	bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
 	struct bpf_verifier_state *state;
@@ -19046,7 +19507,7 @@ static int do_check_common(struct bpf_verifier_env *env, int subprog)
 
 	regs = state->frame[state->curframe]->regs;
 	if (subprog || env->prog->type == BPF_PROG_TYPE_EXT) {
-		ret = btf_prepare_func_args(env, subprog, regs);
+		ret = btf_prepare_func_args(env, subprog, regs, is_ex_cb);
 		if (ret)
 			goto out;
 		for (i = BPF_REG_1; i <= BPF_REG_5; i++) {
@@ -19062,6 +19523,12 @@ static int do_check_common(struct bpf_verifier_env *env, int subprog)
 				regs[i].id = ++env->id_gen;
 			}
 		}
+		if (is_ex_cb) {
+			state->frame[0]->in_exception_callback_fn = true;
+			env->subprog_info[subprog].is_cb = true;
+			env->subprog_info[subprog].is_async_cb = true;
+			env->subprog_info[subprog].is_exception_cb = true;
+		}
 	} else {
 		/* 1st arg to a function */
 		regs[BPF_REG_1].type = PTR_TO_CTX;
@@ -19126,7 +19593,7 @@ static int do_check_subprogs(struct bpf_verifier_env *env)
 			continue;
 		env->insn_idx = env->subprog_info[i].start;
 		WARN_ON_ONCE(env->insn_idx == 0);
-		ret = do_check_common(env, i);
+		ret = do_check_common(env, i, env->exception_callback_subprog == i);
 		if (ret) {
 			return ret;
 		} else if (env->log.level & BPF_LOG_LEVEL) {
@@ -19143,7 +19610,7 @@ static int do_check_main(struct bpf_verifier_env *env)
 	int ret;
 
 	env->insn_idx = 0;
-	ret = do_check_common(env, 0);
+	ret = do_check_common(env, 0, false);
 	if (!ret)
 		env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
 	return ret;
@@ -19312,6 +19779,12 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
 			bpf_log(log, "Subprog %s doesn't exist\n", tname);
 			return -EINVAL;
 		}
+		if (aux->func && aux->func[subprog]->aux->exception_cb) {
+			bpf_log(log,
+				"%s programs cannot attach to exception callback\n",
+				prog_extension ? "Extension" : "FENTRY/FEXIT");
+			return -EINVAL;
+		}
 		conservative = aux->func_info_aux[subprog].unreliable;
 		if (prog_extension) {
 			if (conservative) {
@@ -19641,6 +20114,9 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
 	if (!tr)
 		return -ENOMEM;
 
+	if (tgt_prog && tgt_prog->aux->tail_call_reachable)
+		tr->flags = BPF_TRAMP_F_TAIL_CALL_CTX;
+
 	prog->aux->dst_trampoline = tr;
 	return 0;
 }
@@ -19736,6 +20212,10 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3
 	if (!env->explored_states)
 		goto skip_full_check;
 
+	ret = check_btf_info_early(env, attr, uattr);
+	if (ret < 0)
+		goto skip_full_check;
+
 	ret = add_subprog_and_kfunc(env);
 	if (ret < 0)
 		goto skip_full_check;
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 868008f56fec..df697c74d519 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -117,6 +117,9 @@ unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
 		 * and don't send kprobe event into ring-buffer,
 		 * so return zero here
 		 */
+		rcu_read_lock();
+		bpf_prog_inc_misses_counters(rcu_dereference(call->prog_array));
+		rcu_read_unlock();
 		ret = 0;
 		goto out;
 	}
@@ -2384,7 +2387,8 @@ int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog)
 
 int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,
 			    u32 *fd_type, const char **buf,
-			    u64 *probe_offset, u64 *probe_addr)
+			    u64 *probe_offset, u64 *probe_addr,
+			    unsigned long *missed)
 {
 	bool is_tracepoint, is_syscall_tp;
 	struct bpf_prog *prog;
@@ -2419,7 +2423,7 @@ int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,
 #ifdef CONFIG_KPROBE_EVENTS
 		if (flags & TRACE_EVENT_FL_KPROBE)
 			err = bpf_get_kprobe_info(event, fd_type, buf,
-						  probe_offset, probe_addr,
+						  probe_offset, probe_addr, missed,
 						  event->attr.type == PERF_TYPE_TRACEPOINT);
 #endif
 #ifdef CONFIG_UPROBE_EVENTS
@@ -2614,6 +2618,7 @@ static int bpf_kprobe_multi_link_fill_link_info(const struct bpf_link *link,
 	kmulti_link = container_of(link, struct bpf_kprobe_multi_link, link);
 	info->kprobe_multi.count = kmulti_link->cnt;
 	info->kprobe_multi.flags = kmulti_link->flags;
+	info->kprobe_multi.missed = kmulti_link->fp.nmissed;
 
 	if (!uaddrs)
 		return 0;
@@ -2710,6 +2715,7 @@ kprobe_multi_link_prog_run(struct bpf_kprobe_multi_link *link,
 	int err;
 
 	if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) {
+		bpf_prog_inc_misses_counter(link->link.prog);
 		err = 0;
 		goto out;
 	}
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index e834f149695b..a3442db35670 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1249,6 +1249,12 @@ static const struct file_operations kprobe_events_ops = {
 	.write		= probes_write,
 };
 
+static unsigned long trace_kprobe_missed(struct trace_kprobe *tk)
+{
+	return trace_kprobe_is_return(tk) ?
+		tk->rp.kp.nmissed + tk->rp.nmissed : tk->rp.kp.nmissed;
+}
+
 /* Probes profiling interfaces */
 static int probes_profile_seq_show(struct seq_file *m, void *v)
 {
@@ -1260,8 +1266,7 @@ static int probes_profile_seq_show(struct seq_file *m, void *v)
 		return 0;
 
 	tk = to_trace_kprobe(ev);
-	nmissed = trace_kprobe_is_return(tk) ?
-		tk->rp.kp.nmissed + tk->rp.nmissed : tk->rp.kp.nmissed;
+	nmissed = trace_kprobe_missed(tk);
 	seq_printf(m, "  %-44s %15lu %15lu\n",
 		   trace_probe_name(&tk->tp),
 		   trace_kprobe_nhit(tk),
@@ -1607,7 +1612,8 @@ NOKPROBE_SYMBOL(kretprobe_perf_func);
 
 int bpf_get_kprobe_info(const struct perf_event *event, u32 *fd_type,
 			const char **symbol, u64 *probe_offset,
-			u64 *probe_addr, bool perf_type_tracepoint)
+			u64 *probe_addr, unsigned long *missed,
+			bool perf_type_tracepoint)
 {
 	const char *pevent = trace_event_name(event->tp_event);
 	const char *group = event->tp_event->class->system;
@@ -1626,6 +1632,8 @@ int bpf_get_kprobe_info(const struct perf_event *event, u32 *fd_type,
 	*probe_addr = kallsyms_show_value(current_cred()) ?
 		      (unsigned long)tk->rp.kp.addr : 0;
 	*symbol = tk->symbol;
+	if (missed)
+		*missed = trace_kprobe_missed(tk);
 	return 0;
 }
 #endif	/* CONFIG_PERF_EVENTS */
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index de753403cdaf..9c581d6da843 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -556,7 +556,7 @@ static int perf_call_bpf_enter(struct trace_event_call *call, struct pt_regs *re
 {
 	struct syscall_tp_t {
 		struct trace_entry ent;
-		unsigned long syscall_nr;
+		int syscall_nr;
 		unsigned long args[SYSCALL_DEFINE_MAXARGS];
 	} __aligned(8) param;
 	int i;
@@ -661,7 +661,7 @@ static int perf_call_bpf_exit(struct trace_event_call *call, struct pt_regs *reg
 {
 	struct syscall_tp_t {
 		struct trace_entry ent;
-		unsigned long syscall_nr;
+		int syscall_nr;
 		unsigned long ret;
 	} __aligned(8) param;
 
diff --git a/lib/test_bpf.c b/lib/test_bpf.c
index ecde4216201e..7916503e6a6a 100644
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -5111,6 +5111,104 @@ static struct bpf_test tests[] = {
 		{ },
 		{ { 0, 0xffffffff } }
 	},
+	/* MOVSX32 */
+	{
+		"ALU_MOVSX | BPF_B",
+		.u.insns_int = {
+			BPF_LD_IMM64(R2, 0x00000000ffffffefLL),
+			BPF_LD_IMM64(R3, 0xdeadbeefdeadbeefLL),
+			BPF_MOVSX32_REG(R1, R3, 8),
+			BPF_JMP_REG(BPF_JEQ, R2, R1, 2),
+			BPF_MOV32_IMM(R0, 2),
+			BPF_EXIT_INSN(),
+			BPF_MOV32_IMM(R0, 1),
+			BPF_EXIT_INSN(),
+		},
+		INTERNAL,
+		{ },
+		{ { 0, 0x1 } },
+	},
+	{
+		"ALU_MOVSX | BPF_H",
+		.u.insns_int = {
+			BPF_LD_IMM64(R2, 0x00000000ffffbeefLL),
+			BPF_LD_IMM64(R3, 0xdeadbeefdeadbeefLL),
+			BPF_MOVSX32_REG(R1, R3, 16),
+			BPF_JMP_REG(BPF_JEQ, R2, R1, 2),
+			BPF_MOV32_IMM(R0, 2),
+			BPF_EXIT_INSN(),
+			BPF_MOV32_IMM(R0, 1),
+			BPF_EXIT_INSN(),
+		},
+		INTERNAL,
+		{ },
+		{ { 0, 0x1 } },
+	},
+	{
+		"ALU_MOVSX | BPF_W",
+		.u.insns_int = {
+			BPF_LD_IMM64(R2, 0x00000000deadbeefLL),
+			BPF_LD_IMM64(R3, 0xdeadbeefdeadbeefLL),
+			BPF_MOVSX32_REG(R1, R3, 32),
+			BPF_JMP_REG(BPF_JEQ, R2, R1, 2),
+			BPF_MOV32_IMM(R0, 2),
+			BPF_EXIT_INSN(),
+			BPF_MOV32_IMM(R0, 1),
+			BPF_EXIT_INSN(),
+		},
+		INTERNAL,
+		{ },
+		{ { 0, 0x1 } },
+	},
+	/* MOVSX64 REG */
+	{
+		"ALU64_MOVSX | BPF_B",
+		.u.insns_int = {
+			BPF_LD_IMM64(R2, 0xffffffffffffffefLL),
+			BPF_LD_IMM64(R3, 0xdeadbeefdeadbeefLL),
+			BPF_MOVSX64_REG(R1, R3, 8),
+			BPF_JMP_REG(BPF_JEQ, R2, R1, 2),
+			BPF_MOV32_IMM(R0, 2),
+			BPF_EXIT_INSN(),
+			BPF_MOV32_IMM(R0, 1),
+			BPF_EXIT_INSN(),
+		},
+		INTERNAL,
+		{ },
+		{ { 0, 0x1 } },
+	},
+	{
+		"ALU64_MOVSX | BPF_H",
+		.u.insns_int = {
+			BPF_LD_IMM64(R2, 0xffffffffffffbeefLL),
+			BPF_LD_IMM64(R3, 0xdeadbeefdeadbeefLL),
+			BPF_MOVSX64_REG(R1, R3, 16),
+			BPF_JMP_REG(BPF_JEQ, R2, R1, 2),
+			BPF_MOV32_IMM(R0, 2),
+			BPF_EXIT_INSN(),
+			BPF_MOV32_IMM(R0, 1),
+			BPF_EXIT_INSN(),
+		},
+		INTERNAL,
+		{ },
+		{ { 0, 0x1 } },
+	},
+	{
+		"ALU64_MOVSX | BPF_W",
+		.u.insns_int = {
+			BPF_LD_IMM64(R2, 0xffffffffdeadbeefLL),
+			BPF_LD_IMM64(R3, 0xdeadbeefdeadbeefLL),
+			BPF_MOVSX64_REG(R1, R3, 32),
+			BPF_JMP_REG(BPF_JEQ, R2, R1, 2),
+			BPF_MOV32_IMM(R0, 2),
+			BPF_EXIT_INSN(),
+			BPF_MOV32_IMM(R0, 1),
+			BPF_EXIT_INSN(),
+		},
+		INTERNAL,
+		{ },
+		{ { 0, 0x1 } },
+	},
 	/* BPF_ALU | BPF_ADD | BPF_X */
 	{
 		"ALU_ADD_X: 1 + 2 = 3",
@@ -6105,6 +6203,106 @@ static struct bpf_test tests[] = {
 		{ },
 		{ { 0, 2 } },
 	},
+	/* BPF_ALU | BPF_DIV | BPF_X off=1 (SDIV) */
+	{
+		"ALU_SDIV_X: -6 / 2 = -3",
+		.u.insns_int = {
+			BPF_LD_IMM64(R0, -6),
+			BPF_ALU32_IMM(BPF_MOV, R1, 2),
+			BPF_ALU32_REG_OFF(BPF_DIV, R0, R1, 1),
+			BPF_EXIT_INSN(),
+		},
+		INTERNAL,
+		{ },
+		{ { 0, -3 } },
+	},
+	/* BPF_ALU | BPF_DIV | BPF_K off=1 (SDIV) */
+	{
+		"ALU_SDIV_K: -6 / 2 = -3",
+		.u.insns_int = {
+			BPF_LD_IMM64(R0, -6),
+			BPF_ALU32_IMM_OFF(BPF_DIV, R0, 2, 1),
+			BPF_EXIT_INSN(),
+		},
+		INTERNAL,
+		{ },
+		{ { 0, -3 } },
+	},
+	/* BPF_ALU64 | BPF_DIV | BPF_X off=1 (SDIV64) */
+	{
+		"ALU64_SDIV_X: -6 / 2 = -3",
+		.u.insns_int = {
+			BPF_LD_IMM64(R0, -6),
+			BPF_ALU32_IMM(BPF_MOV, R1, 2),
+			BPF_ALU64_REG_OFF(BPF_DIV, R0, R1, 1),
+			BPF_EXIT_INSN(),
+		},
+		INTERNAL,
+		{ },
+		{ { 0, -3 } },
+	},
+	/* BPF_ALU64 | BPF_DIV | BPF_K off=1 (SDIV64) */
+	{
+		"ALU64_SDIV_K: -6 / 2 = -3",
+		.u.insns_int = {
+			BPF_LD_IMM64(R0, -6),
+			BPF_ALU64_IMM_OFF(BPF_DIV, R0, 2, 1),
+			BPF_EXIT_INSN(),
+		},
+		INTERNAL,
+		{ },
+		{ { 0, -3 } },
+	},
+	/* BPF_ALU | BPF_MOD | BPF_X off=1 (SMOD) */
+	{
+		"ALU_SMOD_X: -7 % 2 = -1",
+		.u.insns_int = {
+			BPF_LD_IMM64(R0, -7),
+			BPF_ALU32_IMM(BPF_MOV, R1, 2),
+			BPF_ALU32_REG_OFF(BPF_MOD, R0, R1, 1),
+			BPF_EXIT_INSN(),
+		},
+		INTERNAL,
+		{ },
+		{ { 0, -1 } },
+	},
+	/* BPF_ALU | BPF_MOD | BPF_K off=1 (SMOD) */
+	{
+		"ALU_SMOD_K: -7 % 2 = -1",
+		.u.insns_int = {
+			BPF_LD_IMM64(R0, -7),
+			BPF_ALU32_IMM_OFF(BPF_MOD, R0, 2, 1),
+			BPF_EXIT_INSN(),
+		},
+		INTERNAL,
+		{ },
+		{ { 0, -1 } },
+	},
+	/* BPF_ALU64 | BPF_MOD | BPF_X off=1 (SMOD64) */
+	{
+		"ALU64_SMOD_X: -7 % 2 = -1",
+		.u.insns_int = {
+			BPF_LD_IMM64(R0, -7),
+			BPF_ALU32_IMM(BPF_MOV, R1, 2),
+			BPF_ALU64_REG_OFF(BPF_MOD, R0, R1, 1),
+			BPF_EXIT_INSN(),
+		},
+		INTERNAL,
+		{ },
+		{ { 0, -1 } },
+	},
+	/* BPF_ALU64 | BPF_MOD | BPF_K off=1 (SMOD64) */
+	{
+		"ALU64_SMOD_X: -7 % 2 = -1",
+		.u.insns_int = {
+			BPF_LD_IMM64(R0, -7),
+			BPF_ALU64_IMM_OFF(BPF_MOD, R0, 2, 1),
+			BPF_EXIT_INSN(),
+		},
+		INTERNAL,
+		{ },
+		{ { 0, -1 } },
+	},
 	/* BPF_ALU | BPF_AND | BPF_X */
 	{
 		"ALU_AND_X: 3 & 2 = 2",
@@ -7837,6 +8035,104 @@ static struct bpf_test tests[] = {
 		{ },
 		{ { 0, (u32) (cpu_to_le64(0xfedcba9876543210ULL) >> 32) } },
 	},
+	/* BSWAP */
+	{
+		"BSWAP 16: 0x0123456789abcdef -> 0xefcd",
+		.u.insns_int = {
+			BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+			BPF_BSWAP(R0, 16),
+			BPF_EXIT_INSN(),
+		},
+		INTERNAL,
+		{ },
+		{ { 0, 0xefcd } },
+	},
+	{
+		"BSWAP 32: 0x0123456789abcdef -> 0xefcdab89",
+		.u.insns_int = {
+			BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+			BPF_BSWAP(R0, 32),
+			BPF_ALU64_REG(BPF_MOV, R1, R0),
+			BPF_ALU64_IMM(BPF_RSH, R1, 32),
+			BPF_ALU32_REG(BPF_ADD, R0, R1), /* R1 = 0 */
+			BPF_EXIT_INSN(),
+		},
+		INTERNAL,
+		{ },
+		{ { 0, 0xefcdab89 } },
+	},
+	{
+		"BSWAP 64: 0x0123456789abcdef -> 0x67452301",
+		.u.insns_int = {
+			BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+			BPF_BSWAP(R0, 64),
+			BPF_EXIT_INSN(),
+		},
+		INTERNAL,
+		{ },
+		{ { 0, 0x67452301 } },
+	},
+	{
+		"BSWAP 64: 0x0123456789abcdef >> 32 -> 0xefcdab89",
+		.u.insns_int = {
+			BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+			BPF_BSWAP(R0, 64),
+			BPF_ALU64_IMM(BPF_RSH, R0, 32),
+			BPF_EXIT_INSN(),
+		},
+		INTERNAL,
+		{ },
+		{ { 0, 0xefcdab89 } },
+	},
+	/* BSWAP, reversed */
+	{
+		"BSWAP 16: 0xfedcba9876543210 -> 0x1032",
+		.u.insns_int = {
+			BPF_LD_IMM64(R0, 0xfedcba9876543210ULL),
+			BPF_BSWAP(R0, 16),
+			BPF_EXIT_INSN(),
+		},
+		INTERNAL,
+		{ },
+		{ { 0, 0x1032 } },
+	},
+	{
+		"BSWAP 32: 0xfedcba9876543210 -> 0x10325476",
+		.u.insns_int = {
+			BPF_LD_IMM64(R0, 0xfedcba9876543210ULL),
+			BPF_BSWAP(R0, 32),
+			BPF_ALU64_REG(BPF_MOV, R1, R0),
+			BPF_ALU64_IMM(BPF_RSH, R1, 32),
+			BPF_ALU32_REG(BPF_ADD, R0, R1), /* R1 = 0 */
+			BPF_EXIT_INSN(),
+		},
+		INTERNAL,
+		{ },
+		{ { 0, 0x10325476 } },
+	},
+	{
+		"BSWAP 64: 0xfedcba9876543210 -> 0x98badcfe",
+		.u.insns_int = {
+			BPF_LD_IMM64(R0, 0xfedcba9876543210ULL),
+			BPF_BSWAP(R0, 64),
+			BPF_EXIT_INSN(),
+		},
+		INTERNAL,
+		{ },
+		{ { 0, 0x98badcfe } },
+	},
+	{
+		"BSWAP 64: 0xfedcba9876543210 >> 32 -> 0x10325476",
+		.u.insns_int = {
+			BPF_LD_IMM64(R0, 0xfedcba9876543210ULL),
+			BPF_BSWAP(R0, 64),
+			BPF_ALU64_IMM(BPF_RSH, R0, 32),
+			BPF_EXIT_INSN(),
+		},
+		INTERNAL,
+		{ },
+		{ { 0, 0x10325476 } },
+	},
 	/* BPF_LDX_MEM B/H/W/DW */
 	{
 		"BPF_LDX_MEM | BPF_B, base",
@@ -8228,6 +8524,67 @@ static struct bpf_test tests[] = {
 		{ { 32, 0 } },
 		.stack_depth = 0,
 	},
+	/* BPF_LDX_MEMSX B/H/W */
+	{
+		"BPF_LDX_MEMSX | BPF_B",
+		.u.insns_int = {
+			BPF_LD_IMM64(R1, 0xdead0000000000f0ULL),
+			BPF_LD_IMM64(R2, 0xfffffffffffffff0ULL),
+			BPF_STX_MEM(BPF_DW, R10, R1, -8),
+#ifdef __BIG_ENDIAN
+			BPF_LDX_MEMSX(BPF_B, R0, R10, -1),
+#else
+			BPF_LDX_MEMSX(BPF_B, R0, R10, -8),
+#endif
+			BPF_JMP_REG(BPF_JNE, R0, R2, 1),
+			BPF_ALU64_IMM(BPF_MOV, R0, 0),
+			BPF_EXIT_INSN(),
+		},
+		INTERNAL,
+		{ },
+		{ { 0, 0 } },
+		.stack_depth = 8,
+	},
+	{
+		"BPF_LDX_MEMSX | BPF_H",
+		.u.insns_int = {
+			BPF_LD_IMM64(R1, 0xdead00000000f123ULL),
+			BPF_LD_IMM64(R2, 0xfffffffffffff123ULL),
+			BPF_STX_MEM(BPF_DW, R10, R1, -8),
+#ifdef __BIG_ENDIAN
+			BPF_LDX_MEMSX(BPF_H, R0, R10, -2),
+#else
+			BPF_LDX_MEMSX(BPF_H, R0, R10, -8),
+#endif
+			BPF_JMP_REG(BPF_JNE, R0, R2, 1),
+			BPF_ALU64_IMM(BPF_MOV, R0, 0),
+			BPF_EXIT_INSN(),
+		},
+		INTERNAL,
+		{ },
+		{ { 0, 0 } },
+		.stack_depth = 8,
+	},
+	{
+		"BPF_LDX_MEMSX | BPF_W",
+		.u.insns_int = {
+			BPF_LD_IMM64(R1, 0x00000000deadbeefULL),
+			BPF_LD_IMM64(R2, 0xffffffffdeadbeefULL),
+			BPF_STX_MEM(BPF_DW, R10, R1, -8),
+#ifdef __BIG_ENDIAN
+			BPF_LDX_MEMSX(BPF_W, R0, R10, -4),
+#else
+			BPF_LDX_MEMSX(BPF_W, R0, R10, -8),
+#endif
+			BPF_JMP_REG(BPF_JNE, R0, R2, 1),
+			BPF_ALU64_IMM(BPF_MOV, R0, 0),
+			BPF_EXIT_INSN(),
+		},
+		INTERNAL,
+		{ },
+		{ { 0, 0 } },
+		.stack_depth = 8,
+	},
 	/* BPF_STX_MEM B/H/W/DW */
 	{
 		"BPF_STX_MEM | BPF_B",
@@ -9474,6 +9831,20 @@ static struct bpf_test tests[] = {
 		{ },
 		{ { 0, 1 } },
 	},
+	/* BPF_JMP32 | BPF_JA */
+	{
+		"JMP32_JA: Unconditional jump: if (true) return 1",
+		.u.insns_int = {
+			BPF_ALU32_IMM(BPF_MOV, R0, 0),
+			BPF_JMP32_IMM(BPF_JA, 0, 1, 0),
+			BPF_EXIT_INSN(),
+			BPF_ALU32_IMM(BPF_MOV, R0, 1),
+			BPF_EXIT_INSN(),
+		},
+		INTERNAL,
+		{ },
+		{ { 0, 1 } },
+	},
 	/* BPF_JMP | BPF_JSLT | BPF_K */
 	{
 		"JMP_JSLT_K: Signed jump: if (-2 < -1) return 1",
diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h
index d37831b8511c..8b06bab5c406 100644
--- a/mm/kasan/kasan.h
+++ b/mm/kasan/kasan.h
@@ -562,7 +562,6 @@ void kasan_restore_multi_shot(bool enabled);
  * code. Declared here to avoid warnings about missing declarations.
  */
 
-asmlinkage void kasan_unpoison_task_stack_below(const void *watermark);
 void __asan_register_globals(void *globals, ssize_t size);
 void __asan_unregister_globals(void *globals, ssize_t size);
 void __asan_handle_no_return(void);
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index faabad6603db..f263f7e91a21 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -1136,6 +1136,7 @@ static int build_initial_monmap(struct ceph_mon_client *monc)
 			       GFP_KERNEL);
 	if (!monc->monmap)
 		return -ENOMEM;
+	monc->monmap->num_mon = num_mon;
 
 	for (i = 0; i < num_mon; i++) {
 		struct ceph_entity_inst *inst = &monc->monmap->mon_inst[i];
@@ -1147,7 +1148,6 @@ static int build_initial_monmap(struct ceph_mon_client *monc)
 		inst->name.type = CEPH_ENTITY_TYPE_MON;
 		inst->name.num = cpu_to_le64(i);
 	}
-	monc->monmap->num_mon = num_mon;
 	return 0;
 }
 
diff --git a/net/core/dev.c b/net/core/dev.c
index 9f3f8930c691..17ba4c5be97e 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -9052,6 +9052,28 @@ bool netdev_port_same_parent_id(struct net_device *a, struct net_device *b)
 }
 EXPORT_SYMBOL(netdev_port_same_parent_id);
 
+static void netdev_dpll_pin_assign(struct net_device *dev, struct dpll_pin *dpll_pin)
+{
+#if IS_ENABLED(CONFIG_DPLL)
+	rtnl_lock();
+	dev->dpll_pin = dpll_pin;
+	rtnl_unlock();
+#endif
+}
+
+void netdev_dpll_pin_set(struct net_device *dev, struct dpll_pin *dpll_pin)
+{
+	WARN_ON(!dpll_pin);
+	netdev_dpll_pin_assign(dev, dpll_pin);
+}
+EXPORT_SYMBOL(netdev_dpll_pin_set);
+
+void netdev_dpll_pin_clear(struct net_device *dev)
+{
+	netdev_dpll_pin_assign(dev, NULL);
+}
+EXPORT_SYMBOL(netdev_dpll_pin_clear);
+
 /**
  *	dev_change_proto_down - set carrier according to proto_down.
  *
diff --git a/net/core/dst.c b/net/core/dst.c
index 980e2fd2f013..6838d3212c37 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -45,7 +45,7 @@ const struct dst_metrics dst_default_metrics = {
 EXPORT_SYMBOL(dst_default_metrics);
 
 void dst_init(struct dst_entry *dst, struct dst_ops *ops,
-	      struct net_device *dev, int initial_ref, int initial_obsolete,
+	      struct net_device *dev, int initial_obsolete,
 	      unsigned short flags)
 {
 	dst->dev = dev;
@@ -66,7 +66,7 @@ void dst_init(struct dst_entry *dst, struct dst_ops *ops,
 	dst->tclassid = 0;
 #endif
 	dst->lwtstate = NULL;
-	rcuref_init(&dst->__rcuref, initial_ref);
+	rcuref_init(&dst->__rcuref, 1);
 	INIT_LIST_HEAD(&dst->rt_uncached);
 	dst->__use = 0;
 	dst->lastuse = jiffies;
@@ -77,7 +77,7 @@ void dst_init(struct dst_entry *dst, struct dst_ops *ops,
 EXPORT_SYMBOL(dst_init);
 
 void *dst_alloc(struct dst_ops *ops, struct net_device *dev,
-		int initial_ref, int initial_obsolete, unsigned short flags)
+		int initial_obsolete, unsigned short flags)
 {
 	struct dst_entry *dst;
 
@@ -90,7 +90,7 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev,
 	if (!dst)
 		return NULL;
 
-	dst_init(dst, ops, dev, initial_ref, initial_obsolete, flags);
+	dst_init(dst, ops, dev, initial_obsolete, flags);
 
 	return dst;
 }
@@ -270,7 +270,7 @@ static void __metadata_dst_init(struct metadata_dst *md_dst,
 	struct dst_entry *dst;
 
 	dst = &md_dst->dst;
-	dst_init(dst, &dst_blackhole_ops, NULL, 1, DST_OBSOLETE_NONE,
+	dst_init(dst, &dst_blackhole_ops, NULL, DST_OBSOLETE_NONE,
 		 DST_METADATA | DST_NOCOUNT);
 	memset(dst + 1, 0, sizeof(*md_dst) + optslen - sizeof(*dst));
 	md_dst->type = type;
diff --git a/net/core/filter.c b/net/core/filter.c
index a094694899c9..cc2e4babc85f 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -81,6 +81,7 @@
 #include <net/xdp.h>
 #include <net/mptcp.h>
 #include <net/netfilter/nf_conntrack_bpf.h>
+#include <linux/un.h>
 
 static const struct bpf_func_proto *
 bpf_sk_base_func_proto(enum bpf_func_id func_id);
@@ -5850,6 +5851,9 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
 	params->rt_metric = res.fi->fib_priority;
 	params->ifindex = dev->ifindex;
 
+	if (flags & BPF_FIB_LOOKUP_SRC)
+		params->ipv4_src = fib_result_prefsrc(net, &res);
+
 	/* xdp and cls_bpf programs are run in RCU-bh so
 	 * rcu_read_lock_bh is not needed here
 	 */
@@ -5992,6 +5996,18 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
 	params->rt_metric = res.f6i->fib6_metric;
 	params->ifindex = dev->ifindex;
 
+	if (flags & BPF_FIB_LOOKUP_SRC) {
+		if (res.f6i->fib6_prefsrc.plen) {
+			*src = res.f6i->fib6_prefsrc.addr;
+		} else {
+			err = ipv6_bpf_stub->ipv6_dev_get_saddr(net, dev,
+								&fl6.daddr, 0,
+								src);
+			if (err)
+				return BPF_FIB_LKUP_RET_NO_SRC_ADDR;
+		}
+	}
+
 	if (flags & BPF_FIB_LOOKUP_SKIP_NEIGH)
 		goto set_fwd_params;
 
@@ -6010,7 +6026,8 @@ set_fwd_params:
 #endif
 
 #define BPF_FIB_LOOKUP_MASK (BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT | \
-			     BPF_FIB_LOOKUP_SKIP_NEIGH | BPF_FIB_LOOKUP_TBID)
+			     BPF_FIB_LOOKUP_SKIP_NEIGH | BPF_FIB_LOOKUP_TBID | \
+			     BPF_FIB_LOOKUP_SRC)
 
 BPF_CALL_4(bpf_xdp_fib_lookup, struct xdp_buff *, ctx,
 	   struct bpf_fib_lookup *, params, int, plen, u32, flags)
@@ -7858,14 +7875,19 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		case BPF_CGROUP_INET6_BIND:
 		case BPF_CGROUP_INET4_CONNECT:
 		case BPF_CGROUP_INET6_CONNECT:
+		case BPF_CGROUP_UNIX_CONNECT:
 		case BPF_CGROUP_UDP4_RECVMSG:
 		case BPF_CGROUP_UDP6_RECVMSG:
+		case BPF_CGROUP_UNIX_RECVMSG:
 		case BPF_CGROUP_UDP4_SENDMSG:
 		case BPF_CGROUP_UDP6_SENDMSG:
+		case BPF_CGROUP_UNIX_SENDMSG:
 		case BPF_CGROUP_INET4_GETPEERNAME:
 		case BPF_CGROUP_INET6_GETPEERNAME:
+		case BPF_CGROUP_UNIX_GETPEERNAME:
 		case BPF_CGROUP_INET4_GETSOCKNAME:
 		case BPF_CGROUP_INET6_GETSOCKNAME:
+		case BPF_CGROUP_UNIX_GETSOCKNAME:
 			return &bpf_sock_addr_setsockopt_proto;
 		default:
 			return NULL;
@@ -7876,14 +7898,19 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		case BPF_CGROUP_INET6_BIND:
 		case BPF_CGROUP_INET4_CONNECT:
 		case BPF_CGROUP_INET6_CONNECT:
+		case BPF_CGROUP_UNIX_CONNECT:
 		case BPF_CGROUP_UDP4_RECVMSG:
 		case BPF_CGROUP_UDP6_RECVMSG:
+		case BPF_CGROUP_UNIX_RECVMSG:
 		case BPF_CGROUP_UDP4_SENDMSG:
 		case BPF_CGROUP_UDP6_SENDMSG:
+		case BPF_CGROUP_UNIX_SENDMSG:
 		case BPF_CGROUP_INET4_GETPEERNAME:
 		case BPF_CGROUP_INET6_GETPEERNAME:
+		case BPF_CGROUP_UNIX_GETPEERNAME:
 		case BPF_CGROUP_INET4_GETSOCKNAME:
 		case BPF_CGROUP_INET6_GETSOCKNAME:
+		case BPF_CGROUP_UNIX_GETSOCKNAME:
 			return &bpf_sock_addr_getsockopt_proto;
 		default:
 			return NULL;
@@ -8931,8 +8958,8 @@ static bool sock_addr_is_valid_access(int off, int size,
 	if (off % size != 0)
 		return false;
 
-	/* Disallow access to IPv6 fields from IPv4 contex and vise
-	 * versa.
+	/* Disallow access to fields not belonging to the attach type's address
+	 * family.
 	 */
 	switch (off) {
 	case bpf_ctx_range(struct bpf_sock_addr, user_ip4):
@@ -11752,6 +11779,27 @@ __bpf_kfunc int bpf_dynptr_from_xdp(struct xdp_buff *xdp, u64 flags,
 
 	return 0;
 }
+
+__bpf_kfunc int bpf_sock_addr_set_sun_path(struct bpf_sock_addr_kern *sa_kern,
+					   const u8 *sun_path, u32 sun_path__sz)
+{
+	struct sockaddr_un *un;
+
+	if (sa_kern->sk->sk_family != AF_UNIX)
+		return -EINVAL;
+
+	/* We do not allow changing the address to unnamed or larger than the
+	 * maximum allowed address size for a unix sockaddr.
+	 */
+	if (sun_path__sz == 0 || sun_path__sz > UNIX_PATH_MAX)
+		return -EINVAL;
+
+	un = (struct sockaddr_un *)sa_kern->uaddr;
+	memcpy(un->sun_path, sun_path, sun_path__sz);
+	sa_kern->uaddrlen = offsetof(struct sockaddr_un, sun_path) + sun_path__sz;
+
+	return 0;
+}
 __diag_pop();
 
 int bpf_dynptr_from_skb_rdonly(struct sk_buff *skb, u64 flags,
@@ -11776,6 +11824,10 @@ BTF_SET8_START(bpf_kfunc_check_set_xdp)
 BTF_ID_FLAGS(func, bpf_dynptr_from_xdp)
 BTF_SET8_END(bpf_kfunc_check_set_xdp)
 
+BTF_SET8_START(bpf_kfunc_check_set_sock_addr)
+BTF_ID_FLAGS(func, bpf_sock_addr_set_sun_path)
+BTF_SET8_END(bpf_kfunc_check_set_sock_addr)
+
 static const struct btf_kfunc_id_set bpf_kfunc_set_skb = {
 	.owner = THIS_MODULE,
 	.set = &bpf_kfunc_check_set_skb,
@@ -11786,6 +11838,11 @@ static const struct btf_kfunc_id_set bpf_kfunc_set_xdp = {
 	.set = &bpf_kfunc_check_set_xdp,
 };
 
+static const struct btf_kfunc_id_set bpf_kfunc_set_sock_addr = {
+	.owner = THIS_MODULE,
+	.set = &bpf_kfunc_check_set_sock_addr,
+};
+
 static int __init bpf_kfunc_init(void)
 {
 	int ret;
@@ -11800,7 +11857,9 @@ static int __init bpf_kfunc_init(void)
 	ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_LWT_XMIT, &bpf_kfunc_set_skb);
 	ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_LWT_SEG6LOCAL, &bpf_kfunc_set_skb);
 	ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_NETFILTER, &bpf_kfunc_set_skb);
-	return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &bpf_kfunc_set_xdp);
+	ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &bpf_kfunc_set_xdp);
+	return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+						&bpf_kfunc_set_sock_addr);
 }
 late_initcall(bpf_kfunc_init);
 
diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c
index c1aea8b756b6..fe61f85bcf33 100644
--- a/net/core/netdev-genl.c
+++ b/net/core/netdev-genl.c
@@ -5,6 +5,7 @@
 #include <linux/rtnetlink.h>
 #include <net/net_namespace.h>
 #include <net/sock.h>
+#include <net/xdp.h>
 
 #include "netdev-genl-gen.h"
 
@@ -12,15 +13,24 @@ static int
 netdev_nl_dev_fill(struct net_device *netdev, struct sk_buff *rsp,
 		   const struct genl_info *info)
 {
+	u64 xdp_rx_meta = 0;
 	void *hdr;
 
 	hdr = genlmsg_iput(rsp, info);
 	if (!hdr)
 		return -EMSGSIZE;
 
+#define XDP_METADATA_KFUNC(_, flag, __, xmo) \
+	if (netdev->xdp_metadata_ops && netdev->xdp_metadata_ops->xmo) \
+		xdp_rx_meta |= flag;
+XDP_METADATA_KFUNC_xxx
+#undef XDP_METADATA_KFUNC
+
 	if (nla_put_u32(rsp, NETDEV_A_DEV_IFINDEX, netdev->ifindex) ||
 	    nla_put_u64_64bit(rsp, NETDEV_A_DEV_XDP_FEATURES,
-			      netdev->xdp_features, NETDEV_A_DEV_PAD)) {
+			      netdev->xdp_features, NETDEV_A_DEV_PAD) ||
+	    nla_put_u64_64bit(rsp, NETDEV_A_DEV_XDP_RX_METADATA_FEATURES,
+			      xdp_rx_meta, NETDEV_A_DEV_PAD)) {
 		genlmsg_cancel(rsp, hdr);
 		return -EINVAL;
 	}
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 53c377d054f0..4363f913ce7c 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -57,6 +57,7 @@
 #if IS_ENABLED(CONFIG_IPV6)
 #include <net/addrconf.h>
 #endif
+#include <linux/dpll.h>
 
 #include "dev.h"
 
@@ -1055,6 +1056,15 @@ static size_t rtnl_devlink_port_size(const struct net_device *dev)
 	return size;
 }
 
+static size_t rtnl_dpll_pin_size(const struct net_device *dev)
+{
+	size_t size = nla_total_size(0); /* nest IFLA_DPLL_PIN */
+
+	size += dpll_msg_pin_handle_size(netdev_dpll_pin(dev));
+
+	return size;
+}
+
 static noinline size_t if_nlmsg_size(const struct net_device *dev,
 				     u32 ext_filter_mask)
 {
@@ -1111,6 +1121,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
 	       + rtnl_prop_list_size(dev)
 	       + nla_total_size(MAX_ADDR_LEN) /* IFLA_PERM_ADDRESS */
 	       + rtnl_devlink_port_size(dev)
+	       + rtnl_dpll_pin_size(dev)
 	       + 0;
 }
 
@@ -1774,6 +1785,28 @@ nest_cancel:
 	return ret;
 }
 
+static int rtnl_fill_dpll_pin(struct sk_buff *skb,
+			      const struct net_device *dev)
+{
+	struct nlattr *dpll_pin_nest;
+	int ret;
+
+	dpll_pin_nest = nla_nest_start(skb, IFLA_DPLL_PIN);
+	if (!dpll_pin_nest)
+		return -EMSGSIZE;
+
+	ret = dpll_msg_add_pin_handle(skb, netdev_dpll_pin(dev));
+	if (ret < 0)
+		goto nest_cancel;
+
+	nla_nest_end(skb, dpll_pin_nest);
+	return 0;
+
+nest_cancel:
+	nla_nest_cancel(skb, dpll_pin_nest);
+	return ret;
+}
+
 static int rtnl_fill_ifinfo(struct sk_buff *skb,
 			    struct net_device *dev, struct net *src_net,
 			    int type, u32 pid, u32 seq, u32 change,
@@ -1916,6 +1949,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
 	if (rtnl_fill_devlink_port(skb, dev))
 		goto nla_put_failure;
 
+	if (rtnl_fill_dpll_pin(skb, dev))
+		goto nla_put_failure;
+
 	nlmsg_end(skb, nlh);
 	return 0;
 
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 4eaf7ed0d1f4..2198979470ec 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -847,6 +847,8 @@ EXPORT_SYMBOL(__napi_alloc_skb);
 void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off,
 		     int size, unsigned int truesize)
 {
+	DEBUG_NET_WARN_ON_ONCE(size > truesize);
+
 	skb_fill_page_desc(skb, i, page, off, size);
 	skb->len += size;
 	skb->data_len += size;
@@ -859,6 +861,8 @@ void skb_coalesce_rx_frag(struct sk_buff *skb, int i, int size,
 {
 	skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
 
+	DEBUG_NET_WARN_ON_ONCE(size > truesize);
+
 	skb_frag_size_add(frag, size);
 	skb->len += size;
 	skb->data_len += size;
diff --git a/net/core/sock.c b/net/core/sock.c
index 16584e2dd648..a5995750c5c5 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -759,7 +759,7 @@ out:
 	return ret;
 }
 
-bool sk_mc_loop(struct sock *sk)
+bool sk_mc_loop(const struct sock *sk)
 {
 	if (dev_recursion_level())
 		return false;
@@ -771,7 +771,7 @@ bool sk_mc_loop(struct sock *sk)
 		return inet_test_bit(MC_LOOP, sk);
 #if IS_ENABLED(CONFIG_IPV6)
 	case AF_INET6:
-		return inet6_sk(sk)->mc_loop;
+		return inet6_test_bit(MC6_LOOP, sk);
 #endif
 	}
 	WARN_ON_ONCE(1);
@@ -3001,6 +3001,11 @@ void __sk_flush_backlog(struct sock *sk)
 {
 	spin_lock_bh(&sk->sk_lock.slock);
 	__release_sock(sk);
+
+	if (sk->sk_prot->release_cb)
+		INDIRECT_CALL_INET_1(sk->sk_prot->release_cb,
+				     tcp_release_cb, sk);
+
 	spin_unlock_bh(&sk->sk_lock.slock);
 }
 EXPORT_SYMBOL_GPL(__sk_flush_backlog);
@@ -3519,11 +3524,9 @@ void release_sock(struct sock *sk)
 	if (sk->sk_backlog.tail)
 		__release_sock(sk);
 
-	/* Warning : release_cb() might need to release sk ownership,
-	 * ie call sock_release_ownership(sk) before us.
-	 */
 	if (sk->sk_prot->release_cb)
-		sk->sk_prot->release_cb(sk);
+		INDIRECT_CALL_INET_1(sk->sk_prot->release_cb,
+				     tcp_release_cb, sk);
 
 	sock_release_ownership(sk);
 	if (waitqueue_active(&sk->sk_lock.wq))
diff --git a/net/core/xdp.c b/net/core/xdp.c
index a70670fe9a2d..df4789ab512d 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -741,7 +741,7 @@ __bpf_kfunc int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, u32 *hash,
 __diag_pop();
 
 BTF_SET8_START(xdp_metadata_kfunc_ids)
-#define XDP_METADATA_KFUNC(_, name) BTF_ID_FLAGS(func, name, KF_TRUSTED_ARGS)
+#define XDP_METADATA_KFUNC(_, __, name, ___) BTF_ID_FLAGS(func, name, KF_TRUSTED_ARGS)
 XDP_METADATA_KFUNC_xxx
 #undef XDP_METADATA_KFUNC
 BTF_SET8_END(xdp_metadata_kfunc_ids)
@@ -752,7 +752,7 @@ static const struct btf_kfunc_id_set xdp_metadata_kfunc_set = {
 };
 
 BTF_ID_LIST(xdp_metadata_kfunc_ids_unsorted)
-#define XDP_METADATA_KFUNC(name, str) BTF_ID(func, str)
+#define XDP_METADATA_KFUNC(name, _, str, __) BTF_ID(func, str)
 XDP_METADATA_KFUNC_xxx
 #undef XDP_METADATA_KFUNC
 
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index c693a570682f..80b956b39252 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -180,7 +180,7 @@ static int dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 		goto out;
 	}
 
-	if (!sock_owned_by_user(sk) && np->recverr) {
+	if (!sock_owned_by_user(sk) && inet6_test_bit(RECVERR6, sk)) {
 		sk->sk_err = err;
 		sk_error_report(sk);
 	} else {
@@ -671,10 +671,10 @@ ipv6_pktoptions:
 		if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
 			np->mcast_oif = inet6_iif(opt_skb);
 		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
-			np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
+			WRITE_ONCE(np->mcast_hops, ipv6_hdr(opt_skb)->hop_limit);
 		if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
 			np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
-		if (np->repflow)
+		if (inet6_test_bit(REPFLOW, sk))
 			np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
 		if (ipv6_opt_accepted(sk, opt_skb,
 				      &DCCP_SKB_CB(opt_skb)->header.h6)) {
@@ -839,7 +839,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 
 	memset(&fl6, 0, sizeof(fl6));
 
-	if (np->sndflow) {
+	if (inet6_test_bit(SNDFLOW, sk)) {
 		fl6.flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK;
 		IP6_ECN_flow_init(fl6.flowlabel);
 		if (fl6.flowlabel & IPV6_FLOWLABEL_MASK) {
diff --git a/net/devlink/core.c b/net/devlink/core.c
index 6cec4afb01fb..bcbbb952569f 100644
--- a/net/devlink/core.c
+++ b/net/devlink/core.c
@@ -16,6 +16,219 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_trap_report);
 
 DEFINE_XARRAY_FLAGS(devlinks, XA_FLAGS_ALLOC);
 
+static struct devlink *devlinks_xa_get(unsigned long index)
+{
+	struct devlink *devlink;
+
+	rcu_read_lock();
+	devlink = xa_find(&devlinks, &index, index, DEVLINK_REGISTERED);
+	if (!devlink || !devlink_try_get(devlink))
+		devlink = NULL;
+	rcu_read_unlock();
+	return devlink;
+}
+
+/* devlink_rels xarray contains 1:1 relationships between
+ * devlink object and related nested devlink instance.
+ * The xarray index is used to get the nested object from
+ * the nested-in object code.
+ */
+static DEFINE_XARRAY_FLAGS(devlink_rels, XA_FLAGS_ALLOC1);
+
+#define DEVLINK_REL_IN_USE XA_MARK_0
+
+struct devlink_rel {
+	u32 index;
+	refcount_t refcount;
+	u32 devlink_index;
+	struct {
+		u32 devlink_index;
+		u32 obj_index;
+		devlink_rel_notify_cb_t *notify_cb;
+		devlink_rel_cleanup_cb_t *cleanup_cb;
+		struct work_struct notify_work;
+	} nested_in;
+};
+
+static void devlink_rel_free(struct devlink_rel *rel)
+{
+	xa_erase(&devlink_rels, rel->index);
+	kfree(rel);
+}
+
+static void __devlink_rel_get(struct devlink_rel *rel)
+{
+	refcount_inc(&rel->refcount);
+}
+
+static void __devlink_rel_put(struct devlink_rel *rel)
+{
+	if (refcount_dec_and_test(&rel->refcount))
+		devlink_rel_free(rel);
+}
+
+static void devlink_rel_nested_in_notify_work(struct work_struct *work)
+{
+	struct devlink_rel *rel = container_of(work, struct devlink_rel,
+					       nested_in.notify_work);
+	struct devlink *devlink;
+
+	devlink = devlinks_xa_get(rel->nested_in.devlink_index);
+	if (!devlink)
+		goto rel_put;
+	if (!devl_trylock(devlink)) {
+		devlink_put(devlink);
+		goto reschedule_work;
+	}
+	if (!devl_is_registered(devlink)) {
+		devl_unlock(devlink);
+		devlink_put(devlink);
+		goto rel_put;
+	}
+	if (!xa_get_mark(&devlink_rels, rel->index, DEVLINK_REL_IN_USE))
+		rel->nested_in.cleanup_cb(devlink, rel->nested_in.obj_index, rel->index);
+	rel->nested_in.notify_cb(devlink, rel->nested_in.obj_index);
+	devl_unlock(devlink);
+	devlink_put(devlink);
+
+rel_put:
+	__devlink_rel_put(rel);
+	return;
+
+reschedule_work:
+	schedule_work(&rel->nested_in.notify_work);
+}
+
+static void devlink_rel_nested_in_notify_work_schedule(struct devlink_rel *rel)
+{
+	__devlink_rel_get(rel);
+	schedule_work(&rel->nested_in.notify_work);
+}
+
+static struct devlink_rel *devlink_rel_alloc(void)
+{
+	struct devlink_rel *rel;
+	static u32 next;
+	int err;
+
+	rel = kzalloc(sizeof(*rel), GFP_KERNEL);
+	if (!rel)
+		return ERR_PTR(-ENOMEM);
+
+	err = xa_alloc_cyclic(&devlink_rels, &rel->index, rel,
+			      xa_limit_32b, &next, GFP_KERNEL);
+	if (err) {
+		kfree(rel);
+		return ERR_PTR(err);
+	}
+
+	refcount_set(&rel->refcount, 1);
+	INIT_WORK(&rel->nested_in.notify_work,
+		  &devlink_rel_nested_in_notify_work);
+	return rel;
+}
+
+static void devlink_rel_put(struct devlink *devlink)
+{
+	struct devlink_rel *rel = devlink->rel;
+
+	if (!rel)
+		return;
+	xa_clear_mark(&devlink_rels, rel->index, DEVLINK_REL_IN_USE);
+	devlink_rel_nested_in_notify_work_schedule(rel);
+	__devlink_rel_put(rel);
+	devlink->rel = NULL;
+}
+
+void devlink_rel_nested_in_clear(u32 rel_index)
+{
+	xa_clear_mark(&devlink_rels, rel_index, DEVLINK_REL_IN_USE);
+}
+
+int devlink_rel_nested_in_add(u32 *rel_index, u32 devlink_index,
+			      u32 obj_index, devlink_rel_notify_cb_t *notify_cb,
+			      devlink_rel_cleanup_cb_t *cleanup_cb,
+			      struct devlink *devlink)
+{
+	struct devlink_rel *rel = devlink_rel_alloc();
+
+	ASSERT_DEVLINK_NOT_REGISTERED(devlink);
+
+	if (IS_ERR(rel))
+		return PTR_ERR(rel);
+
+	rel->devlink_index = devlink->index;
+	rel->nested_in.devlink_index = devlink_index;
+	rel->nested_in.obj_index = obj_index;
+	rel->nested_in.notify_cb = notify_cb;
+	rel->nested_in.cleanup_cb = cleanup_cb;
+	*rel_index = rel->index;
+	xa_set_mark(&devlink_rels, rel->index, DEVLINK_REL_IN_USE);
+	devlink->rel = rel;
+	return 0;
+}
+
+void devlink_rel_nested_in_notify(struct devlink *devlink)
+{
+	struct devlink_rel *rel = devlink->rel;
+
+	if (!rel)
+		return;
+	devlink_rel_nested_in_notify_work_schedule(rel);
+}
+
+static struct devlink_rel *devlink_rel_find(unsigned long rel_index)
+{
+	return xa_find(&devlink_rels, &rel_index, rel_index,
+		       DEVLINK_REL_IN_USE);
+}
+
+static struct devlink *devlink_rel_devlink_get_lock(u32 rel_index)
+{
+	struct devlink *devlink;
+	struct devlink_rel *rel;
+	u32 devlink_index;
+
+	if (!rel_index)
+		return NULL;
+	xa_lock(&devlink_rels);
+	rel = devlink_rel_find(rel_index);
+	if (rel)
+		devlink_index = rel->devlink_index;
+	xa_unlock(&devlink_rels);
+	if (!rel)
+		return NULL;
+	devlink = devlinks_xa_get(devlink_index);
+	if (!devlink)
+		return NULL;
+	devl_lock(devlink);
+	if (!devl_is_registered(devlink)) {
+		devl_unlock(devlink);
+		devlink_put(devlink);
+		return NULL;
+	}
+	return devlink;
+}
+
+int devlink_rel_devlink_handle_put(struct sk_buff *msg, struct devlink *devlink,
+				   u32 rel_index, int attrtype,
+				   bool *msg_updated)
+{
+	struct net *net = devlink_net(devlink);
+	struct devlink *rel_devlink;
+	int err;
+
+	rel_devlink = devlink_rel_devlink_get_lock(rel_index);
+	if (!rel_devlink)
+		return 0;
+	err = devlink_nl_put_nested_handle(msg, net, rel_devlink, attrtype);
+	devl_unlock(rel_devlink);
+	devlink_put(rel_devlink);
+	if (!err && msg_updated)
+		*msg_updated = true;
+	return err;
+}
+
 void *devlink_priv(struct devlink *devlink)
 {
 	return &devlink->priv;
@@ -142,6 +355,7 @@ int devl_register(struct devlink *devlink)
 
 	xa_set_mark(&devlinks, devlink->index, DEVLINK_REGISTERED);
 	devlink_notify_register(devlink);
+	devlink_rel_nested_in_notify(devlink);
 
 	return 0;
 }
@@ -166,6 +380,7 @@ void devl_unregister(struct devlink *devlink)
 
 	devlink_notify_unregister(devlink);
 	xa_clear_mark(&devlinks, devlink->index, DEVLINK_REGISTERED);
+	devlink_rel_put(devlink);
 }
 EXPORT_SYMBOL_GPL(devl_unregister);
 
@@ -215,6 +430,7 @@ struct devlink *devlink_alloc_ns(const struct devlink_ops *ops,
 	xa_init_flags(&devlink->ports, XA_FLAGS_ALLOC);
 	xa_init_flags(&devlink->params, XA_FLAGS_ALLOC);
 	xa_init_flags(&devlink->snapshot_ids, XA_FLAGS_ALLOC);
+	xa_init_flags(&devlink->nested_rels, XA_FLAGS_ALLOC);
 	write_pnet(&devlink->_net, net);
 	INIT_LIST_HEAD(&devlink->rate_list);
 	INIT_LIST_HEAD(&devlink->linecard_list);
@@ -261,6 +477,7 @@ void devlink_free(struct devlink *devlink)
 	WARN_ON(!list_empty(&devlink->linecard_list));
 	WARN_ON(!xa_empty(&devlink->ports));
 
+	xa_destroy(&devlink->nested_rels);
 	xa_destroy(&devlink->snapshot_ids);
 	xa_destroy(&devlink->params);
 	xa_destroy(&devlink->ports);
diff --git a/net/devlink/dev.c b/net/devlink/dev.c
index bba4ace7d22b..dc8039ca2b38 100644
--- a/net/devlink/dev.c
+++ b/net/devlink/dev.c
@@ -138,6 +138,23 @@ nla_put_failure:
 	return -EMSGSIZE;
 }
 
+static int devlink_nl_nested_fill(struct sk_buff *msg, struct devlink *devlink)
+{
+	unsigned long rel_index;
+	void *unused;
+	int err;
+
+	xa_for_each(&devlink->nested_rels, rel_index, unused) {
+		err = devlink_rel_devlink_handle_put(msg, devlink,
+						     rel_index,
+						     DEVLINK_ATTR_NESTED_DEVLINK,
+						     NULL);
+		if (err)
+			return err;
+	}
+	return 0;
+}
+
 static int devlink_nl_fill(struct sk_buff *msg, struct devlink *devlink,
 			   enum devlink_command cmd, u32 portid,
 			   u32 seq, int flags)
@@ -164,6 +181,10 @@ static int devlink_nl_fill(struct sk_buff *msg, struct devlink *devlink,
 		goto dev_stats_nest_cancel;
 
 	nla_nest_end(msg, dev_stats);
+
+	if (devlink_nl_nested_fill(msg, devlink))
+		goto nla_put_failure;
+
 	genlmsg_end(msg, hdr);
 	return 0;
 
@@ -230,6 +251,34 @@ int devlink_nl_get_dumpit(struct sk_buff *msg, struct netlink_callback *cb)
 	return devlink_nl_dumpit(msg, cb, devlink_nl_get_dump_one);
 }
 
+static void devlink_rel_notify_cb(struct devlink *devlink, u32 obj_index)
+{
+	devlink_notify(devlink, DEVLINK_CMD_NEW);
+}
+
+static void devlink_rel_cleanup_cb(struct devlink *devlink, u32 obj_index,
+				   u32 rel_index)
+{
+	xa_erase(&devlink->nested_rels, rel_index);
+}
+
+int devl_nested_devlink_set(struct devlink *devlink,
+			    struct devlink *nested_devlink)
+{
+	u32 rel_index;
+	int err;
+
+	err = devlink_rel_nested_in_add(&rel_index, devlink->index, 0,
+					devlink_rel_notify_cb,
+					devlink_rel_cleanup_cb,
+					nested_devlink);
+	if (err)
+		return err;
+	return xa_insert(&devlink->nested_rels, rel_index,
+			 xa_mk_value(0), GFP_KERNEL);
+}
+EXPORT_SYMBOL_GPL(devl_nested_devlink_set);
+
 void devlink_notify_register(struct devlink *devlink)
 {
 	devlink_notify(devlink, DEVLINK_CMD_NEW);
@@ -372,6 +421,7 @@ static void devlink_reload_netns_change(struct devlink *devlink,
 	devlink_notify_unregister(devlink);
 	write_pnet(&devlink->_net, dest_net);
 	devlink_notify_register(devlink);
+	devlink_rel_nested_in_notify(devlink);
 }
 
 int devlink_reload(struct devlink *devlink, struct net *dest_net,
diff --git a/net/devlink/devl_internal.h b/net/devlink/devl_internal.h
index f6b5fea2e13c..741d1bf1bec8 100644
--- a/net/devlink/devl_internal.h
+++ b/net/devlink/devl_internal.h
@@ -17,6 +17,8 @@
 
 #include "netlink_gen.h"
 
+struct devlink_rel;
+
 #define DEVLINK_REGISTERED XA_MARK_1
 
 #define DEVLINK_RELOAD_STATS_ARRAY_SIZE \
@@ -55,6 +57,8 @@ struct devlink {
 	u8 reload_failed:1;
 	refcount_t refcount;
 	struct rcu_work rwork;
+	struct devlink_rel *rel;
+	struct xarray nested_rels;
 	char priv[] __aligned(NETDEV_ALIGN);
 };
 
@@ -92,6 +96,20 @@ static inline bool devl_is_registered(struct devlink *devlink)
 	return xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED);
 }
 
+typedef void devlink_rel_notify_cb_t(struct devlink *devlink, u32 obj_index);
+typedef void devlink_rel_cleanup_cb_t(struct devlink *devlink, u32 obj_index,
+				      u32 rel_index);
+
+void devlink_rel_nested_in_clear(u32 rel_index);
+int devlink_rel_nested_in_add(u32 *rel_index, u32 devlink_index,
+			      u32 obj_index, devlink_rel_notify_cb_t *notify_cb,
+			      devlink_rel_cleanup_cb_t *cleanup_cb,
+			      struct devlink *devlink);
+void devlink_rel_nested_in_notify(struct devlink *devlink);
+int devlink_rel_devlink_handle_put(struct sk_buff *msg, struct devlink *devlink,
+				   u32 rel_index, int attrtype,
+				   bool *msg_updated);
+
 /* Netlink */
 #define DEVLINK_NL_FLAG_NEED_PORT		BIT(0)
 #define DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT	BIT(1)
@@ -145,6 +163,8 @@ devlink_nl_put_handle(struct sk_buff *msg, struct devlink *devlink)
 	return 0;
 }
 
+int devlink_nl_put_nested_handle(struct sk_buff *msg, struct net *net,
+				 struct devlink *devlink, int attrtype);
 int devlink_nl_msg_reply_and_new(struct sk_buff **msg, struct genl_info *info);
 
 /* Notify */
@@ -206,19 +226,7 @@ int devlink_rate_nodes_check(struct devlink *devlink, u16 mode,
 			     struct netlink_ext_ack *extack);
 
 /* Linecards */
-struct devlink_linecard {
-	struct list_head list;
-	struct devlink *devlink;
-	unsigned int index;
-	const struct devlink_linecard_ops *ops;
-	void *priv;
-	enum devlink_linecard_state state;
-	struct mutex state_lock; /* Protects state */
-	const char *type;
-	struct devlink_linecard_type *types;
-	unsigned int types_count;
-	struct devlink *nested_devlink;
-};
+unsigned int devlink_linecard_index(struct devlink_linecard *linecard);
 
 /* Devlink nl cmds */
 int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info);
diff --git a/net/devlink/linecard.c b/net/devlink/linecard.c
index 85c32c314b0f..9ff1813f88c5 100644
--- a/net/devlink/linecard.c
+++ b/net/devlink/linecard.c
@@ -6,6 +6,25 @@
 
 #include "devl_internal.h"
 
+struct devlink_linecard {
+	struct list_head list;
+	struct devlink *devlink;
+	unsigned int index;
+	const struct devlink_linecard_ops *ops;
+	void *priv;
+	enum devlink_linecard_state state;
+	struct mutex state_lock; /* Protects state */
+	const char *type;
+	struct devlink_linecard_type *types;
+	unsigned int types_count;
+	u32 rel_index;
+};
+
+unsigned int devlink_linecard_index(struct devlink_linecard *linecard)
+{
+	return linecard->index;
+}
+
 static struct devlink_linecard *
 devlink_linecard_get_by_index(struct devlink *devlink,
 			      unsigned int linecard_index)
@@ -46,24 +65,6 @@ devlink_linecard_get_from_info(struct devlink *devlink, struct genl_info *info)
 	return devlink_linecard_get_from_attrs(devlink, info->attrs);
 }
 
-static int devlink_nl_put_nested_handle(struct sk_buff *msg, struct devlink *devlink)
-{
-	struct nlattr *nested_attr;
-
-	nested_attr = nla_nest_start(msg, DEVLINK_ATTR_NESTED_DEVLINK);
-	if (!nested_attr)
-		return -EMSGSIZE;
-	if (devlink_nl_put_handle(msg, devlink))
-		goto nla_put_failure;
-
-	nla_nest_end(msg, nested_attr);
-	return 0;
-
-nla_put_failure:
-	nla_nest_cancel(msg, nested_attr);
-	return -EMSGSIZE;
-}
-
 struct devlink_linecard_type {
 	const char *type;
 	const void *priv;
@@ -111,8 +112,10 @@ static int devlink_nl_linecard_fill(struct sk_buff *msg,
 		nla_nest_end(msg, attr);
 	}
 
-	if (linecard->nested_devlink &&
-	    devlink_nl_put_nested_handle(msg, linecard->nested_devlink))
+	if (devlink_rel_devlink_handle_put(msg, devlink,
+					   linecard->rel_index,
+					   DEVLINK_ATTR_NESTED_DEVLINK,
+					   NULL))
 		goto nla_put_failure;
 
 	genlmsg_end(msg, hdr);
@@ -521,7 +524,6 @@ EXPORT_SYMBOL_GPL(devlink_linecard_provision_set);
 void devlink_linecard_provision_clear(struct devlink_linecard *linecard)
 {
 	mutex_lock(&linecard->state_lock);
-	WARN_ON(linecard->nested_devlink);
 	linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONED;
 	linecard->type = NULL;
 	devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
@@ -540,7 +542,6 @@ EXPORT_SYMBOL_GPL(devlink_linecard_provision_clear);
 void devlink_linecard_provision_fail(struct devlink_linecard *linecard)
 {
 	mutex_lock(&linecard->state_lock);
-	WARN_ON(linecard->nested_devlink);
 	linecard->state = DEVLINK_LINECARD_STATE_PROVISIONING_FAILED;
 	devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
 	mutex_unlock(&linecard->state_lock);
@@ -588,6 +589,27 @@ void devlink_linecard_deactivate(struct devlink_linecard *linecard)
 }
 EXPORT_SYMBOL_GPL(devlink_linecard_deactivate);
 
+static void devlink_linecard_rel_notify_cb(struct devlink *devlink,
+					   u32 linecard_index)
+{
+	struct devlink_linecard *linecard;
+
+	linecard = devlink_linecard_get_by_index(devlink, linecard_index);
+	if (!linecard)
+		return;
+	devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
+}
+
+static void devlink_linecard_rel_cleanup_cb(struct devlink *devlink,
+					    u32 linecard_index, u32 rel_index)
+{
+	struct devlink_linecard *linecard;
+
+	linecard = devlink_linecard_get_by_index(devlink, linecard_index);
+	if (linecard && linecard->rel_index == rel_index)
+		linecard->rel_index = 0;
+}
+
 /**
  *	devlink_linecard_nested_dl_set - Attach/detach nested devlink
  *					 instance to linecard.
@@ -595,12 +617,14 @@ EXPORT_SYMBOL_GPL(devlink_linecard_deactivate);
  *	@linecard: devlink linecard
  *	@nested_devlink: devlink instance to attach or NULL to detach
  */
-void devlink_linecard_nested_dl_set(struct devlink_linecard *linecard,
-				    struct devlink *nested_devlink)
+int devlink_linecard_nested_dl_set(struct devlink_linecard *linecard,
+				   struct devlink *nested_devlink)
 {
-	mutex_lock(&linecard->state_lock);
-	linecard->nested_devlink = nested_devlink;
-	devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
-	mutex_unlock(&linecard->state_lock);
+	return devlink_rel_nested_in_add(&linecard->rel_index,
+					 linecard->devlink->index,
+					 linecard->index,
+					 devlink_linecard_rel_notify_cb,
+					 devlink_linecard_rel_cleanup_cb,
+					 nested_devlink);
 }
 EXPORT_SYMBOL_GPL(devlink_linecard_nested_dl_set);
diff --git a/net/devlink/netlink.c b/net/devlink/netlink.c
index fc3e7c029a3b..499304d9de49 100644
--- a/net/devlink/netlink.c
+++ b/net/devlink/netlink.c
@@ -82,6 +82,32 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
 	[DEVLINK_ATTR_REGION_DIRECT] = { .type = NLA_FLAG },
 };
 
+int devlink_nl_put_nested_handle(struct sk_buff *msg, struct net *net,
+				 struct devlink *devlink, int attrtype)
+{
+	struct nlattr *nested_attr;
+
+	nested_attr = nla_nest_start(msg, attrtype);
+	if (!nested_attr)
+		return -EMSGSIZE;
+	if (devlink_nl_put_handle(msg, devlink))
+		goto nla_put_failure;
+	if (!net_eq(net, devlink_net(devlink))) {
+		int id = peernet2id_alloc(net, devlink_net(devlink),
+					  GFP_KERNEL);
+
+		if (nla_put_s32(msg, DEVLINK_ATTR_NETNS_ID, id))
+			return -EMSGSIZE;
+	}
+
+	nla_nest_end(msg, nested_attr);
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(msg, nested_attr);
+	return -EMSGSIZE;
+}
+
 int devlink_nl_msg_reply_and_new(struct sk_buff **msg, struct genl_info *info)
 {
 	int err;
diff --git a/net/devlink/port.c b/net/devlink/port.c
index 4763b42885fb..4e9003242448 100644
--- a/net/devlink/port.c
+++ b/net/devlink/port.c
@@ -428,6 +428,13 @@ devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *por
 	if (err)
 		goto out;
 	err = devlink_port_fn_state_fill(port, msg, extack, &msg_updated);
+	if (err)
+		goto out;
+	err = devlink_rel_devlink_handle_put(msg, port->devlink,
+					     port->rel_index,
+					     DEVLINK_PORT_FN_ATTR_DEVLINK,
+					     &msg_updated);
+
 out:
 	if (err || !msg_updated)
 		nla_nest_cancel(msg, function_attr);
@@ -483,7 +490,7 @@ static int devlink_nl_port_fill(struct sk_buff *msg,
 		goto nla_put_failure;
 	if (devlink_port->linecard &&
 	    nla_put_u32(msg, DEVLINK_ATTR_LINECARD_INDEX,
-			devlink_port->linecard->index))
+			devlink_linecard_index(devlink_port->linecard)))
 		goto nla_put_failure;
 
 	genlmsg_end(msg, hdr);
@@ -1392,6 +1399,50 @@ void devlink_port_attrs_pci_sf_set(struct devlink_port *devlink_port, u32 contro
 }
 EXPORT_SYMBOL_GPL(devlink_port_attrs_pci_sf_set);
 
+static void devlink_port_rel_notify_cb(struct devlink *devlink, u32 port_index)
+{
+	struct devlink_port *devlink_port;
+
+	devlink_port = devlink_port_get_by_index(devlink, port_index);
+	if (!devlink_port)
+		return;
+	devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
+}
+
+static void devlink_port_rel_cleanup_cb(struct devlink *devlink, u32 port_index,
+					u32 rel_index)
+{
+	struct devlink_port *devlink_port;
+
+	devlink_port = devlink_port_get_by_index(devlink, port_index);
+	if (devlink_port && devlink_port->rel_index == rel_index)
+		devlink_port->rel_index = 0;
+}
+
+/**
+ * devl_port_fn_devlink_set - Attach peer devlink
+ *			      instance to port function.
+ * @devlink_port: devlink port
+ * @fn_devlink: devlink instance to attach
+ */
+int devl_port_fn_devlink_set(struct devlink_port *devlink_port,
+			     struct devlink *fn_devlink)
+{
+	ASSERT_DEVLINK_PORT_REGISTERED(devlink_port);
+
+	if (WARN_ON(devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_PCI_SF ||
+		    devlink_port->attrs.pci_sf.external))
+		return -EINVAL;
+
+	return devlink_rel_nested_in_add(&devlink_port->rel_index,
+					 devlink_port->devlink->index,
+					 devlink_port->index,
+					 devlink_port_rel_notify_cb,
+					 devlink_port_rel_cleanup_cb,
+					 fn_devlink);
+}
+EXPORT_SYMBOL_GPL(devl_port_fn_devlink_set);
+
 /**
  *	devlink_port_linecard_set - Link port with a linecard
  *
@@ -1420,7 +1471,7 @@ static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port,
 	case DEVLINK_PORT_FLAVOUR_PHYSICAL:
 		if (devlink_port->linecard)
 			n = snprintf(name, len, "l%u",
-				     devlink_port->linecard->index);
+				     devlink_linecard_index(devlink_port->linecard));
 		if (n < len)
 			n += snprintf(name + n, len - n, "p%u",
 				      attrs->phys.port_number);
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 2713c9b06c4c..fb81de10d332 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -452,7 +452,7 @@ int inet_bind_sk(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	/* BPF prog is run before any checks are done so that if the prog
 	 * changes context in a wrong way it will be caught.
 	 */
-	err = BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr,
+	err = BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, &addr_len,
 						 CGROUP_INET4_BIND, &flags);
 	if (err)
 		return err;
@@ -794,6 +794,7 @@ int inet_getname(struct socket *sock, struct sockaddr *uaddr,
 	struct sock *sk		= sock->sk;
 	struct inet_sock *inet	= inet_sk(sk);
 	DECLARE_SOCKADDR(struct sockaddr_in *, sin, uaddr);
+	int sin_addr_len = sizeof(*sin);
 
 	sin->sin_family = AF_INET;
 	lock_sock(sk);
@@ -806,7 +807,7 @@ int inet_getname(struct socket *sock, struct sockaddr *uaddr,
 		}
 		sin->sin_port = inet->inet_dport;
 		sin->sin_addr.s_addr = inet->inet_daddr;
-		BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin,
+		BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, &sin_addr_len,
 				       CGROUP_INET4_GETPEERNAME);
 	} else {
 		__be32 addr = inet->inet_rcv_saddr;
@@ -814,12 +815,12 @@ int inet_getname(struct socket *sock, struct sockaddr *uaddr,
 			addr = inet->inet_saddr;
 		sin->sin_port = inet->inet_sport;
 		sin->sin_addr.s_addr = addr;
-		BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin,
+		BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, &sin_addr_len,
 				       CGROUP_INET4_GETSOCKNAME);
 	}
 	release_sock(sk);
 	memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
-	return sizeof(*sin);
+	return sin_addr_len;
 }
 EXPORT_SYMBOL(inet_getname);
 
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 418e5fb58fd3..76c3ea75b8dd 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -2944,8 +2944,6 @@ static struct ip_sf_list *igmp_mcf_get_next(struct seq_file *seq, struct ip_sf_l
 				continue;
 			state->im = rcu_dereference(state->idev->mc_list);
 		}
-		if (!state->im)
-			break;
 		spin_lock_bh(&state->im->lock);
 		psf = state->im->sources;
 	}
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 75e0aee35eb7..2887177822c9 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -301,7 +301,7 @@ static int ping_pre_connect(struct sock *sk, struct sockaddr *uaddr,
 	if (addr_len < sizeof(struct sockaddr_in))
 		return -EINVAL;
 
-	return BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr);
+	return BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr, &addr_len);
 }
 
 /* Checks the bind address and possibly modifies sk->sk_bound_dev_if. */
@@ -581,7 +581,7 @@ void ping_err(struct sk_buff *skb, int offset, u32 info)
 	 *	4.1.3.3.
 	 */
 	if ((family == AF_INET && !inet_test_bit(RECVERR, sk)) ||
-	    (family == AF_INET6 && !inet6_sk(sk)->recverr)) {
+	    (family == AF_INET6 && !inet6_test_bit(RECVERR6, sk))) {
 		if (!harderr || sk->sk_state != TCP_ESTABLISHED)
 			goto out;
 	} else {
@@ -899,7 +899,6 @@ int ping_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags,
 
 #if IS_ENABLED(CONFIG_IPV6)
 	} else if (family == AF_INET6) {
-		struct ipv6_pinfo *np = inet6_sk(sk);
 		struct ipv6hdr *ip6 = ipv6_hdr(skb);
 		DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name);
 
@@ -908,7 +907,7 @@ int ping_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags,
 			sin6->sin6_port = 0;
 			sin6->sin6_addr = ip6->saddr;
 			sin6->sin6_flowinfo = 0;
-			if (np->sndflow)
+			if (inet6_test_bit(SNDFLOW, sk))
 				sin6->sin6_flowinfo = ip6_flowinfo(ip6);
 			sin6->sin6_scope_id =
 				ipv6_iface_scope_id(&sin6->sin6_addr,
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index b214b5a2e045..e2bf4602b559 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1632,7 +1632,7 @@ struct rtable *rt_dst_alloc(struct net_device *dev,
 {
 	struct rtable *rt;
 
-	rt = dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK,
+	rt = dst_alloc(&ipv4_dst_ops, dev, DST_OBSOLETE_FORCE_CHK,
 		       (noxfrm ? DST_NOXFRM : 0));
 
 	if (rt) {
@@ -1660,7 +1660,7 @@ struct rtable *rt_dst_clone(struct net_device *dev, struct rtable *rt)
 {
 	struct rtable *new_rt;
 
-	new_rt = dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK,
+	new_rt = dst_alloc(&ipv4_dst_ops, dev, DST_OBSOLETE_FORCE_CHK,
 			   rt->dst.flags);
 
 	if (new_rt) {
@@ -2834,7 +2834,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
 	struct rtable *ort = (struct rtable *) dst_orig;
 	struct rtable *rt;
 
-	rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, DST_OBSOLETE_DEAD, 0);
+	rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, DST_OBSOLETE_DEAD, 0);
 	if (rt) {
 		struct dst_entry *new = &rt->dst;
 
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 6ac890b4073f..e7f024d93572 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -1367,6 +1367,15 @@ static struct ctl_table ipv4_net_table[] = {
 		.extra1		= SYSCTL_ZERO,
 	},
 	{
+		.procname	= "tcp_backlog_ack_defer",
+		.data		= &init_net.ipv4.sysctl_tcp_backlog_ack_defer,
+		.maxlen		= sizeof(u8),
+		.mode		= 0644,
+		.proc_handler	= proc_dou8vec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
+	{
 		.procname       = "tcp_reflect_tos",
 		.data           = &init_net.ipv4.sysctl_tcp_reflect_tos,
 		.maxlen         = sizeof(u8),
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 3d3a24f79573..30e8e5d35983 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3814,6 +3814,15 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
 	info->tcpi_rcv_wnd = tp->rcv_wnd;
 	info->tcpi_rehash = tp->plb_rehash + tp->timeout_rehash;
 	info->tcpi_fastopen_client_fail = tp->fastopen_client_fail;
+
+	info->tcpi_total_rto = tp->total_rto;
+	info->tcpi_total_rto_recoveries = tp->total_rto_recoveries;
+	info->tcpi_total_rto_time = tp->total_rto_time;
+	if (tp->rto_stamp) {
+		info->tcpi_total_rto_time += tcp_time_stamp_raw() -
+						tp->rto_stamp;
+	}
+
 	unlock_sock_fast(sk, slow);
 }
 EXPORT_SYMBOL_GPL(tcp_get_info);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 804821d6bd4d..2ce14ff2b00c 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2101,6 +2101,10 @@ void tcp_clear_retrans(struct tcp_sock *tp)
 	tp->undo_marker = 0;
 	tp->undo_retrans = -1;
 	tp->sacked_out = 0;
+	tp->rto_stamp = 0;
+	tp->total_rto = 0;
+	tp->total_rto_recoveries = 0;
+	tp->total_rto_time = 0;
 }
 
 static inline void tcp_init_undo(struct tcp_sock *tp)
@@ -2839,6 +2843,14 @@ void tcp_enter_recovery(struct sock *sk, bool ece_ack)
 	tcp_set_ca_state(sk, TCP_CA_Recovery);
 }
 
+static void tcp_update_rto_time(struct tcp_sock *tp)
+{
+	if (tp->rto_stamp) {
+		tp->total_rto_time += tcp_time_stamp(tp) - tp->rto_stamp;
+		tp->rto_stamp = 0;
+	}
+}
+
 /* Process an ACK in CA_Loss state. Move to CA_Open if lost data are
  * recovered or spurious. Otherwise retransmits more on partial ACKs.
  */
@@ -3043,6 +3055,8 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
 		break;
 	case TCP_CA_Loss:
 		tcp_process_loss(sk, flag, num_dupack, rexmit);
+		if (icsk->icsk_ca_state != TCP_CA_Loss)
+			tcp_update_rto_time(tp);
 		tcp_identify_packet_loss(sk, ack_flag);
 		if (!(icsk->icsk_ca_state == TCP_CA_Open ||
 		      (*ack_flag & FLAG_LOST_RETRANS)))
@@ -5567,6 +5581,14 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
 	    tcp_in_quickack_mode(sk) ||
 	    /* Protocol state mandates a one-time immediate ACK */
 	    inet_csk(sk)->icsk_ack.pending & ICSK_ACK_NOW) {
+		/* If we are running from __release_sock() in user context,
+		 * Defer the ack until tcp_release_cb().
+		 */
+		if (sock_owned_by_user_nocheck(sk) &&
+		    READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_backlog_ack_defer)) {
+			set_bit(TCP_ACK_DEFERRED, &sk->sk_tsq_flags);
+			return;
+		}
 send_now:
 		tcp_send_ack(sk);
 		return;
@@ -6450,22 +6472,24 @@ reset_and_undo:
 
 static void tcp_rcv_synrecv_state_fastopen(struct sock *sk)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
 	struct request_sock *req;
 
 	/* If we are still handling the SYNACK RTO, see if timestamp ECR allows
 	 * undo. If peer SACKs triggered fast recovery, we can't undo here.
 	 */
-	if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss)
-		tcp_try_undo_loss(sk, false);
+	if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss && !tp->packets_out)
+		tcp_try_undo_recovery(sk);
 
 	/* Reset rtx states to prevent spurious retransmits_timed_out() */
-	tcp_sk(sk)->retrans_stamp = 0;
+	tcp_update_rto_time(tp);
+	tp->retrans_stamp = 0;
 	inet_csk(sk)->icsk_retransmits = 0;
 
 	/* Once we leave TCP_SYN_RECV or TCP_FIN_WAIT_1,
 	 * we no longer need req so release it.
 	 */
-	req = rcu_dereference_protected(tcp_sk(sk)->fastopen_rsk,
+	req = rcu_dereference_protected(tp->fastopen_rsk,
 					lockdep_sock_is_held(sk));
 	reqsk_fastopen_remove(sk, req, false);
 
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 4167e8a48b60..38892ae83e17 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -194,7 +194,7 @@ static int tcp_v4_pre_connect(struct sock *sk, struct sockaddr *uaddr,
 
 	sock_owned_by_me(sk);
 
-	return BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr);
+	return BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr, &addr_len);
 }
 
 /* This will initiate an outgoing connection. */
@@ -3264,6 +3264,7 @@ static int __net_init tcp_sk_init(struct net *net)
 	net->ipv4.sysctl_tcp_comp_sack_delay_ns = NSEC_PER_MSEC;
 	net->ipv4.sysctl_tcp_comp_sack_slack_ns = 100 * NSEC_PER_USEC;
 	net->ipv4.sysctl_tcp_comp_sack_nr = 44;
+	net->ipv4.sysctl_tcp_backlog_ack_defer = 1;
 	net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE;
 	net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 0;
 	atomic_set(&net->ipv4.tfo_active_disable_times, 0);
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index b98d476f1594..eee8ab1bfa0e 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -565,6 +565,10 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
 		newtp->undo_marker = treq->snt_isn;
 		newtp->retrans_stamp = div_u64(treq->snt_synack,
 					       USEC_PER_SEC / TCP_TS_HZ);
+		newtp->total_rto = req->num_timeout;
+		newtp->total_rto_recoveries = 1;
+		newtp->total_rto_time = tcp_time_stamp_raw() -
+						newtp->retrans_stamp;
 	}
 	newtp->tsoffset = treq->ts_off;
 #ifdef CONFIG_TCP_MD5SIG
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index f0723460753c..3b833fd76b24 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1076,7 +1076,8 @@ static void tcp_tasklet_func(struct tasklet_struct *t)
 #define TCP_DEFERRED_ALL (TCPF_TSQ_DEFERRED |		\
 			  TCPF_WRITE_TIMER_DEFERRED |	\
 			  TCPF_DELACK_TIMER_DEFERRED |	\
-			  TCPF_MTU_REDUCED_DEFERRED)
+			  TCPF_MTU_REDUCED_DEFERRED |	\
+			  TCPF_ACK_DEFERRED)
 /**
  * tcp_release_cb - tcp release_sock() callback
  * @sk: socket
@@ -1100,16 +1101,6 @@ void tcp_release_cb(struct sock *sk)
 		tcp_tsq_write(sk);
 		__sock_put(sk);
 	}
-	/* Here begins the tricky part :
-	 * We are called from release_sock() with :
-	 * 1) BH disabled
-	 * 2) sk_lock.slock spinlock held
-	 * 3) socket owned by us (sk->sk_lock.owned == 1)
-	 *
-	 * But following code is meant to be called from BH handlers,
-	 * so we should keep BH disabled, but early release socket ownership
-	 */
-	sock_release_ownership(sk);
 
 	if (flags & TCPF_WRITE_TIMER_DEFERRED) {
 		tcp_write_timer_handler(sk);
@@ -1123,6 +1114,8 @@ void tcp_release_cb(struct sock *sk)
 		inet_csk(sk)->icsk_af_ops->mtu_reduced(sk);
 		__sock_put(sk);
 	}
+	if ((flags & TCPF_ACK_DEFERRED) && inet_csk_ack_scheduled(sk))
+		tcp_send_ack(sk);
 }
 EXPORT_SYMBOL(tcp_release_cb);
 
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 984ab4a0421e..3f61c6a70a1f 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -394,7 +394,7 @@ static void tcp_probe_timer(struct sock *sk)
 		if (user_timeout &&
 		    (s32)(tcp_jiffies32 - icsk->icsk_probes_tstamp) >=
 		     msecs_to_jiffies(user_timeout))
-		goto abort;
+			goto abort;
 	}
 	max_probes = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_retries2);
 	if (sock_flag(sk, SOCK_DEAD)) {
@@ -415,6 +415,19 @@ abort:		tcp_write_err(sk);
 	}
 }
 
+static void tcp_update_rto_stats(struct sock *sk)
+{
+	struct inet_connection_sock *icsk = inet_csk(sk);
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	if (!icsk->icsk_retransmits) {
+		tp->total_rto_recoveries++;
+		tp->rto_stamp = tcp_time_stamp(tp);
+	}
+	icsk->icsk_retransmits++;
+	tp->total_rto++;
+}
+
 /*
  *	Timer for Fast Open socket to retransmit SYNACK. Note that the
  *	sk here is the child socket, not the parent (listener) socket.
@@ -447,7 +460,7 @@ static void tcp_fastopen_synack_timer(struct sock *sk, struct request_sock *req)
 	 */
 	inet_rtx_syn_ack(sk, req);
 	req->num_timeout++;
-	icsk->icsk_retransmits++;
+	tcp_update_rto_stats(sk);
 	if (!tp->retrans_stamp)
 		tp->retrans_stamp = tcp_time_stamp(tp);
 	inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
@@ -575,7 +588,7 @@ void tcp_retransmit_timer(struct sock *sk)
 
 	tcp_enter_loss(sk);
 
-	icsk->icsk_retransmits++;
+	tcp_update_rto_stats(sk);
 	if (tcp_retransmit_skb(sk, tcp_rtx_queue_head(sk), 1) > 0) {
 		/* Retransmission failed because of local congestion,
 		 * Let senders fight for local resources conservatively.
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index f39b9c844580..7b21a51dd25a 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -714,7 +714,7 @@ int __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
 			       iph->saddr, uh->source, skb->dev->ifindex,
 			       inet_sdif(skb), udptable, NULL);
 
-	if (!sk || udp_sk(sk)->encap_type) {
+	if (!sk || READ_ONCE(udp_sk(sk)->encap_type)) {
 		/* No socket for error: try tunnels before discarding */
 		if (static_branch_unlikely(&udp_encap_needed_key)) {
 			sk = __udp4_lib_err_encap(net, iph, uh, udptable, sk, skb,
@@ -1051,7 +1051,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 	u8 tos, scope;
 	__be16 dport;
 	int err, is_udplite = IS_UDPLITE(sk);
-	int corkreq = READ_ONCE(up->corkflag) || msg->msg_flags&MSG_MORE;
+	int corkreq = udp_test_bit(CORK, sk) || msg->msg_flags & MSG_MORE;
 	int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
 	struct sk_buff *skb;
 	struct ip_options_data opt_copy;
@@ -1143,7 +1143,9 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 
 	if (cgroup_bpf_enabled(CGROUP_UDP4_SENDMSG) && !connected) {
 		err = BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk,
-					    (struct sockaddr *)usin, &ipc.addr);
+					    (struct sockaddr *)usin,
+					    &msg->msg_namelen,
+					    &ipc.addr);
 		if (err)
 			goto out_free;
 		if (usin) {
@@ -1315,11 +1317,11 @@ void udp_splice_eof(struct socket *sock)
 	struct sock *sk = sock->sk;
 	struct udp_sock *up = udp_sk(sk);
 
-	if (!up->pending || READ_ONCE(up->corkflag))
+	if (!up->pending || udp_test_bit(CORK, sk))
 		return;
 
 	lock_sock(sk);
-	if (up->pending && !READ_ONCE(up->corkflag))
+	if (up->pending && !udp_test_bit(CORK, sk))
 		udp_push_pending_frames(sk);
 	release_sock(sk);
 }
@@ -1865,10 +1867,11 @@ try_again:
 		*addr_len = sizeof(*sin);
 
 		BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk,
-						      (struct sockaddr *)sin);
+						      (struct sockaddr *)sin,
+						      addr_len);
 	}
 
-	if (udp_sk(sk)->gro_enabled)
+	if (udp_test_bit(GRO_ENABLED, sk))
 		udp_cmsg_recv(msg, sk, skb);
 
 	if (inet_cmsg_flags(inet))
@@ -1904,7 +1907,7 @@ int udp_pre_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	if (addr_len < sizeof(struct sockaddr_in))
 		return -EINVAL;
 
-	return BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr);
+	return BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr, &addr_len);
 }
 EXPORT_SYMBOL(udp_pre_connect);
 
@@ -2081,7 +2084,8 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)
 	}
 	nf_reset_ct(skb);
 
-	if (static_branch_unlikely(&udp_encap_needed_key) && up->encap_type) {
+	if (static_branch_unlikely(&udp_encap_needed_key) &&
+	    READ_ONCE(up->encap_type)) {
 		int (*encap_rcv)(struct sock *sk, struct sk_buff *skb);
 
 		/*
@@ -2119,7 +2123,8 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)
 	/*
 	 * 	UDP-Lite specific tests, ignored on UDP sockets
 	 */
-	if ((up->pcflag & UDPLITE_RECV_CC)  &&  UDP_SKB_CB(skb)->partial_cov) {
+	if (udp_test_bit(UDPLITE_RECV_CC, sk) && UDP_SKB_CB(skb)->partial_cov) {
+		u16 pcrlen = READ_ONCE(up->pcrlen);
 
 		/*
 		 * MIB statistics other than incrementing the error count are
@@ -2132,7 +2137,7 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)
 		 * delivery of packets with coverage values less than a value
 		 * provided by the application."
 		 */
-		if (up->pcrlen == 0) {          /* full coverage was set  */
+		if (pcrlen == 0) {          /* full coverage was set  */
 			net_dbg_ratelimited("UDPLite: partial coverage %d while full coverage %d requested\n",
 					    UDP_SKB_CB(skb)->cscov, skb->len);
 			goto drop;
@@ -2143,9 +2148,9 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)
 		 * that it wants x while sender emits packets of smaller size y.
 		 * Therefore the above ...()->partial_cov statement is essential.
 		 */
-		if (UDP_SKB_CB(skb)->cscov  <  up->pcrlen) {
+		if (UDP_SKB_CB(skb)->cscov < pcrlen) {
 			net_dbg_ratelimited("UDPLite: coverage %d too small, need min %d\n",
-					    UDP_SKB_CB(skb)->cscov, up->pcrlen);
+					    UDP_SKB_CB(skb)->cscov, pcrlen);
 			goto drop;
 		}
 	}
@@ -2618,7 +2623,7 @@ void udp_destroy_sock(struct sock *sk)
 			if (encap_destroy)
 				encap_destroy(sk);
 		}
-		if (up->encap_enabled)
+		if (udp_test_bit(ENCAP_ENABLED, sk))
 			static_branch_dec(&udp_encap_needed_key);
 	}
 }
@@ -2658,9 +2663,9 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
 	switch (optname) {
 	case UDP_CORK:
 		if (val != 0) {
-			WRITE_ONCE(up->corkflag, 1);
+			udp_set_bit(CORK, sk);
 		} else {
-			WRITE_ONCE(up->corkflag, 0);
+			udp_clear_bit(CORK, sk);
 			lock_sock(sk);
 			push_pending_frames(sk);
 			release_sock(sk);
@@ -2675,17 +2680,17 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
 		case UDP_ENCAP_ESPINUDP_NON_IKE:
 #if IS_ENABLED(CONFIG_IPV6)
 			if (sk->sk_family == AF_INET6)
-				up->encap_rcv = ipv6_stub->xfrm6_udp_encap_rcv;
+				WRITE_ONCE(up->encap_rcv,
+					   ipv6_stub->xfrm6_udp_encap_rcv);
 			else
 #endif
-				up->encap_rcv = xfrm4_udp_encap_rcv;
+				WRITE_ONCE(up->encap_rcv,
+					   xfrm4_udp_encap_rcv);
 #endif
 			fallthrough;
 		case UDP_ENCAP_L2TPINUDP:
-			up->encap_type = val;
-			lock_sock(sk);
-			udp_tunnel_encap_enable(sk->sk_socket);
-			release_sock(sk);
+			WRITE_ONCE(up->encap_type, val);
+			udp_tunnel_encap_enable(sk);
 			break;
 		default:
 			err = -ENOPROTOOPT;
@@ -2694,11 +2699,11 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
 		break;
 
 	case UDP_NO_CHECK6_TX:
-		up->no_check6_tx = valbool;
+		udp_set_no_check6_tx(sk, valbool);
 		break;
 
 	case UDP_NO_CHECK6_RX:
-		up->no_check6_rx = valbool;
+		udp_set_no_check6_rx(sk, valbool);
 		break;
 
 	case UDP_SEGMENT:
@@ -2708,14 +2713,12 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
 		break;
 
 	case UDP_GRO:
-		lock_sock(sk);
 
 		/* when enabling GRO, accept the related GSO packet type */
 		if (valbool)
-			udp_tunnel_encap_enable(sk->sk_socket);
-		up->gro_enabled = valbool;
-		up->accept_udp_l4 = valbool;
-		release_sock(sk);
+			udp_tunnel_encap_enable(sk);
+		udp_assign_bit(GRO_ENABLED, sk, valbool);
+		udp_assign_bit(ACCEPT_L4, sk, valbool);
 		break;
 
 	/*
@@ -2730,8 +2733,8 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
 			val = 8;
 		else if (val > USHRT_MAX)
 			val = USHRT_MAX;
-		up->pcslen = val;
-		up->pcflag |= UDPLITE_SEND_CC;
+		WRITE_ONCE(up->pcslen, val);
+		udp_set_bit(UDPLITE_SEND_CC, sk);
 		break;
 
 	/* The receiver specifies a minimum checksum coverage value. To make
@@ -2744,8 +2747,8 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
 			val = 8;
 		else if (val > USHRT_MAX)
 			val = USHRT_MAX;
-		up->pcrlen = val;
-		up->pcflag |= UDPLITE_RECV_CC;
+		WRITE_ONCE(up->pcrlen, val);
+		udp_set_bit(UDPLITE_RECV_CC, sk);
 		break;
 
 	default:
@@ -2783,19 +2786,19 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname,
 
 	switch (optname) {
 	case UDP_CORK:
-		val = READ_ONCE(up->corkflag);
+		val = udp_test_bit(CORK, sk);
 		break;
 
 	case UDP_ENCAP:
-		val = up->encap_type;
+		val = READ_ONCE(up->encap_type);
 		break;
 
 	case UDP_NO_CHECK6_TX:
-		val = up->no_check6_tx;
+		val = udp_get_no_check6_tx(sk);
 		break;
 
 	case UDP_NO_CHECK6_RX:
-		val = up->no_check6_rx;
+		val = udp_get_no_check6_rx(sk);
 		break;
 
 	case UDP_SEGMENT:
@@ -2803,17 +2806,17 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname,
 		break;
 
 	case UDP_GRO:
-		val = up->gro_enabled;
+		val = udp_test_bit(GRO_ENABLED, sk);
 		break;
 
 	/* The following two cannot be changed on UDP sockets, the return is
 	 * always 0 (which corresponds to the full checksum coverage of UDP). */
 	case UDPLITE_SEND_CSCOV:
-		val = up->pcslen;
+		val = READ_ONCE(up->pcslen);
 		break;
 
 	case UDPLITE_RECV_CSCOV:
-		val = up->pcrlen;
+		val = READ_ONCE(up->pcrlen);
 		break;
 
 	default:
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 0f46b3c2e4ac..6c95d28d0c4a 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -557,10 +557,10 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
 	NAPI_GRO_CB(skb)->is_flist = 0;
 	if (!sk || !udp_sk(sk)->gro_receive) {
 		if (skb->dev->features & NETIF_F_GRO_FRAGLIST)
-			NAPI_GRO_CB(skb)->is_flist = sk ? !udp_sk(sk)->gro_enabled : 1;
+			NAPI_GRO_CB(skb)->is_flist = sk ? !udp_test_bit(GRO_ENABLED, sk) : 1;
 
 		if ((!sk && (skb->dev->features & NETIF_F_GRO_UDP_FWD)) ||
-		    (sk && udp_sk(sk)->gro_enabled) || NAPI_GRO_CB(skb)->is_flist)
+		    (sk && udp_test_bit(GRO_ENABLED, sk)) || NAPI_GRO_CB(skb)->is_flist)
 			return call_gro_receive(udp_gro_receive_segment, head, skb);
 
 		/* no GRO, be sure flush the current packet */
diff --git a/net/ipv4/udp_tunnel_core.c b/net/ipv4/udp_tunnel_core.c
index 9b18f371af0d..1e7e4aecdc48 100644
--- a/net/ipv4/udp_tunnel_core.c
+++ b/net/ipv4/udp_tunnel_core.c
@@ -78,7 +78,7 @@ void setup_udp_tunnel_sock(struct net *net, struct socket *sock,
 	udp_sk(sk)->gro_receive = cfg->gro_receive;
 	udp_sk(sk)->gro_complete = cfg->gro_complete;
 
-	udp_tunnel_encap_enable(sock);
+	udp_tunnel_encap_enable(sk);
 }
 EXPORT_SYMBOL_GPL(setup_udp_tunnel_sock);
 
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c
index 39ecdad1b50c..af37af3ab727 100644
--- a/net/ipv4/udplite.c
+++ b/net/ipv4/udplite.c
@@ -21,7 +21,6 @@ EXPORT_SYMBOL(udplite_table);
 static int udplite_sk_init(struct sock *sk)
 {
 	udp_init_sock(sk);
-	udp_sk(sk)->pcflag = UDPLITE_BIT;
 	pr_warn_once("UDP-Lite is deprecated and scheduled to be removed in 2025, "
 		     "please contact the netdev mailing list\n");
 	return 0;
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index eac206a290d0..183f6dc37242 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -85,11 +85,11 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
 	struct udphdr *uh;
 	struct iphdr *iph;
 	int iphlen, len;
-
 	__u8 *udpdata;
 	__be32 *udpdata32;
-	__u16 encap_type = up->encap_type;
+	u16 encap_type;
 
+	encap_type = READ_ONCE(up->encap_type);
 	/* if this is not encapsulated socket, then just return now */
 	if (!encap_type)
 		return 1;
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 368824fe9719..c35d302a3da9 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -217,10 +217,11 @@ lookup_protocol:
 	inet_sk(sk)->pinet6 = np = inet6_sk_generic(sk);
 	np->hop_limit	= -1;
 	np->mcast_hops	= IPV6_DEFAULT_MCASTHOPS;
-	np->mc_loop	= 1;
-	np->mc_all	= 1;
+	inet6_set_bit(MC6_LOOP, sk);
+	inet6_set_bit(MC6_ALL, sk);
 	np->pmtudisc	= IPV6_PMTUDISC_WANT;
-	np->repflow	= net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_ESTABLISHED;
+	inet6_assign_bit(REPFLOW, sk, net->ipv6.sysctl.flowlabel_reflect &
+				     FLOWLABEL_REFLECT_ESTABLISHED);
 	sk->sk_ipv6only	= net->ipv6.sysctl.bindv6only;
 	sk->sk_txrehash = READ_ONCE(net->core.sysctl_txrehash);
 
@@ -453,7 +454,7 @@ int inet6_bind_sk(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	/* BPF prog is run before any checks are done so that if the prog
 	 * changes context in a wrong way it will be caught.
 	 */
-	err = BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr,
+	err = BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, &addr_len,
 						 CGROUP_INET6_BIND, &flags);
 	if (err)
 		return err;
@@ -519,6 +520,7 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
 		  int peer)
 {
 	struct sockaddr_in6 *sin = (struct sockaddr_in6 *)uaddr;
+	int sin_addr_len = sizeof(*sin);
 	struct sock *sk = sock->sk;
 	struct inet_sock *inet = inet_sk(sk);
 	struct ipv6_pinfo *np = inet6_sk(sk);
@@ -536,9 +538,9 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
 		}
 		sin->sin6_port = inet->inet_dport;
 		sin->sin6_addr = sk->sk_v6_daddr;
-		if (np->sndflow)
+		if (inet6_test_bit(SNDFLOW, sk))
 			sin->sin6_flowinfo = np->flow_label;
-		BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin,
+		BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, &sin_addr_len,
 				       CGROUP_INET6_GETPEERNAME);
 	} else {
 		if (ipv6_addr_any(&sk->sk_v6_rcv_saddr))
@@ -546,13 +548,13 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
 		else
 			sin->sin6_addr = sk->sk_v6_rcv_saddr;
 		sin->sin6_port = inet->inet_sport;
-		BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin,
+		BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, &sin_addr_len,
 				       CGROUP_INET6_GETSOCKNAME);
 	}
 	sin->sin6_scope_id = ipv6_iface_scope_id(&sin->sin6_addr,
 						 sk->sk_bound_dev_if);
 	release_sock(sk);
-	return sizeof(*sin);
+	return sin_addr_len;
 }
 EXPORT_SYMBOL(inet6_getname);
 
@@ -1060,6 +1062,7 @@ static const struct ipv6_bpf_stub ipv6_bpf_stub_impl = {
 	.udp6_lib_lookup = __udp6_lib_lookup,
 	.ipv6_setsockopt = do_ipv6_setsockopt,
 	.ipv6_getsockopt = do_ipv6_getsockopt,
+	.ipv6_dev_get_saddr = ipv6_dev_get_saddr,
 };
 
 static int __init inet6_init(void)
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 41ebc4e57473..cc6a502db39d 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -80,7 +80,8 @@ int ip6_datagram_dst_update(struct sock *sk, bool fix_sk_saddr)
 	struct flowi6 fl6;
 	int err = 0;
 
-	if (np->sndflow && (np->flow_label & IPV6_FLOWLABEL_MASK)) {
+	if (inet6_test_bit(SNDFLOW, sk) &&
+	    (np->flow_label & IPV6_FLOWLABEL_MASK)) {
 		flowlabel = fl6_sock_lookup(sk, np->flow_label);
 		if (IS_ERR(flowlabel))
 			return -EINVAL;
@@ -163,7 +164,7 @@ int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr,
 	if (usin->sin6_family != AF_INET6)
 		return -EAFNOSUPPORT;
 
-	if (np->sndflow)
+	if (inet6_test_bit(SNDFLOW, sk))
 		fl6_flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK;
 
 	if (ipv6_addr_any(&usin->sin6_addr)) {
@@ -305,11 +306,10 @@ static void ipv6_icmp_error_rfc4884(const struct sk_buff *skb,
 void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
 		     __be16 port, u32 info, u8 *payload)
 {
-	struct ipv6_pinfo *np  = inet6_sk(sk);
 	struct icmp6hdr *icmph = icmp6_hdr(skb);
 	struct sock_exterr_skb *serr;
 
-	if (!np->recverr)
+	if (!inet6_test_bit(RECVERR6, sk))
 		return;
 
 	skb = skb_clone(skb, GFP_ATOMIC);
@@ -332,7 +332,7 @@ void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
 
 	__skb_pull(skb, payload - skb->data);
 
-	if (inet6_sk(sk)->recverr_rfc4884)
+	if (inet6_test_bit(RECVERR6_RFC4884, sk))
 		ipv6_icmp_error_rfc4884(skb, &serr->ee.ee_rfc4884);
 
 	skb_reset_transport_header(skb);
@@ -344,12 +344,11 @@ EXPORT_SYMBOL_GPL(ipv6_icmp_error);
 
 void ipv6_local_error(struct sock *sk, int err, struct flowi6 *fl6, u32 info)
 {
-	const struct ipv6_pinfo *np = inet6_sk(sk);
 	struct sock_exterr_skb *serr;
 	struct ipv6hdr *iph;
 	struct sk_buff *skb;
 
-	if (!np->recverr)
+	if (!inet6_test_bit(RECVERR6, sk))
 		return;
 
 	skb = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
@@ -493,7 +492,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
 			const struct ipv6hdr *ip6h = container_of((struct in6_addr *)(nh + serr->addr_offset),
 								  struct ipv6hdr, daddr);
 			sin->sin6_addr = ip6h->daddr;
-			if (np->sndflow)
+			if (inet6_test_bit(SNDFLOW, sk))
 				sin->sin6_flowinfo = ip6_flowinfo(ip6h);
 			sin->sin6_scope_id =
 				ipv6_iface_scope_id(&sin->sin6_addr,
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 93a594a901d1..8fb4a791881a 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -588,7 +588,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
 	else if (!fl6.flowi6_oif)
 		fl6.flowi6_oif = np->ucast_oif;
 
-	ipcm6_init_sk(&ipc6, np);
+	ipcm6_init_sk(&ipc6, sk);
 	ipc6.sockc.mark = mark;
 	fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
 
@@ -791,7 +791,7 @@ static enum skb_drop_reason icmpv6_echo_reply(struct sk_buff *skb)
 	msg.offset = 0;
 	msg.type = type;
 
-	ipcm6_init_sk(&ipc6, np);
+	ipcm6_init_sk(&ipc6, sk);
 	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
 	ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb));
 	ipc6.sockc.mark = mark;
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index b3ca4beb4405..eca07e10e21f 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -513,7 +513,7 @@ int ipv6_flowlabel_opt_get(struct sock *sk, struct in6_flowlabel_req *freq,
 		return 0;
 	}
 
-	if (np->repflow) {
+	if (inet6_test_bit(REPFLOW, sk)) {
 		freq->flr_label = np->flow_label;
 		return 0;
 	}
@@ -551,10 +551,10 @@ static int ipv6_flowlabel_put(struct sock *sk, struct in6_flowlabel_req *freq)
 	if (freq->flr_flags & IPV6_FL_F_REFLECT) {
 		if (sk->sk_protocol != IPPROTO_TCP)
 			return -ENOPROTOOPT;
-		if (!np->repflow)
+		if (!inet6_test_bit(REPFLOW, sk))
 			return -ESRCH;
 		np->flow_label = 0;
-		np->repflow = 0;
+		inet6_clear_bit(REPFLOW, sk);
 		return 0;
 	}
 
@@ -626,7 +626,7 @@ static int ipv6_flowlabel_get(struct sock *sk, struct in6_flowlabel_req *freq,
 
 		if (sk->sk_protocol != IPPROTO_TCP)
 			return -ENOPROTOOPT;
-		np->repflow = 1;
+		inet6_set_bit(REPFLOW, sk);
 		return 0;
 	}
 
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 54fc4c711f2c..951ba8089b5b 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -232,12 +232,11 @@ int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 }
 EXPORT_SYMBOL(ip6_output);
 
-bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
+bool ip6_autoflowlabel(struct net *net, const struct sock *sk)
 {
-	if (!np->autoflowlabel_set)
+	if (!inet6_test_bit(AUTOFLOWLABEL_SET, sk))
 		return ip6_default_np_autolabel(net);
-	else
-		return np->autoflowlabel;
+	return inet6_test_bit(AUTOFLOWLABEL, sk);
 }
 
 /*
@@ -309,12 +308,12 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
 	 *	Fill in the IPv6 header
 	 */
 	if (np)
-		hlimit = np->hop_limit;
+		hlimit = READ_ONCE(np->hop_limit);
 	if (hlimit < 0)
 		hlimit = ip6_dst_hoplimit(dst);
 
 	ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
-				ip6_autoflowlabel(net, np), fl6));
+				ip6_autoflowlabel(net, sk), fl6));
 
 	hdr->payload_len = htons(seg_len);
 	hdr->nexthdr = proto;
@@ -369,9 +368,8 @@ static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
 		if (sk && ra->sel == sel &&
 		    (!sk->sk_bound_dev_if ||
 		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
-			struct ipv6_pinfo *np = inet6_sk(sk);
 
-			if (np && np->rtalert_isolate &&
+			if (inet6_test_bit(RTALERT_ISOLATE, sk) &&
 			    !net_eq(sock_net(sk), dev_net(skb->dev))) {
 				continue;
 			}
@@ -881,9 +879,11 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
 			mtu = IPV6_MIN_MTU;
 	}
 
-	if (np && np->frag_size < mtu) {
-		if (np->frag_size)
-			mtu = np->frag_size;
+	if (np) {
+		u32 frag_size = READ_ONCE(np->frag_size);
+
+		if (frag_size && frag_size < mtu)
+			mtu = frag_size;
 	}
 	if (mtu < hlen + sizeof(struct frag_hdr) + 8)
 		goto fail_toobig;
@@ -1113,7 +1113,7 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
 		rcu_read_lock();
 		from = rt ? rcu_dereference(rt->from) : NULL;
 		err = ip6_route_get_saddr(net, from, &fl6->daddr,
-					  sk ? inet6_sk(sk)->srcprefs : 0,
+					  sk ? READ_ONCE(inet6_sk(sk)->srcprefs) : 0,
 					  &fl6->saddr);
 		rcu_read_unlock();
 
@@ -1392,7 +1392,7 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
 			  struct rt6_info *rt)
 {
 	struct ipv6_pinfo *np = inet6_sk(sk);
-	unsigned int mtu;
+	unsigned int mtu, frag_size;
 	struct ipv6_txoptions *nopt, *opt = ipc6->opt;
 
 	/* callers pass dst together with a reference, set it first so
@@ -1436,15 +1436,16 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
 	v6_cork->hop_limit = ipc6->hlimit;
 	v6_cork->tclass = ipc6->tclass;
 	if (rt->dst.flags & DST_XFRM_TUNNEL)
-		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
+		mtu = READ_ONCE(np->pmtudisc) >= IPV6_PMTUDISC_PROBE ?
 		      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
 	else
-		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
+		mtu = READ_ONCE(np->pmtudisc) >= IPV6_PMTUDISC_PROBE ?
 			READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst));
-	if (np->frag_size < mtu) {
-		if (np->frag_size)
-			mtu = np->frag_size;
-	}
+
+	frag_size = READ_ONCE(np->frag_size);
+	if (frag_size && frag_size < mtu)
+		mtu = frag_size;
+
 	cork->base.fragsize = mtu;
 	cork->base.gso_size = ipc6->gso_size;
 	cork->base.tx_flags = 0;
@@ -1935,7 +1936,6 @@ struct sk_buff *__ip6_make_skb(struct sock *sk,
 	struct sk_buff *skb, *tmp_skb;
 	struct sk_buff **tail_skb;
 	struct in6_addr *final_dst;
-	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct net *net = sock_net(sk);
 	struct ipv6hdr *hdr;
 	struct ipv6_txoptions *opt = v6_cork->opt;
@@ -1978,7 +1978,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk,
 
 	ip6_flow_hdr(hdr, v6_cork->tclass,
 		     ip6_make_flowlabel(net, skb, fl6->flowlabel,
-					ip6_autoflowlabel(net, np), fl6));
+					ip6_autoflowlabel(net, sk), fl6));
 	hdr->hop_limit = v6_cork->hop_limit;
 	hdr->nexthdr = proto;
 	hdr->saddr = fl6->saddr;
@@ -2091,7 +2091,7 @@ struct sk_buff *ip6_make_skb(struct sock *sk,
 		return ERR_PTR(err);
 	}
 	if (ipc6->dontfrag < 0)
-		ipc6->dontfrag = inet6_sk(sk)->dontfrag;
+		ipc6->dontfrag = inet6_test_bit(DONTFRAG, sk);
 
 	err = __ip6_append_data(sk, &queue, cork, &v6_cork,
 				&current->task_frag, getfrag, from,
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 0e2a0847b387..7d661735cb9d 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -415,6 +415,101 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
 	if (ip6_mroute_opt(optname))
 		return ip6_mroute_setsockopt(sk, optname, optval, optlen);
 
+	/* Handle options that can be set without locking the socket. */
+	switch (optname) {
+	case IPV6_UNICAST_HOPS:
+		if (optlen < sizeof(int))
+			return -EINVAL;
+		if (val > 255 || val < -1)
+			return -EINVAL;
+		WRITE_ONCE(np->hop_limit, val);
+		return 0;
+	case IPV6_MULTICAST_LOOP:
+		if (optlen < sizeof(int))
+			return -EINVAL;
+		if (val != valbool)
+			return -EINVAL;
+		inet6_assign_bit(MC6_LOOP, sk, valbool);
+		return 0;
+	case IPV6_MULTICAST_HOPS:
+		if (sk->sk_type == SOCK_STREAM)
+			return retv;
+		if (optlen < sizeof(int))
+			return -EINVAL;
+		if (val > 255 || val < -1)
+			return -EINVAL;
+		WRITE_ONCE(np->mcast_hops,
+			   val == -1 ? IPV6_DEFAULT_MCASTHOPS : val);
+		return 0;
+	case IPV6_MTU:
+		if (optlen < sizeof(int))
+			return -EINVAL;
+		if (val && val < IPV6_MIN_MTU)
+			return -EINVAL;
+		WRITE_ONCE(np->frag_size, val);
+		return 0;
+	case IPV6_MINHOPCOUNT:
+		if (optlen < sizeof(int))
+			return -EINVAL;
+		if (val < 0 || val > 255)
+			return -EINVAL;
+
+		if (val)
+			static_branch_enable(&ip6_min_hopcount);
+
+		/* tcp_v6_err() and tcp_v6_rcv() might read min_hopcount
+		 * while we are changing it.
+		 */
+		WRITE_ONCE(np->min_hopcount, val);
+		return 0;
+	case IPV6_RECVERR_RFC4884:
+		if (optlen < sizeof(int))
+			return -EINVAL;
+		if (val < 0 || val > 1)
+			return -EINVAL;
+		inet6_assign_bit(RECVERR6_RFC4884, sk, valbool);
+		return 0;
+	case IPV6_MULTICAST_ALL:
+		if (optlen < sizeof(int))
+			return -EINVAL;
+		inet6_assign_bit(MC6_ALL, sk, valbool);
+		return 0;
+	case IPV6_AUTOFLOWLABEL:
+		inet6_assign_bit(AUTOFLOWLABEL, sk, valbool);
+		inet6_set_bit(AUTOFLOWLABEL_SET, sk);
+		return 0;
+	case IPV6_DONTFRAG:
+		inet6_assign_bit(DONTFRAG, sk, valbool);
+		return 0;
+	case IPV6_RECVERR:
+		if (optlen < sizeof(int))
+			return -EINVAL;
+		inet6_assign_bit(RECVERR6, sk, valbool);
+		if (!val)
+			skb_errqueue_purge(&sk->sk_error_queue);
+		return 0;
+	case IPV6_ROUTER_ALERT_ISOLATE:
+		if (optlen < sizeof(int))
+			return -EINVAL;
+		inet6_assign_bit(RTALERT_ISOLATE, sk, valbool);
+		return 0;
+	case IPV6_MTU_DISCOVER:
+		if (optlen < sizeof(int))
+			return -EINVAL;
+		if (val < IPV6_PMTUDISC_DONT || val > IPV6_PMTUDISC_OMIT)
+			return -EINVAL;
+		WRITE_ONCE(np->pmtudisc, val);
+		return 0;
+	case IPV6_FLOWINFO_SEND:
+		if (optlen < sizeof(int))
+			return -EINVAL;
+		inet6_assign_bit(SNDFLOW, sk, valbool);
+		return 0;
+	case IPV6_ADDR_PREFERENCES:
+		if (optlen < sizeof(int))
+			return -EINVAL;
+		return ip6_sock_set_addr_preferences(sk, val);
+	}
 	if (needs_rtnl)
 		rtnl_lock();
 	sockopt_lock_sock(sk);
@@ -733,34 +828,7 @@ done:
 		}
 		break;
 	}
-	case IPV6_UNICAST_HOPS:
-		if (optlen < sizeof(int))
-			goto e_inval;
-		if (val > 255 || val < -1)
-			goto e_inval;
-		np->hop_limit = val;
-		retv = 0;
-		break;
 
-	case IPV6_MULTICAST_HOPS:
-		if (sk->sk_type == SOCK_STREAM)
-			break;
-		if (optlen < sizeof(int))
-			goto e_inval;
-		if (val > 255 || val < -1)
-			goto e_inval;
-		np->mcast_hops = (val == -1 ? IPV6_DEFAULT_MCASTHOPS : val);
-		retv = 0;
-		break;
-
-	case IPV6_MULTICAST_LOOP:
-		if (optlen < sizeof(int))
-			goto e_inval;
-		if (val != valbool)
-			goto e_inval;
-		np->mc_loop = valbool;
-		retv = 0;
-		break;
 
 	case IPV6_UNICAST_IF:
 	{
@@ -862,13 +930,6 @@ done:
 			retv = ipv6_sock_ac_drop(sk, mreq.ipv6mr_ifindex, &mreq.ipv6mr_acaddr);
 		break;
 	}
-	case IPV6_MULTICAST_ALL:
-		if (optlen < sizeof(int))
-			goto e_inval;
-		np->mc_all = valbool;
-		retv = 0;
-		break;
-
 	case MCAST_JOIN_GROUP:
 	case MCAST_LEAVE_GROUP:
 		if (in_compat_syscall())
@@ -896,42 +957,6 @@ done:
 			goto e_inval;
 		retv = ip6_ra_control(sk, val);
 		break;
-	case IPV6_ROUTER_ALERT_ISOLATE:
-		if (optlen < sizeof(int))
-			goto e_inval;
-		np->rtalert_isolate = valbool;
-		retv = 0;
-		break;
-	case IPV6_MTU_DISCOVER:
-		if (optlen < sizeof(int))
-			goto e_inval;
-		if (val < IPV6_PMTUDISC_DONT || val > IPV6_PMTUDISC_OMIT)
-			goto e_inval;
-		np->pmtudisc = val;
-		retv = 0;
-		break;
-	case IPV6_MTU:
-		if (optlen < sizeof(int))
-			goto e_inval;
-		if (val && val < IPV6_MIN_MTU)
-			goto e_inval;
-		np->frag_size = val;
-		retv = 0;
-		break;
-	case IPV6_RECVERR:
-		if (optlen < sizeof(int))
-			goto e_inval;
-		np->recverr = valbool;
-		if (!val)
-			skb_errqueue_purge(&sk->sk_error_queue);
-		retv = 0;
-		break;
-	case IPV6_FLOWINFO_SEND:
-		if (optlen < sizeof(int))
-			goto e_inval;
-		np->sndflow = valbool;
-		retv = 0;
-		break;
 	case IPV6_FLOWLABEL_MGR:
 		retv = ipv6_flowlabel_opt(sk, optval, optlen);
 		break;
@@ -943,47 +968,10 @@ done:
 		retv = xfrm_user_policy(sk, optname, optval, optlen);
 		break;
 
-	case IPV6_ADDR_PREFERENCES:
-		if (optlen < sizeof(int))
-			goto e_inval;
-		retv = __ip6_sock_set_addr_preferences(sk, val);
-		break;
-	case IPV6_MINHOPCOUNT:
-		if (optlen < sizeof(int))
-			goto e_inval;
-		if (val < 0 || val > 255)
-			goto e_inval;
-
-		if (val)
-			static_branch_enable(&ip6_min_hopcount);
-
-		/* tcp_v6_err() and tcp_v6_rcv() might read min_hopcount
-		 * while we are changing it.
-		 */
-		WRITE_ONCE(np->min_hopcount, val);
-		retv = 0;
-		break;
-	case IPV6_DONTFRAG:
-		np->dontfrag = valbool;
-		retv = 0;
-		break;
-	case IPV6_AUTOFLOWLABEL:
-		np->autoflowlabel = valbool;
-		np->autoflowlabel_set = 1;
-		retv = 0;
-		break;
 	case IPV6_RECVFRAGSIZE:
 		np->rxopt.bits.recvfragsize = valbool;
 		retv = 0;
 		break;
-	case IPV6_RECVERR_RFC4884:
-		if (optlen < sizeof(int))
-			goto e_inval;
-		if (val < 0 || val > 1)
-			goto e_inval;
-		np->recverr_rfc4884 = valbool;
-		retv = 0;
-		break;
 	}
 
 unlock:
@@ -1180,7 +1168,8 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
 				put_cmsg(&msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info);
 			}
 			if (np->rxopt.bits.rxhlim) {
-				int hlim = np->mcast_hops;
+				int hlim = READ_ONCE(np->mcast_hops);
+
 				put_cmsg(&msg, SOL_IPV6, IPV6_HOPLIMIT, sizeof(hlim), &hlim);
 			}
 			if (np->rxopt.bits.rxtclass) {
@@ -1197,7 +1186,8 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
 				put_cmsg(&msg, SOL_IPV6, IPV6_2292PKTINFO, sizeof(src_info), &src_info);
 			}
 			if (np->rxopt.bits.rxohlim) {
-				int hlim = np->mcast_hops;
+				int hlim = READ_ONCE(np->mcast_hops);
+
 				put_cmsg(&msg, SOL_IPV6, IPV6_2292HOPLIMIT, sizeof(hlim), &hlim);
 			}
 			if (np->rxopt.bits.rxflow) {
@@ -1347,9 +1337,9 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
 		struct dst_entry *dst;
 
 		if (optname == IPV6_UNICAST_HOPS)
-			val = np->hop_limit;
+			val = READ_ONCE(np->hop_limit);
 		else
-			val = np->mcast_hops;
+			val = READ_ONCE(np->mcast_hops);
 
 		if (val < 0) {
 			rcu_read_lock();
@@ -1365,7 +1355,7 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
 	}
 
 	case IPV6_MULTICAST_LOOP:
-		val = np->mc_loop;
+		val = inet6_test_bit(MC6_LOOP, sk);
 		break;
 
 	case IPV6_MULTICAST_IF:
@@ -1373,7 +1363,7 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
 		break;
 
 	case IPV6_MULTICAST_ALL:
-		val = np->mc_all;
+		val = inet6_test_bit(MC6_ALL, sk);
 		break;
 
 	case IPV6_UNICAST_IF:
@@ -1381,15 +1371,15 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
 		break;
 
 	case IPV6_MTU_DISCOVER:
-		val = np->pmtudisc;
+		val = READ_ONCE(np->pmtudisc);
 		break;
 
 	case IPV6_RECVERR:
-		val = np->recverr;
+		val = inet6_test_bit(RECVERR6, sk);
 		break;
 
 	case IPV6_FLOWINFO_SEND:
-		val = np->sndflow;
+		val = inet6_test_bit(SNDFLOW, sk);
 		break;
 
 	case IPV6_FLOWLABEL_MGR:
@@ -1424,33 +1414,35 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
 	}
 
 	case IPV6_ADDR_PREFERENCES:
+		{
+		u8 srcprefs = READ_ONCE(np->srcprefs);
 		val = 0;
 
-		if (np->srcprefs & IPV6_PREFER_SRC_TMP)
+		if (srcprefs & IPV6_PREFER_SRC_TMP)
 			val |= IPV6_PREFER_SRC_TMP;
-		else if (np->srcprefs & IPV6_PREFER_SRC_PUBLIC)
+		else if (srcprefs & IPV6_PREFER_SRC_PUBLIC)
 			val |= IPV6_PREFER_SRC_PUBLIC;
 		else {
 			/* XXX: should we return system default? */
 			val |= IPV6_PREFER_SRC_PUBTMP_DEFAULT;
 		}
 
-		if (np->srcprefs & IPV6_PREFER_SRC_COA)
+		if (srcprefs & IPV6_PREFER_SRC_COA)
 			val |= IPV6_PREFER_SRC_COA;
 		else
 			val |= IPV6_PREFER_SRC_HOME;
 		break;
-
+		}
 	case IPV6_MINHOPCOUNT:
-		val = np->min_hopcount;
+		val = READ_ONCE(np->min_hopcount);
 		break;
 
 	case IPV6_DONTFRAG:
-		val = np->dontfrag;
+		val = inet6_test_bit(DONTFRAG, sk);
 		break;
 
 	case IPV6_AUTOFLOWLABEL:
-		val = ip6_autoflowlabel(sock_net(sk), np);
+		val = ip6_autoflowlabel(sock_net(sk), sk);
 		break;
 
 	case IPV6_RECVFRAGSIZE:
@@ -1458,11 +1450,11 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
 		break;
 
 	case IPV6_ROUTER_ALERT_ISOLATE:
-		val = np->rtalert_isolate;
+		val = inet6_test_bit(RTALERT_ISOLATE, sk);
 		break;
 
 	case IPV6_RECVERR_RFC4884:
-		val = np->recverr_rfc4884;
+		val = inet6_test_bit(RECVERR6_RFC4884, sk);
 		break;
 
 	default:
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 5ce25bcb9974..99e28b444a4c 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -642,7 +642,7 @@ bool inet6_mc_check(const struct sock *sk, const struct in6_addr *mc_addr,
 	}
 	if (!mc) {
 		rcu_read_unlock();
-		return np->mc_all;
+		return inet6_test_bit(MC6_ALL, sk);
 	}
 	psl = rcu_dereference(mc->sflist);
 	if (!psl) {
@@ -1716,7 +1716,7 @@ static void ip6_mc_hdr(const struct sock *sk, struct sk_buff *skb,
 
 	hdr->payload_len = htons(len);
 	hdr->nexthdr = proto;
-	hdr->hop_limit = inet6_sk(sk)->hop_limit;
+	hdr->hop_limit = READ_ONCE(inet6_sk(sk)->hop_limit);
 
 	hdr->saddr = *saddr;
 	hdr->daddr = *daddr;
@@ -3011,8 +3011,6 @@ static struct ip6_sf_list *igmp6_mcf_get_next(struct seq_file *seq, struct ip6_s
 				continue;
 			state->im = rcu_dereference(state->idev->mc_list);
 		}
-		if (!state->im)
-			break;
 		psf = rcu_dereference(state->im->mca_sources);
 	}
 out:
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 553c8664e0a7..679443d7ecb5 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -500,7 +500,7 @@ void ndisc_send_skb(struct sk_buff *skb, const struct in6_addr *daddr,
 					      csum_partial(icmp6h,
 							   skb->len, 0));
 
-	ip6_nd_hdr(skb, saddr, daddr, inet6_sk(sk)->hop_limit, skb->len);
+	ip6_nd_hdr(skb, saddr, daddr, READ_ONCE(inet6_sk(sk)->hop_limit), skb->len);
 
 	rcu_read_lock();
 	idev = __in6_dev_get(dst->dev);
@@ -1996,7 +1996,7 @@ static int __net_init ndisc_net_init(struct net *net)
 	np = inet6_sk(sk);
 	np->hop_limit = 255;
 	/* Do not loopback ndisc messages */
-	np->mc_loop = 0;
+	inet6_clear_bit(MC6_LOOP, sk);
 
 	return 0;
 }
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index 5831aaa53d75..d2098dd4ceae 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -56,7 +56,7 @@ static int ping_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
 	if (addr_len < SIN6_LEN_RFC2133)
 		return -EINVAL;
 
-	return BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr);
+	return BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr, &addr_len);
 }
 
 static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
@@ -89,7 +89,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 			return -EAFNOSUPPORT;
 		}
 		daddr = &(u->sin6_addr);
-		if (np->sndflow)
+		if (inet6_test_bit(SNDFLOW, sk))
 			fl6.flowlabel = u->sin6_flowinfo & IPV6_FLOWINFO_MASK;
 		if (__ipv6_addr_needs_scope_id(ipv6_addr_type(daddr)))
 			oif = u->sin6_scope_id;
@@ -118,7 +118,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 	     l3mdev_master_ifindex_by_index(sock_net(sk), oif) != sk->sk_bound_dev_if))
 		return -EINVAL;
 
-	ipcm6_init_sk(&ipc6, np);
+	ipcm6_init_sk(&ipc6, sk);
 	ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags);
 	ipc6.sockc.mark = READ_ONCE(sk->sk_mark);
 
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 42fcec3ecf5e..a2aa54a2baae 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -291,6 +291,7 @@ static void rawv6_err(struct sock *sk, struct sk_buff *skb,
 	       struct inet6_skb_parm *opt,
 	       u8 type, u8 code, int offset, __be32 info)
 {
+	bool recverr = inet6_test_bit(RECVERR6, sk);
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	int err;
 	int harderr;
@@ -300,26 +301,26 @@ static void rawv6_err(struct sock *sk, struct sk_buff *skb,
 	   2. Socket is connected (otherwise the error indication
 	      is useless without recverr and error is hard.
 	 */
-	if (!np->recverr && sk->sk_state != TCP_ESTABLISHED)
+	if (!recverr && sk->sk_state != TCP_ESTABLISHED)
 		return;
 
 	harderr = icmpv6_err_convert(type, code, &err);
 	if (type == ICMPV6_PKT_TOOBIG) {
 		ip6_sk_update_pmtu(skb, sk, info);
-		harderr = (np->pmtudisc == IPV6_PMTUDISC_DO);
+		harderr = (READ_ONCE(np->pmtudisc) == IPV6_PMTUDISC_DO);
 	}
 	if (type == NDISC_REDIRECT) {
 		ip6_sk_redirect(skb, sk);
 		return;
 	}
-	if (np->recverr) {
+	if (recverr) {
 		u8 *payload = skb->data;
 		if (!inet_test_bit(HDRINCL, sk))
 			payload += offset;
 		ipv6_icmp_error(sk, skb, err, 0, ntohl(info), payload);
 	}
 
-	if (np->recverr || harderr) {
+	if (recverr || harderr) {
 		sk->sk_err = err;
 		sk_error_report(sk);
 	}
@@ -587,7 +588,6 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length,
 			struct flowi6 *fl6, struct dst_entry **dstp,
 			unsigned int flags, const struct sockcm_cookie *sockc)
 {
-	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct net *net = sock_net(sk);
 	struct ipv6hdr *iph;
 	struct sk_buff *skb;
@@ -668,7 +668,7 @@ out:
 error:
 	IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
 error_check:
-	if (err == -ENOBUFS && !np->recverr)
+	if (err == -ENOBUFS && !inet6_test_bit(RECVERR6, sk))
 		err = 0;
 	return err;
 }
@@ -795,7 +795,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 			return -EINVAL;
 
 		daddr = &sin6->sin6_addr;
-		if (np->sndflow) {
+		if (inet6_test_bit(SNDFLOW, sk)) {
 			fl6.flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK;
 			if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
 				flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
@@ -898,7 +898,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 		ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
 
 	if (ipc6.dontfrag < 0)
-		ipc6.dontfrag = np->dontfrag;
+		ipc6.dontfrag = inet6_test_bit(DONTFRAG, sk);
 
 	if (msg->msg_flags&MSG_CONFIRM)
 		goto do_confirm;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 9c687b357e6a..b132feae3393 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -341,7 +341,7 @@ struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev,
 			       int flags)
 {
 	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
-					1, DST_OBSOLETE_FORCE_CHK, flags);
+					DST_OBSOLETE_FORCE_CHK, flags);
 
 	if (rt) {
 		rt6_info_init(rt);
@@ -2622,7 +2622,7 @@ static struct dst_entry *ip6_route_output_flags_noref(struct net *net,
 	if (!any_src)
 		flags |= RT6_LOOKUP_F_HAS_SADDR;
 	else if (sk)
-		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
+		flags |= rt6_srcprefs2flags(READ_ONCE(inet6_sk(sk)->srcprefs));
 
 	return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output);
 }
@@ -2655,7 +2655,7 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori
 	struct net_device *loopback_dev = net->loopback_dev;
 	struct dst_entry *new = NULL;
 
-	rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1,
+	rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev,
 		       DST_OBSOLETE_DEAD, 0);
 	if (rt) {
 		rt6_info_init(rt);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 44b6949d72b2..4eb0c6386aca 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -135,7 +135,7 @@ static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
 
 	sock_owned_by_me(sk);
 
-	return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
+	return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr, &addr_len);
 }
 
 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
@@ -163,7 +163,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 
 	memset(&fl6, 0, sizeof(fl6));
 
-	if (np->sndflow) {
+	if (inet6_test_bit(SNDFLOW, sk)) {
 		fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
 		IP6_ECN_flow_init(fl6.flowlabel);
 		if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
@@ -508,7 +508,7 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 			tcp_ld_RTO_revert(sk, seq);
 	}
 
-	if (!sock_owned_by_user(sk) && np->recverr) {
+	if (!sock_owned_by_user(sk) && inet6_test_bit(RECVERR6, sk)) {
 		WRITE_ONCE(sk->sk_err, err);
 		sk_error_report(sk);
 	} else {
@@ -548,7 +548,7 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
 				    &ireq->ir_v6_rmt_addr);
 
 		fl6->daddr = ireq->ir_v6_rmt_addr;
-		if (np->repflow && ireq->pktopts)
+		if (inet6_test_bit(REPFLOW, sk) && ireq->pktopts)
 			fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
 
 		tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
@@ -797,7 +797,7 @@ static void tcp_v6_init_req(struct request_sock *req,
 	    (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
 	     np->rxopt.bits.rxinfo ||
 	     np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
-	     np->rxopt.bits.rxohlim || np->repflow)) {
+	     np->rxopt.bits.rxohlim || inet6_test_bit(REPFLOW, sk_listener))) {
 		refcount_inc(&skb->users);
 		ireq->pktopts = skb;
 	}
@@ -1055,10 +1055,8 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
 	if (sk) {
 		oif = sk->sk_bound_dev_if;
 		if (sk_fullsock(sk)) {
-			const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
-
 			trace_tcp_send_reset(sk, skb);
-			if (np->repflow)
+			if (inet6_test_bit(REPFLOW, sk))
 				label = ip6_flowlabel(ipv6h);
 			priority = sk->sk_priority;
 			txhash = sk->sk_txhash;
@@ -1247,7 +1245,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
 		newnp->mcast_oif   = inet_iif(skb);
 		newnp->mcast_hops  = ip_hdr(skb)->ttl;
 		newnp->rcv_flowinfo = 0;
-		if (np->repflow)
+		if (inet6_test_bit(REPFLOW, sk))
 			newnp->flow_label = 0;
 
 		/*
@@ -1320,7 +1318,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
 	newnp->mcast_oif  = tcp_v6_iif(skb);
 	newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
 	newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
-	if (np->repflow)
+	if (inet6_test_bit(REPFLOW, sk))
 		newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
 
 	/* Set ToS of the new socket based upon the value of incoming SYN.
@@ -1542,10 +1540,11 @@ ipv6_pktoptions:
 		if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
 			np->mcast_oif = tcp_v6_iif(opt_skb);
 		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
-			np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
+			WRITE_ONCE(np->mcast_hops,
+				   ipv6_hdr(opt_skb)->hop_limit);
 		if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
 			np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
-		if (np->repflow)
+		if (inet6_test_bit(REPFLOW, sk))
 			np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
 		if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
 			tcp_v6_restore_cb(opt_skb);
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 86b5d509a468..622b10a549f7 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -410,10 +410,11 @@ try_again:
 		*addr_len = sizeof(*sin6);
 
 		BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk,
-						      (struct sockaddr *)sin6);
+						      (struct sockaddr *)sin6,
+						      addr_len);
 	}
 
-	if (udp_sk(sk)->gro_enabled)
+	if (udp_test_bit(GRO_ENABLED, sk))
 		udp_cmsg_recv(msg, sk, skb);
 
 	if (np->rxopt.all)
@@ -571,7 +572,7 @@ int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source,
 			       inet6_iif(skb), inet6_sdif(skb), udptable, NULL);
 
-	if (!sk || udp_sk(sk)->encap_type) {
+	if (!sk || READ_ONCE(udp_sk(sk)->encap_type)) {
 		/* No socket for error: try tunnels before discarding */
 		if (static_branch_unlikely(&udpv6_encap_needed_key)) {
 			sk = __udp6_lib_err_encap(net, hdr, offset, uh,
@@ -598,7 +599,7 @@ int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 		if (!ip6_sk_accept_pmtu(sk))
 			goto out;
 		ip6_sk_update_pmtu(skb, sk, info);
-		if (np->pmtudisc != IPV6_PMTUDISC_DONT)
+		if (READ_ONCE(np->pmtudisc) != IPV6_PMTUDISC_DONT)
 			harderr = 1;
 	}
 	if (type == NDISC_REDIRECT) {
@@ -619,7 +620,7 @@ int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 		goto out;
 	}
 
-	if (!np->recverr) {
+	if (!inet6_test_bit(RECVERR6, sk)) {
 		if (!harderr || sk->sk_state != TCP_ESTABLISHED)
 			goto out;
 	} else {
@@ -688,7 +689,8 @@ static int udpv6_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)
 	}
 	nf_reset_ct(skb);
 
-	if (static_branch_unlikely(&udpv6_encap_needed_key) && up->encap_type) {
+	if (static_branch_unlikely(&udpv6_encap_needed_key) &&
+	    READ_ONCE(up->encap_type)) {
 		int (*encap_rcv)(struct sock *sk, struct sk_buff *skb);
 
 		/*
@@ -726,16 +728,17 @@ static int udpv6_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)
 	/*
 	 * UDP-Lite specific tests, ignored on UDP sockets (see net/ipv4/udp.c).
 	 */
-	if ((up->pcflag & UDPLITE_RECV_CC)  &&  UDP_SKB_CB(skb)->partial_cov) {
+	if (udp_test_bit(UDPLITE_RECV_CC, sk) && UDP_SKB_CB(skb)->partial_cov) {
+		u16 pcrlen = READ_ONCE(up->pcrlen);
 
-		if (up->pcrlen == 0) {          /* full coverage was set  */
+		if (pcrlen == 0) {          /* full coverage was set  */
 			net_dbg_ratelimited("UDPLITE6: partial coverage %d while full coverage %d requested\n",
 					    UDP_SKB_CB(skb)->cscov, skb->len);
 			goto drop;
 		}
-		if (UDP_SKB_CB(skb)->cscov  <  up->pcrlen) {
+		if (UDP_SKB_CB(skb)->cscov < pcrlen) {
 			net_dbg_ratelimited("UDPLITE6: coverage %d too small, need min %d\n",
-					    UDP_SKB_CB(skb)->cscov, up->pcrlen);
+					    UDP_SKB_CB(skb)->cscov, pcrlen);
 			goto drop;
 		}
 	}
@@ -858,7 +861,7 @@ start_lookup:
 		/* If zero checksum and no_check is not on for
 		 * the socket then skip it.
 		 */
-		if (!uh->check && !udp_sk(sk)->no_check6_rx)
+		if (!uh->check && !udp_get_no_check6_rx(sk))
 			continue;
 		if (!first) {
 			first = sk;
@@ -980,7 +983,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
 		if (unlikely(rcu_dereference(sk->sk_rx_dst) != dst))
 			udp6_sk_rx_dst_set(sk, dst);
 
-		if (!uh->check && !udp_sk(sk)->no_check6_rx) {
+		if (!uh->check && !udp_get_no_check6_rx(sk)) {
 			if (refcounted)
 				sock_put(sk);
 			goto report_csum_error;
@@ -1002,7 +1005,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
 	/* Unicast */
 	sk = __udp6_lib_lookup_skb(skb, uh->source, uh->dest, udptable);
 	if (sk) {
-		if (!uh->check && !udp_sk(sk)->no_check6_rx)
+		if (!uh->check && !udp_get_no_check6_rx(sk))
 			goto report_csum_error;
 		return udp6_unicast_rcv_skb(sk, skb, uh);
 	}
@@ -1155,7 +1158,7 @@ static int udpv6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
 	if (addr_len < SIN6_LEN_RFC2133)
 		return -EINVAL;
 
-	return BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr);
+	return BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr, &addr_len);
 }
 
 /**
@@ -1241,7 +1244,7 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6,
 			kfree_skb(skb);
 			return -EINVAL;
 		}
-		if (udp_sk(sk)->no_check6_tx) {
+		if (udp_get_no_check6_tx(sk)) {
 			kfree_skb(skb);
 			return -EINVAL;
 		}
@@ -1262,7 +1265,7 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6,
 
 	if (is_udplite)
 		csum = udplite_csum(skb);
-	else if (udp_sk(sk)->no_check6_tx) {   /* UDP csum disabled */
+	else if (udp_get_no_check6_tx(sk)) {   /* UDP csum disabled */
 		skb->ip_summed = CHECKSUM_NONE;
 		goto send;
 	} else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */
@@ -1281,7 +1284,7 @@ csum_partial:
 send:
 	err = ip6_send_skb(skb);
 	if (err) {
-		if (err == -ENOBUFS && !inet6_sk(sk)->recverr) {
+		if (err == -ENOBUFS && !inet6_test_bit(RECVERR6, sk)) {
 			UDP6_INC_STATS(sock_net(sk),
 				       UDP_MIB_SNDBUFERRORS, is_udplite);
 			err = 0;
@@ -1332,7 +1335,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 	int addr_len = msg->msg_namelen;
 	bool connected = false;
 	int ulen = len;
-	int corkreq = READ_ONCE(up->corkflag) || msg->msg_flags&MSG_MORE;
+	int corkreq = udp_test_bit(CORK, sk) || msg->msg_flags & MSG_MORE;
 	int err;
 	int is_udplite = IS_UDPLITE(sk);
 	int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
@@ -1427,7 +1430,7 @@ do_udp_sendmsg:
 		fl6->fl6_dport = sin6->sin6_port;
 		daddr = &sin6->sin6_addr;
 
-		if (np->sndflow) {
+		if (inet6_test_bit(SNDFLOW, sk)) {
 			fl6->flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK;
 			if (fl6->flowlabel & IPV6_FLOWLABEL_MASK) {
 				flowlabel = fl6_sock_lookup(sk, fl6->flowlabel);
@@ -1508,6 +1511,7 @@ do_udp_sendmsg:
 	if (cgroup_bpf_enabled(CGROUP_UDP6_SENDMSG) && !connected) {
 		err = BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk,
 					   (struct sockaddr *)sin6,
+					   &addr_len,
 					   &fl6->saddr);
 		if (err)
 			goto out_no_dst;
@@ -1593,7 +1597,7 @@ back_from_confirm:
 
 do_append_data:
 	if (ipc6.dontfrag < 0)
-		ipc6.dontfrag = np->dontfrag;
+		ipc6.dontfrag = inet6_test_bit(DONTFRAG, sk);
 	up->len += ulen;
 	err = ip6_append_data(sk, getfrag, msg, ulen, sizeof(struct udphdr),
 			      &ipc6, fl6, (struct rt6_info *)dst,
@@ -1606,7 +1610,7 @@ do_append_data:
 		up->pending = 0;
 
 	if (err > 0)
-		err = np->recverr ? net_xmit_errno(err) : 0;
+		err = inet6_test_bit(RECVERR6, sk) ? net_xmit_errno(err) : 0;
 	release_sock(sk);
 
 out:
@@ -1644,11 +1648,11 @@ static void udpv6_splice_eof(struct socket *sock)
 	struct sock *sk = sock->sk;
 	struct udp_sock *up = udp_sk(sk);
 
-	if (!up->pending || READ_ONCE(up->corkflag))
+	if (!up->pending || udp_test_bit(CORK, sk))
 		return;
 
 	lock_sock(sk);
-	if (up->pending && !READ_ONCE(up->corkflag))
+	if (up->pending && !udp_test_bit(CORK, sk))
 		udp_v6_push_pending_frames(sk);
 	release_sock(sk);
 }
@@ -1670,7 +1674,7 @@ void udpv6_destroy_sock(struct sock *sk)
 			if (encap_destroy)
 				encap_destroy(sk);
 		}
-		if (up->encap_enabled) {
+		if (udp_test_bit(ENCAP_ENABLED, sk)) {
 			static_branch_dec(&udpv6_encap_needed_key);
 			udp_encap_disable();
 		}
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
index 267d491e9707..a60bec9b14f1 100644
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c
@@ -17,7 +17,6 @@
 static int udplitev6_sk_init(struct sock *sk)
 {
 	udpv6_init_sock(sk);
-	udp_sk(sk)->pcflag = UDPLITE_BIT;
 	pr_warn_once("UDP-Lite is deprecated and scheduled to be removed in 2025, "
 		     "please contact the netdev mailing list\n");
 	return 0;
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index 4907ab241d6b..4156387248e4 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -81,14 +81,14 @@ int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
 	struct ipv6hdr *ip6h;
 	int len;
 	int ip6hlen = sizeof(struct ipv6hdr);
-
 	__u8 *udpdata;
 	__be32 *udpdata32;
-	__u16 encap_type = up->encap_type;
+	u16 encap_type;
 
 	if (skb->protocol == htons(ETH_P_IP))
 		return xfrm4_udp_encap_rcv(sk, skb);
 
+	encap_type = READ_ONCE(up->encap_type);
 	/* if this is not encapsulated socket, then just return now */
 	if (!encap_type)
 		return 1;
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 03608d3ded4b..8d21ff25f160 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -1139,9 +1139,9 @@ static void l2tp_tunnel_destruct(struct sock *sk)
 	switch (tunnel->encap) {
 	case L2TP_ENCAPTYPE_UDP:
 		/* No longer an encapsulation socket. See net/ipv4/udp.c */
-		(udp_sk(sk))->encap_type = 0;
-		(udp_sk(sk))->encap_rcv = NULL;
-		(udp_sk(sk))->encap_destroy = NULL;
+		WRITE_ONCE(udp_sk(sk)->encap_type, 0);
+		udp_sk(sk)->encap_rcv = NULL;
+		udp_sk(sk)->encap_destroy = NULL;
 		break;
 	case L2TP_ENCAPTYPE_IP:
 		break;
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index 11f3d375cec0..bb373e249237 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -431,7 +431,7 @@ static int l2tp_ip6_getname(struct socket *sock, struct sockaddr *uaddr,
 			return -ENOTCONN;
 		lsa->l2tp_conn_id = lsk->peer_conn_id;
 		lsa->l2tp_addr = sk->sk_v6_daddr;
-		if (np->sndflow)
+		if (inet6_test_bit(SNDFLOW, sk))
 			lsa->l2tp_flowinfo = np->flow_label;
 	} else {
 		if (ipv6_addr_any(&sk->sk_v6_rcv_saddr))
@@ -528,7 +528,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 			return -EAFNOSUPPORT;
 
 		daddr = &lsa->l2tp_addr;
-		if (np->sndflow) {
+		if (inet6_test_bit(SNDFLOW, sk)) {
 			fl6.flowlabel = lsa->l2tp_flowinfo & IPV6_FLOWINFO_MASK;
 			if (fl6.flowlabel & IPV6_FLOWLABEL_MASK) {
 				flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
@@ -620,7 +620,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 		ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
 
 	if (ipc6.dontfrag < 0)
-		ipc6.dontfrag = np->dontfrag;
+		ipc6.dontfrag = inet6_test_bit(DONTFRAG, sk);
 
 	if (msg->msg_flags & MSG_CONFIRM)
 		goto do_confirm;
diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c
index a4af3b7675ef..c3a60fd19c37 100644
--- a/net/mac80211/tdls.c
+++ b/net/mac80211/tdls.c
@@ -309,7 +309,7 @@ ieee80211_tdls_chandef_vht_upgrade(struct ieee80211_sub_if_data *sdata,
 				   struct sta_info *sta)
 {
 	/* IEEE802.11ac-2013 Table E-4 */
-	u16 centers_80mhz[] = { 5210, 5290, 5530, 5610, 5690, 5775 };
+	static const u16 centers_80mhz[] = { 5210, 5290, 5530, 5610, 5690, 5775 };
 	struct cfg80211_chan_def uc = sta->tdls_chandef;
 	enum nl80211_chan_width max_width =
 		ieee80211_sta_cap_chan_bw(&sta->deflink);
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index 4174076c66fa..05d47c0e5ec3 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -1298,17 +1298,13 @@ static void set_sock_size(struct sock *sk, int mode, int val)
 static void set_mcast_loop(struct sock *sk, u_char loop)
 {
 	/* setsockopt(sock, SOL_IP, IP_MULTICAST_LOOP, &loop, sizeof(loop)); */
-	lock_sock(sk);
 	inet_assign_bit(MC_LOOP, sk, loop);
 #ifdef CONFIG_IP_VS_IPV6
-	if (sk->sk_family == AF_INET6) {
-		struct ipv6_pinfo *np = inet6_sk(sk);
-
+	if (READ_ONCE(sk->sk_family) == AF_INET6) {
 		/* IPV6_MULTICAST_LOOP */
-		np->mc_loop = loop ? 1 : 0;
+		inet6_assign_bit(MC6_LOOP, sk, loop);
 	}
 #endif
-	release_sock(sk);
 }
 
 /*
@@ -1326,7 +1322,7 @@ static void set_mcast_ttl(struct sock *sk, u_char ttl)
 		struct ipv6_pinfo *np = inet6_sk(sk);
 
 		/* IPV6_MULTICAST_HOPS */
-		np->mcast_hops = ttl;
+		WRITE_ONCE(np->mcast_hops, ttl);
 	}
 #endif
 	release_sock(sk);
@@ -1345,7 +1341,7 @@ static void set_mcast_pmtudisc(struct sock *sk, int val)
 		struct ipv6_pinfo *np = inet6_sk(sk);
 
 		/* IPV6_MTU_DISCOVER */
-		np->pmtudisc = val;
+		WRITE_ONCE(np->pmtudisc, val);
 	}
 #endif
 	release_sock(sk);
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index fd66014d8a76..5f8094acd056 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -873,7 +873,7 @@ static void ovs_fragment(struct net *net, struct vport *vport,
 
 		prepare_frag(vport, skb, orig_network_offset,
 			     ovs_key_mac_proto(key));
-		dst_init(&ovs_rt.dst, &ovs_dst_ops, NULL, 1,
+		dst_init(&ovs_rt.dst, &ovs_dst_ops, NULL,
 			 DST_OBSOLETE_NONE, DST_NOCOUNT);
 		ovs_rt.dst.dev = vport->dev;
 
@@ -890,7 +890,7 @@ static void ovs_fragment(struct net *net, struct vport *vport,
 		prepare_frag(vport, skb, orig_network_offset,
 			     ovs_key_mac_proto(key));
 		memset(&ovs_rt, 0, sizeof(ovs_rt));
-		dst_init(&ovs_rt.dst, &ovs_dst_ops, NULL, 1,
+		dst_init(&ovs_rt.dst, &ovs_dst_ops, NULL,
 			 DST_OBSOLETE_NONE, DST_NOCOUNT);
 		ovs_rt.dst.dev = vport->dev;
 
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index 1e20bbd687f1..1424bfeaca73 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -375,9 +375,9 @@ out:
 
 static const struct nla_policy route4_policy[TCA_ROUTE4_MAX + 1] = {
 	[TCA_ROUTE4_CLASSID]	= { .type = NLA_U32 },
-	[TCA_ROUTE4_TO]		= { .type = NLA_U32 },
-	[TCA_ROUTE4_FROM]	= { .type = NLA_U32 },
-	[TCA_ROUTE4_IIF]	= { .type = NLA_U32 },
+	[TCA_ROUTE4_TO]		= NLA_POLICY_MAX(NLA_U32, 0xFF),
+	[TCA_ROUTE4_FROM]	= NLA_POLICY_MAX(NLA_U32, 0xFF),
+	[TCA_ROUTE4_IIF]	= NLA_POLICY_MAX(NLA_U32, 0x7FFF),
 };
 
 static int route4_set_parms(struct net *net, struct tcf_proto *tp,
@@ -397,33 +397,37 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp,
 		return err;
 
 	if (tb[TCA_ROUTE4_TO]) {
-		if (new && handle & 0x8000)
+		if (new && handle & 0x8000) {
+			NL_SET_ERR_MSG(extack, "Invalid handle");
 			return -EINVAL;
+		}
 		to = nla_get_u32(tb[TCA_ROUTE4_TO]);
-		if (to > 0xFF)
-			return -EINVAL;
 		nhandle = to;
 	}
 
+	if (tb[TCA_ROUTE4_FROM] && tb[TCA_ROUTE4_IIF]) {
+		NL_SET_ERR_MSG_ATTR(extack, tb[TCA_ROUTE4_FROM],
+				    "'from' and 'fromif' are mutually exclusive");
+		return -EINVAL;
+	}
+
 	if (tb[TCA_ROUTE4_FROM]) {
-		if (tb[TCA_ROUTE4_IIF])
-			return -EINVAL;
 		id = nla_get_u32(tb[TCA_ROUTE4_FROM]);
-		if (id > 0xFF)
-			return -EINVAL;
 		nhandle |= id << 16;
 	} else if (tb[TCA_ROUTE4_IIF]) {
 		id = nla_get_u32(tb[TCA_ROUTE4_IIF]);
-		if (id > 0x7FFF)
-			return -EINVAL;
 		nhandle |= (id | 0x8000) << 16;
 	} else
 		nhandle |= 0xFFFF << 16;
 
 	if (handle && new) {
 		nhandle |= handle & 0x7F00;
-		if (nhandle != handle)
+		if (nhandle != handle) {
+			NL_SET_ERR_MSG_FMT(extack,
+					   "Handle mismatch constructed: %x (expected: %x)",
+					   handle, nhandle);
 			return -EINVAL;
+		}
 	}
 
 	if (!nhandle) {
@@ -478,7 +482,6 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
 	struct route4_filter __rcu **fp;
 	struct route4_filter *fold, *f1, *pfp, *f = NULL;
 	struct route4_bucket *b;
-	struct nlattr *opt = tca[TCA_OPTIONS];
 	struct nlattr *tb[TCA_ROUTE4_MAX + 1];
 	unsigned int h, th;
 	int err;
@@ -489,10 +492,12 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
 		return -EINVAL;
 	}
 
-	if (opt == NULL)
+	if (NL_REQ_ATTR_CHECK(extack, NULL, tca, TCA_OPTIONS)) {
+		NL_SET_ERR_MSG_MOD(extack, "Missing options");
 		return -EINVAL;
+	}
 
-	err = nla_parse_nested_deprecated(tb, TCA_ROUTE4_MAX, opt,
+	err = nla_parse_nested_deprecated(tb, TCA_ROUTE4_MAX, tca[TCA_OPTIONS],
 					  route4_policy, NULL);
 	if (err < 0)
 		return err;
diff --git a/net/sched/sch_frag.c b/net/sched/sch_frag.c
index a9bd0a235890..ce63414185fd 100644
--- a/net/sched/sch_frag.c
+++ b/net/sched/sch_frag.c
@@ -96,7 +96,7 @@ static int sch_fragment(struct net *net, struct sk_buff *skb,
 		unsigned long orig_dst;
 
 		sch_frag_prepare_frag(skb, xmit);
-		dst_init(&sch_frag_rt.dst, &sch_frag_dst_ops, NULL, 1,
+		dst_init(&sch_frag_rt.dst, &sch_frag_dst_ops, NULL,
 			 DST_OBSOLETE_NONE, DST_NOCOUNT);
 		sch_frag_rt.dst.dev = skb->dev;
 
@@ -112,7 +112,7 @@ static int sch_fragment(struct net *net, struct sk_buff *skb,
 
 		sch_frag_prepare_frag(skb, xmit);
 		memset(&sch_frag_rt, 0, sizeof(sch_frag_rt));
-		dst_init(&sch_frag_rt.dst, &sch_frag_dst_ops, NULL, 1,
+		dst_init(&sch_frag_rt.dst, &sch_frag_dst_ops, NULL,
 			 DST_OBSOLETE_NONE, DST_NOCOUNT);
 		sch_frag_rt.dst.dev = skb->dev;
 
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 43f2731bf590..5c0ed5909d85 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -128,7 +128,6 @@ static void sctp_v6_err_handle(struct sctp_transport *t, struct sk_buff *skb,
 {
 	struct sctp_association *asoc = t->asoc;
 	struct sock *sk = asoc->base.sk;
-	struct ipv6_pinfo *np;
 	int err = 0;
 
 	switch (type) {
@@ -149,9 +148,8 @@ static void sctp_v6_err_handle(struct sctp_transport *t, struct sk_buff *skb,
 		break;
 	}
 
-	np = inet6_sk(sk);
 	icmpv6_err_convert(type, code, &err);
-	if (!sock_owned_by_user(sk) && np->recverr) {
+	if (!sock_owned_by_user(sk) && inet6_test_bit(RECVERR6, sk)) {
 		sk->sk_err = err;
 		sk_error_report(sk);
 	} else {
@@ -298,7 +296,8 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
 	if (t->flowlabel & SCTP_FLOWLABEL_SET_MASK)
 		fl6->flowlabel = htonl(t->flowlabel & SCTP_FLOWLABEL_VAL_MASK);
 
-	if (np->sndflow && (fl6->flowlabel & IPV6_FLOWLABEL_MASK)) {
+	if (inet6_test_bit(SNDFLOW, sk) &&
+	    (fl6->flowlabel & IPV6_FLOWLABEL_MASK)) {
 		struct ip6_flowlabel *flowlabel;
 
 		flowlabel = fl6_sock_lookup(sk, fl6->flowlabel);
diff --git a/net/tipc/link.c b/net/tipc/link.c
index e33b4f29f77c..d0143823658d 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -1446,7 +1446,7 @@ u16 tipc_get_gap_ack_blks(struct tipc_gap_ack_blks **ga, struct tipc_link *l,
 		p = (struct tipc_gap_ack_blks *)msg_data(hdr);
 		sz = ntohs(p->len);
 		/* Sanity check */
-		if (sz == struct_size(p, gacks, p->ugack_cnt + p->bgack_cnt)) {
+		if (sz == struct_size(p, gacks, size_add(p->ugack_cnt, p->bgack_cnt))) {
 			/* Good, check if the desired type exists */
 			if ((uc && p->ugack_cnt) || (!uc && p->bgack_cnt))
 				goto ok;
@@ -1533,7 +1533,7 @@ static u16 tipc_build_gap_ack_blks(struct tipc_link *l, struct tipc_msg *hdr)
 			__tipc_build_gap_ack_blks(ga, l, ga->bgack_cnt) : 0;
 
 	/* Total len */
-	len = struct_size(ga, gacks, ga->bgack_cnt + ga->ugack_cnt);
+	len = struct_size(ga, gacks, size_add(ga->bgack_cnt, ga->ugack_cnt));
 	ga->len = htons(len);
 	return len;
 }
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index e9d1e83a859d..9634dfd636fd 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -1491,7 +1491,7 @@ static int tls_decrypt_sg(struct sock *sk, struct iov_iter *out_iov,
 	 */
 	aead_size = sizeof(*aead_req) + crypto_aead_reqsize(ctx->aead_recv);
 	aead_size = ALIGN(aead_size, __alignof__(*dctx));
-	mem = kmalloc(aead_size + struct_size(dctx, sg, n_sgin + n_sgout),
+	mem = kmalloc(aead_size + struct_size(dctx, sg, size_add(n_sgin, n_sgout)),
 		      sk->sk_allocation);
 	if (!mem) {
 		err = -ENOMEM;
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 3e8a04a13668..e10d07c76044 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -116,6 +116,7 @@
 #include <linux/freezer.h>
 #include <linux/file.h>
 #include <linux/btf_ids.h>
+#include <linux/bpf-cgroup.h>
 
 #include "scm.h"
 
@@ -1381,6 +1382,10 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
 		if (err)
 			goto out;
 
+		err = BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, addr, &alen);
+		if (err)
+			goto out;
+
 		if ((test_bit(SOCK_PASSCRED, &sock->flags) ||
 		     test_bit(SOCK_PASSPIDFD, &sock->flags)) &&
 		    !unix_sk(sk)->addr) {
@@ -1490,6 +1495,10 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
 	if (err)
 		goto out;
 
+	err = BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, uaddr, &addr_len);
+	if (err)
+		goto out;
+
 	if ((test_bit(SOCK_PASSCRED, &sock->flags) ||
 	     test_bit(SOCK_PASSPIDFD, &sock->flags)) && !u->addr) {
 		err = unix_autobind(sk);
@@ -1770,6 +1779,13 @@ static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
 	} else {
 		err = addr->len;
 		memcpy(sunaddr, addr->name, addr->len);
+
+		if (peer)
+			BPF_CGROUP_RUN_SA_PROG(sk, uaddr, &err,
+					       CGROUP_UNIX_GETPEERNAME);
+		else
+			BPF_CGROUP_RUN_SA_PROG(sk, uaddr, &err,
+					       CGROUP_UNIX_GETSOCKNAME);
 	}
 	sock_put(sk);
 out:
@@ -1922,6 +1938,13 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
 		err = unix_validate_addr(sunaddr, msg->msg_namelen);
 		if (err)
 			goto out;
+
+		err = BPF_CGROUP_RUN_PROG_UNIX_SENDMSG_LOCK(sk,
+							    msg->msg_name,
+							    &msg->msg_namelen,
+							    NULL);
+		if (err)
+			goto out;
 	} else {
 		sunaddr = NULL;
 		err = -ENOTCONN;
@@ -2390,9 +2413,14 @@ int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size,
 						EPOLLOUT | EPOLLWRNORM |
 						EPOLLWRBAND);
 
-	if (msg->msg_name)
+	if (msg->msg_name) {
 		unix_copy_addr(msg, skb->sk);
 
+		BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk,
+						      msg->msg_name,
+						      &msg->msg_namelen);
+	}
+
 	if (size > skb->len - skip)
 		size = skb->len - skip;
 	else if (size < skb->len - skip)
@@ -2744,6 +2772,11 @@ unlock:
 			DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
 					 state->msg->msg_name);
 			unix_copy_addr(state->msg, skb->sk);
+
+			BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk,
+							      state->msg->msg_name,
+							      &state->msg->msg_namelen);
+
 			sunaddr = NULL;
 		}
 
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 020cf17ab7e4..013b65241b65 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -1921,6 +1921,9 @@ out_err:
 			err = total_written;
 	}
 out:
+	if (sk->sk_type == SOCK_STREAM)
+		err = sk_stream_error(sk, msg->msg_flags, err);
+
 	release_sock(sk);
 	return err;
 }
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index b80bf681327b..fdb95da12029 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -63,6 +63,17 @@ struct virtio_vsock {
 
 	u32 guest_cid;
 	bool seqpacket_allow;
+
+	/* These fields are used only in tx path in function
+	 * 'virtio_transport_send_pkt_work()', so to save
+	 * stack space in it, place both of them here. Each
+	 * pointer from 'out_sgs' points to the corresponding
+	 * element in 'out_bufs' - this is initialized in
+	 * 'virtio_vsock_probe()'. Both fields are protected
+	 * by 'tx_lock'. +1 is needed for packet header.
+	 */
+	struct scatterlist *out_sgs[MAX_SKB_FRAGS + 1];
+	struct scatterlist out_bufs[MAX_SKB_FRAGS + 1];
 };
 
 static u32 virtio_transport_get_local_cid(void)
@@ -100,8 +111,8 @@ virtio_transport_send_pkt_work(struct work_struct *work)
 	vq = vsock->vqs[VSOCK_VQ_TX];
 
 	for (;;) {
-		struct scatterlist hdr, buf, *sgs[2];
 		int ret, in_sg = 0, out_sg = 0;
+		struct scatterlist **sgs;
 		struct sk_buff *skb;
 		bool reply;
 
@@ -111,12 +122,43 @@ virtio_transport_send_pkt_work(struct work_struct *work)
 
 		virtio_transport_deliver_tap_pkt(skb);
 		reply = virtio_vsock_skb_reply(skb);
-
-		sg_init_one(&hdr, virtio_vsock_hdr(skb), sizeof(*virtio_vsock_hdr(skb)));
-		sgs[out_sg++] = &hdr;
-		if (skb->len > 0) {
-			sg_init_one(&buf, skb->data, skb->len);
-			sgs[out_sg++] = &buf;
+		sgs = vsock->out_sgs;
+		sg_init_one(sgs[out_sg], virtio_vsock_hdr(skb),
+			    sizeof(*virtio_vsock_hdr(skb)));
+		out_sg++;
+
+		if (!skb_is_nonlinear(skb)) {
+			if (skb->len > 0) {
+				sg_init_one(sgs[out_sg], skb->data, skb->len);
+				out_sg++;
+			}
+		} else {
+			struct skb_shared_info *si;
+			int i;
+
+			/* If skb is nonlinear, then its buffer must contain
+			 * only header and nothing more. Data is stored in
+			 * the fragged part.
+			 */
+			WARN_ON_ONCE(skb_headroom(skb) != sizeof(*virtio_vsock_hdr(skb)));
+
+			si = skb_shinfo(skb);
+
+			for (i = 0; i < si->nr_frags; i++) {
+				skb_frag_t *skb_frag = &si->frags[i];
+				void *va;
+
+				/* We will use 'page_to_virt()' for the userspace page
+				 * here, because virtio or dma-mapping layers will call
+				 * 'virt_to_phys()' later to fill the buffer descriptor.
+				 * We don't touch memory at "virtual" address of this page.
+				 */
+				va = page_to_virt(skb_frag->bv_page);
+				sg_init_one(sgs[out_sg],
+					    va + skb_frag->bv_offset,
+					    skb_frag->bv_len);
+				out_sg++;
+			}
 		}
 
 		ret = virtqueue_add_sgs(vq, sgs, out_sg, in_sg, skb, GFP_KERNEL);
@@ -413,6 +455,37 @@ static void virtio_vsock_rx_done(struct virtqueue *vq)
 	queue_work(virtio_vsock_workqueue, &vsock->rx_work);
 }
 
+static bool virtio_transport_can_msgzerocopy(int bufs_num)
+{
+	struct virtio_vsock *vsock;
+	bool res = false;
+
+	rcu_read_lock();
+
+	vsock = rcu_dereference(the_virtio_vsock);
+	if (vsock) {
+		struct virtqueue *vq = vsock->vqs[VSOCK_VQ_TX];
+
+		/* Check that tx queue is large enough to keep whole
+		 * data to send. This is needed, because when there is
+		 * not enough free space in the queue, current skb to
+		 * send will be reinserted to the head of tx list of
+		 * the socket to retry transmission later, so if skb
+		 * is bigger than whole queue, it will be reinserted
+		 * again and again, thus blocking other skbs to be sent.
+		 * Each page of the user provided buffer will be added
+		 * as a single buffer to the tx virtqueue, so compare
+		 * number of pages against maximum capacity of the queue.
+		 */
+		if (bufs_num <= vq->num_max)
+			res = true;
+	}
+
+	rcu_read_unlock();
+
+	return res;
+}
+
 static bool virtio_transport_seqpacket_allow(u32 remote_cid);
 
 static struct virtio_transport virtio_transport = {
@@ -462,6 +535,7 @@ static struct virtio_transport virtio_transport = {
 	},
 
 	.send_pkt = virtio_transport_send_pkt,
+	.can_msgzerocopy = virtio_transport_can_msgzerocopy,
 };
 
 static bool virtio_transport_seqpacket_allow(u32 remote_cid)
@@ -635,6 +709,7 @@ static int virtio_vsock_probe(struct virtio_device *vdev)
 {
 	struct virtio_vsock *vsock = NULL;
 	int ret;
+	int i;
 
 	ret = mutex_lock_interruptible(&the_virtio_vsock_mutex);
 	if (ret)
@@ -677,6 +752,9 @@ static int virtio_vsock_probe(struct virtio_device *vdev)
 	if (ret < 0)
 		goto out;
 
+	for (i = 0; i < ARRAY_SIZE(vsock->out_sgs); i++)
+		vsock->out_sgs[i] = &vsock->out_bufs[i];
+
 	rcu_assign_pointer(the_virtio_vsock, vsock);
 	virtio_vsock_vqs_start(vsock);
 
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index 352d042b130b..e22c81435ef7 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -37,27 +37,89 @@ virtio_transport_get_ops(struct vsock_sock *vsk)
 	return container_of(t, struct virtio_transport, transport);
 }
 
-/* Returns a new packet on success, otherwise returns NULL.
- *
- * If NULL is returned, errp is set to a negative errno.
- */
-static struct sk_buff *
-virtio_transport_alloc_skb(struct virtio_vsock_pkt_info *info,
-			   size_t len,
-			   u32 src_cid,
-			   u32 src_port,
-			   u32 dst_cid,
-			   u32 dst_port)
-{
-	const size_t skb_len = VIRTIO_VSOCK_SKB_HEADROOM + len;
-	struct virtio_vsock_hdr *hdr;
-	struct sk_buff *skb;
-	void *payload;
-	int err;
+static bool virtio_transport_can_zcopy(const struct virtio_transport *t_ops,
+				       struct virtio_vsock_pkt_info *info,
+				       size_t pkt_len)
+{
+	struct iov_iter *iov_iter;
 
-	skb = virtio_vsock_alloc_skb(skb_len, GFP_KERNEL);
-	if (!skb)
-		return NULL;
+	if (!info->msg)
+		return false;
+
+	iov_iter = &info->msg->msg_iter;
+
+	if (iov_iter->iov_offset)
+		return false;
+
+	/* We can't send whole iov. */
+	if (iov_iter->count > pkt_len)
+		return false;
+
+	/* Check that transport can send data in zerocopy mode. */
+	t_ops = virtio_transport_get_ops(info->vsk);
+
+	if (t_ops->can_msgzerocopy) {
+		int pages_in_iov = iov_iter_npages(iov_iter, MAX_SKB_FRAGS);
+		int pages_to_send = min(pages_in_iov, MAX_SKB_FRAGS);
+
+		/* +1 is for packet header. */
+		return t_ops->can_msgzerocopy(pages_to_send + 1);
+	}
+
+	return true;
+}
+
+static int virtio_transport_init_zcopy_skb(struct vsock_sock *vsk,
+					   struct sk_buff *skb,
+					   struct msghdr *msg,
+					   bool zerocopy)
+{
+	struct ubuf_info *uarg;
+
+	if (msg->msg_ubuf) {
+		uarg = msg->msg_ubuf;
+		net_zcopy_get(uarg);
+	} else {
+		struct iov_iter *iter = &msg->msg_iter;
+		struct ubuf_info_msgzc *uarg_zc;
+
+		uarg = msg_zerocopy_realloc(sk_vsock(vsk),
+					    iter->count,
+					    NULL);
+		if (!uarg)
+			return -1;
+
+		uarg_zc = uarg_to_msgzc(uarg);
+		uarg_zc->zerocopy = zerocopy ? 1 : 0;
+	}
+
+	skb_zcopy_init(skb, uarg);
+
+	return 0;
+}
+
+static int virtio_transport_fill_skb(struct sk_buff *skb,
+				     struct virtio_vsock_pkt_info *info,
+				     size_t len,
+				     bool zcopy)
+{
+	if (zcopy)
+		return __zerocopy_sg_from_iter(info->msg, NULL, skb,
+					       &info->msg->msg_iter,
+					       len);
+
+	return memcpy_from_msg(skb_put(skb, len), info->msg, len);
+}
+
+static void virtio_transport_init_hdr(struct sk_buff *skb,
+				      struct virtio_vsock_pkt_info *info,
+				      size_t payload_len,
+				      u32 src_cid,
+				      u32 src_port,
+				      u32 dst_cid,
+				      u32 dst_port)
+{
+	struct virtio_vsock_hdr *hdr;
 
 	hdr = virtio_vsock_hdr(skb);
 	hdr->type	= cpu_to_le16(info->type);
@@ -67,43 +129,28 @@ virtio_transport_alloc_skb(struct virtio_vsock_pkt_info *info,
 	hdr->src_port	= cpu_to_le32(src_port);
 	hdr->dst_port	= cpu_to_le32(dst_port);
 	hdr->flags	= cpu_to_le32(info->flags);
-	hdr->len	= cpu_to_le32(len);
-
-	if (info->msg && len > 0) {
-		payload = skb_put(skb, len);
-		err = memcpy_from_msg(payload, info->msg, len);
-		if (err)
-			goto out;
-
-		if (msg_data_left(info->msg) == 0 &&
-		    info->type == VIRTIO_VSOCK_TYPE_SEQPACKET) {
-			hdr->flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOM);
-
-			if (info->msg->msg_flags & MSG_EOR)
-				hdr->flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOR);
-		}
-	}
+	hdr->len	= cpu_to_le32(payload_len);
+}
 
-	if (info->reply)
-		virtio_vsock_skb_set_reply(skb);
+static void virtio_transport_copy_nonlinear_skb(const struct sk_buff *skb,
+						void *dst,
+						size_t len)
+{
+	struct iov_iter iov_iter = { 0 };
+	struct kvec kvec;
+	size_t to_copy;
 
-	trace_virtio_transport_alloc_pkt(src_cid, src_port,
-					 dst_cid, dst_port,
-					 len,
-					 info->type,
-					 info->op,
-					 info->flags);
+	kvec.iov_base = dst;
+	kvec.iov_len = len;
 
-	if (info->vsk && !skb_set_owner_sk_safe(skb, sk_vsock(info->vsk))) {
-		WARN_ONCE(1, "failed to allocate skb on vsock socket with sk_refcnt == 0\n");
-		goto out;
-	}
+	iov_iter.iter_type = ITER_KVEC;
+	iov_iter.kvec = &kvec;
+	iov_iter.nr_segs = 1;
 
-	return skb;
+	to_copy = min_t(size_t, len, skb->len);
 
-out:
-	kfree_skb(skb);
-	return NULL;
+	skb_copy_datagram_iter(skb, VIRTIO_VSOCK_SKB_CB(skb)->offset,
+			       &iov_iter, to_copy);
 }
 
 /* Packet capture */
@@ -114,7 +161,6 @@ static struct sk_buff *virtio_transport_build_skb(void *opaque)
 	struct af_vsockmon_hdr *hdr;
 	struct sk_buff *skb;
 	size_t payload_len;
-	void *payload_buf;
 
 	/* A packet could be split to fit the RX buffer, so we can retrieve
 	 * the payload length from the header and the buffer pointer taking
@@ -122,7 +168,6 @@ static struct sk_buff *virtio_transport_build_skb(void *opaque)
 	 */
 	pkt_hdr = virtio_vsock_hdr(pkt);
 	payload_len = pkt->len;
-	payload_buf = pkt->data;
 
 	skb = alloc_skb(sizeof(*hdr) + sizeof(*pkt_hdr) + payload_len,
 			GFP_ATOMIC);
@@ -165,7 +210,13 @@ static struct sk_buff *virtio_transport_build_skb(void *opaque)
 	skb_put_data(skb, pkt_hdr, sizeof(*pkt_hdr));
 
 	if (payload_len) {
-		skb_put_data(skb, payload_buf, payload_len);
+		if (skb_is_nonlinear(pkt)) {
+			void *data = skb_put(skb, payload_len);
+
+			virtio_transport_copy_nonlinear_skb(pkt, data, payload_len);
+		} else {
+			skb_put_data(skb, pkt->data, payload_len);
+		}
 	}
 
 	return skb;
@@ -189,6 +240,82 @@ static u16 virtio_transport_get_type(struct sock *sk)
 		return VIRTIO_VSOCK_TYPE_SEQPACKET;
 }
 
+/* Returns new sk_buff on success, otherwise returns NULL. */
+static struct sk_buff *virtio_transport_alloc_skb(struct virtio_vsock_pkt_info *info,
+						  size_t payload_len,
+						  bool zcopy,
+						  u32 src_cid,
+						  u32 src_port,
+						  u32 dst_cid,
+						  u32 dst_port)
+{
+	struct vsock_sock *vsk;
+	struct sk_buff *skb;
+	size_t skb_len;
+
+	skb_len = VIRTIO_VSOCK_SKB_HEADROOM;
+
+	if (!zcopy)
+		skb_len += payload_len;
+
+	skb = virtio_vsock_alloc_skb(skb_len, GFP_KERNEL);
+	if (!skb)
+		return NULL;
+
+	virtio_transport_init_hdr(skb, info, payload_len, src_cid, src_port,
+				  dst_cid, dst_port);
+
+	vsk = info->vsk;
+
+	/* If 'vsk' != NULL then payload is always present, so we
+	 * will never call '__zerocopy_sg_from_iter()' below without
+	 * setting skb owner in 'skb_set_owner_w()'. The only case
+	 * when 'vsk' == NULL is VIRTIO_VSOCK_OP_RST control message
+	 * without payload.
+	 */
+	WARN_ON_ONCE(!(vsk && (info->msg && payload_len)) && zcopy);
+
+	/* Set owner here, because '__zerocopy_sg_from_iter()' uses
+	 * owner of skb without check to update 'sk_wmem_alloc'.
+	 */
+	if (vsk)
+		skb_set_owner_w(skb, sk_vsock(vsk));
+
+	if (info->msg && payload_len > 0) {
+		int err;
+
+		err = virtio_transport_fill_skb(skb, info, payload_len, zcopy);
+		if (err)
+			goto out;
+
+		if (msg_data_left(info->msg) == 0 &&
+		    info->type == VIRTIO_VSOCK_TYPE_SEQPACKET) {
+			struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
+
+			hdr->flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOM);
+
+			if (info->msg->msg_flags & MSG_EOR)
+				hdr->flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOR);
+		}
+	}
+
+	if (info->reply)
+		virtio_vsock_skb_set_reply(skb);
+
+	trace_virtio_transport_alloc_pkt(src_cid, src_port,
+					 dst_cid, dst_port,
+					 payload_len,
+					 info->type,
+					 info->op,
+					 info->flags,
+					 zcopy);
+
+	return skb;
+out:
+	kfree_skb(skb);
+	return NULL;
+}
+
 /* This function can only be used on connecting/connected sockets,
  * since a socket assigned to a transport is required.
  *
@@ -197,10 +324,12 @@ static u16 virtio_transport_get_type(struct sock *sk)
 static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
 					  struct virtio_vsock_pkt_info *info)
 {
+	u32 max_skb_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE;
 	u32 src_cid, src_port, dst_cid, dst_port;
 	const struct virtio_transport *t_ops;
 	struct virtio_vsock_sock *vvs;
 	u32 pkt_len = info->pkt_len;
+	bool can_zcopy = false;
 	u32 rest_len;
 	int ret;
 
@@ -229,15 +358,30 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
 	if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW)
 		return pkt_len;
 
+	if (info->msg) {
+		/* If zerocopy is not enabled by 'setsockopt()', we behave as
+		 * there is no MSG_ZEROCOPY flag set.
+		 */
+		if (!sock_flag(sk_vsock(vsk), SOCK_ZEROCOPY))
+			info->msg->msg_flags &= ~MSG_ZEROCOPY;
+
+		if (info->msg->msg_flags & MSG_ZEROCOPY)
+			can_zcopy = virtio_transport_can_zcopy(t_ops, info, pkt_len);
+
+		if (can_zcopy)
+			max_skb_len = min_t(u32, VIRTIO_VSOCK_MAX_PKT_BUF_SIZE,
+					    (MAX_SKB_FRAGS * PAGE_SIZE));
+	}
+
 	rest_len = pkt_len;
 
 	do {
 		struct sk_buff *skb;
 		size_t skb_len;
 
-		skb_len = min_t(u32, VIRTIO_VSOCK_MAX_PKT_BUF_SIZE, rest_len);
+		skb_len = min(max_skb_len, rest_len);
 
-		skb = virtio_transport_alloc_skb(info, skb_len,
+		skb = virtio_transport_alloc_skb(info, skb_len, can_zcopy,
 						 src_cid, src_port,
 						 dst_cid, dst_port);
 		if (!skb) {
@@ -245,6 +389,21 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
 			break;
 		}
 
+		/* We process buffer part by part, allocating skb on
+		 * each iteration. If this is last skb for this buffer
+		 * and MSG_ZEROCOPY mode is in use - we must allocate
+		 * completion for the current syscall.
+		 */
+		if (info->msg && info->msg->msg_flags & MSG_ZEROCOPY &&
+		    skb_len == rest_len && info->op == VIRTIO_VSOCK_OP_RW) {
+			if (virtio_transport_init_zcopy_skb(vsk, skb,
+							    info->msg,
+							    can_zcopy)) {
+				ret = -ENOMEM;
+				break;
+			}
+		}
+
 		virtio_transport_inc_tx_pkt(vvs, skb);
 
 		ret = t_ops->send_pkt(skb);
@@ -364,9 +523,10 @@ virtio_transport_stream_do_peek(struct vsock_sock *vsk,
 		spin_unlock_bh(&vvs->rx_lock);
 
 		/* sk_lock is held by caller so no one else can dequeue.
-		 * Unlock rx_lock since memcpy_to_msg() may sleep.
+		 * Unlock rx_lock since skb_copy_datagram_iter() may sleep.
 		 */
-		err = memcpy_to_msg(msg, skb->data, bytes);
+		err = skb_copy_datagram_iter(skb, VIRTIO_VSOCK_SKB_CB(skb)->offset,
+					     &msg->msg_iter, bytes);
 		if (err)
 			goto out;
 
@@ -410,25 +570,27 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk,
 	while (total < len && !skb_queue_empty(&vvs->rx_queue)) {
 		skb = skb_peek(&vvs->rx_queue);
 
-		bytes = len - total;
-		if (bytes > skb->len)
-			bytes = skb->len;
+		bytes = min_t(size_t, len - total,
+			      skb->len - VIRTIO_VSOCK_SKB_CB(skb)->offset);
 
 		/* sk_lock is held by caller so no one else can dequeue.
-		 * Unlock rx_lock since memcpy_to_msg() may sleep.
+		 * Unlock rx_lock since skb_copy_datagram_iter() may sleep.
 		 */
 		spin_unlock_bh(&vvs->rx_lock);
 
-		err = memcpy_to_msg(msg, skb->data, bytes);
+		err = skb_copy_datagram_iter(skb,
+					     VIRTIO_VSOCK_SKB_CB(skb)->offset,
+					     &msg->msg_iter, bytes);
 		if (err)
 			goto out;
 
 		spin_lock_bh(&vvs->rx_lock);
 
 		total += bytes;
-		skb_pull(skb, bytes);
 
-		if (skb->len == 0) {
+		VIRTIO_VSOCK_SKB_CB(skb)->offset += bytes;
+
+		if (skb->len == VIRTIO_VSOCK_SKB_CB(skb)->offset) {
 			u32 pkt_len = le32_to_cpu(virtio_vsock_hdr(skb)->len);
 
 			virtio_transport_dec_rx_pkt(vvs, pkt_len);
@@ -492,9 +654,10 @@ virtio_transport_seqpacket_do_peek(struct vsock_sock *vsk,
 			spin_unlock_bh(&vvs->rx_lock);
 
 			/* sk_lock is held by caller so no one else can dequeue.
-			 * Unlock rx_lock since memcpy_to_msg() may sleep.
+			 * Unlock rx_lock since skb_copy_datagram_iter() may sleep.
 			 */
-			err = memcpy_to_msg(msg, skb->data, bytes);
+			err = skb_copy_datagram_iter(skb, VIRTIO_VSOCK_SKB_CB(skb)->offset,
+						     &msg->msg_iter, bytes);
 			if (err)
 				return err;
 
@@ -553,11 +716,13 @@ static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk,
 				int err;
 
 				/* sk_lock is held by caller so no one else can dequeue.
-				 * Unlock rx_lock since memcpy_to_msg() may sleep.
+				 * Unlock rx_lock since skb_copy_datagram_iter() may sleep.
 				 */
 				spin_unlock_bh(&vvs->rx_lock);
 
-				err = memcpy_to_msg(msg, skb->data, bytes_to_copy);
+				err = skb_copy_datagram_iter(skb, 0,
+							     &msg->msg_iter,
+							     bytes_to_copy);
 				if (err) {
 					/* Copy of message failed. Rest of
 					 * fragments will be freed without copy.
@@ -954,7 +1119,7 @@ static int virtio_transport_reset_no_sock(const struct virtio_transport *t,
 	if (!t)
 		return -ENOTCONN;
 
-	reply = virtio_transport_alloc_skb(&info, 0,
+	reply = virtio_transport_alloc_skb(&info, 0, false,
 					   le64_to_cpu(hdr->dst_cid),
 					   le32_to_cpu(hdr->dst_port),
 					   le64_to_cpu(hdr->src_cid),
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 55f8b9b0e06d..7482d0aca504 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -1228,7 +1228,7 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
 
 	xs->dev = dev;
 	xs->zc = xs->umem->zc;
-	xs->sg = !!(flags & XDP_USE_SG);
+	xs->sg = !!(xs->umem->flags & XDP_UMEM_SG_FLAG);
 	xs->queue_id = qid;
 	xp_add_xsk(xs->pool, xs);
 
diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c
index b3f7b310811e..49cb9f9a09be 100644
--- a/net/xdp/xsk_buff_pool.c
+++ b/net/xdp/xsk_buff_pool.c
@@ -170,6 +170,9 @@ int xp_assign_dev(struct xsk_buff_pool *pool,
 	if (err)
 		return err;
 
+	if (flags & XDP_USE_SG)
+		pool->umem->flags |= XDP_UMEM_SG_FLAG;
+
 	if (flags & XDP_USE_NEED_WAKEUP)
 		pool->uses_need_wakeup = true;
 	/* Tx needs to be explicitly woken up the first time.  Also
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index d24b4d4f620e..5cdd3bca3637 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -2566,7 +2566,7 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)
 	default:
 		BUG();
 	}
-	xdst = dst_alloc(dst_ops, NULL, 1, DST_OBSOLETE_NONE, 0);
+	xdst = dst_alloc(dst_ops, NULL, DST_OBSOLETE_NONE, 0);
 
 	if (likely(xdst)) {
 		memset_after(xdst, 0, u.dst);
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 4ccf4236031c..90af76fa9dd8 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -169,12 +169,16 @@ endif
 TPROGS_CFLAGS += -Wall -O2
 TPROGS_CFLAGS += -Wmissing-prototypes
 TPROGS_CFLAGS += -Wstrict-prototypes
+TPROGS_CFLAGS += $(call try-run,\
+	printf "int main() { return 0; }" |\
+	$(CC) -Werror -fsanitize=bounds -x c - -o "$$TMP",-fsanitize=bounds,)
 
 TPROGS_CFLAGS += -I$(objtree)/usr/include
 TPROGS_CFLAGS += -I$(srctree)/tools/testing/selftests/bpf/
 TPROGS_CFLAGS += -I$(LIBBPF_INCLUDE)
 TPROGS_CFLAGS += -I$(srctree)/tools/include
 TPROGS_CFLAGS += -I$(srctree)/tools/perf
+TPROGS_CFLAGS += -I$(srctree)/tools/lib
 TPROGS_CFLAGS += -DHAVE_ATTR_TEST=0
 
 ifdef SYSROOT
@@ -314,6 +318,9 @@ XDP_SAMPLE_CFLAGS += -Wall -O2 \
 
 $(obj)/$(XDP_SAMPLE): TPROGS_CFLAGS = $(XDP_SAMPLE_CFLAGS)
 $(obj)/$(XDP_SAMPLE): $(src)/xdp_sample_user.h $(src)/xdp_sample_shared.h
+# Override includes for trace_helpers.o because __must_check won't be defined
+# in our include path.
+$(obj)/$(TRACE_HELPERS): TPROGS_CFLAGS := $(TPROGS_CFLAGS) -D__must_check=
 
 -include $(BPF_SAMPLES_PATH)/Makefile.target
 
diff --git a/samples/bpf/syscall_tp_user.c b/samples/bpf/syscall_tp_user.c
index 7a788bb837fc..7a09ac74fac0 100644
--- a/samples/bpf/syscall_tp_user.c
+++ b/samples/bpf/syscall_tp_user.c
@@ -17,9 +17,9 @@
 
 static void usage(const char *cmd)
 {
-	printf("USAGE: %s [-i num_progs] [-h]\n", cmd);
-	printf("       -i num_progs      # number of progs of the test\n");
-	printf("       -h                # help\n");
+	printf("USAGE: %s [-i nr_tests] [-h]\n", cmd);
+	printf("       -i nr_tests      # rounds of test to run\n");
+	printf("       -h               # help\n");
 }
 
 static void verify_map(int map_id)
@@ -45,14 +45,14 @@ static void verify_map(int map_id)
 	}
 }
 
-static int test(char *filename, int num_progs)
+static int test(char *filename, int nr_tests)
 {
-	int map0_fds[num_progs], map1_fds[num_progs], fd, i, j = 0;
-	struct bpf_link *links[num_progs * 4];
-	struct bpf_object *objs[num_progs];
+	int map0_fds[nr_tests], map1_fds[nr_tests], fd, i, j = 0;
+	struct bpf_link **links = NULL;
+	struct bpf_object *objs[nr_tests];
 	struct bpf_program *prog;
 
-	for (i = 0; i < num_progs; i++) {
+	for (i = 0; i < nr_tests; i++) {
 		objs[i] = bpf_object__open_file(filename, NULL);
 		if (libbpf_get_error(objs[i])) {
 			fprintf(stderr, "opening BPF object file failed\n");
@@ -60,6 +60,19 @@ static int test(char *filename, int num_progs)
 			goto cleanup;
 		}
 
+		/* One-time initialization */
+		if (!links) {
+			int nr_progs = 0;
+
+			bpf_object__for_each_program(prog, objs[i])
+				nr_progs += 1;
+
+			links = calloc(nr_progs * nr_tests, sizeof(struct bpf_link *));
+
+			if (!links)
+				goto cleanup;
+		}
+
 		/* load BPF program */
 		if (bpf_object__load(objs[i])) {
 			fprintf(stderr, "loading BPF object file failed\n");
@@ -101,14 +114,18 @@ static int test(char *filename, int num_progs)
 	close(fd);
 
 	/* verify the map */
-	for (i = 0; i < num_progs; i++) {
+	for (i = 0; i < nr_tests; i++) {
 		verify_map(map0_fds[i]);
 		verify_map(map1_fds[i]);
 	}
 
 cleanup:
-	for (j--; j >= 0; j--)
-		bpf_link__destroy(links[j]);
+	if (links) {
+		for (j--; j >= 0; j--)
+			bpf_link__destroy(links[j]);
+
+		free(links);
+	}
 
 	for (i--; i >= 0; i--)
 		bpf_object__close(objs[i]);
@@ -117,13 +134,13 @@ cleanup:
 
 int main(int argc, char **argv)
 {
-	int opt, num_progs = 1;
+	int opt, nr_tests = 1;
 	char filename[256];
 
 	while ((opt = getopt(argc, argv, "i:h")) != -1) {
 		switch (opt) {
 		case 'i':
-			num_progs = atoi(optarg);
+			nr_tests = atoi(optarg);
 			break;
 		case 'h':
 		default:
@@ -134,5 +151,5 @@ int main(int argc, char **argv)
 
 	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
 
-	return test(filename, num_progs);
+	return test(filename, nr_tests);
 }
diff --git a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
index bd015ec9847b..2ce900f66d6e 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
@@ -36,11 +36,14 @@ CGROUP COMMANDS
 |		**cgroup_device** | **cgroup_inet4_bind** | **cgroup_inet6_bind** |
 |		**cgroup_inet4_post_bind** | **cgroup_inet6_post_bind** |
 |		**cgroup_inet4_connect** | **cgroup_inet6_connect** |
-|		**cgroup_inet4_getpeername** | **cgroup_inet6_getpeername** |
+|		**cgroup_unix_connect** | **cgroup_inet4_getpeername** |
+|		**cgroup_inet6_getpeername** | **cgroup_unix_getpeername** |
 |		**cgroup_inet4_getsockname** | **cgroup_inet6_getsockname** |
-|		**cgroup_udp4_sendmsg** | **cgroup_udp6_sendmsg** |
+|		**cgroup_unix_getsockname** | **cgroup_udp4_sendmsg** |
+|		**cgroup_udp6_sendmsg** | **cgroup_unix_sendmsg** |
 |		**cgroup_udp4_recvmsg** | **cgroup_udp6_recvmsg** |
-|		**cgroup_sysctl** | **cgroup_getsockopt** | **cgroup_setsockopt** |
+|		**cgroup_unix_recvmsg** | **cgroup_sysctl** |
+|		**cgroup_getsockopt** | **cgroup_setsockopt** |
 |		**cgroup_inet_sock_release** }
 |	*ATTACH_FLAGS* := { **multi** | **override** }
 
@@ -102,21 +105,28 @@ DESCRIPTION
 		  **post_bind6** return from bind(2) for an inet6 socket (since 4.17);
 		  **connect4** call to connect(2) for an inet4 socket (since 4.17);
 		  **connect6** call to connect(2) for an inet6 socket (since 4.17);
+		  **connect_unix** call to connect(2) for a unix socket (since 6.7);
 		  **sendmsg4** call to sendto(2), sendmsg(2), sendmmsg(2) for an
 		  unconnected udp4 socket (since 4.18);
 		  **sendmsg6** call to sendto(2), sendmsg(2), sendmmsg(2) for an
 		  unconnected udp6 socket (since 4.18);
+		  **sendmsg_unix** call to sendto(2), sendmsg(2), sendmmsg(2) for
+		  an unconnected unix socket (since 6.7);
 		  **recvmsg4** call to recvfrom(2), recvmsg(2), recvmmsg(2) for
 		  an unconnected udp4 socket (since 5.2);
 		  **recvmsg6** call to recvfrom(2), recvmsg(2), recvmmsg(2) for
 		  an unconnected udp6 socket (since 5.2);
+		  **recvmsg_unix** call to recvfrom(2), recvmsg(2), recvmmsg(2) for
+		  an unconnected unix socket (since 6.7);
 		  **sysctl** sysctl access (since 5.2);
 		  **getsockopt** call to getsockopt (since 5.3);
 		  **setsockopt** call to setsockopt (since 5.3);
 		  **getpeername4** call to getpeername(2) for an inet4 socket (since 5.8);
 		  **getpeername6** call to getpeername(2) for an inet6 socket (since 5.8);
+		  **getpeername_unix** call to getpeername(2) for a unix socket (since 6.7);
 		  **getsockname4** call to getsockname(2) for an inet4 socket (since 5.8);
 		  **getsockname6** call to getsockname(2) for an inet6 socket (since 5.8).
+		  **getsockname_unix** call to getsockname(2) for a unix socket (since 6.7);
 		  **sock_release** closing an userspace inet socket (since 5.9).
 
 	**bpftool cgroup detach** *CGROUP* *ATTACH_TYPE* *PROG*
diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
index dcae81bd27ed..58e6a5b10ef7 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
@@ -47,9 +47,11 @@ PROG COMMANDS
 |		**cgroup/sock** | **cgroup/dev** | **lwt_in** | **lwt_out** | **lwt_xmit** |
 |		**lwt_seg6local** | **sockops** | **sk_skb** | **sk_msg** | **lirc_mode2** |
 |		**cgroup/bind4** | **cgroup/bind6** | **cgroup/post_bind4** | **cgroup/post_bind6** |
-|		**cgroup/connect4** | **cgroup/connect6** | **cgroup/getpeername4** | **cgroup/getpeername6** |
-|               **cgroup/getsockname4** | **cgroup/getsockname6** | **cgroup/sendmsg4** | **cgroup/sendmsg6** |
-|		**cgroup/recvmsg4** | **cgroup/recvmsg6** | **cgroup/sysctl** |
+|		**cgroup/connect4** | **cgroup/connect6** | **cgroup/connect_unix** |
+|		**cgroup/getpeername4** | **cgroup/getpeername6** | **cgroup/getpeername_unix** |
+|		**cgroup/getsockname4** | **cgroup/getsockname6** | **cgroup/getsockname_unix** |
+|		**cgroup/sendmsg4** | **cgroup/sendmsg6** | **cgroup/sendmsg_unix** |
+|		**cgroup/recvmsg4** | **cgroup/recvmsg6** | **cgroup/recvmsg_unix** | **cgroup/sysctl** |
 |		**cgroup/getsockopt** | **cgroup/setsockopt** | **cgroup/sock_release** |
 |		**struct_ops** | **fentry** | **fexit** | **freplace** | **sk_lookup**
 |	}
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index 085bf18f3659..6e4f7ce6bc01 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -480,13 +480,13 @@ _bpftool()
                                 action tracepoint raw_tracepoint \
                                 xdp perf_event cgroup/skb cgroup/sock \
                                 cgroup/dev lwt_in lwt_out lwt_xmit \
-                                lwt_seg6local sockops sk_skb sk_msg \
-                                lirc_mode2 cgroup/bind4 cgroup/bind6 \
-                                cgroup/connect4 cgroup/connect6 \
-                                cgroup/getpeername4 cgroup/getpeername6 \
-                                cgroup/getsockname4 cgroup/getsockname6 \
-                                cgroup/sendmsg4 cgroup/sendmsg6 \
-                                cgroup/recvmsg4 cgroup/recvmsg6 \
+                                lwt_seg6local sockops sk_skb sk_msg lirc_mode2 \
+                                cgroup/bind4 cgroup/bind6 \
+                                cgroup/connect4 cgroup/connect6 cgroup/connect_unix \
+                                cgroup/getpeername4 cgroup/getpeername6 cgroup/getpeername_unix \
+                                cgroup/getsockname4 cgroup/getsockname6 cgroup/getsockname_unix \
+                                cgroup/sendmsg4 cgroup/sendmsg6 cgroup/sendmsg_unix \
+                                cgroup/recvmsg4 cgroup/recvmsg6 cgroup/recvmsg_unix \
                                 cgroup/post_bind4 cgroup/post_bind6 \
                                 cgroup/sysctl cgroup/getsockopt \
                                 cgroup/setsockopt cgroup/sock_release struct_ops \
diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c
index ac846b0805b4..af6898c0f388 100644
--- a/tools/bpf/bpftool/cgroup.c
+++ b/tools/bpf/bpftool/cgroup.c
@@ -28,13 +28,15 @@
 	"                        cgroup_device | cgroup_inet4_bind |\n" \
 	"                        cgroup_inet6_bind | cgroup_inet4_post_bind |\n" \
 	"                        cgroup_inet6_post_bind | cgroup_inet4_connect |\n" \
-	"                        cgroup_inet6_connect | cgroup_inet4_getpeername |\n" \
-	"                        cgroup_inet6_getpeername | cgroup_inet4_getsockname |\n" \
-	"                        cgroup_inet6_getsockname | cgroup_udp4_sendmsg |\n" \
-	"                        cgroup_udp6_sendmsg | cgroup_udp4_recvmsg |\n" \
-	"                        cgroup_udp6_recvmsg | cgroup_sysctl |\n" \
-	"                        cgroup_getsockopt | cgroup_setsockopt |\n" \
-	"                        cgroup_inet_sock_release }"
+	"                        cgroup_inet6_connect | cgroup_unix_connect |\n" \
+	"                        cgroup_inet4_getpeername | cgroup_inet6_getpeername |\n" \
+	"                        cgroup_unix_getpeername | cgroup_inet4_getsockname |\n" \
+	"                        cgroup_inet6_getsockname | cgroup_unix_getsockname |\n" \
+	"                        cgroup_udp4_sendmsg | cgroup_udp6_sendmsg |\n" \
+	"                        cgroup_unix_sendmsg | cgroup_udp4_recvmsg |\n" \
+	"                        cgroup_udp6_recvmsg | cgroup_unix_recvmsg |\n" \
+	"                        cgroup_sysctl | cgroup_getsockopt |\n" \
+	"                        cgroup_setsockopt | cgroup_inet_sock_release }"
 
 static unsigned int query_flags;
 static struct btf *btf_vmlinux;
diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c
index 2883660d6b67..ee3ce2b8000d 100644
--- a/tools/bpf/bpftool/gen.c
+++ b/tools/bpf/bpftool/gen.c
@@ -708,17 +708,22 @@ static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *h
 
 		codegen("\
 		\n\
-			skel->%1$s = skel_prep_map_data((void *)\"\\	    \n\
-		", ident);
+			{						    \n\
+				static const char data[] __attribute__((__aligned__(8))) = \"\\\n\
+		");
 		mmap_data = bpf_map__initial_value(map, &mmap_size);
 		print_hex(mmap_data, mmap_size);
 		codegen("\
 		\n\
-		\", %1$zd, %2$zd);					    \n\
-			if (!skel->%3$s)				    \n\
-				goto cleanup;				    \n\
-			skel->maps.%3$s.initial_value = (__u64) (long) skel->%3$s;\n\
-		", bpf_map_mmap_sz(map), mmap_size, ident);
+		\";							    \n\
+									    \n\
+				skel->%1$s = skel_prep_map_data((void *)data, %2$zd,\n\
+								sizeof(data) - 1);\n\
+				if (!skel->%1$s)			    \n\
+					goto cleanup;			    \n\
+				skel->maps.%1$s.initial_value = (__u64) (long) skel->%1$s;\n\
+			}						    \n\
+			", ident, bpf_map_mmap_sz(map));
 	}
 	codegen("\
 		\n\
@@ -733,32 +738,30 @@ static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *h
 		{							    \n\
 			struct bpf_load_and_run_opts opts = {};		    \n\
 			int err;					    \n\
-									    \n\
-			opts.ctx = (struct bpf_loader_ctx *)skel;	    \n\
-			opts.data_sz = %2$d;				    \n\
-			opts.data = (void *)\"\\			    \n\
+			static const char opts_data[] __attribute__((__aligned__(8))) = \"\\\n\
 		",
-		obj_name, opts.data_sz);
+		obj_name);
 	print_hex(opts.data, opts.data_sz);
 	codegen("\
 		\n\
 		\";							    \n\
+			static const char opts_insn[] __attribute__((__aligned__(8))) = \"\\\n\
 		");
-
-	codegen("\
-		\n\
-			opts.insns_sz = %d;				    \n\
-			opts.insns = (void *)\"\\			    \n\
-		",
-		opts.insns_sz);
 	print_hex(opts.insns, opts.insns_sz);
 	codegen("\
 		\n\
 		\";							    \n\
+									    \n\
+			opts.ctx = (struct bpf_loader_ctx *)skel;	    \n\
+			opts.data_sz = sizeof(opts_data) - 1;		    \n\
+			opts.data = (void *)opts_data;			    \n\
+			opts.insns_sz = sizeof(opts_insn) - 1;		    \n\
+			opts.insns = (void *)opts_insn;			    \n\
+									    \n\
 			err = bpf_load_and_run(&opts);			    \n\
 			if (err < 0)					    \n\
 				return err;				    \n\
-		", obj_name);
+		");
 	bpf_object__for_each_map(map, obj) {
 		const char *mmap_flags;
 
@@ -1209,7 +1212,7 @@ static int do_skeleton(int argc, char **argv)
 	codegen("\
 		\n\
 									    \n\
-			s->data = (void *)%2$s__elf_bytes(&s->data_sz);	    \n\
+			s->data = %1$s__elf_bytes(&s->data_sz);		    \n\
 									    \n\
 			obj->skeleton = s;				    \n\
 			return 0;					    \n\
@@ -1218,12 +1221,12 @@ static int do_skeleton(int argc, char **argv)
 			return err;					    \n\
 		}							    \n\
 									    \n\
-		static inline const void *%2$s__elf_bytes(size_t *sz)	    \n\
+		static inline const void *%1$s__elf_bytes(size_t *sz)	    \n\
 		{							    \n\
-			*sz = %1$d;					    \n\
-			return (const void *)\"\\			    \n\
-		"
-		, file_sz, obj_name);
+			static const char data[] __attribute__((__aligned__(8))) = \"\\\n\
+		",
+		obj_name
+	);
 
 	/* embed contents of BPF object file */
 	print_hex(obj_data, file_sz);
@@ -1231,6 +1234,9 @@ static int do_skeleton(int argc, char **argv)
 	codegen("\
 		\n\
 		\";							    \n\
+									    \n\
+			*sz = sizeof(data) - 1;				    \n\
+			return (const void *)data;			    \n\
 		}							    \n\
 									    \n\
 		#ifdef __cplusplus					    \n\
diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c
index 2e5c231e08ac..4b1407b05056 100644
--- a/tools/bpf/bpftool/link.c
+++ b/tools/bpf/bpftool/link.c
@@ -265,6 +265,7 @@ show_kprobe_multi_json(struct bpf_link_info *info, json_writer_t *wtr)
 	jsonw_bool_field(json_wtr, "retprobe",
 			 info->kprobe_multi.flags & BPF_F_KPROBE_MULTI_RETURN);
 	jsonw_uint_field(json_wtr, "func_cnt", info->kprobe_multi.count);
+	jsonw_uint_field(json_wtr, "missed", info->kprobe_multi.missed);
 	jsonw_name(json_wtr, "funcs");
 	jsonw_start_array(json_wtr);
 	addrs = u64_to_ptr(info->kprobe_multi.addrs);
@@ -301,6 +302,7 @@ show_perf_event_kprobe_json(struct bpf_link_info *info, json_writer_t *wtr)
 	jsonw_string_field(wtr, "func",
 			   u64_to_ptr(info->perf_event.kprobe.func_name));
 	jsonw_uint_field(wtr, "offset", info->perf_event.kprobe.offset);
+	jsonw_uint_field(wtr, "missed", info->perf_event.kprobe.missed);
 }
 
 static void
@@ -641,6 +643,8 @@ static void show_kprobe_multi_plain(struct bpf_link_info *info)
 	else
 		printf("\n\tkprobe.multi  ");
 	printf("func_cnt %u  ", info->kprobe_multi.count);
+	if (info->kprobe_multi.missed)
+		printf("missed %llu  ", info->kprobe_multi.missed);
 	addrs = (__u64 *)u64_to_ptr(info->kprobe_multi.addrs);
 	qsort(addrs, info->kprobe_multi.count, sizeof(__u64), cmp_u64);
 
@@ -683,6 +687,8 @@ static void show_perf_event_kprobe_plain(struct bpf_link_info *info)
 	printf("%s", buf);
 	if (info->perf_event.kprobe.offset)
 		printf("+%#x", info->perf_event.kprobe.offset);
+	if (info->perf_event.kprobe.missed)
+		printf("  missed %llu", info->perf_event.kprobe.missed);
 	printf("  ");
 }
 
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index 8443a149dd17..7ec4f5671e7a 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -2475,9 +2475,10 @@ static int do_help(int argc, char **argv)
 		"                 sk_reuseport | flow_dissector | cgroup/sysctl |\n"
 		"                 cgroup/bind4 | cgroup/bind6 | cgroup/post_bind4 |\n"
 		"                 cgroup/post_bind6 | cgroup/connect4 | cgroup/connect6 |\n"
-		"                 cgroup/getpeername4 | cgroup/getpeername6 |\n"
-		"                 cgroup/getsockname4 | cgroup/getsockname6 | cgroup/sendmsg4 |\n"
-		"                 cgroup/sendmsg6 | cgroup/recvmsg4 | cgroup/recvmsg6 |\n"
+		"                 cgroup/connect_unix | cgroup/getpeername4 | cgroup/getpeername6 |\n"
+		"                 cgroup/getpeername_unix | cgroup/getsockname4 | cgroup/getsockname6 |\n"
+		"                 cgroup/getsockname_unix | cgroup/sendmsg4 | cgroup/sendmsg6 |\n"
+		"                 cgroup/sendmsg°unix | cgroup/recvmsg4 | cgroup/recvmsg6 | cgroup/recvmsg_unix |\n"
 		"                 cgroup/getsockopt | cgroup/setsockopt | cgroup/sock_release |\n"
 		"                 struct_ops | fentry | fexit | freplace | sk_lookup }\n"
 		"       ATTACH_TYPE := { sk_msg_verdict | sk_skb_verdict | sk_skb_stream_verdict |\n"
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 0448700890f7..7ba61b75bc0e 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -932,7 +932,14 @@ enum bpf_map_type {
 	 */
 	BPF_MAP_TYPE_CGROUP_STORAGE = BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED,
 	BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
-	BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
+	BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE_DEPRECATED,
+	/* BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE is available to bpf programs
+	 * attaching to a cgroup. The new mechanism (BPF_MAP_TYPE_CGRP_STORAGE +
+	 * local percpu kptr) supports all BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE
+	 * functionality and more. So mark * BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE
+	 * deprecated.
+	 */
+	BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE = BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE_DEPRECATED,
 	BPF_MAP_TYPE_QUEUE,
 	BPF_MAP_TYPE_STACK,
 	BPF_MAP_TYPE_SK_STORAGE,
@@ -1040,6 +1047,11 @@ enum bpf_attach_type {
 	BPF_TCX_INGRESS,
 	BPF_TCX_EGRESS,
 	BPF_TRACE_UPROBE_MULTI,
+	BPF_CGROUP_UNIX_CONNECT,
+	BPF_CGROUP_UNIX_SENDMSG,
+	BPF_CGROUP_UNIX_RECVMSG,
+	BPF_CGROUP_UNIX_GETPEERNAME,
+	BPF_CGROUP_UNIX_GETSOCKNAME,
 	__MAX_BPF_ATTACH_TYPE
 };
 
@@ -2697,8 +2709,8 @@ union bpf_attr {
  * 		*bpf_socket* should be one of the following:
  *
  * 		* **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**.
- * 		* **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**
- * 		  and **BPF_CGROUP_INET6_CONNECT**.
+ *		* **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**,
+ *		  **BPF_CGROUP_INET6_CONNECT** and **BPF_CGROUP_UNIX_CONNECT**.
  *
  * 		This helper actually implements a subset of **setsockopt()**.
  * 		It supports the following *level*\ s:
@@ -2936,8 +2948,8 @@ union bpf_attr {
  * 		*bpf_socket* should be one of the following:
  *
  * 		* **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**.
- * 		* **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**
- * 		  and **BPF_CGROUP_INET6_CONNECT**.
+ *		* **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**,
+ *		  **BPF_CGROUP_INET6_CONNECT** and **BPF_CGROUP_UNIX_CONNECT**.
  *
  * 		This helper actually implements a subset of **getsockopt()**.
  * 		It supports the same set of *optname*\ s that is supported by
@@ -3257,6 +3269,11 @@ union bpf_attr {
  *			and *params*->smac will not be set as output. A common
  *			use case is to call **bpf_redirect_neigh**\ () after
  *			doing **bpf_fib_lookup**\ ().
+ *		**BPF_FIB_LOOKUP_SRC**
+ *			Derive and set source IP addr in *params*->ipv{4,6}_src
+ *			for the nexthop. If the src addr cannot be derived,
+ *			**BPF_FIB_LKUP_RET_NO_SRC_ADDR** is returned. In this
+ *			case, *params*->dmac and *params*->smac are not set either.
  *
  *		*ctx* is either **struct xdp_md** for XDP programs or
  *		**struct sk_buff** tc cls_act programs.
@@ -5089,6 +5106,8 @@ union bpf_attr {
  *		**BPF_F_TIMER_ABS**
  *			Start the timer in absolute expire value instead of the
  *			default relative one.
+ *		**BPF_F_TIMER_CPU_PIN**
+ *			Timer will be pinned to the CPU of the caller.
  *
  *	Return
  *		0 on success.
@@ -6525,6 +6544,7 @@ struct bpf_link_info {
 			__aligned_u64 addrs;
 			__u32 count; /* in/out: kprobe_multi function count */
 			__u32 flags;
+			__u64 missed;
 		} kprobe_multi;
 		struct {
 			__u32 type; /* enum bpf_perf_event_type */
@@ -6540,6 +6560,7 @@ struct bpf_link_info {
 					__u32 name_len;
 					__u32 offset; /* offset from func_name */
 					__u64 addr;
+					__u64 missed;
 				} kprobe; /* BPF_PERF_EVENT_KPROBE, BPF_PERF_EVENT_KRETPROBE */
 				struct {
 					__aligned_u64 tp_name;   /* in/out */
@@ -6953,6 +6974,7 @@ enum {
 	BPF_FIB_LOOKUP_OUTPUT  = (1U << 1),
 	BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2),
 	BPF_FIB_LOOKUP_TBID    = (1U << 3),
+	BPF_FIB_LOOKUP_SRC     = (1U << 4),
 };
 
 enum {
@@ -6965,6 +6987,7 @@ enum {
 	BPF_FIB_LKUP_RET_UNSUPP_LWT,   /* fwd requires encapsulation */
 	BPF_FIB_LKUP_RET_NO_NEIGH,     /* no neighbor entry for nh */
 	BPF_FIB_LKUP_RET_FRAG_NEEDED,  /* fragmentation required to fwd */
+	BPF_FIB_LKUP_RET_NO_SRC_ADDR,  /* failed to derive IP src addr */
 };
 
 struct bpf_fib_lookup {
@@ -6999,6 +7022,9 @@ struct bpf_fib_lookup {
 		__u32	rt_metric;
 	};
 
+	/* input: source address to consider for lookup
+	 * output: source address result from lookup
+	 */
 	union {
 		__be32		ipv4_src;
 		__u32		ipv6_src[4];  /* in6_addr; network order */
@@ -7300,9 +7326,11 @@ struct bpf_core_relo {
  * Flags to control bpf_timer_start() behaviour.
  *     - BPF_F_TIMER_ABS: Timeout passed is absolute time, by default it is
  *       relative to current time.
+ *     - BPF_F_TIMER_CPU_PIN: Timer will be pinned to the CPU of the caller.
  */
 enum {
 	BPF_F_TIMER_ABS = (1ULL << 0),
+	BPF_F_TIMER_CPU_PIN = (1ULL << 1),
 };
 
 /* BPF numbers iterator state */
diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h
index c1634b95c223..2943a151d4f1 100644
--- a/tools/include/uapi/linux/netdev.h
+++ b/tools/include/uapi/linux/netdev.h
@@ -38,11 +38,27 @@ enum netdev_xdp_act {
 	NETDEV_XDP_ACT_MASK = 127,
 };
 
+/**
+ * enum netdev_xdp_rx_metadata
+ * @NETDEV_XDP_RX_METADATA_TIMESTAMP: Device is capable of exposing receive HW
+ *   timestamp via bpf_xdp_metadata_rx_timestamp().
+ * @NETDEV_XDP_RX_METADATA_HASH: Device is capable of exposing receive packet
+ *   hash via bpf_xdp_metadata_rx_hash().
+ */
+enum netdev_xdp_rx_metadata {
+	NETDEV_XDP_RX_METADATA_TIMESTAMP = 1,
+	NETDEV_XDP_RX_METADATA_HASH = 2,
+
+	/* private: */
+	NETDEV_XDP_RX_METADATA_MASK = 3,
+};
+
 enum {
 	NETDEV_A_DEV_IFINDEX = 1,
 	NETDEV_A_DEV_PAD,
 	NETDEV_A_DEV_XDP_FEATURES,
 	NETDEV_A_DEV_XDP_ZC_MAX_SEGS,
+	NETDEV_A_DEV_XDP_RX_METADATA_FEATURES,
 
 	__NETDEV_A_DEV_MAX,
 	NETDEV_A_DEV_MAX = (__NETDEV_A_DEV_MAX - 1)
diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h
index bbab9ad9dc5a..77ceea575dc7 100644
--- a/tools/lib/bpf/bpf_helpers.h
+++ b/tools/lib/bpf/bpf_helpers.h
@@ -181,6 +181,7 @@ enum libbpf_tristate {
 #define __ksym __attribute__((section(".ksyms")))
 #define __kptr_untrusted __attribute__((btf_type_tag("kptr_untrusted")))
 #define __kptr __attribute__((btf_type_tag("kptr")))
+#define __percpu_kptr __attribute__((btf_type_tag("percpu_kptr")))
 
 #define bpf_ksym_exists(sym) ({									\
 	_Static_assert(!__builtin_constant_p(!!sym), #sym " should be marked as __weak");	\
diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h
index 3803479dbe10..1c13f8e88833 100644
--- a/tools/lib/bpf/bpf_tracing.h
+++ b/tools/lib/bpf/bpf_tracing.h
@@ -362,8 +362,6 @@ struct pt_regs___arm64 {
 #define __PT_PARM7_REG a6
 #define __PT_PARM8_REG a7
 
-/* riscv does not select ARCH_HAS_SYSCALL_WRAPPER. */
-#define PT_REGS_SYSCALL_REGS(ctx) ctx
 #define __PT_PARM1_SYSCALL_REG __PT_PARM1_REG
 #define __PT_PARM2_SYSCALL_REG __PT_PARM2_REG
 #define __PT_PARM3_SYSCALL_REG __PT_PARM3_REG
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 8484b563b53d..ee95fd379d4d 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -448,6 +448,165 @@ static int btf_parse_type_sec(struct btf *btf)
 	return 0;
 }
 
+static int btf_validate_str(const struct btf *btf, __u32 str_off, const char *what, __u32 type_id)
+{
+	const char *s;
+
+	s = btf__str_by_offset(btf, str_off);
+	if (!s) {
+		pr_warn("btf: type [%u]: invalid %s (string offset %u)\n", type_id, what, str_off);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int btf_validate_id(const struct btf *btf, __u32 id, __u32 ctx_id)
+{
+	const struct btf_type *t;
+
+	t = btf__type_by_id(btf, id);
+	if (!t) {
+		pr_warn("btf: type [%u]: invalid referenced type ID %u\n", ctx_id, id);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int btf_validate_type(const struct btf *btf, const struct btf_type *t, __u32 id)
+{
+	__u32 kind = btf_kind(t);
+	int err, i, n;
+
+	err = btf_validate_str(btf, t->name_off, "type name", id);
+	if (err)
+		return err;
+
+	switch (kind) {
+	case BTF_KIND_UNKN:
+	case BTF_KIND_INT:
+	case BTF_KIND_FWD:
+	case BTF_KIND_FLOAT:
+		break;
+	case BTF_KIND_PTR:
+	case BTF_KIND_TYPEDEF:
+	case BTF_KIND_VOLATILE:
+	case BTF_KIND_CONST:
+	case BTF_KIND_RESTRICT:
+	case BTF_KIND_VAR:
+	case BTF_KIND_DECL_TAG:
+	case BTF_KIND_TYPE_TAG:
+		err = btf_validate_id(btf, t->type, id);
+		if (err)
+			return err;
+		break;
+	case BTF_KIND_ARRAY: {
+		const struct btf_array *a = btf_array(t);
+
+		err = btf_validate_id(btf, a->type, id);
+		err = err ?: btf_validate_id(btf, a->index_type, id);
+		if (err)
+			return err;
+		break;
+	}
+	case BTF_KIND_STRUCT:
+	case BTF_KIND_UNION: {
+		const struct btf_member *m = btf_members(t);
+
+		n = btf_vlen(t);
+		for (i = 0; i < n; i++, m++) {
+			err = btf_validate_str(btf, m->name_off, "field name", id);
+			err = err ?: btf_validate_id(btf, m->type, id);
+			if (err)
+				return err;
+		}
+		break;
+	}
+	case BTF_KIND_ENUM: {
+		const struct btf_enum *m = btf_enum(t);
+
+		n = btf_vlen(t);
+		for (i = 0; i < n; i++, m++) {
+			err = btf_validate_str(btf, m->name_off, "enum name", id);
+			if (err)
+				return err;
+		}
+		break;
+	}
+	case BTF_KIND_ENUM64: {
+		const struct btf_enum64 *m = btf_enum64(t);
+
+		n = btf_vlen(t);
+		for (i = 0; i < n; i++, m++) {
+			err = btf_validate_str(btf, m->name_off, "enum name", id);
+			if (err)
+				return err;
+		}
+		break;
+	}
+	case BTF_KIND_FUNC: {
+		const struct btf_type *ft;
+
+		err = btf_validate_id(btf, t->type, id);
+		if (err)
+			return err;
+		ft = btf__type_by_id(btf, t->type);
+		if (btf_kind(ft) != BTF_KIND_FUNC_PROTO) {
+			pr_warn("btf: type [%u]: referenced type [%u] is not FUNC_PROTO\n", id, t->type);
+			return -EINVAL;
+		}
+		break;
+	}
+	case BTF_KIND_FUNC_PROTO: {
+		const struct btf_param *m = btf_params(t);
+
+		n = btf_vlen(t);
+		for (i = 0; i < n; i++, m++) {
+			err = btf_validate_str(btf, m->name_off, "param name", id);
+			err = err ?: btf_validate_id(btf, m->type, id);
+			if (err)
+				return err;
+		}
+		break;
+	}
+	case BTF_KIND_DATASEC: {
+		const struct btf_var_secinfo *m = btf_var_secinfos(t);
+
+		n = btf_vlen(t);
+		for (i = 0; i < n; i++, m++) {
+			err = btf_validate_id(btf, m->type, id);
+			if (err)
+				return err;
+		}
+		break;
+	}
+	default:
+		pr_warn("btf: type [%u]: unrecognized kind %u\n", id, kind);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/* Validate basic sanity of BTF. It's intentionally less thorough than
+ * kernel's validation and validates only properties of BTF that libbpf relies
+ * on to be correct (e.g., valid type IDs, valid string offsets, etc)
+ */
+static int btf_sanity_check(const struct btf *btf)
+{
+	const struct btf_type *t;
+	__u32 i, n = btf__type_cnt(btf);
+	int err;
+
+	for (i = 1; i < n; i++) {
+		t = btf_type_by_id(btf, i);
+		err = btf_validate_type(btf, t, i);
+		if (err)
+			return err;
+	}
+	return 0;
+}
+
 __u32 btf__type_cnt(const struct btf *btf)
 {
 	return btf->start_id + btf->nr_types;
@@ -902,6 +1061,7 @@ static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf)
 
 	err = btf_parse_str_sec(btf);
 	err = err ?: btf_parse_type_sec(btf);
+	err = err ?: btf_sanity_check(btf);
 	if (err)
 		goto done;
 
diff --git a/tools/lib/bpf/elf.c b/tools/lib/bpf/elf.c
index 9d0296c1726a..2a158e8a8b7c 100644
--- a/tools/lib/bpf/elf.c
+++ b/tools/lib/bpf/elf.c
@@ -1,5 +1,8 @@
 // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
 
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
 #include <libelf.h>
 #include <gelf.h>
 #include <fcntl.h>
@@ -10,6 +13,17 @@
 
 #define STRERR_BUFSIZE  128
 
+/* A SHT_GNU_versym section holds 16-bit words. This bit is set if
+ * the symbol is hidden and can only be seen when referenced using an
+ * explicit version number. This is a GNU extension.
+ */
+#define VERSYM_HIDDEN	0x8000
+
+/* This is the mask for the rest of the data in a word read from a
+ * SHT_GNU_versym section.
+ */
+#define VERSYM_VERSION	0x7fff
+
 int elf_open(const char *binary_path, struct elf_fd *elf_fd)
 {
 	char errmsg[STRERR_BUFSIZE];
@@ -64,13 +78,18 @@ struct elf_sym {
 	const char *name;
 	GElf_Sym sym;
 	GElf_Shdr sh;
+	int ver;
+	bool hidden;
 };
 
 struct elf_sym_iter {
 	Elf *elf;
 	Elf_Data *syms;
+	Elf_Data *versyms;
+	Elf_Data *verdefs;
 	size_t nr_syms;
 	size_t strtabidx;
+	size_t verdef_strtabidx;
 	size_t next_sym_idx;
 	struct elf_sym sym;
 	int st_type;
@@ -111,6 +130,26 @@ static int elf_sym_iter_new(struct elf_sym_iter *iter,
 	iter->nr_syms = iter->syms->d_size / sh.sh_entsize;
 	iter->elf = elf;
 	iter->st_type = st_type;
+
+	/* Version symbol table is meaningful to dynsym only */
+	if (sh_type != SHT_DYNSYM)
+		return 0;
+
+	scn = elf_find_next_scn_by_type(elf, SHT_GNU_versym, NULL);
+	if (!scn)
+		return 0;
+	iter->versyms = elf_getdata(scn, 0);
+
+	scn = elf_find_next_scn_by_type(elf, SHT_GNU_verdef, NULL);
+	if (!scn) {
+		pr_debug("elf: failed to find verdef ELF sections in '%s'\n", binary_path);
+		return -ENOENT;
+	}
+	if (!gelf_getshdr(scn, &sh))
+		return -EINVAL;
+	iter->verdef_strtabidx = sh.sh_link;
+	iter->verdefs = elf_getdata(scn, 0);
+
 	return 0;
 }
 
@@ -119,6 +158,7 @@ static struct elf_sym *elf_sym_iter_next(struct elf_sym_iter *iter)
 	struct elf_sym *ret = &iter->sym;
 	GElf_Sym *sym = &ret->sym;
 	const char *name = NULL;
+	GElf_Versym versym;
 	Elf_Scn *sym_scn;
 	size_t idx;
 
@@ -138,12 +178,80 @@ static struct elf_sym *elf_sym_iter_next(struct elf_sym_iter *iter)
 
 		iter->next_sym_idx = idx + 1;
 		ret->name = name;
+		ret->ver = 0;
+		ret->hidden = false;
+
+		if (iter->versyms) {
+			if (!gelf_getversym(iter->versyms, idx, &versym))
+				continue;
+			ret->ver = versym & VERSYM_VERSION;
+			ret->hidden = versym & VERSYM_HIDDEN;
+		}
 		return ret;
 	}
 
 	return NULL;
 }
 
+static const char *elf_get_vername(struct elf_sym_iter *iter, int ver)
+{
+	GElf_Verdaux verdaux;
+	GElf_Verdef verdef;
+	int offset;
+
+	offset = 0;
+	while (gelf_getverdef(iter->verdefs, offset, &verdef)) {
+		if (verdef.vd_ndx != ver) {
+			if (!verdef.vd_next)
+				break;
+
+			offset += verdef.vd_next;
+			continue;
+		}
+
+		if (!gelf_getverdaux(iter->verdefs, offset + verdef.vd_aux, &verdaux))
+			break;
+
+		return elf_strptr(iter->elf, iter->verdef_strtabidx, verdaux.vda_name);
+
+	}
+	return NULL;
+}
+
+static bool symbol_match(struct elf_sym_iter *iter, int sh_type, struct elf_sym *sym,
+			 const char *name, size_t name_len, const char *lib_ver)
+{
+	const char *ver_name;
+
+	/* Symbols are in forms of func, func@LIB_VER or func@@LIB_VER
+	 * make sure the func part matches the user specified name
+	 */
+	if (strncmp(sym->name, name, name_len) != 0)
+		return false;
+
+	/* ...but we don't want a search for "foo" to match 'foo2" also, so any
+	 * additional characters in sname should be of the form "@@LIB".
+	 */
+	if (sym->name[name_len] != '\0' && sym->name[name_len] != '@')
+		return false;
+
+	/* If user does not specify symbol version, then we got a match */
+	if (!lib_ver)
+		return true;
+
+	/* If user specifies symbol version, for dynamic symbols,
+	 * get version name from ELF verdef section for comparison.
+	 */
+	if (sh_type == SHT_DYNSYM) {
+		ver_name = elf_get_vername(iter, sym->ver);
+		if (!ver_name)
+			return false;
+		return strcmp(ver_name, lib_ver) == 0;
+	}
+
+	/* For normal symbols, it is already in form of func@LIB_VER */
+	return strcmp(sym->name, name) == 0;
+}
 
 /* Transform symbol's virtual address (absolute for binaries and relative
  * for shared libs) into file offset, which is what kernel is expecting
@@ -166,7 +274,8 @@ static unsigned long elf_sym_offset(struct elf_sym *sym)
 long elf_find_func_offset(Elf *elf, const char *binary_path, const char *name)
 {
 	int i, sh_types[2] = { SHT_DYNSYM, SHT_SYMTAB };
-	bool is_shared_lib, is_name_qualified;
+	const char *at_symbol, *lib_ver;
+	bool is_shared_lib;
 	long ret = -ENOENT;
 	size_t name_len;
 	GElf_Ehdr ehdr;
@@ -179,9 +288,18 @@ long elf_find_func_offset(Elf *elf, const char *binary_path, const char *name)
 	/* for shared lib case, we do not need to calculate relative offset */
 	is_shared_lib = ehdr.e_type == ET_DYN;
 
-	name_len = strlen(name);
-	/* Does name specify "@@LIB"? */
-	is_name_qualified = strstr(name, "@@") != NULL;
+	/* Does name specify "@@LIB_VER" or "@LIB_VER" ? */
+	at_symbol = strchr(name, '@');
+	if (at_symbol) {
+		name_len = at_symbol - name;
+		/* skip second @ if it's @@LIB_VER case */
+		if (at_symbol[1] == '@')
+			at_symbol++;
+		lib_ver = at_symbol + 1;
+	} else {
+		name_len = strlen(name);
+		lib_ver = NULL;
+	}
 
 	/* Search SHT_DYNSYM, SHT_SYMTAB for symbol. This search order is used because if
 	 * a binary is stripped, it may only have SHT_DYNSYM, and a fully-statically
@@ -201,20 +319,17 @@ long elf_find_func_offset(Elf *elf, const char *binary_path, const char *name)
 			goto out;
 
 		while ((sym = elf_sym_iter_next(&iter))) {
-			/* User can specify func, func@@LIB or func@@LIB_VERSION. */
-			if (strncmp(sym->name, name, name_len) != 0)
-				continue;
-			/* ...but we don't want a search for "foo" to match 'foo2" also, so any
-			 * additional characters in sname should be of the form "@@LIB".
-			 */
-			if (!is_name_qualified && sym->name[name_len] != '\0' && sym->name[name_len] != '@')
+			if (!symbol_match(&iter, sh_types[i], sym, name, name_len, lib_ver))
 				continue;
 
 			cur_bind = GELF_ST_BIND(sym->sym.st_info);
 
 			if (ret > 0) {
 				/* handle multiple matches */
-				if (last_bind != STB_WEAK && cur_bind != STB_WEAK) {
+				if (elf_sym_offset(sym) == ret) {
+					/* same offset, no problem */
+					continue;
+				} else if (last_bind != STB_WEAK && cur_bind != STB_WEAK) {
 					/* Only accept one non-weak bind. */
 					pr_warn("elf: ambiguous match for '%s', '%s' in '%s'\n",
 						sym->name, name, binary_path);
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 96ff1aa4bf6a..a295f6acf98f 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -82,17 +82,22 @@ static const char * const attach_type_name[] = {
 	[BPF_CGROUP_INET6_BIND]		= "cgroup_inet6_bind",
 	[BPF_CGROUP_INET4_CONNECT]	= "cgroup_inet4_connect",
 	[BPF_CGROUP_INET6_CONNECT]	= "cgroup_inet6_connect",
+	[BPF_CGROUP_UNIX_CONNECT]       = "cgroup_unix_connect",
 	[BPF_CGROUP_INET4_POST_BIND]	= "cgroup_inet4_post_bind",
 	[BPF_CGROUP_INET6_POST_BIND]	= "cgroup_inet6_post_bind",
 	[BPF_CGROUP_INET4_GETPEERNAME]	= "cgroup_inet4_getpeername",
 	[BPF_CGROUP_INET6_GETPEERNAME]	= "cgroup_inet6_getpeername",
+	[BPF_CGROUP_UNIX_GETPEERNAME]	= "cgroup_unix_getpeername",
 	[BPF_CGROUP_INET4_GETSOCKNAME]	= "cgroup_inet4_getsockname",
 	[BPF_CGROUP_INET6_GETSOCKNAME]	= "cgroup_inet6_getsockname",
+	[BPF_CGROUP_UNIX_GETSOCKNAME]	= "cgroup_unix_getsockname",
 	[BPF_CGROUP_UDP4_SENDMSG]	= "cgroup_udp4_sendmsg",
 	[BPF_CGROUP_UDP6_SENDMSG]	= "cgroup_udp6_sendmsg",
+	[BPF_CGROUP_UNIX_SENDMSG]	= "cgroup_unix_sendmsg",
 	[BPF_CGROUP_SYSCTL]		= "cgroup_sysctl",
 	[BPF_CGROUP_UDP4_RECVMSG]	= "cgroup_udp4_recvmsg",
 	[BPF_CGROUP_UDP6_RECVMSG]	= "cgroup_udp6_recvmsg",
+	[BPF_CGROUP_UNIX_RECVMSG]	= "cgroup_unix_recvmsg",
 	[BPF_CGROUP_GETSOCKOPT]		= "cgroup_getsockopt",
 	[BPF_CGROUP_SETSOCKOPT]		= "cgroup_setsockopt",
 	[BPF_SK_SKB_STREAM_PARSER]	= "sk_skb_stream_parser",
@@ -436,9 +441,11 @@ struct bpf_program {
 	int fd;
 	bool autoload;
 	bool autoattach;
+	bool sym_global;
 	bool mark_btf_static;
 	enum bpf_prog_type type;
 	enum bpf_attach_type expected_attach_type;
+	int exception_cb_idx;
 
 	int prog_ifindex;
 	__u32 attach_btf_obj_fd;
@@ -765,6 +772,7 @@ bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog,
 
 	prog->type = BPF_PROG_TYPE_UNSPEC;
 	prog->fd = -1;
+	prog->exception_cb_idx = -1;
 
 	/* libbpf's convention for SEC("?abc...") is that it's just like
 	 * SEC("abc...") but the corresponding bpf_program starts out with
@@ -871,14 +879,16 @@ bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data,
 		if (err)
 			return err;
 
+		if (ELF64_ST_BIND(sym->st_info) != STB_LOCAL)
+			prog->sym_global = true;
+
 		/* if function is a global/weak symbol, but has restricted
 		 * (STV_HIDDEN or STV_INTERNAL) visibility, mark its BTF FUNC
 		 * as static to enable more permissive BPF verification mode
 		 * with more outside context available to BPF verifier
 		 */
-		if (ELF64_ST_BIND(sym->st_info) != STB_LOCAL
-		    && (ELF64_ST_VISIBILITY(sym->st_other) == STV_HIDDEN
-			|| ELF64_ST_VISIBILITY(sym->st_other) == STV_INTERNAL))
+		if (prog->sym_global && (ELF64_ST_VISIBILITY(sym->st_other) == STV_HIDDEN
+		    || ELF64_ST_VISIBILITY(sym->st_other) == STV_INTERNAL))
 			prog->mark_btf_static = true;
 
 		nr_progs++;
@@ -3142,6 +3152,86 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
 		}
 	}
 
+	if (!kernel_supports(obj, FEAT_BTF_DECL_TAG))
+		goto skip_exception_cb;
+	for (i = 0; i < obj->nr_programs; i++) {
+		struct bpf_program *prog = &obj->programs[i];
+		int j, k, n;
+
+		if (prog_is_subprog(obj, prog))
+			continue;
+		n = btf__type_cnt(obj->btf);
+		for (j = 1; j < n; j++) {
+			const char *str = "exception_callback:", *name;
+			size_t len = strlen(str);
+			struct btf_type *t;
+
+			t = btf_type_by_id(obj->btf, j);
+			if (!btf_is_decl_tag(t) || btf_decl_tag(t)->component_idx != -1)
+				continue;
+
+			name = btf__str_by_offset(obj->btf, t->name_off);
+			if (strncmp(name, str, len))
+				continue;
+
+			t = btf_type_by_id(obj->btf, t->type);
+			if (!btf_is_func(t) || btf_func_linkage(t) != BTF_FUNC_GLOBAL) {
+				pr_warn("prog '%s': exception_callback:<value> decl tag not applied to the main program\n",
+					prog->name);
+				return -EINVAL;
+			}
+			if (strcmp(prog->name, btf__str_by_offset(obj->btf, t->name_off)))
+				continue;
+			/* Multiple callbacks are specified for the same prog,
+			 * the verifier will eventually return an error for this
+			 * case, hence simply skip appending a subprog.
+			 */
+			if (prog->exception_cb_idx >= 0) {
+				prog->exception_cb_idx = -1;
+				break;
+			}
+
+			name += len;
+			if (str_is_empty(name)) {
+				pr_warn("prog '%s': exception_callback:<value> decl tag contains empty value\n",
+					prog->name);
+				return -EINVAL;
+			}
+
+			for (k = 0; k < obj->nr_programs; k++) {
+				struct bpf_program *subprog = &obj->programs[k];
+
+				if (!prog_is_subprog(obj, subprog))
+					continue;
+				if (strcmp(name, subprog->name))
+					continue;
+				/* Enforce non-hidden, as from verifier point of
+				 * view it expects global functions, whereas the
+				 * mark_btf_static fixes up linkage as static.
+				 */
+				if (!subprog->sym_global || subprog->mark_btf_static) {
+					pr_warn("prog '%s': exception callback %s must be a global non-hidden function\n",
+						prog->name, subprog->name);
+					return -EINVAL;
+				}
+				/* Let's see if we already saw a static exception callback with the same name */
+				if (prog->exception_cb_idx >= 0) {
+					pr_warn("prog '%s': multiple subprogs with same name as exception callback '%s'\n",
+					        prog->name, subprog->name);
+					return -EINVAL;
+				}
+				prog->exception_cb_idx = k;
+				break;
+			}
+
+			if (prog->exception_cb_idx >= 0)
+				continue;
+			pr_warn("prog '%s': cannot find exception callback '%s'\n", prog->name, name);
+			return -ENOENT;
+		}
+	}
+skip_exception_cb:
+
 	sanitize = btf_needs_sanitization(obj);
 	if (sanitize) {
 		const void *raw_data;
@@ -6235,13 +6325,45 @@ static int append_subprog_relos(struct bpf_program *main_prog, struct bpf_progra
 }
 
 static int
+bpf_object__append_subprog_code(struct bpf_object *obj, struct bpf_program *main_prog,
+				struct bpf_program *subprog)
+{
+       struct bpf_insn *insns;
+       size_t new_cnt;
+       int err;
+
+       subprog->sub_insn_off = main_prog->insns_cnt;
+
+       new_cnt = main_prog->insns_cnt + subprog->insns_cnt;
+       insns = libbpf_reallocarray(main_prog->insns, new_cnt, sizeof(*insns));
+       if (!insns) {
+               pr_warn("prog '%s': failed to realloc prog code\n", main_prog->name);
+               return -ENOMEM;
+       }
+       main_prog->insns = insns;
+       main_prog->insns_cnt = new_cnt;
+
+       memcpy(main_prog->insns + subprog->sub_insn_off, subprog->insns,
+              subprog->insns_cnt * sizeof(*insns));
+
+       pr_debug("prog '%s': added %zu insns from sub-prog '%s'\n",
+                main_prog->name, subprog->insns_cnt, subprog->name);
+
+       /* The subprog insns are now appended. Append its relos too. */
+       err = append_subprog_relos(main_prog, subprog);
+       if (err)
+               return err;
+       return 0;
+}
+
+static int
 bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog,
 		       struct bpf_program *prog)
 {
-	size_t sub_insn_idx, insn_idx, new_cnt;
+	size_t sub_insn_idx, insn_idx;
 	struct bpf_program *subprog;
-	struct bpf_insn *insns, *insn;
 	struct reloc_desc *relo;
+	struct bpf_insn *insn;
 	int err;
 
 	err = reloc_prog_func_and_line_info(obj, main_prog, prog);
@@ -6316,25 +6438,7 @@ bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog,
 		 *   and relocate.
 		 */
 		if (subprog->sub_insn_off == 0) {
-			subprog->sub_insn_off = main_prog->insns_cnt;
-
-			new_cnt = main_prog->insns_cnt + subprog->insns_cnt;
-			insns = libbpf_reallocarray(main_prog->insns, new_cnt, sizeof(*insns));
-			if (!insns) {
-				pr_warn("prog '%s': failed to realloc prog code\n", main_prog->name);
-				return -ENOMEM;
-			}
-			main_prog->insns = insns;
-			main_prog->insns_cnt = new_cnt;
-
-			memcpy(main_prog->insns + subprog->sub_insn_off, subprog->insns,
-			       subprog->insns_cnt * sizeof(*insns));
-
-			pr_debug("prog '%s': added %zu insns from sub-prog '%s'\n",
-				 main_prog->name, subprog->insns_cnt, subprog->name);
-
-			/* The subprog insns are now appended. Append its relos too. */
-			err = append_subprog_relos(main_prog, subprog);
+			err = bpf_object__append_subprog_code(obj, main_prog, subprog);
 			if (err)
 				return err;
 			err = bpf_object__reloc_code(obj, main_prog, subprog);
@@ -6568,6 +6672,25 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path)
 				prog->name, err);
 			return err;
 		}
+
+		/* Now, also append exception callback if it has not been done already. */
+		if (prog->exception_cb_idx >= 0) {
+			struct bpf_program *subprog = &obj->programs[prog->exception_cb_idx];
+
+			/* Calling exception callback directly is disallowed, which the
+			 * verifier will reject later. In case it was processed already,
+			 * we can skip this step, otherwise for all other valid cases we
+			 * have to append exception callback now.
+			 */
+			if (subprog->sub_insn_off == 0) {
+				err = bpf_object__append_subprog_code(obj, prog, subprog);
+				if (err)
+					return err;
+				err = bpf_object__reloc_code(obj, prog, subprog);
+				if (err)
+					return err;
+			}
+		}
 	}
 	/* Process data relos for main programs */
 	for (i = 0; i < obj->nr_programs; i++) {
@@ -8842,14 +8965,19 @@ static const struct bpf_sec_def section_defs[] = {
 	SEC_DEF("cgroup/bind6",		CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_BIND, SEC_ATTACHABLE),
 	SEC_DEF("cgroup/connect4",	CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_CONNECT, SEC_ATTACHABLE),
 	SEC_DEF("cgroup/connect6",	CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_CONNECT, SEC_ATTACHABLE),
+	SEC_DEF("cgroup/connect_unix",	CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_CONNECT, SEC_ATTACHABLE),
 	SEC_DEF("cgroup/sendmsg4",	CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_SENDMSG, SEC_ATTACHABLE),
 	SEC_DEF("cgroup/sendmsg6",	CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_SENDMSG, SEC_ATTACHABLE),
+	SEC_DEF("cgroup/sendmsg_unix",	CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_SENDMSG, SEC_ATTACHABLE),
 	SEC_DEF("cgroup/recvmsg4",	CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_RECVMSG, SEC_ATTACHABLE),
 	SEC_DEF("cgroup/recvmsg6",	CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_RECVMSG, SEC_ATTACHABLE),
+	SEC_DEF("cgroup/recvmsg_unix",	CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_RECVMSG, SEC_ATTACHABLE),
 	SEC_DEF("cgroup/getpeername4",	CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETPEERNAME, SEC_ATTACHABLE),
 	SEC_DEF("cgroup/getpeername6",	CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETPEERNAME, SEC_ATTACHABLE),
+	SEC_DEF("cgroup/getpeername_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_GETPEERNAME, SEC_ATTACHABLE),
 	SEC_DEF("cgroup/getsockname4",	CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETSOCKNAME, SEC_ATTACHABLE),
 	SEC_DEF("cgroup/getsockname6",	CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETSOCKNAME, SEC_ATTACHABLE),
+	SEC_DEF("cgroup/getsockname_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_GETSOCKNAME, SEC_ATTACHABLE),
 	SEC_DEF("cgroup/sysctl",	CGROUP_SYSCTL, BPF_CGROUP_SYSCTL, SEC_ATTACHABLE),
 	SEC_DEF("cgroup/getsockopt",	CGROUP_SOCKOPT, BPF_CGROUP_GETSOCKOPT, SEC_ATTACHABLE),
 	SEC_DEF("cgroup/setsockopt",	CGROUP_SOCKOPT, BPF_CGROUP_SETSOCKOPT, SEC_ATTACHABLE),
@@ -10996,7 +11124,7 @@ static int attach_uprobe_multi(const struct bpf_program *prog, long cookie, stru
 
 	*link = NULL;
 
-	n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%ms",
+	n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%m[^\n]",
 		   &probe_type, &binary_path, &func_name);
 	switch (n) {
 	case 1:
@@ -11506,14 +11634,14 @@ err_out:
 static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link)
 {
 	DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts);
-	char *probe_type = NULL, *binary_path = NULL, *func_name = NULL;
-	int n, ret = -EINVAL;
+	char *probe_type = NULL, *binary_path = NULL, *func_name = NULL, *func_off;
+	int n, c, ret = -EINVAL;
 	long offset = 0;
 
 	*link = NULL;
 
-	n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%m[a-zA-Z0-9_.]+%li",
-		   &probe_type, &binary_path, &func_name, &offset);
+	n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%m[^\n]",
+		   &probe_type, &binary_path, &func_name);
 	switch (n) {
 	case 1:
 		/* handle SEC("u[ret]probe") - format is valid, but auto-attach is impossible. */
@@ -11524,7 +11652,17 @@ static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf
 			prog->name, prog->sec_name);
 		break;
 	case 3:
-	case 4:
+		/* check if user specifies `+offset`, if yes, this should be
+		 * the last part of the string, make sure sscanf read to EOL
+		 */
+		func_off = strrchr(func_name, '+');
+		if (func_off) {
+			n = sscanf(func_off, "+%li%n", &offset, &c);
+			if (n == 1 && *(func_off + c) == '\0')
+				func_off[0] = '\0';
+			else
+				offset = 0;
+		}
 		opts.retprobe = strcmp(probe_type, "uretprobe") == 0 ||
 				strcmp(probe_type, "uretprobe.s") == 0;
 		if (opts.retprobe && offset != 0) {
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 0e52621cba43..475378438545 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -1229,6 +1229,7 @@ LIBBPF_API int bpf_tc_query(const struct bpf_tc_hook *hook,
 
 /* Ring buffer APIs */
 struct ring_buffer;
+struct ring;
 struct user_ring_buffer;
 
 typedef int (*ring_buffer_sample_fn)(void *ctx, void *data, size_t size);
@@ -1249,6 +1250,78 @@ LIBBPF_API int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms);
 LIBBPF_API int ring_buffer__consume(struct ring_buffer *rb);
 LIBBPF_API int ring_buffer__epoll_fd(const struct ring_buffer *rb);
 
+/**
+ * @brief **ring_buffer__ring()** returns the ringbuffer object inside a given
+ * ringbuffer manager representing a single BPF_MAP_TYPE_RINGBUF map instance.
+ *
+ * @param rb A ringbuffer manager object.
+ * @param idx An index into the ringbuffers contained within the ringbuffer
+ * manager object. The index is 0-based and corresponds to the order in which
+ * ring_buffer__add was called.
+ * @return A ringbuffer object on success; NULL and errno set if the index is
+ * invalid.
+ */
+LIBBPF_API struct ring *ring_buffer__ring(struct ring_buffer *rb,
+					  unsigned int idx);
+
+/**
+ * @brief **ring__consumer_pos()** returns the current consumer position in the
+ * given ringbuffer.
+ *
+ * @param r A ringbuffer object.
+ * @return The current consumer position.
+ */
+LIBBPF_API unsigned long ring__consumer_pos(const struct ring *r);
+
+/**
+ * @brief **ring__producer_pos()** returns the current producer position in the
+ * given ringbuffer.
+ *
+ * @param r A ringbuffer object.
+ * @return The current producer position.
+ */
+LIBBPF_API unsigned long ring__producer_pos(const struct ring *r);
+
+/**
+ * @brief **ring__avail_data_size()** returns the number of bytes in the
+ * ringbuffer not yet consumed. This has no locking associated with it, so it
+ * can be inaccurate if operations are ongoing while this is called. However, it
+ * should still show the correct trend over the long-term.
+ *
+ * @param r A ringbuffer object.
+ * @return The number of bytes not yet consumed.
+ */
+LIBBPF_API size_t ring__avail_data_size(const struct ring *r);
+
+/**
+ * @brief **ring__size()** returns the total size of the ringbuffer's map data
+ * area (excluding special producer/consumer pages). Effectively this gives the
+ * amount of usable bytes of data inside the ringbuffer.
+ *
+ * @param r A ringbuffer object.
+ * @return The total size of the ringbuffer map data area.
+ */
+LIBBPF_API size_t ring__size(const struct ring *r);
+
+/**
+ * @brief **ring__map_fd()** returns the file descriptor underlying the given
+ * ringbuffer.
+ *
+ * @param r A ringbuffer object.
+ * @return The underlying ringbuffer file descriptor
+ */
+LIBBPF_API int ring__map_fd(const struct ring *r);
+
+/**
+ * @brief **ring__consume()** consumes available ringbuffer data without event
+ * polling.
+ *
+ * @param r A ringbuffer object.
+ * @return The number of records consumed (or INT_MAX, whichever is less), or
+ * a negative number if any of the callbacks return an error.
+ */
+LIBBPF_API int ring__consume(struct ring *r);
+
 struct user_ring_buffer_opts {
 	size_t sz; /* size of this struct, for forward/backward compatibility */
 };
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 57712321490f..cc973b678a39 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -400,4 +400,11 @@ LIBBPF_1.3.0 {
 		bpf_program__attach_netfilter;
 		bpf_program__attach_tcx;
 		bpf_program__attach_uprobe_multi;
+		ring__avail_data_size;
+		ring__consume;
+		ring__consumer_pos;
+		ring__map_fd;
+		ring__producer_pos;
+		ring__size;
+		ring_buffer__ring;
 } LIBBPF_1.2.0;
diff --git a/tools/lib/bpf/ringbuf.c b/tools/lib/bpf/ringbuf.c
index 02199364db13..aacb64278a01 100644
--- a/tools/lib/bpf/ringbuf.c
+++ b/tools/lib/bpf/ringbuf.c
@@ -34,7 +34,7 @@ struct ring {
 
 struct ring_buffer {
 	struct epoll_event *events;
-	struct ring *rings;
+	struct ring **rings;
 	size_t page_size;
 	int epoll_fd;
 	int ring_cnt;
@@ -57,7 +57,7 @@ struct ringbuf_hdr {
 	__u32 pad;
 };
 
-static void ringbuf_unmap_ring(struct ring_buffer *rb, struct ring *r)
+static void ringbuf_free_ring(struct ring_buffer *rb, struct ring *r)
 {
 	if (r->consumer_pos) {
 		munmap(r->consumer_pos, rb->page_size);
@@ -67,6 +67,8 @@ static void ringbuf_unmap_ring(struct ring_buffer *rb, struct ring *r)
 		munmap(r->producer_pos, rb->page_size + 2 * (r->mask + 1));
 		r->producer_pos = NULL;
 	}
+
+	free(r);
 }
 
 /* Add extra RINGBUF maps to this ring buffer manager */
@@ -107,8 +109,10 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd,
 		return libbpf_err(-ENOMEM);
 	rb->events = tmp;
 
-	r = &rb->rings[rb->ring_cnt];
-	memset(r, 0, sizeof(*r));
+	r = calloc(1, sizeof(*r));
+	if (!r)
+		return libbpf_err(-ENOMEM);
+	rb->rings[rb->ring_cnt] = r;
 
 	r->map_fd = map_fd;
 	r->sample_cb = sample_cb;
@@ -121,7 +125,7 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd,
 		err = -errno;
 		pr_warn("ringbuf: failed to mmap consumer page for map fd=%d: %d\n",
 			map_fd, err);
-		return libbpf_err(err);
+		goto err_out;
 	}
 	r->consumer_pos = tmp;
 
@@ -131,16 +135,16 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd,
 	 */
 	mmap_sz = rb->page_size + 2 * (__u64)info.max_entries;
 	if (mmap_sz != (__u64)(size_t)mmap_sz) {
+		err = -E2BIG;
 		pr_warn("ringbuf: ring buffer size (%u) is too big\n", info.max_entries);
-		return libbpf_err(-E2BIG);
+		goto err_out;
 	}
 	tmp = mmap(NULL, (size_t)mmap_sz, PROT_READ, MAP_SHARED, map_fd, rb->page_size);
 	if (tmp == MAP_FAILED) {
 		err = -errno;
-		ringbuf_unmap_ring(rb, r);
 		pr_warn("ringbuf: failed to mmap data pages for map fd=%d: %d\n",
 			map_fd, err);
-		return libbpf_err(err);
+		goto err_out;
 	}
 	r->producer_pos = tmp;
 	r->data = tmp + rb->page_size;
@@ -152,14 +156,17 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd,
 	e->data.fd = rb->ring_cnt;
 	if (epoll_ctl(rb->epoll_fd, EPOLL_CTL_ADD, map_fd, e) < 0) {
 		err = -errno;
-		ringbuf_unmap_ring(rb, r);
 		pr_warn("ringbuf: failed to epoll add map fd=%d: %d\n",
 			map_fd, err);
-		return libbpf_err(err);
+		goto err_out;
 	}
 
 	rb->ring_cnt++;
 	return 0;
+
+err_out:
+	ringbuf_free_ring(rb, r);
+	return libbpf_err(err);
 }
 
 void ring_buffer__free(struct ring_buffer *rb)
@@ -170,7 +177,7 @@ void ring_buffer__free(struct ring_buffer *rb)
 		return;
 
 	for (i = 0; i < rb->ring_cnt; ++i)
-		ringbuf_unmap_ring(rb, &rb->rings[i]);
+		ringbuf_free_ring(rb, rb->rings[i]);
 	if (rb->epoll_fd >= 0)
 		close(rb->epoll_fd);
 
@@ -278,7 +285,7 @@ int ring_buffer__consume(struct ring_buffer *rb)
 	int i;
 
 	for (i = 0; i < rb->ring_cnt; i++) {
-		struct ring *ring = &rb->rings[i];
+		struct ring *ring = rb->rings[i];
 
 		err = ringbuf_process_ring(ring);
 		if (err < 0)
@@ -305,7 +312,7 @@ int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms)
 
 	for (i = 0; i < cnt; i++) {
 		__u32 ring_id = rb->events[i].data.fd;
-		struct ring *ring = &rb->rings[ring_id];
+		struct ring *ring = rb->rings[ring_id];
 
 		err = ringbuf_process_ring(ring);
 		if (err < 0)
@@ -323,6 +330,58 @@ int ring_buffer__epoll_fd(const struct ring_buffer *rb)
 	return rb->epoll_fd;
 }
 
+struct ring *ring_buffer__ring(struct ring_buffer *rb, unsigned int idx)
+{
+	if (idx >= rb->ring_cnt)
+		return errno = ERANGE, NULL;
+
+	return rb->rings[idx];
+}
+
+unsigned long ring__consumer_pos(const struct ring *r)
+{
+	/* Synchronizes with smp_store_release() in ringbuf_process_ring(). */
+	return smp_load_acquire(r->consumer_pos);
+}
+
+unsigned long ring__producer_pos(const struct ring *r)
+{
+	/* Synchronizes with smp_store_release() in __bpf_ringbuf_reserve() in
+	 * the kernel.
+	 */
+	return smp_load_acquire(r->producer_pos);
+}
+
+size_t ring__avail_data_size(const struct ring *r)
+{
+	unsigned long cons_pos, prod_pos;
+
+	cons_pos = ring__consumer_pos(r);
+	prod_pos = ring__producer_pos(r);
+	return prod_pos - cons_pos;
+}
+
+size_t ring__size(const struct ring *r)
+{
+	return r->mask + 1;
+}
+
+int ring__map_fd(const struct ring *r)
+{
+	return r->map_fd;
+}
+
+int ring__consume(struct ring *r)
+{
+	int64_t res;
+
+	res = ringbuf_process_ring(r);
+	if (res < 0)
+		return libbpf_err(res);
+
+	return res > INT_MAX ? INT_MAX : res;
+}
+
 static void user_ringbuf_unmap_ring(struct user_ring_buffer *rb)
 {
 	if (rb->consumer_pos) {
diff --git a/tools/net/ynl/generated/netdev-user.c b/tools/net/ynl/generated/netdev-user.c
index 68b408ca0f7f..b5ffe8cd1144 100644
--- a/tools/net/ynl/generated/netdev-user.c
+++ b/tools/net/ynl/generated/netdev-user.c
@@ -45,12 +45,26 @@ const char *netdev_xdp_act_str(enum netdev_xdp_act value)
 	return netdev_xdp_act_strmap[value];
 }
 
+static const char * const netdev_xdp_rx_metadata_strmap[] = {
+	[0] = "timestamp",
+	[1] = "hash",
+};
+
+const char *netdev_xdp_rx_metadata_str(enum netdev_xdp_rx_metadata value)
+{
+	value = ffs(value) - 1;
+	if (value < 0 || value >= (int)MNL_ARRAY_SIZE(netdev_xdp_rx_metadata_strmap))
+		return NULL;
+	return netdev_xdp_rx_metadata_strmap[value];
+}
+
 /* Policies */
 struct ynl_policy_attr netdev_dev_policy[NETDEV_A_DEV_MAX + 1] = {
 	[NETDEV_A_DEV_IFINDEX] = { .name = "ifindex", .type = YNL_PT_U32, },
 	[NETDEV_A_DEV_PAD] = { .name = "pad", .type = YNL_PT_IGNORE, },
 	[NETDEV_A_DEV_XDP_FEATURES] = { .name = "xdp-features", .type = YNL_PT_U64, },
 	[NETDEV_A_DEV_XDP_ZC_MAX_SEGS] = { .name = "xdp-zc-max-segs", .type = YNL_PT_U32, },
+	[NETDEV_A_DEV_XDP_RX_METADATA_FEATURES] = { .name = "xdp-rx-metadata-features", .type = YNL_PT_U64, },
 };
 
 struct ynl_policy_nest netdev_dev_nest = {
@@ -97,6 +111,11 @@ int netdev_dev_get_rsp_parse(const struct nlmsghdr *nlh, void *data)
 				return MNL_CB_ERROR;
 			dst->_present.xdp_zc_max_segs = 1;
 			dst->xdp_zc_max_segs = mnl_attr_get_u32(attr);
+		} else if (type == NETDEV_A_DEV_XDP_RX_METADATA_FEATURES) {
+			if (ynl_attr_validate(yarg, attr))
+				return MNL_CB_ERROR;
+			dst->_present.xdp_rx_metadata_features = 1;
+			dst->xdp_rx_metadata_features = mnl_attr_get_u64(attr);
 		}
 	}
 
diff --git a/tools/net/ynl/generated/netdev-user.h b/tools/net/ynl/generated/netdev-user.h
index 0952d3261f4d..b4351ff34595 100644
--- a/tools/net/ynl/generated/netdev-user.h
+++ b/tools/net/ynl/generated/netdev-user.h
@@ -18,6 +18,7 @@ extern const struct ynl_family ynl_netdev_family;
 /* Enums */
 const char *netdev_op_str(int op);
 const char *netdev_xdp_act_str(enum netdev_xdp_act value);
+const char *netdev_xdp_rx_metadata_str(enum netdev_xdp_rx_metadata value);
 
 /* Common nested types */
 /* ============== NETDEV_CMD_DEV_GET ============== */
@@ -48,11 +49,13 @@ struct netdev_dev_get_rsp {
 		__u32 ifindex:1;
 		__u32 xdp_features:1;
 		__u32 xdp_zc_max_segs:1;
+		__u32 xdp_rx_metadata_features:1;
 	} _present;
 
 	__u32 ifindex;
 	__u64 xdp_features;
 	__u32 xdp_zc_max_segs;
+	__u64 xdp_rx_metadata_features;
 };
 
 void netdev_dev_get_rsp_free(struct netdev_dev_get_rsp *rsp);
diff --git a/tools/net/ynl/samples/Makefile b/tools/net/ynl/samples/Makefile
index f2db8bb78309..32abbc0af39e 100644
--- a/tools/net/ynl/samples/Makefile
+++ b/tools/net/ynl/samples/Makefile
@@ -4,7 +4,7 @@ include ../Makefile.deps
 
 CC=gcc
 CFLAGS=-std=gnu11 -O2 -W -Wall -Wextra -Wno-unused-parameter -Wshadow \
-	-I../lib/ -I../generated/ -idirafter $(UAPI_PATH)
+	-I../../../include/uapi -I../lib/ -I../generated/ -idirafter $(UAPI_PATH)
 ifeq ("$(DEBUG)","1")
   CFLAGS += -g -fsanitize=address -fsanitize=leak -static-libasan
 endif
diff --git a/tools/net/ynl/samples/netdev.c b/tools/net/ynl/samples/netdev.c
index 06433400dddd..b828225daad0 100644
--- a/tools/net/ynl/samples/netdev.c
+++ b/tools/net/ynl/samples/netdev.c
@@ -32,12 +32,18 @@ static void netdev_print_device(struct netdev_dev_get_rsp *d, unsigned int op)
 	if (!d->_present.xdp_features)
 		return;
 
-	printf("%llx:", d->xdp_features);
+	printf("xdp-features (%llx):", d->xdp_features);
 	for (int i = 0; d->xdp_features > 1U << i; i++) {
 		if (d->xdp_features & (1U << i))
 			printf(" %s", netdev_xdp_act_str(1 << i));
 	}
 
+	printf(" xdp-rx-metadata-features (%llx):", d->xdp_rx_metadata_features);
+	for (int i = 0; d->xdp_rx_metadata_features > 1U << i; i++) {
+		if (d->xdp_rx_metadata_features & (1U << i))
+			printf(" %s", netdev_xdp_rx_metadata_str(1 << i));
+	}
+
 	printf(" xdp-zc-max-segs=%u", d->xdp_zc_max_segs);
 
 	name = netdev_op_str(op);
diff --git a/tools/testing/selftests/bpf/DENYLIST.aarch64 b/tools/testing/selftests/bpf/DENYLIST.aarch64
index 3babaf3eee5c..5c2cc7e8c5d0 100644
--- a/tools/testing/selftests/bpf/DENYLIST.aarch64
+++ b/tools/testing/selftests/bpf/DENYLIST.aarch64
@@ -1,5 +1,6 @@
 bpf_cookie/multi_kprobe_attach_api               # kprobe_multi_link_api_subtest:FAIL:fentry_raw_skel_load unexpected error: -3
 bpf_cookie/multi_kprobe_link_api                 # kprobe_multi_link_api_subtest:FAIL:fentry_raw_skel_load unexpected error: -3
+exceptions					 # JIT does not support calling kfunc bpf_throw: -524
 fexit_sleep                                      # The test never returns. The remaining tests cannot start.
 kprobe_multi_bench_attach                        # needs CONFIG_FPROBE
 kprobe_multi_test                                # needs CONFIG_FPROBE
@@ -9,3 +10,4 @@ fexit_test/fexit_many_args                       # fexit_many_args:FAIL:fexit_ma
 fill_link_info/kprobe_multi_link_info            # bpf_program__attach_kprobe_multi_opts unexpected error: -95
 fill_link_info/kretprobe_multi_link_info         # bpf_program__attach_kprobe_multi_opts unexpected error: -95
 fill_link_info/kprobe_multi_invalid_ubuff        # bpf_program__attach_kprobe_multi_opts unexpected error: -95
+missed/kprobe_recursion                          # missed_kprobe_recursion__attach unexpected error: -95 (errno 95)
diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x
index 5061d9e24c16..1a63996c0304 100644
--- a/tools/testing/selftests/bpf/DENYLIST.s390x
+++ b/tools/testing/selftests/bpf/DENYLIST.s390x
@@ -1,29 +1,5 @@
 # TEMPORARY
 # Alphabetical order
-bloom_filter_map                         # failed to find kernel BTF type ID of '__x64_sys_getpgid': -3                (?)
-bpf_cookie                               # failed to open_and_load program: -524 (trampoline)
-bpf_loop                                 # attaches to __x64_sys_nanosleep
-cgrp_local_storage                       # prog_attach unexpected error: -524                                          (trampoline)
-dynptr/test_dynptr_skb_data
-dynptr/test_skb_readonly
-fexit_sleep                              # fexit_skel_load fexit skeleton failed                                       (trampoline)
+exceptions				 # JIT does not support calling kfunc bpf_throw				       (exceptions)
 get_stack_raw_tp                         # user_stack corrupted user stack                                             (no backchain userspace)
-iters/testmod_seq*                       # s390x doesn't support kfuncs in modules yet
-kprobe_multi_bench_attach                # bpf_program__attach_kprobe_multi_opts unexpected error: -95
-kprobe_multi_test                        # relies on fentry
-ksyms_btf/weak_ksyms*                    # test_ksyms_weak__open_and_load unexpected error: -22                        (kfunc)
-ksyms_module                             # test_ksyms_module__open_and_load unexpected error: -9                       (?)
-ksyms_module_libbpf                      # JIT does not support calling kernel function                                (kfunc)
-ksyms_module_lskel                       # test_ksyms_module_lskel__open_and_load unexpected error: -9                 (?)
-module_attach                            # skel_attach skeleton attach failed: -524                                    (trampoline)
-ringbuf                                  # skel_load skeleton load failed                                              (?)
 stacktrace_build_id                      # compare_map_keys stackid_hmap vs. stackmap err -2 errno 2                   (?)
-test_lsm                                 # attach unexpected error: -524                                               (trampoline)
-trace_printk                             # trace_printk__load unexpected error: -2 (errno 2)                           (?)
-trace_vprintk                            # trace_vprintk__open_and_load unexpected error: -9                           (?)
-unpriv_bpf_disabled                      # fentry
-user_ringbuf                             # failed to find kernel BTF type ID of '__s390x_sys_prctl': -3                (?)
-verif_stats                              # trace_vprintk__open_and_load unexpected error: -9                           (?)
-xdp_bonding                              # failed to auto-attach program 'trace_on_entry': -524                        (trampoline)
-xdp_metadata                             # JIT does not support calling kernel function                                (kfunc)
-test_task_under_cgroup                   # JIT does not support calling kernel function                                (kfunc)
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index caede9b574cb..4225f975fce3 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -27,7 +27,11 @@ endif
 BPF_GCC		?= $(shell command -v bpf-gcc;)
 SAN_CFLAGS	?=
 SAN_LDFLAGS	?= $(SAN_CFLAGS)
-CFLAGS += -g -O0 -rdynamic -Wall -Werror $(GENFLAGS) $(SAN_CFLAGS)	\
+RELEASE		?=
+OPT_FLAGS	?= $(if $(RELEASE),-O2,-O0)
+CFLAGS += -g $(OPT_FLAGS) -rdynamic					\
+	  -Wall -Werror 						\
+	  $(GENFLAGS) $(SAN_CFLAGS)					\
 	  -I$(CURDIR) -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR)		\
 	  -I$(TOOLSINCDIR) -I$(APIDIR) -I$(OUTPUT)
 LDFLAGS += $(SAN_LDFLAGS)
@@ -104,7 +108,7 @@ TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \
 	xskxceiver xdp_redirect_multi xdp_synproxy veristat xdp_hw_metadata \
 	xdp_features
 
-TEST_GEN_FILES += liburandom_read.so urandom_read sign-file
+TEST_GEN_FILES += liburandom_read.so urandom_read sign-file uprobe_multi
 
 # Emit succinct information message describing current building step
 # $1 - generic step name (e.g., CC, LINK, etc);
@@ -188,7 +192,7 @@ $(OUTPUT)/%:%.c
 	$(Q)$(LINK.c) $^ $(LDLIBS) -o $@
 
 # LLVM's ld.lld doesn't support all the architectures, so use it only on x86
-ifeq ($(SRCARCH),x86)
+ifeq ($(SRCARCH),$(filter $(SRCARCH),x86 riscv))
 LLD := lld
 else
 LLD := ld
@@ -196,17 +200,20 @@ endif
 
 # Filter out -static for liburandom_read.so and its dependent targets so that static builds
 # do not fail. Static builds leave urandom_read relying on system-wide shared libraries.
-$(OUTPUT)/liburandom_read.so: urandom_read_lib1.c urandom_read_lib2.c
+$(OUTPUT)/liburandom_read.so: urandom_read_lib1.c urandom_read_lib2.c liburandom_read.map
 	$(call msg,LIB,,$@)
-	$(Q)$(CLANG) $(filter-out -static,$(CFLAGS) $(LDFLAGS))   \
-		     $^ $(filter-out -static,$(LDLIBS))	     \
+	$(Q)$(CLANG) $(CLANG_TARGET_ARCH) \
+		     $(filter-out -static,$(CFLAGS) $(LDFLAGS)) \
+		     $(filter %.c,$^) $(filter-out -static,$(LDLIBS)) \
 		     -fuse-ld=$(LLD) -Wl,-znoseparate-code -Wl,--build-id=sha1 \
+		     -Wl,--version-script=liburandom_read.map \
 		     -fPIC -shared -o $@
 
 $(OUTPUT)/urandom_read: urandom_read.c urandom_read_aux.c $(OUTPUT)/liburandom_read.so
 	$(call msg,BINARY,,$@)
-	$(Q)$(CLANG) $(filter-out -static,$(CFLAGS) $(LDFLAGS)) $(filter %.c,$^) \
-		     -lurandom_read $(filter-out -static,$(LDLIBS)) -L$(OUTPUT)  \
+	$(Q)$(CLANG) $(CLANG_TARGET_ARCH) \
+		     $(filter-out -static,$(CFLAGS) $(LDFLAGS)) $(filter %.c,$^) \
+		     -lurandom_read $(filter-out -static,$(LDLIBS)) -L$(OUTPUT) \
 		     -fuse-ld=$(LLD) -Wl,-znoseparate-code -Wl,--build-id=sha1 \
 		     -Wl,-rpath=. -o $@
 
@@ -238,7 +245,7 @@ $(OUTPUT)/runqslower: $(BPFOBJ) | $(DEFAULT_BPFTOOL) $(RUNQSLOWER_OUTPUT)
 		    BPFTOOL_OUTPUT=$(HOST_BUILD_DIR)/bpftool/		       \
 		    BPFOBJ_OUTPUT=$(BUILD_DIR)/libbpf			       \
 		    BPFOBJ=$(BPFOBJ) BPF_INCLUDE=$(INCLUDE_DIR)		       \
-		    EXTRA_CFLAGS='-g -O0 $(SAN_CFLAGS)'			       \
+		    EXTRA_CFLAGS='-g $(OPT_FLAGS) $(SAN_CFLAGS)'	       \
 		    EXTRA_LDFLAGS='$(SAN_LDFLAGS)' &&			       \
 		    cp $(RUNQSLOWER_OUTPUT)runqslower $@
 
@@ -276,7 +283,7 @@ $(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile)    \
 		    $(HOST_BPFOBJ) | $(HOST_BUILD_DIR)/bpftool
 	$(Q)$(MAKE) $(submake_extras)  -C $(BPFTOOLDIR)			       \
 		    ARCH= CROSS_COMPILE= CC="$(HOSTCC)" LD="$(HOSTLD)" 	       \
-		    EXTRA_CFLAGS='-g -O0'				       \
+		    EXTRA_CFLAGS='-g $(OPT_FLAGS)'			       \
 		    OUTPUT=$(HOST_BUILD_DIR)/bpftool/			       \
 		    LIBBPF_OUTPUT=$(HOST_BUILD_DIR)/libbpf/		       \
 		    LIBBPF_DESTDIR=$(HOST_SCRATCH_DIR)/			       \
@@ -287,7 +294,7 @@ $(CROSS_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile)	\
 		    $(BPFOBJ) | $(BUILD_DIR)/bpftool
 	$(Q)$(MAKE) $(submake_extras)  -C $(BPFTOOLDIR)				\
 		    ARCH=$(ARCH) CROSS_COMPILE=$(CROSS_COMPILE)			\
-		    EXTRA_CFLAGS='-g -O0'					\
+		    EXTRA_CFLAGS='-g $(OPT_FLAGS)'				\
 		    OUTPUT=$(BUILD_DIR)/bpftool/				\
 		    LIBBPF_OUTPUT=$(BUILD_DIR)/libbpf/				\
 		    LIBBPF_DESTDIR=$(SCRATCH_DIR)/				\
@@ -310,7 +317,7 @@ $(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile)		       \
 	   $(APIDIR)/linux/bpf.h					       \
 	   | $(BUILD_DIR)/libbpf
 	$(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) OUTPUT=$(BUILD_DIR)/libbpf/ \
-		    EXTRA_CFLAGS='-g -O0 $(SAN_CFLAGS)'			       \
+		    EXTRA_CFLAGS='-g $(OPT_FLAGS) $(SAN_CFLAGS)'	       \
 		    EXTRA_LDFLAGS='$(SAN_LDFLAGS)'			       \
 		    DESTDIR=$(SCRATCH_DIR) prefix= all install_headers
 
@@ -319,7 +326,7 @@ $(HOST_BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile)		       \
 		$(APIDIR)/linux/bpf.h					       \
 		| $(HOST_BUILD_DIR)/libbpf
 	$(Q)$(MAKE) $(submake_extras) -C $(BPFDIR)                             \
-		    EXTRA_CFLAGS='-g -O0' ARCH= CROSS_COMPILE=		       \
+		    EXTRA_CFLAGS='-g $(OPT_FLAGS)' ARCH= CROSS_COMPILE=	       \
 		    OUTPUT=$(HOST_BUILD_DIR)/libbpf/			       \
 		    CC="$(HOSTCC)" LD="$(HOSTLD)"			       \
 		    DESTDIR=$(HOST_SCRATCH_DIR)/ prefix= all install_headers
@@ -640,7 +647,9 @@ $(OUTPUT)/test_verifier: test_verifier.c verifier/tests.h $(BPFOBJ) | $(OUTPUT)
 	$(call msg,BINARY,,$@)
 	$(Q)$(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@
 
-$(OUTPUT)/xskxceiver: xskxceiver.c xskxceiver.h $(OUTPUT)/xsk.o $(OUTPUT)/xsk_xdp_progs.skel.h $(BPFOBJ) | $(OUTPUT)
+# Include find_bit.c to compile xskxceiver.
+EXTRA_SRC := $(TOOLSDIR)/lib/find_bit.c
+$(OUTPUT)/xskxceiver: $(EXTRA_SRC) xskxceiver.c xskxceiver.h $(OUTPUT)/xsk.o $(OUTPUT)/xsk_xdp_progs.skel.h $(BPFOBJ) | $(OUTPUT)
 	$(call msg,BINARY,,$@)
 	$(Q)$(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@
 
diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h
index 209811b1993a..2c8cb3f61529 100644
--- a/tools/testing/selftests/bpf/bpf_experimental.h
+++ b/tools/testing/selftests/bpf/bpf_experimental.h
@@ -131,4 +131,331 @@ extern int bpf_rbtree_add_impl(struct bpf_rb_root *root, struct bpf_rb_node *nod
  */
 extern struct bpf_rb_node *bpf_rbtree_first(struct bpf_rb_root *root) __ksym;
 
+/* Description
+ *	Allocates a percpu object of the type represented by 'local_type_id' in
+ *	program BTF. User may use the bpf_core_type_id_local macro to pass the
+ *	type ID of a struct in program BTF.
+ *
+ *	The 'local_type_id' parameter must be a known constant.
+ *	The 'meta' parameter is rewritten by the verifier, no need for BPF
+ *	program to set it.
+ * Returns
+ *	A pointer to a percpu object of the type corresponding to the passed in
+ *	'local_type_id', or NULL on failure.
+ */
+extern void *bpf_percpu_obj_new_impl(__u64 local_type_id, void *meta) __ksym;
+
+/* Convenience macro to wrap over bpf_percpu_obj_new_impl */
+#define bpf_percpu_obj_new(type) ((type __percpu_kptr *)bpf_percpu_obj_new_impl(bpf_core_type_id_local(type), NULL))
+
+/* Description
+ *	Free an allocated percpu object. All fields of the object that require
+ *	destruction will be destructed before the storage is freed.
+ *
+ *	The 'meta' parameter is rewritten by the verifier, no need for BPF
+ *	program to set it.
+ * Returns
+ *	Void.
+ */
+extern void bpf_percpu_obj_drop_impl(void *kptr, void *meta) __ksym;
+
+struct bpf_iter_task_vma;
+
+extern int bpf_iter_task_vma_new(struct bpf_iter_task_vma *it,
+				 struct task_struct *task,
+				 unsigned long addr) __ksym;
+extern struct vm_area_struct *bpf_iter_task_vma_next(struct bpf_iter_task_vma *it) __ksym;
+extern void bpf_iter_task_vma_destroy(struct bpf_iter_task_vma *it) __ksym;
+
+/* Convenience macro to wrap over bpf_obj_drop_impl */
+#define bpf_percpu_obj_drop(kptr) bpf_percpu_obj_drop_impl(kptr, NULL)
+
+/* Description
+ *	Throw a BPF exception from the program, immediately terminating its
+ *	execution and unwinding the stack. The supplied 'cookie' parameter
+ *	will be the return value of the program when an exception is thrown,
+ *	and the default exception callback is used. Otherwise, if an exception
+ *	callback is set using the '__exception_cb(callback)' declaration tag
+ *	on the main program, the 'cookie' parameter will be the callback's only
+ *	input argument.
+ *
+ *	Thus, in case of default exception callback, 'cookie' is subjected to
+ *	constraints on the program's return value (as with R0 on exit).
+ *	Otherwise, the return value of the marked exception callback will be
+ *	subjected to the same checks.
+ *
+ *	Note that throwing an exception with lingering resources (locks,
+ *	references, etc.) will lead to a verification error.
+ *
+ *	Note that callbacks *cannot* call this helper.
+ * Returns
+ *	Never.
+ * Throws
+ *	An exception with the specified 'cookie' value.
+ */
+extern void bpf_throw(u64 cookie) __ksym;
+
+/* This macro must be used to mark the exception callback corresponding to the
+ * main program. For example:
+ *
+ * int exception_cb(u64 cookie) {
+ *	return cookie;
+ * }
+ *
+ * SEC("tc")
+ * __exception_cb(exception_cb)
+ * int main_prog(struct __sk_buff *ctx) {
+ *	...
+ *	return TC_ACT_OK;
+ * }
+ *
+ * Here, exception callback for the main program will be 'exception_cb'. Note
+ * that this attribute can only be used once, and multiple exception callbacks
+ * specified for the main program will lead to verification error.
+ */
+#define __exception_cb(name) __attribute__((btf_decl_tag("exception_callback:" #name)))
+
+#define __bpf_assert_signed(x) _Generic((x), \
+    unsigned long: 0,       \
+    unsigned long long: 0,  \
+    signed long: 1,         \
+    signed long long: 1     \
+)
+
+#define __bpf_assert_check(LHS, op, RHS)								 \
+	_Static_assert(sizeof(&(LHS)), "1st argument must be an lvalue expression");			 \
+	_Static_assert(sizeof(LHS) == 8, "Only 8-byte integers are supported\n");			 \
+	_Static_assert(__builtin_constant_p(__bpf_assert_signed(LHS)), "internal static assert");	 \
+	_Static_assert(__builtin_constant_p((RHS)), "2nd argument must be a constant expression")
+
+#define __bpf_assert(LHS, op, cons, RHS, VAL)							\
+	({											\
+		(void)bpf_throw;								\
+		asm volatile ("if %[lhs] " op " %[rhs] goto +2; r1 = %[value]; call bpf_throw"	\
+			       : : [lhs] "r"(LHS), [rhs] cons(RHS), [value] "ri"(VAL) : );	\
+	})
+
+#define __bpf_assert_op_sign(LHS, op, cons, RHS, VAL, supp_sign)			\
+	({										\
+		__bpf_assert_check(LHS, op, RHS);					\
+		if (__bpf_assert_signed(LHS) && !(supp_sign))				\
+			__bpf_assert(LHS, "s" #op, cons, RHS, VAL);			\
+		else									\
+			__bpf_assert(LHS, #op, cons, RHS, VAL);				\
+	 })
+
+#define __bpf_assert_op(LHS, op, RHS, VAL, supp_sign)					\
+	({										\
+		if (sizeof(typeof(RHS)) == 8) {						\
+			const typeof(RHS) rhs_var = (RHS);				\
+			__bpf_assert_op_sign(LHS, op, "r", rhs_var, VAL, supp_sign);	\
+		} else {								\
+			__bpf_assert_op_sign(LHS, op, "i", RHS, VAL, supp_sign);	\
+		}									\
+	 })
+
+/* Description
+ *	Assert that a conditional expression is true.
+ * Returns
+ *	Void.
+ * Throws
+ *	An exception with the value zero when the assertion fails.
+ */
+#define bpf_assert(cond) if (!(cond)) bpf_throw(0);
+
+/* Description
+ *	Assert that a conditional expression is true.
+ * Returns
+ *	Void.
+ * Throws
+ *	An exception with the specified value when the assertion fails.
+ */
+#define bpf_assert_with(cond, value) if (!(cond)) bpf_throw(value);
+
+/* Description
+ *	Assert that LHS is equal to RHS. This statement updates the known value
+ *	of LHS during verification. Note that RHS must be a constant value, and
+ *	must fit within the data type of LHS.
+ * Returns
+ *	Void.
+ * Throws
+ *	An exception with the value zero when the assertion fails.
+ */
+#define bpf_assert_eq(LHS, RHS)						\
+	({								\
+		barrier_var(LHS);					\
+		__bpf_assert_op(LHS, ==, RHS, 0, true);			\
+	})
+
+/* Description
+ *	Assert that LHS is equal to RHS. This statement updates the known value
+ *	of LHS during verification. Note that RHS must be a constant value, and
+ *	must fit within the data type of LHS.
+ * Returns
+ *	Void.
+ * Throws
+ *	An exception with the specified value when the assertion fails.
+ */
+#define bpf_assert_eq_with(LHS, RHS, value)				\
+	({								\
+		barrier_var(LHS);					\
+		__bpf_assert_op(LHS, ==, RHS, value, true);		\
+	})
+
+/* Description
+ *	Assert that LHS is less than RHS. This statement updates the known
+ *	bounds of LHS during verification. Note that RHS must be a constant
+ *	value, and must fit within the data type of LHS.
+ * Returns
+ *	Void.
+ * Throws
+ *	An exception with the value zero when the assertion fails.
+ */
+#define bpf_assert_lt(LHS, RHS)						\
+	({								\
+		barrier_var(LHS);					\
+		__bpf_assert_op(LHS, <, RHS, 0, false);			\
+	})
+
+/* Description
+ *	Assert that LHS is less than RHS. This statement updates the known
+ *	bounds of LHS during verification. Note that RHS must be a constant
+ *	value, and must fit within the data type of LHS.
+ * Returns
+ *	Void.
+ * Throws
+ *	An exception with the specified value when the assertion fails.
+ */
+#define bpf_assert_lt_with(LHS, RHS, value)				\
+	({								\
+		barrier_var(LHS);					\
+		__bpf_assert_op(LHS, <, RHS, value, false);		\
+	})
+
+/* Description
+ *	Assert that LHS is greater than RHS. This statement updates the known
+ *	bounds of LHS during verification. Note that RHS must be a constant
+ *	value, and must fit within the data type of LHS.
+ * Returns
+ *	Void.
+ * Throws
+ *	An exception with the value zero when the assertion fails.
+ */
+#define bpf_assert_gt(LHS, RHS)						\
+	({								\
+		barrier_var(LHS);					\
+		__bpf_assert_op(LHS, >, RHS, 0, false);			\
+	})
+
+/* Description
+ *	Assert that LHS is greater than RHS. This statement updates the known
+ *	bounds of LHS during verification. Note that RHS must be a constant
+ *	value, and must fit within the data type of LHS.
+ * Returns
+ *	Void.
+ * Throws
+ *	An exception with the specified value when the assertion fails.
+ */
+#define bpf_assert_gt_with(LHS, RHS, value)				\
+	({								\
+		barrier_var(LHS);					\
+		__bpf_assert_op(LHS, >, RHS, value, false);		\
+	})
+
+/* Description
+ *	Assert that LHS is less than or equal to RHS. This statement updates the
+ *	known bounds of LHS during verification. Note that RHS must be a
+ *	constant value, and must fit within the data type of LHS.
+ * Returns
+ *	Void.
+ * Throws
+ *	An exception with the value zero when the assertion fails.
+ */
+#define bpf_assert_le(LHS, RHS)						\
+	({								\
+		barrier_var(LHS);					\
+		__bpf_assert_op(LHS, <=, RHS, 0, false);		\
+	})
+
+/* Description
+ *	Assert that LHS is less than or equal to RHS. This statement updates the
+ *	known bounds of LHS during verification. Note that RHS must be a
+ *	constant value, and must fit within the data type of LHS.
+ * Returns
+ *	Void.
+ * Throws
+ *	An exception with the specified value when the assertion fails.
+ */
+#define bpf_assert_le_with(LHS, RHS, value)				\
+	({								\
+		barrier_var(LHS);					\
+		__bpf_assert_op(LHS, <=, RHS, value, false);		\
+	})
+
+/* Description
+ *	Assert that LHS is greater than or equal to RHS. This statement updates
+ *	the known bounds of LHS during verification. Note that RHS must be a
+ *	constant value, and must fit within the data type of LHS.
+ * Returns
+ *	Void.
+ * Throws
+ *	An exception with the value zero when the assertion fails.
+ */
+#define bpf_assert_ge(LHS, RHS)						\
+	({								\
+		barrier_var(LHS);					\
+		__bpf_assert_op(LHS, >=, RHS, 0, false);		\
+	})
+
+/* Description
+ *	Assert that LHS is greater than or equal to RHS. This statement updates
+ *	the known bounds of LHS during verification. Note that RHS must be a
+ *	constant value, and must fit within the data type of LHS.
+ * Returns
+ *	Void.
+ * Throws
+ *	An exception with the specified value when the assertion fails.
+ */
+#define bpf_assert_ge_with(LHS, RHS, value)				\
+	({								\
+		barrier_var(LHS);					\
+		__bpf_assert_op(LHS, >=, RHS, value, false);		\
+	})
+
+/* Description
+ *	Assert that LHS is in the range [BEG, END] (inclusive of both). This
+ *	statement updates the known bounds of LHS during verification. Note
+ *	that both BEG and END must be constant values, and must fit within the
+ *	data type of LHS.
+ * Returns
+ *	Void.
+ * Throws
+ *	An exception with the value zero when the assertion fails.
+ */
+#define bpf_assert_range(LHS, BEG, END)					\
+	({								\
+		_Static_assert(BEG <= END, "BEG must be <= END");	\
+		barrier_var(LHS);					\
+		__bpf_assert_op(LHS, >=, BEG, 0, false);		\
+		__bpf_assert_op(LHS, <=, END, 0, false);		\
+	})
+
+/* Description
+ *	Assert that LHS is in the range [BEG, END] (inclusive of both). This
+ *	statement updates the known bounds of LHS during verification. Note
+ *	that both BEG and END must be constant values, and must fit within the
+ *	data type of LHS.
+ * Returns
+ *	Void.
+ * Throws
+ *	An exception with the specified value when the assertion fails.
+ */
+#define bpf_assert_range_with(LHS, BEG, END, value)			\
+	({								\
+		_Static_assert(BEG <= END, "BEG must be <= END");	\
+		barrier_var(LHS);					\
+		__bpf_assert_op(LHS, >=, BEG, value, false);		\
+		__bpf_assert_op(LHS, <=, END, value, false);		\
+	})
+
 #endif
diff --git a/tools/testing/selftests/bpf/bpf_kfuncs.h b/tools/testing/selftests/bpf/bpf_kfuncs.h
index 642dda0e758a..5ca68ff0b59f 100644
--- a/tools/testing/selftests/bpf/bpf_kfuncs.h
+++ b/tools/testing/selftests/bpf/bpf_kfuncs.h
@@ -1,6 +1,8 @@
 #ifndef __BPF_KFUNCS__
 #define __BPF_KFUNCS__
 
+struct bpf_sock_addr_kern;
+
 /* Description
  *  Initializes an skb-type dynptr
  * Returns
@@ -41,4 +43,16 @@ extern bool bpf_dynptr_is_rdonly(const struct bpf_dynptr *ptr) __ksym;
 extern __u32 bpf_dynptr_size(const struct bpf_dynptr *ptr) __ksym;
 extern int bpf_dynptr_clone(const struct bpf_dynptr *ptr, struct bpf_dynptr *clone__init) __ksym;
 
+/* Description
+ *  Modify the address of a AF_UNIX sockaddr.
+ * Returns__bpf_kfunc
+ *  -EINVAL if the address size is too big or, 0 if the sockaddr was successfully modified.
+ */
+extern int bpf_sock_addr_set_sun_path(struct bpf_sock_addr_kern *sa_kern,
+				      const __u8 *sun_path, __u32 sun_path__sz) __ksym;
+
+void *bpf_cast_to_kern_ctx(void *) __ksym;
+
+void *bpf_rdonly_cast(void *obj, __u32 btf_id) __ksym;
+
 #endif
diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
index cefc5dd72573..a5e246f7b202 100644
--- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
+++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
@@ -138,6 +138,10 @@ __bpf_kfunc void bpf_iter_testmod_seq_destroy(struct bpf_iter_testmod_seq *it)
 	it->cnt = 0;
 }
 
+__bpf_kfunc void bpf_kfunc_common_test(void)
+{
+}
+
 struct bpf_testmod_btf_type_tag_1 {
 	int a;
 };
@@ -343,6 +347,7 @@ BTF_SET8_START(bpf_testmod_common_kfunc_ids)
 BTF_ID_FLAGS(func, bpf_iter_testmod_seq_new, KF_ITER_NEW)
 BTF_ID_FLAGS(func, bpf_iter_testmod_seq_next, KF_ITER_NEXT | KF_RET_NULL)
 BTF_ID_FLAGS(func, bpf_iter_testmod_seq_destroy, KF_ITER_DESTROY)
+BTF_ID_FLAGS(func, bpf_kfunc_common_test)
 BTF_SET8_END(bpf_testmod_common_kfunc_ids)
 
 static const struct btf_kfunc_id_set bpf_testmod_common_kfunc_set = {
diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h
index f5c5b1375c24..7c664dd61059 100644
--- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h
+++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h
@@ -104,4 +104,6 @@ void bpf_kfunc_call_test_fail1(struct prog_test_fail1 *p);
 void bpf_kfunc_call_test_fail2(struct prog_test_fail2 *p);
 void bpf_kfunc_call_test_fail3(struct prog_test_fail3 *p);
 void bpf_kfunc_call_test_mem_len_fail1(void *mem, int len);
+
+void bpf_kfunc_common_test(void) __ksym;
 #endif /* _BPF_TESTMOD_KFUNC_H */
diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c
index 2caee8423ee0..5b1da2a32ea7 100644
--- a/tools/testing/selftests/bpf/cgroup_helpers.c
+++ b/tools/testing/selftests/bpf/cgroup_helpers.c
@@ -49,6 +49,10 @@
 	snprintf(buf, sizeof(buf), "%s%s", NETCLS_MOUNT_PATH,	\
 		 CGROUP_WORK_DIR)
 
+static __thread bool cgroup_workdir_mounted;
+
+static void __cleanup_cgroup_environment(void);
+
 static int __enable_controllers(const char *cgroup_path, const char *controllers)
 {
 	char path[PATH_MAX + 1];
@@ -195,6 +199,11 @@ int setup_cgroup_environment(void)
 
 	format_cgroup_path(cgroup_workdir, "");
 
+	if (mkdir(CGROUP_MOUNT_PATH, 0777) && errno != EEXIST) {
+		log_err("mkdir mount");
+		return 1;
+	}
+
 	if (unshare(CLONE_NEWNS)) {
 		log_err("unshare");
 		return 1;
@@ -209,9 +218,10 @@ int setup_cgroup_environment(void)
 		log_err("mount cgroup2");
 		return 1;
 	}
+	cgroup_workdir_mounted = true;
 
 	/* Cleanup existing failed runs, now that the environment is setup */
-	cleanup_cgroup_environment();
+	__cleanup_cgroup_environment();
 
 	if (mkdir(cgroup_workdir, 0777) && errno != EEXIST) {
 		log_err("mkdir cgroup work dir");
@@ -306,10 +316,25 @@ int join_parent_cgroup(const char *relative_path)
 }
 
 /**
+ * __cleanup_cgroup_environment() - Delete temporary cgroups
+ *
+ * This is a helper for cleanup_cgroup_environment() that is responsible for
+ * deletion of all temporary cgroups that have been created during the test.
+ */
+static void __cleanup_cgroup_environment(void)
+{
+	char cgroup_workdir[PATH_MAX + 1];
+
+	format_cgroup_path(cgroup_workdir, "");
+	join_cgroup_from_top(CGROUP_MOUNT_PATH);
+	nftw(cgroup_workdir, nftwfunc, WALK_FD_LIMIT, FTW_DEPTH | FTW_MOUNT);
+}
+
+/**
  * cleanup_cgroup_environment() - Cleanup Cgroup Testing Environment
  *
  * This is an idempotent function to delete all temporary cgroups that
- * have been created during the test, including the cgroup testing work
+ * have been created during the test and unmount the cgroup testing work
  * directory.
  *
  * At call time, it moves the calling process to the root cgroup, and then
@@ -320,11 +345,10 @@ int join_parent_cgroup(const char *relative_path)
  */
 void cleanup_cgroup_environment(void)
 {
-	char cgroup_workdir[PATH_MAX + 1];
-
-	format_cgroup_path(cgroup_workdir, "");
-	join_cgroup_from_top(CGROUP_MOUNT_PATH);
-	nftw(cgroup_workdir, nftwfunc, WALK_FD_LIMIT, FTW_DEPTH | FTW_MOUNT);
+	__cleanup_cgroup_environment();
+	if (cgroup_workdir_mounted && umount(CGROUP_MOUNT_PATH))
+		log_err("umount cgroup2");
+	cgroup_workdir_mounted = false;
 }
 
 /**
diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index e41eb33b2704..02dd4409200e 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -84,3 +84,4 @@ CONFIG_USERFAULTFD=y
 CONFIG_VXLAN=y
 CONFIG_XDP_SOCKETS=y
 CONFIG_XFRM_INTERFACE=y
+CONFIG_VSOCKETS=y
diff --git a/tools/testing/selftests/bpf/liburandom_read.map b/tools/testing/selftests/bpf/liburandom_read.map
new file mode 100644
index 000000000000..38a97a419a04
--- /dev/null
+++ b/tools/testing/selftests/bpf/liburandom_read.map
@@ -0,0 +1,15 @@
+LIBURANDOM_READ_1.0.0 {
+	global:
+		urandlib_api;
+		urandlib_api_sameoffset;
+		urandlib_read_without_sema;
+		urandlib_read_with_sema;
+		urandlib_read_with_sema_semaphore;
+	local:
+		*;
+};
+
+LIBURANDOM_READ_2.0.0 {
+	global:
+		urandlib_api;
+} LIBURANDOM_READ_1.0.0;
diff --git a/tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c b/tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c
index 16f1671e4bde..66191ae9863c 100644
--- a/tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c
+++ b/tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c
@@ -33,11 +33,11 @@ static void create_inner_maps(enum bpf_map_type map_type,
 {
 	int map_fd, map_index, ret;
 	__u32 map_key = 0, map_id;
-	char map_name[15];
+	char map_name[16];
 
 	for (map_index = 0; map_index < OUTER_MAP_ENTRIES; map_index++) {
 		memset(map_name, 0, sizeof(map_name));
-		sprintf(map_name, "inner_map_fd_%d", map_index);
+		snprintf(map_name, sizeof(map_name), "inner_map_fd_%d", map_index);
 		map_fd = bpf_map_create(map_type, map_name, sizeof(__u32),
 					sizeof(__u32), 1, NULL);
 		CHECK(map_fd < 0,
diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c
index da72a3a66230..6db27a9088e9 100644
--- a/tools/testing/selftests/bpf/network_helpers.c
+++ b/tools/testing/selftests/bpf/network_helpers.c
@@ -11,6 +11,7 @@
 #include <arpa/inet.h>
 #include <sys/mount.h>
 #include <sys/stat.h>
+#include <sys/un.h>
 
 #include <linux/err.h>
 #include <linux/in.h>
@@ -257,6 +258,26 @@ static int connect_fd_to_addr(int fd,
 	return 0;
 }
 
+int connect_to_addr(const struct sockaddr_storage *addr, socklen_t addrlen, int type)
+{
+	int fd;
+
+	fd = socket(addr->ss_family, type, 0);
+	if (fd < 0) {
+		log_err("Failed to create client socket");
+		return -1;
+	}
+
+	if (connect_fd_to_addr(fd, addr, addrlen, false))
+		goto error_close;
+
+	return fd;
+
+error_close:
+	save_errno_close(fd);
+	return -1;
+}
+
 static const struct network_helper_opts default_opts;
 
 int connect_to_fd_opts(int server_fd, const struct network_helper_opts *opts)
@@ -380,6 +401,19 @@ int make_sockaddr(int family, const char *addr_str, __u16 port,
 		if (len)
 			*len = sizeof(*sin6);
 		return 0;
+	} else if (family == AF_UNIX) {
+		/* Note that we always use abstract unix sockets to avoid having
+		 * to clean up leftover files.
+		 */
+		struct sockaddr_un *sun = (void *)addr;
+
+		memset(addr, 0, sizeof(*sun));
+		sun->sun_family = family;
+		sun->sun_path[0] = 0;
+		strcpy(sun->sun_path + 1, addr_str);
+		if (len)
+			*len = offsetof(struct sockaddr_un, sun_path) + 1 + strlen(addr_str);
+		return 0;
 	}
 	return -1;
 }
diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h
index 5eccc67d1a99..34f1200a781b 100644
--- a/tools/testing/selftests/bpf/network_helpers.h
+++ b/tools/testing/selftests/bpf/network_helpers.h
@@ -51,6 +51,7 @@ int *start_reuseport_server(int family, int type, const char *addr_str,
 			    __u16 port, int timeout_ms,
 			    unsigned int nr_listens);
 void free_fds(int *fds, unsigned int nr_close_fds);
+int connect_to_addr(const struct sockaddr_storage *addr, socklen_t len, int type);
 int connect_to_fd(int server_fd, int timeout_ms);
 int connect_to_fd_opts(int server_fd, const struct network_helper_opts *opts);
 int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms);
diff --git a/tools/testing/selftests/bpf/prog_tests/align.c b/tools/testing/selftests/bpf/prog_tests/align.c
index b92770592563..465c1c3a3d3c 100644
--- a/tools/testing/selftests/bpf/prog_tests/align.c
+++ b/tools/testing/selftests/bpf/prog_tests/align.c
@@ -6,6 +6,7 @@
 
 struct bpf_reg_match {
 	unsigned int line;
+	const char *reg;
 	const char *match;
 };
 
@@ -39,13 +40,13 @@ static struct bpf_align_test tests[] = {
 		},
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 		.matches = {
-			{0, "R1=ctx(off=0,imm=0)"},
-			{0, "R10=fp0"},
-			{0, "R3_w=2"},
-			{1, "R3_w=4"},
-			{2, "R3_w=8"},
-			{3, "R3_w=16"},
-			{4, "R3_w=32"},
+			{0, "R1", "ctx(off=0,imm=0)"},
+			{0, "R10", "fp0"},
+			{0, "R3_w", "2"},
+			{1, "R3_w", "4"},
+			{2, "R3_w", "8"},
+			{3, "R3_w", "16"},
+			{4, "R3_w", "32"},
 		},
 	},
 	{
@@ -67,19 +68,19 @@ static struct bpf_align_test tests[] = {
 		},
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 		.matches = {
-			{0, "R1=ctx(off=0,imm=0)"},
-			{0, "R10=fp0"},
-			{0, "R3_w=1"},
-			{1, "R3_w=2"},
-			{2, "R3_w=4"},
-			{3, "R3_w=8"},
-			{4, "R3_w=16"},
-			{5, "R3_w=1"},
-			{6, "R4_w=32"},
-			{7, "R4_w=16"},
-			{8, "R4_w=8"},
-			{9, "R4_w=4"},
-			{10, "R4_w=2"},
+			{0, "R1", "ctx(off=0,imm=0)"},
+			{0, "R10", "fp0"},
+			{0, "R3_w", "1"},
+			{1, "R3_w", "2"},
+			{2, "R3_w", "4"},
+			{3, "R3_w", "8"},
+			{4, "R3_w", "16"},
+			{5, "R3_w", "1"},
+			{6, "R4_w", "32"},
+			{7, "R4_w", "16"},
+			{8, "R4_w", "8"},
+			{9, "R4_w", "4"},
+			{10, "R4_w", "2"},
 		},
 	},
 	{
@@ -96,14 +97,14 @@ static struct bpf_align_test tests[] = {
 		},
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 		.matches = {
-			{0, "R1=ctx(off=0,imm=0)"},
-			{0, "R10=fp0"},
-			{0, "R3_w=4"},
-			{1, "R3_w=8"},
-			{2, "R3_w=10"},
-			{3, "R4_w=8"},
-			{4, "R4_w=12"},
-			{5, "R4_w=14"},
+			{0, "R1", "ctx(off=0,imm=0)"},
+			{0, "R10", "fp0"},
+			{0, "R3_w", "4"},
+			{1, "R3_w", "8"},
+			{2, "R3_w", "10"},
+			{3, "R4_w", "8"},
+			{4, "R4_w", "12"},
+			{5, "R4_w", "14"},
 		},
 	},
 	{
@@ -118,12 +119,12 @@ static struct bpf_align_test tests[] = {
 		},
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 		.matches = {
-			{0, "R1=ctx(off=0,imm=0)"},
-			{0, "R10=fp0"},
-			{0, "R3_w=7"},
-			{1, "R3_w=7"},
-			{2, "R3_w=14"},
-			{3, "R3_w=56"},
+			{0, "R1", "ctx(off=0,imm=0)"},
+			{0, "R10", "fp0"},
+			{0, "R3_w", "7"},
+			{1, "R3_w", "7"},
+			{2, "R3_w", "14"},
+			{3, "R3_w", "56"},
 		},
 	},
 
@@ -161,19 +162,19 @@ static struct bpf_align_test tests[] = {
 		},
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 		.matches = {
-			{6, "R0_w=pkt(off=8,r=8,imm=0)"},
-			{6, "R3_w=scalar(umax=255,var_off=(0x0; 0xff))"},
-			{7, "R3_w=scalar(umax=510,var_off=(0x0; 0x1fe))"},
-			{8, "R3_w=scalar(umax=1020,var_off=(0x0; 0x3fc))"},
-			{9, "R3_w=scalar(umax=2040,var_off=(0x0; 0x7f8))"},
-			{10, "R3_w=scalar(umax=4080,var_off=(0x0; 0xff0))"},
-			{12, "R3_w=pkt_end(off=0,imm=0)"},
-			{17, "R4_w=scalar(umax=255,var_off=(0x0; 0xff))"},
-			{18, "R4_w=scalar(umax=8160,var_off=(0x0; 0x1fe0))"},
-			{19, "R4_w=scalar(umax=4080,var_off=(0x0; 0xff0))"},
-			{20, "R4_w=scalar(umax=2040,var_off=(0x0; 0x7f8))"},
-			{21, "R4_w=scalar(umax=1020,var_off=(0x0; 0x3fc))"},
-			{22, "R4_w=scalar(umax=510,var_off=(0x0; 0x1fe))"},
+			{6, "R0_w", "pkt(off=8,r=8,imm=0)"},
+			{6, "R3_w", "var_off=(0x0; 0xff)"},
+			{7, "R3_w", "var_off=(0x0; 0x1fe)"},
+			{8, "R3_w", "var_off=(0x0; 0x3fc)"},
+			{9, "R3_w", "var_off=(0x0; 0x7f8)"},
+			{10, "R3_w", "var_off=(0x0; 0xff0)"},
+			{12, "R3_w", "pkt_end(off=0,imm=0)"},
+			{17, "R4_w", "var_off=(0x0; 0xff)"},
+			{18, "R4_w", "var_off=(0x0; 0x1fe0)"},
+			{19, "R4_w", "var_off=(0x0; 0xff0)"},
+			{20, "R4_w", "var_off=(0x0; 0x7f8)"},
+			{21, "R4_w", "var_off=(0x0; 0x3fc)"},
+			{22, "R4_w", "var_off=(0x0; 0x1fe)"},
 		},
 	},
 	{
@@ -194,16 +195,16 @@ static struct bpf_align_test tests[] = {
 		},
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 		.matches = {
-			{6, "R3_w=scalar(umax=255,var_off=(0x0; 0xff))"},
-			{7, "R4_w=scalar(id=1,umax=255,var_off=(0x0; 0xff))"},
-			{8, "R4_w=scalar(umax=255,var_off=(0x0; 0xff))"},
-			{9, "R4_w=scalar(id=1,umax=255,var_off=(0x0; 0xff))"},
-			{10, "R4_w=scalar(umax=510,var_off=(0x0; 0x1fe))"},
-			{11, "R4_w=scalar(id=1,umax=255,var_off=(0x0; 0xff))"},
-			{12, "R4_w=scalar(umax=1020,var_off=(0x0; 0x3fc))"},
-			{13, "R4_w=scalar(id=1,umax=255,var_off=(0x0; 0xff))"},
-			{14, "R4_w=scalar(umax=2040,var_off=(0x0; 0x7f8))"},
-			{15, "R4_w=scalar(umax=4080,var_off=(0x0; 0xff0))"},
+			{6, "R3_w", "var_off=(0x0; 0xff)"},
+			{7, "R4_w", "var_off=(0x0; 0xff)"},
+			{8, "R4_w", "var_off=(0x0; 0xff)"},
+			{9, "R4_w", "var_off=(0x0; 0xff)"},
+			{10, "R4_w", "var_off=(0x0; 0x1fe)"},
+			{11, "R4_w", "var_off=(0x0; 0xff)"},
+			{12, "R4_w", "var_off=(0x0; 0x3fc)"},
+			{13, "R4_w", "var_off=(0x0; 0xff)"},
+			{14, "R4_w", "var_off=(0x0; 0x7f8)"},
+			{15, "R4_w", "var_off=(0x0; 0xff0)"},
 		},
 	},
 	{
@@ -234,14 +235,14 @@ static struct bpf_align_test tests[] = {
 		},
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 		.matches = {
-			{2, "R5_w=pkt(off=0,r=0,imm=0)"},
-			{4, "R5_w=pkt(off=14,r=0,imm=0)"},
-			{5, "R4_w=pkt(off=14,r=0,imm=0)"},
-			{9, "R2=pkt(off=0,r=18,imm=0)"},
-			{10, "R5=pkt(off=14,r=18,imm=0)"},
-			{10, "R4_w=scalar(umax=255,var_off=(0x0; 0xff))"},
-			{13, "R4_w=scalar(umax=65535,var_off=(0x0; 0xffff))"},
-			{14, "R4_w=scalar(umax=65535,var_off=(0x0; 0xffff))"},
+			{2, "R5_w", "pkt(off=0,r=0,imm=0)"},
+			{4, "R5_w", "pkt(off=14,r=0,imm=0)"},
+			{5, "R4_w", "pkt(off=14,r=0,imm=0)"},
+			{9, "R2", "pkt(off=0,r=18,imm=0)"},
+			{10, "R5", "pkt(off=14,r=18,imm=0)"},
+			{10, "R4_w", "var_off=(0x0; 0xff)"},
+			{13, "R4_w", "var_off=(0x0; 0xffff)"},
+			{14, "R4_w", "var_off=(0x0; 0xffff)"},
 		},
 	},
 	{
@@ -298,20 +299,20 @@ static struct bpf_align_test tests[] = {
 			/* Calculated offset in R6 has unknown value, but known
 			 * alignment of 4.
 			 */
-			{6, "R2_w=pkt(off=0,r=8,imm=0)"},
-			{7, "R6_w=scalar(umax=1020,var_off=(0x0; 0x3fc))"},
+			{6, "R2_w", "pkt(off=0,r=8,imm=0)"},
+			{7, "R6_w", "var_off=(0x0; 0x3fc)"},
 			/* Offset is added to packet pointer R5, resulting in
 			 * known fixed offset, and variable offset from R6.
 			 */
-			{11, "R5_w=pkt(id=1,off=14,r=0,umax=1020,var_off=(0x0; 0x3fc))"},
+			{11, "R5_w", "pkt(id=1,off=14,"},
 			/* At the time the word size load is performed from R5,
 			 * it's total offset is NET_IP_ALIGN + reg->off (0) +
 			 * reg->aux_off (14) which is 16.  Then the variable
 			 * offset is considered using reg->aux_off_align which
 			 * is 4 and meets the load's requirements.
 			 */
-			{15, "R4=pkt(id=1,off=18,r=18,umax=1020,var_off=(0x0; 0x3fc))"},
-			{15, "R5=pkt(id=1,off=14,r=18,umax=1020,var_off=(0x0; 0x3fc))"},
+			{15, "R4", "var_off=(0x0; 0x3fc)"},
+			{15, "R5", "var_off=(0x0; 0x3fc)"},
 			/* Variable offset is added to R5 packet pointer,
 			 * resulting in auxiliary alignment of 4. To avoid BPF
 			 * verifier's precision backtracking logging
@@ -319,46 +320,46 @@ static struct bpf_align_test tests[] = {
 			 * instruction to validate R5 state. We also check
 			 * that R4 is what it should be in such case.
 			 */
-			{18, "R4_w=pkt(id=2,off=0,r=0,umax=1020,var_off=(0x0; 0x3fc))"},
-			{18, "R5_w=pkt(id=2,off=0,r=0,umax=1020,var_off=(0x0; 0x3fc))"},
+			{18, "R4_w", "var_off=(0x0; 0x3fc)"},
+			{18, "R5_w", "var_off=(0x0; 0x3fc)"},
 			/* Constant offset is added to R5, resulting in
 			 * reg->off of 14.
 			 */
-			{19, "R5_w=pkt(id=2,off=14,r=0,umax=1020,var_off=(0x0; 0x3fc))"},
+			{19, "R5_w", "pkt(id=2,off=14,"},
 			/* At the time the word size load is performed from R5,
 			 * its total fixed offset is NET_IP_ALIGN + reg->off
 			 * (14) which is 16.  Then the variable offset is 4-byte
 			 * aligned, so the total offset is 4-byte aligned and
 			 * meets the load's requirements.
 			 */
-			{24, "R4=pkt(id=2,off=18,r=18,umax=1020,var_off=(0x0; 0x3fc))"},
-			{24, "R5=pkt(id=2,off=14,r=18,umax=1020,var_off=(0x0; 0x3fc))"},
+			{24, "R4", "var_off=(0x0; 0x3fc)"},
+			{24, "R5", "var_off=(0x0; 0x3fc)"},
 			/* Constant offset is added to R5 packet pointer,
 			 * resulting in reg->off value of 14.
 			 */
-			{26, "R5_w=pkt(off=14,r=8"},
+			{26, "R5_w", "pkt(off=14,r=8,"},
 			/* Variable offset is added to R5, resulting in a
 			 * variable offset of (4n). See comment for insn #18
 			 * for R4 = R5 trick.
 			 */
-			{28, "R4_w=pkt(id=3,off=14,r=0,umax=1020,var_off=(0x0; 0x3fc))"},
-			{28, "R5_w=pkt(id=3,off=14,r=0,umax=1020,var_off=(0x0; 0x3fc))"},
+			{28, "R4_w", "var_off=(0x0; 0x3fc)"},
+			{28, "R5_w", "var_off=(0x0; 0x3fc)"},
 			/* Constant is added to R5 again, setting reg->off to 18. */
-			{29, "R5_w=pkt(id=3,off=18,r=0,umax=1020,var_off=(0x0; 0x3fc))"},
+			{29, "R5_w", "pkt(id=3,off=18,"},
 			/* And once more we add a variable; resulting var_off
 			 * is still (4n), fixed offset is not changed.
 			 * Also, we create a new reg->id.
 			 */
-			{31, "R4_w=pkt(id=4,off=18,r=0,umax=2040,var_off=(0x0; 0x7fc)"},
-			{31, "R5_w=pkt(id=4,off=18,r=0,umax=2040,var_off=(0x0; 0x7fc)"},
+			{31, "R4_w", "var_off=(0x0; 0x7fc)"},
+			{31, "R5_w", "var_off=(0x0; 0x7fc)"},
 			/* At the time the word size load is performed from R5,
 			 * its total fixed offset is NET_IP_ALIGN + reg->off (18)
 			 * which is 20.  Then the variable offset is (4n), so
 			 * the total offset is 4-byte aligned and meets the
 			 * load's requirements.
 			 */
-			{35, "R4=pkt(id=4,off=22,r=22,umax=2040,var_off=(0x0; 0x7fc)"},
-			{35, "R5=pkt(id=4,off=18,r=22,umax=2040,var_off=(0x0; 0x7fc)"},
+			{35, "R4", "var_off=(0x0; 0x7fc)"},
+			{35, "R5", "var_off=(0x0; 0x7fc)"},
 		},
 	},
 	{
@@ -396,36 +397,36 @@ static struct bpf_align_test tests[] = {
 			/* Calculated offset in R6 has unknown value, but known
 			 * alignment of 4.
 			 */
-			{6, "R2_w=pkt(off=0,r=8,imm=0)"},
-			{7, "R6_w=scalar(umax=1020,var_off=(0x0; 0x3fc))"},
+			{6, "R2_w", "pkt(off=0,r=8,imm=0)"},
+			{7, "R6_w", "var_off=(0x0; 0x3fc)"},
 			/* Adding 14 makes R6 be (4n+2) */
-			{8, "R6_w=scalar(umin=14,umax=1034,var_off=(0x2; 0x7fc))"},
+			{8, "R6_w", "var_off=(0x2; 0x7fc)"},
 			/* Packet pointer has (4n+2) offset */
-			{11, "R5_w=pkt(id=1,off=0,r=0,umin=14,umax=1034,var_off=(0x2; 0x7fc)"},
-			{12, "R4=pkt(id=1,off=4,r=0,umin=14,umax=1034,var_off=(0x2; 0x7fc)"},
+			{11, "R5_w", "var_off=(0x2; 0x7fc)"},
+			{12, "R4", "var_off=(0x2; 0x7fc)"},
 			/* At the time the word size load is performed from R5,
 			 * its total fixed offset is NET_IP_ALIGN + reg->off (0)
 			 * which is 2.  Then the variable offset is (4n+2), so
 			 * the total offset is 4-byte aligned and meets the
 			 * load's requirements.
 			 */
-			{15, "R5=pkt(id=1,off=0,r=4,umin=14,umax=1034,var_off=(0x2; 0x7fc)"},
+			{15, "R5", "var_off=(0x2; 0x7fc)"},
 			/* Newly read value in R6 was shifted left by 2, so has
 			 * known alignment of 4.
 			 */
-			{17, "R6_w=scalar(umax=1020,var_off=(0x0; 0x3fc))"},
+			{17, "R6_w", "var_off=(0x0; 0x3fc)"},
 			/* Added (4n) to packet pointer's (4n+2) var_off, giving
 			 * another (4n+2).
 			 */
-			{19, "R5_w=pkt(id=2,off=0,r=0,umin=14,umax=2054,var_off=(0x2; 0xffc)"},
-			{20, "R4=pkt(id=2,off=4,r=0,umin=14,umax=2054,var_off=(0x2; 0xffc)"},
+			{19, "R5_w", "var_off=(0x2; 0xffc)"},
+			{20, "R4", "var_off=(0x2; 0xffc)"},
 			/* At the time the word size load is performed from R5,
 			 * its total fixed offset is NET_IP_ALIGN + reg->off (0)
 			 * which is 2.  Then the variable offset is (4n+2), so
 			 * the total offset is 4-byte aligned and meets the
 			 * load's requirements.
 			 */
-			{23, "R5=pkt(id=2,off=0,r=4,umin=14,umax=2054,var_off=(0x2; 0xffc)"},
+			{23, "R5", "var_off=(0x2; 0xffc)"},
 		},
 	},
 	{
@@ -458,18 +459,18 @@ static struct bpf_align_test tests[] = {
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 		.result = REJECT,
 		.matches = {
-			{3, "R5_w=pkt_end(off=0,imm=0)"},
+			{3, "R5_w", "pkt_end(off=0,imm=0)"},
 			/* (ptr - ptr) << 2 == unknown, (4n) */
-			{5, "R5_w=scalar(smax=9223372036854775804,umax=18446744073709551612,var_off=(0x0; 0xfffffffffffffffc)"},
+			{5, "R5_w", "var_off=(0x0; 0xfffffffffffffffc)"},
 			/* (4n) + 14 == (4n+2).  We blow our bounds, because
 			 * the add could overflow.
 			 */
-			{6, "R5_w=scalar(smin=-9223372036854775806,smax=9223372036854775806,umin=2,umax=18446744073709551614,var_off=(0x2; 0xfffffffffffffffc)"},
+			{6, "R5_w", "var_off=(0x2; 0xfffffffffffffffc)"},
 			/* Checked s>=0 */
-			{9, "R5=scalar(umin=2,umax=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc)"},
+			{9, "R5", "var_off=(0x2; 0x7ffffffffffffffc)"},
 			/* packet pointer + nonnegative (4n+2) */
-			{11, "R6_w=pkt(id=1,off=0,r=0,umin=2,umax=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc)"},
-			{12, "R4_w=pkt(id=1,off=4,r=0,umin=2,umax=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc)"},
+			{11, "R6_w", "var_off=(0x2; 0x7ffffffffffffffc)"},
+			{12, "R4_w", "var_off=(0x2; 0x7ffffffffffffffc)"},
 			/* NET_IP_ALIGN + (4n+2) == (4n), alignment is fine.
 			 * We checked the bounds, but it might have been able
 			 * to overflow if the packet pointer started in the
@@ -477,7 +478,7 @@ static struct bpf_align_test tests[] = {
 			 * So we did not get a 'range' on R6, and the access
 			 * attempt will fail.
 			 */
-			{15, "R6_w=pkt(id=1,off=0,r=0,umin=2,umax=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc)"},
+			{15, "R6_w", "var_off=(0x2; 0x7ffffffffffffffc)"},
 		}
 	},
 	{
@@ -512,24 +513,23 @@ static struct bpf_align_test tests[] = {
 			/* Calculated offset in R6 has unknown value, but known
 			 * alignment of 4.
 			 */
-			{6, "R2_w=pkt(off=0,r=8,imm=0)"},
-			{8, "R6_w=scalar(umax=1020,var_off=(0x0; 0x3fc))"},
+			{6, "R2_w", "pkt(off=0,r=8,imm=0)"},
+			{8, "R6_w", "var_off=(0x0; 0x3fc)"},
 			/* Adding 14 makes R6 be (4n+2) */
-			{9, "R6_w=scalar(umin=14,umax=1034,var_off=(0x2; 0x7fc))"},
+			{9, "R6_w", "var_off=(0x2; 0x7fc)"},
 			/* New unknown value in R7 is (4n) */
-			{10, "R7_w=scalar(umax=1020,var_off=(0x0; 0x3fc))"},
+			{10, "R7_w", "var_off=(0x0; 0x3fc)"},
 			/* Subtracting it from R6 blows our unsigned bounds */
-			{11, "R6=scalar(smin=-1006,smax=1034,umin=2,umax=18446744073709551614,var_off=(0x2; 0xfffffffffffffffc)"},
+			{11, "R6", "var_off=(0x2; 0xfffffffffffffffc)"},
 			/* Checked s>= 0 */
-			{14, "R6=scalar(umin=2,umax=1034,var_off=(0x2; 0x7fc))"},
+			{14, "R6", "var_off=(0x2; 0x7fc)"},
 			/* At the time the word size load is performed from R5,
 			 * its total fixed offset is NET_IP_ALIGN + reg->off (0)
 			 * which is 2.  Then the variable offset is (4n+2), so
 			 * the total offset is 4-byte aligned and meets the
 			 * load's requirements.
 			 */
-			{20, "R5=pkt(id=2,off=0,r=4,umin=2,umax=1034,var_off=(0x2; 0x7fc)"},
-
+			{20, "R5", "var_off=(0x2; 0x7fc)"},
 		},
 	},
 	{
@@ -566,23 +566,23 @@ static struct bpf_align_test tests[] = {
 			/* Calculated offset in R6 has unknown value, but known
 			 * alignment of 4.
 			 */
-			{6, "R2_w=pkt(off=0,r=8,imm=0)"},
-			{9, "R6_w=scalar(umax=60,var_off=(0x0; 0x3c))"},
+			{6, "R2_w", "pkt(off=0,r=8,imm=0)"},
+			{9, "R6_w", "var_off=(0x0; 0x3c)"},
 			/* Adding 14 makes R6 be (4n+2) */
-			{10, "R6_w=scalar(umin=14,umax=74,var_off=(0x2; 0x7c))"},
+			{10, "R6_w", "var_off=(0x2; 0x7c)"},
 			/* Subtracting from packet pointer overflows ubounds */
-			{13, "R5_w=pkt(id=2,off=0,r=8,umin=18446744073709551542,umax=18446744073709551602,var_off=(0xffffffffffffff82; 0x7c)"},
+			{13, "R5_w", "var_off=(0xffffffffffffff82; 0x7c)"},
 			/* New unknown value in R7 is (4n), >= 76 */
-			{14, "R7_w=scalar(umin=76,umax=1096,var_off=(0x0; 0x7fc))"},
+			{14, "R7_w", "var_off=(0x0; 0x7fc)"},
 			/* Adding it to packet pointer gives nice bounds again */
-			{16, "R5_w=pkt(id=3,off=0,r=0,umin=2,umax=1082,var_off=(0x2; 0x7fc)"},
+			{16, "R5_w", "var_off=(0x2; 0x7fc)"},
 			/* At the time the word size load is performed from R5,
 			 * its total fixed offset is NET_IP_ALIGN + reg->off (0)
 			 * which is 2.  Then the variable offset is (4n+2), so
 			 * the total offset is 4-byte aligned and meets the
 			 * load's requirements.
 			 */
-			{20, "R5=pkt(id=3,off=0,r=4,umin=2,umax=1082,var_off=(0x2; 0x7fc)"},
+			{20, "R5", "var_off=(0x2; 0x7fc)"},
 		},
 	},
 };
@@ -635,6 +635,7 @@ static int do_test_single(struct bpf_align_test *test)
 		line_ptr = strtok(bpf_vlog_copy, "\n");
 		for (i = 0; i < MAX_MATCHES; i++) {
 			struct bpf_reg_match m = test->matches[i];
+			const char *p;
 			int tmp;
 
 			if (!m.match)
@@ -649,8 +650,8 @@ static int do_test_single(struct bpf_align_test *test)
 				line_ptr = strtok(NULL, "\n");
 			}
 			if (!line_ptr) {
-				printf("Failed to find line %u for match: %s\n",
-				       m.line, m.match);
+				printf("Failed to find line %u for match: %s=%s\n",
+				       m.line, m.reg, m.match);
 				ret = 1;
 				printf("%s", bpf_vlog);
 				break;
@@ -667,15 +668,15 @@ static int do_test_single(struct bpf_align_test *test)
 			 *   6: R0_w=pkt(off=8,r=8,imm=0) R1=ctx(off=0,imm=0) R2_w=pkt(off=0,r=8,imm=0) R3_w=pkt_end(off=0,imm=0) R10=fp0
 			 *   6: (71) r3 = *(u8 *)(r2 +0)           ; R2_w=pkt(off=0,r=8,imm=0) R3_w=scalar(umax=255,var_off=(0x0; 0xff))
 			 */
-			while (!strstr(line_ptr, m.match)) {
+			while (!(p = strstr(line_ptr, m.reg)) || !strstr(p, m.match)) {
 				cur_line = -1;
 				line_ptr = strtok(NULL, "\n");
 				sscanf(line_ptr ?: "", "%u: ", &cur_line);
 				if (!line_ptr || cur_line != m.line)
 					break;
 			}
-			if (cur_line != m.line || !line_ptr || !strstr(line_ptr, m.match)) {
-				printf("Failed to find match %u: %s\n", m.line, m.match);
+			if (cur_line != m.line || !line_ptr || !(p = strstr(line_ptr, m.reg)) || !strstr(p, m.match)) {
+				printf("Failed to find match %u: %s=%s\n", m.line, m.reg, m.match);
 				ret = 1;
 				printf("%s", bpf_vlog);
 				break;
diff --git a/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c b/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c
index d2d9e965eba5..053f4d6da77a 100644
--- a/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c
+++ b/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c
@@ -193,8 +193,8 @@ error:
 
 void test_bloom_filter_map(void)
 {
-	__u32 *rand_vals, nr_rand_vals;
-	struct bloom_filter_map *skel;
+	__u32 *rand_vals = NULL, nr_rand_vals = 0;
+	struct bloom_filter_map *skel = NULL;
 	int err;
 
 	test_fail_cases();
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
index 1f02168103dd..41aba139b20b 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -10,7 +10,7 @@
 #include "bpf_iter_task.skel.h"
 #include "bpf_iter_task_stack.skel.h"
 #include "bpf_iter_task_file.skel.h"
-#include "bpf_iter_task_vma.skel.h"
+#include "bpf_iter_task_vmas.skel.h"
 #include "bpf_iter_task_btf.skel.h"
 #include "bpf_iter_tcp4.skel.h"
 #include "bpf_iter_tcp6.skel.h"
@@ -1399,19 +1399,19 @@ static void str_strip_first_line(char *str)
 static void test_task_vma_common(struct bpf_iter_attach_opts *opts)
 {
 	int err, iter_fd = -1, proc_maps_fd = -1;
-	struct bpf_iter_task_vma *skel;
+	struct bpf_iter_task_vmas *skel;
 	int len, read_size = 4;
 	char maps_path[64];
 
-	skel = bpf_iter_task_vma__open();
-	if (!ASSERT_OK_PTR(skel, "bpf_iter_task_vma__open"))
+	skel = bpf_iter_task_vmas__open();
+	if (!ASSERT_OK_PTR(skel, "bpf_iter_task_vmas__open"))
 		return;
 
 	skel->bss->pid = getpid();
 	skel->bss->one_task = opts ? 1 : 0;
 
-	err = bpf_iter_task_vma__load(skel);
-	if (!ASSERT_OK(err, "bpf_iter_task_vma__load"))
+	err = bpf_iter_task_vmas__load(skel);
+	if (!ASSERT_OK(err, "bpf_iter_task_vmas__load"))
 		goto out;
 
 	skel->links.proc_maps = bpf_program__attach_iter(
@@ -1462,25 +1462,25 @@ static void test_task_vma_common(struct bpf_iter_attach_opts *opts)
 out:
 	close(proc_maps_fd);
 	close(iter_fd);
-	bpf_iter_task_vma__destroy(skel);
+	bpf_iter_task_vmas__destroy(skel);
 }
 
 static void test_task_vma_dead_task(void)
 {
-	struct bpf_iter_task_vma *skel;
+	struct bpf_iter_task_vmas *skel;
 	int wstatus, child_pid = -1;
 	time_t start_tm, cur_tm;
 	int err, iter_fd = -1;
 	int wait_sec = 3;
 
-	skel = bpf_iter_task_vma__open();
-	if (!ASSERT_OK_PTR(skel, "bpf_iter_task_vma__open"))
+	skel = bpf_iter_task_vmas__open();
+	if (!ASSERT_OK_PTR(skel, "bpf_iter_task_vmas__open"))
 		return;
 
 	skel->bss->pid = getpid();
 
-	err = bpf_iter_task_vma__load(skel);
-	if (!ASSERT_OK(err, "bpf_iter_task_vma__load"))
+	err = bpf_iter_task_vmas__load(skel);
+	if (!ASSERT_OK(err, "bpf_iter_task_vmas__load"))
 		goto out;
 
 	skel->links.proc_maps = bpf_program__attach_iter(
@@ -1533,7 +1533,7 @@ static void test_task_vma_dead_task(void)
 out:
 	waitpid(child_pid, &wstatus, 0);
 	close(iter_fd);
-	bpf_iter_task_vma__destroy(skel);
+	bpf_iter_task_vmas__destroy(skel);
 }
 
 void test_bpf_sockmap_map_iter_fd(void)
diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c
index 4e0cdb593318..92d51f377fe5 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf.c
@@ -7296,7 +7296,7 @@ static struct btf_dedup_test dedup_tests[] = {
 			BTF_FUNC_PROTO_ENC(0, 2),			/* [3] */
 				BTF_FUNC_PROTO_ARG_ENC(NAME_NTH(2), 1),
 				BTF_FUNC_PROTO_ARG_ENC(NAME_NTH(3), 1),
-			BTF_FUNC_ENC(NAME_NTH(4), 2),			/* [4] */
+			BTF_FUNC_ENC(NAME_NTH(4), 3),			/* [4] */
 			/* tag -> t */
 			BTF_DECL_TAG_ENC(NAME_NTH(5), 2, -1),		/* [5] */
 			BTF_DECL_TAG_ENC(NAME_NTH(5), 2, -1),		/* [6] */
@@ -7317,7 +7317,7 @@ static struct btf_dedup_test dedup_tests[] = {
 			BTF_FUNC_PROTO_ENC(0, 2),			/* [3] */
 				BTF_FUNC_PROTO_ARG_ENC(NAME_NTH(2), 1),
 				BTF_FUNC_PROTO_ARG_ENC(NAME_NTH(3), 1),
-			BTF_FUNC_ENC(NAME_NTH(4), 2),			/* [4] */
+			BTF_FUNC_ENC(NAME_NTH(4), 3),			/* [4] */
 			BTF_DECL_TAG_ENC(NAME_NTH(5), 2, -1),		/* [5] */
 			BTF_DECL_TAG_ENC(NAME_NTH(5), 4, -1),		/* [6] */
 			BTF_DECL_TAG_ENC(NAME_NTH(5), 4, 1),		/* [7] */
diff --git a/tools/testing/selftests/bpf/prog_tests/connect_ping.c b/tools/testing/selftests/bpf/prog_tests/connect_ping.c
index 289218c2216c..40fe571f2fe7 100644
--- a/tools/testing/selftests/bpf/prog_tests/connect_ping.c
+++ b/tools/testing/selftests/bpf/prog_tests/connect_ping.c
@@ -28,9 +28,9 @@ static void subtest(int cgroup_fd, struct connect_ping *skel,
 		.sin6_family = AF_INET6,
 		.sin6_addr = IN6ADDR_LOOPBACK_INIT,
 	};
-	struct sockaddr *sa;
+	struct sockaddr *sa = NULL;
 	socklen_t sa_len;
-	int protocol;
+	int protocol = -1;
 	int sock_fd;
 
 	switch (family) {
diff --git a/tools/testing/selftests/bpf/prog_tests/exceptions.c b/tools/testing/selftests/bpf/prog_tests/exceptions.c
new file mode 100644
index 000000000000..516f4a13013c
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/exceptions.c
@@ -0,0 +1,409 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <network_helpers.h>
+
+#include "exceptions.skel.h"
+#include "exceptions_ext.skel.h"
+#include "exceptions_fail.skel.h"
+#include "exceptions_assert.skel.h"
+
+static char log_buf[1024 * 1024];
+
+static void test_exceptions_failure(void)
+{
+	RUN_TESTS(exceptions_fail);
+}
+
+static void test_exceptions_success(void)
+{
+	LIBBPF_OPTS(bpf_test_run_opts, ropts,
+		.data_in = &pkt_v4,
+		.data_size_in = sizeof(pkt_v4),
+		.repeat = 1,
+	);
+	struct exceptions_ext *eskel = NULL;
+	struct exceptions *skel;
+	int ret;
+
+	skel = exceptions__open();
+	if (!ASSERT_OK_PTR(skel, "exceptions__open"))
+		return;
+
+	ret = exceptions__load(skel);
+	if (!ASSERT_OK(ret, "exceptions__load"))
+		goto done;
+
+	if (!ASSERT_OK(bpf_map_update_elem(bpf_map__fd(skel->maps.jmp_table), &(int){0},
+					   &(int){bpf_program__fd(skel->progs.exception_tail_call_target)}, BPF_ANY),
+		       "bpf_map_update_elem jmp_table"))
+		goto done;
+
+#define RUN_SUCCESS(_prog, return_val)						  \
+	if (!test__start_subtest(#_prog)) goto _prog##_##return_val;		  \
+	ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs._prog), &ropts); \
+	ASSERT_OK(ret, #_prog " prog run ret");					  \
+	ASSERT_EQ(ropts.retval, return_val, #_prog " prog run retval");		  \
+	_prog##_##return_val:
+
+	RUN_SUCCESS(exception_throw_always_1, 64);
+	RUN_SUCCESS(exception_throw_always_2, 32);
+	RUN_SUCCESS(exception_throw_unwind_1, 16);
+	RUN_SUCCESS(exception_throw_unwind_2, 32);
+	RUN_SUCCESS(exception_throw_default, 0);
+	RUN_SUCCESS(exception_throw_default_value, 5);
+	RUN_SUCCESS(exception_tail_call, 24);
+	RUN_SUCCESS(exception_ext, 0);
+	RUN_SUCCESS(exception_ext_mod_cb_runtime, 35);
+	RUN_SUCCESS(exception_throw_subprog, 1);
+	RUN_SUCCESS(exception_assert_nz_gfunc, 1);
+	RUN_SUCCESS(exception_assert_zero_gfunc, 1);
+	RUN_SUCCESS(exception_assert_neg_gfunc, 1);
+	RUN_SUCCESS(exception_assert_pos_gfunc, 1);
+	RUN_SUCCESS(exception_assert_negeq_gfunc, 1);
+	RUN_SUCCESS(exception_assert_poseq_gfunc, 1);
+	RUN_SUCCESS(exception_assert_nz_gfunc_with, 1);
+	RUN_SUCCESS(exception_assert_zero_gfunc_with, 1);
+	RUN_SUCCESS(exception_assert_neg_gfunc_with, 1);
+	RUN_SUCCESS(exception_assert_pos_gfunc_with, 1);
+	RUN_SUCCESS(exception_assert_negeq_gfunc_with, 1);
+	RUN_SUCCESS(exception_assert_poseq_gfunc_with, 1);
+	RUN_SUCCESS(exception_bad_assert_nz_gfunc, 0);
+	RUN_SUCCESS(exception_bad_assert_zero_gfunc, 0);
+	RUN_SUCCESS(exception_bad_assert_neg_gfunc, 0);
+	RUN_SUCCESS(exception_bad_assert_pos_gfunc, 0);
+	RUN_SUCCESS(exception_bad_assert_negeq_gfunc, 0);
+	RUN_SUCCESS(exception_bad_assert_poseq_gfunc, 0);
+	RUN_SUCCESS(exception_bad_assert_nz_gfunc_with, 100);
+	RUN_SUCCESS(exception_bad_assert_zero_gfunc_with, 105);
+	RUN_SUCCESS(exception_bad_assert_neg_gfunc_with, 200);
+	RUN_SUCCESS(exception_bad_assert_pos_gfunc_with, 0);
+	RUN_SUCCESS(exception_bad_assert_negeq_gfunc_with, 101);
+	RUN_SUCCESS(exception_bad_assert_poseq_gfunc_with, 99);
+	RUN_SUCCESS(exception_assert_range, 1);
+	RUN_SUCCESS(exception_assert_range_with, 1);
+	RUN_SUCCESS(exception_bad_assert_range, 0);
+	RUN_SUCCESS(exception_bad_assert_range_with, 10);
+
+#define RUN_EXT(load_ret, attach_err, expr, msg, after_link)			  \
+	{									  \
+		LIBBPF_OPTS(bpf_object_open_opts, o, .kernel_log_buf = log_buf,		 \
+						     .kernel_log_size = sizeof(log_buf), \
+						     .kernel_log_level = 2);		 \
+		exceptions_ext__destroy(eskel);					  \
+		eskel = exceptions_ext__open_opts(&o);				  \
+		struct bpf_program *prog = NULL;				  \
+		struct bpf_link *link = NULL;					  \
+		if (!ASSERT_OK_PTR(eskel, "exceptions_ext__open"))		  \
+			goto done;						  \
+		(expr);								  \
+		ASSERT_OK_PTR(bpf_program__name(prog), bpf_program__name(prog));  \
+		if (!ASSERT_EQ(exceptions_ext__load(eskel), load_ret,		  \
+			       "exceptions_ext__load"))	{			  \
+			printf("%s\n", log_buf);				  \
+			goto done;						  \
+		}								  \
+		if (load_ret != 0) {						  \
+			if (!ASSERT_OK_PTR(strstr(log_buf, msg), "strstr")) {	  \
+				printf("%s\n", log_buf);			  \
+				goto done;					  \
+			}							  \
+		}								  \
+		if (!load_ret && attach_err) {					  \
+			if (!ASSERT_ERR_PTR(link = bpf_program__attach(prog), "attach err")) \
+				goto done;					  \
+		} else if (!load_ret) {						  \
+			if (!ASSERT_OK_PTR(link = bpf_program__attach(prog), "attach ok"))  \
+				goto done;					  \
+			(void)(after_link);					  \
+			bpf_link__destroy(link);				  \
+		}								  \
+	}
+
+	if (test__start_subtest("non-throwing fentry -> exception_cb"))
+		RUN_EXT(-EINVAL, true, ({
+			prog = eskel->progs.pfentry;
+			bpf_program__set_autoload(prog, true);
+			if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+				       bpf_program__fd(skel->progs.exception_ext_mod_cb_runtime),
+				       "exception_cb_mod"), "set_attach_target"))
+				goto done;
+		}), "FENTRY/FEXIT programs cannot attach to exception callback", 0);
+
+	if (test__start_subtest("throwing fentry -> exception_cb"))
+		RUN_EXT(-EINVAL, true, ({
+			prog = eskel->progs.throwing_fentry;
+			bpf_program__set_autoload(prog, true);
+			if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+				       bpf_program__fd(skel->progs.exception_ext_mod_cb_runtime),
+				       "exception_cb_mod"), "set_attach_target"))
+				goto done;
+		}), "FENTRY/FEXIT programs cannot attach to exception callback", 0);
+
+	if (test__start_subtest("non-throwing fexit -> exception_cb"))
+		RUN_EXT(-EINVAL, true, ({
+			prog = eskel->progs.pfexit;
+			bpf_program__set_autoload(prog, true);
+			if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+				       bpf_program__fd(skel->progs.exception_ext_mod_cb_runtime),
+				       "exception_cb_mod"), "set_attach_target"))
+				goto done;
+		}), "FENTRY/FEXIT programs cannot attach to exception callback", 0);
+
+	if (test__start_subtest("throwing fexit -> exception_cb"))
+		RUN_EXT(-EINVAL, true, ({
+			prog = eskel->progs.throwing_fexit;
+			bpf_program__set_autoload(prog, true);
+			if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+				       bpf_program__fd(skel->progs.exception_ext_mod_cb_runtime),
+				       "exception_cb_mod"), "set_attach_target"))
+				goto done;
+		}), "FENTRY/FEXIT programs cannot attach to exception callback", 0);
+
+	if (test__start_subtest("throwing extension (with custom cb) -> exception_cb"))
+		RUN_EXT(-EINVAL, true, ({
+			prog = eskel->progs.throwing_exception_cb_extension;
+			bpf_program__set_autoload(prog, true);
+			if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+				       bpf_program__fd(skel->progs.exception_ext_mod_cb_runtime),
+				       "exception_cb_mod"), "set_attach_target"))
+				goto done;
+		}), "Extension programs cannot attach to exception callback", 0);
+
+	if (test__start_subtest("throwing extension -> global func in exception_cb"))
+		RUN_EXT(0, false, ({
+			prog = eskel->progs.throwing_exception_cb_extension;
+			bpf_program__set_autoload(prog, true);
+			if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+				       bpf_program__fd(skel->progs.exception_ext_mod_cb_runtime),
+				       "exception_cb_mod_global"), "set_attach_target"))
+				goto done;
+		}), "", ({ RUN_SUCCESS(exception_ext_mod_cb_runtime, 131); }));
+
+	if (test__start_subtest("throwing extension (with custom cb) -> global func in exception_cb"))
+		RUN_EXT(0, false, ({
+			prog = eskel->progs.throwing_extension;
+			bpf_program__set_autoload(prog, true);
+			if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+				       bpf_program__fd(skel->progs.exception_ext),
+				       "exception_ext_global"), "set_attach_target"))
+				goto done;
+		}), "", ({ RUN_SUCCESS(exception_ext, 128); }));
+
+	if (test__start_subtest("non-throwing fentry -> non-throwing subprog"))
+		/* non-throwing fentry -> non-throwing subprog : OK */
+		RUN_EXT(0, false, ({
+			prog = eskel->progs.pfentry;
+			bpf_program__set_autoload(prog, true);
+			if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+				       bpf_program__fd(skel->progs.exception_throw_subprog),
+				       "subprog"), "set_attach_target"))
+				goto done;
+		}), "", 0);
+
+	if (test__start_subtest("throwing fentry -> non-throwing subprog"))
+		/* throwing fentry -> non-throwing subprog : OK */
+		RUN_EXT(0, false, ({
+			prog = eskel->progs.throwing_fentry;
+			bpf_program__set_autoload(prog, true);
+			if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+				       bpf_program__fd(skel->progs.exception_throw_subprog),
+				       "subprog"), "set_attach_target"))
+				goto done;
+		}), "", 0);
+
+	if (test__start_subtest("non-throwing fentry -> throwing subprog"))
+		/* non-throwing fentry -> throwing subprog : OK */
+		RUN_EXT(0, false, ({
+			prog = eskel->progs.pfentry;
+			bpf_program__set_autoload(prog, true);
+			if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+				       bpf_program__fd(skel->progs.exception_throw_subprog),
+				       "throwing_subprog"), "set_attach_target"))
+				goto done;
+		}), "", 0);
+
+	if (test__start_subtest("throwing fentry -> throwing subprog"))
+		/* throwing fentry -> throwing subprog : OK */
+		RUN_EXT(0, false, ({
+			prog = eskel->progs.throwing_fentry;
+			bpf_program__set_autoload(prog, true);
+			if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+				       bpf_program__fd(skel->progs.exception_throw_subprog),
+				       "throwing_subprog"), "set_attach_target"))
+				goto done;
+		}), "", 0);
+
+	if (test__start_subtest("non-throwing fexit -> non-throwing subprog"))
+		/* non-throwing fexit -> non-throwing subprog : OK */
+		RUN_EXT(0, false, ({
+			prog = eskel->progs.pfexit;
+			bpf_program__set_autoload(prog, true);
+			if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+				       bpf_program__fd(skel->progs.exception_throw_subprog),
+				       "subprog"), "set_attach_target"))
+				goto done;
+		}), "", 0);
+
+	if (test__start_subtest("throwing fexit -> non-throwing subprog"))
+		/* throwing fexit -> non-throwing subprog : OK */
+		RUN_EXT(0, false, ({
+			prog = eskel->progs.throwing_fexit;
+			bpf_program__set_autoload(prog, true);
+			if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+				       bpf_program__fd(skel->progs.exception_throw_subprog),
+				       "subprog"), "set_attach_target"))
+				goto done;
+		}), "", 0);
+
+	if (test__start_subtest("non-throwing fexit -> throwing subprog"))
+		/* non-throwing fexit -> throwing subprog : OK */
+		RUN_EXT(0, false, ({
+			prog = eskel->progs.pfexit;
+			bpf_program__set_autoload(prog, true);
+			if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+				       bpf_program__fd(skel->progs.exception_throw_subprog),
+				       "throwing_subprog"), "set_attach_target"))
+				goto done;
+		}), "", 0);
+
+	if (test__start_subtest("throwing fexit -> throwing subprog"))
+		/* throwing fexit -> throwing subprog : OK */
+		RUN_EXT(0, false, ({
+			prog = eskel->progs.throwing_fexit;
+			bpf_program__set_autoload(prog, true);
+			if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+				       bpf_program__fd(skel->progs.exception_throw_subprog),
+				       "throwing_subprog"), "set_attach_target"))
+				goto done;
+		}), "", 0);
+
+	/* fmod_ret not allowed for subprog - Check so we remember to handle its
+	 * throwing specification compatibility with target when supported.
+	 */
+	if (test__start_subtest("non-throwing fmod_ret -> non-throwing subprog"))
+		RUN_EXT(-EINVAL, true, ({
+			prog = eskel->progs.pfmod_ret;
+			bpf_program__set_autoload(prog, true);
+			if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+				       bpf_program__fd(skel->progs.exception_throw_subprog),
+				       "subprog"), "set_attach_target"))
+				goto done;
+		}), "can't modify return codes of BPF program", 0);
+
+	/* fmod_ret not allowed for subprog - Check so we remember to handle its
+	 * throwing specification compatibility with target when supported.
+	 */
+	if (test__start_subtest("non-throwing fmod_ret -> non-throwing global subprog"))
+		RUN_EXT(-EINVAL, true, ({
+			prog = eskel->progs.pfmod_ret;
+			bpf_program__set_autoload(prog, true);
+			if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+				       bpf_program__fd(skel->progs.exception_throw_subprog),
+				       "global_subprog"), "set_attach_target"))
+				goto done;
+		}), "can't modify return codes of BPF program", 0);
+
+	if (test__start_subtest("non-throwing extension -> non-throwing subprog"))
+		/* non-throwing extension -> non-throwing subprog : BAD (!global) */
+		RUN_EXT(-EINVAL, true, ({
+			prog = eskel->progs.extension;
+			bpf_program__set_autoload(prog, true);
+			if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+				       bpf_program__fd(skel->progs.exception_throw_subprog),
+				       "subprog"), "set_attach_target"))
+				goto done;
+		}), "subprog() is not a global function", 0);
+
+	if (test__start_subtest("non-throwing extension -> throwing subprog"))
+		/* non-throwing extension -> throwing subprog : BAD (!global) */
+		RUN_EXT(-EINVAL, true, ({
+			prog = eskel->progs.extension;
+			bpf_program__set_autoload(prog, true);
+			if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+				       bpf_program__fd(skel->progs.exception_throw_subprog),
+				       "throwing_subprog"), "set_attach_target"))
+				goto done;
+		}), "throwing_subprog() is not a global function", 0);
+
+	if (test__start_subtest("non-throwing extension -> non-throwing subprog"))
+		/* non-throwing extension -> non-throwing global subprog : OK */
+		RUN_EXT(0, false, ({
+			prog = eskel->progs.extension;
+			bpf_program__set_autoload(prog, true);
+			if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+				       bpf_program__fd(skel->progs.exception_throw_subprog),
+				       "global_subprog"), "set_attach_target"))
+				goto done;
+		}), "", 0);
+
+	if (test__start_subtest("non-throwing extension -> throwing global subprog"))
+		/* non-throwing extension -> throwing global subprog : OK */
+		RUN_EXT(0, false, ({
+			prog = eskel->progs.extension;
+			bpf_program__set_autoload(prog, true);
+			if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+				       bpf_program__fd(skel->progs.exception_throw_subprog),
+				       "throwing_global_subprog"), "set_attach_target"))
+				goto done;
+		}), "", 0);
+
+	if (test__start_subtest("throwing extension -> throwing global subprog"))
+		/* throwing extension -> throwing global subprog : OK */
+		RUN_EXT(0, false, ({
+			prog = eskel->progs.throwing_extension;
+			bpf_program__set_autoload(prog, true);
+			if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+				       bpf_program__fd(skel->progs.exception_throw_subprog),
+				       "throwing_global_subprog"), "set_attach_target"))
+				goto done;
+		}), "", 0);
+
+	if (test__start_subtest("throwing extension -> non-throwing global subprog"))
+		/* throwing extension -> non-throwing global subprog : OK */
+		RUN_EXT(0, false, ({
+			prog = eskel->progs.throwing_extension;
+			bpf_program__set_autoload(prog, true);
+			if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+				       bpf_program__fd(skel->progs.exception_throw_subprog),
+				       "global_subprog"), "set_attach_target"))
+				goto done;
+		}), "", 0);
+
+	if (test__start_subtest("non-throwing extension -> main subprog"))
+		/* non-throwing extension -> main subprog : OK */
+		RUN_EXT(0, false, ({
+			prog = eskel->progs.extension;
+			bpf_program__set_autoload(prog, true);
+			if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+				       bpf_program__fd(skel->progs.exception_throw_subprog),
+				       "exception_throw_subprog"), "set_attach_target"))
+				goto done;
+		}), "", 0);
+
+	if (test__start_subtest("throwing extension -> main subprog"))
+		/* throwing extension -> main subprog : OK */
+		RUN_EXT(0, false, ({
+			prog = eskel->progs.throwing_extension;
+			bpf_program__set_autoload(prog, true);
+			if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+				       bpf_program__fd(skel->progs.exception_throw_subprog),
+				       "exception_throw_subprog"), "set_attach_target"))
+				goto done;
+		}), "", 0);
+
+done:
+	exceptions_ext__destroy(eskel);
+	exceptions__destroy(skel);
+}
+
+static void test_exceptions_assertions(void)
+{
+	RUN_TESTS(exceptions_assert);
+}
+
+void test_exceptions(void)
+{
+	test_exceptions_success();
+	test_exceptions_failure();
+	test_exceptions_assertions();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/fib_lookup.c b/tools/testing/selftests/bpf/prog_tests/fib_lookup.c
index 2fd05649bad1..4ad4cd69152e 100644
--- a/tools/testing/selftests/bpf/prog_tests/fib_lookup.c
+++ b/tools/testing/selftests/bpf/prog_tests/fib_lookup.c
@@ -11,9 +11,13 @@
 
 #define NS_TEST			"fib_lookup_ns"
 #define IPV6_IFACE_ADDR		"face::face"
+#define IPV6_IFACE_ADDR_SEC	"cafe::cafe"
+#define IPV6_ADDR_DST		"face::3"
 #define IPV6_NUD_FAILED_ADDR	"face::1"
 #define IPV6_NUD_STALE_ADDR	"face::2"
 #define IPV4_IFACE_ADDR		"10.0.0.254"
+#define IPV4_IFACE_ADDR_SEC	"10.1.0.254"
+#define IPV4_ADDR_DST		"10.2.0.254"
 #define IPV4_NUD_FAILED_ADDR	"10.0.0.1"
 #define IPV4_NUD_STALE_ADDR	"10.0.0.2"
 #define IPV4_TBID_ADDR		"172.0.0.254"
@@ -31,6 +35,7 @@ struct fib_lookup_test {
 	const char *desc;
 	const char *daddr;
 	int expected_ret;
+	const char *expected_src;
 	int lookup_flags;
 	__u32 tbid;
 	__u8 dmac[6];
@@ -69,6 +74,22 @@ static const struct fib_lookup_test tests[] = {
 	  .daddr = IPV6_TBID_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
 	  .lookup_flags = BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_TBID, .tbid = 100,
 	  .dmac = DMAC_INIT2, },
+	{ .desc = "IPv4 set src addr from netdev",
+	  .daddr = IPV4_NUD_FAILED_ADDR, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
+	  .expected_src = IPV4_IFACE_ADDR,
+	  .lookup_flags = BPF_FIB_LOOKUP_SRC | BPF_FIB_LOOKUP_SKIP_NEIGH, },
+	{ .desc = "IPv6 set src addr from netdev",
+	  .daddr = IPV6_NUD_FAILED_ADDR, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
+	  .expected_src = IPV6_IFACE_ADDR,
+	  .lookup_flags = BPF_FIB_LOOKUP_SRC | BPF_FIB_LOOKUP_SKIP_NEIGH, },
+	{ .desc = "IPv4 set prefsrc addr from route",
+	  .daddr = IPV4_ADDR_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
+	  .expected_src = IPV4_IFACE_ADDR_SEC,
+	  .lookup_flags = BPF_FIB_LOOKUP_SRC | BPF_FIB_LOOKUP_SKIP_NEIGH, },
+	{ .desc = "IPv6 set prefsrc addr route",
+	  .daddr = IPV6_ADDR_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
+	  .expected_src = IPV6_IFACE_ADDR_SEC,
+	  .lookup_flags = BPF_FIB_LOOKUP_SRC | BPF_FIB_LOOKUP_SKIP_NEIGH, },
 };
 
 static int ifindex;
@@ -97,6 +118,13 @@ static int setup_netns(void)
 	SYS(fail, "ip neigh add %s dev veth1 nud failed", IPV4_NUD_FAILED_ADDR);
 	SYS(fail, "ip neigh add %s dev veth1 lladdr %s nud stale", IPV4_NUD_STALE_ADDR, DMAC);
 
+	/* Setup for prefsrc IP addr selection */
+	SYS(fail, "ip addr add %s/24 dev veth1", IPV4_IFACE_ADDR_SEC);
+	SYS(fail, "ip route add %s/32 dev veth1 src %s", IPV4_ADDR_DST, IPV4_IFACE_ADDR_SEC);
+
+	SYS(fail, "ip addr add %s/64 dev veth1 nodad", IPV6_IFACE_ADDR_SEC);
+	SYS(fail, "ip route add %s/128 dev veth1 src %s", IPV6_ADDR_DST, IPV6_IFACE_ADDR_SEC);
+
 	/* Setup for tbid lookup tests */
 	SYS(fail, "ip addr add %s/24 dev veth2", IPV4_TBID_ADDR);
 	SYS(fail, "ip route del %s/24 dev veth2", IPV4_TBID_NET);
@@ -133,9 +161,12 @@ static int set_lookup_params(struct bpf_fib_lookup *params, const struct fib_loo
 
 	if (inet_pton(AF_INET6, test->daddr, params->ipv6_dst) == 1) {
 		params->family = AF_INET6;
-		ret = inet_pton(AF_INET6, IPV6_IFACE_ADDR, params->ipv6_src);
-		if (!ASSERT_EQ(ret, 1, "inet_pton(IPV6_IFACE_ADDR)"))
-			return -1;
+		if (!(test->lookup_flags & BPF_FIB_LOOKUP_SRC)) {
+			ret = inet_pton(AF_INET6, IPV6_IFACE_ADDR, params->ipv6_src);
+			if (!ASSERT_EQ(ret, 1, "inet_pton(IPV6_IFACE_ADDR)"))
+				return -1;
+		}
+
 		return 0;
 	}
 
@@ -143,9 +174,12 @@ static int set_lookup_params(struct bpf_fib_lookup *params, const struct fib_loo
 	if (!ASSERT_EQ(ret, 1, "convert IP[46] address"))
 		return -1;
 	params->family = AF_INET;
-	ret = inet_pton(AF_INET, IPV4_IFACE_ADDR, &params->ipv4_src);
-	if (!ASSERT_EQ(ret, 1, "inet_pton(IPV4_IFACE_ADDR)"))
-		return -1;
+
+	if (!(test->lookup_flags & BPF_FIB_LOOKUP_SRC)) {
+		ret = inet_pton(AF_INET, IPV4_IFACE_ADDR, &params->ipv4_src);
+		if (!ASSERT_EQ(ret, 1, "inet_pton(IPV4_IFACE_ADDR)"))
+			return -1;
+	}
 
 	return 0;
 }
@@ -156,6 +190,40 @@ static void mac_str(char *b, const __u8 *mac)
 		mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]);
 }
 
+static void assert_src_ip(struct bpf_fib_lookup *fib_params, const char *expected_src)
+{
+	int ret;
+	__u32 src6[4];
+	__be32 src4;
+
+	switch (fib_params->family) {
+	case AF_INET6:
+		ret = inet_pton(AF_INET6, expected_src, src6);
+		ASSERT_EQ(ret, 1, "inet_pton(expected_src)");
+
+		ret = memcmp(src6, fib_params->ipv6_src, sizeof(fib_params->ipv6_src));
+		if (!ASSERT_EQ(ret, 0, "fib_lookup ipv6 src")) {
+			char str_src6[64];
+
+			inet_ntop(AF_INET6, fib_params->ipv6_src, str_src6,
+				  sizeof(str_src6));
+			printf("ipv6 expected %s actual %s ", expected_src,
+			       str_src6);
+		}
+
+		break;
+	case AF_INET:
+		ret = inet_pton(AF_INET, expected_src, &src4);
+		ASSERT_EQ(ret, 1, "inet_pton(expected_src)");
+
+		ASSERT_EQ(fib_params->ipv4_src, src4, "fib_lookup ipv4 src");
+
+		break;
+	default:
+		PRINT_FAIL("invalid addr family: %d", fib_params->family);
+	}
+}
+
 void test_fib_lookup(void)
 {
 	struct bpf_fib_lookup *fib_params;
@@ -207,6 +275,9 @@ void test_fib_lookup(void)
 		ASSERT_EQ(skel->bss->fib_lookup_ret, tests[i].expected_ret,
 			  "fib_lookup_ret");
 
+		if (tests[i].expected_src)
+			assert_src_ip(fib_params, tests[i].expected_src);
+
 		ret = memcmp(tests[i].dmac, fib_params->dmac, sizeof(tests[i].dmac));
 		if (!ASSERT_EQ(ret, 0, "dmac not match")) {
 			char expected[18], actual[18];
diff --git a/tools/testing/selftests/bpf/prog_tests/fill_link_info.c b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c
index 9d768e083714..97142a4db374 100644
--- a/tools/testing/selftests/bpf/prog_tests/fill_link_info.c
+++ b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c
@@ -308,7 +308,7 @@ void test_fill_link_info(void)
 		return;
 
 	/* load kallsyms to compare the addr */
-	if (!ASSERT_OK(load_kallsyms_refresh(), "load_kallsyms_refresh"))
+	if (!ASSERT_OK(load_kallsyms(), "load_kallsyms"))
 		goto cleanup;
 
 	kprobe_addr = ksym_get_addr(KPROBE_FUNC);
diff --git a/tools/testing/selftests/bpf/prog_tests/iters.c b/tools/testing/selftests/bpf/prog_tests/iters.c
index 10804ae5ae97..b696873c5455 100644
--- a/tools/testing/selftests/bpf/prog_tests/iters.c
+++ b/tools/testing/selftests/bpf/prog_tests/iters.c
@@ -8,6 +8,7 @@
 #include "iters_looping.skel.h"
 #include "iters_num.skel.h"
 #include "iters_testmod_seq.skel.h"
+#include "iters_task_vma.skel.h"
 
 static void subtest_num_iters(void)
 {
@@ -90,6 +91,61 @@ cleanup:
 	iters_testmod_seq__destroy(skel);
 }
 
+static void subtest_task_vma_iters(void)
+{
+	unsigned long start, end, bpf_iter_start, bpf_iter_end;
+	struct iters_task_vma *skel;
+	char rest_of_line[1000];
+	unsigned int seen;
+	FILE *f = NULL;
+	int err;
+
+	skel = iters_task_vma__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+		return;
+
+	skel->bss->target_pid = getpid();
+
+	err = iters_task_vma__attach(skel);
+	if (!ASSERT_OK(err, "skel_attach"))
+		goto cleanup;
+
+	getpgid(skel->bss->target_pid);
+	iters_task_vma__detach(skel);
+
+	if (!ASSERT_GT(skel->bss->vmas_seen, 0, "vmas_seen_gt_zero"))
+		goto cleanup;
+
+	f = fopen("/proc/self/maps", "r");
+	if (!ASSERT_OK_PTR(f, "proc_maps_fopen"))
+		goto cleanup;
+
+	seen = 0;
+	while (fscanf(f, "%lx-%lx %[^\n]\n", &start, &end, rest_of_line) == 3) {
+		/* [vsyscall] vma isn't _really_ part of task->mm vmas.
+		 * /proc/PID/maps returns it when out of vmas - see get_gate_vma
+		 * calls in fs/proc/task_mmu.c
+		 */
+		if (strstr(rest_of_line, "[vsyscall]"))
+			continue;
+
+		bpf_iter_start = skel->bss->vm_ranges[seen].vm_start;
+		bpf_iter_end = skel->bss->vm_ranges[seen].vm_end;
+
+		ASSERT_EQ(bpf_iter_start, start, "vma->vm_start match");
+		ASSERT_EQ(bpf_iter_end, end, "vma->vm_end match");
+		seen++;
+	}
+
+	if (!ASSERT_EQ(skel->bss->vmas_seen, seen, "vmas_seen_eq"))
+		goto cleanup;
+
+cleanup:
+	if (f)
+		fclose(f);
+	iters_task_vma__destroy(skel);
+}
+
 void test_iters(void)
 {
 	RUN_TESTS(iters_state_safety);
@@ -103,4 +159,6 @@ void test_iters(void)
 		subtest_num_iters();
 	if (test__start_subtest("testmod_seq"))
 		subtest_testmod_seq_iters();
+	if (test__start_subtest("task_vma"))
+		subtest_task_vma_iters();
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_testmod_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_testmod_test.c
index 1fbe7e4ac00a..9d03528f05db 100644
--- a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_testmod_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_testmod_test.c
@@ -4,6 +4,8 @@
 #include "trace_helpers.h"
 #include "bpf/libbpf_internal.h"
 
+static struct ksyms *ksyms;
+
 static void kprobe_multi_testmod_check(struct kprobe_multi *skel)
 {
 	ASSERT_EQ(skel->bss->kprobe_testmod_test1_result, 1, "kprobe_test1_result");
@@ -50,12 +52,12 @@ static void test_testmod_attach_api_addrs(void)
 	LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
 	unsigned long long addrs[3];
 
-	addrs[0] = ksym_get_addr("bpf_testmod_fentry_test1");
-	ASSERT_NEQ(addrs[0], 0, "ksym_get_addr");
-	addrs[1] = ksym_get_addr("bpf_testmod_fentry_test2");
-	ASSERT_NEQ(addrs[1], 0, "ksym_get_addr");
-	addrs[2] = ksym_get_addr("bpf_testmod_fentry_test3");
-	ASSERT_NEQ(addrs[2], 0, "ksym_get_addr");
+	addrs[0] = ksym_get_addr_local(ksyms, "bpf_testmod_fentry_test1");
+	ASSERT_NEQ(addrs[0], 0, "ksym_get_addr_local");
+	addrs[1] = ksym_get_addr_local(ksyms, "bpf_testmod_fentry_test2");
+	ASSERT_NEQ(addrs[1], 0, "ksym_get_addr_local");
+	addrs[2] = ksym_get_addr_local(ksyms, "bpf_testmod_fentry_test3");
+	ASSERT_NEQ(addrs[2], 0, "ksym_get_addr_local");
 
 	opts.addrs = (const unsigned long *) addrs;
 	opts.cnt = ARRAY_SIZE(addrs);
@@ -79,11 +81,15 @@ static void test_testmod_attach_api_syms(void)
 
 void serial_test_kprobe_multi_testmod_test(void)
 {
-	if (!ASSERT_OK(load_kallsyms_refresh(), "load_kallsyms_refresh"))
+	ksyms = load_kallsyms_local();
+	if (!ASSERT_OK_PTR(ksyms, "load_kallsyms_local"))
 		return;
 
 	if (test__start_subtest("testmod_attach_api_syms"))
 		test_testmod_attach_api_syms();
+
 	if (test__start_subtest("testmod_attach_api_addrs"))
 		test_testmod_attach_api_addrs();
+
+	free_kallsyms_local(ksyms);
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/libbpf_str.c b/tools/testing/selftests/bpf/prog_tests/libbpf_str.c
index efb8bd43653c..c440ea3311ed 100644
--- a/tools/testing/selftests/bpf/prog_tests/libbpf_str.c
+++ b/tools/testing/selftests/bpf/prog_tests/libbpf_str.c
@@ -142,10 +142,14 @@ static void test_libbpf_bpf_map_type_str(void)
 		/* Special case for map_type_name BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED
 		 * where it and BPF_MAP_TYPE_CGROUP_STORAGE have the same enum value
 		 * (map_type). For this enum value, libbpf_bpf_map_type_str() picks
-		 * BPF_MAP_TYPE_CGROUP_STORAGE.
+		 * BPF_MAP_TYPE_CGROUP_STORAGE. The same for
+		 * BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE_DEPRECATED and
+		 * BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE.
 		 */
 		if (strcmp(map_type_name, "BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED") == 0)
 			continue;
+		if (strcmp(map_type_name, "BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE_DEPRECATED") == 0)
+			continue;
 
 		ASSERT_STREQ(buf, map_type_name, "exp_str_value");
 	}
diff --git a/tools/testing/selftests/bpf/prog_tests/linked_list.c b/tools/testing/selftests/bpf/prog_tests/linked_list.c
index 18cf7b17463d..69dc31383b78 100644
--- a/tools/testing/selftests/bpf/prog_tests/linked_list.c
+++ b/tools/testing/selftests/bpf/prog_tests/linked_list.c
@@ -65,8 +65,8 @@ static struct {
 	{ "map_compat_raw_tp", "tracing progs cannot use bpf_{list_head,rb_root} yet" },
 	{ "map_compat_raw_tp_w", "tracing progs cannot use bpf_{list_head,rb_root} yet" },
 	{ "obj_type_id_oor", "local type ID argument must be in range [0, U32_MAX]" },
-	{ "obj_new_no_composite", "bpf_obj_new type ID argument must be of a struct" },
-	{ "obj_new_no_struct", "bpf_obj_new type ID argument must be of a struct" },
+	{ "obj_new_no_composite", "bpf_obj_new/bpf_percpu_obj_new type ID argument must be of a struct" },
+	{ "obj_new_no_struct", "bpf_obj_new/bpf_percpu_obj_new type ID argument must be of a struct" },
 	{ "obj_drop_non_zero_off", "R1 must have zero offset when passed to release func" },
 	{ "new_null_ret", "R0 invalid mem access 'ptr_or_null_'" },
 	{ "obj_new_acq", "Unreleased reference id=" },
@@ -268,7 +268,7 @@ end:
 
 static void list_and_rb_node_same_struct(bool refcount_field)
 {
-	int bpf_rb_node_btf_id, bpf_refcount_btf_id, foo_btf_id;
+	int bpf_rb_node_btf_id, bpf_refcount_btf_id = 0, foo_btf_id;
 	struct btf *btf;
 	int id, err;
 
diff --git a/tools/testing/selftests/bpf/prog_tests/lwt_helpers.h b/tools/testing/selftests/bpf/prog_tests/lwt_helpers.h
index 61333f2a03f9..e9190574e79f 100644
--- a/tools/testing/selftests/bpf/prog_tests/lwt_helpers.h
+++ b/tools/testing/selftests/bpf/prog_tests/lwt_helpers.h
@@ -49,7 +49,8 @@ static int open_tuntap(const char *dev_name, bool need_mac)
 		return -1;
 
 	ifr.ifr_flags = IFF_NO_PI | (need_mac ? IFF_TAP : IFF_TUN);
-	memcpy(ifr.ifr_name, dev_name, IFNAMSIZ);
+	strncpy(ifr.ifr_name, dev_name, IFNAMSIZ - 1);
+	ifr.ifr_name[IFNAMSIZ - 1] = '\0';
 
 	err = ioctl(fd, TUNSETIFF, &ifr);
 	if (!ASSERT_OK(err, "ioctl(TUNSETIFF)")) {
diff --git a/tools/testing/selftests/bpf/prog_tests/missed.c b/tools/testing/selftests/bpf/prog_tests/missed.c
new file mode 100644
index 000000000000..70d90c43537c
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/missed.c
@@ -0,0 +1,138 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include "missed_kprobe.skel.h"
+#include "missed_kprobe_recursion.skel.h"
+#include "missed_tp_recursion.skel.h"
+
+/*
+ * Putting kprobe on bpf_fentry_test1 that calls bpf_kfunc_common_test
+ * kfunc, which has also kprobe on. The latter won't get triggered due
+ * to kprobe recursion check and kprobe missed counter is incremented.
+ */
+static void test_missed_perf_kprobe(void)
+{
+	LIBBPF_OPTS(bpf_test_run_opts, topts);
+	struct bpf_link_info info = {};
+	struct missed_kprobe *skel;
+	__u32 len = sizeof(info);
+	int err, prog_fd;
+
+	skel = missed_kprobe__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "missed_kprobe__open_and_load"))
+		goto cleanup;
+
+	err = missed_kprobe__attach(skel);
+	if (!ASSERT_OK(err, "missed_kprobe__attach"))
+		goto cleanup;
+
+	prog_fd = bpf_program__fd(skel->progs.trigger);
+	err = bpf_prog_test_run_opts(prog_fd, &topts);
+	ASSERT_OK(err, "test_run");
+	ASSERT_EQ(topts.retval, 0, "test_run");
+
+	err = bpf_link_get_info_by_fd(bpf_link__fd(skel->links.test2), &info, &len);
+	if (!ASSERT_OK(err, "bpf_link_get_info_by_fd"))
+		goto cleanup;
+
+	ASSERT_EQ(info.type, BPF_LINK_TYPE_PERF_EVENT, "info.type");
+	ASSERT_EQ(info.perf_event.type, BPF_PERF_EVENT_KPROBE, "info.perf_event.type");
+	ASSERT_EQ(info.perf_event.kprobe.missed, 1, "info.perf_event.kprobe.missed");
+
+cleanup:
+	missed_kprobe__destroy(skel);
+}
+
+static __u64 get_missed_count(int fd)
+{
+	struct bpf_prog_info info = {};
+	__u32 len = sizeof(info);
+	int err;
+
+	err = bpf_prog_get_info_by_fd(fd, &info, &len);
+	if (!ASSERT_OK(err, "bpf_prog_get_info_by_fd"))
+		return (__u64) -1;
+	return info.recursion_misses;
+}
+
+/*
+ * Putting kprobe.multi on bpf_fentry_test1 that calls bpf_kfunc_common_test
+ * kfunc which has 3 perf event kprobes and 1 kprobe.multi attached.
+ *
+ * Because fprobe (kprobe.multi attach layear) does not have strict recursion
+ * check the kprobe's bpf_prog_active check is hit for test2-5.
+ */
+static void test_missed_kprobe_recursion(void)
+{
+	LIBBPF_OPTS(bpf_test_run_opts, topts);
+	struct missed_kprobe_recursion *skel;
+	int err, prog_fd;
+
+	skel = missed_kprobe_recursion__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "missed_kprobe_recursion__open_and_load"))
+		goto cleanup;
+
+	err = missed_kprobe_recursion__attach(skel);
+	if (!ASSERT_OK(err, "missed_kprobe_recursion__attach"))
+		goto cleanup;
+
+	prog_fd = bpf_program__fd(skel->progs.trigger);
+	err = bpf_prog_test_run_opts(prog_fd, &topts);
+	ASSERT_OK(err, "test_run");
+	ASSERT_EQ(topts.retval, 0, "test_run");
+
+	ASSERT_EQ(get_missed_count(bpf_program__fd(skel->progs.test1)), 0, "test1_recursion_misses");
+	ASSERT_GE(get_missed_count(bpf_program__fd(skel->progs.test2)), 1, "test2_recursion_misses");
+	ASSERT_GE(get_missed_count(bpf_program__fd(skel->progs.test3)), 1, "test3_recursion_misses");
+	ASSERT_GE(get_missed_count(bpf_program__fd(skel->progs.test4)), 1, "test4_recursion_misses");
+	ASSERT_GE(get_missed_count(bpf_program__fd(skel->progs.test5)), 1, "test5_recursion_misses");
+
+cleanup:
+	missed_kprobe_recursion__destroy(skel);
+}
+
+/*
+ * Putting kprobe on bpf_fentry_test1 that calls bpf_printk and invokes
+ * bpf_trace_printk tracepoint. The bpf_trace_printk tracepoint has test[234]
+ * programs attached to it.
+ *
+ * Because kprobe execution goes through bpf_prog_active check, programs
+ * attached to the tracepoint will fail the recursion check and increment
+ * the recursion_misses stats.
+ */
+static void test_missed_tp_recursion(void)
+{
+	LIBBPF_OPTS(bpf_test_run_opts, topts);
+	struct missed_tp_recursion *skel;
+	int err, prog_fd;
+
+	skel = missed_tp_recursion__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "missed_tp_recursion__open_and_load"))
+		goto cleanup;
+
+	err = missed_tp_recursion__attach(skel);
+	if (!ASSERT_OK(err, "missed_tp_recursion__attach"))
+		goto cleanup;
+
+	prog_fd = bpf_program__fd(skel->progs.trigger);
+	err = bpf_prog_test_run_opts(prog_fd, &topts);
+	ASSERT_OK(err, "test_run");
+	ASSERT_EQ(topts.retval, 0, "test_run");
+
+	ASSERT_EQ(get_missed_count(bpf_program__fd(skel->progs.test1)), 0, "test1_recursion_misses");
+	ASSERT_EQ(get_missed_count(bpf_program__fd(skel->progs.test2)), 1, "test2_recursion_misses");
+	ASSERT_EQ(get_missed_count(bpf_program__fd(skel->progs.test3)), 1, "test3_recursion_misses");
+	ASSERT_EQ(get_missed_count(bpf_program__fd(skel->progs.test4)), 1, "test4_recursion_misses");
+
+cleanup:
+	missed_tp_recursion__destroy(skel);
+}
+
+void test_missed(void)
+{
+	if (test__start_subtest("perf_kprobe"))
+		test_missed_perf_kprobe();
+	if (test__start_subtest("kprobe_recursion"))
+		test_missed_kprobe_recursion();
+	if (test__start_subtest("tp_recursion"))
+		test_missed_tp_recursion();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/module_fentry_shadow.c b/tools/testing/selftests/bpf/prog_tests/module_fentry_shadow.c
index c7636e18b1eb..aa9f67eb1c95 100644
--- a/tools/testing/selftests/bpf/prog_tests/module_fentry_shadow.c
+++ b/tools/testing/selftests/bpf/prog_tests/module_fentry_shadow.c
@@ -61,6 +61,11 @@ void test_module_fentry_shadow(void)
 	int link_fd[2] = {};
 	__s32 btf_id[2] = {};
 
+	if (!env.has_testmod) {
+		test__skip();
+		return;
+	}
+
 	LIBBPF_OPTS(bpf_prog_load_opts, load_opts,
 		.expected_attach_type = BPF_TRACE_FENTRY,
 	);
diff --git a/tools/testing/selftests/bpf/prog_tests/percpu_alloc.c b/tools/testing/selftests/bpf/prog_tests/percpu_alloc.c
new file mode 100644
index 000000000000..343da65864d6
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/percpu_alloc.c
@@ -0,0 +1,128 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include "percpu_alloc_array.skel.h"
+#include "percpu_alloc_cgrp_local_storage.skel.h"
+#include "percpu_alloc_fail.skel.h"
+
+static void test_array(void)
+{
+	struct percpu_alloc_array *skel;
+	int err, prog_fd;
+	LIBBPF_OPTS(bpf_test_run_opts, topts);
+
+	skel = percpu_alloc_array__open();
+	if (!ASSERT_OK_PTR(skel, "percpu_alloc_array__open"))
+		return;
+
+	bpf_program__set_autoload(skel->progs.test_array_map_1, true);
+	bpf_program__set_autoload(skel->progs.test_array_map_2, true);
+	bpf_program__set_autoload(skel->progs.test_array_map_3, true);
+	bpf_program__set_autoload(skel->progs.test_array_map_4, true);
+
+	skel->bss->my_pid = getpid();
+	skel->rodata->nr_cpus = libbpf_num_possible_cpus();
+
+	err = percpu_alloc_array__load(skel);
+	if (!ASSERT_OK(err, "percpu_alloc_array__load"))
+		goto out;
+
+	err = percpu_alloc_array__attach(skel);
+	if (!ASSERT_OK(err, "percpu_alloc_array__attach"))
+		goto out;
+
+	prog_fd = bpf_program__fd(skel->progs.test_array_map_1);
+	err = bpf_prog_test_run_opts(prog_fd, &topts);
+	ASSERT_OK(err, "test_run array_map 1-4");
+	ASSERT_EQ(topts.retval, 0, "test_run array_map 1-4");
+	ASSERT_EQ(skel->bss->cpu0_field_d, 2, "cpu0_field_d");
+	ASSERT_EQ(skel->bss->sum_field_c, 1, "sum_field_c");
+out:
+	percpu_alloc_array__destroy(skel);
+}
+
+static void test_array_sleepable(void)
+{
+	struct percpu_alloc_array *skel;
+	int err, prog_fd;
+	LIBBPF_OPTS(bpf_test_run_opts, topts);
+
+	skel = percpu_alloc_array__open();
+	if (!ASSERT_OK_PTR(skel, "percpu_alloc__open"))
+		return;
+
+	bpf_program__set_autoload(skel->progs.test_array_map_10, true);
+
+	skel->bss->my_pid = getpid();
+	skel->rodata->nr_cpus = libbpf_num_possible_cpus();
+
+	err = percpu_alloc_array__load(skel);
+	if (!ASSERT_OK(err, "percpu_alloc_array__load"))
+		goto out;
+
+	err = percpu_alloc_array__attach(skel);
+	if (!ASSERT_OK(err, "percpu_alloc_array__attach"))
+		goto out;
+
+	prog_fd = bpf_program__fd(skel->progs.test_array_map_10);
+	err = bpf_prog_test_run_opts(prog_fd, &topts);
+	ASSERT_OK(err, "test_run array_map_10");
+	ASSERT_EQ(topts.retval, 0, "test_run array_map_10");
+	ASSERT_EQ(skel->bss->cpu0_field_d, 2, "cpu0_field_d");
+	ASSERT_EQ(skel->bss->sum_field_c, 1, "sum_field_c");
+out:
+	percpu_alloc_array__destroy(skel);
+}
+
+static void test_cgrp_local_storage(void)
+{
+	struct percpu_alloc_cgrp_local_storage *skel;
+	int err, cgroup_fd, prog_fd;
+	LIBBPF_OPTS(bpf_test_run_opts, topts);
+
+	cgroup_fd = test__join_cgroup("/percpu_alloc");
+	if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup /percpu_alloc"))
+		return;
+
+	skel = percpu_alloc_cgrp_local_storage__open();
+	if (!ASSERT_OK_PTR(skel, "percpu_alloc_cgrp_local_storage__open"))
+		goto close_fd;
+
+	skel->bss->my_pid = getpid();
+	skel->rodata->nr_cpus = libbpf_num_possible_cpus();
+
+	err = percpu_alloc_cgrp_local_storage__load(skel);
+	if (!ASSERT_OK(err, "percpu_alloc_cgrp_local_storage__load"))
+		goto destroy_skel;
+
+	err = percpu_alloc_cgrp_local_storage__attach(skel);
+	if (!ASSERT_OK(err, "percpu_alloc_cgrp_local_storage__attach"))
+		goto destroy_skel;
+
+	prog_fd = bpf_program__fd(skel->progs.test_cgrp_local_storage_1);
+	err = bpf_prog_test_run_opts(prog_fd, &topts);
+	ASSERT_OK(err, "test_run cgrp_local_storage 1-3");
+	ASSERT_EQ(topts.retval, 0, "test_run cgrp_local_storage 1-3");
+	ASSERT_EQ(skel->bss->cpu0_field_d, 2, "cpu0_field_d");
+	ASSERT_EQ(skel->bss->sum_field_c, 1, "sum_field_c");
+
+destroy_skel:
+	percpu_alloc_cgrp_local_storage__destroy(skel);
+close_fd:
+	close(cgroup_fd);
+}
+
+static void test_failure(void) {
+	RUN_TESTS(percpu_alloc_fail);
+}
+
+void test_percpu_alloc(void)
+{
+	if (test__start_subtest("array"))
+		test_array();
+	if (test__start_subtest("array_sleepable"))
+		test_array_sleepable();
+	if (test__start_subtest("cgrp_local_storage"))
+		test_cgrp_local_storage();
+	if (test__start_subtest("failure_tests"))
+		test_failure();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/preempted_bpf_ma_op.c b/tools/testing/selftests/bpf/prog_tests/preempted_bpf_ma_op.c
new file mode 100644
index 000000000000..3a2ec3923fca
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/preempted_bpf_ma_op.c
@@ -0,0 +1,89 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023. Huawei Technologies Co., Ltd */
+#define _GNU_SOURCE
+#include <sched.h>
+#include <pthread.h>
+#include <stdbool.h>
+#include <test_progs.h>
+
+#include "preempted_bpf_ma_op.skel.h"
+
+#define ALLOC_THREAD_NR 4
+#define ALLOC_LOOP_NR 512
+
+struct alloc_ctx {
+	/* output */
+	int run_err;
+	/* input */
+	int fd;
+	bool *nomem_err;
+};
+
+static void *run_alloc_prog(void *data)
+{
+	struct alloc_ctx *ctx = data;
+	cpu_set_t cpu_set;
+	int i;
+
+	CPU_ZERO(&cpu_set);
+	CPU_SET(0, &cpu_set);
+	pthread_setaffinity_np(pthread_self(), sizeof(cpu_set), &cpu_set);
+
+	for (i = 0; i < ALLOC_LOOP_NR && !*ctx->nomem_err; i++) {
+		LIBBPF_OPTS(bpf_test_run_opts, topts);
+		int err;
+
+		err = bpf_prog_test_run_opts(ctx->fd, &topts);
+		ctx->run_err |= err | topts.retval;
+	}
+
+	return NULL;
+}
+
+void test_preempted_bpf_ma_op(void)
+{
+	struct alloc_ctx ctx[ALLOC_THREAD_NR];
+	struct preempted_bpf_ma_op *skel;
+	pthread_t tid[ALLOC_THREAD_NR];
+	int i, err;
+
+	skel = preempted_bpf_ma_op__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "open_and_load"))
+		return;
+
+	err = preempted_bpf_ma_op__attach(skel);
+	if (!ASSERT_OK(err, "attach"))
+		goto out;
+
+	for (i = 0; i < ARRAY_SIZE(ctx); i++) {
+		struct bpf_program *prog;
+		char name[8];
+
+		snprintf(name, sizeof(name), "test%d", i);
+		prog = bpf_object__find_program_by_name(skel->obj, name);
+		if (!ASSERT_OK_PTR(prog, "no test prog"))
+			goto out;
+
+		ctx[i].run_err = 0;
+		ctx[i].fd = bpf_program__fd(prog);
+		ctx[i].nomem_err = &skel->bss->nomem_err;
+	}
+
+	memset(tid, 0, sizeof(tid));
+	for (i = 0; i < ARRAY_SIZE(tid); i++) {
+		err = pthread_create(&tid[i], NULL, run_alloc_prog, &ctx[i]);
+		if (!ASSERT_OK(err, "pthread_create"))
+			break;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(tid); i++) {
+		if (!tid[i])
+			break;
+		pthread_join(tid[i], NULL);
+		ASSERT_EQ(ctx[i].run_err, 0, "run prog err");
+	}
+
+	ASSERT_FALSE(skel->bss->nomem_err, "ENOMEM");
+out:
+	preempted_bpf_ma_op__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c b/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c
index 722c5f2a7776..a043af9cd6d9 100644
--- a/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c
+++ b/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c
@@ -14,7 +14,7 @@ static void test_queue_stack_map_by_type(int type)
 	int i, err, prog_fd, map_in_fd, map_out_fd;
 	char file[32], buf[128];
 	struct bpf_object *obj;
-	struct iphdr iph;
+	struct iphdr iph = {};
 	LIBBPF_OPTS(bpf_test_run_opts, topts,
 		.data_in = &pkt_v4,
 		.data_size_in = sizeof(pkt_v4),
diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf.c b/tools/testing/selftests/bpf/prog_tests/ringbuf.c
index ac104dc652e3..48c5695b7abf 100644
--- a/tools/testing/selftests/bpf/prog_tests/ringbuf.c
+++ b/tools/testing/selftests/bpf/prog_tests/ringbuf.c
@@ -91,6 +91,9 @@ static void ringbuf_subtest(void)
 	int err, cnt, rb_fd;
 	int page_size = getpagesize();
 	void *mmap_ptr, *tmp_ptr;
+	struct ring *ring;
+	int map_fd;
+	unsigned long avail_data, ring_size, cons_pos, prod_pos;
 
 	skel = test_ringbuf_lskel__open();
 	if (CHECK(!skel, "skel_open", "skeleton open failed\n"))
@@ -162,6 +165,13 @@ static void ringbuf_subtest(void)
 
 	trigger_samples();
 
+	ring = ring_buffer__ring(ringbuf, 0);
+	if (!ASSERT_OK_PTR(ring, "ring_buffer__ring_idx_0"))
+		goto cleanup;
+
+	map_fd = ring__map_fd(ring);
+	ASSERT_EQ(map_fd, skel->maps.ringbuf.map_fd, "ring_map_fd");
+
 	/* 2 submitted + 1 discarded records */
 	CHECK(skel->bss->avail_data != 3 * rec_sz,
 	      "err_avail_size", "exp %ld, got %ld\n",
@@ -176,6 +186,18 @@ static void ringbuf_subtest(void)
 	      "err_prod_pos", "exp %ld, got %ld\n",
 	      3L * rec_sz, skel->bss->prod_pos);
 
+	/* verify getting this data directly via the ring object yields the same
+	 * results
+	 */
+	avail_data = ring__avail_data_size(ring);
+	ASSERT_EQ(avail_data, 3 * rec_sz, "ring_avail_size");
+	ring_size = ring__size(ring);
+	ASSERT_EQ(ring_size, page_size, "ring_ring_size");
+	cons_pos = ring__consumer_pos(ring);
+	ASSERT_EQ(cons_pos, 0, "ring_cons_pos");
+	prod_pos = ring__producer_pos(ring);
+	ASSERT_EQ(prod_pos, 3 * rec_sz, "ring_prod_pos");
+
 	/* poll for samples */
 	err = ring_buffer__poll(ringbuf, -1);
 
@@ -282,6 +304,10 @@ static void ringbuf_subtest(void)
 	err = ring_buffer__consume(ringbuf);
 	CHECK(err < 0, "rb_consume", "failed: %d\b", err);
 
+	/* also consume using ring__consume to make sure it works the same */
+	err = ring__consume(ring);
+	ASSERT_GE(err, 0, "ring_consume");
+
 	/* 3 rounds, 2 samples each */
 	cnt = atomic_xchg(&sample_cnt, 0);
 	CHECK(cnt != 6, "cnt", "exp %d samples, got %d\n", 6, cnt);
diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c
index 1455911d9fcb..58522195081b 100644
--- a/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c
+++ b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c
@@ -42,6 +42,8 @@ void test_ringbuf_multi(void)
 {
 	struct test_ringbuf_multi *skel;
 	struct ring_buffer *ringbuf = NULL;
+	struct ring *ring_old;
+	struct ring *ring;
 	int err;
 	int page_size = getpagesize();
 	int proto_fd = -1;
@@ -84,11 +86,24 @@ void test_ringbuf_multi(void)
 	if (CHECK(!ringbuf, "ringbuf_create", "failed to create ringbuf\n"))
 		goto cleanup;
 
+	/* verify ring_buffer__ring returns expected results */
+	ring = ring_buffer__ring(ringbuf, 0);
+	if (!ASSERT_OK_PTR(ring, "ring_buffer__ring_idx_0"))
+		goto cleanup;
+	ring_old = ring;
+	ring = ring_buffer__ring(ringbuf, 1);
+	ASSERT_ERR_PTR(ring, "ring_buffer__ring_idx_1");
+
 	err = ring_buffer__add(ringbuf, bpf_map__fd(skel->maps.ringbuf2),
 			      process_sample, (void *)(long)2);
 	if (CHECK(err, "ringbuf_add", "failed to add another ring\n"))
 		goto cleanup;
 
+	/* verify adding a new ring didn't invalidate our older pointer */
+	ring = ring_buffer__ring(ringbuf, 0);
+	if (!ASSERT_EQ(ring, ring_old, "ring_buffer__ring_again"))
+		goto cleanup;
+
 	err = test_ringbuf_multi__attach(skel);
 	if (CHECK(err, "skel_attach", "skeleton attachment failed: %d\n", err))
 		goto cleanup;
diff --git a/tools/testing/selftests/bpf/prog_tests/section_names.c b/tools/testing/selftests/bpf/prog_tests/section_names.c
index 8b571890c57e..c3d78846f31a 100644
--- a/tools/testing/selftests/bpf/prog_tests/section_names.c
+++ b/tools/testing/selftests/bpf/prog_tests/section_names.c
@@ -124,6 +124,11 @@ static struct sec_name_test tests[] = {
 		{0, BPF_CGROUP_INET6_CONNECT},
 	},
 	{
+		"cgroup/connect_unix",
+		{0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_CONNECT},
+		{0, BPF_CGROUP_UNIX_CONNECT},
+	},
+	{
 		"cgroup/sendmsg4",
 		{0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_SENDMSG},
 		{0, BPF_CGROUP_UDP4_SENDMSG},
@@ -134,6 +139,11 @@ static struct sec_name_test tests[] = {
 		{0, BPF_CGROUP_UDP6_SENDMSG},
 	},
 	{
+		"cgroup/sendmsg_unix",
+		{0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_SENDMSG},
+		{0, BPF_CGROUP_UNIX_SENDMSG},
+	},
+	{
 		"cgroup/recvmsg4",
 		{0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_RECVMSG},
 		{0, BPF_CGROUP_UDP4_RECVMSG},
@@ -144,6 +154,11 @@ static struct sec_name_test tests[] = {
 		{0, BPF_CGROUP_UDP6_RECVMSG},
 	},
 	{
+		"cgroup/recvmsg_unix",
+		{0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_RECVMSG},
+		{0, BPF_CGROUP_UNIX_RECVMSG},
+	},
+	{
 		"cgroup/sysctl",
 		{0, BPF_PROG_TYPE_CGROUP_SYSCTL, BPF_CGROUP_SYSCTL},
 		{0, BPF_CGROUP_SYSCTL},
@@ -158,6 +173,36 @@ static struct sec_name_test tests[] = {
 		{0, BPF_PROG_TYPE_CGROUP_SOCKOPT, BPF_CGROUP_SETSOCKOPT},
 		{0, BPF_CGROUP_SETSOCKOPT},
 	},
+	{
+		"cgroup/getpeername4",
+		{0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETPEERNAME},
+		{0, BPF_CGROUP_INET4_GETPEERNAME},
+	},
+	{
+		"cgroup/getpeername6",
+		{0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETPEERNAME},
+		{0, BPF_CGROUP_INET6_GETPEERNAME},
+	},
+	{
+		"cgroup/getpeername_unix",
+		{0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_GETPEERNAME},
+		{0, BPF_CGROUP_UNIX_GETPEERNAME},
+	},
+	{
+		"cgroup/getsockname4",
+		{0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETSOCKNAME},
+		{0, BPF_CGROUP_INET4_GETSOCKNAME},
+	},
+	{
+		"cgroup/getsockname6",
+		{0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETSOCKNAME},
+		{0, BPF_CGROUP_INET6_GETSOCKNAME},
+	},
+	{
+		"cgroup/getsockname_unix",
+		{0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_GETSOCKNAME},
+		{0, BPF_CGROUP_UNIX_GETSOCKNAME},
+	},
 };
 
 static void test_prog_type_by_name(const struct sec_name_test *test)
diff --git a/tools/testing/selftests/bpf/prog_tests/sock_addr.c b/tools/testing/selftests/bpf/prog_tests/sock_addr.c
new file mode 100644
index 000000000000..5fd617718991
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/sock_addr.c
@@ -0,0 +1,612 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <sys/un.h>
+
+#include "test_progs.h"
+
+#include "connect_unix_prog.skel.h"
+#include "sendmsg_unix_prog.skel.h"
+#include "recvmsg_unix_prog.skel.h"
+#include "getsockname_unix_prog.skel.h"
+#include "getpeername_unix_prog.skel.h"
+#include "network_helpers.h"
+
+#define SERVUN_ADDRESS         "bpf_cgroup_unix_test"
+#define SERVUN_REWRITE_ADDRESS "bpf_cgroup_unix_test_rewrite"
+#define SRCUN_ADDRESS	       "bpf_cgroup_unix_test_src"
+
+enum sock_addr_test_type {
+	SOCK_ADDR_TEST_BIND,
+	SOCK_ADDR_TEST_CONNECT,
+	SOCK_ADDR_TEST_SENDMSG,
+	SOCK_ADDR_TEST_RECVMSG,
+	SOCK_ADDR_TEST_GETSOCKNAME,
+	SOCK_ADDR_TEST_GETPEERNAME,
+};
+
+typedef void *(*load_fn)(int cgroup_fd);
+typedef void (*destroy_fn)(void *skel);
+
+struct sock_addr_test {
+	enum sock_addr_test_type type;
+	const char *name;
+	/* BPF prog properties */
+	load_fn loadfn;
+	destroy_fn destroyfn;
+	/* Socket properties */
+	int socket_family;
+	int socket_type;
+	/* IP:port pairs for BPF prog to override */
+	const char *requested_addr;
+	unsigned short requested_port;
+	const char *expected_addr;
+	unsigned short expected_port;
+	const char *expected_src_addr;
+};
+
+static void *connect_unix_prog_load(int cgroup_fd)
+{
+	struct connect_unix_prog *skel;
+
+	skel = connect_unix_prog__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "skel_open"))
+		goto cleanup;
+
+	skel->links.connect_unix_prog = bpf_program__attach_cgroup(
+		skel->progs.connect_unix_prog, cgroup_fd);
+	if (!ASSERT_OK_PTR(skel->links.connect_unix_prog, "prog_attach"))
+		goto cleanup;
+
+	return skel;
+cleanup:
+	connect_unix_prog__destroy(skel);
+	return NULL;
+}
+
+static void connect_unix_prog_destroy(void *skel)
+{
+	connect_unix_prog__destroy(skel);
+}
+
+static void *sendmsg_unix_prog_load(int cgroup_fd)
+{
+	struct sendmsg_unix_prog *skel;
+
+	skel = sendmsg_unix_prog__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "skel_open"))
+		goto cleanup;
+
+	skel->links.sendmsg_unix_prog = bpf_program__attach_cgroup(
+		skel->progs.sendmsg_unix_prog, cgroup_fd);
+	if (!ASSERT_OK_PTR(skel->links.sendmsg_unix_prog, "prog_attach"))
+		goto cleanup;
+
+	return skel;
+cleanup:
+	sendmsg_unix_prog__destroy(skel);
+	return NULL;
+}
+
+static void sendmsg_unix_prog_destroy(void *skel)
+{
+	sendmsg_unix_prog__destroy(skel);
+}
+
+static void *recvmsg_unix_prog_load(int cgroup_fd)
+{
+	struct recvmsg_unix_prog *skel;
+
+	skel = recvmsg_unix_prog__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "skel_open"))
+		goto cleanup;
+
+	skel->links.recvmsg_unix_prog = bpf_program__attach_cgroup(
+		skel->progs.recvmsg_unix_prog, cgroup_fd);
+	if (!ASSERT_OK_PTR(skel->links.recvmsg_unix_prog, "prog_attach"))
+		goto cleanup;
+
+	return skel;
+cleanup:
+	recvmsg_unix_prog__destroy(skel);
+	return NULL;
+}
+
+static void recvmsg_unix_prog_destroy(void *skel)
+{
+	recvmsg_unix_prog__destroy(skel);
+}
+
+static void *getsockname_unix_prog_load(int cgroup_fd)
+{
+	struct getsockname_unix_prog *skel;
+
+	skel = getsockname_unix_prog__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "skel_open"))
+		goto cleanup;
+
+	skel->links.getsockname_unix_prog = bpf_program__attach_cgroup(
+		skel->progs.getsockname_unix_prog, cgroup_fd);
+	if (!ASSERT_OK_PTR(skel->links.getsockname_unix_prog, "prog_attach"))
+		goto cleanup;
+
+	return skel;
+cleanup:
+	getsockname_unix_prog__destroy(skel);
+	return NULL;
+}
+
+static void getsockname_unix_prog_destroy(void *skel)
+{
+	getsockname_unix_prog__destroy(skel);
+}
+
+static void *getpeername_unix_prog_load(int cgroup_fd)
+{
+	struct getpeername_unix_prog *skel;
+
+	skel = getpeername_unix_prog__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "skel_open"))
+		goto cleanup;
+
+	skel->links.getpeername_unix_prog = bpf_program__attach_cgroup(
+		skel->progs.getpeername_unix_prog, cgroup_fd);
+	if (!ASSERT_OK_PTR(skel->links.getpeername_unix_prog, "prog_attach"))
+		goto cleanup;
+
+	return skel;
+cleanup:
+	getpeername_unix_prog__destroy(skel);
+	return NULL;
+}
+
+static void getpeername_unix_prog_destroy(void *skel)
+{
+	getpeername_unix_prog__destroy(skel);
+}
+
+static struct sock_addr_test tests[] = {
+	{
+		SOCK_ADDR_TEST_CONNECT,
+		"connect_unix",
+		connect_unix_prog_load,
+		connect_unix_prog_destroy,
+		AF_UNIX,
+		SOCK_STREAM,
+		SERVUN_ADDRESS,
+		0,
+		SERVUN_REWRITE_ADDRESS,
+		0,
+		NULL,
+	},
+	{
+		SOCK_ADDR_TEST_SENDMSG,
+		"sendmsg_unix",
+		sendmsg_unix_prog_load,
+		sendmsg_unix_prog_destroy,
+		AF_UNIX,
+		SOCK_DGRAM,
+		SERVUN_ADDRESS,
+		0,
+		SERVUN_REWRITE_ADDRESS,
+		0,
+		NULL,
+	},
+	{
+		SOCK_ADDR_TEST_RECVMSG,
+		"recvmsg_unix-dgram",
+		recvmsg_unix_prog_load,
+		recvmsg_unix_prog_destroy,
+		AF_UNIX,
+		SOCK_DGRAM,
+		SERVUN_REWRITE_ADDRESS,
+		0,
+		SERVUN_REWRITE_ADDRESS,
+		0,
+		SERVUN_ADDRESS,
+	},
+	{
+		SOCK_ADDR_TEST_RECVMSG,
+		"recvmsg_unix-stream",
+		recvmsg_unix_prog_load,
+		recvmsg_unix_prog_destroy,
+		AF_UNIX,
+		SOCK_STREAM,
+		SERVUN_REWRITE_ADDRESS,
+		0,
+		SERVUN_REWRITE_ADDRESS,
+		0,
+		SERVUN_ADDRESS,
+	},
+	{
+		SOCK_ADDR_TEST_GETSOCKNAME,
+		"getsockname_unix",
+		getsockname_unix_prog_load,
+		getsockname_unix_prog_destroy,
+		AF_UNIX,
+		SOCK_STREAM,
+		SERVUN_ADDRESS,
+		0,
+		SERVUN_REWRITE_ADDRESS,
+		0,
+		NULL,
+	},
+	{
+		SOCK_ADDR_TEST_GETPEERNAME,
+		"getpeername_unix",
+		getpeername_unix_prog_load,
+		getpeername_unix_prog_destroy,
+		AF_UNIX,
+		SOCK_STREAM,
+		SERVUN_ADDRESS,
+		0,
+		SERVUN_REWRITE_ADDRESS,
+		0,
+		NULL,
+	},
+};
+
+typedef int (*info_fn)(int, struct sockaddr *, socklen_t *);
+
+static int cmp_addr(const struct sockaddr_storage *addr1, socklen_t addr1_len,
+		    const struct sockaddr_storage *addr2, socklen_t addr2_len,
+		    bool cmp_port)
+{
+	const struct sockaddr_in *four1, *four2;
+	const struct sockaddr_in6 *six1, *six2;
+	const struct sockaddr_un *un1, *un2;
+
+	if (addr1->ss_family != addr2->ss_family)
+		return -1;
+
+	if (addr1_len != addr2_len)
+		return -1;
+
+	if (addr1->ss_family == AF_INET) {
+		four1 = (const struct sockaddr_in *)addr1;
+		four2 = (const struct sockaddr_in *)addr2;
+		return !((four1->sin_port == four2->sin_port || !cmp_port) &&
+			 four1->sin_addr.s_addr == four2->sin_addr.s_addr);
+	} else if (addr1->ss_family == AF_INET6) {
+		six1 = (const struct sockaddr_in6 *)addr1;
+		six2 = (const struct sockaddr_in6 *)addr2;
+		return !((six1->sin6_port == six2->sin6_port || !cmp_port) &&
+			 !memcmp(&six1->sin6_addr, &six2->sin6_addr,
+				 sizeof(struct in6_addr)));
+	} else if (addr1->ss_family == AF_UNIX) {
+		un1 = (const struct sockaddr_un *)addr1;
+		un2 = (const struct sockaddr_un *)addr2;
+		return memcmp(un1, un2, addr1_len);
+	}
+
+	return -1;
+}
+
+static int cmp_sock_addr(info_fn fn, int sock1,
+			 const struct sockaddr_storage *addr2,
+			 socklen_t addr2_len, bool cmp_port)
+{
+	struct sockaddr_storage addr1;
+	socklen_t len1 = sizeof(addr1);
+
+	memset(&addr1, 0, len1);
+	if (fn(sock1, (struct sockaddr *)&addr1, (socklen_t *)&len1) != 0)
+		return -1;
+
+	return cmp_addr(&addr1, len1, addr2, addr2_len, cmp_port);
+}
+
+static int cmp_local_addr(int sock1, const struct sockaddr_storage *addr2,
+			  socklen_t addr2_len, bool cmp_port)
+{
+	return cmp_sock_addr(getsockname, sock1, addr2, addr2_len, cmp_port);
+}
+
+static int cmp_peer_addr(int sock1, const struct sockaddr_storage *addr2,
+			 socklen_t addr2_len, bool cmp_port)
+{
+	return cmp_sock_addr(getpeername, sock1, addr2, addr2_len, cmp_port);
+}
+
+static void test_bind(struct sock_addr_test *test)
+{
+	struct sockaddr_storage expected_addr;
+	socklen_t expected_addr_len = sizeof(struct sockaddr_storage);
+	int serv = -1, client = -1, err;
+
+	serv = start_server(test->socket_family, test->socket_type,
+			    test->requested_addr, test->requested_port, 0);
+	if (!ASSERT_GE(serv, 0, "start_server"))
+		goto cleanup;
+
+	err = make_sockaddr(test->socket_family,
+			    test->expected_addr, test->expected_port,
+			    &expected_addr, &expected_addr_len);
+	if (!ASSERT_EQ(err, 0, "make_sockaddr"))
+		goto cleanup;
+
+	err = cmp_local_addr(serv, &expected_addr, expected_addr_len, true);
+	if (!ASSERT_EQ(err, 0, "cmp_local_addr"))
+		goto cleanup;
+
+	/* Try to connect to server just in case */
+	client = connect_to_addr(&expected_addr, expected_addr_len, test->socket_type);
+	if (!ASSERT_GE(client, 0, "connect_to_addr"))
+		goto cleanup;
+
+cleanup:
+	if (client != -1)
+		close(client);
+	if (serv != -1)
+		close(serv);
+}
+
+static void test_connect(struct sock_addr_test *test)
+{
+	struct sockaddr_storage addr, expected_addr, expected_src_addr;
+	socklen_t addr_len = sizeof(struct sockaddr_storage),
+		  expected_addr_len = sizeof(struct sockaddr_storage),
+		  expected_src_addr_len = sizeof(struct sockaddr_storage);
+	int serv = -1, client = -1, err;
+
+	serv = start_server(test->socket_family, test->socket_type,
+			    test->expected_addr, test->expected_port, 0);
+	if (!ASSERT_GE(serv, 0, "start_server"))
+		goto cleanup;
+
+	err = make_sockaddr(test->socket_family, test->requested_addr, test->requested_port,
+			    &addr, &addr_len);
+	if (!ASSERT_EQ(err, 0, "make_sockaddr"))
+		goto cleanup;
+
+	client = connect_to_addr(&addr, addr_len, test->socket_type);
+	if (!ASSERT_GE(client, 0, "connect_to_addr"))
+		goto cleanup;
+
+	err = make_sockaddr(test->socket_family, test->expected_addr, test->expected_port,
+			    &expected_addr, &expected_addr_len);
+	if (!ASSERT_EQ(err, 0, "make_sockaddr"))
+		goto cleanup;
+
+	if (test->expected_src_addr) {
+		err = make_sockaddr(test->socket_family, test->expected_src_addr, 0,
+				    &expected_src_addr, &expected_src_addr_len);
+		if (!ASSERT_EQ(err, 0, "make_sockaddr"))
+			goto cleanup;
+	}
+
+	err = cmp_peer_addr(client, &expected_addr, expected_addr_len, true);
+	if (!ASSERT_EQ(err, 0, "cmp_peer_addr"))
+		goto cleanup;
+
+	if (test->expected_src_addr) {
+		err = cmp_local_addr(client, &expected_src_addr, expected_src_addr_len, false);
+		if (!ASSERT_EQ(err, 0, "cmp_local_addr"))
+			goto cleanup;
+	}
+cleanup:
+	if (client != -1)
+		close(client);
+	if (serv != -1)
+		close(serv);
+}
+
+static void test_xmsg(struct sock_addr_test *test)
+{
+	struct sockaddr_storage addr, src_addr;
+	socklen_t addr_len = sizeof(struct sockaddr_storage),
+		  src_addr_len = sizeof(struct sockaddr_storage);
+	struct msghdr hdr;
+	struct iovec iov;
+	char data = 'a';
+	int serv = -1, client = -1, err;
+
+	/* Unlike the other tests, here we test that we can rewrite the src addr
+	 * with a recvmsg() hook.
+	 */
+
+	serv = start_server(test->socket_family, test->socket_type,
+			    test->expected_addr, test->expected_port, 0);
+	if (!ASSERT_GE(serv, 0, "start_server"))
+		goto cleanup;
+
+	client = socket(test->socket_family, test->socket_type, 0);
+	if (!ASSERT_GE(client, 0, "socket"))
+		goto cleanup;
+
+	/* AF_UNIX sockets have to be bound to something to trigger the recvmsg bpf program. */
+	if (test->socket_family == AF_UNIX) {
+		err = make_sockaddr(AF_UNIX, SRCUN_ADDRESS, 0, &src_addr, &src_addr_len);
+		if (!ASSERT_EQ(err, 0, "make_sockaddr"))
+			goto cleanup;
+
+		err = bind(client, (const struct sockaddr *) &src_addr, src_addr_len);
+		if (!ASSERT_OK(err, "bind"))
+			goto cleanup;
+	}
+
+	err = make_sockaddr(test->socket_family, test->requested_addr, test->requested_port,
+			    &addr, &addr_len);
+	if (!ASSERT_EQ(err, 0, "make_sockaddr"))
+		goto cleanup;
+
+	if (test->socket_type == SOCK_DGRAM) {
+		memset(&iov, 0, sizeof(iov));
+		iov.iov_base = &data;
+		iov.iov_len = sizeof(data);
+
+		memset(&hdr, 0, sizeof(hdr));
+		hdr.msg_name = (void *)&addr;
+		hdr.msg_namelen = addr_len;
+		hdr.msg_iov = &iov;
+		hdr.msg_iovlen = 1;
+
+		err = sendmsg(client, &hdr, 0);
+		if (!ASSERT_EQ(err, sizeof(data), "sendmsg"))
+			goto cleanup;
+	} else {
+		/* Testing with connection-oriented sockets is only valid for
+		 * recvmsg() tests.
+		 */
+		if (!ASSERT_EQ(test->type, SOCK_ADDR_TEST_RECVMSG, "recvmsg"))
+			goto cleanup;
+
+		err = connect(client, (const struct sockaddr *)&addr, addr_len);
+		if (!ASSERT_OK(err, "connect"))
+			goto cleanup;
+
+		err = send(client, &data, sizeof(data), 0);
+		if (!ASSERT_EQ(err, sizeof(data), "send"))
+			goto cleanup;
+
+		err = listen(serv, 0);
+		if (!ASSERT_OK(err, "listen"))
+			goto cleanup;
+
+		err = accept(serv, NULL, NULL);
+		if (!ASSERT_GE(err, 0, "accept"))
+			goto cleanup;
+
+		close(serv);
+		serv = err;
+	}
+
+	addr_len = src_addr_len = sizeof(struct sockaddr_storage);
+
+	err = recvfrom(serv, &data, sizeof(data), 0, (struct sockaddr *) &src_addr, &src_addr_len);
+	if (!ASSERT_EQ(err, sizeof(data), "recvfrom"))
+		goto cleanup;
+
+	ASSERT_EQ(data, 'a', "data mismatch");
+
+	if (test->expected_src_addr) {
+		err = make_sockaddr(test->socket_family, test->expected_src_addr, 0,
+				    &addr, &addr_len);
+		if (!ASSERT_EQ(err, 0, "make_sockaddr"))
+			goto cleanup;
+
+		err = cmp_addr(&src_addr, src_addr_len, &addr, addr_len, false);
+		if (!ASSERT_EQ(err, 0, "cmp_addr"))
+			goto cleanup;
+	}
+
+cleanup:
+	if (client != -1)
+		close(client);
+	if (serv != -1)
+		close(serv);
+}
+
+static void test_getsockname(struct sock_addr_test *test)
+{
+	struct sockaddr_storage expected_addr;
+	socklen_t expected_addr_len = sizeof(struct sockaddr_storage);
+	int serv = -1, err;
+
+	serv = start_server(test->socket_family, test->socket_type,
+			    test->requested_addr, test->requested_port, 0);
+	if (!ASSERT_GE(serv, 0, "start_server"))
+		goto cleanup;
+
+	err = make_sockaddr(test->socket_family,
+			    test->expected_addr, test->expected_port,
+			    &expected_addr, &expected_addr_len);
+	if (!ASSERT_EQ(err, 0, "make_sockaddr"))
+		goto cleanup;
+
+	err = cmp_local_addr(serv, &expected_addr, expected_addr_len, true);
+	if (!ASSERT_EQ(err, 0, "cmp_local_addr"))
+		goto cleanup;
+
+cleanup:
+	if (serv != -1)
+		close(serv);
+}
+
+static void test_getpeername(struct sock_addr_test *test)
+{
+	struct sockaddr_storage addr, expected_addr;
+	socklen_t addr_len = sizeof(struct sockaddr_storage),
+		  expected_addr_len = sizeof(struct sockaddr_storage);
+	int serv = -1, client = -1, err;
+
+	serv = start_server(test->socket_family, test->socket_type,
+			    test->requested_addr, test->requested_port, 0);
+	if (!ASSERT_GE(serv, 0, "start_server"))
+		goto cleanup;
+
+	err = make_sockaddr(test->socket_family, test->requested_addr, test->requested_port,
+			    &addr, &addr_len);
+	if (!ASSERT_EQ(err, 0, "make_sockaddr"))
+		goto cleanup;
+
+	client = connect_to_addr(&addr, addr_len, test->socket_type);
+	if (!ASSERT_GE(client, 0, "connect_to_addr"))
+		goto cleanup;
+
+	err = make_sockaddr(test->socket_family, test->expected_addr, test->expected_port,
+			    &expected_addr, &expected_addr_len);
+	if (!ASSERT_EQ(err, 0, "make_sockaddr"))
+		goto cleanup;
+
+	err = cmp_peer_addr(client, &expected_addr, expected_addr_len, true);
+	if (!ASSERT_EQ(err, 0, "cmp_peer_addr"))
+		goto cleanup;
+
+cleanup:
+	if (client != -1)
+		close(client);
+	if (serv != -1)
+		close(serv);
+}
+
+void test_sock_addr(void)
+{
+	int cgroup_fd = -1;
+	void *skel;
+
+	cgroup_fd = test__join_cgroup("/sock_addr");
+	if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup"))
+		goto cleanup;
+
+	for (size_t i = 0; i < ARRAY_SIZE(tests); ++i) {
+		struct sock_addr_test *test = &tests[i];
+
+		if (!test__start_subtest(test->name))
+			continue;
+
+		skel = test->loadfn(cgroup_fd);
+		if (!skel)
+			continue;
+
+		switch (test->type) {
+		/* Not exercised yet but we leave this code here for when the
+		 * INET and INET6 sockaddr tests are migrated to this file in
+		 * the future.
+		 */
+		case SOCK_ADDR_TEST_BIND:
+			test_bind(test);
+			break;
+		case SOCK_ADDR_TEST_CONNECT:
+			test_connect(test);
+			break;
+		case SOCK_ADDR_TEST_SENDMSG:
+		case SOCK_ADDR_TEST_RECVMSG:
+			test_xmsg(test);
+			break;
+		case SOCK_ADDR_TEST_GETSOCKNAME:
+			test_getsockname(test);
+			break;
+		case SOCK_ADDR_TEST_GETPEERNAME:
+			test_getpeername(test);
+			break;
+		default:
+			ASSERT_TRUE(false, "Unknown sock addr test type");
+			break;
+		}
+
+		test->destroyfn(skel);
+	}
+
+cleanup:
+	if (cgroup_fd >= 0)
+		close(cgroup_fd);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
index dda7060e86a0..f75f84d0b3d7 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
@@ -359,7 +359,7 @@ out:
 static void test_sockmap_skb_verdict_shutdown(void)
 {
 	struct epoll_event ev, events[MAX_EVENTS];
-	int n, err, map, verdict, s, c1, p1;
+	int n, err, map, verdict, s, c1 = -1, p1 = -1;
 	struct test_sockmap_pass_prog *skel;
 	int epollfd;
 	int zero = 0;
@@ -414,9 +414,9 @@ out:
 static void test_sockmap_skb_verdict_fionread(bool pass_prog)
 {
 	int expected, zero = 0, sent, recvd, avail;
-	int err, map, verdict, s, c0, c1, p0, p1;
-	struct test_sockmap_pass_prog *pass;
-	struct test_sockmap_drop_prog *drop;
+	int err, map, verdict, s, c0 = -1, c1 = -1, p0 = -1, p1 = -1;
+	struct test_sockmap_pass_prog *pass = NULL;
+	struct test_sockmap_drop_prog *drop = NULL;
 	char buf[256] = "0123456789";
 
 	if (pass_prog) {
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h b/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h
index 36d829a65aa4..e880f97bc44d 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h
@@ -378,7 +378,7 @@ static inline int enable_reuseport(int s, int progfd)
 static inline int socket_loopback_reuseport(int family, int sotype, int progfd)
 {
 	struct sockaddr_storage addr;
-	socklen_t len;
+	socklen_t len = 0;
 	int err, s;
 
 	init_addr_loopback(family, &addr, &len);
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
index 8df8cbb447f1..a934d430c20c 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
@@ -73,7 +73,7 @@ static void test_insert_bound(struct test_sockmap_listen *skel __always_unused,
 			      int family, int sotype, int mapfd)
 {
 	struct sockaddr_storage addr;
-	socklen_t len;
+	socklen_t len = 0;
 	u32 key = 0;
 	u64 value;
 	int err, s;
@@ -871,7 +871,7 @@ static void test_msg_redir_to_listening(struct test_sockmap_listen *skel,
 
 static void redir_partial(int family, int sotype, int sock_map, int parser_map)
 {
-	int s, c0, c1, p0, p1;
+	int s, c0 = -1, c1 = -1, p0 = -1, p1 = -1;
 	int err, n, key, value;
 	char buf[] = "abc";
 
@@ -1336,53 +1336,59 @@ static void test_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
 	}
 }
 
-static void unix_redir_to_connected(int sotype, int sock_mapfd,
-			       int verd_mapfd, enum redir_mode mode)
+static void pairs_redir_to_connected(int cli0, int peer0, int cli1, int peer1,
+				     int sock_mapfd, int verd_mapfd, enum redir_mode mode)
 {
 	const char *log_prefix = redir_mode_str(mode);
-	int c0, c1, p0, p1;
 	unsigned int pass;
 	int err, n;
-	int sfd[2];
 	u32 key;
 	char b;
 
 	zero_verdict_count(verd_mapfd);
 
-	if (socketpair(AF_UNIX, sotype | SOCK_NONBLOCK, 0, sfd))
-		return;
-	c0 = sfd[0], p0 = sfd[1];
-
-	if (socketpair(AF_UNIX, sotype | SOCK_NONBLOCK, 0, sfd))
-		goto close0;
-	c1 = sfd[0], p1 = sfd[1];
-
-	err = add_to_sockmap(sock_mapfd, p0, p1);
+	err = add_to_sockmap(sock_mapfd, peer0, peer1);
 	if (err)
-		goto close;
+		return;
 
-	n = write(c1, "a", 1);
+	n = write(cli1, "a", 1);
 	if (n < 0)
 		FAIL_ERRNO("%s: write", log_prefix);
 	if (n == 0)
 		FAIL("%s: incomplete write", log_prefix);
 	if (n < 1)
-		goto close;
+		return;
 
 	key = SK_PASS;
 	err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
 	if (err)
-		goto close;
+		return;
 	if (pass != 1)
 		FAIL("%s: want pass count 1, have %d", log_prefix, pass);
 
-	n = recv_timeout(mode == REDIR_INGRESS ? p0 : c0, &b, 1, 0, IO_TIMEOUT_SEC);
+	n = recv_timeout(mode == REDIR_INGRESS ? peer0 : cli0, &b, 1, 0, IO_TIMEOUT_SEC);
 	if (n < 0)
 		FAIL_ERRNO("%s: recv_timeout", log_prefix);
 	if (n == 0)
 		FAIL("%s: incomplete recv", log_prefix);
+}
+
+static void unix_redir_to_connected(int sotype, int sock_mapfd,
+			       int verd_mapfd, enum redir_mode mode)
+{
+	int c0, c1, p0, p1;
+	int sfd[2];
+
+	if (socketpair(AF_UNIX, sotype | SOCK_NONBLOCK, 0, sfd))
+		return;
+	c0 = sfd[0], p0 = sfd[1];
+
+	if (socketpair(AF_UNIX, sotype | SOCK_NONBLOCK, 0, sfd))
+		goto close0;
+	c1 = sfd[0], p1 = sfd[1];
+
+	pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, verd_mapfd, mode);
 
-close:
 	xclose(c1);
 	xclose(p1);
 close0:
@@ -1661,14 +1667,8 @@ close_peer0:
 static void udp_redir_to_connected(int family, int sock_mapfd, int verd_mapfd,
 				   enum redir_mode mode)
 {
-	const char *log_prefix = redir_mode_str(mode);
 	int c0, c1, p0, p1;
-	unsigned int pass;
-	int err, n;
-	u32 key;
-	char b;
-
-	zero_verdict_count(verd_mapfd);
+	int err;
 
 	err = inet_socketpair(family, SOCK_DGRAM, &p0, &c0);
 	if (err)
@@ -1677,32 +1677,8 @@ static void udp_redir_to_connected(int family, int sock_mapfd, int verd_mapfd,
 	if (err)
 		goto close_cli0;
 
-	err = add_to_sockmap(sock_mapfd, p0, p1);
-	if (err)
-		goto close_cli1;
+	pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, verd_mapfd, mode);
 
-	n = write(c1, "a", 1);
-	if (n < 0)
-		FAIL_ERRNO("%s: write", log_prefix);
-	if (n == 0)
-		FAIL("%s: incomplete write", log_prefix);
-	if (n < 1)
-		goto close_cli1;
-
-	key = SK_PASS;
-	err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
-	if (err)
-		goto close_cli1;
-	if (pass != 1)
-		FAIL("%s: want pass count 1, have %d", log_prefix, pass);
-
-	n = recv_timeout(mode == REDIR_INGRESS ? p0 : c0, &b, 1, 0, IO_TIMEOUT_SEC);
-	if (n < 0)
-		FAIL_ERRNO("%s: recv_timeout", log_prefix);
-	if (n == 0)
-		FAIL("%s: incomplete recv", log_prefix);
-
-close_cli1:
 	xclose(c1);
 	xclose(p1);
 close_cli0:
@@ -1747,15 +1723,9 @@ static void test_udp_redir(struct test_sockmap_listen *skel, struct bpf_map *map
 static void inet_unix_redir_to_connected(int family, int type, int sock_mapfd,
 					int verd_mapfd, enum redir_mode mode)
 {
-	const char *log_prefix = redir_mode_str(mode);
 	int c0, c1, p0, p1;
-	unsigned int pass;
-	int err, n;
 	int sfd[2];
-	u32 key;
-	char b;
-
-	zero_verdict_count(verd_mapfd);
+	int err;
 
 	if (socketpair(AF_UNIX, SOCK_DGRAM | SOCK_NONBLOCK, 0, sfd))
 		return;
@@ -1765,32 +1735,8 @@ static void inet_unix_redir_to_connected(int family, int type, int sock_mapfd,
 	if (err)
 		goto close;
 
-	err = add_to_sockmap(sock_mapfd, p0, p1);
-	if (err)
-		goto close_cli1;
-
-	n = write(c1, "a", 1);
-	if (n < 0)
-		FAIL_ERRNO("%s: write", log_prefix);
-	if (n == 0)
-		FAIL("%s: incomplete write", log_prefix);
-	if (n < 1)
-		goto close_cli1;
-
-	key = SK_PASS;
-	err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
-	if (err)
-		goto close_cli1;
-	if (pass != 1)
-		FAIL("%s: want pass count 1, have %d", log_prefix, pass);
-
-	n = recv_timeout(mode == REDIR_INGRESS ? p0 : c0, &b, 1, 0, IO_TIMEOUT_SEC);
-	if (n < 0)
-		FAIL_ERRNO("%s: recv_timeout", log_prefix);
-	if (n == 0)
-		FAIL("%s: incomplete recv", log_prefix);
+	pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, verd_mapfd, mode);
 
-close_cli1:
 	xclose(c1);
 	xclose(p1);
 close:
@@ -1827,15 +1773,9 @@ static void inet_unix_skb_redir_to_connected(struct test_sockmap_listen *skel,
 static void unix_inet_redir_to_connected(int family, int type, int sock_mapfd,
 					int verd_mapfd, enum redir_mode mode)
 {
-	const char *log_prefix = redir_mode_str(mode);
 	int c0, c1, p0, p1;
-	unsigned int pass;
-	int err, n;
 	int sfd[2];
-	u32 key;
-	char b;
-
-	zero_verdict_count(verd_mapfd);
+	int err;
 
 	err = inet_socketpair(family, SOCK_DGRAM, &p0, &c0);
 	if (err)
@@ -1845,32 +1785,8 @@ static void unix_inet_redir_to_connected(int family, int type, int sock_mapfd,
 		goto close_cli0;
 	c1 = sfd[0], p1 = sfd[1];
 
-	err = add_to_sockmap(sock_mapfd, p0, p1);
-	if (err)
-		goto close;
-
-	n = write(c1, "a", 1);
-	if (n < 0)
-		FAIL_ERRNO("%s: write", log_prefix);
-	if (n == 0)
-		FAIL("%s: incomplete write", log_prefix);
-	if (n < 1)
-		goto close;
-
-	key = SK_PASS;
-	err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
-	if (err)
-		goto close;
-	if (pass != 1)
-		FAIL("%s: want pass count 1, have %d", log_prefix, pass);
-
-	n = recv_timeout(mode == REDIR_INGRESS ? p0 : c0, &b, 1, 0, IO_TIMEOUT_SEC);
-	if (n < 0)
-		FAIL_ERRNO("%s: recv_timeout", log_prefix);
-	if (n == 0)
-		FAIL("%s: incomplete recv", log_prefix);
+	pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, verd_mapfd, mode);
 
-close:
 	xclose(c1);
 	xclose(p1);
 close_cli0:
diff --git a/tools/testing/selftests/bpf/prog_tests/tailcalls.c b/tools/testing/selftests/bpf/prog_tests/tailcalls.c
index 58fe2c586ed7..fc6b2954e8f5 100644
--- a/tools/testing/selftests/bpf/prog_tests/tailcalls.c
+++ b/tools/testing/selftests/bpf/prog_tests/tailcalls.c
@@ -218,12 +218,14 @@ out:
 	bpf_object__close(obj);
 }
 
-static void test_tailcall_count(const char *which)
+static void test_tailcall_count(const char *which, bool test_fentry,
+				bool test_fexit)
 {
+	struct bpf_object *obj = NULL, *fentry_obj = NULL, *fexit_obj = NULL;
+	struct bpf_link *fentry_link = NULL, *fexit_link = NULL;
 	int err, map_fd, prog_fd, main_fd, data_fd, i, val;
 	struct bpf_map *prog_array, *data_map;
 	struct bpf_program *prog;
-	struct bpf_object *obj;
 	char buff[128] = {};
 	LIBBPF_OPTS(bpf_test_run_opts, topts,
 		.data_in = buff,
@@ -265,23 +267,105 @@ static void test_tailcall_count(const char *which)
 	if (CHECK_FAIL(err))
 		goto out;
 
+	if (test_fentry) {
+		fentry_obj = bpf_object__open_file("tailcall_bpf2bpf_fentry.bpf.o",
+						   NULL);
+		if (!ASSERT_OK_PTR(fentry_obj, "open fentry_obj file"))
+			goto out;
+
+		prog = bpf_object__find_program_by_name(fentry_obj, "fentry");
+		if (!ASSERT_OK_PTR(prog, "find fentry prog"))
+			goto out;
+
+		err = bpf_program__set_attach_target(prog, prog_fd,
+						     "subprog_tail");
+		if (!ASSERT_OK(err, "set_attach_target subprog_tail"))
+			goto out;
+
+		err = bpf_object__load(fentry_obj);
+		if (!ASSERT_OK(err, "load fentry_obj"))
+			goto out;
+
+		fentry_link = bpf_program__attach_trace(prog);
+		if (!ASSERT_OK_PTR(fentry_link, "attach_trace"))
+			goto out;
+	}
+
+	if (test_fexit) {
+		fexit_obj = bpf_object__open_file("tailcall_bpf2bpf_fexit.bpf.o",
+						  NULL);
+		if (!ASSERT_OK_PTR(fexit_obj, "open fexit_obj file"))
+			goto out;
+
+		prog = bpf_object__find_program_by_name(fexit_obj, "fexit");
+		if (!ASSERT_OK_PTR(prog, "find fexit prog"))
+			goto out;
+
+		err = bpf_program__set_attach_target(prog, prog_fd,
+						     "subprog_tail");
+		if (!ASSERT_OK(err, "set_attach_target subprog_tail"))
+			goto out;
+
+		err = bpf_object__load(fexit_obj);
+		if (!ASSERT_OK(err, "load fexit_obj"))
+			goto out;
+
+		fexit_link = bpf_program__attach_trace(prog);
+		if (!ASSERT_OK_PTR(fexit_link, "attach_trace"))
+			goto out;
+	}
+
 	err = bpf_prog_test_run_opts(main_fd, &topts);
 	ASSERT_OK(err, "tailcall");
 	ASSERT_EQ(topts.retval, 1, "tailcall retval");
 
 	data_map = bpf_object__find_map_by_name(obj, "tailcall.bss");
 	if (CHECK_FAIL(!data_map || !bpf_map__is_internal(data_map)))
-		return;
+		goto out;
 
 	data_fd = bpf_map__fd(data_map);
-	if (CHECK_FAIL(map_fd < 0))
-		return;
+	if (CHECK_FAIL(data_fd < 0))
+		goto out;
 
 	i = 0;
 	err = bpf_map_lookup_elem(data_fd, &i, &val);
 	ASSERT_OK(err, "tailcall count");
 	ASSERT_EQ(val, 33, "tailcall count");
 
+	if (test_fentry) {
+		data_map = bpf_object__find_map_by_name(fentry_obj, ".bss");
+		if (!ASSERT_FALSE(!data_map || !bpf_map__is_internal(data_map),
+				  "find tailcall_bpf2bpf_fentry.bss map"))
+			goto out;
+
+		data_fd = bpf_map__fd(data_map);
+		if (!ASSERT_FALSE(data_fd < 0,
+				  "find tailcall_bpf2bpf_fentry.bss map fd"))
+			goto out;
+
+		i = 0;
+		err = bpf_map_lookup_elem(data_fd, &i, &val);
+		ASSERT_OK(err, "fentry count");
+		ASSERT_EQ(val, 33, "fentry count");
+	}
+
+	if (test_fexit) {
+		data_map = bpf_object__find_map_by_name(fexit_obj, ".bss");
+		if (!ASSERT_FALSE(!data_map || !bpf_map__is_internal(data_map),
+				  "find tailcall_bpf2bpf_fexit.bss map"))
+			goto out;
+
+		data_fd = bpf_map__fd(data_map);
+		if (!ASSERT_FALSE(data_fd < 0,
+				  "find tailcall_bpf2bpf_fexit.bss map fd"))
+			goto out;
+
+		i = 0;
+		err = bpf_map_lookup_elem(data_fd, &i, &val);
+		ASSERT_OK(err, "fexit count");
+		ASSERT_EQ(val, 33, "fexit count");
+	}
+
 	i = 0;
 	err = bpf_map_delete_elem(map_fd, &i);
 	if (CHECK_FAIL(err))
@@ -291,6 +375,10 @@ static void test_tailcall_count(const char *which)
 	ASSERT_OK(err, "tailcall");
 	ASSERT_OK(topts.retval, "tailcall retval");
 out:
+	bpf_link__destroy(fentry_link);
+	bpf_link__destroy(fexit_link);
+	bpf_object__close(fentry_obj);
+	bpf_object__close(fexit_obj);
 	bpf_object__close(obj);
 }
 
@@ -299,7 +387,7 @@ out:
  */
 static void test_tailcall_3(void)
 {
-	test_tailcall_count("tailcall3.bpf.o");
+	test_tailcall_count("tailcall3.bpf.o", false, false);
 }
 
 /* test_tailcall_6 checks that the count value of the tail call limit
@@ -307,7 +395,7 @@ static void test_tailcall_3(void)
  */
 static void test_tailcall_6(void)
 {
-	test_tailcall_count("tailcall6.bpf.o");
+	test_tailcall_count("tailcall6.bpf.o", false, false);
 }
 
 /* test_tailcall_4 checks that the kernel properly selects indirect jump
@@ -352,11 +440,11 @@ static void test_tailcall_4(void)
 
 	data_map = bpf_object__find_map_by_name(obj, "tailcall.bss");
 	if (CHECK_FAIL(!data_map || !bpf_map__is_internal(data_map)))
-		return;
+		goto out;
 
 	data_fd = bpf_map__fd(data_map);
-	if (CHECK_FAIL(map_fd < 0))
-		return;
+	if (CHECK_FAIL(data_fd < 0))
+		goto out;
 
 	for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
 		snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
@@ -442,11 +530,11 @@ static void test_tailcall_5(void)
 
 	data_map = bpf_object__find_map_by_name(obj, "tailcall.bss");
 	if (CHECK_FAIL(!data_map || !bpf_map__is_internal(data_map)))
-		return;
+		goto out;
 
 	data_fd = bpf_map__fd(data_map);
-	if (CHECK_FAIL(map_fd < 0))
-		return;
+	if (CHECK_FAIL(data_fd < 0))
+		goto out;
 
 	for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
 		snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
@@ -631,11 +719,11 @@ static void test_tailcall_bpf2bpf_2(void)
 
 	data_map = bpf_object__find_map_by_name(obj, "tailcall.bss");
 	if (CHECK_FAIL(!data_map || !bpf_map__is_internal(data_map)))
-		return;
+		goto out;
 
 	data_fd = bpf_map__fd(data_map);
-	if (CHECK_FAIL(map_fd < 0))
-		return;
+	if (CHECK_FAIL(data_fd < 0))
+		goto out;
 
 	i = 0;
 	err = bpf_map_lookup_elem(data_fd, &i, &val);
@@ -805,11 +893,11 @@ static void test_tailcall_bpf2bpf_4(bool noise)
 
 	data_map = bpf_object__find_map_by_name(obj, "tailcall.bss");
 	if (CHECK_FAIL(!data_map || !bpf_map__is_internal(data_map)))
-		return;
+		goto out;
 
 	data_fd = bpf_map__fd(data_map);
-	if (CHECK_FAIL(map_fd < 0))
-		return;
+	if (CHECK_FAIL(data_fd < 0))
+		goto out;
 
 	i = 0;
 	val.noise = noise;
@@ -872,7 +960,7 @@ static void test_tailcall_bpf2bpf_6(void)
 	ASSERT_EQ(topts.retval, 0, "tailcall retval");
 
 	data_fd = bpf_map__fd(obj->maps.bss);
-	if (!ASSERT_GE(map_fd, 0, "bss map fd"))
+	if (!ASSERT_GE(data_fd, 0, "bss map fd"))
 		goto out;
 
 	i = 0;
@@ -884,6 +972,139 @@ out:
 	tailcall_bpf2bpf6__destroy(obj);
 }
 
+/* test_tailcall_bpf2bpf_fentry checks that the count value of the tail call
+ * limit enforcement matches with expectations when tailcall is preceded with
+ * bpf2bpf call, and the bpf2bpf call is traced by fentry.
+ */
+static void test_tailcall_bpf2bpf_fentry(void)
+{
+	test_tailcall_count("tailcall_bpf2bpf2.bpf.o", true, false);
+}
+
+/* test_tailcall_bpf2bpf_fexit checks that the count value of the tail call
+ * limit enforcement matches with expectations when tailcall is preceded with
+ * bpf2bpf call, and the bpf2bpf call is traced by fexit.
+ */
+static void test_tailcall_bpf2bpf_fexit(void)
+{
+	test_tailcall_count("tailcall_bpf2bpf2.bpf.o", false, true);
+}
+
+/* test_tailcall_bpf2bpf_fentry_fexit checks that the count value of the tail
+ * call limit enforcement matches with expectations when tailcall is preceded
+ * with bpf2bpf call, and the bpf2bpf call is traced by both fentry and fexit.
+ */
+static void test_tailcall_bpf2bpf_fentry_fexit(void)
+{
+	test_tailcall_count("tailcall_bpf2bpf2.bpf.o", true, true);
+}
+
+/* test_tailcall_bpf2bpf_fentry_entry checks that the count value of the tail
+ * call limit enforcement matches with expectations when tailcall is preceded
+ * with bpf2bpf call, and the bpf2bpf caller is traced by fentry.
+ */
+static void test_tailcall_bpf2bpf_fentry_entry(void)
+{
+	struct bpf_object *tgt_obj = NULL, *fentry_obj = NULL;
+	int err, map_fd, prog_fd, data_fd, i, val;
+	struct bpf_map *prog_array, *data_map;
+	struct bpf_link *fentry_link = NULL;
+	struct bpf_program *prog;
+	char buff[128] = {};
+
+	LIBBPF_OPTS(bpf_test_run_opts, topts,
+		.data_in = buff,
+		.data_size_in = sizeof(buff),
+		.repeat = 1,
+	);
+
+	err = bpf_prog_test_load("tailcall_bpf2bpf2.bpf.o",
+				 BPF_PROG_TYPE_SCHED_CLS,
+				 &tgt_obj, &prog_fd);
+	if (!ASSERT_OK(err, "load tgt_obj"))
+		return;
+
+	prog_array = bpf_object__find_map_by_name(tgt_obj, "jmp_table");
+	if (!ASSERT_OK_PTR(prog_array, "find jmp_table map"))
+		goto out;
+
+	map_fd = bpf_map__fd(prog_array);
+	if (!ASSERT_FALSE(map_fd < 0, "find jmp_table map fd"))
+		goto out;
+
+	prog = bpf_object__find_program_by_name(tgt_obj, "classifier_0");
+	if (!ASSERT_OK_PTR(prog, "find classifier_0 prog"))
+		goto out;
+
+	prog_fd = bpf_program__fd(prog);
+	if (!ASSERT_FALSE(prog_fd < 0, "find classifier_0 prog fd"))
+		goto out;
+
+	i = 0;
+	err = bpf_map_update_elem(map_fd, &i, &prog_fd, BPF_ANY);
+	if (!ASSERT_OK(err, "update jmp_table"))
+		goto out;
+
+	fentry_obj = bpf_object__open_file("tailcall_bpf2bpf_fentry.bpf.o",
+					   NULL);
+	if (!ASSERT_OK_PTR(fentry_obj, "open fentry_obj file"))
+		goto out;
+
+	prog = bpf_object__find_program_by_name(fentry_obj, "fentry");
+	if (!ASSERT_OK_PTR(prog, "find fentry prog"))
+		goto out;
+
+	err = bpf_program__set_attach_target(prog, prog_fd, "classifier_0");
+	if (!ASSERT_OK(err, "set_attach_target classifier_0"))
+		goto out;
+
+	err = bpf_object__load(fentry_obj);
+	if (!ASSERT_OK(err, "load fentry_obj"))
+		goto out;
+
+	fentry_link = bpf_program__attach_trace(prog);
+	if (!ASSERT_OK_PTR(fentry_link, "attach_trace"))
+		goto out;
+
+	err = bpf_prog_test_run_opts(prog_fd, &topts);
+	ASSERT_OK(err, "tailcall");
+	ASSERT_EQ(topts.retval, 1, "tailcall retval");
+
+	data_map = bpf_object__find_map_by_name(tgt_obj, "tailcall.bss");
+	if (!ASSERT_FALSE(!data_map || !bpf_map__is_internal(data_map),
+			  "find tailcall.bss map"))
+		goto out;
+
+	data_fd = bpf_map__fd(data_map);
+	if (!ASSERT_FALSE(data_fd < 0, "find tailcall.bss map fd"))
+		goto out;
+
+	i = 0;
+	err = bpf_map_lookup_elem(data_fd, &i, &val);
+	ASSERT_OK(err, "tailcall count");
+	ASSERT_EQ(val, 34, "tailcall count");
+
+	data_map = bpf_object__find_map_by_name(fentry_obj, ".bss");
+	if (!ASSERT_FALSE(!data_map || !bpf_map__is_internal(data_map),
+			  "find tailcall_bpf2bpf_fentry.bss map"))
+		goto out;
+
+	data_fd = bpf_map__fd(data_map);
+	if (!ASSERT_FALSE(data_fd < 0,
+			  "find tailcall_bpf2bpf_fentry.bss map fd"))
+		goto out;
+
+	i = 0;
+	err = bpf_map_lookup_elem(data_fd, &i, &val);
+	ASSERT_OK(err, "fentry count");
+	ASSERT_EQ(val, 1, "fentry count");
+
+out:
+	bpf_link__destroy(fentry_link);
+	bpf_object__close(fentry_obj);
+	bpf_object__close(tgt_obj);
+}
+
 void test_tailcalls(void)
 {
 	if (test__start_subtest("tailcall_1"))
@@ -910,4 +1131,12 @@ void test_tailcalls(void)
 		test_tailcall_bpf2bpf_4(true);
 	if (test__start_subtest("tailcall_bpf2bpf_6"))
 		test_tailcall_bpf2bpf_6();
+	if (test__start_subtest("tailcall_bpf2bpf_fentry"))
+		test_tailcall_bpf2bpf_fentry();
+	if (test__start_subtest("tailcall_bpf2bpf_fexit"))
+		test_tailcall_bpf2bpf_fexit();
+	if (test__start_subtest("tailcall_bpf2bpf_fentry_fexit"))
+		test_tailcall_bpf2bpf_fentry_fexit();
+	if (test__start_subtest("tailcall_bpf2bpf_fentry_entry"))
+		test_tailcall_bpf2bpf_fentry_entry();
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/timer.c b/tools/testing/selftests/bpf/prog_tests/timer.c
index ce2c61d62fc6..760ad96b4be0 100644
--- a/tools/testing/selftests/bpf/prog_tests/timer.c
+++ b/tools/testing/selftests/bpf/prog_tests/timer.c
@@ -15,6 +15,7 @@ static int timer(struct timer *timer_skel)
 
 	ASSERT_EQ(timer_skel->data->callback_check, 52, "callback_check1");
 	ASSERT_EQ(timer_skel->data->callback2_check, 52, "callback2_check1");
+	ASSERT_EQ(timer_skel->bss->pinned_callback_check, 0, "pinned_callback_check1");
 
 	prog_fd = bpf_program__fd(timer_skel->progs.test1);
 	err = bpf_prog_test_run_opts(prog_fd, &topts);
@@ -33,6 +34,9 @@ static int timer(struct timer *timer_skel)
 	/* check that timer_cb3() was executed twice */
 	ASSERT_EQ(timer_skel->bss->abs_data, 12, "abs_data");
 
+	/* check that timer_cb_pinned() was executed twice */
+	ASSERT_EQ(timer_skel->bss->pinned_callback_check, 2, "pinned_callback_check");
+
 	/* check that there were no errors in timer execution */
 	ASSERT_EQ(timer_skel->bss->err, 0, "err");
 
diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe.c b/tools/testing/selftests/bpf/prog_tests/uprobe.c
new file mode 100644
index 000000000000..cf3e0e7a64fa
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/uprobe.c
@@ -0,0 +1,95 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Hengqi Chen */
+
+#include <test_progs.h>
+#include "test_uprobe.skel.h"
+
+static FILE *urand_spawn(int *pid)
+{
+	FILE *f;
+
+	/* urandom_read's stdout is wired into f */
+	f = popen("./urandom_read 1 report-pid", "r");
+	if (!f)
+		return NULL;
+
+	if (fscanf(f, "%d", pid) != 1) {
+		pclose(f);
+		errno = EINVAL;
+		return NULL;
+	}
+
+	return f;
+}
+
+static int urand_trigger(FILE **urand_pipe)
+{
+	int exit_code;
+
+	/* pclose() waits for child process to exit and returns their exit code */
+	exit_code = pclose(*urand_pipe);
+	*urand_pipe = NULL;
+
+	return exit_code;
+}
+
+void test_uprobe(void)
+{
+	LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts);
+	struct test_uprobe *skel;
+	FILE *urand_pipe = NULL;
+	int urand_pid = 0, err;
+
+	skel = test_uprobe__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "skel_open"))
+		return;
+
+	urand_pipe = urand_spawn(&urand_pid);
+	if (!ASSERT_OK_PTR(urand_pipe, "urand_spawn"))
+		goto cleanup;
+
+	skel->bss->my_pid = urand_pid;
+
+	/* Manual attach uprobe to urandlib_api
+	 * There are two `urandlib_api` symbols in .dynsym section:
+	 *   - urandlib_api@LIBURANDOM_READ_1.0.0
+	 *   - urandlib_api@@LIBURANDOM_READ_2.0.0
+	 * Both are global bind and would cause a conflict if user
+	 * specify the symbol name without a version suffix
+	 */
+	uprobe_opts.func_name = "urandlib_api";
+	skel->links.test4 = bpf_program__attach_uprobe_opts(skel->progs.test4,
+							    urand_pid,
+							    "./liburandom_read.so",
+							    0 /* offset */,
+							    &uprobe_opts);
+	if (!ASSERT_ERR_PTR(skel->links.test4, "urandlib_api_attach_conflict"))
+		goto cleanup;
+
+	uprobe_opts.func_name = "urandlib_api@LIBURANDOM_READ_1.0.0";
+	skel->links.test4 = bpf_program__attach_uprobe_opts(skel->progs.test4,
+							    urand_pid,
+							    "./liburandom_read.so",
+							    0 /* offset */,
+							    &uprobe_opts);
+	if (!ASSERT_OK_PTR(skel->links.test4, "urandlib_api_attach_ok"))
+		goto cleanup;
+
+	/* Auto attach 3 u[ret]probes to urandlib_api_sameoffset */
+	err = test_uprobe__attach(skel);
+	if (!ASSERT_OK(err, "skel_attach"))
+		goto cleanup;
+
+	/* trigger urandom_read */
+	ASSERT_OK(urand_trigger(&urand_pipe), "urand_exit_code");
+
+	ASSERT_EQ(skel->bss->test1_result, 1, "urandlib_api_sameoffset");
+	ASSERT_EQ(skel->bss->test2_result, 1, "urandlib_api_sameoffset@v1");
+	ASSERT_EQ(skel->bss->test3_result, 3, "urandlib_api_sameoffset@@v2");
+	ASSERT_EQ(skel->bss->test4_result, 1, "urandlib_api");
+
+cleanup:
+	if (urand_pipe)
+		pclose(urand_pipe);
+	test_uprobe__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c
index 626c461fa34d..4439ba9392f8 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c
@@ -226,7 +226,7 @@ static int verify_xsk_metadata(struct xsk *xsk)
 	__u64 comp_addr;
 	void *data;
 	__u64 addr;
-	__u32 idx;
+	__u32 idx = 0;
 	int ret;
 
 	ret = recvfrom(xsk_socket__fd(xsk->socket), NULL, 0, MSG_DONTWAIT, NULL, NULL);
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_vma.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_vmas.c
index dd923dc637d5..dd923dc637d5 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_task_vma.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_vmas.c
diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h
index 38a57a2e70db..799fff4995d8 100644
--- a/tools/testing/selftests/bpf/progs/bpf_misc.h
+++ b/tools/testing/selftests/bpf/progs/bpf_misc.h
@@ -99,6 +99,9 @@
 #elif defined(__TARGET_ARCH_arm64)
 #define SYSCALL_WRAPPER 1
 #define SYS_PREFIX "__arm64_"
+#elif defined(__TARGET_ARCH_riscv)
+#define SYSCALL_WRAPPER 1
+#define SYS_PREFIX "__riscv_"
 #else
 #define SYSCALL_WRAPPER 0
 #define SYS_PREFIX "__se_"
diff --git a/tools/testing/selftests/bpf/progs/connect_unix_prog.c b/tools/testing/selftests/bpf/progs/connect_unix_prog.c
new file mode 100644
index 000000000000..ca8aa2f116b3
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/connect_unix_prog.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+
+#include <string.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_kfuncs.h"
+
+__u8 SERVUN_REWRITE_ADDRESS[] = "\0bpf_cgroup_unix_test_rewrite";
+
+SEC("cgroup/connect_unix")
+int connect_unix_prog(struct bpf_sock_addr *ctx)
+{
+	struct bpf_sock_addr_kern *sa_kern = bpf_cast_to_kern_ctx(ctx);
+	struct sockaddr_un *sa_kern_unaddr;
+	__u32 unaddrlen = offsetof(struct sockaddr_un, sun_path) +
+			  sizeof(SERVUN_REWRITE_ADDRESS) - 1;
+	int ret;
+
+	/* Rewrite destination. */
+	ret = bpf_sock_addr_set_sun_path(sa_kern, SERVUN_REWRITE_ADDRESS,
+					 sizeof(SERVUN_REWRITE_ADDRESS) - 1);
+	if (ret)
+		return 0;
+
+	if (sa_kern->uaddrlen != unaddrlen)
+		return 0;
+
+	sa_kern_unaddr = bpf_rdonly_cast(sa_kern->uaddr,
+						bpf_core_type_id_kernel(struct sockaddr_un));
+	if (memcmp(sa_kern_unaddr->sun_path, SERVUN_REWRITE_ADDRESS,
+			sizeof(SERVUN_REWRITE_ADDRESS) - 1) != 0)
+		return 0;
+
+	return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/exceptions.c b/tools/testing/selftests/bpf/progs/exceptions.c
new file mode 100644
index 000000000000..2811ee842b01
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/exceptions.c
@@ -0,0 +1,368 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include <bpf/bpf_endian.h>
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+
+#ifndef ETH_P_IP
+#define ETH_P_IP 0x0800
+#endif
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+	__uint(max_entries, 4);
+	__uint(key_size, sizeof(__u32));
+	__uint(value_size, sizeof(__u32));
+} jmp_table SEC(".maps");
+
+static __noinline int static_func(u64 i)
+{
+	bpf_throw(32);
+	return i;
+}
+
+__noinline int global2static_simple(u64 i)
+{
+	static_func(i + 2);
+	return i - 1;
+}
+
+__noinline int global2static(u64 i)
+{
+	if (i == ETH_P_IP)
+		bpf_throw(16);
+	return static_func(i);
+}
+
+static __noinline int static2global(u64 i)
+{
+	return global2static(i) + i;
+}
+
+SEC("tc")
+int exception_throw_always_1(struct __sk_buff *ctx)
+{
+	bpf_throw(64);
+	return 0;
+}
+
+/* In this case, the global func will never be seen executing after call to
+ * static subprog, hence verifier will DCE the remaining instructions. Ensure we
+ * are resilient to that.
+ */
+SEC("tc")
+int exception_throw_always_2(struct __sk_buff *ctx)
+{
+	return global2static_simple(ctx->protocol);
+}
+
+SEC("tc")
+int exception_throw_unwind_1(struct __sk_buff *ctx)
+{
+	return static2global(bpf_ntohs(ctx->protocol));
+}
+
+SEC("tc")
+int exception_throw_unwind_2(struct __sk_buff *ctx)
+{
+	return static2global(bpf_ntohs(ctx->protocol) - 1);
+}
+
+SEC("tc")
+int exception_throw_default(struct __sk_buff *ctx)
+{
+	bpf_throw(0);
+	return 1;
+}
+
+SEC("tc")
+int exception_throw_default_value(struct __sk_buff *ctx)
+{
+	bpf_throw(5);
+	return 1;
+}
+
+SEC("tc")
+int exception_tail_call_target(struct __sk_buff *ctx)
+{
+	bpf_throw(16);
+	return 0;
+}
+
+static __noinline
+int exception_tail_call_subprog(struct __sk_buff *ctx)
+{
+	volatile int ret = 10;
+
+	bpf_tail_call_static(ctx, &jmp_table, 0);
+	return ret;
+}
+
+SEC("tc")
+int exception_tail_call(struct __sk_buff *ctx) {
+	volatile int ret = 0;
+
+	ret = exception_tail_call_subprog(ctx);
+	return ret + 8;
+}
+
+__noinline int exception_ext_global(struct __sk_buff *ctx)
+{
+	volatile int ret = 0;
+
+	return ret;
+}
+
+static __noinline int exception_ext_static(struct __sk_buff *ctx)
+{
+	return exception_ext_global(ctx);
+}
+
+SEC("tc")
+int exception_ext(struct __sk_buff *ctx)
+{
+	return exception_ext_static(ctx);
+}
+
+__noinline int exception_cb_mod_global(u64 cookie)
+{
+	volatile int ret = 0;
+
+	return ret;
+}
+
+/* Example of how the exception callback supplied during verification can still
+ * introduce extensions by calling to dummy global functions, and alter runtime
+ * behavior.
+ *
+ * Right now we don't allow freplace attachment to exception callback itself,
+ * but if the need arises this restriction is technically feasible to relax in
+ * the future.
+ */
+__noinline int exception_cb_mod(u64 cookie)
+{
+	return exception_cb_mod_global(cookie) + cookie + 10;
+}
+
+SEC("tc")
+__exception_cb(exception_cb_mod)
+int exception_ext_mod_cb_runtime(struct __sk_buff *ctx)
+{
+	bpf_throw(25);
+	return 0;
+}
+
+__noinline static int subprog(struct __sk_buff *ctx)
+{
+	return bpf_ktime_get_ns();
+}
+
+__noinline static int throwing_subprog(struct __sk_buff *ctx)
+{
+	if (ctx->tstamp)
+		bpf_throw(0);
+	return bpf_ktime_get_ns();
+}
+
+__noinline int global_subprog(struct __sk_buff *ctx)
+{
+	return bpf_ktime_get_ns();
+}
+
+__noinline int throwing_global_subprog(struct __sk_buff *ctx)
+{
+	if (ctx->tstamp)
+		bpf_throw(0);
+	return bpf_ktime_get_ns();
+}
+
+SEC("tc")
+int exception_throw_subprog(struct __sk_buff *ctx)
+{
+	switch (ctx->protocol) {
+	case 1:
+		return subprog(ctx);
+	case 2:
+		return global_subprog(ctx);
+	case 3:
+		return throwing_subprog(ctx);
+	case 4:
+		return throwing_global_subprog(ctx);
+	default:
+		break;
+	}
+	bpf_throw(1);
+	return 0;
+}
+
+__noinline int assert_nz_gfunc(u64 c)
+{
+	volatile u64 cookie = c;
+
+	bpf_assert(cookie != 0);
+	return 0;
+}
+
+__noinline int assert_zero_gfunc(u64 c)
+{
+	volatile u64 cookie = c;
+
+	bpf_assert_eq(cookie, 0);
+	return 0;
+}
+
+__noinline int assert_neg_gfunc(s64 c)
+{
+	volatile s64 cookie = c;
+
+	bpf_assert_lt(cookie, 0);
+	return 0;
+}
+
+__noinline int assert_pos_gfunc(s64 c)
+{
+	volatile s64 cookie = c;
+
+	bpf_assert_gt(cookie, 0);
+	return 0;
+}
+
+__noinline int assert_negeq_gfunc(s64 c)
+{
+	volatile s64 cookie = c;
+
+	bpf_assert_le(cookie, -1);
+	return 0;
+}
+
+__noinline int assert_poseq_gfunc(s64 c)
+{
+	volatile s64 cookie = c;
+
+	bpf_assert_ge(cookie, 1);
+	return 0;
+}
+
+__noinline int assert_nz_gfunc_with(u64 c)
+{
+	volatile u64 cookie = c;
+
+	bpf_assert_with(cookie != 0, cookie + 100);
+	return 0;
+}
+
+__noinline int assert_zero_gfunc_with(u64 c)
+{
+	volatile u64 cookie = c;
+
+	bpf_assert_eq_with(cookie, 0, cookie + 100);
+	return 0;
+}
+
+__noinline int assert_neg_gfunc_with(s64 c)
+{
+	volatile s64 cookie = c;
+
+	bpf_assert_lt_with(cookie, 0, cookie + 100);
+	return 0;
+}
+
+__noinline int assert_pos_gfunc_with(s64 c)
+{
+	volatile s64 cookie = c;
+
+	bpf_assert_gt_with(cookie, 0, cookie + 100);
+	return 0;
+}
+
+__noinline int assert_negeq_gfunc_with(s64 c)
+{
+	volatile s64 cookie = c;
+
+	bpf_assert_le_with(cookie, -1, cookie + 100);
+	return 0;
+}
+
+__noinline int assert_poseq_gfunc_with(s64 c)
+{
+	volatile s64 cookie = c;
+
+	bpf_assert_ge_with(cookie, 1, cookie + 100);
+	return 0;
+}
+
+#define check_assert(name, cookie, tag)				\
+SEC("tc")							\
+int exception##tag##name(struct __sk_buff *ctx)			\
+{								\
+	return name(cookie) + 1;				\
+}
+
+check_assert(assert_nz_gfunc, 5, _);
+check_assert(assert_zero_gfunc, 0, _);
+check_assert(assert_neg_gfunc, -100, _);
+check_assert(assert_pos_gfunc, 100, _);
+check_assert(assert_negeq_gfunc, -1, _);
+check_assert(assert_poseq_gfunc, 1, _);
+
+check_assert(assert_nz_gfunc_with, 5, _);
+check_assert(assert_zero_gfunc_with, 0, _);
+check_assert(assert_neg_gfunc_with, -100, _);
+check_assert(assert_pos_gfunc_with, 100, _);
+check_assert(assert_negeq_gfunc_with, -1, _);
+check_assert(assert_poseq_gfunc_with, 1, _);
+
+check_assert(assert_nz_gfunc, 0, _bad_);
+check_assert(assert_zero_gfunc, 5, _bad_);
+check_assert(assert_neg_gfunc, 100, _bad_);
+check_assert(assert_pos_gfunc, -100, _bad_);
+check_assert(assert_negeq_gfunc, 1, _bad_);
+check_assert(assert_poseq_gfunc, -1, _bad_);
+
+check_assert(assert_nz_gfunc_with, 0, _bad_);
+check_assert(assert_zero_gfunc_with, 5, _bad_);
+check_assert(assert_neg_gfunc_with, 100, _bad_);
+check_assert(assert_pos_gfunc_with, -100, _bad_);
+check_assert(assert_negeq_gfunc_with, 1, _bad_);
+check_assert(assert_poseq_gfunc_with, -1, _bad_);
+
+SEC("tc")
+int exception_assert_range(struct __sk_buff *ctx)
+{
+	u64 time = bpf_ktime_get_ns();
+
+	bpf_assert_range(time, 0, ~0ULL);
+	return 1;
+}
+
+SEC("tc")
+int exception_assert_range_with(struct __sk_buff *ctx)
+{
+	u64 time = bpf_ktime_get_ns();
+
+	bpf_assert_range_with(time, 0, ~0ULL, 10);
+	return 1;
+}
+
+SEC("tc")
+int exception_bad_assert_range(struct __sk_buff *ctx)
+{
+	u64 time = bpf_ktime_get_ns();
+
+	bpf_assert_range(time, -100, 100);
+	return 1;
+}
+
+SEC("tc")
+int exception_bad_assert_range_with(struct __sk_buff *ctx)
+{
+	u64 time = bpf_ktime_get_ns();
+
+	bpf_assert_range_with(time, -1000, 1000, 10);
+	return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/exceptions_assert.c b/tools/testing/selftests/bpf/progs/exceptions_assert.c
new file mode 100644
index 000000000000..e1e5c54a6a11
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/exceptions_assert.c
@@ -0,0 +1,135 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <limits.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include <bpf/bpf_endian.h>
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+
+#define check_assert(type, op, name, value)				\
+	SEC("?tc")							\
+	__log_level(2) __failure					\
+	int check_assert_##op##_##name(void *ctx)			\
+	{								\
+		type num = bpf_ktime_get_ns();				\
+		bpf_assert_##op(num, value);				\
+		return *(u64 *)num;					\
+	}
+
+__msg(": R0_w=-2147483648 R10=fp0")
+check_assert(s64, eq, int_min, INT_MIN);
+__msg(": R0_w=2147483647 R10=fp0")
+check_assert(s64, eq, int_max, INT_MAX);
+__msg(": R0_w=0 R10=fp0")
+check_assert(s64, eq, zero, 0);
+__msg(": R0_w=-9223372036854775808 R1_w=-9223372036854775808 R10=fp0")
+check_assert(s64, eq, llong_min, LLONG_MIN);
+__msg(": R0_w=9223372036854775807 R1_w=9223372036854775807 R10=fp0")
+check_assert(s64, eq, llong_max, LLONG_MAX);
+
+__msg(": R0_w=scalar(smax=2147483646) R10=fp0")
+check_assert(s64, lt, pos, INT_MAX);
+__msg(": R0_w=scalar(smax=-1,umin=9223372036854775808,var_off=(0x8000000000000000; 0x7fffffffffffffff))")
+check_assert(s64, lt, zero, 0);
+__msg(": R0_w=scalar(smax=-2147483649,umin=9223372036854775808,umax=18446744071562067967,var_off=(0x8000000000000000; 0x7fffffffffffffff))")
+check_assert(s64, lt, neg, INT_MIN);
+
+__msg(": R0_w=scalar(smax=2147483647) R10=fp0")
+check_assert(s64, le, pos, INT_MAX);
+__msg(": R0_w=scalar(smax=0) R10=fp0")
+check_assert(s64, le, zero, 0);
+__msg(": R0_w=scalar(smax=-2147483648,umin=9223372036854775808,umax=18446744071562067968,var_off=(0x8000000000000000; 0x7fffffffffffffff))")
+check_assert(s64, le, neg, INT_MIN);
+
+__msg(": R0_w=scalar(smin=umin=2147483648,umax=9223372036854775807,var_off=(0x0; 0x7fffffffffffffff))")
+check_assert(s64, gt, pos, INT_MAX);
+__msg(": R0_w=scalar(smin=umin=1,umax=9223372036854775807,var_off=(0x0; 0x7fffffffffffffff))")
+check_assert(s64, gt, zero, 0);
+__msg(": R0_w=scalar(smin=-2147483647) R10=fp0")
+check_assert(s64, gt, neg, INT_MIN);
+
+__msg(": R0_w=scalar(smin=umin=2147483647,umax=9223372036854775807,var_off=(0x0; 0x7fffffffffffffff))")
+check_assert(s64, ge, pos, INT_MAX);
+__msg(": R0_w=scalar(smin=0,umax=9223372036854775807,var_off=(0x0; 0x7fffffffffffffff)) R10=fp0")
+check_assert(s64, ge, zero, 0);
+__msg(": R0_w=scalar(smin=-2147483648) R10=fp0")
+check_assert(s64, ge, neg, INT_MIN);
+
+SEC("?tc")
+__log_level(2) __failure
+__msg(": R0=0 R1=ctx(off=0,imm=0) R2=scalar(smin=smin32=-2147483646,smax=smax32=2147483645) R10=fp0")
+int check_assert_range_s64(struct __sk_buff *ctx)
+{
+	struct bpf_sock *sk = ctx->sk;
+	s64 num;
+
+	_Static_assert(_Generic((sk->rx_queue_mapping), s32: 1, default: 0), "type match");
+	if (!sk)
+		return 0;
+	num = sk->rx_queue_mapping;
+	bpf_assert_range(num, INT_MIN + 2, INT_MAX - 2);
+	return *((u8 *)ctx + num);
+}
+
+SEC("?tc")
+__log_level(2) __failure
+__msg(": R1=ctx(off=0,imm=0) R2=scalar(smin=umin=smin32=umin32=4096,smax=umax=smax32=umax32=8192,var_off=(0x0; 0x3fff))")
+int check_assert_range_u64(struct __sk_buff *ctx)
+{
+	u64 num = ctx->len;
+
+	bpf_assert_range(num, 4096, 8192);
+	return *((u8 *)ctx + num);
+}
+
+SEC("?tc")
+__log_level(2) __failure
+__msg(": R0=0 R1=ctx(off=0,imm=0) R2=4096 R10=fp0")
+int check_assert_single_range_s64(struct __sk_buff *ctx)
+{
+	struct bpf_sock *sk = ctx->sk;
+	s64 num;
+
+	_Static_assert(_Generic((sk->rx_queue_mapping), s32: 1, default: 0), "type match");
+	if (!sk)
+		return 0;
+	num = sk->rx_queue_mapping;
+
+	bpf_assert_range(num, 4096, 4096);
+	return *((u8 *)ctx + num);
+}
+
+SEC("?tc")
+__log_level(2) __failure
+__msg(": R1=ctx(off=0,imm=0) R2=4096 R10=fp0")
+int check_assert_single_range_u64(struct __sk_buff *ctx)
+{
+	u64 num = ctx->len;
+
+	bpf_assert_range(num, 4096, 4096);
+	return *((u8 *)ctx + num);
+}
+
+SEC("?tc")
+__log_level(2) __failure
+__msg(": R1=pkt(off=64,r=64,imm=0) R2=pkt_end(off=0,imm=0) R6=pkt(off=0,r=64,imm=0) R10=fp0")
+int check_assert_generic(struct __sk_buff *ctx)
+{
+	u8 *data_end = (void *)(long)ctx->data_end;
+	u8 *data = (void *)(long)ctx->data;
+
+	bpf_assert(data + 64 <= data_end);
+	return data[128];
+}
+
+SEC("?fentry/bpf_check")
+__failure __msg("At program exit the register R0 has value (0x40; 0x0)")
+int check_assert_with_return(void *ctx)
+{
+	bpf_assert_with(!ctx, 64);
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/exceptions_ext.c b/tools/testing/selftests/bpf/progs/exceptions_ext.c
new file mode 100644
index 000000000000..743c05185d9b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/exceptions_ext.c
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_experimental.h"
+
+SEC("?fentry")
+int pfentry(void *ctx)
+{
+	return 0;
+}
+
+SEC("?fentry")
+int throwing_fentry(void *ctx)
+{
+	bpf_throw(0);
+	return 0;
+}
+
+__noinline int exception_cb(u64 cookie)
+{
+	return cookie + 64;
+}
+
+SEC("?freplace")
+int extension(struct __sk_buff *ctx)
+{
+	return 0;
+}
+
+SEC("?freplace")
+__exception_cb(exception_cb)
+int throwing_exception_cb_extension(u64 cookie)
+{
+	bpf_throw(32);
+	return 0;
+}
+
+SEC("?freplace")
+__exception_cb(exception_cb)
+int throwing_extension(struct __sk_buff *ctx)
+{
+	bpf_throw(64);
+	return 0;
+}
+
+SEC("?fexit")
+int pfexit(void *ctx)
+{
+	return 0;
+}
+
+SEC("?fexit")
+int throwing_fexit(void *ctx)
+{
+	bpf_throw(0);
+	return 0;
+}
+
+SEC("?fmod_ret")
+int pfmod_ret(void *ctx)
+{
+	return 0;
+}
+
+SEC("?fmod_ret")
+int throwing_fmod_ret(void *ctx)
+{
+	bpf_throw(0);
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/exceptions_fail.c b/tools/testing/selftests/bpf/progs/exceptions_fail.c
new file mode 100644
index 000000000000..4c39e920dac2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/exceptions_fail.c
@@ -0,0 +1,347 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+
+extern void bpf_rcu_read_lock(void) __ksym;
+
+#define private(name) SEC(".bss." #name) __hidden __attribute__((aligned(8)))
+
+struct foo {
+	struct bpf_rb_node node;
+};
+
+struct hmap_elem {
+	struct bpf_timer timer;
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, 64);
+	__type(key, int);
+	__type(value, struct hmap_elem);
+} hmap SEC(".maps");
+
+private(A) struct bpf_spin_lock lock;
+private(A) struct bpf_rb_root rbtree __contains(foo, node);
+
+__noinline void *exception_cb_bad_ret_type(u64 cookie)
+{
+	return NULL;
+}
+
+__noinline int exception_cb_bad_arg_0(void)
+{
+	return 0;
+}
+
+__noinline int exception_cb_bad_arg_2(int a, int b)
+{
+	return 0;
+}
+
+__noinline int exception_cb_ok_arg_small(int a)
+{
+	return 0;
+}
+
+SEC("?tc")
+__exception_cb(exception_cb_bad_ret_type)
+__failure __msg("Global function exception_cb_bad_ret_type() doesn't return scalar.")
+int reject_exception_cb_type_1(struct __sk_buff *ctx)
+{
+	bpf_throw(0);
+	return 0;
+}
+
+SEC("?tc")
+__exception_cb(exception_cb_bad_arg_0)
+__failure __msg("exception cb only supports single integer argument")
+int reject_exception_cb_type_2(struct __sk_buff *ctx)
+{
+	bpf_throw(0);
+	return 0;
+}
+
+SEC("?tc")
+__exception_cb(exception_cb_bad_arg_2)
+__failure __msg("exception cb only supports single integer argument")
+int reject_exception_cb_type_3(struct __sk_buff *ctx)
+{
+	bpf_throw(0);
+	return 0;
+}
+
+SEC("?tc")
+__exception_cb(exception_cb_ok_arg_small)
+__success
+int reject_exception_cb_type_4(struct __sk_buff *ctx)
+{
+	bpf_throw(0);
+	return 0;
+}
+
+__noinline
+static int timer_cb(void *map, int *key, struct bpf_timer *timer)
+{
+	bpf_throw(0);
+	return 0;
+}
+
+SEC("?tc")
+__failure __msg("cannot be called from callback subprog")
+int reject_async_callback_throw(struct __sk_buff *ctx)
+{
+	struct hmap_elem *elem;
+
+	elem = bpf_map_lookup_elem(&hmap, &(int){0});
+	if (!elem)
+		return 0;
+	return bpf_timer_set_callback(&elem->timer, timer_cb);
+}
+
+__noinline static int subprog_lock(struct __sk_buff *ctx)
+{
+	volatile int ret = 0;
+
+	bpf_spin_lock(&lock);
+	if (ctx->len)
+		bpf_throw(0);
+	return ret;
+}
+
+SEC("?tc")
+__failure __msg("function calls are not allowed while holding a lock")
+int reject_with_lock(void *ctx)
+{
+	bpf_spin_lock(&lock);
+	bpf_throw(0);
+	return 0;
+}
+
+SEC("?tc")
+__failure __msg("function calls are not allowed while holding a lock")
+int reject_subprog_with_lock(void *ctx)
+{
+	return subprog_lock(ctx);
+}
+
+SEC("?tc")
+__failure __msg("bpf_rcu_read_unlock is missing")
+int reject_with_rcu_read_lock(void *ctx)
+{
+	bpf_rcu_read_lock();
+	bpf_throw(0);
+	return 0;
+}
+
+__noinline static int throwing_subprog(struct __sk_buff *ctx)
+{
+	if (ctx->len)
+		bpf_throw(0);
+	return 0;
+}
+
+SEC("?tc")
+__failure __msg("bpf_rcu_read_unlock is missing")
+int reject_subprog_with_rcu_read_lock(void *ctx)
+{
+	bpf_rcu_read_lock();
+	return throwing_subprog(ctx);
+}
+
+static bool rbless(struct bpf_rb_node *n1, const struct bpf_rb_node *n2)
+{
+	bpf_throw(0);
+	return true;
+}
+
+SEC("?tc")
+__failure __msg("function calls are not allowed while holding a lock")
+int reject_with_rbtree_add_throw(void *ctx)
+{
+	struct foo *f;
+
+	f = bpf_obj_new(typeof(*f));
+	if (!f)
+		return 0;
+	bpf_spin_lock(&lock);
+	bpf_rbtree_add(&rbtree, &f->node, rbless);
+	return 0;
+}
+
+SEC("?tc")
+__failure __msg("Unreleased reference")
+int reject_with_reference(void *ctx)
+{
+	struct foo *f;
+
+	f = bpf_obj_new(typeof(*f));
+	if (!f)
+		return 0;
+	bpf_throw(0);
+	return 0;
+}
+
+__noinline static int subprog_ref(struct __sk_buff *ctx)
+{
+	struct foo *f;
+
+	f = bpf_obj_new(typeof(*f));
+	if (!f)
+		return 0;
+	bpf_throw(0);
+	return 0;
+}
+
+__noinline static int subprog_cb_ref(u32 i, void *ctx)
+{
+	bpf_throw(0);
+	return 0;
+}
+
+SEC("?tc")
+__failure __msg("Unreleased reference")
+int reject_with_cb_reference(void *ctx)
+{
+	struct foo *f;
+
+	f = bpf_obj_new(typeof(*f));
+	if (!f)
+		return 0;
+	bpf_loop(5, subprog_cb_ref, NULL, 0);
+	return 0;
+}
+
+SEC("?tc")
+__failure __msg("cannot be called from callback")
+int reject_with_cb(void *ctx)
+{
+	bpf_loop(5, subprog_cb_ref, NULL, 0);
+	return 0;
+}
+
+SEC("?tc")
+__failure __msg("Unreleased reference")
+int reject_with_subprog_reference(void *ctx)
+{
+	return subprog_ref(ctx) + 1;
+}
+
+__noinline int throwing_exception_cb(u64 c)
+{
+	bpf_throw(0);
+	return c;
+}
+
+__noinline int exception_cb1(u64 c)
+{
+	return c;
+}
+
+__noinline int exception_cb2(u64 c)
+{
+	return c;
+}
+
+static __noinline int static_func(struct __sk_buff *ctx)
+{
+	return exception_cb1(ctx->tstamp);
+}
+
+__noinline int global_func(struct __sk_buff *ctx)
+{
+	return exception_cb1(ctx->tstamp);
+}
+
+SEC("?tc")
+__exception_cb(throwing_exception_cb)
+__failure __msg("cannot be called from callback subprog")
+int reject_throwing_exception_cb(struct __sk_buff *ctx)
+{
+	return 0;
+}
+
+SEC("?tc")
+__exception_cb(exception_cb1)
+__failure __msg("cannot call exception cb directly")
+int reject_exception_cb_call_global_func(struct __sk_buff *ctx)
+{
+	return global_func(ctx);
+}
+
+SEC("?tc")
+__exception_cb(exception_cb1)
+__failure __msg("cannot call exception cb directly")
+int reject_exception_cb_call_static_func(struct __sk_buff *ctx)
+{
+	return static_func(ctx);
+}
+
+SEC("?tc")
+__exception_cb(exception_cb1)
+__exception_cb(exception_cb2)
+__failure __msg("multiple exception callback tags for main subprog")
+int reject_multiple_exception_cb(struct __sk_buff *ctx)
+{
+	bpf_throw(0);
+	return 16;
+}
+
+__noinline int exception_cb_bad_ret(u64 c)
+{
+	return c;
+}
+
+SEC("?fentry/bpf_check")
+__exception_cb(exception_cb_bad_ret)
+__failure __msg("At program exit the register R0 has unknown scalar value should")
+int reject_set_exception_cb_bad_ret1(void *ctx)
+{
+	return 0;
+}
+
+SEC("?fentry/bpf_check")
+__failure __msg("At program exit the register R0 has value (0x40; 0x0) should")
+int reject_set_exception_cb_bad_ret2(void *ctx)
+{
+	bpf_throw(64);
+	return 0;
+}
+
+__noinline static int loop_cb1(u32 index, int *ctx)
+{
+	bpf_throw(0);
+	return 0;
+}
+
+__noinline static int loop_cb2(u32 index, int *ctx)
+{
+	bpf_throw(0);
+	return 0;
+}
+
+SEC("?tc")
+__failure __msg("cannot be called from callback")
+int reject_exception_throw_cb(struct __sk_buff *ctx)
+{
+	bpf_loop(5, loop_cb1, NULL, 0);
+	return 0;
+}
+
+SEC("?tc")
+__failure __msg("cannot be called from callback")
+int reject_exception_throw_cb_diff(struct __sk_buff *ctx)
+{
+	if (ctx->protocol)
+		bpf_loop(5, loop_cb1, NULL, 0);
+	else
+		bpf_loop(5, loop_cb2, NULL, 0);
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/getpeername_unix_prog.c b/tools/testing/selftests/bpf/progs/getpeername_unix_prog.c
new file mode 100644
index 000000000000..9c078f34bbb2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/getpeername_unix_prog.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+
+#include <string.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_kfuncs.h"
+
+__u8 SERVUN_REWRITE_ADDRESS[] = "\0bpf_cgroup_unix_test_rewrite";
+
+SEC("cgroup/getpeername_unix")
+int getpeername_unix_prog(struct bpf_sock_addr *ctx)
+{
+	struct bpf_sock_addr_kern *sa_kern = bpf_cast_to_kern_ctx(ctx);
+	struct sockaddr_un *sa_kern_unaddr;
+	__u32 unaddrlen = offsetof(struct sockaddr_un, sun_path) +
+			  sizeof(SERVUN_REWRITE_ADDRESS) - 1;
+	int ret;
+
+	ret = bpf_sock_addr_set_sun_path(sa_kern, SERVUN_REWRITE_ADDRESS,
+					 sizeof(SERVUN_REWRITE_ADDRESS) - 1);
+	if (ret)
+		return 1;
+
+	if (sa_kern->uaddrlen != unaddrlen)
+		return 1;
+
+	sa_kern_unaddr = bpf_rdonly_cast(sa_kern->uaddr,
+						bpf_core_type_id_kernel(struct sockaddr_un));
+	if (memcmp(sa_kern_unaddr->sun_path, SERVUN_REWRITE_ADDRESS,
+			sizeof(SERVUN_REWRITE_ADDRESS) - 1) != 0)
+		return 1;
+
+	return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/getsockname_unix_prog.c b/tools/testing/selftests/bpf/progs/getsockname_unix_prog.c
new file mode 100644
index 000000000000..ac7145111497
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/getsockname_unix_prog.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+
+#include <string.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_kfuncs.h"
+
+__u8 SERVUN_REWRITE_ADDRESS[] = "\0bpf_cgroup_unix_test_rewrite";
+
+SEC("cgroup/getsockname_unix")
+int getsockname_unix_prog(struct bpf_sock_addr *ctx)
+{
+	struct bpf_sock_addr_kern *sa_kern = bpf_cast_to_kern_ctx(ctx);
+	struct sockaddr_un *sa_kern_unaddr;
+	__u32 unaddrlen = offsetof(struct sockaddr_un, sun_path) +
+			  sizeof(SERVUN_REWRITE_ADDRESS) - 1;
+	int ret;
+
+	ret = bpf_sock_addr_set_sun_path(sa_kern, SERVUN_REWRITE_ADDRESS,
+					 sizeof(SERVUN_REWRITE_ADDRESS) - 1);
+	if (ret)
+		return 1;
+
+	if (sa_kern->uaddrlen != unaddrlen)
+		return 1;
+
+	sa_kern_unaddr = bpf_rdonly_cast(sa_kern->uaddr,
+						bpf_core_type_id_kernel(struct sockaddr_un));
+	if (memcmp(sa_kern_unaddr->sun_path, SERVUN_REWRITE_ADDRESS,
+			sizeof(SERVUN_REWRITE_ADDRESS) - 1) != 0)
+		return 1;
+
+	return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/iters_task_vma.c b/tools/testing/selftests/bpf/progs/iters_task_vma.c
new file mode 100644
index 000000000000..44edecfdfaee
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/iters_task_vma.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include "bpf_experimental.h"
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+pid_t target_pid = 0;
+unsigned int vmas_seen = 0;
+
+struct {
+	__u64 vm_start;
+	__u64 vm_end;
+} vm_ranges[1000];
+
+SEC("raw_tp/sys_enter")
+int iter_task_vma_for_each(const void *ctx)
+{
+	struct task_struct *task = bpf_get_current_task_btf();
+	struct vm_area_struct *vma;
+	unsigned int seen = 0;
+
+	if (task->pid != target_pid)
+		return 0;
+
+	if (vmas_seen)
+		return 0;
+
+	bpf_for_each(task_vma, vma, task, 0) {
+		if (seen >= 1000)
+			break;
+
+		vm_ranges[seen].vm_start = vma->vm_start;
+		vm_ranges[seen].vm_end = vma->vm_end;
+		seen++;
+	}
+
+	vmas_seen = seen;
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/missed_kprobe.c b/tools/testing/selftests/bpf/progs/missed_kprobe.c
new file mode 100644
index 000000000000..7f9ef701f5de
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/missed_kprobe.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "../bpf_testmod/bpf_testmod_kfunc.h"
+
+char _license[] SEC("license") = "GPL";
+
+/*
+ * No tests in here, just to trigger 'bpf_fentry_test*'
+ * through tracing test_run
+ */
+SEC("fentry/bpf_modify_return_test")
+int BPF_PROG(trigger)
+{
+	return 0;
+}
+
+SEC("kprobe/bpf_fentry_test1")
+int test1(struct pt_regs *ctx)
+{
+	bpf_kfunc_common_test();
+	return 0;
+}
+
+SEC("kprobe/bpf_kfunc_common_test")
+int test2(struct pt_regs *ctx)
+{
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/missed_kprobe_recursion.c b/tools/testing/selftests/bpf/progs/missed_kprobe_recursion.c
new file mode 100644
index 000000000000..8ea71cbd6c45
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/missed_kprobe_recursion.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "../bpf_testmod/bpf_testmod_kfunc.h"
+
+char _license[] SEC("license") = "GPL";
+
+/*
+ * No tests in here, just to trigger 'bpf_fentry_test*'
+ * through tracing test_run
+ */
+SEC("fentry/bpf_modify_return_test")
+int BPF_PROG(trigger)
+{
+	return 0;
+}
+
+SEC("kprobe.multi/bpf_fentry_test1")
+int test1(struct pt_regs *ctx)
+{
+	bpf_kfunc_common_test();
+	return 0;
+}
+
+SEC("kprobe/bpf_kfunc_common_test")
+int test2(struct pt_regs *ctx)
+{
+	return 0;
+}
+
+SEC("kprobe/bpf_kfunc_common_test")
+int test3(struct pt_regs *ctx)
+{
+	return 0;
+}
+
+SEC("kprobe/bpf_kfunc_common_test")
+int test4(struct pt_regs *ctx)
+{
+	return 0;
+}
+
+SEC("kprobe.multi/bpf_kfunc_common_test")
+int test5(struct pt_regs *ctx)
+{
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/missed_tp_recursion.c b/tools/testing/selftests/bpf/progs/missed_tp_recursion.c
new file mode 100644
index 000000000000..762385f827c5
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/missed_tp_recursion.c
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+/*
+ * No tests in here, just to trigger 'bpf_fentry_test*'
+ * through tracing test_run
+ */
+SEC("fentry/bpf_modify_return_test")
+int BPF_PROG(trigger)
+{
+	return 0;
+}
+
+SEC("kprobe/bpf_fentry_test1")
+int test1(struct pt_regs *ctx)
+{
+	bpf_printk("test");
+	return 0;
+}
+
+SEC("tp/bpf_trace/bpf_trace_printk")
+int test2(struct pt_regs *ctx)
+{
+	return 0;
+}
+
+SEC("tp/bpf_trace/bpf_trace_printk")
+int test3(struct pt_regs *ctx)
+{
+	return 0;
+}
+
+SEC("tp/bpf_trace/bpf_trace_printk")
+int test4(struct pt_regs *ctx)
+{
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/percpu_alloc_array.c b/tools/testing/selftests/bpf/progs/percpu_alloc_array.c
new file mode 100644
index 000000000000..37c2d2608ec0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/percpu_alloc_array.c
@@ -0,0 +1,190 @@
+#include "bpf_experimental.h"
+
+struct val_t {
+	long b, c, d;
+};
+
+struct elem {
+	long sum;
+	struct val_t __percpu_kptr *pc;
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, int);
+	__type(value, struct elem);
+} array SEC(".maps");
+
+void bpf_rcu_read_lock(void) __ksym;
+void bpf_rcu_read_unlock(void) __ksym;
+
+const volatile int nr_cpus;
+
+/* Initialize the percpu object */
+SEC("?fentry/bpf_fentry_test1")
+int BPF_PROG(test_array_map_1)
+{
+	struct val_t __percpu_kptr *p;
+	struct elem *e;
+	int index = 0;
+
+	e = bpf_map_lookup_elem(&array, &index);
+	if (!e)
+		return 0;
+
+	p = bpf_percpu_obj_new(struct val_t);
+	if (!p)
+		return 0;
+
+	p = bpf_kptr_xchg(&e->pc, p);
+	if (p)
+		bpf_percpu_obj_drop(p);
+
+	return 0;
+}
+
+/* Update percpu data */
+SEC("?fentry/bpf_fentry_test2")
+int BPF_PROG(test_array_map_2)
+{
+	struct val_t __percpu_kptr *p;
+	struct val_t *v;
+	struct elem *e;
+	int index = 0;
+
+	e = bpf_map_lookup_elem(&array, &index);
+	if (!e)
+		return 0;
+
+	p = e->pc;
+	if (!p)
+		return 0;
+
+	v = bpf_per_cpu_ptr(p, 0);
+	if (!v)
+		return 0;
+	v->c = 1;
+	v->d = 2;
+
+	return 0;
+}
+
+int cpu0_field_d, sum_field_c;
+int my_pid;
+
+/* Summarize percpu data */
+SEC("?fentry/bpf_fentry_test3")
+int BPF_PROG(test_array_map_3)
+{
+	struct val_t __percpu_kptr *p;
+	int i, index = 0;
+	struct val_t *v;
+	struct elem *e;
+
+	if ((bpf_get_current_pid_tgid() >> 32) != my_pid)
+		return 0;
+
+	e = bpf_map_lookup_elem(&array, &index);
+	if (!e)
+		return 0;
+
+	p = e->pc;
+	if (!p)
+		return 0;
+
+	bpf_for(i, 0, nr_cpus) {
+		v = bpf_per_cpu_ptr(p, i);
+		if (v) {
+			if (i == 0)
+				cpu0_field_d = v->d;
+			sum_field_c += v->c;
+		}
+	}
+
+	return 0;
+}
+
+/* Explicitly free allocated percpu data */
+SEC("?fentry/bpf_fentry_test4")
+int BPF_PROG(test_array_map_4)
+{
+	struct val_t __percpu_kptr *p;
+	struct elem *e;
+	int index = 0;
+
+	e = bpf_map_lookup_elem(&array, &index);
+	if (!e)
+		return 0;
+
+	/* delete */
+	p = bpf_kptr_xchg(&e->pc, NULL);
+	if (p) {
+		bpf_percpu_obj_drop(p);
+	}
+
+	return 0;
+}
+
+SEC("?fentry.s/bpf_fentry_test1")
+int BPF_PROG(test_array_map_10)
+{
+	struct val_t __percpu_kptr *p, *p1;
+	int i, index = 0;
+	struct val_t *v;
+	struct elem *e;
+
+	if ((bpf_get_current_pid_tgid() >> 32) != my_pid)
+		return 0;
+
+	e = bpf_map_lookup_elem(&array, &index);
+	if (!e)
+		return 0;
+
+	bpf_rcu_read_lock();
+	p = e->pc;
+	if (!p) {
+		p = bpf_percpu_obj_new(struct val_t);
+		if (!p)
+			goto out;
+
+		p1 = bpf_kptr_xchg(&e->pc, p);
+		if (p1) {
+			/* race condition */
+			bpf_percpu_obj_drop(p1);
+		}
+	}
+
+	v = bpf_this_cpu_ptr(p);
+	v->c = 3;
+	v = bpf_this_cpu_ptr(p);
+	v->c = 0;
+
+	v = bpf_per_cpu_ptr(p, 0);
+	if (!v)
+		goto out;
+	v->c = 1;
+	v->d = 2;
+
+	/* delete */
+	p1 = bpf_kptr_xchg(&e->pc, NULL);
+	if (!p1)
+		goto out;
+
+	bpf_for(i, 0, nr_cpus) {
+		v = bpf_per_cpu_ptr(p, i);
+		if (v) {
+			if (i == 0)
+				cpu0_field_d = v->d;
+			sum_field_c += v->c;
+		}
+	}
+
+	/* finally release p */
+	bpf_percpu_obj_drop(p1);
+out:
+	bpf_rcu_read_unlock();
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/percpu_alloc_cgrp_local_storage.c b/tools/testing/selftests/bpf/progs/percpu_alloc_cgrp_local_storage.c
new file mode 100644
index 000000000000..a2acf9aa6c24
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/percpu_alloc_cgrp_local_storage.c
@@ -0,0 +1,109 @@
+#include "bpf_experimental.h"
+
+struct val_t {
+	long b, c, d;
+};
+
+struct elem {
+	long sum;
+	struct val_t __percpu_kptr *pc;
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_CGRP_STORAGE);
+	__uint(map_flags, BPF_F_NO_PREALLOC);
+	__type(key, int);
+	__type(value, struct elem);
+} cgrp SEC(".maps");
+
+const volatile int nr_cpus;
+
+/* Initialize the percpu object */
+SEC("fentry/bpf_fentry_test1")
+int BPF_PROG(test_cgrp_local_storage_1)
+{
+	struct task_struct *task;
+	struct val_t __percpu_kptr *p;
+	struct elem *e;
+
+	task = bpf_get_current_task_btf();
+	e = bpf_cgrp_storage_get(&cgrp, task->cgroups->dfl_cgrp, 0,
+				 BPF_LOCAL_STORAGE_GET_F_CREATE);
+	if (!e)
+		return 0;
+
+	p = bpf_percpu_obj_new(struct val_t);
+	if (!p)
+		return 0;
+
+	p = bpf_kptr_xchg(&e->pc, p);
+	if (p)
+		bpf_percpu_obj_drop(p);
+
+	return 0;
+}
+
+/* Percpu data collection */
+SEC("fentry/bpf_fentry_test2")
+int BPF_PROG(test_cgrp_local_storage_2)
+{
+	struct task_struct *task;
+	struct val_t __percpu_kptr *p;
+	struct val_t *v;
+	struct elem *e;
+
+	task = bpf_get_current_task_btf();
+	e = bpf_cgrp_storage_get(&cgrp, task->cgroups->dfl_cgrp, 0, 0);
+	if (!e)
+		return 0;
+
+	p = e->pc;
+	if (!p)
+		return 0;
+
+	v = bpf_per_cpu_ptr(p, 0);
+	if (!v)
+		return 0;
+	v->c = 1;
+	v->d = 2;
+	return 0;
+}
+
+int cpu0_field_d, sum_field_c;
+int my_pid;
+
+/* Summarize percpu data collection */
+SEC("fentry/bpf_fentry_test3")
+int BPF_PROG(test_cgrp_local_storage_3)
+{
+	struct task_struct *task;
+	struct val_t __percpu_kptr *p;
+	struct val_t *v;
+	struct elem *e;
+	int i;
+
+	if ((bpf_get_current_pid_tgid() >> 32) != my_pid)
+		return 0;
+
+	task = bpf_get_current_task_btf();
+	e = bpf_cgrp_storage_get(&cgrp, task->cgroups->dfl_cgrp, 0, 0);
+	if (!e)
+		return 0;
+
+	p = e->pc;
+	if (!p)
+		return 0;
+
+	bpf_for(i, 0, nr_cpus) {
+		v = bpf_per_cpu_ptr(p, i);
+		if (v) {
+			if (i == 0)
+				cpu0_field_d = v->d;
+			sum_field_c += v->c;
+		}
+	}
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/percpu_alloc_fail.c b/tools/testing/selftests/bpf/progs/percpu_alloc_fail.c
new file mode 100644
index 000000000000..1a891d30f1fe
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/percpu_alloc_fail.c
@@ -0,0 +1,164 @@
+#include "bpf_experimental.h"
+#include "bpf_misc.h"
+
+struct val_t {
+	long b, c, d;
+};
+
+struct val2_t {
+	long b;
+};
+
+struct val_with_ptr_t {
+	char *p;
+};
+
+struct val_with_rb_root_t {
+	struct bpf_spin_lock lock;
+};
+
+struct elem {
+	long sum;
+	struct val_t __percpu_kptr *pc;
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, int);
+	__type(value, struct elem);
+} array SEC(".maps");
+
+long ret;
+
+SEC("?fentry/bpf_fentry_test1")
+__failure __msg("store to referenced kptr disallowed")
+int BPF_PROG(test_array_map_1)
+{
+	struct val_t __percpu_kptr *p;
+	struct elem *e;
+	int index = 0;
+
+	e = bpf_map_lookup_elem(&array, &index);
+	if (!e)
+		return 0;
+
+	p = bpf_percpu_obj_new(struct val_t);
+	if (!p)
+		return 0;
+
+	p = bpf_kptr_xchg(&e->pc, p);
+	if (p)
+		bpf_percpu_obj_drop(p);
+
+	e->pc = (struct val_t __percpu_kptr *)ret;
+	return 0;
+}
+
+SEC("?fentry/bpf_fentry_test1")
+__failure __msg("invalid kptr access, R2 type=percpu_ptr_val2_t expected=ptr_val_t")
+int BPF_PROG(test_array_map_2)
+{
+	struct val2_t __percpu_kptr *p2;
+	struct val_t __percpu_kptr *p;
+	struct elem *e;
+	int index = 0;
+
+	e = bpf_map_lookup_elem(&array, &index);
+	if (!e)
+		return 0;
+
+	p2 = bpf_percpu_obj_new(struct val2_t);
+	if (!p2)
+		return 0;
+
+	p = bpf_kptr_xchg(&e->pc, p2);
+	if (p)
+		bpf_percpu_obj_drop(p);
+
+	return 0;
+}
+
+SEC("?fentry.s/bpf_fentry_test1")
+__failure __msg("R1 type=scalar expected=percpu_ptr_, percpu_rcu_ptr_, percpu_trusted_ptr_")
+int BPF_PROG(test_array_map_3)
+{
+	struct val_t __percpu_kptr *p, *p1;
+	struct val_t *v;
+	struct elem *e;
+	int index = 0;
+
+	e = bpf_map_lookup_elem(&array, &index);
+	if (!e)
+		return 0;
+
+	p = bpf_percpu_obj_new(struct val_t);
+	if (!p)
+		return 0;
+
+	p1 = bpf_kptr_xchg(&e->pc, p);
+	if (p1)
+		bpf_percpu_obj_drop(p1);
+
+	v = bpf_this_cpu_ptr(p);
+	ret = v->b;
+	return 0;
+}
+
+SEC("?fentry.s/bpf_fentry_test1")
+__failure __msg("arg#0 expected for bpf_percpu_obj_drop_impl()")
+int BPF_PROG(test_array_map_4)
+{
+	struct val_t __percpu_kptr *p;
+
+	p = bpf_percpu_obj_new(struct val_t);
+	if (!p)
+		return 0;
+
+	bpf_obj_drop(p);
+	return 0;
+}
+
+SEC("?fentry.s/bpf_fentry_test1")
+__failure __msg("arg#0 expected for bpf_obj_drop_impl()")
+int BPF_PROG(test_array_map_5)
+{
+	struct val_t *p;
+
+	p = bpf_obj_new(struct val_t);
+	if (!p)
+		return 0;
+
+	bpf_percpu_obj_drop(p);
+	return 0;
+}
+
+SEC("?fentry.s/bpf_fentry_test1")
+__failure __msg("bpf_percpu_obj_new type ID argument must be of a struct of scalars")
+int BPF_PROG(test_array_map_6)
+{
+	struct val_with_ptr_t __percpu_kptr *p;
+
+	p = bpf_percpu_obj_new(struct val_with_ptr_t);
+	if (!p)
+		return 0;
+
+	bpf_percpu_obj_drop(p);
+	return 0;
+}
+
+SEC("?fentry.s/bpf_fentry_test1")
+__failure __msg("bpf_percpu_obj_new type ID argument must not contain special fields")
+int BPF_PROG(test_array_map_7)
+{
+	struct val_with_rb_root_t __percpu_kptr *p;
+
+	p = bpf_percpu_obj_new(struct val_with_rb_root_t);
+	if (!p)
+		return 0;
+
+	bpf_percpu_obj_drop(p);
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/preempted_bpf_ma_op.c b/tools/testing/selftests/bpf/progs/preempted_bpf_ma_op.c
new file mode 100644
index 000000000000..55907ef961bf
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/preempted_bpf_ma_op.c
@@ -0,0 +1,106 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023. Huawei Technologies Co., Ltd */
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+#include "bpf_experimental.h"
+
+struct bin_data {
+	char data[256];
+	struct bpf_spin_lock lock;
+};
+
+struct map_value {
+	struct bin_data __kptr * data;
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__type(key, int);
+	__type(value, struct map_value);
+	__uint(max_entries, 2048);
+} array SEC(".maps");
+
+char _license[] SEC("license") = "GPL";
+
+bool nomem_err = false;
+
+static int del_array(unsigned int i, int *from)
+{
+	struct map_value *value;
+	struct bin_data *old;
+
+	value = bpf_map_lookup_elem(&array, from);
+	if (!value)
+		return 1;
+
+	old = bpf_kptr_xchg(&value->data, NULL);
+	if (old)
+		bpf_obj_drop(old);
+
+	(*from)++;
+	return 0;
+}
+
+static int add_array(unsigned int i, int *from)
+{
+	struct bin_data *old, *new;
+	struct map_value *value;
+
+	value = bpf_map_lookup_elem(&array, from);
+	if (!value)
+		return 1;
+
+	new = bpf_obj_new(typeof(*new));
+	if (!new) {
+		nomem_err = true;
+		return 1;
+	}
+
+	old = bpf_kptr_xchg(&value->data, new);
+	if (old)
+		bpf_obj_drop(old);
+
+	(*from)++;
+	return 0;
+}
+
+static void del_then_add_array(int from)
+{
+	int i;
+
+	i = from;
+	bpf_loop(512, del_array, &i, 0);
+
+	i = from;
+	bpf_loop(512, add_array, &i, 0);
+}
+
+SEC("fentry/bpf_fentry_test1")
+int BPF_PROG2(test0, int, a)
+{
+	del_then_add_array(0);
+	return 0;
+}
+
+SEC("fentry/bpf_fentry_test2")
+int BPF_PROG2(test1, int, a, u64, b)
+{
+	del_then_add_array(512);
+	return 0;
+}
+
+SEC("fentry/bpf_fentry_test3")
+int BPF_PROG2(test2, char, a, int, b, u64, c)
+{
+	del_then_add_array(1024);
+	return 0;
+}
+
+SEC("fentry/bpf_fentry_test4")
+int BPF_PROG2(test3, void *, a, char, b, int, c, u64, d)
+{
+	del_then_add_array(1536);
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/profiler.inc.h b/tools/testing/selftests/bpf/progs/profiler.inc.h
index f799d87e8700..897061930cb7 100644
--- a/tools/testing/selftests/bpf/progs/profiler.inc.h
+++ b/tools/testing/selftests/bpf/progs/profiler.inc.h
@@ -609,7 +609,7 @@ out:
 }
 
 SEC("tracepoint/syscalls/sys_enter_kill")
-int tracepoint__syscalls__sys_enter_kill(struct trace_event_raw_sys_enter* ctx)
+int tracepoint__syscalls__sys_enter_kill(struct syscall_trace_enter* ctx)
 {
 	struct bpf_func_stats_ctx stats_ctx;
 
diff --git a/tools/testing/selftests/bpf/progs/recvmsg_unix_prog.c b/tools/testing/selftests/bpf/progs/recvmsg_unix_prog.c
new file mode 100644
index 000000000000..4dfbc8552558
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/recvmsg_unix_prog.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+
+#include <string.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_kfuncs.h"
+
+__u8 SERVUN_ADDRESS[] = "\0bpf_cgroup_unix_test";
+
+SEC("cgroup/recvmsg_unix")
+int recvmsg_unix_prog(struct bpf_sock_addr *ctx)
+{
+	struct bpf_sock_addr_kern *sa_kern = bpf_cast_to_kern_ctx(ctx);
+	struct sockaddr_un *sa_kern_unaddr;
+	__u32 unaddrlen = offsetof(struct sockaddr_un, sun_path) +
+			  sizeof(SERVUN_ADDRESS) - 1;
+	int ret;
+
+	ret = bpf_sock_addr_set_sun_path(sa_kern, SERVUN_ADDRESS,
+					 sizeof(SERVUN_ADDRESS) - 1);
+	if (ret)
+		return 1;
+
+	if (sa_kern->uaddrlen != unaddrlen)
+		return 1;
+
+	sa_kern_unaddr = bpf_rdonly_cast(sa_kern->uaddr,
+						bpf_core_type_id_kernel(struct sockaddr_un));
+	if (memcmp(sa_kern_unaddr->sun_path, SERVUN_ADDRESS,
+			sizeof(SERVUN_ADDRESS) - 1) != 0)
+		return 1;
+
+	return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/sendmsg_unix_prog.c b/tools/testing/selftests/bpf/progs/sendmsg_unix_prog.c
new file mode 100644
index 000000000000..1f67e832666e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/sendmsg_unix_prog.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+
+#include <string.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_kfuncs.h"
+
+__u8 SERVUN_REWRITE_ADDRESS[] = "\0bpf_cgroup_unix_test_rewrite";
+
+SEC("cgroup/sendmsg_unix")
+int sendmsg_unix_prog(struct bpf_sock_addr *ctx)
+{
+	struct bpf_sock_addr_kern *sa_kern = bpf_cast_to_kern_ctx(ctx);
+	struct sockaddr_un *sa_kern_unaddr;
+	__u32 unaddrlen = offsetof(struct sockaddr_un, sun_path) +
+			  sizeof(SERVUN_REWRITE_ADDRESS) - 1;
+	int ret;
+
+	/* Rewrite destination. */
+	ret = bpf_sock_addr_set_sun_path(sa_kern, SERVUN_REWRITE_ADDRESS,
+					 sizeof(SERVUN_REWRITE_ADDRESS) - 1);
+	if (ret)
+		return 0;
+
+	if (sa_kern->uaddrlen != unaddrlen)
+		return 0;
+
+	sa_kern_unaddr = bpf_rdonly_cast(sa_kern->uaddr,
+						bpf_core_type_id_kernel(struct sockaddr_un));
+	if (memcmp(sa_kern_unaddr->sun_path, SERVUN_REWRITE_ADDRESS,
+			sizeof(SERVUN_REWRITE_ADDRESS) - 1) != 0)
+		return 0;
+
+	return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_fentry.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_fentry.c
new file mode 100644
index 000000000000..8436c6729167
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_fentry.c
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Leon Hwang */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+int count = 0;
+
+SEC("fentry/subprog_tail")
+int BPF_PROG(fentry, struct sk_buff *skb)
+{
+	count++;
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_fexit.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_fexit.c
new file mode 100644
index 000000000000..fe16412c6e6e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_fexit.c
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Leon Hwang */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+int count = 0;
+
+SEC("fexit/subprog_tail")
+int BPF_PROG(fexit, struct sk_buff *skb)
+{
+	count++;
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_ldsx_insn.c b/tools/testing/selftests/bpf/progs/test_ldsx_insn.c
index 67c14ba1e87b..3ddcb3777912 100644
--- a/tools/testing/selftests/bpf/progs/test_ldsx_insn.c
+++ b/tools/testing/selftests/bpf/progs/test_ldsx_insn.c
@@ -6,7 +6,8 @@
 #include <bpf/bpf_tracing.h>
 
 #if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
-     (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64)) && __clang_major__ >= 18
+     (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) ||       \
+     defined(__TARGET_ARCH_s390)) && __clang_major__ >= 18
 const volatile int skip = 0;
 #else
 const volatile int skip = 1;
@@ -104,7 +105,11 @@ int _tc(volatile struct __sk_buff *skb)
 		      "%[tmp_mark] = r1"
 		      : [tmp_mark]"=r"(tmp_mark)
 		      : [ctx]"r"(skb),
-			[off_mark]"i"(offsetof(struct __sk_buff, mark))
+			[off_mark]"i"(offsetof(struct __sk_buff, mark)
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+			+ sizeof(skb->mark) - 1
+#endif
+			)
 		      : "r1");
 #else
 	tmp_mark = (char)skb->mark;
diff --git a/tools/testing/selftests/bpf/progs/test_uprobe.c b/tools/testing/selftests/bpf/progs/test_uprobe.c
new file mode 100644
index 000000000000..896c88a4960d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_uprobe.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Hengqi Chen */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+pid_t my_pid = 0;
+
+int test1_result = 0;
+int test2_result = 0;
+int test3_result = 0;
+int test4_result = 0;
+
+SEC("uprobe/./liburandom_read.so:urandlib_api_sameoffset")
+int BPF_UPROBE(test1)
+{
+	pid_t pid = bpf_get_current_pid_tgid() >> 32;
+
+	if (pid != my_pid)
+		return 0;
+
+	test1_result = 1;
+	return 0;
+}
+
+SEC("uprobe/./liburandom_read.so:urandlib_api_sameoffset@LIBURANDOM_READ_1.0.0")
+int BPF_UPROBE(test2)
+{
+	pid_t pid = bpf_get_current_pid_tgid() >> 32;
+
+	if (pid != my_pid)
+		return 0;
+
+	test2_result = 1;
+	return 0;
+}
+
+SEC("uretprobe/./liburandom_read.so:urandlib_api_sameoffset@@LIBURANDOM_READ_2.0.0")
+int BPF_URETPROBE(test3, int ret)
+{
+	pid_t pid = bpf_get_current_pid_tgid() >> 32;
+
+	if (pid != my_pid)
+		return 0;
+
+	test3_result = ret;
+	return 0;
+}
+
+SEC("uprobe")
+int BPF_UPROBE(test4)
+{
+	pid_t pid = bpf_get_current_pid_tgid() >> 32;
+
+	if (pid != my_pid)
+		return 0;
+
+	test4_result = 1;
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_vmlinux.c b/tools/testing/selftests/bpf/progs/test_vmlinux.c
index 4b8e37f7fd06..78b23934d9f8 100644
--- a/tools/testing/selftests/bpf/progs/test_vmlinux.c
+++ b/tools/testing/selftests/bpf/progs/test_vmlinux.c
@@ -16,12 +16,12 @@ bool kprobe_called = false;
 bool fentry_called = false;
 
 SEC("tp/syscalls/sys_enter_nanosleep")
-int handle__tp(struct trace_event_raw_sys_enter *args)
+int handle__tp(struct syscall_trace_enter *args)
 {
 	struct __kernel_timespec *ts;
 	long tv_nsec;
 
-	if (args->id != __NR_nanosleep)
+	if (args->nr != __NR_nanosleep)
 		return 0;
 
 	ts = (void *)args->args[0];
diff --git a/tools/testing/selftests/bpf/progs/timer.c b/tools/testing/selftests/bpf/progs/timer.c
index 9a16d95213e1..8b946c8188c6 100644
--- a/tools/testing/selftests/bpf/progs/timer.c
+++ b/tools/testing/selftests/bpf/progs/timer.c
@@ -51,7 +51,7 @@ struct {
 	__uint(max_entries, 1);
 	__type(key, int);
 	__type(value, struct elem);
-} abs_timer SEC(".maps");
+} abs_timer SEC(".maps"), soft_timer_pinned SEC(".maps"), abs_timer_pinned SEC(".maps");
 
 __u64 bss_data;
 __u64 abs_data;
@@ -59,6 +59,8 @@ __u64 err;
 __u64 ok;
 __u64 callback_check = 52;
 __u64 callback2_check = 52;
+__u64 pinned_callback_check;
+__s32 pinned_cpu;
 
 #define ARRAY 1
 #define HTAB 2
@@ -329,3 +331,62 @@ int BPF_PROG2(test3, int, a)
 
 	return 0;
 }
+
+/* callback for pinned timer */
+static int timer_cb_pinned(void *map, int *key, struct bpf_timer *timer)
+{
+	__s32 cpu = bpf_get_smp_processor_id();
+
+	if (cpu != pinned_cpu)
+		err |= 16384;
+
+	pinned_callback_check++;
+	return 0;
+}
+
+static void test_pinned_timer(bool soft)
+{
+	int key = 0;
+	void *map;
+	struct bpf_timer *timer;
+	__u64 flags = BPF_F_TIMER_CPU_PIN;
+	__u64 start_time;
+
+	if (soft) {
+		map = &soft_timer_pinned;
+		start_time = 0;
+	} else {
+		map = &abs_timer_pinned;
+		start_time = bpf_ktime_get_boot_ns();
+		flags |= BPF_F_TIMER_ABS;
+	}
+
+	timer = bpf_map_lookup_elem(map, &key);
+	if (timer) {
+		if (bpf_timer_init(timer, map, CLOCK_BOOTTIME) != 0)
+			err |= 4096;
+		bpf_timer_set_callback(timer, timer_cb_pinned);
+		pinned_cpu = bpf_get_smp_processor_id();
+		bpf_timer_start(timer, start_time + 1000, flags);
+	} else {
+		err |= 8192;
+	}
+}
+
+SEC("fentry/bpf_fentry_test4")
+int BPF_PROG2(test4, int, a)
+{
+	bpf_printk("test4");
+	test_pinned_timer(true);
+
+	return 0;
+}
+
+SEC("fentry/bpf_fentry_test5")
+int BPF_PROG2(test5, int, a)
+{
+	bpf_printk("test5");
+	test_pinned_timer(false);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/verifier_bswap.c b/tools/testing/selftests/bpf/progs/verifier_bswap.c
index 8893094725f0..107525fb4a6a 100644
--- a/tools/testing/selftests/bpf/progs/verifier_bswap.c
+++ b/tools/testing/selftests/bpf/progs/verifier_bswap.c
@@ -5,7 +5,9 @@
 #include "bpf_misc.h"
 
 #if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
-     (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64)) && __clang_major__ >= 18
+	(defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \
+        defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390)) && \
+	__clang_major__ >= 18
 
 SEC("socket")
 __description("BSWAP, 16")
diff --git a/tools/testing/selftests/bpf/progs/verifier_gotol.c b/tools/testing/selftests/bpf/progs/verifier_gotol.c
index 2dae5322a18e..9f202eda952f 100644
--- a/tools/testing/selftests/bpf/progs/verifier_gotol.c
+++ b/tools/testing/selftests/bpf/progs/verifier_gotol.c
@@ -5,7 +5,9 @@
 #include "bpf_misc.h"
 
 #if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
-     (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64)) && __clang_major__ >= 18
+	(defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \
+        defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390)) && \
+	__clang_major__ >= 18
 
 SEC("socket")
 __description("gotol, small_imm")
diff --git a/tools/testing/selftests/bpf/progs/verifier_ldsx.c b/tools/testing/selftests/bpf/progs/verifier_ldsx.c
index 0c638f45aaf1..375525329637 100644
--- a/tools/testing/selftests/bpf/progs/verifier_ldsx.c
+++ b/tools/testing/selftests/bpf/progs/verifier_ldsx.c
@@ -5,19 +5,25 @@
 #include "bpf_misc.h"
 
 #if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
-     (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64)) && __clang_major__ >= 18
+	(defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \
+        defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390)) && \
+	__clang_major__ >= 18
 
 SEC("socket")
 __description("LDSX, S8")
 __success __success_unpriv __retval(-2)
 __naked void ldsx_s8(void)
 {
-	asm volatile ("					\
-	r1 = 0x3fe;					\
-	*(u64 *)(r10 - 8) = r1;				\
-	r0 = *(s8 *)(r10 - 8);				\
-	exit;						\
-"	::: __clobber_all);
+	asm volatile (
+	"r1 = 0x3fe;"
+	"*(u64 *)(r10 - 8) = r1;"
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+	"r0 = *(s8 *)(r10 - 8);"
+#else
+	"r0 = *(s8 *)(r10 - 1);"
+#endif
+	"exit;"
+	::: __clobber_all);
 }
 
 SEC("socket")
@@ -25,12 +31,16 @@ __description("LDSX, S16")
 __success __success_unpriv __retval(-2)
 __naked void ldsx_s16(void)
 {
-	asm volatile ("					\
-	r1 = 0x3fffe;					\
-	*(u64 *)(r10 - 8) = r1;				\
-	r0 = *(s16 *)(r10 - 8);				\
-	exit;						\
-"	::: __clobber_all);
+	asm volatile (
+	"r1 = 0x3fffe;"
+	"*(u64 *)(r10 - 8) = r1;"
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+	"r0 = *(s16 *)(r10 - 8);"
+#else
+	"r0 = *(s16 *)(r10 - 2);"
+#endif
+	"exit;"
+	::: __clobber_all);
 }
 
 SEC("socket")
@@ -38,35 +48,43 @@ __description("LDSX, S32")
 __success __success_unpriv __retval(-1)
 __naked void ldsx_s32(void)
 {
-	asm volatile ("					\
-	r1 = 0xfffffffe;				\
-	*(u64 *)(r10 - 8) = r1;				\
-	r0 = *(s32 *)(r10 - 8);				\
-	r0 >>= 1;					\
-	exit;						\
-"	::: __clobber_all);
+	asm volatile (
+	"r1 = 0xfffffffe;"
+	"*(u64 *)(r10 - 8) = r1;"
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+	"r0 = *(s32 *)(r10 - 8);"
+#else
+	"r0 = *(s32 *)(r10 - 4);"
+#endif
+	"r0 >>= 1;"
+	"exit;"
+	::: __clobber_all);
 }
 
 SEC("socket")
 __description("LDSX, S8 range checking, privileged")
 __log_level(2) __success __retval(1)
-__msg("R1_w=scalar(smin=-128,smax=127)")
+__msg("R1_w=scalar(smin=smin32=-128,smax=smax32=127)")
 __naked void ldsx_s8_range_priv(void)
 {
-	asm volatile ("					\
-	call %[bpf_get_prandom_u32];			\
-	*(u64 *)(r10 - 8) = r0;				\
-	r1 = *(s8 *)(r10 - 8);				\
-	/* r1 with s8 range */				\
-	if r1 s> 0x7f goto l0_%=;			\
-	if r1 s< -0x80 goto l0_%=;			\
-	r0 = 1;						\
-l1_%=:							\
-	exit;						\
-l0_%=:							\
-	r0 = 2;						\
-	goto l1_%=;					\
-"	:
+	asm volatile (
+	"call %[bpf_get_prandom_u32];"
+	"*(u64 *)(r10 - 8) = r0;"
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+	"r1 = *(s8 *)(r10 - 8);"
+#else
+	"r1 = *(s8 *)(r10 - 1);"
+#endif
+	/* r1 with s8 range */
+	"if r1 s> 0x7f goto l0_%=;"
+	"if r1 s< -0x80 goto l0_%=;"
+	"r0 = 1;"
+"l1_%=:"
+	"exit;"
+"l0_%=:"
+	"r0 = 2;"
+	"goto l1_%=;"
+	:
 	: __imm(bpf_get_prandom_u32)
 	: __clobber_all);
 }
@@ -76,20 +94,24 @@ __description("LDSX, S16 range checking")
 __success __success_unpriv __retval(1)
 __naked void ldsx_s16_range(void)
 {
-	asm volatile ("					\
-	call %[bpf_get_prandom_u32];			\
-	*(u64 *)(r10 - 8) = r0;				\
-	r1 = *(s16 *)(r10 - 8);				\
-	/* r1 with s16 range */				\
-	if r1 s> 0x7fff goto l0_%=;			\
-	if r1 s< -0x8000 goto l0_%=;			\
-	r0 = 1;						\
-l1_%=:							\
-	exit;						\
-l0_%=:							\
-	r0 = 2;						\
-	goto l1_%=;					\
-"	:
+	asm volatile (
+	"call %[bpf_get_prandom_u32];"
+	"*(u64 *)(r10 - 8) = r0;"
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+	"r1 = *(s16 *)(r10 - 8);"
+#else
+	"r1 = *(s16 *)(r10 - 2);"
+#endif
+	/* r1 with s16 range */
+	"if r1 s> 0x7fff goto l0_%=;"
+	"if r1 s< -0x8000 goto l0_%=;"
+	"r0 = 1;"
+"l1_%=:"
+	"exit;"
+"l0_%=:"
+	"r0 = 2;"
+	"goto l1_%=;"
+	:
 	: __imm(bpf_get_prandom_u32)
 	: __clobber_all);
 }
@@ -99,20 +121,24 @@ __description("LDSX, S32 range checking")
 __success __success_unpriv __retval(1)
 __naked void ldsx_s32_range(void)
 {
-	asm volatile ("					\
-	call %[bpf_get_prandom_u32];			\
-	*(u64 *)(r10 - 8) = r0;				\
-	r1 = *(s32 *)(r10 - 8);				\
-	/* r1 with s16 range */				\
-	if r1 s> 0x7fffFFFF goto l0_%=;			\
-	if r1 s< -0x80000000 goto l0_%=;		\
-	r0 = 1;						\
-l1_%=:							\
-	exit;						\
-l0_%=:							\
-	r0 = 2;						\
-	goto l1_%=;					\
-"	:
+	asm volatile (
+	"call %[bpf_get_prandom_u32];"
+	"*(u64 *)(r10 - 8) = r0;"
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+	"r1 = *(s32 *)(r10 - 8);"
+#else
+	"r1 = *(s32 *)(r10 - 4);"
+#endif
+	/* r1 with s16 range */
+	"if r1 s> 0x7fffFFFF goto l0_%=;"
+	"if r1 s< -0x80000000 goto l0_%=;"
+	"r0 = 1;"
+"l1_%=:"
+	"exit;"
+"l0_%=:"
+	"r0 = 2;"
+	"goto l1_%=;"
+	:
 	: __imm(bpf_get_prandom_u32)
 	: __clobber_all);
 }
diff --git a/tools/testing/selftests/bpf/progs/verifier_movsx.c b/tools/testing/selftests/bpf/progs/verifier_movsx.c
index 3c8ac2c57b1b..b2a04d1179d0 100644
--- a/tools/testing/selftests/bpf/progs/verifier_movsx.c
+++ b/tools/testing/selftests/bpf/progs/verifier_movsx.c
@@ -5,7 +5,9 @@
 #include "bpf_misc.h"
 
 #if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
-     (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64)) && __clang_major__ >= 18
+	(defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \
+        defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390)) && \
+	__clang_major__ >= 18
 
 SEC("socket")
 __description("MOV32SX, S8")
diff --git a/tools/testing/selftests/bpf/progs/verifier_sdiv.c b/tools/testing/selftests/bpf/progs/verifier_sdiv.c
index 0990f8825675..8fc5174808b2 100644
--- a/tools/testing/selftests/bpf/progs/verifier_sdiv.c
+++ b/tools/testing/selftests/bpf/progs/verifier_sdiv.c
@@ -5,7 +5,9 @@
 #include "bpf_misc.h"
 
 #if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
-     (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64)) && __clang_major__ >= 18
+	(defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \
+        defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390)) && \
+	__clang_major__ >= 18
 
 SEC("socket")
 __description("SDIV32, non-zero imm divisor, check 1")
diff --git a/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c b/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c
index 24369f242853..ccde6a4c6319 100644
--- a/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c
+++ b/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c
@@ -3,11 +3,12 @@
 
 #include <linux/bpf.h>
 #include <bpf/bpf_helpers.h>
-#include "xsk_xdp_metadata.h"
+#include <linux/if_ether.h>
+#include "xsk_xdp_common.h"
 
 struct {
 	__uint(type, BPF_MAP_TYPE_XSKMAP);
-	__uint(max_entries, 1);
+	__uint(max_entries, 2);
 	__uint(key_size, sizeof(int));
 	__uint(value_size, sizeof(int));
 } xsk SEC(".maps");
@@ -52,4 +53,21 @@ SEC("xdp.frags") int xsk_xdp_populate_metadata(struct xdp_md *xdp)
 	return bpf_redirect_map(&xsk, 0, XDP_DROP);
 }
 
+SEC("xdp") int xsk_xdp_shared_umem(struct xdp_md *xdp)
+{
+	void *data = (void *)(long)xdp->data;
+	void *data_end = (void *)(long)xdp->data_end;
+	struct ethhdr *eth = data;
+
+	if (eth + 1 > data_end)
+		return XDP_DROP;
+
+	/* Redirecting packets based on the destination MAC address */
+	idx = ((unsigned int)(eth->h_dest[5])) / 2;
+	if (idx > MAX_SOCKETS)
+		return XDP_DROP;
+
+	return bpf_redirect_map(&xsk, idx, XDP_DROP);
+}
+
 char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_bpftool_synctypes.py b/tools/testing/selftests/bpf/test_bpftool_synctypes.py
index 0cfece7ff4f8..0ed67b6b31dd 100755
--- a/tools/testing/selftests/bpf/test_bpftool_synctypes.py
+++ b/tools/testing/selftests/bpf/test_bpftool_synctypes.py
@@ -509,6 +509,15 @@ def main():
     source_map_types.remove('cgroup_storage_deprecated')
     source_map_types.add('cgroup_storage')
 
+    # The same applied to BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE_DEPRECATED and
+    # BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE which share the same enum value
+    # and source_map_types picks
+    # BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE_DEPRECATED/percpu_cgroup_storage_deprecated.
+    # Replace 'percpu_cgroup_storage_deprecated' with 'percpu_cgroup_storage'
+    # so it aligns with what `bpftool map help` shows.
+    source_map_types.remove('percpu_cgroup_storage_deprecated')
+    source_map_types.add('percpu_cgroup_storage')
+
     help_map_types = map_info.get_map_help()
     help_map_options = map_info.get_options()
     map_info.close()
diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c
index b4edd8454934..37ffa57f28a1 100644
--- a/tools/testing/selftests/bpf/test_loader.c
+++ b/tools/testing/selftests/bpf/test_loader.c
@@ -69,7 +69,7 @@ static int tester_init(struct test_loader *tester)
 {
 	if (!tester->log_buf) {
 		tester->log_buf_sz = TEST_LOADER_LOG_BUF_SZ;
-		tester->log_buf = malloc(tester->log_buf_sz);
+		tester->log_buf = calloc(tester->log_buf_sz, 1);
 		if (!ASSERT_OK_PTR(tester->log_buf, "tester_log_buf"))
 			return -ENOMEM;
 	}
@@ -538,7 +538,7 @@ void run_subtest(struct test_loader *tester,
 		 bool unpriv)
 {
 	struct test_subspec *subspec = unpriv ? &spec->unpriv : &spec->priv;
-	struct bpf_program *tprog, *tprog_iter;
+	struct bpf_program *tprog = NULL, *tprog_iter;
 	struct test_spec *spec_iter;
 	struct cap_state caps = {};
 	struct bpf_object *tobj;
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 4d582cac2c09..1b9387890148 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -255,7 +255,7 @@ static void print_subtest_name(int test_num, int subtest_num,
 			       const char *test_name, char *subtest_name,
 			       char *result)
 {
-	char test_num_str[TEST_NUM_WIDTH + 1];
+	char test_num_str[32];
 
 	snprintf(test_num_str, sizeof(test_num_str), "%d/%d", test_num, subtest_num);
 
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index 77bd492c6024..2f9f6f250f17 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -417,6 +417,8 @@ int get_bpf_max_tramp_links(void);
 #define SYS_NANOSLEEP_KPROBE_NAME "__s390x_sys_nanosleep"
 #elif defined(__aarch64__)
 #define SYS_NANOSLEEP_KPROBE_NAME "__arm64_sys_nanosleep"
+#elif defined(__riscv)
+#define SYS_NANOSLEEP_KPROBE_NAME "__riscv_sys_nanosleep"
 #else
 #define SYS_NANOSLEEP_KPROBE_NAME "sys_nanosleep"
 #endif
diff --git a/tools/testing/selftests/bpf/test_xsk.sh b/tools/testing/selftests/bpf/test_xsk.sh
index 2aa5a3445056..65aafe0003db 100755
--- a/tools/testing/selftests/bpf/test_xsk.sh
+++ b/tools/testing/selftests/bpf/test_xsk.sh
@@ -73,17 +73,33 @@
 #
 # Run test suite for physical device in loopback mode
 #   sudo ./test_xsk.sh -i IFACE
+#
+# Run test suite in a specific mode only [skb,drv,zc]
+#   sudo ./test_xsk.sh -m MODE
+#
+# List available tests
+#   ./test_xsk.sh -l
+#
+# Run a specific test from the test suite
+#   sudo ./test_xsk.sh -t TEST_NAME
+#
+# Display the available command line options
+#   ./test_xsk.sh -h
 
 . xsk_prereqs.sh
 
 ETH=""
 
-while getopts "vi:d" flag
+while getopts "vi:dm:lt:h" flag
 do
 	case "${flag}" in
 		v) verbose=1;;
 		d) debug=1;;
 		i) ETH=${OPTARG};;
+		m) MODE=${OPTARG};;
+		l) list=1;;
+		t) TEST=${OPTARG};;
+		h) help=1;;
 	esac
 done
 
@@ -131,6 +147,16 @@ setup_vethPairs() {
 	ip link set ${VETH0} up
 }
 
+if [[ $list -eq 1 ]]; then
+        ./${XSKOBJ} -l
+        exit
+fi
+
+if [[ $help -eq 1 ]]; then
+	./${XSKOBJ}
+        exit
+fi
+
 if [ ! -z $ETH ]; then
 	VETH0=${ETH}
 	VETH1=${ETH}
@@ -153,6 +179,14 @@ if [[ $verbose -eq 1 ]]; then
 	ARGS+="-v "
 fi
 
+if [ -n "$MODE" ]; then
+	ARGS+="-m ${MODE} "
+fi
+
+if [ -n "$TEST" ]; then
+	ARGS+="-t ${TEST} "
+fi
+
 retval=$?
 test_status $retval "${TEST_NAME}"
 
@@ -175,6 +209,10 @@ else
 	cleanup_iface ${ETH} ${MTU}
 fi
 
+if [[ $list -eq 1 ]]; then
+    exit
+fi
+
 TEST_NAME="XSK_SELFTESTS_${VETH0}_BUSY_POLL"
 busy_poll=1
 
diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c
index f83d9f65c65b..4faa898ff7fc 100644
--- a/tools/testing/selftests/bpf/trace_helpers.c
+++ b/tools/testing/selftests/bpf/trace_helpers.c
@@ -7,6 +7,7 @@
 #include <errno.h>
 #include <fcntl.h>
 #include <poll.h>
+#include <pthread.h>
 #include <unistd.h>
 #include <linux/perf_event.h>
 #include <sys/mman.h>
@@ -14,104 +15,165 @@
 #include <linux/limits.h>
 #include <libelf.h>
 #include <gelf.h>
+#include "bpf/libbpf_internal.h"
 
 #define TRACEFS_PIPE	"/sys/kernel/tracing/trace_pipe"
 #define DEBUGFS_PIPE	"/sys/kernel/debug/tracing/trace_pipe"
 
-#define MAX_SYMS 400000
-static struct ksym syms[MAX_SYMS];
-static int sym_cnt;
+struct ksyms {
+	struct ksym *syms;
+	size_t sym_cap;
+	size_t sym_cnt;
+};
+
+static struct ksyms *ksyms;
+static pthread_mutex_t ksyms_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+static int ksyms__add_symbol(struct ksyms *ksyms, const char *name,
+			     unsigned long addr)
+{
+	void *tmp;
+
+	tmp = strdup(name);
+	if (!tmp)
+		return -ENOMEM;
+	ksyms->syms[ksyms->sym_cnt].addr = addr;
+	ksyms->syms[ksyms->sym_cnt].name = tmp;
+	ksyms->sym_cnt++;
+	return 0;
+}
+
+void free_kallsyms_local(struct ksyms *ksyms)
+{
+	unsigned int i;
+
+	if (!ksyms)
+		return;
+
+	if (!ksyms->syms) {
+		free(ksyms);
+		return;
+	}
+
+	for (i = 0; i < ksyms->sym_cnt; i++)
+		free(ksyms->syms[i].name);
+	free(ksyms->syms);
+	free(ksyms);
+}
 
 static int ksym_cmp(const void *p1, const void *p2)
 {
 	return ((struct ksym *)p1)->addr - ((struct ksym *)p2)->addr;
 }
 
-int load_kallsyms_refresh(void)
+struct ksyms *load_kallsyms_local(void)
 {
 	FILE *f;
 	char func[256], buf[256];
 	char symbol;
 	void *addr;
-	int i = 0;
-
-	sym_cnt = 0;
+	int ret;
+	struct ksyms *ksyms;
 
 	f = fopen("/proc/kallsyms", "r");
 	if (!f)
-		return -ENOENT;
+		return NULL;
+
+	ksyms = calloc(1, sizeof(struct ksyms));
+	if (!ksyms) {
+		fclose(f);
+		return NULL;
+	}
 
 	while (fgets(buf, sizeof(buf), f)) {
 		if (sscanf(buf, "%p %c %s", &addr, &symbol, func) != 3)
 			break;
 		if (!addr)
 			continue;
-		if (i >= MAX_SYMS)
-			return -EFBIG;
 
-		syms[i].addr = (long) addr;
-		syms[i].name = strdup(func);
-		i++;
+		ret = libbpf_ensure_mem((void **) &ksyms->syms, &ksyms->sym_cap,
+					sizeof(struct ksym), ksyms->sym_cnt + 1);
+		if (ret)
+			goto error;
+		ret = ksyms__add_symbol(ksyms, func, (unsigned long)addr);
+		if (ret)
+			goto error;
 	}
 	fclose(f);
-	sym_cnt = i;
-	qsort(syms, sym_cnt, sizeof(struct ksym), ksym_cmp);
-	return 0;
+	qsort(ksyms->syms, ksyms->sym_cnt, sizeof(struct ksym), ksym_cmp);
+	return ksyms;
+
+error:
+	fclose(f);
+	free_kallsyms_local(ksyms);
+	return NULL;
 }
 
 int load_kallsyms(void)
 {
-	/*
-	 * This is called/used from multiplace places,
-	 * load symbols just once.
-	 */
-	if (sym_cnt)
-		return 0;
-	return load_kallsyms_refresh();
+	pthread_mutex_lock(&ksyms_mutex);
+	if (!ksyms)
+		ksyms = load_kallsyms_local();
+	pthread_mutex_unlock(&ksyms_mutex);
+	return ksyms ? 0 : 1;
 }
 
-struct ksym *ksym_search(long key)
+struct ksym *ksym_search_local(struct ksyms *ksyms, long key)
 {
-	int start = 0, end = sym_cnt;
+	int start = 0, end = ksyms->sym_cnt;
 	int result;
 
 	/* kallsyms not loaded. return NULL */
-	if (sym_cnt <= 0)
+	if (ksyms->sym_cnt <= 0)
 		return NULL;
 
 	while (start < end) {
 		size_t mid = start + (end - start) / 2;
 
-		result = key - syms[mid].addr;
+		result = key - ksyms->syms[mid].addr;
 		if (result < 0)
 			end = mid;
 		else if (result > 0)
 			start = mid + 1;
 		else
-			return &syms[mid];
+			return &ksyms->syms[mid];
 	}
 
-	if (start >= 1 && syms[start - 1].addr < key &&
-	    key < syms[start].addr)
+	if (start >= 1 && ksyms->syms[start - 1].addr < key &&
+	    key < ksyms->syms[start].addr)
 		/* valid ksym */
-		return &syms[start - 1];
+		return &ksyms->syms[start - 1];
 
 	/* out of range. return _stext */
-	return &syms[0];
+	return &ksyms->syms[0];
 }
 
-long ksym_get_addr(const char *name)
+struct ksym *ksym_search(long key)
+{
+	if (!ksyms)
+		return NULL;
+	return ksym_search_local(ksyms, key);
+}
+
+long ksym_get_addr_local(struct ksyms *ksyms, const char *name)
 {
 	int i;
 
-	for (i = 0; i < sym_cnt; i++) {
-		if (strcmp(syms[i].name, name) == 0)
-			return syms[i].addr;
+	for (i = 0; i < ksyms->sym_cnt; i++) {
+		if (strcmp(ksyms->syms[i].name, name) == 0)
+			return ksyms->syms[i].addr;
 	}
 
 	return 0;
 }
 
+long ksym_get_addr(const char *name)
+{
+	if (!ksyms)
+		return 0;
+	return ksym_get_addr_local(ksyms, name);
+}
+
 /* open kallsyms and read symbol addresses on the fly. Without caching all symbols,
  * this is faster than load + find.
  */
diff --git a/tools/testing/selftests/bpf/trace_helpers.h b/tools/testing/selftests/bpf/trace_helpers.h
index 876f3e711df6..04fd1da7079d 100644
--- a/tools/testing/selftests/bpf/trace_helpers.h
+++ b/tools/testing/selftests/bpf/trace_helpers.h
@@ -11,13 +11,17 @@ struct ksym {
 	long addr;
 	char *name;
 };
+struct ksyms;
 
 int load_kallsyms(void);
-int load_kallsyms_refresh(void);
-
 struct ksym *ksym_search(long key);
 long ksym_get_addr(const char *name);
 
+struct ksyms *load_kallsyms_local(void);
+struct ksym *ksym_search_local(struct ksyms *ksyms, long key);
+long ksym_get_addr_local(struct ksyms *ksyms, const char *name);
+void free_kallsyms_local(struct ksyms *ksyms);
+
 /* open kallsyms and find addresses on the fly, faster than load + search. */
 int kallsyms_find(const char *sym, unsigned long long *addr);
 
diff --git a/tools/testing/selftests/bpf/urandom_read.c b/tools/testing/selftests/bpf/urandom_read.c
index e92644d0fa75..4ed795655b9f 100644
--- a/tools/testing/selftests/bpf/urandom_read.c
+++ b/tools/testing/selftests/bpf/urandom_read.c
@@ -11,6 +11,9 @@
 #define _SDT_HAS_SEMAPHORES 1
 #include "sdt.h"
 
+#define SHARED 1
+#include "bpf/libbpf_internal.h"
+
 #define SEC(name) __attribute__((section(name), used))
 
 #define BUF_SIZE 256
@@ -21,10 +24,14 @@ void urand_read_without_sema(int iter_num, int iter_cnt, int read_sz);
 void urandlib_read_with_sema(int iter_num, int iter_cnt, int read_sz);
 void urandlib_read_without_sema(int iter_num, int iter_cnt, int read_sz);
 
+int urandlib_api(void);
+COMPAT_VERSION(urandlib_api_old, urandlib_api, LIBURANDOM_READ_1.0.0)
+int urandlib_api_old(void);
+int urandlib_api_sameoffset(void);
+
 unsigned short urand_read_with_sema_semaphore SEC(".probes");
 
-static __attribute__((noinline))
-void urandom_read(int fd, int count)
+static noinline void urandom_read(int fd, int count)
 {
 	char buf[BUF_SIZE];
 	int i;
@@ -83,6 +90,10 @@ int main(int argc, char *argv[])
 
 	urandom_read(fd, count);
 
+	urandlib_api();
+	urandlib_api_old();
+	urandlib_api_sameoffset();
+
 	close(fd);
 	return 0;
 }
diff --git a/tools/testing/selftests/bpf/urandom_read_lib1.c b/tools/testing/selftests/bpf/urandom_read_lib1.c
index 86186e24b740..8c1356d8b4ee 100644
--- a/tools/testing/selftests/bpf/urandom_read_lib1.c
+++ b/tools/testing/selftests/bpf/urandom_read_lib1.c
@@ -3,6 +3,9 @@
 #define _SDT_HAS_SEMAPHORES 1
 #include "sdt.h"
 
+#define SHARED 1
+#include "bpf/libbpf_internal.h"
+
 #define SEC(name) __attribute__((section(name), used))
 
 unsigned short urandlib_read_with_sema_semaphore SEC(".probes");
@@ -11,3 +14,22 @@ void urandlib_read_with_sema(int iter_num, int iter_cnt, int read_sz)
 {
 	STAP_PROBE3(urandlib, read_with_sema, iter_num, iter_cnt, read_sz);
 }
+
+COMPAT_VERSION(urandlib_api_v1, urandlib_api, LIBURANDOM_READ_1.0.0)
+int urandlib_api_v1(void)
+{
+	return 1;
+}
+
+DEFAULT_VERSION(urandlib_api_v2, urandlib_api, LIBURANDOM_READ_2.0.0)
+int urandlib_api_v2(void)
+{
+	return 2;
+}
+
+COMPAT_VERSION(urandlib_api_sameoffset, urandlib_api_sameoffset, LIBURANDOM_READ_1.0.0)
+DEFAULT_VERSION(urandlib_api_sameoffset, urandlib_api_sameoffset, LIBURANDOM_READ_2.0.0)
+int urandlib_api_sameoffset(void)
+{
+	return 3;
+}
diff --git a/tools/testing/selftests/bpf/xdp_features.c b/tools/testing/selftests/bpf/xdp_features.c
index b449788fbd39..595c79141cf3 100644
--- a/tools/testing/selftests/bpf/xdp_features.c
+++ b/tools/testing/selftests/bpf/xdp_features.c
@@ -360,9 +360,9 @@ static int recv_msg(int sockfd, void *buf, size_t bufsize, void *val,
 static int dut_run(struct xdp_features *skel)
 {
 	int flags = XDP_FLAGS_UPDATE_IF_NOEXIST | XDP_FLAGS_DRV_MODE;
-	int state, err, *sockfd, ctrl_sockfd, echo_sockfd;
+	int state, err = 0, *sockfd, ctrl_sockfd, echo_sockfd;
 	struct sockaddr_storage ctrl_addr;
-	pthread_t dut_thread;
+	pthread_t dut_thread = 0;
 	socklen_t addrlen;
 
 	sockfd = start_reuseport_server(AF_INET6, SOCK_STREAM, NULL,
diff --git a/tools/testing/selftests/bpf/xdp_hw_metadata.c b/tools/testing/selftests/bpf/xdp_hw_metadata.c
index 613321eb84c1..17c980138796 100644
--- a/tools/testing/selftests/bpf/xdp_hw_metadata.c
+++ b/tools/testing/selftests/bpf/xdp_hw_metadata.c
@@ -234,7 +234,7 @@ static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd, clockid_t
 	struct pollfd fds[rxq + 1];
 	__u64 comp_addr;
 	__u64 addr;
-	__u32 idx;
+	__u32 idx = 0;
 	int ret;
 	int i;
 
diff --git a/tools/testing/selftests/bpf/xsk.c b/tools/testing/selftests/bpf/xsk.c
index d9fb2b730a2c..e574711eeb84 100644
--- a/tools/testing/selftests/bpf/xsk.c
+++ b/tools/testing/selftests/bpf/xsk.c
@@ -442,10 +442,9 @@ void xsk_clear_xskmap(struct bpf_map *map)
 	bpf_map_delete_elem(map_fd, &index);
 }
 
-int xsk_update_xskmap(struct bpf_map *map, struct xsk_socket *xsk)
+int xsk_update_xskmap(struct bpf_map *map, struct xsk_socket *xsk, u32 index)
 {
 	int map_fd, sock_fd;
-	u32 index = 0;
 
 	map_fd = bpf_map__fd(map);
 	sock_fd = xsk_socket__fd(xsk);
diff --git a/tools/testing/selftests/bpf/xsk.h b/tools/testing/selftests/bpf/xsk.h
index d93200fdaa8d..771570bc3731 100644
--- a/tools/testing/selftests/bpf/xsk.h
+++ b/tools/testing/selftests/bpf/xsk.h
@@ -204,7 +204,7 @@ struct xsk_umem_config {
 
 int xsk_attach_xdp_program(struct bpf_program *prog, int ifindex, u32 xdp_flags);
 void xsk_detach_xdp_program(int ifindex, u32 xdp_flags);
-int xsk_update_xskmap(struct bpf_map *map, struct xsk_socket *xsk);
+int xsk_update_xskmap(struct bpf_map *map, struct xsk_socket *xsk, u32 index);
 void xsk_clear_xskmap(struct bpf_map *map);
 bool xsk_is_in_mode(u32 ifindex, int mode);
 
diff --git a/tools/testing/selftests/bpf/xsk_prereqs.sh b/tools/testing/selftests/bpf/xsk_prereqs.sh
index 29175682c44d..47c7b8064f38 100755
--- a/tools/testing/selftests/bpf/xsk_prereqs.sh
+++ b/tools/testing/selftests/bpf/xsk_prereqs.sh
@@ -83,9 +83,11 @@ exec_xskxceiver()
 	fi
 
 	./${XSKOBJ} -i ${VETH0} -i ${VETH1} ${ARGS}
-
 	retval=$?
-	test_status $retval "${TEST_NAME}"
-	statusList+=($retval)
-	nameList+=(${TEST_NAME})
+
+	if [[ $list -ne 1 ]]; then
+	    test_status $retval "${TEST_NAME}"
+	    statusList+=($retval)
+	    nameList+=(${TEST_NAME})
+	fi
 }
diff --git a/tools/testing/selftests/bpf/xsk_xdp_common.h b/tools/testing/selftests/bpf/xsk_xdp_common.h
new file mode 100644
index 000000000000..5a6f36f07383
--- /dev/null
+++ b/tools/testing/selftests/bpf/xsk_xdp_common.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef XSK_XDP_COMMON_H_
+#define XSK_XDP_COMMON_H_
+
+#define MAX_SOCKETS 2
+
+struct xdp_info {
+	__u64 count;
+} __attribute__((aligned(32)));
+
+#endif /* XSK_XDP_COMMON_H_ */
diff --git a/tools/testing/selftests/bpf/xsk_xdp_metadata.h b/tools/testing/selftests/bpf/xsk_xdp_metadata.h
deleted file mode 100644
index 943133da378a..000000000000
--- a/tools/testing/selftests/bpf/xsk_xdp_metadata.h
+++ /dev/null
@@ -1,5 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-
-struct xdp_info {
-	__u64 count;
-} __attribute__((aligned(32)));
diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c
index 2827f2d7cf30..591ca9637b23 100644
--- a/tools/testing/selftests/bpf/xskxceiver.c
+++ b/tools/testing/selftests/bpf/xskxceiver.c
@@ -80,6 +80,7 @@
 #include <linux/if_ether.h>
 #include <linux/mman.h>
 #include <linux/netdev.h>
+#include <linux/bitmap.h>
 #include <arpa/inet.h>
 #include <net/if.h>
 #include <locale.h>
@@ -102,10 +103,12 @@
 #include <bpf/bpf.h>
 #include <linux/filter.h>
 #include "../kselftest.h"
-#include "xsk_xdp_metadata.h"
+#include "xsk_xdp_common.h"
 
-static const char *MAC1 = "\x00\x0A\x56\x9E\xEE\x62";
-static const char *MAC2 = "\x00\x0A\x56\x9E\xEE\x61";
+static bool opt_verbose;
+static bool opt_print_tests;
+static enum test_mode opt_mode = TEST_MODE_ALL;
+static u32 opt_run_test = RUN_ALL_TESTS;
 
 static void __exit_with_error(int error, const char *file, const char *func, int line)
 {
@@ -154,10 +157,10 @@ static void write_payload(void *dest, u32 pkt_nb, u32 start, u32 size)
 		ptr[i] = htonl(pkt_nb << 16 | (i + start));
 }
 
-static void gen_eth_hdr(struct ifobject *ifobject, struct ethhdr *eth_hdr)
+static void gen_eth_hdr(struct xsk_socket_info *xsk, struct ethhdr *eth_hdr)
 {
-	memcpy(eth_hdr->h_dest, ifobject->dst_mac, ETH_ALEN);
-	memcpy(eth_hdr->h_source, ifobject->src_mac, ETH_ALEN);
+	memcpy(eth_hdr->h_dest, xsk->dst_mac, ETH_ALEN);
+	memcpy(eth_hdr->h_source, xsk->src_mac, ETH_ALEN);
 	eth_hdr->h_proto = htons(ETH_P_LOOPBACK);
 }
 
@@ -255,7 +258,7 @@ static int __xsk_configure_socket(struct xsk_socket_info *xsk, struct xsk_umem_i
 	cfg.bind_flags = ifobject->bind_flags;
 	if (shared)
 		cfg.bind_flags |= XDP_SHARED_UMEM;
-	if (ifobject->pkt_stream && ifobject->mtu > MAX_ETH_PKT_SIZE)
+	if (ifobject->mtu > MAX_ETH_PKT_SIZE)
 		cfg.bind_flags |= XDP_USE_SG;
 
 	txr = ifobject->tx_on ? &xsk->tx : NULL;
@@ -310,19 +313,28 @@ static struct option long_options[] = {
 	{"interface", required_argument, 0, 'i'},
 	{"busy-poll", no_argument, 0, 'b'},
 	{"verbose", no_argument, 0, 'v'},
+	{"mode", required_argument, 0, 'm'},
+	{"list", no_argument, 0, 'l'},
+	{"test", required_argument, 0, 't'},
+	{"help", no_argument, 0, 'h'},
 	{0, 0, 0, 0}
 };
 
-static void usage(const char *prog)
+static void print_usage(char **argv)
 {
 	const char *str =
-		"  Usage: %s [OPTIONS]\n"
+		"  Usage: xskxceiver [OPTIONS]\n"
 		"  Options:\n"
 		"  -i, --interface      Use interface\n"
 		"  -v, --verbose        Verbose output\n"
-		"  -b, --busy-poll      Enable busy poll\n";
+		"  -b, --busy-poll      Enable busy poll\n"
+		"  -m, --mode           Run only mode skb, drv, or zc\n"
+		"  -l, --list           List all available tests\n"
+		"  -t, --test           Run a specific test. Enter number from -l option.\n"
+		"  -h, --help           Display this help and exit\n";
 
-	ksft_print_msg(str, prog);
+	ksft_print_msg(str, basename(argv[0]));
+	ksft_exit_xfail();
 }
 
 static bool validate_interface(struct ifobject *ifobj)
@@ -342,7 +354,7 @@ static void parse_command_line(struct ifobject *ifobj_tx, struct ifobject *ifobj
 	opterr = 0;
 
 	for (;;) {
-		c = getopt_long(argc, argv, "i:vb", long_options, &option_index);
+		c = getopt_long(argc, argv, "i:vbm:lt:", long_options, &option_index);
 		if (c == -1)
 			break;
 
@@ -371,9 +383,28 @@ static void parse_command_line(struct ifobject *ifobj_tx, struct ifobject *ifobj
 			ifobj_tx->busy_poll = true;
 			ifobj_rx->busy_poll = true;
 			break;
+		case 'm':
+			if (!strncmp("skb", optarg, strlen(optarg)))
+				opt_mode = TEST_MODE_SKB;
+			else if (!strncmp("drv", optarg, strlen(optarg)))
+				opt_mode = TEST_MODE_DRV;
+			else if (!strncmp("zc", optarg, strlen(optarg)))
+				opt_mode = TEST_MODE_ZC;
+			else
+				print_usage(argv);
+			break;
+		case 'l':
+			opt_print_tests = true;
+			break;
+		case 't':
+			errno = 0;
+			opt_run_test = strtol(optarg, NULL, 0);
+			if (errno)
+				print_usage(argv);
+			break;
+		case 'h':
 		default:
-			usage(basename(argv[0]));
-			ksft_exit_xfail();
+			print_usage(argv);
 		}
 	}
 }
@@ -396,11 +427,9 @@ static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx,
 		if (i == 0) {
 			ifobj->rx_on = false;
 			ifobj->tx_on = true;
-			ifobj->pkt_stream = test->tx_pkt_stream_default;
 		} else {
 			ifobj->rx_on = true;
 			ifobj->tx_on = false;
-			ifobj->pkt_stream = test->rx_pkt_stream_default;
 		}
 
 		memset(ifobj->umem, 0, sizeof(*ifobj->umem));
@@ -410,6 +439,15 @@ static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx,
 		for (j = 0; j < MAX_SOCKETS; j++) {
 			memset(&ifobj->xsk_arr[j], 0, sizeof(ifobj->xsk_arr[j]));
 			ifobj->xsk_arr[j].rxqsize = XSK_RING_CONS__DEFAULT_NUM_DESCS;
+			if (i == 0)
+				ifobj->xsk_arr[j].pkt_stream = test->tx_pkt_stream_default;
+			else
+				ifobj->xsk_arr[j].pkt_stream = test->rx_pkt_stream_default;
+
+			memcpy(ifobj->xsk_arr[j].src_mac, g_mac, ETH_ALEN);
+			memcpy(ifobj->xsk_arr[j].dst_mac, g_mac, ETH_ALEN);
+			ifobj->xsk_arr[j].src_mac[5] += ((j * 2) + 0);
+			ifobj->xsk_arr[j].dst_mac[5] += ((j * 2) + 1);
 		}
 	}
 
@@ -427,7 +465,8 @@ static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx,
 }
 
 static void test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx,
-			   struct ifobject *ifobj_rx, enum test_mode mode)
+			   struct ifobject *ifobj_rx, enum test_mode mode,
+			   const struct test_spec *test_to_run)
 {
 	struct pkt_stream *tx_pkt_stream;
 	struct pkt_stream *rx_pkt_stream;
@@ -449,6 +488,8 @@ static void test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx,
 			ifobj->bind_flags |= XDP_COPY;
 	}
 
+	strncpy(test->name, test_to_run->name, MAX_TEST_NAME_SIZE);
+	test->test_func = test_to_run->test_func;
 	test->mode = mode;
 	__test_spec_init(test, ifobj_tx, ifobj_rx);
 }
@@ -458,11 +499,6 @@ static void test_spec_reset(struct test_spec *test)
 	__test_spec_init(test, test->ifobj_tx, test->ifobj_rx);
 }
 
-static void test_spec_set_name(struct test_spec *test, const char *name)
-{
-	strncpy(test->name, name, MAX_TEST_NAME_SIZE);
-}
-
 static void test_spec_set_xdp_prog(struct test_spec *test, struct bpf_program *xdp_prog_rx,
 				   struct bpf_program *xdp_prog_tx, struct bpf_map *xskmap_rx,
 				   struct bpf_map *xskmap_tx)
@@ -495,8 +531,10 @@ static int test_spec_set_mtu(struct test_spec *test, int mtu)
 
 static void pkt_stream_reset(struct pkt_stream *pkt_stream)
 {
-	if (pkt_stream)
+	if (pkt_stream) {
 		pkt_stream->current_pkt_nb = 0;
+		pkt_stream->nb_rx_pkts = 0;
+	}
 }
 
 static struct pkt *pkt_stream_get_next_tx_pkt(struct pkt_stream *pkt_stream)
@@ -526,17 +564,17 @@ static void pkt_stream_delete(struct pkt_stream *pkt_stream)
 
 static void pkt_stream_restore_default(struct test_spec *test)
 {
-	struct pkt_stream *tx_pkt_stream = test->ifobj_tx->pkt_stream;
-	struct pkt_stream *rx_pkt_stream = test->ifobj_rx->pkt_stream;
+	struct pkt_stream *tx_pkt_stream = test->ifobj_tx->xsk->pkt_stream;
+	struct pkt_stream *rx_pkt_stream = test->ifobj_rx->xsk->pkt_stream;
 
 	if (tx_pkt_stream != test->tx_pkt_stream_default) {
-		pkt_stream_delete(test->ifobj_tx->pkt_stream);
-		test->ifobj_tx->pkt_stream = test->tx_pkt_stream_default;
+		pkt_stream_delete(test->ifobj_tx->xsk->pkt_stream);
+		test->ifobj_tx->xsk->pkt_stream = test->tx_pkt_stream_default;
 	}
 
 	if (rx_pkt_stream != test->rx_pkt_stream_default) {
-		pkt_stream_delete(test->ifobj_rx->pkt_stream);
-		test->ifobj_rx->pkt_stream = test->rx_pkt_stream_default;
+		pkt_stream_delete(test->ifobj_rx->xsk->pkt_stream);
+		test->ifobj_rx->xsk->pkt_stream = test->rx_pkt_stream_default;
 	}
 }
 
@@ -596,14 +634,16 @@ static u32 pkt_nb_frags(u32 frame_size, struct pkt_stream *pkt_stream, struct pk
 	return nb_frags;
 }
 
-static void pkt_set(struct xsk_umem_info *umem, struct pkt *pkt, int offset, u32 len)
+static void pkt_set(struct pkt_stream *pkt_stream, struct pkt *pkt, int offset, u32 len)
 {
 	pkt->offset = offset;
 	pkt->len = len;
-	if (len > MAX_ETH_JUMBO_SIZE)
+	if (len > MAX_ETH_JUMBO_SIZE) {
 		pkt->valid = false;
-	else
+	} else {
 		pkt->valid = true;
+		pkt_stream->nb_valid_entries++;
+	}
 }
 
 static u32 pkt_get_buffer_len(struct xsk_umem_info *umem, u32 len)
@@ -611,7 +651,7 @@ static u32 pkt_get_buffer_len(struct xsk_umem_info *umem, u32 len)
 	return ceil_u32(len, umem->frame_size) * umem->frame_size;
 }
 
-static struct pkt_stream *pkt_stream_generate(struct xsk_umem_info *umem, u32 nb_pkts, u32 pkt_len)
+static struct pkt_stream *__pkt_stream_generate(u32 nb_pkts, u32 pkt_len, u32 nb_start, u32 nb_off)
 {
 	struct pkt_stream *pkt_stream;
 	u32 i;
@@ -625,41 +665,45 @@ static struct pkt_stream *pkt_stream_generate(struct xsk_umem_info *umem, u32 nb
 	for (i = 0; i < nb_pkts; i++) {
 		struct pkt *pkt = &pkt_stream->pkts[i];
 
-		pkt_set(umem, pkt, 0, pkt_len);
-		pkt->pkt_nb = i;
+		pkt_set(pkt_stream, pkt, 0, pkt_len);
+		pkt->pkt_nb = nb_start + i * nb_off;
 	}
 
 	return pkt_stream;
 }
 
-static struct pkt_stream *pkt_stream_clone(struct xsk_umem_info *umem,
-					   struct pkt_stream *pkt_stream)
+static struct pkt_stream *pkt_stream_generate(u32 nb_pkts, u32 pkt_len)
 {
-	return pkt_stream_generate(umem, pkt_stream->nb_pkts, pkt_stream->pkts[0].len);
+	return __pkt_stream_generate(nb_pkts, pkt_len, 0, 1);
+}
+
+static struct pkt_stream *pkt_stream_clone(struct pkt_stream *pkt_stream)
+{
+	return pkt_stream_generate(pkt_stream->nb_pkts, pkt_stream->pkts[0].len);
 }
 
 static void pkt_stream_replace(struct test_spec *test, u32 nb_pkts, u32 pkt_len)
 {
 	struct pkt_stream *pkt_stream;
 
-	pkt_stream = pkt_stream_generate(test->ifobj_tx->umem, nb_pkts, pkt_len);
-	test->ifobj_tx->pkt_stream = pkt_stream;
-	pkt_stream = pkt_stream_generate(test->ifobj_rx->umem, nb_pkts, pkt_len);
-	test->ifobj_rx->pkt_stream = pkt_stream;
+	pkt_stream = pkt_stream_generate(nb_pkts, pkt_len);
+	test->ifobj_tx->xsk->pkt_stream = pkt_stream;
+	pkt_stream = pkt_stream_generate(nb_pkts, pkt_len);
+	test->ifobj_rx->xsk->pkt_stream = pkt_stream;
 }
 
 static void __pkt_stream_replace_half(struct ifobject *ifobj, u32 pkt_len,
 				      int offset)
 {
-	struct xsk_umem_info *umem = ifobj->umem;
 	struct pkt_stream *pkt_stream;
 	u32 i;
 
-	pkt_stream = pkt_stream_clone(umem, ifobj->pkt_stream);
-	for (i = 1; i < ifobj->pkt_stream->nb_pkts; i += 2)
-		pkt_set(umem, &pkt_stream->pkts[i], offset, pkt_len);
+	pkt_stream = pkt_stream_clone(ifobj->xsk->pkt_stream);
+	for (i = 1; i < ifobj->xsk->pkt_stream->nb_pkts; i += 2)
+		pkt_set(pkt_stream, &pkt_stream->pkts[i], offset, pkt_len);
 
-	ifobj->pkt_stream = pkt_stream;
+	ifobj->xsk->pkt_stream = pkt_stream;
+	pkt_stream->nb_valid_entries /= 2;
 }
 
 static void pkt_stream_replace_half(struct test_spec *test, u32 pkt_len, int offset)
@@ -670,15 +714,34 @@ static void pkt_stream_replace_half(struct test_spec *test, u32 pkt_len, int off
 
 static void pkt_stream_receive_half(struct test_spec *test)
 {
-	struct xsk_umem_info *umem = test->ifobj_rx->umem;
-	struct pkt_stream *pkt_stream = test->ifobj_tx->pkt_stream;
+	struct pkt_stream *pkt_stream = test->ifobj_tx->xsk->pkt_stream;
 	u32 i;
 
-	test->ifobj_rx->pkt_stream = pkt_stream_generate(umem, pkt_stream->nb_pkts,
-							 pkt_stream->pkts[0].len);
-	pkt_stream = test->ifobj_rx->pkt_stream;
+	test->ifobj_rx->xsk->pkt_stream = pkt_stream_generate(pkt_stream->nb_pkts,
+							      pkt_stream->pkts[0].len);
+	pkt_stream = test->ifobj_rx->xsk->pkt_stream;
 	for (i = 1; i < pkt_stream->nb_pkts; i += 2)
 		pkt_stream->pkts[i].valid = false;
+
+	pkt_stream->nb_valid_entries /= 2;
+}
+
+static void pkt_stream_even_odd_sequence(struct test_spec *test)
+{
+	struct pkt_stream *pkt_stream;
+	u32 i;
+
+	for (i = 0; i < test->nb_sockets; i++) {
+		pkt_stream = test->ifobj_tx->xsk_arr[i].pkt_stream;
+		pkt_stream = __pkt_stream_generate(pkt_stream->nb_pkts / 2,
+						   pkt_stream->pkts[0].len, i, 2);
+		test->ifobj_tx->xsk_arr[i].pkt_stream = pkt_stream;
+
+		pkt_stream = test->ifobj_rx->xsk_arr[i].pkt_stream;
+		pkt_stream = __pkt_stream_generate(pkt_stream->nb_pkts / 2,
+						   pkt_stream->pkts[0].len, i, 2);
+		test->ifobj_rx->xsk_arr[i].pkt_stream = pkt_stream;
+	}
 }
 
 static u64 pkt_get_addr(struct pkt *pkt, struct xsk_umem_info *umem)
@@ -693,16 +756,16 @@ static void pkt_stream_cancel(struct pkt_stream *pkt_stream)
 	pkt_stream->current_pkt_nb--;
 }
 
-static void pkt_generate(struct ifobject *ifobject, u64 addr, u32 len, u32 pkt_nb,
-			 u32 bytes_written)
+static void pkt_generate(struct xsk_socket_info *xsk, struct xsk_umem_info *umem, u64 addr, u32 len,
+			 u32 pkt_nb, u32 bytes_written)
 {
-	void *data = xsk_umem__get_data(ifobject->umem->buffer, addr);
+	void *data = xsk_umem__get_data(umem->buffer, addr);
 
 	if (len < MIN_PKT_SIZE)
 		return;
 
 	if (!bytes_written) {
-		gen_eth_hdr(ifobject, data);
+		gen_eth_hdr(xsk, data);
 
 		len -= PKT_HDR_SIZE;
 		data += PKT_HDR_SIZE;
@@ -747,8 +810,15 @@ static struct pkt_stream *__pkt_stream_generate_custom(struct ifobject *ifobj, s
 			len = 0;
 		}
 
+		print_verbose("offset: %d len: %u valid: %u options: %u pkt_nb: %u\n",
+			      pkt->offset, pkt->len, pkt->valid, pkt->options, pkt->pkt_nb);
+
 		if (pkt->valid && pkt->len > pkt_stream->max_pkt_len)
 			pkt_stream->max_pkt_len = pkt->len;
+
+		if (pkt->valid)
+			pkt_stream->nb_valid_entries++;
+
 		pkt_nb++;
 	}
 
@@ -762,10 +832,10 @@ static void pkt_stream_generate_custom(struct test_spec *test, struct pkt *pkts,
 	struct pkt_stream *pkt_stream;
 
 	pkt_stream = __pkt_stream_generate_custom(test->ifobj_tx, pkts, nb_pkts, true);
-	test->ifobj_tx->pkt_stream = pkt_stream;
+	test->ifobj_tx->xsk->pkt_stream = pkt_stream;
 
 	pkt_stream = __pkt_stream_generate_custom(test->ifobj_rx, pkts, nb_pkts, false);
-	test->ifobj_rx->pkt_stream = pkt_stream;
+	test->ifobj_rx->xsk->pkt_stream = pkt_stream;
 }
 
 static void pkt_print_data(u32 *data, u32 cnt)
@@ -777,7 +847,7 @@ static void pkt_print_data(u32 *data, u32 cnt)
 
 		seqnum = ntohl(*data) & 0xffff;
 		pkt_nb = ntohl(*data) >> 16;
-		fprintf(stdout, "%u:%u ", pkt_nb, seqnum);
+		ksft_print_msg("%u:%u ", pkt_nb, seqnum);
 		data++;
 	}
 }
@@ -789,13 +859,13 @@ static void pkt_dump(void *pkt, u32 len, bool eth_header)
 
 	if (eth_header) {
 		/*extract L2 frame */
-		fprintf(stdout, "DEBUG>> L2: dst mac: ");
+		ksft_print_msg("DEBUG>> L2: dst mac: ");
 		for (i = 0; i < ETH_ALEN; i++)
-			fprintf(stdout, "%02X", ethhdr->h_dest[i]);
+			ksft_print_msg("%02X", ethhdr->h_dest[i]);
 
-		fprintf(stdout, "\nDEBUG>> L2: src mac: ");
+		ksft_print_msg("\nDEBUG>> L2: src mac: ");
 		for (i = 0; i < ETH_ALEN; i++)
-			fprintf(stdout, "%02X", ethhdr->h_source[i]);
+			ksft_print_msg("%02X", ethhdr->h_source[i]);
 
 		data = pkt + PKT_HDR_SIZE;
 	} else {
@@ -803,15 +873,15 @@ static void pkt_dump(void *pkt, u32 len, bool eth_header)
 	}
 
 	/*extract L5 frame */
-	fprintf(stdout, "\nDEBUG>> L5: seqnum: ");
+	ksft_print_msg("\nDEBUG>> L5: seqnum: ");
 	pkt_print_data(data, PKT_DUMP_NB_TO_PRINT);
-	fprintf(stdout, "....");
+	ksft_print_msg("....");
 	if (len > PKT_DUMP_NB_TO_PRINT * sizeof(u32)) {
-		fprintf(stdout, "\n.... ");
+		ksft_print_msg("\n.... ");
 		pkt_print_data(data + len / sizeof(u32) - PKT_DUMP_NB_TO_PRINT,
 			       PKT_DUMP_NB_TO_PRINT);
 	}
-	fprintf(stdout, "\n---------------------------------------\n");
+	ksft_print_msg("\n---------------------------------------\n");
 }
 
 static bool is_offset_correct(struct xsk_umem_info *umem, struct pkt *pkt, u64 addr)
@@ -916,36 +986,42 @@ static bool is_pkt_valid(struct pkt *pkt, void *buffer, u64 addr, u32 len)
 	return true;
 }
 
-static void kick_tx(struct xsk_socket_info *xsk)
+static int kick_tx(struct xsk_socket_info *xsk)
 {
 	int ret;
 
 	ret = sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0);
 	if (ret >= 0)
-		return;
+		return TEST_PASS;
 	if (errno == ENOBUFS || errno == EAGAIN || errno == EBUSY || errno == ENETDOWN) {
 		usleep(100);
-		return;
+		return TEST_PASS;
 	}
-	exit_with_error(errno);
+	return TEST_FAILURE;
 }
 
-static void kick_rx(struct xsk_socket_info *xsk)
+static int kick_rx(struct xsk_socket_info *xsk)
 {
 	int ret;
 
 	ret = recvfrom(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, NULL);
 	if (ret < 0)
-		exit_with_error(errno);
+		return TEST_FAILURE;
+
+	return TEST_PASS;
 }
 
 static int complete_pkts(struct xsk_socket_info *xsk, int batch_size)
 {
 	unsigned int rcvd;
 	u32 idx;
+	int ret;
 
-	if (xsk_ring_prod__needs_wakeup(&xsk->tx))
-		kick_tx(xsk);
+	if (xsk_ring_prod__needs_wakeup(&xsk->tx)) {
+		ret = kick_tx(xsk);
+		if (ret)
+			return TEST_FAILURE;
+	}
 
 	rcvd = xsk_ring_cons__peek(&xsk->umem->cq, batch_size, &idx);
 	if (rcvd) {
@@ -964,153 +1040,207 @@ static int complete_pkts(struct xsk_socket_info *xsk, int batch_size)
 	return TEST_PASS;
 }
 
-static int receive_pkts(struct test_spec *test, struct pollfd *fds)
+static int __receive_pkts(struct test_spec *test, struct xsk_socket_info *xsk)
 {
-	struct timeval tv_end, tv_now, tv_timeout = {THREAD_TMOUT, 0};
-	struct pkt_stream *pkt_stream = test->ifobj_rx->pkt_stream;
-	struct xsk_socket_info *xsk = test->ifobj_rx->xsk;
+	u32 frags_processed = 0, nb_frags = 0, pkt_len = 0;
 	u32 idx_rx = 0, idx_fq = 0, rcvd, pkts_sent = 0;
+	struct pkt_stream *pkt_stream = xsk->pkt_stream;
 	struct ifobject *ifobj = test->ifobj_rx;
 	struct xsk_umem_info *umem = xsk->umem;
+	struct pollfd fds = { };
 	struct pkt *pkt;
+	u64 first_addr = 0;
 	int ret;
 
-	ret = gettimeofday(&tv_now, NULL);
-	if (ret)
-		exit_with_error(errno);
-	timeradd(&tv_now, &tv_timeout, &tv_end);
+	fds.fd = xsk_socket__fd(xsk->xsk);
+	fds.events = POLLIN;
 
-	pkt = pkt_stream_get_next_rx_pkt(pkt_stream, &pkts_sent);
-	while (pkt) {
-		u32 frags_processed = 0, nb_frags = 0, pkt_len = 0;
-		u64 first_addr;
+	ret = kick_rx(xsk);
+	if (ret)
+		return TEST_FAILURE;
 
-		ret = gettimeofday(&tv_now, NULL);
-		if (ret)
-			exit_with_error(errno);
-		if (timercmp(&tv_now, &tv_end, >)) {
-			ksft_print_msg("ERROR: [%s] Receive loop timed out\n", __func__);
+	if (ifobj->use_poll) {
+		ret = poll(&fds, 1, POLL_TMOUT);
+		if (ret < 0)
 			return TEST_FAILURE;
-		}
-
-		kick_rx(xsk);
-		if (ifobj->use_poll) {
-			ret = poll(fds, 1, POLL_TMOUT);
-			if (ret < 0)
-				exit_with_error(errno);
 
-			if (!ret) {
-				if (!is_umem_valid(test->ifobj_tx))
-					return TEST_PASS;
-
-				ksft_print_msg("ERROR: [%s] Poll timed out\n", __func__);
-				return TEST_FAILURE;
-			}
+		if (!ret) {
+			if (!is_umem_valid(test->ifobj_tx))
+				return TEST_PASS;
 
-			if (!(fds->revents & POLLIN))
-				continue;
+			ksft_print_msg("ERROR: [%s] Poll timed out\n", __func__);
+			return TEST_CONTINUE;
 		}
 
-		rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx);
-		if (!rcvd)
-			continue;
+		if (!(fds.revents & POLLIN))
+			return TEST_CONTINUE;
+	}
 
-		if (ifobj->use_fill_ring) {
-			ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq);
-			while (ret != rcvd) {
+	rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx);
+	if (!rcvd)
+		return TEST_CONTINUE;
+
+	if (ifobj->use_fill_ring) {
+		ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq);
+		while (ret != rcvd) {
+			if (xsk_ring_prod__needs_wakeup(&umem->fq)) {
+				ret = poll(&fds, 1, POLL_TMOUT);
 				if (ret < 0)
-					exit_with_error(-ret);
-				if (xsk_ring_prod__needs_wakeup(&umem->fq)) {
-					ret = poll(fds, 1, POLL_TMOUT);
-					if (ret < 0)
-						exit_with_error(errno);
-				}
-				ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq);
+					return TEST_FAILURE;
 			}
+			ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq);
 		}
+	}
 
-		while (frags_processed < rcvd) {
-			const struct xdp_desc *desc = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++);
-			u64 addr = desc->addr, orig;
+	while (frags_processed < rcvd) {
+		const struct xdp_desc *desc = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++);
+		u64 addr = desc->addr, orig;
 
-			orig = xsk_umem__extract_addr(addr);
-			addr = xsk_umem__add_offset_to_addr(addr);
+		orig = xsk_umem__extract_addr(addr);
+		addr = xsk_umem__add_offset_to_addr(addr);
 
+		if (!nb_frags) {
+			pkt = pkt_stream_get_next_rx_pkt(pkt_stream, &pkts_sent);
 			if (!pkt) {
 				ksft_print_msg("[%s] received too many packets addr: %lx len %u\n",
 					       __func__, addr, desc->len);
 				return TEST_FAILURE;
 			}
+		}
 
-			if (!is_frag_valid(umem, addr, desc->len, pkt->pkt_nb, pkt_len) ||
-			    !is_offset_correct(umem, pkt, addr) ||
-			    (ifobj->use_metadata && !is_metadata_correct(pkt, umem->buffer, addr)))
-				return TEST_FAILURE;
+		print_verbose("Rx: addr: %lx len: %u options: %u pkt_nb: %u valid: %u\n",
+			      addr, desc->len, desc->options, pkt->pkt_nb, pkt->valid);
 
-			if (!nb_frags++)
-				first_addr = addr;
-			frags_processed++;
-			pkt_len += desc->len;
-			if (ifobj->use_fill_ring)
-				*xsk_ring_prod__fill_addr(&umem->fq, idx_fq++) = orig;
+		if (!is_frag_valid(umem, addr, desc->len, pkt->pkt_nb, pkt_len) ||
+		    !is_offset_correct(umem, pkt, addr) || (ifobj->use_metadata &&
+		    !is_metadata_correct(pkt, umem->buffer, addr)))
+			return TEST_FAILURE;
 
-			if (pkt_continues(desc->options))
-				continue;
+		if (!nb_frags++)
+			first_addr = addr;
+		frags_processed++;
+		pkt_len += desc->len;
+		if (ifobj->use_fill_ring)
+			*xsk_ring_prod__fill_addr(&umem->fq, idx_fq++) = orig;
 
-			/* The complete packet has been received */
-			if (!is_pkt_valid(pkt, umem->buffer, first_addr, pkt_len) ||
-			    !is_offset_correct(umem, pkt, addr))
-				return TEST_FAILURE;
+		if (pkt_continues(desc->options))
+			continue;
 
-			pkt = pkt_stream_get_next_rx_pkt(pkt_stream, &pkts_sent);
-			nb_frags = 0;
-			pkt_len = 0;
-		}
+		/* The complete packet has been received */
+		if (!is_pkt_valid(pkt, umem->buffer, first_addr, pkt_len) ||
+		    !is_offset_correct(umem, pkt, addr))
+			return TEST_FAILURE;
 
-		if (nb_frags) {
-			/* In the middle of a packet. Start over from beginning of packet. */
-			idx_rx -= nb_frags;
-			xsk_ring_cons__cancel(&xsk->rx, nb_frags);
-			if (ifobj->use_fill_ring) {
-				idx_fq -= nb_frags;
-				xsk_ring_prod__cancel(&umem->fq, nb_frags);
-			}
-			frags_processed -= nb_frags;
+		pkt_stream->nb_rx_pkts++;
+		nb_frags = 0;
+		pkt_len = 0;
+	}
+
+	if (nb_frags) {
+		/* In the middle of a packet. Start over from beginning of packet. */
+		idx_rx -= nb_frags;
+		xsk_ring_cons__cancel(&xsk->rx, nb_frags);
+		if (ifobj->use_fill_ring) {
+			idx_fq -= nb_frags;
+			xsk_ring_prod__cancel(&umem->fq, nb_frags);
 		}
+		frags_processed -= nb_frags;
+	}
 
-		if (ifobj->use_fill_ring)
-			xsk_ring_prod__submit(&umem->fq, frags_processed);
-		if (ifobj->release_rx)
-			xsk_ring_cons__release(&xsk->rx, frags_processed);
+	if (ifobj->use_fill_ring)
+		xsk_ring_prod__submit(&umem->fq, frags_processed);
+	if (ifobj->release_rx)
+		xsk_ring_cons__release(&xsk->rx, frags_processed);
+
+	pthread_mutex_lock(&pacing_mutex);
+	pkts_in_flight -= pkts_sent;
+	pthread_mutex_unlock(&pacing_mutex);
+	pkts_sent = 0;
+
+return TEST_CONTINUE;
+}
+
+bool all_packets_received(struct test_spec *test, struct xsk_socket_info *xsk, u32 sock_num,
+			  unsigned long *bitmap)
+{
+	struct pkt_stream *pkt_stream = xsk->pkt_stream;
+
+	if (!pkt_stream) {
+		__set_bit(sock_num, bitmap);
+		return false;
+	}
+
+	if (pkt_stream->nb_rx_pkts == pkt_stream->nb_valid_entries) {
+		__set_bit(sock_num, bitmap);
+		if (bitmap_full(bitmap, test->nb_sockets))
+			return true;
+	}
+
+	return false;
+}
+
+static int receive_pkts(struct test_spec *test)
+{
+	struct timeval tv_end, tv_now, tv_timeout = {THREAD_TMOUT, 0};
+	DECLARE_BITMAP(bitmap, test->nb_sockets);
+	struct xsk_socket_info *xsk;
+	u32 sock_num = 0;
+	int res, ret;
 
-		pthread_mutex_lock(&pacing_mutex);
-		pkts_in_flight -= pkts_sent;
-		pthread_mutex_unlock(&pacing_mutex);
-		pkts_sent = 0;
+	ret = gettimeofday(&tv_now, NULL);
+	if (ret)
+		exit_with_error(errno);
+
+	timeradd(&tv_now, &tv_timeout, &tv_end);
+
+	while (1) {
+		xsk = &test->ifobj_rx->xsk_arr[sock_num];
+
+		if ((all_packets_received(test, xsk, sock_num, bitmap)))
+			break;
+
+		res = __receive_pkts(test, xsk);
+		if (!(res == TEST_PASS || res == TEST_CONTINUE))
+			return res;
+
+		ret = gettimeofday(&tv_now, NULL);
+		if (ret)
+			exit_with_error(errno);
+
+		if (timercmp(&tv_now, &tv_end, >)) {
+			ksft_print_msg("ERROR: [%s] Receive loop timed out\n", __func__);
+			return TEST_FAILURE;
+		}
+		sock_num = (sock_num + 1) % test->nb_sockets;
 	}
 
 	return TEST_PASS;
 }
 
-static int __send_pkts(struct ifobject *ifobject, struct pollfd *fds, bool timeout)
+static int __send_pkts(struct ifobject *ifobject, struct xsk_socket_info *xsk, bool timeout)
 {
 	u32 i, idx = 0, valid_pkts = 0, valid_frags = 0, buffer_len;
-	struct pkt_stream *pkt_stream = ifobject->pkt_stream;
-	struct xsk_socket_info *xsk = ifobject->xsk;
+	struct pkt_stream *pkt_stream = xsk->pkt_stream;
 	struct xsk_umem_info *umem = ifobject->umem;
 	bool use_poll = ifobject->use_poll;
+	struct pollfd fds = { };
 	int ret;
 
 	buffer_len = pkt_get_buffer_len(umem, pkt_stream->max_pkt_len);
 	/* pkts_in_flight might be negative if many invalid packets are sent */
 	if (pkts_in_flight >= (int)((umem_size(umem) - BATCH_SIZE * buffer_len) / buffer_len)) {
-		kick_tx(xsk);
+		ret = kick_tx(xsk);
+		if (ret)
+			return TEST_FAILURE;
 		return TEST_CONTINUE;
 	}
 
+	fds.fd = xsk_socket__fd(xsk->xsk);
+	fds.events = POLLOUT;
+
 	while (xsk_ring_prod__reserve(&xsk->tx, BATCH_SIZE, &idx) < BATCH_SIZE) {
 		if (use_poll) {
-			ret = poll(fds, 1, POLL_TMOUT);
+			ret = poll(&fds, 1, POLL_TMOUT);
 			if (timeout) {
 				if (ret < 0) {
 					ksft_print_msg("ERROR: [%s] Poll error %d\n",
@@ -1161,10 +1291,13 @@ static int __send_pkts(struct ifobject *ifobject, struct pollfd *fds, bool timeo
 				tx_desc->options = 0;
 			}
 			if (pkt->valid)
-				pkt_generate(ifobject, tx_desc->addr, tx_desc->len, pkt->pkt_nb,
+				pkt_generate(xsk, umem, tx_desc->addr, tx_desc->len, pkt->pkt_nb,
 					     bytes_written);
 			bytes_written += tx_desc->len;
 
+			print_verbose("Tx addr: %llx len: %u options: %u pkt_nb: %u\n",
+				      tx_desc->addr, tx_desc->len, tx_desc->options, pkt->pkt_nb);
+
 			if (nb_frags_left) {
 				i++;
 				if (pkt_stream->verbatim)
@@ -1186,7 +1319,7 @@ static int __send_pkts(struct ifobject *ifobject, struct pollfd *fds, bool timeo
 	xsk->outstanding_tx += valid_frags;
 
 	if (use_poll) {
-		ret = poll(fds, 1, POLL_TMOUT);
+		ret = poll(&fds, 1, POLL_TMOUT);
 		if (ret <= 0) {
 			if (ret == 0 && timeout)
 				return TEST_PASS;
@@ -1207,33 +1340,67 @@ static int __send_pkts(struct ifobject *ifobject, struct pollfd *fds, bool timeo
 	return TEST_CONTINUE;
 }
 
-static void wait_for_tx_completion(struct xsk_socket_info *xsk)
+static int wait_for_tx_completion(struct xsk_socket_info *xsk)
 {
-	while (xsk->outstanding_tx)
+	struct timeval tv_end, tv_now, tv_timeout = {THREAD_TMOUT, 0};
+	int ret;
+
+	ret = gettimeofday(&tv_now, NULL);
+	if (ret)
+		exit_with_error(errno);
+	timeradd(&tv_now, &tv_timeout, &tv_end);
+
+	while (xsk->outstanding_tx) {
+		ret = gettimeofday(&tv_now, NULL);
+		if (ret)
+			exit_with_error(errno);
+		if (timercmp(&tv_now, &tv_end, >)) {
+			ksft_print_msg("ERROR: [%s] Transmission loop timed out\n", __func__);
+			return TEST_FAILURE;
+		}
+
 		complete_pkts(xsk, BATCH_SIZE);
+	}
+
+	return TEST_PASS;
+}
+
+bool all_packets_sent(struct test_spec *test, unsigned long *bitmap)
+{
+	return bitmap_full(bitmap, test->nb_sockets);
 }
 
 static int send_pkts(struct test_spec *test, struct ifobject *ifobject)
 {
-	struct pkt_stream *pkt_stream = ifobject->pkt_stream;
 	bool timeout = !is_umem_valid(test->ifobj_rx);
-	struct pollfd fds = { };
-	u32 ret;
+	DECLARE_BITMAP(bitmap, test->nb_sockets);
+	u32 i, ret;
 
-	fds.fd = xsk_socket__fd(ifobject->xsk->xsk);
-	fds.events = POLLOUT;
+	while (!(all_packets_sent(test, bitmap))) {
+		for (i = 0; i < test->nb_sockets; i++) {
+			struct pkt_stream *pkt_stream;
 
-	while (pkt_stream->current_pkt_nb < pkt_stream->nb_pkts) {
-		ret = __send_pkts(ifobject, &fds, timeout);
-		if (ret == TEST_CONTINUE && !test->fail)
-			continue;
-		if ((ret || test->fail) && !timeout)
-			return TEST_FAILURE;
-		if (ret == TEST_PASS && timeout)
-			return ret;
+			pkt_stream = ifobject->xsk_arr[i].pkt_stream;
+			if (!pkt_stream || pkt_stream->current_pkt_nb >= pkt_stream->nb_pkts) {
+				__set_bit(i, bitmap);
+				continue;
+			}
+			ret = __send_pkts(ifobject, &ifobject->xsk_arr[i], timeout);
+			if (ret == TEST_CONTINUE && !test->fail)
+				continue;
+
+			if ((ret || test->fail) && !timeout)
+				return TEST_FAILURE;
+
+			if (ret == TEST_PASS && timeout)
+				return ret;
+
+			ret = wait_for_tx_completion(&ifobject->xsk_arr[i]);
+			if (ret)
+				return TEST_FAILURE;
+		}
 	}
 
-	wait_for_tx_completion(ifobject->xsk);
 	return TEST_PASS;
 }
 
@@ -1266,7 +1433,9 @@ static int validate_rx_dropped(struct ifobject *ifobject)
 	struct xdp_statistics stats;
 	int err;
 
-	kick_rx(ifobject->xsk);
+	err = kick_rx(ifobject->xsk);
+	if (err)
+		return TEST_FAILURE;
 
 	err = get_xsk_stats(xsk, &stats);
 	if (err)
@@ -1278,8 +1447,8 @@ static int validate_rx_dropped(struct ifobject *ifobject)
 	 * packet being invalid). Since the last packet may or may not have
 	 * been dropped already, both outcomes must be allowed.
 	 */
-	if (stats.rx_dropped == ifobject->pkt_stream->nb_pkts / 2 ||
-	    stats.rx_dropped == ifobject->pkt_stream->nb_pkts / 2 - 1)
+	if (stats.rx_dropped == ifobject->xsk->pkt_stream->nb_pkts / 2 ||
+	    stats.rx_dropped == ifobject->xsk->pkt_stream->nb_pkts / 2 - 1)
 		return TEST_PASS;
 
 	return TEST_FAILURE;
@@ -1292,7 +1461,9 @@ static int validate_rx_full(struct ifobject *ifobject)
 	int err;
 
 	usleep(1000);
-	kick_rx(ifobject->xsk);
+	err = kick_rx(ifobject->xsk);
+	if (err)
+		return TEST_FAILURE;
 
 	err = get_xsk_stats(xsk, &stats);
 	if (err)
@@ -1311,7 +1482,9 @@ static int validate_fill_empty(struct ifobject *ifobject)
 	int err;
 
 	usleep(1000);
-	kick_rx(ifobject->xsk);
+	err = kick_rx(ifobject->xsk);
+	if (err)
+		return TEST_FAILURE;
 
 	err = get_xsk_stats(xsk, &stats);
 	if (err)
@@ -1339,9 +1512,10 @@ static int validate_tx_invalid_descs(struct ifobject *ifobject)
 		return TEST_FAILURE;
 	}
 
-	if (stats.tx_invalid_descs != ifobject->pkt_stream->nb_pkts / 2) {
+	if (stats.tx_invalid_descs != ifobject->xsk->pkt_stream->nb_pkts / 2) {
 		ksft_print_msg("[%s] tx_invalid_descs incorrect. Got [%u] expected [%u]\n",
-			       __func__, stats.tx_invalid_descs, ifobject->pkt_stream->nb_pkts);
+			       __func__, stats.tx_invalid_descs,
+			       ifobject->xsk->pkt_stream->nb_pkts);
 		return TEST_FAILURE;
 	}
 
@@ -1433,6 +1607,7 @@ static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject)
 	LIBBPF_OPTS(bpf_xdp_query_opts, opts);
 	void *bufs;
 	int ret;
+	u32 i;
 
 	if (ifobject->umem->unaligned_mode)
 		mmap_flags |= MAP_HUGETLB | MAP_HUGE_2MB;
@@ -1455,11 +1630,14 @@ static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject)
 	if (!ifobject->rx_on)
 		return;
 
-	xsk_populate_fill_ring(ifobject->umem, ifobject->pkt_stream, ifobject->use_fill_ring);
+	xsk_populate_fill_ring(ifobject->umem, ifobject->xsk->pkt_stream, ifobject->use_fill_ring);
 
-	ret = xsk_update_xskmap(ifobject->xskmap, ifobject->xsk->xsk);
-	if (ret)
-		exit_with_error(errno);
+	for (i = 0; i < test->nb_sockets; i++) {
+		ifobject->xsk = &ifobject->xsk_arr[i];
+		ret = xsk_update_xskmap(ifobject->xskmap, ifobject->xsk->xsk, i);
+		if (ret)
+			exit_with_error(errno);
+	}
 }
 
 static void *worker_testapp_validate_tx(void *arg)
@@ -1475,8 +1653,6 @@ static void *worker_testapp_validate_tx(void *arg)
 			thread_common_ops_tx(test, ifobject);
 	}
 
-	print_verbose("Sending %d packets on interface %s\n", ifobject->pkt_stream->nb_pkts,
-		      ifobject->ifname);
 	err = send_pkts(test, ifobject);
 
 	if (!err && ifobject->validation_func)
@@ -1491,26 +1667,23 @@ static void *worker_testapp_validate_rx(void *arg)
 {
 	struct test_spec *test = (struct test_spec *)arg;
 	struct ifobject *ifobject = test->ifobj_rx;
-	struct pollfd fds = { };
 	int err;
 
 	if (test->current_step == 1) {
 		thread_common_ops(test, ifobject);
 	} else {
 		xsk_clear_xskmap(ifobject->xskmap);
-		err = xsk_update_xskmap(ifobject->xskmap, ifobject->xsk->xsk);
+		err = xsk_update_xskmap(ifobject->xskmap, ifobject->xsk->xsk, 0);
 		if (err) {
-			printf("Error: Failed to update xskmap, error %s\n", strerror(-err));
+			ksft_print_msg("Error: Failed to update xskmap, error %s\n",
+				       strerror(-err));
 			exit_with_error(-err);
 		}
 	}
 
-	fds.fd = xsk_socket__fd(ifobject->xsk->xsk);
-	fds.events = POLLIN;
-
 	pthread_barrier_wait(&barr);
 
-	err = receive_pkts(test, &fds);
+	err = receive_pkts(test);
 
 	if (!err && ifobject->validation_func)
 		err = ifobject->validation_func(ifobject);
@@ -1564,7 +1737,7 @@ static void xsk_reattach_xdp(struct ifobject *ifobj, struct bpf_program *xdp_pro
 	xsk_detach_xdp_program(ifobj->ifindex, mode_to_xdp_flags(ifobj->mode));
 	err = xsk_attach_xdp_program(xdp_prog, ifobj->ifindex, mode_to_xdp_flags(mode));
 	if (err) {
-		printf("Error attaching XDP program\n");
+		ksft_print_msg("Error attaching XDP program\n");
 		exit_with_error(-err);
 	}
 
@@ -1619,11 +1792,11 @@ static int __testapp_validate_traffic(struct test_spec *test, struct ifobject *i
 	if (ifobj2) {
 		if (pthread_barrier_init(&barr, NULL, 2))
 			exit_with_error(errno);
-		pkt_stream_reset(ifobj2->pkt_stream);
+		pkt_stream_reset(ifobj2->xsk->pkt_stream);
 	}
 
 	test->current_step++;
-	pkt_stream_reset(ifobj1->pkt_stream);
+	pkt_stream_reset(ifobj1->xsk->pkt_stream);
 	pkts_in_flight = 0;
 
 	signal(SIGUSR1, handler);
@@ -1647,9 +1820,15 @@ static int __testapp_validate_traffic(struct test_spec *test, struct ifobject *i
 		pthread_join(t0, NULL);
 
 	if (test->total_steps == test->current_step || test->fail) {
+		u32 i;
+
 		if (ifobj2)
-			xsk_socket__delete(ifobj2->xsk->xsk);
-		xsk_socket__delete(ifobj1->xsk->xsk);
+			for (i = 0; i < test->nb_sockets; i++)
+				xsk_socket__delete(ifobj2->xsk_arr[i].xsk);
+
+		for (i = 0; i < test->nb_sockets; i++)
+			xsk_socket__delete(ifobj1->xsk_arr[i].xsk);
+
 		testapp_clean_xsk_umem(ifobj1);
 		if (ifobj2 && !ifobj2->shared_umem)
 			testapp_clean_xsk_umem(ifobj2);
@@ -1682,7 +1861,6 @@ static int testapp_teardown(struct test_spec *test)
 {
 	int i;
 
-	test_spec_set_name(test, "TEARDOWN");
 	for (i = 0; i < MAX_TEARDOWN_ITER; i++) {
 		if (testapp_validate_traffic(test))
 			return TEST_FAILURE;
@@ -1704,18 +1882,17 @@ static void swap_directions(struct ifobject **ifobj1, struct ifobject **ifobj2)
 	*ifobj2 = tmp_ifobj;
 }
 
-static int testapp_bidi(struct test_spec *test)
+static int testapp_bidirectional(struct test_spec *test)
 {
 	int res;
 
-	test_spec_set_name(test, "BIDIRECTIONAL");
 	test->ifobj_tx->rx_on = true;
 	test->ifobj_rx->tx_on = true;
 	test->total_steps = 2;
 	if (testapp_validate_traffic(test))
 		return TEST_FAILURE;
 
-	print_verbose("Switching Tx/Rx vectors\n");
+	print_verbose("Switching Tx/Rx direction\n");
 	swap_directions(&test->ifobj_rx, &test->ifobj_tx);
 	res = __testapp_validate_traffic(test, test->ifobj_rx, test->ifobj_tx);
 
@@ -1723,42 +1900,44 @@ static int testapp_bidi(struct test_spec *test)
 	return res;
 }
 
-static void swap_xsk_resources(struct ifobject *ifobj_tx, struct ifobject *ifobj_rx)
+static int swap_xsk_resources(struct test_spec *test)
 {
 	int ret;
 
-	xsk_socket__delete(ifobj_tx->xsk->xsk);
-	xsk_socket__delete(ifobj_rx->xsk->xsk);
-	ifobj_tx->xsk = &ifobj_tx->xsk_arr[1];
-	ifobj_rx->xsk = &ifobj_rx->xsk_arr[1];
+	test->ifobj_tx->xsk_arr[0].pkt_stream = NULL;
+	test->ifobj_rx->xsk_arr[0].pkt_stream = NULL;
+	test->ifobj_tx->xsk_arr[1].pkt_stream = test->tx_pkt_stream_default;
+	test->ifobj_rx->xsk_arr[1].pkt_stream = test->rx_pkt_stream_default;
+	test->ifobj_tx->xsk = &test->ifobj_tx->xsk_arr[1];
+	test->ifobj_rx->xsk = &test->ifobj_rx->xsk_arr[1];
 
-	ret = xsk_update_xskmap(ifobj_rx->xskmap, ifobj_rx->xsk->xsk);
+	ret = xsk_update_xskmap(test->ifobj_rx->xskmap, test->ifobj_rx->xsk->xsk, 0);
 	if (ret)
-		exit_with_error(errno);
+		return TEST_FAILURE;
+
+	return TEST_PASS;
 }
 
-static int testapp_bpf_res(struct test_spec *test)
+static int testapp_xdp_prog_cleanup(struct test_spec *test)
 {
-	test_spec_set_name(test, "BPF_RES");
 	test->total_steps = 2;
 	test->nb_sockets = 2;
 	if (testapp_validate_traffic(test))
 		return TEST_FAILURE;
 
-	swap_xsk_resources(test->ifobj_tx, test->ifobj_rx);
+	if (swap_xsk_resources(test))
+		return TEST_FAILURE;
 	return testapp_validate_traffic(test);
 }
 
 static int testapp_headroom(struct test_spec *test)
 {
-	test_spec_set_name(test, "UMEM_HEADROOM");
 	test->ifobj_rx->umem->frame_headroom = UMEM_HEADROOM_TEST_SIZE;
 	return testapp_validate_traffic(test);
 }
 
 static int testapp_stats_rx_dropped(struct test_spec *test)
 {
-	test_spec_set_name(test, "STAT_RX_DROPPED");
 	if (test->mode == TEST_MODE_ZC) {
 		ksft_test_result_skip("Can not run RX_DROPPED test for ZC mode\n");
 		return TEST_SKIP;
@@ -1774,7 +1953,6 @@ static int testapp_stats_rx_dropped(struct test_spec *test)
 
 static int testapp_stats_tx_invalid_descs(struct test_spec *test)
 {
-	test_spec_set_name(test, "STAT_TX_INVALID");
 	pkt_stream_replace_half(test, XSK_UMEM__INVALID_FRAME_SIZE, 0);
 	test->ifobj_tx->validation_func = validate_tx_invalid_descs;
 	return testapp_validate_traffic(test);
@@ -1782,10 +1960,8 @@ static int testapp_stats_tx_invalid_descs(struct test_spec *test)
 
 static int testapp_stats_rx_full(struct test_spec *test)
 {
-	test_spec_set_name(test, "STAT_RX_FULL");
 	pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, MIN_PKT_SIZE);
-	test->ifobj_rx->pkt_stream = pkt_stream_generate(test->ifobj_rx->umem,
-							 DEFAULT_UMEM_BUFFERS, MIN_PKT_SIZE);
+	test->ifobj_rx->xsk->pkt_stream = pkt_stream_generate(DEFAULT_UMEM_BUFFERS, MIN_PKT_SIZE);
 
 	test->ifobj_rx->xsk->rxqsize = DEFAULT_UMEM_BUFFERS;
 	test->ifobj_rx->release_rx = false;
@@ -1795,19 +1971,16 @@ static int testapp_stats_rx_full(struct test_spec *test)
 
 static int testapp_stats_fill_empty(struct test_spec *test)
 {
-	test_spec_set_name(test, "STAT_RX_FILL_EMPTY");
 	pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, MIN_PKT_SIZE);
-	test->ifobj_rx->pkt_stream = pkt_stream_generate(test->ifobj_rx->umem,
-							 DEFAULT_UMEM_BUFFERS, MIN_PKT_SIZE);
+	test->ifobj_rx->xsk->pkt_stream = pkt_stream_generate(DEFAULT_UMEM_BUFFERS, MIN_PKT_SIZE);
 
 	test->ifobj_rx->use_fill_ring = false;
 	test->ifobj_rx->validation_func = validate_fill_empty;
 	return testapp_validate_traffic(test);
 }
 
-static int testapp_unaligned(struct test_spec *test)
+static int testapp_send_receive_unaligned(struct test_spec *test)
 {
-	test_spec_set_name(test, "UNALIGNED_MODE");
 	test->ifobj_tx->umem->unaligned_mode = true;
 	test->ifobj_rx->umem->unaligned_mode = true;
 	/* Let half of the packets straddle a 4K buffer boundary */
@@ -1816,9 +1989,8 @@ static int testapp_unaligned(struct test_spec *test)
 	return testapp_validate_traffic(test);
 }
 
-static int testapp_unaligned_mb(struct test_spec *test)
+static int testapp_send_receive_unaligned_mb(struct test_spec *test)
 {
-	test_spec_set_name(test, "UNALIGNED_MODE_9K");
 	test->mtu = MAX_ETH_JUMBO_SIZE;
 	test->ifobj_tx->umem->unaligned_mode = true;
 	test->ifobj_rx->umem->unaligned_mode = true;
@@ -1834,9 +2006,8 @@ static int testapp_single_pkt(struct test_spec *test)
 	return testapp_validate_traffic(test);
 }
 
-static int testapp_multi_buffer(struct test_spec *test)
+static int testapp_send_receive_mb(struct test_spec *test)
 {
-	test_spec_set_name(test, "RUN_TO_COMPLETION_9K_PACKETS");
 	test->mtu = MAX_ETH_JUMBO_SIZE;
 	pkt_stream_replace(test, DEFAULT_PKT_CNT, MAX_ETH_JUMBO_SIZE);
 
@@ -1933,7 +2104,6 @@ static int testapp_xdp_drop(struct test_spec *test)
 	struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs;
 	struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs;
 
-	test_spec_set_name(test, "XDP_DROP_HALF");
 	test_spec_set_xdp_prog(test, skel_rx->progs.xsk_xdp_drop, skel_tx->progs.xsk_xdp_drop,
 			       skel_rx->maps.xsk, skel_tx->maps.xsk);
 
@@ -1941,7 +2111,7 @@ static int testapp_xdp_drop(struct test_spec *test)
 	return testapp_validate_traffic(test);
 }
 
-static int testapp_xdp_metadata_count(struct test_spec *test)
+static int testapp_xdp_metadata_copy(struct test_spec *test)
 {
 	struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs;
 	struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs;
@@ -1955,19 +2125,38 @@ static int testapp_xdp_metadata_count(struct test_spec *test)
 	test->ifobj_rx->use_metadata = true;
 
 	data_map = bpf_object__find_map_by_name(skel_rx->obj, "xsk_xdp_.bss");
-	if (!data_map || !bpf_map__is_internal(data_map))
-		exit_with_error(ENOMEM);
+	if (!data_map || !bpf_map__is_internal(data_map)) {
+		ksft_print_msg("Error: could not find bss section of XDP program\n");
+		return TEST_FAILURE;
+	}
 
-	if (bpf_map_update_elem(bpf_map__fd(data_map), &key, &count, BPF_ANY))
-		exit_with_error(errno);
+	if (bpf_map_update_elem(bpf_map__fd(data_map), &key, &count, BPF_ANY)) {
+		ksft_print_msg("Error: could not update count element\n");
+		return TEST_FAILURE;
+	}
 
 	return testapp_validate_traffic(test);
 }
 
-static int testapp_poll_txq_tmout(struct test_spec *test)
+static int testapp_xdp_shared_umem(struct test_spec *test)
 {
-	test_spec_set_name(test, "POLL_TXQ_FULL");
+	struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs;
+	struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs;
+
+	test->total_steps = 1;
+	test->nb_sockets = 2;
+
+	test_spec_set_xdp_prog(test, skel_rx->progs.xsk_xdp_shared_umem,
+			       skel_tx->progs.xsk_xdp_shared_umem,
+			       skel_rx->maps.xsk, skel_tx->maps.xsk);
+
+	pkt_stream_even_odd_sequence(test);
+
+	return testapp_validate_traffic(test);
+}
 
+static int testapp_poll_txq_tmout(struct test_spec *test)
+{
 	test->ifobj_tx->use_poll = true;
 	/* create invalid frame by set umem frame_size and pkt length equal to 2048 */
 	test->ifobj_tx->umem->frame_size = 2048;
@@ -1977,7 +2166,6 @@ static int testapp_poll_txq_tmout(struct test_spec *test)
 
 static int testapp_poll_rxq_tmout(struct test_spec *test)
 {
-	test_spec_set_name(test, "POLL_RXQ_EMPTY");
 	test->ifobj_rx->use_poll = true;
 	return testapp_validate_traffic_single_thread(test, test->ifobj_rx);
 }
@@ -1987,7 +2175,6 @@ static int testapp_too_many_frags(struct test_spec *test)
 	struct pkt pkts[2 * XSK_DESC__MAX_SKB_FRAGS + 2] = {};
 	u32 max_frags, i;
 
-	test_spec_set_name(test, "TOO_MANY_FRAGS");
 	if (test->mode == TEST_MODE_ZC)
 		max_frags = test->ifobj_tx->xdp_zc_max_segs;
 	else
@@ -2054,20 +2241,16 @@ static bool hugepages_present(void)
 	return true;
 }
 
-static void init_iface(struct ifobject *ifobj, const char *dst_mac, const char *src_mac,
-		       thread_func_t func_ptr)
+static void init_iface(struct ifobject *ifobj, thread_func_t func_ptr)
 {
 	LIBBPF_OPTS(bpf_xdp_query_opts, query_opts);
 	int err;
 
-	memcpy(ifobj->dst_mac, dst_mac, ETH_ALEN);
-	memcpy(ifobj->src_mac, src_mac, ETH_ALEN);
-
 	ifobj->func_ptr = func_ptr;
 
 	err = xsk_load_xdp_programs(ifobj);
 	if (err) {
-		printf("Error loading XDP program\n");
+		ksft_print_msg("Error loading XDP program\n");
 		exit_with_error(err);
 	}
 
@@ -2091,138 +2274,98 @@ static void init_iface(struct ifobject *ifobj, const char *dst_mac, const char *
 	}
 }
 
-static void run_pkt_test(struct test_spec *test, enum test_mode mode, enum test_type type)
-{
-	int ret = TEST_SKIP;
-
-	switch (type) {
-	case TEST_TYPE_STATS_RX_DROPPED:
-		ret = testapp_stats_rx_dropped(test);
-		break;
-	case TEST_TYPE_STATS_TX_INVALID_DESCS:
-		ret = testapp_stats_tx_invalid_descs(test);
-		break;
-	case TEST_TYPE_STATS_RX_FULL:
-		ret = testapp_stats_rx_full(test);
-		break;
-	case TEST_TYPE_STATS_FILL_EMPTY:
-		ret = testapp_stats_fill_empty(test);
-		break;
-	case TEST_TYPE_TEARDOWN:
-		ret = testapp_teardown(test);
-		break;
-	case TEST_TYPE_BIDI:
-		ret = testapp_bidi(test);
-		break;
-	case TEST_TYPE_BPF_RES:
-		ret = testapp_bpf_res(test);
-		break;
-	case TEST_TYPE_RUN_TO_COMPLETION:
-		test_spec_set_name(test, "RUN_TO_COMPLETION");
-		ret = testapp_validate_traffic(test);
-		break;
-	case TEST_TYPE_RUN_TO_COMPLETION_MB:
-		ret = testapp_multi_buffer(test);
-		break;
-	case TEST_TYPE_RUN_TO_COMPLETION_SINGLE_PKT:
-		test_spec_set_name(test, "RUN_TO_COMPLETION_SINGLE_PKT");
-		ret = testapp_single_pkt(test);
-		break;
-	case TEST_TYPE_RUN_TO_COMPLETION_2K_FRAME:
-		test_spec_set_name(test, "RUN_TO_COMPLETION_2K_FRAME_SIZE");
-		test->ifobj_tx->umem->frame_size = 2048;
-		test->ifobj_rx->umem->frame_size = 2048;
-		pkt_stream_replace(test, DEFAULT_PKT_CNT, MIN_PKT_SIZE);
-		ret = testapp_validate_traffic(test);
-		break;
-	case TEST_TYPE_RX_POLL:
-		test->ifobj_rx->use_poll = true;
-		test_spec_set_name(test, "POLL_RX");
-		ret = testapp_validate_traffic(test);
-		break;
-	case TEST_TYPE_TX_POLL:
-		test->ifobj_tx->use_poll = true;
-		test_spec_set_name(test, "POLL_TX");
-		ret = testapp_validate_traffic(test);
-		break;
-	case TEST_TYPE_POLL_TXQ_TMOUT:
-		ret = testapp_poll_txq_tmout(test);
-		break;
-	case TEST_TYPE_POLL_RXQ_TMOUT:
-		ret = testapp_poll_rxq_tmout(test);
-		break;
-	case TEST_TYPE_ALIGNED_INV_DESC:
-		test_spec_set_name(test, "ALIGNED_INV_DESC");
-		ret = testapp_invalid_desc(test);
-		break;
-	case TEST_TYPE_ALIGNED_INV_DESC_2K_FRAME:
-		test_spec_set_name(test, "ALIGNED_INV_DESC_2K_FRAME_SIZE");
-		test->ifobj_tx->umem->frame_size = 2048;
-		test->ifobj_rx->umem->frame_size = 2048;
-		ret = testapp_invalid_desc(test);
-		break;
-	case TEST_TYPE_UNALIGNED_INV_DESC:
-		test_spec_set_name(test, "UNALIGNED_INV_DESC");
-		test->ifobj_tx->umem->unaligned_mode = true;
-		test->ifobj_rx->umem->unaligned_mode = true;
-		ret = testapp_invalid_desc(test);
-		break;
-	case TEST_TYPE_UNALIGNED_INV_DESC_4K1_FRAME: {
-		u64 page_size, umem_size;
-
-		test_spec_set_name(test, "UNALIGNED_INV_DESC_4K1_FRAME_SIZE");
-		/* Odd frame size so the UMEM doesn't end near a page boundary. */
-		test->ifobj_tx->umem->frame_size = 4001;
-		test->ifobj_rx->umem->frame_size = 4001;
-		test->ifobj_tx->umem->unaligned_mode = true;
-		test->ifobj_rx->umem->unaligned_mode = true;
-		/* This test exists to test descriptors that staddle the end of
-		 * the UMEM but not a page.
-		 */
-		page_size = sysconf(_SC_PAGESIZE);
-		umem_size = test->ifobj_tx->umem->num_frames * test->ifobj_tx->umem->frame_size;
-		assert(umem_size % page_size > MIN_PKT_SIZE);
-		assert(umem_size % page_size < page_size - MIN_PKT_SIZE);
-		ret = testapp_invalid_desc(test);
-		break;
-	}
-	case TEST_TYPE_ALIGNED_INV_DESC_MB:
-		test_spec_set_name(test, "ALIGNED_INV_DESC_MULTI_BUFF");
-		ret = testapp_invalid_desc_mb(test);
-		break;
-	case TEST_TYPE_UNALIGNED_INV_DESC_MB:
-		test_spec_set_name(test, "UNALIGNED_INV_DESC_MULTI_BUFF");
-		test->ifobj_tx->umem->unaligned_mode = true;
-		test->ifobj_rx->umem->unaligned_mode = true;
-		ret = testapp_invalid_desc_mb(test);
-		break;
-	case TEST_TYPE_UNALIGNED:
-		ret = testapp_unaligned(test);
-		break;
-	case TEST_TYPE_UNALIGNED_MB:
-		ret = testapp_unaligned_mb(test);
-		break;
-	case TEST_TYPE_HEADROOM:
-		ret = testapp_headroom(test);
-		break;
-	case TEST_TYPE_XDP_DROP_HALF:
-		ret = testapp_xdp_drop(test);
-		break;
-	case TEST_TYPE_XDP_METADATA_COUNT:
-		test_spec_set_name(test, "XDP_METADATA_COUNT");
-		ret = testapp_xdp_metadata_count(test);
-		break;
-	case TEST_TYPE_XDP_METADATA_COUNT_MB:
-		test_spec_set_name(test, "XDP_METADATA_COUNT_MULTI_BUFF");
-		test->mtu = MAX_ETH_JUMBO_SIZE;
-		ret = testapp_xdp_metadata_count(test);
-		break;
-	case TEST_TYPE_TOO_MANY_FRAGS:
-		ret = testapp_too_many_frags(test);
-		break;
-	default:
-		break;
-	}
+static int testapp_send_receive(struct test_spec *test)
+{
+	return testapp_validate_traffic(test);
+}
+
+static int testapp_send_receive_2k_frame(struct test_spec *test)
+{
+	test->ifobj_tx->umem->frame_size = 2048;
+	test->ifobj_rx->umem->frame_size = 2048;
+	pkt_stream_replace(test, DEFAULT_PKT_CNT, MIN_PKT_SIZE);
+	return testapp_validate_traffic(test);
+}
+
+static int testapp_poll_rx(struct test_spec *test)
+{
+	test->ifobj_rx->use_poll = true;
+	return testapp_validate_traffic(test);
+}
+
+static int testapp_poll_tx(struct test_spec *test)
+{
+	test->ifobj_tx->use_poll = true;
+	return testapp_validate_traffic(test);
+}
+
+static int testapp_aligned_inv_desc(struct test_spec *test)
+{
+	return testapp_invalid_desc(test);
+}
+
+static int testapp_aligned_inv_desc_2k_frame(struct test_spec *test)
+{
+	test->ifobj_tx->umem->frame_size = 2048;
+	test->ifobj_rx->umem->frame_size = 2048;
+	return testapp_invalid_desc(test);
+}
+
+static int testapp_unaligned_inv_desc(struct test_spec *test)
+{
+	test->ifobj_tx->umem->unaligned_mode = true;
+	test->ifobj_rx->umem->unaligned_mode = true;
+	return testapp_invalid_desc(test);
+}
+
+static int testapp_unaligned_inv_desc_4001_frame(struct test_spec *test)
+{
+	u64 page_size, umem_size;
+
+	/* Odd frame size so the UMEM doesn't end near a page boundary. */
+	test->ifobj_tx->umem->frame_size = 4001;
+	test->ifobj_rx->umem->frame_size = 4001;
+	test->ifobj_tx->umem->unaligned_mode = true;
+	test->ifobj_rx->umem->unaligned_mode = true;
+	/* This test exists to test descriptors that staddle the end of
+	 * the UMEM but not a page.
+	 */
+	page_size = sysconf(_SC_PAGESIZE);
+	umem_size = test->ifobj_tx->umem->num_frames * test->ifobj_tx->umem->frame_size;
+	assert(umem_size % page_size > MIN_PKT_SIZE);
+	assert(umem_size % page_size < page_size - MIN_PKT_SIZE);
+
+	return testapp_invalid_desc(test);
+}
+
+static int testapp_aligned_inv_desc_mb(struct test_spec *test)
+{
+	return testapp_invalid_desc_mb(test);
+}
+
+static int testapp_unaligned_inv_desc_mb(struct test_spec *test)
+{
+	test->ifobj_tx->umem->unaligned_mode = true;
+	test->ifobj_rx->umem->unaligned_mode = true;
+	return testapp_invalid_desc_mb(test);
+}
+
+static int testapp_xdp_metadata(struct test_spec *test)
+{
+	return testapp_xdp_metadata_copy(test);
+}
+
+static int testapp_xdp_metadata_mb(struct test_spec *test)
+{
+	test->mtu = MAX_ETH_JUMBO_SIZE;
+	return testapp_xdp_metadata_copy(test);
+}
+
+static void run_pkt_test(struct test_spec *test)
+{
+	int ret;
+
+	ret = test->test_func(test);
 
 	if (ret == TEST_PASS)
 		ksft_test_result_pass("PASS: %s %s%s\n", mode_string(test), busy_poll_string(test),
@@ -2290,13 +2433,56 @@ static bool is_xdp_supported(int ifindex)
 	return true;
 }
 
+static const struct test_spec tests[] = {
+	{.name = "SEND_RECEIVE", .test_func = testapp_send_receive},
+	{.name = "SEND_RECEIVE_2K_FRAME", .test_func = testapp_send_receive_2k_frame},
+	{.name = "SEND_RECEIVE_SINGLE_PKT", .test_func = testapp_single_pkt},
+	{.name = "POLL_RX", .test_func = testapp_poll_rx},
+	{.name = "POLL_TX", .test_func = testapp_poll_tx},
+	{.name = "POLL_RXQ_FULL", .test_func = testapp_poll_rxq_tmout},
+	{.name = "POLL_TXQ_FULL", .test_func = testapp_poll_txq_tmout},
+	{.name = "SEND_RECEIVE_UNALIGNED", .test_func = testapp_send_receive_unaligned},
+	{.name = "ALIGNED_INV_DESC", .test_func = testapp_aligned_inv_desc},
+	{.name = "ALIGNED_INV_DESC_2K_FRAME_SIZE", .test_func = testapp_aligned_inv_desc_2k_frame},
+	{.name = "UNALIGNED_INV_DESC", .test_func = testapp_unaligned_inv_desc},
+	{.name = "UNALIGNED_INV_DESC_4001_FRAME_SIZE",
+	 .test_func = testapp_unaligned_inv_desc_4001_frame},
+	{.name = "UMEM_HEADROOM", .test_func = testapp_headroom},
+	{.name = "TEARDOWN", .test_func = testapp_teardown},
+	{.name = "BIDIRECTIONAL", .test_func = testapp_bidirectional},
+	{.name = "STAT_RX_DROPPED", .test_func = testapp_stats_rx_dropped},
+	{.name = "STAT_TX_INVALID", .test_func = testapp_stats_tx_invalid_descs},
+	{.name = "STAT_RX_FULL", .test_func = testapp_stats_rx_full},
+	{.name = "STAT_FILL_EMPTY", .test_func = testapp_stats_fill_empty},
+	{.name = "XDP_PROG_CLEANUP", .test_func = testapp_xdp_prog_cleanup},
+	{.name = "XDP_DROP_HALF", .test_func = testapp_xdp_drop},
+	{.name = "XDP_SHARED_UMEM", .test_func = testapp_xdp_shared_umem},
+	{.name = "XDP_METADATA_COPY", .test_func = testapp_xdp_metadata},
+	{.name = "XDP_METADATA_COPY_MULTI_BUFF", .test_func = testapp_xdp_metadata_mb},
+	{.name = "SEND_RECEIVE_9K_PACKETS", .test_func = testapp_send_receive_mb},
+	{.name = "SEND_RECEIVE_UNALIGNED_9K_PACKETS",
+	 .test_func = testapp_send_receive_unaligned_mb},
+	{.name = "ALIGNED_INV_DESC_MULTI_BUFF", .test_func = testapp_aligned_inv_desc_mb},
+	{.name = "UNALIGNED_INV_DESC_MULTI_BUFF", .test_func = testapp_unaligned_inv_desc_mb},
+	{.name = "TOO_MANY_FRAGS", .test_func = testapp_too_many_frags},
+};
+
+static void print_tests(void)
+{
+	u32 i;
+
+	printf("Tests:\n");
+	for (i = 0; i < ARRAY_SIZE(tests); i++)
+		printf("%u: %s\n", i, tests[i].name);
+}
+
 int main(int argc, char **argv)
 {
 	struct pkt_stream *rx_pkt_stream_default;
 	struct pkt_stream *tx_pkt_stream_default;
 	struct ifobject *ifobj_tx, *ifobj_rx;
+	u32 i, j, failed_tests = 0, nb_tests;
 	int modes = TEST_MODE_SKB + 1;
-	u32 i, j, failed_tests = 0;
 	struct test_spec test;
 	bool shared_netdev;
 
@@ -2314,14 +2500,21 @@ int main(int argc, char **argv)
 
 	parse_command_line(ifobj_tx, ifobj_rx, argc, argv);
 
+	if (opt_print_tests) {
+		print_tests();
+		ksft_exit_xpass();
+	}
+	if (opt_run_test != RUN_ALL_TESTS && opt_run_test >= ARRAY_SIZE(tests)) {
+		ksft_print_msg("Error: test %u does not exist.\n", opt_run_test);
+		ksft_exit_xfail();
+	}
+
 	shared_netdev = (ifobj_tx->ifindex == ifobj_rx->ifindex);
 	ifobj_tx->shared_umem = shared_netdev;
 	ifobj_rx->shared_umem = shared_netdev;
 
-	if (!validate_interface(ifobj_tx) || !validate_interface(ifobj_rx)) {
-		usage(basename(argv[0]));
-		ksft_exit_xfail();
-	}
+	if (!validate_interface(ifobj_tx) || !validate_interface(ifobj_rx))
+		print_usage(argv);
 
 	if (is_xdp_supported(ifobj_tx->ifindex)) {
 		modes++;
@@ -2329,23 +2522,46 @@ int main(int argc, char **argv)
 			modes++;
 	}
 
-	init_iface(ifobj_rx, MAC1, MAC2, worker_testapp_validate_rx);
-	init_iface(ifobj_tx, MAC2, MAC1, worker_testapp_validate_tx);
+	init_iface(ifobj_rx, worker_testapp_validate_rx);
+	init_iface(ifobj_tx, worker_testapp_validate_tx);
 
-	test_spec_init(&test, ifobj_tx, ifobj_rx, 0);
-	tx_pkt_stream_default = pkt_stream_generate(ifobj_tx->umem, DEFAULT_PKT_CNT, MIN_PKT_SIZE);
-	rx_pkt_stream_default = pkt_stream_generate(ifobj_rx->umem, DEFAULT_PKT_CNT, MIN_PKT_SIZE);
+	test_spec_init(&test, ifobj_tx, ifobj_rx, 0, &tests[0]);
+	tx_pkt_stream_default = pkt_stream_generate(DEFAULT_PKT_CNT, MIN_PKT_SIZE);
+	rx_pkt_stream_default = pkt_stream_generate(DEFAULT_PKT_CNT, MIN_PKT_SIZE);
 	if (!tx_pkt_stream_default || !rx_pkt_stream_default)
 		exit_with_error(ENOMEM);
 	test.tx_pkt_stream_default = tx_pkt_stream_default;
 	test.rx_pkt_stream_default = rx_pkt_stream_default;
 
-	ksft_set_plan(modes * TEST_TYPE_MAX);
+	if (opt_run_test == RUN_ALL_TESTS)
+		nb_tests = ARRAY_SIZE(tests);
+	else
+		nb_tests = 1;
+	if (opt_mode == TEST_MODE_ALL) {
+		ksft_set_plan(modes * nb_tests);
+	} else {
+		if (opt_mode == TEST_MODE_DRV && modes <= TEST_MODE_DRV) {
+			ksft_print_msg("Error: XDP_DRV mode not supported.\n");
+			ksft_exit_xfail();
+		}
+		if (opt_mode == TEST_MODE_ZC && modes <= TEST_MODE_ZC) {
+			ksft_print_msg("Error: zero-copy mode not supported.\n");
+			ksft_exit_xfail();
+		}
+
+		ksft_set_plan(nb_tests);
+	}
 
 	for (i = 0; i < modes; i++) {
-		for (j = 0; j < TEST_TYPE_MAX; j++) {
-			test_spec_init(&test, ifobj_tx, ifobj_rx, i);
-			run_pkt_test(&test, i, j);
+		if (opt_mode != TEST_MODE_ALL && i != opt_mode)
+			continue;
+
+		for (j = 0; j < ARRAY_SIZE(tests); j++) {
+			if (opt_run_test != RUN_ALL_TESTS && j != opt_run_test)
+				continue;
+
+			test_spec_init(&test, ifobj_tx, ifobj_rx, i, &tests[j]);
+			run_pkt_test(&test);
 			usleep(USLEEP_MAX);
 
 			if (test.fail)
diff --git a/tools/testing/selftests/bpf/xskxceiver.h b/tools/testing/selftests/bpf/xskxceiver.h
index 233b66cef64a..f174df2d693f 100644
--- a/tools/testing/selftests/bpf/xskxceiver.h
+++ b/tools/testing/selftests/bpf/xskxceiver.h
@@ -5,7 +5,10 @@
 #ifndef XSKXCEIVER_H_
 #define XSKXCEIVER_H_
 
+#include <limits.h>
+
 #include "xsk_xdp_progs.skel.h"
+#include "xsk_xdp_common.h"
 
 #ifndef SOL_XDP
 #define SOL_XDP 283
@@ -33,8 +36,7 @@
 #define TEST_SKIP 2
 #define MAX_INTERFACES 2
 #define MAX_INTERFACE_NAME_CHARS 16
-#define MAX_SOCKETS 2
-#define MAX_TEST_NAME_SIZE 32
+#define MAX_TEST_NAME_SIZE 48
 #define MAX_TEARDOWN_ITER 10
 #define PKT_HDR_SIZE (sizeof(struct ethhdr) + 2) /* Just to align the data in the packet */
 #define MIN_PKT_SIZE 64
@@ -56,6 +58,8 @@
 #define XSK_DESC__MAX_SKB_FRAGS 18
 #define HUGEPAGE_SIZE (2 * 1024 * 1024)
 #define PKT_DUMP_NB_TO_PRINT 16
+#define RUN_ALL_TESTS UINT_MAX
+#define NUM_MAC_ADDRESSES 4
 
 #define print_verbose(x...) do { if (opt_verbose) ksft_print_msg(x); } while (0)
 
@@ -63,43 +67,9 @@ enum test_mode {
 	TEST_MODE_SKB,
 	TEST_MODE_DRV,
 	TEST_MODE_ZC,
-	TEST_MODE_MAX
-};
-
-enum test_type {
-	TEST_TYPE_RUN_TO_COMPLETION,
-	TEST_TYPE_RUN_TO_COMPLETION_2K_FRAME,
-	TEST_TYPE_RUN_TO_COMPLETION_SINGLE_PKT,
-	TEST_TYPE_RX_POLL,
-	TEST_TYPE_TX_POLL,
-	TEST_TYPE_POLL_RXQ_TMOUT,
-	TEST_TYPE_POLL_TXQ_TMOUT,
-	TEST_TYPE_UNALIGNED,
-	TEST_TYPE_ALIGNED_INV_DESC,
-	TEST_TYPE_ALIGNED_INV_DESC_2K_FRAME,
-	TEST_TYPE_UNALIGNED_INV_DESC,
-	TEST_TYPE_UNALIGNED_INV_DESC_4K1_FRAME,
-	TEST_TYPE_HEADROOM,
-	TEST_TYPE_TEARDOWN,
-	TEST_TYPE_BIDI,
-	TEST_TYPE_STATS_RX_DROPPED,
-	TEST_TYPE_STATS_TX_INVALID_DESCS,
-	TEST_TYPE_STATS_RX_FULL,
-	TEST_TYPE_STATS_FILL_EMPTY,
-	TEST_TYPE_BPF_RES,
-	TEST_TYPE_XDP_DROP_HALF,
-	TEST_TYPE_XDP_METADATA_COUNT,
-	TEST_TYPE_XDP_METADATA_COUNT_MB,
-	TEST_TYPE_RUN_TO_COMPLETION_MB,
-	TEST_TYPE_UNALIGNED_MB,
-	TEST_TYPE_ALIGNED_INV_DESC_MB,
-	TEST_TYPE_UNALIGNED_INV_DESC_MB,
-	TEST_TYPE_TOO_MANY_FRAGS,
-	TEST_TYPE_MAX
+	TEST_MODE_ALL
 };
 
-static bool opt_verbose;
-
 struct xsk_umem_info {
 	struct xsk_ring_prod fq;
 	struct xsk_ring_cons cq;
@@ -118,8 +88,11 @@ struct xsk_socket_info {
 	struct xsk_ring_prod tx;
 	struct xsk_umem_info *umem;
 	struct xsk_socket *xsk;
+	struct pkt_stream *pkt_stream;
 	u32 outstanding_tx;
 	u32 rxqsize;
+	u8 dst_mac[ETH_ALEN];
+	u8 src_mac[ETH_ALEN];
 };
 
 struct pkt {
@@ -135,12 +108,16 @@ struct pkt_stream {
 	u32 current_pkt_nb;
 	struct pkt *pkts;
 	u32 max_pkt_len;
+	u32 nb_rx_pkts;
+	u32 nb_valid_entries;
 	bool verbatim;
 };
 
 struct ifobject;
+struct test_spec;
 typedef int (*validation_func_t)(struct ifobject *ifobj);
 typedef void *(*thread_func_t)(void *arg);
+typedef int (*test_func_t)(struct test_spec *test);
 
 struct ifobject {
 	char ifname[MAX_INTERFACE_NAME_CHARS];
@@ -149,7 +126,6 @@ struct ifobject {
 	struct xsk_umem_info *umem;
 	thread_func_t func_ptr;
 	validation_func_t validation_func;
-	struct pkt_stream *pkt_stream;
 	struct xsk_xdp_progs *xdp_progs;
 	struct bpf_map *xskmap;
 	struct bpf_program *xdp_prog;
@@ -169,8 +145,6 @@ struct ifobject {
 	bool unaligned_supp;
 	bool multi_buff_supp;
 	bool multi_buff_zc_supp;
-	u8 dst_mac[ETH_ALEN];
-	u8 src_mac[ETH_ALEN];
 };
 
 struct test_spec {
@@ -182,6 +156,7 @@ struct test_spec {
 	struct bpf_program *xdp_prog_tx;
 	struct bpf_map *xskmap_rx;
 	struct bpf_map *xskmap_tx;
+	test_func_t test_func;
 	int mtu;
 	u16 total_steps;
 	u16 current_step;
@@ -196,4 +171,6 @@ pthread_mutex_t pacing_mutex = PTHREAD_MUTEX_INITIALIZER;
 
 int pkts_in_flight;
 
+static const u8 g_mac[ETH_ALEN] = {0x55, 0x44, 0x33, 0x22, 0x11, 0x00};
+
 #endif				/* XSKXCEIVER_H_ */
diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh
index 488f4964365e..5f2b3f6c0d74 100755
--- a/tools/testing/selftests/net/rtnetlink.sh
+++ b/tools/testing/selftests/net/rtnetlink.sh
@@ -31,6 +31,9 @@ ALL_TESTS="
 "
 
 devdummy="test-dummy0"
+VERBOSE=0
+PAUSE=no
+PAUSE_ON_FAIL=no
 
 # Kselftest framework requirement - SKIP code is 4.
 ksft_skip=4
@@ -51,35 +54,102 @@ check_fail()
 	fi
 }
 
+run_cmd_common()
+{
+	local cmd="$*"
+	local out
+	if [ "$VERBOSE" = "1" ]; then
+		echo "COMMAND: ${cmd}"
+	fi
+	out=$($cmd 2>&1)
+	rc=$?
+	if [ "$VERBOSE" = "1" -a -n "$out" ]; then
+		echo "    $out"
+	fi
+	return $rc
+}
+
+run_cmd() {
+	run_cmd_common "$@"
+	rc=$?
+	check_err $rc
+	return $rc
+}
+run_cmd_fail()
+{
+	run_cmd_common "$@"
+	rc=$?
+	check_fail $rc
+	return $rc
+}
+
+run_cmd_grep_common()
+{
+	local find="$1"; shift
+	local cmd="$*"
+	local out
+	if [ "$VERBOSE" = "1" ]; then
+		echo "COMMAND: ${cmd} 2>&1 | grep -q '${find}'"
+	fi
+	out=$($cmd 2>&1 | grep -q "${find}" 2>&1)
+	return $?
+}
+
+run_cmd_grep() {
+	run_cmd_grep_common "$@"
+	rc=$?
+	check_err $rc
+	return $rc
+}
+
+run_cmd_grep_fail()
+{
+	run_cmd_grep_common "$@"
+	rc=$?
+	check_fail $rc
+	return $rc
+}
+
+end_test()
+{
+	echo "$*"
+	[ "${VERBOSE}" = "1" ] && echo
+
+	if [[ $ret -ne 0 ]] && [[ "${PAUSE_ON_FAIL}" = "yes" ]]; then
+		echo "Hit enter to continue"
+		read a
+	fi;
+
+	if [ "${PAUSE}" = "yes" ]; then
+		echo "Hit enter to continue"
+		read a
+	fi
+
+}
+
+
 kci_add_dummy()
 {
-	ip link add name "$devdummy" type dummy
-	check_err $?
-	ip link set "$devdummy" up
-	check_err $?
+	run_cmd ip link add name "$devdummy" type dummy
+	run_cmd ip link set "$devdummy" up
 }
 
 kci_del_dummy()
 {
-	ip link del dev "$devdummy"
-	check_err $?
+	run_cmd ip link del dev "$devdummy"
 }
 
 kci_test_netconf()
 {
 	dev="$1"
 	r=$ret
-
-	ip netconf show dev "$dev" > /dev/null
-	check_err $?
-
+	run_cmd ip netconf show dev "$dev"
 	for f in 4 6; do
-		ip -$f netconf show dev "$dev" > /dev/null
-		check_err $?
+		run_cmd ip -$f netconf show dev "$dev"
 	done
 
 	if [ $ret -ne 0 ] ;then
-		echo "FAIL: ip netconf show $dev"
+		end_test "FAIL: ip netconf show $dev"
 		test $r -eq 0 && ret=0
 		return 1
 	fi
@@ -92,43 +162,27 @@ kci_test_bridge()
 	vlandev="testbr-vlan1"
 
 	local ret=0
-	ip link add name "$devbr" type bridge
-	check_err $?
-
-	ip link set dev "$devdummy" master "$devbr"
-	check_err $?
-
-	ip link set "$devbr" up
-	check_err $?
-
-	ip link add link "$devbr" name "$vlandev" type vlan id 1
-	check_err $?
-	ip addr add dev "$vlandev" 10.200.7.23/30
-	check_err $?
-	ip -6 addr add dev "$vlandev" dead:42::1234/64
-	check_err $?
-	ip -d link > /dev/null
-	check_err $?
-	ip r s t all > /dev/null
-	check_err $?
+	run_cmd ip link add name "$devbr" type bridge
+	run_cmd ip link set dev "$devdummy" master "$devbr"
+	run_cmd ip link set "$devbr" up
+	run_cmd ip link add link "$devbr" name "$vlandev" type vlan id 1
+	run_cmd ip addr add dev "$vlandev" 10.200.7.23/30
+	run_cmd ip -6 addr add dev "$vlandev" dead:42::1234/64
+	run_cmd ip -d link
+	run_cmd ip r s t all
 
 	for name in "$devbr" "$vlandev" "$devdummy" ; do
 		kci_test_netconf "$name"
 	done
-
-	ip -6 addr del dev "$vlandev" dead:42::1234/64
-	check_err $?
-
-	ip link del dev "$vlandev"
-	check_err $?
-	ip link del dev "$devbr"
-	check_err $?
+	run_cmd ip -6 addr del dev "$vlandev" dead:42::1234/64
+	run_cmd ip link del dev "$vlandev"
+	run_cmd ip link del dev "$devbr"
 
 	if [ $ret -ne 0 ];then
-		echo "FAIL: bridge setup"
+		end_test "FAIL: bridge setup"
 		return 1
 	fi
-	echo "PASS: bridge setup"
+	end_test "PASS: bridge setup"
 
 }
 
@@ -139,34 +193,23 @@ kci_test_gre()
 	loc=10.0.0.1
 
 	local ret=0
-	ip tunnel add $gredev mode gre remote $rem local $loc ttl 1
-	check_err $?
-	ip link set $gredev up
-	check_err $?
-	ip addr add 10.23.7.10 dev $gredev
-	check_err $?
-	ip route add 10.23.8.0/30 dev $gredev
-	check_err $?
-	ip addr add dev "$devdummy" 10.23.7.11/24
-	check_err $?
-	ip link > /dev/null
-	check_err $?
-	ip addr > /dev/null
-	check_err $?
+	run_cmd ip tunnel add $gredev mode gre remote $rem local $loc ttl 1
+	run_cmd ip link set $gredev up
+	run_cmd ip addr add 10.23.7.10 dev $gredev
+	run_cmd ip route add 10.23.8.0/30 dev $gredev
+	run_cmd ip addr add dev "$devdummy" 10.23.7.11/24
+	run_cmd ip link
+	run_cmd ip addr
 
 	kci_test_netconf "$gredev"
-
-	ip addr del dev "$devdummy" 10.23.7.11/24
-	check_err $?
-
-	ip link del $gredev
-	check_err $?
+	run_cmd ip addr del dev "$devdummy" 10.23.7.11/24
+	run_cmd ip link del $gredev
 
 	if [ $ret -ne 0 ];then
-		echo "FAIL: gre tunnel endpoint"
+		end_test "FAIL: gre tunnel endpoint"
 		return 1
 	fi
-	echo "PASS: gre tunnel endpoint"
+	end_test "PASS: gre tunnel endpoint"
 }
 
 # tc uses rtnetlink too, for full tc testing
@@ -176,56 +219,40 @@ kci_test_tc()
 	dev=lo
 	local ret=0
 
-	tc qdisc add dev "$dev" root handle 1: htb
-	check_err $?
-	tc class add dev "$dev" parent 1: classid 1:10 htb rate 1mbit
-	check_err $?
-	tc filter add dev "$dev" parent 1:0 prio 5 handle ffe: protocol ip u32 divisor 256
-	check_err $?
-	tc filter add dev "$dev" parent 1:0 prio 5 handle ffd: protocol ip u32 divisor 256
-	check_err $?
-	tc filter add dev "$dev" parent 1:0 prio 5 handle ffc: protocol ip u32 divisor 256
-	check_err $?
-	tc filter add dev "$dev" protocol ip parent 1: prio 5 handle ffe:2:3 u32 ht ffe:2: match ip src 10.0.0.3 flowid 1:10
-	check_err $?
-	tc filter add dev "$dev" protocol ip parent 1: prio 5 handle ffe:2:2 u32 ht ffe:2: match ip src 10.0.0.2 flowid 1:10
-	check_err $?
-	tc filter show dev "$dev" parent  1:0 > /dev/null
-	check_err $?
-	tc filter del dev "$dev" protocol ip parent 1: prio 5 handle ffe:2:3 u32
-	check_err $?
-	tc filter show dev "$dev" parent  1:0 > /dev/null
-	check_err $?
-	tc qdisc del dev "$dev" root handle 1: htb
-	check_err $?
+	run_cmd tc qdisc add dev "$dev" root handle 1: htb
+	run_cmd tc class add dev "$dev" parent 1: classid 1:10 htb rate 1mbit
+	run_cmd tc filter add dev "$dev" parent 1:0 prio 5 handle ffe: protocol ip u32 divisor 256
+	run_cmd tc filter add dev "$dev" parent 1:0 prio 5 handle ffd: protocol ip u32 divisor 256
+	run_cmd tc filter add dev "$dev" parent 1:0 prio 5 handle ffc: protocol ip u32 divisor 256
+	run_cmd tc filter add dev "$dev" protocol ip parent 1: prio 5 handle ffe:2:3 u32 ht ffe:2: match ip src 10.0.0.3 flowid 1:10
+	run_cmd tc filter add dev "$dev" protocol ip parent 1: prio 5 handle ffe:2:2 u32 ht ffe:2: match ip src 10.0.0.2 flowid 1:10
+	run_cmd tc filter show dev "$dev" parent  1:0
+	run_cmd tc filter del dev "$dev" protocol ip parent 1: prio 5 handle ffe:2:3 u32
+	run_cmd tc filter show dev "$dev" parent  1:0
+	run_cmd tc qdisc del dev "$dev" root handle 1: htb
 
 	if [ $ret -ne 0 ];then
-		echo "FAIL: tc htb hierarchy"
+		end_test "FAIL: tc htb hierarchy"
 		return 1
 	fi
-	echo "PASS: tc htb hierarchy"
+	end_test "PASS: tc htb hierarchy"
 
 }
 
 kci_test_polrouting()
 {
 	local ret=0
-	ip rule add fwmark 1 lookup 100
-	check_err $?
-	ip route add local 0.0.0.0/0 dev lo table 100
-	check_err $?
-	ip r s t all > /dev/null
-	check_err $?
-	ip rule del fwmark 1 lookup 100
-	check_err $?
-	ip route del local 0.0.0.0/0 dev lo table 100
-	check_err $?
+	run_cmd ip rule add fwmark 1 lookup 100
+	run_cmd ip route add local 0.0.0.0/0 dev lo table 100
+	run_cmd ip r s t all
+	run_cmd ip rule del fwmark 1 lookup 100
+	run_cmd ip route del local 0.0.0.0/0 dev lo table 100
 
 	if [ $ret -ne 0 ];then
-		echo "FAIL: policy route test"
+		end_test "FAIL: policy route test"
 		return 1
 	fi
-	echo "PASS: policy routing"
+	end_test "PASS: policy routing"
 }
 
 kci_test_route_get()
@@ -233,65 +260,51 @@ kci_test_route_get()
 	local hash_policy=$(sysctl -n net.ipv4.fib_multipath_hash_policy)
 
 	local ret=0
-
-	ip route get 127.0.0.1 > /dev/null
-	check_err $?
-	ip route get 127.0.0.1 dev "$devdummy" > /dev/null
-	check_err $?
-	ip route get ::1 > /dev/null
-	check_err $?
-	ip route get fe80::1 dev "$devdummy" > /dev/null
-	check_err $?
-	ip route get 127.0.0.1 from 127.0.0.1 oif lo tos 0x10 mark 0x1 > /dev/null
-	check_err $?
-	ip route get ::1 from ::1 iif lo oif lo tos 0x10 mark 0x1 > /dev/null
-	check_err $?
-	ip addr add dev "$devdummy" 10.23.7.11/24
-	check_err $?
-	ip route get 10.23.7.11 from 10.23.7.12 iif "$devdummy" > /dev/null
-	check_err $?
-	ip route add 10.23.8.0/24 \
+	run_cmd ip route get 127.0.0.1
+	run_cmd ip route get 127.0.0.1 dev "$devdummy"
+	run_cmd ip route get ::1
+	run_cmd ip route get fe80::1 dev "$devdummy"
+	run_cmd ip route get 127.0.0.1 from 127.0.0.1 oif lo tos 0x10 mark 0x1
+	run_cmd ip route get ::1 from ::1 iif lo oif lo tos 0x10 mark 0x1
+	run_cmd ip addr add dev "$devdummy" 10.23.7.11/24
+	run_cmd ip route get 10.23.7.11 from 10.23.7.12 iif "$devdummy"
+	run_cmd ip route add 10.23.8.0/24 \
 		nexthop via 10.23.7.13 dev "$devdummy" \
 		nexthop via 10.23.7.14 dev "$devdummy"
-	check_err $?
+
 	sysctl -wq net.ipv4.fib_multipath_hash_policy=0
-	ip route get 10.23.8.11 > /dev/null
-	check_err $?
+	run_cmd ip route get 10.23.8.11
 	sysctl -wq net.ipv4.fib_multipath_hash_policy=1
-	ip route get 10.23.8.11 > /dev/null
-	check_err $?
+	run_cmd ip route get 10.23.8.11
 	sysctl -wq net.ipv4.fib_multipath_hash_policy="$hash_policy"
-	ip route del 10.23.8.0/24
-	check_err $?
-	ip addr del dev "$devdummy" 10.23.7.11/24
-	check_err $?
+	run_cmd ip route del 10.23.8.0/24
+	run_cmd ip addr del dev "$devdummy" 10.23.7.11/24
+
 
 	if [ $ret -ne 0 ];then
-		echo "FAIL: route get"
+		end_test "FAIL: route get"
 		return 1
 	fi
 
-	echo "PASS: route get"
+	end_test "PASS: route get"
 }
 
 kci_test_addrlft()
 {
 	for i in $(seq 10 100) ;do
 		lft=$(((RANDOM%3) + 1))
-		ip addr add 10.23.11.$i/32 dev "$devdummy" preferred_lft $lft valid_lft $((lft+1))
-		check_err $?
+		run_cmd ip addr add 10.23.11.$i/32 dev "$devdummy" preferred_lft $lft valid_lft $((lft+1))
 	done
 
 	sleep 5
-
-	ip addr show dev "$devdummy" | grep "10.23.11."
+	run_cmd_grep "10.23.11." ip addr show dev "$devdummy"
 	if [ $? -eq 0 ]; then
-		echo "FAIL: preferred_lft addresses remaining"
 		check_err 1
+		end_test "FAIL: preferred_lft addresses remaining"
 		return
 	fi
 
-	echo "PASS: preferred_lft addresses have expired"
+	end_test "PASS: preferred_lft addresses have expired"
 }
 
 kci_test_promote_secondaries()
@@ -310,27 +323,17 @@ kci_test_promote_secondaries()
 
 	[ $promote -eq 0 ] && sysctl -q net.ipv4.conf.$devdummy.promote_secondaries=0
 
-	echo "PASS: promote_secondaries complete"
+	end_test "PASS: promote_secondaries complete"
 }
 
 kci_test_addrlabel()
 {
 	local ret=0
-
-	ip addrlabel add prefix dead::/64 dev lo label 1
-	check_err $?
-
-	ip addrlabel list |grep -q "prefix dead::/64 dev lo label 1"
-	check_err $?
-
-	ip addrlabel del prefix dead::/64 dev lo label 1 2> /dev/null
-	check_err $?
-
-	ip addrlabel add prefix dead::/64 label 1 2> /dev/null
-	check_err $?
-
-	ip addrlabel del prefix dead::/64 label 1 2> /dev/null
-	check_err $?
+	run_cmd ip addrlabel add prefix dead::/64 dev lo label 1
+	run_cmd_grep "prefix dead::/64 dev lo label 1" ip addrlabel list
+	run_cmd ip addrlabel del prefix dead::/64 dev lo label 1
+	run_cmd ip addrlabel add prefix dead::/64 label 1
+	run_cmd ip addrlabel del prefix dead::/64 label 1
 
 	# concurrent add/delete
 	for i in $(seq 1 1000); do
@@ -346,11 +349,11 @@ kci_test_addrlabel()
 	ip addrlabel del prefix 1c3::/64 label 12345 2>/dev/null
 
 	if [ $ret -ne 0 ];then
-		echo "FAIL: ipv6 addrlabel"
+		end_test "FAIL: ipv6 addrlabel"
 		return 1
 	fi
 
-	echo "PASS: ipv6 addrlabel"
+	end_test "PASS: ipv6 addrlabel"
 }
 
 kci_test_ifalias()
@@ -358,35 +361,28 @@ kci_test_ifalias()
 	local ret=0
 	namewant=$(uuidgen)
 	syspathname="/sys/class/net/$devdummy/ifalias"
-
-	ip link set dev "$devdummy" alias "$namewant"
-	check_err $?
+	run_cmd ip link set dev "$devdummy" alias "$namewant"
 
 	if [ $ret -ne 0 ]; then
-		echo "FAIL: cannot set interface alias of $devdummy to $namewant"
+		end_test "FAIL: cannot set interface alias of $devdummy to $namewant"
 		return 1
 	fi
-
-	ip link show "$devdummy" | grep -q "alias $namewant"
-	check_err $?
+	run_cmd_grep "alias $namewant" ip link show "$devdummy"
 
 	if [ -r "$syspathname" ] ; then
 		read namehave < "$syspathname"
 		if [ "$namewant" != "$namehave" ]; then
-			echo "FAIL: did set ifalias $namewant but got $namehave"
+			end_test "FAIL: did set ifalias $namewant but got $namehave"
 			return 1
 		fi
 
 		namewant=$(uuidgen)
 		echo "$namewant" > "$syspathname"
-	        ip link show "$devdummy" | grep -q "alias $namewant"
-		check_err $?
+	        run_cmd_grep "alias $namewant" ip link show "$devdummy"
 
 		# sysfs interface allows to delete alias again
 		echo "" > "$syspathname"
-
-	        ip link show "$devdummy" | grep -q "alias $namewant"
-		check_fail $?
+	        run_cmd_grep_fail "alias $namewant" ip link show "$devdummy"
 
 		for i in $(seq 1 100); do
 			uuidgen > "$syspathname" &
@@ -395,57 +391,48 @@ kci_test_ifalias()
 		wait
 
 		# re-add the alias -- kernel should free mem when dummy dev is removed
-		ip link set dev "$devdummy" alias "$namewant"
-		check_err $?
+		run_cmd ip link set dev "$devdummy" alias "$namewant"
+
 	fi
 
 	if [ $ret -ne 0 ]; then
-		echo "FAIL: set interface alias $devdummy to $namewant"
+		end_test "FAIL: set interface alias $devdummy to $namewant"
 		return 1
 	fi
 
-	echo "PASS: set ifalias $namewant for $devdummy"
+	end_test "PASS: set ifalias $namewant for $devdummy"
 }
 
 kci_test_vrf()
 {
 	vrfname="test-vrf"
 	local ret=0
-
-	ip link show type vrf 2>/dev/null
+	run_cmd ip link show type vrf
 	if [ $? -ne 0 ]; then
-		echo "SKIP: vrf: iproute2 too old"
+		end_test "SKIP: vrf: iproute2 too old"
 		return $ksft_skip
 	fi
-
-	ip link add "$vrfname" type vrf table 10
-	check_err $?
+	run_cmd ip link add "$vrfname" type vrf table 10
 	if [ $ret -ne 0 ];then
-		echo "FAIL: can't add vrf interface, skipping test"
+		end_test "FAIL: can't add vrf interface, skipping test"
 		return 0
 	fi
-
-	ip -br link show type vrf | grep -q "$vrfname"
-	check_err $?
+	run_cmd_grep "$vrfname" ip -br link show type vrf
 	if [ $ret -ne 0 ];then
-		echo "FAIL: created vrf device not found"
+		end_test "FAIL: created vrf device not found"
 		return 1
 	fi
 
-	ip link set dev "$vrfname" up
-	check_err $?
-
-	ip link set dev "$devdummy" master "$vrfname"
-	check_err $?
-	ip link del dev "$vrfname"
-	check_err $?
+	run_cmd ip link set dev "$vrfname" up
+	run_cmd ip link set dev "$devdummy" master "$vrfname"
+	run_cmd ip link del dev "$vrfname"
 
 	if [ $ret -ne 0 ];then
-		echo "FAIL: vrf"
+		end_test "FAIL: vrf"
 		return 1
 	fi
 
-	echo "PASS: vrf"
+	end_test "PASS: vrf"
 }
 
 kci_test_encap_vxlan()
@@ -454,84 +441,44 @@ kci_test_encap_vxlan()
 	vxlan="test-vxlan0"
 	vlan="test-vlan0"
 	testns="$1"
-
-	ip -netns "$testns" link add "$vxlan" type vxlan id 42 group 239.1.1.1 \
-		dev "$devdummy" dstport 4789 2>/dev/null
+	run_cmd ip -netns "$testns" link add "$vxlan" type vxlan id 42 group 239.1.1.1 \
+		dev "$devdummy" dstport 4789
 	if [ $? -ne 0 ]; then
-		echo "FAIL: can't add vxlan interface, skipping test"
+		end_test "FAIL: can't add vxlan interface, skipping test"
 		return 0
 	fi
-	check_err $?
 
-	ip -netns "$testns" addr add 10.2.11.49/24 dev "$vxlan"
-	check_err $?
-
-	ip -netns "$testns" link set up dev "$vxlan"
-	check_err $?
-
-	ip -netns "$testns" link add link "$vxlan" name "$vlan" type vlan id 1
-	check_err $?
+	run_cmd ip -netns "$testns" addr add 10.2.11.49/24 dev "$vxlan"
+	run_cmd ip -netns "$testns" link set up dev "$vxlan"
+	run_cmd ip -netns "$testns" link add link "$vxlan" name "$vlan" type vlan id 1
 
 	# changelink testcases
-	ip -netns "$testns" link set dev "$vxlan" type vxlan vni 43 2>/dev/null
-	check_fail $?
-
-	ip -netns "$testns" link set dev "$vxlan" type vxlan group ffe5::5 dev "$devdummy" 2>/dev/null
-	check_fail $?
-
-	ip -netns "$testns" link set dev "$vxlan" type vxlan ttl inherit 2>/dev/null
-	check_fail $?
-
-	ip -netns "$testns" link set dev "$vxlan" type vxlan ttl 64
-	check_err $?
-
-	ip -netns "$testns" link set dev "$vxlan" type vxlan nolearning
-	check_err $?
-
-	ip -netns "$testns" link set dev "$vxlan" type vxlan proxy 2>/dev/null
-	check_fail $?
-
-	ip -netns "$testns" link set dev "$vxlan" type vxlan norsc 2>/dev/null
-	check_fail $?
-
-	ip -netns "$testns" link set dev "$vxlan" type vxlan l2miss 2>/dev/null
-	check_fail $?
-
-	ip -netns "$testns" link set dev "$vxlan" type vxlan l3miss 2>/dev/null
-	check_fail $?
-
-	ip -netns "$testns" link set dev "$vxlan" type vxlan external 2>/dev/null
-	check_fail $?
-
-	ip -netns "$testns" link set dev "$vxlan" type vxlan udpcsum 2>/dev/null
-	check_fail $?
-
-	ip -netns "$testns" link set dev "$vxlan" type vxlan udp6zerocsumtx 2>/dev/null
-	check_fail $?
-
-	ip -netns "$testns" link set dev "$vxlan" type vxlan udp6zerocsumrx 2>/dev/null
-	check_fail $?
-
-	ip -netns "$testns" link set dev "$vxlan" type vxlan remcsumtx 2>/dev/null
-	check_fail $?
-
-	ip -netns "$testns" link set dev "$vxlan" type vxlan remcsumrx 2>/dev/null
-	check_fail $?
-
-	ip -netns "$testns" link set dev "$vxlan" type vxlan gbp 2>/dev/null
-	check_fail $?
-
-	ip -netns "$testns" link set dev "$vxlan" type vxlan gpe 2>/dev/null
-	check_fail $?
-
-	ip -netns "$testns" link del "$vxlan"
-	check_err $?
+	run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan vni 43
+	run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan group ffe5::5 dev "$devdummy"
+	run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan ttl inherit
+
+	run_cmd ip -netns "$testns" link set dev "$vxlan" type vxlan ttl 64
+	run_cmd ip -netns "$testns" link set dev "$vxlan" type vxlan nolearning
+
+	run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan proxy
+	run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan norsc
+	run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan l2miss
+	run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan l3miss
+	run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan external
+	run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan udpcsum
+	run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan udp6zerocsumtx
+	run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan udp6zerocsumrx
+	run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan remcsumtx
+	run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan remcsumrx
+	run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan gbp
+	run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan gpe
+	run_cmd ip -netns "$testns" link del "$vxlan"
 
 	if [ $ret -ne 0 ]; then
-		echo "FAIL: vxlan"
+		end_test "FAIL: vxlan"
 		return 1
 	fi
-	echo "PASS: vxlan"
+	end_test "PASS: vxlan"
 }
 
 kci_test_encap_fou()
@@ -539,39 +486,32 @@ kci_test_encap_fou()
 	local ret=0
 	name="test-fou"
 	testns="$1"
-
-	ip fou help 2>&1 |grep -q 'Usage: ip fou'
+	run_cmd_grep 'Usage: ip fou' ip fou help
 	if [ $? -ne 0 ];then
-		echo "SKIP: fou: iproute2 too old"
+		end_test "SKIP: fou: iproute2 too old"
 		return $ksft_skip
 	fi
 
 	if ! /sbin/modprobe -q -n fou; then
-		echo "SKIP: module fou is not found"
+		end_test "SKIP: module fou is not found"
 		return $ksft_skip
 	fi
 	/sbin/modprobe -q fou
-	ip -netns "$testns" fou add port 7777 ipproto 47 2>/dev/null
+
+	run_cmd ip -netns "$testns" fou add port 7777 ipproto 47
 	if [ $? -ne 0 ];then
-		echo "FAIL: can't add fou port 7777, skipping test"
+		end_test "FAIL: can't add fou port 7777, skipping test"
 		return 1
 	fi
-
-	ip -netns "$testns" fou add port 8888 ipproto 4
-	check_err $?
-
-	ip -netns "$testns" fou del port 9999 2>/dev/null
-	check_fail $?
-
-	ip -netns "$testns" fou del port 7777
-	check_err $?
-
+	run_cmd ip -netns "$testns" fou add port 8888 ipproto 4
+	run_cmd_fail ip -netns "$testns" fou del port 9999
+	run_cmd ip -netns "$testns" fou del port 7777
 	if [ $ret -ne 0 ]; then
-		echo "FAIL: fou"
+		end_test "FAIL: fou"s
 		return 1
 	fi
 
-	echo "PASS: fou"
+	end_test "PASS: fou"
 }
 
 # test various encap methods, use netns to avoid unwanted interference
@@ -579,25 +519,16 @@ kci_test_encap()
 {
 	testns="testns"
 	local ret=0
-
-	ip netns add "$testns"
+	run_cmd ip netns add "$testns"
 	if [ $? -ne 0 ]; then
-		echo "SKIP encap tests: cannot add net namespace $testns"
+		end_test "SKIP encap tests: cannot add net namespace $testns"
 		return $ksft_skip
 	fi
-
-	ip -netns "$testns" link set lo up
-	check_err $?
-
-	ip -netns "$testns" link add name "$devdummy" type dummy
-	check_err $?
-	ip -netns "$testns" link set "$devdummy" up
-	check_err $?
-
-	kci_test_encap_vxlan "$testns"
-	check_err $?
-	kci_test_encap_fou "$testns"
-	check_err $?
+	run_cmd ip -netns "$testns" link set lo up
+	run_cmd ip -netns "$testns" link add name "$devdummy" type dummy
+	run_cmd ip -netns "$testns" link set "$devdummy" up
+	run_cmd kci_test_encap_vxlan "$testns"
+	run_cmd kci_test_encap_fou "$testns"
 
 	ip netns del "$testns"
 	return $ret
@@ -607,41 +538,28 @@ kci_test_macsec()
 {
 	msname="test_macsec0"
 	local ret=0
-
-	ip macsec help 2>&1 | grep -q "^Usage: ip macsec"
+	run_cmd_grep "^Usage: ip macsec" ip macsec help
 	if [ $? -ne 0 ]; then
-		echo "SKIP: macsec: iproute2 too old"
+		end_test "SKIP: macsec: iproute2 too old"
 		return $ksft_skip
 	fi
-
-	ip link add link "$devdummy" "$msname" type macsec port 42 encrypt on
-	check_err $?
+	run_cmd ip link add link "$devdummy" "$msname" type macsec port 42 encrypt on
 	if [ $ret -ne 0 ];then
-		echo "FAIL: can't add macsec interface, skipping test"
+		end_test "FAIL: can't add macsec interface, skipping test"
 		return 1
 	fi
-
-	ip macsec add "$msname" tx sa 0 pn 1024 on key 01 12345678901234567890123456789012
-	check_err $?
-
-	ip macsec add "$msname" rx port 1234 address "1c:ed:de:ad:be:ef"
-	check_err $?
-
-	ip macsec add "$msname" rx port 1234 address "1c:ed:de:ad:be:ef" sa 0 pn 1 on key 00 0123456789abcdef0123456789abcdef
-	check_err $?
-
-	ip macsec show > /dev/null
-	check_err $?
-
-	ip link del dev "$msname"
-	check_err $?
+	run_cmd ip macsec add "$msname" tx sa 0 pn 1024 on key 01 12345678901234567890123456789012
+	run_cmd ip macsec add "$msname" rx port 1234 address "1c:ed:de:ad:be:ef"
+	run_cmd ip macsec add "$msname" rx port 1234 address "1c:ed:de:ad:be:ef" sa 0 pn 1 on key 00 0123456789abcdef0123456789abcdef
+	run_cmd ip macsec show
+	run_cmd ip link del dev "$msname"
 
 	if [ $ret -ne 0 ];then
-		echo "FAIL: macsec"
+		end_test "FAIL: macsec"
 		return 1
 	fi
 
-	echo "PASS: macsec"
+	end_test "PASS: macsec"
 }
 
 kci_test_macsec_offload()
@@ -650,19 +568,18 @@ kci_test_macsec_offload()
 	sysfsnet=/sys/bus/netdevsim/devices/netdevsim0/net/
 	probed=false
 	local ret=0
-
-	ip macsec help 2>&1 | grep -q "^Usage: ip macsec"
+	run_cmd_grep "^Usage: ip macsec" ip macsec help
 	if [ $? -ne 0 ]; then
-		echo "SKIP: macsec: iproute2 too old"
+		end_test "SKIP: macsec: iproute2 too old"
 		return $ksft_skip
 	fi
 
 	# setup netdevsim since dummydev doesn't have offload support
 	if [ ! -w /sys/bus/netdevsim/new_device ] ; then
-		modprobe -q netdevsim
-		check_err $?
+		run_cmd modprobe -q netdevsim
+
 		if [ $ret -ne 0 ]; then
-			echo "SKIP: macsec_offload can't load netdevsim"
+			end_test "SKIP: macsec_offload can't load netdevsim"
 			return $ksft_skip
 		fi
 		probed=true
@@ -675,43 +592,25 @@ kci_test_macsec_offload()
 
 	ip link set $dev up
 	if [ ! -d $sysfsd ] ; then
-		echo "FAIL: macsec_offload can't create device $dev"
+		end_test "FAIL: macsec_offload can't create device $dev"
 		return 1
 	fi
-
-	ethtool -k $dev | grep -q 'macsec-hw-offload: on'
+	run_cmd_grep 'macsec-hw-offload: on' ethtool -k $dev
 	if [ $? -eq 1 ] ; then
-		echo "FAIL: macsec_offload netdevsim doesn't support MACsec offload"
+		end_test "FAIL: macsec_offload netdevsim doesn't support MACsec offload"
 		return 1
 	fi
-
-	ip link add link $dev kci_macsec1 type macsec port 4 offload mac
-	check_err $?
-
-	ip link add link $dev kci_macsec2 type macsec address "aa:bb:cc:dd:ee:ff" port 5 offload mac
-	check_err $?
-
-	ip link add link $dev kci_macsec3 type macsec sci abbacdde01020304 offload mac
-	check_err $?
-
-	ip link add link $dev kci_macsec4 type macsec port 8 offload mac 2> /dev/null
-	check_fail $?
+	run_cmd ip link add link $dev kci_macsec1 type macsec port 4 offload mac
+	run_cmd ip link add link $dev kci_macsec2 type macsec address "aa:bb:cc:dd:ee:ff" port 5 offload mac
+	run_cmd ip link add link $dev kci_macsec3 type macsec sci abbacdde01020304 offload mac
+	run_cmd_fail ip link add link $dev kci_macsec4 type macsec port 8 offload mac
 
 	msname=kci_macsec1
-
-	ip macsec add "$msname" tx sa 0 pn 1024 on key 01 12345678901234567890123456789012
-	check_err $?
-
-	ip macsec add "$msname" rx port 1234 address "1c:ed:de:ad:be:ef"
-	check_err $?
-
-	ip macsec add "$msname" rx port 1234 address "1c:ed:de:ad:be:ef" sa 0 pn 1 on \
+	run_cmd ip macsec add "$msname" tx sa 0 pn 1024 on key 01 12345678901234567890123456789012
+	run_cmd ip macsec add "$msname" rx port 1234 address "1c:ed:de:ad:be:ef"
+	run_cmd ip macsec add "$msname" rx port 1234 address "1c:ed:de:ad:be:ef" sa 0 pn 1 on \
 		key 00 0123456789abcdef0123456789abcdef
-	check_err $?
-
-	ip macsec add "$msname" rx port 1235 address "1c:ed:de:ad:be:ef" 2> /dev/null
-	check_fail $?
-
+	run_cmd_fail ip macsec add "$msname" rx port 1235 address "1c:ed:de:ad:be:ef"
 	# clean up any leftovers
 	for msdev in kci_macsec{1,2,3,4} ; do
 	    ip link del $msdev 2> /dev/null
@@ -720,10 +619,10 @@ kci_test_macsec_offload()
 	$probed && rmmod netdevsim
 
 	if [ $ret -ne 0 ]; then
-		echo "FAIL: macsec_offload"
+		end_test "FAIL: macsec_offload"
 		return 1
 	fi
-	echo "PASS: macsec_offload"
+	end_test "PASS: macsec_offload"
 }
 
 #-------------------------------------------------------------------
@@ -755,8 +654,7 @@ kci_test_ipsec()
 	ip addr add $srcip dev $devdummy
 
 	# flush to be sure there's nothing configured
-	ip x s flush ; ip x p flush
-	check_err $?
+	run_cmd ip x s flush ; ip x p flush
 
 	# start the monitor in the background
 	tmpfile=`mktemp /var/run/ipsectestXXX`
@@ -764,72 +662,57 @@ kci_test_ipsec()
 	sleep 0.2
 
 	ipsecid="proto esp src $srcip dst $dstip spi 0x07"
-	ip x s add $ipsecid \
+	run_cmd ip x s add $ipsecid \
             mode transport reqid 0x07 replay-window 32 \
             $algo sel src $srcip/24 dst $dstip/24
-	check_err $?
 
-	lines=`ip x s list | grep $srcip | grep $dstip | wc -l`
-	test $lines -eq 2
-	check_err $?
 
-	ip x s count | grep -q "SAD count 1"
-	check_err $?
+	lines=`ip x s list | grep $srcip | grep $dstip | wc -l`
+	run_cmd test $lines -eq 2
+	run_cmd_grep "SAD count 1" ip x s count
 
 	lines=`ip x s get $ipsecid | grep $srcip | grep $dstip | wc -l`
-	test $lines -eq 2
-	check_err $?
-
-	ip x s delete $ipsecid
-	check_err $?
+	run_cmd test $lines -eq 2
+	run_cmd ip x s delete $ipsecid
 
 	lines=`ip x s list | wc -l`
-	test $lines -eq 0
-	check_err $?
+	run_cmd test $lines -eq 0
 
 	ipsecsel="dir out src $srcip/24 dst $dstip/24"
-	ip x p add $ipsecsel \
+	run_cmd ip x p add $ipsecsel \
 		    tmpl proto esp src $srcip dst $dstip \
 		    spi 0x07 mode transport reqid 0x07
-	check_err $?
+
 
 	lines=`ip x p list | grep $srcip | grep $dstip | wc -l`
-	test $lines -eq 2
-	check_err $?
+	run_cmd test $lines -eq 2
 
-	ip x p count | grep -q "SPD IN  0 OUT 1 FWD 0"
-	check_err $?
+	run_cmd_grep "SPD IN  0 OUT 1 FWD 0" ip x p count
 
 	lines=`ip x p get $ipsecsel | grep $srcip | grep $dstip | wc -l`
-	test $lines -eq 2
-	check_err $?
+	run_cmd test $lines -eq 2
 
-	ip x p delete $ipsecsel
-	check_err $?
+	run_cmd ip x p delete $ipsecsel
 
 	lines=`ip x p list | wc -l`
-	test $lines -eq 0
-	check_err $?
+	run_cmd test $lines -eq 0
 
 	# check the monitor results
 	kill $mpid
 	lines=`wc -l $tmpfile | cut "-d " -f1`
-	test $lines -eq 20
-	check_err $?
+	run_cmd test $lines -eq 20
 	rm -rf $tmpfile
 
 	# clean up any leftovers
-	ip x s flush
-	check_err $?
-	ip x p flush
-	check_err $?
+	run_cmd ip x s flush
+	run_cmd ip x p flush
 	ip addr del $srcip/32 dev $devdummy
 
 	if [ $ret -ne 0 ]; then
-		echo "FAIL: ipsec"
+		end_test "FAIL: ipsec"
 		return 1
 	fi
-	echo "PASS: ipsec"
+	end_test "PASS: ipsec"
 }
 
 #-------------------------------------------------------------------
@@ -857,10 +740,9 @@ kci_test_ipsec_offload()
 
 	# setup netdevsim since dummydev doesn't have offload support
 	if [ ! -w /sys/bus/netdevsim/new_device ] ; then
-		modprobe -q netdevsim
-		check_err $?
+		run_cmd modprobe -q netdevsim
 		if [ $ret -ne 0 ]; then
-			echo "SKIP: ipsec_offload can't load netdevsim"
+			end_test "SKIP: ipsec_offload can't load netdevsim"
 			return $ksft_skip
 		fi
 		probed=true
@@ -874,11 +756,11 @@ kci_test_ipsec_offload()
 	ip addr add $srcip dev $dev
 	ip link set $dev up
 	if [ ! -d $sysfsd ] ; then
-		echo "FAIL: ipsec_offload can't create device $dev"
+		end_test "FAIL: ipsec_offload can't create device $dev"
 		return 1
 	fi
 	if [ ! -f $sysfsf ] ; then
-		echo "FAIL: ipsec_offload netdevsim doesn't support IPsec offload"
+		end_test "FAIL: ipsec_offload netdevsim doesn't support IPsec offload"
 		return 1
 	fi
 
@@ -886,40 +768,39 @@ kci_test_ipsec_offload()
 	ip x s flush ; ip x p flush
 
 	# create offloaded SAs, both in and out
-	ip x p add dir out src $srcip/24 dst $dstip/24 \
+	run_cmd ip x p add dir out src $srcip/24 dst $dstip/24 \
 	    tmpl proto esp src $srcip dst $dstip spi 9 \
 	    mode transport reqid 42
-	check_err $?
-	ip x p add dir in src $dstip/24 dst $srcip/24 \
+
+	run_cmd ip x p add dir in src $dstip/24 dst $srcip/24 \
 	    tmpl proto esp src $dstip dst $srcip spi 9 \
 	    mode transport reqid 42
-	check_err $?
 
-	ip x s add proto esp src $srcip dst $dstip spi 9 \
+	run_cmd ip x s add proto esp src $srcip dst $dstip spi 9 \
 	    mode transport reqid 42 $algo sel src $srcip/24 dst $dstip/24 \
 	    offload dev $dev dir out
-	check_err $?
-	ip x s add proto esp src $dstip dst $srcip spi 9 \
+
+	run_cmd ip x s add proto esp src $dstip dst $srcip spi 9 \
 	    mode transport reqid 42 $algo sel src $dstip/24 dst $srcip/24 \
 	    offload dev $dev dir in
-	check_err $?
+
 	if [ $ret -ne 0 ]; then
-		echo "FAIL: ipsec_offload can't create SA"
+		end_test "FAIL: ipsec_offload can't create SA"
 		return 1
 	fi
 
 	# does offload show up in ip output
 	lines=`ip x s list | grep -c "crypto offload parameters: dev $dev dir"`
 	if [ $lines -ne 2 ] ; then
-		echo "FAIL: ipsec_offload SA offload missing from list output"
 		check_err 1
+		end_test "FAIL: ipsec_offload SA offload missing from list output"
 	fi
 
 	# use ping to exercise the Tx path
 	ping -I $dev -c 3 -W 1 -i 0 $dstip >/dev/null
 
 	# does driver have correct offload info
-	diff $sysfsf - << EOF
+	run_cmd diff $sysfsf - << EOF
 SA count=2 tx=3
 sa[0] tx ipaddr=0x00000000 00000000 00000000 00000000
 sa[0]    spi=0x00000009 proto=0x32 salt=0x61626364 crypt=1
@@ -929,7 +810,7 @@ sa[1]    spi=0x00000009 proto=0x32 salt=0x61626364 crypt=1
 sa[1]    key=0x34333231 38373635 32313039 36353433
 EOF
 	if [ $? -ne 0 ] ; then
-		echo "FAIL: ipsec_offload incorrect driver data"
+		end_test "FAIL: ipsec_offload incorrect driver data"
 		check_err 1
 	fi
 
@@ -938,8 +819,8 @@ EOF
 	ip x p flush
 	lines=`grep -c "SA count=0" $sysfsf`
 	if [ $lines -ne 1 ] ; then
-		echo "FAIL: ipsec_offload SA not removed from driver"
 		check_err 1
+		end_test "FAIL: ipsec_offload SA not removed from driver"
 	fi
 
 	# clean up any leftovers
@@ -947,10 +828,10 @@ EOF
 	$probed && rmmod netdevsim
 
 	if [ $ret -ne 0 ]; then
-		echo "FAIL: ipsec_offload"
+		end_test "FAIL: ipsec_offload"
 		return 1
 	fi
-	echo "PASS: ipsec_offload"
+	end_test "PASS: ipsec_offload"
 }
 
 kci_test_gretap()
@@ -959,46 +840,38 @@ kci_test_gretap()
 	DEV_NS=gretap00
 	local ret=0
 
-	ip netns add "$testns"
+	run_cmd ip netns add "$testns"
 	if [ $? -ne 0 ]; then
-		echo "SKIP gretap tests: cannot add net namespace $testns"
+		end_test "SKIP gretap tests: cannot add net namespace $testns"
 		return $ksft_skip
 	fi
 
-	ip link help gretap 2>&1 | grep -q "^Usage:"
+	run_cmd_grep "^Usage:" ip link help gretap
 	if [ $? -ne 0 ];then
-		echo "SKIP: gretap: iproute2 too old"
+		end_test "SKIP: gretap: iproute2 too old"
 		ip netns del "$testns"
 		return $ksft_skip
 	fi
 
 	# test native tunnel
-	ip -netns "$testns" link add dev "$DEV_NS" type gretap seq \
+	run_cmd ip -netns "$testns" link add dev "$DEV_NS" type gretap seq \
 		key 102 local 172.16.1.100 remote 172.16.1.200
-	check_err $?
-
-	ip -netns "$testns" addr add dev "$DEV_NS" 10.1.1.100/24
-	check_err $?
 
-	ip -netns "$testns" link set dev $DEV_NS up
-	check_err $?
 
-	ip -netns "$testns" link del "$DEV_NS"
-	check_err $?
+	run_cmd ip -netns "$testns" addr add dev "$DEV_NS" 10.1.1.100/24
+	run_cmd ip -netns "$testns" link set dev $DEV_NS ups
+	run_cmd ip -netns "$testns" link del "$DEV_NS"
 
 	# test external mode
-	ip -netns "$testns" link add dev "$DEV_NS" type gretap external
-	check_err $?
-
-	ip -netns "$testns" link del "$DEV_NS"
-	check_err $?
+	run_cmd ip -netns "$testns" link add dev "$DEV_NS" type gretap external
+	run_cmd ip -netns "$testns" link del "$DEV_NS"
 
 	if [ $ret -ne 0 ]; then
-		echo "FAIL: gretap"
+		end_test "FAIL: gretap"
 		ip netns del "$testns"
 		return 1
 	fi
-	echo "PASS: gretap"
+	end_test "PASS: gretap"
 
 	ip netns del "$testns"
 }
@@ -1009,46 +882,38 @@ kci_test_ip6gretap()
 	DEV_NS=ip6gretap00
 	local ret=0
 
-	ip netns add "$testns"
+	run_cmd ip netns add "$testns"
 	if [ $? -ne 0 ]; then
-		echo "SKIP ip6gretap tests: cannot add net namespace $testns"
+		end_test "SKIP ip6gretap tests: cannot add net namespace $testns"
 		return $ksft_skip
 	fi
 
-	ip link help ip6gretap 2>&1 | grep -q "^Usage:"
+	run_cmd_grep "^Usage:" ip link help ip6gretap
 	if [ $? -ne 0 ];then
-		echo "SKIP: ip6gretap: iproute2 too old"
+		end_test "SKIP: ip6gretap: iproute2 too old"
 		ip netns del "$testns"
 		return $ksft_skip
 	fi
 
 	# test native tunnel
-	ip -netns "$testns" link add dev "$DEV_NS" type ip6gretap seq \
+	run_cmd ip -netns "$testns" link add dev "$DEV_NS" type ip6gretap seq \
 		key 102 local fc00:100::1 remote fc00:100::2
-	check_err $?
 
-	ip -netns "$testns" addr add dev "$DEV_NS" fc00:200::1/96
-	check_err $?
 
-	ip -netns "$testns" link set dev $DEV_NS up
-	check_err $?
-
-	ip -netns "$testns" link del "$DEV_NS"
-	check_err $?
+	run_cmd ip -netns "$testns" addr add dev "$DEV_NS" fc00:200::1/96
+	run_cmd ip -netns "$testns" link set dev $DEV_NS up
+	run_cmd ip -netns "$testns" link del "$DEV_NS"
 
 	# test external mode
-	ip -netns "$testns" link add dev "$DEV_NS" type ip6gretap external
-	check_err $?
-
-	ip -netns "$testns" link del "$DEV_NS"
-	check_err $?
+	run_cmd ip -netns "$testns" link add dev "$DEV_NS" type ip6gretap external
+	run_cmd ip -netns "$testns" link del "$DEV_NS"
 
 	if [ $ret -ne 0 ]; then
-		echo "FAIL: ip6gretap"
+		end_test "FAIL: ip6gretap"
 		ip netns del "$testns"
 		return 1
 	fi
-	echo "PASS: ip6gretap"
+	end_test "PASS: ip6gretap"
 
 	ip netns del "$testns"
 }
@@ -1058,62 +923,47 @@ kci_test_erspan()
 	testns="testns"
 	DEV_NS=erspan00
 	local ret=0
-
-	ip link help erspan 2>&1 | grep -q "^Usage:"
+	run_cmd_grep "^Usage:" ip link help erspan
 	if [ $? -ne 0 ];then
-		echo "SKIP: erspan: iproute2 too old"
+		end_test "SKIP: erspan: iproute2 too old"
 		return $ksft_skip
 	fi
-
-	ip netns add "$testns"
+	run_cmd ip netns add "$testns"
 	if [ $? -ne 0 ]; then
-		echo "SKIP erspan tests: cannot add net namespace $testns"
+		end_test "SKIP erspan tests: cannot add net namespace $testns"
 		return $ksft_skip
 	fi
 
 	# test native tunnel erspan v1
-	ip -netns "$testns" link add dev "$DEV_NS" type erspan seq \
+	run_cmd ip -netns "$testns" link add dev "$DEV_NS" type erspan seq \
 		key 102 local 172.16.1.100 remote 172.16.1.200 \
 		erspan_ver 1 erspan 488
-	check_err $?
 
-	ip -netns "$testns" addr add dev "$DEV_NS" 10.1.1.100/24
-	check_err $?
 
-	ip -netns "$testns" link set dev $DEV_NS up
-	check_err $?
-
-	ip -netns "$testns" link del "$DEV_NS"
-	check_err $?
+	run_cmd ip -netns "$testns" addr add dev "$DEV_NS" 10.1.1.100/24
+	run_cmd ip -netns "$testns" link set dev $DEV_NS up
+	run_cmd ip -netns "$testns" link del "$DEV_NS"
 
 	# test native tunnel erspan v2
-	ip -netns "$testns" link add dev "$DEV_NS" type erspan seq \
+	run_cmd ip -netns "$testns" link add dev "$DEV_NS" type erspan seq \
 		key 102 local 172.16.1.100 remote 172.16.1.200 \
 		erspan_ver 2 erspan_dir ingress erspan_hwid 7
-	check_err $?
-
-	ip -netns "$testns" addr add dev "$DEV_NS" 10.1.1.100/24
-	check_err $?
 
-	ip -netns "$testns" link set dev $DEV_NS up
-	check_err $?
 
-	ip -netns "$testns" link del "$DEV_NS"
-	check_err $?
+	run_cmd ip -netns "$testns" addr add dev "$DEV_NS" 10.1.1.100/24
+	run_cmd ip -netns "$testns" link set dev $DEV_NS up
+	run_cmd ip -netns "$testns" link del "$DEV_NS"
 
 	# test external mode
-	ip -netns "$testns" link add dev "$DEV_NS" type erspan external
-	check_err $?
-
-	ip -netns "$testns" link del "$DEV_NS"
-	check_err $?
+	run_cmd ip -netns "$testns" link add dev "$DEV_NS" type erspan external
+	run_cmd ip -netns "$testns" link del "$DEV_NS"
 
 	if [ $ret -ne 0 ]; then
-		echo "FAIL: erspan"
+		end_test "FAIL: erspan"
 		ip netns del "$testns"
 		return 1
 	fi
-	echo "PASS: erspan"
+	end_test "PASS: erspan"
 
 	ip netns del "$testns"
 }
@@ -1123,63 +973,49 @@ kci_test_ip6erspan()
 	testns="testns"
 	DEV_NS=ip6erspan00
 	local ret=0
-
-	ip link help ip6erspan 2>&1 | grep -q "^Usage:"
+	run_cmd_grep "^Usage:" ip link help ip6erspan
 	if [ $? -ne 0 ];then
-		echo "SKIP: ip6erspan: iproute2 too old"
+		end_test "SKIP: ip6erspan: iproute2 too old"
 		return $ksft_skip
 	fi
-
-	ip netns add "$testns"
+	run_cmd ip netns add "$testns"
 	if [ $? -ne 0 ]; then
-		echo "SKIP ip6erspan tests: cannot add net namespace $testns"
+		end_test "SKIP ip6erspan tests: cannot add net namespace $testns"
 		return $ksft_skip
 	fi
 
 	# test native tunnel ip6erspan v1
-	ip -netns "$testns" link add dev "$DEV_NS" type ip6erspan seq \
+	run_cmd ip -netns "$testns" link add dev "$DEV_NS" type ip6erspan seq \
 		key 102 local fc00:100::1 remote fc00:100::2 \
 		erspan_ver 1 erspan 488
-	check_err $?
 
-	ip -netns "$testns" addr add dev "$DEV_NS" 10.1.1.100/24
-	check_err $?
 
-	ip -netns "$testns" link set dev $DEV_NS up
-	check_err $?
-
-	ip -netns "$testns" link del "$DEV_NS"
-	check_err $?
+	run_cmd ip -netns "$testns" addr add dev "$DEV_NS" 10.1.1.100/24
+	run_cmd ip -netns "$testns" link set dev $DEV_NS up
+	run_cmd ip -netns "$testns" link del "$DEV_NS"
 
 	# test native tunnel ip6erspan v2
-	ip -netns "$testns" link add dev "$DEV_NS" type ip6erspan seq \
+	run_cmd ip -netns "$testns" link add dev "$DEV_NS" type ip6erspan seq \
 		key 102 local fc00:100::1 remote fc00:100::2 \
 		erspan_ver 2 erspan_dir ingress erspan_hwid 7
-	check_err $?
 
-	ip -netns "$testns" addr add dev "$DEV_NS" 10.1.1.100/24
-	check_err $?
 
-	ip -netns "$testns" link set dev $DEV_NS up
-	check_err $?
-
-	ip -netns "$testns" link del "$DEV_NS"
-	check_err $?
+	run_cmd ip -netns "$testns" addr add dev "$DEV_NS" 10.1.1.100/24
+	run_cmd ip -netns "$testns" link set dev $DEV_NS up
+	run_cmd ip -netns "$testns" link del "$DEV_NS"
 
 	# test external mode
-	ip -netns "$testns" link add dev "$DEV_NS" \
+	run_cmd ip -netns "$testns" link add dev "$DEV_NS" \
 		type ip6erspan external
-	check_err $?
 
-	ip -netns "$testns" link del "$DEV_NS"
-	check_err $?
+	run_cmd ip -netns "$testns" link del "$DEV_NS"
 
 	if [ $ret -ne 0 ]; then
-		echo "FAIL: ip6erspan"
+		end_test "FAIL: ip6erspan"
 		ip netns del "$testns"
 		return 1
 	fi
-	echo "PASS: ip6erspan"
+	end_test "PASS: ip6erspan"
 
 	ip netns del "$testns"
 }
@@ -1195,45 +1031,35 @@ kci_test_fdb_get()
 	dstip="10.0.2.3"
 	local ret=0
 
-	bridge fdb help 2>&1 |grep -q 'bridge fdb get'
+	run_cmd_grep 'bridge fdb get' bridge fdb help
 	if [ $? -ne 0 ];then
-		echo "SKIP: fdb get tests: iproute2 too old"
+		end_test "SKIP: fdb get tests: iproute2 too old"
 		return $ksft_skip
 	fi
 
-	ip netns add testns
+	run_cmd ip netns add testns
 	if [ $? -ne 0 ]; then
-		echo "SKIP fdb get tests: cannot add net namespace $testns"
+		end_test "SKIP fdb get tests: cannot add net namespace $testns"
 		return $ksft_skip
 	fi
-
-	$IP link add "$vxlandev" type vxlan id 10 local $localip \
-                dstport 4789 2>/dev/null
-	check_err $?
-	$IP link add name "$brdev" type bridge &>/dev/null
-	check_err $?
-	$IP link set dev "$vxlandev" master "$brdev" &>/dev/null
-	check_err $?
-	$BRIDGE fdb add $test_mac dev "$vxlandev" master &>/dev/null
-	check_err $?
-	$BRIDGE fdb add $test_mac dev "$vxlandev" dst $dstip self &>/dev/null
-	check_err $?
-
-	$BRIDGE fdb get $test_mac brport "$vxlandev" 2>/dev/null | grep -q "dev $vxlandev master $brdev"
-	check_err $?
-	$BRIDGE fdb get $test_mac br "$brdev" 2>/dev/null | grep -q "dev $vxlandev master $brdev"
-	check_err $?
-	$BRIDGE fdb get $test_mac dev "$vxlandev" self 2>/dev/null | grep -q "dev $vxlandev dst $dstip"
-	check_err $?
+	run_cmd $IP link add "$vxlandev" type vxlan id 10 local $localip \
+                dstport 4789
+	run_cmd $IP link add name "$brdev" type bridge
+	run_cmd $IP link set dev "$vxlandev" master "$brdev"
+	run_cmd $BRIDGE fdb add $test_mac dev "$vxlandev" master
+	run_cmd $BRIDGE fdb add $test_mac dev "$vxlandev" dst $dstip self
+	run_cmd_grep "dev $vxlandev master $brdev" $BRIDGE fdb get $test_mac brport "$vxlandev"
+	run_cmd_grep "dev $vxlandev master $brdev" $BRIDGE fdb get $test_mac br "$brdev"
+	run_cmd_grep "dev $vxlandev dst $dstip" $BRIDGE fdb get $test_mac dev "$vxlandev" self
 
 	ip netns del testns &>/dev/null
 
 	if [ $ret -ne 0 ]; then
-		echo "FAIL: bridge fdb get"
+		end_test "FAIL: bridge fdb get"
 		return 1
 	fi
 
-	echo "PASS: bridge fdb get"
+	end_test "PASS: bridge fdb get"
 }
 
 kci_test_neigh_get()
@@ -1243,50 +1069,38 @@ kci_test_neigh_get()
 	dstip6=dead::2
 	local ret=0
 
-	ip neigh help 2>&1 |grep -q 'ip neigh get'
+	run_cmd_grep 'ip neigh get' ip neigh help
 	if [ $? -ne 0 ];then
-		echo "SKIP: fdb get tests: iproute2 too old"
+		end_test "SKIP: fdb get tests: iproute2 too old"
 		return $ksft_skip
 	fi
 
 	# ipv4
-	ip neigh add $dstip lladdr $dstmac dev "$devdummy"  > /dev/null
-	check_err $?
-	ip neigh get $dstip dev "$devdummy" 2> /dev/null | grep -q "$dstmac"
-	check_err $?
-	ip neigh del $dstip lladdr $dstmac dev "$devdummy"  > /dev/null
-	check_err $?
+	run_cmd ip neigh add $dstip lladdr $dstmac dev "$devdummy"
+	run_cmd_grep "$dstmac" ip neigh get $dstip dev "$devdummy"
+	run_cmd ip neigh del $dstip lladdr $dstmac dev "$devdummy"
 
 	# ipv4 proxy
-	ip neigh add proxy $dstip dev "$devdummy" > /dev/null
-	check_err $?
-	ip neigh get proxy $dstip dev "$devdummy" 2>/dev/null | grep -q "$dstip"
-	check_err $?
-	ip neigh del proxy $dstip dev "$devdummy" > /dev/null
-	check_err $?
+	run_cmd ip neigh add proxy $dstip dev "$devdummy"
+	run_cmd_grep "$dstip" ip neigh get proxy $dstip dev "$devdummy"
+	run_cmd ip neigh del proxy $dstip dev "$devdummy"
 
 	# ipv6
-	ip neigh add $dstip6 lladdr $dstmac dev "$devdummy"  > /dev/null
-	check_err $?
-	ip neigh get $dstip6 dev "$devdummy" 2> /dev/null | grep -q "$dstmac"
-	check_err $?
-	ip neigh del $dstip6 lladdr $dstmac dev "$devdummy"  > /dev/null
-	check_err $?
+	run_cmd ip neigh add $dstip6 lladdr $dstmac dev "$devdummy"
+	run_cmd_grep "$dstmac" ip neigh get $dstip6 dev "$devdummy"
+	run_cmd ip neigh del $dstip6 lladdr $dstmac dev "$devdummy"
 
 	# ipv6 proxy
-	ip neigh add proxy $dstip6 dev "$devdummy" > /dev/null
-	check_err $?
-	ip neigh get proxy $dstip6 dev "$devdummy" 2>/dev/null | grep -q "$dstip6"
-	check_err $?
-	ip neigh del proxy $dstip6 dev "$devdummy" > /dev/null
-	check_err $?
+	run_cmd ip neigh add proxy $dstip6 dev "$devdummy"
+	run_cmd_grep "$dstip6" ip neigh get proxy $dstip6 dev "$devdummy"
+	run_cmd ip neigh del proxy $dstip6 dev "$devdummy"
 
 	if [ $ret -ne 0 ];then
-		echo "FAIL: neigh get"
+		end_test "FAIL: neigh get"
 		return 1
 	fi
 
-	echo "PASS: neigh get"
+	end_test "PASS: neigh get"
 }
 
 kci_test_bridge_parent_id()
@@ -1296,10 +1110,9 @@ kci_test_bridge_parent_id()
 	probed=false
 
 	if [ ! -w /sys/bus/netdevsim/new_device ] ; then
-		modprobe -q netdevsim
-		check_err $?
+		run_cmd modprobe -q netdevsim
 		if [ $ret -ne 0 ]; then
-			echo "SKIP: bridge_parent_id can't load netdevsim"
+			end_test "SKIP: bridge_parent_id can't load netdevsim"
 			return $ksft_skip
 		fi
 		probed=true
@@ -1312,13 +1125,11 @@ kci_test_bridge_parent_id()
 	udevadm settle
 	dev10=`ls ${sysfsnet}10/net/`
 	dev20=`ls ${sysfsnet}20/net/`
-
-	ip link add name test-bond0 type bond mode 802.3ad
-	ip link set dev $dev10 master test-bond0
-	ip link set dev $dev20 master test-bond0
-	ip link add name test-br0 type bridge
-	ip link set dev test-bond0 master test-br0
-	check_err $?
+	run_cmd ip link add name test-bond0 type bond mode 802.3ad
+	run_cmd ip link set dev $dev10 master test-bond0
+	run_cmd ip link set dev $dev20 master test-bond0
+	run_cmd ip link add name test-br0 type bridge
+	run_cmd ip link set dev test-bond0 master test-br0
 
 	# clean up any leftovers
 	ip link del dev test-br0
@@ -1328,10 +1139,10 @@ kci_test_bridge_parent_id()
 	$probed && rmmod netdevsim
 
 	if [ $ret -ne 0 ]; then
-		echo "FAIL: bridge_parent_id"
+		end_test "FAIL: bridge_parent_id"
 		return 1
 	fi
-	echo "PASS: bridge_parent_id"
+	end_test "PASS: bridge_parent_id"
 }
 
 address_get_proto()
@@ -1409,10 +1220,10 @@ do_test_address_proto()
 	ip address del dev "$devdummy" "$addr3"
 
 	if [ $ret -ne 0 ]; then
-		echo "FAIL: address proto $what"
+		end_test "FAIL: address proto $what"
 		return 1
 	fi
-	echo "PASS: address proto $what"
+	end_test "PASS: address proto $what"
 }
 
 kci_test_address_proto()
@@ -1435,7 +1246,7 @@ kci_test_rtnl()
 
 	kci_add_dummy
 	if [ $ret -ne 0 ];then
-		echo "FAIL: cannot add dummy interface"
+		end_test "FAIL: cannot add dummy interface"
 		return 1
 	fi
 
@@ -1455,31 +1266,39 @@ usage: ${0##*/} OPTS
 
         -t <test>   Test(s) to run (default: all)
                     (options: $(echo $ALL_TESTS))
+        -v          Verbose mode (show commands and output)
+        -P          Pause after every test
+        -p          Pause after every failing test before cleanup (for debugging)
 EOF
 }
 
 #check for needed privileges
 if [ "$(id -u)" -ne 0 ];then
-	echo "SKIP: Need root privileges"
+	end_test "SKIP: Need root privileges"
 	exit $ksft_skip
 fi
 
 for x in ip tc;do
 	$x -Version 2>/dev/null >/dev/null
 	if [ $? -ne 0 ];then
-		echo "SKIP: Could not run test without the $x tool"
+		end_test "SKIP: Could not run test without the $x tool"
 		exit $ksft_skip
 	fi
 done
 
-while getopts t:h o; do
+while getopts t:hvpP o; do
 	case $o in
 		t) TESTS=$OPTARG;;
+		v) VERBOSE=1;;
+		p) PAUSE_ON_FAIL=yes;;
+		P) PAUSE=yes;;
 		h) usage; exit 0;;
 		*) usage; exit 1;;
 	esac
 done
 
+[ $PAUSE = "yes" ] && PAUSE_ON_FAIL="no"
+
 kci_test_rtnl
 
 exit $?
diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/fw.json b/tools/testing/selftests/tc-testing/tc-tests/filters/fw.json
index 5272049566d6..a4a83fb3e96f 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/filters/fw.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/filters/fw.json
@@ -1351,5 +1351,54 @@
         "teardown": [
             "$TC qdisc del dev $DEV1 ingress"
         ]
+    },
+    {
+        "id": "e470",
+        "name": "Try to delete class referenced by fw after a replace",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 parent root handle 10: drr",
+            "$TC class add dev $DEV1 parent root classid 1 drr",
+            "$TC filter add dev $DEV1 parent 10: handle 1 prio 1 fw classid 10:1 action ok",
+            "$TC filter replace dev $DEV1 parent 10: handle 1 prio 1 fw classid 10:1 action drop"
+        ],
+        "cmdUnderTest": "$TC class delete dev $DEV1 parent 10: classid 10:1",
+        "expExitCode": "2",
+        "verifyCmd": "$TC class show dev $DEV1",
+        "matchPattern": "class drr 10:1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 parent root drr"
+        ]
+    },
+    {
+        "id": "ec1a",
+        "name": "Replace fw classid with nil",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 parent root handle 10: drr",
+            "$TC class add dev $DEV1 parent root classid 1 drr",
+            "$TC filter add dev $DEV1 parent 10: handle 1 prio 1 fw classid 10:1 action ok"
+        ],
+        "cmdUnderTest": "$TC filter replace dev $DEV1 parent 10: handle 1 prio 1 fw action drop",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show dev $DEV1 parent 10:",
+        "matchPattern": "fw chain 0 handle 0x1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 parent root drr"
+        ]
     }
 ]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/route.json b/tools/testing/selftests/tc-testing/tc-tests/filters/route.json
index 1f6f19f02997..8d8de8f65aef 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/filters/route.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/filters/route.json
@@ -177,5 +177,30 @@
         "teardown": [
             "$TC qdisc del dev $DEV1 ingress"
         ]
+    },
+    {
+        "id": "b042",
+        "name": "Try to delete class referenced by route after a replace",
+        "category": [
+            "filter",
+            "route"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 parent root handle 10: drr",
+            "$TC class add dev $DEV1 parent root classid 1 drr",
+            "$TC filter add dev $DEV1 parent 10: prio 1 route from 10 classid 10:1 action ok",
+            "$TC filter replace dev $DEV1 parent 10: prio 1 route from 5 classid 10:1 action drop"
+        ],
+        "cmdUnderTest": "$TC class delete dev $DEV1 parent 10: classid 10:1",
+        "expExitCode": "2",
+        "verifyCmd": "$TC class show dev $DEV1",
+        "matchPattern": "class drr 10:1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 parent root drr"
+        ]
     }
 ]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json b/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json
index bd64a4bf11ab..ddc7c355be0a 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json
@@ -247,5 +247,30 @@
         "teardown": [
             "$TC qdisc del dev $DEV1 ingress"
         ]
+    },
+    {
+        "id": "0c37",
+        "name": "Try to delete class referenced by u32 after a replace",
+        "category": [
+            "filter",
+            "u32"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 parent root handle 10: drr",
+            "$TC class add dev $DEV1 parent root classid 1 drr",
+            "$TC filter add dev $DEV1 parent 10: prio 1 u32 match icmp type 1 0xff classid 10:1 action ok",
+            "$TC filter replace dev $DEV1 parent 10: prio 1 u32 match icmp type 1 0xff classid 10:1 action drop"
+        ],
+        "cmdUnderTest": "$TC class delete dev $DEV1 parent 10: classid 10:1",
+        "expExitCode": "2",
+        "verifyCmd": "$TC class show dev $DEV1",
+        "matchPattern": "class drr 10:1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 parent root drr"
+        ]
     }
 ]
diff --git a/tools/testing/vsock/util.c b/tools/testing/vsock/util.c
index 01b636d3039a..6779d5008b27 100644
--- a/tools/testing/vsock/util.c
+++ b/tools/testing/vsock/util.c
@@ -211,102 +211,138 @@ int vsock_seqpacket_accept(unsigned int cid, unsigned int port,
 	return vsock_accept(cid, port, clientaddrp, SOCK_SEQPACKET);
 }
 
-/* Transmit one byte and check the return value.
+/* Transmit bytes from a buffer and check the return value.
  *
  * expected_ret:
  *  <0 Negative errno (for testing errors)
  *   0 End-of-file
- *   1 Success
+ *  >0 Success (bytes successfully written)
  */
-void send_byte(int fd, int expected_ret, int flags)
+void send_buf(int fd, const void *buf, size_t len, int flags,
+	      ssize_t expected_ret)
 {
-	const uint8_t byte = 'A';
-	ssize_t nwritten;
+	ssize_t nwritten = 0;
+	ssize_t ret;
 
 	timeout_begin(TIMEOUT);
 	do {
-		nwritten = send(fd, &byte, sizeof(byte), flags);
-		timeout_check("write");
-	} while (nwritten < 0 && errno == EINTR);
+		ret = send(fd, buf + nwritten, len - nwritten, flags);
+		timeout_check("send");
+
+		if (ret == 0 || (ret < 0 && errno != EINTR))
+			break;
+
+		nwritten += ret;
+	} while (nwritten < len);
 	timeout_end();
 
 	if (expected_ret < 0) {
-		if (nwritten != -1) {
-			fprintf(stderr, "bogus send(2) return value %zd\n",
-				nwritten);
+		if (ret != -1) {
+			fprintf(stderr, "bogus send(2) return value %zd (expected %zd)\n",
+				ret, expected_ret);
 			exit(EXIT_FAILURE);
 		}
 		if (errno != -expected_ret) {
-			perror("write");
+			perror("send");
 			exit(EXIT_FAILURE);
 		}
 		return;
 	}
 
-	if (nwritten < 0) {
-		perror("write");
+	if (ret < 0) {
+		perror("send");
 		exit(EXIT_FAILURE);
 	}
-	if (nwritten == 0) {
-		if (expected_ret == 0)
-			return;
 
-		fprintf(stderr, "unexpected EOF while sending byte\n");
-		exit(EXIT_FAILURE);
-	}
-	if (nwritten != sizeof(byte)) {
-		fprintf(stderr, "bogus send(2) return value %zd\n", nwritten);
+	if (nwritten != expected_ret) {
+		if (ret == 0)
+			fprintf(stderr, "unexpected EOF while sending bytes\n");
+
+		fprintf(stderr, "bogus send(2) bytes written %zd (expected %zd)\n",
+			nwritten, expected_ret);
 		exit(EXIT_FAILURE);
 	}
 }
 
-/* Receive one byte and check the return value.
+/* Receive bytes in a buffer and check the return value.
  *
  * expected_ret:
  *  <0 Negative errno (for testing errors)
  *   0 End-of-file
- *   1 Success
+ *  >0 Success (bytes successfully read)
  */
-void recv_byte(int fd, int expected_ret, int flags)
+void recv_buf(int fd, void *buf, size_t len, int flags, ssize_t expected_ret)
 {
-	uint8_t byte;
-	ssize_t nread;
+	ssize_t nread = 0;
+	ssize_t ret;
 
 	timeout_begin(TIMEOUT);
 	do {
-		nread = recv(fd, &byte, sizeof(byte), flags);
-		timeout_check("read");
-	} while (nread < 0 && errno == EINTR);
+		ret = recv(fd, buf + nread, len - nread, flags);
+		timeout_check("recv");
+
+		if (ret == 0 || (ret < 0 && errno != EINTR))
+			break;
+
+		nread += ret;
+	} while (nread < len);
 	timeout_end();
 
 	if (expected_ret < 0) {
-		if (nread != -1) {
-			fprintf(stderr, "bogus recv(2) return value %zd\n",
-				nread);
+		if (ret != -1) {
+			fprintf(stderr, "bogus recv(2) return value %zd (expected %zd)\n",
+				ret, expected_ret);
 			exit(EXIT_FAILURE);
 		}
 		if (errno != -expected_ret) {
-			perror("read");
+			perror("recv");
 			exit(EXIT_FAILURE);
 		}
 		return;
 	}
 
-	if (nread < 0) {
-		perror("read");
+	if (ret < 0) {
+		perror("recv");
 		exit(EXIT_FAILURE);
 	}
-	if (nread == 0) {
-		if (expected_ret == 0)
-			return;
 
-		fprintf(stderr, "unexpected EOF while receiving byte\n");
-		exit(EXIT_FAILURE);
-	}
-	if (nread != sizeof(byte)) {
-		fprintf(stderr, "bogus recv(2) return value %zd\n", nread);
+	if (nread != expected_ret) {
+		if (ret == 0)
+			fprintf(stderr, "unexpected EOF while receiving bytes\n");
+
+		fprintf(stderr, "bogus recv(2) bytes read %zd (expected %zd)\n",
+			nread, expected_ret);
 		exit(EXIT_FAILURE);
 	}
+}
+
+/* Transmit one byte and check the return value.
+ *
+ * expected_ret:
+ *  <0 Negative errno (for testing errors)
+ *   0 End-of-file
+ *   1 Success
+ */
+void send_byte(int fd, int expected_ret, int flags)
+{
+	const uint8_t byte = 'A';
+
+	send_buf(fd, &byte, sizeof(byte), flags, expected_ret);
+}
+
+/* Receive one byte and check the return value.
+ *
+ * expected_ret:
+ *  <0 Negative errno (for testing errors)
+ *   0 End-of-file
+ *   1 Success
+ */
+void recv_byte(int fd, int expected_ret, int flags)
+{
+	uint8_t byte;
+
+	recv_buf(fd, &byte, sizeof(byte), flags, expected_ret);
+
 	if (byte != 'A') {
 		fprintf(stderr, "unexpected byte read %c\n", byte);
 		exit(EXIT_FAILURE);
diff --git a/tools/testing/vsock/util.h b/tools/testing/vsock/util.h
index fb99208a95ea..e5407677ce05 100644
--- a/tools/testing/vsock/util.h
+++ b/tools/testing/vsock/util.h
@@ -42,6 +42,9 @@ int vsock_stream_accept(unsigned int cid, unsigned int port,
 int vsock_seqpacket_accept(unsigned int cid, unsigned int port,
 			   struct sockaddr_vm *clientaddrp);
 void vsock_wait_remote_close(int fd);
+void send_buf(int fd, const void *buf, size_t len, int flags,
+	      ssize_t expected_ret);
+void recv_buf(int fd, void *buf, size_t len, int flags, ssize_t expected_ret);
 void send_byte(int fd, int expected_ret, int flags);
 void recv_byte(int fd, int expected_ret, int flags);
 void run_tests(const struct test_case *test_cases,
diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c
index 90718c2fd4ea..da4cb819a183 100644
--- a/tools/testing/vsock/vsock_test.c
+++ b/tools/testing/vsock/vsock_test.c
@@ -19,6 +19,7 @@
 #include <time.h>
 #include <sys/mman.h>
 #include <poll.h>
+#include <signal.h>
 
 #include "timeout.h"
 #include "control.h"
@@ -261,7 +262,6 @@ static void test_msg_peek_client(const struct test_opts *opts,
 				 bool seqpacket)
 {
 	unsigned char buf[MSG_PEEK_BUF_LEN];
-	ssize_t send_size;
 	int fd;
 	int i;
 
@@ -280,17 +280,7 @@ static void test_msg_peek_client(const struct test_opts *opts,
 
 	control_expectln("SRVREADY");
 
-	send_size = send(fd, buf, sizeof(buf), 0);
-
-	if (send_size < 0) {
-		perror("send");
-		exit(EXIT_FAILURE);
-	}
-
-	if (send_size != sizeof(buf)) {
-		fprintf(stderr, "Invalid send size %zi\n", send_size);
-		exit(EXIT_FAILURE);
-	}
+	send_buf(fd, buf, sizeof(buf), 0, sizeof(buf));
 
 	close(fd);
 }
@@ -301,7 +291,6 @@ static void test_msg_peek_server(const struct test_opts *opts,
 	unsigned char buf_half[MSG_PEEK_BUF_LEN / 2];
 	unsigned char buf_normal[MSG_PEEK_BUF_LEN];
 	unsigned char buf_peek[MSG_PEEK_BUF_LEN];
-	ssize_t res;
 	int fd;
 
 	if (seqpacket)
@@ -315,34 +304,16 @@ static void test_msg_peek_server(const struct test_opts *opts,
 	}
 
 	/* Peek from empty socket. */
-	res = recv(fd, buf_peek, sizeof(buf_peek), MSG_PEEK | MSG_DONTWAIT);
-	if (res != -1) {
-		fprintf(stderr, "expected recv(2) failure, got %zi\n", res);
-		exit(EXIT_FAILURE);
-	}
-
-	if (errno != EAGAIN) {
-		perror("EAGAIN expected");
-		exit(EXIT_FAILURE);
-	}
+	recv_buf(fd, buf_peek, sizeof(buf_peek), MSG_PEEK | MSG_DONTWAIT,
+		 -EAGAIN);
 
 	control_writeln("SRVREADY");
 
 	/* Peek part of data. */
-	res = recv(fd, buf_half, sizeof(buf_half), MSG_PEEK);
-	if (res != sizeof(buf_half)) {
-		fprintf(stderr, "recv(2) + MSG_PEEK, expected %zu, got %zi\n",
-			sizeof(buf_half), res);
-		exit(EXIT_FAILURE);
-	}
+	recv_buf(fd, buf_half, sizeof(buf_half), MSG_PEEK, sizeof(buf_half));
 
 	/* Peek whole data. */
-	res = recv(fd, buf_peek, sizeof(buf_peek), MSG_PEEK);
-	if (res != sizeof(buf_peek)) {
-		fprintf(stderr, "recv(2) + MSG_PEEK, expected %zu, got %zi\n",
-			sizeof(buf_peek), res);
-		exit(EXIT_FAILURE);
-	}
+	recv_buf(fd, buf_peek, sizeof(buf_peek), MSG_PEEK, sizeof(buf_peek));
 
 	/* Compare partial and full peek. */
 	if (memcmp(buf_half, buf_peek, sizeof(buf_half))) {
@@ -355,22 +326,11 @@ static void test_msg_peek_server(const struct test_opts *opts,
 		 * so check it with MSG_PEEK. We must get length
 		 * of the message.
 		 */
-		res = recv(fd, buf_half, sizeof(buf_half), MSG_PEEK |
-			   MSG_TRUNC);
-		if (res != sizeof(buf_peek)) {
-			fprintf(stderr,
-				"recv(2) + MSG_PEEK | MSG_TRUNC, exp %zu, got %zi\n",
-				sizeof(buf_half), res);
-			exit(EXIT_FAILURE);
-		}
+		recv_buf(fd, buf_half, sizeof(buf_half), MSG_PEEK | MSG_TRUNC,
+			 sizeof(buf_peek));
 	}
 
-	res = recv(fd, buf_normal, sizeof(buf_normal), 0);
-	if (res != sizeof(buf_normal)) {
-		fprintf(stderr, "recv(2), expected %zu, got %zi\n",
-			sizeof(buf_normal), res);
-		exit(EXIT_FAILURE);
-	}
+	recv_buf(fd, buf_normal, sizeof(buf_normal), 0, sizeof(buf_normal));
 
 	/* Compare full peek and normal read. */
 	if (memcmp(buf_peek, buf_normal, sizeof(buf_peek))) {
@@ -415,7 +375,6 @@ static void test_seqpacket_msg_bounds_client(const struct test_opts *opts)
 	msg_count = SOCK_BUF_SIZE / MAX_MSG_SIZE;
 
 	for (int i = 0; i < msg_count; i++) {
-		ssize_t send_size;
 		size_t buf_size;
 		int flags;
 		void *buf;
@@ -443,17 +402,7 @@ static void test_seqpacket_msg_bounds_client(const struct test_opts *opts)
 			flags = 0;
 		}
 
-		send_size = send(fd, buf, buf_size, flags);
-
-		if (send_size < 0) {
-			perror("send");
-			exit(EXIT_FAILURE);
-		}
-
-		if (send_size != buf_size) {
-			fprintf(stderr, "Invalid send size\n");
-			exit(EXIT_FAILURE);
-		}
+		send_buf(fd, buf, buf_size, flags, buf_size);
 
 		/*
 		 * Hash sum is computed at both client and server in
@@ -554,10 +503,7 @@ static void test_seqpacket_msg_trunc_client(const struct test_opts *opts)
 		exit(EXIT_FAILURE);
 	}
 
-	if (send(fd, buf, sizeof(buf), 0) != sizeof(buf)) {
-		perror("send failed");
-		exit(EXIT_FAILURE);
-	}
+	send_buf(fd, buf, sizeof(buf), 0, sizeof(buf));
 
 	control_writeln("SENDDONE");
 	close(fd);
@@ -679,7 +625,6 @@ static void test_seqpacket_timeout_server(const struct test_opts *opts)
 static void test_seqpacket_bigmsg_client(const struct test_opts *opts)
 {
 	unsigned long sock_buf_size;
-	ssize_t send_size;
 	socklen_t len;
 	void *data;
 	int fd;
@@ -706,18 +651,7 @@ static void test_seqpacket_bigmsg_client(const struct test_opts *opts)
 		exit(EXIT_FAILURE);
 	}
 
-	send_size = send(fd, data, sock_buf_size, 0);
-	if (send_size != -1) {
-		fprintf(stderr, "expected 'send(2)' failure, got %zi\n",
-			send_size);
-		exit(EXIT_FAILURE);
-	}
-
-	if (errno != EMSGSIZE) {
-		fprintf(stderr, "expected EMSGSIZE in 'errno', got %i\n",
-			errno);
-		exit(EXIT_FAILURE);
-	}
+	send_buf(fd, data, sock_buf_size, 0, -EMSGSIZE);
 
 	control_writeln("CLISENT");
 
@@ -771,15 +705,9 @@ static void test_seqpacket_invalid_rec_buffer_client(const struct test_opts *opt
 	memset(buf1, BUF_PATTERN_1, buf_size);
 	memset(buf2, BUF_PATTERN_2, buf_size);
 
-	if (send(fd, buf1, buf_size, 0) != buf_size) {
-		perror("send failed");
-		exit(EXIT_FAILURE);
-	}
+	send_buf(fd, buf1, buf_size, 0, buf_size);
 
-	if (send(fd, buf2, buf_size, 0) != buf_size) {
-		perror("send failed");
-		exit(EXIT_FAILURE);
-	}
+	send_buf(fd, buf2, buf_size, 0, buf_size);
 
 	close(fd);
 }
@@ -900,7 +828,6 @@ static void test_stream_poll_rcvlowat_client(const struct test_opts *opts)
 	unsigned long lowat_val = RCVLOWAT_BUF_SIZE;
 	char buf[RCVLOWAT_BUF_SIZE];
 	struct pollfd fds;
-	ssize_t read_res;
 	short poll_flags;
 	int fd;
 
@@ -955,12 +882,7 @@ static void test_stream_poll_rcvlowat_client(const struct test_opts *opts)
 	/* Use MSG_DONTWAIT, if call is going to wait, EAGAIN
 	 * will be returned.
 	 */
-	read_res = recv(fd, buf, sizeof(buf), MSG_DONTWAIT);
-	if (read_res != RCVLOWAT_BUF_SIZE) {
-		fprintf(stderr, "Unexpected recv result %zi\n",
-			read_res);
-		exit(EXIT_FAILURE);
-	}
+	recv_buf(fd, buf, sizeof(buf), MSG_DONTWAIT, RCVLOWAT_BUF_SIZE);
 
 	control_writeln("POLLDONE");
 
@@ -972,7 +894,7 @@ static void test_stream_poll_rcvlowat_client(const struct test_opts *opts)
 static void test_inv_buf_client(const struct test_opts *opts, bool stream)
 {
 	unsigned char data[INV_BUF_TEST_DATA_LEN] = {0};
-	ssize_t ret;
+	ssize_t expected_ret;
 	int fd;
 
 	if (stream)
@@ -988,39 +910,18 @@ static void test_inv_buf_client(const struct test_opts *opts, bool stream)
 	control_expectln("SENDDONE");
 
 	/* Use invalid buffer here. */
-	ret = recv(fd, NULL, sizeof(data), 0);
-	if (ret != -1) {
-		fprintf(stderr, "expected recv(2) failure, got %zi\n", ret);
-		exit(EXIT_FAILURE);
-	}
-
-	if (errno != EFAULT) {
-		fprintf(stderr, "unexpected recv(2) errno %d\n", errno);
-		exit(EXIT_FAILURE);
-	}
-
-	ret = recv(fd, data, sizeof(data), MSG_DONTWAIT);
+	recv_buf(fd, NULL, sizeof(data), 0, -EFAULT);
 
 	if (stream) {
 		/* For SOCK_STREAM we must continue reading. */
-		if (ret != sizeof(data)) {
-			fprintf(stderr, "expected recv(2) success, got %zi\n", ret);
-			exit(EXIT_FAILURE);
-		}
-		/* Don't check errno in case of success. */
+		expected_ret = sizeof(data);
 	} else {
 		/* For SOCK_SEQPACKET socket's queue must be empty. */
-		if (ret != -1) {
-			fprintf(stderr, "expected recv(2) failure, got %zi\n", ret);
-			exit(EXIT_FAILURE);
-		}
-
-		if (errno != EAGAIN) {
-			fprintf(stderr, "unexpected recv(2) errno %d\n", errno);
-			exit(EXIT_FAILURE);
-		}
+		expected_ret = -EAGAIN;
 	}
 
+	recv_buf(fd, data, sizeof(data), MSG_DONTWAIT, expected_ret);
+
 	control_writeln("DONE");
 
 	close(fd);
@@ -1029,7 +930,6 @@ static void test_inv_buf_client(const struct test_opts *opts, bool stream)
 static void test_inv_buf_server(const struct test_opts *opts, bool stream)
 {
 	unsigned char data[INV_BUF_TEST_DATA_LEN] = {0};
-	ssize_t res;
 	int fd;
 
 	if (stream)
@@ -1042,11 +942,7 @@ static void test_inv_buf_server(const struct test_opts *opts, bool stream)
 		exit(EXIT_FAILURE);
 	}
 
-	res = send(fd, data, sizeof(data), 0);
-	if (res != sizeof(data)) {
-		fprintf(stderr, "unexpected send(2) result %zi\n", res);
-		exit(EXIT_FAILURE);
-	}
+	send_buf(fd, data, sizeof(data), 0, sizeof(data));
 
 	control_writeln("SENDDONE");
 
@@ -1080,7 +976,6 @@ static void test_seqpacket_inv_buf_server(const struct test_opts *opts)
 
 static void test_stream_virtio_skb_merge_client(const struct test_opts *opts)
 {
-	ssize_t res;
 	int fd;
 
 	fd = vsock_stream_connect(opts->peer_cid, 1234);
@@ -1090,22 +985,14 @@ static void test_stream_virtio_skb_merge_client(const struct test_opts *opts)
 	}
 
 	/* Send first skbuff. */
-	res = send(fd, HELLO_STR, strlen(HELLO_STR), 0);
-	if (res != strlen(HELLO_STR)) {
-		fprintf(stderr, "unexpected send(2) result %zi\n", res);
-		exit(EXIT_FAILURE);
-	}
+	send_buf(fd, HELLO_STR, strlen(HELLO_STR), 0, strlen(HELLO_STR));
 
 	control_writeln("SEND0");
 	/* Peer reads part of first skbuff. */
 	control_expectln("REPLY0");
 
 	/* Send second skbuff, it will be appended to the first. */
-	res = send(fd, WORLD_STR, strlen(WORLD_STR), 0);
-	if (res != strlen(WORLD_STR)) {
-		fprintf(stderr, "unexpected send(2) result %zi\n", res);
-		exit(EXIT_FAILURE);
-	}
+	send_buf(fd, WORLD_STR, strlen(WORLD_STR), 0, strlen(WORLD_STR));
 
 	control_writeln("SEND1");
 	/* Peer reads merged skbuff packet. */
@@ -1116,8 +1003,8 @@ static void test_stream_virtio_skb_merge_client(const struct test_opts *opts)
 
 static void test_stream_virtio_skb_merge_server(const struct test_opts *opts)
 {
+	size_t read = 0, to_read;
 	unsigned char buf[64];
-	ssize_t res;
 	int fd;
 
 	fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL);
@@ -1129,26 +1016,21 @@ static void test_stream_virtio_skb_merge_server(const struct test_opts *opts)
 	control_expectln("SEND0");
 
 	/* Read skbuff partially. */
-	res = recv(fd, buf, 2, 0);
-	if (res != 2) {
-		fprintf(stderr, "expected recv(2) returns 2 bytes, got %zi\n", res);
-		exit(EXIT_FAILURE);
-	}
+	to_read = 2;
+	recv_buf(fd, buf + read, to_read, 0, to_read);
+	read += to_read;
 
 	control_writeln("REPLY0");
 	control_expectln("SEND1");
 
-	res = recv(fd, buf + 2, sizeof(buf) - 2, 0);
-	if (res != 8) {
-		fprintf(stderr, "expected recv(2) returns 8 bytes, got %zi\n", res);
-		exit(EXIT_FAILURE);
-	}
+	/* Read the rest of both buffers */
+	to_read = strlen(HELLO_STR WORLD_STR) - read;
+	recv_buf(fd, buf + read, to_read, 0, to_read);
+	read += to_read;
 
-	res = recv(fd, buf, sizeof(buf) - 8 - 2, MSG_DONTWAIT);
-	if (res != -1) {
-		fprintf(stderr, "expected recv(2) failure, got %zi\n", res);
-		exit(EXIT_FAILURE);
-	}
+	/* No more bytes should be there */
+	to_read = sizeof(buf) - read;
+	recv_buf(fd, buf + read, to_read, MSG_DONTWAIT, -EAGAIN);
 
 	if (memcmp(buf, HELLO_STR WORLD_STR, strlen(HELLO_STR WORLD_STR))) {
 		fprintf(stderr, "pattern mismatch\n");
@@ -1170,6 +1052,133 @@ static void test_seqpacket_msg_peek_server(const struct test_opts *opts)
 	return test_msg_peek_server(opts, true);
 }
 
+static sig_atomic_t have_sigpipe;
+
+static void sigpipe(int signo)
+{
+	have_sigpipe = 1;
+}
+
+static void test_stream_check_sigpipe(int fd)
+{
+	ssize_t res;
+
+	have_sigpipe = 0;
+
+	res = send(fd, "A", 1, 0);
+	if (res != -1) {
+		fprintf(stderr, "expected send(2) failure, got %zi\n", res);
+		exit(EXIT_FAILURE);
+	}
+
+	if (!have_sigpipe) {
+		fprintf(stderr, "SIGPIPE expected\n");
+		exit(EXIT_FAILURE);
+	}
+
+	have_sigpipe = 0;
+
+	res = send(fd, "A", 1, MSG_NOSIGNAL);
+	if (res != -1) {
+		fprintf(stderr, "expected send(2) failure, got %zi\n", res);
+		exit(EXIT_FAILURE);
+	}
+
+	if (have_sigpipe) {
+		fprintf(stderr, "SIGPIPE not expected\n");
+		exit(EXIT_FAILURE);
+	}
+}
+
+static void test_stream_shutwr_client(const struct test_opts *opts)
+{
+	int fd;
+
+	struct sigaction act = {
+		.sa_handler = sigpipe,
+	};
+
+	sigaction(SIGPIPE, &act, NULL);
+
+	fd = vsock_stream_connect(opts->peer_cid, 1234);
+	if (fd < 0) {
+		perror("connect");
+		exit(EXIT_FAILURE);
+	}
+
+	if (shutdown(fd, SHUT_WR)) {
+		perror("shutdown");
+		exit(EXIT_FAILURE);
+	}
+
+	test_stream_check_sigpipe(fd);
+
+	control_writeln("CLIENTDONE");
+
+	close(fd);
+}
+
+static void test_stream_shutwr_server(const struct test_opts *opts)
+{
+	int fd;
+
+	fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL);
+	if (fd < 0) {
+		perror("accept");
+		exit(EXIT_FAILURE);
+	}
+
+	control_expectln("CLIENTDONE");
+
+	close(fd);
+}
+
+static void test_stream_shutrd_client(const struct test_opts *opts)
+{
+	int fd;
+
+	struct sigaction act = {
+		.sa_handler = sigpipe,
+	};
+
+	sigaction(SIGPIPE, &act, NULL);
+
+	fd = vsock_stream_connect(opts->peer_cid, 1234);
+	if (fd < 0) {
+		perror("connect");
+		exit(EXIT_FAILURE);
+	}
+
+	control_expectln("SHUTRDDONE");
+
+	test_stream_check_sigpipe(fd);
+
+	control_writeln("CLIENTDONE");
+
+	close(fd);
+}
+
+static void test_stream_shutrd_server(const struct test_opts *opts)
+{
+	int fd;
+
+	fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL);
+	if (fd < 0) {
+		perror("accept");
+		exit(EXIT_FAILURE);
+	}
+
+	if (shutdown(fd, SHUT_RD)) {
+		perror("shutdown");
+		exit(EXIT_FAILURE);
+	}
+
+	control_writeln("SHUTRDDONE");
+	control_expectln("CLIENTDONE");
+
+	close(fd);
+}
+
 static struct test_case test_cases[] = {
 	{
 		.name = "SOCK_STREAM connection reset",
@@ -1250,6 +1259,16 @@ static struct test_case test_cases[] = {
 		.run_client = test_seqpacket_msg_peek_client,
 		.run_server = test_seqpacket_msg_peek_server,
 	},
+	{
+		.name = "SOCK_STREAM SHUT_WR",
+		.run_client = test_stream_shutwr_client,
+		.run_server = test_stream_shutwr_server,
+	},
+	{
+		.name = "SOCK_STREAM SHUT_RD",
+		.run_client = test_stream_shutrd_client,
+		.run_server = test_stream_shutrd_server,
+	},
 	{},
 };
author	Huacai Chen <chenhuacai@loongson.cn>	2023-11-01 10:55:00 +0800
committer	Huacai Chen <chenhuacai@loongson.cn>	2023-11-01 10:55:00 +0800
commit	a6bdc082ad1c91d389a6ba0c7a1945818f732114 (patch)
tree	fa630701d5d2a8bc1ab8c4abf759663bbb81aeeb
parent	ffc253263a1375a65fa6c9f62a893e9767fbebfa (diff)
parent	99c9991f4e5d77328187187d0c921a3b62bfa998 (diff)